From 3f8fa1a7789faf5a291a22e75bbd42359b076c1f Mon Sep 17 00:00:00 2001
From: Jeremy Lewi <jeremy@lewi.us>
Date: Fri, 25 Oct 2024 14:23:34 -0700
Subject: [PATCH] Improve Handling Of Long Outputs (#328)

# Problem

Cell outputs can be very long. For example, if we run a query (gcloud,
SQL, etc...) the output could be very verbose. This output could eat up
the entire context allocated for the input document. As a result, we
might not have sufficiently meaningful context to prompt the model.

There was another bug in our doc tailer. We were applying character
limits to the rendered markdown. We were imposing this by tailing the
lines. This could produce invalid markdown. For example, we might end up
truncating the document in the middle of a code block so we wouldn't
have the opening triple quotes for the code block. We might also include
the output of the code block without including the code that it is
output for.

# Solution

First, we impose character limits in a way that is aware of cell
boundaries. We move truncation into the Block to Markdown conversion.
The conversion now takes the maximum length for the output string. The
conversion routine then figures out how much to allocate to the contents
of the cell and its outputs. This allows truncation to happen in a way
that can respect cell boundaries.

Second, if we truncate the code block or output we output a string
indicating that the output was truncated. We want the model to know that
output was truncated. We update our prompt to tell the LLM to look for
truncated output and to potentially deal with this by running commands
that will provide less verbose output.



Fix #299
---
 app/pkg/agent/prompt.tmpl               |  8 +++
 app/pkg/agent/test_data/examples.txt    |  8 +++
 app/pkg/agent/test_data/no_examples.txt |  8 +++
 app/pkg/docs/const.go                   |  1 +
 app/pkg/docs/converters.go              | 71 ++++++++++++++++++++++---
 app/pkg/docs/converters_test.go         | 27 ++++++++--
 app/pkg/docs/tailer.go                  | 24 ++++-----
 app/pkg/docs/tailer_test.go             | 35 ++++++++++--
 8 files changed, 151 insertions(+), 31 deletions(-)
diff --git a/app/pkg/agent/prompt.tmpl b/app/pkg/agent/prompt.tmpl
index b2d4699..9b0c82c 100644
--- a/app/pkg/agent/prompt.tmpl
+++ b/app/pkg/agent/prompt.tmpl
@@ -84,6 +84,14 @@ Google Cloud service account developer@foyle-dev.iam.gserviceaccount.com.
 </reasoning>
 </example>
 
+* If the output of a command is really long it will be truncated as indicated by the string "<...stdout was truncated...>"
+* If the truncated output contains critical information to figure out what to do next, you should respond with a
+  suggestion on how to run the command so as to produce just the information you need with less verbosity
+
+  * If logging or SQL queries leads to truncated output, suggest alternative queries with
+    clauses to restrict the output to the rows and fields you need
+  * If dumping large JSON/YAML blobs leads to truncated output, provide a command to 1) save the data to a file and 2) then use tools like jq or yq to read the
+    fields you need
 
 {{if .Examples}}
 Here are a bunch of examples of input documents along with the expected output.
diff --git a/app/pkg/agent/test_data/examples.txt b/app/pkg/agent/test_data/examples.txt
index daf76d5..23ec94f 100644
--- a/app/pkg/agent/test_data/examples.txt
+++ b/app/pkg/agent/test_data/examples.txt
@@ -84,6 +84,14 @@ Google Cloud service account developer@foyle-dev.iam.gserviceaccount.com.
 </reasoning>
 </example>
 
+* If the output of a command is really long it will be truncated as indicated by the string "<...stdout was truncated...>"
+* If the truncated output contains critical information to figure out what to do next, you should respond with a
+  suggestion on how to run the command so as to produce just the information you need with less verbosity
+
+  * If logging or SQL queries leads to truncated output, suggest alternative queries with
+    clauses to restrict the output to the rows and fields you need
+  * If dumping large JSON/YAML blobs leads to truncated output, provide a command to 1) save the data to a file and 2) then use tools like jq or yq to read the
+    fields you need
 
 
 Here are a bunch of examples of input documents along with the expected output.
diff --git a/app/pkg/agent/test_data/no_examples.txt b/app/pkg/agent/test_data/no_examples.txt
index 34ff679..db8f05e 100644
--- a/app/pkg/agent/test_data/no_examples.txt
+++ b/app/pkg/agent/test_data/no_examples.txt
@@ -84,6 +84,14 @@ Google Cloud service account developer@foyle-dev.iam.gserviceaccount.com.
 </reasoning>
 </example>
 
+* If the output of a command is really long it will be truncated as indicated by the string "<...stdout was truncated...>"
+* If the truncated output contains critical information to figure out what to do next, you should respond with a
+  suggestion on how to run the command so as to produce just the information you need with less verbosity
+
+  * If logging or SQL queries leads to truncated output, suggest alternative queries with
+    clauses to restrict the output to the rows and fields you need
+  * If dumping large JSON/YAML blobs leads to truncated output, provide a command to 1) save the data to a file and 2) then use tools like jq or yq to read the
+    fields you need
 
 
 Here's the actual document containing the problem or task to be solved:
diff --git a/app/pkg/docs/const.go b/app/pkg/docs/const.go
index d676a6a..b4ea01c 100644
--- a/app/pkg/docs/const.go
+++ b/app/pkg/docs/const.go
@@ -14,4 +14,5 @@ const (
 	//    https://github.com/jlewi/foyle/issues/286
 	StatefulRunmeOutputItemsMimeType = "stateful.runme/output-items"
 	StatefulRunmeTerminalMimeType    = "stateful.runme/terminal"
+	VSCodeNotebookStdOutMimeType     = "application/vnd.code.notebook.stdout "
 )
diff --git a/app/pkg/docs/converters.go b/app/pkg/docs/converters.go
index c05a7b3..e27919b 100644
--- a/app/pkg/docs/converters.go
+++ b/app/pkg/docs/converters.go
@@ -1,6 +1,7 @@
 package docs
 
 import (
+	"math"
 	"strings"
 
 	"github.com/jlewi/foyle/app/pkg/runme/converters"
@@ -11,29 +12,70 @@ import (
 	"github.com/stateful/runme/v3/pkg/document/editor"
 )
 
+const (
+	codeTruncationMessage = "<...code was truncated...>"
+	truncationMessage     = "<...stdout was truncated...>"
+)
+
 // BlockToMarkdown converts a block to markdown
-func BlockToMarkdown(block *v1alpha1.Block) string {
+// maxLength is a maximum length for the generated markdown. This is a soft limit and may be exceeded slightly
+// because we don't account for some characters like the outputLength and the truncation message
+// A value <=0 means no limit.
+func BlockToMarkdown(block *v1alpha1.Block, maxLength int) string {
 	sb := strings.Builder{}
-	writeBlockMarkdown(&sb, block)
+	writeBlockMarkdown(&sb, block, maxLength)
 	return sb.String()
 }
 
-func writeBlockMarkdown(sb *strings.Builder, block *v1alpha1.Block) {
+func writeBlockMarkdown(sb *strings.Builder, block *v1alpha1.Block, maxLength int) {
+
+	maxInputLength := -1
+	maxOutputLength := -1
+
+	if maxLength > 0 {
+		// Allocate 50% of the max length for input and output
+		// This is crude. Arguably we could be dynamic e.g. if the output is < .5 maxLength we should allocate
+		// the unused capacity for inputs. But for simplicity we don't do that. We do allocate unused input capacity
+		// to the output. In practice outputs tend to be much longer than inputs. Inputs are human authored
+		// whereas outputs are more likely to be produced by a machine (e.g. log output) and therefore very long
+		maxInputLength = int(math.Floor(0.5*float64(maxLength)) + 1)
+		maxOutputLength = maxInputLength
+	}
+
 	switch block.GetKind() {
 	case v1alpha1.BlockKind_CODE:
 		// Code just gets written as a code block
 		sb.WriteString("```" + BASHLANG + "\n")
-		sb.WriteString(block.GetContents())
+
+		data := block.GetContents()
+		if len(data) > maxInputLength && maxInputLength > 0 {
+			data = tailLines(data, maxInputLength)
+			data = codeTruncationMessage + "\n" + data
+
+			remaining := maxLength - len(data)
+			if remaining > 0 {
+				maxOutputLength += remaining
+			}
+		}
+		sb.WriteString(data)
 		sb.WriteString("\n```\n")
 	default:
 		// Otherwise assume its a markdown block
-		sb.WriteString(block.GetContents() + "\n")
+
+		data := block.GetContents()
+		if len(data) > maxInputLength && maxInputLength > 0 {
+			data = tailLines(data, maxInputLength)
+			remaining := maxLength - len(data)
+			if remaining > 0 {
+				maxOutputLength += remaining
+			}
+		}
+		sb.WriteString(data + "\n")
 	}
 
 	// Handle the outputs
 	for _, output := range block.GetOutputs() {
 		for _, oi := range output.Items {
-
 			if oi.GetMime() == StatefulRunmeOutputItemsMimeType || oi.GetMime() == StatefulRunmeTerminalMimeType {
 				// See: https://github.com/jlewi/foyle/issues/286. This output item contains a JSON dictionary
 				// with a bunch of meta information that seems specific to Runme/stateful and not necessarily
@@ -45,11 +87,24 @@ func writeBlockMarkdown(sb *strings.Builder, block *v1alpha1.Block) {
 				// renderers. https://github.com/stateful/vscode-runme/blob/3e36b16e3c41ad0fa38f0197f1713135e5edb27b/src/constants.ts#L6
 				// So for now we want to error on including useless data rather than silently dropping useful data.
 				// In the future we may want to revisit that.
+				//
 				continue
 			}
 
 			sb.WriteString("```" + OUTPUTLANG + "\n")
-			sb.WriteString(oi.GetTextData())
+			textData := oi.GetTextData()
+			if 0 < maxOutputLength && len(textData) > maxOutputLength {
+				textData = textData[:maxOutputLength]
+				sb.WriteString(textData)
+				// Don't write a newline before writing truncation because that is more likely to lead to confusion
+				// because people might not realize the line was truncated.
+				// Emit a message indicating that the output was truncated
+				// This is intended for the LLM so it knows that it is working with a truncated output.
+				sb.WriteString(truncationMessage)
+			} else {
+				sb.WriteString(textData)
+			}
+
 			sb.WriteString("\n```\n")
 		}
 	}
@@ -60,7 +115,7 @@ func BlocksToMarkdown(blocks []*v1alpha1.Block) string {
 	sb := strings.Builder{}
 
 	for _, block := range blocks {
-		writeBlockMarkdown(&sb, block)
+		writeBlockMarkdown(&sb, block, -1)
 	}
 
 	return sb.String()
diff --git a/app/pkg/docs/converters_test.go b/app/pkg/docs/converters_test.go
index 85b9e6b..243c2f6 100644
--- a/app/pkg/docs/converters_test.go
+++ b/app/pkg/docs/converters_test.go
@@ -12,9 +12,10 @@ import (
 
 func Test_BlockToMarkdown(t *testing.T) {
 	type testCase struct {
-		name     string
-		block    *v1alpha1.Block
-		expected string
+		name      string
+		block     *v1alpha1.Block
+		maxLength int
+		expected  string
 	}
 
 	testCases := []testCase{
@@ -69,10 +70,28 @@ func Test_BlockToMarkdown(t *testing.T) {
 			},
 			expected: "```bash\necho \"something something\"\n```\n```output\nShould be included\n```\n",
 		},
+		{
+			name: "truncate-output",
+			block: &v1alpha1.Block{
+				Kind:     v1alpha1.BlockKind_CODE,
+				Contents: "echo line1\nline2",
+				Outputs: []*v1alpha1.BlockOutput{
+					{
+						Items: []*v1alpha1.BlockOutputItem{
+							{
+								TextData: "some really long output",
+							},
+						},
+					},
+				},
+			},
+			maxLength: 10,
+			expected:  "```bash\n<...code was truncated...>\nline2\n```\n```output\nsome r<...stdout was truncated...>\n```\n",
+		},
 	}
 	for _, c := range testCases {
 		t.Run(c.name, func(t *testing.T) {
-			actual := BlockToMarkdown(c.block)
+			actual := BlockToMarkdown(c.block, c.maxLength)
 			if d := cmp.Diff(c.expected, actual); d != "" {
 				t.Errorf("Unexpected diff:\n%s", d)
 			}
diff --git a/app/pkg/docs/tailer.go b/app/pkg/docs/tailer.go
index c09dc68..68a247a 100644
--- a/app/pkg/docs/tailer.go
+++ b/app/pkg/docs/tailer.go
@@ -25,7 +25,6 @@ func NewTailer(ctx context.Context, blocks []*v1alpha1.Block, maxCharLen int) *T
 	log := logs.FromContext(ctx)
 	mdBlocks := make([]string, len(blocks))
 
-	length := 0
 	firstBlock := len(blocks) - 1
 
 	assertion := &v1alpha1.Assertion{
@@ -34,20 +33,18 @@ func NewTailer(ctx context.Context, blocks []*v1alpha1.Block, maxCharLen int) *T
 		Detail: "",
 		Id:     ulid.GenerateID(),
 	}
-	for ; firstBlock >= 0; firstBlock-- {
+
+	numBlocks := 0
+	for ; firstBlock >= 0 && maxCharLen > 0; firstBlock-- {
 		block := blocks[firstBlock]
-		md := BlockToMarkdown(block)
-		if length+len(md) > maxCharLen {
-			if length > 0 {
-				// If adding the block would exceed the max length and we already have at least one block then, break
-				break
-			} else {
-				// Since we haven't added any blocks yet, we need to add a truncated version of the last block
-				assertion.Result = v1alpha1.AssertResult_FAILED
-				md = tailLines(md, maxCharLen)
-			}
+		numBlocks += 1
+		md := BlockToMarkdown(block, maxCharLen)
+		maxCharLen = maxCharLen - len(md)
+		if maxCharLen <= 0 && numBlocks == 1 {
+			// Since this is the first block and its truncated we fail the assertion.
+			assertion.Result = v1alpha1.AssertResult_FAILED
 		}
-		length += len(md)
+
 		mdBlocks[firstBlock] = md
 	}
 
@@ -85,7 +82,6 @@ func tailLines(s string, maxLen int) string {
 	lines := strings.Split(s, "\n")
 
 	startIndex := len(lines) - 1
-	//if startIndex < 0 {}
 
 	length := len(lines[len(lines)-1])
 
diff --git a/app/pkg/docs/tailer_test.go b/app/pkg/docs/tailer_test.go
index 1f8609c..7d1656d 100644
--- a/app/pkg/docs/tailer_test.go
+++ b/app/pkg/docs/tailer_test.go
@@ -55,14 +55,39 @@ func Test_Tailer(t *testing.T) {
 			MaxChars: 12,
 			Expected: "Cell2\nCell3\n",
 		},
+		{
+			name: "truncate-outputs",
+			Doc: &v1alpha1.Doc{
+				Blocks: []*v1alpha1.Block{
+					{
+						Kind:     v1alpha1.BlockKind_CODE,
+						Contents: "Cell1",
+						Outputs: []*v1alpha1.BlockOutput{
+							{
+								Items: []*v1alpha1.BlockOutputItem{
+									{
+										TextData: "Output1\nOutput2\nOutput3",
+										Mime:     VSCodeNotebookStdOutMimeType,
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+			MaxChars: 12,
+			Expected: "```bash\nCell1\n```\n```output\nOutput1<...stdout was truncated...>\n```\n",
+		},
 	}
 
 	for _, c := range cases {
-		tailer := NewTailer(context.Background(), c.Doc.Blocks, c.MaxChars)
-		actual := tailer.Text()
-		if d := cmp.Diff(c.Expected, actual); d != "" {
-			t.Fatalf("Expected text to be %s but got %s; diff:\n%v", c.Expected, tailer.Text(), d)
-		}
+		t.Run(c.name, func(t *testing.T) {
+			tailer := NewTailer(context.Background(), c.Doc.Blocks, c.MaxChars)
+			actual := tailer.Text()
+			if d := cmp.Diff(c.Expected, actual); d != "" {
+				t.Fatalf("Unexpected diff:\n%v", d)
+			}
+		})
 	}
 }