Improve Handling Of Long Outputs (#328)

# Problem

Cell outputs can be very long. For example, if we run a query (gcloud, SQL, etc.), the output could be very verbose. This output could eat up the entire context allocated for the input document. As a result, we might not have sufficiently meaningful context to prompt the model.

There was another bug in our doc tailer. We were applying character limits to the rendered markdown, and we were imposing them by tailing the lines. This could produce invalid markdown. For example, we might end up truncating the document in the middle of a code block, so we wouldn't have the opening triple backticks for the code block. We might also include the output of a code block without including the code that produced it.

# Solution

First, we impose character limits in a way that is aware of cell boundaries. We move truncation into the Block to Markdown conversion. The conversion now takes the maximum length for the output string. The conversion routine then figures out how much to allocate to the contents of the cell and how much to its outputs. This allows truncation to happen in a way that respects cell boundaries.

Second, if we truncate the code block or its output, we emit a string indicating that it was truncated. We want the model to know that output was truncated. We update our prompt to tell the LLM to look for truncated output and to potentially deal with it by running commands that will provide less verbose output.

Fix #299
jlewi · Oct 25, 2024 · 3f8fa1a · 3f8fa1a
1 parent b8ee786
commit 3f8fa1a
Show file tree

Hide file tree

Showing 8 changed files with 151 additions and 31 deletions.
diff --git a/app/pkg/agent/prompt.tmpl b/app/pkg/agent/prompt.tmpl
@@ -84,6 +84,14 @@ Google Cloud service account developer@foyle-dev.iam.gserviceaccount.com.
 </reasoning>
 </example>
 
+* If the output of a command is really long it will be truncated as indicated by the string "<...stdout was truncated...>"
+* If the truncated output contains critical information to figure out what to do next, you should respond with a
+  suggestion on how to run the command so as to produce just the information you need with less verbosity
+
+  * If logging or SQL queries leads to truncated output, suggest alternative queries with
+    clauses to restrict the output to the rows and fields you need
+  * If dumping large JSON/YAML blobs leads to truncated output, provide a command to 1) save the data to a file and 2) then use tools like jq or yq to read the
+    fields you need
 
 {{if .Examples}}
 Here are a bunch of examples of input documents along with the expected output.

diff --git a/app/pkg/agent/test_data/examples.txt b/app/pkg/agent/test_data/examples.txt
@@ -84,6 +84,14 @@ Google Cloud service account developer@foyle-dev.iam.gserviceaccount.com.
 </reasoning>
 </example>
 
+* If the output of a command is really long it will be truncated as indicated by the string "<...stdout was truncated...>"
+* If the truncated output contains critical information to figure out what to do next, you should respond with a
+  suggestion on how to run the command so as to produce just the information you need with less verbosity
+
+  * If logging or SQL queries leads to truncated output, suggest alternative queries with
+    clauses to restrict the output to the rows and fields you need
+  * If dumping large JSON/YAML blobs leads to truncated output, provide a command to 1) save the data to a file and 2) then use tools like jq or yq to read the
+    fields you need
 
 
 Here are a bunch of examples of input documents along with the expected output.

diff --git a/app/pkg/agent/test_data/no_examples.txt b/app/pkg/agent/test_data/no_examples.txt
@@ -84,6 +84,14 @@ Google Cloud service account developer@foyle-dev.iam.gserviceaccount.com.
 </reasoning>
 </example>
 
+* If the output of a command is really long it will be truncated as indicated by the string "<...stdout was truncated...>"
+* If the truncated output contains critical information to figure out what to do next, you should respond with a
+  suggestion on how to run the command so as to produce just the information you need with less verbosity
+
+  * If logging or SQL queries leads to truncated output, suggest alternative queries with
+    clauses to restrict the output to the rows and fields you need
+  * If dumping large JSON/YAML blobs leads to truncated output, provide a command to 1) save the data to a file and 2) then use tools like jq or yq to read the
+    fields you need
 
 
 Here's the actual document containing the problem or task to be solved:

diff --git a/app/pkg/docs/const.go b/app/pkg/docs/const.go
@@ -14,4 +14,5 @@ const (
 	//    https://github.com/jlewi/foyle/issues/286
 	StatefulRunmeOutputItemsMimeType = "stateful.runme/output-items"
 	StatefulRunmeTerminalMimeType    = "stateful.runme/terminal"
+	// VSCodeNotebookStdOutMimeType is the MIME type VS Code notebooks use for stdout output items.
+	VSCodeNotebookStdOutMimeType     = "application/vnd.code.notebook.stdout"
 )
diff --git a/app/pkg/docs/converters.go b/app/pkg/docs/converters.go
@@ -1,6 +1,7 @@
 package docs
 
 import (
+	"math"
 	"strings"
 
 	"github.com/jlewi/foyle/app/pkg/runme/converters"
@@ -11,29 +12,70 @@ import (
 	"github.com/stateful/runme/v3/pkg/document/editor"
 )
 
+const (
+	codeTruncationMessage = "<...code was truncated...>"
+	truncationMessage     = "<...stdout was truncated...>"
+)
+
 // BlockToMarkdown converts a block to markdown
-func BlockToMarkdown(block *v1alpha1.Block) string {
+// maxLength is the maximum length, in bytes, of the generated markdown; a value <= 0 means no limit.
+// It is a soft limit and may be exceeded: the code-fence markup and the truncation messages are not counted
+// against it, and the limit on output is applied to each output item separately.
+func BlockToMarkdown(block *v1alpha1.Block, maxLength int) string {
 	sb := strings.Builder{}
-	writeBlockMarkdown(&sb, block)
+	writeBlockMarkdown(&sb, block, maxLength)
 	return sb.String()
 }
 
-func writeBlockMarkdown(sb *strings.Builder, block *v1alpha1.Block) {
+func writeBlockMarkdown(sb *strings.Builder, block *v1alpha1.Block, maxLength int) {
+
+	maxInputLength := -1
+	maxOutputLength := -1
+
+	if maxLength > 0 {
+		// Allocate 50% of the max length for input and output
+		// This is crude. Arguably we could be dynamic e.g. if the output is < .5 maxLength we should allocate
+		// the unused capacity for inputs. But for simplicity we don't do that. We do allocate unused input capacity
+		// to the output. In practice outputs tend to be much longer than inputs. Inputs are human authored
+		// whereas outputs are more likely to be produced by a machine (e.g. log output) and therefore very long
+		maxInputLength = int(math.Floor(0.5*float64(maxLength)) + 1)
+		maxOutputLength = maxInputLength
+	}
+
 	switch block.GetKind() {
 	case v1alpha1.BlockKind_CODE:
 		// Code just gets written as a code block
 		sb.WriteString("```" + BASHLANG + "\n")
-		sb.WriteString(block.GetContents())
+
+		data := block.GetContents()
+		if len(data) > maxInputLength && maxInputLength > 0 {
+			data = tailLines(data, maxInputLength)
+			data = codeTruncationMessage + "\n" + data
+
+			remaining := maxLength - len(data)
+			if remaining > 0 {
+				maxOutputLength += remaining
+			}
+		}
+		sb.WriteString(data)
 		sb.WriteString("\n```\n")
 	default:
 		// Otherwise assume its a markdown block
-		sb.WriteString(block.GetContents() + "\n")
+
+		data := block.GetContents()
+		if len(data) > maxInputLength && maxInputLength > 0 {
+			data = tailLines(data, maxInputLength)
+			remaining := maxLength - len(data)
+			if remaining > 0 {
+				maxOutputLength += remaining
+			}
+		}
+		sb.WriteString(data + "\n")
 	}
 
 	// Handle the outputs
 	for _, output := range block.GetOutputs() {
 		for _, oi := range output.Items {
-
 			if oi.GetMime() == StatefulRunmeOutputItemsMimeType || oi.GetMime() == StatefulRunmeTerminalMimeType {
 				// See: https://github.com/jlewi/foyle/issues/286. This output item contains a JSON dictionary
 				// with a bunch of meta information that seems specific to Runme/stateful and not necessarily
@@ -45,11 +87,24 @@ func writeBlockMarkdown(sb *strings.Builder, block *v1alpha1.Block) {
 				// renderers. https://github.com/stateful/vscode-runme/blob/3e36b16e3c41ad0fa38f0197f1713135e5edb27b/src/constants.ts#L6
 				// So for now we want to error on including useless data rather than silently dropping useful data.
 				// In the future we may want to revisit that.
+				//
 				continue
 			}
 
 			sb.WriteString("```" + OUTPUTLANG + "\n")
-			sb.WriteString(oi.GetTextData())
+			textData := oi.GetTextData()
+			if 0 < maxOutputLength && len(textData) > maxOutputLength {
+				textData = textData[:maxOutputLength]
+				sb.WriteString(textData)
+				// Don't write a newline before writing truncation because that is more likely to lead to confusion
+				// because people might not realize the line was truncated.
+				// Emit a message indicating that the output was truncated
+				// This is intended for the LLM so it knows that it is working with a truncated output.
+				sb.WriteString(truncationMessage)
+			} else {
+				sb.WriteString(textData)
+			}
+
 			sb.WriteString("\n```\n")
 		}
 	}
@@ -60,7 +115,7 @@ func BlocksToMarkdown(blocks []*v1alpha1.Block) string {
 	sb := strings.Builder{}
 
 	for _, block := range blocks {
-		writeBlockMarkdown(&sb, block)
+		writeBlockMarkdown(&sb, block, -1)
 	}
 
 	return sb.String()

diff --git a/app/pkg/docs/converters_test.go b/app/pkg/docs/converters_test.go
@@ -12,9 +12,10 @@ import (
 
 func Test_BlockToMarkdown(t *testing.T) {
 	type testCase struct {
-		name     string
-		block    *v1alpha1.Block
-		expected string
+		name      string
+		block     *v1alpha1.Block
+		maxLength int
+		expected  string
 	}
 
 	testCases := []testCase{
@@ -69,10 +70,28 @@ func Test_BlockToMarkdown(t *testing.T) {
 			},
 			expected: "```bash\necho \"something something\"\n```\n```output\nShould be included\n```\n",
 		},
+		{
+			name: "truncate-output",
+			block: &v1alpha1.Block{
+				Kind:     v1alpha1.BlockKind_CODE,
+				Contents: "echo line1\nline2",
+				Outputs: []*v1alpha1.BlockOutput{
+					{
+						Items: []*v1alpha1.BlockOutputItem{
+							{
+								TextData: "some really long output",
+							},
+						},
+					},
+				},
+			},
+			maxLength: 10,
+			expected:  "```bash\n<...code was truncated...>\nline2\n```\n```output\nsome r<...stdout was truncated...>\n```\n",
+		},
 	}
 	for _, c := range testCases {
 		t.Run(c.name, func(t *testing.T) {
-			actual := BlockToMarkdown(c.block)
+			actual := BlockToMarkdown(c.block, c.maxLength)
 			if d := cmp.Diff(c.expected, actual); d != "" {
 				t.Errorf("Unexpected diff:\n%s", d)
 			}

diff --git a/app/pkg/docs/tailer.go b/app/pkg/docs/tailer.go
@@ -25,7 +25,6 @@ func NewTailer(ctx context.Context, blocks []*v1alpha1.Block, maxCharLen int) *T
 	log := logs.FromContext(ctx)
 	mdBlocks := make([]string, len(blocks))
 
-	length := 0
 	firstBlock := len(blocks) - 1
 
 	assertion := &v1alpha1.Assertion{
@@ -34,20 +33,18 @@ func NewTailer(ctx context.Context, blocks []*v1alpha1.Block, maxCharLen int) *T
 		Detail: "",
 		Id:     ulid.GenerateID(),
 	}
-	for ; firstBlock >= 0; firstBlock-- {
+
+	numBlocks := 0
+	for ; firstBlock >= 0 && maxCharLen > 0; firstBlock-- {
 		block := blocks[firstBlock]
-		md := BlockToMarkdown(block)
-		if length+len(md) > maxCharLen {
-			if length > 0 {
-				// If adding the block would exceed the max length and we already have at least one block then, break
-				break
-			} else {
-				// Since we haven't added any blocks yet, we need to add a truncated version of the last block
-				assertion.Result = v1alpha1.AssertResult_FAILED
-				md = tailLines(md, maxCharLen)
-			}
+		numBlocks += 1
+		md := BlockToMarkdown(block, maxCharLen)
+		maxCharLen = maxCharLen - len(md)
+		if maxCharLen <= 0 && numBlocks == 1 {
+			// Since this is the first block and its truncated we fail the assertion.
+			assertion.Result = v1alpha1.AssertResult_FAILED
 		}
-		length += len(md)
+
 		mdBlocks[firstBlock] = md
 	}
 
@@ -85,7 +82,6 @@ func tailLines(s string, maxLen int) string {
 	lines := strings.Split(s, "\n")
 
 	startIndex := len(lines) - 1
-	//if startIndex < 0 {}
 
 	length := len(lines[len(lines)-1])
 

diff --git a/app/pkg/docs/tailer_test.go b/app/pkg/docs/tailer_test.go
@@ -55,14 +55,39 @@ func Test_Tailer(t *testing.T) {
 			MaxChars: 12,
 			Expected: "Cell2\nCell3\n",
 		},
+		{
+			name: "truncate-outputs",
+			Doc: &v1alpha1.Doc{
+				Blocks: []*v1alpha1.Block{
+					{
+						Kind:     v1alpha1.BlockKind_CODE,
+						Contents: "Cell1",
+						Outputs: []*v1alpha1.BlockOutput{
+							{
+								Items: []*v1alpha1.BlockOutputItem{
+									{
+										TextData: "Output1\nOutput2\nOutput3",
+										Mime:     VSCodeNotebookStdOutMimeType,
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+			MaxChars: 12,
+			Expected: "```bash\nCell1\n```\n```output\nOutput1<...stdout was truncated...>\n```\n",
+		},
 	}
 
 	for _, c := range cases {
-		tailer := NewTailer(context.Background(), c.Doc.Blocks, c.MaxChars)
-		actual := tailer.Text()
-		if d := cmp.Diff(c.Expected, actual); d != "" {
-			t.Fatalf("Expected text to be %s but got %s; diff:\n%v", c.Expected, tailer.Text(), d)
-		}
+		t.Run(c.name, func(t *testing.T) {
+			tailer := NewTailer(context.Background(), c.Doc.Blocks, c.MaxChars)
+			actual := tailer.Text()
+			if d := cmp.Diff(c.Expected, actual); d != "" {
+				t.Fatalf("Unexpected diff:\n%v", d)
+			}
+		})
 	}
 }