From 3f8fa1a7789faf5a291a22e75bbd42359b076c1f Mon Sep 17 00:00:00 2001 From: Jeremy Lewi Date: Fri, 25 Oct 2024 14:23:34 -0700 Subject: [PATCH] Improve Handling Of Long Outputs (#328) # Problem Cell outputs can be very long. For example, if we run a query (gcloud, SQL, etc...) the output could be very verbose. This output could eat up the entire context allocated for the input document. As a result, we might not have sufficiently meaningful context to prompt the model. There was another bug in our doc tailer. We were applying character limits to the rendered markdown. We were imposing this by tailing the lines. This could produce invalid markdown. For example, we might end up truncating the document in the middle of a code block so we wouldn't have the opening triple quotes for the code block. We might also include the output of the code block without including the code that it is output for. # Solution First, we impose character limits in a way that is aware of cell boundaries. We move truncation into the Block to Markdown conversion. The conversion now takes the maximum length for the output string. The conversion routine then figures out how much to allocate to the contents of the cell and its outputs. This allows truncation to happen in a way that can respect cell boundaries. Second, if we truncate the code block or output we output a string indicating that the output was truncated. We want the model to know that output was truncated. We update our prompt to tell the LLM to look for truncated output and to potentially deal with this by running commands that will provide less verbose output. Fix #299 --- app/pkg/agent/prompt.tmpl | 8 +++ app/pkg/agent/test_data/examples.txt | 8 +++ app/pkg/agent/test_data/no_examples.txt | 8 +++ app/pkg/docs/const.go | 1 + app/pkg/docs/converters.go | 71 ++++++++++++++++++++++--- app/pkg/docs/converters_test.go | 27 ++++++++-- app/pkg/docs/tailer.go | 24 ++++----- app/pkg/docs/tailer_test.go | 35 ++++++++++-- 8 files changed, 151 insertions(+), 31 deletions(-) diff --git a/app/pkg/agent/prompt.tmpl b/app/pkg/agent/prompt.tmpl index b2d4699..9b0c82c 100644 --- a/app/pkg/agent/prompt.tmpl +++ b/app/pkg/agent/prompt.tmpl @@ -84,6 +84,14 @@ Google Cloud service account developer@foyle-dev.iam.gserviceaccount.com. +* If the output of a command is really long it will be truncated as indicated by the string "<...stdout was truncated...>" +* If the truncated output contains critical information to figure out what to do next, you should respond with a + suggestion on how to run the command so as to produce just the information you need with less verbosity + + * If logging or SQL queries leads to truncated output, suggest alternative queries with + clauses to restrict the output to the rows and fields you need + * If dumping large JSON/YAML blobs leads to truncated output, provide a command to 1) save the data to a file and 2) then use tools like jq or yq to read the + fields you need {{if .Examples}} Here are a bunch of examples of input documents along with the expected output. diff --git a/app/pkg/agent/test_data/examples.txt b/app/pkg/agent/test_data/examples.txt index daf76d5..23ec94f 100644 --- a/app/pkg/agent/test_data/examples.txt +++ b/app/pkg/agent/test_data/examples.txt @@ -84,6 +84,14 @@ Google Cloud service account developer@foyle-dev.iam.gserviceaccount.com. +* If the output of a command is really long it will be truncated as indicated by the string "<...stdout was truncated...>" +* If the truncated output contains critical information to figure out what to do next, you should respond with a + suggestion on how to run the command so as to produce just the information you need with less verbosity + + * If logging or SQL queries leads to truncated output, suggest alternative queries with + clauses to restrict the output to the rows and fields you need + * If dumping large JSON/YAML blobs leads to truncated output, provide a command to 1) save the data to a file and 2) then use tools like jq or yq to read the + fields you need Here are a bunch of examples of input documents along with the expected output. diff --git a/app/pkg/agent/test_data/no_examples.txt b/app/pkg/agent/test_data/no_examples.txt index 34ff679..db8f05e 100644 --- a/app/pkg/agent/test_data/no_examples.txt +++ b/app/pkg/agent/test_data/no_examples.txt @@ -84,6 +84,14 @@ Google Cloud service account developer@foyle-dev.iam.gserviceaccount.com. +* If the output of a command is really long it will be truncated as indicated by the string "<...stdout was truncated...>" +* If the truncated output contains critical information to figure out what to do next, you should respond with a + suggestion on how to run the command so as to produce just the information you need with less verbosity + + * If logging or SQL queries leads to truncated output, suggest alternative queries with + clauses to restrict the output to the rows and fields you need + * If dumping large JSON/YAML blobs leads to truncated output, provide a command to 1) save the data to a file and 2) then use tools like jq or yq to read the + fields you need Here's the actual document containing the problem or task to be solved: diff --git a/app/pkg/docs/const.go b/app/pkg/docs/const.go index d676a6a..b4ea01c 100644 --- a/app/pkg/docs/const.go +++ b/app/pkg/docs/const.go @@ -14,4 +14,5 @@ const ( // https://github.com/jlewi/foyle/issues/286 StatefulRunmeOutputItemsMimeType = "stateful.runme/output-items" StatefulRunmeTerminalMimeType = "stateful.runme/terminal" + VSCodeNotebookStdOutMimeType = "application/vnd.code.notebook.stdout " ) diff --git a/app/pkg/docs/converters.go b/app/pkg/docs/converters.go index c05a7b3..e27919b 100644 --- a/app/pkg/docs/converters.go +++ b/app/pkg/docs/converters.go @@ -1,6 +1,7 @@ package docs import ( + "math" "strings" "github.com/jlewi/foyle/app/pkg/runme/converters" @@ -11,29 +12,70 @@ import ( "github.com/stateful/runme/v3/pkg/document/editor" ) +const ( + codeTruncationMessage = "<...code was truncated...>" + truncationMessage = "<...stdout was truncated...>" +) + // BlockToMarkdown converts a block to markdown -func BlockToMarkdown(block *v1alpha1.Block) string { +// maxLength is a maximum length for the generated markdown. This is a soft limit and may be exceeded slightly +// because we don't account for some characters like the outputLength and the truncation message +// A value <=0 means no limit. +func BlockToMarkdown(block *v1alpha1.Block, maxLength int) string { sb := strings.Builder{} - writeBlockMarkdown(&sb, block) + writeBlockMarkdown(&sb, block, maxLength) return sb.String() } -func writeBlockMarkdown(sb *strings.Builder, block *v1alpha1.Block) { +func writeBlockMarkdown(sb *strings.Builder, block *v1alpha1.Block, maxLength int) { + + maxInputLength := -1 + maxOutputLength := -1 + + if maxLength > 0 { + // Allocate 50% of the max length for input and output + // This is crude. Arguably we could be dynamic e.g. if the output is < .5 maxLength we should allocate + // the unused capacity for inputs. But for simplicity we don't do that. We do allocate unused input capacity + // to the output. In practice outputs tend to be much longer than inputs. Inputs are human authored + // whereas outputs are more likely to be produced by a machine (e.g. log output) and therefore very long + maxInputLength = int(math.Floor(0.5*float64(maxLength)) + 1) + maxOutputLength = maxInputLength + } + switch block.GetKind() { case v1alpha1.BlockKind_CODE: // Code just gets written as a code block sb.WriteString("```" + BASHLANG + "\n") - sb.WriteString(block.GetContents()) + + data := block.GetContents() + if len(data) > maxInputLength && maxInputLength > 0 { + data = tailLines(data, maxInputLength) + data = codeTruncationMessage + "\n" + data + + remaining := maxLength - len(data) + if remaining > 0 { + maxOutputLength += remaining + } + } + sb.WriteString(data) sb.WriteString("\n```\n") default: // Otherwise assume its a markdown block - sb.WriteString(block.GetContents() + "\n") + + data := block.GetContents() + if len(data) > maxInputLength && maxInputLength > 0 { + data = tailLines(data, maxInputLength) + remaining := maxLength - len(data) + if remaining > 0 { + maxOutputLength += remaining + } + } + sb.WriteString(data + "\n") } // Handle the outputs for _, output := range block.GetOutputs() { for _, oi := range output.Items { - if oi.GetMime() == StatefulRunmeOutputItemsMimeType || oi.GetMime() == StatefulRunmeTerminalMimeType { // See: https://github.com/jlewi/foyle/issues/286. This output item contains a JSON dictionary // with a bunch of meta information that seems specific to Runme/stateful and not necessarily @@ -45,11 +87,24 @@ func writeBlockMarkdown(sb *strings.Builder, block *v1alpha1.Block) { // renderers. https://github.com/stateful/vscode-runme/blob/3e36b16e3c41ad0fa38f0197f1713135e5edb27b/src/constants.ts#L6 // So for now we want to error on including useless data rather than silently dropping useful data. // In the future we may want to revisit that. + // continue } sb.WriteString("```" + OUTPUTLANG + "\n") - sb.WriteString(oi.GetTextData()) + textData := oi.GetTextData() + if 0 < maxOutputLength && len(textData) > maxOutputLength { + textData = textData[:maxOutputLength] + sb.WriteString(textData) + // Don't write a newline before writing truncation because that is more likely to lead to confusion + // because people might not realize the line was truncated. + // Emit a message indicating that the output was truncated + // This is intended for the LLM so it knows that it is working with a truncated output. + sb.WriteString(truncationMessage) + } else { + sb.WriteString(textData) + } + sb.WriteString("\n```\n") } } @@ -60,7 +115,7 @@ func BlocksToMarkdown(blocks []*v1alpha1.Block) string { sb := strings.Builder{} for _, block := range blocks { - writeBlockMarkdown(&sb, block) + writeBlockMarkdown(&sb, block, -1) } return sb.String() diff --git a/app/pkg/docs/converters_test.go b/app/pkg/docs/converters_test.go index 85b9e6b..243c2f6 100644 --- a/app/pkg/docs/converters_test.go +++ b/app/pkg/docs/converters_test.go @@ -12,9 +12,10 @@ import ( func Test_BlockToMarkdown(t *testing.T) { type testCase struct { - name string - block *v1alpha1.Block - expected string + name string + block *v1alpha1.Block + maxLength int + expected string } testCases := []testCase{ @@ -69,10 +70,28 @@ func Test_BlockToMarkdown(t *testing.T) { }, expected: "```bash\necho \"something something\"\n```\n```output\nShould be included\n```\n", }, + { + name: "truncate-output", + block: &v1alpha1.Block{ + Kind: v1alpha1.BlockKind_CODE, + Contents: "echo line1\nline2", + Outputs: []*v1alpha1.BlockOutput{ + { + Items: []*v1alpha1.BlockOutputItem{ + { + TextData: "some really long output", + }, + }, + }, + }, + }, + maxLength: 10, + expected: "```bash\n<...code was truncated...>\nline2\n```\n```output\nsome r<...stdout was truncated...>\n```\n", + }, } for _, c := range testCases { t.Run(c.name, func(t *testing.T) { - actual := BlockToMarkdown(c.block) + actual := BlockToMarkdown(c.block, c.maxLength) if d := cmp.Diff(c.expected, actual); d != "" { t.Errorf("Unexpected diff:\n%s", d) } diff --git a/app/pkg/docs/tailer.go b/app/pkg/docs/tailer.go index c09dc68..68a247a 100644 --- a/app/pkg/docs/tailer.go +++ b/app/pkg/docs/tailer.go @@ -25,7 +25,6 @@ func NewTailer(ctx context.Context, blocks []*v1alpha1.Block, maxCharLen int) *T log := logs.FromContext(ctx) mdBlocks := make([]string, len(blocks)) - length := 0 firstBlock := len(blocks) - 1 assertion := &v1alpha1.Assertion{ @@ -34,20 +33,18 @@ func NewTailer(ctx context.Context, blocks []*v1alpha1.Block, maxCharLen int) *T Detail: "", Id: ulid.GenerateID(), } - for ; firstBlock >= 0; firstBlock-- { + + numBlocks := 0 + for ; firstBlock >= 0 && maxCharLen > 0; firstBlock-- { block := blocks[firstBlock] - md := BlockToMarkdown(block) - if length+len(md) > maxCharLen { - if length > 0 { - // If adding the block would exceed the max length and we already have at least one block then, break - break - } else { - // Since we haven't added any blocks yet, we need to add a truncated version of the last block - assertion.Result = v1alpha1.AssertResult_FAILED - md = tailLines(md, maxCharLen) - } + numBlocks += 1 + md := BlockToMarkdown(block, maxCharLen) + maxCharLen = maxCharLen - len(md) + if maxCharLen <= 0 && numBlocks == 1 { + // Since this is the first block and its truncated we fail the assertion. + assertion.Result = v1alpha1.AssertResult_FAILED } - length += len(md) + mdBlocks[firstBlock] = md } @@ -85,7 +82,6 @@ func tailLines(s string, maxLen int) string { lines := strings.Split(s, "\n") startIndex := len(lines) - 1 - //if startIndex < 0 {} length := len(lines[len(lines)-1]) diff --git a/app/pkg/docs/tailer_test.go b/app/pkg/docs/tailer_test.go index 1f8609c..7d1656d 100644 --- a/app/pkg/docs/tailer_test.go +++ b/app/pkg/docs/tailer_test.go @@ -55,14 +55,39 @@ func Test_Tailer(t *testing.T) { MaxChars: 12, Expected: "Cell2\nCell3\n", }, + { + name: "truncate-outputs", + Doc: &v1alpha1.Doc{ + Blocks: []*v1alpha1.Block{ + { + Kind: v1alpha1.BlockKind_CODE, + Contents: "Cell1", + Outputs: []*v1alpha1.BlockOutput{ + { + Items: []*v1alpha1.BlockOutputItem{ + { + TextData: "Output1\nOutput2\nOutput3", + Mime: VSCodeNotebookStdOutMimeType, + }, + }, + }, + }, + }, + }, + }, + MaxChars: 12, + Expected: "```bash\nCell1\n```\n```output\nOutput1<...stdout was truncated...>\n```\n", + }, } for _, c := range cases { - tailer := NewTailer(context.Background(), c.Doc.Blocks, c.MaxChars) - actual := tailer.Text() - if d := cmp.Diff(c.Expected, actual); d != "" { - t.Fatalf("Expected text to be %s but got %s; diff:\n%v", c.Expected, tailer.Text(), d) - } + t.Run(c.name, func(t *testing.T) { + tailer := NewTailer(context.Background(), c.Doc.Blocks, c.MaxChars) + actual := tailer.Text() + if d := cmp.Diff(c.Expected, actual); d != "" { + t.Fatalf("Unexpected diff:\n%v", d) + } + }) } }