From 59e6c53261a98f97c92886487597b5784b3d7141 Mon Sep 17 00:00:00 2001 From: Gernot Feichter Date: Mon, 18 Mar 2024 09:28:34 +0100 Subject: [PATCH] feat: support custom split functions in createCommandReaders Default is (still) split by line, but in some cases interesting command output spans multiple lines (e.g. commit messages) This also provides a split function definition that splits by a string. --- io/cmd.go | 24 ++++++++++++++++++--- io/scan.go | 34 ++++++++++++++++++++++++++++++ io/scan_test.go | 56 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 111 insertions(+), 3 deletions(-) create mode 100644 io/scan.go create mode 100644 io/scan_test.go diff --git a/io/cmd.go b/io/cmd.go index 39b227a..e20e71e 100644 --- a/io/cmd.go +++ b/io/cmd.go @@ -82,7 +82,7 @@ func RunCmdWithOutputParser(config CmdConfig, prompt bool, regExpStruct ...*CmdO } cmd := config.GetCmd() - stdoutReader, stderrReader, err := createCommandReaders(cmd) + stdoutReader, stderrReader, err := createCommandReaders(cmd, config) if err != nil { return } @@ -95,6 +95,8 @@ func RunCmdWithOutputParser(config CmdConfig, prompt bool, regExpStruct ...*CmdO go func() { defer wg.Done() for stdoutReader.Scan() { + // Notice that a line might actually be a multiline string if CmdConfig specifies a Split() function that + // produces such! line, _ := processLine(regExpStruct, stdoutReader.Text(), errChan) if prompt { fmt.Fprintf(os.Stderr, line+"\n") @@ -175,7 +177,7 @@ func processLine(regExpStruct []*CmdOutputPattern, line string, errChan chan err // Create command stdout and stderr readers. // The returned readers are automatically closed after the running command exit and shouldn't be closed explicitly. 
// cmd - The command to execute -func createCommandReaders(cmd *exec.Cmd) (*bufio.Scanner, *bufio.Scanner, error) { +func createCommandReaders(cmd *exec.Cmd, config CmdConfig) (*bufio.Scanner, *bufio.Scanner, error) { stdoutReader, err := cmd.StdoutPipe() if err != nil { return nil, nil, err @@ -186,7 +188,13 @@ func createCommandReaders(cmd *exec.Cmd) (*bufio.Scanner, *bufio.Scanner, error) return nil, nil, err } - return bufio.NewScanner(stdoutReader), bufio.NewScanner(stderrReader), nil + stdoutScanner := bufio.NewScanner(stdoutReader) + stderrScanner := bufio.NewScanner(stderrReader) + if configSplit, ok := config.(CmdConfigSplit); ok { + stdoutScanner.Split(configSplit.Split()) + stderrScanner.Split(configSplit.Split()) + } + return stdoutScanner, stderrScanner, nil } type CmdConfig interface { @@ -196,6 +204,16 @@ type CmdConfig interface { GetErrWriter() io.WriteCloser } +// CmdConfigSplit +// Optional Extension to CmdConfig +// If the caller implements a split function for its CmdConfig, it will be honored +// Otherwise the default applies (split and parse command output by line) +// Note: Will not add the Split function to CmdConfig, since this would require refactoring the entire code base. +// Also, this allows implementing optionality +type CmdConfigSplit interface { + Split() bufio.SplitFunc +} + // RegExp - The regexp that the line will be searched upon. // MatchedResults - The slice result that was found by the regexp // Line - The output line from the external process diff --git a/io/scan.go b/io/scan.go new file mode 100644 index 0000000..14fbd07 --- /dev/null +++ b/io/scan.go @@ -0,0 +1,34 @@ +package io + +import "bytes" + +// This file is meant as extension point for go standard library file bufio/scan.go + +// SplitAt returns a function that splits cmd output by a given string. +// The returned function implements the type SplitFunc as defined in bufio/scan.go! 
// SplitAt returns a split function that cuts scanned input at every
// occurrence of substring. The separator itself is never part of a token.
// The returned function has the signature of bufio.SplitFunc and can be passed
// directly to (*bufio.Scanner).Split.
//
// Behavior of the returned function:
//   - data containing the separator: the bytes before the first occurrence are
//     returned as the token and the input is advanced past the separator.
//   - no separator yet and more input may follow: more data is requested.
//   - no separator and atEOF: the remaining bytes are returned as a final,
//     non-terminated token.
//   - empty substring: the input is not split at all; everything is returned
//     as a single token at EOF.
//
// Tribute: https://stackoverflow.com/a/57232670/21511203
func SplitAt(substring string) func(data []byte, atEOF bool) (advance int, token []byte, err error) {
	searchBytes := []byte(substring)
	searchLen := len(searchBytes)
	return func(data []byte, atEOF bool) (advance int, token []byte, err error) {
		dataLen := len(data)

		// Return nothing if at end of file and no data passed.
		if atEOF && dataLen == 0 {
			return 0, nil, nil
		}

		// Find the next separator and return the token in front of it.
		// An empty separator must be skipped here: bytes.Index would report a
		// match at offset 0 on every call, yielding an empty token without
		// advancing, which makes bufio.Scanner panic after too many such tokens.
		if searchLen > 0 {
			if i := bytes.Index(data, searchBytes); i >= 0 {
				return i + searchLen, data[:i], nil
			}
		}

		// If we're at EOF, we have a final, non-terminated token. Return it.
		if atEOF {
			return dataLen, data, nil
		}

		// Request more data; the separator may still arrive (or may currently
		// be cut in half at the end of the buffer).
		return 0, nil, nil
	}
}

// The test below belongs to io/scan_test.go (package io) and relies on the
// imports bufio, slices, strings and testing.

// TestSplitAt drives a bufio.Scanner configured with SplitAt over a table of
// inputs and compares the produced tokens with the expected ones.
func TestSplitAt(t *testing.T) {
	// Define test cases
	testCases := []struct {
		scenarioDescription string
		inputData           string
		substring           string
		expectedSplits      []string
	}{
		{
			scenarioDescription: "Empty data",
			inputData:           "",
			substring:           "separator",
			expectedSplits:      []string{},
		},
		{
			scenarioDescription: "Data does not contain the separator",
			inputData:           "someThing Without a matching SePaRaToR",
			substring:           "separator",
			expectedSplits:      []string{"someThing Without a matching SePaRaToR"},
		},
		{
			scenarioDescription: "Data contains the separator once",
			inputData:           "AseparatorB",
			substring:           "separator",
			expectedSplits:      []string{"A", "B"},
		},
		{
			scenarioDescription: "Data contains the separator more than once",
			inputData:           "AseparatorBseparatorC",
			substring:           "separator",
			expectedSplits:      []string{"A", "B", "C"},
		},
		{
			scenarioDescription: "Data ends with the separator",
			inputData:           "Aseparator",
			substring:           "separator",
			expectedSplits:      []string{"A"},
		},
		{
			scenarioDescription: "Empty separator does not split (and does not panic)",
			inputData:           "abc",
			substring:           "",
			expectedSplits:      []string{"abc"},
		},
	}

	// Run test cases
	for _, tc := range testCases {
		t.Run(tc.scenarioDescription, func(t *testing.T) {
			scanner := bufio.NewScanner(strings.NewReader(tc.inputData))
			scanner.Split(SplitAt(tc.substring))
			actualSplits := []string{}
			for scanner.Scan() {
				actualSplits = append(actualSplits, scanner.Text())
			}
			// A scanner that stopped because of an error would otherwise look
			// like a short but valid result.
			if err := scanner.Err(); err != nil {
				t.Fatalf("unexpected scanner error: %v", err)
			}
			if !slices.Equal(tc.expectedSplits, actualSplits) {
				t.Errorf("Test failed for scenario: %s, input data: %s, substring: %s\nExpected: %q\nActual: %q", tc.scenarioDescription, tc.inputData, tc.substring, tc.expectedSplits, actualSplits)
			}
		})
	}
}