From 59e6c53261a98f97c92886487597b5784b3d7141 Mon Sep 17 00:00:00 2001
From: Gernot Feichter <gernot.feichter@bearingpoint.com>
Date: Mon, 18 Mar 2024 09:28:34 +0100
Subject: [PATCH] feat: support custom split functions in createCommandReaders

The default is (still) to split by line, but in some cases the interesting command output spans multiple lines (e.g. commit messages).

This also provides SplitAt, which returns a split function that splits the output at a given separator string.
---
 io/cmd.go       | 24 ++++++++++++++++++---
 io/scan.go      | 34 ++++++++++++++++++++++++++++++
 io/scan_test.go | 56 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 111 insertions(+), 3 deletions(-)
 create mode 100644 io/scan.go
 create mode 100644 io/scan_test.go

diff --git a/io/cmd.go b/io/cmd.go
index 39b227a..e20e71e 100644
--- a/io/cmd.go
+++ b/io/cmd.go
@@ -82,7 +82,7 @@ func RunCmdWithOutputParser(config CmdConfig, prompt bool, regExpStruct ...*CmdO
 	}
 
 	cmd := config.GetCmd()
-	stdoutReader, stderrReader, err := createCommandReaders(cmd)
+	stdoutReader, stderrReader, err := createCommandReaders(cmd, config)
 	if err != nil {
 		return
 	}
@@ -95,6 +95,8 @@ func RunCmdWithOutputParser(config CmdConfig, prompt bool, regExpStruct ...*CmdO
 	go func() {
 		defer wg.Done()
 		for stdoutReader.Scan() {
+			// Note: "line" may actually be a multi-line string if the CmdConfig also implements CmdConfigSplit
+			// and its Split() function produces tokens that span several lines.
 			line, _ := processLine(regExpStruct, stdoutReader.Text(), errChan)
 			if prompt {
 				fmt.Fprintf(os.Stderr, line+"\n")
@@ -175,7 +177,7 @@ func processLine(regExpStruct []*CmdOutputPattern, line string, errChan chan err
 // Create command stdout and stderr readers.
 // The returned readers are automatically closed after the running command exit and shouldn't be closed explicitly.
 // cmd - The command to execute
-func createCommandReaders(cmd *exec.Cmd) (*bufio.Scanner, *bufio.Scanner, error) {
+func createCommandReaders(cmd *exec.Cmd, config CmdConfig) (*bufio.Scanner, *bufio.Scanner, error) {
 	stdoutReader, err := cmd.StdoutPipe()
 	if err != nil {
 		return nil, nil, err
@@ -186,7 +188,13 @@ func createCommandReaders(cmd *exec.Cmd) (*bufio.Scanner, *bufio.Scanner, error)
 		return nil, nil, err
 	}
 
-	return bufio.NewScanner(stdoutReader), bufio.NewScanner(stderrReader), nil
+	stdoutScanner := bufio.NewScanner(stdoutReader)
+	stderrScanner := bufio.NewScanner(stderrReader)
+	if configSplit, ok := config.(CmdConfigSplit); ok {
+		stdoutScanner.Split(configSplit.Split())
+		stderrScanner.Split(configSplit.Split())
+	}
+	return stdoutScanner, stderrScanner, nil
 }
 
 type CmdConfig interface {
@@ -196,6 +204,16 @@ type CmdConfig interface {
 	GetErrWriter() io.WriteCloser
 }
 
+// CmdConfigSplit is an optional extension to CmdConfig.
+// If the CmdConfig passed to createCommandReaders also implements CmdConfigSplit, the split function
+// returned by Split is installed on both the stdout and the stderr scanner.
+// Otherwise the bufio.Scanner default applies (command output is split and parsed line by line).
+// Split is deliberately not part of CmdConfig: adding it there would require refactoring the entire
+// code base, whereas a separate interface keeps the feature optional.
+type CmdConfigSplit interface {
+	Split() bufio.SplitFunc
+}
+
 // RegExp - The regexp that the line will be searched upon.
 // MatchedResults - The slice result that was found by the regexp
 // Line - The output line from the external process
diff --git a/io/scan.go b/io/scan.go
new file mode 100644
index 0000000..14fbd07
--- /dev/null
+++ b/io/scan.go
@@ -0,0 +1,34 @@
+package io
+
+import "bytes"
+
+// This file is meant as an extension point for the Go standard library file bufio/scan.go.
+
+// SplitAt returns a function that splits cmd output at each occurrence of substring (separator excluded).
+// The returned function implements the type SplitFunc as defined in bufio/scan.go. An empty substring never
+// matches, so the input is then not split at all. Tribute: https://stackoverflow.com/a/57232670/21511203
+func SplitAt(substring string) func(data []byte, atEOF bool) (advance int, token []byte, err error) {
+	searchBytes := []byte(substring)
+	searchLen := len(searchBytes)
+	return func(data []byte, atEOF bool) (advance int, token []byte, err error) {
+		dataLen := len(data)
+		// Return nothing if at end of file and no data passed.
+		if atEOF && dataLen == 0 {
+			return 0, nil, nil
+		}
+
+		// Find the next separator and return the token before it. An empty separator is skipped because
+		// bytes.Index reports it at offset 0, which would yield empty tokens that never advance the scanner.
+		if i := bytes.Index(data, searchBytes); searchLen > 0 && i >= 0 {
+			return i + searchLen, data[0:i], nil
+		}
+
+		// If we're at EOF, we have a final, non-terminated token. Return it.
+		if atEOF {
+			return dataLen, data, nil
+		}
+
+		// Request more data.
+		return 0, nil, nil
+	}
+}
diff --git a/io/scan_test.go b/io/scan_test.go
new file mode 100644
index 0000000..08c8449
--- /dev/null
+++ b/io/scan_test.go
@@ -0,0 +1,56 @@
+package io
+
+import (
+	"bufio"
+	"slices"
+	"strings"
+	"testing"
+)
+
+// TestSplitAt runs SplitAt through a bufio.Scanner for several inputs and compares the produced tokens.
+func TestSplitAt(t *testing.T) {
+	// Table of inputs, separators and the tokens the scanner is expected to produce.
+	testCases := []struct {
+		scenarioDescription string
+		inputData           string
+		substring           string
+		expectedSplits      []string
+	}{
+		{
+			scenarioDescription: "Empty data",
+			inputData:           "",
+			substring:           "separator",
+			expectedSplits:      []string{},
+		},
+		{
+			scenarioDescription: "Data does not contain the separator",
+			inputData:           "someThing Without a matching SePaRaToR",
+			substring:           "separator",
+			expectedSplits:      []string{"someThing Without a matching SePaRaToR"},
+		},
+		{
+			scenarioDescription: "Data contains the separator once",
+			inputData:           "AseparatorB",
+			substring:           "separator",
+			expectedSplits:      []string{"A", "B"},
+		},
+		{
+			scenarioDescription: "Data contains the separator more than once",
+			inputData:           "AseparatorBseparatorC",
+			substring:           "separator",
+			expectedSplits:      []string{"A", "B", "C"},
+		},
+	}
+	// Run test cases; a scanner error counts as a failure too, since Scan() returning false would hide it.
+	for _, tc := range testCases {
+		scanner := bufio.NewScanner(strings.NewReader(tc.inputData))
+		scanner.Split(SplitAt(tc.substring))
+		actualSplits := []string{}
+		for scanner.Scan() {
+			actualSplits = append(actualSplits, scanner.Text())
+		}
+		if err := scanner.Err(); err != nil || !slices.Equal(tc.expectedSplits, actualSplits) {
+			t.Errorf("Test failed for scenario: %s, input data: %s, substring: %s\nExpected: %q\nActual: %q\nScanner error: %v", tc.scenarioDescription, tc.inputData, tc.substring, tc.expectedSplits, actualSplits, err)
+		}
+	}
+}