-
Notifications
You must be signed in to change notification settings - Fork 37
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Python log parser - handle limited size log content #213
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,12 @@ const ( | |
Pip PythonTool = "pip" | ||
Pipenv PythonTool = "pipenv" | ||
Poetry PythonTool = "poetry" | ||
|
||
startDownloadingPattern = `^\s*Downloading\s` | ||
downloadingCaptureGroup = `[^\s]*` | ||
startUsingCachedPattern = `^\s*Using\scached\s` | ||
usingCacheCaptureGroup = `[\S]+` | ||
endPattern = `\s\(` | ||
) | ||
|
||
type PythonTool string | ||
|
@@ -152,6 +158,50 @@ func getFilePath(srcPath, fileName string) (string, error) { | |
return filePath, nil | ||
} | ||
|
||
// Create the CmdOutputPattern objects that can capture group content that may span multiple lines for logs that have line size limitations. | ||
// Since the log parser parse line by line, we need to create a parser that can capture group content that may span multiple lines. | ||
func getMultilineSplitCaptureOutputPattern(startCollectingPattern, captureGroup, endCollectingPattern string, handler func(pattern *gofrogcmd.CmdOutputPattern) (string, error)) (parsers []*gofrogcmd.CmdOutputPattern) { | ||
// Prepare regex patterns. | ||
oneLineRegex := regexp.MustCompile(startCollectingPattern + `(` + captureGroup + `)` + endCollectingPattern) | ||
startCollectionRegexp := regexp.MustCompile(startCollectingPattern) | ||
endCollectionRegexp := regexp.MustCompile(endCollectingPattern) | ||
|
||
// Create a parser for single line pattern matches. | ||
parsers = append(parsers, &gofrogcmd.CmdOutputPattern{RegExp: oneLineRegex, ExecFunc: handler}) | ||
|
||
// Create a parser for multi line pattern matches. | ||
lineBuffer := "" | ||
collectingMultiLineValue := false | ||
parsers = append(parsers, &gofrogcmd.CmdOutputPattern{RegExp: regexp.MustCompile(".*"), ExecFunc: func(pattern *gofrogcmd.CmdOutputPattern) (string, error) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the parser that we use goes over the content line by line so It will not include newlines anyway since it is the delimiter used for tokens. |
||
// Check if the line matches the startCollectingPattern. | ||
if !collectingMultiLineValue && startCollectionRegexp.Match([]byte(pattern.Line)) { | ||
attiasas marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// Start collecting lines. | ||
collectingMultiLineValue = true | ||
lineBuffer = pattern.Line | ||
// We assume that the content is multiline so no need to check end at this point. | ||
// Single line will be handled and matched by the other parser. | ||
return pattern.Line, nil | ||
} | ||
if !collectingMultiLineValue { | ||
return pattern.Line, nil | ||
} | ||
// Add the line content to the buffer. | ||
lineBuffer += pattern.Line | ||
// Check if the line matches the endCollectingPattern. | ||
if endCollectionRegexp.Match([]byte(pattern.Line)) { | ||
attiasas marked this conversation as resolved.
Show resolved
Hide resolved
|
||
collectingMultiLineValue = false | ||
// Simulate a one line content check to make sure we have regex match. | ||
if oneLineRegex.Match([]byte(lineBuffer)) { | ||
attiasas marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return handler(&gofrogcmd.CmdOutputPattern{Line: pattern.Line, MatchedResults: oneLineRegex.FindStringSubmatch(lineBuffer)}) | ||
} | ||
} | ||
|
||
return pattern.Line, nil | ||
}}) | ||
|
||
return | ||
} | ||
|
||
func InstallWithLogParsing(tool PythonTool, commandArgs []string, log utils.Log, srcPath string) (map[string]entities.Dependency, error) { | ||
if tool == Pipenv { | ||
// Add verbosity flag to pipenv commands to collect necessary data | ||
|
@@ -161,19 +211,14 @@ func InstallWithLogParsing(tool PythonTool, commandArgs []string, log utils.Log, | |
installCmd.Dir = srcPath | ||
|
||
dependenciesMap := map[string]entities.Dependency{} | ||
|
||
// Create regular expressions for log parsing. | ||
collectingRegexp := regexp.MustCompile(`^Collecting\s(\w[\w-.]+)`) | ||
downloadingRegexp := regexp.MustCompile(`^\s*Downloading\s([^\s]*)\s\(`) | ||
usingCachedRegexp := regexp.MustCompile(`^\s*Using\scached\s([\S]+)\s\(`) | ||
alreadySatisfiedRegexp := regexp.MustCompile(`^Requirement\salready\ssatisfied:\s(\w[\w-.]+)`) | ||
parsers := []*gofrogcmd.CmdOutputPattern{} | ||
|
||
var packageName string | ||
expectingPackageFilePath := false | ||
|
||
// Extract downloaded package name. | ||
dependencyNameParser := gofrogcmd.CmdOutputPattern{ | ||
RegExp: collectingRegexp, | ||
parsers = append(parsers, &gofrogcmd.CmdOutputPattern{ | ||
RegExp: regexp.MustCompile(`^Collecting\s(\w[\w-.]+)`), | ||
ExecFunc: func(pattern *gofrogcmd.CmdOutputPattern) (string, error) { | ||
// If this pattern matched a second time before downloaded-file-name was found, prompt a message. | ||
if expectingPackageFilePath { | ||
|
@@ -186,7 +231,7 @@ func InstallWithLogParsing(tool PythonTool, commandArgs []string, log utils.Log, | |
} | ||
|
||
// Check for out of bound results. | ||
if len(pattern.MatchedResults)-1 < 0 { | ||
if len(pattern.MatchedResults)-1 <= 0 { | ||
log.Debug(fmt.Sprintf("Failed extracting package name from line: %s", pattern.Line)) | ||
return pattern.Line, nil | ||
} | ||
|
@@ -197,49 +242,34 @@ func InstallWithLogParsing(tool PythonTool, commandArgs []string, log utils.Log, | |
|
||
return pattern.Line, nil | ||
}, | ||
} | ||
|
||
// Extract downloaded file, stored in Artifactory. | ||
downloadedFileParser := gofrogcmd.CmdOutputPattern{ | ||
RegExp: downloadingRegexp, | ||
ExecFunc: func(pattern *gofrogcmd.CmdOutputPattern) (string, error) { | ||
// Check for out of bound results. | ||
if len(pattern.MatchedResults)-1 < 0 { | ||
log.Debug(fmt.Sprintf("Failed extracting download path from line: %s", pattern.Line)) | ||
return pattern.Line, nil | ||
} | ||
|
||
// If this pattern matched before package-name was found, do not collect this path. | ||
if !expectingPackageFilePath { | ||
log.Debug(fmt.Sprintf("Could not resolve package name for download path: %s , continuing...", packageName)) | ||
return pattern.Line, nil | ||
} | ||
}) | ||
|
||
// Save dependency information. | ||
filePath := pattern.MatchedResults[1] | ||
lastSlashIndex := strings.LastIndex(filePath, "/") | ||
var fileName string | ||
if lastSlashIndex == -1 { | ||
fileName = filePath | ||
} else { | ||
fileName = filePath[lastSlashIndex+1:] | ||
} | ||
dependenciesMap[strings.ToLower(packageName)] = entities.Dependency{Id: fileName} | ||
expectingPackageFilePath = false | ||
|
||
log.Debug(fmt.Sprintf("Found package: %s installed with: %s", packageName, fileName)) | ||
saveCaptureGroupAsDependencyInfo := func(pattern *gofrogcmd.CmdOutputPattern) (string, error) { | ||
fileName := extractFileNameFromRegexCaptureGroup(pattern) | ||
if fileName == "" { | ||
log.Debug(fmt.Sprintf("Failed extracting download path from line: %s", pattern.Line)) | ||
return pattern.Line, nil | ||
}, | ||
} | ||
// If this pattern matched before package-name was found, do not collect this path. | ||
if !expectingPackageFilePath { | ||
log.Debug(fmt.Sprintf("Could not resolve package name for download path: %s , continuing...", packageName)) | ||
return pattern.Line, nil | ||
} | ||
// Save dependency information. | ||
dependenciesMap[strings.ToLower(packageName)] = entities.Dependency{Id: fileName} | ||
expectingPackageFilePath = false | ||
log.Debug(fmt.Sprintf("Found package: %s installed with: %s", packageName, fileName)) | ||
return pattern.Line, nil | ||
} | ||
|
||
cachedFileParser := gofrogcmd.CmdOutputPattern{ | ||
RegExp: usingCachedRegexp, | ||
ExecFunc: downloadedFileParser.ExecFunc, | ||
} | ||
// Extract downloaded file, stored in Artifactory. (value at log may be split into multiple lines) | ||
parsers = append(parsers, getMultilineSplitCaptureOutputPattern(startDownloadingPattern, downloadingCaptureGroup, endPattern, saveCaptureGroupAsDependencyInfo)...) | ||
// Extract cached file, stored in Artifactory. (value at log may be split into multiple lines) | ||
parsers = append(parsers, getMultilineSplitCaptureOutputPattern(startUsingCachedPattern, usingCacheCaptureGroup, endPattern, saveCaptureGroupAsDependencyInfo)...) | ||
|
||
// Extract already installed packages names. | ||
installedPackagesParser := gofrogcmd.CmdOutputPattern{ | ||
RegExp: alreadySatisfiedRegexp, | ||
parsers = append(parsers, &gofrogcmd.CmdOutputPattern{ | ||
RegExp: regexp.MustCompile(`^Requirement\salready\ssatisfied:\s(\w[\w-.]+)`), | ||
ExecFunc: func(pattern *gofrogcmd.CmdOutputPattern) (string, error) { | ||
// Check for out of bound results. | ||
if len(pattern.MatchedResults)-1 < 0 { | ||
|
@@ -252,12 +282,28 @@ func InstallWithLogParsing(tool PythonTool, commandArgs []string, log utils.Log, | |
log.Debug(fmt.Sprintf("Found package: %s already installed", pattern.MatchedResults[1])) | ||
return pattern.Line, nil | ||
}, | ||
} | ||
}) | ||
|
||
// Execute command. | ||
_, errorOut, _, err := gofrogcmd.RunCmdWithOutputParser(installCmd, true, &dependencyNameParser, &downloadedFileParser, &cachedFileParser, &installedPackagesParser) | ||
_, errorOut, _, err := gofrogcmd.RunCmdWithOutputParser(installCmd, true, parsers...) | ||
if err != nil { | ||
return nil, fmt.Errorf("failed running %s command with error: '%s - %s'", string(tool), err.Error(), errorOut) | ||
} | ||
return dependenciesMap, nil | ||
} | ||
|
||
func extractFileNameFromRegexCaptureGroup(pattern *gofrogcmd.CmdOutputPattern) (fileName string) { | ||
// Check for out of bound results (no captures). | ||
if len(pattern.MatchedResults) <= 1 { | ||
return "" | ||
} | ||
// Extract file information from capture group. | ||
filePath := pattern.MatchedResults[1] | ||
lastSlashIndex := strings.LastIndex(filePath, "/") | ||
if lastSlashIndex == -1 { | ||
fileName = filePath | ||
} else { | ||
fileName = filePath[lastSlashIndex+1:] | ||
} | ||
return | ||
attiasas marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
package pythonutils | ||
|
||
import ( | ||
"fmt" | ||
"strings" | ||
"testing" | ||
|
||
gofrogcmd "github.com/jfrog/gofrog/io" | ||
"github.com/stretchr/testify/assert" | ||
) | ||
|
||
// TestGetMultilineCaptureOutputPattern is a table-driven test for getMultilineSplitCaptureOutputPattern.
// Each case supplies sample log text plus the start / capture-group / end patterns, and expects exactly
// one captured file name equal to expectedCapture, both when the value sits on a single line and when
// it is split over several lines.
func TestGetMultilineCaptureOutputPattern(t *testing.T) { | ||
tests := []struct { | ||
name string | ||
text string | ||
startCapturePattern string | ||
captureGroupPattern string | ||
endCapturePattern string | ||
expectedCapture string | ||
}{ | ||
{ | ||
name: "Using cached - single line captures", | ||
startCapturePattern: startUsingCachedPattern, | ||
captureGroupPattern: usingCacheCaptureGroup, | ||
endCapturePattern: endPattern, | ||
text: ` | ||
Looking in indexes: | ||
***localhost:8081/artifactory/api/pypi/cli-pipenv-pypi-virtual-1698829624/simple | ||
|
||
Collecting pexpect==4.8.0 (from -r /tmp/pipenv-qzun2hd3-requirements/pipenv-o_899oue-hashed-reqs.txt (line 1)) | ||
|
||
Using cached http://localhost:8081/artifactory/api/pypi/cli-pipenv-pypi-virtual-1698829624/packages/packages/39/7b/88dbb785881c28a102619d46423cb853b46dbccc70d3ac362d99773a78ce/pexpect-4.8.0-py2.py3-none-any.whl (59 kB)`, | ||
expectedCapture: `pexpect-4.8.0-py2.py3-none-any.whl`, | ||
}, | ||
{ | ||
name: "Using cached - multi line captures", | ||
startCapturePattern: startUsingCachedPattern, | ||
captureGroupPattern: usingCacheCaptureGroup, | ||
endCapturePattern: endPattern, | ||
text: ` | ||
Looking in indexes: | ||
***localhost:8081/artifactory/api/pypi/cli-pipenv-pypi-virtual-16 | ||
98829624/simple | ||
|
||
Collecting pexpect==4.8.0 (from -r | ||
/tmp/pipenv-qzun2hd3-requirements/pipenv-o_899oue-hashed-reqs.txt (line 1)) | ||
|
||
Using cached | ||
http://localhost:8081/artifactory/api/pypi/cli-pipenv-pypi-virtual-1698829624/pa | ||
ckages/packages/39/7b/88dbb785881c28a102619d46423cb853b46dbccc70d3ac362d99773a78 | ||
ce/pexpect-4.8.0-py2.py3-none-any.whl (59 kB)`, | ||
expectedCapture: `pexpect-4.8.0-py2.py3-none-any.whl`, | ||
}, | ||
{ | ||
name: "Downloading - single line captures", | ||
startCapturePattern: startDownloadingPattern, | ||
captureGroupPattern: downloadingCaptureGroup, | ||
endCapturePattern: endPattern, | ||
text: ` Preparing metadata (pyproject.toml): finished with status 'done' | ||
Collecting PyYAML==5.1.2 (from jfrog-python-example==1.0) | ||
Downloading http://localhost:8081/artifactory/api/pypi/cli-pypi-virtual-1698829558/packages/packages/e3/e8/b3212641ee2718d556df0f23f78de8303f068fe29cdaa7a91018849582fe/PyYAML-5.1.2.tar.gz (265 kB) | ||
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 265.0/265.0 kB 364.4 MB/s eta 0:00:00 | ||
Installing build dependencies: started`, | ||
expectedCapture: `PyYAML-5.1.2.tar.gz`, | ||
}, | ||
{ | ||
name: "Downloading - multi line captures", | ||
startCapturePattern: startDownloadingPattern, | ||
captureGroupPattern: downloadingCaptureGroup, | ||
endCapturePattern: endPattern, | ||
text: ` Preparing metadata (pyproject.toml): finished with status 'done' | ||
Collecting PyYAML==5.1.2 (from jfrog-python-example==1.0) | ||
Downloading http://localhost:8081/artifactory/api/pypi/cli-pypi-virtual-1698 | ||
829558/packages/packages/e3/e8/b3212641ee2718d556df0f23f78de8303f068fe29cdaa7a91018849 | ||
582fe/PyYAML-5.1.2.tar.gz (265 kB) | ||
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 265.0/265.0 kB 364.4 MB/s eta 0:00:00 | ||
Installing build dependencies: started`, | ||
expectedCapture: `PyYAML-5.1.2.tar.gz`, | ||
}, | ||
} | ||
|
||
// Run every case as a sub-test: stream the text through the parsers under test and verify that
// the aggregating handler recorded exactly one capture, equal to the expected file name.
for _, testCase := range tests { | ||
t.Run(testCase.name, func(t *testing.T) { | ||
aggFunc, captures := validateCaptures(testCase.expectedCapture) | ||
runDummyTextStream(t, testCase.text, getMultilineSplitCaptureOutputPattern( | ||
testCase.startCapturePattern, | ||
testCase.captureGroupPattern, | ||
testCase.endCapturePattern, | ||
aggFunc, | ||
)) | ||
if assert.Len(t, (*captures), 1, fmt.Sprintf("Expected 1 captured group, got size: %d", len(*captures))) { | ||
assert.Equal(t, testCase.expectedCapture, (*captures)[0], fmt.Sprintf("Expected capture group: %s, got: %s", testCase.expectedCapture, (*captures)[0])) | ||
} | ||
attiasas marked this conversation as resolved.
Show resolved
Hide resolved
|
||
}) | ||
} | ||
} | ||
|
||
func validateCaptures(expectedCaptures ...string) (func(pattern *gofrogcmd.CmdOutputPattern) (string, error), *[]string) { | ||
attiasas marked this conversation as resolved.
Show resolved
Hide resolved
|
||
captures := []string{} | ||
aggFunc := func(pattern *gofrogcmd.CmdOutputPattern) (string, error) { | ||
captured := extractFileNameFromRegexCaptureGroup(pattern) | ||
for _, expectedCapture := range expectedCaptures { | ||
if expectedCapture == captured { | ||
captures = append(captures, expectedCapture) | ||
} | ||
} | ||
return pattern.Line, nil | ||
} | ||
return aggFunc, &captures | ||
} | ||
|
||
func runDummyTextStream(t *testing.T, txt string, parsers []*gofrogcmd.CmdOutputPattern) { | ||
// tokenize the text to be represented line by line to simulate expected cmd log output | ||
lines := strings.Split(txt, "\n") | ||
// iterate over the lines to simulate line text stream | ||
for _, line := range lines { | ||
for _, parser := range parsers { | ||
// check if the line matches the regexp of the parser | ||
if parser.RegExp.MatchString(line) { | ||
parser.MatchedResults = parser.RegExp.FindStringSubmatch(line) | ||
parser.Line = line | ||
// execute the parser function | ||
_, scannerError := parser.ExecFunc(parser) | ||
assert.NoError(t, scannerError) | ||
} | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
regexp.MustCompile is heavy.
Let's make the effort to compile regex patterns only once, at package initialization, by putting them in package-level variables outside the functions.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These are dynamic patterns, constructed from the arguments passed to the method. I don't see any benefit in moving them outside the method, because the values they depend on are only available inside it.