Skip to content

Commit

Permalink
improve checking for syntax errors
Browse files Browse the repository at this point in the history
fixes #4

The issue suggested fixing this at the scan level, but scan is just a tokenizer.  Parse knows what is expected syntactically.  The fix was made at the parse level and tests for malformed expressions were added.
  • Loading branch information
elrayle committed Feb 8, 2023
1 parent 9f1f925 commit a83a0a1
Show file tree
Hide file tree
Showing 2 changed files with 214 additions and 4 deletions.
94 changes: 90 additions & 4 deletions spdxexp/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,44 @@ func parse(source string) (*node, error) {
}

func (t *tokenStream) parseTokens() *node {
if len(t.tokens) == 0 {
// malformed with no tokens
t.err = errors.New("no tokens to parse")
return nil
}

node := t.parseExpression()
if t.err != nil {
return nil
}
if node == nil || t.hasMore() {

if node == nil {
// unable to parse expression for unknown reason
t.err = errors.New("syntax error")
return nil
} else if t.hasMore() {
// malformed with too many tokens - try to determine the cause

// check for close parenthesis without matching open parenthesis
closeParen := t.parseOperator(")")
if closeParen != nil {
t.err = errors.New("close parenthesis does not have a matching open parenthesis")
return nil
}

// check for licenses without operator
lic := t.parseLicense()
if lic != nil {
t.err = errors.New("licenses or expressions are not separated by an operator")
return nil
}

// cannot determine what syntax error occurred
t.err = errors.New("syntax error")
return nil
}

// all is well
return node
}

Expand Down Expand Up @@ -75,9 +105,15 @@ func (t *tokenStream) parseParenthesizedExpression() *node {
return nil
}

if !t.hasMore() {
// no more tokens, so missing closing paren
t.err = errors.New("open parenthesis does not have a matching close parenthesis")
return nil
}

closeParen := t.parseOperator(")")
if closeParen == nil {
t.err = errors.New("expected ')'")
t.err = errors.New("open parenthesis does not have a matching close parenthesis")
return nil
}

Expand Down Expand Up @@ -109,6 +145,44 @@ func (t *tokenStream) parseAtom() *node {
return licenseNode
}

// no atom found - try to determine the cause
if t.hasMore() {
// check for operators
operator := t.parseOperator(")")
if operator != nil {
if t.index == 1 {
t.err = errors.New("expression starts with close parenthesis")
} else {
t.err = errors.New("expected license or expression, but found close parenthesis")
}
return nil
}

operator = t.parseOperator("OR")
if operator != nil {
if t.index == 1 {
t.err = errors.New("expression starts with OR")
} else {
t.err = errors.New("expected license or expression, but found OR")
}
return nil
}

operator = t.parseOperator("AND")
if operator != nil {
if t.index == 1 {
t.err = errors.New("expression starts with AND")
} else {
t.err = errors.New("expected license or expression, but found AND")
}
return nil
}

// cannot determine what syntax error occurred
t.err = errors.New("syntax error")
return nil
}

t.err = errors.New("expected node, but found none")
return nil
}
Expand All @@ -132,12 +206,18 @@ func (t *tokenStream) parseExpression() *node {
}
op := strings.ToLower(*operator)

if !t.hasMore() {
// expression found and no more tokens to process
t.err = errors.New("expected expression following OR, but found none")
return nil
}

right := t.parseExpression()
if t.err != nil {
return nil
}
if right == nil {
t.err = errors.New("expected expression, but found none")
t.err = errors.New("expected expression following OR, but found none")
return nil
}

Expand Down Expand Up @@ -172,12 +252,18 @@ func (t *tokenStream) parseAnd() *node {
return left
}

if !t.hasMore() {
// expression found and no more tokens to process
t.err = errors.New("expected expression following AND, but found none")
return nil
}

right := t.parseAnd()
if t.err != nil {
return nil
}
if right == nil {
t.err = errors.New("expected expression, but found none")
t.err = errors.New("expected expression following AND, but found none")
return nil
}

Expand Down
124 changes: 124 additions & 0 deletions spdxexp/parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1075,6 +1075,42 @@ func TestParseTokens(t *testing.T) {
},
"{ LEFT: { LEFT: MIT and RIGHT: Apache-1.0+ } or RIGHT: { LEFT: DocumentRef-spdx-tool-1.2:LicenseRef-MIT-Style-2 or RIGHT: GPL-2.0 with Bison-exception-2.2 } }", nil,
},
{"operator error - missing close parenthesis", getMissingEndParenTokens(0),
&node{}, "", errors.New("open parenthesis does not have a matching close parenthesis"),
},
{"operator error - missing open parenthesis", getMissingStartParenTokens(0),
&node{}, "", errors.New("close parenthesis does not have a matching open parenthesis"),
},
{"operator error - missing operator", getMissingOperatorTokens(0),
&node{}, "", errors.New("licenses or expressions are not separated by an operator"),
},
{"operator error - missing license after OR", getMissingSecondLicenseInORTokens(0),
&node{}, "", errors.New("expected expression following OR, but found none"),
},
{"operator error - missing license after AND", getMissingSecondLicenseInANDTokens(0),
&node{}, "", errors.New("expected expression following AND, but found none"),
},
{"operator error - starts with close parenthesis", getStartsWithCloseParenTokens(0),
&node{}, "", errors.New("expression starts with close parenthesis"),
},
{"operator error - starts with OR", getStartsWithORTokens(0),
&node{}, "", errors.New("expression starts with OR"),
},
{"operator error - starts with AND", getStartsWithANDTokens(0),
&node{}, "", errors.New("expression starts with AND"),
},
{"operator error - ends with OR", getEndsWithORTokens(0),
&node{}, "", errors.New("expected expression following OR, but found none"),
},
{"operator error - ends with AND", getEndsWithANDTokens(0),
&node{}, "", errors.New("expected expression following AND, but found none"),
},
{"operator error - OR immediately after operator", getDoubleORTokens(0),
&node{}, "", errors.New("expected license or expression, but found OR"),
},
{"operator error - AND immediately after operator", getDoubleANDTokens(0),
&node{}, "", errors.New("expected license or expression, but found AND"),
},
}

for _, test := range tests {
Expand Down Expand Up @@ -1328,6 +1364,94 @@ func getKitchSinkTokens(index int) *tokenStream {
return getTokenStream(tokens, index)
}

func getMissingOperatorTokens(index int) *tokenStream {
var tokens []token
tokens = append(tokens, token{role: licenseToken, value: "MIT"})
tokens = append(tokens, token{role: licenseToken, value: "Apache-2.0"})
return getTokenStream(tokens, index)
}

func getMissingSecondLicenseInORTokens(index int) *tokenStream {
var tokens []token
tokens = append(tokens, token{role: licenseToken, value: "MIT"})
tokens = append(tokens, token{role: operatorToken, value: "OR"})
return getTokenStream(tokens, index)
}

func getMissingSecondLicenseInANDTokens(index int) *tokenStream {
var tokens []token
tokens = append(tokens, token{role: licenseToken, value: "MIT"})
tokens = append(tokens, token{role: operatorToken, value: "AND"})
return getTokenStream(tokens, index)
}

func getStartsWithCloseParenTokens(index int) *tokenStream {
var tokens []token
tokens = append(tokens, token{role: operatorToken, value: ")"})
tokens = append(tokens, token{role: licenseToken, value: "MIT"})
return getTokenStream(tokens, index)
}

func getStartsWithORTokens(index int) *tokenStream {
var tokens []token
tokens = append(tokens, token{role: operatorToken, value: "OR"})
tokens = append(tokens, token{role: licenseToken, value: "MIT"})
return getTokenStream(tokens, index)
}

func getStartsWithANDTokens(index int) *tokenStream {
var tokens []token
tokens = append(tokens, token{role: operatorToken, value: "AND"})
tokens = append(tokens, token{role: licenseToken, value: "MIT"})
return getTokenStream(tokens, index)
}

func getEndsWithORTokens(index int) *tokenStream {
var tokens []token
tokens = append(tokens, token{role: licenseToken, value: "MIT"})
tokens = append(tokens, token{role: operatorToken, value: "OR"})
return getTokenStream(tokens, index)
}

func getEndsWithANDTokens(index int) *tokenStream {
var tokens []token
tokens = append(tokens, token{role: licenseToken, value: "MIT"})
tokens = append(tokens, token{role: operatorToken, value: "AND"})
return getTokenStream(tokens, index)
}

func getDoubleORTokens(index int) *tokenStream {
var tokens []token
tokens = append(tokens, token{role: licenseToken, value: "MIT"})
tokens = append(tokens, token{role: operatorToken, value: "OR"})
tokens = append(tokens, token{role: operatorToken, value: "OR"})
tokens = append(tokens, token{role: licenseToken, value: "Apache-2.0"})
return getTokenStream(tokens, index)
}

func getDoubleANDTokens(index int) *tokenStream {
var tokens []token
tokens = append(tokens, token{role: licenseToken, value: "MIT"})
tokens = append(tokens, token{role: operatorToken, value: "AND"})
tokens = append(tokens, token{role: operatorToken, value: "AND"})
tokens = append(tokens, token{role: licenseToken, value: "Apache-2.0"})
return getTokenStream(tokens, index)
}

func getMissingStartParenTokens(index int) *tokenStream {
var tokens []token
tokens = append(tokens, token{role: licenseToken, value: "MIT"})
tokens = append(tokens, token{role: operatorToken, value: ")"})
return getTokenStream(tokens, index)
}

func getMissingEndParenTokens(index int) *tokenStream {
var tokens []token
tokens = append(tokens, token{role: operatorToken, value: "("})
tokens = append(tokens, token{role: licenseToken, value: "MIT"})
return getTokenStream(tokens, index)
}

func getTokenStream(tokens []token, index int) *tokenStream {
return &tokenStream{
tokens: tokens,
Expand Down

0 comments on commit a83a0a1

Please sign in to comment.