Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Starting work on negation, wip #106

Merged
merged 1 commit into from
Sep 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ err := parser.ParseString("size = 10", ast)
- `"...":<identifier>` Match the literal, specifying the exact lexer token type to match.
- `<expr> <expr> ...` Match expressions.
- `<expr> | <expr>` Match one of the alternatives.
- `!<expr>` Match any token that is not the start of the expression (e.g. `@!";"` matches anything but the `;` character into the field).

The following modifiers can be used after any expression:

Expand Down Expand Up @@ -202,8 +203,8 @@ The best combination of speed, flexibility and usability is `lexer/regex.New()`.

Ordered by speed they are:

1. `lexer.DefaultDefinition` is based on the
[text/scanner](https://golang.org/pkg/text/scanner/) package and only allows
1. `lexer.DefaultDefinition` is based on the
[text/scanner](https://golang.org/pkg/text/scanner/) package and only allows
tokens provided by that package. This is the default lexer.
2. `lexer.Regexp()` (legacy) maps regular expression named subgroups to lexer symbols.
3. `lexer/regex.New()` is a more readable regex lexer, with each rule in the form `<name> = <regex>`.
Expand Down Expand Up @@ -372,7 +373,7 @@ There are a few areas where Participle can provide useful feedback to users of y
2. Participle will make a best effort to return as much of the AST up to the error location as possible.
3. Any node in the AST containing a field `Pos lexer.Position` or `Tok lexer.Token` will be automatically
populated from the nearest matching token.
4. Any node in the AST containing a field `EndPos lexer.Position` or `EndTok lexer.Token` will be
4. Any node in the AST containing a field `EndPos lexer.Position` or `EndTok lexer.Token` will be
automatically populated with the token at the end of the node.

These related pieces of information can be combined to provide fairly comprehensive error reporting.
14 changes: 14 additions & 0 deletions grammar.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ func (g *generatorContext) parseTermNoModifiers(slexer *structLexer) (node, erro
out, err = g.parseCapture(slexer)
case scanner.String, scanner.RawString, scanner.Char:
out, err = g.parseLiteral(slexer)
case '!':
return g.parseNegation(slexer)
case '[':
return g.parseOptional(slexer)
case '{':
Expand Down Expand Up @@ -282,6 +284,18 @@ func (g *generatorContext) parseGroup(slexer *structLexer) (node, error) {
return &group{expr: disj}, nil
}

// A token negation
//
// Accepts both the form !"some-literal" and !SomeNamedToken
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this comment accurate now?

// parseNegation parses a negated term of the form !<expr>.
//
// Both !"some-literal" and !SomeNamedToken are accepted. The caller has
// already peeked the leading '!' token.
func (g *generatorContext) parseNegation(slexer *structLexer) (node, error) {
	// Consume the '!' the caller peeked at.
	_, _ = slexer.Next()
	term, err := g.parseTermNoModifiers(slexer)
	if err != nil {
		return nil, err
	}
	return &negation{node: term}, nil
}

// A literal string.
//
// Note that for this to match, the tokeniser must be able to produce this string. For example,
Expand Down
34 changes: 34 additions & 0 deletions nodes.go
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,40 @@ func (l *literal) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.
return nil, nil
}

// negation wraps a node and matches any single token that the wrapped node
// does NOT match (the grammar form `!<expr>`).
type negation struct {
	node node
}

// String renders the node back in grammar syntax, e.g. `!";"`.
func (n *negation) String() string {
	return "!" + stringer(n.node)
}

// Parse implements the negation operator: it succeeds on any single token
// that does NOT match the wrapped node, and captures that token's value.
// It fails (returns nil, nil) at EOF, and reports an error when the wrapped
// node DOES match at the current position.
func (n *negation) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) {
	// Create a branch to avoid advancing the parser, but call neither Stop nor Accept on it
	// since we will discard a match.
	branch := ctx.Branch()
	notEOF, err := ctx.Peek(0)
	if err != nil {
		return nil, err
	}
	if notEOF.EOF() {
		// EOF cannot match a negation, which expects something
		return nil, nil
	}

	// Try the wrapped node on the throwaway branch; the main context is not
	// advanced either way. A parse error from the branch is deliberately
	// discarded — it simply means the wrapped node did not match, which is
	// exactly what a negation wants.
	out, err = n.node.Parse(branch, parent)

	if out != nil && err == nil {
		// out being non-nil means that what we don't want is actually here, so we report nomatch
		return nil, lexer.ErrorWithTokenf(notEOF, "unexpected '%s'", notEOF.Value)
	}

	// Just give the next token
	next, err := ctx.Next()
	if err != nil {
		return nil, err
	}
	return []reflect.Value{reflect.ValueOf(next.Value)}, nil
}

// Attempt to transform values to given type.
//
// This will dereference pointers, and attempt to parse strings into integer values, floats, etc.
Expand Down
54 changes: 54 additions & 0 deletions parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1212,3 +1212,57 @@ func TestPointerToList(t *testing.T) {
// require.NoError(t, err)
// require.NotNil(t, ast.List)
// }

// TestNegation verifies that `@!';'*` captures every token up to (but not
// including) the ';' terminator, and that input lacking the terminator is
// rejected.
func TestNegation(t *testing.T) {
	type grammar struct {
		EverythingUntilSemicolon *[]string `@!';'* @';'`
	}
	parser := mustTestParser(t, &grammar{})

	actual := &grammar{}
	err := parser.ParseString(`hello world ;`, actual)
	require.NoError(t, err)
	require.Equal(t, &[]string{"hello", "world", ";"}, actual.EverythingUntilSemicolon)

	// No terminating ';' — the grammar cannot complete.
	err = parser.ParseString(`hello world`, actual)
	require.Error(t, err)
}

// TestNegationWithPattern verifies negation of a grouped expression:
// !(';' String) matches any token that does not begin that two-token
// sequence, so a lone ';' is still consumed.
func TestNegationWithPattern(t *testing.T) {
	type grammar struct {
		EverythingMoreComplex *[]string `@!(';' String)* @';' @String`
	}
	parser := mustTestParser(t, &grammar{})
	actual := &grammar{}

	err := parser.ParseString(`hello world ; 'some-str'`, actual)
	require.NoError(t, err)
	require.Equal(t, &[]string{"hello", "world", ";", "some-str"}, actual.EverythingMoreComplex)

	// The first ';' is not followed by a String, so the negation consumes it.
	err = parser.ParseString(`hello ; world ; 'hey'`, actual)
	require.NoError(t, err)
	require.Equal(t, &[]string{"hello", ";", "world", ";", "hey"}, actual.EverythingMoreComplex)

	// Ends with ';' but the required trailing String is missing.
	err = parser.ParseString(`hello ; world ;`, actual)
	require.Error(t, err)
}

func TestNegationWithDisjunction(t *testing.T) {
type grammar struct {
EverythingMoreComplex *[]string `@!(';' | ',')* @(';' | ',')`
}

// Note: we need more lookahead since (';' String) needs some before failing to match
ceymard marked this conversation as resolved.
Show resolved Hide resolved
p := mustTestParser(t, &grammar{})
ast := &grammar{}
err := p.ParseString(`hello world ;`, ast)
require.NoError(t, err)
require.Equal(t, &[]string{"hello", "world", ";"}, ast.EverythingMoreComplex)

err = p.ParseString(`hello world , `, ast)
require.NoError(t, err)
require.Equal(t, &[]string{"hello", "world", ","}, ast.EverythingMoreComplex)

}
14 changes: 14 additions & 0 deletions stringer.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,17 @@ func (s *stringerVisitor) visit(n node, depth int) { // nolint: gocognit
}
fmt.Fprint(s, "*")

case *negation:
fmt.Fprintf(s, "!")
composite := compositeNode(map[node]bool{}, n)
if composite {
fmt.Fprint(s, "(")
}
s.visit(n.node, depth)
if composite {
fmt.Fprint(s, ")")
}

case *literal:
fmt.Fprintf(s, "%q", n.s)
if n.t != lexer.EOF && n.s == "" {
Expand Down Expand Up @@ -153,6 +164,9 @@ func compositeNode(seen map[node]bool, n node) bool {
case *reference, *literal, *parseable:
return false

case *negation:
return compositeNode(seen, n.node)

case *strct:
return compositeNode(seen, n.expr)

Expand Down