Skip to content

Commit

Permalink
Provide a public lexer and use it in the parser
Browse files Browse the repository at this point in the history
This commit exposes a lexer that we can use to provide better IDE
and formatter support. The lexer-based parser also typically provides
more accurate source locations and more informative error messages.
  • Loading branch information
mtdowling committed Apr 18, 2023
1 parent 500d89c commit adfb244
Showing 238 changed files with 3,167 additions and 1,497 deletions.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
@@ -21,168 +21,217 @@
import software.amazon.smithy.model.node.BooleanNode;
import software.amazon.smithy.model.node.Node;
import software.amazon.smithy.model.node.NullNode;
import software.amazon.smithy.model.node.NumberNode;
import software.amazon.smithy.model.node.ObjectNode;
import software.amazon.smithy.model.node.StringNode;
import software.amazon.smithy.model.validation.Severity;
import software.amazon.smithy.model.validation.ValidationEvent;
import software.amazon.smithy.utils.Pair;

/**
* Parses IDL nodes.
* Parses Node values from an {@link IdlTokenizer}.
*/
final class IdlNodeParser {

private static final String SYNTACTIC_SHAPE_ID_TARGET = "SyntacticShapeIdTarget";

private IdlNodeParser() {}

static Node parseNode(IdlModelParser parser) {
return parseNode(parser, parser.currentLocation());
private IdlNodeParser() { }

/**
 * Parses the Node value found at the tokenizer's current token, using the location of that token as the
 * source location of the resulting {@link Node}.
 *
 * <p>After the Node value is parsed, the tokenizer has been advanced to the token that follows it.</p>
 *
 * @param tokenizer Tokenizer to consume and advance.
 * @param resolver Forward reference resolver.
 * @return Returns the parsed node value.
 * @throws ModelSyntaxException if the Node is not well-formed.
 */
static Node expectAndSkipNode(IdlTokenizer tokenizer, IdlReferenceResolver resolver) {
    // Capture the location before any token is consumed so the node points at where it starts.
    SourceLocation start = tokenizer.getCurrentTokenLocation();
    return expectAndSkipNode(tokenizer, resolver, start);
}

static Node parseNode(IdlModelParser parser, SourceLocation location) {
char c = parser.peek();
switch (c) {
case '{':
return parseObjectNode(parser, "object node", location);
case '[':
return parseArrayNode(parser, location);
case '"': {
if (peekTextBlock(parser)) {
return parseTextBlock(parser, location);
} else {
return new StringNode(IdlTextParser.parseQuotedString(parser), location);
}
}
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
case '-':
return parser.parseNumberNode(location);
default: {
return parseNodeTextWithKeywords(parser, location, ParserUtils.parseShapeId(parser));
}
/**
 * Parses the Node value found at the tokenizer's current token and assigns it the given
 * {@link SourceLocation} instead of the location of the current token.
 *
 * <p>The current token must be a string, text block, number, identifier, opening brace, or opening
 * bracket. After the Node value is parsed, the tokenizer has been advanced to the token that follows it.</p>
 *
 * @param tokenizer Tokenizer to consume and advance.
 * @param resolver Forward reference resolver.
 * @param location Source location to assign to the node.
 * @return Returns the parsed node value.
 * @throws ModelSyntaxException if the Node is not well-formed.
 */
static Node expectAndSkipNode(IdlTokenizer tokenizer, IdlReferenceResolver resolver, SourceLocation location) {
    IdlToken current = tokenizer.expect(
            IdlToken.STRING,
            IdlToken.TEXT_BLOCK,
            IdlToken.NUMBER,
            IdlToken.IDENTIFIER,
            IdlToken.LBRACE,
            IdlToken.LBRACKET);

    switch (current) {
        case LBRACE: {
            return parseObjectNode(tokenizer, resolver, location);
        }
        case NUMBER: {
            // Read the value before advancing; it belongs to the current token.
            Number value = tokenizer.getCurrentTokenNumberValue();
            tokenizer.next();
            return new NumberNode(value, location);
        }
        case IDENTIFIER: {
            // Identifiers may be keywords (true, false, null) or unquoted shape ID references.
            String id = tokenizer.internString(IdlShapeIdParser.expectAndSkipShapeId(tokenizer));
            return parseIdentifier(resolver, id, location);
        }
        case STRING:
        case TEXT_BLOCK: {
            // Copy the slice into a String before advancing; it belongs to the current token.
            String text = tokenizer.getCurrentTokenStringSlice().toString();
            tokenizer.next();
            return new StringNode(text, location);
        }
        case LBRACKET:
        default: {
            return parseArrayNode(tokenizer, resolver, location);
        }
    }
}

static Node parseNodeTextWithKeywords(IdlModelParser parser, SourceLocation location, String text) {
switch (text) {
case "true":
/**
 * Converts an unquoted identifier into a {@link Node}.
 *
 * <p>If the identifier is one of the recognized keywords, the node for that keyword is created.
 * Any other identifier is treated as a syntactic shape ID that is resolved through the given resolver.</p>
 *
 * @param resolver Forward reference resolver.
 * @param identifier Identifier to parse.
 * @param location Source location to assign to the identifier.
 * @return Returns the parsed identifier.
 */
static Node parseIdentifier(
        IdlReferenceResolver resolver,
        String identifier,
        SourceLocation location
) {
    Keyword reserved = Keyword.from(identifier);
    if (reserved != null) {
        return reserved.createNode(location);
    }
    return parseSyntacticShapeId(resolver, identifier, location);
}

private enum Keyword {
TRUE {
@Override
protected Node createNode(SourceLocation location) {
return new BooleanNode(true, location);
case "false":
}
},
FALSE {
@Override
protected Node createNode(SourceLocation location) {
return new BooleanNode(false, location);
case "null":
}
},
NULL {
@Override
protected Node createNode(SourceLocation location) {
return new NullNode(location);
default:
// Unquoted node values syntactically are assumed to be references
// to shapes. A lazy string node is used because the shape ID may
// not be able to be resolved until after the entire model is loaded.
Pair<StringNode, Consumer<String>> pair = StringNode.createLazyString(text, location);
Consumer<String> consumer = pair.right;
parser.addForwardReference(text, (id, typeProvider) -> {
if (typeProvider.apply(id) == null) {
parser.emit(ValidationEvent.builder()
.id(SYNTACTIC_SHAPE_ID_TARGET)
.severity(Severity.DANGER)
.message(String.format("Syntactic shape ID `%s` does not resolve to a valid shape ID: "
+ "`%s`. Did you mean to quote this string? Are you missing a "
+ "model file?", text, id))
.sourceLocation(location)
.build());
}
consumer.accept(id.toString());
});
return pair.left;
}
};

protected abstract Node createNode(SourceLocation location);

/**
 * Looks up the keyword constant that corresponds to the given text.
 *
 * @param keyword Text to look up.
 * @return Returns the matching keyword, or {@code null} if the text is not a keyword.
 */
static Keyword from(String keyword) {
    Keyword match;
    switch (keyword) {
        case "null":
            match = Keyword.NULL;
            break;
        case "false":
            match = Keyword.FALSE;
            break;
        case "true":
            match = Keyword.TRUE;
            break;
        default:
            match = null;
            break;
    }
    return match;
}
}

/**
 * Checks whether the next three characters of the parser are all double quotes.
 *
 * @param parser Parser to peek into.
 * @return Returns true if the parser is positioned at three consecutive double quotes.
 */
static boolean peekTextBlock(IdlModelParser parser) {
    if (parser.peek() != '"') {
        return false;
    }
    if (parser.peek(1) != '"') {
        return false;
    }
    return parser.peek(2) == '"';
}

static Node parseTextBlock(IdlModelParser parser, SourceLocation location) {
parser.expect('"');
parser.expect('"');
parser.expect('"');
return new StringNode(IdlTextParser.parseQuotedTextAndTextBlock(parser, true), location);
/**
 * Creates a lazily resolved string node for an unquoted identifier that is assumed to reference a shape.
 *
 * <p>The resolved shape ID is always written into the lazy string node. If the resolver reports no type
 * for the ID, a DANGER validation event is returned to the resolver.</p>
 *
 * @param resolver Forward reference resolver.
 * @param identifier Unquoted identifier to resolve.
 * @param location Source location to assign to the node and to any emitted event.
 * @return Returns the lazy string node.
 */
private static Node parseSyntacticShapeId(
        IdlReferenceResolver resolver,
        String identifier,
        SourceLocation location
) {
    // The value of the node cannot be known yet: the shape ID may not be resolvable until the whole
    // model has been loaded, so hand the resolver a setter that fills in the value later.
    Pair<StringNode, Consumer<String>> lazy = StringNode.createLazyString(identifier, location);
    Consumer<String> valueSetter = lazy.right;

    resolver.resolve(identifier, (id, type) -> {
        valueSetter.accept(id.toString());
        if (type != null) {
            return null;
        }
        String message = String.format("Syntactic shape ID `%s` does not resolve to a valid shape ID: "
                                       + "`%s`. Did you mean to quote this string? Are you missing a "
                                       + "model file?", identifier, id);
        return ValidationEvent.builder()
                .id(SYNTACTIC_SHAPE_ID_TARGET)
                .severity(Severity.DANGER)
                .message(message)
                .sourceLocation(location)
                .build();
    });

    return lazy.left;
}

/**
 * Parses an object node, using the parser's current location as the location of the node.
 *
 * @param parser Parser to consume.
 * @param parent Description of the enclosing construct, passed through to the location-aware overload.
 * @return Returns the parsed object node.
 */
static ObjectNode parseObjectNode(IdlModelParser parser, String parent) {
    SourceLocation start = parser.currentLocation();
    return parseObjectNode(parser, parent, start);
}
private static ArrayNode parseArrayNode(
IdlTokenizer tokenizer,
IdlReferenceResolver resolver,
SourceLocation location
) {
tokenizer.increaseNestingLevel();
ArrayNode.Builder builder = ArrayNode.builder().sourceLocation(location);

static ObjectNode parseObjectNode(IdlModelParser parser, String parent, SourceLocation location) {
parser.increaseNestingLevel();
ObjectNode.Builder builder = ObjectNode.builder()
.sourceLocation(location);
parser.expect('{');
parser.ws();
tokenizer.expect(IdlToken.LBRACKET);
tokenizer.next();
tokenizer.skipWsAndDocs();

while (!parser.eof()) {
char c = parser.peek();
if (c == '}') {
do {
if (tokenizer.getCurrentToken() == IdlToken.RBRACKET) {
break;
} else {
SourceLocation keyLocation = parser.currentLocation();
String key = parseNodeObjectKey(parser);
parser.ws();
parser.expect(':');
if (parser.peek() == '=') {
throw parser.syntax("The `:=` syntax may only be used when defining inline operation input and "
+ "output shapes.");
}
parser.ws();
Node value = parseNode(parser);
StringNode keyNode = new StringNode(key, keyLocation);
if (builder.hasMember(key)) {
throw parser.syntax("Duplicate member of " + parent + ": '" + keyNode.getValue() + '\'');
}
builder.withMember(keyNode, value);
parser.ws();
builder.withValue(expectAndSkipNode(tokenizer, resolver));
tokenizer.skipWsAndDocs();
}
}
} while (true);

parser.expect('}');
parser.decreaseNestingLevel();
tokenizer.expect(IdlToken.RBRACKET);
tokenizer.next();
tokenizer.decreaseNestingLevel();
return builder.build();
}

/**
 * Parses the key of an object node member.
 *
 * <p>A key that starts with a double quote is parsed as a quoted string; any other key is parsed
 * as an identifier.</p>
 *
 * @param parser Parser to consume.
 * @return Returns the parsed key.
 */
static String parseNodeObjectKey(IdlModelParser parser) {
    boolean quoted = parser.peek() == '"';
    return quoted
            ? IdlTextParser.parseQuotedString(parser)
            : ParserUtils.parseIdentifier(parser);
}

private static ArrayNode parseArrayNode(IdlModelParser parser, SourceLocation location) {
parser.increaseNestingLevel();
ArrayNode.Builder builder = ArrayNode.builder()
.sourceLocation(location);
parser.expect('[');
parser.ws();

while (!parser.eof()) {
char c = parser.peek();
if (c == ']') {
private static ObjectNode parseObjectNode(
IdlTokenizer tokenizer,
IdlReferenceResolver resolver,
SourceLocation location
) {
tokenizer.expect(IdlToken.LBRACE);
tokenizer.next();
tokenizer.skipWsAndDocs();
tokenizer.increaseNestingLevel();
ObjectNode.Builder builder = ObjectNode.builder().sourceLocation(location);

while (tokenizer.hasNext()) {
if (tokenizer.expect(IdlToken.RBRACE, IdlToken.STRING, IdlToken.IDENTIFIER) == IdlToken.RBRACE) {
break;
} else {
builder.withValue(parseNode(parser));
parser.ws();
}

String key = tokenizer.internString(tokenizer.getCurrentTokenStringSlice());
SourceLocation keyLocation = tokenizer.getCurrentTokenLocation();
tokenizer.next();
tokenizer.skipWsAndDocs();
tokenizer.expect(IdlToken.COLON);
tokenizer.next();
tokenizer.skipWsAndDocs();

Node value = expectAndSkipNode(tokenizer, resolver);
if (builder.hasMember(key)) {
throw new ModelSyntaxException("Duplicate member: '" + key + '\'', keyLocation);
}
builder.withMember(key, value);
tokenizer.skipWsAndDocs();
}

parser.expect(']');
parser.decreaseNestingLevel();
tokenizer.expect(IdlToken.RBRACE);
tokenizer.next();
tokenizer.decreaseNestingLevel();
return builder.build();
}
}
Loading

0 comments on commit adfb244

Please sign in to comment.