Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More complete parsing support for functions. #475

Merged
merged 3 commits into from
Apr 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion parser/parse_node_kind.def
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,11 @@ CARBON_PARSE_NODE_KIND(DeclarationEnd)
CARBON_PARSE_NODE_KIND(EmptyDeclaration)
CARBON_PARSE_NODE_KIND(DeclaredName)
CARBON_PARSE_NODE_KIND(FunctionDeclaration)
CARBON_PARSE_NODE_KIND(ParameterListEnd)
CARBON_PARSE_NODE_KIND(ParameterList)
CARBON_PARSE_NODE_KIND(ParameterListComma)
CARBON_PARSE_NODE_KIND(ParameterListEnd)
CARBON_PARSE_NODE_KIND(ParameterDeclaration)
CARBON_PARSE_NODE_KIND(ReturnType)
CARBON_PARSE_NODE_KIND(VariableDeclaration)
CARBON_PARSE_NODE_KIND(VariableInitializer)
CARBON_PARSE_NODE_KIND(FileEnd)
Expand All @@ -35,6 +38,7 @@ CARBON_PARSE_NODE_KIND(Condition)
CARBON_PARSE_NODE_KIND(ConditionEnd)
CARBON_PARSE_NODE_KIND(ContinueStatement)
CARBON_PARSE_NODE_KIND(BreakStatement)
CARBON_PARSE_NODE_KIND(ReturnStatement)
CARBON_PARSE_NODE_KIND(StatementEnd)

// Expressions.
Expand Down
111 changes: 109 additions & 2 deletions parser/parse_tree_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,83 @@ TEST_F(ParseTreeTest,
MatchFileEnd()}));
}

TEST_F(ParseTreeTest, FunctionDeclarationWithParameterList) {
TokenizedBuffer tokens = GetTokenizedBuffer("fn foo(Int bar, Int baz);");
fowles marked this conversation as resolved.
Show resolved Hide resolved
ParseTree tree = ParseTree::Parse(tokens, consumer);
EXPECT_FALSE(tree.HasErrors());
EXPECT_THAT(
tree,
MatchParseTreeNodes(
{MatchFunctionDeclaration(
MatchDeclaredName("foo"),
MatchParameterList(
MatchParameterDeclaration(MatchNameReference("Int"), "bar"),
MatchParameterListComma(),
MatchParameterDeclaration(MatchNameReference("Int"), "baz"),
MatchParameterListEnd()),
MatchDeclarationEnd()),
MatchFileEnd()}));
}

TEST_F(ParseTreeTest, FunctionDefinitionWithParameterList) {
TokenizedBuffer tokens = GetTokenizedBuffer(
"fn foo(Int bar, Int baz) {\n"
" foo(baz, bar + baz);\n"
"}");
ParseTree tree = ParseTree::Parse(tokens, consumer);
EXPECT_FALSE(tree.HasErrors());
EXPECT_THAT(
tree,
MatchParseTreeNodes(
{MatchFunctionDeclaration(
MatchDeclaredName("foo"),
MatchParameterList(
MatchParameterDeclaration(MatchNameReference("Int"), "bar"),
MatchParameterListComma(),
MatchParameterDeclaration(MatchNameReference("Int"), "baz"),
MatchParameterListEnd()),
MatchCodeBlock(
MatchExpressionStatement(MatchCallExpression(
MatchNameReference("foo"), MatchNameReference("baz"),
MatchCallExpressionComma(),
MatchInfixOperator(MatchNameReference("bar"), "+",
MatchNameReference("baz")),
MatchCallExpressionEnd())),
MatchCodeBlockEnd())),
MatchFileEnd()}));
}

TEST_F(ParseTreeTest, FunctionDeclarationWithReturnType) {
TokenizedBuffer tokens = GetTokenizedBuffer("fn foo() -> Int;");
ParseTree tree = ParseTree::Parse(tokens, consumer);
EXPECT_FALSE(tree.HasErrors());
EXPECT_THAT(
tree,
MatchParseTreeNodes(
{MatchFunctionDeclaration(MatchDeclaredName("foo"), MatchParameters(),
MatchReturnType(MatchNameReference("Int")),
MatchDeclarationEnd()),
MatchFileEnd()}));
}

TEST_F(ParseTreeTest, FunctionDefinitionWithReturnType) {
TokenizedBuffer tokens = GetTokenizedBuffer(
"fn foo() -> Int {\n"
" return 42;\n"
"}");
ParseTree tree = ParseTree::Parse(tokens, consumer);
EXPECT_FALSE(tree.HasErrors());
EXPECT_THAT(tree,
MatchParseTreeNodes(
{MatchFunctionDeclaration(
MatchDeclaredName("foo"), MatchParameters(),
MatchReturnType(MatchNameReference("Int")),
MatchCodeBlock(MatchReturnStatement(MatchLiteral("42"),
MatchStatementEnd()),
MatchCodeBlockEnd())),
MatchFileEnd()}));
}

TEST_F(ParseTreeTest, FunctionDeclarationWithSingleIdentifierParameterList) {
TokenizedBuffer tokens = GetTokenizedBuffer("fn foo(bar);");
ParseTree tree = ParseTree::Parse(tokens, consumer);
Expand All @@ -159,8 +236,9 @@ TEST_F(ParseTreeTest, FunctionDeclarationWithSingleIdentifierParameterList) {
EXPECT_THAT(tree,
MatchParseTreeNodes(
{MatchFunctionDeclaration(
HasError, MatchDeclaredName("foo"),
MatchParameterList(HasError, MatchParameterListEnd()),
MatchDeclaredName("foo"),
MatchParameterList(MatchNameReference("bar"), HasError,
MatchParameterListEnd()),
MatchDeclarationEnd()),
MatchFileEnd()}));
}
Expand Down Expand Up @@ -615,6 +693,35 @@ TEST_F(ParseTreeTest, WhileBreakContinue) {
MatchFileEnd()}));
}

TEST_F(ParseTreeTest, Return) {
TokenizedBuffer tokens = GetTokenizedBuffer(
"fn F() {\n"
" if (c)\n"
" return;\n"
"}\n"
"fn G(Int x) -> Int {\n"
" return x;\n"
"}");
ParseTree tree = ParseTree::Parse(tokens, consumer);
EXPECT_FALSE(tree.HasErrors());

EXPECT_THAT(
tree,
MatchParseTreeNodes(
{MatchFunctionWithBody(MatchIfStatement(
MatchCondition(MatchNameReference("c"), MatchConditionEnd()),
MatchReturnStatement(MatchStatementEnd()))),
MatchFunctionDeclaration(
MatchDeclaredName(),
MatchParameters(
MatchParameterDeclaration(MatchNameReference("Int"), "x")),
MatchReturnType(MatchNameReference("Int")),
MatchCodeBlock(MatchReturnStatement(MatchNameReference("x"),
MatchStatementEnd()),
MatchCodeBlockEnd())),
MatchFileEnd()}));
}

auto GetAndDropLine(llvm::StringRef& s) -> std::string {
auto newline_offset = s.find_first_of('\n');
llvm::StringRef line = s.slice(0, newline_offset);
Expand Down
175 changes: 120 additions & 55 deletions parser/parser_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ struct ExpectedVariableName : SimpleDiagnostic<ExpectedVariableName> {
"Expected variable name after type in `var` declaration.";
};

struct ExpectedParameterName : SimpleDiagnostic<ExpectedParameterName> {
static constexpr llvm::StringLiteral ShortName = "syntax-error";
static constexpr llvm::StringLiteral Message =
"Expected parameter name after type in parameter declaration.";
};

struct UnrecognizedDeclaration : SimpleDiagnostic<UnrecognizedDeclaration> {
static constexpr llvm::StringLiteral ShortName = "syntax-error";
static constexpr llvm::StringLiteral Message =
Expand Down Expand Up @@ -104,11 +110,10 @@ struct ExpectedIdentifierAfterDot
"Expected identifier after `.`.";
};

struct UnexpectedTokenInFunctionArgs
: SimpleDiagnostic<UnexpectedTokenInFunctionArgs> {
struct UnexpectedTokenAfterListElement
: SimpleDiagnostic<UnexpectedTokenAfterListElement> {
static constexpr llvm::StringLiteral ShortName = "syntax-error";
static constexpr llvm::StringLiteral Message =
"Unexpected token in function argument list.";
static constexpr llvm::StringLiteral Message = "Expected `,` or `)`.";
};

struct OperatorRequiresParentheses
Expand Down Expand Up @@ -331,19 +336,94 @@ auto ParseTree::Parser::ParseCloseParen(TokenizedBuffer::Token open_paren,
return llvm::None;
}

auto ParseTree::Parser::ParseFunctionSignature() -> Node {
template <typename ListElementParser, typename ListCompletionHandler>
auto ParseTree::Parser::ParseParenList(ListElementParser list_element_parser,
ParseNodeKind comma_kind,
ListCompletionHandler list_handler)
-> llvm::Optional<Node> {
// `(` element-list[opt] `)`
//
// element-list ::= element
// ::= element `,` element-list
TokenizedBuffer::Token open_paren = Consume(TokenKind::OpenParen());

bool has_errors = false;

// Parse elements, if any are specified.
if (tokens.GetKind(*position) != TokenKind::CloseParen()) {
fowles marked this conversation as resolved.
Show resolved Hide resolved
while (true) {
bool element_error = !list_element_parser();
has_errors |= element_error;

TokenKind kind = tokens.GetKind(*position);
if (kind != TokenKind::CloseParen() && kind != TokenKind::Comma()) {
if (!element_error) {
emitter.EmitError<UnexpectedTokenAfterListElement>(*position);
}
has_errors = true;

auto end_of_element =
FindNextOf({TokenKind::Comma(), TokenKind::CloseParen()});
// The lexer guarantees that parentheses are balanced.
assert(end_of_element && "missing matching `)` for `(`");
SkipTo(*end_of_element);
}

if (tokens.GetKind(*position) == TokenKind::CloseParen()) {
fowles marked this conversation as resolved.
Show resolved Hide resolved
break;
}

assert(tokens.GetKind(*position) == TokenKind::Comma());
AddLeafNode(comma_kind, Consume(TokenKind::Comma()));
}
}

return list_handler(open_paren, Consume(TokenKind::CloseParen()), has_errors);
}

auto ParseTree::Parser::ParseFunctionParameter() -> llvm::Optional<Node> {
// A parameter is of the form
// type identifier
auto start = StartSubtree();

// FIXME: Add support for parsing parameters.
auto type = ParseType();

auto close_paren =
ParseCloseParen(open_paren, ParseNodeKind::ParameterListEnd());
// FIXME: We can't use DeclaredName here because we need to use the
// identifier token as the root token in the parameter node.
auto name = ConsumeIf(TokenKind::Identifier());
fowles marked this conversation as resolved.
Show resolved Hide resolved
if (!name) {
emitter.EmitError<ExpectedParameterName>(*position);
return llvm::None;
}

return AddNode(ParseNodeKind::ParameterDeclaration(), *name, start,
/*has_error=*/!type);
}

// FIXME: Implement parsing of a return type.
auto ParseTree::Parser::ParseFunctionSignature() -> bool {
auto start = StartSubtree();

auto params = ParseParenList(
[&] { return ParseFunctionParameter(); },
ParseNodeKind::ParameterListComma(),
[&](TokenizedBuffer::Token open_paren, TokenizedBuffer::Token close_paren,
bool has_errors) {
AddLeafNode(ParseNodeKind::ParameterListEnd(), close_paren);
return AddNode(ParseNodeKind::ParameterList(), open_paren, start,
has_errors);
});

auto start_return_type = StartSubtree();
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am a bit confused here. If there is no arrow, do we not need to close the subtree in some way?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think StartSubtree is perhaps not well-named; it just tracks a location where we could start a subtree. It's really just a stable handle to a location in the tree currently being built. Happy to rename. Something like GetPossibleSubtreeStartLocation but less wordy perhaps?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would appreciate that follow up

if (auto arrow = ConsumeIf(TokenKind::MinusGreater())) {
auto return_type = ParseType();
AddNode(ParseNodeKind::ReturnType(), *arrow, start_return_type,
/*has_error=*/!return_type);
if (!return_type) {
return false;
}
}

return AddNode(ParseNodeKind::ParameterList(), open_paren, start,
/*has_errors=*/!close_paren);
return params.hasValue();
}

auto ParseTree::Parser::ParseCodeBlock() -> Node {
Expand Down Expand Up @@ -406,11 +486,7 @@ auto ParseTree::Parser::ParseFunctionDeclaration() -> Node {
TokenizedBuffer::Token close_paren =
tokens.GetMatchedClosingToken(open_paren);

Node signature_n = ParseFunctionSignature();
assert(*std::prev(position) == close_paren &&
"Should have parsed through the close paren, whether successfully "
"or with errors.");
if (tree.node_impls[signature_n.index].has_error) {
if (!ParseFunctionSignature()) {
// Don't try to parse more of the function declaration, but consume a
// declaration ending semicolon if found (without going to a new line).
SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
Expand Down Expand Up @@ -578,41 +654,14 @@ auto ParseTree::Parser::ParseCallExpression(SubtreeStart start, bool has_errors)
//
// expression-list ::= expression
// ::= expression `,` expression-list
TokenizedBuffer::Token open_paren = Consume(TokenKind::OpenParen());

// Parse arguments, if any are specified.
if (tokens.GetKind(*position) != TokenKind::CloseParen()) {
while (true) {
bool argument_error = !ParseExpression();
has_errors |= argument_error;

if (tokens.GetKind(*position) == TokenKind::CloseParen()) {
break;
}

if (tokens.GetKind(*position) != TokenKind::Comma()) {
if (!argument_error) {
emitter.EmitError<UnexpectedTokenInFunctionArgs>(*position);
}
has_errors = true;

auto comma_position = FindNextOf({TokenKind::Comma()});
if (!comma_position) {
SkipTo(tokens.GetMatchedClosingToken(open_paren));
break;
}
SkipTo(*comma_position);
}

AddLeafNode(ParseNodeKind::CallExpressionComma(),
Consume(TokenKind::Comma()));
}
}

AddLeafNode(ParseNodeKind::CallExpressionEnd(),
Consume(TokenKind::CloseParen()));
return AddNode(ParseNodeKind::CallExpression(), open_paren, start,
has_errors);
return ParseParenList(
[&] { return ParseExpression(); }, ParseNodeKind::CallExpressionComma(),
[&](TokenizedBuffer::Token open_paren, TokenizedBuffer::Token close_paren,
bool has_arg_errors) {
AddLeafNode(ParseNodeKind::CallExpressionEnd(), close_paren);
return AddNode(ParseNodeKind::CallExpression(), open_paren, start,
has_errors || has_arg_errors);
});
}

auto ParseTree::Parser::ParsePostfixExpression() -> llvm::Optional<Node> {
Expand Down Expand Up @@ -778,20 +827,30 @@ auto ParseTree::Parser::ParseWhileStatement() -> llvm::Optional<Node> {
/*has_errors=*/!cond || !body);
}

auto ParseTree::Parser::ParseKeywordStatement(ParseNodeKind kind)
auto ParseTree::Parser::ParseKeywordStatement(ParseNodeKind kind,
KeywordStatementArgument argument)
-> llvm::Optional<Node> {
auto keyword_kind = tokens.GetKind(*position);
assert(keyword_kind.IsKeyword());

auto start = StartSubtree();
auto keyword = Consume(keyword_kind);

bool arg_error = false;
if ((argument == KeywordStatementArgument::Optional &&
tokens.GetKind(*position) != TokenKind::Semi()) ||
argument == KeywordStatementArgument::Mandatory) {
arg_error = !ParseExpression();
}

auto semi =
ConsumeAndAddLeafNodeIf(TokenKind::Semi(), ParseNodeKind::StatementEnd());
if (!semi) {
emitter.EmitError<ExpectedSemiAfter>(*position,
{.preceding = keyword_kind});
// FIXME: Try to skip to a semicolon to recover.
}
return AddNode(kind, keyword, start, /*has_errors=*/!semi);
return AddNode(kind, keyword, start, /*has_errors=*/!semi || arg_error);
}

auto ParseTree::Parser::ParseStatement() -> llvm::Optional<Node> {
Expand All @@ -806,10 +865,16 @@ auto ParseTree::Parser::ParseStatement() -> llvm::Optional<Node> {
return ParseWhileStatement();

case TokenKind::ContinueKeyword():
return ParseKeywordStatement(ParseNodeKind::ContinueStatement());
return ParseKeywordStatement(ParseNodeKind::ContinueStatement(),
KeywordStatementArgument::None);

case TokenKind::BreakKeyword():
return ParseKeywordStatement(ParseNodeKind::BreakStatement());
return ParseKeywordStatement(ParseNodeKind::BreakStatement(),
KeywordStatementArgument::None);

case TokenKind::ReturnKeyword():
return ParseKeywordStatement(ParseNodeKind::ReturnStatement(),
KeywordStatementArgument::Optional);

case TokenKind::OpenCurlyBrace():
return ParseCodeBlock();
Expand Down
Loading