From e05bb724e088d1fe2055ec4bde2acb188e1abded Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Mon, 19 Apr 2021 15:24:49 -0700 Subject: [PATCH 1/3] Parse parameters in function declarations. --- parser/parse_node_kind.def | 4 +- parser/parse_tree_test.cpp | 51 +++++++++++++- parser/parser_impl.cpp | 139 ++++++++++++++++++++++++------------- parser/parser_impl.h | 12 +++- 4 files changed, 155 insertions(+), 51 deletions(-) diff --git a/parser/parse_node_kind.def b/parser/parse_node_kind.def index c374f58667051..f25da9ec13116 100644 --- a/parser/parse_node_kind.def +++ b/parser/parse_node_kind.def @@ -18,8 +18,10 @@ CARBON_PARSE_NODE_KIND(DeclarationEnd) CARBON_PARSE_NODE_KIND(EmptyDeclaration) CARBON_PARSE_NODE_KIND(DeclaredName) CARBON_PARSE_NODE_KIND(FunctionDeclaration) -CARBON_PARSE_NODE_KIND(ParameterListEnd) CARBON_PARSE_NODE_KIND(ParameterList) +CARBON_PARSE_NODE_KIND(ParameterListComma) +CARBON_PARSE_NODE_KIND(ParameterListEnd) +CARBON_PARSE_NODE_KIND(ParameterDeclaration) CARBON_PARSE_NODE_KIND(VariableDeclaration) CARBON_PARSE_NODE_KIND(VariableInitializer) CARBON_PARSE_NODE_KIND(FileEnd) diff --git a/parser/parse_tree_test.cpp b/parser/parse_tree_test.cpp index 3c674cd23b98d..1437d1aba716b 100644 --- a/parser/parse_tree_test.cpp +++ b/parser/parse_tree_test.cpp @@ -150,6 +150,52 @@ TEST_F(ParseTreeTest, MatchFileEnd()})); } +TEST_F(ParseTreeTest, FunctionDeclarationWithParameterList) { + TokenizedBuffer tokens = GetTokenizedBuffer("fn foo(Int bar, Int baz);"); + ParseTree tree = ParseTree::Parse(tokens, consumer); + EXPECT_FALSE(tree.HasErrors()); + EXPECT_THAT( + tree, + MatchParseTreeNodes( + {MatchFunctionDeclaration( + MatchDeclaredName("foo"), + MatchParameterList( + MatchParameterDeclaration(MatchNameReference("Int"), "bar"), + MatchParameterListComma(), + MatchParameterDeclaration(MatchNameReference("Int"), "baz"), + MatchParameterListEnd()), + MatchDeclarationEnd()), + MatchFileEnd()})); +} + +TEST_F(ParseTreeTest, FunctionDefinitionWithParameterList) { + TokenizedBuffer tokens = GetTokenizedBuffer( + "fn foo(Int bar, Int baz) {\n" + " foo(baz, bar + baz);\n" + "}"); + ParseTree tree = ParseTree::Parse(tokens, consumer); + EXPECT_FALSE(tree.HasErrors()); + EXPECT_THAT( + tree, + MatchParseTreeNodes( + {MatchFunctionDeclaration( + MatchDeclaredName("foo"), + MatchParameterList( + MatchParameterDeclaration(MatchNameReference("Int"), "bar"), + MatchParameterListComma(), + MatchParameterDeclaration(MatchNameReference("Int"), "baz"), + MatchParameterListEnd()), + MatchCodeBlock( + MatchExpressionStatement(MatchCallExpression( + MatchNameReference("foo"), MatchNameReference("baz"), + MatchCallExpressionComma(), + MatchInfixOperator(MatchNameReference("bar"), "+", + MatchNameReference("baz")), + MatchCallExpressionEnd())), + MatchCodeBlockEnd())), + MatchFileEnd()})); +} + TEST_F(ParseTreeTest, FunctionDeclarationWithSingleIdentifierParameterList) { TokenizedBuffer tokens = GetTokenizedBuffer("fn foo(bar);"); ParseTree tree = ParseTree::Parse(tokens, consumer); @@ -159,8 +205,9 @@ TEST_F(ParseTreeTest, FunctionDeclarationWithSingleIdentifierParameterList) { EXPECT_THAT(tree, MatchParseTreeNodes( {MatchFunctionDeclaration( - HasError, MatchDeclaredName("foo"), - MatchParameterList(HasError, MatchParameterListEnd()), + MatchDeclaredName("foo"), + MatchParameterList(MatchNameReference("bar"), HasError, + MatchParameterListEnd()), MatchDeclarationEnd()), MatchFileEnd()})); } diff --git a/parser/parser_impl.cpp b/parser/parser_impl.cpp index 99bae88179360..73033b6007940 100644 --- a/parser/parser_impl.cpp +++ b/parser/parser_impl.cpp @@ -48,6 +48,12 @@ struct ExpectedVariableName : SimpleDiagnostic { "Expected variable name after type in `var` declaration."; }; +struct ExpectedParameterName : SimpleDiagnostic { + static constexpr llvm::StringLiteral ShortName = "syntax-error"; + static constexpr llvm::StringLiteral Message = + "Expected parameter name after type in parameter declaration."; +}; + struct UnrecognizedDeclaration : SimpleDiagnostic { static constexpr llvm::StringLiteral ShortName = "syntax-error"; static constexpr llvm::StringLiteral Message = @@ -104,11 +110,10 @@ struct ExpectedIdentifierAfterDot "Expected identifier after `.`."; }; -struct UnexpectedTokenInFunctionArgs - : SimpleDiagnostic { +struct UnexpectedTokenAfterListElement + : SimpleDiagnostic { static constexpr llvm::StringLiteral ShortName = "syntax-error"; - static constexpr llvm::StringLiteral Message = - "Unexpected token in function argument list."; + static constexpr llvm::StringLiteral Message = "Expected `,` or `)`."; }; struct OperatorRequiresParentheses @@ -331,19 +336,86 @@ auto ParseTree::Parser::ParseCloseParen(TokenizedBuffer::Token open_paren, return llvm::None; } -auto ParseTree::Parser::ParseFunctionSignature() -> Node { +template +auto ParseTree::Parser::ParseParenList(ListElementParser list_element_parser, + ParseNodeKind comma_kind, + ListCompletionHandler list_handler) + -> llvm::Optional { + // `(` element-list[opt] `)` + // + // element-list ::= element + // ::= element `,` element-list TokenizedBuffer::Token open_paren = Consume(TokenKind::OpenParen()); + + bool has_errors = false; + + // Parse elements, if any are specified. + if (tokens.GetKind(*position) != TokenKind::CloseParen()) { + while (true) { + bool element_error = !list_element_parser(); + has_errors |= element_error; + + TokenKind kind = tokens.GetKind(*position); + if (kind != TokenKind::CloseParen() && kind != TokenKind::Comma()) { + if (!element_error) { + emitter.EmitError(*position); + } + has_errors = true; + + auto end_of_element = + FindNextOf({TokenKind::Comma(), TokenKind::CloseParen()}); + // The lexer guarantees that parentheses are balanced. + assert(end_of_element && "missing matching `)` for `(`"); + SkipTo(*end_of_element); + } + + if (tokens.GetKind(*position) == TokenKind::CloseParen()) { + break; + } + + assert(tokens.GetKind(*position) == TokenKind::Comma()); + AddLeafNode(comma_kind, Consume(TokenKind::Comma())); + } + } + + return list_handler(open_paren, Consume(TokenKind::CloseParen()), has_errors); +} + +auto ParseTree::Parser::ParseFunctionParameter() -> llvm::Optional { + // A parameter is of the form + // type identifier auto start = StartSubtree(); - // FIXME: Add support for parsing parameters. + auto type = ParseType(); + + // FIXME: We can't use DeclaredName here because we need to use the + // identifier token as the root token in the parameter node. + auto name = ConsumeIf(TokenKind::Identifier()); + if (!name) { + emitter.EmitError(*position); + return llvm::None; + } - auto close_paren = - ParseCloseParen(open_paren, ParseNodeKind::ParameterListEnd()); + return AddNode(ParseNodeKind::ParameterDeclaration(), *name, start, + /*has_error=*/!type); +} + +auto ParseTree::Parser::ParseFunctionSignature() -> bool { + auto start = StartSubtree(); + + auto params = ParseParenList( + [&] { return ParseFunctionParameter(); }, + ParseNodeKind::ParameterListComma(), + [&](TokenizedBuffer::Token open_paren, TokenizedBuffer::Token close_paren, + bool has_errors) { + AddLeafNode(ParseNodeKind::ParameterListEnd(), close_paren); + return AddNode(ParseNodeKind::ParameterList(), open_paren, start, + has_errors); + }); // FIXME: Implement parsing of a return type. - return AddNode(ParseNodeKind::ParameterList(), open_paren, start, - /*has_errors=*/!close_paren); + return params.hasValue(); } auto ParseTree::Parser::ParseCodeBlock() -> Node { @@ -406,11 +478,11 @@ auto ParseTree::Parser::ParseFunctionDeclaration() -> Node { TokenizedBuffer::Token close_paren = tokens.GetMatchedClosingToken(open_paren); - Node signature_n = ParseFunctionSignature(); + bool signature_ok = ParseFunctionSignature(); assert(*std::prev(position) == close_paren && "Should have parsed through the close paren, whether successfully " "or with errors."); - if (tree.node_impls[signature_n.index].has_error) { + if (!signature_ok) { // Don't try to parse more of the function declaration, but consume a // declaration ending semicolon if found (without going to a new line). SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery); @@ -578,41 +650,14 @@ auto ParseTree::Parser::ParseCallExpression(SubtreeStart start, bool has_errors) // // expression-list ::= expression // ::= expression `,` expression-list - TokenizedBuffer::Token open_paren = Consume(TokenKind::OpenParen()); - - // Parse arguments, if any are specified. - if (tokens.GetKind(*position) != TokenKind::CloseParen()) { - while (true) { - bool argument_error = !ParseExpression(); - has_errors |= argument_error; - - if (tokens.GetKind(*position) == TokenKind::CloseParen()) { - break; - } - - if (tokens.GetKind(*position) != TokenKind::Comma()) { - if (!argument_error) { - emitter.EmitError(*position); - } - has_errors = true; - - auto comma_position = FindNextOf({TokenKind::Comma()}); - if (!comma_position) { - SkipTo(tokens.GetMatchedClosingToken(open_paren)); - break; - } - SkipTo(*comma_position); - } - - AddLeafNode(ParseNodeKind::CallExpressionComma(), - Consume(TokenKind::Comma())); - } - } - - AddLeafNode(ParseNodeKind::CallExpressionEnd(), - Consume(TokenKind::CloseParen())); - return AddNode(ParseNodeKind::CallExpression(), open_paren, start, - has_errors); + return ParseParenList( + [&] { return ParseExpression(); }, ParseNodeKind::CallExpressionComma(), + [&](TokenizedBuffer::Token open_paren, TokenizedBuffer::Token close_paren, + bool has_arg_errors) { + AddLeafNode(ParseNodeKind::CallExpressionEnd(), close_paren); + return AddNode(ParseNodeKind::CallExpression(), open_paren, start, + has_errors || has_arg_errors); + }); } auto ParseTree::Parser::ParsePostfixExpression() -> llvm::Optional { diff --git a/parser/parser_impl.h b/parser/parser_impl.h index dd19afc580d7b..eb19737c7c1a6 100644 --- a/parser/parser_impl.h +++ b/parser/parser_impl.h @@ -116,10 +116,20 @@ class ParseTree::Parser { auto ParseCloseParen(TokenizedBuffer::Token open_paren, ParseNodeKind kind) -> llvm::Optional; + // Parses a parenthesized, comma-separated list. + template + auto ParseParenList(ListElementParser list_element_parser, + ParseNodeKind comma_kind, + ListCompletionHandler list_handler) + -> llvm::Optional; + + // Parses a single function parameter declaration. + auto ParseFunctionParameter() -> llvm::Optional; + // Parses the signature of the function, consisting of a parameter list and an // optional return type. Returns the root node of the signature which must be // based on the open parenthesis of the parameter list. - auto ParseFunctionSignature() -> Node; + auto ParseFunctionSignature() -> bool; // Parses a block of code: `{ ... }`. // From 6dd651119f523939a9264dd86889394b1a6ed2aa Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Mon, 19 Apr 2021 15:35:26 -0700 Subject: [PATCH 2/3] Parsing support for function return types. --- parser/parse_node_kind.def | 1 + parser/parse_tree_test.cpp | 31 +++++++++++++++++++++++++++++++ parser/parser_impl.cpp | 16 ++++++++++------ 3 files changed, 42 insertions(+), 6 deletions(-) diff --git a/parser/parse_node_kind.def b/parser/parse_node_kind.def index f25da9ec13116..030de307b9025 100644 --- a/parser/parse_node_kind.def +++ b/parser/parse_node_kind.def @@ -22,6 +22,7 @@ CARBON_PARSE_NODE_KIND(ParameterList) CARBON_PARSE_NODE_KIND(ParameterListComma) CARBON_PARSE_NODE_KIND(ParameterListEnd) CARBON_PARSE_NODE_KIND(ParameterDeclaration) +CARBON_PARSE_NODE_KIND(ReturnType) CARBON_PARSE_NODE_KIND(VariableDeclaration) CARBON_PARSE_NODE_KIND(VariableInitializer) CARBON_PARSE_NODE_KIND(FileEnd) diff --git a/parser/parse_tree_test.cpp b/parser/parse_tree_test.cpp index 1437d1aba716b..580131f1e6d68 100644 --- a/parser/parse_tree_test.cpp +++ b/parser/parse_tree_test.cpp @@ -196,6 +196,37 @@ TEST_F(ParseTreeTest, FunctionDefinitionWithParameterList) { MatchFileEnd()})); } +TEST_F(ParseTreeTest, FunctionDeclarationWithReturnType) { + TokenizedBuffer tokens = GetTokenizedBuffer("fn foo() -> Int;"); + ParseTree tree = ParseTree::Parse(tokens, consumer); + EXPECT_FALSE(tree.HasErrors()); + EXPECT_THAT( + tree, + MatchParseTreeNodes( + {MatchFunctionDeclaration(MatchDeclaredName("foo"), MatchParameters(), + MatchReturnType(MatchNameReference("Int")), + MatchDeclarationEnd()), + MatchFileEnd()})); +} + +TEST_F(ParseTreeTest, FunctionDefinitionWithReturnType) { + TokenizedBuffer tokens = GetTokenizedBuffer( + "fn foo() -> Int {\n" + " // return 42;\n" + "}"); + ParseTree tree = ParseTree::Parse(tokens, consumer); + EXPECT_FALSE(tree.HasErrors()); + EXPECT_THAT( + tree, + MatchParseTreeNodes( + {MatchFunctionDeclaration(MatchDeclaredName("foo"), MatchParameters(), + MatchReturnType(MatchNameReference("Int")), + MatchCodeBlock( + // TODO: Match a return statement. + MatchCodeBlockEnd())), + MatchFileEnd()})); +} + TEST_F(ParseTreeTest, FunctionDeclarationWithSingleIdentifierParameterList) { TokenizedBuffer tokens = GetTokenizedBuffer("fn foo(bar);"); ParseTree tree = ParseTree::Parse(tokens, consumer); diff --git a/parser/parser_impl.cpp b/parser/parser_impl.cpp index 73033b6007940..a6e78cfae9f35 100644 --- a/parser/parser_impl.cpp +++ b/parser/parser_impl.cpp @@ -413,7 +413,15 @@ auto ParseTree::Parser::ParseFunctionSignature() -> bool { has_errors); }); - // FIXME: Implement parsing of a return type. + auto start_return_type = StartSubtree(); + if (auto arrow = ConsumeIf(TokenKind::MinusGreater())) { + auto return_type = ParseType(); + AddNode(ParseNodeKind::ReturnType(), *arrow, start_return_type, + /*has_error=*/!return_type); + if (!return_type) { + return false; + } + } return params.hasValue(); } @@ -478,11 +486,7 @@ auto ParseTree::Parser::ParseFunctionDeclaration() -> Node { TokenizedBuffer::Token close_paren = tokens.GetMatchedClosingToken(open_paren); - bool signature_ok = ParseFunctionSignature(); - assert(*std::prev(position) == close_paren && - "Should have parsed through the close paren, whether successfully " - "or with errors."); - if (!signature_ok) { + if (!ParseFunctionSignature()) { // Don't try to parse more of the function declaration, but consume a // declaration ending semicolon if found (without going to a new line). SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery); From 6d1bac3dfb2d872ff3e2fa9ec79501b333d9c46b Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Mon, 19 Apr 2021 15:52:03 -0700 Subject: [PATCH 3/3] Parsing support for return statements. --- parser/parse_node_kind.def | 1 + parser/parse_tree_test.cpp | 49 ++++++++++++++++++++++++++++++-------- parser/parser_impl.cpp | 24 +++++++++++++++---- parser/parser_impl.h | 10 +++++++- 4 files changed, 69 insertions(+), 15 deletions(-) diff --git a/parser/parse_node_kind.def b/parser/parse_node_kind.def index 030de307b9025..ab2cdadb64110 100644 --- a/parser/parse_node_kind.def +++ b/parser/parse_node_kind.def @@ -38,6 +38,7 @@ CARBON_PARSE_NODE_KIND(Condition) CARBON_PARSE_NODE_KIND(ConditionEnd) CARBON_PARSE_NODE_KIND(ContinueStatement) CARBON_PARSE_NODE_KIND(BreakStatement) +CARBON_PARSE_NODE_KIND(ReturnStatement) CARBON_PARSE_NODE_KIND(StatementEnd) // Expressions. diff --git a/parser/parse_tree_test.cpp b/parser/parse_tree_test.cpp index 580131f1e6d68..ab63aef30ab0a 100644 --- a/parser/parse_tree_test.cpp +++ b/parser/parse_tree_test.cpp @@ -212,19 +212,19 @@ TEST_F(ParseTreeTest, FunctionDeclarationWithReturnType) { TEST_F(ParseTreeTest, FunctionDefinitionWithReturnType) { TokenizedBuffer tokens = GetTokenizedBuffer( "fn foo() -> Int {\n" - " // return 42;\n" + " return 42;\n" "}"); ParseTree tree = ParseTree::Parse(tokens, consumer); EXPECT_FALSE(tree.HasErrors()); - EXPECT_THAT( - tree, - MatchParseTreeNodes( - {MatchFunctionDeclaration(MatchDeclaredName("foo"), MatchParameters(), - MatchReturnType(MatchNameReference("Int")), - MatchCodeBlock( - // TODO: Match a return statement. - MatchCodeBlockEnd())), - MatchFileEnd()})); + EXPECT_THAT(tree, + MatchParseTreeNodes( + {MatchFunctionDeclaration( + MatchDeclaredName("foo"), MatchParameters(), + MatchReturnType(MatchNameReference("Int")), + MatchCodeBlock(MatchReturnStatement(MatchLiteral("42"), + MatchStatementEnd()), + MatchCodeBlockEnd())), + MatchFileEnd()})); } TEST_F(ParseTreeTest, FunctionDeclarationWithSingleIdentifierParameterList) { @@ -693,6 +693,35 @@ TEST_F(ParseTreeTest, WhileBreakContinue) { MatchFileEnd()})); } +TEST_F(ParseTreeTest, Return) { + TokenizedBuffer tokens = GetTokenizedBuffer( + "fn F() {\n" + " if (c)\n" + " return;\n" + "}\n" + "fn G(Int x) -> Int {\n" + " return x;\n" + "}"); + ParseTree tree = ParseTree::Parse(tokens, consumer); + EXPECT_FALSE(tree.HasErrors()); + + EXPECT_THAT( + tree, + MatchParseTreeNodes( + {MatchFunctionWithBody(MatchIfStatement( + MatchCondition(MatchNameReference("c"), MatchConditionEnd()), + MatchReturnStatement(MatchStatementEnd()))), + MatchFunctionDeclaration( + MatchDeclaredName(), + MatchParameters( + MatchParameterDeclaration(MatchNameReference("Int"), "x")), + MatchReturnType(MatchNameReference("Int")), + MatchCodeBlock(MatchReturnStatement(MatchNameReference("x"), + MatchStatementEnd()), + MatchCodeBlockEnd())), + MatchFileEnd()})); +} + auto GetAndDropLine(llvm::StringRef& s) -> std::string { auto newline_offset = s.find_first_of('\n'); llvm::StringRef line = s.slice(0, newline_offset); diff --git a/parser/parser_impl.cpp b/parser/parser_impl.cpp index a6e78cfae9f35..27efbbffc7c38 100644 --- a/parser/parser_impl.cpp +++ b/parser/parser_impl.cpp @@ -827,20 +827,30 @@ auto ParseTree::Parser::ParseWhileStatement() -> llvm::Optional { /*has_errors=*/!cond || !body); } -auto ParseTree::Parser::ParseKeywordStatement(ParseNodeKind kind) +auto ParseTree::Parser::ParseKeywordStatement(ParseNodeKind kind, + KeywordStatementArgument argument) -> llvm::Optional { auto keyword_kind = tokens.GetKind(*position); assert(keyword_kind.IsKeyword()); auto start = StartSubtree(); auto keyword = Consume(keyword_kind); + + bool arg_error = false; + if ((argument == KeywordStatementArgument::Optional && + tokens.GetKind(*position) != TokenKind::Semi()) || + argument == KeywordStatementArgument::Mandatory) { + arg_error = !ParseExpression(); + } + auto semi = ConsumeAndAddLeafNodeIf(TokenKind::Semi(), ParseNodeKind::StatementEnd()); if (!semi) { emitter.EmitError(*position, {.preceding = keyword_kind}); + // FIXME: Try to skip to a semicolon to recover. } - return AddNode(kind, keyword, start, /*has_errors=*/!semi); + return AddNode(kind, keyword, start, /*has_errors=*/!semi || arg_error); } auto ParseTree::Parser::ParseStatement() -> llvm::Optional { @@ -855,10 +865,16 @@ auto ParseTree::Parser::ParseStatement() -> llvm::Optional { return ParseWhileStatement(); case TokenKind::ContinueKeyword(): - return ParseKeywordStatement(ParseNodeKind::ContinueStatement()); + return ParseKeywordStatement(ParseNodeKind::ContinueStatement(), + KeywordStatementArgument::None); case TokenKind::BreakKeyword(): - return ParseKeywordStatement(ParseNodeKind::BreakStatement()); + return ParseKeywordStatement(ParseNodeKind::BreakStatement(), + KeywordStatementArgument::None); + + case TokenKind::ReturnKeyword(): + return ParseKeywordStatement(ParseNodeKind::ReturnStatement(), + KeywordStatementArgument::Optional); case TokenKind::OpenCurlyBrace(): return ParseCodeBlock(); diff --git a/parser/parser_impl.h b/parser/parser_impl.h index eb19737c7c1a6..b5508aafda091 100644 --- a/parser/parser_impl.h +++ b/parser/parser_impl.h @@ -199,8 +199,16 @@ class ParseTree::Parser { // Parses a while-statement. auto ParseWhileStatement() -> llvm::Optional; + enum class KeywordStatementArgument { + None, + Optional, + Mandatory, + }; + // Parses a statement of the form `keyword;` such as `break;` or `continue;`. - auto ParseKeywordStatement(ParseNodeKind kind) -> llvm::Optional; + auto ParseKeywordStatement(ParseNodeKind kind, + KeywordStatementArgument argument) + -> llvm::Optional; // Parses a statement. auto ParseStatement() -> llvm::Optional;