From 3bbeee1a1708e6cd4f1a9713f2c24a121bd2f9cc Mon Sep 17 00:00:00 2001 From: Aymeric Wibo Date: Mon, 12 Aug 2024 20:26:04 +0200 Subject: [PATCH] grammar: Move each grammar construct's parsing code into its own file --- build.sh | 8 +- compile_commands.json | 41 +++ flamingo/common.h | 13 +- flamingo/flamingo.c | 399 +----------------------- flamingo/grammar/assignment.h | 66 ++++ flamingo/grammar/block.h | 33 ++ flamingo/grammar/call.h | 75 +++++ flamingo/grammar/expr.h | 37 +++ flamingo/grammar/function_declaration.h | 91 ++++++ flamingo/grammar/identifier.h | 29 ++ flamingo/{import.c => grammar/import.h} | 16 +- flamingo/grammar/literal.h | 56 ++++ flamingo/grammar/print.h | 38 +++ flamingo/grammar/statement.h | 45 +++ flamingo/runtime/unicode/umachine.h | 4 +- flamingo/runtime/unicode/utf16.h | 4 +- flamingo/runtime/unicode/utf8.h | 4 +- hello_world.fl | 24 +- update.sh | 3 - 19 files changed, 551 insertions(+), 435 deletions(-) create mode 100644 compile_commands.json create mode 100644 flamingo/grammar/assignment.h create mode 100644 flamingo/grammar/block.h create mode 100644 flamingo/grammar/call.h create mode 100644 flamingo/grammar/expr.h create mode 100644 flamingo/grammar/function_declaration.h create mode 100644 flamingo/grammar/identifier.h rename flamingo/{import.c => grammar/import.h} (73%) create mode 100644 flamingo/grammar/literal.h create mode 100644 flamingo/grammar/print.h create mode 100644 flamingo/grammar/statement.h diff --git a/build.sh b/build.sh index b790325..ecef616 100644 --- a/build.sh +++ b/build.sh @@ -3,9 +3,13 @@ set -e mkdir -p bin -cc_flags="-std=c11 -g -Wall -Wextra -Werror -Iflamingo/runtime -Wno-unused-parameter" +cc_flags="-std=c11 -g -Wall -Wextra -Werror -Iflamingo/runtime -Iflamingo -Wno-unused-parameter" -cc $cc_flags -c flamingo/flamingo.c -o bin/flamingo.o +# XXX With the default error limit, clangd tells us that there are too many errors and it's stopping here. 
+# When the error limit is disabled like I'm doing here, it says there are no errors. +# Could this be a clangd bug? + +cc $cc_flags -ferror-limit=0 -c flamingo/flamingo.c -o bin/flamingo.o cc $cc_flags -c main.c -o bin/main.o cc $(find bin -name "*.o") $cc_flags -o bin/flamingo diff --git a/compile_commands.json b/compile_commands.json new file mode 100644 index 0000000..9d633ce --- /dev/null +++ b/compile_commands.json @@ -0,0 +1,41 @@ +[ + { + "arguments": [ + "cc", + "-c", + "-std=c11", + "-g", + "-Wall", + "-Wextra", + "-Werror", + "-Iflamingo/runtime", + "-Iflamingo", + "-Wno-unused-parameter", + "-ferror-limit=0", + "-o", + "bin/flamingo.o", + "flamingo/flamingo.c" + ], + "directory": "/home/obiwac/flamingo/flamingo", + "file": "flamingo/flamingo.c" + }, + { + "arguments": [ + "cc", + "-c", + "-std=c11", + "-g", + "-Wall", + "-Wextra", + "-Werror", + "-Iflamingo/runtime", + "-Iflamingo", + "-Wno-unused-parameter", + "-o", + "bin/main.o", + "main.c" + ], + "directory": "/home/obiwac/flamingo/flamingo", + "file": "main.c" + } +] \ No newline at end of file diff --git a/flamingo/common.h b/flamingo/common.h index 480aa00..f12419e 100644 --- a/flamingo/common.h +++ b/flamingo/common.h @@ -12,7 +12,18 @@ #include #include -__attribute__((format(printf, 2, 3))) static int error(flamingo_t* flamingo, char const* fmt, ...) 
{ +static inline int parse_expr(flamingo_t* flamingo, TSNode node, flamingo_val_t** val); +static inline int parse_statement(flamingo_t* flamingo, TSNode node); +static inline int parse_block(flamingo_t* flamingo, TSNode node); +static inline int parse_print(flamingo_t* flamingo, TSNode node); +static inline int parse_literal(flamingo_t* flamingo, TSNode node, flamingo_val_t** val); +static inline int parse_identifier(flamingo_t* flamingo, TSNode node, flamingo_val_t** val); +static inline int parse_call(flamingo_t* flamingo, TSNode node, flamingo_val_t** val); +static inline int parse_assignment(flamingo_t* flamingo, TSNode node); +static inline int parse_import(flamingo_t* flamingo, TSNode node); +static inline int parse_function_declaration(flamingo_t* flamingo, TSNode node); + +__attribute__((format(printf, 2, 3))) static inline int error(flamingo_t* flamingo, char const* fmt, ...) { va_list args; va_start(args, fmt); diff --git a/flamingo/flamingo.c b/flamingo/flamingo.c index fff1b80..88502e1 100644 --- a/flamingo/flamingo.c +++ b/flamingo/flamingo.c @@ -4,10 +4,9 @@ #include "parser.c" #include "runtime/lib.c" -#include "common.h" -#include "import.c" -#include "scope.c" -#include "val.c" +#include +#include +#include typedef struct { TSParser* parser; @@ -17,9 +16,6 @@ typedef struct { extern TSLanguage const* tree_sitter_flamingo(void); -static int parse_expr(flamingo_t* flamingo, TSNode node, flamingo_val_t** val); -static int parse_statement(flamingo_t* flamingo, TSNode node); - int flamingo_create(flamingo_t* flamingo, char const* progname, char* src, size_t src_size) { flamingo->progname = progname; flamingo->errors_outstanding = false; @@ -107,395 +103,6 @@ void flamingo_register_cb_call(flamingo_t* flamingo, flamingo_cb_call_t cb, void fprintf(stderr, "%s: not implemented\n", __func__); } -static int parse_literal(flamingo_t* flamingo, TSNode node, flamingo_val_t** val) { - assert(strcmp(ts_node_type(node), "literal") == 0); - 
assert(ts_node_child_count(node) == 1); - - // if value already exists, free it - // XXX not sure when this is the case, just doing this for now to be safe! - - if (*val != NULL) { - val_free(*val); - val_init(*val); - } - - // otherwise, allocate it - - else { - *val = val_alloc(); - } - - // in any case, val should not be NULL from this point forth - - assert(*val != NULL); - - TSNode const child = ts_node_child(node, 0); - char const* const type = ts_node_type(child); - - if (strcmp(type, "string") == 0) { - (*val)->kind = FLAMINGO_VAL_KIND_STR; - - size_t const start = ts_node_start_byte(child); - size_t const end = ts_node_end_byte(child); - - // XXX remove one from each side as we don't want the quotes - - (*val)->str.size = end - start - 2; - (*val)->str.str = malloc((*val)->str.size); - - assert((*val)->str.str != NULL); - memcpy((*val)->str.str, flamingo->src + start + 1, (*val)->str.size); - - return 0; - } - - if (strcmp(type, "number") == 0) { - return error(flamingo, "number literals are not yet supported"); - } - - return error(flamingo, "unknown literal type: %s", type); -} - -static int parse_identifier(flamingo_t* flamingo, TSNode node, flamingo_val_t** val) { - assert(strcmp(ts_node_type(node), "identifier") == 0); - assert(ts_node_child_count(node) == 0); - - size_t const start = ts_node_start_byte(node); - size_t const end = ts_node_end_byte(node); - - char const* const identifier = flamingo->src + start; - size_t const size = end - start; - - flamingo_var_t* const var = flamingo_scope_find_var(flamingo, identifier, size); - - if (var == NULL) { - return error(flamingo, "could not find identifier: %.*s", (int) size, identifier); - } - - *val = var->val; - val_incref(*val); - - return 0; -} - -static int parse_call(flamingo_t* flamingo, TSNode node, flamingo_val_t** val) { - assert(strcmp(ts_node_type(node), "call") == 0); - assert(ts_node_child_count(node) == 3); - - // Get callable expression. - // TODO Evaluate this motherfucker. 
- - TSNode const callable_node = ts_node_child_by_field_name(node, "callable", 8); - char const* const callable_type = ts_node_type(callable_node); - - if (strcmp(callable_type, "expression") != 0) { - return error(flamingo, "expected identifier for function name, got %s", callable_type); - } - - // Get arguments. - // TODO Do something with these arguments. - - TSNode const args = ts_node_child_by_field_name(node, "args", 6); - bool const has_args = !ts_node_is_null(args); - - if (has_args) { - char const* const args_type = ts_node_type(args); - - if (strcmp(args_type, "arg_list") != 0) { - return error(flamingo, "expected arg_list for parameters, got %s", args_type); - } - } - - // Evaluate callable expression. - - flamingo_val_t* callable = NULL; - - if (parse_expr(flamingo, callable_node, &callable) < 0) { - return -1; - } - - if (callable->kind != FLAMINGO_VAL_KIND_FN) { - return error(flamingo, "callable has a value kind of %d, which is not callable", callable->kind); - } - - // Actually call the callable. - - /* TODO - The issue with this is that the body we're calling may further down the scope stack, and blocks just blindly push a new scope on top of the stack. - What we should really be doing is copying the scope stack, rolling it back to the scope where the function was declared, and then pushing a new scope on top of that. - Maybe there's a better way? Here's an illustration of the problem: - - fn fun2() { - print(a) # I should not have access to 'a' in here. - } - - fn fun1() { - fn fun3() { - print(a) # In fact, and this is a semi-unrelated issue, I should have access to 'a' here either. 
- } - - a = "hello" - fun2() - fun3() - } - */ - - TSNode* const body = callable->fn.body; - return parse_statement(flamingo, *body); -} - -static int parse_expr(flamingo_t* flamingo, TSNode node, flamingo_val_t** val) { - assert(strcmp(ts_node_type(node), "expression") == 0); - assert(ts_node_child_count(node) == 1); - - TSNode const child = ts_node_child(node, 0); - char const* const type = ts_node_type(child); - - // 'val == NULL' means that we don't care about the result of the expression and can discard it. - // These types of expressions are dead-ends if we're discarding the value and they can't have side-effect either, so just don't parse them. - - if (val != NULL && strcmp(type, "literal") == 0) { - return parse_literal(flamingo, child, val); - } - - if (val != NULL && strcmp(type, "identifier") == 0) { - return parse_identifier(flamingo, child, val); - } - - // These expressions do have side-effects, so we need to parse them anyway. - - if (strcmp(type, "call") == 0) { - return parse_call(flamingo, child, val); - } - - return error(flamingo, "unknown expression type: %s", type); -} - -static int parse_print(flamingo_t* flamingo, TSNode node) { - assert(ts_node_child_count(node) == 2); - - TSNode const msg_node = ts_node_child_by_field_name(node, "msg", 3); - char const* const type = ts_node_type(msg_node); - - if (strcmp(ts_node_type(msg_node), "expression") != 0) { - return error(flamingo, "expected expression for message, got %s", type); - } - - flamingo_val_t* val = NULL; - - if (parse_expr(flamingo, msg_node, &val) < 0) { - return -1; - } - - // XXX Don't forget to decrement reference at the end! 
- - if (val->kind == FLAMINGO_VAL_KIND_STR) { - printf("%.*s\n", (int) val->str.size, val->str.str); - val_decref(val); - - return 0; - } - - val_decref(val); - return error(flamingo, "can't print expression kind: %d", val->kind); -} - -static int parse_assignment(flamingo_t* flamingo, TSNode node) { - assert(ts_node_child_count(node) == 3); - - // Get RHS expression. - - TSNode const right_node = ts_node_child_by_field_name(node, "right", 5); - char const* const right_type = ts_node_type(right_node); - - if (strcmp(right_type, "expression") != 0) { - return error(flamingo, "expected expression for name, got %s", right_type); - } - - // Get identifier name. - - TSNode const left_node = ts_node_child_by_field_name(node, "left", 4); - char const* const left_type = ts_node_type(left_node); - - if (strcmp(left_type, "identifier") != 0) { - return error(flamingo, "expected identifier for name, got %s", left_type); - } - - size_t const start = ts_node_start_byte(left_node); - size_t const end = ts_node_end_byte(left_node); - - char const* const identifier = flamingo->src + start; - size_t const size = end - start; - - // Check if identifier is already in scope (or a previous one) and declare it if not. - // If it's a function, error. - - flamingo_var_t* var = flamingo_scope_find_var(flamingo, identifier, size); - - if (var == NULL) { - var = scope_add_var(cur_scope(flamingo), identifier, size); - } - - else if (var->val->kind == FLAMINGO_VAL_KIND_FN) { - return error(flamingo, "cannot assign to function '%.*s'", (int) size, identifier); - } - - // If variable is already in current or previous scope, since we're assigning a new value to it, we must decrement the reference counter of the previous value which was in the variable. - - else { - val_decref(var->val); - var->val = NULL; - } - - // Evaluate expression. 
- - if (parse_expr(flamingo, right_node, &var->val) < 0) { - return -1; - } - - return 0; -} - -static int parse_function_declaration(flamingo_t* flamingo, TSNode node) { - assert(ts_node_child_count(node) == 5); - - // Get qualifier list. - - TSNode const qualifiers_node = ts_node_child_by_field_name(node, "qualifiers", 10); - bool const has_qualifiers = !ts_node_is_null(qualifiers_node); - - if (has_qualifiers) { - char const* const qualifiers_type = ts_node_type(qualifiers_node); - - if (strcmp(qualifiers_type, "qualifier_list") != 0) { - return error(flamingo, "expected qualifier_list for qualifiers, got %s", qualifiers_type); - } - } - - // Get function name. - - TSNode const name_node = ts_node_child_by_field_name(node, "name", 4); - char const* const name_type = ts_node_type(name_node); - - if (strcmp(name_type, "identifier") != 0) { - return error(flamingo, "expected identifier for function name, got %s", name_type); - } - - size_t const start = ts_node_start_byte(name_node); - size_t const end = ts_node_end_byte(name_node); - - char const* const name = flamingo->src + start; - size_t const size = end - start; - - // Get function parameters. - // TODO Do something with these parameters. - - TSNode const params = ts_node_child_by_field_name(node, "params", 6); - bool const has_params = !ts_node_is_null(params); - - if (has_params) { - char const* const params_type = ts_node_type(params); - - if (strcmp(params_type, "param_list") != 0) { - return error(flamingo, "expected param_list for parameters, got %s", params_type); - } - } - - // Get function body. - - TSNode const body = ts_node_child_by_field_name(node, "body", 4); - char const* const body_type = ts_node_type(body); - - if (strcmp(body_type, "statement") != 0) { - return error(flamingo, "expected statement for body, got %s", body_type); - } - - // Check if identifier is already in scope (or a previous one) and error if it is. - // Right now, redeclaring functions is not allowed. 
- // Although this will probably work a bit differently once function prototypes are added. - - flamingo_var_t* const prev_var = flamingo_scope_find_var(flamingo, name, size); - - if (prev_var != NULL) { - char const* const thing = prev_var->val->kind == FLAMINGO_VAL_KIND_FN ? "function" : "variable"; - return error(flamingo, "the %s '%.*s' has already been declared in this scope", thing, (int) size, name); - } - - // Add function to scope. - - flamingo_var_t* const var = scope_add_var(cur_scope(flamingo), name, size); - - var->val = val_alloc(); - var->val->kind = FLAMINGO_VAL_KIND_FN; - - // Assign body node. - // Since I want 'flamingo.h' to be usable without importing all of Tree-sitter, 'var->val->fn.body' can't just be a 'TSNode'. - // Thus, since only this file knows about the size of 'TSNode', we must dynamically allocate this on the heap. - - var->val->fn.body_size = sizeof body; - var->val->fn.body = malloc(var->val->fn.body_size); - memcpy(var->val->fn.body, &body, var->val->fn.body_size); - - return 0; -} - -static int parse_block(flamingo_t* flamingo, TSNode node) { - assert(strcmp(ts_node_type(node), "block") == 0); - - scope_stack_push(flamingo); - size_t const n = ts_node_named_child_count(node); - - for (size_t i = 0; i < n; i++) { - TSNode const child = ts_node_named_child(node, i); - char const* const child_type = ts_node_type(child); - - if (strcmp(child_type, "statement") != 0) { - return error(flamingo, "expected statement in block, got %s", child_type); - } - - if (parse_statement(flamingo, child) < 0) { - return -1; - } - } - - scope_pop(flamingo); - - return 0; -} - -static int parse_statement(flamingo_t* flamingo, TSNode node) { - assert(ts_node_child_count(node) == 1); - - TSNode const child = ts_node_child(node, 0); - char const* const type = ts_node_type(child); - - if (strcmp(type, "block") == 0) { - return parse_block(flamingo, child); - } - - else if (strcmp(type, "print") == 0) { - return parse_print(flamingo, child); - } - - else 
if (strcmp(type, "assignment") == 0) { - return parse_assignment(flamingo, child); - } - - else if (strcmp(type, "function_declaration") == 0) { - return parse_function_declaration(flamingo, child); - } - - else if (strcmp(type, "expression") == 0) { - return parse_expr(flamingo, child, NULL); - } - - else if (strcmp(type, "import") == 0) { - return parse_import(flamingo, child); - } - - return error(flamingo, "unknown statment type: %s", type); -} - static int parse(flamingo_t* flamingo, TSNode node) { size_t const n = ts_node_child_count(node); diff --git a/flamingo/grammar/assignment.h b/flamingo/grammar/assignment.h new file mode 100644 index 0000000..e5bd2ad --- /dev/null +++ b/flamingo/grammar/assignment.h @@ -0,0 +1,66 @@ +// This Source Form is subject to the terms of the AQUA Software License, +// v. 1.0. Copyright (c) 2024 Aymeric Wibo + +#pragma once + +#include "expr.h" + +#include +#include +#include + +static int parse_assignment(flamingo_t* flamingo, TSNode node) { + assert(ts_node_child_count(node) == 3); + + // Get RHS expression. + + TSNode const right_node = ts_node_child_by_field_name(node, "right", 5); + char const* const right_type = ts_node_type(right_node); + + if (strcmp(right_type, "expression") != 0) { + return error(flamingo, "expected expression for name, got %s", right_type); + } + + // Get identifier name. + + TSNode const left_node = ts_node_child_by_field_name(node, "left", 4); + char const* const left_type = ts_node_type(left_node); + + if (strcmp(left_type, "identifier") != 0) { + return error(flamingo, "expected identifier for name, got %s", left_type); + } + + size_t const start = ts_node_start_byte(left_node); + size_t const end = ts_node_end_byte(left_node); + + char const* const identifier = flamingo->src + start; + size_t const size = end - start; + + // Check if identifier is already in scope (or a previous one) and declare it if not. + // If it's a function, error. 
+ + flamingo_var_t* var = flamingo_scope_find_var(flamingo, identifier, size); + + if (var == NULL) { + var = scope_add_var(cur_scope(flamingo), identifier, size); + } + + else if (var->val->kind == FLAMINGO_VAL_KIND_FN) { + return error(flamingo, "cannot assign to function '%.*s'", (int) size, identifier); + } + + // If variable is already in current or previous scope, since we're assigning a new value to it, we must decrement the reference counter of the previous value which was in the variable. + + else { + val_decref(var->val); + var->val = NULL; + } + + // Evaluate expression. + + if (parse_expr(flamingo, right_node, &var->val) < 0) { + return -1; + } + + return 0; +} diff --git a/flamingo/grammar/block.h b/flamingo/grammar/block.h new file mode 100644 index 0000000..ccc12c7 --- /dev/null +++ b/flamingo/grammar/block.h @@ -0,0 +1,33 @@ +// This Source Form is subject to the terms of the AQUA Software License, +// v. 1.0. Copyright (c) 2024 Aymeric Wibo + +#pragma once + +#include "statement.h" + +#include +#include + +static int parse_block(flamingo_t* flamingo, TSNode node) { + assert(strcmp(ts_node_type(node), "block") == 0); + + scope_stack_push(flamingo); + size_t const n = ts_node_named_child_count(node); + + for (size_t i = 0; i < n; i++) { + TSNode const child = ts_node_named_child(node, i); + char const* const child_type = ts_node_type(child); + + if (strcmp(child_type, "statement") != 0) { + return error(flamingo, "expected statement in block, got %s", child_type); + } + + if (parse_statement(flamingo, child) < 0) { + return -1; + } + } + + scope_pop(flamingo); + + return 0; +} diff --git a/flamingo/grammar/call.h b/flamingo/grammar/call.h new file mode 100644 index 0000000..0c8a25c --- /dev/null +++ b/flamingo/grammar/call.h @@ -0,0 +1,75 @@ +// This Source Form is subject to the terms of the AQUA Software License, +// v. 1.0. 
Copyright (c) 2024 Aymeric Wibo +#pragma once + +#include "expr.h" +#include "statement.h" + +#include + +static int parse_call(flamingo_t* flamingo, TSNode node, flamingo_val_t** val) { + assert(strcmp(ts_node_type(node), "call") == 0); + assert(ts_node_child_count(node) == 3); + + // Get callable expression. + // TODO Evaluate this motherfucker. + + TSNode const callable_node = ts_node_child_by_field_name(node, "callable", 8); + char const* const callable_type = ts_node_type(callable_node); + + if (strcmp(callable_type, "expression") != 0) { + return error(flamingo, "expected identifier for function name, got %s", callable_type); + } + + // Get arguments. + // TODO Do something with these arguments. + + TSNode const args = ts_node_child_by_field_name(node, "args", 6); + bool const has_args = !ts_node_is_null(args); + + if (has_args) { + char const* const args_type = ts_node_type(args); + + if (strcmp(args_type, "arg_list") != 0) { + return error(flamingo, "expected arg_list for parameters, got %s", args_type); + } + } + + // Evaluate callable expression. + + flamingo_val_t* callable = NULL; + + if (parse_expr(flamingo, callable_node, &callable) < 0) { + return -1; + } + + if (callable->kind != FLAMINGO_VAL_KIND_FN) { + return error(flamingo, "callable has a value kind of %d, which is not callable", callable->kind); + } + + // Actually call the callable. + + /* TODO + The issue with this is that the body we're calling may be further down the scope stack, and blocks just blindly push a new scope on top of the stack. + What we should really be doing is copying the scope stack, rolling it back to the scope where the function was declared, and then pushing a new scope on top of that. + Maybe there's a better way? Here's an illustration of the problem: + + fn fun2() { + print(a) # I should not have access to 'a' in here. + } + + fn fun1() { + fn fun3() { + print(a) # In fact, and this is a semi-unrelated issue, I should not have access to 'a' here either. 
+ } + + a = "hello" + fun2() + fun3() + } + */ + + TSNode* const body = callable->fn.body; + return parse_statement(flamingo, *body); +} diff --git a/flamingo/grammar/expr.h b/flamingo/grammar/expr.h new file mode 100644 index 0000000..717ebe7 --- /dev/null +++ b/flamingo/grammar/expr.h @@ -0,0 +1,37 @@ +// This Source Form is subject to the terms of the AQUA Software License, +// v. 1.0. Copyright (c) 2024 Aymeric Wibo + +#pragma once + +#include "call.h" +#include "identifier.h" +#include "literal.h" + +#include + +static int parse_expr(flamingo_t* flamingo, TSNode node, flamingo_val_t** val) { + assert(strcmp(ts_node_type(node), "expression") == 0); + assert(ts_node_child_count(node) == 1); + + TSNode const child = ts_node_child(node, 0); + char const* const type = ts_node_type(child); + + // 'val == NULL' means that we don't care about the result of the expression and can discard it. + // These types of expressions are dead-ends if we're discarding the value and they can't have side-effect either, so just don't parse them. + + if (val != NULL && strcmp(type, "literal") == 0) { + return parse_literal(flamingo, child, val); + } + + if (val != NULL && strcmp(type, "identifier") == 0) { + return parse_identifier(flamingo, child, val); + } + + // These expressions do have side-effects, so we need to parse them anyway. + + if (strcmp(type, "call") == 0) { + return parse_call(flamingo, child, val); + } + + return error(flamingo, "unknown expression type: %s", type); +} diff --git a/flamingo/grammar/function_declaration.h b/flamingo/grammar/function_declaration.h new file mode 100644 index 0000000..e627292 --- /dev/null +++ b/flamingo/grammar/function_declaration.h @@ -0,0 +1,91 @@ +// This Source Form is subject to the terms of the AQUA Software License, +// v. 1.0. 
Copyright (c) 2024 Aymeric Wibo + +#pragma once + +#include +#include +#include + +static int parse_function_declaration(flamingo_t* flamingo, TSNode node) { + assert(ts_node_child_count(node) == 5); + + // Get qualifier list. + + TSNode const qualifiers_node = ts_node_child_by_field_name(node, "qualifiers", 10); + bool const has_qualifiers = !ts_node_is_null(qualifiers_node); + + if (has_qualifiers) { + char const* const qualifiers_type = ts_node_type(qualifiers_node); + + if (strcmp(qualifiers_type, "qualifier_list") != 0) { + return error(flamingo, "expected qualifier_list for qualifiers, got %s", qualifiers_type); + } + } + + // Get function name. + + TSNode const name_node = ts_node_child_by_field_name(node, "name", 4); + char const* const name_type = ts_node_type(name_node); + + if (strcmp(name_type, "identifier") != 0) { + return error(flamingo, "expected identifier for function name, got %s", name_type); + } + + size_t const start = ts_node_start_byte(name_node); + size_t const end = ts_node_end_byte(name_node); + + char const* const name = flamingo->src + start; + size_t const size = end - start; + + // Get function parameters. + // TODO Do something with these parameters. + + TSNode const params = ts_node_child_by_field_name(node, "params", 6); + bool const has_params = !ts_node_is_null(params); + + if (has_params) { + char const* const params_type = ts_node_type(params); + + if (strcmp(params_type, "param_list") != 0) { + return error(flamingo, "expected param_list for parameters, got %s", params_type); + } + } + + // Get function body. + + TSNode const body = ts_node_child_by_field_name(node, "body", 4); + char const* const body_type = ts_node_type(body); + + if (strcmp(body_type, "statement") != 0) { + return error(flamingo, "expected statement for body, got %s", body_type); + } + + // Check if identifier is already in scope (or a previous one) and error if it is. + // Right now, redeclaring functions is not allowed. 
+ // Although this will probably work a bit differently once function prototypes are added. + + flamingo_var_t* const prev_var = flamingo_scope_find_var(flamingo, name, size); + + if (prev_var != NULL) { + char const* const thing = prev_var->val->kind == FLAMINGO_VAL_KIND_FN ? "function" : "variable"; + return error(flamingo, "the %s '%.*s' has already been declared in this scope", thing, (int) size, name); + } + + // Add function to scope. + + flamingo_var_t* const var = scope_add_var(cur_scope(flamingo), name, size); + + var->val = val_alloc(); + var->val->kind = FLAMINGO_VAL_KIND_FN; + + // Assign body node. + // Since I want 'flamingo.h' to be usable without importing all of Tree-sitter, 'var->val->fn.body' can't just be a 'TSNode'. + // Thus, since only this file knows about the size of 'TSNode', we must dynamically allocate this on the heap. + + var->val->fn.body_size = sizeof body; + var->val->fn.body = malloc(var->val->fn.body_size); + memcpy(var->val->fn.body, &body, var->val->fn.body_size); + + return 0; +} diff --git a/flamingo/grammar/identifier.h b/flamingo/grammar/identifier.h new file mode 100644 index 0000000..d214695 --- /dev/null +++ b/flamingo/grammar/identifier.h @@ -0,0 +1,29 @@ +// This Source Form is subject to the terms of the AQUA Software License, +// v. 1.0. 
Copyright (c) 2024 Aymeric Wibo + +#pragma once + +#include +#include + +static int parse_identifier(flamingo_t* flamingo, TSNode node, flamingo_val_t** val) { + assert(strcmp(ts_node_type(node), "identifier") == 0); + assert(ts_node_child_count(node) == 0); + + size_t const start = ts_node_start_byte(node); + size_t const end = ts_node_end_byte(node); + + char const* const identifier = flamingo->src + start; + size_t const size = end - start; + + flamingo_var_t* const var = flamingo_scope_find_var(flamingo, identifier, size); + + if (var == NULL) { + return error(flamingo, "could not find identifier: %.*s", (int) size, identifier); + } + + *val = var->val; + val_incref(*val); + + return 0; +} diff --git a/flamingo/import.c b/flamingo/grammar/import.h similarity index 73% rename from flamingo/import.c rename to flamingo/grammar/import.h index 6a06246..e5aff74 100644 --- a/flamingo/import.c +++ b/flamingo/grammar/import.h @@ -1,15 +1,20 @@ // This Source Form is subject to the terms of the AQUA Software License, // v. 1.0. Copyright (c) 2024 Aymeric Wibo -#include "common.h" -#include "flamingo.h" -#include "runtime/tree_sitter/api.h" +#pragma once -#include -#include +#include + +static int parse_import_path(flamingo_t* flamingo, TSNode node) { + assert(strcmp(ts_node_type(node), "import_path") == 0); + assert(ts_node_child_count(node) == 2); + + return 0; +} static int parse_import(flamingo_t* flamingo, TSNode node) { assert(strcmp(ts_node_type(node), "import") == 0); + printf("%d\n", ts_node_child_count(node)); assert(ts_node_child_count(node) == 2); TSNode const relative_node = ts_node_child_by_field_name(node, "relative", 8); @@ -25,6 +30,7 @@ static int parse_import(flamingo_t* flamingo, TSNode node) { // Parse the import path into an actual string path we can use. 
(void) is_relative; + (void) parse_import_path; return error(flamingo, "TODO"); } diff --git a/flamingo/grammar/literal.h b/flamingo/grammar/literal.h new file mode 100644 index 0000000..bf50578 --- /dev/null +++ b/flamingo/grammar/literal.h @@ -0,0 +1,56 @@ +// This Source Form is subject to the terms of the AQUA Software License, +// v. 1.0. Copyright (c) 2024 Aymeric Wibo + +#pragma once + +#include "common.h" +#include "val.c" + +static int parse_literal(flamingo_t* flamingo, TSNode node, flamingo_val_t** val) { + assert(strcmp(ts_node_type(node), "literal") == 0); + assert(ts_node_child_count(node) == 1); + + // if value already exists, free it + // XXX not sure when this is the case, just doing this for now to be safe! + + if (*val != NULL) { + val_free(*val); + val_init(*val); + } + + // otherwise, allocate it + + else { + *val = val_alloc(); + } + + // in any case, val should not be NULL from this point forth + + assert(*val != NULL); + + TSNode const child = ts_node_child(node, 0); + char const* const type = ts_node_type(child); + + if (strcmp(type, "string") == 0) { + (*val)->kind = FLAMINGO_VAL_KIND_STR; + + size_t const start = ts_node_start_byte(child); + size_t const end = ts_node_end_byte(child); + + // XXX remove one from each side as we don't want the quotes + + (*val)->str.size = end - start - 2; + (*val)->str.str = malloc((*val)->str.size); + + assert((*val)->str.str != NULL); + memcpy((*val)->str.str, flamingo->src + start + 1, (*val)->str.size); + + return 0; + } + + if (strcmp(type, "number") == 0) { + return error(flamingo, "number literals are not yet supported"); + } + + return error(flamingo, "unknown literal type: %s", type); +} diff --git a/flamingo/grammar/print.h b/flamingo/grammar/print.h new file mode 100644 index 0000000..82b4b15 --- /dev/null +++ b/flamingo/grammar/print.h @@ -0,0 +1,38 @@ +// This Source Form is subject to the terms of the AQUA Software License, +// v. 1.0. 
Copyright (c) 2024 Aymeric Wibo + +#pragma once + +#include "expr.h" + +#include +#include + +static int parse_print(flamingo_t* flamingo, TSNode node) { + assert(ts_node_child_count(node) == 2); + + TSNode const msg_node = ts_node_child_by_field_name(node, "msg", 3); + char const* const type = ts_node_type(msg_node); + + if (strcmp(ts_node_type(msg_node), "expression") != 0) { + return error(flamingo, "expected expression for message, got %s", type); + } + + flamingo_val_t* val = NULL; + + if (parse_expr(flamingo, msg_node, &val) < 0) { + return -1; + } + + // XXX Don't forget to decrement reference at the end! + + if (val->kind == FLAMINGO_VAL_KIND_STR) { + printf("%.*s\n", (int) val->str.size, val->str.str); + val_decref(val); + + return 0; + } + + val_decref(val); + return error(flamingo, "can't print expression kind: %d", val->kind); +} diff --git a/flamingo/grammar/statement.h b/flamingo/grammar/statement.h new file mode 100644 index 0000000..50cd46c --- /dev/null +++ b/flamingo/grammar/statement.h @@ -0,0 +1,45 @@ +// This Source Form is subject to the terms of the AQUA Software License, +// v. 1.0. 
Copyright (c) 2024 Aymeric Wibo + +#pragma once + +#include "assignment.h" +#include "block.h" +#include "function_declaration.h" +#include "import.h" +#include "print.h" + +#include + +static int parse_statement(flamingo_t* flamingo, TSNode node) { + assert(ts_node_child_count(node) == 1); + + TSNode const child = ts_node_child(node, 0); + char const* const type = ts_node_type(child); + + if (strcmp(type, "block") == 0) { + return parse_block(flamingo, child); + } + + else if (strcmp(type, "print") == 0) { + return parse_print(flamingo, child); + } + + else if (strcmp(type, "assignment") == 0) { + return parse_assignment(flamingo, child); + } + + else if (strcmp(type, "function_declaration") == 0) { + return parse_function_declaration(flamingo, child); + } + + else if (strcmp(type, "expression") == 0) { + return parse_expr(flamingo, child, NULL); + } + + else if (strcmp(type, "import") == 0) { + return parse_import(flamingo, child); + } + + return error(flamingo, "unknown statment type: %s", type); +} diff --git a/flamingo/runtime/unicode/umachine.h b/flamingo/runtime/unicode/umachine.h index 25dbf97..9195824 100644 --- a/flamingo/runtime/unicode/umachine.h +++ b/flamingo/runtime/unicode/umachine.h @@ -43,7 +43,7 @@ /* which are contained in the platform-specific file platform.h */ /*==========================================================================*/ -#include "ptypes.h" /* platform.h is included in ptypes.h */ +#include "unicode/ptypes.h" /* platform.h is included in ptypes.h */ /* * ANSI C headers: @@ -443,6 +443,6 @@ typedef int32_t UChar32; */ #define U_SENTINEL (-1) -#include "urename.h" +#include "unicode/urename.h" #endif diff --git a/flamingo/runtime/unicode/utf16.h b/flamingo/runtime/unicode/utf16.h index 9c31330..9fd7d5c 100644 --- a/flamingo/runtime/unicode/utf16.h +++ b/flamingo/runtime/unicode/utf16.h @@ -34,9 +34,9 @@ #ifndef __UTF16_H__ #define __UTF16_H__ -#include "umachine.h" +#include "unicode/umachine.h" #ifndef __UTF_H__ -# include 
"utf.h" +# include "unicode/utf.h" #endif /* single-code point definitions -------------------------------------------- */ diff --git a/flamingo/runtime/unicode/utf8.h b/flamingo/runtime/unicode/utf8.h index 6cf067e..bb00130 100644 --- a/flamingo/runtime/unicode/utf8.h +++ b/flamingo/runtime/unicode/utf8.h @@ -34,9 +34,9 @@ #ifndef __UTF8_H__ #define __UTF8_H__ -#include "umachine.h" +#include "unicode/umachine.h" #ifndef __UTF_H__ -# include "utf.h" +# include "unicode/utf.h" #endif /* internal definitions ----------------------------------------------------- */ diff --git a/hello_world.fl b/hello_world.fl index c02301d..64fc35e 100644 --- a/hello_world.fl +++ b/hello_world.fl @@ -1,23 +1,3 @@ -import .std +# import .std -fn fun3() { - print "Inside function 3" -} - -fn fun() { - fn fun2() { - print "Inside nested function" - fun3() - } - - print "Inside function" - fun2() - print "more stuff" -} - -fun() -fun() -print "test" -fun() -fun() -fun2() +print "Hello world" diff --git a/update.sh b/update.sh index 9756943..7d00b79 100644 --- a/update.sh +++ b/update.sh @@ -14,9 +14,6 @@ mv tree-sitter/lib/src/* flamingo/runtime rm -rf tree-sitter -find flamingo/runtime/unicode -name "*.h" -exec sed -i '' 's/"unicode\/\(.*\)\.h"/"\1.h"/g' {} ';' -sed -i '' 's/cstdint/stdint.h/g' flamingo/runtime/unicode/umachine.h - # Update tree-sitter-flamingo (i.e. src/parser.c and src/tree_sitter/parser.h). rm -rf tree-sitter-flamingo 2>/dev/null || true