From 07d2a25712a6ad04455fa3bda233fc9db7bcc3a5 Mon Sep 17 00:00:00 2001 From: John Stefanelli Date: Thu, 29 May 2025 17:13:23 +0200 Subject: [PATCH] [libjlx] Implement basic expression parsing --- jlx/src/main.cpp | 41 +++++++++- libjlx/modules/ast.cppm | 140 ++++++++++++++++++++++++++-------- libjlx/modules/tokenizer.cppm | 25 ++++-- 3 files changed, 164 insertions(+), 42 deletions(-) diff --git a/jlx/src/main.cpp b/jlx/src/main.cpp index ade47a2..1a744a3 100644 --- a/jlx/src/main.cpp +++ b/jlx/src/main.cpp @@ -1,19 +1,43 @@ #include #include #include +#include +#include import jlx; -int main(int, char**) { - std::istreambuf_iterator start(std::cin), end; +std::ifstream file; + +int main(int argc, char** argv) { + std::istreambuf_iterator start, end; + if (argc > 1) { + if (std::filesystem::is_regular_file(argv[1])) { + std::cout << "Opening: " << argv[1] << std::endl; + file = std::ifstream(argv[1]); + start = std::istreambuf_iterator(file); + } else { + std::cerr << "File not found: " << argv[1] << std::endl; + return 1; + } + } else { + start = std::istreambuf_iterator(std::cin); + } std::string data{start, end}; auto tokenizer = jlx::tokenizer(data); - std::optional res = tokenizer.read_token(); + std::optional res; std::vector tokens; + do { + res = tokenizer.read_token(); + if (res.has_value()) { + tokens.emplace_back(res.value()); + } + } while (res.has_value()); + + std::cout << "Read " << tokens.size() << " tokens\n"; auto last = std::string(); while(res.has_value()) { @@ -28,5 +52,16 @@ int main(int, char**) { auto root = parser.parse(); + if (root == nullptr) { + return 1; + } + + auto* rt = dynamic_cast(root.get()); + if (rt == nullptr) { + return 1; + } + + std::cout << "Parsed " << rt->statements.size() << " statements" << std::endl; + return 0; } diff --git a/libjlx/modules/ast.cppm b/libjlx/modules/ast.cppm index 178bcba..dbdae23 100644 --- a/libjlx/modules/ast.cppm +++ b/libjlx/modules/ast.cppm @@ -36,7 +36,7 @@ namespace jlx { std::bidirectional_iterator; }; - struct statement { + export struct statement { ast_type type; token t; @@ -47,7 +47,7 @@ namespace jlx { virtual ~statement() = default; }; - struct root_statement : public statement { + export struct root_statement : public statement { explicit root_statement(const token& t) : statement(Root, t) { } @@ -61,7 +61,7 @@ namespace jlx { ~root_statement() override = default; }; - struct expression : public statement { + export struct expression : public statement { explicit expression(const token& t) : statement(Expression, t) { } @@ -82,7 +82,7 @@ namespace jlx { ~expression() override = default; }; - struct block : public statement { + export struct block : public statement { explicit block(const token& t) : statement(Block, t) { } @@ -96,12 +96,12 @@ namespace jlx { ~block() override = default; }; - struct function_parameter { + export struct function_parameter { std::string name; std::string type; }; - struct function_declaration : public statement { + export struct function_declaration : public statement { explicit function_declaration(const token& t) : statement(FunctionDeclaration, t) { } @@ -120,7 +120,7 @@ namespace jlx { ~function_declaration() override = default; }; - struct variable_declaration : public statement { + export struct variable_declaration : public statement { variable_declaration(const token& t) : statement(VariableDeclaration, t) { } @@ -133,7 +133,7 @@ namespace jlx { ~variable_declaration() override = default; }; - struct if_statement : public statement { + export struct if_statement : public statement { if_statement(const token& t) : statement(IfStatement, t) { } @@ -142,7 +142,7 @@ namespace jlx { std::unique_ptr block; }; - struct literal_value : public expression { + export struct literal_value : public expression { explicit literal_value(const token& t): expression(t) { et = EtLiteralValue; } @@ -155,7 +155,7 @@ namespace jlx { } }; - struct single_operation : public expression { + export struct single_operation : public expression { single_operation(const token& t, std::unique_ptr operand, const token& operator_token) : expression(t), operand(std::move(operand)), operator_token(operator_token) { et = EtSingleValueOperation; @@ -181,7 +181,7 @@ namespace jlx { token operator_token; }; - struct dual_operation : public expression { + export struct dual_operation : public expression { dual_operation(const token& t, std::unique_ptr first_operand, std::unique_ptr second_operand, const token& operator_token) : expression(t), first_operand(std::move(first_operand)), second_operand(std::move(second_operand)), operator_token(operator_token) { et = EtDualValueOperation; @@ -209,7 +209,7 @@ namespace jlx { token operator_token; }; - struct function_call : public expression { + export struct function_call : public expression { function_call(const token& t, std::string function_name, std::vector> arguments) : expression(t), function_name(std::move(function_name)), arguments(std::move(arguments)) { et = EtFunctionCall; @@ -242,7 +242,7 @@ namespace jlx { std::vector> arguments; }; - struct identifier_expression : public expression { + export struct identifier_expression : public expression { identifier_expression(const token& t, std::string name) : expression(t), name(std::move(name)) { et = EtIdentifier; } @@ -262,33 +262,36 @@ namespace jlx { T current; E last; - inline void fail_invalid_token(const token& t) { - throw std::runtime_error(std::format("Invalid token {} at {}:{}", t.content, t.line, t.col).c_str()); + [[noreturn]] static inline void fail_invalid_token(const token& t) { + throw std::runtime_error(std::format("Invalid token {} at {}:{}:{}", t.content, t.source_file, t.line, t.col).c_str()); } - inline void fail_invalid_eof() { + [[noreturn]] static inline void fail_invalid_eof() { throw std::runtime_error("Unexpected end-of-file"); } - void next() { - current++; + void next(bool mandatory = true) { + ++current; - if (current == last) { + if (current == last && mandatory) { fail_invalid_eof(); } } std::unique_ptr parse_block() { + auto& start = *current; if (current->type != Punctuation || current->content != "{") { fail_invalid_token(*current); } next(); + std::vector> statements; while(current->type != Punctuation && current->content != "}") { - + statements.emplace_back(parse_statement()); } + next(false); - return nullptr; + return std::make_unique(start, std::move(statements)); } std::unique_ptr parse_variable_declaration() { @@ -320,9 +323,9 @@ namespace jlx { name = current->content; - next(); + next(false); - if (current->type == Punctuation && current->content == ":") { + if (current != last && current->type == Punctuation && current->content == ":") { next(); if (current->type != Identifier) { @@ -331,10 +334,10 @@ namespace jlx { type = current->content; - next(); + next(false); } - if (current->type == Operator && current->content == "=") { + if (current != last && current->type == Operator && current->content == "=") { next(); starting_value = parse_expression(); @@ -381,8 +384,80 @@ namespace jlx { return statement; } - std::unique_ptr parse_expression() { - return nullptr; + std::unique_ptr parse_expression(std::unique_ptr previous = nullptr) { + auto start = *current; + + if (current == last) { + if (previous != nullptr) { + return previous; + } + + fail_invalid_eof(); + } + + switch(current->type) { + case Identifier: + if (previous != nullptr) { + return previous; + } + next(false); + return parse_expression(std::make_unique(start, start.content)); + case Operator: { + if (previous == nullptr) { + auto& op = current->content; + if (op == "+" || op == "-" || op == "!") { + next(); + + auto expr = parse_expression(); + return std::make_unique(start, std::move(expr), start); + } + fail_invalid_token(*current); + } + + auto& op_token = *current; + next(); + + return std::make_unique(start, std::move(previous), parse_expression(), op_token); + } + case Number: + case String: + case Boolean: + if (previous != nullptr) { + return previous; + } + next(false); + + return parse_expression(std::make_unique(start)); + case Punctuation: + { + if (current->content == "(") { + if (previous != nullptr) { + return previous; + } + + auto ex = parse_expression(); + + if (current->type != Punctuation || current->content != ")") { + fail_invalid_token(*current); + } + + next(false); + + return ex; + } else if (current->content == ";") { + if (previous == nullptr) { + fail_invalid_token(*current); + } + + next(false); + + return previous; + } + } + fail_invalid_token(*current); + default: + fail_invalid_token(*current); + } } std::unique_ptr parse_function() { @@ -416,9 +491,8 @@ namespace jlx { if (!first) { if(current->type != Punctuation || current->content != ",") { fail_invalid_token(*current); - } else { - next(); } + next(); } std::string name; @@ -465,7 +539,7 @@ namespace jlx { return current->content; } - std::unique_ptr parse_top_level_statement() { + std::unique_ptr parse_statement(bool top_level = false) { if (current == last) { return nullptr; } @@ -475,12 +549,12 @@ namespace jlx { return parse_variable_declaration(); } else if (current->content == "if") { return parse_if_statement(); - } else if (current->content == "fun") { + } else if (current->content == "fun" && top_level) { return parse_function(); } } - return nullptr; + return parse_expression(); } public: @@ -496,7 +570,7 @@ namespace jlx { std::vector> top_level_statements; while(current != last) { - auto s = parse_top_level_statement(); + auto s = parse_statement(true); if (s == nullptr) { throw std::runtime_error("No statement parsed..."); } diff --git a/libjlx/modules/tokenizer.cppm b/libjlx/modules/tokenizer.cppm index 141b9ee..117c226 100644 --- a/libjlx/modules/tokenizer.cppm +++ b/libjlx/modules/tokenizer.cppm @@ -17,6 +17,7 @@ namespace jlx { Punctuation, Number, String, + Boolean, Keyword, Identifier, Operator @@ -30,6 +31,8 @@ namespace jlx { return "Number"; case String: return "String"; + case Boolean: + return "Boolean"; case Keyword: return "Keyword"; case Identifier: @@ -86,7 +89,7 @@ namespace jlx { "var" }}; - static constexpr std::array punctiations = {{ + static constexpr std::array punctuations = {{ '.', '(', '(', @@ -236,6 +239,16 @@ namespace jlx { auto word = buffer.str(); + if (word == "true" || word == "false") { + return token { + token_type::Boolean, + "mono_src", + word, + start_line, + start_col + }; + } + if (std::find(keywords.begin(), keywords.end(), word) != keywords.end()) { return token { token_type::Keyword, @@ -255,7 +268,7 @@ namespace jlx { } } - std::optional read_punctiation_token() { + std::optional read_punctuation_token() { auto res = source.peek(); if (!res.has_value()) { @@ -264,7 +277,7 @@ namespace jlx { auto val = res.value(); - if (std::find(punctiations.begin(), punctiations.end(), val) != punctiations.end()) { + if (std::find(punctuations.begin(), punctuations.end(), val) != punctuations.end()) { auto line = source.current_line(); auto col = source.current_col(); source.next(); @@ -348,9 +361,9 @@ namespace jlx { return read_identifier(); } - auto punctiation_res = read_punctiation_token(); - if (punctiation_res.has_value()) { - return punctiation_res; + auto punctuation_res = read_punctuation_token(); + if (punctuation_res.has_value()) { + return punctuation_res; } auto op_res = read_operator_token();