[libjlx] Implement basic expression parsing

This commit is contained in:
John Stefanelli 2025-05-29 17:13:23 +02:00
parent 50205e8185
commit 07d2a25712
Signed by: jstefanelli
GPG key ID: 60EDE2437640D2AA
3 changed files with 164 additions and 42 deletions

View file

@ -1,19 +1,43 @@
#include <iostream> #include <iostream>
#include <optional> #include <optional>
#include <vector> #include <vector>
#include <filesystem>
#include <fstream>
import jlx; import jlx;
int main(int, char**) { std::ifstream file;
std::istreambuf_iterator<char> start(std::cin), end;
int main(int argc, char** argv) {
std::istreambuf_iterator<char> start, end;
if (argc > 1) {
if (std::filesystem::is_regular_file(argv[1])) {
std::cout << "Opening: " << argv[1] << std::endl;
file = std::ifstream(argv[1]);
start = std::istreambuf_iterator<char>(file);
} else {
std::cerr << "File not found: " << argv[1] << std::endl;
return 1;
}
} else {
start = std::istreambuf_iterator<char>(std::cin);
}
std::string data{start, end}; std::string data{start, end};
auto tokenizer = jlx::tokenizer(data); auto tokenizer = jlx::tokenizer(data);
std::optional<jlx::token> res = tokenizer.read_token(); std::optional<jlx::token> res;
std::vector<jlx::token> tokens; std::vector<jlx::token> tokens;
do {
res = tokenizer.read_token();
if (res.has_value()) {
tokens.emplace_back(res.value());
}
} while (res.has_value());
std::cout << "Read " << tokens.size() << " tokens\n";
auto last = std::string(); auto last = std::string();
while(res.has_value()) { while(res.has_value()) {
@ -28,5 +52,16 @@ int main(int, char**) {
auto root = parser.parse(); auto root = parser.parse();
if (root == nullptr) {
return 1;
}
auto* rt = dynamic_cast<jlx::root_statement*>(root.get());
if (rt == nullptr) {
return 1;
}
std::cout << "Parsed " << rt->statements.size() << " statements" << std::endl;
return 0; return 0;
} }

View file

@ -36,7 +36,7 @@ namespace jlx {
std::bidirectional_iterator<T>; std::bidirectional_iterator<T>;
}; };
struct statement { export struct statement {
ast_type type; ast_type type;
token t; token t;
@ -47,7 +47,7 @@ namespace jlx {
virtual ~statement() = default; virtual ~statement() = default;
}; };
struct root_statement : public statement { export struct root_statement : public statement {
explicit root_statement(const token& t) : statement(Root, t) { explicit root_statement(const token& t) : statement(Root, t) {
} }
@ -61,7 +61,7 @@ namespace jlx {
~root_statement() override = default; ~root_statement() override = default;
}; };
struct expression : public statement { export struct expression : public statement {
explicit expression(const token& t) : statement(Expression, t) { explicit expression(const token& t) : statement(Expression, t) {
} }
@ -82,7 +82,7 @@ namespace jlx {
~expression() override = default; ~expression() override = default;
}; };
struct block : public statement { export struct block : public statement {
explicit block(const token& t) : statement(Block, t) { explicit block(const token& t) : statement(Block, t) {
} }
@ -96,12 +96,12 @@ namespace jlx {
~block() override = default; ~block() override = default;
}; };
struct function_parameter { export struct function_parameter {
std::string name; std::string name;
std::string type; std::string type;
}; };
struct function_declaration : public statement { export struct function_declaration : public statement {
explicit function_declaration(const token& t) : statement(FunctionDeclaration, t) { explicit function_declaration(const token& t) : statement(FunctionDeclaration, t) {
} }
@ -120,7 +120,7 @@ namespace jlx {
~function_declaration() override = default; ~function_declaration() override = default;
}; };
struct variable_declaration : public statement { export struct variable_declaration : public statement {
variable_declaration(const token& t) : statement(VariableDeclaration, t) { variable_declaration(const token& t) : statement(VariableDeclaration, t) {
} }
@ -133,7 +133,7 @@ namespace jlx {
~variable_declaration() override = default; ~variable_declaration() override = default;
}; };
struct if_statement : public statement { export struct if_statement : public statement {
if_statement(const token& t) : statement(IfStatement, t) { if_statement(const token& t) : statement(IfStatement, t) {
} }
@ -142,7 +142,7 @@ namespace jlx {
std::unique_ptr<block> block; std::unique_ptr<block> block;
}; };
struct literal_value : public expression { export struct literal_value : public expression {
explicit literal_value(const token& t): expression(t) { explicit literal_value(const token& t): expression(t) {
et = EtLiteralValue; et = EtLiteralValue;
} }
@ -155,7 +155,7 @@ namespace jlx {
} }
}; };
struct single_operation : public expression { export struct single_operation : public expression {
single_operation(const token& t, std::unique_ptr<expression> operand, const token& operator_token) : expression(t), single_operation(const token& t, std::unique_ptr<expression> operand, const token& operator_token) : expression(t),
operand(std::move(operand)), operator_token(operator_token) { operand(std::move(operand)), operator_token(operator_token) {
et = EtSingleValueOperation; et = EtSingleValueOperation;
@ -181,7 +181,7 @@ namespace jlx {
token operator_token; token operator_token;
}; };
struct dual_operation : public expression { export struct dual_operation : public expression {
dual_operation(const token& t, std::unique_ptr<expression> first_operand, std::unique_ptr<expression> second_operand, const token& operator_token) : dual_operation(const token& t, std::unique_ptr<expression> first_operand, std::unique_ptr<expression> second_operand, const token& operator_token) :
expression(t), first_operand(std::move(first_operand)), second_operand(std::move(second_operand)), operator_token(operator_token) { expression(t), first_operand(std::move(first_operand)), second_operand(std::move(second_operand)), operator_token(operator_token) {
et = EtDualValueOperation; et = EtDualValueOperation;
@ -209,7 +209,7 @@ namespace jlx {
token operator_token; token operator_token;
}; };
struct function_call : public expression { export struct function_call : public expression {
function_call(const token& t, std::string function_name, std::vector<std::unique_ptr<expression>> arguments) : function_call(const token& t, std::string function_name, std::vector<std::unique_ptr<expression>> arguments) :
expression(t), function_name(std::move(function_name)), arguments(std::move(arguments)) { expression(t), function_name(std::move(function_name)), arguments(std::move(arguments)) {
et = EtFunctionCall; et = EtFunctionCall;
@ -242,7 +242,7 @@ namespace jlx {
std::vector<std::unique_ptr<expression>> arguments; std::vector<std::unique_ptr<expression>> arguments;
}; };
struct identifier_expression : public expression { export struct identifier_expression : public expression {
identifier_expression(const token& t, std::string name) : expression(t), name(std::move(name)) { identifier_expression(const token& t, std::string name) : expression(t), name(std::move(name)) {
et = EtIdentifier; et = EtIdentifier;
} }
@ -262,33 +262,36 @@ namespace jlx {
T current; T current;
E last; E last;
/// Aborts parsing because the token `t` is not valid at the current position.
///
/// @param t the offending token; its content, source file, line and column
///          are embedded in the error message.
/// @throws std::runtime_error always.
[[noreturn]] static inline void fail_invalid_token(const token& t) {
    // Hand the formatted std::string straight to the exception: going through
    // .c_str() forces a second copy and cuts the message at an embedded NUL.
    throw std::runtime_error(
        std::format("Invalid token {} at {}:{}:{}", t.content, t.source_file, t.line, t.col));
}
/// Aborts parsing because the token stream ended where more input was required.
///
/// @throws std::runtime_error always, with the message "Unexpected end-of-file".
[[noreturn]] static inline void fail_invalid_eof() {
    throw std::runtime_error{"Unexpected end-of-file"};
}
void next() { void next(bool mandatory = true) {
current++; ++current;
if (current == last) { if (current == last && mandatory) {
fail_invalid_eof(); fail_invalid_eof();
} }
} }
std::unique_ptr<block> parse_block() { std::unique_ptr<block> parse_block() {
auto& start = *current;
if (current->type != Punctuation || current->content != "{") { if (current->type != Punctuation || current->content != "{") {
fail_invalid_token(*current); fail_invalid_token(*current);
} }
next(); next();
std::vector<std::unique_ptr<statement>> statements;
while(current->type != Punctuation && current->content != "}") { while(current->type != Punctuation && current->content != "}") {
statements.emplace_back(parse_statement());
} }
next(false);
return nullptr; return std::make_unique<block>(start, std::move(statements));
} }
std::unique_ptr<variable_declaration> parse_variable_declaration() { std::unique_ptr<variable_declaration> parse_variable_declaration() {
@ -320,9 +323,9 @@ namespace jlx {
name = current->content; name = current->content;
next(); next(false);
if (current->type == Punctuation && current->content == ":") { if (current != last && current->type == Punctuation && current->content == ":") {
next(); next();
if (current->type != Identifier) { if (current->type != Identifier) {
@ -331,10 +334,10 @@ namespace jlx {
type = current->content; type = current->content;
next(); next(false);
} }
if (current->type == Operator && current->content == "=") { if (current != last && current->type == Operator && current->content == "=") {
next(); next();
starting_value = parse_expression(); starting_value = parse_expression();
@ -381,8 +384,80 @@ namespace jlx {
return statement; return statement;
} }
std::unique_ptr<expression> parse_expression() { std::unique_ptr<expression> parse_expression(std::unique_ptr<expression> previous = nullptr) {
return nullptr; auto start = *current;
if (current == last) {
if (previous != nullptr) {
return previous;
}
fail_invalid_eof();
}
switch(current->type) {
case Identifier:
if (previous != nullptr) {
return previous;
}
next(false);
return parse_expression(std::make_unique<identifier_expression>(start, start.content));
case Operator: {
if (previous == nullptr) {
auto& op = current->content;
if (op == "+" || op == "-" || op == "!") {
next();
auto expr = parse_expression();
return std::make_unique<single_operation>(start, std::move(expr), start);
}
fail_invalid_token(*current);
}
auto& op_token = *current;
next();
return std::make_unique<dual_operation>(start, std::move(previous), parse_expression(), op_token);
}
case Number:
case String:
case Boolean:
if (previous != nullptr) {
return previous;
}
next(false);
return parse_expression(std::make_unique<literal_value>(start));
case Punctuation:
{
if (current->content == "(") {
if (previous != nullptr) {
return previous;
}
auto ex = parse_expression();
if (current->type != Punctuation || current->content != ")") {
fail_invalid_token(*current);
}
next(false);
return ex;
} else if (current->content == ";") {
if (previous == nullptr) {
fail_invalid_token(*current);
}
next(false);
return previous;
}
}
fail_invalid_token(*current);
default:
fail_invalid_token(*current);
}
} }
std::unique_ptr<function_declaration> parse_function() { std::unique_ptr<function_declaration> parse_function() {
@ -416,9 +491,8 @@ namespace jlx {
if (!first) { if (!first) {
if(current->type != Punctuation || current->content != ",") { if(current->type != Punctuation || current->content != ",") {
fail_invalid_token(*current); fail_invalid_token(*current);
} else {
next();
} }
next();
} }
std::string name; std::string name;
@ -465,7 +539,7 @@ namespace jlx {
return current->content; return current->content;
} }
std::unique_ptr<statement> parse_top_level_statement() { std::unique_ptr<statement> parse_statement(bool top_level = false) {
if (current == last) { if (current == last) {
return nullptr; return nullptr;
} }
@ -475,12 +549,12 @@ namespace jlx {
return parse_variable_declaration(); return parse_variable_declaration();
} else if (current->content == "if") { } else if (current->content == "if") {
return parse_if_statement(); return parse_if_statement();
} else if (current->content == "fun") { } else if (current->content == "fun" && top_level) {
return parse_function(); return parse_function();
} }
} }
return nullptr; return parse_expression();
} }
public: public:
@ -496,7 +570,7 @@ namespace jlx {
std::vector<std::unique_ptr<statement>> top_level_statements; std::vector<std::unique_ptr<statement>> top_level_statements;
while(current != last) { while(current != last) {
auto s = parse_top_level_statement(); auto s = parse_statement(true);
if (s == nullptr) { if (s == nullptr) {
throw std::runtime_error("No statement parsed..."); throw std::runtime_error("No statement parsed...");
} }

View file

@ -17,6 +17,7 @@ namespace jlx {
Punctuation, Punctuation,
Number, Number,
String, String,
Boolean,
Keyword, Keyword,
Identifier, Identifier,
Operator Operator
@ -30,6 +31,8 @@ namespace jlx {
return "Number"; return "Number";
case String: case String:
return "String"; return "String";
case Boolean:
return "Boolean";
case Keyword: case Keyword:
return "Keyword"; return "Keyword";
case Identifier: case Identifier:
@ -86,7 +89,7 @@ namespace jlx {
"var" "var"
}}; }};
static constexpr std::array<char, 7> punctiations = {{ static constexpr std::array<char, 7> punctuations = {{
'.', '.',
'(', '(',
'(', '(',
@ -236,6 +239,16 @@ namespace jlx {
auto word = buffer.str(); auto word = buffer.str();
if (word == "true" || word == "false") {
return token {
token_type::Boolean,
"mono_src",
word,
start_line,
start_col
};
}
if (std::find(keywords.begin(), keywords.end(), word) != keywords.end()) { if (std::find(keywords.begin(), keywords.end(), word) != keywords.end()) {
return token { return token {
token_type::Keyword, token_type::Keyword,
@ -255,7 +268,7 @@ namespace jlx {
} }
} }
std::optional<token> read_punctiation_token() { std::optional<token> read_punctuation_token() {
auto res = source.peek(); auto res = source.peek();
if (!res.has_value()) { if (!res.has_value()) {
@ -264,7 +277,7 @@ namespace jlx {
auto val = res.value(); auto val = res.value();
if (std::find(punctiations.begin(), punctiations.end(), val) != punctiations.end()) { if (std::find(punctuations.begin(), punctuations.end(), val) != punctuations.end()) {
auto line = source.current_line(); auto line = source.current_line();
auto col = source.current_col(); auto col = source.current_col();
source.next(); source.next();
@ -348,9 +361,9 @@ namespace jlx {
return read_identifier(); return read_identifier();
} }
auto punctiation_res = read_punctiation_token(); auto punctuation_res = read_punctuation_token();
if (punctiation_res.has_value()) { if (punctuation_res.has_value()) {
return punctiation_res; return punctuation_res;
} }
auto op_res = read_operator_token(); auto op_res = read_operator_token();