// jlx:ast module partition: syntax-tree node types and the token-stream parser.
module;
|
|
|
|
#include <concepts>
#include <format>
#include <iterator>
#include <memory>
#include <optional>
#include <stdexcept>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
|
|
|
|
export module jlx:ast;
|
|
|
|
import :tokenizer;
|
|
|
|
namespace jlx {
|
|
/// Tag stored in `statement::type` identifying which kind of syntax-tree
/// node an object is. The numeric values are spelled out so that they stay
/// stable if enumerators are ever reordered.
export enum ast_type {
    Root                = 0,  // root_statement
    Expression          = 1,  // every node derived from expression
    Block               = 2,  // block
    FunctionDeclaration = 3,  // function_declaration
    VariableDeclaration = 4,  // variable_declaration
    LiteralValue        = 5,  // not assigned by any node type visible in this file
    IfStatement         = 6   // if_statement
};
|
|
|
|
/// Tag stored in `expression::et` identifying the concrete expression node.
/// `EtInvalid` is the value a freshly constructed `expression` base carries
/// until a derived constructor overwrites it.
export enum expression_type {
    EtInvalid              = 0,
    EtLiteralValue         = 1,  // literal_value
    EtSingleValueOperation = 2,  // single_operation
    EtDualValueOperation   = 3,  // dual_operation
    EtFunctionCall         = 4,  // function_call
    EtIdentifier           = 5   // identifier_expression
};
|
|
|
|
export template<class T>
|
|
concept token_iterator = requires() {
|
|
requires std::same_as<typename T::value_type, jlx::token>;
|
|
std::bidirectional_iterator<T>;
|
|
};
|
|
|
|
struct statement {
|
|
ast_type type;
|
|
token t;
|
|
|
|
statement(ast_type type, const token& t) : type(type), t(t) {
|
|
|
|
}
|
|
|
|
virtual ~statement() = default;
|
|
};
|
|
|
|
struct root_statement : public statement {
|
|
explicit root_statement(const token& t) : statement(Root, t) {
|
|
|
|
}
|
|
|
|
explicit root_statement(std::vector<std::unique_ptr<statement>> statements, const token& t) : statement(Root, t), statements(std::move(statements)) {
|
|
|
|
}
|
|
|
|
std::vector<std::unique_ptr<statement>> statements;
|
|
|
|
~root_statement() override = default;
|
|
};
|
|
|
|
struct expression : public statement {
|
|
explicit expression(const token& t) : statement(Expression, t) {
|
|
|
|
}
|
|
|
|
expression(const expression&) = default;
|
|
expression& operator=(const expression& other) {
|
|
et = EtInvalid;
|
|
t = other.t;
|
|
evaluated_type = other.evaluated_type;
|
|
return *this;
|
|
}
|
|
|
|
expression_type et = EtInvalid;
|
|
std::optional<std::string> evaluated_type;
|
|
|
|
virtual std::unique_ptr<expression> clone() const = 0;
|
|
|
|
~expression() override = default;
|
|
};
|
|
|
|
struct block : public statement {
|
|
explicit block(const token& t) : statement(Block, t) {
|
|
|
|
}
|
|
|
|
explicit block(const token& t, std::vector<std::unique_ptr<statement>> statements) : statement(Block, t), statements(std::move(statements)) {
|
|
|
|
}
|
|
|
|
std::vector<std::unique_ptr<statement>> statements;
|
|
|
|
~block() override = default;
|
|
};
|
|
|
|
/// One entry of a function's parameter list.
/// Kept as a plain aggregate; the field order (name, then type) is what
/// aggregate initialisation relies on.
struct function_parameter {
    std::string name;  // parameter identifier
    std::string type;  // type name written for the parameter
};
|
|
|
|
struct function_declaration : public statement {
|
|
explicit function_declaration(const token& t) : statement(FunctionDeclaration, t) {
|
|
|
|
}
|
|
|
|
function_declaration(const token& t, std::string name, std::vector<function_parameter> parameters, std::optional<std::string> return_type, std::unique_ptr<block> body) :
|
|
statement(FunctionDeclaration, t), name(std::move(name)), parameters(std::move(parameters)), return_type(std::move(return_type)), body(std::move(body)) {
|
|
|
|
}
|
|
|
|
std::string name;
|
|
std::vector<function_parameter> parameters;
|
|
std::optional<std::string> return_type;
|
|
std::unique_ptr<block> body;
|
|
|
|
|
|
~function_declaration() override = default;
|
|
};
|
|
|
|
struct variable_declaration : public statement {
|
|
variable_declaration(const token& t) : statement(VariableDeclaration, t) {
|
|
|
|
}
|
|
|
|
bool constant = true;
|
|
std::string name;
|
|
std::optional<std::string> type;
|
|
std::unique_ptr<expression> initial_expression;
|
|
|
|
~variable_declaration() override = default;
|
|
};
|
|
|
|
struct if_statement : public statement {
|
|
if_statement(const token& t) : statement(IfStatement, t) {
|
|
|
|
}
|
|
|
|
std::unique_ptr<expression> condition;
|
|
std::unique_ptr<block> block;
|
|
};
|
|
|
|
struct literal_value : public expression {
|
|
explicit literal_value(const token& t): expression(t) {
|
|
et = EtLiteralValue;
|
|
}
|
|
|
|
literal_value(const literal_value&) = default;
|
|
literal_value& operator=(const literal_value&) = default;
|
|
|
|
[[nodiscard]] std::unique_ptr<expression> clone() const override {
|
|
return std::make_unique<literal_value>(*this);
|
|
}
|
|
};
|
|
|
|
struct single_operation : public expression {
|
|
single_operation(const token& t, std::unique_ptr<expression> operand, const token& operator_token) : expression(t),
|
|
operand(std::move(operand)), operator_token(operator_token) {
|
|
et = EtSingleValueOperation;
|
|
}
|
|
|
|
single_operation(const single_operation& other) : expression(other.t), operand(other.operand->clone()), operator_token(other.operator_token) {
|
|
et = EtSingleValueOperation;
|
|
operand = other.operand->clone();
|
|
}
|
|
|
|
[[nodiscard]] std::unique_ptr<expression> clone() const override {
|
|
return std::make_unique<single_operation>(*this);
|
|
}
|
|
|
|
single_operation& operator=(const single_operation& other){
|
|
et = EtSingleValueOperation;
|
|
operand = other.operand->clone();
|
|
operator_token = other.operator_token;
|
|
return *this;
|
|
}
|
|
|
|
std::unique_ptr<expression> operand;
|
|
token operator_token;
|
|
};
|
|
|
|
struct dual_operation : public expression {
|
|
dual_operation(const token& t, std::unique_ptr<expression> first_operand, std::unique_ptr<expression> second_operand, const token& operator_token) :
|
|
expression(t), first_operand(std::move(first_operand)), second_operand(std::move(second_operand)), operator_token(operator_token) {
|
|
et = EtDualValueOperation;
|
|
}
|
|
|
|
dual_operation(const dual_operation& other) :
|
|
expression(other.t), first_operand(other.first_operand->clone()), second_operand(other.second_operand->clone()), operator_token(other.operator_token) {
|
|
et = EtDualValueOperation;
|
|
}
|
|
|
|
dual_operation& operator=(const dual_operation* other) {
|
|
et = EtDualValueOperation;
|
|
first_operand = other->first_operand->clone();
|
|
second_operand = other->second_operand->clone();
|
|
operator_token = other->operator_token;\
|
|
return *this;
|
|
}
|
|
|
|
[[nodiscard]] std::unique_ptr<expression> clone() const override {
|
|
return std::make_unique<dual_operation>(*this);
|
|
}
|
|
|
|
std::unique_ptr<expression> first_operand;
|
|
std::unique_ptr<expression> second_operand;
|
|
token operator_token;
|
|
};
|
|
|
|
struct function_call : public expression {
|
|
function_call(const token& t, std::string function_name, std::vector<std::unique_ptr<expression>> arguments) :
|
|
expression(t), function_name(std::move(function_name)), arguments(std::move(arguments)) {
|
|
et = EtFunctionCall;
|
|
}
|
|
|
|
function_call(const function_call& other) : expression(other.t) {
|
|
et = EtFunctionCall;
|
|
function_name = other.function_name;
|
|
arguments.reserve(other.arguments.size());
|
|
for(auto& arg : other.arguments) {
|
|
arguments.emplace_back(arg->clone());
|
|
}
|
|
}
|
|
|
|
function_call& operator=(const function_call& other) {
|
|
et = EtFunctionCall;
|
|
function_name = other.function_name;
|
|
arguments.reserve(other.arguments.size());
|
|
for(auto& arg : other.arguments) {
|
|
arguments.emplace_back(arg->clone());
|
|
}
|
|
return *this;
|
|
}
|
|
|
|
[[nodiscard]] std::unique_ptr<expression> clone() const override {
|
|
return std::make_unique<function_call>(*this);
|
|
}
|
|
|
|
std::string function_name;
|
|
std::vector<std::unique_ptr<expression>> arguments;
|
|
};
|
|
|
|
struct identifier_expression : public expression {
|
|
identifier_expression(const token& t, std::string name) : expression(t), name(std::move(name)) {
|
|
et = EtIdentifier;
|
|
}
|
|
|
|
identifier_expression(const identifier_expression&) = default;
|
|
identifier_expression& operator=(const identifier_expression&) = default;
|
|
|
|
[[nodiscard]] std::unique_ptr<expression> clone() const override {
|
|
return std::make_unique<identifier_expression>(*this);
|
|
}
|
|
|
|
std::string name;
|
|
};
|
|
|
|
/// Recursive-descent parser that turns a range of tokens into a syntax tree.
///
/// @tparam T  iterator over `jlx::token` values (see `token_iterator`).
/// @tparam E  sentinel marking the end of the token range.
///
/// Every syntax error is reported by throwing `std::runtime_error`.
///
/// Current limitations: `parse_expression()` is still a placeholder that
/// consumes nothing and yields a null expression, so initialisers and `if`
/// conditions are not parsed yet.
export template<token_iterator T, std::sentinel_for<T> E>
class parser {
    T current;  // token currently being examined
    E last;     // end of the token range

    /// Throws a `std::runtime_error` naming the offending token and its position.
    [[noreturn]] static void fail_invalid_token(const token& t) {
        throw std::runtime_error(std::format("Invalid token {} at {}:{}", t.content, t.line, t.col));
    }

    /// Throws a `std::runtime_error` reporting that the input ended too early.
    [[noreturn]] static void fail_invalid_eof() {
        throw std::runtime_error("Unexpected end-of-file");
    }

    /// True once every token has been consumed.
    [[nodiscard]] bool at_end() const {
        return current == last;
    }

    /// Steps past the current token. The input may be exhausted afterwards;
    /// use this after a token that can legitimately be the last one.
    void advance() {
        ++current;
    }

    /// Steps past the current token and requires another token to follow.
    /// @throws std::runtime_error if the input ends here.
    void next() {
        advance();
        if (at_end()) {
            fail_invalid_eof();
        }
    }

    /// True when a current token exists and has the given kind and text.
    /// Never dereferences the iterator at the end of input.
    [[nodiscard]] bool matches(token_type kind, std::string_view text) const {
        return !at_end() && current->type == kind && std::string_view(current->content) == text;
    }

    /// Requires the current token to have the given kind.
    /// @throws std::runtime_error on end of input or on any other token.
    void expect(token_type kind) const {
        if (at_end()) {
            fail_invalid_eof();
        }
        if (current->type != kind) {
            fail_invalid_token(*current);
        }
    }

    /// Requires the current token to have the given kind and text.
    /// @throws std::runtime_error on end of input or on any other token.
    void expect(token_type kind, std::string_view text) const {
        if (at_end()) {
            fail_invalid_eof();
        }
        if (!matches(kind, text)) {
            fail_invalid_token(*current);
        }
    }

    /// Parses one statement inside a block: a nested block, a `let`/`var`
    /// declaration or an `if` statement. Any other token is rejected;
    /// expression statements cannot be recognised until `parse_expression()`
    /// is implemented.
    std::unique_ptr<statement> parse_block_statement() {
        if (at_end()) {
            fail_invalid_eof();
        }
        if (matches(Punctuation, "{")) {
            return parse_block();
        }
        if (matches(Keyword, "let") || matches(Keyword, "var")) {
            return parse_variable_declaration();
        }
        if (matches(Keyword, "if")) {
            return parse_if_statement();
        }
        fail_invalid_token(*current);
    }

    /// Parses `{ statement* }`, consuming both braces, and returns the block.
    /// The block node carries the opening-brace token.
    std::unique_ptr<block> parse_block() {
        expect(Punctuation, "{");
        const token opening = *current;
        next();  // an unterminated block is an end-of-file error

        std::vector<std::unique_ptr<statement>> body;
        // Loop until the closing brace. Each iteration either consumes at
        // least one token or throws, so the loop always terminates.
        while (!matches(Punctuation, "}")) {
            body.push_back(parse_block_statement());
        }

        advance();  // consume '}' - it may be the final token of the input
        return std::make_unique<block>(opening, std::move(body));
    }

    /// Parses `let|var name [: type] [= expression]`.
    /// `let` yields a constant declaration, `var` a mutable one. The
    /// declaration may be the last thing in the input.
    std::unique_ptr<variable_declaration> parse_variable_declaration() {
        expect(Keyword);
        const bool is_let = std::string_view(current->content) == "let";
        if (!is_let && std::string_view(current->content) != "var") {
            fail_invalid_token(*current);
        }

        auto declaration = std::make_unique<variable_declaration>(*current);
        declaration->constant = is_let;

        next();  // a name must follow the keyword
        expect(Identifier);
        declaration->name = current->content;
        advance();  // the declaration may end right after the name

        if (matches(Punctuation, ":")) {
            next();
            declaration->type = parse_type();
            advance();
        }

        if (matches(Operator, "=")) {
            next();
            declaration->initial_expression = parse_expression();
        }

        return declaration;
    }

    /// Parses `if ( expression ) block`.
    std::unique_ptr<if_statement> parse_if_statement() {
        expect(Keyword, "if");
        auto node = std::make_unique<if_statement>(*current);

        next();
        expect(Punctuation, "(");
        next();

        node->condition = parse_expression();

        expect(Punctuation, ")");
        next();  // a block must follow the condition

        node->block = parse_block();
        return node;
    }

    /// Placeholder: expression parsing is not implemented yet.
    /// Consumes no tokens and returns a null pointer.
    std::unique_ptr<expression> parse_expression() {
        return nullptr;
    }

    /// Parses `fun name ( [name : type {, name : type}] ) [: type] block`.
    std::unique_ptr<function_declaration> parse_function() {
        expect(Keyword, "fun");
        const token start = *current;

        next();
        expect(Identifier);
        std::string function_name = current->content;

        next();
        expect(Punctuation, "(");
        next();

        std::vector<function_parameter> params;
        // Continue until the closing parenthesis. The guard must be
        // "not (punctuation and ')')"; testing the two parts with && would
        // stop at the first ',' and cut the parameter list short.
        while (!matches(Punctuation, ")")) {
            if (!params.empty()) {
                expect(Punctuation, ",");
                next();
            }

            expect(Identifier);
            std::string param_name = current->content;

            next();
            expect(Punctuation, ":");
            next();

            params.push_back(function_parameter{std::move(param_name), parse_type()});
            next();  // step past the type name
        }
        next();  // consume ')'; a return type or the body must follow

        std::optional<std::string> return_type;
        if (matches(Punctuation, ":")) {
            next();
            return_type = parse_type();
            next();  // step past the type name; the body must follow
        }

        auto body = parse_block();

        return std::make_unique<function_declaration>(start, std::move(function_name), std::move(params), std::move(return_type), std::move(body));
    }

    /// Returns the text of the current token, which must be an identifier.
    /// Does not consume the token; callers step past it themselves.
    std::string parse_type() {
        expect(Identifier);
        return current->content;
    }

    /// Parses one top-level statement (`let`/`var`, `if` or `fun`).
    /// Returns null at the end of input or when the current token does not
    /// start any of those constructs.
    std::unique_ptr<statement> parse_top_level_statement() {
        if (at_end()) {
            return nullptr;
        }
        if (matches(Keyword, "let") || matches(Keyword, "var")) {
            return parse_variable_declaration();
        }
        if (matches(Keyword, "if")) {
            return parse_if_statement();
        }
        if (matches(Keyword, "fun")) {
            return parse_function();
        }
        return nullptr;
    }

public:
    /// Creates a parser over the token range [`current`, `last`).
    parser(T current, E last) : current(std::move(current)), last(std::move(last)) {}

    /// Parses the whole token range.
    /// @return a `root_statement` owning every top-level statement, or null
    ///         when the range is empty.
    /// @throws std::runtime_error on any syntax error, including a token
    ///         that does not start a top-level statement.
    std::unique_ptr<statement> parse() {
        if (at_end()) {
            return nullptr;
        }
        const token start = *current;

        std::vector<std::unique_ptr<statement>> top_level_statements;
        while (!at_end()) {
            auto parsed = parse_top_level_statement();
            if (!parsed) {
                throw std::runtime_error("No statement parsed...");
            }
            top_level_statements.push_back(std::move(parsed));
        }

        return std::make_unique<root_statement>(std::move(top_level_statements), start);
    }
};
|
|
}
|