[libjlx] Implement basic expression parsing
This commit is contained in:
parent
50205e8185
commit
07d2a25712
3 changed files with 164 additions and 42 deletions
|
|
@ -1,19 +1,43 @@
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <filesystem>
|
||||||
|
#include <fstream>
|
||||||
|
|
||||||
import jlx;
|
import jlx;
|
||||||
|
|
||||||
int main(int, char**) {
|
std::ifstream file;
|
||||||
std::istreambuf_iterator<char> start(std::cin), end;
|
|
||||||
|
int main(int argc, char** argv) {
|
||||||
|
std::istreambuf_iterator<char> start, end;
|
||||||
|
if (argc > 1) {
|
||||||
|
if (std::filesystem::is_regular_file(argv[1])) {
|
||||||
|
std::cout << "Opening: " << argv[1] << std::endl;
|
||||||
|
file = std::ifstream(argv[1]);
|
||||||
|
start = std::istreambuf_iterator<char>(file);
|
||||||
|
} else {
|
||||||
|
std::cerr << "File not found: " << argv[1] << std::endl;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
start = std::istreambuf_iterator<char>(std::cin);
|
||||||
|
}
|
||||||
|
|
||||||
std::string data{start, end};
|
std::string data{start, end};
|
||||||
|
|
||||||
auto tokenizer = jlx::tokenizer(data);
|
auto tokenizer = jlx::tokenizer(data);
|
||||||
|
|
||||||
std::optional<jlx::token> res = tokenizer.read_token();
|
std::optional<jlx::token> res;
|
||||||
|
|
||||||
std::vector<jlx::token> tokens;
|
std::vector<jlx::token> tokens;
|
||||||
|
do {
|
||||||
|
res = tokenizer.read_token();
|
||||||
|
if (res.has_value()) {
|
||||||
|
tokens.emplace_back(res.value());
|
||||||
|
}
|
||||||
|
} while (res.has_value());
|
||||||
|
|
||||||
|
std::cout << "Read " << tokens.size() << " tokens\n";
|
||||||
|
|
||||||
auto last = std::string();
|
auto last = std::string();
|
||||||
while(res.has_value()) {
|
while(res.has_value()) {
|
||||||
|
|
@ -28,5 +52,16 @@ int main(int, char**) {
|
||||||
|
|
||||||
auto root = parser.parse();
|
auto root = parser.parse();
|
||||||
|
|
||||||
|
if (root == nullptr) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto* rt = dynamic_cast<jlx::root_statement*>(root.get());
|
||||||
|
if (rt == nullptr) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << "Parsed " << rt->statements.size() << " statements" << std::endl;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -36,7 +36,7 @@ namespace jlx {
|
||||||
std::bidirectional_iterator<T>;
|
std::bidirectional_iterator<T>;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct statement {
|
export struct statement {
|
||||||
ast_type type;
|
ast_type type;
|
||||||
token t;
|
token t;
|
||||||
|
|
||||||
|
|
@ -47,7 +47,7 @@ namespace jlx {
|
||||||
virtual ~statement() = default;
|
virtual ~statement() = default;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct root_statement : public statement {
|
export struct root_statement : public statement {
|
||||||
explicit root_statement(const token& t) : statement(Root, t) {
|
explicit root_statement(const token& t) : statement(Root, t) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -61,7 +61,7 @@ namespace jlx {
|
||||||
~root_statement() override = default;
|
~root_statement() override = default;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct expression : public statement {
|
export struct expression : public statement {
|
||||||
explicit expression(const token& t) : statement(Expression, t) {
|
explicit expression(const token& t) : statement(Expression, t) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -82,7 +82,7 @@ namespace jlx {
|
||||||
~expression() override = default;
|
~expression() override = default;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct block : public statement {
|
export struct block : public statement {
|
||||||
explicit block(const token& t) : statement(Block, t) {
|
explicit block(const token& t) : statement(Block, t) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -96,12 +96,12 @@ namespace jlx {
|
||||||
~block() override = default;
|
~block() override = default;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct function_parameter {
|
export struct function_parameter {
|
||||||
std::string name;
|
std::string name;
|
||||||
std::string type;
|
std::string type;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct function_declaration : public statement {
|
export struct function_declaration : public statement {
|
||||||
explicit function_declaration(const token& t) : statement(FunctionDeclaration, t) {
|
explicit function_declaration(const token& t) : statement(FunctionDeclaration, t) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -120,7 +120,7 @@ namespace jlx {
|
||||||
~function_declaration() override = default;
|
~function_declaration() override = default;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct variable_declaration : public statement {
|
export struct variable_declaration : public statement {
|
||||||
variable_declaration(const token& t) : statement(VariableDeclaration, t) {
|
variable_declaration(const token& t) : statement(VariableDeclaration, t) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -133,7 +133,7 @@ namespace jlx {
|
||||||
~variable_declaration() override = default;
|
~variable_declaration() override = default;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct if_statement : public statement {
|
export struct if_statement : public statement {
|
||||||
if_statement(const token& t) : statement(IfStatement, t) {
|
if_statement(const token& t) : statement(IfStatement, t) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -142,7 +142,7 @@ namespace jlx {
|
||||||
std::unique_ptr<block> block;
|
std::unique_ptr<block> block;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct literal_value : public expression {
|
export struct literal_value : public expression {
|
||||||
explicit literal_value(const token& t): expression(t) {
|
explicit literal_value(const token& t): expression(t) {
|
||||||
et = EtLiteralValue;
|
et = EtLiteralValue;
|
||||||
}
|
}
|
||||||
|
|
@ -155,7 +155,7 @@ namespace jlx {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct single_operation : public expression {
|
export struct single_operation : public expression {
|
||||||
single_operation(const token& t, std::unique_ptr<expression> operand, const token& operator_token) : expression(t),
|
single_operation(const token& t, std::unique_ptr<expression> operand, const token& operator_token) : expression(t),
|
||||||
operand(std::move(operand)), operator_token(operator_token) {
|
operand(std::move(operand)), operator_token(operator_token) {
|
||||||
et = EtSingleValueOperation;
|
et = EtSingleValueOperation;
|
||||||
|
|
@ -181,7 +181,7 @@ namespace jlx {
|
||||||
token operator_token;
|
token operator_token;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct dual_operation : public expression {
|
export struct dual_operation : public expression {
|
||||||
dual_operation(const token& t, std::unique_ptr<expression> first_operand, std::unique_ptr<expression> second_operand, const token& operator_token) :
|
dual_operation(const token& t, std::unique_ptr<expression> first_operand, std::unique_ptr<expression> second_operand, const token& operator_token) :
|
||||||
expression(t), first_operand(std::move(first_operand)), second_operand(std::move(second_operand)), operator_token(operator_token) {
|
expression(t), first_operand(std::move(first_operand)), second_operand(std::move(second_operand)), operator_token(operator_token) {
|
||||||
et = EtDualValueOperation;
|
et = EtDualValueOperation;
|
||||||
|
|
@ -209,7 +209,7 @@ namespace jlx {
|
||||||
token operator_token;
|
token operator_token;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct function_call : public expression {
|
export struct function_call : public expression {
|
||||||
function_call(const token& t, std::string function_name, std::vector<std::unique_ptr<expression>> arguments) :
|
function_call(const token& t, std::string function_name, std::vector<std::unique_ptr<expression>> arguments) :
|
||||||
expression(t), function_name(std::move(function_name)), arguments(std::move(arguments)) {
|
expression(t), function_name(std::move(function_name)), arguments(std::move(arguments)) {
|
||||||
et = EtFunctionCall;
|
et = EtFunctionCall;
|
||||||
|
|
@ -242,7 +242,7 @@ namespace jlx {
|
||||||
std::vector<std::unique_ptr<expression>> arguments;
|
std::vector<std::unique_ptr<expression>> arguments;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct identifier_expression : public expression {
|
export struct identifier_expression : public expression {
|
||||||
identifier_expression(const token& t, std::string name) : expression(t), name(std::move(name)) {
|
identifier_expression(const token& t, std::string name) : expression(t), name(std::move(name)) {
|
||||||
et = EtIdentifier;
|
et = EtIdentifier;
|
||||||
}
|
}
|
||||||
|
|
@ -262,33 +262,36 @@ namespace jlx {
|
||||||
T current;
|
T current;
|
||||||
E last;
|
E last;
|
||||||
|
|
||||||
inline void fail_invalid_token(const token& t) {
|
[[noreturn]] static inline void fail_invalid_token(const token& t) {
|
||||||
throw std::runtime_error(std::format("Invalid token {} at {}:{}", t.content, t.line, t.col).c_str());
|
throw std::runtime_error(std::format("Invalid token {} at {}:{}:{}", t.content, t.source_file, t.line, t.col).c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void fail_invalid_eof() {
|
[[noreturn]] static inline void fail_invalid_eof() {
|
||||||
throw std::runtime_error("Unexpected end-of-file");
|
throw std::runtime_error("Unexpected end-of-file");
|
||||||
}
|
}
|
||||||
|
|
||||||
void next() {
|
void next(bool mandatory = true) {
|
||||||
current++;
|
++current;
|
||||||
|
|
||||||
if (current == last) {
|
if (current == last && mandatory) {
|
||||||
fail_invalid_eof();
|
fail_invalid_eof();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<block> parse_block() {
|
std::unique_ptr<block> parse_block() {
|
||||||
|
auto& start = *current;
|
||||||
if (current->type != Punctuation || current->content != "{") {
|
if (current->type != Punctuation || current->content != "{") {
|
||||||
fail_invalid_token(*current);
|
fail_invalid_token(*current);
|
||||||
}
|
}
|
||||||
next();
|
next();
|
||||||
|
|
||||||
|
std::vector<std::unique_ptr<statement>> statements;
|
||||||
while(current->type != Punctuation && current->content != "}") {
|
while(current->type != Punctuation && current->content != "}") {
|
||||||
|
statements.emplace_back(parse_statement());
|
||||||
}
|
}
|
||||||
|
next(false);
|
||||||
|
|
||||||
return nullptr;
|
return std::make_unique<block>(start, std::move(statements));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<variable_declaration> parse_variable_declaration() {
|
std::unique_ptr<variable_declaration> parse_variable_declaration() {
|
||||||
|
|
@ -320,9 +323,9 @@ namespace jlx {
|
||||||
|
|
||||||
name = current->content;
|
name = current->content;
|
||||||
|
|
||||||
next();
|
next(false);
|
||||||
|
|
||||||
if (current->type == Punctuation && current->content == ":") {
|
if (current != last && current->type == Punctuation && current->content == ":") {
|
||||||
next();
|
next();
|
||||||
|
|
||||||
if (current->type != Identifier) {
|
if (current->type != Identifier) {
|
||||||
|
|
@ -331,10 +334,10 @@ namespace jlx {
|
||||||
|
|
||||||
type = current->content;
|
type = current->content;
|
||||||
|
|
||||||
next();
|
next(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (current->type == Operator && current->content == "=") {
|
if (current != last && current->type == Operator && current->content == "=") {
|
||||||
next();
|
next();
|
||||||
|
|
||||||
starting_value = parse_expression();
|
starting_value = parse_expression();
|
||||||
|
|
@ -381,8 +384,80 @@ namespace jlx {
|
||||||
return statement;
|
return statement;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<expression> parse_expression() {
|
std::unique_ptr<expression> parse_expression(std::unique_ptr<expression> previous = nullptr) {
|
||||||
return nullptr;
|
auto start = *current;
|
||||||
|
|
||||||
|
if (current == last) {
|
||||||
|
if (previous != nullptr) {
|
||||||
|
return previous;
|
||||||
|
}
|
||||||
|
|
||||||
|
fail_invalid_eof();
|
||||||
|
}
|
||||||
|
|
||||||
|
switch(current->type) {
|
||||||
|
case Identifier:
|
||||||
|
if (previous != nullptr) {
|
||||||
|
return previous;
|
||||||
|
}
|
||||||
|
next(false);
|
||||||
|
return parse_expression(std::make_unique<identifier_expression>(start, start.content));
|
||||||
|
case Operator: {
|
||||||
|
if (previous == nullptr) {
|
||||||
|
auto& op = current->content;
|
||||||
|
if (op == "+" || op == "-" || op == "!") {
|
||||||
|
next();
|
||||||
|
|
||||||
|
auto expr = parse_expression();
|
||||||
|
return std::make_unique<single_operation>(start, std::move(expr), start);
|
||||||
|
}
|
||||||
|
fail_invalid_token(*current);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto& op_token = *current;
|
||||||
|
next();
|
||||||
|
|
||||||
|
return std::make_unique<dual_operation>(start, std::move(previous), parse_expression(), op_token);
|
||||||
|
}
|
||||||
|
case Number:
|
||||||
|
case String:
|
||||||
|
case Boolean:
|
||||||
|
if (previous != nullptr) {
|
||||||
|
return previous;
|
||||||
|
}
|
||||||
|
next(false);
|
||||||
|
|
||||||
|
return parse_expression(std::make_unique<literal_value>(start));
|
||||||
|
case Punctuation:
|
||||||
|
{
|
||||||
|
if (current->content == "(") {
|
||||||
|
if (previous != nullptr) {
|
||||||
|
return previous;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto ex = parse_expression();
|
||||||
|
|
||||||
|
if (current->type != Punctuation || current->content != ")") {
|
||||||
|
fail_invalid_token(*current);
|
||||||
|
}
|
||||||
|
|
||||||
|
next(false);
|
||||||
|
|
||||||
|
return ex;
|
||||||
|
} else if (current->content == ";") {
|
||||||
|
if (previous == nullptr) {
|
||||||
|
fail_invalid_token(*current);
|
||||||
|
}
|
||||||
|
|
||||||
|
next(false);
|
||||||
|
|
||||||
|
return previous;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fail_invalid_token(*current);
|
||||||
|
default:
|
||||||
|
fail_invalid_token(*current);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<function_declaration> parse_function() {
|
std::unique_ptr<function_declaration> parse_function() {
|
||||||
|
|
@ -416,9 +491,8 @@ namespace jlx {
|
||||||
if (!first) {
|
if (!first) {
|
||||||
if(current->type != Punctuation || current->content != ",") {
|
if(current->type != Punctuation || current->content != ",") {
|
||||||
fail_invalid_token(*current);
|
fail_invalid_token(*current);
|
||||||
} else {
|
|
||||||
next();
|
|
||||||
}
|
}
|
||||||
|
next();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string name;
|
std::string name;
|
||||||
|
|
@ -465,7 +539,7 @@ namespace jlx {
|
||||||
return current->content;
|
return current->content;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<statement> parse_top_level_statement() {
|
std::unique_ptr<statement> parse_statement(bool top_level = false) {
|
||||||
if (current == last) {
|
if (current == last) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
@ -475,12 +549,12 @@ namespace jlx {
|
||||||
return parse_variable_declaration();
|
return parse_variable_declaration();
|
||||||
} else if (current->content == "if") {
|
} else if (current->content == "if") {
|
||||||
return parse_if_statement();
|
return parse_if_statement();
|
||||||
} else if (current->content == "fun") {
|
} else if (current->content == "fun" && top_level) {
|
||||||
return parse_function();
|
return parse_function();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nullptr;
|
return parse_expression();
|
||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
@ -496,7 +570,7 @@ namespace jlx {
|
||||||
|
|
||||||
std::vector<std::unique_ptr<statement>> top_level_statements;
|
std::vector<std::unique_ptr<statement>> top_level_statements;
|
||||||
while(current != last) {
|
while(current != last) {
|
||||||
auto s = parse_top_level_statement();
|
auto s = parse_statement(true);
|
||||||
if (s == nullptr) {
|
if (s == nullptr) {
|
||||||
throw std::runtime_error("No statement parsed...");
|
throw std::runtime_error("No statement parsed...");
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,7 @@ namespace jlx {
|
||||||
Punctuation,
|
Punctuation,
|
||||||
Number,
|
Number,
|
||||||
String,
|
String,
|
||||||
|
Boolean,
|
||||||
Keyword,
|
Keyword,
|
||||||
Identifier,
|
Identifier,
|
||||||
Operator
|
Operator
|
||||||
|
|
@ -30,6 +31,8 @@ namespace jlx {
|
||||||
return "Number";
|
return "Number";
|
||||||
case String:
|
case String:
|
||||||
return "String";
|
return "String";
|
||||||
|
case Boolean:
|
||||||
|
return "Boolean";
|
||||||
case Keyword:
|
case Keyword:
|
||||||
return "Keyword";
|
return "Keyword";
|
||||||
case Identifier:
|
case Identifier:
|
||||||
|
|
@ -86,7 +89,7 @@ namespace jlx {
|
||||||
"var"
|
"var"
|
||||||
}};
|
}};
|
||||||
|
|
||||||
static constexpr std::array<char, 7> punctiations = {{
|
static constexpr std::array<char, 7> punctuations = {{
|
||||||
'.',
|
'.',
|
||||||
'(',
|
'(',
|
||||||
'(',
|
'(',
|
||||||
|
|
@ -236,6 +239,16 @@ namespace jlx {
|
||||||
|
|
||||||
auto word = buffer.str();
|
auto word = buffer.str();
|
||||||
|
|
||||||
|
if (word == "true" || word == "false") {
|
||||||
|
return token {
|
||||||
|
token_type::Boolean,
|
||||||
|
"mono_src",
|
||||||
|
word,
|
||||||
|
start_line,
|
||||||
|
start_col
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
if (std::find(keywords.begin(), keywords.end(), word) != keywords.end()) {
|
if (std::find(keywords.begin(), keywords.end(), word) != keywords.end()) {
|
||||||
return token {
|
return token {
|
||||||
token_type::Keyword,
|
token_type::Keyword,
|
||||||
|
|
@ -255,7 +268,7 @@ namespace jlx {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<token> read_punctiation_token() {
|
std::optional<token> read_punctuation_token() {
|
||||||
auto res = source.peek();
|
auto res = source.peek();
|
||||||
|
|
||||||
if (!res.has_value()) {
|
if (!res.has_value()) {
|
||||||
|
|
@ -264,7 +277,7 @@ namespace jlx {
|
||||||
|
|
||||||
auto val = res.value();
|
auto val = res.value();
|
||||||
|
|
||||||
if (std::find(punctiations.begin(), punctiations.end(), val) != punctiations.end()) {
|
if (std::find(punctuations.begin(), punctuations.end(), val) != punctuations.end()) {
|
||||||
auto line = source.current_line();
|
auto line = source.current_line();
|
||||||
auto col = source.current_col();
|
auto col = source.current_col();
|
||||||
source.next();
|
source.next();
|
||||||
|
|
@ -348,9 +361,9 @@ namespace jlx {
|
||||||
return read_identifier();
|
return read_identifier();
|
||||||
}
|
}
|
||||||
|
|
||||||
auto punctiation_res = read_punctiation_token();
|
auto punctuation_res = read_punctuation_token();
|
||||||
if (punctiation_res.has_value()) {
|
if (punctuation_res.has_value()) {
|
||||||
return punctiation_res;
|
return punctuation_res;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto op_res = read_operator_token();
|
auto op_res = read_operator_token();
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue