From e0a8808c313005063aa8a4b994f5291eac016da2 Mon Sep 17 00:00:00 2001 From: jstefanelli Date: Fri, 25 Apr 2025 13:02:27 +0200 Subject: [PATCH] [General] Syncing code --- .clangd | 3 + .gitginore => .gitignore | 0 CMakeLists.txt | 16 ++ CMakePresets.json | 28 +++ jlx/CMakeLists.txt | 9 + jlx/src/main.cpp | 32 +++ libjlx/CMakeLists.txt | 14 ++ libjlx/modules/ast.cppm | 223 ++++++++++++++++++++ libjlx/modules/main.cppm | 6 + libjlx/modules/sourceStream.cppm | 60 ++++++ libjlx/modules/tokenizer.cppm | 348 +++++++++++++++++++++++++++++++ libjlx/modules/utils.cppm | 25 +++ 12 files changed, 764 insertions(+) create mode 100644 .clangd rename .gitginore => .gitignore (100%) create mode 100644 CMakeLists.txt create mode 100644 CMakePresets.json create mode 100644 jlx/CMakeLists.txt create mode 100644 jlx/src/main.cpp create mode 100644 libjlx/CMakeLists.txt create mode 100644 libjlx/modules/ast.cppm create mode 100644 libjlx/modules/main.cppm create mode 100644 libjlx/modules/sourceStream.cppm create mode 100644 libjlx/modules/tokenizer.cppm create mode 100644 libjlx/modules/utils.cppm diff --git a/.clangd b/.clangd new file mode 100644 index 0000000..fd91ce8 --- /dev/null +++ b/.clangd @@ -0,0 +1,3 @@ +CompileFlags: + Remove: [-fdeps-format=*,-fmodules-ts,-fmodule-mapper=* ] + CompilationDatabase: "build-debug" diff --git a/.gitginore b/.gitignore similarity index 100% rename from .gitginore rename to .gitignore diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..ececd7e --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,16 @@ +cmake_minimum_required(VERSION 4.0) + +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED TRUE) + +project(jlx LANGUAGES CXX) + +include(CheckIPOSupported) +check_ipo_supported(RESULT JLX_IPO_OK OUTPUT JLX_IPO_MSG) + +if (NOT JLX_IPO_OK) + message("No IPO: ${JLX_IPO_MSG}") +endif() + +add_subdirectory(libjlx) +add_subdirectory(jlx) diff --git a/CMakePresets.json b/CMakePresets.json new file mode 100644 index 0000000..4f09a24 --- /dev/null +++ b/CMakePresets.json @@ -0,0 +1,28 @@ +{ + "version": 10, + "cmakeMinimumRequired": { + "major": 4, + "minor": 0, + "patch": 0 + }, + "configurePresets": [ + { + "name": "debug", + "displayName": "Debug", + "binaryDir": "${sourceDir}/build-debug", + "generator": "Ninja", + "cacheVariables": { + "CMAKE_BUILD_TYPE": "Debug" + } + }, + { + "name": "release", + "displayName": "Release", + "binaryDir": "${sourceDir}/build-release", + "generator": "Ninja", + "cacheVariables": { + "CMAKE_BUILD_TYPE": "RelWithDebInfo" + } + } + ] +} diff --git a/jlx/CMakeLists.txt b/jlx/CMakeLists.txt new file mode 100644 index 0000000..d74082b --- /dev/null +++ b/jlx/CMakeLists.txt @@ -0,0 +1,9 @@ +add_executable(jlx ${CMAKE_CURRENT_SOURCE_DIR}/src/main.cpp) + +target_compile_options(jlx PRIVATE $,/W4 /WX,-Wall -Wextra -Werror>) +target_include_directories(jlx PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include) +target_link_libraries(jlx PRIVATE libjlx) + +if (JLX_IPO_OK) + set_property(TARGET jlx PROPERTY INTERPROCEDURAL_OPTIMIZATION $>>) +endif() diff --git a/jlx/src/main.cpp b/jlx/src/main.cpp new file mode 100644 index 0000000..ade47a2 --- /dev/null +++ b/jlx/src/main.cpp @@ -0,0 +1,32 @@ +#include +#include +#include + +import jlx; + +int main(int, char**) { + std::istreambuf_iterator start(std::cin), end; + + std::string data{start, end}; + + auto tokenizer = jlx::tokenizer(data); + + std::optional res = tokenizer.read_token(); + + std::vector tokens; + + auto last = std::string(); + while(res.has_value()) { + const auto& t = res.value(); + + tokens.emplace_back(t); + + res = tokenizer.read_token(); + } + + auto parser = jlx::parser::iterator, std::vector::iterator>(tokens.begin(), tokens.end()); + + auto root = parser.parse(); + + return 0; +} diff --git a/libjlx/CMakeLists.txt b/libjlx/CMakeLists.txt new file mode 100644 index 0000000..8e550a1 --- /dev/null +++ b/libjlx/CMakeLists.txt @@ -0,0 +1,14 @@ +add_library(libjlx STATIC) +set_target_properties(libjlx PROPERTIES PREFIX "") +target_sources(libjlx PUBLIC FILE_SET libjlx_modules TYPE CXX_MODULES FILES + "${CMAKE_CURRENT_SOURCE_DIR}/modules/main.cppm" + "${CMAKE_CURRENT_SOURCE_DIR}/modules/sourceStream.cppm" + "${CMAKE_CURRENT_SOURCE_DIR}/modules/tokenizer.cppm" + "${CMAKE_CURRENT_SOURCE_DIR}/modules/ast.cppm" + "${CMAKE_CURRENT_SOURCE_DIR}/modules/utils.cppm" +) +target_compile_options(libjlx PRIVATE $,/W4 /WX,-Wall -Wextra -Werror>) +target_include_directories(libjlx PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include") +if (JLX_IPO_OK) + set_property(TARGET libjlx PROPERTY INTERPROCEDURAL_OPTIMIZATION $>>) +endif() diff --git a/libjlx/modules/ast.cppm b/libjlx/modules/ast.cppm new file mode 100644 index 0000000..195c128 --- /dev/null +++ b/libjlx/modules/ast.cppm @@ -0,0 +1,223 @@ +module; + +#include +#include +#include +#include +#include + +export module jlx:ast; + +import :tokenizer; + +namespace jlx { + export enum ast_type { + Root, + Expression, + Block, + FunctionDeclaration, + SimpleIdentifier, + LiteralValue + }; + + export template + concept token_iterator = requires() { + requires std::same_as; + std::bidirectional_iterator; + }; + + struct statement { + ast_type type; + + statement(ast_type type) : type(type) { + + } + }; + + struct root_statement : public statement { + root_statement() : statement(Root) { + + } + + std::vector> statements; + }; + + struct expression : public statement { + expression() : statement(Expression) { + + } + }; + + struct block : public statement { + block() : statement(Block) { + + } + std::vector> statements; + }; + + struct function_parameter { + std::string name; + std::string type; + }; + + struct function_declaration : public statement { + function_declaration() : statement(FunctionDeclaration) { + + } + + std::string name; + std::vector parameters; + std::optional return_type; + std::unique_ptr body; + }; + + + export template E> + class parser { + T current; + E last; + + inline void fail_invalid_token(const token& t) { + throw std::runtime_error(std::format("Invalid token {} at {}:{}", t.content, t.line, t.col).c_str()); + } + + inline void fail_invalid_eof() { + throw std::runtime_error("Unexpected end-of-file"); + } + + void next() { + current++; + + if (current == last) { + fail_invalid_eof(); + } + } + + std::unique_ptr parse_block() { + if (current->type != Punctuation || current->content != "{") { + fail_invalid_token(*current); + } + next(); + + while(current->type != Punctuation && current->type != "}") { + + } + } + + std::unique_ptr parse_function() { + if (current->type != Keyword || current->content != "fun") { + fail_invalid_token(*current); + } + + next(); + + if (current->type != Identifier) { + fail_invalid_token(*current); + } + + std::string function_name = current->content; + + next(); + + if (current->type != Punctuation || current->content != "(") { + fail_invalid_token(*current); + } + + next(); + + std::vector params; + std::optional return_type; + + bool first = true; + while (current->type != Punctuation && current->content != ")") { + if (!first) { + if(current->type != Punctuation || current->content != ",") { + fail_invalid_token(*current); + } else { + next(); + } + } + + std::string name; + + if (current->type != Identifier) { + fail_invalid_token(*current); + } + + name = current->content; + + next(); + + if (current->type != Punctuation || current->content != ":") { + fail_invalid_token(*current); + } + + next(); + + auto param_type = parse_type(current, last); + params.push_back(std::move(name), std::move(param_type)); + + next(); + first = false; + } + next(); + + if (current->type == Punctuation && current->content == ":") { + next(); + + return_type = parse_type(current, last); + next(); + } + + auto block = parse_block(); + + return std::make_unique(std::move(function_name), std::move(params), std::move(return_type), std::move(block)); + } + + std::string parse_type(){ + if (current->type != Identifier) { + fail_invalid_token(*current); + } + + return current->content; + } + + std::unique_ptr parse_top_level_statement() { + if (current == last) { + return nullptr; + } + + if (current->type == token_type::Keyword) { + switch(current->content) { + case "let": + case "var": + parse_variable_declaration(current, last); + break; + case "if": + parse_if_statement(current, last); + break; + case "fun": + return parse_function(current, last); + } + } + } + public: + parser(T current, E last) : current(current), last(last) { + + } + + std::unique_ptr parse() { + + std::vector> top_level_statements; + while(current != last) { + auto s = parse_top_level_statement(); + if (s == nullptr) { + throw std::runtime_error("No statement parsed..."); + } + top_level_statements.push_back(std::move(s)); + } + + return std::make_unique(top_level_statements); + } + }; +} diff --git a/libjlx/modules/main.cppm b/libjlx/modules/main.cppm new file mode 100644 index 0000000..5566086 --- /dev/null +++ b/libjlx/modules/main.cppm @@ -0,0 +1,6 @@ +module; + +export module jlx; +export import :source_stream; +export import :tokenizer; +export import :ast; diff --git a/libjlx/modules/sourceStream.cppm b/libjlx/modules/sourceStream.cppm new file mode 100644 index 0000000..bbfb315 --- /dev/null +++ b/libjlx/modules/sourceStream.cppm @@ -0,0 +1,60 @@ +module; + +#include +#include +#include + +export module jlx:source_stream; + +namespace jlx { + export template + class source_stream { + public: + using char_traits = std::char_traits; + using str = std::basic_string; + protected: + str::size_type pos = 0; + std::size_t line = 1; + std::size_t col = 0; + const str input; + public: + source_stream(const str input) : input(std::move(input)){ + + } + + std::optional next() { + if (pos >= input.length()) { + return std::nullopt; + } + auto ch = input.at(pos++); + if(char_traits::to_int_type(ch) == 10) { + line += 1; + col = 0; + } else { + col++; + } + return ch; + } + + std::optional peek() const { + if (pos >= input.length()) { + return std::nullopt; + } + + return input.at(pos); + } + + bool eof() { + return pos >= input.length(); + } + + size_t current_line() const { + return line; + } + + size_t current_col() const { + return col; + } + }; +} + diff --git a/libjlx/modules/tokenizer.cppm b/libjlx/modules/tokenizer.cppm new file mode 100644 index 0000000..f245df6 --- /dev/null +++ b/libjlx/modules/tokenizer.cppm @@ -0,0 +1,348 @@ +module; + +#include +#include +#include +#include +#include +#include + +export module jlx:tokenizer; +import :source_stream; +import utils; + +namespace jlx { + export enum token_type { + Invalid = 0, + Punctuation, + Number, + String, + Keyword, + Identifier, + Operator + }; + + export constexpr std::string token_type_to_string(token_type t) { + switch(t) { + case Punctuation: + return "Punctuation"; + case Number: + return "Number"; + case String: + return "String"; + case Keyword: + return "Keyword"; + case Identifier: + return "Identifier"; + case Operator: + return "Operator"; + default: + return "Invalid"; + } + } + + export struct token { + token_type type; + std::string content; + std::size_t line; + std::size_t col; + }; + + export constexpr std::string token_to_string(const token& t) { + return std::format("{}({})", token_type_to_string(t.type), t.content); + } + + export class tokenizer_exception { + protected: + std::string msg; + public: + tokenizer_exception(std::string msg, std::size_t line, std::size_t col) : msg(std::format("Tokenizer exception at %d:%d. %s", line, col, msg)) { + + } + + const std::string& what() const { + return msg; + } + }; + + export class tokenizer { + source_stream source; + + static constexpr std::array keywords = {{ + "if", + "else", + "fun", + //"struct", + "let", + "var" + }}; + + static constexpr std::array punctiations = {{ + '.', + '(', + '(', + '{', + '}', + ':', + ';' + }}; + + static constexpr std::array operators = {{ + "=", + "+", + "-", + "*", + "/", + "%", + "==", + "!=", + "<=", + ">=", + ">", + "<" + }}; + + void skip_whitespace() { + while(!source.eof()) { + auto ch = source.peek(); + if (!ch.has_value() || !std::isspace(static_cast(ch.value()))) { + return; + } + + source.next(); + } + } + + token read_string_token() { + auto start_line = source.current_line(); + auto start_col = source.current_col(); + source.next(); + bool escape = false; + std::stringstream buffer; + while(!source.eof()) { + auto ch = source.next(); + if (!ch.has_value() || (!escape && ch.value() == '"')) { + break; + } + + auto val = ch.value(); + + if (val == '\n') { + continue; + } + + if (escape) { + switch(val) { + case '"': + buffer.put('"'); + break; + case '\\': + buffer.put('\\'); + break; + case 'n': + buffer.put('\n'); + break; + default: + throw tokenizer_exception("Invalid escape sequance ", source.current_line(), source.current_col()); + } + escape = false; + } else if (val == '\\') { + escape = true; + } else { + buffer.put(val); + } + } + + return { + token_type::String, + buffer.str(), + start_line, + start_col + }; + } + + std::optional read_decimal_token() { + auto res = source.peek(); + + if (!res.has_value()) { + return std::nullopt; + } + + std::stringstream buffer; + std::size_t start_line = source.current_line(); + std::size_t start_col = source.current_col(); + bool found_period = false; + + while(res.has_value() && (std::isdigit(static_cast(res.value())) || res.value() == '.')) { + auto val = res.value(); + if (val == '.') { + if (found_period) { + throw tokenizer_exception("Too many periods in numeric value", source.current_line(), source.current_col()); + } else { + found_period = true; + } + } + buffer.put(val); + source.next(); + res = source.peek(); + } + + return token { + token_type::Number, + buffer.str(), + start_line, + start_col + }; + } + + constexpr bool is_valid_identifier_start(char ch) { + return ch == '_' || isletter(ch); + } + + std::optional read_identifier() { + std::stringstream buffer; + + auto start_line = source.current_line(); + auto start_col = source.current_col(); + + while(!source.eof()) { + auto res = source.peek(); + + if (!res.has_value()) { + break; + } + + auto val = res.value(); + + if (val != '_' && !isletter(val) && !std::isdigit(static_cast(val))) { + break; + } + + buffer.put(val); + source.next(); + } + + auto word = buffer.str(); + + if (std::find(keywords.begin(), keywords.end(), word) != keywords.end()) { + return token { + token_type::Keyword, + word, + start_line, + start_col + }; + } else { + return token { + token_type::Identifier, + word, + start_line, + start_col + }; + } + } + + std::optional read_punctiation_token() { + auto res = source.peek(); + + if (!res.has_value()) { + return std::nullopt; + } + + auto val = res.value(); + + if (std::find(punctiations.begin(), punctiations.end(), val) != punctiations.end()) { + auto line = source.current_line(); + auto col = source.current_col(); + source.next(); + return token { + token_type::Punctuation, + std::string() + val, + line, + col + }; + } + + return std::nullopt; + } + + std::optional read_operator_token() { + std::stringstream buffer; + + auto line = source.current_line(); + auto col = source.current_col(); + while(!source.eof()) { + auto res = source.peek(); + + if (!res.has_value()) { + break; + } + + auto val = res.value(); + + if (!is_valid_character_from_set(operators, val)) { + break; + } + + buffer.put(val); + source.next(); + } + + + auto word = buffer.str(); + + if (std::find(operators.begin(), operators.end(), word) != operators.end()) { + return token { + token_type::Operator, + word, + line, + col + }; + } + + throw tokenizer_exception(std::format("Unknown operator '%s'", word), line, col); + } + + public: + tokenizer(std::string source) : source(std::move(source)) { + + } + + std::optional read_token() { + skip_whitespace(); + if (source.eof()) { + return std::nullopt; + } + + auto result = source.peek(); + if (!result.has_value()) { + return std::nullopt; + } + + auto val = result.value(); + + if (val == '"') { + return read_string_token(); + } + + if (std::isdigit(static_cast(val))) { + return read_decimal_token(); + } + + if (is_valid_identifier_start(val)) { + return read_identifier(); + } + + auto punctiation_res = read_punctiation_token(); + if (punctiation_res.has_value()) { + return punctiation_res; + } + + auto op_res = read_operator_token(); + if (op_res.has_value()) { + return op_res; + } + + throw tokenizer_exception(std::format("Unknown character '%c'", val), source.current_line(), source.current_col()); + } + }; +} diff --git a/libjlx/modules/utils.cppm b/libjlx/modules/utils.cppm new file mode 100644 index 0000000..d1d1ce2 --- /dev/null +++ b/libjlx/modules/utils.cppm @@ -0,0 +1,25 @@ +module; + +#include +#include +#include +#include + +export module utils; + +export constexpr bool isletter(char ch) { + return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'); +} + + +export template +constexpr bool is_valid_character_from_set(std::array haystack, char needle) { + for(auto& a : haystack) { + for(auto& c : a) { + if (c == needle) { + return true; + } + } + } + return false; +}