[General] Syncing code

This commit is contained in:
jstefanelli 2025-04-25 13:02:27 +02:00
parent f653a2a83e
commit e0a8808c31
Signed by: jstefanelli
GPG key ID: 60EDE2437640D2AA
12 changed files with 764 additions and 0 deletions

3
.clangd Normal file
View file

@ -0,0 +1,3 @@
# clangd configuration for this repository.
CompileFlags:
# Flags stripped from each compile command before clangd uses it. All three
# are module-related options (presumably emitted by the GCC-based build and
# not understood by clangd - confirm).
Remove: [-fdeps-format=*,-fmodules-ts,-fmodule-mapper=* ]
# Directory searched for compile_commands.json; "build-debug" is the binary
# directory of the "debug" preset in CMakePresets.json.
CompilationDatabase: "build-debug"

View file

16
CMakeLists.txt Normal file
View file

@ -0,0 +1,16 @@
# Top-level build script: configures the language standard, probes for
# link-time optimisation support and pulls in the library and the executable.
cmake_minimum_required(VERSION 4.0)
# C++20 is mandatory (no fallback to an older standard): the sources are
# written as C++ modules.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
project(jlx LANGUAGES CXX)
# Record whether the toolchain supports IPO/LTO. The result (JLX_IPO_OK) is
# read by the subdirectories; the failure reason is only reported, it does not
# abort configuration.
include(CheckIPOSupported)
check_ipo_supported(RESULT JLX_IPO_OK OUTPUT JLX_IPO_MSG)
if (NOT JLX_IPO_OK)
message("No IPO: ${JLX_IPO_MSG}")
endif()
# libjlx is added first because the jlx executable links against it.
add_subdirectory(libjlx)
add_subdirectory(jlx)

28
CMakePresets.json Normal file
View file

@ -0,0 +1,28 @@
{
"version": 10,
"cmakeMinimumRequired": {
"major": 4,
"minor": 0,
"patch": 0
},
"configurePresets": [
{
"name": "debug",
"displayName": "Debug",
"binaryDir": "${sourceDir}/build-debug",
"generator": "Ninja",
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Debug"
}
},
{
"name": "release",
"displayName": "Release",
"binaryDir": "${sourceDir}/build-release",
"generator": "Ninja",
"cacheVariables": {
"CMAKE_BUILD_TYPE": "RelWithDebInfo"
}
}
]
}

9
jlx/CMakeLists.txt Normal file
View file

@ -0,0 +1,9 @@
# Command-line front end: reads source text from stdin and runs it through the
# libjlx tokenizer and parser.
add_executable(jlx ${CMAKE_CURRENT_SOURCE_DIR}/src/main.cpp)

# Warnings are errors on every toolchain.
# - Each generator expression is quoted and uses ';' between flags; an unquoted
#   expression containing spaces is split into several broken arguments.
# - The compiler id is spelled "MSVC"; the comparison is case-sensitive.
target_compile_options(jlx PRIVATE
    "$<$<CXX_COMPILER_ID:MSVC>:/W4;/WX>"
    "$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wall;-Wextra;-Werror>"
)
target_include_directories(jlx PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include)
target_link_libraries(jlx PRIVATE libjlx)

# Enable IPO for every non-Debug configuration. INTERPROCEDURAL_OPTIMIZATION
# does not evaluate generator expressions, so the per-configuration properties
# are set instead.
if (JLX_IPO_OK)
    set_target_properties(jlx PROPERTIES
        INTERPROCEDURAL_OPTIMIZATION_RELEASE TRUE
        INTERPROCEDURAL_OPTIMIZATION_RELWITHDEBINFO TRUE
        INTERPROCEDURAL_OPTIMIZATION_MINSIZEREL TRUE
    )
endif()

32
jlx/src/main.cpp Normal file
View file

@ -0,0 +1,32 @@
#include <exception>
#include <iostream>
#include <iterator>
#include <optional>
#include <string>
#include <utility>
#include <vector>
import jlx;
int main(int, char**) {
std::istreambuf_iterator<char> start(std::cin), end;
std::string data{start, end};
auto tokenizer = jlx::tokenizer(data);
std::optional<jlx::token> res = tokenizer.read_token();
std::vector<jlx::token> tokens;
auto last = std::string();
while(res.has_value()) {
const auto& t = res.value();
tokens.emplace_back(t);
res = tokenizer.read_token();
}
auto parser = jlx::parser<std::vector<jlx::token>::iterator, std::vector<jlx::token>::iterator>(tokens.begin(), tokens.end());
auto root = parser.parse();
return 0;
}

14
libjlx/CMakeLists.txt Normal file
View file

@ -0,0 +1,14 @@
# Static library holding the jlx language front end as C++ module units.
add_library(libjlx STATIC)
# The target name already starts with "lib"; an empty prefix keeps the
# platform from prepending another one to the archive name.
set_target_properties(libjlx PROPERTIES PREFIX "")

# Module interface units. The file set is PUBLIC so that consumers of the
# library can import the modules.
target_sources(libjlx PUBLIC FILE_SET libjlx_modules TYPE CXX_MODULES FILES
    "${CMAKE_CURRENT_SOURCE_DIR}/modules/main.cppm"
    "${CMAKE_CURRENT_SOURCE_DIR}/modules/sourceStream.cppm"
    "${CMAKE_CURRENT_SOURCE_DIR}/modules/tokenizer.cppm"
    "${CMAKE_CURRENT_SOURCE_DIR}/modules/ast.cppm"
    "${CMAKE_CURRENT_SOURCE_DIR}/modules/utils.cppm"
)

# Warnings are errors on every toolchain.
# - Each generator expression is quoted and uses ';' between flags; an unquoted
#   expression containing spaces is split into several broken arguments.
# - The compiler id is spelled "MSVC"; the comparison is case-sensitive.
target_compile_options(libjlx PRIVATE
    "$<$<CXX_COMPILER_ID:MSVC>:/W4;/WX>"
    "$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wall;-Wextra;-Werror>"
)
target_include_directories(libjlx PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")

# Enable IPO for every non-Debug configuration. INTERPROCEDURAL_OPTIMIZATION
# does not evaluate generator expressions, so the per-configuration properties
# are set instead.
if (JLX_IPO_OK)
    set_target_properties(libjlx PROPERTIES
        INTERPROCEDURAL_OPTIMIZATION_RELEASE TRUE
        INTERPROCEDURAL_OPTIMIZATION_RELWITHDEBINFO TRUE
        INTERPROCEDURAL_OPTIMIZATION_MINSIZEREL TRUE
    )
endif()

223
libjlx/modules/ast.cppm Normal file
View file

@ -0,0 +1,223 @@
module;
#include <stdexcept>
#include <format>
#include <iterator>
#include <vector>
#include <memory>
export module jlx:ast;
import :tokenizer;
namespace jlx {
// Discriminator stored in every AST node (the `type` member of `statement`).
// Each node struct passes its own enumerator to the `statement` constructor.
export enum ast_type {
// Tag of root_statement, the node returned by parser::parse().
Root,
// Tag of expression.
Expression,
// Tag of block.
Block,
// Tag of function_declaration.
FunctionDeclaration,
// No node struct in this partition uses the two tags below yet.
SimpleIdentifier,
LiteralValue
};
/// Iterator types the parser can read tokens from: any bidirectional iterator
/// whose value type is exactly `jlx::token`.
///
/// Both conditions are written as constraints so that they are actually
/// evaluated; a bare `std::bidirectional_iterator<T>;` inside a
/// requires-expression only checks that the expression is well-formed.
/// `std::iter_value_t` is used instead of `T::value_type` so that raw
/// pointers to tokens qualify as well.
export template<class T>
concept token_iterator =
    std::bidirectional_iterator<T> &&
    std::same_as<std::iter_value_t<T>, jlx::token>;
/// Common base of every AST node; `type` tells which concrete node it is.
struct statement {
    /// Tag identifying the concrete node type.
    ast_type type;

    /// @param type tag of the concrete node being constructed.
    statement(ast_type type) : type(type) {
    }

    // Nodes are owned and destroyed through std::unique_ptr<statement>, so
    // the destructor must be virtual for the derived members to be released.
    virtual ~statement() = default;
};
struct root_statement : public statement {
root_statement() : statement(Root) {
}
std::vector<std::unique_ptr<statement>> statements;
};
struct expression : public statement {
expression() : statement(Expression) {
}
};
struct block : public statement {
block() : statement(Block) {
}
std::vector<std::unique_ptr<statement>> statements;
};
// One entry of a function's parameter list (see function_declaration::parameters).
struct function_parameter {
// Parameter name, taken from an identifier token.
std::string name;
// Name of the parameter's type, stored as plain text.
std::string type;
};
struct function_declaration : public statement {
function_declaration() : statement(FunctionDeclaration) {
}
std::string name;
std::vector<function_parameter> parameters;
std::optional<std::string> return_type;
std::unique_ptr<block> body;
};
/// Recursive-descent parser over a range of tokens.
///
/// Grammar currently understood:
///
///     root      := function*
///     function  := "fun" IDENT "(" [ param { "," param } ] ")" [ ":" type ] block
///     param     := IDENT ":" type
///     type      := IDENT
///     block     := "{" block* "}"
///
/// The keywords `let`, `var` and `if` have no AST node yet, and neither do
/// statements inside a block; they are rejected with an "Unsupported
/// statement" error instead of being silently dropped.
///
/// @tparam T iterator over jlx::token
/// @tparam E sentinel marking the end of the token range
export template<token_iterator T, std::sentinel_for<T> E>
class parser {
    T current;
    E last;

    /// Reports a token that is not allowed at the current position.
    [[noreturn]] static void fail_invalid_token(const token& t) {
        throw std::runtime_error(std::format("Invalid token {} at {}:{}", t.content, t.line, t.col));
    }

    /// Reports a construct the tokenizer knows but the parser cannot build yet.
    [[noreturn]] static void fail_unsupported(const token& t) {
        throw std::runtime_error(std::format("Unsupported statement '{}' at {}:{}", t.content, t.line, t.col));
    }

    /// Reports that the input ended in the middle of a construct.
    [[noreturn]] static void fail_invalid_eof() {
        throw std::runtime_error("Unexpected end-of-file");
    }

    /// Throws unless there is a token to look at.
    void require_token() const {
        if (current == last) {
            fail_invalid_eof();
        }
    }

    /// True when the current token exists and has the given type and text.
    /// Never throws, so it is safe to call at the end of the input.
    bool at(token_type type, const char* text) const {
        return current != last && current->type == type && current->content == text;
    }

    /// Consumes the current token, which must have the given type and text.
    void expect(token_type type, const char* text) {
        require_token();
        if (!at(type, text)) {
            fail_invalid_token(*current);
        }
        ++current;
    }

    /// Consumes an identifier token and returns its text.
    std::string expect_identifier() {
        require_token();
        if (current->type != token_type::Identifier) {
            fail_invalid_token(*current);
        }
        std::string text = current->content;
        ++current;
        return text;
    }

    /// Parses a type name; a type is a single identifier for now.
    std::string parse_type() {
        return expect_identifier();
    }

    /// Parses `{ ... }`. The closing brace may be the last token of the
    /// input, so consuming it must not be treated as an unexpected EOF.
    std::unique_ptr<block> parse_block() {
        expect(token_type::Punctuation, "{");

        auto result = std::make_unique<block>();
        while (!at(token_type::Punctuation, "}")) {
            require_token(); // input ended before the closing brace
            if (at(token_type::Punctuation, "{")) {
                result->statements.push_back(parse_block());
            } else {
                // TODO: statements and expressions inside blocks are not implemented.
                fail_unsupported(*current);
            }
        }
        ++current; // the closing brace
        return result;
    }

    /// Parses a complete function declaration starting at the `fun` keyword.
    std::unique_ptr<function_declaration> parse_function() {
        expect(token_type::Keyword, "fun");

        auto declaration = std::make_unique<function_declaration>();
        declaration->name = expect_identifier();

        expect(token_type::Punctuation, "(");
        bool first = true;
        while (!at(token_type::Punctuation, ")")) {
            if (!first) {
                expect(token_type::Punctuation, ",");
            }
            function_parameter parameter;
            parameter.name = expect_identifier();
            expect(token_type::Punctuation, ":");
            parameter.type = parse_type();
            declaration->parameters.push_back(std::move(parameter));
            first = false;
        }
        ++current; // the closing parenthesis

        // Optional return type: `: type`.
        if (at(token_type::Punctuation, ":")) {
            ++current;
            declaration->return_type = parse_type();
        }

        declaration->body = parse_block();
        return declaration;
    }

    /// Parses one top-level statement.
    /// @return the parsed node, or nullptr when the input is exhausted.
    std::unique_ptr<statement> parse_top_level_statement() {
        if (current == last) {
            return nullptr;
        }
        if (current->type == token_type::Keyword) {
            if (current->content == "fun") {
                return parse_function();
            }
            // TODO: `let`, `var` and `if` have no AST representation yet.
            fail_unsupported(*current);
        }
        fail_invalid_token(*current);
    }

public:
    /// @param current first token to parse
    /// @param last    end of the token range
    parser(T current, E last) : current(std::move(current)), last(std::move(last)) {
    }

    /// Parses the whole token range.
    /// @return a root_statement owning every top-level statement.
    /// @throws std::runtime_error on invalid, unsupported or truncated input.
    std::unique_ptr<statement> parse() {
        auto root = std::make_unique<root_statement>();
        while (current != last) {
            auto parsed = parse_top_level_statement();
            if (parsed == nullptr) {
                throw std::runtime_error("No statement parsed...");
            }
            root->statements.push_back(std::move(parsed));
        }
        return root;
    }
};
}

6
libjlx/modules/main.cppm Normal file
View file

@ -0,0 +1,6 @@
module;
export module jlx;
export import :source_stream;
export import :tokenizer;
export import :ast;

View file

@ -0,0 +1,60 @@
module;
#include <cstddef>
#include <string>
#include <optional>
export module jlx:source_stream;
namespace jlx {
    /// Forward-only cursor over an in-memory source string that keeps track
    /// of the line and column reached so far.
    ///
    /// Lines are counted from 1. The column is the number of characters
    /// already consumed on the current line, so it is 0 at the start of
    /// every line.
    ///
    /// @tparam CharT character type of the underlying string
    export template<typename CharT>
    class source_stream {
    public:
        using char_traits = std::char_traits<CharT>;
        using str = std::basic_string<CharT, char_traits>;

    protected:
        typename str::size_type pos = 0;
        std::size_t line = 1;
        std::size_t col = 0;
        const str input;

    public:
        /// Takes ownership of the text to read. The parameter is a non-const
        /// value so that it is really moved into the stream, not copied.
        source_stream(str input) : input(std::move(input)) {
        }

        /// Consumes and returns the next character, updating line and column.
        /// @return the character, or an empty optional at end of input.
        std::optional<CharT> next() {
            if (eof()) {
                return std::nullopt;
            }
            const CharT ch = input[pos++];
            if (char_traits::to_int_type(ch) == 10) { // 10 is the line feed character
                ++line;
                col = 0;
            } else {
                ++col;
            }
            return ch;
        }

        /// Returns the next character without consuming it.
        /// @return the character, or an empty optional at end of input.
        std::optional<CharT> peek() const {
            if (eof()) {
                return std::nullopt;
            }
            return input[pos];
        }

        /// True once every character has been consumed.
        bool eof() const {
            return pos >= input.length();
        }

        /// Line of the next character to be read (1-based).
        std::size_t current_line() const {
            return line;
        }

        /// Number of characters already consumed on the current line.
        std::size_t current_col() const {
            return col;
        }
    };
}

View file

@ -0,0 +1,348 @@
module;
#include <string>
#include <optional>
#include <cctype>
#include <sstream>
#include <format>
#include <iostream>
export module jlx:tokenizer;
import :source_stream;
import utils;
namespace jlx {
// Category of a token produced by the tokenizer.
export enum token_type {
// Zero value; token_type_to_string() maps it (and any unknown value) to "Invalid".
Invalid = 0,
// A single character from the tokenizer's punctuation table.
Punctuation,
// A run of digits with at most one '.'.
Number,
// A double-quoted string literal, stored without quotes and with escapes resolved.
String,
// A word listed in the tokenizer's keyword table.
Keyword,
// Any other word made of letters, digits and '_' that starts with a letter or '_'.
Identifier,
// An entry of the tokenizer's operator table.
Operator
};
/// Returns the name of a token category.
/// @param t category to describe
/// @return the enumerator's name; "Invalid" for `Invalid` and for any value
///         that is not a named category.
export constexpr std::string token_type_to_string(token_type t) {
    const char* label = "Invalid";
    switch (t) {
        case Punctuation: label = "Punctuation"; break;
        case Number:      label = "Number";      break;
        case String:      label = "String";      break;
        case Keyword:     label = "Keyword";     break;
        case Identifier:  label = "Identifier";  break;
        case Operator:    label = "Operator";    break;
        default:          break;
    }
    return std::string{label};
}
// A single lexical element together with the position where it starts.
// The tokenizer builds tokens by brace initialisation in member order
// (type, content, line, col), so the order of the fields must not change.
export struct token {
// Category of the token.
token_type type;
// Text of the token; for strings, the contents without the surrounding quotes.
std::string content;
// Line on which the token starts (source_stream counts lines from 1).
std::size_t line;
// Characters already consumed on that line when the token starts.
std::size_t col;
};
/// Formats a token as `Category(content)`, for example `Keyword(fun)`.
///
/// Not `constexpr`: std::format cannot be used in constant evaluation, so
/// the previous `constexpr` specifier could never be honoured.
///
/// @param t token to describe
/// @return the textual description
export std::string token_to_string(const token& t) {
    return std::format("{}({})", token_type_to_string(t.type), t.content);
}
/// Error raised by the tokenizer; the message includes the source position.
///
/// Note that this type is not derived from std::exception, so it must be
/// caught by its own name.
export class tokenizer_exception {
protected:
    std::string msg;

public:
    /// @param msg  description of the problem
    /// @param line line at which the problem was detected
    /// @param col  column at which the problem was detected
    // std::format uses "{}" placeholders; printf-style "%d"/"%s" would be
    // copied to the output verbatim and the arguments ignored.
    tokenizer_exception(std::string msg, std::size_t line, std::size_t col)
        : msg(std::format("Tokenizer exception at {}:{}. {}", line, col, msg)) {
    }

    /// @return the full message, including the position prefix.
    const std::string& what() const {
        return msg;
    }
};
/// Splits source text into tokens, one per call to read_token().
///
/// Whitespace between tokens is skipped. Every token records the line and
/// column at which it starts.
export class tokenizer {
    source_stream<char> source;

    // NOTE(review): constexpr std::string objects depend on the standard
    // library allowing short strings in constant-initialised statics; confirm
    // this on every toolchain that has to build the project.
    static constexpr std::array<std::string, 5> keywords = {{
        "if",
        "else",
        "fun",
        //"struct",
        "let",
        "var"
    }};

    // Includes ')' and ',', which the parser needs for parameter lists.
    static constexpr std::array<char, 8> punctuations = {{
        '.',
        ',',
        '(',
        ')',
        '{',
        '}',
        ':',
        ';'
    }};

    static constexpr std::array<std::string, 12> operators = {{
        "=",
        "+",
        "-",
        "*",
        "/",
        "%",
        "==",
        "!=",
        "<=",
        ">=",
        ">",
        "<"
    }};

    /// True when `word` is exactly one of the known operators.
    static bool is_operator(const std::string& word) {
        return std::find(operators.begin(), operators.end(), word) != operators.end();
    }

    /// True when `ch` may start an identifier or keyword.
    static constexpr bool is_valid_identifier_start(char ch) {
        return ch == '_' || isletter(ch);
    }

    /// True when `ch` may appear inside an identifier or keyword.
    static bool is_identifier_char(char ch) {
        return ch == '_' || isletter(ch) || std::isdigit(static_cast<unsigned char>(ch));
    }

    /// Consumes whitespace up to the next significant character.
    void skip_whitespace() {
        // <cctype> functions require a value representable as unsigned char.
        for (auto ch = source.peek();
             ch.has_value() && std::isspace(static_cast<unsigned char>(*ch));
             ch = source.peek()) {
            source.next();
        }
    }

    /// Reads a double-quoted string literal; the stream must be positioned
    /// on the opening quote. Supported escapes: \" \\ \n.
    /// @throws tokenizer_exception on an unknown escape sequence or when the
    ///         input ends before the closing quote.
    token read_string_token() {
        const auto start_line = source.current_line();
        const auto start_col = source.current_col();
        source.next(); // opening quote

        std::string text;
        bool escape = false;
        bool terminated = false;
        while (const auto ch = source.next()) {
            const char val = *ch;
            if (!escape && val == '"') {
                terminated = true;
                break;
            }
            // NOTE(review): raw line breaks inside a literal are dropped, as
            // before; confirm that this is the intended behaviour.
            if (val == '\n') {
                continue;
            }
            if (escape) {
                switch (val) {
                    case '"':
                        text.push_back('"');
                        break;
                    case '\\':
                        text.push_back('\\');
                        break;
                    case 'n':
                        text.push_back('\n');
                        break;
                    default:
                        throw tokenizer_exception("Invalid escape sequance ", source.current_line(), source.current_col());
                }
                escape = false;
            } else if (val == '\\') {
                escape = true;
            } else {
                text.push_back(val);
            }
        }

        if (!terminated) {
            throw tokenizer_exception("Unterminated string literal", start_line, start_col);
        }
        return token{token_type::String, std::move(text), start_line, start_col};
    }

    /// Reads a number: digits with at most one '.'.
    /// @return the token, or an empty optional at end of input.
    /// @throws tokenizer_exception when a second '.' is found.
    std::optional<token> read_decimal_token() {
        auto ch = source.peek();
        if (!ch.has_value()) {
            return std::nullopt;
        }

        const auto start_line = source.current_line();
        const auto start_col = source.current_col();
        std::string digits;
        bool found_period = false;
        while (ch.has_value() && (std::isdigit(static_cast<unsigned char>(*ch)) || *ch == '.')) {
            if (*ch == '.') {
                if (found_period) {
                    throw tokenizer_exception("Too many periods in numeric value", source.current_line(), source.current_col());
                }
                found_period = true;
            }
            digits.push_back(*ch);
            source.next();
            ch = source.peek();
        }
        return token{token_type::Number, std::move(digits), start_line, start_col};
    }

    /// Reads a word and classifies it as keyword or identifier.
    std::optional<token> read_identifier() {
        const auto start_line = source.current_line();
        const auto start_col = source.current_col();

        std::string word;
        for (auto ch = source.peek(); ch.has_value() && is_identifier_char(*ch); ch = source.peek()) {
            word.push_back(*ch);
            source.next();
        }

        const bool is_keyword = std::find(keywords.begin(), keywords.end(), word) != keywords.end();
        return token{
            is_keyword ? token_type::Keyword : token_type::Identifier,
            std::move(word),
            start_line,
            start_col
        };
    }

    /// Reads one punctuation character.
    /// @return the token, or an empty optional (nothing consumed) when the
    ///         next character is not punctuation.
    std::optional<token> read_punctuation_token() {
        const auto ch = source.peek();
        if (!ch.has_value()) {
            return std::nullopt;
        }
        if (std::find(punctuations.begin(), punctuations.end(), *ch) == punctuations.end()) {
            return std::nullopt;
        }

        const auto line = source.current_line();
        const auto col = source.current_col();
        source.next();
        return token{token_type::Punctuation, std::string(1, *ch), line, col};
    }

    /// Reads the longest operator starting at the current position.
    ///
    /// Operators are matched one character at a time and extended only
    /// while the result is still a known operator, so `=-` is read as `=`
    /// followed by `-` rather than rejected as a single unknown operator.
    ///
    /// @return the token, or an empty optional (nothing consumed) when the
    ///         next character cannot be part of any operator.
    /// @throws tokenizer_exception when the characters read do not form a
    ///         known operator (for example a lone `!`).
    std::optional<token> read_operator_token() {
        const auto line = source.current_line();
        const auto col = source.current_col();

        const auto first = source.peek();
        if (!first.has_value() || !is_valid_character_from_set(operators, *first)) {
            return std::nullopt;
        }

        std::string word(1, *first);
        source.next();
        for (auto ch = source.peek(); ch.has_value() && is_operator(word + *ch); ch = source.peek()) {
            word.push_back(*ch);
            source.next();
        }

        if (!is_operator(word)) {
            throw tokenizer_exception(std::format("Unknown operator '{}'", word), line, col);
        }
        return token{token_type::Operator, std::move(word), line, col};
    }

public:
    /// @param source complete text to tokenize
    tokenizer(std::string source) : source(std::move(source)) {
    }

    /// Reads the next token.
    /// @return the token, or an empty optional once the input is exhausted.
    /// @throws tokenizer_exception on malformed input.
    std::optional<token> read_token() {
        skip_whitespace();

        const auto next_char = source.peek();
        if (!next_char.has_value()) {
            return std::nullopt;
        }
        const char val = *next_char;

        if (val == '"') {
            return read_string_token();
        }
        if (std::isdigit(static_cast<unsigned char>(val))) {
            return read_decimal_token();
        }
        if (is_valid_identifier_start(val)) {
            return read_identifier();
        }
        if (auto punctuation = read_punctuation_token(); punctuation.has_value()) {
            return punctuation;
        }
        if (auto op = read_operator_token(); op.has_value()) {
            return op;
        }
        throw tokenizer_exception(std::format("Unknown character '{}'", val), source.current_line(), source.current_col());
    }
};
}

25
libjlx/modules/utils.cppm Normal file
View file

@ -0,0 +1,25 @@
module;
#include <cstddef>
#include <array>
#include <string>
#include <initializer_list>
export module utils;
/// Tells whether `ch` is an ASCII letter, `a`-`z` or `A`-`Z`.
export constexpr bool isletter(char ch) {
    const bool lower = 'a' <= ch && ch <= 'z';
    const bool upper = 'A' <= ch && ch <= 'Z';
    return lower || upper;
}
/// Tells whether `needle` occurs in any of the strings of `haystack`.
///
/// The array is taken by const reference: this function is called once per
/// input character, and passing it by value copied every string each time.
///
/// @tparam N        number of strings in the set
/// @param haystack  strings whose characters form the accepted set
/// @param needle    character to look for
/// @return true when at least one string contains `needle`
export template<std::size_t N>
constexpr bool is_valid_character_from_set(const std::array<std::string, N>& haystack, char needle) {
    for (const auto& candidate : haystack) {
        if (candidate.find(needle) != std::string::npos) {
            return true;
        }
    }
    return false;
}