From 1f2cb4be8b5b9bbc8279ac7f3de00d2e0b62fd3f Mon Sep 17 00:00:00 2001 From: shabani005 Date: Wed, 5 Nov 2025 23:23:49 +0300 Subject: [PATCH] finished base --- README.md | 4 +- lexer.c | 210 --------------------------------------------------- parser3.c | 219 ------------------------------------------------------ 3 files changed, 2 insertions(+), 431 deletions(-) delete mode 100644 lexer.c delete mode 100644 parser3.c diff --git a/README.md b/README.md index dd742b9..ec0b12f 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,9 @@ > Uninished, Not meant to be used by others. idk if it works on windows. ---------- -## Simple Interpreter implemented in C. +## Simple programming language VM implemented in C. (look at examples folder) Usage: ``` cc -o builder nob.c -./builder +./builder ``` diff --git a/lexer.c b/lexer.c deleted file mode 100644 index c52f78f..0000000 --- a/lexer.c +++ /dev/null @@ -1,210 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - - -typedef enum { - TOKEN_PLUS, - TOKEN_MINUS, - TOKEN_INTEGER, - TOKEN_FLOAT, - TOKEN_SPACE, - TOKEN_STRING, - TOKEN_MUL, - TOKEN_DIV, - TOKEN_UNKNOWN, - TOKEN_EOF, - TOKEN_NEWLINE, - TOKEN_LPAREN, - TOKEN_RPAREN, - TOKEN_COMMA -} symbols; - -typedef enum { - BHV_STACK, - BHV_UNDEFINED, - BHV_NUMBER, - BHV_STRING, - BHV_FLOAT, -} symbol_bhv; - - -typedef struct { - symbols *type; - char **text; - size_t *text_len; - symbol_bhv *behaviour; - unsigned int *cursor_skip; - symbols *previous_token; - size_t capacity; - size_t size; -} Token; - - -void token_init(Token *tok, size_t capacity) { - tok->capacity = capacity; - tok->size = 0; - - tok->type = malloc(sizeof(symbols) * capacity); - tok->text = malloc(sizeof(char *) * capacity); - tok->text_len = malloc(sizeof(size_t) * capacity); - tok->behaviour = malloc(sizeof(symbol_bhv) * capacity); - tok->cursor_skip = malloc(sizeof(unsigned int) * capacity); - tok->previous_token = malloc(sizeof(symbols) * capacity); - - assert(tok->type && tok->text && tok->text_len && - tok->behaviour && tok->cursor_skip && tok->previous_token); -} - -void token_grow(Token *tok) { - size_t new_capacity = (tok->capacity == 0 ? 8 : tok->capacity * 2); - - tok->type = realloc(tok->type, new_capacity * sizeof(symbols)); - tok->text = realloc(tok->text, new_capacity * sizeof(char *)); - tok->text_len = realloc(tok->text_len, new_capacity * sizeof(size_t)); - tok->behaviour = realloc(tok->behaviour, new_capacity * sizeof(symbol_bhv)); - tok->cursor_skip = realloc(tok->cursor_skip, new_capacity * sizeof(unsigned int)); - tok->previous_token = realloc(tok->previous_token, new_capacity * sizeof(symbols)); - - assert(tok->type && tok->text && tok->text_len && - tok->behaviour && tok->cursor_skip && tok->previous_token); - - tok->capacity = new_capacity; -} - -void token_push(Token *tok, symbols type, const char *text, - symbol_bhv behaviour, size_t cursor_skip) { - if (tok->size >= tok->capacity) { - token_grow(tok); - } - - size_t i = tok->size; - - tok->type[i] = type; - tok->text[i] = strdup(text); - tok->text_len[i] = strlen(text); - tok->behaviour[i] = behaviour; - tok->cursor_skip[i] = cursor_skip; - - if (i > 0) - tok->previous_token[i] = tok->type[i - 1]; - else - tok->previous_token[i] = TOKEN_UNKNOWN; - - tok->size++; -} - -void token_free(Token *tok) { - for (size_t i = 0; i < tok->size; i++) { - free(tok->text[i]); - } - free(tok->type); - free(tok->text); - free(tok->text_len); - free(tok->behaviour); - free(tok->cursor_skip); - free(tok->previous_token); -} - - -int str_to_int(char *strint) { return atoi(strint); } -float str_to_float(char *strif) { return strtof(strif, NULL); } - -char *token_type_to_string(symbols type) { - switch (type) { - case TOKEN_PLUS: return "TOKEN_PLUS"; - case TOKEN_MINUS: return "TOKEN_MINUS"; - case TOKEN_INTEGER: return "TOKEN_INTEGER"; - case TOKEN_FLOAT: return "TOKEN_FLOAT"; - case TOKEN_SPACE: return "TOKEN_SPACE"; - case TOKEN_STRING: return "TOKEN_STRING"; - case TOKEN_MUL: return "TOKEN_MUL"; - case TOKEN_DIV: return "TOKEN_DIV"; - case TOKEN_LPAREN: return "TOKEN_LPAREN"; - case TOKEN_RPAREN: return "TOKEN_RPAREN"; - case TOKEN_COMMA: return "TOKEN_COMMA"; - case TOKEN_EOF: return "TOKEN_EOF"; - case TOKEN_NEWLINE: return "TOKEN_NEWLINE"; - case TOKEN_UNKNOWN: return "TOKEN_UNKNOWN"; - default: return "UNKNOWN_SYMBOL"; - } -} - - -size_t read_from_tok(Token *tok, const char *input, size_t cursor) { - char buf[64]; - size_t start = cursor; - size_t i = 0; - - if (isdigit(input[cursor])) { - int dots_seen = 0; - while (isdigit(input[cursor]) || input[cursor] == '.') { - if (input[cursor] == '.') dots_seen++; - buf[i++] = input[cursor++]; - } - buf[i] = '\0'; - if (dots_seen == 0) { - token_push(tok, TOKEN_INTEGER, buf, BHV_NUMBER, cursor - start); - } else { - token_push(tok, TOKEN_FLOAT, buf, BHV_FLOAT, cursor - start); - } - } else if (isalpha(input[cursor])) { - while (isalpha(input[cursor])) { - buf[i++] = input[cursor++]; - } - buf[i] = '\0'; - token_push(tok, TOKEN_STRING, buf, BHV_STRING, cursor - start); - //refactor into separate function to use in parsing functions and definitions - } else { - buf[0] = input[cursor]; - buf[1] = '\0'; - switch (input[cursor]) { - case '+': token_push(tok, TOKEN_PLUS, "+", BHV_STACK, 1); break; - case '-': token_push(tok, TOKEN_MINUS, "-", BHV_STACK, 1); break; - case '*': token_push(tok, TOKEN_MUL, "*", BHV_STACK, 1); break; - case '/': token_push(tok, TOKEN_DIV, "/", BHV_STACK, 1); break; - case ' ': token_push(tok, TOKEN_SPACE, " ", BHV_UNDEFINED, 1); break; - case '\n': token_push(tok, TOKEN_NEWLINE, "\\n", BHV_UNDEFINED, 1); break; - case '(': token_push(tok, TOKEN_LPAREN, "(", BHV_STACK, 1); break; - case ')': token_push(tok, TOKEN_RPAREN, ")", BHV_STACK, 1); break; - case ',': token_push(tok, TOKEN_COMMA, ",", BHV_STACK, 1); break; - default: token_push(tok, TOKEN_UNKNOWN, buf, BHV_UNDEFINED, 1); break; - } - cursor++; - } - - return cursor - start; -} - -Token tokenize_all(const char *input) { - Token tok; - token_init(&tok, 8); - - size_t i = 0; - size_t length = strlen(input); - - while (i < length) { - i += read_from_tok(&tok, input, i); - } - - token_push(&tok, TOKEN_EOF, "EOF", BHV_UNDEFINED, 0); - return tok; -} - - -int main() { - char *input = "1 + 2 * 3 print"; - - Token tokens = tokenize_all(input); - - for (size_t i = 0; i < tokens.size; i++) { - printf("[%s] \"%s\"\n", token_type_to_string(tokens.type[i]), tokens.text[i]); - } - - token_free(&tokens); - return 0; -} diff --git a/parser3.c b/parser3.c deleted file mode 100644 index d266465..0000000 --- a/parser3.c +++ /dev/null @@ -1,219 +0,0 @@ -#include "./lexer.h" -#define NB_IMPLEMENTATION -#include "./nb.h" - -int get_prec(symbols op){ - switch (op) { - case TOKEN_MUL: - case TOKEN_DIV: - return 2; break; - case TOKEN_PLUS: - case TOKEN_MINUS: - return 1; break; - default: return 0; - } -} -// parse - -bool is_left_asc(symbols op){ - switch (op) { - case TOKEN_MUL: - case TOKEN_DIV: - case TOKEN_PLUS: - case TOKEN_MINUS: - return true; break; - default: return false; - } -} - -Token *global_tok = NULL; - -typedef enum { - SYM_VAR, - SYM_FUNC, -} SymbolKind; - -typedef struct { - const char* name; - size_t ret_count; - size_t arg_count; - symbols arg_types[16]; - symbols ret_type; - SymbolKind symbol_kind; - bool builtin; -} Symbol; - - -static Symbol builtins[] = { - { "print", 1, 1, { TOKEN_UNKNOWN }, TOKEN_EOF, SYM_FUNC, true }, -}; - - -typedef struct { - Symbol *symbols; - size_t size; - size_t capacity; -} SymbolTable; - - -static int builtin_num = sizeof(builtins)/sizeof(builtins[0]); - -static SymbolTable global_env = { - .size = sizeof(builtins)/sizeof(builtins[0]), - .capacity = sizeof(builtins)/sizeof(builtins[0]), - .symbols = builtins}; - - -Symbol *symbol_lookup(SymbolTable *table, const char *n){ - for (size_t i=0; isize; ++i){ - if(strcmp(n, table->symbols[i].name) == 0){ - return &table->symbols[i]; - } - } - return NULL; -} - -// fn add(x: int, y: int) int { -// return x+y; -// } - - -void symbol_table_init(SymbolTable *table, size_t initial_capacity) { - table->symbols = malloc(sizeof(Symbol) * initial_capacity); - if (!table->symbols) { - fprintf(stderr, "symbol_table_init: malloc failed\n"); - exit(1); - } - table->size = 0; - table->capacity = initial_capacity; -} - -void symbol_table_add(SymbolTable *table, Symbol sym) { - if (table->size >= table->capacity) { - table->capacity = (table->capacity == 0) ? 8 : table->capacity * 2; - table->symbols = realloc(table->symbols, sizeof(Symbol) * table->capacity); - if (!table->symbols) { - fprintf(stderr, "symbol_table_add: realloc failed\n"); - exit(1); - } - } - table->symbols[table->size++] = sym; -} - - -void symbol_table_free(SymbolTable *table) { - free(table->symbols); - table->symbols = NULL; - table->size = 0; - table->capacity = 0; -} - - -Token build_rpn(Token *inp, SymbolTable *symtab) { - Token output; - Token stack; - - token_init(&output, 16); - token_init(&stack, 16); - - for (size_t i = 0; i < inp->size; ++i) { - symbols type = inp->type[i]; - const char *text = inp->text[i]; - - if (type == TOKEN_IDENTIFIER && i + 1 < inp->size && inp->type[i + 1] == TOKEN_LPAREN) { - Symbol *found = symbol_lookup(symtab, text); - if (!found) { - Symbol sym = { - .name = strdup(text), - .arg_count = 0, - .ret_type = TOKEN_EOF, - .symbol_kind = SYM_FUNC, - .builtin = false - }; - symbol_table_add(symtab, sym); - } - token_push(&stack, type, text, inp->behaviour[i], 0); - } else if (type == TOKEN_IDENTIFIER) { - Symbol *found = symbol_lookup(symtab, text); - if (!found) { - Symbol sym = { - .name = strdup(text), - .arg_count = 0, - .ret_type = TOKEN_UNKNOWN, - .symbol_kind = SYM_VAR, - .builtin = false - }; - symbol_table_add(symtab, sym); - } - token_push(&output, type, text, inp->behaviour[i], 0); - } else if (type == TOKEN_LPAREN) { - token_push(&stack, type, text, inp->behaviour[i], 0); - } else if (type == TOKEN_RPAREN) { - while (stack.size > 0 && stack.type[stack.size - 1] != TOKEN_LPAREN) { - token_push(&output, stack.type[stack.size - 1], - stack.text[stack.size - 1], - stack.behaviour[stack.size - 1], 0); - stack.size--; - } - if (stack.size > 0 && stack.type[stack.size - 1] == TOKEN_LPAREN) - stack.size--; - if (stack.size > 0 && stack.type[stack.size - 1] == TOKEN_IDENTIFIER) { - token_push(&output, stack.type[stack.size - 1], - stack.text[stack.size - 1], - stack.behaviour[stack.size - 1], 0); - stack.size--; - } - } else if (type == TOKEN_INTEGER || type == TOKEN_FLOAT || type == TOKEN_STRING) { - token_push(&output, type, text, inp->behaviour[i], 0); - } else if (is_left_asc(type)) { - while (stack.size > 0 && stack.type[stack.size - 1] != TOKEN_LPAREN && - (get_prec(stack.type[stack.size - 1]) > get_prec(type) || - get_prec(stack.type[stack.size - 1]) == get_prec(type)) && - is_left_asc(type)) { - token_push(&output, stack.type[stack.size - 1], - stack.text[stack.size - 1], - stack.behaviour[stack.size - 1], 0); - stack.size--; - } - token_push(&stack, type, text, inp->behaviour[i], 0); - } - } - - while (stack.size > 0) { - token_push(&output, stack.type[stack.size - 1], - stack.text[stack.size - 1], - stack.behaviour[stack.size - 1], 0); - stack.size--; - } - - token_push(&output, TOKEN_EOF, "EOF", BHV_UNDEFINED, 0); - return output; -} - -void print_token(Token *tk){ - for (size_t i=0; isize; ++i){ - printf("TokenNum: %zu Type: %s Value: %s\n", i, tk->tktype[i], tk->text[i]); - } -} - - - - -int main(int argc, char **argv){ - if (argc < 2) return -1; - const char ts[] = "\"hello\" hi + 2 2.312"; - const char math[] = "print(((1+2)*6)/18)"; // = 1 - const char print[] = "print(\"hello\")"; - const char simple[] = "1 + ( 3 + 3 )/4+4*3"; - - - char* read = nb_read_file(argv[1]); - Token tk = tokenize_all(read); - printf("INPUT: %s\n", read); - SymbolTable table = {0}; - symbol_table_init(&table, 32); - - - Token rpn = build_rpn(&tk, &table); - print_token(&rpn); -}