From 1f2cb4be8b5b9bbc8279ac7f3de00d2e0b62fd3f Mon Sep 17 00:00:00 2001
From: shabani005 <hasan.shabani2005@gmail.com>
Date: Wed, 5 Nov 2025 23:23:49 +0300
Subject: [PATCH] finished base

---
 README.md |   4 +-
 lexer.c   | 210 ---------------------------------------------------
 parser3.c | 219 ------------------------------------------------------
 3 files changed, 2 insertions(+), 431 deletions(-)
 delete mode 100644 lexer.c
 delete mode 100644 parser3.c
diff --git a/README.md b/README.md
index dd742b9..ec0b12f 100644
--- a/README.md
+++ b/README.md
@@ -2,9 +2,9 @@
 > Uninished, Not meant to be used by others. idk if it works on windows.
 ----------
 
-## Simple Interpreter implemented in C. 
+## Simple programming language VM implemented in C (see the examples folder)
 Usage:
 ```
 cc -o builder nob.c
-./builder
+./builder <source file>
 ```
diff --git a/lexer.c b/lexer.c
deleted file mode 100644
index c52f78f..0000000
--- a/lexer.c
+++ /dev/null
@@ -1,210 +0,0 @@
-#include <assert.h>
-#include <ctype.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdbool.h>
-
-
-typedef enum {
-  TOKEN_PLUS,
-  TOKEN_MINUS,
-  TOKEN_INTEGER,
-  TOKEN_FLOAT,
-  TOKEN_SPACE,
-  TOKEN_STRING,
-  TOKEN_MUL,
-  TOKEN_DIV,
-  TOKEN_UNKNOWN,
-  TOKEN_EOF,
-  TOKEN_NEWLINE,
-  TOKEN_LPAREN,
-  TOKEN_RPAREN,
-  TOKEN_COMMA
-} symbols;
-
-typedef enum {
-  BHV_STACK,
-  BHV_UNDEFINED,
-  BHV_NUMBER,
-  BHV_STRING,
-  BHV_FLOAT,
-} symbol_bhv;
-
-
-typedef struct {
-  symbols *type;
-  char **text;
-  size_t *text_len;
-  symbol_bhv *behaviour;
-  unsigned int *cursor_skip;
-  symbols *previous_token;
-  size_t capacity;
-  size_t size;
-} Token;
-
-
-void token_init(Token *tok, size_t capacity) {
-  tok->capacity = capacity;
-  tok->size = 0;
-
-  tok->type = malloc(sizeof(symbols) * capacity);
-  tok->text = malloc(sizeof(char *) * capacity);
-  tok->text_len = malloc(sizeof(size_t) * capacity);
-  tok->behaviour = malloc(sizeof(symbol_bhv) * capacity);
-  tok->cursor_skip = malloc(sizeof(unsigned int) * capacity);
-  tok->previous_token = malloc(sizeof(symbols) * capacity);
-
-  assert(tok->type && tok->text && tok->text_len &&
-         tok->behaviour && tok->cursor_skip && tok->previous_token);
-}
-
-void token_grow(Token *tok) {
-  size_t new_capacity = (tok->capacity == 0 ? 8 : tok->capacity * 2);
-
-  tok->type = realloc(tok->type, new_capacity * sizeof(symbols));
-  tok->text = realloc(tok->text, new_capacity * sizeof(char *));
-  tok->text_len = realloc(tok->text_len, new_capacity * sizeof(size_t));
-  tok->behaviour = realloc(tok->behaviour, new_capacity * sizeof(symbol_bhv));
-  tok->cursor_skip = realloc(tok->cursor_skip, new_capacity * sizeof(unsigned int));
-  tok->previous_token = realloc(tok->previous_token, new_capacity * sizeof(symbols));
-
-  assert(tok->type && tok->text && tok->text_len &&
-         tok->behaviour && tok->cursor_skip && tok->previous_token);
-
-  tok->capacity = new_capacity;
-}
-
-void token_push(Token *tok, symbols type, const char *text,
-                symbol_bhv behaviour, size_t cursor_skip) {
-  if (tok->size >= tok->capacity) {
-    token_grow(tok);
-  }
-
-  size_t i = tok->size;
-
-  tok->type[i] = type;
-  tok->text[i] = strdup(text);
-  tok->text_len[i] = strlen(text);
-  tok->behaviour[i] = behaviour;
-  tok->cursor_skip[i] = cursor_skip;
-
-  if (i > 0)
-    tok->previous_token[i] = tok->type[i - 1];
-  else
-    tok->previous_token[i] = TOKEN_UNKNOWN;
-
-  tok->size++;
-}
-
-void token_free(Token *tok) {
-  for (size_t i = 0; i < tok->size; i++) {
-    free(tok->text[i]);
-  }
-  free(tok->type);
-  free(tok->text);
-  free(tok->text_len);
-  free(tok->behaviour);
-  free(tok->cursor_skip);
-  free(tok->previous_token);
-}
-
-
-int str_to_int(char *strint) { return atoi(strint); }
-float str_to_float(char *strif) { return strtof(strif, NULL); }
-
-char *token_type_to_string(symbols type) {
-  switch (type) {
-    case TOKEN_PLUS: return "TOKEN_PLUS";
-    case TOKEN_MINUS: return "TOKEN_MINUS";
-    case TOKEN_INTEGER: return "TOKEN_INTEGER";
-    case TOKEN_FLOAT: return "TOKEN_FLOAT";
-    case TOKEN_SPACE: return "TOKEN_SPACE";
-    case TOKEN_STRING: return "TOKEN_STRING";
-    case TOKEN_MUL: return "TOKEN_MUL";
-    case TOKEN_DIV: return "TOKEN_DIV";
-    case TOKEN_LPAREN: return "TOKEN_LPAREN";
-    case TOKEN_RPAREN: return "TOKEN_RPAREN";
-    case TOKEN_COMMA: return "TOKEN_COMMA";
-    case TOKEN_EOF: return "TOKEN_EOF";
-    case TOKEN_NEWLINE: return "TOKEN_NEWLINE";
-    case TOKEN_UNKNOWN: return "TOKEN_UNKNOWN";
-    default: return "UNKNOWN_SYMBOL";
-  }
-}
-
-
-size_t read_from_tok(Token *tok, const char *input, size_t cursor) {
-  char buf[64];
-  size_t start = cursor;
-  size_t i = 0;
-
-  if (isdigit(input[cursor])) {
-    int dots_seen = 0;
-    while (isdigit(input[cursor]) || input[cursor] == '.') {
-      if (input[cursor] == '.') dots_seen++;
-      buf[i++] = input[cursor++];
-    }
-    buf[i] = '\0';
-    if (dots_seen == 0) {
-      token_push(tok, TOKEN_INTEGER, buf, BHV_NUMBER, cursor - start);
-    } else {
-      token_push(tok, TOKEN_FLOAT, buf, BHV_FLOAT, cursor - start);
-    }
-  } else if (isalpha(input[cursor])) {
-    while (isalpha(input[cursor])) {
-      buf[i++] = input[cursor++];
-    }
-    buf[i] = '\0';
-    token_push(tok, TOKEN_STRING, buf, BHV_STRING, cursor - start); 
-    //refactor into separate function to use in parsing functions and definitions  
-  } else {
-    buf[0] = input[cursor];
-    buf[1] = '\0';
-    switch (input[cursor]) {
-      case '+': token_push(tok, TOKEN_PLUS, "+", BHV_STACK, 1); break;
-      case '-': token_push(tok, TOKEN_MINUS, "-", BHV_STACK, 1); break;
-      case '*': token_push(tok, TOKEN_MUL, "*", BHV_STACK, 1); break;
-      case '/': token_push(tok, TOKEN_DIV, "/", BHV_STACK, 1); break;
-      case ' ': token_push(tok, TOKEN_SPACE, " ", BHV_UNDEFINED, 1); break;
-      case '\n': token_push(tok, TOKEN_NEWLINE, "\\n", BHV_UNDEFINED, 1); break;
-      case '(': token_push(tok, TOKEN_LPAREN, "(", BHV_STACK, 1); break;
-      case ')': token_push(tok, TOKEN_RPAREN, ")", BHV_STACK, 1); break;
-      case ',': token_push(tok, TOKEN_COMMA, ",", BHV_STACK, 1); break;
-      default: token_push(tok, TOKEN_UNKNOWN, buf, BHV_UNDEFINED, 1); break;
-    }
-    cursor++;
-  }
-
-  return cursor - start;
-}
-
-Token tokenize_all(const char *input) {
-  Token tok;
-  token_init(&tok, 8);
-
-  size_t i = 0;
-  size_t length = strlen(input);
-
-  while (i < length) {
-    i += read_from_tok(&tok, input, i);
-  }
-
-  token_push(&tok, TOKEN_EOF, "EOF", BHV_UNDEFINED, 0);
-  return tok;
-}
-
-
-int main() {
-  char *input = "1 + 2 * 3 print";
-
-  Token tokens = tokenize_all(input);
-
-  for (size_t i = 0; i < tokens.size; i++) {
-    printf("[%s] \"%s\"\n", token_type_to_string(tokens.type[i]), tokens.text[i]);
-  }
-
-  token_free(&tokens);
-  return 0;
-}
diff --git a/parser3.c b/parser3.c
deleted file mode 100644
index d266465..0000000
--- a/parser3.c
+++ /dev/null
@@ -1,219 +0,0 @@
-#include "./lexer.h"
-#define NB_IMPLEMENTATION
-#include "./nb.h"
-
-int get_prec(symbols op){
-  switch (op) {
-    case TOKEN_MUL:
-    case TOKEN_DIV:
-       return 2; break;
-    case TOKEN_PLUS:
-    case TOKEN_MINUS:
-      return 1; break;
-    default: return 0;
-  }
-}
-// parse
-
-bool is_left_asc(symbols op){
-  switch (op) {
-    case TOKEN_MUL:
-    case TOKEN_DIV:
-    case TOKEN_PLUS:
-    case TOKEN_MINUS:
-      return true; break;
-    default: return false;
-  }
-}
-
-Token *global_tok = NULL;
-
-typedef enum {
-  SYM_VAR,
-  SYM_FUNC,
-} SymbolKind;
-
-typedef struct {
-  const char* name;
-  size_t ret_count;
-  size_t arg_count;
-  symbols arg_types[16];
-  symbols ret_type;
-  SymbolKind symbol_kind;
-  bool builtin;
-} Symbol;
-
-
-static Symbol builtins[] = {
-    { "print", 1, 1, { TOKEN_UNKNOWN }, TOKEN_EOF, SYM_FUNC, true },
-};
-
-
-typedef struct {
-  Symbol *symbols;
-  size_t size;
-  size_t capacity;
-} SymbolTable; 
-
-
-static int builtin_num = sizeof(builtins)/sizeof(builtins[0]);
-
-static SymbolTable global_env = {
-  .size = sizeof(builtins)/sizeof(builtins[0]),
-  .capacity = sizeof(builtins)/sizeof(builtins[0]),
-  .symbols = builtins};
-
-
-Symbol *symbol_lookup(SymbolTable *table, const char *n){
-  for (size_t i=0; i<table->size; ++i){
-    if(strcmp(n, table->symbols[i].name) == 0){
-      return &table->symbols[i];
-    }
-  }
-  return NULL;
-}
-
-// fn add(x: int, y: int) int {
-//   return x+y;
-// }
-
-
-void symbol_table_init(SymbolTable *table, size_t initial_capacity) {
-    table->symbols = malloc(sizeof(Symbol) * initial_capacity);
-    if (!table->symbols) {
-        fprintf(stderr, "symbol_table_init: malloc failed\n");
-        exit(1);
-    }
-    table->size = 0;
-    table->capacity = initial_capacity;
-}
-
-void symbol_table_add(SymbolTable *table, Symbol sym) {
-    if (table->size >= table->capacity) {
-        table->capacity = (table->capacity == 0) ? 8 : table->capacity * 2;
-        table->symbols = realloc(table->symbols, sizeof(Symbol) * table->capacity);
-        if (!table->symbols) {
-            fprintf(stderr, "symbol_table_add: realloc failed\n");
-            exit(1);
-        }
-    }
-    table->symbols[table->size++] = sym;
-}
-
-
-void symbol_table_free(SymbolTable *table) {
-    free(table->symbols);
-    table->symbols = NULL;
-    table->size = 0;
-    table->capacity = 0;
-}
-
-
-Token build_rpn(Token *inp, SymbolTable *symtab) {
-    Token output;
-    Token stack;
-
-    token_init(&output, 16);
-    token_init(&stack, 16);
-
-    for (size_t i = 0; i < inp->size; ++i) {
-        symbols type = inp->type[i];
-        const char *text = inp->text[i];
-
-        if (type == TOKEN_IDENTIFIER && i + 1 < inp->size && inp->type[i + 1] == TOKEN_LPAREN) {
-            Symbol *found = symbol_lookup(symtab, text);
-            if (!found) {
-                Symbol sym = {
-                    .name = strdup(text),
-                    .arg_count = 0,
-                    .ret_type = TOKEN_EOF,
-                    .symbol_kind = SYM_FUNC,
-                    .builtin = false
-                };
-                symbol_table_add(symtab, sym);
-            }
-            token_push(&stack, type, text, inp->behaviour[i], 0);
-        } else if (type == TOKEN_IDENTIFIER) {
-            Symbol *found = symbol_lookup(symtab, text);
-            if (!found) {
-                Symbol sym = {
-                    .name = strdup(text),
-                    .arg_count = 0,
-                    .ret_type = TOKEN_UNKNOWN,
-                    .symbol_kind = SYM_VAR,
-                    .builtin = false
-                };
-                symbol_table_add(symtab, sym);
-            }
-            token_push(&output, type, text, inp->behaviour[i], 0);
-        } else if (type == TOKEN_LPAREN) {
-            token_push(&stack, type, text, inp->behaviour[i], 0);
-        } else if (type == TOKEN_RPAREN) {
-            while (stack.size > 0 && stack.type[stack.size - 1] != TOKEN_LPAREN) {
-                token_push(&output, stack.type[stack.size - 1],
-                           stack.text[stack.size - 1],
-                           stack.behaviour[stack.size - 1], 0);
-                stack.size--;
-            }
-            if (stack.size > 0 && stack.type[stack.size - 1] == TOKEN_LPAREN)
-                stack.size--;
-            if (stack.size > 0 && stack.type[stack.size - 1] == TOKEN_IDENTIFIER) {
-                token_push(&output, stack.type[stack.size - 1],
-                           stack.text[stack.size - 1],
-                           stack.behaviour[stack.size - 1], 0);
-                stack.size--;
-            }
-        } else if (type == TOKEN_INTEGER || type == TOKEN_FLOAT || type == TOKEN_STRING) {
-            token_push(&output, type, text, inp->behaviour[i], 0);
-        } else if (is_left_asc(type)) {
-            while (stack.size > 0 && stack.type[stack.size - 1] != TOKEN_LPAREN &&
-                   (get_prec(stack.type[stack.size - 1]) > get_prec(type) ||
-                    get_prec(stack.type[stack.size - 1]) == get_prec(type)) &&
-                   is_left_asc(type)) {
-                token_push(&output, stack.type[stack.size - 1],
-                           stack.text[stack.size - 1],
-                           stack.behaviour[stack.size - 1], 0);
-                stack.size--;
-            }
-            token_push(&stack, type, text, inp->behaviour[i], 0);
-        }
-    }
-
-    while (stack.size > 0) {
-        token_push(&output, stack.type[stack.size - 1],
-                   stack.text[stack.size - 1],
-                   stack.behaviour[stack.size - 1], 0);
-        stack.size--;
-    }
-
-    token_push(&output, TOKEN_EOF, "EOF", BHV_UNDEFINED, 0);
-    return output;
-}
-
-void print_token(Token *tk){
-  for (size_t i=0; i<tk->size; ++i){
-    printf("TokenNum: %zu Type: %s Value: %s\n", i, tk->tktype[i], tk->text[i]);
-  }
-}
-
-
-
-
-int main(int argc, char **argv){
-  if (argc < 2) return -1;
-  const char ts[] = "\"hello\" hi + 2 2.312"; 
-  const char math[] = "print(((1+2)*6)/18)"; // = 1
-  const char print[] = "print(\"hello\")";
-  const char simple[] = "1 + (  3 + 3  )/4+4*3";
-  
-
-  char* read = nb_read_file(argv[1]);
-  Token tk = tokenize_all(read);
-  printf("INPUT: %s\n", read);
-  SymbolTable table = {0};
-  symbol_table_init(&table, 32);
-
-
-  Token rpn = build_rpn(&tk, &table);
-  print_token(&rpn); 
-}