lexer.c

#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

// Struct wrapping a single char pointer.
// Not referenced by any other code visible in this file.
typedef struct{
  char* mstr;
} mstring;

// Struct wrapping a single int.
// Not referenced by any other code visible in this file.
typedef struct{
  int mint;
} mint;

// Struct wrapping a single float.
// Not referenced by any other code visible in this file.
typedef struct{
  float myfloat;
} mfloat;

/*
 * Convert the leading decimal integer in `strint` to an int.
 *
 * Leading whitespace and an optional sign are accepted; parsing stops at the
 * first character that is not part of the number. Returns 0 when `strint` is
 * NULL or does not start with a number. Values outside the range of int are
 * clamped to INT_MAX / INT_MIN (atoi, used previously, has undefined
 * behaviour in that case).
 */
int str_to_int(char *strint){
  if (strint == NULL) {
    return 0;
  }

  // strtol itself saturates at LONG_MAX / LONG_MIN on overflow, so clamping
  // the long result is enough to stay inside int on every platform.
  long parsed = strtol(strint, NULL, 10);
  if (parsed > INT_MAX) {
    return INT_MAX;
  }
  if (parsed < INT_MIN) {
    return INT_MIN;
  }
  return (int)parsed;
}

/*
 * Convert the leading floating-point number in `strif` to a float using
 * strtof. Returns 0 when no conversion can be performed; the position where
 * parsing stopped is not reported to the caller.
 */
float str_to_float(char *strif){
  return strtof(strif, NULL);
}


// Token kinds produced by the lexer (read_from_tok / tokenize_all).
typedef enum{
  TOKEN_PLUS,     // '+'
  TOKEN_MINUS,    // '-'
  TOKEN_INTEGER,  // run of digits without a '.'
  TOKEN_FLOAT,    // run of digits containing a '.'
  TOKEN_SPACE,    // a single ' '; tokenize_all drops these
  TOKEN_STRING,   // run of alphabetic characters
  TOKEN_MUL,      // '*'
  TOKEN_DIV,      // '/'
  intdef,         // not produced by any code visible in this file
  TOKEN_UNKNOWN,  // any other single character
  TOKEN_EOF,      // sentinel appended by tokenize_all
} symbols;

// Coarse "behaviour" class attached to each token by the lexer.
typedef enum{
  BHV_STACK,      // assigned to the operator tokens + - * /
  BHV_UNDEFINED,  // assigned to unknown characters and to the EOF sentinel
  BHV_NUMBER,     // assigned to TOKEN_INTEGER
  BHV_STRING,     // assigned to TOKEN_STRING
  BHV_FLOAT,      // assigned to TOKEN_FLOAT
} symbol_bhv;


// One lexical token as returned by read_from_tok.
typedef struct{
  symbols type;            // kind of token
  char* text;              // heap-allocated spelling; the holder of the token frees it
  size_t text_len;         // length of `text` where the producer fills it in
  symbol_bhv behaviour;    // coarse behaviour class for the token
  uint cursor_skip;        // number of input characters this token consumed
                           // NOTE(review): `uint` is not standard C; presumably supplied by <sys/types.h> - confirm
  symbols previous_token;  // not assigned by any code visible in this file
} Token;

// Since tokenize_all now exists, previous_token is not really needed: the AST walk can work on the token array without each individual token carrying that data.

// Growable array of tokens, filled through tokenarr_push.
typedef struct{
  Token* unit;      // heap storage for the tokens (NULL while empty)
  size_t size;      // number of tokens currently stored
  size_t capacity;  // number of tokens `unit` has room for
} TokenArr;
// Maybe add a cursor to TokenArr as well, so that printf's width specifier (e.g. "%*s") can pad with whitespace and place a caret under the offending token, like this:
// input = 1 + 323 + =-=-
//                   ^
//                   |- Unknown Token 


// Lexer state. Only the input pointer exists so far; the cursor and line
// fields are still commented out, and the lexer_* functions are stubs.
typedef struct{
  char *content;  // input text to be tokenized
  // size_t cursor;
  // size_t line;
} Lexer;


// The AST will not necessarily be used; it just could be useful in the future.

// Discriminator for the union inside ASTNode.
typedef enum{
  AST_NUMBER,     // leaf: uses data.number
  AST_BINARY_OP,  // inner node: uses data.binary
} ASTNodeType;

// Forward typedef so the struct can point to itself through ASTNode*.
typedef struct ASTNode ASTNode;

// Node of the arithmetic expression tree. `type` selects which member of
// `data` is valid.
struct ASTNode {
  ASTNodeType type;
  union {
    // AST_NUMBER: literal value.
    struct { double value; } number;
    // AST_BINARY_OP: operator character ('+', '-', '*', '/') and operands.
    struct {
      char op;
      ASTNode* left;
      ASTNode* right;
    } binary;
  } data;
};

// Parser state: a view onto a token array plus the read position.
// The array length is not stored, so the parse functions rely on a
// TOKEN_EOF entry to know where the input ends.
typedef struct{
  Token* tokens;  // token array to parse (e.g. TokenArr.unit)
  size_t cursor;  // index of the next token to look at
} parser;
// Initialise `tokens` from a TokenArr's `unit` pointer.

// Lexer
// Placeholder for a Lexer constructor: it currently does nothing. Both
// parameters are explicitly discarded (presumably to silence
// unused-parameter warnings).
void lexer_new(char *content, size_t content_len){
  (void) content;
  (void) content_len;
}
// Token
// Placeholder for "fetch the next token from a Lexer": it currently does
// nothing and returns nothing; the parameter is explicitly discarded.
void lexer_next(Lexer *mylexer){
  (void) mylexer;
}

// Return a copy of the token at the parser's cursor without consuming it.
Token parser_peek(parser* p){
  const Token* slot = p->tokens + p->cursor;
  return *slot;
}

// Return a copy of the token at the cursor and move the cursor to the next
// token. The cursor never moves past a TOKEN_EOF entry: the parser struct
// does not know the array length, so stepping beyond the sentinel would make
// the next peek read outside the token array. Advancing at EOF therefore
// keeps returning the EOF token.
Token parser_advance(parser* p){
  Token current = p->tokens[p->cursor];
  if (current.type != TOKEN_EOF) {
    p->cursor++;
  }
  return current;
}

// Consume the current token if its type equals `tokent`.
// Returns true when a token was consumed, false when the cursor is unchanged.
bool parser_match(parser* p, symbols tokent){
  const bool matched = (parser_peek(p).type == tokent);
  if (matched) {
    parser_advance(p);
  }
  return matched;
}

// Allocate an AST_NUMBER leaf holding `val`.
// The caller owns the returned node. Terminates the program with an error
// message if the allocation fails, matching the parser's exit-on-error style.
ASTNode* ast_new_number(double val){
  ASTNode* node = malloc(sizeof *node);
  if (node == NULL) {
    fprintf(stderr, "Out of memory allocating AST number node\n");
    exit(EXIT_FAILURE);
  }
  node->type = AST_NUMBER;
  node->data.number.value = val;
  return node;
}

// Allocate an AST_BINARY_OP node for operator character `op` with operands
// `l` (left) and `r` (right). The operand pointers are stored as given, not
// copied. The caller owns the returned node. Terminates the program with an
// error message if the allocation fails.
ASTNode* ast_new_binary(char op, ASTNode* l, ASTNode* r){
  ASTNode* node = malloc(sizeof *node);
  if (node == NULL) {
    fprintf(stderr, "Out of memory allocating AST binary node\n");
    exit(EXIT_FAILURE);
  }
  node->type = AST_BINARY_OP;
  node->data.binary.op = op;
  node->data.binary.left = l;
  node->data.binary.right = r;
  return node;
}

// factor := INTEGER | FLOAT
// Consumes one numeric token and returns it as an AST_NUMBER leaf.
// Any other token (including end of input) is reported on stderr and
// terminates the program.
ASTNode* parse_factor(parser* p) {
    const Token current = parser_peek(p);

    switch (current.type) {
        case TOKEN_EOF:
            fprintf(stderr, "Unexpected end of input in factor\n");
            exit(EXIT_FAILURE);

        case TOKEN_INTEGER:
        case TOKEN_FLOAT:
            parser_advance(p);
            return ast_new_number(atof(current.text));

        default:
            fprintf(stderr, "Unexpected token '%s' in factor\n", current.text);
            exit(EXIT_FAILURE);
    }
}


// term := factor (('*' | '/') factor)*
// Builds a left-associative chain of binary nodes; the operator character is
// taken from the first character of the operator token's text.
ASTNode* parse_term(parser* p) {
    ASTNode* lhs = parse_factor(p);

    for (Token op = parser_peek(p);
         op.type == TOKEN_MUL || op.type == TOKEN_DIV;
         op = parser_peek(p)) {
        parser_advance(p);
        ASTNode* rhs = parse_factor(p);
        lhs = ast_new_binary(op.text[0], lhs, rhs);
    }

    return lhs;
}


// expression := term (('+' | '-') term)*
// Builds a left-associative chain of binary nodes on top of parse_term, so
// '*' and '/' bind tighter than '+' and '-'.
ASTNode* parse_expression(parser* p) {
    ASTNode* lhs = parse_term(p);

    for (Token op = parser_peek(p);
         op.type == TOKEN_PLUS || op.type == TOKEN_MINUS;
         op = parser_peek(p)) {
        parser_advance(p);
        ASTNode* rhs = parse_term(p);
        lhs = ast_new_binary(op.text[0], lhs, rhs);
    }

    return lhs;
}

// Recursively evaluate an expression tree.
// Number leaves yield their value; every other node is treated as a binary
// operation whose left operand is evaluated before its right operand.
// An operator other than + - * / is reported on stderr and terminates the
// program.
double eval_ast(ASTNode* node) {
    if (node->type == AST_NUMBER) {
        return node->data.number.value;
    }

    const char op = node->data.binary.op;
    const double lhs = eval_ast(node->data.binary.left);
    const double rhs = eval_ast(node->data.binary.right);

    if (op == '+') {
        return lhs + rhs;
    }
    if (op == '-') {
        return lhs - rhs;
    }
    if (op == '*') {
        return lhs * rhs;
    }
    if (op == '/') {
        return lhs / rhs;
    }

    fprintf(stderr, "Unknown op '%c'\n", op);
    exit(EXIT_FAILURE);
}


// A stack for arithmetic will be implemented later. Compiler or interpreter? Since this is a learning experience, I'm going to do the easier thing first.

// Return a freshly allocated, NUL-terminated copy of the first `len` bytes of
// `src`. Terminates the program if the allocation fails.
static char *tok_copy_text(const char *src, size_t len) {
    char *copy = malloc(len + 1);
    if (copy == NULL) {
        fprintf(stderr, "Out of memory while copying token text\n");
        exit(EXIT_FAILURE);
    }
    memcpy(copy, src, len);
    copy[len] = '\0';
    return copy;
}

/*
 * Lex exactly one token from `text`, starting at index `cursor`.
 *
 *   - digits with at most one '.'  -> TOKEN_INTEGER / TOKEN_FLOAT
 *   - run of alphabetic characters -> TOKEN_STRING
 *   - '+', '-', '*', '/', ' '      -> the matching single-character token
 *   - anything else                -> TOKEN_UNKNOWN (one character)
 *
 * The returned token owns `text` (heap allocated; the caller frees it) and
 * `cursor_skip` tells the caller how many input characters were consumed
 * (always at least 1). Every field of the token is initialised.
 *
 * A numeric literal containing a second '.' is a fatal error: a message is
 * printed to stderr and the program exits. This used to be an assert, which
 * vanishes when NDEBUG is defined.
 *
 * The token text is copied straight from the input, so there is no upper
 * bound on token length (the earlier fixed 64-byte scratch buffer overflowed
 * on long tokens).
 */
Token read_from_tok(char* text, uint cursor){
    Token mytoks = {0};
    const size_t start = cursor;
    size_t end = start;

    mytoks.cursor_skip = 1;
    mytoks.behaviour = BHV_UNDEFINED;
    // previous_token is not tracked by the lexer; give it a defined value.
    mytoks.previous_token = TOKEN_UNKNOWN;

    // <ctype.h> functions require a value representable as unsigned char;
    // passing a negative plain char is undefined behaviour.
    if (isdigit((unsigned char)text[start])) {
        int dots_seen = 0;
        while (isdigit((unsigned char)text[end]) || text[end] == '.') {
            if (text[end] == '.' && ++dots_seen > 1) {
                fprintf(stderr, "Malformed number: more than one '.' in literal\n");
                exit(EXIT_FAILURE);
            }
            end++;
        }

        if (dots_seen == 0) {
            mytoks.type = TOKEN_INTEGER;
            mytoks.behaviour = BHV_NUMBER;
        } else {
            mytoks.type = TOKEN_FLOAT;
            mytoks.behaviour = BHV_FLOAT;
        }
        mytoks.cursor_skip = (uint)(end - start);
        mytoks.text_len = end - start;
        mytoks.text = tok_copy_text(text + start, mytoks.text_len);
    }
    // word logic: a run of alphabetic characters
    else if (isalpha((unsigned char)text[start])) {
        while (isalpha((unsigned char)text[end])) {
            end++;
        }
        mytoks.type = TOKEN_STRING;
        mytoks.behaviour = BHV_STRING;
        mytoks.cursor_skip = (uint)(end - start);
        mytoks.text_len = end - start;
        mytoks.text = tok_copy_text(text + start, mytoks.text_len);
    }
    // single-character tokens
    else {
        // Fixed spelling for recognised characters; NULL means "copy the
        // input character itself" (unknown token).
        const char *spelling = NULL;

        switch (text[start]) {
            case '+':
                mytoks.type = TOKEN_PLUS;
                mytoks.behaviour = BHV_STACK;
                spelling = "+";
                break;
            case '-':
                mytoks.type = TOKEN_MINUS;
                mytoks.behaviour = BHV_STACK;
                spelling = "-";
                break;
            case ' ':
                mytoks.type = TOKEN_SPACE;
                spelling = "space";
                break;
            case '*':
                mytoks.type = TOKEN_MUL;
                mytoks.behaviour = BHV_STACK;
                spelling = "*";
                break;
            case '/':
                mytoks.type = TOKEN_DIV;
                mytoks.behaviour = BHV_STACK;
                spelling = "/";
                break;
            default:
                mytoks.type = TOKEN_UNKNOWN;
                mytoks.behaviour = BHV_UNDEFINED;
                break;
        }

        if (spelling != NULL) {
            mytoks.text = tok_copy_text(spelling, strlen(spelling));
        } else {
            // At the terminating NUL the unknown token's text is empty.
            mytoks.text = tok_copy_text(text + start, text[start] != '\0' ? 1 : 0);
        }
        mytoks.text_len = strlen(mytoks.text);
    }
    return mytoks;
}


// Append `tok` to `arr`, growing the storage when it is full (capacity
// starts at 8 and doubles). The token is copied by value, so the array takes
// over ownership of tok.text.
// Terminates the program with an error message if the array cannot grow;
// unlike the previous assert, this check is still active when NDEBUG is
// defined.
void tokenarr_push(TokenArr* arr, Token tok) {
    if (arr->size >= arr->capacity) {
        size_t new_capacity = arr->capacity ? arr->capacity * 2 : 8;

        // Reject a doubling that wrapped around, or a byte count that would.
        if (new_capacity <= arr->capacity || new_capacity > SIZE_MAX / sizeof(Token)) {
            fprintf(stderr, "Token array too large\n");
            exit(EXIT_FAILURE);
        }

        // Keep the old pointer until realloc has succeeded.
        Token* grown = realloc(arr->unit, new_capacity * sizeof *grown);
        if (grown == NULL) {
            fprintf(stderr, "Out of memory growing token array\n");
            exit(EXIT_FAILURE);
        }
        arr->unit = grown;
        arr->capacity = new_capacity;
    }
    arr->unit[arr->size++] = tok;
}

TokenArr tokenize_all(const char* input) {
    TokenArr arr = {NULL, 0, 0};
    size_t i = 0;
    size_t len = strlen(input);
    while (i < len) {
        Token tok = read_from_tok((char*)input, i);
        i += tok.cursor_skip;
        if (tok.type == TOKEN_SPACE) {
            free(tok.text);
            continue;
        }
        tokenarr_push(&arr, tok);
    }
    Token eof = {0};
    eof.type = TOKEN_EOF;
    eof.text = strdup("EOF");
    eof.text_len = 3;
    eof.behaviour = BHV_UNDEFINED;
    eof.cursor_skip = 0;
    tokenarr_push(&arr, eof);
    return arr;
}


// Token* c

// Debug helper: lex `input` token by token and print each token's text and
// behaviour to stdout. `mytok` is only used as scratch storage; the value
// passed in is overwritten before it is read.
//
// The loop advances by each token's cursor_skip. Stepping one character at a
// time re-lexed the tail of every multi-character token ("123" was printed
// as "123", "23", "3"). Each token's text is freed after printing.
void token_parser(Token mytok, char* input){
  size_t length1 = strlen(input);
  size_t i = 0;

  while (i < length1) {
    mytok = read_from_tok(input, i);

    printf("Text: %s\n", mytok.text);
    printf("Behaviour: %d\n", (int)mytok.behaviour);
    if (mytok.behaviour == BHV_STACK){
      printf("this is stack lil bro\n");
    }

    // Guard against a zero skip so the loop always makes progress.
    i += mytok.cursor_skip ? mytok.cursor_skip : 1;
    free(mytok.text);
  }
}


// operators accepted in int/digit or whatever type def only when they have a digit before AND after them 


/*
int main(){
  Token newtok;
  char* input = "8";

  parser(newtok, input);
}
*/

// Map a token kind to its enumerator name for debug output.
// Returns a pointer to a string literal: do not modify or free it.
// Values outside the enum yield "UNKNOWN_SYMBOL".
char* token_type_to_string(symbols type) {
    switch (type) {
        case TOKEN_PLUS:     return "TOKEN_PLUS";
        case TOKEN_MINUS:    return "TOKEN_MINUS";
        case TOKEN_INTEGER:  return "TOKEN_INTEGER";
        case TOKEN_FLOAT:    return "TOKEN_FLOAT";
        case TOKEN_SPACE:    return "TOKEN_SPACE";
        case TOKEN_STRING:   return "TOKEN_STRING";
        case TOKEN_MUL:      return "TOKEN_MUL";
        case TOKEN_DIV:      return "TOKEN_DIV";
        case intdef:         return "intdef";
        case TOKEN_UNKNOWN:  return "TOKEN_UNKNOWN";
        case TOKEN_EOF:      return "TOKEN_EOF";
        default:             return "UNKNOWN_SYMBOL";
    }
}

// Debug driver: lex a fixed sample string one token at a time and print each
// token's text and type (spaces included). Each token's text is freed after
// it has been printed.
void main2() {
    char* input = "323.23 + Hello world 102102";
    size_t length1 = strlen(input);
    size_t i = 0;
    printf("input: %s\n\n", input);
    while (i < length1) {
        Token result = read_from_tok(input, i);
        printf("text: %s\ntype: %u (%s)\n\n", result.text, (unsigned)result.type, token_type_to_string(result.type));
        i += result.cursor_skip;
        free(result.text);
    }
}


void mathparser(const char* input) {  
    TokenArr stack = tokenize_all(input);  
    float result = 0;
    float current = 0;  
    float sign = 1;
    float op = 0;  
    
    for (size_t i = 0; i < stack.size; ++i) {  
        switch (stack.unit[i].type) {  
            case TOKEN_INTEGER:
                {  
                float value = str_to_float(stack.unit[i].text);  
                if (op == 1) {  
                    current *= value;  
                    op = 0;  
                } else if (op == 2) { 
                    current /= value;  
                    op = 0;  
                } else {  
                    current = value;  
                }
                break;  
            }

            case TOKEN_FLOAT:
                {  
                float value = str_to_float(stack.unit[i].text);  
                if (op == 1) {  
                    current *= value;  
                    op = 0;  
                } else if (op == 2) { 
                    current /= value;  
                    op = 0;  
                } else {  
                    current = value;  
                }
                break;  
            }
            case TOKEN_PLUS:  
                result += sign * current;  
                sign = 1;  
                op = 0;  
                break;  
            case TOKEN_MINUS:  
                result += sign * current;  
                sign = -1;  
                op = 0;  
                break;  
            case TOKEN_MUL:  
                op = 1;  
                break;  
            case TOKEN_DIV:  
                op = 2;  
                break;  
            default:  
                break;  
        }
    }
    result += sign * current;  
    printf("%f\n", result);  
    for (size_t j = 0; j < stack.size; ++j) {  
        free(stack.unit[j].text);  
    }
    free(stack.unit);  
}


// Debug driver: tokenize a fixed sample string, print every token first in
// a multi-line form and then again on a single line, and release the tokens.
int main4() {
    char* input = "print(5) hello";
    printf("input: %s\n\n", input);

    TokenArr arr = tokenize_all(input);
    Token* const last = arr.unit + arr.size;

    for (Token* tok = arr.unit; tok != last; ++tok) {
        printf("text: %s\ntype: %u (%s)\n\n", tok->text, tok->type, token_type_to_string(tok->type));
    }

    printf("================ Tokenized =================\n");

    for (Token* tok = arr.unit; tok != last; ++tok) {
        printf("text: %s, type: %u (%s) || ", tok->text, tok->type, token_type_to_string(tok->type));
    }
    printf("\n");

    for (Token* tok = arr.unit; tok != last; ++tok) {
        free(tok->text);
    }
    free(arr.unit);
    return 0;
}


// Debug driver: run the single-pass evaluator (mathparser) on a fixed
// expression. Always returns 0; the function previously fell off the end
// without returning a value.
int main5(){
    char* input = "40/2.3 * 10 + 400";
    printf("input: %s\n", input);
    mathparser(input);
    return 0;
}


int main() {
    const char* input = "40/2.3 * 10 + 400 - 5";
    printf("Input: %s\n", input);

    TokenArr toks = tokenize_all(input);

    parser p = { toks.unit, 0 };
    ASTNode* root = parse_expression(&p);

    double result = eval_ast(root);
    printf("AST Result: %f\n", result);
    return 0;
}
the bases of the tokenizer is basically complete? 2025-07-22 15:57:51 +03:00			`#include <assert.h>`
filename update 2025-07-20 22:24:25 +03:00			`#include <ctype.h>`
			`#include <stddef.h>`
			`#include <stdio.h>`
			`#include <stdlib.h>`
			`#include <string.h>`
			`#include <sys/types.h>`
implemented some parts for AST parsing 2025-08-05 02:30:30 +03:00			`#include <stdbool.h>`
filename update 2025-07-20 22:24:25 +03:00
			`typedef struct{`
			`char* mstr;`
			`} mstring;`

			`typedef struct{`
			`int mint;`
			`} mint;`

			`typedef struct{`
			`float myfloat;`
			`} mfloat;`

added additon to the language. but in a shit way. will generalize it later 2025-07-28 17:30:45 +03:00			`int str_to_int(char *strint){`
			`int new_int = atoi(strint);`
			`return new_int;`
			`}`

string to float fixed. stupid mistake 2025-07-28 18:41:15 +03:00			`float str_to_float(char *strif){`
			`char *fptr;`
			`float new_int = strtof(strif, &fptr);`
			`return new_int;`
			`}`


filename update 2025-07-20 22:24:25 +03:00
			`typedef enum{`
			`TOKEN_PLUS,`
			`TOKEN_MINUS,`
			`TOKEN_INTEGER,`
the bases of the tokenizer is basically complete? 2025-07-22 15:57:51 +03:00			`TOKEN_FLOAT,`
lexer fix 2025-07-20 22:40:11 +03:00			`TOKEN_SPACE,`
implemented a lot of stuff 2025-07-21 13:34:20 +03:00			`TOKEN_STRING,`
mul and div 2025-07-28 18:07:23 +03:00			`TOKEN_MUL,`
			`TOKEN_DIV,`
filename update 2025-07-20 22:24:25 +03:00			`intdef,`
			`TOKEN_UNKNOWN,`
math AST parser finished. later more complex stuff 2025-08-05 12:06:22 +03:00			`TOKEN_EOF,`
filename update 2025-07-20 22:24:25 +03:00			`} symbols;`

implemented a lot of stuff 2025-07-21 13:34:20 +03:00			`typedef enum{`
			`BHV_STACK,`
			`BHV_UNDEFINED,`
			`BHV_NUMBER,`
			`BHV_STRING,`
the bases of the tokenizer is basically complete? 2025-07-22 15:57:51 +03:00			`BHV_FLOAT,`
implemented a lot of stuff 2025-07-21 13:34:20 +03:00			`} symbol_bhv;`

implemented basis for AST 2025-07-23 18:32:45 +03:00


filename update 2025-07-20 22:24:25 +03:00			`typedef struct{`
			`symbols type;`
			`char* text;`
			`size_t text_len;`
implemented a lot of stuff 2025-07-21 13:34:20 +03:00			`symbol_bhv behaviour;`
			`uint cursor_skip;`
the bases of the tokenizer is basically complete? 2025-07-22 15:57:51 +03:00			`symbols previous_token;`
filename update 2025-07-20 22:24:25 +03:00			`} Token;`

added additon to the language. but in a shit way. will generalize it later 2025-07-28 17:30:45 +03:00			`// since I now have tokenize all I dont really need previous_token. I can just ast walk it without each individual token carrying all data`

implemented basis for AST 2025-07-23 18:32:45 +03:00			`typedef struct{`
			`Token* unit;`
			`size_t size;`
			`size_t capacity;`
			`} TokenArr;`
implemented some parts for AST parsing 2025-08-05 02:30:30 +03:00			`// maybe should add cursor even for TokenArr to use C's printf % to add whitespace in order to move something like this`
			`// input = 1 + 323 + =-=-`
			`// ^`
			`// \|- Unknown Token`

implemented basis for AST 2025-07-23 18:32:45 +03:00
filename update 2025-07-20 22:24:25 +03:00			`typedef struct{`
			`char *content;`
			`// size_t cursor;`
			`// size_t line;`
			`} Lexer;`


added AST stucts even though im not use I will be using them in the future. walking on top of behaviours could be good enough? 2025-07-24 16:17:10 +03:00			`// will not nesseccarilly use AST. just could be useful in the future.`

			`typedef enum{`
			`AST_NUMBER,`
			`AST_BINARY_OP,`
			`} ASTNodeType;`

math AST parser finished. later more complex stuff 2025-08-05 12:06:22 +03:00			`typedef struct ASTNode ASTNode;`

			`struct ASTNode {`
added AST stucts even though im not use I will be using them in the future. walking on top of behaviours could be good enough? 2025-07-24 16:17:10 +03:00			`ASTNodeType type;`
			`union {`
math AST parser finished. later more complex stuff 2025-08-05 12:06:22 +03:00			`struct { double value; } number;`
added AST stucts even though im not use I will be using them in the future. walking on top of behaviours could be good enough? 2025-07-24 16:17:10 +03:00			`struct {`
			`char op;`
math AST parser finished. later more complex stuff 2025-08-05 12:06:22 +03:00			`ASTNode* left;`
			`ASTNode* right;`
added AST stucts even though im not use I will be using them in the future. walking on top of behaviours could be good enough? 2025-07-24 16:17:10 +03:00			`} binary;`
math AST parser finished. later more complex stuff 2025-08-05 12:06:22 +03:00			`} data;`
			`};`
added AST stucts even though im not use I will be using them in the future. walking on top of behaviours could be good enough? 2025-07-24 16:17:10 +03:00
			`typedef struct{`
implemented some parts for AST parsing 2025-08-05 02:30:30 +03:00			`Token* tokens;`
added AST stucts even though im not use I will be using them in the future. walking on top of behaviours could be good enough? 2025-07-24 16:17:10 +03:00			`size_t cursor;`
			`} parser;`
implemented some parts for AST parsing 2025-08-05 02:30:30 +03:00			`// tokenArr to token*`
added AST stucts even though im not use I will be using them in the future. walking on top of behaviours could be good enough? 2025-07-24 16:17:10 +03:00
filename update 2025-07-20 22:24:25 +03:00			`// Lexer`
			`void lexer_new(char *content, size_t content_len){`
added additon to the language. but in a shit way. will generalize it later 2025-07-28 17:30:45 +03:00			`(void) content;`
			`(void) content_len;`
filename update 2025-07-20 22:24:25 +03:00			`}`
			`// Token`
			`void lexer_next(Lexer *mylexer){`
added additon to the language. but in a shit way. will generalize it later 2025-07-28 17:30:45 +03:00			`(void) mylexer;`
filename update 2025-07-20 22:24:25 +03:00			`}`

implemented some parts for AST parsing 2025-08-05 02:30:30 +03:00			`Token parser_peek(parser* p){`
			`return p->tokens[p->cursor];`
			`}`

			`Token parser_advance(parser* p){`
			`return p->tokens[p->cursor++];`
			`}`

			`bool parser_match(parser* p, symbols tokent){`
			`if (parser_peek(p).type == tokent){`
			`parser_advance(p);`
			`return true;`
			`} else {`
			`return false;`
			`}`
			`}`

math AST parser finished. later more complex stuff 2025-08-05 12:06:22 +03:00			`ASTNode* ast_new_number(double val){`
			`ASTNode* node = malloc(sizeof(ASTNode));`
			`node->type = AST_NUMBER;`
			`node->data.number.value = val;`
			`return node;`
			`}`

			`ASTNode* ast_new_binary(char op, ASTNode* l, ASTNode* r){`
			`ASTNode* node = malloc(sizeof(ASTNode));`
			`node->type = AST_BINARY_OP;`
			`node->data.binary.op = op;`
			`node->data.binary.left = l;`
			`node->data.binary.right = r;`
			`// maybe need to fix`
			`return node;`
			`}`

			`ASTNode* parse_factor(parser* p) {`
			`Token tok = parser_peek(p);`
			`if (tok.type == TOKEN_EOF) {`
			`fprintf(stderr, "Unexpected end of input in factor\n");`
			`exit(EXIT_FAILURE);`
			`}`
			`if (tok.type == TOKEN_INTEGER \|\| tok.type == TOKEN_FLOAT) {`
			`parser_advance(p);`
			`double v = atof(tok.text);`
			`return ast_new_number(v);`
			`}`
			`fprintf(stderr, "Unexpected token '%s' in factor\n", tok.text);`
			`exit(EXIT_FAILURE);`
			`}`


			`ASTNode* parse_term(parser* p) {`
			`ASTNode* node = parse_factor(p);`
			`while (true) {`
			`Token tok = parser_peek(p);`
			`if (tok.type == TOKEN_MUL \|\| tok.type == TOKEN_DIV) {`
			`parser_advance(p);`
			`ASTNode* right = parse_factor(p);`
			`node = ast_new_binary(tok.text[0], node, right);`
			`} else {`
			`break;`
			`}`
			`}`
			`return node;`
			`}`


			`ASTNode* parse_expression(parser* p) {`
			`ASTNode* node = parse_term(p);`
			`while (true) {`
			`Token tok = parser_peek(p);`
			`if (tok.type == TOKEN_PLUS \|\| tok.type == TOKEN_MINUS) {`
			`parser_advance(p);`
			`ASTNode* right = parse_term(p);`
			`node = ast_new_binary(tok.text[0], node, right);`
			`} else {`
			`break;`
			`}`
			`}`
			`return node;`
			`}`

			`double eval_ast(ASTNode* node) {`
			`if (node->type == AST_NUMBER) {`
			`return node->data.number.value;`
			`}`
			`double L = eval_ast(node->data.binary.left);`
			`double R = eval_ast(node->data.binary.right);`
			`switch (node->data.binary.op) {`
			`case '+': return L + R;`
			`case '-': return L - R;`
			`case '*': return L * R;`
			`case '/': return L / R;`
			`default:`
			`fprintf(stderr, "Unknown op '%c'\n", node->data.binary.op);`
			`exit(EXIT_FAILURE);`
			`}`
			`}`



implemented some parts for AST parsing 2025-08-05 02:30:30 +03:00
lexer readability 2025-07-20 22:49:50 +03:00			`// will implement a stack for arithmetic later. do I want a compiler or interpreter? since this is a learning experience im gonna do the easier thing first`
filename update 2025-07-20 22:24:25 +03:00
			`Token read_from_tok(char* text, uint cursor){`
			`Token mytoks;`
implemented a lot of stuff 2025-07-21 13:34:20 +03:00
filename update 2025-07-20 22:24:25 +03:00			`static char buf[64];`
			`size_t i = 0;`
implemented a lot of stuff 2025-07-21 13:34:20 +03:00			`mytoks.cursor_skip = 1;`

the bases of the tokenizer is basically complete? 2025-07-22 15:57:51 +03:00			`// integer logic. will have to somehow detect "." for floats but it will be hard to do because the way I wrote this code is shit`
			`// ie: checking for . depends on the switch statement. so I will have to maybe add previous_token to the token struct. Actually a feasible idea.`
			`// will I need to set previous_token to the current token? maybe.`
lexer readability 2025-07-20 22:42:44 +03:00			`if (isdigit(text[cursor])) {`
filename update 2025-07-20 22:24:25 +03:00			`size_t start = cursor;`
the bases of the tokenizer is basically complete? 2025-07-22 15:57:51 +03:00			`int dots_seen = 0;`
			`while ( isdigit(text[cursor]) \|\| text[cursor] == '.') {`
			`if (text[cursor] == '.') {`
			`dots_seen +=1;`
			`assert(dots_seen < 2);`
			`}`
			`buf[i++] = text[cursor++];`
filename update 2025-07-20 22:24:25 +03:00			`}`
the bases of the tokenizer is basically complete? 2025-07-22 15:57:51 +03:00
			`// recheck this assert later`


filename update 2025-07-20 22:24:25 +03:00			`buf[i] = '\0';`
the bases of the tokenizer is basically complete? 2025-07-22 15:57:51 +03:00
			`if (!dots_seen){`
			`mytoks.type = TOKEN_INTEGER;`
			`mytoks.behaviour = BHV_NUMBER;`
			`} else {`
			`mytoks.type = TOKEN_FLOAT;`
			`mytoks.behaviour = BHV_FLOAT;`
			`}`


implemented a lot of stuff 2025-07-21 13:34:20 +03:00			`mytoks.cursor_skip = cursor - start;`
added AST stucts even though im not use I will be using them in the future. walking on top of behaviours could be good enough? 2025-07-24 16:17:10 +03:00			`mytoks.text = strdup(buf);`
implemented a lot of stuff 2025-07-21 13:34:20 +03:00			`mytoks.text_len = i;`
the bases of the tokenizer is basically complete? 2025-07-22 15:57:51 +03:00			`}`
			`// string logic`
			`else if (isalpha(text[cursor])){`
implemented a lot of stuff 2025-07-21 13:34:20 +03:00			`size_t start = cursor;`
			`while (isalpha(text[cursor])) {`
			`buf[i++] = text[cursor++];`
			`}`
			`buf[i] = '\0';`
			`mytoks.type = TOKEN_STRING;`
			`mytoks.behaviour = BHV_STRING;`
			`mytoks.cursor_skip = cursor - start;`
added AST stucts even though im not use I will be using them in the future. walking on top of behaviours could be good enough? 2025-07-24 16:17:10 +03:00			`mytoks.text = strdup(buf);`
filename update 2025-07-20 22:24:25 +03:00			`mytoks.text_len = i;`
lexer readability 2025-07-20 22:42:44 +03:00			`}`
implemented a lot of stuff 2025-07-21 13:34:20 +03:00
lexer readability 2025-07-20 22:42:44 +03:00			`else {`
filename update 2025-07-20 22:24:25 +03:00			`buf[0] = text[cursor];`
			`buf[1] = '\0';`

			`switch (text[cursor]){`
			`case '+':`
			`mytoks.type = TOKEN_PLUS;`
mul and div 2025-07-28 18:07:23 +03:00			`// asigning text is not really needed unless for debug. could however be useful for codegen later. NOW IT BECAME A MUST LOL`
added AST stucts even though im not use I will be using them in the future. walking on top of behaviours could be good enough? 2025-07-24 16:17:10 +03:00			`mytoks.text = strdup("+");`
implemented a lot of stuff 2025-07-21 13:34:20 +03:00			`mytoks.behaviour = BHV_STACK;`
			`break;`
filename update 2025-07-20 22:24:25 +03:00			`case '-':`
			`mytoks.type = TOKEN_MINUS;`
added AST stucts even though im not use I will be using them in the future. walking on top of behaviours could be good enough? 2025-07-24 16:17:10 +03:00			`mytoks.text = strdup("-");`
implemented a lot of stuff 2025-07-21 13:34:20 +03:00			`mytoks.behaviour = BHV_STACK;`
filename update 2025-07-20 22:24:25 +03:00			`break;`
lexer fix 2025-07-20 22:40:11 +03:00			`case ' ':`
			`mytoks.type = TOKEN_SPACE;`
added AST stucts even though im not use I will be using them in the future. walking on top of behaviours could be good enough? 2025-07-24 16:17:10 +03:00			`mytoks.text = strdup("space");`
lexer fix 2025-07-20 22:40:11 +03:00			`break;`
mul and div 2025-07-28 18:07:23 +03:00			`case '*':`
			`mytoks.type = TOKEN_MUL;`
			`mytoks.text = strdup("*");`
			`mytoks.behaviour = BHV_STACK;`
			`break;`
			`case '/':`
			`mytoks.type = TOKEN_DIV;`
			`mytoks.text = strdup("/");`
			`mytoks.behaviour = BHV_STACK;`
			`break;`
filename update 2025-07-20 22:24:25 +03:00			`default:`
lexer fix 2025-07-20 22:40:11 +03:00			`mytoks.type = TOKEN_UNKNOWN;`
implemented a lot of stuff 2025-07-21 13:34:20 +03:00			`mytoks.behaviour = BHV_UNDEFINED;`
fixed unknown tokens after migrating to strdup 2025-07-24 16:19:49 +03:00			`mytoks.text = strdup(buf);`
implemented a lot of stuff 2025-07-21 13:34:20 +03:00
filename update 2025-07-20 22:24:25 +03:00			`}`
			`}`
			`return mytoks;`
			`}`


implemented basis for AST 2025-07-23 18:32:45 +03:00			`void tokenarr_push(TokenArr* arr, Token tok) {`
			`if (arr->size >= arr->capacity) {`
			`arr->capacity = arr->capacity ? arr->capacity * 2 : 8;`
			`arr->unit = realloc(arr->unit, arr->capacity * sizeof(Token));`
			`assert(arr->unit != NULL);`
			`}`
			`arr->unit[arr->size++] = tok;`
			`}`

			`TokenArr tokenize_all(const char* input) {`
			`TokenArr arr = {NULL, 0, 0};`
			`size_t i = 0;`
			`size_t len = strlen(input);`
			`while (i < len) {`
			`Token tok = read_from_tok((char*)input, i);`
			`i += tok.cursor_skip;`
math AST parser finished. later more complex stuff 2025-08-05 12:06:22 +03:00			`if (tok.type == TOKEN_SPACE) {`
			`free(tok.text);`
			`continue;`
			`}`
			`tokenarr_push(&arr, tok);`
implemented basis for AST 2025-07-23 18:32:45 +03:00			`}`
math AST parser finished. later more complex stuff 2025-08-05 12:06:22 +03:00			`Token eof = {0};`
			`eof.type = TOKEN_EOF;`
			`eof.text = strdup("EOF");`
			`eof.text_len = 3;`
			`eof.behaviour = BHV_UNDEFINED;`
			`eof.cursor_skip = 0;`
			`tokenarr_push(&arr, eof);`
implemented basis for AST 2025-07-23 18:32:45 +03:00			`return arr;`
			`}`



filename update 2025-07-20 22:24:25 +03:00			`// Token* c`

added AST stucts even though im not use I will be using them in the future. walking on top of behaviours could be good enough? 2025-07-24 16:17:10 +03:00			`void token_parser(Token mytok, char* input){`
implemented a lot of stuff 2025-07-21 13:34:20 +03:00			`int length1 = strlen(input);`
			`int i=0;`



			`while (i < length1) {`
			`mytok = read_from_tok(input, i);`

			`printf("Text: %s\n", mytok.text);`
			`printf("Behaviour: %d\n", mytok.behaviour);`
			`if (mytok.behaviour == BHV_STACK){`
			`printf("this is stack lil bro\n");`
			`}`
			`i++;`
			`}`
			`}`
filename update 2025-07-20 22:24:25 +03:00

			`// operators accepted in int/digit or whatever type def only when they have a digit before AND after them`

implemented a lot of stuff 2025-07-21 13:34:20 +03:00
			`/*`
filename update 2025-07-20 22:24:25 +03:00			`int main(){`
			`Token newtok;`
implemented a lot of stuff 2025-07-21 13:34:20 +03:00			`char* input = "8";`

			`parser(newtok, input);`
			`}`
			`*/`

implemented basis for AST 2025-07-23 18:32:45 +03:00			`char* token_type_to_string(symbols type) {`
more informational main for debug 2025-07-22 16:18:37 +03:00			`switch (type) {`
			`case TOKEN_PLUS: return "TOKEN_PLUS";`
			`case TOKEN_MINUS: return "TOKEN_MINUS";`
			`case TOKEN_INTEGER: return "TOKEN_INTEGER";`
			`case TOKEN_FLOAT: return "TOKEN_FLOAT";`
			`case TOKEN_SPACE: return "TOKEN_SPACE";`
			`case TOKEN_STRING: return "TOKEN_STRING";`
			`case intdef: return "intdef";`
			`case TOKEN_UNKNOWN: return "TOKEN_UNKNOWN";`
			`default: return "UNKNOWN_SYMBOL";`
			`}`
			`}`

implemented basis for AST 2025-07-23 18:32:45 +03:00			`void main2() {`
more informational main for debug 2025-07-22 16:18:37 +03:00			`char* input = "323.23 + Hello world 102102";`
implemented a lot of stuff 2025-07-21 13:34:20 +03:00			`int length1 = strlen(input);`
			`int i = 0;`
more informational main for debug 2025-07-22 16:18:37 +03:00			`printf("input: %s\n\n", input);`
implemented a lot of stuff 2025-07-21 13:34:20 +03:00			`while (i < length1) {`
more informational main for debug 2025-07-22 16:18:37 +03:00			`Token result = read_from_tok(input, i);`
			`printf("text: %s\ntype: %u (%s)\n\n", result.text, result.type, token_type_to_string(result.type));`
mul and div 2025-07-28 18:07:23 +03:00			`i += result.cursor_skip;`
filename update 2025-07-20 22:24:25 +03:00			`}`
			`}`
implemented basis for AST 2025-07-23 18:32:45 +03:00


string to float fixed. stupid mistake 2025-07-28 18:41:15 +03:00			`void mathparser(const char* input) {`
mul and div 2025-07-28 18:07:23 +03:00			`TokenArr stack = tokenize_all(input);`
string to float fixed. stupid mistake 2025-07-28 18:41:15 +03:00			`float result = 0;`
			`float current = 0;`
			`float sign = 1;`
			`float op = 0;`
mul and div 2025-07-28 18:07:23 +03:00
			`for (size_t i = 0; i < stack.size; ++i) {`
			`switch (stack.unit[i].type) {`
string to float fixed. stupid mistake 2025-07-28 18:41:15 +03:00			`case TOKEN_INTEGER:`
			`{`
			`float value = str_to_float(stack.unit[i].text);`
			`if (op == 1) {`
			`current *= value;`
			`op = 0;`
			`} else if (op == 2) {`
			`current /= value;`
			`op = 0;`
			`} else {`
			`current = value;`
			`}`
			`break;`
			`}`

			`case TOKEN_FLOAT:`
			`{`
			`float value = str_to_float(stack.unit[i].text);`
mul and div 2025-07-28 18:07:23 +03:00			`if (op == 1) {`
			`current *= value;`
			`op = 0;`
			`} else if (op == 2) {`
			`current /= value;`
			`op = 0;`
			`} else {`
			`current = value;`
			`}`
			`break;`
			`}`
			`case TOKEN_PLUS:`
			`result += sign * current;`
			`sign = 1;`
			`op = 0;`
			`break;`
			`case TOKEN_MINUS:`
			`result += sign * current;`
			`sign = -1;`
			`op = 0;`
			`break;`
			`case TOKEN_MUL:`
			`op = 1;`
			`break;`
			`case TOKEN_DIV:`
			`op = 2;`
			`break;`
			`default:`
			`break;`
addition and substraction added. supports multiple numbers 2025-07-28 17:56:35 +03:00			`}`
added additon to the language. but in a shit way. will generalize it later 2025-07-28 17:30:45 +03:00			`}`
string to float fixed. stupid mistake 2025-07-28 18:41:15 +03:00			`result += sign * current;`
			`printf("%f\n", result);`
mul and div 2025-07-28 18:07:23 +03:00			`for (size_t j = 0; j < stack.size; ++j) {`
			`free(stack.unit[j].text);`
addition and substraction added. supports multiple numbers 2025-07-28 17:56:35 +03:00			`}`
mul and div 2025-07-28 18:07:23 +03:00			`free(stack.unit);`
added additon to the language. but in a shit way. will generalize it later 2025-07-28 17:30:45 +03:00			`}`


			`int main4() {`
fixed unknown tokens after migrating to strdup 2025-07-24 16:19:49 +03:00			`char* input = "print(5) hello";`
implemented basis for AST 2025-07-23 18:32:45 +03:00			`printf("input: %s\n\n", input);`

			`TokenArr arr = tokenize_all(input);`
added AST stucts even though im not use I will be using them in the future. walking on top of behaviours could be good enough? 2025-07-24 16:17:10 +03:00
implemented basis for AST 2025-07-23 18:32:45 +03:00			`for (size_t j = 0; j < arr.size; ++j) {`
			`Token* result = &arr.unit[j];`
			`printf("text: %s\ntype: %u (%s)\n\n", result->text, result->type, token_type_to_string(result->type));`
			`}`
added AST stucts even though im not use I will be using them in the future. walking on top of behaviours could be good enough? 2025-07-24 16:17:10 +03:00
			`printf("================ Tokenized =================\n");`

			`for (size_t j = 0; j < arr.size; ++j) {`
			`Token* result = &arr.unit[j];`
			`printf("text: %s, type: %u (%s) \|\| ", result->text, result->type, token_type_to_string(result->type));`
			`}`
			`printf("\n");`
			`for (size_t j = 0; j < arr.size; ++j) {`
			`free(arr.unit[j].text);`
			`}`
implemented basis for AST 2025-07-23 18:32:45 +03:00			`free(arr.unit);`
			`return 0;`
			`}`
added additon to the language. but in a shit way. will generalize it later 2025-07-28 17:30:45 +03:00




math AST parser finished. later more complex stuff 2025-08-05 12:06:22 +03:00			`int main5(){`
string to float fixed. stupid mistake 2025-07-28 18:41:15 +03:00			`char* input = "40/2.3 * 10 + 400";`
			`printf("input: %s\n", input);`
			`mathparser(input);`
added additon to the language. but in a shit way. will generalize it later 2025-07-28 17:30:45 +03:00			`}`
math AST parser finished. later more complex stuff 2025-08-05 12:06:22 +03:00

			`int main() {`
			`const char* input = "40/2.3 * 10 + 400 - 5";`
			`printf("Input: %s\n", input);`

			`TokenArr toks = tokenize_all(input);`

			`parser p = { toks.unit, 0 };`
			`ASTNode* root = parse_expression(&p);`

			`double result = eval_ast(root);`
			`printf("AST Result: %f\n", result);`
			`return 0;`
			`}`