2025-07-22 15:57:51 +03:00
#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
2025-07-20 22:24:25 +03:00
// Value wrapper holding a single C string pointer.
typedef struct {
    char *mstr; // the wrapped string
} mstring;
// Value wrapper holding a single int.
// The member shares the typedef's name; members live in their own namespace, so this is legal C.
typedef struct {
    int mint; // the wrapped integer
} mint;
// Value wrapper holding a single float.
typedef struct {
    float myfloat; // the wrapped floating-point value
} mfloat;
2025-07-28 17:30:45 +03:00
// Convert the leading decimal integer in `strint` to an int.
//
// Returns 0 when `strint` is NULL or does not start with a number (the same
// result atoi gave for non-numeric text). Values outside the range of int are
// clamped to INT_MIN / INT_MAX instead of invoking the undefined behaviour
// atoi has on overflow.
int str_to_int(const char *strint) {
    if (strint == NULL) {
        return 0;
    }

    char *end = NULL;
    long parsed = strtol(strint, &end, 10);

    // No digits consumed: nothing to convert.
    if (end == strint) {
        return 0;
    }
    // strtol saturates at LONG_MIN/LONG_MAX on overflow, so clamping to the
    // int range also covers that case.
    if (parsed > INT_MAX) {
        return INT_MAX;
    }
    if (parsed < INT_MIN) {
        return INT_MIN;
    }
    return (int)parsed;
}
2025-07-28 18:41:15 +03:00
// Parse the leading floating-point number in `strif` with strtof and return it.
// Text that does not start with a number yields 0; the end-of-parse position
// is not reported to the caller.
float str_to_float(char *strif) {
    char *parse_end;
    return strtof(strif, &parse_end);
}
2025-07-20 22:24:25 +03:00
// Lexical categories a token can have.
// The enumerator order fixes the numeric values, so it must not be changed.
typedef enum {
    TOKEN_PLUS,    // '+'
    TOKEN_MINUS,   // '-'
    TOKEN_INTEGER, // run of digits without a '.'
    TOKEN_FLOAT,   // run of digits containing a '.'
    TOKEN_SPACE,   // a single space character
    TOKEN_STRING,  // run of alphabetic characters
    TOKEN_MUL,     // '*'
    TOKEN_DIV,     // '/'
    intdef,        // not produced by the lexer in this file
    TOKEN_UNKNOWN, // any character the lexer does not recognise
    TOKEN_EOF,     // sentinel appended after the last real token
} symbols;
2025-07-21 13:34:20 +03:00
// Behaviour class attached to a token by the lexer.
// The enumerator order fixes the numeric values, so it must not be changed.
typedef enum {
    BHV_STACK,     // arithmetic operators (+ - * /)
    BHV_UNDEFINED, // unknown characters and the EOF sentinel
    BHV_NUMBER,    // integer literals
    BHV_STRING,    // alphabetic words
    BHV_FLOAT,     // floating-point literals
} symbol_bhv;
2025-07-23 18:32:45 +03:00
2025-07-20 22:24:25 +03:00
typedef struct {
symbols type ;
char * text ;
size_t text_len ;
2025-07-21 13:34:20 +03:00
symbol_bhv behaviour ;
uint cursor_skip ;
2025-07-22 15:57:51 +03:00
symbols previous_token ;
2025-07-20 22:24:25 +03:00
} Token ;
2025-07-28 17:30:45 +03:00
// Since tokenize_all now exists, previous_token is not really needed: the AST walk can work without each individual token carrying all that data.
2025-07-23 18:32:45 +03:00
// Growable array of tokens (see tokenarr_push).
typedef struct {
    Token *unit;     // heap storage for the tokens; NULL while empty
    size_t size;     // number of tokens currently stored
    size_t capacity; // number of tokens `unit` has room for
} TokenArr;
2025-08-05 02:30:30 +03:00
// Idea: maybe add a cursor to TokenArr as well, so printf's width specifier (%*s) can pad with whitespace and point at the offending token, something like this:
// input = 1 + 323 + =-=-
// ^
// |- Unknown Token
2025-07-23 18:32:45 +03:00
2025-07-20 22:24:25 +03:00
// Lexer state. Only the input text is stored for now; the position fields are
// still commented out.
typedef struct {
    char *content; // text to be tokenized
    // size_t cursor;
    // size_t line;
} Lexer;
2025-07-24 16:17:10 +03:00
// Will not necessarily use the AST; it just could be useful in the future.
// Discriminator for the union inside ASTNode.
typedef enum {
    AST_NUMBER,    // leaf node: uses data.number
    AST_BINARY_OP, // inner node: uses data.binary
} ASTNodeType;
2025-08-05 12:06:22 +03:00
typedef struct ASTNode ASTNode ;
struct ASTNode {
2025-07-24 16:17:10 +03:00
ASTNodeType type ;
union {
2025-08-05 12:06:22 +03:00
struct { double value ; } number ;
2025-07-24 16:17:10 +03:00
struct {
char op ;
2025-08-05 12:06:22 +03:00
ASTNode * left ;
ASTNode * right ;
2025-07-24 16:17:10 +03:00
} binary ;
2025-08-05 12:06:22 +03:00
} data ;
} ;
2025-07-24 16:17:10 +03:00
typedef struct {
2025-08-05 02:30:30 +03:00
Token * tokens ;
2025-07-24 16:17:10 +03:00
size_t cursor ;
} parser ;
2025-08-05 02:30:30 +03:00
// tokenArr to token*
2025-07-24 16:17:10 +03:00
2025-07-20 22:24:25 +03:00
// Lexer
// Placeholder for a future Lexer constructor. It currently does nothing; the
// casts only silence unused-parameter warnings.
void lexer_new(char *content, size_t content_len) {
    (void)content_len;
    (void)content;
}
// Token
// Placeholder for a future "produce the next token" step. It currently does
// nothing; the cast only silences the unused-parameter warning.
void lexer_next(Lexer *mylexer) {
    (void)mylexer;
}
2025-08-05 02:30:30 +03:00
// Return a copy of the token under the cursor without consuming it.
Token parser_peek(parser *p) {
    const Token *current = p->tokens + p->cursor;
    return *current;
}
// Return the token under the cursor and move the cursor to the next token.
//
// The TOKEN_EOF sentinel is "sticky": once the cursor reaches it, further
// calls keep returning it without moving. The parser struct carries no
// length, so stepping past the sentinel would read beyond the token array.
Token parser_advance(parser *p) {
    Token current = p->tokens[p->cursor];
    if (current.type != TOKEN_EOF) {
        p->cursor++;
    }
    return current;
}
// Consume the current token if its type equals `tokent`.
// Returns true when a token was consumed, false (cursor untouched) otherwise.
bool parser_match(parser *p, symbols tokent) {
    const bool matched = parser_peek(p).type == tokent;
    if (matched) {
        parser_advance(p);
    }
    return matched;
}
2025-08-05 12:06:22 +03:00
ASTNode * ast_new_number ( double val ) {
ASTNode * node = malloc ( sizeof ( ASTNode ) ) ;
node - > type = AST_NUMBER ;
node - > data . number . value = val ;
return node ;
}
// Allocate a binary-operator node for `l op r`.
// The new node stores the `l` and `r` pointers as given (no copy is made).
// Exits the process if allocation fails, matching the parser's other
// fatal-error paths.
ASTNode *ast_new_binary(char op, ASTNode *l, ASTNode *r) {
    ASTNode *node = malloc(sizeof *node);
    if (node == NULL) {
        fprintf(stderr, "Out of memory while allocating AST node\n");
        exit(EXIT_FAILURE);
    }
    node->type = AST_BINARY_OP;
    node->data.binary.op = op;
    node->data.binary.left = l;
    node->data.binary.right = r;
    return node;
}
// factor := INTEGER | FLOAT
//
// Consumes one numeric token and returns a number node for it. Any other
// token, including the end of input, is a fatal error: a message is printed
// to stderr and the process exits.
ASTNode *parse_factor(parser *p) {
    Token current = parser_peek(p);

    switch (current.type) {
    case TOKEN_EOF:
        fprintf(stderr, "Unexpected end of input in factor\n");
        exit(EXIT_FAILURE);
    case TOKEN_INTEGER:
    case TOKEN_FLOAT:
        parser_advance(p);
        return ast_new_number(atof(current.text));
    default:
        fprintf(stderr, "Unexpected token '%s' in factor\n", current.text);
        exit(EXIT_FAILURE);
    }
}
// term := factor (('*' | '/') factor)*
//
// Builds a left-associative chain of multiplication/division nodes.
ASTNode *parse_term(parser *p) {
    ASTNode *lhs = parse_factor(p);

    for (;;) {
        Token op = parser_peek(p);
        if (op.type != TOKEN_MUL && op.type != TOKEN_DIV) {
            return lhs;
        }
        parser_advance(p);
        ASTNode *rhs = parse_factor(p);
        // The operator token's text is a single character, stored as the node's op.
        lhs = ast_new_binary(op.text[0], lhs, rhs);
    }
}
// expression := term (('+' | '-') term)*
//
// Builds a left-associative chain of addition/subtraction nodes. Terms bind
// tighter, which gives '*' and '/' precedence over '+' and '-'.
ASTNode *parse_expression(parser *p) {
    ASTNode *lhs = parse_term(p);

    for (;;) {
        Token op = parser_peek(p);
        if (op.type != TOKEN_PLUS && op.type != TOKEN_MINUS) {
            return lhs;
        }
        parser_advance(p);
        ASTNode *rhs = parse_term(p);
        // The operator token's text is a single character, stored as the node's op.
        lhs = ast_new_binary(op.text[0], lhs, rhs);
    }
}
// Recursively evaluate an expression tree and return its value.
// A binary node with an operator other than + - * / is a fatal error: a
// message is printed to stderr and the process exits.
double eval_ast(ASTNode *node) {
    if (node->type == AST_NUMBER) {
        return node->data.number.value;
    }

    // Both operands are evaluated (left first) before the operator is inspected.
    const double lhs = eval_ast(node->data.binary.left);
    const double rhs = eval_ast(node->data.binary.right);
    const char op = node->data.binary.op;

    if (op == '+') {
        return lhs + rhs;
    }
    if (op == '-') {
        return lhs - rhs;
    }
    if (op == '*') {
        return lhs * rhs;
    }
    if (op == '/') {
        return lhs / rhs;
    }

    fprintf(stderr, "Unknown op '%c'\n", op);
    exit(EXIT_FAILURE);
}
2025-08-05 02:30:30 +03:00
2025-07-20 22:49:50 +03:00
// TODO: implement a stack for arithmetic later. Compiler or interpreter? Since this is a learning project, I'm going to do the easier thing first.
2025-07-20 22:24:25 +03:00
Token read_from_tok ( char * text , uint cursor ) {
Token mytoks ;
2025-07-21 13:34:20 +03:00
2025-07-20 22:24:25 +03:00
static char buf [ 64 ] ;
size_t i = 0 ;
2025-07-21 13:34:20 +03:00
mytoks . cursor_skip = 1 ;
2025-07-22 15:57:51 +03:00
// integer logic. will have to somehow detect "." for floats but it will be hard to do because the way I wrote this code is shit
// ie: checking for . depends on the switch statement. so I will have to maybe add previous_token to the token struct. Actually a feasible idea.
// will I need to set previous_token to the current token? maybe.
2025-07-20 22:42:44 +03:00
if ( isdigit ( text [ cursor ] ) ) {
2025-07-20 22:24:25 +03:00
size_t start = cursor ;
2025-07-22 15:57:51 +03:00
int dots_seen = 0 ;
while ( isdigit ( text [ cursor ] ) | | text [ cursor ] = = ' . ' ) {
if ( text [ cursor ] = = ' . ' ) {
dots_seen + = 1 ;
assert ( dots_seen < 2 ) ;
}
buf [ i + + ] = text [ cursor + + ] ;
2025-07-20 22:24:25 +03:00
}
2025-07-22 15:57:51 +03:00
// recheck this assert later
2025-07-20 22:24:25 +03:00
buf [ i ] = ' \0 ' ;
2025-07-22 15:57:51 +03:00
if ( ! dots_seen ) {
mytoks . type = TOKEN_INTEGER ;
mytoks . behaviour = BHV_NUMBER ;
} else {
mytoks . type = TOKEN_FLOAT ;
mytoks . behaviour = BHV_FLOAT ;
}
2025-07-21 13:34:20 +03:00
mytoks . cursor_skip = cursor - start ;
2025-07-24 16:17:10 +03:00
mytoks . text = strdup ( buf ) ;
2025-07-21 13:34:20 +03:00
mytoks . text_len = i ;
2025-07-22 15:57:51 +03:00
}
// string logic
else if ( isalpha ( text [ cursor ] ) ) {
2025-07-21 13:34:20 +03:00
size_t start = cursor ;
while ( isalpha ( text [ cursor ] ) ) {
buf [ i + + ] = text [ cursor + + ] ;
}
buf [ i ] = ' \0 ' ;
mytoks . type = TOKEN_STRING ;
mytoks . behaviour = BHV_STRING ;
mytoks . cursor_skip = cursor - start ;
2025-07-24 16:17:10 +03:00
mytoks . text = strdup ( buf ) ;
2025-07-20 22:24:25 +03:00
mytoks . text_len = i ;
2025-07-20 22:42:44 +03:00
}
2025-07-21 13:34:20 +03:00
2025-07-20 22:42:44 +03:00
else {
2025-07-20 22:24:25 +03:00
buf [ 0 ] = text [ cursor ] ;
buf [ 1 ] = ' \0 ' ;
switch ( text [ cursor ] ) {
case ' + ' :
mytoks . type = TOKEN_PLUS ;
2025-07-28 18:07:23 +03:00
// asigning text is not really needed unless for debug. could however be useful for codegen later. NOW IT BECAME A MUST LOL
2025-07-24 16:17:10 +03:00
mytoks . text = strdup ( " + " ) ;
2025-07-21 13:34:20 +03:00
mytoks . behaviour = BHV_STACK ;
break ;
2025-07-20 22:24:25 +03:00
case ' - ' :
mytoks . type = TOKEN_MINUS ;
2025-07-24 16:17:10 +03:00
mytoks . text = strdup ( " - " ) ;
2025-07-21 13:34:20 +03:00
mytoks . behaviour = BHV_STACK ;
2025-07-20 22:24:25 +03:00
break ;
2025-07-20 22:40:11 +03:00
case ' ' :
mytoks . type = TOKEN_SPACE ;
2025-07-24 16:17:10 +03:00
mytoks . text = strdup ( " space " ) ;
2025-07-20 22:40:11 +03:00
break ;
2025-07-28 18:07:23 +03:00
case ' * ' :
mytoks . type = TOKEN_MUL ;
mytoks . text = strdup ( " * " ) ;
mytoks . behaviour = BHV_STACK ;
break ;
case ' / ' :
mytoks . type = TOKEN_DIV ;
mytoks . text = strdup ( " / " ) ;
mytoks . behaviour = BHV_STACK ;
break ;
2025-07-20 22:24:25 +03:00
default :
2025-07-20 22:40:11 +03:00
mytoks . type = TOKEN_UNKNOWN ;
2025-07-21 13:34:20 +03:00
mytoks . behaviour = BHV_UNDEFINED ;
2025-07-24 16:19:49 +03:00
mytoks . text = strdup ( buf ) ;
2025-07-21 13:34:20 +03:00
2025-07-20 22:24:25 +03:00
}
}
return mytoks ;
}
2025-07-23 18:32:45 +03:00
// Append `tok` to `arr`, growing the storage when it is full.
//
// Capacity starts at 8 and doubles on each growth. Allocation failure (or a
// capacity that would overflow size_t) is fatal: a message is printed to
// stderr and the process exits. This check is explicit rather than an
// assert, so it is still performed when NDEBUG is defined.
void tokenarr_push(TokenArr *arr, Token tok) {
    if (arr->size >= arr->capacity) {
        const size_t max_tokens = (size_t)-1 / sizeof(Token);
        Token *grown = NULL;

        if (arr->capacity <= max_tokens / 2) {
            const size_t new_capacity = arr->capacity ? arr->capacity * 2 : 8;
            // Keep the old pointer until realloc is known to have succeeded.
            grown = realloc(arr->unit, new_capacity * sizeof *grown);
            if (grown != NULL) {
                arr->unit = grown;
                arr->capacity = new_capacity;
            }
        }
        if (grown == NULL) {
            fprintf(stderr, "Out of memory while growing token array\n");
            exit(EXIT_FAILURE);
        }
    }
    arr->unit[arr->size++] = tok;
}
TokenArr tokenize_all ( const char * input ) {
TokenArr arr = { NULL , 0 , 0 } ;
size_t i = 0 ;
size_t len = strlen ( input ) ;
while ( i < len ) {
Token tok = read_from_tok ( ( char * ) input , i ) ;
i + = tok . cursor_skip ;
2025-08-05 12:06:22 +03:00
if ( tok . type = = TOKEN_SPACE ) {
free ( tok . text ) ;
continue ;
}
tokenarr_push ( & arr , tok ) ;
2025-07-23 18:32:45 +03:00
}
2025-08-05 12:06:22 +03:00
Token eof = { 0 } ;
eof . type = TOKEN_EOF ;
eof . text = strdup ( " EOF " ) ;
eof . text_len = 3 ;
eof . behaviour = BHV_UNDEFINED ;
eof . cursor_skip = 0 ;
tokenarr_push ( & arr , eof ) ;
2025-07-23 18:32:45 +03:00
return arr ;
}
2025-07-20 22:24:25 +03:00
// Token* c
2025-07-24 16:17:10 +03:00
// Debug helper: lex `input` token by token and print each token's text and
// behaviour to stdout.
//
// `mytok` is only used as scratch storage; its incoming value is ignored.
// The scan advances by the number of characters each token consumed, so a
// multi-character token is reported once rather than re-lexed from every one
// of its characters, and each token's heap-allocated text is released after
// printing.
void token_parser(Token mytok, char *input) {
    const size_t length = strlen(input);
    size_t pos = 0;

    while (pos < length) {
        mytok = read_from_tok(input, pos);
        printf("Text: %s\n", mytok.text);
        printf("Behaviour: %d\n", (int)mytok.behaviour);
        if (mytok.behaviour == BHV_STACK) {
            printf("this is stack lil bro\n");
        }
        // Always make progress, even if a token reports a zero-length skip.
        pos += mytok.cursor_skip ? mytok.cursor_skip : 1;
        free(mytok.text);
    }
}
2025-07-20 22:24:25 +03:00
// Operators are accepted in an int/digit (or whatever type) definition only when they have a digit both before AND after them.
2025-07-21 13:34:20 +03:00
/*
2025-07-20 22:24:25 +03:00
int main ( ) {
Token newtok ;
2025-07-21 13:34:20 +03:00
char * input = " 8 " ;
parser ( newtok , input ) ;
}
*/
2025-07-23 18:32:45 +03:00
// Return the enumerator name of `type` as a string literal.
// The result points to static storage: it must not be modified or freed.
// Values outside the enum yield "UNKNOWN_SYMBOL".
char *token_type_to_string(symbols type) {
    switch (type) {
    case TOKEN_PLUS:    return "TOKEN_PLUS";
    case TOKEN_MINUS:   return "TOKEN_MINUS";
    case TOKEN_INTEGER: return "TOKEN_INTEGER";
    case TOKEN_FLOAT:   return "TOKEN_FLOAT";
    case TOKEN_SPACE:   return "TOKEN_SPACE";
    case TOKEN_STRING:  return "TOKEN_STRING";
    case TOKEN_MUL:     return "TOKEN_MUL";
    case TOKEN_DIV:     return "TOKEN_DIV";
    case intdef:        return "intdef";
    case TOKEN_UNKNOWN: return "TOKEN_UNKNOWN";
    case TOKEN_EOF:     return "TOKEN_EOF";
    default:            return "UNKNOWN_SYMBOL";
    }
}
2025-07-23 18:32:45 +03:00
void main2 ( ) {
2025-07-22 16:18:37 +03:00
char * input = " 323.23 + Hello world 102102 " ;
2025-07-21 13:34:20 +03:00
int length1 = strlen ( input ) ;
int i = 0 ;
2025-07-22 16:18:37 +03:00
printf ( " input: %s \n \n " , input ) ;
2025-07-21 13:34:20 +03:00
while ( i < length1 ) {
2025-07-22 16:18:37 +03:00
Token result = read_from_tok ( input , i ) ;
printf ( " text: %s \n type: %u (%s) \n \n " , result . text , result . type , token_type_to_string ( result . type ) ) ;
2025-07-28 18:07:23 +03:00
i + = result . cursor_skip ;
2025-07-20 22:24:25 +03:00
}
}
2025-07-23 18:32:45 +03:00
2025-07-28 18:41:15 +03:00
void mathparser ( const char * input ) {
2025-07-28 18:07:23 +03:00
TokenArr stack = tokenize_all ( input ) ;
2025-07-28 18:41:15 +03:00
float result = 0 ;
float current = 0 ;
float sign = 1 ;
float op = 0 ;
2025-07-28 18:07:23 +03:00
for ( size_t i = 0 ; i < stack . size ; + + i ) {
switch ( stack . unit [ i ] . type ) {
2025-07-28 18:41:15 +03:00
case TOKEN_INTEGER :
{
float value = str_to_float ( stack . unit [ i ] . text ) ;
if ( op = = 1 ) {
current * = value ;
op = 0 ;
} else if ( op = = 2 ) {
current / = value ;
op = 0 ;
} else {
current = value ;
}
break ;
}
case TOKEN_FLOAT :
{
float value = str_to_float ( stack . unit [ i ] . text ) ;
2025-07-28 18:07:23 +03:00
if ( op = = 1 ) {
current * = value ;
op = 0 ;
} else if ( op = = 2 ) {
current / = value ;
op = 0 ;
} else {
current = value ;
}
break ;
}
case TOKEN_PLUS :
result + = sign * current ;
sign = 1 ;
op = 0 ;
break ;
case TOKEN_MINUS :
result + = sign * current ;
sign = - 1 ;
op = 0 ;
break ;
case TOKEN_MUL :
op = 1 ;
break ;
case TOKEN_DIV :
op = 2 ;
break ;
default :
break ;
2025-07-28 17:56:35 +03:00
}
2025-07-28 17:30:45 +03:00
}
2025-07-28 18:41:15 +03:00
result + = sign * current ;
printf ( " %f \n " , result ) ;
2025-07-28 18:07:23 +03:00
for ( size_t j = 0 ; j < stack . size ; + + j ) {
free ( stack . unit [ j ] . text ) ;
2025-07-28 17:56:35 +03:00
}
2025-07-28 18:07:23 +03:00
free ( stack . unit ) ;
2025-07-28 17:30:45 +03:00
}
int main4 ( ) {
2025-07-24 16:19:49 +03:00
char * input = " print(5) hello " ;
2025-07-23 18:32:45 +03:00
printf ( " input: %s \n \n " , input ) ;
TokenArr arr = tokenize_all ( input ) ;
2025-07-24 16:17:10 +03:00
2025-07-23 18:32:45 +03:00
for ( size_t j = 0 ; j < arr . size ; + + j ) {
Token * result = & arr . unit [ j ] ;
printf ( " text: %s \n type: %u (%s) \n \n " , result - > text , result - > type , token_type_to_string ( result - > type ) ) ;
}
2025-07-24 16:17:10 +03:00
printf ( " ================ Tokenized ================= \n " ) ;
for ( size_t j = 0 ; j < arr . size ; + + j ) {
Token * result = & arr . unit [ j ] ;
printf ( " text: %s, type: %u (%s) || " , result - > text , result - > type , token_type_to_string ( result - > type ) ) ;
}
printf ( " \n " ) ;
for ( size_t j = 0 ; j < arr . size ; + + j ) {
free ( arr . unit [ j ] . text ) ;
}
2025-07-23 18:32:45 +03:00
free ( arr . unit ) ;
return 0 ;
}
2025-07-28 17:30:45 +03:00
2025-08-05 12:06:22 +03:00
// Demo driver: evaluate a fixed sample expression with mathparser.
// Returns 0. Falling off the end of a non-void function leaves the return
// value indeterminate for any caller that reads it, so the return is explicit.
int main5(void) {
    const char *input = "40/2.3 * 10 + 400";
    printf("input: %s\n", input);
    mathparser(input);
    return 0;
}
2025-08-05 12:06:22 +03:00
int main ( ) {
const char * input = " 40/2.3 * 10 + 400 - 5 " ;
printf ( " Input: %s \n " , input ) ;
TokenArr toks = tokenize_all ( input ) ;
parser p = { toks . unit , 0 } ;
ASTNode * root = parse_expression ( & p ) ;
double result = eval_ast ( root ) ;
printf ( " AST Result: %f \n " , result ) ;
return 0 ;
}