now we can read files and interpret them

This commit is contained in:
2025-08-24 21:48:26 +03:00
parent 14eaa8cc97
commit d7205a90bb
2 changed files with 220 additions and 78 deletions

137
lexer.c
View File

@@ -6,6 +6,8 @@
#include <string.h> #include <string.h>
#include <sys/types.h> #include <sys/types.h>
#include <stdbool.h> #include <stdbool.h>
#define NB_IMPLEMENTATION
#include "nb.h"
typedef struct{ typedef struct{
char* mstr; char* mstr;
@@ -41,9 +43,9 @@ typedef enum{
TOKEN_STRING, TOKEN_STRING,
TOKEN_MUL, TOKEN_MUL,
TOKEN_DIV, TOKEN_DIV,
intdef,
TOKEN_UNKNOWN, TOKEN_UNKNOWN,
TOKEN_EOF, TOKEN_EOF,
TOKEN_NEWLINE
} symbols; } symbols;
typedef enum{ typedef enum{
@@ -54,9 +56,6 @@ typedef enum{
BHV_FLOAT, BHV_FLOAT,
} symbol_bhv; } symbol_bhv;
typedef struct{ typedef struct{
symbols type; symbols type;
char* text; char* text;
@@ -66,18 +65,11 @@ typedef struct{
symbols previous_token; symbols previous_token;
} Token; } Token;
// Since tokenize_all now exists, previous_token is not really needed: the AST walk can work without each individual token carrying all that data.
typedef struct{ typedef struct{
Token* unit; Token* unit;
size_t size; size_t size;
size_t capacity; size_t capacity;
} TokenArr; } TokenArr;
// maybe should add cursor even for TokenArr to use C's printf % to add whitespace in order to move something like this
// input = 1 + 323 + =-=-
// ^
// |- Unknown Token
typedef struct{ typedef struct{
char *content; char *content;
@@ -85,9 +77,6 @@ typedef struct{
// size_t line; // size_t line;
} Lexer; } Lexer;
// Will not necessarily use the AST; it just could be useful in the future.
typedef enum{ typedef enum{
AST_NUMBER, AST_NUMBER,
AST_BINARY_OP, AST_BINARY_OP,
@@ -111,7 +100,6 @@ typedef struct{
Token* tokens; Token* tokens;
size_t cursor; size_t cursor;
} parser; } parser;
// tokenArr to token*
// Lexer // Lexer
void lexer_new(char *content, size_t content_len){ void lexer_new(char *content, size_t content_len){
@@ -172,7 +160,6 @@ ASTNode* parse_factor(parser* p) {
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
ASTNode* parse_term(parser* p) { ASTNode* parse_term(parser* p) {
ASTNode* node = parse_factor(p); ASTNode* node = parse_factor(p);
while (true) { while (true) {
@@ -188,7 +175,6 @@ ASTNode* parse_term(parser* p) {
return node; return node;
} }
ASTNode* parse_expression(parser* p) { ASTNode* parse_expression(parser* p) {
ASTNode* node = parse_term(p); ASTNode* node = parse_term(p);
while (true) { while (true) {
@@ -221,11 +207,6 @@ double eval_ast(ASTNode* node) {
} }
} }
// will implement a stack for arithmetic later. do I want a compiler or interpreter? since this is a learning experience im gonna do the easier thing first
Token read_from_tok(char* text, uint cursor){ Token read_from_tok(char* text, uint cursor){
Token mytoks; Token mytoks;
@@ -233,9 +214,6 @@ Token read_from_tok(char* text, uint cursor){
size_t i = 0; size_t i = 0;
mytoks.cursor_skip = 1; mytoks.cursor_skip = 1;
// integer logic. will have to somehow detect "." for floats but it will be hard to do because the way I wrote this code is shit
// ie: checking for . depends on the switch statement. so I will have to maybe add previous_token to the token struct. Actually a feasible idea.
// will I need to set previous_token to the current token? maybe.
if (isdigit(text[cursor])) { if (isdigit(text[cursor])) {
size_t start = cursor; size_t start = cursor;
int dots_seen = 0; int dots_seen = 0;
@@ -247,9 +225,6 @@ Token read_from_tok(char* text, uint cursor){
buf[i++] = text[cursor++]; buf[i++] = text[cursor++];
} }
// recheck this assert later
buf[i] = '\0'; buf[i] = '\0';
if (!dots_seen){ if (!dots_seen){
@@ -265,7 +240,6 @@ Token read_from_tok(char* text, uint cursor){
mytoks.text = strdup(buf); mytoks.text = strdup(buf);
mytoks.text_len = i; mytoks.text_len = i;
} }
// string logic
else if (isalpha(text[cursor])){ else if (isalpha(text[cursor])){
size_t start = cursor; size_t start = cursor;
while (isalpha(text[cursor])) { while (isalpha(text[cursor])) {
@@ -283,13 +257,13 @@ Token read_from_tok(char* text, uint cursor){
buf[0] = text[cursor]; buf[0] = text[cursor];
buf[1] = '\0'; buf[1] = '\0';
switch (text[cursor]){ switch (text[cursor])
{
case '+': case '+':
mytoks.type = TOKEN_PLUS; mytoks.type = TOKEN_PLUS;
// Assigning text is not really needed except for debugging. It could, however, be useful for codegen later. NOW IT BECAME A MUST LOL
mytoks.text = strdup("+"); mytoks.text = strdup("+");
mytoks.behaviour = BHV_STACK; mytoks.behaviour = BHV_STACK;
break; break;
case '-': case '-':
mytoks.type = TOKEN_MINUS; mytoks.type = TOKEN_MINUS;
mytoks.text = strdup("-"); mytoks.text = strdup("-");
@@ -309,6 +283,11 @@ Token read_from_tok(char* text, uint cursor){
mytoks.text = strdup("/"); mytoks.text = strdup("/");
mytoks.behaviour = BHV_STACK; mytoks.behaviour = BHV_STACK;
break; break;
case '\n':
mytoks.type = TOKEN_NEWLINE;
mytoks.text = strdup("newline");
mytoks.cursor_skip = 1;
break;
default: default:
mytoks.type = TOKEN_UNKNOWN; mytoks.type = TOKEN_UNKNOWN;
mytoks.behaviour = BHV_UNDEFINED; mytoks.behaviour = BHV_UNDEFINED;
@@ -336,7 +315,7 @@ TokenArr tokenize_all(const char* input) {
while (i < len) { while (i < len) {
Token tok = read_from_tok((char*)input, i); Token tok = read_from_tok((char*)input, i);
i += tok.cursor_skip; i += tok.cursor_skip;
if (tok.type == TOKEN_SPACE) { if (tok.type == TOKEN_SPACE || tok.type == TOKEN_NEWLINE) {
free(tok.text); free(tok.text);
continue; continue;
} }
@@ -395,23 +374,22 @@ char* token_type_to_string(symbols type) {
case TOKEN_FLOAT: return "TOKEN_FLOAT"; case TOKEN_FLOAT: return "TOKEN_FLOAT";
case TOKEN_SPACE: return "TOKEN_SPACE"; case TOKEN_SPACE: return "TOKEN_SPACE";
case TOKEN_STRING: return "TOKEN_STRING"; case TOKEN_STRING: return "TOKEN_STRING";
case intdef: return "intdef";
case TOKEN_UNKNOWN: return "TOKEN_UNKNOWN"; case TOKEN_UNKNOWN: return "TOKEN_UNKNOWN";
default: return "UNKNOWN_SYMBOL"; default: return "UNKNOWN_SYMBOL";
} }
} }
void main2() { // void main2() {
char* input = "323.23 + Hello world 102102"; // char* input = "323.23 + Hello world 102102";
int length1 = strlen(input); // int length1 = strlen(input);
int i = 0; // int i = 0;
printf("input: %s\n\n", input); // printf("input: %s\n\n", input);
while (i < length1) { // while (i < length1) {
Token result = read_from_tok(input, i); // Token result = read_from_tok(input, i);
printf("text: %s\ntype: %u (%s)\n\n", result.text, result.type, token_type_to_string(result.type)); // printf("text: %s\ntype: %u (%s)\n\n", result.text, result.type, token_type_to_string(result.type));
i += result.cursor_skip; // i += result.cursor_skip;
} // }
} // }
@@ -482,44 +460,46 @@ void mathparser(const char* input) {
} }
int main4() { // int main4() {
char* input = "print(5) hello"; // char* input = "print(5) hello";
printf("input: %s\n\n", input); // printf("input: %s\n\n", input);
//
TokenArr arr = tokenize_all(input); // TokenArr arr = tokenize_all(input);
//
for (size_t j = 0; j < arr.size; ++j) { // for (size_t j = 0; j < arr.size; ++j) {
Token* result = &arr.unit[j]; // Token* result = &arr.unit[j];
printf("text: %s\ntype: %u (%s)\n\n", result->text, result->type, token_type_to_string(result->type)); // printf("text: %s\ntype: %u (%s)\n\n", result->text, result->type, token_type_to_string(result->type));
} // }
//
printf("================ Tokenized =================\n"); // printf("================ Tokenized =================\n");
//
for (size_t j = 0; j < arr.size; ++j) { // for (size_t j = 0; j < arr.size; ++j) {
Token* result = &arr.unit[j]; // Token* result = &arr.unit[j];
printf("text: %s, type: %u (%s) || ", result->text, result->type, token_type_to_string(result->type)); // printf("text: %s, type: %u (%s) || ", result->text, result->type, token_type_to_string(result->type));
} // }
printf("\n"); // printf("\n");
for (size_t j = 0; j < arr.size; ++j) { // for (size_t j = 0; j < arr.size; ++j) {
free(arr.unit[j].text); // free(arr.unit[j].text);
} // }
free(arr.unit); // free(arr.unit);
return 0; // return 0;
} // }
int main5(){ // int main5(){
char* input = "40/2.3 * 10 + 400"; // char* input = "40/2.3 * 10 + 400";
printf("input: %s\n", input); // printf("input: %s\n", input);
mathparser(input); // mathparser(input);
} // return 0;
// }
int main() { int main(int argc, char** argv) {
const char* input = "40/2.3 * 10 + 400 - 5"; if (argc > 1){
char* input = nb_read_file(argv[1]);
printf("Input: %s\n", input); printf("Input: %s\n", input);
TokenArr toks = tokenize_all(input); TokenArr toks = tokenize_all(input);
@@ -529,6 +509,9 @@ int main() {
double result = eval_ast(root); double result = eval_ast(root);
printf("AST Result: %f\n", result); printf("AST Result: %f\n", result);
} else {
printf("Usage: %s <file>\n", argv[0]);
}
return 0; return 0;
} }

159
nb.h
View File

@@ -1,7 +1,12 @@
#include <stddef.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <sys/stat.h>
#include <unistd.h>
#include <stdbool.h>
#include <string.h> #include <string.h>
typedef struct{ typedef struct{
int capacity; int capacity;
int arrsize; int arrsize;
@@ -9,6 +14,56 @@ typedef struct{
} nb_arr; } nb_arr;
/* Bookkeeping for one file that is being read or written as a whole. */
typedef struct{
/* stdio stream; the file helpers in this header fill it from fopen(). */
FILE *filep;
/* Size in bytes; the file helpers fill it from ftell() after seeking to the end. */
size_t filesize;
/* NOTE(review): not referenced by the helpers visible here; purpose unconfirmed. */
int chars;
/* Heap buffer that holds the file's bytes. */
char *buf;
} nb_file;
/* ---- nb_arr helpers ---- */
void nb_init(nb_arr *newarr, int initial_capacity);
void nb_append(nb_arr *newarr, char *newval);
void nb_append_int(nb_arr *newarr, int myint);
void nb_append_float(nb_arr *newarr, float myfloat);
void nb_free(nb_arr *newarr);
char* nb_strdup(const char* s); // make this void that uses realloc later.
void nb_print(nb_arr *newarr);
void nb_print_info(nb_arr *newarr);
void nb_cmd(nb_arr *newarr);
/* ---- file helpers ---- */
// void copy_file(char* old_file_name, char* new_file_name);
/* Copies the bytes of old_file_name into new_file_name (opened with "wb"). */
void nb_copy_file(char* old_file_name, char* new_file_name);
//bool needs_rebuild(); // need to implement rename file first to .old or something like nob does
/* True when filename's modification time is newer than that of "<filename>.new". */
bool nb_did_file_change(char *filename);
/* Existence probe built on access(..., F_OK). */
bool nb_does_file_exist(char *filename);
/* Keeps a "<filename>.new" snapshot and runs gcc on filename when it is newer than the snapshot. */
void nb_rebuild(char filename[]);
/* Loads a file into a malloc'd buffer and returns it; the caller owns the buffer. */
char* nb_read_file(char* file_name);
#ifdef NB_IMPLEMENTATION // make sure to define this before using the header
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* /*
char* nb_strdup(const char* s) { char* nb_strdup(const char* s) {
@@ -109,3 +164,107 @@ void append_c_file(FILE *filepointer){
} }
/*
 * Copy the file at old_file_name to new_file_name, byte for byte.
 *
 * The destination is created or truncated ("wb"). The source is loaded fully
 * into memory first, so the destination is only opened once the source has
 * been read successfully; if the source cannot be opened or read, the
 * destination is left untouched.
 *
 * Errors are reported on stderr and the function returns without copying.
 * All resources (both streams and the temporary buffer) are released on
 * every path.
 */
void nb_copy_file(char* old_file_name, char* new_file_name){
    FILE *src = NULL;
    FILE *dst = NULL;
    char *data = NULL;
    long end = -1;
    size_t size = 0;

    src = fopen(old_file_name, "rb");
    if (src == NULL) {
        perror(old_file_name);
        return;
    }

    /* Measure the file: seek to the end, read the offset, rewind. */
    if (fseek(src, 0, SEEK_END) != 0 ||
        (end = ftell(src)) < 0 ||
        fseek(src, 0, SEEK_SET) != 0) {
        perror(old_file_name);
        goto cleanup;
    }
    size = (size_t)end;

    /* malloc(0) may legally return NULL, so always ask for at least 1 byte. */
    data = (char*)malloc(size > 0 ? size : 1);
    if (data == NULL) {
        fprintf(stderr, "nb_copy_file: out of memory copying %s\n", old_file_name);
        goto cleanup;
    }

    if (fread(data, 1, size, src) != size) {
        fprintf(stderr, "nb_copy_file: short read from %s\n", old_file_name);
        goto cleanup;
    }

    dst = fopen(new_file_name, "wb");
    if (dst == NULL) {
        perror(new_file_name);
        goto cleanup;
    }

    if (fwrite(data, 1, size, dst) != size) {
        fprintf(stderr, "nb_copy_file: short write to %s\n", new_file_name);
    }

cleanup:
    /* fclose flushes buffered output, so its result matters for the copy. */
    if (dst != NULL && fclose(dst) != 0) {
        perror(new_file_name);
    }
    free(data);
    fclose(src);
}
/*
 * Report whether filename has been modified more recently than its snapshot,
 * "<filename>.new".
 *
 * Returns:
 *   true  - filename's modification time (whole seconds) is strictly newer
 *           than the snapshot's, or the snapshot cannot be stat'ed (there is
 *           no baseline, so the file is treated as changed).
 *   false - the snapshot is at least as new as filename, or filename itself
 *           cannot be stat'ed.
 *
 * The snapshot path is built in a buffer sized from the input, so paths of
 * any length are handled.
 */
bool nb_did_file_change(char *filename){
    static const char suffix[] = ".new";
    struct stat source_info;
    struct stat snapshot_info;
    char *snapshot_path = NULL;
    size_t path_size = 0;
    bool changed = true;

    if (filename == NULL || stat(filename, &source_info) != 0) {
        return false;
    }

    /* sizeof suffix already counts the terminating NUL. */
    path_size = strlen(filename) + sizeof suffix;
    snapshot_path = (char*)malloc(path_size);
    if (snapshot_path == NULL) {
        /* Cannot even build the path; err on the side of "changed". */
        return true;
    }
    snprintf(snapshot_path, path_size, "%s%s", filename, suffix);

    if (stat(snapshot_path, &snapshot_info) == 0) {
        /* st_mtime is the portable spelling of the seconds field. */
        changed = source_info.st_mtime > snapshot_info.st_mtime;
    }

    free(snapshot_path);
    return changed;
}
/*
 * Report whether a file exists at the given path.
 *
 * The path is probed exactly as given; no suffix is appended. nb_rebuild()
 * passes the complete "<source>.new" snapshot path. The previous body probed
 * the hard-coded name "test.c.new" regardless of the argument.
 *
 * Returns true when access(filename, F_OK) succeeds, false otherwise
 * (including when filename is NULL).
 */
bool nb_does_file_exist(char *filename){
    if (filename == NULL) {
        return false;
    }
    return access(filename, F_OK) == 0;
}
/*
 * Rebuild the program whose source is `filename` when the source has changed.
 *
 * A snapshot named "<filename>.new" is kept next to the source:
 *   - no snapshot yet: create it and return (nothing is compiled);
 *   - snapshot exists and the source is not newer: report and return;
 *   - snapshot exists and the source is newer: refresh the snapshot and run
 *     `gcc -o <filename without extension> <filename>`.
 *
 * Paths that do not fit the internal 128-byte buffers are rejected instead of
 * being written past the end of the buffer. A source without an extension is
 * rejected as well, because the output name would equal the source name and
 * the compiler would overwrite the source.
 */
void nb_rebuild(char filename[]){
    char new_file[128];
    char fname[128];
    char *slash = NULL;
    char *base = NULL;
    char *dot = NULL;
    nb_arr cmd;

    int written = snprintf(new_file, sizeof(new_file), "%s.new", filename);
    if (written < 0 || (size_t)written >= sizeof(new_file)) {
        fprintf(stderr, "nb_rebuild: path too long: %s\n", filename);
        return;
    }

    if (!nb_does_file_exist(new_file)) {
        /* First run: only record the baseline. */
        printf("created %s\n", new_file);
        nb_copy_file(filename, new_file);
        return;
    }

    printf("%s does exist\n", new_file);
    if (!nb_did_file_change(filename)) {
        printf("file did not change\n");
        return;
    }
    printf("file did change\n");

    /* filename fits: "<filename>.new" already fit in a same-sized buffer. */
    snprintf(fname, sizeof(fname), "%s", filename);

    /* Strip the extension, looking only at the last path component so that a
     * dot in a directory name (e.g. "./src/prog.c") is never the cut point. */
    slash = strrchr(fname, '/');
    base = (slash != NULL) ? slash + 1 : fname;
    dot = strrchr(base, '.');
    if (dot == NULL || dot == base) {
        fprintf(stderr, "nb_rebuild: %s has no extension; refusing to overwrite it\n", filename);
        return;
    }
    *dot = '\0';

    nb_copy_file(filename, new_file);

    nb_init(&cmd, sizeof(fname)*2);
    printf("fname is: %s\n", fname);
    nb_append(&cmd, "gcc");
    nb_append(&cmd, "-o");
    nb_append(&cmd, fname);
    nb_append(&cmd, filename);
    nb_cmd(&cmd);
    nb_print_info(&cmd);
    /* NOTE(review): cmd is never released; confirm whether nb_free(&cmd) belongs here. */
    printf("rebuilt\n");
}
/*
 * Read the whole file at file_name into a freshly allocated buffer.
 *
 * The returned buffer holds the file's bytes followed by a terminating '\0',
 * so it can be handed directly to string functions (printf("%s"), strlen, the
 * tokenizer). The caller owns the buffer and must free() it.
 *
 * Returns NULL, after reporting on stderr, when the file cannot be opened,
 * measured or read completely, or when memory cannot be allocated.
 */
char* nb_read_file(char* file_name){
    FILE *fp = fopen(file_name, "rb");
    char *text = NULL;
    long end = -1;
    size_t size = 0;

    if (fp == NULL) {
        perror(file_name);
        return NULL;
    }

    /* Measure the file: seek to the end, read the offset, rewind. */
    if (fseek(fp, 0, SEEK_END) != 0 ||
        (end = ftell(fp)) < 0 ||
        fseek(fp, 0, SEEK_SET) != 0) {
        perror(file_name);
        fclose(fp);
        return NULL;
    }
    size = (size_t)end;

    /* One extra byte for the terminator. */
    text = (char*)malloc(size + 1);
    if (text == NULL) {
        fprintf(stderr, "nb_read_file: out of memory reading %s\n", file_name);
        fclose(fp);
        return NULL;
    }

    if (fread(text, 1, size, fp) != size) {
        fprintf(stderr, "nb_read_file: short read from %s\n", file_name);
        free(text);
        fclose(fp);
        return NULL;
    }
    text[size] = '\0';

    fclose(fp);
    return text;
}
#endif //NB_IMPLEMENTATION