The basis of the tokenizer is basically complete?

This commit is contained in:
2025-07-22 15:57:51 +03:00
parent da3367de3f
commit e58d24b9be

33
lexer.c
View File

@@ -1,3 +1,4 @@
#include <assert.h>
#include <ctype.h> #include <ctype.h>
#include <stddef.h> #include <stddef.h>
#include <stdio.h> #include <stdio.h>
@@ -23,6 +24,7 @@ typedef enum{
TOKEN_PLUS, TOKEN_PLUS,
TOKEN_MINUS, TOKEN_MINUS,
TOKEN_INTEGER, TOKEN_INTEGER,
TOKEN_FLOAT,
TOKEN_SPACE, TOKEN_SPACE,
TOKEN_STRING, TOKEN_STRING,
intdef, intdef,
@@ -34,6 +36,7 @@ typedef enum{
BHV_UNDEFINED, BHV_UNDEFINED,
BHV_NUMBER, BHV_NUMBER,
BHV_STRING, BHV_STRING,
BHV_FLOAT,
} symbol_bhv; } symbol_bhv;
typedef struct{ typedef struct{
@@ -42,6 +45,7 @@ typedef struct{
size_t text_len; size_t text_len;
symbol_bhv behaviour; symbol_bhv behaviour;
uint cursor_skip; uint cursor_skip;
symbols previous_token;
} Token; } Token;
typedef struct{ typedef struct{
@@ -68,19 +72,40 @@ Token read_from_tok(char* text, uint cursor){
size_t i = 0; size_t i = 0;
mytoks.cursor_skip = 1; mytoks.cursor_skip = 1;
// Integer logic. Detecting "." for floats is hard to do here because of the way this code is structured:
// checking for "." depends on the switch statement, so one option is to add previous_token to the Token struct. That is actually a feasible idea.
// Open question: will previous_token need to be set to the current token? Maybe.
if (isdigit(text[cursor])) { if (isdigit(text[cursor])) {
size_t start = cursor; size_t start = cursor;
while (isdigit(text[cursor])) { int dots_seen = 0;
while ( isdigit(text[cursor]) || text[cursor] == '.') {
if (text[cursor] == '.') {
dots_seen +=1;
assert(dots_seen < 2);
}
buf[i++] = text[cursor++]; buf[i++] = text[cursor++];
} }
// TODO: recheck the dots_seen assert in the loop above later; a second '.' in a number currently trips it.
buf[i] = '\0'; buf[i] = '\0';
if (!dots_seen){
mytoks.type = TOKEN_INTEGER; mytoks.type = TOKEN_INTEGER;
mytoks.behaviour = BHV_NUMBER; mytoks.behaviour = BHV_NUMBER;
} else {
mytoks.type = TOKEN_FLOAT;
mytoks.behaviour = BHV_FLOAT;
}
mytoks.cursor_skip = cursor - start; mytoks.cursor_skip = cursor - start;
mytoks.text = buf; mytoks.text = buf;
mytoks.text_len = i; mytoks.text_len = i;
} else if (isalpha(text[cursor])){ }
// string logic
else if (isalpha(text[cursor])){
size_t start = cursor; size_t start = cursor;
while (isalpha(text[cursor])) { while (isalpha(text[cursor])) {
buf[i++] = text[cursor++]; buf[i++] = text[cursor++];
@@ -159,7 +184,7 @@ int main(){
int main(){ int main(){
Token newtok; Token newtok;
char* input = "32323 + Hello world"; char* input = "323.23 + Hello world";
int length1 = strlen(input); int length1 = strlen(input);
int i = 0; int i = 0;
while (i < length1) { while (i < length1) {