CE improved tokenizer
All checks were successful
Build documentation / build-and-deploy (push) Successful in 3m14s

This commit is contained in:
2026-03-01 13:42:04 +01:00
parent a5d5e7d6a4
commit e61545c4cf
3 changed files with 120 additions and 41 deletions

107
ce/ce.c
View File

@@ -47,14 +47,24 @@ struct context {
/* Size in bytes of the temporary buffer a single cprintf call formats into. */
#define CPRINTF_BUF_MAX 4096

/*
 * cprintf(context, fmt, ...) - format a printf-style message and append it
 * to (context)->strbuf via strbuf_append_str.
 *
 * The message is rendered with snprintf into a zeroed heap buffer of
 * CPRINTF_BUF_MAX bytes, so output longer than CPRINTF_BUF_MAX - 1
 * characters is truncated. If the buffer cannot be allocated the call is a
 * no-op instead of handing NULL to memset/snprintf.
 *
 * `context` is evaluated once; `fmt` and the variadic arguments are
 * forwarded to snprintf unchanged. The `##__VA_ARGS__` form lets the macro
 * be used without any variadic arguments.
 */
#define cprintf(context, fmt, ...) \
  do { \
    char* cprintf_buf_ = malloc (CPRINTF_BUF_MAX); \
    if (cprintf_buf_ != NULL) { \
      memset (cprintf_buf_, 0, CPRINTF_BUF_MAX); \
      snprintf (cprintf_buf_, CPRINTF_BUF_MAX, (fmt), ##__VA_ARGS__); \
      strbuf_append_str (&(context)->strbuf, cprintf_buf_); \
      free (cprintf_buf_); \
    } \
  } while (0)
/*
 * Format a printf-style message and append it to context->strbuf.
 *
 * The text is rendered into a temporary heap buffer of CPRINTF_BUF_MAX
 * bytes; vsnprintf is bounded by that size, so longer output is truncated.
 *
 * Nothing is appended when the buffer cannot be allocated or when
 * vsnprintf reports an error (negative return): the buffer comes from
 * malloc and is not zeroed, so after a failed format it does not hold a
 * valid string.
 */
void cprintf (struct context* context, const char* fmt, ...) {
  char* buf = malloc (CPRINTF_BUF_MAX);
  if (buf == NULL) {
    return;
  }

  /* Start the argument list only once the buffer exists, so every path
   * after va_start runs through the single va_end below. */
  va_list args;
  va_start (args, fmt);
  int written = vsnprintf (buf, CPRINTF_BUF_MAX, fmt, args);
  va_end (args);

  if (written >= 0) {
    strbuf_append_str (&context->strbuf, buf);
  }

  free (buf);
}
/* NOTE(review): not referenced in the visible part of this file; presumably
 * the capacity of the input line buffer - confirm against its users. */
#define LINE_BUFFER_MAX 1024
/* Bound used by tokenize when filling token->buffer: at most TOKEN_MAX - 1
 * characters of a word are stored. */
#define TOKEN_MAX 64
@@ -253,41 +263,58 @@ static bool run = true;
/* Emit one character: passes the address of `ch` and a length of 1 to
 * mail_send, addressed to e_pgid. */
static void putch (char ch) {
  char* byte = &ch;

  mail_send (e_pgid, byte, 1);
}
/*
 * Externally visible single-character output routine; forwards to putch.
 * NOTE(review): presumably the output hook expected by the project's printf
 * implementation - confirm.
 */
void putchar_ (char ch) {
  putch (ch);
}
static bool tokenize_line (void* ctx, const char* start, size_t len) {
struct list_node_link** head = ctx;
static void tokenize (struct list_node_link** tokens, const char* text) {
const char* p = text;
struct token* token = arena_malloc (&arena, sizeof (*token));
memset (token, 0, sizeof (*token));
memcpy (token->buffer, start, min (sizeof (token->buffer) - 1, len));
list_append ((*head), &token->tokens_link);
return true;
}
static void classify_tokens (struct list_node_link* tokens) {
struct list_node_link *token_link, *token_tmp_link;
list_foreach (tokens, token_link, token_tmp_link) {
struct token* token = list_entry (token_link, struct token, tokens_link);
if (strcmp (token->buffer, "(") == 0) {
token->class = TOKEN_CLASS_OPAREN;
} else if (strcmp (token->buffer, ")") == 0) {
token->class = TOKEN_CLASS_CPAREN;
} else if (strcmp (token->buffer, ";") == 0) {
token->class = TOKEN_CLASS_SEMICOLON;
} else if (strcmp (token->buffer, ">") == 0) {
token->class = TOKEN_CLASS_REDIR;
} else {
token->class = TOKEN_CLASS_WORD;
while (*p) {
if (isspace (*p)) {
p++;
continue;
}
if (*p == '(' || *p == ')' || *p == ';' || *p == '>') {
struct token* token = arena_malloc (&arena, sizeof (*token));
memset (token, 0, sizeof (*token));
token->buffer[0] = *p;
if (*p == '(')
token->class = TOKEN_CLASS_OPAREN;
else if (*p == ')')
token->class = TOKEN_CLASS_CPAREN;
else if (*p == ';')
token->class = TOKEN_CLASS_SEMICOLON;
else if (*p == '>')
token->class = TOKEN_CLASS_REDIR;
list_append (*tokens, &token->tokens_link);
p++;
continue;
}
if (isprint (*p)) {
struct token* token = arena_malloc (&arena, sizeof (*token));
memset (token, 0, sizeof (*token));
size_t i = 0;
while (*p && !isspace (*p) && *p != '(' && *p != ')' && *p != ';' && *p != '>') {
if (i < TOKEN_MAX - 1)
token->buffer[i++] = *p;
p++;
}
token->class = TOKEN_CLASS_WORD;
list_append (*tokens, &token->tokens_link);
continue;
}
printf ("ERROR unknown character '%c'\n", *p);
p++;
}
}
/* Forward declaration; execute is defined further down in this file. */
static void execute (struct ast_node* root, struct context* context);
static void parse_tokens (struct list_node_link* tokens) {
static void parse_and_execute (struct list_node_link* tokens) {
struct parser parser;
parser.current = NULL;
parser.next = get_token (tokens);
@@ -491,12 +518,10 @@ static void execute (struct ast_node* root, struct context* context) {
/*
 * Run one line of input.
 *
 * The line is split into classified tokens by tokenize; if that produced at
 * least one token, the list is handed to parse_and_execute. The global
 * arena is reset afterwards on every call, including for lines that
 * produced no tokens.
 *
 * Only this single pipeline is run: also invoking strtokenize,
 * classify_tokens and parse_tokens on the same list would append every
 * token twice and parse the line a second time.
 */
static void exec_line (const char* line) {
  struct list_node_link* tokens = NULL;

  tokenize (&tokens, line);

  if (tokens != NULL) {
    parse_and_execute (tokens);
  }

  arena_reset (&arena);
}