CE improved tokenizer
All checks were successful
Build documentation / build-and-deploy (push) Successful in 3m14s

This commit is contained in:
2026-03-01 13:42:04 +01:00
parent a5d5e7d6a4
commit e61545c4cf
3 changed files with 120 additions and 41 deletions

107
ce/ce.c
View File

@@ -47,14 +47,24 @@ struct context {
#define CPRINTF_BUF_MAX 4096 #define CPRINTF_BUF_MAX 4096
#define cprintf(context, fmt, ...) \ void cprintf (struct context* context, const char* fmt, ...) {
do { \ va_list args;
char* __cprintf_buf = malloc (CPRINTF_BUF_MAX); \ va_start (args, fmt);
memset (__cprintf_buf, 0, CPRINTF_BUF_MAX); \
snprintf (__cprintf_buf, CPRINTF_BUF_MAX, (fmt), ##__VA_ARGS__); \ char* buf = malloc (CPRINTF_BUF_MAX);
strbuf_append_str (&(context)->strbuf, __cprintf_buf); \
free (__cprintf_buf); \ if (buf == NULL) {
} while (0) va_end (args);
return;
}
vsnprintf (buf, CPRINTF_BUF_MAX, fmt, args);
va_end (args);
strbuf_append_str (&context->strbuf, buf);
free (buf);
}
#define LINE_BUFFER_MAX 1024 #define LINE_BUFFER_MAX 1024
#define TOKEN_MAX 64 #define TOKEN_MAX 64
@@ -253,41 +263,58 @@ static bool run = true;
static void putch (char ch) { mail_send (e_pgid, &ch, 1); } static void putch (char ch) { mail_send (e_pgid, &ch, 1); }
void putchar_ (char ch) { putch (ch); } void putchar_ (char ch) { putch (ch); }
static bool tokenize_line (void* ctx, const char* start, size_t len) { static void tokenize (struct list_node_link** tokens, const char* text) {
struct list_node_link** head = ctx; const char* p = text;
struct token* token = arena_malloc (&arena, sizeof (*token)); while (*p) {
if (isspace (*p)) {
memset (token, 0, sizeof (*token)); p++;
memcpy (token->buffer, start, min (sizeof (token->buffer) - 1, len)); continue;
list_append ((*head), &token->tokens_link);
return true;
}
static void classify_tokens (struct list_node_link* tokens) {
struct list_node_link *token_link, *token_tmp_link;
list_foreach (tokens, token_link, token_tmp_link) {
struct token* token = list_entry (token_link, struct token, tokens_link);
if (strcmp (token->buffer, "(") == 0) {
token->class = TOKEN_CLASS_OPAREN;
} else if (strcmp (token->buffer, ")") == 0) {
token->class = TOKEN_CLASS_CPAREN;
} else if (strcmp (token->buffer, ";") == 0) {
token->class = TOKEN_CLASS_SEMICOLON;
} else if (strcmp (token->buffer, ">") == 0) {
token->class = TOKEN_CLASS_REDIR;
} else {
token->class = TOKEN_CLASS_WORD;
} }
if (*p == '(' || *p == ')' || *p == ';' || *p == '>') {
struct token* token = arena_malloc (&arena, sizeof (*token));
memset (token, 0, sizeof (*token));
token->buffer[0] = *p;
if (*p == '(')
token->class = TOKEN_CLASS_OPAREN;
else if (*p == ')')
token->class = TOKEN_CLASS_CPAREN;
else if (*p == ';')
token->class = TOKEN_CLASS_SEMICOLON;
else if (*p == '>')
token->class = TOKEN_CLASS_REDIR;
list_append (*tokens, &token->tokens_link);
p++;
continue;
}
if (isprint (*p)) {
struct token* token = arena_malloc (&arena, sizeof (*token));
memset (token, 0, sizeof (*token));
size_t i = 0;
while (*p && !isspace (*p) && *p != '(' && *p != ')' && *p != ';' && *p != '>') {
if (i < TOKEN_MAX - 1)
token->buffer[i++] = *p;
p++;
}
token->class = TOKEN_CLASS_WORD;
list_append (*tokens, &token->tokens_link);
continue;
}
printf ("ERROR unknown character '%c'\n", *p);
p++;
} }
} }
static void execute (struct ast_node* root, struct context* context); static void execute (struct ast_node* root, struct context* context);
static void parse_tokens (struct list_node_link* tokens) { static void parse_and_execute (struct list_node_link* tokens) {
struct parser parser; struct parser parser;
parser.current = NULL; parser.current = NULL;
parser.next = get_token (tokens); parser.next = get_token (tokens);
@@ -491,12 +518,10 @@ static void execute (struct ast_node* root, struct context* context) {
static void exec_line (const char* line) { static void exec_line (const char* line) {
struct list_node_link* tokens = NULL; struct list_node_link* tokens = NULL;
strtokenize (line, ' ', &tokens, &tokenize_line); tokenize (&tokens, line);
if (tokens != NULL) { if (tokens != NULL)
classify_tokens (tokens); parse_and_execute (tokens);
parse_tokens (tokens);
}
arena_reset (&arena); arena_reset (&arena);
} }

View File

@@ -90,3 +90,31 @@ char* strcat (char* dest, const char* src) {
return rdest; return rdest;
} }
/* alphanumeric test: returns 1 when c is a letter (per isalpha) or a decimal
 * digit (per isdigit), 0 otherwise */
int isalnum (int c) {
  if (isalpha (c)) {
    return 1;
  }
  return isdigit (c) ? 1 : 0;
}
/* letter test: returns 1 when c is in 'A'..'Z' or 'a'..'z', 0 otherwise */
int isalpha (int c) {
  int in_upper = (c >= 'A' && c <= 'Z');
  int in_lower = (c >= 'a' && c <= 'z');

  return in_upper || in_lower;
}
/* control character test: returns 1 when c is an ASCII control code
 * (0 through 31, or 127 DEL), 0 otherwise.
 * Space (32) is a printable character, not a control code, so the upper bound
 * of the first range is 31; this keeps iscntrl and isprint mutually exclusive. */
int iscntrl (int c) { return (c >= 0 && c <= 31) || (c == 127); }
/* decimal digit test: returns 1 when c is in '0'..'9', 0 otherwise */
int isdigit (int c) {
  if (c < '0') {
    return 0;
  }
  if (c > '9') {
    return 0;
  }
  return 1;
}
/* visible character test: returns 1 when c is in 33..126 (printable and not
 * space), 0 otherwise */
int isgraph (int c) {
  /* reject everything up to and including space (32) and everything past 126 */
  return !(c <= 32 || c > 126);
}
/* lowercase letter test: returns 1 when c is in 'a'..'z', 0 otherwise.
 * The lower bound must be 'a' (not 'A'): a range starting at 'A' would also
 * accept every uppercase letter and the punctuation between 'Z' and 'a'. */
int islower (int c) { return (c >= 'a' && c <= 'z'); }
/* printable character test: returns 1 when c is in 32..126 (space included),
 * 0 otherwise */
int isprint (int c) {
  if (c < 32 || c > 126) {
    return 0;
  }
  return 1;
}
/* punctuation test: returns 1 when c is a visible character (per isgraph)
 * that is not alphanumeric (per isalnum), 0 otherwise */
int ispunct (int c) {
  if (!isgraph (c)) {
    return 0;
  }
  return isalnum (c) ? 0 : 1;
}
/* whitespace test: returns 1 for space, form feed, newline, carriage return,
 * horizontal tab and vertical tab; 0 for anything else */
int isspace (int c) {
  switch (c) {
  case ' ':
  case '\f':
  case '\n':
  case '\r':
  case '\t':
  case '\v':
    return 1;
  default:
    return 0;
  }
}
/* uppercase letter test: returns 1 when c is in 'A'..'Z', 0 otherwise */
int isupper (int c) {
  return (c < 'A' || c > 'Z') ? 0 : 1;
}
/* hexadecimal digit test: returns 1 when c is a decimal digit (per isdigit)
 * or a letter in 'A'..'F' / 'a'..'f', 0 otherwise */
int isxdigit (int c) {
  if (isdigit (c)) {
    return 1;
  }
  if (c >= 'A' && c <= 'F') {
    return 1;
  }
  return (c >= 'a' && c <= 'f');
}
/* 7-bit ASCII test: returns 1 when c is in 0..127, 0 otherwise */
int isascii (int c) {
  /* any bit set above the low seven (including the sign bit of a negative
   * value) puts c outside 0..127 */
  return (c & ~0x7F) == 0;
}
/* blank test: returns 1 when c is a space or a horizontal tab, 0 otherwise */
int isblank (int c) {
  if (c == ' ') {
    return 1;
  }
  return c == '\t';
}

View File

@@ -31,4 +31,30 @@ int strcmp (const char* s1, const char* s2);
/* concatenate strings */ /* concatenate strings */
char* strcat (char* dest, const char* src); char* strcat (char* dest, const char* src);
/* character classification; each function takes a character code and returns
 * non-zero when it belongs to the class, 0 otherwise */
/* letter or decimal digit */
int isalnum (int c);
/* letter */
int isalpha (int c);
/* control character */
int iscntrl (int c);
/* decimal digit */
int isdigit (int c);
/* visible (printable, non-space) character */
int isgraph (int c);
/* lowercase letter */
int islower (int c);
/* printable character, space included */
int isprint (int c);
/* visible character that is neither a letter nor a digit */
int ispunct (int c);
/* whitespace: space, \f, \n, \r, \t, \v */
int isspace (int c);
/* uppercase letter */
int isupper (int c);
/* hexadecimal digit */
int isxdigit (int c);
/* 7-bit ASCII value */
int isascii (int c);
/* space or horizontal tab */
int isblank (int c);
#endif // _LIBSTRING_STRING_H #endif // _LIBSTRING_STRING_H