Compare commits

...

3 Commits

Author SHA1 Message Date
fa8a774115 tb Hex/byte escapes 2025-09-29 23:18:27 +02:00
39981fdbbf tb String escaping 2025-09-29 23:12:19 +02:00
10711f7fb3 tb Fix string tokenization bug (bad classification) 2025-09-29 22:58:40 +02:00
2 changed files with 48 additions and 4 deletions

View File

@@ -4,6 +4,7 @@
#include <ansiq/all.h> #include <ansiq/all.h>
#include <args/args.h> #include <args/args.h>
#include <string/string.h> #include <string/string.h>
#include <string/conv.h>
#include <string/char.h> #include <string/char.h>
#include <sync/spinlock.h> #include <sync/spinlock.h>
#include <syscall/syscall.h> #include <syscall/syscall.h>

View File

@@ -34,7 +34,47 @@ void tz_tokenize(Tokenizer *tz) {
i++; i++;
size_t j = 0; size_t j = 0;
while (i < len && tz->str[i] != '\'') { while (i < len && tz->str[i] != '\'') {
str[j++] = tz->str[i++]; if (tz->str[i] == '\\') {
if (i + 1 < len) {
i++;
char c;
switch (tz->str[i]) {
case 'n': c = '\n'; break;
case 't': c = '\t'; break;
case 'r': c = '\r'; break;
case '\\': c = '\\'; break;
case '\'': c = '\''; break;
case '"': c = '"'; break;
case 'x': {
if (i + 2 < len) {
char buf[3];
buf[0] = tz->str[i + 1];
buf[1] = tz->str[i + 2];
buf[2] = '\0';
char *endp;
uint8_t b = (uint8_t)string_conv_strtoul(buf, &endp, 16);
c = *(char *)&b;
i += 2;
}
} break;
default: c = tz->str[i]; break;
}
if (j + 1 < TZ_MAX_TK) {
str[j++] = c;
}
} else {
if (j + 1 < TZ_MAX_TK) {
str[j++] = '\\';
}
}
} else {
if (j + 1 < TZ_MAX_TK) {
str[j++] = tz->str[i];
}
}
i++;
} }
Token *tk = umalloc(sizeof(*tk)); Token *tk = umalloc(sizeof(*tk));
tk->str = str; tk->str = str;
@@ -44,8 +84,11 @@ void tz_tokenize(Tokenizer *tz) {
char *tkstr = umalloc(TZ_MAX_TK); char *tkstr = umalloc(TZ_MAX_TK);
string_memset(tkstr, 0, TZ_MAX_TK); string_memset(tkstr, 0, TZ_MAX_TK);
size_t j = 0; size_t j = 0;
while (i < len && !string_chr_isspace(tz->str[i])) { while (i < len && !string_chr_isspace(tz->str[i]) && tz->str[i] != '\'') {
tkstr[j++] = tz->str[i++]; if (j + 1 < TZ_MAX_TK) {
tkstr[j++] = tz->str[i];
}
i++;
} }
Token *tk = umalloc(sizeof(*tk)); Token *tk = umalloc(sizeof(*tk));
tk->str = tkstr; tk->str = tkstr;
@@ -58,7 +101,7 @@ void tz_tokenize(Tokenizer *tz) {
void tz_classify(Tokenizer *tz) { void tz_classify(Tokenizer *tz) {
Token *tk, *tktmp; Token *tk, *tktmp;
LL_FOREACH_SAFE(tz->tokens, tk, tktmp) { LL_FOREACH_SAFE(tz->tokens, tk, tktmp) {
if (tk->str[0] == '"') { if (tk->str[0] == '\'') {
tk->type = TOK_STRING; tk->type = TOK_STRING;
} else if (tk->str[0] == '%') { } else if (tk->str[0] == '%') {
RtCmd *cmd, *cmdtmp; RtCmd *cmd, *cmdtmp;