mop3/ce/parser.c

#include "parser.h"
#include "arena_alloc.h"
#include "context.h"
#include "interp.h"
#include <arena.h>
#include <list.h>
#include <mprintf.h>
#include <strconv.h>
#include <string.h>

/*
 * Pratt-parser dispatch table, indexed by token class.
 *
 * For each token class the table gives:
 *   - nud:        handler used when the token starts an expression,
 *   - led:        handler used when the token follows an already parsed
 *                 left-hand expression,
 *   - precedence: binding power compared against the caller's level in
 *                 parse_precedence().
 *
 * The table is only ever read, so it is const. Fields are named explicitly so
 * the initialisers do not depend on the member order of struct parse_rule.
 */
static const struct parse_rule parse_rules[] = {
    [TOKEN_CLASS_WORD] = {.nud = word_nud, .led = NULL, .precedence = PREC_NONE},
    [TOKEN_CLASS_OPAREN] = {.nud = oparen_nud, .led = NULL, .precedence = PREC_NONE},
    [TOKEN_CLASS_CPAREN] = {.nud = NULL, .led = NULL, .precedence = PREC_NONE},
    [TOKEN_CLASS_SEMICOLON] = {.nud = NULL, .led = semicolon_led, .precedence = PREC_SEQ},
    [TOKEN_CLASS_REDIR] = {.nud = NULL, .led = redir_led, .precedence = PREC_REDIR},
    [TOKEN_CLASS_AMPERSAND] = {.nud = NULL, .led = run_bg_led, .precedence = PREC_RUN_BG},
};

/*
 * Map a list link to the token that embeds it (via its tokens_link member).
 * A NULL link yields a NULL token.
 */
static struct token* get_token(struct list_node_link* link) {
  return (link != NULL) ? list_entry(link, struct token, tokens_link) : NULL;
}

/*
 * Consume the parser's lookahead token.
 *
 * The lookahead becomes parser->current and parser->next moves on to the
 * token linked after it. When there is no lookahead, parser->current is set
 * to NULL and parser->next is left as it was.
 *
 * Returns the consumed token, or NULL at end of input.
 */
static struct token* advance(struct parser* parser) {
  struct token* consumed = parser->next;

  parser->current = consumed;

  if (consumed != NULL)
    parser->next = get_token(consumed->tokens_link.next);

  return consumed;
}

static struct ast_node* parse_precedence(struct parser* parser, int precedence) {
  struct token* token = advance(parser);

  if (token == NULL)
    return NULL;

  nud_func_t nud = parse_rules[token->class].nud;

  if (nud == NULL)
    return NULL;

  struct ast_node* left = nud(parser, token);

  while (parser->next && precedence < parse_rules[parser->next->class].precedence) {
    token = advance(parser);
    led_func_t led = parse_rules[token->class].led;

    if (led != NULL)
      left = led(parser, token, left);
  }

  return left;
}

/*
 * nud handler for a word token: builds a command node.
 *
 * `token` supplies the command name. Every directly following word token is
 * consumed as an argument; once CMD_ARGS_MAX arguments are stored, further
 * words are still consumed but not recorded. The node points into the tokens'
 * buffers rather than copying the text.
 */
struct ast_node* word_nud(struct parser* parser, struct token* token) {
  struct ast_node* cmd = arena_malloc(&arena, sizeof(*cmd));

  cmd->class = AST_NODE_CLASS_CMD;
  cmd->u.cmd.name = token->buffer;
  cmd->u.cmd.arg_count = 0;

  for (;;) {
    struct token* peek = parser->next;

    if (peek == NULL || peek->class != TOKEN_CLASS_WORD)
      break;

    struct token* word = advance(parser);

    /* Argument table is full: swallow the word without storing it. */
    if (!(cmd->u.cmd.arg_count < CMD_ARGS_MAX))
      continue;

    cmd->u.cmd.args[cmd->u.cmd.arg_count] = word->buffer;
    cmd->u.cmd.arg_count++;
  }

  return cmd;
}

struct ast_node* oparen_nud(struct parser* parser, struct token* token) {
  (void)token;

  struct ast_node* node = arena_malloc(&arena, sizeof(*node));
  node->class = AST_NODE_CLASS_SUBSHELL;

  node->u.subshell.inner = parse_precedence(parser, PREC_LOWEST);

  if (parser->next != NULL && parser->next->class == TOKEN_CLASS_CPAREN)
    advance(parser);

  return node;
}

/*
 * led handler for ';': builds a sequence node.
 *
 * `left` is the expression already parsed before the semicolon; the right
 * side is parsed at PREC_SEQ and may be NULL (for example when nothing
 * follows the semicolon). The operator token is ignored.
 */
struct ast_node* semicolon_led(struct parser* parser, struct token* token, struct ast_node* left) {
  (void)token;

  struct ast_node* seq = arena_malloc(&arena, sizeof(*seq));
  struct ast_node* rhs = parse_precedence(parser, PREC_SEQ);

  seq->class = AST_NODE_CLASS_SEQ;
  seq->u.seq.left = left;
  seq->u.seq.right = rhs;

  return seq;
}

struct ast_node* redir_led(struct parser* parser, struct token* token, struct ast_node* left) {
  (void)token;

  struct ast_node* node = arena_malloc(&arena, sizeof(*node));
  node->class = AST_NODE_CLASS_REDIR;
  node->u.redir.source = left;

  struct token* next_token = advance(parser);
  if (next_token != NULL && next_token->class == TOKEN_CLASS_WORD)
    node->u.redir.file_path = next_token->buffer;

  return node;
}

/*
 * led handler for '&': wraps the already parsed `left` expression in a
 * run-in-background node. No further tokens are consumed; `parser` and
 * `token` are only cast to void or used for nothing beyond the signature.
 */
struct ast_node* run_bg_led(struct parser* parser, struct token* token, struct ast_node* left) {
  (void)token;

  struct ast_node* background = arena_malloc(&arena, sizeof(*background));

  background->u.run_bg.expr = left;
  background->class = AST_NODE_CLASS_RUN_BG;

  return background;
}

/*
 * Translate the character following a backslash into the character it
 * denotes.
 *
 * Recognised letters: n, t, r, b, f, v map to the matching control character.
 * Every other character (including backslash and both quote characters) is
 * returned unchanged.
 */
static char handle_escape(char c) {
  static const char letters[] = {'n', 't', 'r', 'b', 'f', 'v'};
  static const char controls[] = {'\n', '\t', '\r', '\b', '\f', '\v'};

  for (unsigned k = 0; k < sizeof(letters) / sizeof(letters[0]); k++) {
    if (letters[k] == c)
      return controls[k];
  }

  /* '\\', '"', '\'' and anything unrecognised stand for themselves. */
  return c;
}

/*
 * True when `c` terminates an unquoted word: end of string, whitespace, one
 * of the operator characters, a double quote, or the comment marker.
 */
static bool tokenize_ends_word(char c) {
  if (c == '\0' || isspace((unsigned char)c))
    return true;

  switch (c) {
  case '(':
  case ')':
  case ';':
  case '>':
  case '&':
  case '"':
  case '#':
    return true;
  default:
    return false;
  }
}

/*
 * Split `text` into tokens and append them to the list at `*tokens`.
 *
 * Lexical rules:
 *   - whitespace separates tokens;
 *   - '#' ends tokenization (rest of the text is a comment);
 *   - "..." produces one word token; backslash escapes inside are translated
 *     by handle_escape(); an unterminated string runs to end of text;
 *   - ( ) ; > & each produce a single-character operator token;
 *   - any other printable run produces a word token. Inside such a word,
 *     '$' followed by digits is replaced by the text of a positional
 *     variable (index taken modulo posvar_count); a '$' not followed by a
 *     digit is kept literally;
 *   - any other character is reported with mprintf and skipped.
 *
 * Token text longer than TOKEN_MAX - 1 characters is truncated; the excess
 * input is still consumed so that it does not leak into following tokens.
 * Tokens are allocated from the arena and zero-filled, so buffers are always
 * NUL-terminated.
 */
void tokenize(struct list_node_link** tokens, const char* text) {
  const char* p = text;

  while (*p) {
    /* <ctype.h>-style classifiers need an unsigned char value. */
    if (isspace((unsigned char)*p)) {
      p++;
      continue;
    }

    if (*p == '#')
      return;

    if (*p == '"') {
      p++;
      struct token* token = arena_malloc(&arena, sizeof(*token));
      memset(token, 0, sizeof(*token));
      size_t i = 0;

      /*
       * Always scan to the closing quote. Characters that no longer fit are
       * dropped instead of aborting the scan, otherwise the tail of an
       * over-long string would be re-tokenized as unquoted input.
       */
      while (*p && *p != '"') {
        char c = *p++;

        if (c == '\\') {
          if (*p == '\0')
            break; /* trailing backslash at end of text: nothing to escape */

          c = handle_escape(*p++);
        }

        if (i < TOKEN_MAX - 1)
          token->buffer[i++] = c;
      }

      if (*p == '"')
        p++;

      token->class = TOKEN_CLASS_WORD;
      list_append(*tokens, &token->tokens_link);
      continue;
    }

    if (*p == '(' || *p == ')' || *p == ';' || *p == '>' || *p == '&') {
      struct token* token = arena_malloc(&arena, sizeof(*token));
      memset(token, 0, sizeof(*token));

      token->buffer[0] = *p;

      switch (*p) {
      case '(':
        token->class = TOKEN_CLASS_OPAREN;
        break;
      case ')':
        token->class = TOKEN_CLASS_CPAREN;
        break;
      case ';':
        token->class = TOKEN_CLASS_SEMICOLON;
        break;
      case '>':
        token->class = TOKEN_CLASS_REDIR;
        break;
      default: /* '&' is the only remaining possibility */
        token->class = TOKEN_CLASS_AMPERSAND;
        break;
      }

      list_append(*tokens, &token->tokens_link);
      p++;
      continue;
    }

    if (isprint((unsigned char)*p)) {
      struct token* token = arena_malloc(&arena, sizeof(*token));
      memset(token, 0, sizeof(*token));
      size_t i = 0;

      while (!tokenize_ends_word(*p)) {
        if (*p != '$') {
          if (i < TOKEN_MAX - 1)
            token->buffer[i++] = *p;

          p++;
          continue;
        }

        /* Positional variable reference: '$' followed by decimal digits. */
        p++;

        char numbuf[TOKEN_MAX];
        size_t ni = 0;

        while (isdigit((unsigned char)*p)) {
          /* Consume every digit but never write past numbuf. */
          if (ni < sizeof(numbuf) - 1)
            numbuf[ni++] = *p;

          p++;
        }
        numbuf[ni] = '\0';

        if (ni == 0) {
          /* Lone '$': keep it as an ordinary character. */
          if (i < TOKEN_MAX - 1)
            token->buffer[i++] = '$';

          continue;
        }

        /* With no positional variables the reference expands to nothing;
         * this also avoids a modulo by zero. */
        if (!(posvar_count > 0))
          continue;

        uint32_t idx = str_to_uint32(numbuf) % posvar_count;
        struct posvar* posvar = &posvars[idx];

        for (size_t j = 0; posvar->buf[j] && i < TOKEN_MAX - 1; j++)
          token->buffer[i++] = posvar->buf[j];
      }

      token->class = TOKEN_CLASS_WORD;
      list_append(*tokens, &token->tokens_link);
      continue;
    }

    mprintf("ERROR unknown character '%c'\n", *p);
    p++;
  }
}

void parse_and_execute(struct list_node_link* tokens) {
  struct parser parser;
  parser.current = NULL;
  parser.next = get_token(tokens);

  while (parser.next != NULL) {
    struct ast_node* root = parse_precedence(&parser, PREC_NONE);

    if (root != NULL) {
      struct context context;
      memset(&context, 0, sizeof(context));
      execute(root, &context, false);

      if (context.strbuf.items != NULL)
        mprintf("%.*s", (int)context.strbuf.count, context.strbuf.items);
    }
  }
}