#include #include #include "lex.h" #define CHAR_COMMENT '#' static bool source_peek(struct source *src, char *c) { if (src->offset >= src->contents.len) return false; *c = src->contents.ptr[src->offset]; return true; } static void source_next(struct source *src) { assert(src->offset < src->contents.len); src->offset++; } static bool source_get(struct source *src, char *c) { if (!source_peek(src, c)) return false; source_next(src); return true; } static void source_unget(struct source *src) { assert(src->offset); src->offset--; } static struct range loc_from(struct source *src, size_t start) { return (struct range) { .start = start, .end = src->offset }; } static struct range loc_last(struct source *src) { return loc_from(src, src->offset-1); } static str str_from(struct source *src, size_t start) { return (str) { src->offset - start, src->contents.ptr + start }; } static bool is_blank_char(char c) { return isblank(c) || c == '\n'; } static bool is_num_start_char(char c) { return isdigit(c) || c == '+' || c == '-' || c == '.'; } static bool is_num_char(char c) { return isxdigit(c) || c == '+' || c == '-' || c == '.'; } static bool is_name_char(char c) { return isprint(c) && !is_blank_char(c) && c != CHAR_COMMENT && c != '(' && c != ')'; } static void source_skip(struct source *src) { char c; while (source_peek(src, &c) && is_blank_char(c)) source_next(src); if (source_peek(src, &c) && c == CHAR_COMMENT) while (source_get(src, &c) && c != '\n') ; } static str lex_name(struct source *src) { size_t start = src->offset; char c; while (source_peek(src, &c) && is_name_char(c)) source_next(src); return str_from(src, start); } static str lex_string(struct source *src, size_t start) { strbuf buf = { .cap = 32 }; char c; while (source_get(src, &c)) { if (c == '\"') { str s = arraybuf_cast(buf); arraybuf_insert(&src->strings, s); return s; } // TODO: more escapes if (c == '\\') { if (!source_get(src, &c)) source_error(src, loc_last(src), "unterminated escape"); switch (c) { case '\\': c = '\\'; break; case '\"': c = '\"'; break; default: source_error(src, loc_last(src), "unknown escape"); break; } } arraybuf_insert(&buf, c); } source_error(src, loc_from(src, start), "unterminated string"); } static double lex_number(struct source *src) { size_t start = src->offset; char c; while (source_peek(src, &c) && is_num_char(c)) source_next(src); double num; if (!str_parse_double(str_from(src, start), &num)) source_error(src, loc_from(src, start), "invalid number"); return num; } enum op_category op_category_tab[] = { #define OP_TYPE_CAT(name, sym, cat) OPC_##cat, OP_TYPES(OP_TYPE_CAT) #undef OP_TYPE_CAT }; str op_name_tab[] = { #define OP_TYPE_NAME(name, sym, cat) S(name), OP_TYPES(OP_TYPE_NAME) #undef OP_TYPE_NAME }; static bool lex_op_type(str in_sym, enum op_type *op) { if (0) {} #define OP_TYPE_CASE(sym, name, cat) else if (array_eq(in_sym, S(sym))) { *op = OP_##name; return true; } OP_TYPES(OP_TYPE_CASE) #undef OP_TYPE_CASE else return false; } bool lex_token(struct source *src, struct token *tok) { source_skip(src); size_t start = src->offset; char c; if (!source_get(src, &c)) return false; switch (c) { case '(': { tok->type = TOKEN_OP; source_skip(src); size_t start_name = src->offset; str name = lex_name(src); tok->op.name_loc = loc_from(src, start_name); if (!lex_op_type(name, &tok->op.type)) source_error(src, tok->op.name_loc, "unknown operation"); arraybuf(struct token) children = {}; while ((source_skip(src), !(source_peek(src, &c) && c == ')'))) { arraybuf_grow(&children, 1); if (!lex_token(src, &children.ptr[children.len++])) source_error(src, loc_from(src, start), "unterminated operation"); } source_next(src); array_assign(&tok->op.children, children); break; } case '$': tok->type = TOKEN_VAR; tok->var = lex_name(src); break; case '"': tok->type = TOKEN_STRING; tok->string = lex_string(src, start); break; default: source_unget(src); if (is_num_start_char(c)) { tok->type = TOKEN_NUMBER; tok->number = lex_number(src); } else if (is_name_char(c)) { tok->type = TOKEN_IDENT; tok->ident = lex_name(src); } else { source_error(src, loc_last(src), "expected token"); } break; } tok->loc = loc_from(src, start); return true; } void free_token(struct token *tok) { if (tok->type != TOKEN_OP) return; for (size_t i = 0; i < tok->op.children.len; i++) free_token(&tok->op.children.ptr[i]); free(tok->op.children.ptr); }