about summary refs log tree commit diff
path: root/src/parse.c
diff options
context:
space:
mode:
author Elias Fleckenstein <eliasfleckenstein@web.de> 2021-12-30 14:18:15 +0100
committer Elias Fleckenstein <eliasfleckenstein@web.de> 2021-12-30 14:18:15 +0100
commit ecc06c082036aa93f6810ec21e73610c55f5a57b (patch)
tree aff479c3bc5b39ead9f65dffb01d399b341fa4ba /src/parse.c
download uwu-lang-ecc06c082036aa93f6810ec21e73610c55f5a57b.tar.xz
Initial commit
Diffstat (limited to 'src/parse.c')
-rw-r--r-- src/parse.c 325
1 files changed, 325 insertions, 0 deletions
diff --git a/src/parse.c b/src/parse.c
new file mode 100644
index 0000000..be0ee98
--- /dev/null
+++ b/src/parse.c
@@ -0,0 +1,325 @@
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "err.h"
+#include "parse.h"
+
+#define DEBUG 0 // set to 1 to compile in the parser trace (DBG) and the AST dump
+
+#if DEBUG // DBG(FN) prints FN, the current expression's type name and, for a function call whose name is set, that name; it reads the `state` variable of the function it is used in and carries its own trailing semicolon
+#define DBG(FN) printf("%s %s %s\n", FN, expression_types[state->expression->type], (state->expression->type == EX_FNCALL && state->expression->value.str_value) ? state->expression->value.str_value : "");
+#else // tracing disabled: DBG(FN) expands to nothing
+#define DBG(FN)
+#endif
+
+#if DEBUG
+/*
+ * Human-readable names of the expression types, looked up with an
+ * expression's `type` field (the table is sized to cover every value up to
+ * EX_FNCALL). Only compiled when DEBUG is enabled.
+ *
+ * The entries are string literals, which must never be written to, so both
+ * the strings and the table itself are const.
+ */
+static const char *const expression_types[EX_FNCALL + 1] = {
+	"uninitialized",
+	"integer-literal",
+	"string-literal",
+	"argument-number",
+	"function-name",
+	"function-call",
+};
+
+/*
+ * Debug helper: prints one expression on its own line, preceded by `indent`
+ * tab characters, as its type name followed by its value. Integer literals
+ * and argument numbers print their integer value; every other type prints
+ * its string value in double quotes. For a function call the children are
+ * then printed recursively, one indentation level deeper.
+ */
+static void print_expression(ParseExpression *expr, int indent)
+{
+	int remaining = indent;
+
+	while (remaining-- > 0)
+		printf("\t");
+
+	printf("%s ", expression_types[expr->type]);
+
+	switch (expr->type) {
+	case EX_INTLIT:
+	case EX_ARGNUM:
+		printf("%d\n", expr->value.int_value);
+		break;
+
+	default:
+		printf("\"%s\"\n", expr->value.str_value);
+		break;
+	}
+
+	// only function calls carry a child list
+	if (expr->type != EX_FNCALL)
+		return;
+
+	for (size_t child = 0; child < expr->num_children; child++)
+		print_expression(expr->children[child], indent + 1);
+}
+
+/*
+ * Debug helper: prints a heading, then for every function in the tree a
+ * "function <name>" line followed by its expression at indentation level 1.
+ */
+static void print_ast(AbstractSyntaxTree tree)
+{
+	printf("\n[Abstract Syntax Tree]\n\n");
+
+	size_t index = 0;
+
+	while (index < tree.num_functions) {
+		ParseFunction *current = tree.functions[index++];
+
+		printf("function %s\n", current->name);
+		print_expression(current->expression, 1);
+	}
+}
+#endif
+
+/*
+ * Appends the character c to the parser's scratch buffer, growing the
+ * allocation by one byte. No terminator is written here; buffer_size counts
+ * exactly the bytes stored so far.
+ *
+ * The realloc result is checked before it replaces the old pointer, so an
+ * allocation failure is reported instead of being written through as NULL.
+ */
+static void buffer_append(ParseState *state, char c)
+{
+	char *grown = realloc(state->buffer, state->buffer_size + 1);
+
+	// NOTE(review): assumes error() does not return, as parse_file already
+	// does after a failed fopen — confirm against err.h
+	if (! grown)
+		error("out of memory\n");
+
+	state->buffer = grown;
+	state->buffer[state->buffer_size++] = c;
+}
+
+/*
+ * Appends a terminating '\0' to the scratch buffer and returns it. The state
+ * is reset to an empty buffer (NULL, size 0), so afterwards the returned
+ * pointer is the only reference to the string.
+ */
+static char *buffer_terminate(ParseState *state)
+{
+	buffer_append(state, '\0');
+
+	char *finished = state->buffer;
+
+	state->buffer_size = 0;
+	state->buffer = NULL;
+
+	return finished;
+}
+
+/*
+ * Opens a new argument of the current expression: grows the current
+ * expression's child list by one slot, stores a freshly allocated,
+ * uninitialized expression in it and makes that child the current expression.
+ *
+ * Both allocations are checked before the parent is modified, so a failed
+ * realloc neither loses the existing child list nor leads to a NULL
+ * dereference.
+ */
+static void start_arg(ParseState *state)
+{
+	DBG(__FUNCTION__)
+
+	ParseExpression *parent = state->expression;
+
+	ParseExpression **slots = realloc(parent->children, sizeof *slots * (parent->num_children + 1));
+	ParseExpression *child = malloc(sizeof *child);
+
+	// NOTE(review): assumes error() does not return, as parse_file already
+	// does after a failed fopen — confirm against err.h
+	if (! slots || ! child)
+		error("out of memory\n");
+
+	child->type = EX_UNINIT;
+	child->parent = parent;
+
+	parent->children = slots;
+	parent->children[parent->num_children++] = child;
+
+	state->expression = child;
+}
+
+/*
+ * Handles a character seen while the current expression is a function call
+ * whose argument list is open:
+ *   ','         starts the next argument,
+ *   ')'         leaves the call by moving to its parent expression,
+ *   whitespace  is skipped.
+ * Returns false for any other character, true otherwise.
+ */
+static bool continue_arg(ParseState *state, char c)
+{
+	DBG(__FUNCTION__)
+
+	if (c == ',')
+		start_arg(state);
+	else if (c == ')')
+		state->expression = state->expression->parent;
+	// <ctype.h> functions need a value representable as unsigned char; a
+	// negative plain char would be undefined behaviour
+	else if (! isspace((unsigned char) c))
+		return false;
+
+	return true;
+}
+
+/*
+ * Closes the current expression by moving to its parent, then lets the
+ * terminating character c act on that parent.
+ *
+ * If there is a parent, c is handed to continue_arg and its verdict is
+ * returned instead of being discarded. If there is no parent, the function
+ * body is complete and only whitespace is accepted.
+ */
+static bool finish_arg(ParseState *state, char c)
+{
+	state->expression = state->expression->parent;
+
+	if (state->expression)
+		return continue_arg(state, c);
+
+	// cast: <ctype.h> functions are undefined for negative plain char values
+	return isspace((unsigned char) c) != 0;
+}
+
+/*
+ * Decides the type of a still-uninitialized expression from its first
+ * character:
+ *   digit  integer literal (the digit is buffered),
+ *   '"'    string literal,
+ *   '$'    argument number,
+ *   '&'    function name,
+ *   other  function call (the character is buffered as the start of the
+ *          name, and str_value is set to NULL to mark the name unfinished).
+ * Leading whitespace is skipped; a ',' in this position is rejected.
+ * Returns false on rejection, true otherwise.
+ */
+static bool parse_expression_init(ParseState *state, char c)
+{
+	DBG(__FUNCTION__)
+
+	if (c == ',')
+		return false;
+
+	// cast: <ctype.h> functions are undefined for negative plain char values
+	if (isspace((unsigned char) c))
+		return true;
+
+	switch (c) {
+	case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
+		state->expression->type = EX_INTLIT;
+		buffer_append(state, c);
+		return true;
+
+	case '"':
+		state->expression->type = EX_STRLIT;
+		return true;
+
+	case '$':
+		state->expression->type = EX_ARGNUM;
+		return true;
+
+	case '&':
+		state->expression->type = EX_FNNAME;
+		return true;
+
+	default:
+		state->expression->type = EX_FNCALL;
+		state->expression->value.str_value = NULL;
+		buffer_append(state, c);
+		return true;
+	}
+}
+
+/*
+ * Completes the token collected in the scratch buffer for the current
+ * expression; c is the character that ended the token.
+ *
+ * Integer literals and argument numbers are converted to an integer, every
+ * other type keeps the buffered text as its string value. A function call
+ * then gets an empty child list and either opens its first argument (c is
+ * '(') or is closed right away. Other expressions are closed, with ',' and
+ * ')' additionally acting on the parent.
+ *
+ * Returns false for an argument number without digits, for a number that
+ * does not fit in an int, or when closing the expression fails.
+ */
+static bool parse_expression_finish(ParseState *state, char c)
+{
+	DBG(__FUNCTION__)
+
+	if (state->expression->type == EX_ARGNUM && state->buffer_size == 0)
+		return false;
+
+	char *buffer_read = buffer_terminate(state);
+
+	if (state->expression->type == EX_INTLIT || state->expression->type == EX_ARGNUM) {
+		// atoi has undefined behaviour when the value is out of range;
+		// strtol reports it, so an oversized literal becomes a syntax error
+		char *end = NULL;
+
+		errno = 0;
+		long number = strtol(buffer_read, &end, 10);
+
+		bool valid = end != buffer_read && *end == '\0' && errno != ERANGE
+			&& number >= INT_MIN && number <= INT_MAX;
+
+		free(buffer_read);
+
+		if (! valid)
+			return false;
+
+		// atoi returned int, so the accepted range is the same as before
+		state->expression->value.int_value = (int) number;
+	} else {
+		state->expression->value.str_value = buffer_read;
+	}
+
+	if (state->expression->type == EX_FNCALL) {
+		state->expression->num_children = 0;
+		state->expression->children = NULL;
+
+		if (c == '(')
+			start_arg(state);
+		else
+			return finish_arg(state, c);
+	} else {
+		if (c == ',' || c == ')')
+			return finish_arg(state, c);
+		else
+			state->expression = state->expression->parent;
+	}
+
+	return true;
+}
+
+/*
+ * Feeds one character to an expression whose type is already known.
+ *
+ * A function call whose name is complete (str_value set) is inside its
+ * argument list, so the character goes to continue_arg. Otherwise the
+ * character either ends the token — a closing '"' for string literals;
+ * whitespace, ',' or ')' for everything else, plus '(' for function calls —
+ * or is appended to the scratch buffer.
+ *
+ * Returns false for a non-digit inside a number, for '&' or '$' inside a
+ * function name or call name, or when finishing the token fails.
+ */
+static bool parse_expression_continue(ParseState *state, char c)
+{
+	DBG(__FUNCTION__)
+
+	if (state->expression->type == EX_FNCALL && state->expression->value.str_value)
+		return continue_arg(state, c);
+
+	// <ctype.h> functions need a value representable as unsigned char; a
+	// negative plain char would be undefined behaviour
+	unsigned char uc = (unsigned char) c;
+
+	if (
+		state->expression->type == EX_STRLIT
+			? c == '"'
+			: (
+				(state->expression->type == EX_FNCALL && c == '(')
+				|| isspace(uc) || c == ',' || c == ')'
+			)
+	) {
+		return parse_expression_finish(state, c);
+	} else {
+		if ((state->expression->type == EX_INTLIT || state->expression->type == EX_ARGNUM) && ! isdigit(uc))
+			return false;
+
+		if ((state->expression->type == EX_FNNAME || state->expression->type == EX_FNCALL) && (c == '&' || c == '$'))
+			return false;
+
+		buffer_append(state, c);
+	}
+
+	return true;
+}
+
+/*
+ * Routes a character to the current expression: an expression that is still
+ * uninitialized has its type decided by parse_expression_init, any other is
+ * continued by parse_expression_continue. Returns the handler's result.
+ */
+static bool parse_expression(ParseState *state, char c)
+{
+	DBG(__FUNCTION__)
+
+	if (state->expression->type != EX_UNINIT)
+		return parse_expression_continue(state, c);
+
+	return parse_expression_init(state, c);
+}
+
+/*
+ * Consumes one character of a function name (used while no expression is
+ * being parsed).
+ *
+ * The characters '"', '$', ':', ',', '&', '(', ')' and digits are rejected.
+ * Any other non-whitespace character is buffered. Whitespace before a name is
+ * skipped; whitespace after a name completes it: a new function with that
+ * name and a fresh uninitialized expression is appended to the tree, and that
+ * expression becomes the current one.
+ *
+ * Returns false on a rejected character, true otherwise.
+ */
+static bool parse_function(ParseState *state, char c)
+{
+#if DEBUG
+	printf("%s\n", __FUNCTION__);
+#endif
+
+	// <ctype.h> functions need a value representable as unsigned char; a
+	// negative plain char would be undefined behaviour
+	unsigned char uc = (unsigned char) c;
+
+	if (c == '\"' || c == '$' || c == ':' || c == ',' || c == '&' || c == '(' || c == ')' || isdigit(uc))
+		return false;
+
+	if (! isspace(uc)) {
+		buffer_append(state, c);
+		return true;
+	}
+
+	if (state->buffer_size == 0)
+		return true;
+
+	char *name = buffer_terminate(state);
+
+	// allocate everything first so the tree is only touched on success
+	ParseExpression *body = malloc(sizeof *body);
+	ParseFunction *function = malloc(sizeof *function);
+	ParseFunction **functions = realloc(state->tree.functions, sizeof *functions * (state->tree.num_functions + 1));
+
+	// NOTE(review): assumes error() does not return, as parse_file already
+	// does after a failed fopen — confirm against err.h
+	if (! body || ! function || ! functions)
+		error("out of memory\n");
+
+	body->type = EX_UNINIT;
+	body->parent = NULL;
+
+	*function = (ParseFunction) {
+		.name = name,
+		.expression = body,
+	};
+
+	state->tree.functions = functions;
+	state->tree.functions[state->tree.num_functions++] = function;
+
+	state->expression = body;
+
+	return true;
+}
+
+/*
+ * Top-level dispatch for one input character: while an expression is being
+ * parsed the character goes to parse_expression, otherwise to parse_function.
+ * With DEBUG enabled the character is logged first, with whitespace shown as
+ * "<SPACE>". Returns the handler's result.
+ */
+static bool parse_character(ParseState *state, char c)
+{
+#if DEBUG
+	printf("\nparse_character ");
+
+	// cast: <ctype.h> functions are undefined for negative plain char values
+	if (isspace((unsigned char) c))
+		printf("<SPACE>");
+	else
+		printf("%c", c);
+
+	printf("\n");
+#endif
+
+	return state->expression
+		? parse_expression(state, c)
+		: parse_function(state, c);
+}
+
+/*
+ * Parses the file `filename` character by character and returns the
+ * resulting syntax tree.
+ *
+ * Failures are reported through error(): the file cannot be opened, a read
+ * error occurs, a character is rejected by the parser (reported with the
+ * line that character is on), or the input ends while a token or an
+ * expression is still open.
+ */
+AbstractSyntaxTree parse_file(const char *filename)
+{
+	ParseState state = {
+		.tree = {
+			.num_functions = 0,
+			.functions = NULL,
+		},
+		.buffer_size = 0,
+		.buffer = NULL,
+		.expression = NULL,
+	};
+
+	int lines = 1;
+
+	FILE *f = fopen(filename, "r");
+
+	if (! f)
+		error("%s: unable to open\n", filename);
+
+#if DEBUG
+	printf("[File %s]\n[Line %d]\n", filename, lines);
+#endif
+
+	while (true) {
+		// getc returns int so that EOF stays distinct from every byte value
+		int ch = getc(f);
+
+		if (ch == EOF) {
+			if (ferror(f))
+				error("%s: I/O error\n", filename);
+
+			break;
+		}
+
+		char c = (char) ch;
+
+		// remember the line c sits on: a newline that triggers a syntax
+		// error belongs to the line it ends, not to the following one
+		int char_line = lines;
+
+		if (c == '\n')
+			++lines;
+
+#if DEBUG
+		if (c == '\n')
+			printf("\n[Line %d]\n", lines);
+#endif
+
+		if (! parse_character(&state, c))
+			error("%s: syntax error in line %d\n", filename, char_line);
+	}
+
+	// leftover buffered text or an open expression means the input was cut off
+	if (state.buffer || state.expression)
+		error("%s: syntax error at end of file\n", filename);
+
+	fclose(f);
+
+#if DEBUG
+	print_ast(state.tree);
+#endif
+
+	return state.tree;
+}