diff options
author | Elias Fleckenstein <eliasfleckenstein@web.de> | 2021-12-30 14:18:15 +0100 |
---|---|---|
committer | Elias Fleckenstein <eliasfleckenstein@web.de> | 2021-12-30 14:18:15 +0100 |
commit | ecc06c082036aa93f6810ec21e73610c55f5a57b (patch) | |
tree | aff479c3bc5b39ead9f65dffb01d399b341fa4ba /src | |
download | uwu-lang-ecc06c082036aa93f6810ec21e73610c55f5a57b.tar.xz |
Initial commit
Diffstat (limited to 'src')
-rw-r--r-- | src/collect.c | 406 | ||||
-rw-r--r-- | src/collect.h | 8 | ||||
-rw-r--r-- | src/err.h | 17 | ||||
-rw-r--r-- | src/expression.h | 14 | ||||
-rw-r--r-- | src/main.c | 56 | ||||
-rw-r--r-- | src/parse.c | 325 | ||||
-rw-r--r-- | src/parse.h | 48 | ||||
-rw-r--r-- | src/util.h | 17 | ||||
-rw-r--r-- | src/vm.c | 52 | ||||
-rw-r--r-- | src/vm.h | 6 |
10 files changed, 949 insertions, 0 deletions
diff --git a/src/collect.c b/src/collect.c new file mode 100644 index 0000000..fadb9d1 --- /dev/null +++ b/src/collect.c @@ -0,0 +1,406 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <libgen.h> +#include <dlfcn.h> +#include "err.h" +#include "util.h" +#include "collect.h" +#include "parse.h" + +#define DEBUG 0 + +// helper functions + +static char *wrap_name_func(const char *name, char *(*fn)(char *)) +{ + char *copy = strdup(name); + char *result = fn(copy); + char *result_copy = strdup(result); + + free(copy); + return result_copy; +} + +static char *basename_wrapper(const char *name) +{ + return wrap_name_func(name, &basename); +} + +static char *dirname_wrapper(const char *name) +{ + return wrap_name_func(name, &dirname); +} + +static bool file_exists(const char *filename) +{ + FILE *f = fopen(filename, "r"); + + if (f) { + fclose(f); + return true; + } + + return false; +} + +// type definitions + +typedef struct +{ + char *name; + UwUVMFunction *ref; +} FunctionLink; + +typedef struct +{ + char *path; // path without file extension + char *filename; // path with file extension + char *environment; // directory path + + UwUVMModuleType type; // native (.so) or plain (.uwu) + + FunctionLink *functions; // required functions + size_t num_functions; // number of required functions + size_t loaded_functions; // number of loaded functions (<= num_functions) + + union + { + AbstractSyntaxTree ast; // abstract syntax tree generated by parser (for plain modules) + void *lib; // dlopen() shared object handle (for native modules) + } handle; +} Module; + +typedef struct +{ + Module **modules; // loaded modules + size_t num_modules; // count for modules + + char *std_path; // path to standard library + + UwUVMProgram program; // the result program +} CollectorState; + +// functions + +// returns mallocated string +static inline char *get_filename(const char *module_path) +{ + const char *try_names[3] = { + "%s", + "%s.uwu", + "%s.so", + }; + + char *filename; + + for (int i = 0; i < 3; i++) { + filename = asprintf_wrapper(try_names[i], module_path); + + if (file_exists(filename)) + return filename; + else + free(filename); + } + + return NULL; +} + +// module_path is a mallocated string +static Module *require_module(CollectorState *state, char *module_path) +{ + for (size_t i = 0; i < state->num_modules; i++) { + Module *module = state->modules[i]; + + if (strcmp(module_path, module->path) == 0) { + free(module_path); + return module; + } + } + + char *filename = get_filename(module_path); + + if (! filename) + error("error: module %s not found\n", module_path); + + size_t filename_len = strlen(filename); + UwUVMModuleType type = (filename_len >= 3 && strcmp(filename + filename_len - 3, ".so") == 0) ? MODULE_NATIVE : MODULE_PLAIN; + + state->modules = realloc(state->modules, sizeof *state->modules * ++state->num_modules); + Module *module = state->modules[state->num_modules - 1] = malloc(sizeof *module); + + *module = (Module) { + .path = module_path, + .filename = filename, + .environment = dirname_wrapper(module_path), + + .type = type, + + .functions = NULL, + .num_functions = 0, + .loaded_functions = 0, + }; + + if (type == MODULE_PLAIN) { + module->handle.ast = parse_file(filename); + } else { + state->program.libraries = realloc(state->program.libraries, sizeof(void *) * ++state->program.num_libraries); + state->program.libraries[state->program.num_libraries - 1] = module->handle.lib = dlopen(filename, RTLD_LAZY); + + char *err = dlerror(); + if (err) + error("%s\n", err); + } + + return module; +} + +static UwUVMFunction *require_function(CollectorState *state, Module *module, const char *name) +{ + for (size_t i = 0; i < module->num_functions; i++) { + FunctionLink *link = &module->functions[i]; + + if (strcmp(link->name, name) == 0) + return link->ref; + } + + UwUVMFunction *ref = malloc(sizeof *ref); + ref->type = module->type; + + state->program.functions = realloc(state->program.functions, sizeof *state->program.functions * ++state->program.num_functions); + state->program.functions[state->program.num_functions - 1] = ref; + + module->functions = realloc(module->functions, sizeof *module->functions * ++module->num_functions); + module->functions[module->num_functions - 1] = (FunctionLink) { + .name = strdup(name), + .ref = ref, + }; + + return ref; +} + +static UwUVMFunction *resolve_function(CollectorState *state, Module *caller_module, const char *full_name) +{ + size_t len = strlen(full_name); + + const char *fnname; + for (fnname = &full_name[len - 1]; *fnname != ':' && fnname > full_name; fnname--) + ; + + if (*fnname == ':') + fnname++; + + if (*fnname == '\0') + error("error: empty function name\n"); + + Module *callee_module; + + if (fnname == full_name) { + callee_module = caller_module; + } else { + const char *caller_path = caller_module->environment; + const char *callee_name = full_name; + + if (*callee_name == ':') { + caller_path = state->std_path; + callee_name++; + } + + size_t path_len = fnname - callee_name; + char callee_path[path_len]; + + for (size_t i = 0; i < path_len; i++) + callee_path[i] = (i == path_len - 1) ? '\0' + : (callee_name[i] == ':') ? '/' + : callee_name[i]; + + callee_module = require_module(state, asprintf_wrapper("%s/%s", caller_path, callee_path)); + } + + return require_function(state, callee_module, fnname); +} + +static void translate_expression(CollectorState *state, Module *module, UwUVMExpression *vm_expr, ParseExpression *parse_expr) +{ + UwUVMFunction *vm_function; + + if (parse_expr->type == EX_FNNAME || parse_expr->type == EX_FNCALL) { + vm_function = resolve_function(state, module, parse_expr->value.str_value); + free(parse_expr->value.str_value); + } + + switch (vm_expr->type = parse_expr->type) { + case EX_INTLIT: + case EX_ARGNUM: + vm_expr->value.int_value = parse_expr->value.int_value; + break; + + case EX_STRLIT: + vm_expr->value.str_value = parse_expr->value.str_value; + break; + + case EX_FNNAME: + vm_expr->value.ref_value = vm_function; + break; + + case EX_FNCALL: + vm_expr->value.cll_value.function = vm_function; + vm_expr->value.cll_value.args = malloc(sizeof(UwUVMExpression) * parse_expr->num_children); + vm_expr->value.cll_value.num_args = parse_expr->num_children; + + for (size_t i = 0; i < parse_expr->num_children; i++) + translate_expression(state, module, &vm_expr->value.cll_value.args[i], parse_expr->children[i]); + + if (parse_expr->children) + free(parse_expr->children); + break; + + default: + break; + } + + free(parse_expr); +} + +static void load_functions(CollectorState *state, Module *module) +{ + for (; module->loaded_functions < module->num_functions; module->loaded_functions++) { + FunctionLink *linkptr = &module->functions[module->loaded_functions]; + FunctionLink link = *linkptr; + + bool found = false; + + if (module->type == MODULE_PLAIN) { + ParseFunction **function = NULL; + + for (size_t i = 0; i < module->handle.ast.num_functions; i++) { + ParseFunction **fn = &module->handle.ast.functions[i]; + + if (*fn && strcmp((*fn)->name, link.name) == 0) { + function = fn; + break; + } + } + + if (function) { + found = true; + linkptr = NULL; + + translate_expression(state, module, link.ref->value.plain = malloc(sizeof(UwUVMExpression)), (*function)->expression); + free((*function)->name); + free(*function); + + *function = NULL; + } + } else { + char *symbol = asprintf_wrapper("uwu_%s", link.name); + linkptr->ref->value.native = dlsym(module->handle.lib, symbol); + + if (! dlerror()) + found = true; + + free(symbol); + } + + if (! found) + error("error: no function %s in module %s\n", link.name, module->filename); + } +} + +static void free_expression(ParseExpression *expr) +{ + if (expr->type == EX_FNCALL) { + for (size_t i = 0; i < expr->num_children; i++) + free_expression(expr->children[i]); + + if (expr->children) + free(expr->children); + } + + if (expr->type != EX_INTLIT && expr->type != EX_ARGNUM) + free(expr->value.str_value); + + free(expr); +} + +UwUVMProgram create_program(const char *progname, const char *modname) +{ + char *prog_dirname = dirname_wrapper(progname); + char *api_path = asprintf_wrapper("%s/api/api.so", prog_dirname); + + CollectorState state = { + .modules = NULL, + .num_modules = 0, + .std_path = asprintf_wrapper("%s/std", prog_dirname), + .program = { + .api_library = dlopen(api_path, RTLD_NOW | RTLD_GLOBAL), + .main_function = NULL, + .functions = NULL, + .num_functions = 0, + .libraries = NULL, + .num_libraries = 0, + }, + }; + + free(prog_dirname); + free(api_path); + + state.program.main_function = require_function(&state, require_module(&state, strdup(modname)), "main"); + + while (true) { + bool fully_loaded = true; + + for (size_t i = 0; i < state.num_modules; i++) { + Module *module = state.modules[i]; + +#if DEBUG + printf("%s %lu/%lu\n", module->filename, module->loaded_functions, module->num_functions); +#endif + + if (module->loaded_functions < module->num_functions) { + fully_loaded = false; + load_functions(&state, module); + } + } + + if (fully_loaded) + break; + } + + free(state.std_path); + + for (size_t i = 0; i < state.num_modules; i++) { + Module *module = state.modules[i]; + + free(module->path); + free(module->filename); + free(module->environment); + + for (size_t f = 0; f < module->num_functions; f++) + free(module->functions[f].name); + + free(module->functions); + + if (module->type == MODULE_PLAIN) { + for (size_t f = 0; f < module->handle.ast.num_functions; f++) { + ParseFunction *function = module->handle.ast.functions[f]; + + if (function) { + free_expression(function->expression); + free(function->name); + free(function); + } + } + + if (module->handle.ast.functions) + free(module->handle.ast.functions); + } + + free(module); + } + + free(state.modules); + + return state.program; +} diff --git a/src/collect.h b/src/collect.h new file mode 100644 index 0000000..da118a5 --- /dev/null +++ b/src/collect.h @@ -0,0 +1,8 @@ +#ifndef _COLLECT_H_ +#define _COLLECT_H_ + +#include "../api/vm.h" + +UwUVMProgram create_program(const char *progname, const char *modname); + +#endif diff --git a/src/err.h b/src/err.h new file mode 100644 index 0000000..1dccbf9 --- /dev/null +++ b/src/err.h @@ -0,0 +1,17 @@ +#ifndef _ERR_H_ +#define _ERR_H_ + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> + +static inline void error(const char *format, ...) +{ + va_list args; + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); + exit(1); +} + +#endif diff --git a/src/expression.h b/src/expression.h new file mode 100644 index 0000000..da74620 --- /dev/null +++ b/src/expression.h @@ -0,0 +1,14 @@ +#ifndef _EXPRESSION_H_ +#define _EXPRESSION_H_ + +typedef enum +{ + EX_UNINIT, + EX_INTLIT, + EX_STRLIT, + EX_ARGNUM, + EX_FNNAME, + EX_FNCALL, +} ExpressionType; + +#endif diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..60c22ba --- /dev/null +++ b/src/main.c @@ -0,0 +1,56 @@ +#include "err.h" +#include "vm.h" + +int main(int argc, char *argv[]) +{ + if (argc < 2) + error("usage: %s <module>\n", argv[0]); + + vm_run_file(argv[0], argv[1]); + return 0; +} + +/* + +0123 +"asd" +$arg +&fnname +func(asd) + +:int:add +:str:cat +:boo:and +:arr:arr +:set:set + +integer::add() +integer::sub() +integer::mul() +integer::div() +integer::mod() +integer::pow() + +string::concat() +string::split() +string::find() + +array::array() +array::select() +array::insert() +array::length() +array::reduce() +array::map() + +set::set() +set::pair() +set::select() +set::insert() +set::remove() +set::contains() + +boolean::and() +boolean::or() +boolean::xor() + +*/ diff --git a/src/parse.c b/src/parse.c new file mode 100644 index 0000000..be0ee98 --- /dev/null +++ b/src/parse.c @@ -0,0 +1,325 @@ +#include <stdio.h> +#include <stdlib.h> +#include <ctype.h> +#include "err.h" +#include "parse.h" + +#define DEBUG 0 + +#if DEBUG +#define DBG(FN) printf("%s %s %s\n", FN, expression_types[state->expression->type], (state->expression->type == EX_FNCALL && state->expression->value.str_value) ? state->expression->value.str_value : ""); +#else +#define DBG(FN) +#endif + +#if DEBUG +static char *expression_types[EX_FNCALL + 1] = { + "uninitialized", + "integer-literal", + "string-literal", + "argument-number", + "function-name", + "function-call", +}; + +static void print_expression(ParseExpression *expr, int indent) +{ + for (int i = 0; i < indent; i++) + printf("\t"); + + printf("%s ", expression_types[expr->type]); + + if (expr->type == EX_INTLIT || expr->type == EX_ARGNUM) + printf("%d\n", expr->value.int_value); + else + printf("\"%s\"\n", expr->value.str_value); + + if (expr->type == EX_FNCALL) + for (size_t i = 0; i < expr->num_children; i++) + print_expression(expr->children[i], indent + 1); +} + +static void print_ast(AbstractSyntaxTree tree) +{ + printf("\n[Abstract Syntax Tree]\n\n"); + + for (size_t f = 0; f < tree.num_functions; f++) { + ParseFunction *function = tree.functions[f]; + + printf("function %s\n", function->name); + print_expression(function->expression, 1); + } +} +#endif + +static void buffer_append(ParseState *state, char c) +{ + state->buffer = realloc(state->buffer, ++state->buffer_size); + state->buffer[state->buffer_size - 1] = c; +} + +static char *buffer_terminate(ParseState *state) +{ + buffer_append(state, '\0'); + + char *buffer = state->buffer; + + state->buffer = NULL; + state->buffer_size = 0; + + return buffer; +} + +static void start_arg(ParseState *state) +{ + DBG(__FUNCTION__) + + ParseExpression *parent = state->expression; + parent->children = realloc(parent->children, sizeof *parent->children * ++parent->num_children); + ParseExpression *child = parent->children[parent->num_children - 1] = malloc(sizeof *child); + + child->type = EX_UNINIT; + child->parent = parent; + + state->expression = child; +} + +static bool continue_arg(ParseState *state, char c) +{ + DBG(__FUNCTION__) + + if (c == ',') + start_arg(state); + else if (c == ')') + state->expression = state->expression->parent; + else if (! isspace(c)) + return false; + + return true; +} + +static bool finish_arg(ParseState *state, char c) +{ + state->expression = state->expression->parent; + + if (state->expression) + continue_arg(state, c); + else if (! isspace(c)) + return false; + + return true; +} + +static bool parse_expression_init(ParseState *state, char c) +{ + DBG(__FUNCTION__) + + if (c == ',') + return false; + + if (isspace(c)) + return true; + + switch (c) { + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': + state->expression->type = EX_INTLIT; + buffer_append(state, c); + return true; + + case '"': + state->expression->type = EX_STRLIT; + return true; + + case '$': + state->expression->type = EX_ARGNUM; + return true; + + case '&': + state->expression->type = EX_FNNAME; + return true; + + default: + state->expression->type = EX_FNCALL; + state->expression->value.str_value = NULL; + buffer_append(state, c); + return true; + } +} + +static bool parse_expression_finish(ParseState *state, char c) +{ + DBG(__FUNCTION__) + + if (state->expression->type == EX_ARGNUM && state->buffer_size == 0) + return false; + + char *buffer_read = buffer_terminate(state); + + if (state->expression->type == EX_INTLIT || state->expression->type == EX_ARGNUM) { + state->expression->value.int_value = atoi(buffer_read); + free(buffer_read); + } else { + state->expression->value.str_value = buffer_read; + } + + if (state->expression->type == EX_FNCALL) { + state->expression->num_children = 0; + state->expression->children = NULL; + + if (c == '(') + start_arg(state); + else + return finish_arg(state, c); + } else { + if (c == ',' || c == ')') + return finish_arg(state, c); + else + state->expression = state->expression->parent; + } + + return true; +} + +static bool parse_expression_continue(ParseState *state, char c) +{ + DBG(__FUNCTION__) + + if (state->expression->type == EX_FNCALL && state->expression->value.str_value) + return continue_arg(state, c); + + if ( + state->expression->type == EX_STRLIT + ? c == '"' + : ( + (state->expression->type == EX_FNCALL && c == '(') + || isspace(c) || c == ',' || c == ')' + ) + ) { + return parse_expression_finish(state, c); + } else { + if ((state->expression->type == EX_INTLIT || state->expression->type == EX_ARGNUM) && ! isdigit(c)) + return false; + + if ((state->expression->type == EX_FNNAME || state->expression->type == EX_FNCALL) && (c == '&' || c == '$')) + return false; + + buffer_append(state, c); + } + + return true; +} + +static bool parse_expression(ParseState *state, char c) +{ + DBG(__FUNCTION__) + + return state->expression->type == EX_UNINIT + ? parse_expression_init(state, c) + : parse_expression_continue(state, c); +} + +static bool parse_function(ParseState *state, char c) +{ +#if DEBUG + printf("%s\n", __FUNCTION__); +#endif + + if (c == '\"' || c == '$' || c == ':' || c == ',' || c == '&' || c == '(' || c == ')' || isdigit(c)) + return false; + + if (! isspace(c)) { + buffer_append(state, c); + return true; + } + + if (state->buffer_size == 0) + return true; + + char *name = buffer_terminate(state); + + state->expression = malloc(sizeof *state->expression); + state->expression->type = EX_UNINIT; + state->expression->parent = NULL; + + state->tree.functions = realloc(state->tree.functions, sizeof *state->tree.functions * ++state->tree.num_functions); + *(state->tree.functions[state->tree.num_functions - 1] = malloc(sizeof(ParseFunction))) = (ParseFunction) { + .name = name, + .expression = state->expression, + }; + + return true; +} + +static bool parse_character(ParseState *state, char c) +{ +#if DEBUG + printf("\nparse_character "); + + if (isspace(c)) + printf("<SPACE>"); + else + printf("%c", c); + + printf("\n"); +#endif + + return state->expression + ? parse_expression(state, c) + : parse_function(state, c); +} + +AbstractSyntaxTree parse_file(const char *filename) +{ + ParseState state = { + .tree = { + .num_functions = 0, + .functions = NULL, + }, + .buffer_size = 0, + .buffer = NULL, + .expression = NULL, + }; + + int lines = 1; + + FILE *f = fopen(filename, "r"); + + if (! f) + error("%s: unable to open\n", filename); + +#if DEBUG + printf("[File %s]\n[Line %d]\n", filename, lines); +#endif + + while (true) { + char c = getc(f); + + if (feof(f)) + break; + + if (ferror(f)) + error("%s: I/O error\n", filename); + + if (c == '\n') + ++lines; + +#if DEBUG + if (c == '\n') + printf("\n[Line %d]\n", lines); +#endif + + if (! parse_character(&state, c)) + error("%s: syntax error in line %d\n", filename, lines); + } + + if (state.buffer || state.expression) + error("%s: syntax error at end of file\n", filename); + + fclose(f); + +#if DEBUG + print_ast(state.tree); +#endif + + return state.tree; +} diff --git a/src/parse.h b/src/parse.h new file mode 100644 index 0000000..861ad1c --- /dev/null +++ b/src/parse.h @@ -0,0 +1,48 @@ +#ifndef _PARSE_H_ +#define _PARSE_H_ + +#include <stddef.h> +#include <stdbool.h> +#include "expression.h" + +typedef struct ParseExpression +{ + ExpressionType type; + union + { + int int_value; + char *str_value; + } value; + size_t num_children; + struct ParseExpression **children; + struct ParseExpression *parent; +} ParseExpression; + +typedef struct +{ + char *name; + ParseExpression *expression; +} ParseFunction; + +typedef struct +{ + size_t num_functions; + ParseFunction **functions; +} AbstractSyntaxTree; + +typedef struct +{ + AbstractSyntaxTree tree; + + size_t buffer_size; + char *buffer; + + ParseExpression *expression; + + int lines; + bool success; +} ParseState; + +AbstractSyntaxTree parse_file(const char *filename); + +#endif diff --git a/src/util.h b/src/util.h new file mode 100644 index 0000000..b0baa37 --- /dev/null +++ b/src/util.h @@ -0,0 +1,17 @@ +#ifndef _UTIL_H_ +#define _UTIL_H_ + +#include <stdio.h> +#include <stdarg.h> + +static inline char *asprintf_wrapper(const char *format, ...) +{ + va_list args; + va_start(args, format); + char *ptr; + vasprintf(&ptr, format, args); + va_end(args); + return ptr; +} + +#endif diff --git a/src/vm.c b/src/vm.c new file mode 100644 index 0000000..7f0db99 --- /dev/null +++ b/src/vm.c @@ -0,0 +1,52 @@ +#include <dlfcn.h> +#include <stdlib.h> +#include <stdio.h> +#include "collect.h" +#include "vm.h" + +static void free_expression(UwUVMExpression *expr) +{ + if (expr->type == EX_FNCALL) { + for (size_t i = 0; i < expr->value.cll_value.num_args; i++) + free_expression(&expr->value.cll_value.args[i]); + + free(expr->value.cll_value.args); + } + + if (expr->type == EX_STRLIT) + free(expr->value.str_value); +} + +void vm_run_file(const char *progname, const char *modname) +{ + UwUVMProgram program = create_program(progname, modname); + UwUVMValue result = ((UwUVMValue (*)(UwUVMFunction *, UwUVMArgs args)) dlsym(program.api_library, "uwuvm_run_function"))(program.main_function, (UwUVMArgs) {.num = 0, .evaluated = NULL, .unevaluated = NULL, .super = NULL}); + + char *str = ((char *(*)(UwUVMValue)) dlsym(program.api_library, "uwustr_get"))(result); + + printf("%s\n", str); + free(str); + + ((void (*)(UwUVMValue)) dlsym(program.api_library, "uwuvm_free_value"))(result); + + for (size_t i = 0; i < program.num_functions; i++) { + UwUVMFunction *function = program.functions[i]; + + if (function->type == MODULE_PLAIN) { + free_expression(function->value.plain); + free(function->value.plain); + } + + free(function); + } + + free(program.functions); + + for (size_t i = 0; i < program.num_libraries; i++) + dlclose(program.libraries[i]); + + if (program.libraries) + free(program.libraries); + + dlclose(program.api_library); +} diff --git a/src/vm.h b/src/vm.h new file mode 100644 index 0000000..5e23c8b --- /dev/null +++ b/src/vm.h @@ -0,0 +1,6 @@ +#ifndef _VM_H_ +#define _VM_H_ + +void vm_run_file(const char *progname, const char *modname); + +#endif |