From eddc4693e49f70cd214b7645cb9fce54a89fbb6c Mon Sep 17 00:00:00 2001 From: Michael Forney Date: Mon, 11 Feb 2019 18:43:18 -0800 Subject: Initial import --- .gitignore | 6 + LICENSE | 36 ++ Makefile | 86 +++ README.md | 17 + arg.h | 18 + config.def.h | 27 + decl.c | 897 ++++++++++++++++++++++++++++++++ decl.h | 37 ++ deps.mk | 22 + driver.c | 423 +++++++++++++++ emit.h | 49 ++ eval.c | 134 +++++ eval.h | 1 + expr.c | 874 +++++++++++++++++++++++++++++++ expr.h | 99 ++++ htab.c | 137 +++++ htab.h | 13 + init.c | 279 ++++++++++ init.h | 13 + main.c | 67 +++ ops.h | 108 ++++ pp.c | 144 +++++ pp.h | 8 + qbe.c | 1153 +++++++++++++++++++++++++++++++++++++++++ runtests | 20 + scan.c | 369 +++++++++++++ scan.h | 4 + scope.c | 91 ++++ scope.h | 21 + siphash.c | 165 ++++++ stmt.c | 280 ++++++++++ stmt.h | 4 + tests/basic.c | 3 + tests/basic.qbe | 6 + tests/compound-assignment.c | 4 + tests/compound-assignment.qbe | 18 + tests/const-expr-div.c | 1 + tests/const-expr-div.qbe | 1 + tests/const-expr-mod.c | 1 + tests/const-expr-mod.qbe | 1 + tests/const-expr-shr.c | 1 + tests/const-expr-shr.qbe | 1 + tests/float-promote.c | 8 + tests/float-promote.qbe | 11 + tests/float-to-uint32.c | 4 + tests/float-to-uint32.qbe | 9 + tests/float-to-uint64.c | 4 + tests/float-to-uint64.qbe | 18 + tests/for-loop.c | 6 + tests/for-loop.qbe | 21 + tests/global-align.c | 1 + tests/global-align.qbe | 1 + tests/hello.c | 5 + tests/hello.qbe | 9 + tests/local-align.c | 3 + tests/local-align.qbe | 7 + tests/local-init.c | 6 + tests/local-init.qbe | 22 + tests/struct-copy.c | 8 + tests/struct-copy.qbe | 21 + tests/struct-passing.c | 11 + tests/struct-passing.qbe | 7 + tests/subtract-pointer.c | 3 + tests/subtract-pointer.qbe | 17 + tests/switch.c | 10 + tests/switch.qbe | 62 +++ tests/tentative.c | 2 + tests/tentative.qbe | 1 + tests/typedef-name.c | 4 + tests/typedef-name.qbe | 7 + tests/typedef.c | 2 + tests/typedef.qbe | 1 + tests/uint32-to-float.c | 4 + tests/uint32-to-float.qbe | 9 + tests/uint64-to-float.c | 4 + tests/uint64-to-float.qbe | 20 + tests/unused-return.c | 4 + tests/unused-return.qbe | 7 + token.c | 143 +++++ token.h | 140 +++++ tree.c | 98 ++++ tree.h | 8 + type.c | 296 +++++++++++ type.h | 115 ++++ util.c | 133 +++++ util.h | 31 ++ 86 files changed, 6942 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 Makefile create mode 100644 README.md create mode 100644 arg.h create mode 100644 config.def.h create mode 100644 decl.c create mode 100644 decl.h create mode 100644 deps.mk create mode 100644 driver.c create mode 100644 emit.h create mode 100644 eval.c create mode 100644 eval.h create mode 100644 expr.c create mode 100644 expr.h create mode 100644 htab.c create mode 100644 htab.h create mode 100644 init.c create mode 100644 init.h create mode 100644 main.c create mode 100644 ops.h create mode 100644 pp.c create mode 100644 pp.h create mode 100644 qbe.c create mode 100755 runtests create mode 100644 scan.c create mode 100644 scan.h create mode 100644 scope.c create mode 100644 scope.h create mode 100644 siphash.c create mode 100644 stmt.c create mode 100644 stmt.h create mode 100644 tests/basic.c create mode 100644 tests/basic.qbe create mode 100644 tests/compound-assignment.c create mode 100644 tests/compound-assignment.qbe create mode 100644 tests/const-expr-div.c create mode 100644 tests/const-expr-div.qbe create mode 100644 tests/const-expr-mod.c create mode 100644 tests/const-expr-mod.qbe create mode 100644 tests/const-expr-shr.c create mode 100644 tests/const-expr-shr.qbe create mode 100644 tests/float-promote.c create mode 100644 tests/float-promote.qbe create mode 100644 tests/float-to-uint32.c create mode 100644 tests/float-to-uint32.qbe create mode 100644 tests/float-to-uint64.c create mode 100644 tests/float-to-uint64.qbe create mode 100644 tests/for-loop.c create mode 100644 tests/for-loop.qbe create mode 100644 tests/global-align.c create mode 100644 tests/global-align.qbe create mode 100644 tests/hello.c create mode 100644 tests/hello.qbe create mode 100644 tests/local-align.c create mode 100644 tests/local-align.qbe create mode 100644 tests/local-init.c create mode 100644 tests/local-init.qbe create mode 100644 tests/struct-copy.c create mode 100644 tests/struct-copy.qbe create mode 100644 tests/struct-passing.c create mode 100644 tests/struct-passing.qbe create mode 100644 tests/subtract-pointer.c create mode 100644 tests/subtract-pointer.qbe create mode 100644 tests/switch.c create mode 100644 tests/switch.qbe create mode 100644 tests/tentative.c create mode 100644 tests/tentative.qbe create mode 100644 tests/typedef-name.c create mode 100644 tests/typedef-name.qbe create mode 100644 tests/typedef.c create mode 100644 tests/typedef.qbe create mode 100644 tests/uint32-to-float.c create mode 100644 tests/uint32-to-float.qbe create mode 100644 tests/uint64-to-float.c create mode 100644 tests/uint64-to-float.qbe create mode 100644 tests/unused-return.c create mode 100644 tests/unused-return.qbe create mode 100644 token.c create mode 100644 token.h create mode 100644 tree.c create mode 100644 tree.h create mode 100644 type.c create mode 100644 type.h create mode 100644 util.c create mode 100644 util.h diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..06b6bc0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +*.o +/cc +/cc-qbe +/config.h +/stage2 +/stage3 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..07aefab --- /dev/null +++ b/LICENSE @@ -0,0 +1,36 @@ +Copyright © 2019 Michael Forney + +Permission to use, copy, modify, and/or distribute this software for any purpose +with or without fee is hereby granted, provided that the above copyright notice +and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, +INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +THIS SOFTWARE. +-------------------------------------------------------------------------------- +tree.c is based on src/search/tsearch.c by Szabolcs Nagy + +Copyright © 2005-2014 Rich Felker, et al. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..07d42fa --- /dev/null +++ b/Makefile @@ -0,0 +1,86 @@ +.POSIX: + +BACKEND=qbe + +srcdir=. +-include $(srcdir)/config.mk + +.PHONY: all +all: cc cc-qbe + +DRIVER_SRC=\ + driver.c\ + util.c +DRIVER_OBJ=$(DRIVER_SRC:.c=.o) + +config.h: + @cp config.def.h $@ + +cc: $(DRIVER_OBJ) + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(DRIVER_OBJ) + +SRC=\ + decl.c\ + eval.c\ + expr.c\ + htab.c\ + init.c\ + main.c\ + pp.c\ + scan.c\ + scope.c\ + siphash.c\ + stmt.c\ + tree.c\ + token.c\ + type.c\ + util.c\ + $(BACKEND).c +OBJ=$(SRC:.c=.o) + +cc-qbe: $(OBJ) + $(CC) $(LDFLAGS) -o $@ $(OBJ) + +decl.o : $(srcdir)/decl.c $(stagedeps) ; $(CC) $(CFLAGS) -c -o $@ $< +driver.o : $(srcdir)/driver.c $(stagedeps) ; $(CC) $(CFLAGS) -c -o $@ $< +eval.o : $(srcdir)/eval.c $(stagedeps) ; $(CC) $(CFLAGS) -c -o $@ $< +expr.o : $(srcdir)/expr.c $(stagedeps) ; $(CC) $(CFLAGS) -c -o $@ $< +htab.o : $(srcdir)/htab.c $(stagedeps) ; $(CC) $(CFLAGS) -c -o $@ $< +init.o : $(srcdir)/init.c $(stagedeps) ; $(CC) $(CFLAGS) -c -o $@ $< +main.o : $(srcdir)/main.c $(stagedeps) ; $(CC) $(CFLAGS) -c -o $@ $< +pp.o : $(srcdir)/pp.c $(stagedeps) ; $(CC) $(CFLAGS) -c -o $@ $< +scan.o : $(srcdir)/scan.c $(stagedeps) ; $(CC) $(CFLAGS) -c -o $@ $< +scope.o : $(srcdir)/scope.c $(stagedeps) ; $(CC) $(CFLAGS) -c -o $@ $< +siphash.o : $(srcdir)/siphash.c $(stagedeps) ; $(CC) $(CFLAGS) -c -o $@ $< +stmt.o : $(srcdir)/stmt.c $(stagedeps) ; $(CC) $(CFLAGS) -c -o $@ $< +tree.o : $(srcdir)/tree.c $(stagedeps) ; $(CC) $(CFLAGS) -c -o $@ $< +token.o : $(srcdir)/token.c $(stagedeps) ; $(CC) $(CFLAGS) -c -o $@ $< +type.o : $(srcdir)/type.c $(stagedeps) ; $(CC) $(CFLAGS) -c -o $@ $< +util.o : $(srcdir)/util.c $(stagedeps) ; $(CC) $(CFLAGS) -c -o $@ $< +qbe.o : $(srcdir)/qbe.c $(stagedeps) ; $(CC) $(CFLAGS) -c -o $@ $< + +.PHONY: stage2 +stage2: cc cc-qbe + @mkdir -p $@ + $(MAKE) -C $@ -f ../$(srcdir)/Makefile srcdir=../$(srcdir) stagedeps='../cc ../cc-qbe' CC=../cc CFLAGS= LDFLAGS= +.PHONY: stage3 +stage3: stage2 + @mkdir -p $@ + $(MAKE) -C $@ -f ../$(srcdir)/Makefile srcdir=../$(srcdir) stagedeps='../stage2/cc ../stage2/cc-qbe' CC=../stage2/cc CFLAGS= LDFLAGS= + +.PHONY: bootstrap +bootstrap: stage2 stage3 + cmp stage2/cc stage3/cc + cmp stage2/cc-qbe stage3/cc-qbe + +.PHONY: check +check: cc-qbe + @./runtests + +.PHONY: clean +clean: + rm -rf cc $(DRIVER_OBJ) cc-qbe $(OBJ) stage2 stage3 + +deps.mk: $(DRIVER_SRC) $(SRC) config.h + $(CC) $(CFLAGS) -MM $(DRIVER_SRC) $(SRC) >$@ +-include deps.mk diff --git a/README.md b/README.md new file mode 100644 index 0000000..ea19cad --- /dev/null +++ b/README.md @@ -0,0 +1,17 @@ +This is a C11 compiler using [QBE] as a backend. + +There is still much to do and the code is a little rough, but it currently +implements most of the language and is self-hosting. + +Still TODO: +- Come up with a name so it can be installed alongside the system `cc`. +- The preprocessor (currently we are just using the system `cpp`). +- Bit-fields. +- Passing structs with embedded unions by value. +- Variable-length arrays. +- `volatile`-qualified types (requires support from QBE). +- `_Thread_local` storage-class specifier (requires support from QBE). +- `long double` type (requires support from QBE). +- Inline assembly (requires support from QBE). + +[QBE]: https://c9x.me/compile/ diff --git a/arg.h b/arg.h new file mode 100644 index 0000000..8f93ee1 --- /dev/null +++ b/arg.h @@ -0,0 +1,18 @@ +#define ARGBEGIN \ + for (;;) { \ + ++argv, --argc; \ + if (argc == 0 || (*argv)[0] != '-') \ + break; \ + if ((*argv)[1] == '-' && !(*argv)[2]) { \ + ++argv, --argc; \ + break; \ + } \ + for (char *opt_ = &(*argv)[1], done_ = 0; !done_ && *opt_; ++opt_) { \ + switch (*opt_) + +#define ARGEND \ + } \ + } + +#define EARGF(x) \ + (done_ = 1, *++opt_ ? opt_ : argv[1] ? --argc, *++argv : ((x), abort(), NULL)) diff --git a/config.def.h b/config.def.h new file mode 100644 index 0000000..c9daa1a --- /dev/null +++ b/config.def.h @@ -0,0 +1,27 @@ +#define DYNAMICLINKER "/lib64/ld-linux-x86-64.so.2" + +static char *startfiles[] = { + "-l", ":crt1.o", + "-l", ":crti.o", +}; +static char *endfiles[] = { + "-l", ":crtn.o", + "-l", "c", +}; + +static char *preprocesscmd[] = { + "cpp", "-P", + + /* prevent libc from using GNU C extensions */ + "-U", "__GNUC__", + + /* required for glibc headers */ + "-D", "__restrict=restrict", + "-D", "__extension__=", + "-D", "__attribute__(x)=", + "-D", "__asm__(x)=", +}; +static char *compilecmd[] = {"cc-qbe"}; +static char *codegencmd[] = {"qbe"}; +static char *assemblecmd[] = {"as"}; +static char *linkcmd[] = {"ld", "--dynamic-linker=" DYNAMICLINKER}; diff --git a/decl.c b/decl.c new file mode 100644 index 0000000..bd9090b --- /dev/null +++ b/decl.c @@ -0,0 +1,897 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "util.h" +#include "decl.h" +#include "emit.h" +#include "expr.h" +#include "htab.h" +#include "init.h" +#include "pp.h" +#include "scope.h" +#include "stmt.h" +#include "token.h" +#include "type.h" + +struct declaration builtinvastart = {.kind = DECLBUILTIN}; +struct declaration builtinvaend = {.kind = DECLBUILTIN}; +struct declaration builtinoffsetof = {.kind = DECLBUILTIN}; + +static struct list tentativedefns = {&tentativedefns, &tentativedefns}; + +enum storageclass { + SCNONE, + + SCTYPEDEF = 1<<1, + SCEXTERN = 1<<2, + SCSTATIC = 1<<3, + SCAUTO = 1<<4, + SCREGISTER = 1<<5, + SCTHREADLOCAL = 1<<6, +}; + +enum typespecifier { + SPECNONE, + + SPECVOID = 1<<1, + SPECCHAR = 1<<2, + SPECBOOL = 1<<3, + SPECINT = 1<<4, + SPECFLOAT = 1<<5, + SPECDOUBLE = 1<<6, + SPECSHORT = 1<<7, + SPECLONG = 1<<8, + SPECLONG2 = 1<<9, + SPECSIGNED = 1<<10, + SPECUNSIGNED = 1<<11, + SPECCOMPLEX = 1<<12, + + SPECLONGLONG = SPECLONG|SPECLONG2, +}; + +enum funcspecifier { + FUNCNONE, + + FUNCINLINE = 1<<1, + FUNCNORETURN = 1<<2, +}; + +struct declaration * +mkdecl(enum declarationkind k, struct type *t, enum linkage linkage) +{ + struct declaration *d; + + d = xmalloc(sizeof(*d)); + d->kind = k; + d->linkage = linkage; + d->type = t; + d->tentative = false; + d->defined = false; + d->align = 0; + d->value = NULL; + + return d; +} + +/* 6.7.1 Storage-class specifiers */ +static int +storageclass(enum storageclass *sc) +{ + enum storageclass allowed, new; + + switch (tok.kind) { + case TTYPEDEF: new = SCTYPEDEF; break; + case TEXTERN: new = SCEXTERN; break; + case TSTATIC: new = SCSTATIC; break; + case T_THREAD_LOCAL: new = SCTHREADLOCAL; break; + case TAUTO: new = SCAUTO; break; + case TREGISTER: new = SCREGISTER; break; + default: return 0; + } + if (!sc) + error(&tok.loc, "storage class not allowed in this declaration"); + switch (*sc) { + case SCNONE: allowed = ~SCNONE; break; + case SCTHREADLOCAL: allowed = SCSTATIC|SCEXTERN; break; + case SCSTATIC: + case SCEXTERN: allowed = SCTHREADLOCAL; break; + default: allowed = SCNONE; break; + } + if (new & ~allowed) + error(&tok.loc, "invalid combination of storage class specifiers"); + *sc |= new; + next(); + + return 1; +} + +/* 6.7.3 Type qualifiers */ +static int +typequal(enum typequalifier *tq) +{ + switch (tok.kind) { + case TCONST: *tq |= QUALCONST; break; + case TVOLATILE: *tq |= QUALVOLATILE; break; + case TRESTRICT: *tq |= QUALRESTRICT; break; + case T_ATOMIC: error(&tok.loc, "_Atomic type qualifier is not yet supported"); + default: return 0; + } + next(); + + return 1; +} + +/* 6.7.4 Function specifiers */ +static int +funcspec(enum funcspecifier *fs) +{ + enum funcspecifier new; + + switch (tok.kind) { + case TINLINE: new = FUNCINLINE; break; + case T_NORETURN: new = FUNCNORETURN; break; + default: return 0; + } + if (!fs) + error(&tok.loc, "function specifier not allowed in this declaration"); + *fs |= new; + next(); + + return 1; +} + +static void structdecl(struct scope *, struct type *); + +static struct type * +tagspec(struct scope *s) +{ + struct type *t; + char *tag; + enum typekind kind; + struct declaration *d; + uint64_t i; + + switch (tok.kind) { + case TSTRUCT: kind = TYPESTRUCT; break; + case TUNION: kind = TYPEUNION; break; + case TENUM: kind = TYPEBASIC; break; + default: fatal("internal error: unknown tag kind"); + } + next(); + if (tok.kind == TIDENT) { + tag = tok.lit; + next(); + t = scopegettag(s, tag, false); + if (s->parent && !t && tok.kind != TLBRACE && (kind == TYPEBASIC || tok.kind != TSEMICOLON)) + t = scopegettag(s->parent, tag, true); + } else if (tok.kind != TLBRACE) { + error(&tok.loc, "expected identifier or '{' after struct/union"); + } else { + tag = NULL; + t = NULL; + } + if (t) { + if (t->kind != kind) + error(&tok.loc, "redeclaration of tag '%s' with different kind", tag); + } else { + t = mktype(kind, NULL); + if (kind == TYPEBASIC) { + *t = typeint; + } else { + t->repr = &i64; // XXX + t->size = 0; + t->align = 0; + t->structunion.tag = tag; + t->structunion.members = (struct array){0}; + } + t->incomplete = true; + if (tag) + scopeputtag(s, tag, t); + } + if (tok.kind != TLBRACE) + return t; + if (!t->incomplete) + error(&tok.loc, "redefinition of tag '%s'", tag); + next(); + switch (t->kind) { + case TYPESTRUCT: + case TYPEUNION: + while (tok.kind != TRBRACE) + structdecl(s, t); + next(); + t->size = ALIGNUP(t->size, t->align); + t->incomplete = false; + break; + case TYPEBASIC: /* enum */ + for (i = 0; tok.kind == TIDENT; ++i) { + d = mkdecl(DECLCONST, &typeint, LINKNONE); + scopeputdecl(s, tok.lit, d); + next(); + if (consume(TASSIGN)) + i = intconstexpr(s); + d->value = mkintconst(t->repr, i); + if (!consume(TCOMMA)) + break; + } + expect(TRBRACE, "to close enum specifier"); + t->incomplete = false; + } + + return t; +} + +/* 6.7 Declarations */ +static struct type * +declspecs(struct scope *s, enum storageclass *sc, enum funcspecifier *fs, int *align) +{ + struct type *t; + struct declaration *d; + enum typespecifier ts = SPECNONE; + enum typequalifier tq = QUALNONE; + int ntypes = 0; + uint64_t i; + + t = NULL; + if (sc) + *sc = SCNONE; + if (fs) + *fs = FUNCNONE; + for (;;) { + if (typequal(&tq) || storageclass(sc) || funcspec(fs)) + continue; + switch (tok.kind) { + /* 6.7.2 Type specifiers */ + case TVOID: + t = &typevoid; + ++ntypes; + next(); + break; + case TCHAR: + ts |= SPECCHAR; + ++ntypes; + next(); + break; + case TSHORT: + if (ts & SPECSHORT) + error(&tok.loc, "duplicate 'short'"); + ts |= SPECSHORT; + next(); + break; + case TINT: + ts |= SPECINT; + ++ntypes; + next(); + break; + case TLONG: + if (ts & SPECLONG2) + error(&tok.loc, "too many 'long'"); + if (ts & SPECLONG) + ts |= SPECLONG2; + ts |= SPECLONG; + next(); + break; + case TFLOAT: + ts |= SPECFLOAT; + ++ntypes; + next(); + break; + case TDOUBLE: + ts |= SPECDOUBLE; + ++ntypes; + next(); + break; + case TSIGNED: + if (ts & SPECSIGNED) + error(&tok.loc, "duplicate 'signed'"); + ts |= SPECSIGNED; + next(); + break; + case TUNSIGNED: + if (ts & SPECUNSIGNED) + error(&tok.loc, "duplicate 'unsigned'"); + ts |= SPECUNSIGNED; + next(); + break; + case T_BOOL: + t = &typebool; + ++ntypes; + next(); + break; + case T_COMPLEX: + fatal("_Complex is not yet supported"); + break; + case T_ATOMIC: + fatal("_Atomic is not yet supported"); + break; + case T__BUILTIN_VA_LIST: + t = &typevalist; + ++ntypes; + next(); + break; + case TSTRUCT: + case TUNION: + case TENUM: + t = tagspec(s); + ++ntypes; + break; + case TIDENT: + if (t || ts) + goto done; + d = scopegetdecl(s, tok.lit, 1); + if (!d || d->kind != DECLTYPE) + goto done; + t = d->type; + ++ntypes; + next(); + break; + + /* 6.7.5 Alignment specifier */ + case T_ALIGNAS: + if (!align) + error(&tok.loc, "alignment specifier not allowed in this declaration"); + next(); + expect(TLPAREN, "after '_Alignas'"); + t = typename(s); + if (t) { + *align = t->align; + } else { + i = intconstexpr(s); + if (!i || i & (i - 1) || i > 16) + error(&tok.loc, "invalid alignment: %d", i); + *align = (int)i; + } + expect(TRPAREN, "to close '_Alignas' specifier"); + break; + + default: + goto done; + } + if (ntypes > 1 || (t && ts)) + error(&tok.loc, "multiple types in declaration specifiers"); + } +done: + switch ((int)ts) { + case SPECNONE: break; + case SPECCHAR: t = &typechar; break; + case SPECSIGNED|SPECCHAR: t = &typeschar; break; + case SPECUNSIGNED|SPECCHAR: t = &typeuchar; break; + case SPECSHORT: + case SPECSHORT|SPECINT: + case SPECSIGNED|SPECSHORT: + case SPECSIGNED|SPECSHORT|SPECINT: t = &typeshort; break; + case SPECUNSIGNED|SPECSHORT: + case SPECUNSIGNED|SPECSHORT|SPECINT: t = &typeushort; break; + case SPECINT: + case SPECSIGNED: + case SPECSIGNED|SPECINT: t = &typeint; break; + case SPECUNSIGNED: + case SPECUNSIGNED|SPECINT: t = &typeuint; break; + case SPECLONG: + case SPECLONG|SPECINT: + case SPECSIGNED|SPECLONG: + case SPECSIGNED|SPECLONG|SPECINT: t = &typelong; break; + case SPECUNSIGNED|SPECLONG: + case SPECUNSIGNED|SPECLONG|SPECINT: t = &typeulong; break; + case SPECLONGLONG: + case SPECLONGLONG|SPECINT: + case SPECSIGNED|SPECLONGLONG: + case SPECSIGNED|SPECLONGLONG|SPECINT: t = &typellong; break; + case SPECUNSIGNED|SPECLONGLONG: + case SPECUNSIGNED|SPECLONGLONG|SPECINT: t = &typeullong; break; + case SPECFLOAT: t = &typefloat; break; + case SPECDOUBLE: t = &typedouble; break; + case SPECLONG|SPECDOUBLE: t = &typelongdouble; break; + default: + error(&tok.loc, "invalid combination of type specifiers"); + } + if (!t && (tq || (sc && *sc) || (fs && *fs))) + error(&tok.loc, "declaration has no type specifier"); + + return mkqualifiedtype(t, tq); +} + +/* 6.7.6 Declarators */ +static struct parameter *parameter(struct scope *); + +struct partialtype { + struct type *outer; + struct type **inner; +}; + +static bool +istypename(struct scope *s, const char *name) +{ + struct declaration *d; + + d = scopegetdecl(s, tok.lit, 1); + return d && d->kind == DECLTYPE; +} + +static void +declaratortypes(struct scope *s, struct partialtype *result, char **name, bool allowabstract) +{ + struct partialtype prefix1 = {NULL, &prefix1.outer}, prefix2 = {NULL, &prefix2.outer}; + struct type *t; + struct parameter **p; + uint64_t i; + enum typequalifier tq; + + while (consume(TMUL)) { + t = mkpointertype(result->outer); + tq = QUALNONE; + while (typequal(&tq)) + ; + if (!result->outer) + result->inner = &t->base; + result->outer = mkqualifiedtype(t, tq); + } + if (name) + *name = NULL; + switch (tok.kind) { + case TLPAREN: + next(); + if (allowabstract && tok.kind != TMUL && (tok.kind != TIDENT || istypename(s, tok.lit))) + goto func; + declaratortypes(s, &prefix1, name, allowabstract); + expect(TRPAREN, "after parenthesized declarator"); + break; + case TIDENT: + if (!name) + error(&tok.loc, "identifier not allowed in abstract declarator"); + *name = tok.lit; + next(); + break; + default: + if (!allowabstract) + error(&tok.loc, "expected '(' or identifier"); + } + for (;;) { + switch (tok.kind) { + case TLPAREN: /* function declarator */ + next(); + func: + t = mktype(TYPEFUNC, NULL); + t->func.isprototype = 0; + t->func.isvararg = 0; + t->func.isnoreturn = 0; + t->func.params = NULL; + p = &t->func.params; + switch (tok.kind) { + case TIDENT: + if (!istypename(s, tok.lit)) { + /* identifier-list (K&R declaration) */ + do { + *p = mkparam(tok.lit, NULL); + p = &(*p)->next; + next(); + if (tok.kind != TCOMMA) + break; + next(); + } while (tok.kind == TIDENT); + break; + } + /* fallthrough */ + default: + t->func.isprototype = 1; + for (;;) { + *p = parameter(s); + p = &(*p)->next; + if (tok.kind != TCOMMA) + break; + next(); + if (tok.kind == TELLIPSIS) { + t->func.isvararg = 1; + next(); + break; + } + } + if (typeunqual(t->func.params->type, NULL)->kind == TYPEVOID && !t->func.params->next) + t->func.params = NULL; + break; + case TRPAREN: + break; + } + expect(TRPAREN, "to close function declarator"); + *prefix2.inner = t; + prefix2.inner = &t->base; + break; + case TLBRACK: /* array declarator */ + next(); + tq = QUALNONE; + for (;;) { + if (tok.kind == TSTATIC) + next(); /* ignore */ + else if (!typequal(&tq)) + break; + } + if (tok.kind == TMUL) + error(&tok.loc, "VLAs are not yet supported"); + if (tok.kind == TRBRACK) { + i = 0; + next(); + } else { + i = intconstexpr(s); + expect(TRBRACK, "after array length"); + } + t = mkarraytype(NULL, i); + *prefix2.inner = mkqualifiedtype(t, tq); + prefix2.inner = &t->base; + break; + default: + goto done; + } + } +done: + if (prefix2.outer) { + if (result->inner == &result->outer) + result->inner = prefix2.inner; + *prefix2.inner = result->outer; + result->outer = prefix2.outer; + } + if (prefix1.outer) { + if (result->inner == &result->outer) + result->inner = prefix1.inner; + *prefix1.inner = result->outer; + result->outer = prefix1.outer; + } +} + +static struct type * +declarator(struct scope *s, struct type *t, char **name, bool allowabstract) +{ + struct partialtype result = {NULL, &result.outer}; + + declaratortypes(s, &result, name, allowabstract); + *result.inner = t; + for (t = result.outer; t; t = t->base) { + switch (t->kind) { + case TYPEARRAY: + t->align = t->base->align; + t->size = t->base->size * t->array.length; // XXX: overflow? + break; + } + } + + return result.outer; +} + +static struct type * +adjust(struct type *t) +{ + enum typequalifier tq = QUALNONE; + + t = typeunqual(t, &tq); + switch (t->kind) { + case TYPEARRAY: + t = mkqualifiedtype(mkpointertype(t->base), tq); + break; + case TYPEFUNC: + t = mkpointertype(t); + break; + } + + return t; +} + +static struct parameter * +parameter(struct scope *s) +{ + struct parameter *p; + struct type *t; + enum storageclass sc; + + t = declspecs(s, &sc, NULL, NULL); + if (!t) + error(&tok.loc, "no type in parameter declaration"); + if (sc && sc != SCREGISTER) + error(&tok.loc, "parameter declaration has invalid storage-class specifier"); + p = mkparam(NULL, t); + p->type = adjust(declarator(s, p->type, &p->name, true)); + + return p; +} + +static bool +paramdecl(struct scope *s, struct parameter *params) +{ + struct parameter *p; + struct type *t, *base; + char *name; + + base = declspecs(s, NULL, NULL, NULL); + if (!base) + return false; + for (;;) { + t = adjust(declarator(s, base, &name, false)); + for (p = params; p; p = p->next) { + if (strcmp(name, p->name) == 0) { + p->type = t; + break; + } + } + if (!p) + error(&tok.loc, "old-style function declarator has no parameter named '%s'", name); + if (tok.kind == TSEMICOLON) + break; + expect(TCOMMA, "or ';' after parameter declarator"); + } + next(); + return true; +} + +// XXX: cleanup +static void +structdecl(struct scope *s, struct type *t) +{ + struct type *base; + struct member *m; + int basealign = 0, align; + + base = declspecs(s, NULL, NULL, &align); + if (!base) + error(&tok.loc, "no type in struct member declaration"); + if (tok.kind == TSEMICOLON) { + if ((base->kind != TYPESTRUCT && base->kind != TYPEUNION) || base->structunion.tag) + error(&tok.loc, "struct declaration must declare at least one member"); + next(); + if (align < base->align) + align = base->align; + t->size = ALIGNUP(t->size, align); + arrayforeach (&base->structunion.members, m) + m->offset += t->size; + arrayaddbuf(&t->structunion.members, base->structunion.members.val, base->structunion.members.len); + t->size += ALIGNUP(base->size, align); + if (t->align < align) + t->align = align; + return; + } + for (;;) { + align = basealign; + if (tok.kind != TCOLON) { + m = arrayadd(&t->structunion.members, sizeof(*m)); + m->type = declarator(s, base, &m->name, false); + assert(m->type->align > 0); + if (align < m->type->align) + align = m->type->align; + t->size = ALIGNUP(t->size, align); + m->offset = t->size; + t->size += m->type->size; + if (t->align < align) + t->align = align; + } + if (tok.kind == TCOLON) + error(&tok.loc, "bit-fields are not yet supported"); + if (tok.kind == TSEMICOLON) + break; + expect(TCOMMA, "or ';' after declarator"); + } + next(); +} + +/* 6.7.7 Type names */ +struct type * +typename(struct scope *s) +{ + struct type *t; + + t = declspecs(s, NULL, NULL, NULL); + return t ? declarator(s, t, NULL, true) : NULL; +} + +bool +decl(struct scope *s, struct function *f) +{ + struct type *t, *base; + enum storageclass sc; + enum funcspecifier fs; + struct initializer *init; + struct parameter *p; + char *name; + int allowfunc = !f; + struct declaration *d; + enum declarationkind kind; + enum linkage linkage; + uint64_t c; + int align = 0; + + if (consume(T_STATIC_ASSERT)) { + expect(TLPAREN, "after _Static_assert"); + c = intconstexpr(s); + expect(TCOMMA, "after static assertion expression"); + expect(TSTRINGLIT, "after static assertion expression"); + if (!c) + error(&tok.loc, "static assertion failed"); // XXX: add string here + expect(TRPAREN, "after static assertion message"); + expect(TSEMICOLON, "after static assertion"); + return true; + } + base = declspecs(s, &sc, &fs, &align); + if (!base) + return false; + if (!f) { + /* 6.9p2 */ + if (sc & SCAUTO) + error(&tok.loc, "external declaration must not contain 'auto'"); + if (sc & SCREGISTER) + error(&tok.loc, "external declaration must not contain 'register'"); + } + if (consume(TSEMICOLON)) { + /* XXX 6.7p2 error unless in function parameter/struct/union, or tag/enum members are declared */ + return true; + } + for (;;) { + t = declarator(s, base, &name, false); + kind = sc & SCTYPEDEF ? DECLTYPE : t->kind == TYPEFUNC ? DECLFUNC : DECLOBJECT; + d = scopegetdecl(s, name, false); + if (d && d->kind != kind) + error(&tok.loc, "'%s' redeclared with different kind", name); + switch (kind) { + case DECLTYPE: + if (align) + error(&tok.loc, "typedef '%s' declared with alignment specifier", name); + if (!d) + scopeputdecl(s, name, mkdecl(DECLTYPE, t, LINKNONE)); + else if (!typesame(d->type, t)) + error(&tok.loc, "typedef '%s' redefined with different type", name); + break; + case DECLOBJECT: + if (d) { + if (d->linkage == LINKNONE) + error(&tok.loc, "object '%s' with no linkage redeclared", name); + if (!(sc & SCEXTERN)) { + linkage = f ? LINKNONE : sc & SCSTATIC ? LINKINTERN : LINKEXTERN; + if (d->linkage != linkage) + error(&tok.loc, "object '%s' redeclared with different linkage", name); + } + if (!typecompatible(d->type, t)) + error(&tok.loc, "object '%s' redeclared with incompatible type", name); + d->type = typecomposite(t, d->type); + } else { + if (sc & SCEXTERN) { + if (s->parent) + d = scopegetdecl(s->parent, name, true); + linkage = d && d->linkage != LINKNONE ? d->linkage : LINKEXTERN; + d = scopegetdecl(&filescope, name, false); + if (d) { + if (d->linkage != linkage) + error(&tok.loc, "object '%s' redeclared with different linkage", name); + if (!typecompatible(d->type, t)) + error(&tok.loc, "object '%s' redeclared with incompatible type", name); + t = typecomposite(t, d->type); + } + } else { + linkage = f ? LINKNONE : sc & SCSTATIC ? LINKINTERN : LINKEXTERN; + } + + d = mkdecl(kind, t, linkage); + scopeputdecl(s, name, d); + if (linkage != LINKNONE || sc & SCSTATIC) + d->value = mkglobal(name, linkage == LINKNONE); + } + if (d->align < align) + d->align = align; + if (consume(TASSIGN)) { + if (f && d->linkage != LINKNONE) + error(&tok.loc, "object '%s' with block scope and %s linkage cannot have initializer", name, d->linkage == LINKEXTERN ? "external" : "internal"); + if (d->defined) + error(&tok.loc, "object '%s' redefined", name); + init = parseinit(s, d->type); + } else { + init = NULL; + } + if (sc & SCEXTERN) + break; + if (init || f) { + if (d->linkage != LINKNONE || sc & SCSTATIC) + emitdata(d, init); + else + funcinit(f, d, init); + d->defined = true; + if (d->tentative) { + d->tentative = false; + listremove(&d->link); + } + } else if (!d->defined && !d->tentative) { + d->tentative = true; + listinsert(tentativedefns.prev, &d->link); + } + break; + case DECLFUNC: + if (align) + error(&tok.loc, "function '%s' declared with alignment specifier", name); + t->func.isnoreturn |= fs & FUNCNORETURN; + if (f && sc && sc != SCEXTERN) /* 6.7.1p7 */ + error(&tok.loc, "function '%s' with block scope may only have storage class 'extern'", name); + if (d) { + if (!typecompatible(t, d->type)) + error(&tok.loc, "function '%s' redeclared with incompatible type", name); + d->type = typecomposite(t, d->type); + } else { + if (s->parent) + d = scopegetdecl(s->parent, name, 1); + if (d && d->linkage != LINKNONE) { + linkage = d->linkage; + if (!typecompatible(t, d->type)) + error(&tok.loc, "function '%s' redeclared with incompatible type", name); + t = typecomposite(t, d->type); + } else { + linkage = sc & SCSTATIC ? LINKINTERN : LINKEXTERN; + } + d = mkdecl(kind, t, linkage); + d->value = mkglobal(name, false); + scopeputdecl(s, name, d); + } + break; + } + switch (tok.kind) { + default: + if (!allowfunc || kind != DECLFUNC || t->func.isprototype) + error(&tok.loc, "expected ',' or ';' after declarator"); + /* K&R-style function definition */ + while (paramdecl(s, t->func.params)) + ; + for (p = t->func.params; p; p = p->next) { + if (!p->type) + error(&tok.loc, "old-style function definition does not declare '%s'", p->name); + } + if (tok.kind != TLBRACE) + error(&tok.loc, "expected compound statement after function declarator"); + /* fallthrough */ + case TLBRACE: + if (!allowfunc) + error(&tok.loc, "function declaration not allowed"); + if (d->defined) + error(&tok.loc, "function '%s' redefined", name); + s = mkscope(&filescope); + f = mkfunc(name, t, s); + stmt(f, s); + emitfunc(f, d->linkage == LINKEXTERN); + s = delscope(s); + d->defined = true; + return true; + case TCOMMA: + next(); + allowfunc = 0; + break; + case TSEMICOLON: + next(); + return true; + } + } +} + +struct declaration *stringdecl(struct expression *expr) +{ + static struct hashtable *strings; + struct hashtablekey key; + void **entry; + struct declaration *d; + + if (!strings) + strings = mkhtab(64); + assert(expr->kind == EXPRSTRING); + htabbufkey(&key, expr->string.data, expr->string.size); + entry = htabput(strings, &key); + d = *entry; + if (!d) { + d = mkdecl(DECLOBJECT, expr->type, LINKNONE); + d->value = mkglobal("string", true); + emitdata(d, mkinit(0, expr)); + *entry = d; + } + return d; +} + +void +emittentativedefns(void) +{ + struct list *l; + + for (l = tentativedefns.next; l != &tentativedefns; l = l->next) + emitdata(listelement(l, struct declaration, link), NULL); +} diff --git a/decl.h b/decl.h new file mode 100644 index 0000000..4b3ef20 --- /dev/null +++ b/decl.h @@ -0,0 +1,37 @@ +enum declarationkind { + DECLTYPE, + DECLOBJECT, + DECLFUNC, + DECLCONST, + DECLBUILTIN, +}; + +enum linkage { + LINKNONE, + LINKINTERN, + LINKEXTERN, +}; + +struct declaration { + enum declarationkind kind; + enum linkage linkage; + struct type *type; + struct value *value; + struct list link; + int align; /* may be more strict than type requires */ + _Bool tentative, defined; +}; + +struct scope; +struct function; + +extern struct declaration builtinvastart, builtinvaend, builtinoffsetof; + +struct declaration *mkdecl(enum declarationkind, struct type *, enum linkage); +_Bool decl(struct scope *, struct function *); +struct type *typename(struct scope *); + +struct expression; +struct declaration *stringdecl(struct expression *); + +void emittentativedefns(void); diff --git a/deps.mk b/deps.mk new file mode 100644 index 0000000..fa3afd0 --- /dev/null +++ b/deps.mk @@ -0,0 +1,22 @@ +driver.o: driver.c util.h config.h +util.o: util.c util.h +decl.o: decl.c util.h decl.h emit.h expr.h htab.h init.h pp.h scope.h \ + stmt.h token.h type.h +eval.o: eval.c util.h decl.h emit.h eval.h expr.h token.h type.h +expr.o: expr.c util.h decl.h eval.h expr.h init.h pp.h scope.h token.h \ + type.h +htab.o: htab.c util.h htab.h +init.o: init.c util.h decl.h expr.h init.h pp.h token.h type.h +main.o: main.c util.h arg.h decl.h pp.h scope.h token.h +pp.o: pp.c util.h pp.h scan.h token.h +scan.o: scan.c util.h scan.h token.h +scope.o: scope.c util.h htab.h scope.h +siphash.o: siphash.c +stmt.o: stmt.c util.h decl.h emit.h expr.h pp.h scope.h stmt.h token.h \ + type.h +tree.o: tree.c util.h tree.h +token.o: token.c util.h token.h +type.o: type.c util.h emit.h type.h +util.o: util.c util.h +qbe.o: qbe.c util.h decl.h emit.h eval.h expr.h htab.h init.h scope.h \ + token.h tree.h type.h ops.h diff --git a/driver.c b/driver.c new file mode 100644 index 0000000..df24d8a --- /dev/null +++ b/driver.c @@ -0,0 +1,423 @@ +#define _POSIX_C_SOURCE 200809L +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "util.h" + +extern int pipe2(int[2], int); + +enum phaseid { + PREPROCESS, + COMPILE, + CODEGEN, + ASSEMBLE, + LINK, + + NPHASES, +}; + +#include "config.h" + +struct phase { + const char *name; + struct array cmd; + size_t cmdbase; + pid_t pid; +}; + +extern char **environ; +static struct { + bool nostdlib; +} flags; +static struct phase phases[] = { + [PREPROCESS] = {.name = "preprocess"}, + [COMPILE] = {.name = "compile"}, + [CODEGEN] = {.name = "codegen"}, + [ASSEMBLE] = {.name = "assemble"}, + [LINK] = {.name = "link"}, +}; + +static void +usage(void) +{ + fprintf(stderr, "usage: %s [-c|-S|-E] [-D name[=value]] [-U name] [-s] [-g] [-o output] input...\n", argv0); + exit(2); +} + +static enum phaseid +inputphase(const char *name) +{ + const char *dot; + + dot = strrchr(name, '.'); + if (dot) { + if (strcmp(dot, ".c") == 0) + return PREPROCESS; + if (strcmp(dot, ".i") == 0) + return COMPILE; + if (strcmp(dot, ".qbe") == 0) + return CODEGEN; + if (strcmp(dot, ".s") == 0 || strcmp(dot, ".S") == 0) + return ASSEMBLE; + } + + return LINK; +} + +static char * +changeext(const char *name, const char *ext) +{ + const char *slash, *dot; + char *result; + size_t baselen; + + slash = strrchr(name, '/'); + if (!slash) + slash = name; + dot = strrchr(slash, '.'); + baselen = dot ? (size_t)(dot - name + 1) : strlen(name); + result = xmalloc(baselen + strlen(ext) + 1); + memcpy(result, name, baselen); + strcpy(result + baselen, ext); + + return result; +} + +static int +spawnphase(struct phase *phase, int *fd, char *input, char *output, bool last) +{ + int ret, pipefd[2]; + posix_spawn_file_actions_t actions; + + phase->cmd.len = phase->cmdbase; + if (output) { + arrayaddptr(&phase->cmd, "-o"); + arrayaddptr(&phase->cmd, output); + } + if (input) + arrayaddptr(&phase->cmd, input); + arrayaddptr(&phase->cmd, NULL); + + ret = posix_spawn_file_actions_init(&actions); + if (ret) + goto err0; + if (*fd != -1) + ret = posix_spawn_file_actions_adddup2(&actions, *fd, 0); + if (!last) { + if (pipe2(pipefd, O_CLOEXEC) < 0) { + ret = errno; + goto err1; + } + ret = posix_spawn_file_actions_adddup2(&actions, pipefd[1], 1); + if (ret) + goto err2; + } + + ret = posix_spawnp(&phase->pid, *(char **)phase->cmd.val, &actions, NULL, phase->cmd.val, environ); + if (ret) + goto err2; + if (!last) { + *fd = pipefd[0]; + close(pipefd[1]); + } + posix_spawn_file_actions_destroy(&actions); + + return 0; + +err2: + if (!last) { + close(pipefd[0]); + close(pipefd[1]); + } +err1: + posix_spawn_file_actions_destroy(&actions); +err0: + return ret; +} + +static bool +succeeded(const char *phase, pid_t pid, int status) +{ + if (WIFEXITED(status)) { + if (WEXITSTATUS(status) == 0) + return true; + warn("%s: process %ju exited with status %d", phase, (uintmax_t)pid, WEXITSTATUS(status)); + } else if (WIFSIGNALED(status)) { + warn("%s: process signaled: %s", phase, strsignal(WTERMSIG(status))); + } else { + warn("%s: process failed", phase); + } + return false; +} + +static char * +buildobj(char *input, char *output, enum phaseid last) +{ + const char *phase; + enum phaseid first; + int status, ret, fd; + bool success = true; + size_t i, npids; + pid_t pid; + + npids = 0; + first = inputphase(input); + if (first > last || first == LINK) + return input; + switch (last) { + case COMPILE: + if (!output) + output = changeext(input, "qbe"); + break; + case CODEGEN: + if (!output) + output = changeext(input, "s"); + break; + case ASSEMBLE: + if (!output) + output = changeext(input, "o"); + break; + case LINK: + /* TODO: temporary object, or just removed later? */ + output = changeext(input, "o"); + last = ASSEMBLE; + break; + } + if (output && strcmp(output, "-") == 0) + output = NULL; + + for (i = first, fd = -1; i <= last; ++i, ++npids) { + ret = spawnphase(&phases[i], &fd, i == first ? input : NULL, i == last ? output : NULL, i == last); + if (ret) { + warn("%s: spawn \"%s\": %s", phases[i].name, *(char **)phases[i].cmd.val, strerror(ret)); + goto kill; + } + } + + while (npids > 0) { + pid = wait(&status); + if (pid < 0) + fatal("waitpid:"); + for (i = 0; i < NPHASES; ++i) { + if (pid == phases[i].pid) { + --npids; + phases[i].pid = 0; + phase = phases[i].name; + break; + } + } + if (i == NPHASES) + continue; /* unknown process */ + if (!succeeded(phase, pid, status)) { +kill: + if (success && npids > 0) { + for (i = 0; i < NPHASES; ++i) { + if (phases[i].pid) + kill(phases[i].pid, SIGTERM); + } + } + success = false; + } + } + if (!success) { + if (output) + unlink(output); + exit(1); + } + return output; +} + +static _Noreturn void +buildexe(char *inputs[], size_t ninputs, char *output) +{ + struct phase *p = &phases[LINK]; + size_t i; + int ret, status; + pid_t pid; + + arrayaddptr(&p->cmd, "-o"); + arrayaddptr(&p->cmd, output); + if (!flags.nostdlib) + arrayaddbuf(&p->cmd, startfiles, sizeof(startfiles)); + for (i = 0; i < ninputs; ++i) + arrayaddptr(&p->cmd, inputs[i]); + if (!flags.nostdlib) + arrayaddbuf(&p->cmd, endfiles, sizeof(endfiles)); + arrayaddptr(&p->cmd, NULL); + + ret = posix_spawnp(&pid, *(char **)p->cmd.val, NULL, NULL, p->cmd.val, environ); + if (ret) + fatal("%s: spawn \"%s\": %s", p->name, *(char **)p->cmd.val, strerror(errno)); + if (waitpid(pid, &status, 0) < 0) + fatal("waitpid %ju:", (uintmax_t)pid); + exit(!succeeded(p->name, pid, status)); +} + +static char * +nextarg(char ***argv) +{ + if ((**argv)[2] != '\0') + return &(**argv)[2]; + ++*argv; + if (!**argv) + usage(); + return **argv; +} + +static char * +compilecommand(void) +{ + char self[PATH_MAX], *cmd; + ssize_t n; + + n = readlink("/proc/self/exe", self, sizeof(self) - 4); + if (n < 0) + fatal("readlink /proc/self/exe:"); + if (n == sizeof(self) - 4) + fatal("target of /proc/self/exe is too large"); + strcpy(self + n, "-qbe"); + if (access(self, X_OK) < 0) + return NULL; + cmd = strdup(self); + if (!cmd) + fatal("strdup:"); + return cmd; +} + +int +main(int argc, char *argv[]) +{ + enum phaseid last = LINK; + char *arg, *end, *output = NULL, **input; + struct array inputs = {0}, *cmd; + size_t i; + + arrayaddbuf(&phases[PREPROCESS].cmd, preprocesscmd, sizeof(preprocesscmd)); + arrayaddbuf(&phases[COMPILE].cmd, compilecmd, sizeof(compilecmd)); + arrayaddbuf(&phases[CODEGEN].cmd, codegencmd, sizeof(codegencmd)); + arrayaddbuf(&phases[ASSEMBLE].cmd, assemblecmd, sizeof(assemblecmd)); + arrayaddbuf(&phases[LINK].cmd, linkcmd, sizeof(linkcmd)); + arg = compilecommand(); + if (arg) + *(char **)phases[COMPILE].cmd.val = arg; + + argv0 = progname(argv[0], "cc"); + for (;;) { + ++argv, --argc; + arg = *argv; + if (!arg) + break; + if (arg[0] != '-') { + arrayaddptr(&inputs, arg); + continue; + } + if (strcmp(arg, "-nostdlib") == 0) { + flags.nostdlib = true; + } else if (strcmp(arg, "-static") == 0) { + arrayaddptr(&phases[LINK].cmd, arg); + } else if (strcmp(arg, "-emit-qbe") == 0) { + last = COMPILE; + } else if (strcmp(arg, "-include") == 0) { + --argc, arg = *++argv; + if (!arg) + usage(); + arrayaddptr(&phases[PREPROCESS].cmd, "-include"); + arrayaddptr(&phases[PREPROCESS].cmd, arg); + } else { + if (arg[2] != '\0' && strchr("cESs", arg[1])) + usage(); + switch (arg[1]) { + case 'c': + last = ASSEMBLE; + break; + case 'D': + arrayaddptr(&phases[PREPROCESS].cmd, "-D"); + arrayaddptr(&phases[PREPROCESS].cmd, nextarg(&argv)); + break; + case 'E': + last = PREPROCESS; + break; + case 'I': + arrayaddptr(&phases[PREPROCESS].cmd, "-I"); + arrayaddptr(&phases[PREPROCESS].cmd, nextarg(&argv)); + break; + case 'L': + arrayaddptr(&phases[LINK].cmd, "-L"); + arrayaddptr(&phases[LINK].cmd, nextarg(&argv)); + break; + case 'l': + arrayaddptr(&inputs, "-l"); + arrayaddptr(&inputs, nextarg(&argv)); + break; + case 'O': + /* ignore */ + break; + case 'o': + output = nextarg(&argv); + break; + case 'S': + last = CODEGEN; + break; + case 's': + arrayaddptr(&phases[LINK].cmd, "-s"); + break; + case 'U': + arrayaddptr(&phases[PREPROCESS].cmd, "-U"); + arrayaddptr(&phases[PREPROCESS].cmd, nextarg(&argv)); + break; + case 'W': + if (arg[2] && arg[3] == ',') { + switch (arg[2]) { + case 'p': cmd = &phases[PREPROCESS].cmd; break; + case 'a': cmd = &phases[ASSEMBLE].cmd; break; + case 'l': cmd = &phases[LINK].cmd; break; + default: usage(); + } + for (arg += 4; arg; arg = end ? end + 1 : NULL) { + end = strchr(arg, ','); + if (end) + *end = '\0'; + printf("arg %s\n", arg); + arrayaddptr(cmd, arg); + } + } else { + /* ignore warning flag */ + } + break; + default: + usage(); + } + } + } + + for (i = 0; i < NPHASES; ++i) + phases[i].cmdbase = phases[i].cmd.len; + if (inputs.len == 0) + usage(); + if (output && inputs.len / sizeof(char *) > 1 && last != LINK) + fatal("cannot specify -o with multiple input files without linking"); + for (input = inputs.val; input < (char **)((char *)inputs.val + inputs.len); ++input) { + if (strcmp(*input, "-l") == 0) + ++input; + else + *input = buildobj(*input, output, last); + } + if (last == LINK) { + if (!output) + output = "a.out"; + buildexe(inputs.val, inputs.len / sizeof(char *), output); + } + + return 0; +} diff --git a/emit.h b/emit.h new file mode 100644 index 0000000..c4b1eaf --- /dev/null +++ b/emit.h @@ -0,0 +1,49 @@ +struct gotolabel { + struct value *label; + _Bool defined; +}; + +struct function { + char *name; + struct declaration *namedecl; + struct type *type; + struct block *start, *end; + struct hashtable *gotos; + uint64_t lastid; +}; + +struct switchcases { + void *root; + struct value *defaultlabel; +}; + +struct representation; +struct declaration; +struct expression; +struct initializer; +struct scope; +struct type; + +struct switchcases *mkswitch(void); +void switchcase(struct switchcases *, uint64_t, struct value *); + +struct value *mkblock(char *); +struct value *mkglobal(char *, _Bool); +struct value *mkintconst(struct representation *, uint64_t); + +uint64_t intconstvalue(struct value *); + +struct function *mkfunc(char *, struct type *, struct scope *); +void funclabel(struct function *, struct value *); +struct value *funcexpr(struct function *, struct expression *); +void funcjmp(struct function *, struct value *); +void funcjnz(struct function *, struct value *, struct value *, struct value *); +void funcret(struct function *, struct value *); +struct gotolabel *funcgoto(struct function *, char *); +void funcswitch(struct function *, struct value *, struct switchcases *, struct value *); +void funcinit(struct function *, struct declaration *, struct initializer *); + +void emitfunc(struct function *, _Bool); +void emitdata(struct declaration *, struct initializer *); + +extern struct representation i8, i16, i32, i64, f32, f64; diff --git a/eval.c b/eval.c new file mode 100644 index 0000000..80e9578 --- /dev/null +++ b/eval.c @@ -0,0 +1,134 @@ +#include +#include +#include "util.h" +#include "decl.h" +#include "emit.h" +#include "eval.h" +#include "expr.h" +#include "token.h" +#include "type.h" + +struct expression * +eval(struct expression *expr) +{ + struct expression *l, *r, *c; + int op; + + switch (expr->kind) { + case EXPRIDENT: + if (expr->ident.decl->kind != DECLCONST) + break; + expr->kind = EXPRCONST; + expr->constant.i = intconstvalue(expr->ident.decl->value); + break; + case EXPRUNARY: + l = eval(expr->unary.base); + if (expr->unary.op != TBAND) + break; + switch (l->kind) { + case EXPRUNARY: + if (l->unary.op == TMUL) + expr = eval(l->unary.base); + break; + case EXPRSTRING: + l->ident.decl = stringdecl(l); + l->kind = EXPRIDENT; + expr->unary.base = l; + break; + } + break; + case EXPRCAST: + l = eval(expr->cast.e); + if (l->kind == EXPRCONST) { + expr->kind = EXPRCONST; + if (typeprop(l->type) & PROPINT && typeprop(expr->type) & PROPFLOAT) + expr->constant.i = l->constant.f; + else if (typeprop(l->type) & PROPFLOAT && typeprop(expr->type) & PROPINT) + expr->constant.f = l->constant.i; + else + expr->constant = l->constant; + } else if (l->type->kind == TYPEPOINTER && expr->type->kind == TYPEPOINTER) { + expr = l; + } + break; + case EXPRBINARY: + l = eval(expr->binary.l); + r = eval(expr->binary.r); + expr->binary.l = l; + expr->binary.r = r; + if (l->kind != EXPRCONST) + break; + if (expr->binary.op == TLOR) + return !l->constant.i ? r : l; + if (expr->binary.op == TLAND) + return l->constant.i ? r : l; + if (r->kind != EXPRCONST) + break; + expr->kind = EXPRCONST; + op = expr->binary.op; +#define F (1<<8) +#define S (2<<8) + if (typeprop(l->type) & PROPFLOAT) + op |= F; + else if (l->type->basic.issigned) + op |= S; + switch (op) { + case TMUL: + case TMUL|S: expr->constant.i = l->constant.i * r->constant.i; break; + case TMUL|F: expr->constant.f = l->constant.f * r->constant.f; break; + case TDIV: expr->constant.i = l->constant.i / r->constant.i; break; + case TDIV|S: expr->constant.i = (int64_t)l->constant.i / (int64_t)r->constant.i; break; + case TDIV|F: expr->constant.f = l->constant.f / r->constant.f; break; + case TMOD: expr->constant.i = l->constant.i % r->constant.i; break; + case TMOD|S: expr->constant.i = (int64_t)l->constant.i % (int64_t)r->constant.i; break; + case TADD: + case TADD|S: expr->constant.i = l->constant.i + r->constant.i; break; + case TADD|F: expr->constant.f = l->constant.f + r->constant.f; break; + case TSUB: + case TSUB|S: expr->constant.i = l->constant.i - r->constant.i; break; + case TSUB|F: expr->constant.f = l->constant.f - r->constant.f; break; + case TSHL: + case TSHL|S: expr->constant.i = l->constant.i << r->constant.i; break; + case TSHR: expr->constant.i = l->constant.i >> r->constant.i; break; + case TSHR|S: expr->constant.i = (int64_t)l->constant.i >> r->constant.i; break; + case TBAND: + case TBAND|S: expr->constant.i = l->constant.i & r->constant.i; break; + case TBOR: + case TBOR|S: expr->constant.i = l->constant.i | r->constant.i; break; + case TXOR: + case TXOR|S: expr->constant.i = l->constant.i ^ r->constant.i; break; + case TLESS: expr->constant.i = l->constant.i < r->constant.i; break; + case TLESS|S: expr->constant.i = (int64_t)l->constant.i < (int64_t)r->constant.i; break; + case TLESS|F: expr->constant.i = l->constant.f < r->constant.f; break; + case TGREATER: expr->constant.i = l->constant.i > r->constant.i; break; + case TGREATER|S: expr->constant.i = (int64_t)l->constant.i > (int64_t)r->constant.i; break; + case TGREATER|F: expr->constant.i = l->constant.f > r->constant.f; break; + case TLEQ: expr->constant.i = l->constant.i <= r->constant.i; break; + case TLEQ|S: expr->constant.i = (int64_t)l->constant.i <= (int64_t)r->constant.i; break; + case TLEQ|F: expr->constant.i = l->constant.f <= r->constant.f; break; + case TGEQ: expr->constant.i = l->constant.i >= r->constant.i; break; + case TGEQ|S: expr->constant.i = (int64_t)l->constant.i >= (int64_t)r->constant.i; break; + case TGEQ|F: expr->constant.i = l->constant.f >= r->constant.f; break; + case TEQL: + case TEQL|S: expr->constant.i = l->constant.i == r->constant.i; break; + case TEQL|F: expr->constant.i = l->constant.f == r->constant.f; break; + case TNEQ: + case TNEQ|S: expr->constant.i = l->constant.i != r->constant.i; break; + case TNEQ|F: expr->constant.i = l->constant.f != r->constant.f; break; + default: + fatal("internal error; unknown binary expression"); + } +#undef F +#undef S + break; + case EXPRCOND: + l = expr->cond.t; + r = expr->cond.f; + c = eval(expr->cond.e); + if (c->kind != EXPRCONST) + break; + return eval(c->constant.i ? l : r); + } + + return expr; +} diff --git a/eval.h b/eval.h new file mode 100644 index 0000000..c9628c9 --- /dev/null +++ b/eval.h @@ -0,0 +1 @@ +struct expression *eval(struct expression *); diff --git a/expr.c b/expr.c new file mode 100644 index 0000000..92cd4d9 --- /dev/null +++ b/expr.c @@ -0,0 +1,874 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "util.h" +#include "decl.h" +#include "eval.h" +#include "expr.h" +#include "init.h" +#include "pp.h" +#include "scope.h" +#include "token.h" +#include "type.h" + +static struct expression * +mkexpr(enum expressionkind k, struct type *t, enum expressionflags flags) +{ + struct expression *e; + + e = xmalloc(sizeof(*e)); + e->type = flags & EXPRFLAG_LVAL || !t ? t : typeunqual(t, NULL); + e->flags = flags; + e->kind = k; + e->next = NULL; + + return e; +} + +static struct expression * +mkconstexpr(struct type *t, uint64_t n) +{ + struct expression *e; + + e = mkexpr(EXPRCONST, t, 0); + e->constant.i = n; + + return e; +} + +void +delexpr(struct expression *e) +{ + free(e); +} + +/* 6.3.2.1 Conversion of arrays and function designators */ +static struct expression * +decay(struct expression *e) +{ + struct expression *old = e; + + switch (e->type->kind) { + case TYPEARRAY: + e = mkexpr(EXPRUNARY, mkpointertype(old->type->base), EXPRFLAG_DECAYED); + e->unary.op = TBAND; + e->unary.base = old; + break; + case TYPEFUNC: + e = mkexpr(EXPRUNARY, mkpointertype(old->type), EXPRFLAG_DECAYED); + e->unary.op = TBAND; + e->unary.base = old; + break; + } + + return e; +} + +static struct type * +inttype(uint64_t val, bool decimal, char *end) +{ + static struct { + struct type *type; + const char *end1, *end2; + } limits[] = { + {&typeint, "", NULL}, + {&typeuint, "u", NULL}, + {&typelong, "l", NULL}, + {&typeulong, "ul", "lu"}, + {&typellong, "ll", NULL}, + {&typeullong, "ull", "llu"}, + }; + static const uint64_t max[] = { + [1] = 0xff, + [2] = 0xffff, + [4] = 0xffffffff, + [8] = 0xffffffffffffffff, + }; + struct type *t; + size_t i, step; + + for (i = 0; end[i]; ++i) + end[i] = tolower(end[i]); + for (i = 0; i < LEN(limits); ++i) { + if (strcmp(end, limits[i].end1) == 0) + break; + if (limits[i].end2 && strcmp(end, limits[i].end2) == 0) + break; + } + if (i == LEN(limits)) + error(&tok.loc, "invalid integer constant suffix '%s'", end); + step = i % 2 || decimal ? 2 : 1; + for (; i < LEN(limits); i += step) { + t = limits[i].type; + if (val <= max[t->size] >> t->basic.issigned) + return t; + } + error(&tok.loc, "no suitable type for constant '%s'", tok.lit); +} + +static int +isodigit(int c) +{ + return '0' <= c && c <= '8'; +} + +static int +unescape(char **p) +{ + int c; + char *s = *p; + + if (*s == '\\') { + ++s; + switch (*s) { + case '\'': + case '"': + case '?': + case '\\': c = *s; ++s; break; + case 'a': c = '\a'; ++s; break; + case 'b': c = '\b'; ++s; break; + case 'f': c = '\f'; ++s; break; + case 'n': c = '\n'; ++s; break; + case 'r': c = '\r'; ++s; break; + case 't': c = '\t'; ++s; break; + case 'v': c = '\v'; ++s; break; + case 'x': + ++s; + assert(isxdigit(*s)); + c = 0; + do c = c * 16 + (*s > '9' ? tolower(*s) - 'a' : *s - '0'); + while (isxdigit(*++s)); + break; + default: + assert(isodigit(*s)); + c = 0; + do c = c * 8 + (*s - '0'); + while (isodigit(*++s)); + } + } else { + c = *s++; + } + *p = s; + return c; +} + +/* 6.5 Expressions */ +static struct expression * +primaryexpr(struct scope *s) +{ + struct expression *e; + struct declaration *d; + char *src, *dst, *end; + int base; + + switch (tok.kind) { + case TIDENT: + d = scopegetdecl(s, tok.lit, 1); + if (!d) + error(&tok.loc, "undeclared identifier: %s", tok.lit); + e = mkexpr(EXPRIDENT, d->type, d->kind == DECLOBJECT ? EXPRFLAG_LVAL : 0); + e->ident.decl = d; + if (d->kind != DECLBUILTIN) + e = decay(e); + next(); + break; + case TSTRINGLIT: + e = mkexpr(EXPRSTRING, mkarraytype(&typechar, 0), EXPRFLAG_LVAL); + e->string.size = 0; + e->string.data = NULL; + do { + e->string.data = xreallocarray(e->string.data, e->string.size + strlen(tok.lit), 1); + dst = e->string.data + e->string.size; + for (src = tok.lit + 1; *src != '"'; ++dst) + *dst = unescape(&src); + e->string.size = dst - e->string.data; + next(); + } while (tok.kind == TSTRINGLIT); + e->type->array.length = e->string.size + 1; + e->type->size = e->type->array.length * e->type->base->size; + e = decay(e); + break; + case TCHARCONST: + src = tok.lit + 1; + e = mkconstexpr(&typeint, unescape(&src)); + if (*src != '\'') + error(&tok.loc, "character constant contains more than one character: %c", *src); + next(); + break; + case TNUMBER: + e = mkexpr(EXPRCONST, NULL, 0); + base = tok.lit[0] != '0' ? 10 : tolower(tok.lit[1]) == 'x' ? 16 : 8; + if (strpbrk(tok.lit, base == 16 ? ".pP" : ".eE")) { + /* floating constant */ + errno = 0; + e->constant.f = strtod(tok.lit, &end); + if (errno) + error(&tok.loc, "invalid floating constant '%s': %s", tok.lit, strerror(errno)); + if (!end[0]) + e->type = &typedouble; + else if (tolower(end[0]) == 'f' && !end[1]) + e->type = &typefloat; + else if (tolower(end[0]) == 'l' && !end[1]) + e->type = &typelongdouble; + else + error(&tok.loc, "invalid floating constant suffix '%s'", *end); + } else { + /* integer constant */ + errno = 0; + e->constant.i = strtoull(tok.lit, &end, 0); + if (errno) + error(&tok.loc, "invalid integer constant '%s': %s", tok.lit, strerror(errno)); + e->type = inttype(e->constant.i, base == 10, end); + } + next(); + break; + case TLPAREN: + next(); + e = expr(s); + expect(TRPAREN, "after expression"); + break; + case T_GENERIC: + error(&tok.loc, "generic selection is not yet supported"); + default: + error(&tok.loc, "expected primary expression"); + } + + return e; +} + +static void +lvalueconvert(struct expression *e) +{ + e->type = typeunqual(e->type, NULL); + e->flags &= ~EXPRFLAG_LVAL; +} + +static struct type * +commonreal(struct expression **e1, struct expression **e2) +{ + struct type *t; + + t = typecommonreal((*e1)->type, (*e2)->type); + *e1 = exprconvert(*e1, t); + *e2 = exprconvert(*e2, t); + + return t; +} + +static struct expression * +mkbinaryexpr(struct location *loc, enum tokenkind op, struct expression *l, struct expression *r) +{ + struct expression *e; + struct type *t = NULL; + enum typeproperty lp, rp; + + lvalueconvert(l); + lvalueconvert(r); + lp = typeprop(l->type); + rp = typeprop(r->type); + switch (op) { + case TLOR: + case TLAND: + if (!(lp & PROPSCALAR)) + error(loc, "left-hand-side of logical operator must be scalar"); + if (!(rp & PROPSCALAR)) + error(loc, "right-hand-side of logical operator must be scalar"); + l = exprconvert(l, &typebool); + r = exprconvert(r, &typebool); + t = &typeint; + break; + case TEQL: + case TNEQ: + case TLESS: + case TGREATER: + case TLEQ: + case TGEQ: + if (lp & PROPARITH && rp & PROPARITH) { + commonreal(&l, &r); + } else { + // XXX: check pointer types + } + t = &typeint; + break; + case TBOR: + case TXOR: + case TBAND: + t = commonreal(&l, &r); + break; + case TADD: + if (lp & PROPARITH && rp & PROPARITH) { + t = commonreal(&l, &r); + } else if (l->type->kind == TYPEPOINTER && rp & PROPINT) { + t = l->type; + r = mkbinaryexpr(loc, TMUL, exprconvert(r, &typeulong), mkconstexpr(&typeulong, t->base->size)); + } else if (lp & PROPINT && r->type->kind == TYPEPOINTER) { + t = r->type; + } else { + error(loc, "invalid operands to '+' operator"); + } + break; + case TSUB: + if (lp & PROPARITH && rp & PROPARITH) { + t = commonreal(&l, &r); + } else if (l->type->kind == TYPEPOINTER && rp & PROPINT) { + t = l->type; + r = mkbinaryexpr(loc, TMUL, exprconvert(r, &typeulong), mkconstexpr(&typeulong, t->base->size)); + } else if (l->type->kind == TYPEPOINTER && r->type->kind == TYPEPOINTER && typecompatible(typeunqual(l->type->base, NULL), typeunqual(r->type->base, NULL))) { + l = mkbinaryexpr(loc, TDIV, exprconvert(l, &typeulong), mkconstexpr(&typeulong, l->type->base->size)); + r = mkbinaryexpr(loc, TDIV, exprconvert(r, &typeulong), mkconstexpr(&typeulong, r->type->base->size)); + t = &typelong; + } else { + error(loc, "invalid operands to '-' operator"); + } + break; + case TMOD: + if (!(lp & PROPINT) || !(rp & PROPINT)) + error(loc, "operands to '%%' operator must be integer"); + t = commonreal(&l, &r); + break; + case TMUL: + case TDIV: + if (!(lp & PROPARITH) || !(rp & PROPARITH)) + error(loc, "operands to '%c' operator must be arithmetic", op == TMUL ? '*' : '/'); + t = commonreal(&l, &r); + break; + case TSHL: + case TSHR: + if (!(lp & PROPINT) || !(rp & PROPINT)) + error(loc, "operands to '%s' operator must be integer", op == TSHL ? "<<" : ">>"); + l = exprconvert(l, typeintpromote(l->type)); + r = exprconvert(r, typeintpromote(r->type)); + t = l->type; + break; + default: + fatal("internal error: unknown binary operator %d", op); + } + e = mkexpr(EXPRBINARY, t, 0); + e->binary.op = op; + e->binary.l = l; + e->binary.r = r; + + return e; +} + +static struct expression * +postfixexpr(struct scope *s, struct expression *r) +{ + struct expression *e, *arr, *idx, *tmp, **end; + struct type *t; + enum typequalifier tq; + struct parameter *p; + struct member *m; + char *name; + enum tokenkind op; + bool lvalue; + + if (!r) + r = primaryexpr(s); + for (;;) { + switch (tok.kind) { + case TLBRACK: /* subscript */ + next(); + arr = r; + idx = expr(s); + if (arr->type->kind != TYPEPOINTER) { + if (idx->type->kind != TYPEPOINTER) + error(&tok.loc, "either array or index must be pointer type"); + tmp = arr; + arr = idx; + idx = tmp; + } + if (arr->type->base->incomplete) + error(&tok.loc, "array is pointer to incomplete type"); + lvalueconvert(idx); + if (!(typeprop(idx->type) & PROPINT)) + error(&tok.loc, "index is not an integer type"); + tmp = mkbinaryexpr(&tok.loc, TADD, arr, idx); + + e = mkexpr(EXPRUNARY, arr->type->base, EXPRFLAG_LVAL); + e->unary.op = TMUL; + e->unary.base = tmp; + expect(TRBRACK, "after array index"); + e = decay(e); + break; + case TLPAREN: /* function call */ + next(); + if (r->kind == EXPRIDENT && r->ident.decl->kind == DECLBUILTIN) { + if (r->ident.decl == &builtinvastart) { + e = mkexpr(EXPRBUILTIN, &typevoid, 0); + e->builtin.kind = BUILTINVASTART; + e->builtin.arg = exprconvert(assignexpr(s), &typevalistptr); + expect(TCOMMA, "after va_list"); + free(expect(TIDENT, "after ','")); + // XXX: check that this was actually a parameter name? + expect(TRPAREN, "after parameter identifier"); + } else if (r->ident.decl == &builtinvaend) { + e = mkexpr(EXPRBUILTIN, &typevoid, 0); + e->builtin.kind = BUILTINVAEND; + exprconvert(assignexpr(s), &typevalistptr); + expect(TRPAREN, "after va_list"); + } else if (r->ident.decl == &builtinoffsetof) { + t = typename(s); + expect(TCOMMA, "after type name"); + name = expect(TIDENT, "after ','"); + if (t->kind != TYPESTRUCT && t->kind != TYPEUNION) + error(&tok.loc, "type is not a struct/union type"); + m = typemember(t, name); + if (!m) + error(&tok.loc, "struct/union has no member named '%s'", name); + e = mkconstexpr(&typeulong, m->offset); + free(name); + expect(TRPAREN, "after member name"); + } else { + fatal("internal error; unknown builtin"); + } + break; + } + lvalueconvert(r); + if (r->type->kind != TYPEPOINTER || r->type->base->kind != TYPEFUNC) + error(&tok.loc, "called object is not a function"); + t = r->type->base; + e = mkexpr(EXPRCALL, t->base, 0); + e->call.func = r; + e->call.args = NULL; + e->call.nargs = 0; + p = t->func.params; + end = &e->call.args; + for (;;) { + if (tok.kind == TRPAREN) + break; + if (e->call.args) + expect(TCOMMA, "or ')' after function call argument"); + if (!p && !t->func.isvararg && t->func.isprototype) + error(&tok.loc, "too many arguments for function call"); + *end = assignexpr(s); + if (!t->func.isprototype || (t->func.isvararg && !p)) + *end = exprconvert(*end, typeargpromote((*end)->type)); + else + *end = exprconvert(*end, p->type); + end = &(*end)->next; + ++e->call.nargs; + if (p) + p = p->next; + } + if (!t->func.isprototype && p) + error(&tok.loc, "not enough arguments for function call"); + e = decay(e); + next(); + break; + case TPERIOD: + e = mkexpr(EXPRUNARY, mkpointertype(r->type), 0); + e->unary.op = TBAND; + e->unary.base = r; + r = e; + /* fallthrough */ + case TARROW: + op = tok.kind; + if (r->type->kind != TYPEPOINTER) + error(&tok.loc, "arrow operator must be applied to pointer to struct/union"); + tq = QUALNONE; + t = typeunqual(r->type->base, &tq); + if (t->kind != TYPESTRUCT && t->kind != TYPEUNION) + error(&tok.loc, "arrow operator must be applied to pointer to struct/union"); + next(); + if (tok.kind != TIDENT) + error(&tok.loc, "expected identifier after '->' operator"); + lvalue = op == TARROW || r->unary.base->flags & EXPRFLAG_LVAL; + r = exprconvert(r, mkpointertype(&typechar)); + m = typemember(t, tok.lit); + if (!m) + error(&tok.loc, "struct/union has no member named '%s'", tok.lit); + r = mkbinaryexpr(&tok.loc, TADD, r, mkconstexpr(&typeulong, m->offset)); + r = exprconvert(r, mkpointertype(mkqualifiedtype(m->type, tq))); + e = mkexpr(EXPRUNARY, r->type->base, lvalue ? EXPRFLAG_LVAL : 0); + e->unary.op = TMUL; + e->unary.base = r; + e = decay(e); + next(); + break; + case TINC: + case TDEC: + e = mkexpr(EXPRINCDEC, r->type, 0); + e->incdec.op = tok.kind; + e->incdec.base = r; + e->incdec.post = 1; + next(); + break; + default: + return r; + } + r = e; + } +} + +static struct expression *castexpr(struct scope *); + +static struct expression * +unaryexpr(struct scope *s) +{ + enum tokenkind op; + struct expression *e, *l; + struct type *t; + + op = tok.kind; + switch (op) { + case TINC: + case TDEC: + next(); + l = unaryexpr(s); + if (!(l->flags & EXPRFLAG_LVAL)) + error(&tok.loc, "operand of %srement operator must be an lvalue", op == TINC ? "inc" : "dec"); + /* + if (l->qualifiers & QUALCONST) + error(&tok.loc, "operand of %srement operator is const qualified", op == TINC ? "inc" : "dec"); + */ + e = mkexpr(EXPRINCDEC, l->type, 0); + e->incdec.op = op; + e->incdec.base = l; + e->incdec.post = 0; + break; + case TBAND: + next(); + e = castexpr(s); + if (!(e->flags & EXPRFLAG_DECAYED)) { + l = e; + e = mkexpr(EXPRUNARY, NULL, 0); + e->unary.op = op; + e->unary.base = l; + e->type = mkpointertype(l->type); + } + break; + case TMUL: + next(); + l = castexpr(s); + if (l->type->kind != TYPEPOINTER) + error(&tok.loc, "cannot dereference non-pointer"); + e = mkexpr(EXPRUNARY, l->type->base, EXPRFLAG_LVAL); + e->unary.op = op; + e->unary.base = l; + e = decay(e); + break; + case TADD: + next(); + e = castexpr(s); + e = exprconvert(e, typeintpromote(e->type)); + break; + case TSUB: + next(); + e = castexpr(s); + e = exprconvert(e, typeintpromote(e->type)); + e = mkbinaryexpr(&tok.loc, TSUB, mkconstexpr(&typeint, 0), e); + break; + case TBNOT: + next(); + e = castexpr(s); + e = exprconvert(e, typeintpromote(e->type)); + e = mkbinaryexpr(&tok.loc, TXOR, e, mkconstexpr(e->type, -1)); + break; + case TLNOT: + next(); + e = castexpr(s); + lvalueconvert(e); + if (!(typeprop(e->type) & PROPSCALAR)) + error(&tok.loc, "operator '!' must have scalar operand"); + e = mkbinaryexpr(&tok.loc, TEQL, e, mkconstexpr(&typeint, 0)); + break; + case TSIZEOF: + case T_ALIGNOF: + next(); + if (consume(TLPAREN)) { + t = typename(s); + if (!t) { + e = expr(s); + if (e->flags & EXPRFLAG_DECAYED) + e = e->unary.base; + t = e->type; + } + expect(TRPAREN, "after type name"); + } else if (op == TSIZEOF) { + e = unaryexpr(s); + if (e->flags & EXPRFLAG_DECAYED) + e = e->unary.base; + t = e->type; + } else { + error(&tok.loc, "expected ')' after '_Alignof'"); + } + e = mkconstexpr(&typeulong, op == TSIZEOF ? t->size : t->align); + break; + default: + e = postfixexpr(s, NULL); + } + + return e; +} + +static struct expression * +castexpr(struct scope *s) +{ + struct type *t; + struct expression *r, *e, **end; + + end = &r; + while (consume(TLPAREN)) { + t = typename(s); + if (!t) { + e = expr(s); + expect(TRPAREN, "after expression to match '('"); + *end = postfixexpr(s, e); + return r; + } + expect(TRPAREN, "after type name"); + if (tok.kind == TLBRACE) { + e = mkexpr(EXPRCOMPOUND, t, EXPRFLAG_LVAL); + e->compound.init = parseinit(s, t); + e = decay(e); + *end = postfixexpr(s, e); + return r; + } + e = mkexpr(EXPRCAST, t, 0); + // XXX check types 6.5.4 + *end = e; + end = &e->cast.e; + } + *end = unaryexpr(s); + + return r; +} + +static int +isequality(enum tokenkind t) +{ + return t == TEQL || t == TNEQ; +} + +static int +isrelational(enum tokenkind t) +{ + return t == TLESS || t == TGREATER || t == TLEQ || t == TGEQ; +} + +static int +isshift(enum tokenkind t) +{ + return t == TSHL || t == TSHR; +} + +static int +isadditive(enum tokenkind t) +{ + return t == TADD || t == TSUB; +} + +static int +ismultiplicative(enum tokenkind t) +{ + return t == TMUL || t == TDIV || t == TMOD; +} + +static struct expression * +binaryexpr(struct scope *s, size_t i) +{ + static const struct { + int tok; + int (*fn)(enum tokenkind); + } prec[] = { + // XXX: bitmask? + {.tok = TLOR}, + {.tok = TLAND}, + {.tok = TBOR}, + {.tok = TXOR}, + {.tok = TBAND}, + {.fn = isequality}, + {.fn = isrelational}, + {.fn = isshift}, + {.fn = isadditive}, + {.fn = ismultiplicative}, + }; + struct expression *e, *l, *r; + struct location loc; + enum tokenkind op; + + if (i >= LEN(prec)) + return castexpr(s); + l = binaryexpr(s, i + 1); + while (tok.kind == prec[i].tok || (prec[i].fn && prec[i].fn(tok.kind))) { + op = tok.kind; + loc = tok.loc; + next(); + r = binaryexpr(s, i + 1); + e = mkbinaryexpr(&loc, op, l, r); + l = e; + } + + return l; +} + +static bool +nullpointer(struct expression *e) +{ + if (e->kind != EXPRCONST) + return false; + if (!(typeprop(e->type) & PROPINT) && (e->type->kind != TYPEPOINTER || e->type->base != &typevoid)) + return false; + return e->constant.i == 0; +} + +static struct expression * +condexpr(struct scope *s) +{ + struct expression *r, *e; + struct type *t, *f; + enum typequalifier tq; + + r = binaryexpr(s, 0); + if (!consume(TQUESTION)) + return r; + e = mkexpr(EXPRCOND, NULL, 0); + e->cond.e = exprconvert(r, &typebool); + e->cond.t = expr(s); + expect(TCOLON, "in conditional expression"); + e->cond.f = condexpr(s); + lvalueconvert(e->cond.t); + lvalueconvert(e->cond.f); + t = e->cond.t->type; + f = e->cond.f->type; + if (t == f) { + e->type = t; + } else if (typeprop(t) & PROPARITH && typeprop(f) & PROPARITH) { + e->type = commonreal(&e->cond.t, &e->cond.f); + } else if (t == &typevoid && f == &typevoid) { + e->type = &typevoid; + } else { + e->cond.t = eval(e->cond.t); + e->cond.f = eval(e->cond.f); + if (nullpointer(e->cond.t) && f->kind == TYPEPOINTER) { + e->type = f; + } else if (nullpointer(e->cond.f) && t->kind == TYPEPOINTER) { + e->type = t; + } else if (t->kind == TYPEPOINTER && f->kind == TYPEPOINTER) { + tq = QUALNONE; + t = typeunqual(t->base, &tq); + f = typeunqual(f->base, &tq); + if (t == &typevoid || f == &typevoid) { + e->type = &typevoid; + } else { + if (!typecompatible(t, f)) + error(&tok.loc, "operands of conditional operator must have compatible types"); + e->type = typecomposite(t, f); + } + e->type = mkpointertype(mkqualifiedtype(e->type, tq)); + } else { + error(&tok.loc, "invalid operands to conditional operator"); + } + } + + return e; +} + +struct expression * +constexpr(struct scope *s) +{ + return eval(condexpr(s)); +} + +uint64_t +intconstexpr(struct scope *s) +{ + struct expression *e; + + e = constexpr(s); + if (e->kind != EXPRCONST || !(typeprop(e->type) & PROPINT)) + error(&tok.loc, "not an integer constant expression"); + return e->constant.i; +} + +struct expression * +assignexpr(struct scope *s) +{ + struct expression *e, *l, *r, *tmp = NULL, **res = &e; + enum tokenkind op; + + l = condexpr(s); + if (l->kind == EXPRBINARY || l->kind == EXPRCOMMA || l->kind == EXPRCAST) + return l; + switch (tok.kind) { + case TASSIGN: op = TNONE; break; + case TMULASSIGN: op = TMUL; break; + case TDIVASSIGN: op = TDIV; break; + case TMODASSIGN: op = TMOD; break; + case TADDASSIGN: op = TADD; break; + case TSUBASSIGN: op = TSUB; break; + case TSHLASSIGN: op = TSHL; break; + case TSHRASSIGN: op = TSHR; break; + case TBANDASSIGN: op = TBAND; break; + case TXORASSIGN: op = TXOR; break; + case TBORASSIGN: op = TBOR; break; + default: + return l; + } + next(); + r = assignexpr(s); + if (op) { + /* rewrite `E1 OP= E2` as `T = &E1, *T = *T OP E2`, where T is a temporary slot */ + tmp = mkexpr(EXPRTEMP, mkpointertype(l->type), EXPRFLAG_LVAL); + tmp->temp = NULL; + e = mkexpr(EXPRCOMMA, l->type, 0); + e->comma.exprs = mkexpr(EXPRASSIGN, tmp->type, 0); + e->comma.exprs->assign.l = tmp; + e->comma.exprs->assign.r = mkexpr(EXPRUNARY, e->type, 0); + e->comma.exprs->assign.r->unary.op = TBAND; + e->comma.exprs->assign.r->unary.base = l; + res = &e->comma.exprs->next; + l = mkexpr(EXPRUNARY, l->type, 0); + l->unary.op = TMUL; + l->unary.base = tmp; + r = mkbinaryexpr(&tok.loc, op, l, r); + } + r = exprconvert(r, l->type); + *res = mkexpr(EXPRASSIGN, l->type, 0); + (*res)->assign.l = l; + (*res)->assign.r = r; + + return e; +} + +struct expression * +expr(struct scope *s) +{ + struct expression *r, *e, **end; + + end = &r; + for (;;) { + e = assignexpr(s); + *end = e; + end = &e->next; + if (tok.kind != TCOMMA) + break; + next(); + } + if (!r->next) + return r; + e = mkexpr(EXPRCOMMA, e->type, 0); + e->comma.exprs = r; + + return e; +} + +struct expression * +exprconvert(struct expression *e, struct type *t) +{ + struct expression *cast; + + if (typecompatible(e->type, t)) + return e; + cast = mkexpr(EXPRCAST, t, 0); + cast->cast.e = e; + + return cast; +} diff --git a/expr.h b/expr.h new file mode 100644 index 0000000..8269d22 --- /dev/null +++ b/expr.h @@ -0,0 +1,99 @@ +enum expressionkind { + /* primary expression */ + EXPRIDENT, + EXPRCONST, + EXPRSTRING, + + /* postfix expression */ + EXPRCALL, + /* member E.M gets transformed to *(typeof(E.M) *)((char *)E + offsetof(typeof(E), M)) */ + EXPRINCDEC, + EXPRCOMPOUND, + /* subscript E1[E2] gets transformed to *((E1)+(E2)) */ + + EXPRUNARY, + EXPRCAST, + EXPRBINARY, + EXPRCOND, + EXPRASSIGN, + EXPRCOMMA, + + EXPRBUILTIN, + EXPRTEMP, +}; + +enum expressionflags { + EXPRFLAG_LVAL = 1<<0, + EXPRFLAG_DECAYED = 1<<1, +}; + +struct expression { + enum expressionkind kind; + enum expressionflags flags; + struct type *type; + struct expression *next; + union { + struct { + struct declaration *decl; + } ident; + union { + uint64_t i; + double f; + } constant; + struct { + char *data; + size_t size; + } string; + struct { + struct expression *func, *args; + size_t nargs; + } call; + struct { + struct initializer *init; + } compound; + struct { + int op; + _Bool post; + struct expression *base; + } incdec; + struct { + int op; + struct expression *base; + } unary; + struct { + struct expression *e; + } cast; + struct { + int op; + struct expression *l, *r; + } binary; + struct { + struct expression *e, *t, *f; + } cond; + struct { + struct expression *l, *r; + } assign; + struct { + struct expression *exprs; + } comma; + struct { + enum { + BUILTINVASTART, + BUILTINVAEND, + } kind; + struct expression *arg; + } builtin; + struct value *temp; + }; +}; + +struct scope; + +struct expression *expr(struct scope *); +struct expression *assignexpr(struct scope *); +struct expression *constexpr(struct scope *); +uint64_t intconstexpr(struct scope *); +void delexpr(struct expression *); + +void exprpromote(struct expression **); // XXX: move to type +struct expression *exprconvert(struct expression *, struct type *); diff --git a/htab.c b/htab.c new file mode 100644 index 0000000..5b4cb0a --- /dev/null +++ b/htab.c @@ -0,0 +1,137 @@ +#include +#include +#include +#include +#include +#include "util.h" +#include "htab.h" + +struct hashtable { + size_t len, cap; + struct hashtablekey *keys; + void **vals; +}; + +static uint64_t +hash(const void *ptr, size_t len) +{ + extern int siphash(const uint8_t *, const size_t, const uint8_t *, uint8_t *, const size_t); + static const uint8_t k[16] = {0}; // XXX: we don't have a way to get entropy in standard C + uint64_t r; + + siphash(ptr, len, k, (uint8_t *)&r, sizeof(r)); + + return r; +} + +void +htabbufkey(struct hashtablekey *k, const char *s, size_t n) +{ + k->str = s; + k->len = n; + k->hash = hash(s, n); +} + +void +htabstrkey(struct hashtablekey *k, const char *s) +{ + htabbufkey(k, s, strlen(s)); +} + +struct hashtable * +mkhtab(size_t cap) +{ + struct hashtable *h; + size_t i; + + assert(!(cap & cap - 1)); + h = xmalloc(sizeof(*h)); + h->len = 0; + h->cap = cap; + h->keys = xreallocarray(NULL, cap, sizeof(h->keys[0])); + h->vals = xreallocarray(NULL, cap, sizeof(h->vals[0])); + for (i = 0; i < cap; ++i) + h->keys[i].str = NULL; + + return h; +} + +void +delhtab(struct hashtable *h, void del(void *)) +{ + size_t i; + + if (!h) + return; + if (del) { + for (i = 0; i < h->cap; ++i) { + if (h->keys[i].str) + del(h->vals[i]); + } + } + free(h->keys); + free(h->vals); + free(h); +} + +static bool +keyequal(struct hashtablekey *k1, struct hashtablekey *k2) +{ + if (k1->hash != k2->hash || k1->len != k2->len) + return false; + return memcmp(k1->str, k2->str, k1->len) == 0; +} + +static size_t +keyindex(struct hashtable *h, struct hashtablekey *k) +{ + size_t i; + + i = k->hash & h->cap - 1; + while (h->keys[i].str && !keyequal(&h->keys[i], k)) + i = i + 1 & h->cap - 1; + return i; +} + +void ** +htabput(struct hashtable *h, struct hashtablekey *k) +{ + struct hashtablekey *oldkeys; + void **oldvals; + size_t i, j, oldcap; + + if (h->cap / 2 < h->len) { + oldkeys = h->keys; + oldvals = h->vals; + oldcap = h->cap; + h->cap *= 2; + h->keys = xreallocarray(NULL, h->cap, sizeof(h->keys[0])); + h->vals = xreallocarray(NULL, h->cap, sizeof(h->vals[0])); + for (i = 0; i < h->cap; ++i) + h->keys[i].str = NULL; + for (i = 0; i < oldcap; ++i) { + if (oldkeys[i].str) { + j = keyindex(h, &oldkeys[i]); + h->keys[j] = oldkeys[i]; + h->vals[j] = oldvals[i]; + } + } + } + i = keyindex(h, k); + if (!h->keys[i].str) { + h->keys[i] = *k; + h->vals[i] = NULL; + ++h->len; + } + + return &h->vals[i]; +} + +void * +htabget(struct hashtable *h, struct hashtablekey *k) +{ + size_t i; + + i = keyindex(h, k); + return h->keys[i].str ? h->vals[i] : NULL; +} diff --git a/htab.h b/htab.h new file mode 100644 index 0000000..eb0b663 --- /dev/null +++ b/htab.h @@ -0,0 +1,13 @@ +struct hashtablekey { + uint64_t hash; + const char *str; + size_t len; +}; + +void htabstrkey(struct hashtablekey *, const char *); +void htabbufkey(struct hashtablekey *, const char *, size_t); + +struct hashtable *mkhtab(size_t); +void delhtab(struct hashtable *, void(void *)); +void **htabput(struct hashtable *, struct hashtablekey *); +void *htabget(struct hashtable *, struct hashtablekey *); diff --git a/init.c b/init.c new file mode 100644 index 0000000..7068065 --- /dev/null +++ b/init.c @@ -0,0 +1,279 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "util.h" +#include "decl.h" +#include "expr.h" +#include "init.h" +#include "pp.h" +#include "token.h" +#include "type.h" + +struct object { + uint64_t offset; + struct type *type; + union { + struct member *mem; + size_t idx; + }; + bool iscur; +}; + +struct initparser { + /* TODO: keep track of type depth, and allocate maximum possible + number of nested objects in initializer */ + struct object obj[32], *cur, *sub; +}; + +struct initializer * +mkinit(uint64_t offset, struct expression *expr) +{ + struct initializer *init; + + init = xmalloc(sizeof(*init)); + init->start = offset; + init->end = offset + expr->type->size; + init->expr = expr; + init->next = NULL; + + return init; +} + +static void +initadd(struct initializer **init, struct initializer *new) +{ + struct initializer *next, *sub, *last; + uint64_t offset; + + while (*init && new->start >= (*init)->end) + init = &(*init)->next; + next = *init; + if (next && next->start <= new->start && new->end < next->end) { + initadd(&next->subinit, new); + last = NULL; /* silence gcc, we know that next->subinit has at least one member */ + for (offset = next->start, sub = next->subinit; sub; offset = sub->end, last = sub, sub = sub->next) { + if (sub->start != offset) + return; + } + if (offset == next->end) { + *init = next->subinit; + last->next = next->next; + } + } else { + *init = new; + while (next && next->start < (*init)->end) + next = next->next; + (*init)->next = next; + } +} + +static void +updatearray(struct type *t, uint64_t i) +{ + if (!t->incomplete) + return; + if (++i > t->array.length) { + t->array.length = i; + t->size = i * t->base->size; + } +} + +static void +designator(struct scope *s, struct initparser *p) +{ + struct type *t; + uint64_t offset; + const char *name; + + p->sub = p->cur; + t = p->sub->type; + offset = p->sub->offset; + for (;;) { + switch (tok.kind) { + case TLBRACK: + if (t->kind != TYPEARRAY) + error(&tok.loc, "index designator is only valid for array types"); + next(); + p->sub->idx = intconstexpr(s); + if (t->incomplete) + updatearray(t, p->sub->idx); + else if (p->sub->idx >= t->array.length) + error(&tok.loc, "index designator is larger than array length"); + expect(TRBRACK, "for index designator"); + t = t->base; + offset += p->sub->idx * t->size; + break; + case TPERIOD: + if (t->kind != TYPESTRUCT && t->kind != TYPEUNION) + error(&tok.loc, "member designator only valid for struct/union types"); + next(); + name = expect(TIDENT, "for member designator"); + arrayforeach (&t->structunion.members, p->sub->mem) { + if (strcmp(p->sub->mem->name, name) == 0) + break; + } + if (!p->sub->mem) + error(&tok.loc, "%s has no member named '%s'", t->kind == TYPEUNION ? "union" : "struct", name); + t = p->sub->mem->type; + offset += p->sub->mem->offset; + break; + default: + expect(TASSIGN, "after designator"); + return; + } + if (++p->sub == p->obj + LEN(p->obj)) + fatal("internal error: too many designators"); + p->sub->type = t; + p->sub->offset = offset; + } +} + +static void +focus(struct initparser *p) +{ + struct type *t; + uint64_t offset; + + offset = p->sub->offset; + switch (p->sub->type->kind) { + case TYPEARRAY: + p->sub->idx = 0; + if (p->sub->type->incomplete) + updatearray(p->sub->type, p->sub->idx); + t = p->sub->type->base; + break; + case TYPESTRUCT: + case TYPEUNION: + p->sub->mem = p->sub->type->structunion.members.val; + t = p->sub->mem->type; + break; + default: + t = p->sub->type; + } + if (++p->sub == p->obj + LEN(p->obj)) + fatal("internal error: too many designators"); + p->sub->type = typeunqual(t, NULL); + p->sub->offset = offset; +} + +static void +advance(struct initparser *p) +{ + struct type *t; + uint64_t offset; + + for (;;) { + --p->sub; + offset = p->sub->offset; + switch (p->sub->type->kind) { + case TYPEARRAY: + ++p->sub->idx; + if (p->sub->type->incomplete) + updatearray(p->sub->type, p->sub->idx); + if (p->sub->idx < p->sub->type->array.length) { + t = p->sub->type->base; + offset += t->size * p->sub->idx; + goto done; + } + break; + case TYPESTRUCT: + ++p->sub->mem; + if (p->sub->mem != (void *)((uintptr_t)p->sub->type->structunion.members.val + p->sub->type->structunion.members.len)) { + t = p->sub->mem->type; + offset += p->sub->mem->offset; + goto done; + } + break; + } + if (p->sub == p->cur) + error(&tok.loc, "too many initializers for type"); + } +done: + ++p->sub; + p->sub->type = typeunqual(t, NULL); + p->sub->offset = offset; +} + +/* 6.7.9 Initialization */ +struct initializer * +parseinit(struct scope *s, struct type *t) +{ + struct initparser p; + struct initializer *init = NULL; + struct expression *expr; + struct type *base; + + p.cur = NULL; + p.sub = p.obj; + p.sub->offset = 0; + p.sub->type = t; + for (;;) { + if (p.cur) { + if (tok.kind == TLBRACK || tok.kind == TPERIOD) + designator(s, &p); + else if (p.sub != p.cur) + advance(&p); + else + focus(&p); + } + if (tok.kind == TLBRACE) { + next(); + if (p.cur && p.cur->type == p.sub->type) + error(&tok.loc, "nested braces around scalar initializer"); + p.cur = p.sub; + p.cur->iscur = true; + continue; + } + expr = assignexpr(s); + for (;;) { + t = typeunqual(p.sub->type, NULL); + switch (t->kind) { + case TYPEARRAY: + if (expr->flags & EXPRFLAG_DECAYED && expr->unary.base->kind == EXPRSTRING) { + expr = expr->unary.base; + base = typeunqual(t->base, NULL); + if (!typecompatible(expr->type->base, base)) + error(&tok.loc, "array initializer is string literal with incompatible type"); + if (t->incomplete) + updatearray(t, expr->string.size + 1); + goto add; + } + break; + case TYPESTRUCT: + case TYPEUNION: + if (typecompatible(expr->type, t)) + goto add; + break; + default: /* scalar type */ + assert(typeprop(t) & PROPSCALAR); + expr = exprconvert(expr, t); + goto add; + } + focus(&p); + } + add: + initadd(&init, mkinit(p.sub->offset, expr)); + for (;;) { + if (p.sub->type->kind == TYPEARRAY && p.sub->type->incomplete) + p.sub->type->incomplete = false; + if (!p.cur) + return init; + if (tok.kind == TCOMMA) { + next(); + if (tok.kind != TRBRACE) + break; + } else if (tok.kind != TRBRACE) { + error(&tok.loc, "expected ',' or '}' after initializer"); + } + next(); + p.sub = p.cur; + do p.cur = p.cur == p.obj ? NULL : p.cur - 1; + while (p.cur && !p.cur->iscur); + } + } +} diff --git a/init.h b/init.h new file mode 100644 index 0000000..9200e9f --- /dev/null +++ b/init.h @@ -0,0 +1,13 @@ +struct initializer { + uint64_t start, end; + struct expression *expr; + struct initializer *next, *subinit; +}; + +struct scope; +struct type; +struct function; +struct declaration; + +struct initializer *mkinit(uint64_t, struct expression *); +struct initializer *parseinit(struct scope *, struct type *); diff --git a/main.c b/main.c new file mode 100644 index 0000000..fa50078 --- /dev/null +++ b/main.c @@ -0,0 +1,67 @@ +#include +#include +#include +#include +#include +#include "util.h" +#include "arg.h" +#include "decl.h" +#include "pp.h" +#include "scope.h" +#include "token.h" + +static noreturn void +usage(void) +{ + fprintf(stderr, "usage: %s [input]\n", argv0); + exit(2); +} + +int +main(int argc, char *argv[]) +{ + bool pponly = false; + char *output = NULL; + + argv0 = progname(argv[0], "qbe"); + ARGBEGIN { + case 'E': + pponly = true; + break; + case 'o': + output = EARGF(usage()); + break; + default: + usage(); + } ARGEND + + if (argc == 1) { + if (!freopen(argv[0], "r", stdin)) + fatal("open %s:", argv[0]); + } else if (argc) { + usage(); + } + if (output) { + if (!freopen(output, "w", stdout)) + fatal("open %s:", output); + } + + ppinit(argc ? argv[0] : ""); + if (pponly) { + while (tok.kind != TEOF) { + tokprint(&tok); + next(); + } + } else { + scopeputdecl(&filescope, "__builtin_va_start", &builtinvastart); + scopeputdecl(&filescope, "__builtin_va_end", &builtinvaend); + scopeputdecl(&filescope, "__builtin_offsetof", &builtinoffsetof); + while (tok.kind != TEOF) { + if (!decl(&filescope, NULL)) + error(&tok.loc, "expected declaration or function definition"); + } + } + emittentativedefns(); + + return 0; +} diff --git a/ops.h b/ops.h new file mode 100644 index 0000000..7af7810 --- /dev/null +++ b/ops.h @@ -0,0 +1,108 @@ +/* jumps */ +OP(IJMP, 0, 1, "jmp") +OP(IJNZ, 0, 3, "jnz") +OP(IRET, 0, 1, "ret") + +/* arithmetic and bits */ +OP(IADD, 1, 2, "add") +OP(ISUB, 1, 2, "sub") +OP(IDIV, 1, 2, "div") +OP(IMUL, 1, 2, "mul") +OP(IUDIV, 1, 2, "udiv") +OP(IREM, 1, 2, "rem") +OP(IUREM, 1, 2, "urem") +OP(IOR, 1, 2, "or") +OP(IXOR, 1, 2, "xor") +OP(IAND, 1, 2, "and") +OP(ISAR, 1, 2, "sar") +OP(ISHR, 1, 2, "shr") +OP(ISHL, 1, 2, "shl") + +/* memory */ +OP(ISTORED, 0, 2, "stored") +OP(ISTORES, 0, 2, "stores") +OP(ISTOREL, 0, 2, "storel") +OP(ISTOREW, 0, 2, "storew") +OP(ISTOREH, 0, 2, "storeh") +OP(ISTOREB, 0, 2, "storeb") +OP(ILOADD, 1, 1, "loadd") +OP(ILOADS, 1, 1, "loads") +OP(ILOADL, 1, 1, "loadl") +OP(ILOADSW, 1, 1, "loadsw") +OP(ILOADUW, 1, 1, "loaduw") +OP(ILOADSH, 1, 1, "loadsh") +OP(ILOADUH, 1, 1, "loaduh") +OP(ILOADSB, 1, 1, "loadsb") +OP(ILOADUB, 1, 1, "loadub") +OP(IALLOC4, 1, 1, "alloc4") +OP(IALLOC8, 1, 1, "alloc8") +OP(IALLOC16, 1, 1, "alloc16") + +/* comparisons */ +OP(ICEQW, 1, 2, "ceqw") +OP(ICNEW, 1, 2, "cnew") +OP(ICSLEW, 1, 2, "cslew") +OP(ICSLTW, 1, 2, "csltw") +OP(ICSGEW, 1, 2, "csgew") +OP(ICSGTW, 1, 2, "csgtw") +OP(ICULEW, 1, 2, "culew") +OP(ICULTW, 1, 2, "cultw") +OP(ICUGEW, 1, 2, "cugew") +OP(ICUGTW, 1, 2, "cugtw") + +OP(ICEQL, 1, 2, "ceql") +OP(ICNEL, 1, 2, "cnel") +OP(ICSLEL, 1, 2, "cslel") +OP(ICSLTL, 1, 2, "csltl") +OP(ICSGEL, 1, 2, "csgel") +OP(ICSGTL, 1, 2, "csgtl") +OP(ICULEL, 1, 2, "culel") +OP(ICULTL, 1, 2, "cultl") +OP(ICUGEL, 1, 2, "cugel") +OP(ICUGTL, 1, 2, "cugtl") + +OP(ICEQS, 1, 2, "ceqs") +OP(ICNES, 1, 2, "cnes") +OP(ICLES, 1, 2, "cles") +OP(ICLTS, 1, 2, "clts") +OP(ICGES, 1, 2, "cges") +OP(ICGTS, 1, 2, "cgts") +OP(ICOS, 1, 2, "cos") +OP(ICUOS, 1, 2, "cuos") + +OP(ICEQD, 1, 2, "ceqd") +OP(ICNED, 1, 2, "cned") +OP(ICLED, 1, 2, "cled") +OP(ICLTD, 1, 2, "cltd") +OP(ICGED, 1, 2, "cged") +OP(ICGTD, 1, 2, "cgtd") +OP(ICOD, 1, 2, "cod") +OP(ICUOD, 1, 2, "cuod") + +/* conversions */ +OP(IEXTSW, 1, 1, "extsw") +OP(IEXTUW, 1, 1, "extuw") +OP(IEXTSH, 1, 1, "extsh") +OP(IEXTUH, 1, 1, "extuh") +OP(IEXTSB, 1, 1, "extsb") +OP(IEXTUB, 1, 1, "extub") +OP(IEXTS, 1, 1, "exts") +OP(ITRUNCD, 1, 1, "truncd") +OP(ISTOSI, 1, 1, "stosi") +OP(IDTOSI, 1, 1, "dtosi") +OP(ISWTOF, 1, 1, "swtof") +OP(ISLTOF, 1, 1, "sltof") + +/* cast and copy */ +OP(ICAST, 1, 1, "cast") +OP(ICOPY, 1, 1, "copy") + +/* call */ +OP(ICALL, 1, -1, "call") + +/* variadic */ +OP(IVASTART, 0, 1, "vastart") +OP(IVAARG, 1, 1, "vaarg") + +/* phi */ +OP(IPHI, 1, -1, "phi") diff --git a/pp.c b/pp.c new file mode 100644 index 0000000..0ca7a28 --- /dev/null +++ b/pp.c @@ -0,0 +1,144 @@ +#include +#include +#include +#include +#include +#include "util.h" +#include "pp.h" +#include "scan.h" +#include "token.h" + +static struct scanner *scanner; +static struct token pending; + +static void +keyword(struct token *tok) +{ + static const struct { + const char *name; + int value; + } keywords[] = { + {"_Alignas", T_ALIGNAS}, + {"_Alignof", T_ALIGNOF}, + {"_Atomic", T_ATOMIC}, + {"_Bool", T_BOOL}, + {"_Complex", T_COMPLEX}, + {"_Generic", T_GENERIC}, + {"_Imaginary", T_IMAGINARY}, + {"_Noreturn", T_NORETURN}, + {"_Static_assert", T_STATIC_ASSERT}, + {"_Thread_local", T_THREAD_LOCAL}, + {"__builtin_va_list", T__BUILTIN_VA_LIST}, + {"auto", TAUTO}, + {"break", TBREAK}, + {"case", TCASE}, + {"char", TCHAR}, + {"const", TCONST}, + {"continue", TCONTINUE}, + {"default", TDEFAULT}, + {"do", TDO}, + {"double", TDOUBLE}, + {"else", TELSE}, + {"enum", TENUM}, + {"extern", TEXTERN}, + {"float", TFLOAT}, + {"for", TFOR}, + {"goto", TGOTO}, + {"if", TIF}, + {"inline", TINLINE}, + {"int", TINT}, + {"long", TLONG}, + {"register", TREGISTER}, + {"restrict", TRESTRICT}, + {"return", TRETURN}, + {"short", TSHORT}, + {"signed", TSIGNED}, + {"sizeof", TSIZEOF}, + {"static", TSTATIC}, + {"struct", TSTRUCT}, + {"switch", TSWITCH}, + {"typedef", TTYPEDEF}, + {"union", TUNION}, + {"unsigned", TUNSIGNED}, + {"void", TVOID}, + {"volatile", TVOLATILE}, + {"while", TWHILE}, + }; + size_t low = 0, high = LEN(keywords), mid; + int cmp; + + while (low < high) { + mid = (low + high) / 2; + cmp = strcmp(tok->lit, keywords[mid].name); + if (cmp == 0) { + free(tok->lit); + tok->kind = keywords[mid].value; + tok->lit = NULL; + break; + } + if (cmp < 0) + high = mid; + else + low = mid + 1; + } +} + +void +ppinit(const char *file) +{ + scanner = mkscanner(file); + next(); +} + +static void +nextinto(struct token *t) +{ + do scan(scanner, t); + while (t->kind == TNEWLINE); + if (t->kind == TIDENT) + keyword(t); +} + +void +next(void) +{ + if (pending.kind) { + tok = pending; + pending.kind = TNONE; + } else { + nextinto(&tok); + } +} + +bool +peek(int kind) +{ + nextinto(&pending); + if (pending.kind != kind) + return false; + pending.kind = TNONE; + nextinto(&tok); + return true; +} + +char * +expect(int kind, const char *msg) +{ + char *lit; + + if (tok.kind != kind) + error(&tok.loc, "expected %d %s, saw %d", kind, msg, tok.kind); + lit = tok.lit; + next(); + + return lit; +} + +bool +consume(int kind) +{ + if (tok.kind != kind) + return false; + next(); + return true; +} diff --git a/pp.h b/pp.h new file mode 100644 index 0000000..462fd0c --- /dev/null +++ b/pp.h @@ -0,0 +1,8 @@ +struct location; + +void ppinit(const char *); + +void next(void); +_Bool peek(int); +char *expect(int, const char *); +_Bool consume(int); diff --git a/qbe.c b/qbe.c new file mode 100644 index 0000000..77bf0f9 --- /dev/null +++ b/qbe.c @@ -0,0 +1,1153 @@ +#include +#include +#include +#include +#include +#include +#include +#include "util.h" +#include "decl.h" +#include "emit.h" +#include "eval.h" +#include "expr.h" +#include "htab.h" +#include "init.h" +#include "scope.h" +#include "token.h" +#include "tree.h" +#include "type.h" + +struct name { + char *str; + uint64_t id; +}; + +struct representation { + char base; + char ext; + struct name abi; +}; + +struct value { + enum { + VALNONE, + VALGLOBAL, + VALCONST, + VALLABEL, + VALTEMP, + VALELLIPSIS, + } kind; + struct representation *repr; + union { + struct name name; + uint64_t i; + double f; + }; +}; + +enum instructionkind { + INONE, + +#define OP(op, ret, arg, name) op, +#include "ops.h" +#undef OP +}; + +struct instruction { + enum instructionkind kind; + struct value res, *arg[]; +}; + +struct block { + struct value label; + _Bool terminated; + struct array insts; + + struct block *next; +}; + +struct switchcase { + struct treenode node; + struct value *body; +}; + +struct representation i8 = {'w', 'b'}; +struct representation i16 = {'w', 'h'}; +struct representation i32 = {'w', 'w'}; +struct representation i64 = {'l', 'l'}; +struct representation f32 = {'s', 's'}; +struct representation f64 = {'d', 'd'}; +struct representation iptr = {'l', 'l'}; + +void +switchcase(struct switchcases *cases, uint64_t i, struct value *v) +{ + struct switchcase *c; + + c = treeinsert(&cases->root, i, sizeof(*c)); + if (!c->node.new) + error(&tok.loc, "multiple 'case' labels with same value"); + c->body = v; +} + +/* functions */ + +struct value * +mkblock(char *name) +{ + static uint64_t id; + struct block *b; + + b = xmalloc(sizeof(*b)); + b->label.kind = VALLABEL; + b->label.name.str = name; + b->label.name.id = ++id; + b->terminated = false; + b->insts = (struct array){0}; + b->next = NULL; + + return &b->label; +} + +static void emittype(struct type *); +static void emitvalue(struct value *); + +static void +funcname(struct function *f, struct name *n, char *s) +{ + n->id = ++f->lastid; + n->str = s; +} + +static void +functemp(struct function *f, struct value *v, struct representation *repr) +{ + if (!repr) + fatal("temp has no type"); + v->kind = VALTEMP; + funcname(f, &v->name, NULL); + v->repr = repr; +} + +static struct { + int ret, arg; + char *str; +} instdesc[] = { +#define OP(op, ret, arg, name) [op] = {ret, arg, name}, +#include "ops.h" +#undef OP +}; + +static struct value * +funcinst(struct function *f, int op, struct representation *repr, struct value *args[]) +{ + struct instruction *inst; + size_t n; + + if (f->end->terminated) + return NULL; + n = instdesc[op].arg; + if (n == -1) { + for (n = 0; args[n]; ++n) + ; + ++n; + } + if (n > (SIZE_MAX - sizeof(*inst)) / sizeof(args[0])) { + errno = ENOMEM; + fatal("malloc:"); + } + inst = xmalloc(sizeof(*inst) + n * sizeof(args[0])); + inst->kind = op; + if (repr) + functemp(f, &inst->res, repr); + else + inst->res.kind = VALNONE; + memcpy(inst->arg, args, n * sizeof(args[0])); + arrayaddptr(&f->end->insts, inst); + + return instdesc[op].ret ? &inst->res : NULL; +} + +struct value * +mkintconst(struct representation *r, uint64_t n) +{ + struct value *v; + + v = xmalloc(sizeof(*v)); + v->kind = VALCONST; + v->repr = r; + v->i = n; + + return v; +} + +uint64_t +intconstvalue(struct value *v) +{ + assert(v->kind = VALCONST); + return v->i; +} + +static struct value * +mkfltconst(struct representation *r, double n) +{ + struct value *v; + + v = xmalloc(sizeof(*v)); + v->kind = VALCONST; + v->repr = r; + v->f = n; + + return v; +} + +static void +funcalloc(struct function *f, struct declaration *d) +{ + enum instructionkind op; + struct instruction *inst; + + assert(!d->type->incomplete); + assert(d->type->size > 0); + if (!d->align) + d->align = d->type->align; + else if (d->align < d->type->align) + error(&tok.loc, "object requires alignment %d, which is stricter than %d", d->type->align, d->align); + switch (d->align) { + case 1: + case 2: + case 4: op = IALLOC4; break; + case 8: op = IALLOC8; break; + case 16: op = IALLOC16; break; + default: + fatal("internal error: invalid alignment: %d\n", d->align); + } + inst = xmalloc(sizeof(*inst) + sizeof(inst->arg[0])); + inst->kind = op; + functemp(f, &inst->res, &iptr); + inst->arg[0] = mkintconst(&i64, d->type->size); + d->value = &inst->res; + arrayaddptr(&f->start->insts, inst); +} + +static void +funcstore(struct function *f, struct type *t, struct value *addr, struct value *v) +{ + enum instructionkind op; + enum typequalifier tq = QUALNONE; + + t = typeunqual(t, &tq); + if (tq & QUALVOLATILE) + error(&tok.loc, "volatile store is not yet supported"); + if (tq & QUALCONST) + error(&tok.loc, "cannot store to 'const' object"); + switch (t->kind) { + case TYPEBASIC: + switch (t->basic.kind) { + case BASICBOOL: + case BASICCHAR: op = ISTOREB; break; + case BASICSHORT: op = ISTOREH; break; + case BASICINT: op = ISTOREW; break; + case BASICLONG: op = ISTOREL; break; + case BASICLONGLONG: op = ISTOREL; break; + case BASICFLOAT: op = ISTORES; break; + case BASICDOUBLE: op = ISTORED; break; + case BASICLONGDOUBLE: + fatal("'long double' store is not yet supported"); + default: + fatal("internal error; unknown basic type %d", t->basic.kind); + } + break; + case TYPEPOINTER: + op = ISTOREL; + break; + case TYPESTRUCT: + case TYPEUNION: + case TYPEARRAY: { + enum instructionkind loadop, op; + struct value *src, *dst, *tmp, *align; + uint64_t offset; + + switch (t->align) { + case 1: loadop = ILOADUB, op = ISTOREB; break; + case 2: loadop = ILOADUH, op = ISTOREH; break; + case 4: loadop = ILOADUW, op = ISTOREW; break; + case 8: loadop = ILOADL, op = ISTOREL; break; + default: + fatal("internal error; invalid alignment %d", t->align); + } + src = v; + dst = addr; + align = mkintconst(&iptr, t->align); + for (offset = 0; offset < t->size; offset += t->align) { + tmp = funcinst(f, loadop, &iptr, (struct value *[]){src}); + funcinst(f, op, NULL, (struct value *[]){tmp, dst}); + src = funcinst(f, IADD, &iptr, (struct value *[]){src, align}); + dst = funcinst(f, IADD, &iptr, (struct value *[]){dst, align}); + } + return; + } + default: + fatal("unimplemented store"); + } + funcinst(f, op, NULL, (struct value *[]){v, addr}); +} + +static struct value * +funcload(struct function *f, struct type *t, struct value *addr) +{ + enum instructionkind op; + enum typequalifier tq; + + t = typeunqual(t, &tq); + switch (t->kind) { + case TYPEBASIC: + switch (t->size) { + case 1: op = t->basic.issigned ? ILOADSB : ILOADUB; break; + case 2: op = t->basic.issigned ? ILOADSH : ILOADUH; break; + case 4: op = typeprop(t) & PROPFLOAT ? ILOADS : t->basic.issigned ? ILOADSW : ILOADUW; break; + case 8: op = typeprop(t) & PROPFLOAT ? ILOADD : ILOADL; break; + default: + fatal("internal error; unimplemented load"); + } + break; + case TYPEPOINTER: + op = ILOADL; + break; + case TYPESTRUCT: + case TYPEUNION: + case TYPEARRAY: + return addr; + default: + fatal("unimplemented load %d", t->kind); + } + return funcinst(f, op, t->repr, (struct value *[]){addr}); +} + +struct value * +mkglobal(char *name, bool private) +{ + static uint64_t id; + struct value *v; + + v = xmalloc(sizeof(*v)); + v->kind = VALGLOBAL; + v->repr = &iptr; + v->name.str = name; + v->name.id = private ? ++id : 0; + + return v; +} + +struct function * +mkfunc(char *name, struct type *t, struct scope *s) +{ + struct function *f; + struct parameter *p; + struct declaration *d; + + f = xmalloc(sizeof(*f)); + f->name = name; + f->type = t; + f->start = f->end = (struct block *)mkblock("start"); + f->gotos = mkhtab(8); + f->lastid = 0; + + /* allocate space for parameters */ + for (p = t->func.params; p; p = p->next) { + if (!p->name) + error(&tok.loc, "parameter name omitted in function definition"); + emittype(p->type); + d = mkdecl(DECLOBJECT, p->type, LINKNONE); + p->value = xmalloc(sizeof(*p->value)); + functemp(f, p->value, p->type->repr); + if (p->type->repr->abi.id) { + d->value = p->value; + } else { + funcinit(f, d, NULL); + funcstore(f, p->type, d->value, p->value); + } + scopeputdecl(s, p->name, d); + } + + t = mkarraytype(mkqualifiedtype(&typechar, QUALCONST), strlen(name) + 1); + d = mkdecl(DECLOBJECT, t, LINKNONE); + d->value = mkglobal("__func__", true); + scopeputdecl(s, "__func__", d); + f->namedecl = d; + + funclabel(f, mkblock("body")); + + return f; +} + +void +funclabel(struct function *f, struct value *v) +{ + assert(v->kind == VALLABEL); + f->end->next = (struct block *)v; + f->end = f->end->next; +} + +void +funcjmp(struct function *f, struct value *v) +{ + funcinst(f, IJMP, NULL, (struct value *[]){v}); + f->end->terminated = true; +} + +void +funcjnz(struct function *f, struct value *v, struct value *l1, struct value *l2) +{ + funcinst(f, IJNZ, NULL, (struct value *[]){v, l1, l2}); + f->end->terminated = true; +} + +void +funcret(struct function *f, struct value *v) +{ + funcinst(f, IRET, NULL, (struct value *[]){v}); + f->end->terminated = true; +} + +struct gotolabel * +funcgoto(struct function *f, char *name) +{ + void **entry; + struct gotolabel *g; + struct hashtablekey key; + + htabstrkey(&key, name); + entry = htabput(f->gotos, &key); + g = *entry; + if (!g) { + g = xmalloc(sizeof(*g)); + g->label = mkblock(name); + *entry = g; + } + + return g; +} + +static struct value * +objectaddr(struct function *f, struct expression *e) +{ + struct declaration *d; + + switch (e->kind) { + case EXPRIDENT: + d = e->ident.decl; + if (d->kind != DECLOBJECT && d->kind != DECLFUNC) + error(&tok.loc, "identifier is not an object or function"); // XXX: fix location, var name + if (d == f->namedecl) { + fputs("data ", stdout); + emitvalue(d->value); + printf(" = { b \"%s\", b 0 }\n", f->name); + f->namedecl = NULL; + } + return d->value; + case EXPRSTRING: + d = stringdecl(e); + return d->value; + case EXPRCOMPOUND: + d = mkdecl(DECLOBJECT, e->type, LINKNONE); + funcinit(f, d, e->compound.init); + return d->value; + case EXPRUNARY: + if (e->unary.op == TMUL) + return funcexpr(f, e->unary.base); + } + error(&tok.loc, "expression is not an object"); +} + +/* TODO: move these conversions to QBE */ +static struct value * +utof(struct function *f, struct representation *r, struct value *v) +{ + struct value *odd, *big, *phi[5] = {0}, *join; + + if (v->repr->base == 'w') { + v = funcinst(f, IEXTUW, &i64, (struct value *[]){v}); + return funcinst(f, ISLTOF, r, (struct value *[]){v}); + } + + phi[0] = mkblock("utof_small"); + phi[2] = mkblock("utof_big"); + join = mkblock("utof_join"); + + big = funcinst(f, ICSLTL, &i32, (struct value *[]){v, mkintconst(&i64, 0)}); + funcjnz(f, big, phi[2], phi[0]); + + funclabel(f, phi[0]); + phi[1] = funcinst(f, ISLTOF, r, (struct value *[]){v}); + funcjmp(f, join); + + funclabel(f, phi[2]); + odd = funcinst(f, IAND, &i64, (struct value *[]){v, mkintconst(&i64, 1)}); + v = funcinst(f, ISHR, &i64, (struct value *[]){v, mkintconst(&i64, 1)}); + v = funcinst(f, IOR, &i64, (struct value *[]){v, odd}); /* round to odd */ + v = funcinst(f, ISLTOF, r, (struct value *[]){v}); + phi[3] = funcinst(f, IADD, r, (struct value *[]){v, v}); + + funclabel(f, join); + return funcinst(f, IPHI, r, phi); +} + +static struct value * +ftou(struct function *f, struct representation *r, struct value *v) +{ + uint64_t max = 1ull << ((32 << (r->base == 'l')) - 1); + struct value *big, *phi[5] = {0}, *join, *maxflt, *maxint; + enum instructionkind op = v->repr->base == 's' ? ISTOSI : IDTOSI; + + if (r->base == 'w') { + v = funcinst(f, op, &i64, (struct value *[]){v}); + return funcinst(f, ICOPY, r, (struct value *[]){v}); + } + + phi[0] = mkblock("ftou_small"); + phi[2] = mkblock("ftou_big"); + join = mkblock("ftou_join"); + + maxflt = mkfltconst(v->repr, r->base == 'w' ? 0x1p31 : 0x1p63); + maxint = mkintconst(&i64, r->base == 'w' ? 1ull<<31 : 1ull<<63); + + big = funcinst(f, v->repr->base == 's' ? ICGES : ICGED, &i32, (struct value *[]){v, maxflt}); + funcjnz(f, big, phi[2], phi[0]); + + funclabel(f, phi[0]); + phi[1] = funcinst(f, op, r, (struct value *[]){v}); + funcjmp(f, join); + + funclabel(f, phi[2]); + v = funcinst(f, ISUB, v->repr, (struct value *[]){v, maxflt}); + v = funcinst(f, op, r, (struct value *[]){v}); + phi[3] = funcinst(f, IXOR, r, (struct value *[]){v, maxint}); + + funclabel(f, join); + return funcinst(f, IPHI, r, phi); +} + +struct value * +funcexpr(struct function *f, struct expression *e) +{ + static struct value ellipsis = {.kind = VALELLIPSIS}; + enum instructionkind op = INONE; + struct declaration *d; + struct value *l, *r, *v, *addr, **argvals, **argval; + struct expression *arg; + struct value *label[5]; + struct type *t; + + switch (e->kind) { + case EXPRIDENT: + d = e->ident.decl; + switch (d->kind) { + case DECLOBJECT: return funcload(f, d->type, d->value); + case DECLCONST: return d->value; + default: + fatal("unimplemented declaration kind %d", d->kind); + } + break; + case EXPRCONST: + if (typeprop(e->type) & PROPINT || e->type->kind == TYPEPOINTER) + return mkintconst(e->type->repr, e->constant.i); + return mkfltconst(e->type->repr, e->constant.f); + case EXPRCOMPOUND: + l = objectaddr(f, e); + return funcload(f, e->type, l); + case EXPRINCDEC: + addr = objectaddr(f, e->incdec.base); + l = funcload(f, e->incdec.base->type, addr); + if (e->type->kind == TYPEPOINTER) + r = mkintconst(e->type->repr, e->type->base->size); + else if (typeprop(e->type) & PROPINT) + r = mkintconst(e->type->repr, 1); + else if (typeprop(e->type) & PROPFLOAT) + r = mkfltconst(e->type->repr, 1); + else + fatal("not a scalar"); + v = funcinst(f, e->incdec.op == TINC ? IADD : ISUB, e->type->repr, (struct value *[]){l, r}); + funcstore(f, e->type, addr, v); + return e->incdec.post ? l : v; + case EXPRCALL: + argvals = xreallocarray(NULL, e->call.nargs + 3, sizeof(argvals[0])); + argvals[0] = funcexpr(f, e->call.func); + for (argval = &argvals[1], arg = e->call.args; arg; ++argval, arg = arg->next) { + emittype(arg->type); + *argval = funcexpr(f, arg); + } + if (e->call.func->type->base->func.isvararg) + *argval++ = &ellipsis; + *argval = NULL; + return funcinst(f, ICALL, e->type == &typevoid ? NULL : e->type->repr, argvals); + //if (e->call.func->type->base->func.isnoreturn) + // funcret(f, NULL); + case EXPRUNARY: + switch (e->unary.op) { + case TBAND: + return objectaddr(f, e->unary.base); + case TMUL: + r = funcexpr(f, e->unary.base); + return funcload(f, e->type, r); + } + fatal("internal error; unknown unary expression"); + break; + case EXPRCAST: { + struct type *src, *dst; + int srcprop, dstprop; + + l = funcexpr(f, e->cast.e); + r = NULL; + + src = typeunqual(e->cast.e->type, NULL); + if (src->kind == TYPEPOINTER) + src = &typeulong; + dst = typeunqual(e->type, NULL); + if (dst->kind == TYPEPOINTER) + dst = &typeulong; + if (dst->kind == TYPEVOID) + return NULL; + if (src->kind != TYPEBASIC || dst->kind != TYPEBASIC) + fatal("internal error; unsupported conversion"); + srcprop = typeprop(src); + dstprop = typeprop(dst); + if (dst->basic.kind == BASICBOOL) { + r = mkintconst(src->repr, 0); + if (srcprop & PROPINT) + op = src->size == 8 ? ICNEL : ICNEW; + else + op = src->size == 8 ? ICNED : ICNES; + } else if (dstprop & PROPINT) { + if (srcprop & PROPINT) { + if (dst->size == 8 && src->size <= 4) + op = src->basic.issigned ? IEXTSW : IEXTUW; + else + op = ICOPY; + } else { + if (!dst->basic.issigned) + return ftou(f, dst->repr, l); + op = src->size == 8 ? IDTOSI : ISTOSI; + } + } else { + if (srcprop & PROPINT) { + if (!src->basic.issigned) + return utof(f, dst->repr, l); + op = src->size == 8 ? ISLTOF : ISWTOF; + } else { + if (src->size < dst->size) + op = IEXTS; + else if (src->size > dst->size) + op = ITRUNCD; + else + op = ICOPY; + } + } + return funcinst(f, op, dst->repr, (struct value *[]){l, r}); + } + case EXPRBINARY: + if (e->binary.op == TLOR || e->binary.op == TLAND) { + label[0] = mkblock("logic_right"); + label[1] = mkblock("logic_join"); + + l = funcexpr(f, e->binary.l); + label[2] = (struct value *)f->end; + if (e->binary.op == TLOR) + funcjnz(f, l, label[1], label[0]); + else + funcjnz(f, l, label[0], label[1]); + funclabel(f, label[0]); + r = funcexpr(f, e->binary.r); + label[3] = (struct value *)f->end; + funclabel(f, label[1]); + return funcinst(f, IPHI, e->type->repr, (struct value *[]){label[2], l, label[3], r, NULL}); + } + l = funcexpr(f, e->binary.l); + r = funcexpr(f, e->binary.r); + t = e->binary.l->type; + if (t->kind == TYPEPOINTER) + t = &typeulong; + switch (e->binary.op) { + case TMUL: + op = IMUL; + break; + case TDIV: + op = !(typeprop(e->type) & PROPINT) || e->type->basic.issigned ? IDIV : IUDIV; + break; + case TMOD: + op = e->type->basic.issigned ? IREM : IUREM; + break; + case TADD: + op = IADD; + break; + case TSUB: + op = ISUB; + break; + case TSHL: + op = ISHL; + break; + case TSHR: + op = t->basic.issigned ? ISAR : ISHR; + break; + case TBOR: + op = IOR; + break; + case TBAND: + op = IAND; + break; + case TXOR: + op = IXOR; + break; + case TLESS: + if (t->size <= 4) + op = typeprop(t) & PROPFLOAT ? ICLTS : t->basic.issigned ? ICSLTW : ICULTW; + else + op = typeprop(t) & PROPFLOAT ? ICLTD : t->basic.issigned ? ICSLTL : ICULTL; + break; + case TGREATER: + if (t->size <= 4) + op = typeprop(t) & PROPFLOAT ? ICGTS : t->basic.issigned ? ICSGTW : ICUGTW; + else + op = typeprop(t) & PROPFLOAT ? ICGTD : t->basic.issigned ? ICSGTL : ICUGTL; + break; + case TLEQ: + if (t->size <= 4) + op = typeprop(t) & PROPFLOAT ? ICLES : t->basic.issigned ? ICSLEW : ICULEW; + else + op = typeprop(t) & PROPFLOAT ? ICLED : t->basic.issigned ? ICSLEL : ICULEL; + break; + case TGEQ: + if (t->size <= 4) + op = typeprop(t) & PROPFLOAT ? ICGES : t->basic.issigned ? ICSGEW : ICUGEW; + else + op = typeprop(t) & PROPFLOAT ? ICGED : t->basic.issigned ? ICSGEL : ICUGEL; + break; + case TEQL: + if (t->size <= 4) + op = typeprop(t) & PROPFLOAT ? ICEQS : ICEQW; + else + op = typeprop(t) & PROPFLOAT ? ICEQD : ICEQL; + break; + case TNEQ: + if (t->size <= 4) + op = typeprop(t) & PROPFLOAT ? ICNES : ICNEW; + else + op = typeprop(t) & PROPFLOAT ? ICNED : ICNEL; + break; + } + if (op == INONE) + fatal("internal error; unimplemented binary expression"); + return funcinst(f, op, e->type->repr, (struct value *[]){l, r}); + case EXPRCOND: + label[0] = mkblock("cond_true"); + label[1] = mkblock("cond_false"); + label[2] = mkblock("cond_join"); + + v = funcexpr(f, e->cond.e); + funcjnz(f, v, label[0], label[1]); + + funclabel(f, label[0]); + l = funcexpr(f, e->cond.t); + label[3] = (struct value *)f->end; + funcjmp(f, label[2]); + + funclabel(f, label[1]); + r = funcexpr(f, e->cond.f); + label[4] = (struct value *)f->end; + + funclabel(f, label[2]); + if (e->type == &typevoid) + return NULL; + return funcinst(f, IPHI, e->type->repr, (struct value *[]){label[3], l, label[4], r, NULL}); + case EXPRASSIGN: + r = funcexpr(f, e->assign.r); + if (e->assign.l->kind == EXPRTEMP) { + e->assign.l->temp = r; + } else { + l = objectaddr(f, e->assign.l); + funcstore(f, e->assign.l->type, l, r); + } + return r; + case EXPRCOMMA: + for (e = e->comma.exprs; e->next; e = e->next) + funcexpr(f, e); + return funcexpr(f, e); + case EXPRBUILTIN: + switch (e->builtin.kind) { + case BUILTINVASTART: + l = funcexpr(f, e->builtin.arg); + funcinst(f, IVASTART, NULL, (struct value *[]){l}); + break; + case BUILTINVAEND: + /* no-op */ + break; + default: + fatal("internal error: unimplemented builtin"); + } + return NULL; + case EXPRTEMP: + assert(e->temp); + return e->temp; + default: + fatal("unimplemented expression %d", e->kind); + } +} + +static void +zero(struct function *func, struct value *addr, int align, uint64_t offset, uint64_t end) +{ + enum instructionkind store[] = { + [1] = ISTOREB, + [2] = ISTOREH, + [4] = ISTOREW, + [8] = ISTOREL, + }; + struct value *tmp; + static struct value z = {.kind = VALCONST, .repr = &i64}; + int a = 1; + + while (offset < end) { + if ((align - (offset & align - 1)) & a) { + tmp = funcinst(func, IADD, &iptr, (struct value *[]){addr, mkintconst(&iptr, offset)}); + funcinst(func, store[a], NULL, (struct value *[]){&z, tmp}); + offset += a; + } + if (a < align) + a <<= 1; + } +} + +void +funcinit(struct function *func, struct declaration *d, struct initializer *init) +{ + struct value *src, *dst; + uint64_t offset = 0; + size_t i; + + funcalloc(func, d); + if (!init) + return; + for (; init; init = init->next) { + zero(func, d->value, d->type->align, offset, init->start); + if (init->expr->kind == EXPRSTRING) { + for (i = 0; i <= init->expr->string.size && i < init->end - init->start; ++i) { + dst = funcinst(func, IADD, &iptr, (struct value *[]){d->value, mkintconst(&iptr, init->start + i)}); + funcinst(func, ISTOREB, NULL, (struct value *[]){mkintconst(&i8, init->expr->string.data[i]), dst}); + } + } else { + dst = funcinst(func, IADD, &iptr, (struct value *[]){d->value, mkintconst(&iptr, init->start)}); + src = funcexpr(func, init->expr); + funcstore(func, init->expr->type, dst, src); + } + offset = init->end; + } + zero(func, d->value, d->type->align, offset, d->type->size); +} + +static void +casesearch(struct function *f, struct value *v, struct switchcase *c, struct value *defaultlabel) +{ + struct value *res, *label[3], *key; + + if (!c) { + funcjmp(f, defaultlabel); + return; + } + label[0] = mkblock("switch_lt"); + label[1] = mkblock("switch_ge"); + label[2] = mkblock("switch_gt"); + + // XXX: linear search if c->node.height < 4 + key = mkintconst(&i64, c->node.key); + res = funcinst(f, ICULTW, &i32, (struct value *[]){v, key}); + funcjnz(f, res, label[0], label[1]); + funclabel(f, label[0]); + casesearch(f, v, c->node.child[0], defaultlabel); + funclabel(f, label[1]); + res = funcinst(f, ICUGTW, typeint.repr, (struct value *[]){v, key}); + funcjnz(f, res, label[2], c->body); + funclabel(f, label[2]); + casesearch(f, v, c->node.child[1], defaultlabel); +} + +void +funcswitch(struct function *f, struct value *v, struct switchcases *c, struct value *defaultlabel) +{ + casesearch(f, v, c->root, defaultlabel); +} + +/* emit */ + +static void +emitname(struct name *n) +{ + if (n->str) + fputs(n->str, stdout); + if (n->id) + printf(".%" PRIu64, n->id); +} + +static void +emitvalue(struct value *v) +{ + static char sigil[] = { + [VALLABEL] = '@', + [VALTEMP] = '%', + [VALGLOBAL] = '$', + }; + + switch (v->kind) { + case VALCONST: + if (v->repr->base == 's' || v->repr->base == 'd') + printf("%c_%a", v->repr->base, v->f); + else + printf("%" PRIu64, v->i); + break; + case VALELLIPSIS: + fputs("...", stdout); + break; + default: + if (v->kind >= LEN(sigil) || !sigil[v->kind]) + fatal("invalid value"); + putchar(sigil[v->kind]); + if (v->kind == VALGLOBAL && v->name.id) + fputs(".L", stdout); + emitname(&v->name); + } +} + +static void +aggr(struct type *t) +{ + uint64_t i; + struct member *m; + + switch (t->kind) { + case TYPEARRAY: + if (typeprop(t->base) & PROPAGGR) { + for (i = 0; i < t->array.length; ++i) + aggr(t->base); + } else { + printf("%c %" PRIu64 ", ", t->base->repr->ext, t->array.length); + } + break; + case TYPESTRUCT: + arrayforeach (&t->structunion.members, m) { + if (typeprop(m->type) & PROPAGGR) + aggr(m->type); + else + printf("%c, ", m->type->repr->ext); + } + break; + default: + fatal("internal error; not an aggregate type"); + } +} + +static void +emittype(struct type *t) +{ + static uint64_t id; + + if (t->repr->abi.id || !(typeprop(t) & PROPAGGR)) + return; + t->repr = xmalloc(sizeof(*t->repr)); + t->repr->base = 'l'; + if (t->kind == TYPESTRUCT || t->kind == TYPEUNION) + t->repr->abi.str = t->structunion.tag; + t->repr->abi.id = ++id; + fputs("type :", stdout); + emitname(&t->repr->abi); + fputs(" = { ", stdout); + aggr(t); + puts("}"); +} + +static void +emitrepr(struct representation *r, _Bool abi) +{ + if (abi && r->abi.id) { + putchar(':'); + emitname(&r->abi); + } else { + putchar(r->base); + } +} + +static void +emitinst(struct instruction *inst) +{ + struct value **arg; + + putchar('\t'); + assert(inst->kind < LEN(instdesc)); + if (instdesc[inst->kind].ret && inst->res.kind) { + emitvalue(&inst->res); + fputs(" =", stdout); + emitrepr(inst->res.repr, inst->kind == ICALL); + putchar(' '); + } + fputs(instdesc[inst->kind].str, stdout); + switch (inst->kind) { + case ICALL: + putchar(' '); + emitvalue(inst->arg[0]); + putchar('('); + for (arg = &inst->arg[1]; *arg; ++arg) { + if (arg != &inst->arg[1]) + fputs(", ", stdout); + if ((*arg)->kind != VALELLIPSIS) { + emitrepr((*arg)->repr, true); + putchar(' '); + } + emitvalue(*arg); + } + putchar(')'); + break; + case IPHI: + putchar(' '); + for (arg = inst->arg; *arg; ++arg) { + if (arg != inst->arg) + fputs(", ", stdout); + emitvalue(*arg); + putchar(' '); + emitvalue(*++arg); + } + break; + case IRET: + if (!inst->arg[0]) + break; + /* fallthrough */ + default: + putchar(' '); + emitvalue(inst->arg[0]); + if (instdesc[inst->kind].arg > 1) { + fputs(", ", stdout); + emitvalue(inst->arg[1]); + } + if (instdesc[inst->kind].arg > 2) { + fputs(", ", stdout); + emitvalue(inst->arg[2]); + } + } + putchar('\n'); +} + +void +emitfunc(struct function *f, _Bool global) +{ + struct block *b; + struct instruction **inst; + struct parameter *p; + size_t n; + + if (!f->end->terminated) + funcret(f, NULL); + if (global) + puts("export"); + fputs("function ", stdout); + if (f->type->base != &typevoid) { + emitrepr(f->type->base->repr, true); + putchar(' '); + } + printf("$%s(", f->name); + for (p = f->type->func.params; p; p = p->next) { + if (p != f->type->func.params) + fputs(", ", stdout); + emitrepr(p->type->repr, true); + putchar(' '); + emitvalue(p->value); + } + if (f->type->func.isvararg) + fputs(", ...", stdout); + puts(") {"); + for (b = f->start; b; b = b->next) { + emitvalue(&b->label); + putchar('\n'); + n = b->insts.len / sizeof(*inst); + for (inst = b->insts.val; n; --n, ++inst) + emitinst(*inst); + } + puts("}"); +} + +static void +dataitem(struct expression *expr, uint64_t size) +{ + struct declaration *decl; + size_t i; + char c; + + switch (expr->kind) { + case EXPRUNARY: + if (expr->unary.op != TBAND) + fatal("not a address expr"); + expr = expr->unary.base; + if (expr->kind != EXPRIDENT) + error(&tok.loc, "initializer is not a constant expression"); + decl = expr->ident.decl; + if (decl->value->kind != VALGLOBAL) + fatal("not a global"); + emitvalue(decl->value); + break; + case EXPRBINARY: + if (expr->binary.l->kind != EXPRUNARY || expr->binary.r->kind != EXPRCONST) + error(&tok.loc, "initializer is not a constant expression"); + dataitem(expr->binary.l, 0); + fputs(" + ", stdout); + dataitem(expr->binary.r, 0); + break; + case EXPRCONST: + if (typeprop(expr->type) & PROPFLOAT) + printf("%c_%a", expr->type->size == 4 ? 's' : 'd', expr->constant.f); + else + printf("%" PRIu64, expr->constant.i); + break; + case EXPRSTRING: + fputc('"', stdout); + for (i = 0; i < expr->string.size && i < size; ++i) { + c = expr->string.data[i]; + if (isprint(c) && c != '"' && c != '\\') + putchar(c); + else + printf("\\%03hho", c); + } + fputc('"', stdout); + if (i < size) + printf(", z %" PRIu64, size - i); + break; + default: + fatal("unimplemented initdata"); + } +} + +void +emitdata(struct declaration *d, struct initializer *init) +{ + uint64_t offset = 0; + struct initializer *cur; + + if (!d->align) + d->align = d->type->align; + else if (d->align < d->type->align) + error(&tok.loc, "object requires alignment %d, which is stricter than %d", d->type->align, d->align); + for (cur = init; cur; cur = cur->next) + cur->expr = eval(cur->expr); + if (d->linkage == LINKEXTERN) + fputs("export ", stdout); + fputs("data ", stdout); + emitvalue(d->value); + printf(" = align %d { ", d->align); + + for (; init; offset = init->end, init = init->next) { + if (offset < init->start) + printf("z %" PRIu64 ", ", init->start - offset); + printf("%c ", init->expr->type->kind == TYPEARRAY ? init->expr->type->base->repr->ext : init->expr->type->repr->ext); + dataitem(init->expr, init->end - init->start); + fputs(", ", stdout); + } + assert(offset <= d->type->size); + if (offset < d->type->size) + printf("z %" PRIu64 " ", d->type->size - offset); + puts("}"); +} diff --git a/runtests b/runtests new file mode 100755 index 0000000..6865d0e --- /dev/null +++ b/runtests @@ -0,0 +1,20 @@ +#!/bin/sh + +if [ $# = 0 ] ; then + set -- tests/*.c +fi + +exitstatus=0 +out=$(mktemp) +trap 'rm "$out"' EXIT +for test ; do + if ${CC:=./cc} -emit-qbe -o $out $test && diff -Nu "${test%.c}.qbe" "$out" ; then + result="PASS" + else + result="FAIL" + exitstatus=1 + fi + echo "[$result] $test" >&2 +done + +exit $exitstatus diff --git a/scan.c b/scan.c new file mode 100644 index 0000000..22c1821 --- /dev/null +++ b/scan.c @@ -0,0 +1,369 @@ +#include +#include +#include +#include +#include +#include +#include "util.h" +#include "scan.h" +#include "token.h" + +struct buffer { + char *str; + size_t len, cap; +}; + +struct scanner { + int chr; + struct token next; + struct location loc; + struct buffer buf; + bool usebuf; +}; + +static void +bufadd(struct buffer *b, char c) +{ + if (b->len >= b->cap) { + b->cap = b->cap ? b->cap * 2 : 1<<8; + b->str = xreallocarray(b->str, b->cap, 1); + } + b->str[b->len++] = c; +} + +static char * +bufget(struct buffer *b) +{ + char *s; + + bufadd(b, '\0'); + s = xmalloc(b->len); + memcpy(s, b->str, b->len); + b->len = 0; + + return s; +} + +static void +nextchar(struct scanner *s) +{ + if (s->usebuf) + bufadd(&s->buf, s->chr); + s->chr = getchar(); + if (s->chr == '\n') + ++s->loc.line, s->loc.col = 1; + else + ++s->loc.col; +} + +static int +op2(struct scanner *s, int t1, int t2) +{ + nextchar(s); + if (s->chr != '=') + return t1; + nextchar(s); + return t2; +} + +static int +op3(struct scanner *s, int t1, int t2, int t3) +{ + int c; + + c = s->chr; + nextchar(s); + if (s->chr == '=') { + nextchar(s); + return t2; + } + if (s->chr != c) + return t1; + nextchar(s); + return t3; +} + +static int +op4(struct scanner *s, int t1, int t2, int t3, int t4) +{ + int c; + + c = s->chr; + nextchar(s); + if (s->chr == '=') { + nextchar(s); + return t2; + } + if (s->chr != c) + return t1; + nextchar(s); + if (s->chr != '=') + return t3; + nextchar(s); + return t4; +} + +static int +ident(struct scanner *s) +{ + s->usebuf = true; + do nextchar(s); + while (isalnum(s->chr) || s->chr == '_'); + + return TIDENT; +} + +static int +isodigit(int c) +{ + return (unsigned)c - '0' < 8; +} + +static enum tokenkind +number(struct scanner *s) +{ + bool allowsign = false; + + s->usebuf = true; + for (;;) { + nextchar(s); + switch (s->chr) { + case 'e': + case 'E': + case 'p': + case 'P': + allowsign = true; + break; + case '+': + case '-': + if (!allowsign) + goto done; + break; + case '_': + case '.': + allowsign = false; + break; + default: + if (!isalnum(s->chr)) + goto done; + allowsign = false; + } + } +done: + return TNUMBER; +} + +static void +escape(struct scanner *s) +{ + nextchar(s); + if (s->chr == 'x') { + nextchar(s); + if (!isxdigit(s->chr)) + error(&s->loc, "invalid hexadecimal escape sequence"); + do nextchar(s); + while (isxdigit(s->chr)); + } else if (isodigit(s->chr)) { + nextchar(s); + if (isodigit(s->chr)) { + nextchar(s); + if (isodigit(s->chr)) + nextchar(s); + } + } else if (strchr("'\"?\\abfnrtv", s->chr)) { + nextchar(s); + } else { + error(&s->loc, "invalid escape sequence"); + } +} + +static enum tokenkind +charconst(struct scanner *s) +{ + s->usebuf = true; + nextchar(s); + for (;;) { + switch (s->chr) { + case '\\': + escape(s); + break; + case '\'': + nextchar(s); + return TCHARCONST; + case '\n': + error(&s->loc, "newline in character constant"); + default: + nextchar(s); + break; + } + } +} + +static int +stringlit(struct scanner *s) +{ + s->usebuf = true; + nextchar(s); + for (;;) { + switch (s->chr) { + case '\\': + escape(s); + break; + case '"': + nextchar(s); + return TSTRINGLIT; + case '\n': + error(&s->loc, "newline in string literal"); + default: + nextchar(s); + break; + } + } +} + +static int +scankind(struct scanner *s) +{ + int tok; + struct location loc; + +again: + switch (s->chr) { + case ' ': + case '\t': + case '\f': + case '\v': + nextchar(s); + goto again; + case '!': + return op2(s, TLNOT, TNEQ); + case '"': + return stringlit(s); + case '#': + nextchar(s); + if (s->chr != '#') + return THASH; + nextchar(s); + return THASHHASH; + case '%': + return op2(s, TMOD, TMODASSIGN); + case '&': + return op3(s, TBAND, TBANDASSIGN, TLAND); + // TODO: u8, U, and L string literals + case '\'': + return charconst(s); + case '*': + return op2(s, TMUL, TMULASSIGN); + case '+': + return op3(s, TADD, TADDASSIGN, TINC); + case '-': + tok = op3(s, TSUB, TSUBASSIGN, TDEC); + if (tok != TSUB || s->chr != '>') + return tok; + nextchar(s); + return TARROW; + case '/': + return op2(s, TDIV, TDIVASSIGN); + case '<': + return op4(s, TLESS, TLEQ, TSHL, TSHLASSIGN); + case '=': + return op2(s, TASSIGN, TEQL); + case '>': + return op4(s, TGREATER, TGEQ, TSHR, TSHRASSIGN); + case '^': + return op2(s, TXOR, TXORASSIGN); + case '|': + return op3(s, TBOR, TBORASSIGN, TLOR); + case '\n': + nextchar(s); + return TNEWLINE; + case '[': + nextchar(s); + return TLBRACK; + case ']': + nextchar(s); + return TRBRACK; + case '(': + nextchar(s); + return TLPAREN; + case ')': + nextchar(s); + return TRPAREN; + case '{': + nextchar(s); + return TLBRACE; + case '}': + nextchar(s); + return TRBRACE; + case '.': + nextchar(s); + if (s->chr != '.') + return TPERIOD; + loc = s->loc; + nextchar(s); + if (s->chr != '.') { + ungetc(s->chr, stdout); + s->loc = loc; + s->chr = '.'; + return TPERIOD; + } + nextchar(s); + return TELLIPSIS; + case '~': + nextchar(s); + return TBNOT; + case '?': + nextchar(s); + return TQUESTION; + case ':': + nextchar(s); + return TCOLON; + case ';': + nextchar(s); + return TSEMICOLON; + case ',': + nextchar(s); + return TCOMMA; + case EOF: + return TEOF; + default: + if (isdigit(s->chr)) + return number(s); + if (isalpha(s->chr) || s->chr == '_') + return ident(s); + if (isprint(s->chr)) + error(&s->loc, "unexpected character '%c'", s->chr); + error(&s->loc, "unexpected character '\\x%02x'", s->chr); + } +} + +struct scanner * +mkscanner(const char *file) +{ + struct scanner *s; + + s = xmalloc(sizeof(*s)); + s->buf.str = NULL; + s->buf.len = 0; + s->buf.cap = 0; + s->usebuf = false; + s->loc.file = file; + s->loc.line = 1; + s->loc.col = 0; + nextchar(s); + + return s; +} + +void +scan(struct scanner *s, struct token *t) +{ + t->loc = s->loc; + t->kind = scankind(s); + if (s->usebuf) { + t->lit = bufget(&s->buf); + s->usebuf = false; + } else { + t->lit = NULL; + } +} diff --git a/scan.h b/scan.h new file mode 100644 index 0000000..eaf3c6d --- /dev/null +++ b/scan.h @@ -0,0 +1,4 @@ +struct token; + +struct scanner *mkscanner(const char *file); +void scan(struct scanner *, struct token *); diff --git a/scope.c b/scope.c new file mode 100644 index 0000000..6c1d5fd --- /dev/null +++ b/scope.c @@ -0,0 +1,91 @@ +#include +#include +#include +#include +#include "util.h" +#include "htab.h" +#include "scope.h" + +struct scope filescope; + +struct scope * +mkscope(struct scope *parent) +{ + struct scope *s; + + s = xmalloc(sizeof(*s)); + s->decls = NULL; + s->tags = NULL; + s->breaklabel = parent->breaklabel; + s->continuelabel = parent->continuelabel; + s->switchcases = parent->switchcases; + s->parent = parent; + + return s; +} + +struct scope * +delscope(struct scope *s) +{ + struct scope *parent = s->parent; + + if (s->decls) + delhtab(s->decls, NULL); + if (s->tags) + delhtab(s->tags, NULL); + free(s); + + return parent; +} + +struct declaration * +scopegetdecl(struct scope *s, const char *name, bool recurse) +{ + struct declaration *d; + struct hashtablekey k; + + htabstrkey(&k, name); + do { + d = s->decls ? htabget(s->decls, &k) : NULL; + s = s->parent; + } while (!d && s && recurse); + + return d; +} + +struct type * +scopegettag(struct scope *s, const char *name, bool recurse) +{ + struct type *t; + struct hashtablekey k; + + htabstrkey(&k, name); + do { + t = s->tags ? htabget(s->tags, &k) : NULL; + s = s->parent; + } while (!t && s && recurse); + + return t; +} + +void +scopeputdecl(struct scope *s, const char *name, struct declaration *d) +{ + struct hashtablekey k; + + if (!s->decls) + s->decls = mkhtab(32); + htabstrkey(&k, name); + *htabput(s->decls, &k) = d; +} + +void +scopeputtag(struct scope *s, const char *name, struct type *t) +{ + struct hashtablekey k; + + if (!s->tags) + s->tags = mkhtab(32); + htabstrkey(&k, name); + *htabput(s->tags, &k) = t; +} diff --git a/scope.h b/scope.h new file mode 100644 index 0000000..34301ce --- /dev/null +++ b/scope.h @@ -0,0 +1,21 @@ +struct scope { + struct hashtable *tags; + struct hashtable *decls; + struct value *breaklabel; + struct value *continuelabel; + struct switchcases *switchcases; + struct scope *parent; +}; + +struct scope *mkscope(struct scope *); +struct scope *delscope(struct scope *); + +struct declaration; +void scopeputdecl(struct scope *, const char *, struct declaration *); +struct declaration *scopegetdecl(struct scope *, const char *, _Bool); + +struct type; +void scopeputtag(struct scope *, const char *, struct type *); +struct type *scopegettag(struct scope *, const char *, _Bool); + +extern struct scope filescope; diff --git a/siphash.c b/siphash.c new file mode 100644 index 0000000..871330b --- /dev/null +++ b/siphash.c @@ -0,0 +1,165 @@ +/* + SipHash reference C implementation + + Copyright (c) 2012-2016 Jean-Philippe Aumasson + + Copyright (c) 2012-2014 Daniel J. Bernstein + + To the extent possible under law, the author(s) have dedicated all copyright + and related and neighboring rights to this software to the public domain + worldwide. This software is distributed without any warranty. + + You should have received a copy of the CC0 Public Domain Dedication along + with + this software. If not, see + . + */ +#include +#include +#include +#include + +/* default: SipHash-2-4 */ +#define cROUNDS 2 +#define dROUNDS 4 + +#define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b)))) + +#define U32TO8_LE(p, v) \ + (p)[0] = (uint8_t)((v)); \ + (p)[1] = (uint8_t)((v) >> 8); \ + (p)[2] = (uint8_t)((v) >> 16); \ + (p)[3] = (uint8_t)((v) >> 24); + +#define U64TO8_LE(p, v) \ + U32TO8_LE((p), (uint32_t)((v))); \ + U32TO8_LE((p) + 4, (uint32_t)((v) >> 32)); + +#define U8TO64_LE(p) \ + (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) | \ + ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) | \ + ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) | \ + ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56)) + +#define SIPROUND \ + do { \ + v0 += v1; \ + v1 = ROTL(v1, 13); \ + v1 ^= v0; \ + v0 = ROTL(v0, 32); \ + v2 += v3; \ + v3 = ROTL(v3, 16); \ + v3 ^= v2; \ + v0 += v3; \ + v3 = ROTL(v3, 21); \ + v3 ^= v0; \ + v2 += v1; \ + v1 = ROTL(v1, 17); \ + v1 ^= v2; \ + v2 = ROTL(v2, 32); \ + } while (0) + +#ifdef DEBUG +#define TRACE \ + do { \ + printf("(%3d) v0 %08x %08x\n", (int)inlen, (uint32_t)(v0 >> 32), \ + (uint32_t)v0); \ + printf("(%3d) v1 %08x %08x\n", (int)inlen, (uint32_t)(v1 >> 32), \ + (uint32_t)v1); \ + printf("(%3d) v2 %08x %08x\n", (int)inlen, (uint32_t)(v2 >> 32), \ + (uint32_t)v2); \ + printf("(%3d) v3 %08x %08x\n", (int)inlen, (uint32_t)(v3 >> 32), \ + (uint32_t)v3); \ + } while (0) +#else +#define TRACE +#endif + +int siphash(const uint8_t *in, const size_t inlen, const uint8_t *k, + uint8_t *out, const size_t outlen) { + + assert((outlen == 8) || (outlen == 16)); + uint64_t v0 = 0x736f6d6570736575ULL; + uint64_t v1 = 0x646f72616e646f6dULL; + uint64_t v2 = 0x6c7967656e657261ULL; + uint64_t v3 = 0x7465646279746573ULL; + uint64_t k0 = U8TO64_LE(k); + uint64_t k1 = U8TO64_LE(k + 8); + uint64_t m; + int i; + const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t)); + const int left = inlen & 7; + uint64_t b = ((uint64_t)inlen) << 56; + v3 ^= k1; + v2 ^= k0; + v1 ^= k1; + v0 ^= k0; + + if (outlen == 16) + v1 ^= 0xee; + + for (; in != end; in += 8) { + m = U8TO64_LE(in); + v3 ^= m; + + TRACE; + for (i = 0; i < cROUNDS; ++i) + SIPROUND; + + v0 ^= m; + } + + switch (left) { + case 7: + b |= ((uint64_t)in[6]) << 48; /* fallthrough */ + case 6: + b |= ((uint64_t)in[5]) << 40; /* fallthrough */ + case 5: + b |= ((uint64_t)in[4]) << 32; /* fallthrough */ + case 4: + b |= ((uint64_t)in[3]) << 24; /* fallthrough */ + case 3: + b |= ((uint64_t)in[2]) << 16; /* fallthrough */ + case 2: + b |= ((uint64_t)in[1]) << 8; /* fallthrough */ + case 1: + b |= ((uint64_t)in[0]); + break; + case 0: + break; + } + + v3 ^= b; + + TRACE; + for (i = 0; i < cROUNDS; ++i) + SIPROUND; + + v0 ^= b; + + if (outlen == 16) + v2 ^= 0xee; + else + v2 ^= 0xff; + + TRACE; + for (i = 0; i < dROUNDS; ++i) + SIPROUND; + + b = v0 ^ v1 ^ v2 ^ v3; + U64TO8_LE(out, b); + + if (outlen == 8) + return 0; + + v1 ^= 0xdd; + + TRACE; + for (i = 0; i < dROUNDS; ++i) + SIPROUND; + + b = v0 ^ v1 ^ v2 ^ v3; + U64TO8_LE(out + 8, b); + + return 0; +} diff --git a/stmt.c b/stmt.c new file mode 100644 index 0000000..866d3ff --- /dev/null +++ b/stmt.c @@ -0,0 +1,280 @@ +#include +#include +#include +#include +#include +#include +#include "util.h" +#include "decl.h" +#include "emit.h" +#include "expr.h" +#include "pp.h" +#include "scope.h" +#include "stmt.h" +#include "token.h" +#include "type.h" + +/* 6.8 Statements and blocks */ +void +stmt(struct function *f, struct scope *s) +{ + char *name; + struct expression *e; + struct type *t; + struct value *v, *label[4]; + struct switchcases swtch = {0}; + uint64_t i; + + switch (tok.kind) { + /* 6.8.1 Labeled statements */ + case TCASE: + next(); + if (!s->switchcases) + error(&tok.loc, "'case' label must be in switch"); + label[0] = mkblock("switch_case"); + funclabel(f, label[0]); + i = intconstexpr(s); + switchcase(s->switchcases, i, label[0]); + expect(TCOLON, "after case expression"); + stmt(f, s); + break; + case TDEFAULT: + next(); + if (!s->switchcases) + error(&tok.loc, "'default' label must be in switch"); + if (s->switchcases->defaultlabel) + error(&tok.loc, "multiple 'default' labels"); + expect(TCOLON, "after 'default'"); + s->switchcases->defaultlabel = mkblock("switch_default"); + funclabel(f, s->switchcases->defaultlabel); + stmt(f, s); + break; + + /* 6.8.2 Compound statement */ + case TLBRACE: + next(); + s = mkscope(s); + while (tok.kind != TRBRACE) { + if (!decl(s, f)) + stmt(f, s); + } + s = delscope(s); + next(); + break; + + /* 6.8.3 Expression statement */ + case TSEMICOLON: + next(); + break; + case TIDENT: + name = tok.lit; + if (peek(TCOLON)) { + struct gotolabel *g = funcgoto(f, name); + g->defined = true; + funclabel(f, g->label); + stmt(f, s); + break; + } + /* fallthrough */ + default: + e = expr(s); + v = funcexpr(f, e); + delexpr(e); + expect(TSEMICOLON, "after expression statement"); + break; + + /* 6.8.4 Selection statement */ + case TIF: + next(); + s = mkscope(s); + expect(TLPAREN, "after 'if'"); + e = exprconvert(expr(s), &typebool); + v = funcexpr(f, e); + delexpr(e); + expect(TRPAREN, "after expression"); + + label[0] = mkblock("if_true"); + label[1] = mkblock("if_false"); + funcjnz(f, v, label[0], label[1]); + + funclabel(f, label[0]); + s = mkscope(s); + stmt(f, s); + s = delscope(s); + + if (consume(TELSE)) { + label[2] = mkblock("if_join"); + funcjmp(f, label[2]); + funclabel(f, label[1]); + s = mkscope(s); + stmt(f, s); + s = delscope(s); + funclabel(f, label[2]); + } else { + funclabel(f, label[1]); + } + s = delscope(s); + break; + case TSWITCH: + next(); + + s = mkscope(s); + expect(TLPAREN, "after 'switch'"); + e = expr(s); + expect(TRPAREN, "after expression"); + + t = typeunqual(e->type, NULL); + if (!(typeprop(t) & PROPINT)) + error(&tok.loc, "controlling expression of switch statement must have integer type"); + e = exprconvert(e, typeintpromote(t)); + + label[0] = mkblock("switch_cond"); + label[1] = mkblock("switch_join"); + + v = funcexpr(f, e); + funcjmp(f, label[0]); + s = mkscope(s); + s->breaklabel = label[1]; + s->switchcases = &swtch; + stmt(f, s); + funcjmp(f, label[1]); + + funclabel(f, label[0]); + funcswitch(f, v, &swtch, swtch.defaultlabel ? swtch.defaultlabel : label[1]); + s = delscope(s); + + funclabel(f, label[1]); + s = delscope(s); + break; + + /* 6.8.5 Iteration statements */ + case TWHILE: + next(); + s = mkscope(s); + expect(TLPAREN, "after 'while'"); + e = expr(s); + expect(TRPAREN, "after expression"); + + label[0] = mkblock("while_cond"); + label[1] = mkblock("while_body"); + label[2] = mkblock("while_join"); + + funclabel(f, label[0]); + v = funcexpr(f, e); + funcjnz(f, v, label[1], label[2]); + funclabel(f, label[1]); + s = mkscope(s); + s->continuelabel = label[0]; + s->breaklabel = label[2]; + stmt(f, s); + s = delscope(s); + funcjmp(f, label[0]); + funclabel(f, label[2]); + s = delscope(s); + break; + case TDO: + next(); + + label[0] = mkblock("do_body"); + label[1] = mkblock("do_join"); + + s = mkscope(s); + s = mkscope(s); + s->continuelabel = label[0]; + s->breaklabel = label[1]; + funclabel(f, label[0]); + stmt(f, s); + s = delscope(s); + + expect(TWHILE, "after 'do' statement"); + expect(TLPAREN, "after 'while'"); + e = expr(s); + expect(TRPAREN, "after expression"); + + v = funcexpr(f, e); + funcjnz(f, v, label[0], label[1]); // XXX: compare to 0 + funclabel(f, label[1]); + s = delscope(s); + expect(TSEMICOLON, "after 'do' statement"); + break; + case TFOR: + next(); + expect(TLPAREN, "after while"); + s = mkscope(s); + if (!decl(s, f)) { + if (tok.kind != TSEMICOLON) { + e = expr(s); + funcexpr(f, e); + delexpr(e); + } + expect(TSEMICOLON, NULL); + } + + label[0] = mkblock("for_cond"); + label[1] = mkblock("for_body"); + label[2] = mkblock("for_cont"); + label[3] = mkblock("for_join"); + + funclabel(f, label[0]); + if (tok.kind != TSEMICOLON) { + e = expr(s); + v = funcexpr(f, e); + funcjnz(f, v, label[1], label[3]); + delexpr(e); + } + expect(TSEMICOLON, NULL); + e = tok.kind == TRPAREN ? NULL : expr(s); + expect(TRPAREN, NULL); + + funclabel(f, label[1]); + s = mkscope(s); + s->breaklabel = label[3]; + s->continuelabel = label[2]; + stmt(f, s); + s = delscope(s); + + funclabel(f, label[2]); + if (e) { + funcexpr(f, e); + delexpr(e); + } + funcjmp(f, label[0]); + funclabel(f, label[3]); + s = delscope(s); + break; + + /* 6.8.6 Jump statements */ + case TGOTO: + next(); + name = expect(TIDENT, "after 'goto'"); + funcjmp(f, funcgoto(f, name)->label); + expect(TSEMICOLON, "after 'goto' statement"); + break; + case TCONTINUE: + next(); + if (!s->continuelabel) + error(&tok.loc, "'continue' statement must be in loop"); + funcjmp(f, s->continuelabel); + expect(TSEMICOLON, "after 'continue' statement"); + break; + case TBREAK: + next(); + if (!s->breaklabel) + error(&tok.loc, "'break' statement must be in loop or switch"); + funcjmp(f, s->breaklabel); + expect(TSEMICOLON, "after 'break' statement"); + break; + case TRETURN: + next(); + if (f->type->base != &typevoid) { + e = exprconvert(expr(s), f->type->base); + v = funcexpr(f, e); + delexpr(e); + } else { + v = NULL; + } + funcret(f, v); + expect(TSEMICOLON, "after 'return' statement"); + break; + } +} diff --git a/stmt.h b/stmt.h new file mode 100644 index 0000000..e24e2ff --- /dev/null +++ b/stmt.h @@ -0,0 +1,4 @@ +struct function; +struct scope; + +void stmt(struct function *, struct scope *); diff --git a/tests/basic.c b/tests/basic.c new file mode 100644 index 0000000..061ed7e --- /dev/null +++ b/tests/basic.c @@ -0,0 +1,3 @@ +int main(void) { + return 0; +} diff --git a/tests/basic.qbe b/tests/basic.qbe new file mode 100644 index 0000000..40d18e9 --- /dev/null +++ b/tests/basic.qbe @@ -0,0 +1,6 @@ +export +function w $main() { +@start.1 +@body.2 + ret 0 +} diff --git a/tests/compound-assignment.c b/tests/compound-assignment.c new file mode 100644 index 0000000..d7efcc6 --- /dev/null +++ b/tests/compound-assignment.c @@ -0,0 +1,4 @@ +void f(void) { + int x[1] = {0}, *p = x; + *p++ += 1; +} diff --git a/tests/compound-assignment.qbe b/tests/compound-assignment.qbe new file mode 100644 index 0000000..35e71a5 --- /dev/null +++ b/tests/compound-assignment.qbe @@ -0,0 +1,18 @@ +export +function $f() { +@start.1 + %.1 =l alloc4 4 + %.3 =l alloc8 8 +@body.2 + %.2 =l add %.1, 0 + storew 0, %.2 + %.4 =l add %.3, 0 + storel %.1, %.4 + %.5 =l loadl %.3 + %.6 =l add %.5, 4 + storel %.6, %.3 + %.7 =w loadsw %.5 + %.8 =w add %.7, 1 + storew %.8, %.5 + ret +} diff --git a/tests/const-expr-div.c b/tests/const-expr-div.c new file mode 100644 index 0000000..bac37a5 --- /dev/null +++ b/tests/const-expr-div.c @@ -0,0 +1 @@ +int x = -2/-1; \ No newline at end of file diff --git a/tests/const-expr-div.qbe b/tests/const-expr-div.qbe new file mode 100644 index 0000000..3faa638 --- /dev/null +++ b/tests/const-expr-div.qbe @@ -0,0 +1 @@ +export data $x = align 4 { w 2, } diff --git a/tests/const-expr-mod.c b/tests/const-expr-mod.c new file mode 100644 index 0000000..2f12704 --- /dev/null +++ b/tests/const-expr-mod.c @@ -0,0 +1 @@ +int x = -2%-1; \ No newline at end of file diff --git a/tests/const-expr-mod.qbe b/tests/const-expr-mod.qbe new file mode 100644 index 0000000..a67781d --- /dev/null +++ b/tests/const-expr-mod.qbe @@ -0,0 +1 @@ +export data $x = align 4 { w 0, } diff --git a/tests/const-expr-shr.c b/tests/const-expr-shr.c new file mode 100644 index 0000000..40bcb44 --- /dev/null +++ b/tests/const-expr-shr.c @@ -0,0 +1 @@ +int x = -1 >> 1; \ No newline at end of file diff --git a/tests/const-expr-shr.qbe b/tests/const-expr-shr.qbe new file mode 100644 index 0000000..675fc96 --- /dev/null +++ b/tests/const-expr-shr.qbe @@ -0,0 +1 @@ +export data $x = align 4 { w 18446744073709551615, } diff --git a/tests/float-promote.c b/tests/float-promote.c new file mode 100644 index 0000000..572719a --- /dev/null +++ b/tests/float-promote.c @@ -0,0 +1,8 @@ +void g1(); +void g2(int, ...); +void g3(float); +void f(void) { + g1(1.0f); + g2(0, 1.0f); + g3(1.0f); +} diff --git a/tests/float-promote.qbe b/tests/float-promote.qbe new file mode 100644 index 0000000..052b1fa --- /dev/null +++ b/tests/float-promote.qbe @@ -0,0 +1,11 @@ +export +function $f() { +@start.1 +@body.2 + %.1 =d exts s_0x1p+0 + call $g1(d %.1) + %.2 =d exts s_0x1p+0 + call $g2(w 0, d %.2, ...) + call $g3(s s_0x1p+0) + ret +} diff --git a/tests/float-to-uint32.c b/tests/float-to-uint32.c new file mode 100644 index 0000000..a82c89d --- /dev/null +++ b/tests/float-to-uint32.c @@ -0,0 +1,4 @@ +float g(void); +unsigned f(void) { + return g(); +} diff --git a/tests/float-to-uint32.qbe b/tests/float-to-uint32.qbe new file mode 100644 index 0000000..a8463f3 --- /dev/null +++ b/tests/float-to-uint32.qbe @@ -0,0 +1,9 @@ +export +function w $f() { +@start.1 +@body.2 + %.1 =s call $g() + %.2 =l stosi %.1 + %.3 =w copy %.2 + ret %.3 +} diff --git a/tests/float-to-uint64.c b/tests/float-to-uint64.c new file mode 100644 index 0000000..10e7809 --- /dev/null +++ b/tests/float-to-uint64.c @@ -0,0 +1,4 @@ +float g(void); +unsigned long long f(void) { + return g(); +} diff --git a/tests/float-to-uint64.qbe b/tests/float-to-uint64.qbe new file mode 100644 index 0000000..3c3aa01 --- /dev/null +++ b/tests/float-to-uint64.qbe @@ -0,0 +1,18 @@ +export +function l $f() { +@start.1 +@body.2 + %.1 =s call $g() + %.2 =w cges %.1, s_0x1p+63 + jnz %.2, @ftou_big.4, @ftou_small.3 +@ftou_small.3 + %.3 =l stosi %.1 + jmp @ftou_join.5 +@ftou_big.4 + %.4 =s sub %.1, s_0x1p+63 + %.5 =l stosi %.4 + %.6 =l xor %.5, 9223372036854775808 +@ftou_join.5 + %.7 =l phi @ftou_small.3 %.3, @ftou_big.4 %.6 + ret %.7 +} diff --git a/tests/for-loop.c b/tests/for-loop.c new file mode 100644 index 0000000..ea769c5 --- /dev/null +++ b/tests/for-loop.c @@ -0,0 +1,6 @@ +void g(int); +void f(void) { + int i; + for (i = 0; i < 10; ++i) + g(i); +} diff --git a/tests/for-loop.qbe b/tests/for-loop.qbe new file mode 100644 index 0000000..56b38b1 --- /dev/null +++ b/tests/for-loop.qbe @@ -0,0 +1,21 @@ +export +function $f() { +@start.1 + %.1 =l alloc4 4 +@body.2 + storew 0, %.1 +@for_cond.3 + %.2 =w loadsw %.1 + %.3 =w csltw %.2, 10 + jnz %.3, @for_body.4, @for_join.6 +@for_body.4 + %.4 =w loadsw %.1 + call $g(w %.4) +@for_cont.5 + %.5 =w loadsw %.1 + %.6 =w add %.5, 1 + storew %.6, %.1 + jmp @for_cond.3 +@for_join.6 + ret +} diff --git a/tests/global-align.c b/tests/global-align.c new file mode 100644 index 0000000..c44c2d5 --- /dev/null +++ b/tests/global-align.c @@ -0,0 +1 @@ +_Alignas(8) char c; \ No newline at end of file diff --git a/tests/global-align.qbe b/tests/global-align.qbe new file mode 100644 index 0000000..1b833ed --- /dev/null +++ b/tests/global-align.qbe @@ -0,0 +1 @@ +export data $c = align 8 { z 1 } diff --git a/tests/hello.c b/tests/hello.c new file mode 100644 index 0000000..bad06bc --- /dev/null +++ b/tests/hello.c @@ -0,0 +1,5 @@ +int puts(const char *); +int main(void) { + puts("hello"); + return 0; +} diff --git a/tests/hello.qbe b/tests/hello.qbe new file mode 100644 index 0000000..3e695ab --- /dev/null +++ b/tests/hello.qbe @@ -0,0 +1,9 @@ +data $.Lstring.2 = align 1 { b "hello", z 1, } +export +function w $main() { +@start.1 +@body.2 + %.1 =l copy $.Lstring.2 + %.2 =w call $puts(l %.1) + ret 0 +} diff --git a/tests/local-align.c b/tests/local-align.c new file mode 100644 index 0000000..8fc3736 --- /dev/null +++ b/tests/local-align.c @@ -0,0 +1,3 @@ +void f(void) { + _Alignas(16) char x[4]; +} diff --git a/tests/local-align.qbe b/tests/local-align.qbe new file mode 100644 index 0000000..bc3b85c --- /dev/null +++ b/tests/local-align.qbe @@ -0,0 +1,7 @@ +export +function $f() { +@start.1 + %.1 =l alloc16 4 +@body.2 + ret +} diff --git a/tests/local-init.c b/tests/local-init.c new file mode 100644 index 0000000..0e886d7 --- /dev/null +++ b/tests/local-init.c @@ -0,0 +1,6 @@ +void f(void) { + struct { + char c[8]; + long long i[3]; + } s = {'a'}; +} diff --git a/tests/local-init.qbe b/tests/local-init.qbe new file mode 100644 index 0000000..8d7346a --- /dev/null +++ b/tests/local-init.qbe @@ -0,0 +1,22 @@ +export +function $f() { +@start.1 + %.1 =l alloc8 32 +@body.2 + %.2 =l add %.1, 0 + %.3 =w copy 97 + storeb %.3, %.2 + %.4 =l add %.1, 1 + storeb 0, %.4 + %.5 =l add %.1, 2 + storeh 0, %.5 + %.6 =l add %.1, 4 + storew 0, %.6 + %.7 =l add %.1, 8 + storel 0, %.7 + %.8 =l add %.1, 16 + storel 0, %.8 + %.9 =l add %.1, 24 + storel 0, %.9 + ret +} diff --git a/tests/struct-copy.c b/tests/struct-copy.c new file mode 100644 index 0000000..33084aa --- /dev/null +++ b/tests/struct-copy.c @@ -0,0 +1,8 @@ +struct s { + char s[5]; + float f; +} x; + +void f(void) { + struct s y = x; +} diff --git a/tests/struct-copy.qbe b/tests/struct-copy.qbe new file mode 100644 index 0000000..4778921 --- /dev/null +++ b/tests/struct-copy.qbe @@ -0,0 +1,21 @@ +export +function $f() { +@start.1 + %.1 =l alloc4 12 +@body.2 + %.2 =l add %.1, 0 + %.3 =l loaduw $x + storew %.3, %.2 + %.4 =l add $x, 4 + %.5 =l add %.2, 4 + %.6 =l loaduw %.4 + storew %.6, %.5 + %.7 =l add %.4, 4 + %.8 =l add %.5, 4 + %.9 =l loaduw %.7 + storew %.9, %.8 + %.10 =l add %.7, 4 + %.11 =l add %.8, 4 + ret +} +export data $x = align 4 { z 12 } diff --git a/tests/struct-passing.c b/tests/struct-passing.c new file mode 100644 index 0000000..54c65a2 --- /dev/null +++ b/tests/struct-passing.c @@ -0,0 +1,11 @@ +struct s { + int x; + struct { + char y[3]; + short z; + } s[2]; + double w; +}; + +void f(struct s s) { +} diff --git a/tests/struct-passing.qbe b/tests/struct-passing.qbe new file mode 100644 index 0000000..a01bda2 --- /dev/null +++ b/tests/struct-passing.qbe @@ -0,0 +1,7 @@ +type :s.1 = { w, b 3, h, b 3, h, d, } +export +function $f(:s.1 %.1) { +@start.1 +@body.2 + ret +} diff --git a/tests/subtract-pointer.c b/tests/subtract-pointer.c new file mode 100644 index 0000000..1de9dde --- /dev/null +++ b/tests/subtract-pointer.c @@ -0,0 +1,3 @@ +void f(int *x, int *y) { + x - y; +} diff --git a/tests/subtract-pointer.qbe b/tests/subtract-pointer.qbe new file mode 100644 index 0000000..9e49d9c --- /dev/null +++ b/tests/subtract-pointer.qbe @@ -0,0 +1,17 @@ +export +function $f(l %.1, l %.3) { +@start.1 + %.2 =l alloc8 8 + storel %.1, %.2 + %.4 =l alloc8 8 + storel %.3, %.4 +@body.2 + %.5 =l loadl %.2 + %.6 =l copy %.5 + %.7 =l udiv %.6, 4 + %.8 =l loadl %.4 + %.9 =l copy %.8 + %.10 =l udiv %.9, 4 + %.11 =l sub %.7, %.10 + ret +} diff --git a/tests/switch.c b/tests/switch.c new file mode 100644 index 0000000..b1b7733 --- /dev/null +++ b/tests/switch.c @@ -0,0 +1,10 @@ +void f(void) { + switch (0) { + case 3: break; + case 52: break; + case -3: break; + default: break; + case 0: break; + case 101: break; + } +} diff --git a/tests/switch.qbe b/tests/switch.qbe new file mode 100644 index 0000000..73320a0 --- /dev/null +++ b/tests/switch.qbe @@ -0,0 +1,62 @@ +export +function $f() { +@start.1 +@body.2 + jmp @switch_cond.3 +@switch_case.5 + jmp @switch_join.4 +@switch_case.6 + jmp @switch_join.4 +@switch_case.7 + jmp @switch_join.4 +@switch_default.8 + jmp @switch_join.4 +@switch_case.9 + jmp @switch_join.4 +@switch_case.10 + jmp @switch_join.4 +@switch_cond.3 + %.1 =w cultw 0, 52 + jnz %.1, @switch_lt.11, @switch_ge.12 +@switch_lt.11 + %.2 =w cultw 0, 3 + jnz %.2, @switch_lt.14, @switch_ge.15 +@switch_lt.14 + %.3 =w cultw 0, 0 + jnz %.3, @switch_lt.17, @switch_ge.18 +@switch_lt.17 + jmp @switch_default.8 +@switch_ge.18 + %.4 =w cugtw 0, 0 + jnz %.4, @switch_gt.19, @switch_case.9 +@switch_gt.19 + jmp @switch_default.8 +@switch_ge.15 + %.5 =w cugtw 0, 3 + jnz %.5, @switch_gt.16, @switch_case.5 +@switch_gt.16 + jmp @switch_default.8 +@switch_ge.12 + %.6 =w cugtw 0, 52 + jnz %.6, @switch_gt.13, @switch_case.6 +@switch_gt.13 + %.7 =w cultw 0, 18446744073709551613 + jnz %.7, @switch_lt.20, @switch_ge.21 +@switch_lt.20 + %.8 =w cultw 0, 101 + jnz %.8, @switch_lt.23, @switch_ge.24 +@switch_lt.23 + jmp @switch_default.8 +@switch_ge.24 + %.9 =w cugtw 0, 101 + jnz %.9, @switch_gt.25, @switch_case.10 +@switch_gt.25 + jmp @switch_default.8 +@switch_ge.21 + %.10 =w cugtw 0, 18446744073709551613 + jnz %.10, @switch_gt.22, @switch_case.7 +@switch_gt.22 + jmp @switch_default.8 +@switch_join.4 + ret +} diff --git a/tests/tentative.c b/tests/tentative.c new file mode 100644 index 0000000..1f484ae --- /dev/null +++ b/tests/tentative.c @@ -0,0 +1,2 @@ +int x; +int x = 5; diff --git a/tests/tentative.qbe b/tests/tentative.qbe new file mode 100644 index 0000000..bf252fd --- /dev/null +++ b/tests/tentative.qbe @@ -0,0 +1 @@ +export data $x = align 4 { w 5, } diff --git a/tests/typedef-name.c b/tests/typedef-name.c new file mode 100644 index 0000000..ab85460 --- /dev/null +++ b/tests/typedef-name.c @@ -0,0 +1,4 @@ +typedef int x; +void f(void) { + long x; +} diff --git a/tests/typedef-name.qbe b/tests/typedef-name.qbe new file mode 100644 index 0000000..5ce995e --- /dev/null +++ b/tests/typedef-name.qbe @@ -0,0 +1,7 @@ +export +function $f() { +@start.1 + %.1 =l alloc8 8 +@body.2 + ret +} diff --git a/tests/typedef.c b/tests/typedef.c new file mode 100644 index 0000000..763d37d --- /dev/null +++ b/tests/typedef.c @@ -0,0 +1,2 @@ +typedef int T; +T x; diff --git a/tests/typedef.qbe b/tests/typedef.qbe new file mode 100644 index 0000000..7cf0e40 --- /dev/null +++ b/tests/typedef.qbe @@ -0,0 +1 @@ +export data $x = align 4 { z 4 } diff --git a/tests/uint32-to-float.c b/tests/uint32-to-float.c new file mode 100644 index 0000000..0a045be --- /dev/null +++ b/tests/uint32-to-float.c @@ -0,0 +1,4 @@ +unsigned g(void); +float f(void) { + return g(); +} diff --git a/tests/uint32-to-float.qbe b/tests/uint32-to-float.qbe new file mode 100644 index 0000000..0e90531 --- /dev/null +++ b/tests/uint32-to-float.qbe @@ -0,0 +1,9 @@ +export +function s $f() { +@start.1 +@body.2 + %.1 =w call $g() + %.2 =l extuw %.1 + %.3 =s sltof %.2 + ret %.3 +} diff --git a/tests/uint64-to-float.c b/tests/uint64-to-float.c new file mode 100644 index 0000000..b4e7fc6 --- /dev/null +++ b/tests/uint64-to-float.c @@ -0,0 +1,4 @@ +unsigned long long g(void); +float f(void) { + return g(); +} diff --git a/tests/uint64-to-float.qbe b/tests/uint64-to-float.qbe new file mode 100644 index 0000000..6542c29 --- /dev/null +++ b/tests/uint64-to-float.qbe @@ -0,0 +1,20 @@ +export +function s $f() { +@start.1 +@body.2 + %.1 =l call $g() + %.2 =w csltl %.1, 0 + jnz %.2, @utof_big.4, @utof_small.3 +@utof_small.3 + %.3 =s sltof %.1 + jmp @utof_join.5 +@utof_big.4 + %.4 =l and %.1, 1 + %.5 =l shr %.1, 1 + %.6 =l or %.5, %.4 + %.7 =s sltof %.6 + %.8 =s add %.7, %.7 +@utof_join.5 + %.9 =s phi @utof_small.3 %.3, @utof_big.4 %.8 + ret %.9 +} diff --git a/tests/unused-return.c b/tests/unused-return.c new file mode 100644 index 0000000..815fcb1 --- /dev/null +++ b/tests/unused-return.c @@ -0,0 +1,4 @@ +int g(void); +void f(void) { + g(); +} diff --git a/tests/unused-return.qbe b/tests/unused-return.qbe new file mode 100644 index 0000000..d269c7f --- /dev/null +++ b/tests/unused-return.qbe @@ -0,0 +1,7 @@ +export +function $f() { +@start.1 +@body.2 + %.1 =w call $g() + ret +} diff --git a/token.c b/token.c new file mode 100644 index 0000000..7b079f7 --- /dev/null +++ b/token.c @@ -0,0 +1,143 @@ +#include +#include +#include +#include "util.h" +#include "token.h" + +struct token tok; + +static char *tokstr[] = { + /* keyword */ + [TAUTO] = "auto", + [TBREAK] = "break", + [TCASE] = "case", + [TCHAR] = "char", + [TCONST] = "const", + [TCONTINUE] = "continue", + [TDEFAULT] = "default", + [TDO] = "do", + [TDOUBLE] = "double", + [TELSE] = "else", + [TENUM] = "enum", + [TEXTERN] = "extern", + [TFLOAT] = "float", + [TFOR] = "for", + [TGOTO] = "goto", + [TIF] = "if", + [TINLINE] = "inline", + [TINT] = "int", + [TLONG] = "long", + [TREGISTER] = "register", + [TRESTRICT] = "restrict", + [TRETURN] = "return", + [TSHORT] = "short", + [TSIGNED] = "signed", + [TSIZEOF] = "sizeof", + [TSTATIC] = "static", + [TSTRUCT] = "struct", + [TSWITCH] = "switch", + [TTYPEDEF] = "typedef", + [TUNION] = "union", + [TUNSIGNED] = "unsigned", + [TVOID] = "void", + [TVOLATILE] = "volatile", + [TWHILE] = "while", + [T_ALIGNAS] = "_Alignas", + [T_ALIGNOF] = "_Alignof", + [T_ATOMIC] = "_Atomic", + [T_BOOL] = "_Bool", + [T_COMPLEX] = "_Complex", + [T_GENERIC] = "_Generic", + [T_IMAGINARY] = "_Imaginary", + [T_NORETURN] = "_Noreturn", + [T_STATIC_ASSERT] = "_Static_assert", + [T_THREAD_LOCAL] = "_Thread_local", + [T__BUILTIN_VA_LIST] = "__builtin_va_list", + + /* punctuator */ + [TLBRACK] = "[", + [TRBRACK] = "]", + [TLPAREN] = "(", + [TRPAREN] = ")", + [TLBRACE] = "{", + [TRBRACE] = "}", + [TPERIOD] = ".", + [TARROW] = "->", + [TINC] = "++", + [TDEC] = "--", + [TBAND] = "&", + [TMUL] = "*", + [TADD] = "+", + [TSUB] = "-", + [TBNOT] = "~", + [TLNOT] = "!", + [TDIV] = "/", + [TMOD] = "%", + [TSHL] = "<<", + [TSHR] = ">>", + [TLESS] = "<", + [TGREATER] = ">", + [TLEQ] = "<=", + [TGEQ] = ">=", + [TEQL] = "==", + [TNEQ] = "!=", + [TXOR] = "^", + [TBOR] = "|", + [TLAND] = "&&", + [TLOR] = "||", + [TQUESTION] = "?", + [TCOLON] = ":", + [TSEMICOLON] = ";", + [TELLIPSIS] = "...", + [TASSIGN] = "=", + [TMULASSIGN] = "*=", + [TDIVASSIGN] = "/=", + [TMODASSIGN] = "%=", + [TADDASSIGN] = "+=", + [TSUBASSIGN] = "-=", + [TSHLASSIGN] = "<<=", + [TSHRASSIGN] = ">>=", + [TBANDASSIGN] = "&=", + [TXORASSIGN] = "^=", + [TBORASSIGN] = "|=", + [TCOMMA] = ",", + [THASH] = "#", + [THASHHASH] = "##", +}; + +void +tokprint(const struct token *t) +{ + const char *str; + + switch (t->kind) { + case TIDENT: + case TNUMBER: + case TCHARCONST: + case TSTRINGLIT: + str = t->lit; + break; + case TNEWLINE: + str = "\n"; + break; + case TEOF: + return; + default: + str = tokstr[t->kind]; + } + if (!str) + fatal("cannot print token %d", t->kind); + fputs(str, stdout); +} + +_Noreturn void error(const struct location *loc, const char *fmt, ...) +{ + va_list ap; + + fprintf(stderr, "%s:%zu:%zu: error: ", loc->file, loc->line, loc->col); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + putc('\n', stderr); + exit(1); +} diff --git a/token.h b/token.h new file mode 100644 index 0000000..ecede55 --- /dev/null +++ b/token.h @@ -0,0 +1,140 @@ +enum tokenkind { + TNONE, + + TEOF, + TNEWLINE, + + TIDENT, + TNUMBER, + TCHARCONST, + TSTRINGLIT, + + /* keyword */ + TAUTO, + TBREAK, + TCASE, + TCHAR, + TCONST, + TCONTINUE, + TDEFAULT, + TDO, + TDOUBLE, + TELSE, + TENUM, + TEXTERN, + TFLOAT, + TFOR, + TGOTO, + TIF, + TINLINE, + TINT, + TLONG, + TREGISTER, + TRESTRICT, + TRETURN, + TSHORT, + TSIGNED, + TSIZEOF, + TSTATIC, + TSTRUCT, + TSWITCH, + TTYPEDEF, + TUNION, + TUNSIGNED, + TVOID, + TVOLATILE, + TWHILE, + T_ALIGNAS, + T_ALIGNOF, + T_ATOMIC, + T_BOOL, + T_COMPLEX, + T_GENERIC, + T_IMAGINARY, + T_NORETURN, + T_STATIC_ASSERT, + T_THREAD_LOCAL, + T__BUILTIN_VA_LIST, + + /* punctuator */ + TLBRACK, + TRBRACK, + TLPAREN, + TRPAREN, + TLBRACE, + TRBRACE, + TPERIOD, + TARROW, + TINC, + TDEC, + TBAND, + TMUL, + TADD, + TSUB, + TBNOT, + TLNOT, + TDIV, + TMOD, + TSHL, + TSHR, + TLESS, + TGREATER, + TLEQ, + TGEQ, + TEQL, + TNEQ, + TXOR, + TBOR, + TLAND, + TLOR, + TQUESTION, + TCOLON, + TSEMICOLON, + TELLIPSIS, + TASSIGN, + TMULASSIGN, + TDIVASSIGN, + TMODASSIGN, + TADDASSIGN, + TSUBASSIGN, + TSHLASSIGN, + TSHRASSIGN, + TBANDASSIGN, + TXORASSIGN, + TBORASSIGN, + TCOMMA, + THASH, + THASHHASH, +}; + +struct location { + const char *file; + size_t line, col; +}; + +struct token { + enum tokenkind kind; + struct location loc; + char *lit; +}; + +#if 0 +enum tokenclass { + /* declaration */ + STORCLASS = 1<<1, + TYPEQUAL = 1<<2, + TYPESPEC = 1<<3, + FUNCSPEC = 1<<4, + ALIGNSPEC = 1<<5, + STATICASSERT = 1<<6, + DECLARATION = (1<<7) - 1, +}; +#endif + +//struct token *tokdup(struct token *); +//enum tokenclass tokclass(struct token *); + +extern struct token tok; + +void tokprint(const struct token *); +_Noreturn void error(const struct location *, const char *, ...); diff --git a/tree.c b/tree.c new file mode 100644 index 0000000..0e68fb5 --- /dev/null +++ b/tree.c @@ -0,0 +1,98 @@ +#include +#include +#include +#include +#include "util.h" +#include "tree.h" + +#define MAXH (sizeof(void *) * 8 * 3 / 2) + +static inline int +height(struct treenode *n) { + return n ? n->height : 0; +} + +static int +rot(void **p, struct treenode *x, int dir /* deeper side */) +{ + struct treenode *y = x->child[dir]; + struct treenode *z = y->child[!dir]; + int hx = x->height; + int hz = height(z); + if (hz > height(y->child[dir])) { + /* + * x + * / \ dir z + * A y / \ + * / \ --> x y + * z D /| |\ + * / \ A B C D + * B C + */ + x->child[dir] = z->child[!dir]; + y->child[!dir] = z->child[dir]; + z->child[!dir] = x; + z->child[dir] = y; + x->height = hz; + y->height = hz; + z->height = hz + 1; + } else { + /* + * x y + * / \ / \ + * A y --> x D + * / \ / \ + * z D A z + */ + x->child[dir] = z; + y->child[!dir] = x; + x->height = hz + 1; + y->height = hz + 2; + z = y; + } + *p = z; + return z->height - hx; +} + +/* balance *p, return 0 if height is unchanged. */ +static int balance(void **p) +{ + struct treenode *n = *p; + int h0 = height(n->child[0]); + int h1 = height(n->child[1]); + if (h0 - h1 + 1u < 3u) { + int old = n->height; + n->height = h0 < h1 ? h1 + 1 : h0 + 1; + return n->height - old; + } + return rot(p, n, h0key) { + n->new = false; + return n; + } + a[i++] = &n->child[key > n->key]; + n = n->child[key > n->key]; + } + assert(sz > sizeof(*n)); + n = xmalloc(sz); + n->key = key; + n->child[0] = n->child[1] = 0; + n->height = 1; + /* insert new node, rebalance ancestors. */ + *a[--i] = n; + while (i && balance(a[--i])) + ; + n->new = true; + return n; +} diff --git a/tree.h b/tree.h new file mode 100644 index 0000000..6aa5c4e --- /dev/null +++ b/tree.h @@ -0,0 +1,8 @@ +struct treenode { + uint64_t key; + void *child[2]; + int height; + _Bool new; /* set by treeinsert if this node was newly allocated */ +}; + +void *treeinsert(void **, uint64_t, size_t); diff --git a/type.c b/type.c new file mode 100644 index 0000000..db65088 --- /dev/null +++ b/type.c @@ -0,0 +1,296 @@ +#include +#include +#include +#include +#include +#include "util.h" +#include "emit.h" +#include "type.h" + +struct type typevoid = {.kind = TYPEVOID}; + +struct type typechar = {.kind = TYPEBASIC, .size = 1, .align = 1, .repr = &i8, .basic = {.kind = BASICCHAR, .issigned = 1}}; +struct type typeschar = {.kind = TYPEBASIC, .size = 1, .align = 1, .repr = &i8, .basic = {.kind = BASICCHAR, .issigned = 1}}; +struct type typeuchar = {.kind = TYPEBASIC, .size = 1, .align = 1, .repr = &i8, .basic = {.kind = BASICCHAR}}; + +struct type typeshort = {.kind = TYPEBASIC, .size = 2, .align = 2, .repr = &i16, .basic = {.kind = BASICSHORT, .issigned = 1}}; +struct type typeushort = {.kind = TYPEBASIC, .size = 2, .align = 2, .repr = &i16, .basic = {.kind = BASICSHORT}}; + +struct type typeint = {.kind = TYPEBASIC, .size = 4, .align = 4, .repr = &i32, .basic = {.kind = BASICINT, .issigned = 1}}; +struct type typeuint = {.kind = TYPEBASIC, .size = 4, .align = 4, .repr = &i32, .basic = {.kind = BASICINT}}; + +struct type typelong = {.kind = TYPEBASIC, .size = 8, .align = 8, .repr = &i64, .basic = {.kind = BASICLONG, .issigned = 1}}; +struct type typeulong = {.kind = TYPEBASIC, .size = 8, .align = 8, .repr = &i64, .basic = {.kind = BASICLONG}}; + +struct type typellong = {.kind = TYPEBASIC, .size = 8, .align = 8, .repr = &i64, .basic = {.kind = BASICLONGLONG, .issigned = 1}}; +struct type typeullong = {.kind = TYPEBASIC, .size = 8, .align = 8, .repr = &i64, .basic = {.kind = BASICLONGLONG}}; + +struct type typebool = {.kind = TYPEBASIC, .size = 1, .align = 1, .repr = &i8, .basic = {.kind = BASICBOOL}}; +struct type typefloat = {.kind = TYPEBASIC, .size = 4, .align = 4, .repr = &f32, .basic = {.kind = BASICFLOAT}}; +struct type typedouble = {.kind = TYPEBASIC, .size = 8, .align = 8, .repr = &f64, .basic = {.kind = BASICDOUBLE}}; +struct type typelongdouble = {.kind = TYPEBASIC, .size = 16, .align = 16, .basic = {.kind = BASICLONGDOUBLE}}; // XXX: not supported by qbe + +static struct type typevaliststruct = {.kind = TYPESTRUCT, .size = 24, .align = 8}; +struct type typevalist = {.kind = TYPEARRAY, .size = 24, .align = 8, .array = {1}, .base = &typevaliststruct}; +struct type typevalistptr = {.kind = TYPEPOINTER, .size = 8, .align = 8, .repr = &i64, .base = &typevaliststruct}; + +struct type * +mktype(enum typekind kind, struct type *base) +{ + struct type *t; + + t = xmalloc(sizeof(*t)); + t->kind = kind; + t->base = base; + t->incomplete = 0; + + return t; +} + +struct type * +mkqualifiedtype(struct type *t, enum typequalifier tq) +{ + if (tq) { + t = mktype(TYPEQUALIFIED, t); + t->qualified.kind = tq; + t->size = t->base->size; + t->align = t->base->align; + t->repr = t->base->repr; + // XXX: incomplete? + } + return t; +} + +struct type * +mkpointertype(struct type *t) +{ + t = mktype(TYPEPOINTER, t); + t->size = 8; + t->align = 8; + t->repr = &i64; + + return t; +} + +struct type * +mkarraytype(struct type *t, uint64_t len) +{ + t = mktype(TYPEARRAY, t); + t->array.length = len; + t->incomplete = !len; + if (t->base) { + t->align = t->base->align; + t->size = t->base->size * len; // XXX: overflow? + } + + return t; +} + +enum typeproperty +typeprop(struct type *t) +{ + enum typeproperty p = PROPNONE; + + switch (t->kind) { + case TYPEVOID: + p |= PROPOBJECT; + break; + case TYPEBASIC: + p |= PROPOBJECT|PROPARITH|PROPSCALAR; + if (!t->basic.iscomplex) + p |= PROPREAL; + switch (t->basic.kind) { + case BASICFLOAT: + case BASICDOUBLE: + case BASICLONGDOUBLE: + p |= PROPFLOAT; + break; + case BASICCHAR: + p |= PROPCHAR; + /* fallthrough */ + default: + p |= PROPINT; + break; + } + break; + case TYPEPOINTER: + p |= PROPOBJECT|PROPSCALAR|PROPDERIVED; + break; + case TYPEARRAY: + p |= PROPOBJECT|PROPAGGR|PROPDERIVED; + break; + case TYPEFUNC: + p |= PROPDERIVED; + break; + case TYPESTRUCT: + p |= PROPOBJECT|PROPAGGR; + break; + default: + break; + } + + return p; +} + +static int +typerank(struct type *t) +{ + assert(typeprop(t) & PROPINT); + switch (t->basic.kind) { + case BASICBOOL: return 1; + case BASICCHAR: return 2; + case BASICSHORT: return 3; + case BASICINT: return 4; + case BASICLONG: return 5; + case BASICLONGLONG: return 6; + default: + fatal("internal error; unhandled integer type"); + } +} + +bool +typecompatible(struct type *t1, struct type *t2) +{ + struct parameter *p1, *p2; + + if (t1 == t2) + return true; + if (t1->kind != t2->kind) + return false; + switch (t1->kind) { + case TYPEQUALIFIED: + if (t1->qualified.kind != t2->qualified.kind) + return false; + return typecompatible(t1->base, t2->base); + case TYPEVOID: + return true; + case TYPEBASIC: + return false; + case TYPEPOINTER: + return typecompatible(t1->base, t2->base); + case TYPEARRAY: + if (t1->array.length && t2->array.length && t1->array.length != t2->array.length) + return false; + return typecompatible(t1->base, t2->base); + case TYPEFUNC: + if (!typecompatible(t1->base, t2->base)) + return false; + if (t1->func.isprototype && t2->func.isprototype) { + if (t1->func.isvararg != t2->func.isvararg) + return false; + for (p1 = t1->func.params, p2 = t2->func.params; p1 && p2; p1 = p1->next, p2 = p2->next) { + if (!typecompatible(p1->type, p2->type)) + return false; + } + return !p1 && !p2; + } + // XXX: handle non-prototype functions + return false; + default: + return false; + } +} + +_Bool +typesame(struct type *t1, struct type *t2) +{ + // XXX: implement + return typecompatible(t1, t2); +} + +struct type * +typecomposite(struct type *t1, struct type *t2) +{ + // XXX: implement 6.2.7 + // XXX: merge with typecompatible? + return t1; +} + +struct type * +typeunqual(struct type *t, enum typequalifier *tq) +{ + while (t->kind == TYPEQUALIFIED) { + if (tq) + *tq |= t->qualified.kind; + t = t->base; + } + return t; +} + +struct type * +typeintpromote(struct type *t) +{ + if (typeprop(t) & PROPINT && typerank(t) <= typerank(&typeint)) + return t->size < typeint.size || t->basic.issigned ? &typeint : &typeuint; + return t; +} + +struct type * +typeargpromote(struct type *t) +{ + if (t == &typefloat) + return &typedouble; + return typeintpromote(t); +} + +struct type * +typecommonreal(struct type *t1, struct type *t2) +{ + struct type *tmp; + + assert(t1->kind == TYPEBASIC && t2->kind == TYPEBASIC); + if (t1 == t2) + return t1; + if (t1 == &typelongdouble || t2 == &typelongdouble) + return &typelongdouble; + if (t1 == &typedouble || t2 == &typedouble) + return &typedouble; + if (t1 == &typefloat || t2 == &typefloat) + return &typefloat; + t1 = typeintpromote(t1); + t2 = typeintpromote(t2); + if (t1 == t2) + return t1; + if (t1->basic.issigned == t2->basic.issigned) + return typerank(t1) > typerank(t2) ? t1 : t2; + if (t1->basic.issigned) { + tmp = t1; + t1 = t2; + t2 = tmp; + } + if (typerank(t1) >= typerank(t2)) + return t1; + if (t1->size < t2->size) + return t2; + if (t2 == &typelong) + return &typeulong; + if (t2 == &typellong) + return &typeullong; + fatal("internal error; could not find common real type"); +} + +struct member * +typemember(struct type *t, const char *name) +{ + struct member *m; + + assert(t->kind == TYPESTRUCT || t->kind == TYPEUNION); + arrayforeach (&t->structunion.members, m) { + if (strcmp(m->name, name) == 0) + return m; + } + return NULL; +} + +struct parameter * +mkparam(char *name, struct type *t) +{ + struct parameter *p; + + p = xmalloc(sizeof(*p)); + p->name = name; + p->type = t; + p->next = NULL; + + return p; +} diff --git a/type.h b/type.h new file mode 100644 index 0000000..662aa44 --- /dev/null +++ b/type.h @@ -0,0 +1,115 @@ +enum typequalifier { + QUALNONE, + + QUALCONST = 1<<1, + QUALRESTRICT = 1<<2, + QUALVOLATILE = 1<<3, + QUALATOMIC = 1<<4, +}; + +enum typekind { + TYPENONE, + + TYPEQUALIFIED, + TYPEVOID, + TYPEBASIC, + TYPEPOINTER, + TYPEARRAY, + TYPEFUNC, + TYPESTRUCT, + TYPEUNION, +}; + +enum typeproperty { + PROPNONE, + + PROPOBJECT = 1<<0, + PROPCHAR = 1<<1, + PROPINT = 1<<2, + PROPREAL = 1<<3, + PROPARITH = 1<<4, + PROPSCALAR = 1<<5, + PROPAGGR = 1<<6, + PROPDERIVED = 1<<7, + PROPFLOAT = 1<<8, +}; + +struct parameter { + char *name; + struct type *type; + struct value *value; + struct parameter *next; +}; + +struct member { + char *name; + struct type *type; + uint64_t offset; +}; + +struct type { + enum typekind kind; + int align; + uint64_t size; + struct representation *repr; + struct type *base; + _Bool incomplete; + union { + struct { + enum typequalifier kind; + } qualified; + struct { + enum { + BASICBOOL, + BASICCHAR, + BASICSHORT, + BASICINT, + BASICLONG, + BASICLONGLONG, + BASICFLOAT, + BASICDOUBLE, + BASICLONGDOUBLE, + } kind; + _Bool issigned, iscomplex; + } basic; + struct { + uint64_t length; + } array; + struct { + int isprototype, isvararg; + struct parameter *params; + _Bool isnoreturn; + } func; + struct { + char *tag; + struct array members; + } structunion; + }; +}; + +struct type *mktype(enum typekind, struct type *base); +struct type *mkqualifiedtype(struct type *, enum typequalifier); +struct type *mkpointertype(struct type *); +struct type *mkarraytype(struct type *, uint64_t); + +_Bool typecompatible(struct type *, struct type *); +_Bool typesame(struct type *, struct type *); +struct type *typecomposite(struct type *, struct type *); +struct type *typeunqual(struct type *, enum typequalifier *); +struct type *typecommonreal(struct type *, struct type *); +struct type *typeargpromote(struct type *); +struct type *typeintpromote(struct type *); +enum typeproperty typeprop(struct type *); +struct member *typemember(struct type *, const char *name); + +struct parameter *mkparam(char *, struct type *); + +extern struct type typevoid; +extern struct type typebool; +extern struct type typechar, typeschar, typeuchar; +extern struct type typeshort, typeushort; +extern struct type typeint, typeuint; +extern struct type typelong, typeulong; +extern struct type typellong, typeullong; +extern struct type typefloat, typedouble, typelongdouble; +extern struct type typevalist, typevalistptr; diff --git a/util.c b/util.c new file mode 100644 index 0000000..1e44194 --- /dev/null +++ b/util.c @@ -0,0 +1,133 @@ +#include +#include +#include +#include +#include +#include +#include +#include "util.h" + +char *argv0; + +static void +vwarn(const char *fmt, va_list ap) +{ + fprintf(stderr, "%s: ", argv0); + vfprintf(stderr, fmt, ap); + if (fmt[0] && fmt[strlen(fmt) - 1] == ':') { + fputc(' ', stderr); + perror(NULL); + } else { + fputc('\n', stderr); + } +} + +void +warn(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vwarn(fmt, ap); + va_end(ap); +} + +noreturn void +fatal(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vwarn(fmt, ap); + va_end(ap); + exit(1); +} + +void * +reallocarray(void *buf, size_t n, size_t m) +{ + if (n > 0 && SIZE_MAX / n < m) { + errno = ENOMEM; + return NULL; + } + return realloc(buf, n * m); +} + +void * +xreallocarray(void *buf, size_t n, size_t m) +{ + buf = reallocarray(buf, n, m); + if (!buf) + fatal("reallocarray:"); + + return buf; +} + +void * +xmalloc(size_t len) +{ + void *buf; + + buf = malloc(len); + if (!buf) + fatal("malloc:"); + + return buf; +} + +char * +progname(char *name, char *fallback) +{ + char *slash; + + if (!name) + return fallback; + slash = strrchr(name, '/'); + return slash ? slash + 1 : name; +} + +void * +arrayadd(struct array *a, size_t n) +{ + void *v; + + if (a->cap - a->len < n) { + do a->cap = a->cap ? a->cap * 2 : 256; + while (a->cap - a->len < n); + a->val = realloc(a->val, a->cap); + if (!a->val) + fatal("realloc"); + } + v = (char *)a->val + a->len; + a->len += n; + + return v; +} + +void +arrayaddptr(struct array *a, void *v) +{ + *(void **)arrayadd(a, sizeof(v)) = v; +} + +void +arrayaddbuf(struct array *a, const void *src, size_t n) +{ + memcpy(arrayadd(a, n), src, n); +} + +void +listinsert(struct list *list, struct list *new) +{ + new->next = list->next; + new->prev = list; + list->next->prev = new; + list->next = new; +} + +void +listremove(struct list *list) +{ + list->next->prev = list->prev; + list->prev->next = list->next; +} diff --git a/util.h b/util.h new file mode 100644 index 0000000..50b8f49 --- /dev/null +++ b/util.h @@ -0,0 +1,31 @@ +struct list { + struct list *prev, *next; +}; + +struct array { + void *val; + size_t len, cap; +}; + +extern char *argv0; + +#define LEN(a) (sizeof(a) / sizeof((a)[0])) +#define ALIGNUP(x, n) (((x) + (n) - 1) & ~((n) - 1)) + +void warn(const char *, ...); +_Noreturn void fatal(const char *fmt, ...); + +void *reallocarray(void *, size_t, size_t); +void *xreallocarray(void *, size_t, size_t); +void *xmalloc(size_t); + +char *progname(char *, char *); + +void listinsert(struct list *, struct list *); +void listremove(struct list *); +#define listelement(list, type, member) (type *)((char *)list - offsetof(type, member)) + +void *arrayadd(struct array *, size_t); +void arrayaddptr(struct array *, void *); +void arrayaddbuf(struct array *, const void *, size_t); +#define arrayforeach(a, m) for (m = (a)->val; m != (void *)((char *)(a)->val + (a)->len); ++m) -- cgit v1.2.3