From bad98e94caf37db71f8114bdb8562a7caa4fb256 Mon Sep 17 00:00:00 2001 From: Michael Forney Date: Mon, 16 Mar 2020 02:09:59 -0700 Subject: pp: Add support for macro definition and expansion The token pasting operator `##` still needs to be implemented. --- cc.h | 9 + main.c | 1 + pp.c | 499 ++++++++++++++++++++++++++++++-- scan.c | 1 + test/preprocess-macro-function-paren.c | 4 + test/preprocess-macro-function-paren.pp | 2 + test/preprocess-macro-function.c | 2 + test/preprocess-macro-function.pp | 1 + test/preprocess-macro-hide.c | 2 + test/preprocess-macro-hide.pp | 1 + test/preprocess-macro-object.c | 2 + test/preprocess-macro-object.pp | 1 + test/preprocess-macro-stringize.c | 2 + test/preprocess-macro-stringize.pp | 1 + test/preprocess-standard-example-1.c | 4 + test/preprocess-standard-example-1.pp | 1 + test/preprocess-standard-example-2.c | 20 ++ test/preprocess-standard-example-2.pp | 4 + test/preprocess-undef.c | 3 + test/preprocess-undef.pp | 1 + token.c | 2 + util.c | 10 + util.h | 1 + 23 files changed, 551 insertions(+), 23 deletions(-) create mode 100644 test/preprocess-macro-function-paren.c create mode 100644 test/preprocess-macro-function-paren.pp create mode 100644 test/preprocess-macro-function.c create mode 100644 test/preprocess-macro-function.pp create mode 100644 test/preprocess-macro-hide.c create mode 100644 test/preprocess-macro-hide.pp create mode 100644 test/preprocess-macro-object.c create mode 100644 test/preprocess-macro-object.pp create mode 100644 test/preprocess-macro-stringize.c create mode 100644 test/preprocess-macro-stringize.pp create mode 100644 test/preprocess-standard-example-1.c create mode 100644 test/preprocess-standard-example-1.pp create mode 100644 test/preprocess-standard-example-2.c create mode 100644 test/preprocess-standard-example-2.pp create mode 100644 test/preprocess-undef.c create mode 100644 test/preprocess-undef.pp diff --git a/cc.h b/cc.h index d938ade..85f739a 100644 --- a/cc.h +++ b/cc.h @@ -121,6 +121,8 
@@ struct location {
 
 struct token {
 	enum tokenkind kind;
+	/* whether or not the token is ineligible for expansion */
+	_Bool hide;
 	/* whether or not the token was preceeded by a space */
 	_Bool space;
 	struct location loc;
@@ -384,6 +386,13 @@ void scan(struct token *);
 
 /* preprocessor */
 
+enum ppflags {
+	/* preserve newlines in preprocessor output */
+	PPNEWLINE = 1 << 0,
+};
+
+extern enum ppflags ppflags;
+
 void ppinit(void);
 
 void next(void);
diff --git a/main.c b/main.c
index a147c28..004df85 100644
--- a/main.c
+++ b/main.c
@@ -50,6 +50,7 @@ main(int argc, char *argv[])
 
 	ppinit();
 	if (pponly) {
+		ppflags |= PPNEWLINE;
 		while (tok.kind != TEOF) {
 			tokenprint(&tok);
 			next();
diff --git a/pp.c b/pp.c
index 4831cd6..87cb810 100644
--- a/pp.c
+++ b/pp.c
@@ -1,3 +1,4 @@
+#include <assert.h>
 #include
 #include
 #include
@@ -7,14 +8,478 @@
 #include "util.h"
 #include "cc.h"
 
-static struct token pending;
+struct macroparam {
+	char *name;
+	/* whether or not the argument needs to be stringized */
+	bool stringize;
+};
+
+struct macroarg {
+	struct token *token;
+	size_t ntoken;
+	/* stringized argument */
+	struct token str;
+};
+
+struct macro {
+	enum {
+		MACROOBJ,
+		MACROFUNC,
+	} kind;
+	char *name;
+	/* whether or not this macro is ineligible for expansion */
+	bool hide;
+	/* parameters of function-like macro */
+	struct macroparam *param;
+	size_t nparam;
+	/* argument tokens of macro invocation */
+	struct macroarg *arg;
+	/* replacement list */
+	struct token *token;
+	size_t ntoken;
+};
+
+struct frame {
+	struct token *token;
+	size_t ntoken;
+	struct macro *macro;
+};
+
+enum ppflags ppflags;
+
+static struct array ctx;
+static struct map *macros;
+/* number of macros currently undergoing expansion */
+static size_t macrodepth;
 
 void
 ppinit(void)
 {
+	macros = mkmap(64);
 	next();
 }
 
+/* check if two macro definitions are equal, as in C11 6.10.3p2 */
+static bool
+macroequal(struct macro *m1, struct macro *m2)
+{
+	struct token *t1, *t2;
+	size_t i;
+
+	if (m1->kind != m2->kind)
+		return false;
+	if (m1->kind == MACROFUNC) {
+		if (m1->nparam != m2->nparam)
+			return false;
+		for (i = 0; i < m1->nparam; ++i) {
+			if (strcmp(m1->param[i].name, m2->param[i].name) != 0)
+				return false;
+		}
+	}
+	if (m1->ntoken != m2->ntoken)
+		return false;
+	for (t1 = m1->token, t2 = m2->token; t1 < m1->token + m1->ntoken; ++t1, ++t2) {
+		if (t1->kind != t2->kind)
+			return false;
+		if (t1->lit && strcmp(t1->lit, t2->lit) != 0)
+			return false;
+	}
+	return true;
+}
+
+/* find the index of a macro parameter with the given name */
+static size_t
+macroparam(struct macro *m, const char *name)
+{
+	size_t i;
+
+	for (i = 0; i < m->nparam; ++i) {
+		if (strcmp(m->param[i].name, name) == 0)
+			return i;
+	}
+	return -1;
+}
+
+/* lookup a macro by name */
+static struct macro *
+macroget(char *name)
+{
+	struct mapkey k;
+
+	mapkey(&k, name, strlen(name));
+	return mapget(macros, &k);
+}
+
+/* finish expanding a macro: unhide it, free its arguments, decrement macrodepth */
+static void
+macrodone(struct macro *m)
+{
+	m->hide = false;
+	if (m->kind == MACROFUNC && m->nparam > 0) {
+		free(m->arg[0].token);
+		free(m->arg);
+	}
+	--macrodepth;
+}
+
+/* consume and return the next token of a frame (no bounds check) */
+static struct token *
+framenext(struct frame *f)
+{
+	return f->ntoken--, f->token++;
+}
+
+/* push a new context frame */
+static struct frame *
+ctxpush(struct token *t, size_t n, struct macro *m)
+{
+	struct frame *f;
+
+	f = arrayadd(&ctx, sizeof(*f));
+	f->token = t;
+	f->ntoken = n;
+	f->macro = m;
+	return f;
+}
+
+/* get the next frame with tokens left */
+static struct frame *
+ctxframe(void)
+{
+	struct frame *f;
+
+	for (f = arraylast(&ctx, sizeof(*f)); ctx.len; --f, ctx.len -= sizeof(*f)) {
+		if (f->ntoken)
+			return f;
+		if (f->macro)
+			macrodone(f->macro);
+	}
+	return NULL;
+}
+
+/* get the next token from the context */
+static struct token *
+ctxnext(void)
+{
+	struct frame *f;
+	struct token *t;
+	struct macro *m;
+	size_t i;
+
+again:
+	f = ctxframe();
+	if (!f)
+		return NULL;
+	m = f->macro;
+	if (m && m->kind == MACROFUNC) {
+		/* try to expand macro parameter */
+		switch (f->token->kind) {
+		case THASH:
+			framenext(f);
+			t = framenext(f);
+			assert(t && t->kind == TIDENT);
+			i = macroparam(m, t->lit);
+			assert(i != -1);
+			f = ctxpush(&m->arg[i].str, 1, NULL);
+			break;
+		case TIDENT:
+			i = macroparam(m, f->token->lit);
+			if (i == -1)
+				break;
+			framenext(f);
+			if (m->arg[i].ntoken == 0)
+				goto again;
+			f = ctxpush(m->arg[i].token, m->arg[i].ntoken, NULL);
+			break;
+		}
+		/* XXX: token concatenation */
+	}
+	return framenext(f);
+}
+
+/* process a #define directive; tok is the token following 'define' */
+static void
+define(void)
+{
+	struct token *t;
+	struct macro *m;
+	struct macroparam *p;
+	struct array params = {0}, repl = {0};
+	struct mapkey k;
+	void **entry;
+	bool stringize;
+	size_t i;
+
+	if (tok.kind != TIDENT)
+		error(&tok.loc, "expected identifier after #define");
+	m = xmalloc(sizeof(*m));
+	m->name = tok.lit;
+	m->hide = false;
+	t = arrayadd(&repl, sizeof(*t));
+	scan(t);
+	if (t->kind == TLPAREN && !t->space) {
+		m->kind = MACROFUNC;
+		/* read macro parameter names */
+		while (scan(&tok), tok.kind == TIDENT) {
+			p = arrayadd(&params, sizeof(*p));
+			p->name = tok.lit;
+			p->stringize = false;
+			if (scan(&tok), tok.kind != TCOMMA)
+				break;
+		}
+		if (tok.kind != TRPAREN)
+			error(&tok.loc, "expected ')' after macro parameter list");
+		scan(t);  /* first token in replacement list */
+	} else {
+		m->kind = MACROOBJ;
+	}
+
+	/* read macro body */
+	m->param = params.val;
+	m->nparam = params.len / sizeof(m->param[0]);
+	t->space = false;
+	while (t->kind != TNEWLINE) {
+		if (t->kind == THASHHASH)
+			error(&t->loc, "'##' operator is not yet implemented");
+		stringize = t->kind == THASH;
+		t = arrayadd(&repl, sizeof(*t));
+		scan(t);
+		if (stringize && m->kind == MACROFUNC) {
+			if (t->kind != TIDENT)
+				error(&t->loc, "expected macro parameter name after '#' operator");
+			i = macroparam(m, t->lit);
+			if (i == -1)
+				error(&t->loc, "'%s' is not a macro parameter name", t->lit);
+			m->param[i].stringize = true;
+		}
+	}
+	m->token = repl.val;
+	m->ntoken = repl.len / sizeof(*t) - 1;
+	tok = *t;
+
+	mapkey(&k, m->name, strlen(m->name));
+	entry = mapput(macros, &k);
+	if (*entry && !macroequal(m, *entry))
+		error(&tok.loc, "redefinition of macro '%s'", m->name);
+	*entry = m;
+}
+
+/* process an #undef directive; tok is the token following 'undef' */
+static void
+undef(void)
+{
+	char *name;
+	struct mapkey k;
+	void **entry;
+	struct macro *m;
+
+	name = tokencheck(&tok, TIDENT, "after #undef");
+	mapkey(&k, name, strlen(name));
+	entry = mapput(macros, &k);
+	m = *entry;
+	if (m) {
+		free(name);
+		free(m->param);
+		free(m->token);
+		*entry = NULL;
+	}
+	scan(&tok);
+}
+
+/* process a preprocessing directive, starting with the token after '#' */
+static void
+directive(void)
+{
+	enum ppflags oldflags;
+	char *name;
+
+	scan(&tok);
+	if (tok.kind == TNEWLINE)
+		return;  /* empty directive */
+	oldflags = ppflags;
+	ppflags |= PPNEWLINE;
+	name = tokencheck(&tok, TIDENT, "or newline after '#'");
+	if (strcmp(name, "if") == 0) {
+		error(&tok.loc, "#if directive is not implemented");
+	} else if (strcmp(name, "ifdef") == 0) {
+		error(&tok.loc, "#ifdef directive is not implemented");
+	} else if (strcmp(name, "ifndef") == 0) {
+		error(&tok.loc, "#ifndef directive is not implemented");
+	} else if (strcmp(name, "elif") == 0) {
+		error(&tok.loc, "#elif directive is not implemented");
+	} else if (strcmp(name, "endif") == 0) {
+		error(&tok.loc, "#endif directive is not implemented");
+	} else if (strcmp(name, "include") == 0) {
+		error(&tok.loc, "#include directive is not implemented");
+	} else if (strcmp(name, "define") == 0) {
+		scan(&tok);
+		define();
+	} else if (strcmp(name, "undef") == 0) {
+		scan(&tok);
+		undef();
+	} else if (strcmp(name, "line") == 0) {
+		error(&tok.loc, "#line directive is not implemented");
+	} else if (strcmp(name, "error") == 0) {
+		error(&tok.loc, "#error directive is not implemented");
+	} else if (strcmp(name, "pragma") == 0) {
+		error(&tok.loc, "#pragma directive is not implemented");
+	} else {
+		error(&tok.loc, "invalid preprocessor directive #%s", name);
+	}
+	free(name);
+	tokencheck(&tok, TNEWLINE, "after preprocessing directive");
+	ppflags = oldflags;
+}
+
+/* scan the next token into t without expanding it, processing directives at line starts */
+static void
+nextinto(struct token *t)
+{
+	static bool newline = true;
+
+	for (;;) {
+		scan(t);
+		if (newline && t->kind == THASH) {
+			directive();
+		} else {
+			/* t may not be &tok (see peekparen), so test the token just scanned */
+			newline = t->kind == TNEWLINE;
+			break;
+		}
+	}
+}
+
+/* get the next unexpanded token: from the context if it has any, else from the scanner */
+static struct token *
+rawnext(void)
+{
+	struct token *t;
+
+	t = ctxnext();
+	if (!t) {
+		t = &tok;
+		nextinto(t);
+	}
+	return t;
+}
+
+/* check whether the next token is '('; consume it if so, otherwise leave the tokens queued */
+static bool
+peekparen(void)
+{
+	static struct array pending;
+	struct token *t;
+	struct frame *f;
+
+	t = ctxnext();
+	if (t) {
+		if (t->kind == TLPAREN)
+			return true;
+		f = arraylast(&ctx, sizeof(*f));
+		--f->token;
+		++f->ntoken;
+		return false;
+	}
+	pending.len = 0;
+	do t = arrayadd(&pending, sizeof(*t)), nextinto(t);
+	while (t->kind == TNEWLINE);
+	if (t->kind == TLPAREN)
+		return true;
+	ctxpush(pending.val, pending.len / sizeof(*t), NULL);
+	return false;
+}
+
+/* append the spelling of a token to a stringized argument buffer */
+static void
+stringize(struct array *buf, struct token *t)
+{
+	const char *lit;
+
+	lit = t->lit ? t->lit : tokstr[t->kind];
+	/* XXX: double escape string literal */
+	arrayaddbuf(buf, lit, strlen(lit));
+}
+
+/* if t names a macro eligible for expansion, push its replacement list and return true */
+static bool
+expand(struct token *t)
+{
+	struct macro *m;
+	struct macroarg *arg;
+	struct array str, tok;
+	size_t i, depth, paren;
+
+	if (t->kind != TIDENT)
+		return false;
+	m = macroget(t->lit);
+	if (!m || m->hide || t->hide) {
+		t->hide = true;
+		return false;
+	}
+	if (m->kind == MACROFUNC) {
+		if (!peekparen())
+			return false;
+		/* read macro arguments */
+		paren = 0;
+		depth = macrodepth;
+		tok = (struct array){0};
+		if (m->nparam > 0) {
+			arg = xreallocarray(NULL, m->nparam, sizeof(*arg));
+		} else {
+			arg = NULL;
+			t = rawnext();
+		}
+		for (i = 0; i < m->nparam && t->kind != TRPAREN; ++i) {
+			if (m->param[i].stringize) {
+				str = (struct array){0};
+				arrayaddbuf(&str, "\"", 1);
+			}
+			arg[i].ntoken = 0;
+			for (;;) {
+				t = rawnext();
+				if (t->kind == TEOF)
+					error(&t->loc, "EOF when reading macro parameters");
+				if (macrodepth <= depth) {
+					/* adjust current macro depth, in case it got shallower */
+					depth = macrodepth;
+					if (paren == 0 && (t->kind == TCOMMA || t->kind == TRPAREN))
+						break;
+					switch (t->kind) {
+					case TLPAREN: ++paren; break;
+					case TRPAREN: --paren; break;
+					}
+					if (m->param[i].stringize)
+						stringize(&str, t);
+				}
+				if (!expand(t)) {
+					arrayaddbuf(&tok, t, sizeof(*t));
+					++arg[i].ntoken;
+				}
+			}
+			if (m->param[i].stringize) {
+				arrayaddbuf(&str, "\"", 2);
+				arg[i].str = (struct token){
+					.kind = TSTRINGLIT,
+					.lit = str.val,
+				};
+			}
+		}
+		if (i < m->nparam)
+			error(&t->loc, "not enough arguments for macro '%s'", m->name);
+		tokencheck(t, TRPAREN, "after macro arguments");
+		t = tok.val;
+		for (i = 0; i < m->nparam; ++i) {
+			arg[i].token = t;
+			t += arg[i].ntoken;
+		}
+		m->arg = arg;
+	}
+	ctxpush(m->token, m->ntoken, m);
+	m->hide = true;
+	++macrodepth;
+	return true;
+}
+
 static void
 keyword(struct token *tok)
 {
@@ -98,36 +563,34 @@ keyword(struct token *tok)
 	}
 }
 
-static void
-nextinto(struct token *t)
-{
-	do scan(t);
-	while (t->kind == TNEWLINE);
-	if (t->kind == TIDENT)
-		keyword(t);
-}
-
 void
 next(void)
 {
-	if (pending.kind) {
-		tok = pending;
-		pending.kind = TNONE;
-	} else {
-		nextinto(&tok);
-	}
+	struct token *t;
+
+	do t = rawnext();
+	while (expand(t) || (t->kind == TNEWLINE && !(ppflags & PPNEWLINE)));
+	tok = *t;
+	if (tok.kind == TIDENT)
+		keyword(&tok);
 }
 
 bool
 peek(int kind)
 {
-	if (!pending.kind)
-		nextinto(&pending);
-	if (pending.kind != kind)
-		return false;
-	pending.kind = TNONE;
-	nextinto(&tok);
-	return true;
+	static struct token pending;
+	struct token old;
+
+	old = tok;
+	next();
+	if (tok.kind == kind) {
+		next();
+		return true;
+	}
+	pending = tok;
+	tok = old;
+	ctxpush(&pending, 1, NULL);
+	return false;
 }
 
 char *
diff --git a/scan.c b/scan.c
index 557b595..c9eb5ea 100644
--- a/scan.c
+++ b/scan.c
@@ -472,4 +472,5 @@ scan(struct token *t)
 		t->lit = NULL;
 	}
 	t->space = scanner->sawspace;
+	t->hide = false;
 }
diff --git a/test/preprocess-macro-function-paren.c b/test/preprocess-macro-function-paren.c
new file mode 100644
index 0000000..c7c7639 --- /dev/null +++ b/test/preprocess-macro-function-paren.c @@ -0,0 +1,4 @@ +#define g(x) [x] +#define f(a) g a (def) +f((abc)) +f() diff --git a/test/preprocess-macro-function-paren.pp b/test/preprocess-macro-function-paren.pp new file mode 100644 index 0000000..6d86709 --- /dev/null +++ b/test/preprocess-macro-function-paren.pp @@ -0,0 +1,2 @@ +[abc] (def) +[def] diff --git a/test/preprocess-macro-function.c b/test/preprocess-macro-function.c new file mode 100644 index 0000000..87b4c07 --- /dev/null +++ b/test/preprocess-macro-function.c @@ -0,0 +1,2 @@ +#define f(a, b) a, abc, b +f(foo, bar) diff --git a/test/preprocess-macro-function.pp b/test/preprocess-macro-function.pp new file mode 100644 index 0000000..c8109cd --- /dev/null +++ b/test/preprocess-macro-function.pp @@ -0,0 +1 @@ +foo, abc, bar diff --git a/test/preprocess-macro-hide.c b/test/preprocess-macro-hide.c new file mode 100644 index 0000000..22671c7 --- /dev/null +++ b/test/preprocess-macro-hide.c @@ -0,0 +1,2 @@ +#define foo foo bar +foo diff --git a/test/preprocess-macro-hide.pp b/test/preprocess-macro-hide.pp new file mode 100644 index 0000000..d675fa4 --- /dev/null +++ b/test/preprocess-macro-hide.pp @@ -0,0 +1 @@ +foo bar diff --git a/test/preprocess-macro-object.c b/test/preprocess-macro-object.c new file mode 100644 index 0000000..f4c8b21 --- /dev/null +++ b/test/preprocess-macro-object.c @@ -0,0 +1,2 @@ +#define foo bar +foo diff --git a/test/preprocess-macro-object.pp b/test/preprocess-macro-object.pp new file mode 100644 index 0000000..5716ca5 --- /dev/null +++ b/test/preprocess-macro-object.pp @@ -0,0 +1 @@ +bar diff --git a/test/preprocess-macro-stringize.c b/test/preprocess-macro-stringize.c new file mode 100644 index 0000000..528c88e --- /dev/null +++ b/test/preprocess-macro-stringize.c @@ -0,0 +1,2 @@ +#define stringize(a) #a +stringize(hello) diff --git a/test/preprocess-macro-stringize.pp b/test/preprocess-macro-stringize.pp new file mode 100644 index 
0000000..3580093 --- /dev/null +++ b/test/preprocess-macro-stringize.pp @@ -0,0 +1 @@ +"hello" diff --git a/test/preprocess-standard-example-1.c b/test/preprocess-standard-example-1.c new file mode 100644 index 0000000..84447e0 --- /dev/null +++ b/test/preprocess-standard-example-1.c @@ -0,0 +1,4 @@ +/* C11 6.10.3.4p4 */ +#define f(a) a*g +#define g(a) f(a) +f(2)(9) diff --git a/test/preprocess-standard-example-1.pp b/test/preprocess-standard-example-1.pp new file mode 100644 index 0000000..738d396 --- /dev/null +++ b/test/preprocess-standard-example-1.pp @@ -0,0 +1 @@ +2*9*g diff --git a/test/preprocess-standard-example-2.c b/test/preprocess-standard-example-2.c new file mode 100644 index 0000000..0deb2ad --- /dev/null +++ b/test/preprocess-standard-example-2.c @@ -0,0 +1,20 @@ +/* C11 6.10.3.5p5 with token concatenation disabled for now */ +#define x 3 +#define f(a) f(x * (a)) +#undef x +#define x 2 +#define g f +#define z z[0] +#define h g(~ +#define m(a) a(w) +#define w 0,1 +#define t(a) a +#define p() int +#define q(x) x +//#define r(x,y) x ## y +#define str(x) # x +f(y+1) + f(f(z)) % t(t(g)(0) + t)(1); +g(x+(3,4)-w) | h 5) & m + (f)^m(m); +p() i[q()] = { q(1), /*r(2,3), r(4,), r(,5), r(,)*/ }; +char c[2][6] = { str(hello), str() }; diff --git a/test/preprocess-standard-example-2.pp b/test/preprocess-standard-example-2.pp new file mode 100644 index 0000000..3a52eb0 --- /dev/null +++ b/test/preprocess-standard-example-2.pp @@ -0,0 +1,4 @@ +f(2 * (y+1)) +f(2 * (f(2 * (z[0])))) %f(2 * (0)) + t(1); +f(2 * (2+(3,4)-0,1)) |f(2 * (~ 5)) &f(2 * (0,1))^m(0,1); +int i[] = {1, }; +char c[2][6] = {"hello","" }; diff --git a/test/preprocess-undef.c b/test/preprocess-undef.c new file mode 100644 index 0000000..c43f9ea --- /dev/null +++ b/test/preprocess-undef.c @@ -0,0 +1,3 @@ +#define foo bar +#undef foo +foo diff --git a/test/preprocess-undef.pp b/test/preprocess-undef.pp new file mode 100644 index 0000000..257cc56 --- /dev/null +++ b/test/preprocess-undef.pp @@ -0,0 +1 
@@
+foo
diff --git a/token.c b/token.c
index 6d4484d..dc2df02 100644
--- a/token.c
+++ b/token.c
@@ -116,6 +116,8 @@ tokenprint(const struct token *t)
 {
 	const char *str;
 
+	if (t->space)
+		fputc(' ', stdout);
 	switch (t->kind) {
 	case TIDENT:
 	case TNUMBER:
diff --git a/util.c b/util.c
index 1229f2c..63511a3 100644
--- a/util.c
+++ b/util.c
@@ -1,3 +1,4 @@
+#include
 #include
 #include
 #include
@@ -116,6 +117,19 @@ arrayaddbuf(struct array *a, const void *src, size_t n)
 	memcpy(arrayadd(a, n), src, n);
 }
 
+/* return a pointer to the last n bytes of the array, or NULL if it is empty */
+void *
+arraylast(struct array *a, size_t n)
+{
+	char *end;
+
+	if (!a->len)
+		return NULL;
+	assert(n <= a->len);
+	end = (char *)a->val + a->len;
+	return end - n;
+}
+
 void
 listinsert(struct list *list, struct list *new)
 {
diff --git a/util.h b/util.h
index 504662b..0081667 100644
--- a/util.h
+++ b/util.h
@@ -42,6 +42,7 @@ void listremove(struct list *);
 void *arrayadd(struct array *, size_t);
 void arrayaddptr(struct array *, void *);
 void arrayaddbuf(struct array *, const void *, size_t);
+void *arraylast(struct array *, size_t);
 #define arrayforeach(a, m) for (m = (a)->val; m != (void *)((char *)(a)->val + (a)->len); ++m)
 
 /* map */
-- 
cgit v1.2.3