aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Michael Forney <mforney@mforney.org> 2020-03-16 02:09:59 -0700
committer Michael Forney <mforney@mforney.org> 2020-03-17 15:22:51 -0700
commit bad98e94caf37db71f8114bdb8562a7caa4fb256 (patch)
tree e238ce199becccaf9f940685ba8cb27c2c74737d
parent 19b816b032dfc01a49f7f86ae7c411a0f15fd2bc (diff)
pp: Add support for macro definition and expansion
The token pasting operator `##` still needs to be implemented.
-rw-r--r-- cc.h 9
-rw-r--r-- main.c 1
-rw-r--r-- pp.c 499
-rw-r--r-- scan.c 1
-rw-r--r-- test/preprocess-macro-function-paren.c 4
-rw-r--r-- test/preprocess-macro-function-paren.pp 2
-rw-r--r-- test/preprocess-macro-function.c 2
-rw-r--r-- test/preprocess-macro-function.pp 1
-rw-r--r-- test/preprocess-macro-hide.c 2
-rw-r--r-- test/preprocess-macro-hide.pp 1
-rw-r--r-- test/preprocess-macro-object.c 2
-rw-r--r-- test/preprocess-macro-object.pp 1
-rw-r--r-- test/preprocess-macro-stringize.c 2
-rw-r--r-- test/preprocess-macro-stringize.pp 1
-rw-r--r-- test/preprocess-standard-example-1.c 4
-rw-r--r-- test/preprocess-standard-example-1.pp 1
-rw-r--r-- test/preprocess-standard-example-2.c 20
-rw-r--r-- test/preprocess-standard-example-2.pp 4
-rw-r--r-- test/preprocess-undef.c 3
-rw-r--r-- test/preprocess-undef.pp 1
-rw-r--r-- token.c 2
-rw-r--r-- util.c 10
-rw-r--r-- util.h 1
23 files changed, 551 insertions, 23 deletions
diff --git a/cc.h b/cc.h
index d938ade..85f739a 100644
--- a/cc.h
+++ b/cc.h
@@ -121,6 +121,8 @@ struct location {
struct token {
enum tokenkind kind;
+ /* whether or not the token is ineligible for expansion */
+ _Bool hide;
/* whether or not the token was preceeded by a space */
_Bool space;
struct location loc;
@@ -384,6 +386,13 @@ void scan(struct token *);
/* preprocessor */
+enum ppflags { /* bit flags that adjust how the preprocessor delivers tokens */
+ /* preserve newlines in preprocessor output
+  *
+  * While this bit is set, next() returns TNEWLINE tokens instead of
+  * skipping them. main() sets it for preprocess-only runs and
+  * directive() sets it for the duration of a directive.
+  */
+ PPNEWLINE = 1 << 0,
+};
+
+extern enum ppflags ppflags;
+
void ppinit(void);
void next(void);
diff --git a/main.c b/main.c
index a147c28..004df85 100644
--- a/main.c
+++ b/main.c
@@ -50,6 +50,7 @@ main(int argc, char *argv[])
ppinit();
if (pponly) {
+ ppflags |= PPNEWLINE;
while (tok.kind != TEOF) {
tokenprint(&tok);
next();
diff --git a/pp.c b/pp.c
index 4831cd6..87cb810 100644
--- a/pp.c
+++ b/pp.c
@@ -1,3 +1,4 @@
+#include <assert.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
@@ -7,14 +8,468 @@
#include "util.h"
#include "cc.h"
-static struct token pending;
+struct macroparam { /* one named parameter of a function-like macro */
+ char *name;
+ /* whether or not the argument needs to be stringized
+  * (define() sets this when the parameter follows a '#' in the macro
+  * body; expand() then builds the string while reading the argument) */
+ bool stringize;
+};
+
+struct macroarg { /* tokens supplied for one parameter of a macro invocation */
+ struct token *token; /* first token of this argument inside the buffer shared by all arguments */
+ size_t ntoken; /* number of tokens in this argument; may be 0 */
+ /* stringized argument
+  * (a TSTRINGLIT token filled in by expand(), only for parameters whose
+  * stringize flag is set) */
+ struct token str;
+};
+
+struct macro { /* a macro definition plus the state of its current expansion */
+ enum {
+ MACROOBJ, /* object-like: #define NAME body */
+ MACROFUNC, /* function-like: #define NAME(params) body */
+ } kind;
+ char *name;
+ /* whether or not this macro is ineligible for expansion
+  * (set by expand() while the macro is being expanded, cleared again
+  * by macrodone()) */
+ bool hide;
+ /* parameters of function-like macro */
+ struct macroparam *param;
+ size_t nparam;
+ /* argument tokens of macro invocation
+  * (assigned by expand() for each invocation and freed by macrodone()) */
+ struct macroarg *arg;
+ /* replacement list */
+ struct token *token;
+ size_t ntoken; /* number of tokens in the replacement list */
+};
+
+struct frame { /* one entry of the ctx stack: a run of tokens still to be delivered */
+ struct token *token; /* next token this frame will deliver */
+ size_t ntoken; /* number of tokens left in this frame */
+ struct macro *macro; /* macro whose replacement list this frame walks, or NULL */
+};
+
+enum ppflags ppflags;
+
+static struct array ctx;
+static struct map *macros;
+/* number of macros currently undergoing expansion */
+static size_t macrodepth;
void
ppinit(void)
{
+ macros = mkmap(64); /* create the macro table before the first token is read; 64 is presumably an initial size hint — confirm against mkmap */
next();
}
+/* check if two macro definitions are equal, as in C11 6.10.3p2
+ *
+ * Two definitions are equal when they have the same kind, the same
+ * parameter names in the same order (function-like macros only), and
+ * replacement lists of the same length whose tokens agree pairwise in
+ * kind, spelling and whitespace separation. C11 6.10.3p1 treats all
+ * whitespace separations as identical, so only the presence of preceding
+ * whitespace (token.space) is compared.
+ *
+ * Returns true when m2 is an acceptable redefinition of m1.
+ */
+static bool
+macroequal(struct macro *m1, struct macro *m2)
+{
+ struct token *t1, *t2;
+ size_t i;
+
+ if (m1->kind != m2->kind)
+ return false;
+ if (m1->kind == MACROFUNC) {
+ if (m1->nparam != m2->nparam)
+ return false;
+ for (i = 0; i < m1->nparam; ++i) {
+ if (strcmp(m1->param[i].name, m2->param[i].name) != 0)
+ return false;
+ }
+ }
+ if (m1->ntoken != m2->ntoken)
+ return false;
+ for (t1 = m1->token, t2 = m2->token; t1 < m1->token + m1->ntoken; ++t1, ++t2) {
+ if (t1->kind != t2->kind)
+ return false;
+ /* "a b" and "ab" are different replacement lists */
+ if (t1->space != t2->space)
+ return false;
+ /* never hand a null spelling to strcmp */
+ if (!t1->lit != !t2->lit)
+ return false;
+ if (t1->lit && strcmp(t1->lit, t2->lit) != 0)
+ return false;
+ }
+ return true;
+}
+
+/* find the index of a macro parameter with the given name
+ *
+ * Returns the zero-based position of `name` in m->param, or (size_t)-1
+ * when no parameter has that name. Callers compare the result with -1.
+ */
+static size_t
+macroparam(struct macro *m, const char *name)
+{
+ size_t i;
+
+ for (i = 0; i < m->nparam; ++i) {
+ if (strcmp(m->param[i].name, name) == 0)
+ return i;
+ }
+ return -1; /* converts to the largest size_t value, used as "not found" */
+}
+
+/* lookup a macro by name
+ *
+ * Builds a map key from the NUL-terminated `name` and returns what
+ * mapget() finds in the macro table. expand() treats a null result as
+ * "no such macro".
+ */
+static struct macro *
+macroget(char *name)
+{
+ struct mapkey k;
+
+ mapkey(&k, name, strlen(name));
+ return mapget(macros, &k);
+}
+/* finish the expansion of macro m
+ *
+ * Makes the macro eligible for expansion again, releases the argument
+ * storage of a function-like invocation, and decrements macrodepth.
+ * Called by ctxframe() when the macro's frame runs out of tokens.
+ */
+static void
+macrodone(struct macro *m)
+{
+ m->hide = false;
+ if (m->kind == MACROFUNC && m->nparam > 0) {
+ free(m->arg[0].token); /* expand() lays all arguments out in one buffer starting at arg[0].token */
+ free(m->arg);
+ }
+ --macrodepth;
+}
+/* take the next token from frame f
+ *
+ * Returns a pointer to the frame's current token and advances the frame
+ * past it (one fewer token remaining). It does not check that any tokens
+ * are left; that is up to the caller.
+ */
+static struct token *
+framenext(struct frame *f)
+{
+ return f->ntoken--, f->token++; /* both operands post-modify; the value is the old f->token */
+}
+
+/* push a new context frame
+ *
+ * t, n: the tokens the frame will deliver and how many there are
+ * m:    the macro whose expansion the frame represents, or NULL
+ *
+ * Returns the new frame, which becomes the top of ctx. The tokens are
+ * not copied, so `t` must stay valid until the frame is used up.
+ */
+static struct frame *
+ctxpush(struct token *t, size_t n, struct macro *m)
+{
+ struct frame *f;
+
+ f = arrayadd(&ctx, sizeof(*f));
+ f->token = t;
+ f->ntoken = n;
+ f->macro = m;
+ return f;
+}
+
+/* get the next frame with tokens left
+ *
+ * Starts at the top of ctx and pops every frame that has run out of
+ * tokens. Popping a frame that belongs to a macro ends that macro's
+ * expansion through macrodone(). Returns the first frame that still has
+ * tokens, or NULL once ctx is empty.
+ */
+static struct frame *
+ctxframe(void)
+{
+ struct frame *f;
+
+ /* ctx.len counts bytes, so dropping one frame subtracts sizeof(*f) */
+ for (f = arraylast(&ctx, sizeof(*f)); ctx.len; --f, ctx.len -= sizeof(*f)) {
+ if (f->ntoken)
+ return f;
+ if (f->macro)
+ macrodone(f->macro);
+ }
+ return NULL;
+}
+
+/* get the next token from the context
+ *
+ * Returns the next token of the topmost non-empty frame, or NULL when
+ * ctx holds no more tokens. Inside the replacement list of a
+ * function-like macro, parameters are substituted on the way:
+ *   - '#' followed by a parameter yields that argument's stringized token
+ *   - a parameter name yields the argument's tokens, via a new frame
+ *   - a parameter whose argument is empty is skipped
+ */
+static struct token *
+ctxnext(void)
+{
+ struct frame *f;
+ struct token *t;
+ struct macro *m;
+ size_t i;
+
+again:
+ f = ctxframe();
+ if (!f)
+ return NULL;
+ m = f->macro;
+ if (m && m->kind == MACROFUNC) {
+ /* try to expand macro parameter */
+ switch (f->token->kind) {
+ case THASH:
+ framenext(f); /* drop the '#' itself */
+ t = framenext(f);
+ assert(t && t->kind == TIDENT); /* define() rejects '#' not followed by a parameter name */
+ i = macroparam(m, t->lit);
+ assert(i != -1);
+ f = ctxpush(&m->arg[i].str, 1, NULL); /* deliver the single string-literal token */
+ break;
+ case TIDENT:
+ i = macroparam(m, f->token->lit);
+ if (i == -1)
+ break; /* ordinary identifier: return it unchanged below */
+ framenext(f); /* consume the parameter name */
+ if (m->arg[i].ntoken == 0)
+ goto again; /* empty argument contributes no tokens */
+ f = ctxpush(m->arg[i].token, m->arg[i].ntoken, NULL);
+ break;
+ }
+ /* XXX: token concatenation */
+ }
+ return framenext(f);
+}
+/* parse the rest of a #define directive and record the macro
+ *
+ * On entry the global tok holds the token after "define", which must be
+ * the macro name. A '(' that follows the name without whitespace starts
+ * a parameter list and makes the macro function-like; otherwise it is
+ * object-like. The replacement list runs up to the TNEWLINE, which is
+ * left in tok on return. Redefining a macro with a different definition
+ * is reported through error().
+ */
+static void
+define(void)
+{
+ struct token *t;
+ struct macro *m;
+ struct macroparam *p;
+ struct array params = {0}, repl = {0};
+ struct mapkey k;
+ void **entry;
+ bool stringize;
+ size_t i;
+
+ if (tok.kind != TIDENT)
+ error(&tok.loc, "expected identifier after #define");
+ m = xmalloc(sizeof(*m));
+ m->name = tok.lit;
+ m->hide = false;
+ t = arrayadd(&repl, sizeof(*t));
+ scan(t);
+ if (t->kind == TLPAREN && !t->space) { /* '(' directly after the name */
+ m->kind = MACROFUNC;
+ /* read macro parameter names */
+ while (scan(&tok), tok.kind == TIDENT) {
+ p = arrayadd(&params, sizeof(*p));
+ p->name = tok.lit;
+ p->stringize = false;
+ if (scan(&tok), tok.kind != TCOMMA)
+ break;
+ }
+ if (tok.kind != TRPAREN)
+ error(&tok.loc, "expected ')' after macro parameter list");
+ scan(t); /* first token in replacement list */
+ } else {
+ m->kind = MACROOBJ;
+ }
+
+ /* read macro body */
+ m->param = params.val;
+ m->nparam = params.len / sizeof(m->param[0]);
+ t->space = false; /* clear the leading-space flag of the first body token */
+ while (t->kind != TNEWLINE) {
+ if (t->kind == THASHHASH)
+ error(&t->loc, "'##' operator is not yet implemented");
+ stringize = t->kind == THASH; /* remember '#' so the token after it can be checked */
+ t = arrayadd(&repl, sizeof(*t));
+ scan(t);
+ if (stringize && m->kind == MACROFUNC) {
+ if (t->kind != TIDENT)
+ error(&t->loc, "expected macro parameter name after '#' operator");
+ i = macroparam(m, t->lit);
+ if (i == -1)
+ error(&t->loc, "'%s' is not a macro parameter name", t->lit);
+ m->param[i].stringize = true;
+ }
+ }
+ m->token = repl.val;
+ m->ntoken = repl.len / sizeof(*t) - 1; /* the final TNEWLINE is not part of the body */
+ tok = *t; /* hand the newline back so directive() can verify the line ended */
+
+ mapkey(&k, m->name, strlen(m->name));
+ entry = mapput(macros, &k);
+ if (*entry && !macroequal(m, *entry))
+ error(&tok.loc, "redefinition of macro '%s'", m->name);
+ *entry = m;
+}
+/* handle the rest of an #undef directive
+ *
+ * Expects the macro name in the global tok. If a macro with that name is
+ * recorded, its parameter and replacement lists are freed and its table
+ * slot is cleared. Afterwards the next token is scanned into tok.
+ *
+ * NOTE(review): neither the struct macro itself nor m->name is freed
+ * here, and `name` is only freed when a macro existed. This presumably
+ * follows from how the map keeps key pointers — confirm against the map
+ * implementation before changing it.
+ */
+static void
+undef(void)
+{
+ char *name;
+ struct mapkey k;
+ void **entry;
+ struct macro *m;
+
+ name = tokencheck(&tok, TIDENT, "after #undef");
+ mapkey(&k, name, strlen(name));
+ entry = mapput(macros, &k);
+ m = *entry;
+ if (m) {
+ free(name);
+ free(m->param);
+ free(m->token);
+ *entry = NULL; /* macroget() now reports the name as undefined */
+ }
+ scan(&tok);
+}
+/* process one preprocessing directive
+ *
+ * Called after a '#' at the start of a line has been scanned. Reads the
+ * directive name into the global tok and dispatches on it. Only #define
+ * and #undef are implemented; every other name is reported through
+ * error(). On return tok holds the TNEWLINE that ends the directive.
+ *
+ * NOTE(review): PPNEWLINE is set for the duration of the directive, but
+ * the handlers visible here read tokens with scan() and never consult
+ * it — presumably for directives that will call next(); confirm.
+ */
+static void
+directive(void)
+{
+ enum ppflags oldflags;
+ char *name;
+
+ scan(&tok);
+ if (tok.kind == TNEWLINE)
+ return; /* empty directive */
+ oldflags = ppflags;
+ ppflags |= PPNEWLINE;
+ name = tokencheck(&tok, TIDENT, "or newline after '#'");
+ if (strcmp(name, "if") == 0) {
+ error(&tok.loc, "#if directive is not implemented");
+ } else if (strcmp(name, "ifdef") == 0) {
+ error(&tok.loc, "#ifdef directive is not implemented");
+ } else if (strcmp(name, "ifndef") == 0) {
+ error(&tok.loc, "#ifndef directive is not implemented");
+ } else if (strcmp(name, "elif") == 0) {
+ error(&tok.loc, "#elif directive is not implemented");
+ } else if (strcmp(name, "endif") == 0) {
+ error(&tok.loc, "#endif directive is not implemented");
+ } else if (strcmp(name, "include") == 0) {
+ error(&tok.loc, "#include directive is not implemented");
+ } else if (strcmp(name, "define") == 0) {
+ scan(&tok); /* move to the macro name */
+ define();
+ } else if (strcmp(name, "undef") == 0) {
+ scan(&tok); /* move to the macro name */
+ undef();
+ } else if (strcmp(name, "line") == 0) {
+ error(&tok.loc, "#line directive is not implemented");
+ } else if (strcmp(name, "error") == 0) {
+ error(&tok.loc, "#error directive is not implemented");
+ } else if (strcmp(name, "pragma") == 0) {
+ error(&tok.loc, "#pragma directive is not implemented");
+ } else {
+ error(&tok.loc, "invalid preprocessor directive #%s", name);
+ }
+ free(name);
+ tokencheck(&tok, TNEWLINE, "after preprocessing directive"); /* nothing may follow the directive on its line */
+ ppflags = oldflags;
+}
+
+/* get the next token without expanding it
+ *
+ * Scans into *t. A '#' that is the first token on a line starts a
+ * directive, which is processed here; scanning then continues until an
+ * ordinary token has been read into *t.
+ *
+ * The line-start state is kept across calls and is derived from the
+ * token just scanned into *t. The caller may pass a token other than
+ * the global tok (peekparen() does), so tok must not be consulted here.
+ *
+ * NOTE(review): directive() scans into the global tok, so when
+ * t != &tok a directive handled here overwrites whatever the caller
+ * still holds in tok — confirm callers tolerate that.
+ */
+static void
+nextinto(struct token *t)
+{
+ static bool newline = true;
+
+ for (;;) {
+ scan(t);
+ if (newline && t->kind == THASH) {
+ directive();
+ } else {
+ newline = t->kind == TNEWLINE;
+ break;
+ }
+ }
+}
+/* get the next unexpanded token from wherever it is available
+ *
+ * Tokens queued in ctx come first. When ctx is empty, the next token is
+ * read from the scanner into the global tok and &tok is returned.
+ */
+static struct token *
+rawnext(void)
+{
+ struct token *t;
+
+ t = ctxnext();
+ if (!t) {
+ t = &tok;
+ nextinto(t);
+ }
+ return t;
+}
+/* check whether the next token is '(', consuming it if so
+ *
+ * Returns true when the upcoming token is TLPAREN, which is then
+ * consumed. Otherwise returns false and makes the tokens it looked at
+ * available again:
+ *   - a token taken from ctx is put back by stepping its frame back
+ *   - tokens read from the scanner (any newlines plus the first other
+ *     token) are kept in a static buffer and pushed as a new frame
+ * Newlines skipped before a '(' read from the scanner are discarded.
+ */
+static bool
+peekparen(void)
+{
+ static struct array pending;
+ struct token *t;
+ struct frame *f;
+
+ t = ctxnext();
+ if (t) {
+ if (t->kind == TLPAREN)
+ return true;
+ f = arraylast(&ctx, sizeof(*f));
+ --f->token; /* undo the framenext() done by ctxnext() */
+ ++f->ntoken;
+ return false;
+ }
+ pending.len = 0; /* reuse the buffer from the previous call */
+ do t = arrayadd(&pending, sizeof(*t)), nextinto(t);
+ while (t->kind == TNEWLINE);
+ if (t->kind == TLPAREN)
+ return true;
+ ctxpush(pending.val, pending.len / sizeof(*t), NULL);
+ return false;
+}
+/* append the spelling of token t to the string being built in buf
+ *
+ * Uses the token's own text when it has one, otherwise the fixed
+ * spelling of its kind from tokstr. No separator or terminator is
+ * added here.
+ */
+static void
+stringize(struct array *buf, struct token *t)
+{
+ const char *lit;
+
+ lit = t->lit ? t->lit : tokstr[t->kind];
+ /* XXX: double escape string literal */
+ arrayaddbuf(buf, lit, strlen(lit));
+}
+/* try to start the expansion of the macro named by token t
+ *
+ * Returns false when t is left alone: it is not an identifier, names no
+ * macro, names a macro that is hidden, or names a function-like macro
+ * that is not followed by '('. Returns true when a frame for the
+ * macro's replacement list has been pushed onto ctx; the caller then
+ * drops t and keeps reading.
+ *
+ * For a function-like macro the arguments are read first. Each argument
+ * is macro-expanded while it is collected, and its unexpanded spelling
+ * is gathered into a string if the parameter is used with '#'.
+ *
+ * Note that the local array `tok` shadows the global token `tok` in
+ * this function.
+ */
+static bool
+expand(struct token *t)
+{
+ struct macro *m;
+ struct macroarg *arg;
+ struct array str, tok;
+ size_t i, depth, paren;
+
+ if (t->kind != TIDENT)
+ return false;
+ m = macroget(t->lit);
+ if (!m || m->hide || t->hide) {
+ t->hide = true; /* mark the token so a later pass over it does not expand it either */
+ return false;
+ }
+ if (m->kind == MACROFUNC) {
+ if (!peekparen())
+ return false; /* name without '(' is not an invocation */
+ /* read macro arguments */
+ paren = 0;
+ depth = macrodepth;
+ tok = (struct array){0};
+ if (m->nparam > 0) {
+ arg = xreallocarray(NULL, m->nparam, sizeof(*arg));
+ } else {
+ arg = NULL;
+ t = rawnext(); /* with no parameters the next token should be the ')' checked below */
+ }
+ for (i = 0; i < m->nparam && t->kind != TRPAREN; ++i) {
+ if (m->param[i].stringize) {
+ str = (struct array){0};
+ arrayaddbuf(&str, "\"", 1); /* opening quote */
+ }
+ arg[i].ntoken = 0;
+ for (;;) {
+ t = rawnext();
+ if (t->kind == TEOF)
+ error(&t->loc, "EOF when reading macro parameters");
+ if (macrodepth <= depth) { /* token was not produced by a macro expanded inside this argument */
+ /* adjust current macro depth, in case it got shallower */
+ depth = macrodepth;
+ if (paren == 0 && (t->kind == TCOMMA || t->kind == TRPAREN))
+ break; /* end of this argument */
+ switch (t->kind) {
+ case TLPAREN: ++paren; break;
+ case TRPAREN: --paren; break;
+ }
+ if (m->param[i].stringize)
+ stringize(&str, t);
+ }
+ if (!expand(t)) { /* tokens that start a nested expansion are replaced by its result */
+ arrayaddbuf(&tok, t, sizeof(*t));
+ ++arg[i].ntoken;
+ }
+ }
+ if (m->param[i].stringize) {
+ arrayaddbuf(&str, "\"", 2); /* closing quote together with the terminating NUL */
+ arg[i].str = (struct token){
+ .kind = TSTRINGLIT,
+ .lit = str.val,
+ };
+ }
+ }
+ if (i < m->nparam)
+ error(&t->loc, "not enough arguments for macro '%s'", m->name);
+ tokencheck(t, TRPAREN, "after macro arguments");
+ t = tok.val;
+ for (i = 0; i < m->nparam; ++i) { /* slice the shared buffer into per-argument ranges */
+ arg[i].token = t;
+ t += arg[i].ntoken;
+ }
+ m->arg = arg;
+ }
+ ctxpush(m->token, m->ntoken, m);
+ m->hide = true; /* no recursive expansion until macrodone() runs */
+ ++macrodepth;
+ return true;
+}
+
static void
keyword(struct token *tok)
{
@@ -98,36 +553,34 @@ keyword(struct token *tok)
}
}
-static void
-nextinto(struct token *t)
-{
- do scan(t);
- while (t->kind == TNEWLINE);
- if (t->kind == TIDENT)
- keyword(t);
-}
-
void
next(void)
{
- if (pending.kind) {
- tok = pending;
- pending.kind = TNONE;
- } else {
- nextinto(&tok);
- }
+ struct token *t;
+
+ /* keep reading while the token started a macro expansion, or is a newline that is not being preserved */
+ do t = rawnext();
+ while (expand(t) || t->kind == TNEWLINE && !(ppflags & PPNEWLINE));
+ tok = *t; /* t may point into a context frame; copy it into the global token */
+ if (tok.kind == TIDENT)
+ keyword(&tok); /* identifiers are passed through keyword() only after macro expansion */
}
bool
peek(int kind)
{
- if (!pending.kind)
- nextinto(&pending);
- if (pending.kind != kind)
- return false;
- pending.kind = TNONE;
- nextinto(&tok);
- return true;
+ static struct token pending; /* static: ctx keeps a pointer to it after this function returns */
+ struct token old;
+
+ old = tok;
+ next(); /* look at the token after the current one */
+ if (tok.kind == kind) {
+ next(); /* matched: consume it and move on to the token after it */
+ return true;
+ }
+ pending = tok;
+ tok = old; /* no match: restore the current token */
+ ctxpush(&pending, 1, NULL); /* queue the looked-at token so the next call to next() delivers it again */
+ return false;
}
char *
diff --git a/scan.c b/scan.c
index 557b595..c9eb5ea 100644
--- a/scan.c
+++ b/scan.c
@@ -472,4 +472,5 @@ scan(struct token *t)
t->lit = NULL;
}
t->space = scanner->sawspace;
+ t->hide = false;
}
diff --git a/test/preprocess-macro-function-paren.c b/test/preprocess-macro-function-paren.c
new file mode 100644
index 0000000..c7c7639
--- /dev/null
+++ b/test/preprocess-macro-function-paren.c
@@ -0,0 +1,4 @@
+#define g(x) [x]
+#define f(a) g a (def)
+f((abc))
+f()
diff --git a/test/preprocess-macro-function-paren.pp b/test/preprocess-macro-function-paren.pp
new file mode 100644
index 0000000..6d86709
--- /dev/null
+++ b/test/preprocess-macro-function-paren.pp
@@ -0,0 +1,2 @@
+[abc] (def)
+[def]
diff --git a/test/preprocess-macro-function.c b/test/preprocess-macro-function.c
new file mode 100644
index 0000000..87b4c07
--- /dev/null
+++ b/test/preprocess-macro-function.c
@@ -0,0 +1,2 @@
+#define f(a, b) a, abc, b
+f(foo, bar)
diff --git a/test/preprocess-macro-function.pp b/test/preprocess-macro-function.pp
new file mode 100644
index 0000000..c8109cd
--- /dev/null
+++ b/test/preprocess-macro-function.pp
@@ -0,0 +1 @@
+foo, abc, bar
diff --git a/test/preprocess-macro-hide.c b/test/preprocess-macro-hide.c
new file mode 100644
index 0000000..22671c7
--- /dev/null
+++ b/test/preprocess-macro-hide.c
@@ -0,0 +1,2 @@
+#define foo foo bar
+foo
diff --git a/test/preprocess-macro-hide.pp b/test/preprocess-macro-hide.pp
new file mode 100644
index 0000000..d675fa4
--- /dev/null
+++ b/test/preprocess-macro-hide.pp
@@ -0,0 +1 @@
+foo bar
diff --git a/test/preprocess-macro-object.c b/test/preprocess-macro-object.c
new file mode 100644
index 0000000..f4c8b21
--- /dev/null
+++ b/test/preprocess-macro-object.c
@@ -0,0 +1,2 @@
+#define foo bar
+foo
diff --git a/test/preprocess-macro-object.pp b/test/preprocess-macro-object.pp
new file mode 100644
index 0000000..5716ca5
--- /dev/null
+++ b/test/preprocess-macro-object.pp
@@ -0,0 +1 @@
+bar
diff --git a/test/preprocess-macro-stringize.c b/test/preprocess-macro-stringize.c
new file mode 100644
index 0000000..528c88e
--- /dev/null
+++ b/test/preprocess-macro-stringize.c
@@ -0,0 +1,2 @@
+#define stringize(a) #a
+stringize(hello)
diff --git a/test/preprocess-macro-stringize.pp b/test/preprocess-macro-stringize.pp
new file mode 100644
index 0000000..3580093
--- /dev/null
+++ b/test/preprocess-macro-stringize.pp
@@ -0,0 +1 @@
+"hello"
diff --git a/test/preprocess-standard-example-1.c b/test/preprocess-standard-example-1.c
new file mode 100644
index 0000000..84447e0
--- /dev/null
+++ b/test/preprocess-standard-example-1.c
@@ -0,0 +1,4 @@
+/* C11 6.10.3.4p4 */
+#define f(a) a*g
+#define g(a) f(a)
+f(2)(9)
diff --git a/test/preprocess-standard-example-1.pp b/test/preprocess-standard-example-1.pp
new file mode 100644
index 0000000..738d396
--- /dev/null
+++ b/test/preprocess-standard-example-1.pp
@@ -0,0 +1 @@
+2*9*g
diff --git a/test/preprocess-standard-example-2.c b/test/preprocess-standard-example-2.c
new file mode 100644
index 0000000..0deb2ad
--- /dev/null
+++ b/test/preprocess-standard-example-2.c
@@ -0,0 +1,20 @@
+/* C11 6.10.3.5p5 with token concatenation disabled for now */
+#define x 3
+#define f(a) f(x * (a))
+#undef x
+#define x 2
+#define g f
+#define z z[0]
+#define h g(~
+#define m(a) a(w)
+#define w 0,1
+#define t(a) a
+#define p() int
+#define q(x) x
+//#define r(x,y) x ## y
+#define str(x) # x
+f(y+1) + f(f(z)) % t(t(g)(0) + t)(1);
+g(x+(3,4)-w) | h 5) & m
+ (f)^m(m);
+p() i[q()] = { q(1), /*r(2,3), r(4,), r(,5), r(,)*/ };
+char c[2][6] = { str(hello), str() };
diff --git a/test/preprocess-standard-example-2.pp b/test/preprocess-standard-example-2.pp
new file mode 100644
index 0000000..3a52eb0
--- /dev/null
+++ b/test/preprocess-standard-example-2.pp
@@ -0,0 +1,4 @@
+f(2 * (y+1)) +f(2 * (f(2 * (z[0])))) %f(2 * (0)) + t(1);
+f(2 * (2+(3,4)-0,1)) |f(2 * (~ 5)) &f(2 * (0,1))^m(0,1);
+int i[] = {1, };
+char c[2][6] = {"hello","" };
diff --git a/test/preprocess-undef.c b/test/preprocess-undef.c
new file mode 100644
index 0000000..c43f9ea
--- /dev/null
+++ b/test/preprocess-undef.c
@@ -0,0 +1,3 @@
+#define foo bar
+#undef foo
+foo
diff --git a/test/preprocess-undef.pp b/test/preprocess-undef.pp
new file mode 100644
index 0000000..257cc56
--- /dev/null
+++ b/test/preprocess-undef.pp
@@ -0,0 +1 @@
+foo
diff --git a/token.c b/token.c
index 6d4484d..dc2df02 100644
--- a/token.c
+++ b/token.c
@@ -116,6 +116,8 @@ tokenprint(const struct token *t)
{
const char *str;
+ if (t->space)
+ fputc(' ', stdout);
switch (t->kind) {
case TIDENT:
case TNUMBER:
diff --git a/util.c b/util.c
index 1229f2c..63511a3 100644
--- a/util.c
+++ b/util.c
@@ -1,3 +1,4 @@
+#include <assert.h>
#include <errno.h>
#include <stdarg.h>
#include <stdint.h>
@@ -116,6 +117,15 @@ arrayaddbuf(struct array *a, const void *src, size_t n)
memcpy(arrayadd(a, n), src, n);
}
+void *
+arraylast(struct array *a, size_t n) /* pointer to the last n bytes stored in a, or NULL if a is empty */
+{
+ if (a->len == 0)
+ return NULL;
+ assert(n <= a->len); /* a->len is a byte count; the array must hold at least one n-byte element */
+ return (char *)a->val + a->len - n;
+}
+
void
listinsert(struct list *list, struct list *new)
{
diff --git a/util.h b/util.h
index 504662b..0081667 100644
--- a/util.h
+++ b/util.h
@@ -42,6 +42,7 @@ void listremove(struct list *);
void *arrayadd(struct array *, size_t);
void arrayaddptr(struct array *, void *);
void arrayaddbuf(struct array *, const void *, size_t);
+void *arraylast(struct array *, size_t);
#define arrayforeach(a, m) for (m = (a)->val; m != (void *)((char *)(a)->val + (a)->len); ++m)
/* map */