From f685cdc1cd6baa17879b15326f553409258d3f73 Mon Sep 17 00:00:00 2001
From: Michael Forney
Date: Thu, 21 Oct 2021 20:15:15 -0700
Subject: expr: Add support for UTF-8 character constants

---
 expr.c                 |  6 +++---
 scan.c                 | 15 ++++-----------
 test/char-const-u8.c   |  3 +++
 test/char-const-u8.qbe |  1 +
 4 files changed, 11 insertions(+), 14 deletions(-)
 create mode 100644 test/char-const-u8.c
 create mode 100644 test/char-const-u8.qbe

diff --git a/expr.c b/expr.c
index 27acbcf..6ff7dd4 100644
--- a/expr.c
+++ b/expr.c
@@ -596,11 +596,11 @@ primaryexpr(struct scope *s)
 		break;
 	case TCHARCONST:
 		src = tok.lit;
-		t = &typeint;
 		switch (*src) {
 		case 'L': ++src; t = targ->typewchar; break;
-		case 'u': ++src; t = &typeushort; break;
-		case 'U': ++src; t = &typeuint; break;
+		case 'u': ++src; t = *src == '8' ? ++src, &typeuchar : &typeushort; break;
+		case 'U': ++src; t = &typeuint; break;
+		default: t = &typeint;
 		}
 		assert(*src == '\'');
 		++src;
diff --git a/scan.c b/scan.c
index cd85d9d..bd0435c 100644
--- a/scan.c
+++ b/scan.c
@@ -388,18 +388,11 @@ again:
 	case 'u':
 		s->usebuf = true;
 		nextchar(s);
-		switch (s->chr) {
-		case '\'':
-			return charconst(s);
-		case '8':
-			if (s->buf.str[0] != 'u')
-				break;
+		if (s->buf.str[0] == 'u' && s->chr == '8')
 			nextchar(s);
-			if (s->chr != '"')
-				break;
-			/* fallthrough */
-		case '"':
-			return stringlit(s);
+		switch (s->chr) {
+		case '\'': return charconst(s);
+		case '"': return stringlit(s);
 		}
 		return ident(s);
 	case EOF:
diff --git a/test/char-const-u8.c b/test/char-const-u8.c
new file mode 100644
index 0000000..9f20452
--- /dev/null
+++ b/test/char-const-u8.c
@@ -0,0 +1,3 @@
+unsigned char u8 = u8'a';
+_Static_assert(__builtin_types_compatible_p(__typeof__(u8'b'), unsigned char),
+	"UTF-8 character constant has incorrect type");
diff --git a/test/char-const-u8.qbe b/test/char-const-u8.qbe
new file mode 100644
index 0000000..2d199a4
--- /dev/null
+++ b/test/char-const-u8.qbe
@@ -0,0 +1 @@
+export data $u8 = align 1 { b 97, }
-- 
cgit v1.2.3