diff options
-rw-r--r-- | README.md | 1 | ||||
-rw-r--r-- | cc.h | 20 | ||||
-rw-r--r-- | decl.c | 9 | ||||
-rw-r--r-- | expr.c | 157 | ||||
-rw-r--r-- | test/initializer-replace-local-string-wide.c | 22 | ||||
-rw-r--r-- | test/initializer-replace-local-string-wide.qbe | 61 | ||||
-rw-r--r-- | test/initializer-replace-static-string-wide.c | 20 | ||||
-rw-r--r-- | test/initializer-replace-static-string-wide.qbe | 3 | ||||
-rw-r--r-- | test/initializer-string-wide.c | 13 | ||||
-rw-r--r-- | test/initializer-string-wide.qbe | 122 |
10 files changed, 387 insertions, 41 deletions
@@ -88,7 +88,6 @@ specified in `config.h`. - Digraph and trigraph sequences ([6.4.6p3] and [5.2.1.1], will not be implemented). -- Wide string literals ([#35]). - Variable-length arrays ([#1]). - `volatile`-qualified types ([#7]). - `_Thread_local` storage-class specifier ([#5]). @@ -309,6 +309,15 @@ enum exprkind { EXPRTEMP, }; +struct stringlit { + size_t size; + union { + unsigned char *data; + uint_least16_t *data16; + uint_least32_t *data32; + }; +}; + struct expr { enum exprkind kind; /* whether this expression is an lvalue */ @@ -331,14 +340,7 @@ struct expr { int64_t i; double f; } constant; - struct { - union { - unsigned char *data; - uint_least16_t *data16; - uint_least32_t *data32; - }; - size_t size; - } string; + struct stringlit string; struct { struct expr *args; size_t nargs; @@ -474,6 +476,8 @@ extern struct scope filescope; /* expr */ +struct type *stringconcat(struct stringlit *, _Bool); + struct expr *expr(struct scope *); struct expr *assignexpr(struct scope *); struct expr *constexpr(struct scope *); @@ -747,7 +747,7 @@ addmember(struct structbuilder *b, struct qualtype mt, char *name, int align, un static bool staticassert(struct scope *s) { - struct expr *e; + struct stringlit msg; uint64_t c; if (!consume(T_STATIC_ASSERT)) @@ -755,11 +755,10 @@ staticassert(struct scope *s) expect(TLPAREN, "after _Static_assert"); c = intconstexpr(s, true); if (consume(TCOMMA)) { - e = assignexpr(s); - if (!e->decayed || e->base->kind != EXPRSTRING) - error(&tok.loc, "expected string literal after static assertion expression"); + tokencheck(&tok, TSTRINGLIT, "after static assertion expression"); + stringconcat(&msg, true); if (!c) - error(&tok.loc, "static assertion failed: %.*s", (int)e->base->string.size, e->base->string.data); + error(&tok.loc, "static assertion failed: %.*s", (int)(msg.size - 1), msg.data); } else if (!c) { error(&tok.loc, "static assertion failed"); } @@ -6,6 +6,7 @@ #include <stdlib.h> #include <string.h> #include "util.h" +#include "utf.h" #include "cc.h" static struct expr * @@ -358,11 +359,12 @@ isodigit(int c) return '0' <= c && c <= '8'; } -static unsigned -unescape(char **p) +static size_t +decodechar(const char *src, uint_least32_t *chr, bool *hexoct, const char *desc, struct location *loc) { - unsigned c; - char *s = *p; + uint_least32_t c; + size_t n; + const char *s = src; if (*s == '\\') { ++s; @@ -384,18 +386,133 @@ unescape(char **p) c = 0; do c = c * 16 + (*s > '9' ? 10 + tolower(*s) - 'a' : *s - '0'); while (isxdigit(*++s)); + if (hexoct) + *hexoct = true; break; default: assert(isodigit(*s)); c = 0; do c = c * 8 + (*s - '0'); while (isodigit(*++s)); + if (hexoct) + *hexoct = true; } } else { - c = (unsigned char)*s++; + n = utf8dec(&c, s, 4); + if (n == -1) + error(loc, "%s contains invalid UTF-8", desc); + s += n; } - *p = s; - return c; + *chr = c; + return s - src; +} + +static size_t +encodechar8(void *dst, uint_least32_t chr, bool hexoct) +{ + if (!hexoct) + return utf8enc(dst, chr); + *(unsigned char *)dst = chr; + return 1; +} + +static size_t +encodechar16(void *dst, uint_least32_t chr, bool hexoct) +{ + if (!hexoct) + return utf16enc(dst, chr) * sizeof(uint_least16_t); + *(uint_least16_t *)dst = chr; + return sizeof(uint_least16_t); +} + +static size_t +encodechar32(void *dst, uint_least32_t chr, bool hexoct) +{ + *(uint_least32_t *)dst = chr; + return sizeof(uint_least32_t); +} + +struct type * +stringconcat(struct stringlit *str, bool forceutf8) +{ + static struct array parts; + struct { + struct location loc; + char *str; + } *p; + int kind, newkind; + struct type *t; + size_t (*encodechar)(void *, uint_least32_t, bool); + char *src; + unsigned char *buf, *dst; + uint_least32_t chr; + bool hexoct; + size_t len, width; + + assert(tok.kind == TSTRINGLIT); + parts.len = 0; + len = 0; + kind = 0; + do { + src = tok.lit; + switch (*src) { + case 'u': if (src[1] == '8') ++src; /* fallthrough */ + case 'L': + case 'U': newkind = *src, ++src; break; + case '"': newkind = 0; break; + default: assert(0); + } + if (kind != newkind && kind && newkind) + error(&tok.loc, "adjacent string literals have differing prefixes"); + if (newkind) + kind = newkind; + p = arrayadd(&parts, sizeof(*p)); + p->loc = tok.loc; + p->str = src + 1; + len += strlen(src) - 2; + next(); + } while (tok.kind == TSTRINGLIT); + if (forceutf8 || kind == '8') + kind = 0; + ++len; /* null byte */ + switch (kind) { + case 0: t = &typechar; break; + case 'u': t = &typeushort; break; + case 'U': t = &typeuint; break; + case 'L': t = targ->typewchar; break; + } + switch (t->size) { + case 1: + width = 1; + encodechar = encodechar8; + buf = xreallocarray(NULL, len, 1); + str->data = buf; + break; + case 2: + width = sizeof(uint_least16_t); + encodechar = encodechar16; + buf = xreallocarray(NULL, len, width); + str->data16 = (uint_least16_t *)buf; + break; + case 4: + width = sizeof(uint_least32_t); + encodechar = encodechar32; + buf = xreallocarray(NULL, len, width); + str->data32 = (uint_least32_t *)buf; + break; + } + dst = buf; + arrayforeach(&parts, p) { + src = p->str; + while (*src != '"') { + hexoct = false; + src += decodechar(src, &chr, &hexoct, "string literal", &p->loc); + dst += encodechar(dst, chr, hexoct); + } + } + dst += encodechar(dst, 0, false); + str->size = (dst - buf) / width; + return t; } static struct expr * @@ -452,7 +569,7 @@ primaryexpr(struct scope *s) struct decl *d; struct type *t; char *src, *end; - unsigned char *dst; + uint_least32_t chr; int base; switch (tok.kind) { @@ -469,25 +586,10 @@ primaryexpr(struct scope *s) next(); break; case TSTRINGLIT: - e = mkexpr(EXPRSTRING, mkarraytype(&typechar, QUALNONE, 0), NULL); + e = mkexpr(EXPRSTRING, NULL, NULL); + t = stringconcat(&e->string, false); + e->type = mkarraytype(t, QUALNONE, e->string.size); e->lvalue = true; - e->string.size = 0; - e->string.data = NULL; - do { - e->string.data = xreallocarray(e->string.data, e->string.size + strlen(tok.lit) + 1, 1); - dst = e->string.data + e->string.size; - src = tok.lit; - if (*src != '"') - fatal("wide string literal not yet implemented"); - for (++src; *src != '"'; ++dst) - *dst = unescape(&src); - e->string.size = dst - e->string.data; - next(); - } while (tok.kind == TSTRINGLIT); - *dst = '\0'; - e->type->array.length = ++e->string.size; - e->type->size = e->type->array.length * e->type->base->size; - e->type->incomplete = false; e = decay(e); break; case TCHARCONST: @@ -500,7 +602,8 @@ primaryexpr(struct scope *s) } assert(*src == '\''); ++src; - e = mkconstexpr(t, unescape(&src)); + src += decodechar(src, &chr, NULL, "character constant", &tok.loc); + e = mkconstexpr(t, chr); if (*src != '\'') error(&tok.loc, "character constant contains more than one character: %c", *src); next(); diff --git a/test/initializer-replace-local-string-wide.c b/test/initializer-replace-local-string-wide.c new file mode 100644 index 0000000..366020f --- /dev/null +++ b/test/initializer-replace-local-string-wide.c @@ -0,0 +1,22 @@ +void f(void) { + struct { + unsigned short u[6]; + unsigned U[6]; + __typeof__(L' ') L[6]; + } x = { + .u[0] = u'x', + .u[4] = u'y', + .u = u"hello", + .u[1] = u'a', + + .U[0] = U'x', + .U[4] = U'y', + .U = U"hello", + .U[1] = U'a', + + .L[0] = L'x', + .L[4] = L'y', + .L = L"hello", + .L[1] = L'a', + }; +} diff --git a/test/initializer-replace-local-string-wide.qbe b/test/initializer-replace-local-string-wide.qbe new file mode 100644 index 0000000..97ac914 --- /dev/null +++ b/test/initializer-replace-local-string-wide.qbe @@ -0,0 +1,61 @@ +export +function $f() { +@start.1 + %.1 =l alloc4 60 +@body.2 + %.2 =l add %.1, 0 + storeh 104, %.2 + %.3 =l add %.1, 2 + storeh 101, %.3 + %.4 =l add %.1, 4 + storeh 108, %.4 + %.5 =l add %.1, 6 + storeh 108, %.5 + %.6 =l add %.1, 8 + storeh 111, %.6 + %.7 =l add %.1, 10 + storeh 0, %.7 + %.8 =l add %.1, 2 + storeh 97, %.8 + %.9 =l add %.1, 4 + storew 0, %.9 + %.10 =l add %.1, 8 + storew 0, %.10 + %.11 =l add %.1, 12 + storew 104, %.11 + %.12 =l add %.1, 16 + storew 101, %.12 + %.13 =l add %.1, 20 + storew 108, %.13 + %.14 =l add %.1, 24 + storew 108, %.14 + %.15 =l add %.1, 28 + storew 111, %.15 + %.16 =l add %.1, 32 + storew 0, %.16 + %.17 =l add %.1, 16 + storew 97, %.17 + %.18 =l add %.1, 20 + storew 0, %.18 + %.19 =l add %.1, 24 + storew 0, %.19 + %.20 =l add %.1, 28 + storew 0, %.20 + %.21 =l add %.1, 32 + storew 0, %.21 + %.22 =l add %.1, 36 + storew 104, %.22 + %.23 =l add %.1, 40 + storew 101, %.23 + %.24 =l add %.1, 44 + storew 108, %.24 + %.25 =l add %.1, 48 + storew 108, %.25 + %.26 =l add %.1, 52 + storew 111, %.26 + %.27 =l add %.1, 56 + storew 0, %.27 + %.28 =l add %.1, 40 + storew 97, %.28 + ret +} diff --git a/test/initializer-replace-static-string-wide.c b/test/initializer-replace-static-string-wide.c new file mode 100644 index 0000000..b6c348a --- /dev/null +++ b/test/initializer-replace-static-string-wide.c @@ -0,0 +1,20 @@ +struct { + unsigned short s[6]; +} u = { + .s = u"aα€😐", + .s[2] = u'£', +}; + +struct { + unsigned s[5]; +} U = { + .s = U"aα€😐", + .s[3] = U'😃', +}; + +struct { + __typeof__(L' ') s[5]; +} L = { + .s = L"aα€😐", + .s[3] = L'😃', +}; diff --git a/test/initializer-replace-static-string-wide.qbe b/test/initializer-replace-static-string-wide.qbe new file mode 100644 index 0000000..ab42886 --- /dev/null +++ b/test/initializer-replace-static-string-wide.qbe @@ -0,0 +1,3 @@ +export data $u = align 2 { h 97 945 163 55357 56848 0 , } +export data $U = align 4 { w 97 945 8364 128515 0 , } +export data $L = align 4 { w 97 945 8364 128515 0 , } diff --git a/test/initializer-string-wide.c b/test/initializer-string-wide.c new file mode 100644 index 0000000..85b8019 --- /dev/null +++ b/test/initializer-string-wide.c @@ -0,0 +1,13 @@ +char s[] = "aα€😀\xAA\xBBBB\xCCCCCCCC"; +char u8[] = u8"aα€😀\xAA\xBBBB\xCCCCCCCC"; +unsigned short u[] = u"aα€😀\xAA\xBBBB\xCCCCCCCC"; +unsigned U[] = U"aα€😀\xAA\xBBBB\xCCCCCCCC"; +__typeof__(L' ') L[] = L"aα€😀\xAA\xBBBB\xCCCCCCCC"; + +void f(void) { + char s[] = "aα€😀\xAA\xBBBB\xCCCCCCCC"; + char u8[] = u8"aα€😀\xAA\xBBBB\xCCCCCCCC"; + unsigned short u[] = u"aα€😀\xAA\xBBBB\xCCCCCCCC"; + unsigned U[] = U"aα€😀\xAA\xBBBB\xCCCCCCCC"; + __typeof__(L' ') L[] = L"aα€😀\xAA\xBBBB\xCCCCCCCC"; +} diff --git a/test/initializer-string-wide.qbe b/test/initializer-string-wide.qbe new file mode 100644 index 0000000..dc784d4 --- /dev/null +++ b/test/initializer-string-wide.qbe @@ -0,0 +1,122 @@ +export data $s = align 1 { b "a\316\261\342\202\254\360\237\230\200\252\273\314\000", } +export data $u8 = align 1 { b "a\316\261\342\202\254\360\237\230\200\252\273\314\000", } +export data $u = align 2 { h 97 945 8364 55357 56832 170 48059 52428 0 , } +export data $U = align 4 { w 97 945 8364 128512 170 48059 3435973836 0 , } +export data $L = align 4 { w 97 945 8364 128512 170 48059 3435973836 0 , } +export +function $f() { +@start.1 + %.1 =l alloc4 14 + %.16 =l alloc4 14 + %.31 =l alloc4 18 + %.41 =l alloc4 32 + %.50 =l alloc4 32 +@body.2 + %.2 =l add %.1, 0 + storeb 97, %.2 + %.3 =l add %.1, 1 + storeb 206, %.3 + %.4 =l add %.1, 2 + storeb 177, %.4 + %.5 =l add %.1, 3 + storeb 226, %.5 + %.6 =l add %.1, 4 + storeb 130, %.6 + %.7 =l add %.1, 5 + storeb 172, %.7 + %.8 =l add %.1, 6 + storeb 240, %.8 + %.9 =l add %.1, 7 + storeb 159, %.9 + %.10 =l add %.1, 8 + storeb 152, %.10 + %.11 =l add %.1, 9 + storeb 128, %.11 + %.12 =l add %.1, 10 + storeb 170, %.12 + %.13 =l add %.1, 11 + storeb 187, %.13 + %.14 =l add %.1, 12 + storeb 204, %.14 + %.15 =l add %.1, 13 + storeb 0, %.15 + %.17 =l add %.16, 0 + storeb 97, %.17 + %.18 =l add %.16, 1 + storeb 206, %.18 + %.19 =l add %.16, 2 + storeb 177, %.19 + %.20 =l add %.16, 3 + storeb 226, %.20 + %.21 =l add %.16, 4 + storeb 130, %.21 + %.22 =l add %.16, 5 + storeb 172, %.22 + %.23 =l add %.16, 6 + storeb 240, %.23 + %.24 =l add %.16, 7 + storeb 159, %.24 + %.25 =l add %.16, 8 + storeb 152, %.25 + %.26 =l add %.16, 9 + storeb 128, %.26 + %.27 =l add %.16, 10 + storeb 170, %.27 + %.28 =l add %.16, 11 + storeb 187, %.28 + %.29 =l add %.16, 12 + storeb 204, %.29 + %.30 =l add %.16, 13 + storeb 0, %.30 + %.32 =l add %.31, 0 + storeh 97, %.32 + %.33 =l add %.31, 2 + storeh 945, %.33 + %.34 =l add %.31, 4 + storeh 8364, %.34 + %.35 =l add %.31, 6 + storeh 55357, %.35 + %.36 =l add %.31, 8 + storeh 56832, %.36 + %.37 =l add %.31, 10 + storeh 170, %.37 + %.38 =l add %.31, 12 + storeh 48059, %.38 + %.39 =l add %.31, 14 + storeh 52428, %.39 + %.40 =l add %.31, 16 + storeh 0, %.40 + %.42 =l add %.41, 0 + storew 97, %.42 + %.43 =l add %.41, 4 + storew 945, %.43 + %.44 =l add %.41, 8 + storew 8364, %.44 + %.45 =l add %.41, 12 + storew 128512, %.45 + %.46 =l add %.41, 16 + storew 170, %.46 + %.47 =l add %.41, 20 + storew 48059, %.47 + %.48 =l add %.41, 24 + storew 3435973836, %.48 + %.49 =l add %.41, 28 + storew 0, %.49 + %.51 =l add %.50, 0 + storew 97, %.51 + %.52 =l add %.50, 4 + storew 945, %.52 + %.53 =l add %.50, 8 + storew 8364, %.53 + %.54 =l add %.50, 12 + storew 128512, %.54 + %.55 =l add %.50, 16 + storew 170, %.55 + %.56 =l add %.50, 20 + storew 48059, %.56 + %.57 =l add %.50, 24 + storew 3435973836, %.57 + %.58 =l add %.50, 28 + storew 0, %.58 + ret +} |