about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- README.md 1
-rw-r--r-- cc.h 20
-rw-r--r-- decl.c 9
-rw-r--r-- expr.c 157
-rw-r--r-- test/initializer-replace-local-string-wide.c 22
-rw-r--r-- test/initializer-replace-local-string-wide.qbe 61
-rw-r--r-- test/initializer-replace-static-string-wide.c 20
-rw-r--r-- test/initializer-replace-static-string-wide.qbe 3
-rw-r--r-- test/initializer-string-wide.c 13
-rw-r--r-- test/initializer-string-wide.qbe 122
10 files changed, 387 insertions, 41 deletions
diff --git a/README.md b/README.md
index ea64226..8788e62 100644
--- a/README.md
+++ b/README.md
@@ -88,7 +88,6 @@ specified in `config.h`.
- Digraph and trigraph sequences ([6.4.6p3] and [5.2.1.1], will not
be implemented).
-- Wide string literals ([#35]).
- Variable-length arrays ([#1]).
- `volatile`-qualified types ([#7]).
- `_Thread_local` storage-class specifier ([#5]).
diff --git a/cc.h b/cc.h
index 51b8f86..f98f6aa 100644
--- a/cc.h
+++ b/cc.h
@@ -309,6 +309,15 @@ enum exprkind {
EXPRTEMP,
};
+/*
+ * Decoded contents of a string literal, as filled in by stringconcat().
+ * stringconcat() sets exactly one member of the union, selected by the
+ * size of the literal's element type.
+ */
+struct stringlit {
+ /* number of elements; stringconcat() counts the terminating null too */
+ size_t size;
+ union {
+ unsigned char *data; /* used when the element type has size 1 */
+ uint_least16_t *data16; /* used when the element type has size 2 */
+ uint_least32_t *data32; /* used when the element type has size 4 */
+ };
+};
+
struct expr {
enum exprkind kind;
/* whether this expression is an lvalue */
@@ -331,14 +340,7 @@ struct expr {
int64_t i;
double f;
} constant;
- struct {
- union {
- unsigned char *data;
- uint_least16_t *data16;
- uint_least32_t *data32;
- };
- size_t size;
- } string;
+ struct stringlit string;
struct {
struct expr *args;
size_t nargs;
@@ -474,6 +476,8 @@ extern struct scope filescope;
/* expr */
+struct type *stringconcat(struct stringlit *, _Bool);
+
struct expr *expr(struct scope *);
struct expr *assignexpr(struct scope *);
struct expr *constexpr(struct scope *);
diff --git a/decl.c b/decl.c
index 57a8014..c570efa 100644
--- a/decl.c
+++ b/decl.c
@@ -747,7 +747,7 @@ addmember(struct structbuilder *b, struct qualtype mt, char *name, int align, un
static bool
staticassert(struct scope *s)
{
- struct expr *e;
+ struct stringlit msg;
uint64_t c;
if (!consume(T_STATIC_ASSERT))
@@ -755,11 +755,10 @@ staticassert(struct scope *s)
expect(TLPAREN, "after _Static_assert");
c = intconstexpr(s, true);
if (consume(TCOMMA)) {
- e = assignexpr(s);
- if (!e->decayed || e->base->kind != EXPRSTRING)
- error(&tok.loc, "expected string literal after static assertion expression");
+ tokencheck(&tok, TSTRINGLIT, "after static assertion expression");
+ stringconcat(&msg, true);
if (!c)
- error(&tok.loc, "static assertion failed: %.*s", (int)e->base->string.size, e->base->string.data);
+ error(&tok.loc, "static assertion failed: %.*s", (int)(msg.size - 1), msg.data);
} else if (!c) {
error(&tok.loc, "static assertion failed");
}
diff --git a/expr.c b/expr.c
index 0da1d43..f7b8e6d 100644
--- a/expr.c
+++ b/expr.c
@@ -6,6 +6,7 @@
#include <stdlib.h>
#include <string.h>
#include "util.h"
+#include "utf.h"
#include "cc.h"
static struct expr *
@@ -358,11 +359,12 @@ isodigit(int c)
return '0' <= c && c <= '8';
}
-static unsigned
-unescape(char **p)
+static size_t
+decodechar(const char *src, uint_least32_t *chr, bool *hexoct, const char *desc, struct location *loc)
{
- unsigned c;
- char *s = *p;
+ uint_least32_t c;
+ size_t n;
+ const char *s = src;
if (*s == '\\') {
++s;
@@ -384,18 +386,133 @@ unescape(char **p)
c = 0;
do c = c * 16 + (*s > '9' ? 10 + tolower(*s) - 'a' : *s - '0');
while (isxdigit(*++s));
+ if (hexoct)
+ *hexoct = true;
break;
default:
assert(isodigit(*s));
c = 0;
do c = c * 8 + (*s - '0');
while (isodigit(*++s));
+ if (hexoct)
+ *hexoct = true;
}
} else {
- c = (unsigned char)*s++;
+ n = utf8dec(&c, s, 4);
+ if (n == -1)
+ error(loc, "%s contains invalid UTF-8", desc);
+ s += n;
}
- *p = s;
- return c;
+ *chr = c;
+ return s - src;
+}
+
+/*
+ * Write one decoded character to dst for a string whose elements are
+ * one byte wide.
+ *
+ * A value that came from a hex or octal escape (hexoct set) is stored
+ * as a single unsigned char; any other character is handed to utf8enc().
+ * The return value is the distance the caller advances its output
+ * pointer by.
+ */
+static size_t
+encodechar8(void *dst, uint_least32_t chr, bool hexoct)
+{
+	unsigned char *out = dst;
+
+	if (hexoct) {
+		*out = chr;
+		return 1;
+	}
+	return utf8enc(dst, chr);
+}
+
+/*
+ * Write one decoded character to dst for a string whose elements are
+ * uint_least16_t.
+ *
+ * A value that came from a hex or octal escape (hexoct set) is stored
+ * as one element; any other character is handed to utf16enc(), whose
+ * result is scaled by the element size. The return value is a byte
+ * count, which the caller adds to its output pointer.
+ */
+static size_t
+encodechar16(void *dst, uint_least32_t chr, bool hexoct)
+{
+	uint_least16_t *out = dst;
+
+	if (hexoct) {
+		*out = chr;
+		return sizeof(*out);
+	}
+	return utf16enc(dst, chr) * sizeof(*out);
+}
+
+/*
+ * Write one decoded character to dst for a string whose elements are
+ * uint_least32_t. The value is stored unchanged as a single element,
+ * so hexoct is not consulted; the parameter exists so that this
+ * function has the same signature as encodechar8() and encodechar16().
+ * Returns the size of one element in bytes.
+ */
+static size_t
+encodechar32(void *dst, uint_least32_t chr, bool hexoct)
+{
+	uint_least32_t *out = dst;
+
+	*out = chr;
+	return sizeof(*out);
+}
+
+/*
+ * Consume a run of adjacent string-literal tokens, starting with the
+ * current token (which must be a TSTRINGLIT), and store the decoded,
+ * concatenated result in *str.
+ *
+ * The encoding prefix of the run selects the element type:
+ *   none or u8 -> typechar, u -> typeushort, U -> typeuint,
+ *   L -> targ->typewchar.
+ * Mixing two different prefixes in one run is reported as an error. If
+ * forceutf8 is set, the prefix is ignored and typechar is used.
+ *
+ * On return, the union member of *str matching the element size points
+ * to a buffer allocated here, and str->size is the number of elements
+ * written, including the terminating null.
+ *
+ * Returns the element type (not the array type) of the literal.
+ */
+struct type *
+stringconcat(struct stringlit *str, bool forceutf8)
+{
+	static struct array parts;
+	struct {
+		struct location loc;
+		char *str;
+	} *p;
+	int kind, newkind;
+	struct type *t;
+	size_t (*encodechar)(void *, uint_least32_t, bool);
+	char *src;
+	unsigned char *buf, *dst;
+	uint_least32_t chr;
+	bool hexoct;
+	size_t len, width;
+
+	assert(tok.kind == TSTRINGLIT);
+	parts.len = 0;
+	len = 0;
+	kind = 0;
+	/* first pass: record each token's text and work out the common prefix */
+	do {
+		src = tok.lit;
+		switch (*src) {
+		case 'u': if (src[1] == '8') ++src; /* fallthrough */
+		case 'L':
+		case 'U': newkind = *src, ++src; break;
+		case '"': newkind = 0; break;
+		/* do not fall out of the switch with newkind unset when asserts are disabled */
+		default: fatal("internal error: malformed string literal token");
+		}
+		if (kind != newkind && kind && newkind)
+			error(&tok.loc, "adjacent string literals have differing prefixes");
+		if (newkind)
+			kind = newkind;
+		p = arrayadd(&parts, sizeof(*p));
+		p->loc = tok.loc;
+		/* src is at the opening quote; skip it */
+		p->str = src + 1;
+		/* count the source bytes between the two quotes */
+		len += strlen(src) - 2;
+		next();
+	} while (tok.kind == TSTRINGLIT);
+	if (forceutf8 || kind == '8')
+		kind = 0;
+	++len; /* null byte */
+	switch (kind) {
+	case 0: t = &typechar; break;
+	case 'u': t = &typeushort; break;
+	case 'U': t = &typeuint; break;
+	case 'L': t = targ->typewchar; break;
+	/* never continue with t uninitialized */
+	default: fatal("internal error: unknown string literal prefix");
+	}
+	switch (t->size) {
+	case 1:
+		width = 1;
+		encodechar = encodechar8;
+		buf = xreallocarray(NULL, len, 1);
+		str->data = buf;
+		break;
+	case 2:
+		width = sizeof(uint_least16_t);
+		encodechar = encodechar16;
+		buf = xreallocarray(NULL, len, width);
+		str->data16 = (uint_least16_t *)buf;
+		break;
+	case 4:
+		width = sizeof(uint_least32_t);
+		encodechar = encodechar32;
+		buf = xreallocarray(NULL, len, width);
+		str->data32 = (uint_least32_t *)buf;
+		break;
+	/* never continue with buf, width and encodechar uninitialized */
+	default: fatal("internal error: unsupported string literal element size");
+	}
+	/* second pass: decode every recorded part into the output buffer */
+	dst = buf;
+	arrayforeach(&parts, p) {
+		src = p->str;
+		while (*src != '"') {
+			hexoct = false;
+			src += decodechar(src, &chr, &hexoct, "string literal", &p->loc);
+			dst += encodechar(dst, chr, hexoct);
+		}
+	}
+	dst += encodechar(dst, 0, false);
+	/* dst advances in bytes; convert back to an element count */
+	str->size = (dst - buf) / width;
+	return t;
}
static struct expr *
@@ -452,7 +569,7 @@ primaryexpr(struct scope *s)
struct decl *d;
struct type *t;
char *src, *end;
- unsigned char *dst;
+ uint_least32_t chr;
int base;
switch (tok.kind) {
@@ -469,25 +586,10 @@ primaryexpr(struct scope *s)
next();
break;
case TSTRINGLIT:
- e = mkexpr(EXPRSTRING, mkarraytype(&typechar, QUALNONE, 0), NULL);
+ e = mkexpr(EXPRSTRING, NULL, NULL);
+ t = stringconcat(&e->string, false);
+ e->type = mkarraytype(t, QUALNONE, e->string.size);
e->lvalue = true;
- e->string.size = 0;
- e->string.data = NULL;
- do {
- e->string.data = xreallocarray(e->string.data, e->string.size + strlen(tok.lit) + 1, 1);
- dst = e->string.data + e->string.size;
- src = tok.lit;
- if (*src != '"')
- fatal("wide string literal not yet implemented");
- for (++src; *src != '"'; ++dst)
- *dst = unescape(&src);
- e->string.size = dst - e->string.data;
- next();
- } while (tok.kind == TSTRINGLIT);
- *dst = '\0';
- e->type->array.length = ++e->string.size;
- e->type->size = e->type->array.length * e->type->base->size;
- e->type->incomplete = false;
e = decay(e);
break;
case TCHARCONST:
@@ -500,7 +602,8 @@ primaryexpr(struct scope *s)
}
assert(*src == '\'');
++src;
- e = mkconstexpr(t, unescape(&src));
+ src += decodechar(src, &chr, NULL, "character constant", &tok.loc);
+ e = mkconstexpr(t, chr);
if (*src != '\'')
error(&tok.loc, "character constant contains more than one character: %c", *src);
next();
diff --git a/test/initializer-replace-local-string-wide.c b/test/initializer-replace-local-string-wide.c
new file mode 100644
index 0000000..366020f
--- /dev/null
+++ b/test/initializer-replace-local-string-wide.c
@@ -0,0 +1,22 @@
+/*
+ * Local-variable initializer test for wide strings. For each of the
+ * u, U and L arrays the initializer list first sets elements 0 and 4
+ * from character constants, then initializes the whole array from a
+ * string literal with the matching prefix, then sets element 1 again.
+ */
+void f(void) {
+ struct {
+ unsigned short u[6];
+ unsigned U[6];
+ __typeof__(L' ') L[6];
+ } x = {
+ .u[0] = u'x',
+ .u[4] = u'y',
+ .u = u"hello",
+ .u[1] = u'a',
+
+ .U[0] = U'x',
+ .U[4] = U'y',
+ .U = U"hello",
+ .U[1] = U'a',
+
+ .L[0] = L'x',
+ .L[4] = L'y',
+ .L = L"hello",
+ .L[1] = L'a',
+ };
+}
diff --git a/test/initializer-replace-local-string-wide.qbe b/test/initializer-replace-local-string-wide.qbe
new file mode 100644
index 0000000..97ac914
--- /dev/null
+++ b/test/initializer-replace-local-string-wide.qbe
@@ -0,0 +1,61 @@
+export
+function $f() {
+@start.1
+ %.1 =l alloc4 60
+@body.2
+ %.2 =l add %.1, 0
+ storeh 104, %.2
+ %.3 =l add %.1, 2
+ storeh 101, %.3
+ %.4 =l add %.1, 4
+ storeh 108, %.4
+ %.5 =l add %.1, 6
+ storeh 108, %.5
+ %.6 =l add %.1, 8
+ storeh 111, %.6
+ %.7 =l add %.1, 10
+ storeh 0, %.7
+ %.8 =l add %.1, 2
+ storeh 97, %.8
+ %.9 =l add %.1, 4
+ storew 0, %.9
+ %.10 =l add %.1, 8
+ storew 0, %.10
+ %.11 =l add %.1, 12
+ storew 104, %.11
+ %.12 =l add %.1, 16
+ storew 101, %.12
+ %.13 =l add %.1, 20
+ storew 108, %.13
+ %.14 =l add %.1, 24
+ storew 108, %.14
+ %.15 =l add %.1, 28
+ storew 111, %.15
+ %.16 =l add %.1, 32
+ storew 0, %.16
+ %.17 =l add %.1, 16
+ storew 97, %.17
+ %.18 =l add %.1, 20
+ storew 0, %.18
+ %.19 =l add %.1, 24
+ storew 0, %.19
+ %.20 =l add %.1, 28
+ storew 0, %.20
+ %.21 =l add %.1, 32
+ storew 0, %.21
+ %.22 =l add %.1, 36
+ storew 104, %.22
+ %.23 =l add %.1, 40
+ storew 101, %.23
+ %.24 =l add %.1, 44
+ storew 108, %.24
+ %.25 =l add %.1, 48
+ storew 108, %.25
+ %.26 =l add %.1, 52
+ storew 111, %.26
+ %.27 =l add %.1, 56
+ storew 0, %.27
+ %.28 =l add %.1, 40
+ storew 97, %.28
+ ret
+}
diff --git a/test/initializer-replace-static-string-wide.c b/test/initializer-replace-static-string-wide.c
new file mode 100644
index 0000000..b6c348a
--- /dev/null
+++ b/test/initializer-replace-static-string-wide.c
@@ -0,0 +1,20 @@
+/*
+ * File-scope initializer test for wide strings. Each array member is
+ * initialized from a prefixed string literal containing non-ASCII
+ * characters, and then one element is overridden with a character
+ * constant of the same prefix.
+ */
+struct {
+ unsigned short s[6];
+} u = {
+ .s = u"aα€😐",
+ .s[2] = u'£',
+};
+
+struct {
+ unsigned s[5];
+} U = {
+ .s = U"aα€😐",
+ .s[3] = U'😃',
+};
+
+struct {
+ __typeof__(L' ') s[5];
+} L = {
+ .s = L"aα€😐",
+ .s[3] = L'😃',
+};
diff --git a/test/initializer-replace-static-string-wide.qbe b/test/initializer-replace-static-string-wide.qbe
new file mode 100644
index 0000000..ab42886
--- /dev/null
+++ b/test/initializer-replace-static-string-wide.qbe
@@ -0,0 +1,3 @@
+export data $u = align 2 { h 97 945 163 55357 56848 0 , }
+export data $U = align 4 { w 97 945 8364 128515 0 , }
+export data $L = align 4 { w 97 945 8364 128515 0 , }
diff --git a/test/initializer-string-wide.c b/test/initializer-string-wide.c
new file mode 100644
index 0000000..85b8019
--- /dev/null
+++ b/test/initializer-string-wide.c
@@ -0,0 +1,13 @@
+/*
+ * The same literal with no prefix and with the u8, u, U and L prefixes.
+ * It mixes non-ASCII characters with hex escapes of two, four and eight
+ * digits.
+ */
+char s[] = "aα€😀\xAA\xBBBB\xCCCCCCCC";
+char u8[] = u8"aα€😀\xAA\xBBBB\xCCCCCCCC";
+unsigned short u[] = u"aα€😀\xAA\xBBBB\xCCCCCCCC";
+unsigned U[] = U"aα€😀\xAA\xBBBB\xCCCCCCCC";
+__typeof__(L' ') L[] = L"aα€😀\xAA\xBBBB\xCCCCCCCC";
+
+/* The same five initializers as at file scope, applied to local arrays. */
+void f(void) {
+ char s[] = "aα€😀\xAA\xBBBB\xCCCCCCCC";
+ char u8[] = u8"aα€😀\xAA\xBBBB\xCCCCCCCC";
+ unsigned short u[] = u"aα€😀\xAA\xBBBB\xCCCCCCCC";
+ unsigned U[] = U"aα€😀\xAA\xBBBB\xCCCCCCCC";
+ __typeof__(L' ') L[] = L"aα€😀\xAA\xBBBB\xCCCCCCCC";
+}
diff --git a/test/initializer-string-wide.qbe b/test/initializer-string-wide.qbe
new file mode 100644
index 0000000..dc784d4
--- /dev/null
+++ b/test/initializer-string-wide.qbe
@@ -0,0 +1,122 @@
+export data $s = align 1 { b "a\316\261\342\202\254\360\237\230\200\252\273\314\000", }
+export data $u8 = align 1 { b "a\316\261\342\202\254\360\237\230\200\252\273\314\000", }
+export data $u = align 2 { h 97 945 8364 55357 56832 170 48059 52428 0 , }
+export data $U = align 4 { w 97 945 8364 128512 170 48059 3435973836 0 , }
+export data $L = align 4 { w 97 945 8364 128512 170 48059 3435973836 0 , }
+export
+function $f() {
+@start.1
+ %.1 =l alloc4 14
+ %.16 =l alloc4 14
+ %.31 =l alloc4 18
+ %.41 =l alloc4 32
+ %.50 =l alloc4 32
+@body.2
+ %.2 =l add %.1, 0
+ storeb 97, %.2
+ %.3 =l add %.1, 1
+ storeb 206, %.3
+ %.4 =l add %.1, 2
+ storeb 177, %.4
+ %.5 =l add %.1, 3
+ storeb 226, %.5
+ %.6 =l add %.1, 4
+ storeb 130, %.6
+ %.7 =l add %.1, 5
+ storeb 172, %.7
+ %.8 =l add %.1, 6
+ storeb 240, %.8
+ %.9 =l add %.1, 7
+ storeb 159, %.9
+ %.10 =l add %.1, 8
+ storeb 152, %.10
+ %.11 =l add %.1, 9
+ storeb 128, %.11
+ %.12 =l add %.1, 10
+ storeb 170, %.12
+ %.13 =l add %.1, 11
+ storeb 187, %.13
+ %.14 =l add %.1, 12
+ storeb 204, %.14
+ %.15 =l add %.1, 13
+ storeb 0, %.15
+ %.17 =l add %.16, 0
+ storeb 97, %.17
+ %.18 =l add %.16, 1
+ storeb 206, %.18
+ %.19 =l add %.16, 2
+ storeb 177, %.19
+ %.20 =l add %.16, 3
+ storeb 226, %.20
+ %.21 =l add %.16, 4
+ storeb 130, %.21
+ %.22 =l add %.16, 5
+ storeb 172, %.22
+ %.23 =l add %.16, 6
+ storeb 240, %.23
+ %.24 =l add %.16, 7
+ storeb 159, %.24
+ %.25 =l add %.16, 8
+ storeb 152, %.25
+ %.26 =l add %.16, 9
+ storeb 128, %.26
+ %.27 =l add %.16, 10
+ storeb 170, %.27
+ %.28 =l add %.16, 11
+ storeb 187, %.28
+ %.29 =l add %.16, 12
+ storeb 204, %.29
+ %.30 =l add %.16, 13
+ storeb 0, %.30
+ %.32 =l add %.31, 0
+ storeh 97, %.32
+ %.33 =l add %.31, 2
+ storeh 945, %.33
+ %.34 =l add %.31, 4
+ storeh 8364, %.34
+ %.35 =l add %.31, 6
+ storeh 55357, %.35
+ %.36 =l add %.31, 8
+ storeh 56832, %.36
+ %.37 =l add %.31, 10
+ storeh 170, %.37
+ %.38 =l add %.31, 12
+ storeh 48059, %.38
+ %.39 =l add %.31, 14
+ storeh 52428, %.39
+ %.40 =l add %.31, 16
+ storeh 0, %.40
+ %.42 =l add %.41, 0
+ storew 97, %.42
+ %.43 =l add %.41, 4
+ storew 945, %.43
+ %.44 =l add %.41, 8
+ storew 8364, %.44
+ %.45 =l add %.41, 12
+ storew 128512, %.45
+ %.46 =l add %.41, 16
+ storew 170, %.46
+ %.47 =l add %.41, 20
+ storew 48059, %.47
+ %.48 =l add %.41, 24
+ storew 3435973836, %.48
+ %.49 =l add %.41, 28
+ storew 0, %.49
+ %.51 =l add %.50, 0
+ storew 97, %.51
+ %.52 =l add %.50, 4
+ storew 945, %.52
+ %.53 =l add %.50, 8
+ storew 8364, %.53
+ %.54 =l add %.50, 12
+ storew 128512, %.54
+ %.55 =l add %.50, 16
+ storew 170, %.55
+ %.56 =l add %.50, 20
+ storew 48059, %.56
+ %.57 =l add %.50, 24
+ storew 3435973836, %.57
+ %.58 =l add %.50, 28
+ storew 0, %.58
+ ret
+}