diff options
author | Michael Forney <mforney@mforney.org> | 2021-07-06 18:51:56 -0700 |
---|---|---|
committer | Michael Forney <mforney@mforney.org> | 2021-07-06 19:09:10 -0700 |
commit | 70a532525946a28dc1ffacc542ba6a5b67aee986 (patch) | |
tree | bff296a43c06fff62bed451509782cfbc703167a /utf.c | |
parent | 0f348202d1282d2b38c3a84547693a7a82b25724 (diff) |
Add functions for encoding/decoding UTF-8/16
These will be needed to implement wide string literals.
Diffstat (limited to 'utf.c')
-rw-r--r-- | utf.c | 80 |
1 files changed, 80 insertions, 0 deletions
@@ -0,0 +1,80 @@ +#include <uchar.h> +#include "utf.h" + +size_t +utf8enc(char32_t c, char *s) +{ + if (c < 0x80) { + s[0] = c; + return 1; + } + if (c < 0x800) { + s[0] = 0xc0 | c >> 6; + s[1] = 0x80 | c & 0x3f; + return 2; + } + if (c < 0xd800 || c - 0xe000 < 0x2000) { + s[0] = 0xe0 | c >> 12; + s[1] = 0x80 | c >> 6 & 0x3f; + s[2] = 0x80 | c & 0x3f; + return 3; + } + if (c - 0x10000 < 0x100000) { + s[0] = 0xf0 | c >> 18; + s[1] = 0x80 | c >> 12 & 0x3f; + s[2] = 0x80 | c >> 6 & 0x3f; + s[3] = 0x80 | c & 0x3f; + return 4; + } + return -1; +} + +size_t +utf8dec(const char *s, size_t n, char32_t *c) +{ + size_t i, l; + unsigned char b; + char32_t x; + + b = s[0]; + if (b < 0x80) { + *c = b; + return 1; + } + if ((b & 0xe0) == 0xc0) { + x = b & 0x1f; + l = 2; + } else if ((b & 0xf0) == 0xe0) { + x = b & 0x0f; + l = 3; + } else if ((b & 0xf8) == 0xf0) { + x = b & 0x07; + l = 4; + } + if (n < l) + return -1; + for (i = 1; i < l; ++i) { + b = *++s; + if ((b & 0xc0) != 0x80) + return -1; + x = x << 6 | b & 0x3f; + } + *c = x; + return l; +} + +size_t +utf16enc(char32_t c, char16_t *s) +{ + if (c < 0xd800 || c - 0xe000 < 0x2000) { + s[0] = c; + return 1; + } + c -= 0x10000; + if (c < 0x100000) { + s[0] = 0xd800 | c >> 10 & 0x3ff; + s[1] = 0xdc00 | c & 0x3ff; + return 2; + } + return -1; +} |