diff options
author | Michael Forney <mforney@mforney.org> | 2021-10-20 14:08:27 -0700 |
---|---|---|
committer | Michael Forney <mforney@mforney.org> | 2021-10-20 14:10:44 -0700 |
commit | e7cbbfbd5a8c8fdbbec30e4b78d2e21ade637fad (patch) | |
tree | 7de12ce9c63e329dfd0ef279cfc37a157958a6c6 | |
parent | af3f4c14ee752afde7eb5d3aeb7038a66a8ed298 (diff) |
utf: Detect invalid codepoints
Also, make utf*enc assert that the codepoint is valid and return 0
for an invalid codepoint. This makes it possible to use them safely
without error checking. We intend that these functions will only
be called with valid codepoints.
-rw-r--r-- | utf.c | 9 |
1 files changed, 7 insertions, 2 deletions
@@ -1,3 +1,4 @@
+#include <assert.h>
 #include <stddef.h>
 #include <stdint.h>
 #include "utf.h"
@@ -27,7 +28,8 @@ utf8enc(unsigned char *s, uint_least32_t c)
 		s[3] = 0x80 | c & 0x3f;
 		return 4;
 	}
-	return -1;
+	assert(0);
+	return 0;
 }
 
 size_t
@@ -62,6 +64,8 @@ utf8dec(uint_least32_t *c, const char *s, size_t n)
 			return -1;
 		x = x << 6 | b & 0x3f;
 	}
+	if (x >= 0x110000 || x - 0xd800 < 0x0200)
+		return -1;
 	*c = x;
 	return l;
 }
@@ -79,5 +83,6 @@ utf16enc(uint_least16_t *s, uint_least32_t c)
 		s[1] = 0xdc00 | c & 0x3ff;
 		return 2;
 	}
-	return -1;
+	assert(0);
+	return 0;
 }