From e7cbbfbd5a8c8fdbbec30e4b78d2e21ade637fad Mon Sep 17 00:00:00 2001 From: Michael Forney Date: Wed, 20 Oct 2021 14:08:27 -0700 Subject: utf: Detect invalid codepoints Also, make utf*enc assert that the codepoint is valid and return 0 for an invalid codepoint. This makes it possible to use safely without error checking. We intend that these functions will only be called with valid codepoints. --- utf.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/utf.c b/utf.c index de65172..7891f7e 100644 --- a/utf.c +++ b/utf.c @@ -1,3 +1,4 @@ +#include <assert.h> #include <stddef.h> #include <stdint.h> #include "utf.h" @@ -27,7 +28,8 @@ utf8enc(unsigned char *s, uint_least32_t c) s[3] = 0x80 | c & 0x3f; return 4; } - return -1; + assert(0); + return 0; } size_t @@ -62,6 +64,8 @@ utf8dec(uint_least32_t *c, const char *s, size_t n) return -1; x = x << 6 | b & 0x3f; } + if (x >= 0x110000 || x - 0xd800 < 0x0200) + return -1; *c = x; return l; } @@ -79,5 +83,6 @@ utf16enc(uint_least16_t *s, uint_least32_t c) s[1] = 0xdc00 | c & 0x3ff; return 2; } - return -1; + assert(0); + return 0; } -- cgit v1.2.3