aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Michael Forney <mforney@mforney.org>, 2021-10-20 14:08:27 -0700
committer: Michael Forney <mforney@mforney.org>, 2021-10-20 14:10:44 -0700
commit: e7cbbfbd5a8c8fdbbec30e4b78d2e21ade637fad (patch)
tree: 7de12ce9c63e329dfd0ef279cfc37a157958a6c6
parent: af3f4c14ee752afde7eb5d3aeb7038a66a8ed298 (diff)
utf: Detect invalid codepoints
Also, make utf*enc assert that the codepoint is valid and return 0 for an invalid codepoint. This makes it possible to use safely without error checking. We intend that these functions will only be called with valid codepoints.
-rw-r--r-- utf.c 9
1 files changed, 7 insertions, 2 deletions
diff --git a/utf.c b/utf.c
index de65172..7891f7e 100644
--- a/utf.c
+++ b/utf.c
@@ -1,3 +1,4 @@
+#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include "utf.h"
@@ -27,7 +28,8 @@ utf8enc(unsigned char *s, uint_least32_t c)
s[3] = 0x80 | c & 0x3f;
return 4;
}
- return -1;
+ assert(0);
+ return 0;
}
size_t
@@ -62,6 +64,8 @@ utf8dec(uint_least32_t *c, const char *s, size_t n)
return -1;
x = x << 6 | b & 0x3f;
}
+ if (x >= 0x110000 || x - 0xd800 < 0x0200)
+ return -1;
*c = x;
return l;
}
@@ -79,5 +83,6 @@ utf16enc(uint_least16_t *s, uint_least32_t c)
s[1] = 0xdc00 | c & 0x3ff;
return 2;
}
- return -1;
+ assert(0);
+ return 0;
}