aboutsummaryrefslogtreecommitdiff
path: root/common/unicode.c
diff options
context:
space:
mode:
Diffstat (limited to 'common/unicode.c')
-rw-r--r--common/unicode.c101
1 files changed, 0 insertions, 101 deletions
diff --git a/common/unicode.c b/common/unicode.c
deleted file mode 100644
index 5070e083..00000000
--- a/common/unicode.c
+++ /dev/null
@@ -1,101 +0,0 @@
-#include <stdint.h>
-#include <stddef.h>
-#include "unicode.h"
-
-size_t utf8_chsize(uint32_t ch) {
- if (ch < 0x80) {
- return 1;
- } else if (ch < 0x800) {
- return 2;
- } else if (ch < 0x10000) {
- return 3;
- }
- return 4;
-}
-
-static const uint8_t masks[] = {
- 0x7F,
- 0x1F,
- 0x0F,
- 0x07,
- 0x03,
- 0x01
-};
-
-uint32_t utf8_decode(const char **char_str) {
- uint8_t **s = (uint8_t **)char_str;
-
- uint32_t cp = 0;
- if (**s < 128) {
- // shortcut
- cp = **s;
- ++*s;
- return cp;
- }
- int size = utf8_size((char *)*s);
- if (size == -1) {
- ++*s;
- return UTF8_INVALID;
- }
- uint8_t mask = masks[size - 1];
- cp = **s & mask;
- ++*s;
- while (--size) {
- cp <<= 6;
- cp |= **s & 0x3f;
- ++*s;
- }
- return cp;
-}
-
-size_t utf8_encode(char *str, uint32_t ch) {
- size_t len = 0;
- uint8_t first;
-
- if (ch < 0x80) {
- first = 0;
- len = 1;
- } else if (ch < 0x800) {
- first = 0xc0;
- len = 2;
- } else if (ch < 0x10000) {
- first = 0xe0;
- len = 3;
- } else {
- first = 0xf0;
- len = 4;
- }
-
- for (size_t i = len - 1; i > 0; --i) {
- str[i] = (ch & 0x3f) | 0x80;
- ch >>= 6;
- }
-
- str[0] = ch | first;
- return len;
-}
-
-
-static const struct {
- uint8_t mask;
- uint8_t result;
- int octets;
-} sizes[] = {
- { 0x80, 0x00, 1 },
- { 0xE0, 0xC0, 2 },
- { 0xF0, 0xE0, 3 },
- { 0xF8, 0xF0, 4 },
- { 0xFC, 0xF8, 5 },
- { 0xFE, 0xF8, 6 },
- { 0x80, 0x80, -1 },
-};
-
-int utf8_size(const char *s) {
- uint8_t c = (uint8_t)*s;
- for (size_t i = 0; i < sizeof(sizes) / sizeof(*sizes); ++i) {
- if ((c & sizes[i].mask) == sizes[i].result) {
- return sizes[i].octets;
- }
- }
- return -1;
-}