aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- include/util/utf8.h 11
-rw-r--r-- types/xdg_shell/wlr_xdg_toplevel.c 7
-rw-r--r-- util/meson.build 1
-rw-r--r-- util/utf8.c 66
4 files changed, 85 insertions, 0 deletions
diff --git a/include/util/utf8.h b/include/util/utf8.h
new file mode 100644
index 00000000..4d5172c7
--- /dev/null
+++ b/include/util/utf8.h
@@ -0,0 +1,11 @@
+#ifndef UTIL_UTF8_H
+#define UTIL_UTF8_H
+
+#include <stdbool.h>
+
+/**
+ * Return true if and only if the NUL-terminated string is well-formed UTF-8.
+ */
+bool is_utf8(const char *string);
+
+#endif
diff --git a/types/xdg_shell/wlr_xdg_toplevel.c b/types/xdg_shell/wlr_xdg_toplevel.c
index fef9b21c..677c2821 100644
--- a/types/xdg_shell/wlr_xdg_toplevel.c
+++ b/types/xdg_shell/wlr_xdg_toplevel.c
@@ -5,6 +5,7 @@
#include <wlr/util/log.h>
#include <wlr/util/edges.h>
#include "types/wlr_xdg_shell.h"
+#include "util/utf8.h"
void handle_xdg_toplevel_ack_configure(
struct wlr_xdg_toplevel *toplevel,
@@ -224,6 +225,12 @@ static void xdg_toplevel_handle_set_title(struct wl_client *client,
wlr_xdg_toplevel_from_resource(resource);
char *tmp;
+ if (!is_utf8(title)) {
+ // TODO: update when xdg_toplevel has a dedicated error code for this
+ wl_resource_post_error(resource, (uint32_t)-1, "xdg_toplevel title is not valid UTF-8");
+ return;
+ }
+
tmp = strdup(title);
if (tmp == NULL) {
wl_resource_post_no_memory(resource);
diff --git a/util/meson.build b/util/meson.build
index 1c3dcd5a..5af2254f 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -11,5 +11,6 @@ wlr_files += files(
'shm.c',
'time.c',
'token.c',
+ 'utf8.c',
)
diff --git a/util/utf8.c b/util/utf8.c
new file mode 100644
index 00000000..802fd010
--- /dev/null
+++ b/util/utf8.c
@@ -0,0 +1,66 @@
+#include <stdint.h>
+#include "util/utf8.h"
+
+static bool in_range(char x, uint8_t low, uint8_t high) { // bounds are inclusive
+ uint8_t v = (uint8_t)x; // compare as unsigned: plain char may be signed
+ return low <= v && v <= high;
+}
+
+bool is_utf8(const char *string) {
+ /* Returns true iff the NUL-terminated string is 'well-formed', as defined
+ * by Unicode Standard 15.0.0 (Chapter 3, D92 and Table 3.7); "" is valid.
+ *
+ * Each code point must be encoded in one of the following ways; the first
+ * byte selects the pattern. Lead bytes 80..C1 and F5..FF match no pattern.
+ *
+ * 00..7F
+ * C2..DF 80..BF
+ * E0 A0..BF 80..BF
+ * E1..EC 80..BF 80..BF
+ * ED 80..9F 80..BF
+ * EE..EF 80..BF 80..BF
+ * F0 90..BF 80..BF 80..BF
+ * F1..F3 80..BF 80..BF 80..BF
+ * F4 80..8F 80..BF 80..BF
+ */
+ uint8_t range_table[9][8] = { // row i: inclusive {low, high} pair per byte
+ {0x00, 0x7F},
+ {0xC2, 0xDF, 0x80, 0xBF},
+ {0xE0, 0xE0, 0xA0, 0xBF, 0x80, 0xBF},
+ {0xE1, 0xEC, 0x80, 0xBF, 0x80, 0xBF},
+ {0xED, 0xED, 0x80, 0x9F, 0x80, 0xBF},
+ {0xEE, 0xEF, 0x80, 0xBF, 0x80, 0xBF},
+ {0xF0, 0xF0, 0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF},
+ {0xF1, 0xF3, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF},
+ {0xF4, 0xF4, 0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF},
+ };
+ int lengths[9] = { // encoded length in bytes for each range_table row
+ 1, 2, 3, 3, 3, 3, 4, 4, 4
+ };
+
+ while (string[0]) { // each pass validates one encoded code point
+ bool accept = false;
+ for (int i = 0; i < 9; i++) {
+ if (!in_range(string[0], range_table[i][0],
+ range_table[i][1])) {
+ continue; // first byte does not select this row
+ }
+ for (int j = 1; j < lengths[i]; j++) {
+ if (!in_range(string[j], range_table[i][2 * j],
+ range_table[i][2 * j + 1])) {
+ // Early exit is necessary: NUL fails every continuation
+ // range, so we never read past the null terminator
+ return false;
+ }
+ }
+ string += lengths[i]; // step over the code point just validated
+ accept = true;
+ break;
+ }
+ if (!accept) { // first byte matched no row of the table
+ return false;
+ }
+ }
+
+ return true;
+}