From a3d22dbe972770f84ae71f7080fb7bc676c2ba7a Mon Sep 17 00:00:00 2001 From: Manuel Stoeckl Date: Sat, 11 Nov 2023 16:55:41 -0500 Subject: xdg-toplevel: check that title provided actually is UTF-8 While the xdg-shell protocol requires this, it does not yet have a dedicated error code for invalid titles; this commit makes wlroots send a generic error instead. --- include/util/utf8.h | 11 +++++++ types/xdg_shell/wlr_xdg_toplevel.c | 7 ++++ util/meson.build | 1 + util/utf8.c | 66 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 85 insertions(+) create mode 100644 include/util/utf8.h create mode 100644 util/utf8.c diff --git a/include/util/utf8.h b/include/util/utf8.h new file mode 100644 index 00000000..4d5172c7 --- /dev/null +++ b/include/util/utf8.h @@ -0,0 +1,11 @@ +#ifndef UTIL_UTF8_H +#define UTIL_UTF8_H + +#include + +/** + * Return true if and only if the string is a valid UTF-8 sequence. + */ +bool is_utf8(const char *string); + +#endif diff --git a/types/xdg_shell/wlr_xdg_toplevel.c b/types/xdg_shell/wlr_xdg_toplevel.c index fef9b21c..677c2821 100644 --- a/types/xdg_shell/wlr_xdg_toplevel.c +++ b/types/xdg_shell/wlr_xdg_toplevel.c @@ -5,6 +5,7 @@ #include #include #include "types/wlr_xdg_shell.h" +#include "util/utf8.h" void handle_xdg_toplevel_ack_configure( struct wlr_xdg_toplevel *toplevel, @@ -224,6 +225,12 @@ static void xdg_toplevel_handle_set_title(struct wl_client *client, wlr_xdg_toplevel_from_resource(resource); char *tmp; + if (!is_utf8(title)) { + // TODO: update when xdg_toplevel has a dedicated error code for this + wl_resource_post_error(resource, (uint32_t)-1, "xdg_toplevel title is not valid UTF-8"); + return; + } + tmp = strdup(title); if (tmp == NULL) { wl_resource_post_no_memory(resource); diff --git a/util/meson.build b/util/meson.build index 1c3dcd5a..5af2254f 100644 --- a/util/meson.build +++ b/util/meson.build @@ -11,5 +11,6 @@ wlr_files += files( 'shm.c', 'time.c', 'token.c', + 'utf8.c', ) diff --git a/util/utf8.c b/util/utf8.c 
#include <stdbool.h>
#include <stdint.h>

/*
 * Inclusive range test on a single byte. The char is reinterpreted as an
 * unsigned octet first, so bytes >= 0x80 compare correctly even on
 * platforms where plain char is signed.
 */
static bool in_range(char x, uint8_t low, uint8_t high) {
	const uint8_t octet = (uint8_t)x;
	return !(octet < low || octet > high);
}

/*
 * Return true iff the NUL-terminated string is 'well-formed' UTF-8 as
 * defined by Unicode Standard 15.0.0 (Chapter 3, D92 and Table 3.7).
 *
 * Every encoded code point must match one of these byte patterns; the
 * first byte selects the pattern:
 *
 *   00..7F
 *   C2..DF  80..BF
 *   E0      A0..BF  80..BF
 *   E1..EC  80..BF  80..BF
 *   ED      80..9F  80..BF
 *   EE..EF  80..BF  80..BF
 *   F0      90..BF  80..BF  80..BF
 *   F1..F3  80..BF  80..BF  80..BF
 *   F4      80..8F  80..BF  80..BF
 *
 * The empty string is accepted.
 */
bool is_utf8(const char *string) {
	/* One row per pattern above: total byte count, then the inclusive
	 * {low, high} bounds for each byte position. */
	struct byte_pattern {
		int length;
		uint8_t bounds[4][2];
	};
	enum { PATTERN_COUNT = 9 };
	static const struct byte_pattern patterns[PATTERN_COUNT] = {
		{1, {{0x00, 0x7F}}},
		{2, {{0xC2, 0xDF}, {0x80, 0xBF}}},
		{3, {{0xE0, 0xE0}, {0xA0, 0xBF}, {0x80, 0xBF}}},
		{3, {{0xE1, 0xEC}, {0x80, 0xBF}, {0x80, 0xBF}}},
		{3, {{0xED, 0xED}, {0x80, 0x9F}, {0x80, 0xBF}}},
		{3, {{0xEE, 0xEF}, {0x80, 0xBF}, {0x80, 0xBF}}},
		{4, {{0xF0, 0xF0}, {0x90, 0xBF}, {0x80, 0xBF}, {0x80, 0xBF}}},
		{4, {{0xF1, 0xF3}, {0x80, 0xBF}, {0x80, 0xBF}, {0x80, 0xBF}}},
		{4, {{0xF4, 0xF4}, {0x80, 0x8F}, {0x80, 0xBF}, {0x80, 0xBF}}},
	};

	const char *cursor = string;
	while (*cursor != '\0') {
		/* Find the pattern whose lead-byte range contains this byte. */
		int row = 0;
		while (row < PATTERN_COUNT && !in_range(*cursor,
				patterns[row].bounds[0][0],
				patterns[row].bounds[0][1])) {
			row++;
		}
		if (row == PATTERN_COUNT) {
			// Not a permitted lead byte
			return false;
		}

		const struct byte_pattern *pattern = &patterns[row];
		for (int pos = 1; pos < pattern->length; pos++) {
			// Bail out at the first mismatch: a NUL terminator never
			// satisfies a continuation range, so stopping here keeps
			// us from reading past the end of the string.
			if (!in_range(cursor[pos], pattern->bounds[pos][0],
					pattern->bounds[pos][1])) {
				return false;
			}
		}
		cursor += pattern->length;
	}

	return true;
}