From 4d0a446123daebf8d9de77f5359e49229fe1eeb8 Mon Sep 17 00:00:00 2001 From: cinap_lenrek Date: Sun, 30 Mar 2014 04:29:04 +0200 Subject: grep: fix wrong rclass splitting (thanks erik and kenji) add 0xffff to tab1 as range 0xffff-0x10ffff has 4 byte utf-8 sequence. use Runemax (0x10ffff) instead of Runemask (0x1fffff) to denote the last valid rune for inverted [^] match as Runemask is out of the valid rune space. --- sys/src/cmd/grep/comp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sys/src/cmd/grep/comp.c b/sys/src/cmd/grep/comp.c index 6be061bbc..d74a7bdaa 100644 --- a/sys/src/cmd/grep/comp.c +++ b/sys/src/cmd/grep/comp.c @@ -135,11 +135,13 @@ Rune tab1[] = { 0x007f, 0x07ff, + 0xffff, }; Rune tab2[] = { 0x003f, 0x0fff, + 0xffff, }; Re2 @@ -275,7 +277,7 @@ re2class(char *s) x = re2or(x, rclass(ov, p[0]-1)); ov = p[1]+1; } - x = re2or(x, rclass(ov, Runemask)); + x = re2or(x, rclass(ov, Runemax)); } else { x = rclass(p[0], p[1]); for(p+=2; *p; p+=2) -- cgit v1.2.3