From 8933ebcf50024f4378a78e556b1ac08091197206 Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Sat, 5 Oct 2024 13:01:49 +0200 Subject: [PATCH] sync drw.{c,h} from dmenu - drw: minor improvement to the nomatches cache - overhaul utf8decoding and render invalid utf8 sequences as U+FFFD. Thanks NRK for these improvements! --- drw.c | 114 ++++++++++++++++++++++++++++----------------------------- dwm.c | 1 - util.h | 1 + 3 files changed, 56 insertions(+), 60 deletions(-) diff --git a/drw.c b/drw.c index a58a2b4..344de61 100644 --- a/drw.c +++ b/drw.c @@ -9,54 +9,40 @@ #include "util.h" #define UTF_INVALID 0xFFFD -#define UTF_SIZ 4 -static const unsigned char utfbyte[UTF_SIZ + 1] = {0x80, 0, 0xC0, 0xE0, 0xF0}; -static const unsigned char utfmask[UTF_SIZ + 1] = {0xC0, 0x80, 0xE0, 0xF0, 0xF8}; -static const long utfmin[UTF_SIZ + 1] = { 0, 0, 0x80, 0x800, 0x10000}; -static const long utfmax[UTF_SIZ + 1] = {0x10FFFF, 0x7F, 0x7FF, 0xFFFF, 0x10FFFF}; - -static long -utf8decodebyte(const char c, size_t *i) +static int +utf8decode(const char *s_in, long *u, int *err) { - for (*i = 0; *i < (UTF_SIZ + 1); ++(*i)) - if (((unsigned char)c & utfmask[*i]) == utfbyte[*i]) - return (unsigned char)c & ~utfmask[*i]; - return 0; -} - -static size_t -utf8validate(long *u, size_t i) -{ - if (!BETWEEN(*u, utfmin[i], utfmax[i]) || BETWEEN(*u, 0xD800, 0xDFFF)) - *u = UTF_INVALID; - for (i = 1; *u > utfmax[i]; ++i) - ; - return i; -} - -static size_t -utf8decode(const char *c, long *u, size_t clen) -{ - size_t i, j, len, type; - long udecoded; + static const unsigned char lens[] = { + /* 0XXXX */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* 10XXX */ 0, 0, 0, 0, 0, 0, 0, 0, /* invalid */ + /* 110XX */ 2, 2, 2, 2, + /* 1110X */ 3, 3, + /* 11110 */ 4, + /* 11111 */ 0, /* invalid */ + }; + static const unsigned char leading_mask[] = { 0x7F, 0x1F, 0x0F, 0x07 }; + static const unsigned int overlong[] = { 0x0, 0x80, 0x0800, 0x10000 }; + const unsigned char *s = (const unsigned char *)s_in; + int len = lens[*s >> 3]; *u = UTF_INVALID; - if (!clen) - return 0; - udecoded = utf8decodebyte(c[0], &len); - if (!BETWEEN(len, 1, UTF_SIZ)) + *err = 1; + if (len == 0) return 1; - for (i = 1, j = 1; i < clen && j < len; ++i, ++j) { - udecoded = (udecoded << 6) | utf8decodebyte(c[i], &type); - if (type) - return j; - } - if (j < len) - return 0; - *u = udecoded; - utf8validate(u, len); + long cp = s[0] & leading_mask[len - 1]; + for (int i = 1; i < len; ++i) { + if (s[i] == '\0' || (s[i] & 0xC0) != 0x80) + return i; + cp = (cp << 6) | (s[i] & 0x3F); + } + /* out of range, surrogate, overlong encoding */ + if (cp > 0x10FFFF || (cp >> 11) == 0x1B || cp < overlong[len - 1]) + return len; + + *err = 0; + *u = cp; return len; } @@ -238,11 +224,11 @@ drw_rect(Drw *drw, int x, int y, unsigned int w, unsigned int h, int filled, int int drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lpad, const char *text, int invert) { - int i, ty, ellipsis_x = 0; - unsigned int tmpw, ew, ellipsis_w = 0, ellipsis_len; + int ty, ellipsis_x = 0; + unsigned int tmpw, ew, ellipsis_w = 0, ellipsis_len, hash, h0, h1; XftDraw *d = NULL; Fnt *usedfont, *curfont, *nextfont; - int utf8strlen, utf8charlen, render = x || y || w || h; + int utf8strlen, utf8charlen, utf8err, render = x || y || w || h; long utf8codepoint = 0; const char *utf8str; FcCharSet *fccharset; @@ -251,9 +237,8 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp XftResult result; int charexists = 0, overflow = 0; /* keep track of a couple codepoints for which we have no match. */ - enum { nomatches_len = 64 }; - static struct { long codepoint[nomatches_len]; unsigned int idx; } nomatches; - static unsigned int ellipsis_width = 0; + static unsigned int nomatches[128], ellipsis_width, invalid_width; + static const char invalid[] = "�"; if (!drw || (render && (!drw->scheme || !w)) || !text || !drw->fonts) return 0; @@ -273,12 +258,14 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp usedfont = drw->fonts; if (!ellipsis_width && render) ellipsis_width = drw_fontset_getwidth(drw, "..."); + if (!invalid_width && render) + invalid_width = drw_fontset_getwidth(drw, invalid); while (1) { - ew = ellipsis_len = utf8strlen = 0; + ew = ellipsis_len = utf8err = utf8charlen = utf8strlen = 0; utf8str = text; nextfont = NULL; while (*text) { - utf8charlen = utf8decode(text, &utf8codepoint, UTF_SIZ); + utf8charlen = utf8decode(text, &utf8codepoint, &utf8err); for (curfont = drw->fonts; curfont; curfont = curfont->next) { charexists = charexists || XftCharExists(drw->dpy, curfont->xfont, utf8codepoint); if (charexists) { @@ -300,9 +287,9 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp else utf8strlen = ellipsis_len; } else if (curfont == usedfont) { - utf8strlen += utf8charlen; text += utf8charlen; - ew += tmpw; + utf8strlen += utf8err ? 0 : utf8charlen; + ew += utf8err ? 0 : tmpw; } else { nextfont = curfont; } @@ -310,7 +297,7 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp } } - if (overflow || !charexists || nextfont) + if (overflow || !charexists || nextfont || utf8err) break; else charexists = 0; @@ -325,6 +312,12 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp x += ew; w -= ew; } + if (utf8err && (!render || invalid_width < w)) { + if (render) + drw_text(drw, x, y, w, h, 0, invalid, invert); + x += invalid_width; + w -= invalid_width; + } if (render && overflow) drw_text(drw, ellipsis_x, y, ellipsis_w, h, 0, "...", invert); @@ -338,11 +331,14 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp * character must be drawn. */ charexists = 1; - for (i = 0; i < nomatches_len; ++i) { - /* avoid calling XftFontMatch if we know we won't find a match */ - if (utf8codepoint == nomatches.codepoint[i]) - goto no_match; - } + hash = (unsigned int)utf8codepoint; + hash = ((hash >> 16) ^ hash) * 0x21F0AAAD; + hash = ((hash >> 15) ^ hash) * 0xD35A2D97; + h0 = ((hash >> 15) ^ hash) % LENGTH(nomatches); + h1 = (hash >> 17) % LENGTH(nomatches); + /* avoid expensive XftFontMatch call when we know we won't find a match */ + if (nomatches[h0] == utf8codepoint || nomatches[h1] == utf8codepoint) + goto no_match; fccharset = FcCharSetCreate(); FcCharSetAddChar(fccharset, utf8codepoint); @@ -371,7 +367,7 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp curfont->next = usedfont; } else { xfont_free(usedfont); - nomatches.codepoint[++nomatches.idx % nomatches_len] = utf8codepoint; + nomatches[nomatches[h0] ? h1 : h0] = utf8codepoint; no_match: usedfont = drw->fonts; } diff --git a/dwm.c b/dwm.c index 67c6b2b..1443802 100644 --- a/dwm.c +++ b/dwm.c @@ -50,7 +50,6 @@ #define INTERSECT(x,y,w,h,m) (MAX(0, MIN((x)+(w),(m)->wx+(m)->ww) - MAX((x),(m)->wx)) \ * MAX(0, MIN((y)+(h),(m)->wy+(m)->wh) - MAX((y),(m)->wy))) #define ISVISIBLE(C) ((C->tags & C->mon->tagset[C->mon->seltags])) -#define LENGTH(X) (sizeof X / sizeof X[0]) #define MOUSEMASK (BUTTONMASK|PointerMotionMask) #define WIDTH(X) ((X)->w + 2 * (X)->bw) #define HEIGHT(X) ((X)->h + 2 * (X)->bw) diff --git a/util.h b/util.h index f633b51..c0a50d4 100644 --- a/util.h +++ b/util.h @@ -3,6 +3,7 @@ #define MAX(A, B) ((A) > (B) ? (A) : (B)) #define MIN(A, B) ((A) < (B) ? (A) : (B)) #define BETWEEN(X, A, B) ((A) <= (X) && (X) <= (B)) +#define LENGTH(X) (sizeof (X) / sizeof (X)[0]) void die(const char *fmt, ...); void *ecalloc(size_t nmemb, size_t size);