From c1c17305161dda72461e237c2058668aeafbcd5b Mon Sep 17 00:00:00 2001 From: Michael Stapelberg Date: Sun, 11 Sep 2011 11:40:51 +0100 Subject: [PATCH] pcre: enable UCP, UTF-8 (if available), extend t/19-match --- src/regex.c | 29 ++++++++++++++++++++++------- testcases/t/19-match.t | 30 +++++++++++++++++++++++++++++- 2 files changed, 51 insertions(+), 8 deletions(-) diff --git a/src/regex.c b/src/regex.c index df01dae4..da8f91d8 100644 --- a/src/regex.c +++ b/src/regex.c @@ -21,16 +21,31 @@ */ struct regex *regex_new(const char *pattern) { const char *error; - int offset; + int errorcode, offset; struct regex *re = scalloc(sizeof(struct regex)); re->pattern = sstrdup(pattern); - if (!(re->regex = pcre_compile(pattern, 0, &error, &offset, NULL))) { - ELOG("PCRE regular expression compilation failed at %d: %s", + /* We use PCRE_UCP so that \B, \b, \D, \d, \S, \s, \W, \w and some POSIX + * character classes play nicely with Unicode */ + int options = PCRE_UCP | PCRE_UTF8; + while (!(re->regex = pcre_compile2(pattern, options, &errorcode, &error, &offset, NULL))) { + /* If the error is that PCRE was not compiled with UTF-8 support we + * disable it and try again */ + if (errorcode == 32) { + options &= ~PCRE_UTF8; + continue; + } + ELOG("PCRE regular expression compilation failed at %d: %s\n", offset, error); return NULL; } re->extra = pcre_study(re->regex, 0, &error); + /* If an error happened, we print the error message, but continue. + * Studying the regular expression leads to faster matching, but it’s not + * absolutely necessary. */ + if (error) { + ELOG("PCRE regular expression studying failed: %s\n", error); + } return re; } @@ -43,8 +58,8 @@ struct regex *regex_new(const char *pattern) { bool regex_matches(struct regex *regex, const char *input) { int rc; - /* TODO: is strlen(input) correct for UTF-8 matching? */ - /* TODO: enable UTF-8 */ + /* We use strlen() because pcre_exec() expects the length of the input + * string in bytes */ if ((rc = pcre_exec(regex->regex, regex->extra, input, strlen(input), 0, 0, NULL, 0)) == 0) { LOG("Regular expression \"%s\" matches \"%s\"\n", regex->pattern, input); @@ -57,7 +72,7 @@ bool regex_matches(struct regex *regex, const char *input) { return false; } - /* TODO: handle the other error codes */ - LOG("PCRE error\n"); + ELOG("PCRE error %d while trying to use regular expression \"%s\" on input \"%s\", see pcreapi(3)\n", + rc, regex->pattern, input); return false; } diff --git a/testcases/t/19-match.t b/testcases/t/19-match.t index 4d38c41f..e4fc6ec0 100644 --- a/testcases/t/19-match.t +++ b/testcases/t/19-match.t @@ -139,7 +139,7 @@ sleep 0.25; # two windows should be here $content = get_ws_content($tmp); -ok(@{$content} == 1, 'two windows opened'); +ok(@{$content} == 1, 'window opened'); cmd '[class="^special[0-9]$"] kill'; @@ -148,5 +148,33 @@ sleep 0.25; $content = get_ws_content($tmp); is(@{$content}, 0, 'window killed'); +###################################################################### +# check that UTF-8 works when matching +###################################################################### + +$tmp = fresh_workspace; + +$left = $x->root->create_child( + class => WINDOW_CLASS_INPUT_OUTPUT, + rect => [ 0, 0, 30, 30 ], + background_color => '#0000ff', +); + +$left->_create; +set_wm_class($left->id, 'special7', 'special7'); +$left->name('ä 3'); +$left->map; +sleep 0.25; + +# two windows should be here +$content = get_ws_content($tmp); +ok(@{$content} == 1, 'window opened'); + +cmd '[title="^\w [3]$"] kill'; + +sleep 0.25; + +$content = get_ws_content($tmp); +is(@{$content}, 0, 'window killed'); done_testing; -- 2.39.5