1/************************************************* 2* Perl-Compatible Regular Expressions * 3*************************************************/ 4 5/* PCRE is a library of functions to support regular expressions whose syntax 6and semantics are as close as possible to those of the Perl 5 language. 7 8 Main Library written by Philip Hazel 9 Copyright (c) 1997-2012 University of Cambridge 10 11 This JIT compiler regression test program was written by Zoltan Herczeg 12 Copyright (c) 2010-2012 13 14----------------------------------------------------------------------------- 15Redistribution and use in source and binary forms, with or without 16modification, are permitted provided that the following conditions are met: 17 18 * Redistributions of source code must retain the above copyright notice, 19 this list of conditions and the following disclaimer. 20 21 * Redistributions in binary form must reproduce the above copyright 22 notice, this list of conditions and the following disclaimer in the 23 documentation and/or other materials provided with the distribution. 24 25 * Neither the name of the University of Cambridge nor the names of its 26 contributors may be used to endorse or promote products derived from 27 this software without specific prior written permission. 28 29THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 30AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 33LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 34CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 35SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 36INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 37CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 38ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 39POSSIBILITY OF SUCH DAMAGE. 40----------------------------------------------------------------------------- 41*/ 42 43#ifdef HAVE_CONFIG_H 44#include "config.h" 45#endif 46 47#include <stdio.h> 48#include <string.h> 49#include "pcre.h" 50 51#define PCRE_BUG 0x80000000 52 53/* 54 Letter characters: 55 \xe6\x92\xad = 0x64ad = 25773 (kanji) 56 Non-letter characters: 57 \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark) 58 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888 59 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character) 60 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character) 61 Newlines: 62 \xc2\x85 = 0x85 = 133 (NExt Line = NEL) 63 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator) 64 Othercase pairs: 65 \xc3\xa9 = 0xe9 = 233 (e') 66 \xc3\x89 = 0xc9 = 201 (E') 67 \xc3\xa1 = 0xe1 = 225 (a') 68 \xc3\x81 = 0xc1 = 193 (A') 69 \xc8\xba = 0x23a = 570 70 \xe2\xb1\xa5 = 0x2c65 = 11365 71 \xe1\xbd\xb8 = 0x1f78 = 8056 72 \xe1\xbf\xb8 = 0x1ff8 = 8184 73 \xf0\x90\x90\x80 = 0x10400 = 66560 74 \xf0\x90\x90\xa8 = 0x10428 = 66600 75 Mark property: 76 \xcc\x8d = 0x30d = 781 77 Special: 78 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character) 79 \xe0\xa0\x80 = 0x800 = 2048 (lowest 2 byte character) 80 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character) 81 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character) 82 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character) 83*/ 84 85static int regression_tests(void); 86 87int main(void) 88{ 89 int jit = 0; 90#ifdef SUPPORT_PCRE8 91 pcre_config(PCRE_CONFIG_JIT, &jit); 92#else 93 pcre16_config(PCRE_CONFIG_JIT, &jit); 94#endif 95 if (!jit) { 96 printf("JIT must be enabled to run pcre_jit_test\n"); 97 return 1; 98 } 99 return regression_tests(); 100} 101 102/* --------------------------------------------------------------------------------------- */ 103 104#if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16) 105#error SUPPORT_PCRE8 or SUPPORT_PCRE16 must be defined 106#endif 107 108#define MUA (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF) 109#define MUAP (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP) 110#define CMUA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF) 111#define CMUAP (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP) 112#define MA (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF) 113#define MAP (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP) 114#define CMA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF) 115 116#define OFFSET_MASK 0x00ffff 117#define F_NO8 0x010000 118#define F_NO16 0x020000 119#define F_NOMATCH 0x040000 120#define F_DIFF 0x080000 121#define F_FORCECONV 0x100000 122#define F_PROPERTY 0x200000 123 124struct regression_test_case { 125 int flags; 126 int start_offset; 127 const char *pattern; 128 const char *input; 129}; 130 131static struct regression_test_case regression_test_cases[] = { 132 /* Constant strings. */ 133 { MUA, 0, "AbC", "AbAbC" }, 134 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" }, 135 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" }, 136 { MA, 0, "[^a]", "aAbB" }, 137 { CMA, 0, "[^m]", "mMnN" }, 138 { MA, 0, "a[^b][^#]", "abacd" }, 139 { CMA, 0, "A[^B][^E]", "abacd" }, 140 { CMUA, 0, "[^x][^#]", "XxBll" }, 141 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" }, 142 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" }, 143 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" }, 144 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" }, 145 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" }, 146 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" }, 147 { MUA, 0, "[axd]", "sAXd" }, 148 { CMUA, 0, "[axd]", "sAXd" }, 149 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" }, 150 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" }, 151 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" }, 152 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." }, 153 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." }, 154 { MUA, 0, "[^a]", "\xc2\x80[]" }, 155 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" }, 156 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" }, 157 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" }, 158 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" }, 159 { PCRE_CASELESS, 0, "a1", "Aa1" }, 160 { MA, 0, "\\Ca", "cda" }, 161 { CMA, 0, "\\Ca", "CDA" }, 162 { MA, 0 | F_NOMATCH, "\\Cx", "cda" }, 163 { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" }, 164 { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" }, 165 { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" }, 166 { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" }, 167 { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" }, 168 169 /* Assertions. */ 170 { MUA, 0, "\\b[^A]", "A_B#" }, 171 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" }, 172 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" }, 173 { MAP, 0, "\\B", "_\xa1" }, 174 { MAP, 0, "\\b_\\b[,A]\\B", "_," }, 175 { MUAP, 0, "\\b", "\xe6\x92\xad!" }, 176 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" }, 177 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" }, 178 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" }, 179 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" }, 180 { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" }, 181 { MA, 0 | F_NOMATCH, "\\R^", "\n" }, 182 { MA, 1 | F_NOMATCH, "^", "\n" }, 183 { 0, 0, "^ab", "ab" }, 184 { 0, 0 | F_NOMATCH, "^ab", "aab" }, 185 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" }, 186 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" }, 187 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" }, 188 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" }, 189 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" }, 190 { 0, 0, "ab$", "ab" }, 191 { 0, 0 | F_NOMATCH, "ab$", "abab\n\n" }, 192 { PCRE_DOLLAR_ENDONLY, 0 | F_NOMATCH, "ab$", "abab\r\n" }, 193 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" }, 194 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" }, 195 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" }, 196 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" }, 197 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" }, 198 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" }, 199 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" }, 200 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" }, 201 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" }, 202 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" }, 203 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" }, 204 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" }, 205 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" }, 206 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" }, 207 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" }, 208 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" }, 209 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" }, 210 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" }, 211 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" }, 212 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" }, 213 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" }, 214 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" }, 215 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" }, 216 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" }, 217 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" }, 218 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" }, 219 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" }, 220 { MA, 0, "\\Aa", "aaa" }, 221 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" }, 222 { MA, 1, "\\Ga", "aaa" }, 223 { MA, 1 | F_NOMATCH, "\\Ga", "aba" }, 224 { MA, 0, "a\\z", "aaa" }, 225 { MA, 0 | F_NOMATCH, "a\\z", "aab" }, 226 227 /* Brackets. */ 228 { MUA, 0, "(ab|bb|cd)", "bacde" }, 229 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" }, 230 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" }, 231 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" }, 232 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" }, 233 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" }, 234 235 /* Greedy and non-greedy ? operators. */ 236 { MUA, 0, "(?:a)?a", "laab" }, 237 { CMUA, 0, "(A)?A", "llaab" }, 238 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trygraphs in GCC. */ 239 { MUA, 0, "(a)?a", "manm" }, 240 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" }, 241 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" }, 242 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" }, 243 244 /* Greedy and non-greedy + operators */ 245 { MUA, 0, "(aa)+aa", "aaaaaaa" }, 246 { MUA, 0, "(aa)+?aa", "aaaaaaa" }, 247 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" }, 248 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" }, 249 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" }, 250 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" }, 251 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" }, 252 253 /* Greedy and non-greedy * operators */ 254 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" }, 255 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" }, 256 { MUA, 0, "(aa|ab)*ab", "aaabaaab" }, 257 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" }, 258 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" }, 259 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" }, 260 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" }, 261 { MA, 0, "((?:a|)*){0}a", "a" }, 262 263 /* Combining ? + * operators */ 264 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" }, 265 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" }, 266 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" }, 267 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" }, 268 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" }, 269 270 /* Single character iterators. */ 271 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" }, 272 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" }, 273 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" }, 274 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" }, 275 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" }, 276 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" }, 277 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" }, 278 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" }, 279 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" }, 280 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" }, 281 { MUA, 0, "(a?+[^b])+", "babaacacb" }, 282 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" }, 283 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" }, 284 { CMUA, 0, "[c-f]+k", "DemmFke" }, 285 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" }, 286 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" }, 287 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" }, 288 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" }, 289 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" }, 290 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" }, 291 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" }, 292 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" }, 293 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" }, 294 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" }, 295 { MUA, 0, "\\b\\w+\\B", "x,a_cd" }, 296 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" }, 297 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" }, 298 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" }, 299 { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" }, 300 { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" }, 301 { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" }, 302 { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" }, 303 304 /* Basic character sets. */ 305 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " }, 306 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" }, 307 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" }, 308 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" }, 309 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" }, 310 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" }, 311 312 /* Unicode properties. */ 313 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" }, 314 { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" }, 315 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" }, 316 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" }, 317 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" }, 318 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" }, 319 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" }, 320 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" }, 321 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" }, 322 { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" }, 323 { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" }, 324 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" }, 325 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" }, 326 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" }, 327 { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" }, 328 { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" }, 329 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" }, 330 { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" }, 331 { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" }, 332 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" }, 333 334 /* Possible empty brackets. */ 335 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" }, 336 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" }, 337 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" }, 338 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" }, 339 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" }, 340 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" }, 341 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" }, 342 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" }, 343 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" }, 344 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" }, 345 346 /* Start offset. */ 347 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" }, 348 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" }, 349 { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" }, 350 { MUA, 1, "(\\w\\W\\w)+", "ab#d" }, 351 352 /* Newline. */ 353 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." }, 354 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." }, 355 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." }, 356 357 /* Any character except newline or any newline. */ 358 { PCRE_NEWLINE_CRLF, 0, ".", "\r" }, 359 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" }, 360 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" }, 361 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" }, 362 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" }, 363 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" }, 364 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" }, 365 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" }, 366 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" }, 367 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" }, 368 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" }, 369 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" }, 370 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" }, 371 { MUA, 0 | F_NOMATCH, "\\R+", "ab" }, 372 { MUA, 0, "\\R+", "ab\r\n\r" }, 373 { MUA, 0, "\\R*", "ab\r\n\r" }, 374 { MUA, 0, "\\R*", "\r\n\r" }, 375 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" }, 376 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" }, 377 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" }, 378 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" }, 379 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" }, 380 { MUA, 0, "\\R+\\R\\R", "\r\r\r" }, 381 { MUA, 0, "\\R*\\R\\R", "\n\r" }, 382 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" }, 383 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" }, 384 385 /* Atomic groups (no fallback from "next" direction). */ 386 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" }, 387 { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" }, 388 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op", 389 "bababcdedefgheijijklmlmnop" }, 390 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" }, 391 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" }, 392 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" }, 393 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" }, 394 { MUA, 0, "((?>a|)+?)b", "aaacaaab" }, 395 { MUA, 0, "(?>x|)*$", "aaa" }, 396 { MUA, 0, "(?>(x)|)*$", "aaa" }, 397 { MUA, 0, "(?>x|())*$", "aaa" }, 398 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" }, 399 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" }, 400 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" }, 401 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" }, 402 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" }, 403 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" }, 404 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" }, 405 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" }, 406 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" }, 407 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" }, 408 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" }, 409 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" }, 410 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" }, 411 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" }, 412 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" }, 413 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" }, 414 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" }, 415 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" }, 416 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" }, 417 { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" }, 418 { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" }, 419 { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" }, 420 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" }, 421 { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" }, 422 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" }, 423 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" }, 424 425 /* Possessive quantifiers. */ 426 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" }, 427 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" }, 428 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" }, 429 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" }, 430 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" }, 431 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" }, 432 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" }, 433 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" }, 434 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" }, 435 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" }, 436 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" }, 437 { MUA, 0, "(b*)++m", "bxbbxbbbxm" }, 438 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" }, 439 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" }, 440 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" }, 441 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" }, 442 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" }, 443 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" }, 444 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" }, 445 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" }, 446 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" }, 447 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" }, 448 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" }, 449 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" }, 450 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" }, 451 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" }, 452 { MUA, 0, "((b*))++m", "bxbbxbbbxm" }, 453 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" }, 454 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" }, 455 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" }, 456 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" }, 457 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" }, 458 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" }, 459 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" }, 460 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" }, 461 462 /* Back references. */ 463 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" }, 464 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" }, 465 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" }, 466 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" }, 467 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" }, 468 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" }, 469 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" }, 470 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" }, 471 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" }, 472 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" }, 473 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" }, 474 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" }, 475 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" }, 476 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" }, 477 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" }, 478 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" }, 479 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" }, 480 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." }, 481 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." }, 482 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" }, 483 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" }, 484 { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." }, 485 { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" }, 486 487 /* Assertions. */ 488 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" }, 489 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" }, 490 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" }, 491 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" }, 492 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" }, 493 { MA, 0, "(?<=aaa|aa|a)a", "aaa" }, 494 { MA, 2, "(?<=aaa|aa|a)a", "aaa" }, 495 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" }, 496 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" }, 497 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" }, 498 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" }, 499 { MUA, 0, "((?(?=(a))a)+k)", "bbak" }, 500 { MUA, 0, "((?(?=a)a)+k)", "bbak" }, 501 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" }, 502 { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" }, 503 { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" }, 504 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" }, 505 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" }, 506 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" }, 507 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" }, 508 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" }, 509 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" }, 510 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" }, 511 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" }, 512 513 /* Not empty, ACCEPT, FAIL */ 514 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" }, 515 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" }, 516 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" }, 517 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" }, 518 { MUA, 0, "a(*ACCEPT)b", "ab" }, 519 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" }, 520 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" }, 521 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" }, 522 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" }, 523 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" }, 524 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" }, 525 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" }, 526 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" }, 527 { MUA, 0, "((a(*ACCEPT)b))", "ab" }, 528 { MUA, 0, "(a(*FAIL)a|a)", "aaa" }, 529 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" }, 530 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" }, 531 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" }, 532 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" }, 533 534 /* Conditional blocks. */ 535 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" }, 536 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" }, 537 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" }, 538 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" }, 539 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" }, 540 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" }, 541 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" }, 542 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" }, 543 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" }, 544 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" }, 545 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" }, 546 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" }, 547 { MUA, 0, "(?(?=a)ab)", "a" }, 548 { MUA, 0, "(?(?<!b)c)", "b" }, 549 { MUA, 0, "(?(DEFINE)a(b))", "a" }, 550 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" }, 551 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" }, 552 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" }, 553 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" }, 554 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" }, 555 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" }, 556 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" }, 557 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" }, 558 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" }, 559 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" }, 560 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" }, 561 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" }, 562 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" }, 563 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" }, 564 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" }, 565 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" }, 566 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" }, 567 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" }, 568 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" }, 569 570 /* Set start of match. */ 571 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" }, 572 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" }, 573 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" }, 574 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" }, 575 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" }, 576 577 /* First line. */ 578 { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" }, 579 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" }, 580 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" }, 581 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" }, 582 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" }, 583 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" }, 584 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" }, 585 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" }, 586 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" }, 587 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" }, 588 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" }, 589 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" }, 590 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" }, 591 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" }, 592 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" }, 593 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" }, 594 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" }, 595 596 /* Recurse. */ 597 { MUA, 0, "(a)(?1)", "aa" }, 598 { MUA, 0, "((a))(?1)", "aa" }, 599 { MUA, 0, "(b|a)(?1)", "aa" }, 600 { MUA, 0, "(b|(a))(?1)", "aa" }, 601 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" }, 602 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" }, 603 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" }, 604 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" }, 605 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" }, 606 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" }, 607 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" }, 608 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" }, 609 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" }, 610 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" }, 611 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" }, 612 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" }, 613 { MUA, 0, "b|<(?R)*>", "<<b>" }, 614 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" }, 615 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" }, 616 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" }, 617 { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" }, 618 { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" }, 619 { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" }, 620 { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" }, 621 { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" }, 622 623 /* 16 bit specific tests. */ 624 { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" }, 625 { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" }, 626 { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" }, 627 { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" }, 628 { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" }, 629 { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" }, 630 { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" }, 631 { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" }, 632 { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" }, 633 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" }, 634 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" }, 635 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" }, 636 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" }, 637 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" }, 638 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" }, 639 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" }, 640 { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" }, 641 { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" }, 642 { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" }, 643 { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" }, 644 { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" }, 645 { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" }, 646 { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" }, 647 { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" }, 648 { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" }, 649 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" }, 650 { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" }, 651 { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" }, 652 { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" }, 653 { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" }, 654 { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" }, 655 656 /* Partial matching. */ 657 { MUA | PCRE_PARTIAL_SOFT, 0, "ab", "a" }, 658 { MUA | PCRE_PARTIAL_SOFT, 0, "ab|a", "a" }, 659 { MUA | PCRE_PARTIAL_HARD, 0, "ab|a", "a" }, 660 { MUA | PCRE_PARTIAL_SOFT, 0, "\\b#", "a" }, 661 { MUA | PCRE_PARTIAL_SOFT, 0, "(?<=a)b", "a" }, 662 { MUA | PCRE_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" }, 663 { MUA | PCRE_PARTIAL_SOFT, 0, "a\\B", "a" }, 664 { MUA | PCRE_PARTIAL_HARD, 0, "a\\b", "a" }, 665 666 /* (*MARK) verb. */ 667 { MUA, 0, "a(*MARK:aa)a", "ababaa" }, 668 { MUA, 0 | F_NOMATCH, "a(*:aa)a", "abab" }, 669 { MUA, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" }, 670 { MUA, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" }, 671 { MUA, 0, "(?>a(*:aa))b|ac", "ac" }, 672 { MUA, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" }, 673 { MUA, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" }, 674 { MUA, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" }, 675 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" }, 676 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" }, 677 { MUA, 0 | F_NOMATCH, "(a(*:aa)){0}(?:b(?1)b)+", "ba" }, 678 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" }, 679 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" }, 680 { MUA, 0 | F_NOMATCH, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" }, 681 682 /* (*COMMIT) verb. */ 683 { MUA, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" }, 684 { MUA, 0, "aa(*COMMIT)b", "xaxaab" }, 685 { MUA, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" }, 686 { MUA, 0, "(?=a(*COMMIT)b|ac)ac|(*:m)(a)c", "ac" }, 687 { MUA, 0, "(?!a(*COMMIT)(*:msg)b)a(c)|cd", "acd" }, 688 689 /* Deep recursion. */ 690 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " }, 691 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " }, 692 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaa b" }, 693 694 /* Deep recursion: Stack limit reached. */ 695 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" }, 696 { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" }, 697 { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" }, 698 { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" }, 699 { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" }, 700 701 { 0, 0, NULL, NULL } 702}; 703 704static const unsigned char *tables(int mode) 705{ 706 /* The purpose of this function to allow valgrind 707 for reporting invalid reads and writes. */ 708 static unsigned char *tables_copy; 709 const char *errorptr; 710 int erroroffset; 711 unsigned char *default_tables; 712#ifdef SUPPORT_PCRE8 713 pcre *regex; 714 char null_str[1] = { 0 }; 715#else 716 pcre16 *regex; 717 PCRE_UCHAR16 null_str[1] = { 0 }; 718#endif 719 720 if (mode) { 721 if (tables_copy) 722 free(tables_copy); 723 tables_copy = NULL; 724 return NULL; 725 } 726 727 if (tables_copy) 728 return tables_copy; 729 730 default_tables = NULL; 731#ifdef SUPPORT_PCRE8 732 regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL); 733 if (regex) { 734 pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables); 735 pcre_free(regex); 736 } 737#else 738 regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL); 739 if (regex) { 740 pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables); 741 pcre16_free(regex); 742 } 743#endif 744 /* Shouldn't ever happen. */ 745 if (!default_tables) 746 return NULL; 747 748 /* Unfortunately this value cannot get from pcre_fullinfo. 749 Since this is a test program, this is acceptable at the moment. */ 750 tables_copy = (unsigned char *)malloc(1088); 751 if (!tables_copy) 752 return NULL; 753 754 memcpy(tables_copy, default_tables, 1088); 755 return tables_copy; 756} 757 758#ifdef SUPPORT_PCRE8 759static pcre_jit_stack* callback8(void *arg) 760{ 761 return (pcre_jit_stack *)arg; 762} 763#endif 764 765#ifdef SUPPORT_PCRE16 766static pcre16_jit_stack* callback16(void *arg) 767{ 768 return (pcre16_jit_stack *)arg; 769} 770#endif 771 772#ifdef SUPPORT_PCRE8 773static void setstack8(pcre_extra *extra) 774{ 775 static pcre_jit_stack *stack; 776 777 if (!extra) { 778 if (stack) 779 pcre_jit_stack_free(stack); 780 stack = NULL; 781 return; 782 } 783 784 if (!stack) 785 stack = pcre_jit_stack_alloc(1, 1024 * 1024); 786 /* Extra can be NULL. */ 787 pcre_assign_jit_stack(extra, callback8, stack); 788} 789#endif /* SUPPORT_PCRE8 */ 790 791#ifdef SUPPORT_PCRE16 792static void setstack16(pcre16_extra *extra) 793{ 794 static pcre16_jit_stack *stack; 795 796 if (!extra) { 797 if (stack) 798 pcre16_jit_stack_free(stack); 799 stack = NULL; 800 return; 801 } 802 803 if (!stack) 804 stack = pcre16_jit_stack_alloc(1, 1024 * 1024); 805 /* Extra can be NULL. */ 806 pcre16_assign_jit_stack(extra, callback16, stack); 807} 808#endif /* SUPPORT_PCRE8 */ 809 810#ifdef SUPPORT_PCRE16 811 812static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length) 813{ 814 unsigned char *iptr = (unsigned char*)input; 815 unsigned short *optr = (unsigned short *)output; 816 unsigned int c; 817 818 if (max_length == 0) 819 return 0; 820 821 while (*iptr && max_length > 1) { 822 c = 0; 823 if (offsetmap) 824 *offsetmap++ = (int)(iptr - (unsigned char*)input); 825 826 if (!(*iptr & 0x80)) 827 c = *iptr++; 828 else if (!(*iptr & 0x20)) { 829 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f); 830 iptr += 2; 831 } else if (!(*iptr & 0x10)) { 832 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f); 833 iptr += 3; 834 } else if (!(*iptr & 0x08)) { 835 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f); 836 iptr += 4; 837 } 838 839 if (c < 65536) { 840 *optr++ = c; 841 max_length--; 842 } else if (max_length <= 2) { 843 *optr = '\0'; 844 return (int)(optr - (unsigned short *)output); 845 } else { 846 c -= 0x10000; 847 *optr++ = 0xd800 | ((c >> 10) & 0x3ff); 848 *optr++ = 0xdc00 | (c & 0x3ff); 849 max_length -= 2; 850 if (offsetmap) 851 offsetmap++; 852 } 853 } 854 if (offsetmap) 855 *offsetmap = (int)(iptr - (unsigned char*)input); 856 *optr = '\0'; 857 return (int)(optr - (unsigned short *)output); 858} 859 860static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length) 861{ 862 unsigned char *iptr = (unsigned char*)input; 863 unsigned short *optr = (unsigned short *)output; 864 865 if (max_length == 0) 866 return 0; 867 868 while (*iptr && max_length > 1) { 869 *optr++ = *iptr++; 870 max_length--; 871 } 872 *optr = '\0'; 873 return (int)(optr - (unsigned short *)output); 874} 875 876#define REGTEST_MAX_LENGTH 4096 877static PCRE_UCHAR16 regtest_buf[REGTEST_MAX_LENGTH]; 878static int regtest_offsetmap[REGTEST_MAX_LENGTH]; 879 880#endif /* SUPPORT_PCRE16 */ 881 882static int check_ascii(const char *input) 883{ 884 const unsigned char *ptr = (unsigned char *)input; 885 while (*ptr) { 886 if (*ptr > 127) 887 return 0; 888 ptr++; 889 } 890 return 1; 891} 892 893static int regression_tests(void) 894{ 895 struct regression_test_case *current = regression_test_cases; 896 const char *error; 897 char *cpu_info; 898 int i, err_offs; 899 int is_successful, is_ascii_pattern, is_ascii_input; 900 int total = 0; 901 int successful = 0; 902 int successful_row = 0; 903 int counter = 0; 904 int study_mode; 905#ifdef SUPPORT_PCRE8 906 pcre *re8; 907 pcre_extra *extra8; 908 pcre_extra dummy_extra8; 909 int ovector8_1[32]; 910 int ovector8_2[32]; 911 int return_value8_1, return_value8_2; 912 unsigned char *mark8_1, *mark8_2; 913 int utf8 = 0, ucp8 = 0; 914 int disabled_flags8 = 0; 915#endif 916#ifdef SUPPORT_PCRE16 917 pcre16 *re16; 918 pcre16_extra *extra16; 919 pcre16_extra dummy_extra16; 920 int ovector16_1[32]; 921 int ovector16_2[32]; 922 int return_value16_1, return_value16_2; 923 PCRE_UCHAR16 *mark16_1, *mark16_2; 924 int utf16 = 0, ucp16 = 0; 925 int disabled_flags16 = 0; 926 int length16; 927#endif 928 929 /* This test compares the behaviour of interpreter and JIT. Although disabling 930 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is 931 still considered successful from pcre_jit_test point of view. */ 932 933#ifdef SUPPORT_PCRE8 934 pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info); 935#else 936 pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info); 937#endif 938 939 printf("Running JIT regression tests\n"); 940 printf(" target CPU of SLJIT compiler: %s\n", cpu_info); 941 942#ifdef SUPPORT_PCRE8 943 pcre_config(PCRE_CONFIG_UTF8, &utf8); 944 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp8); 945 if (!utf8) 946 disabled_flags8 |= PCRE_UTF8; 947 if (!ucp8) 948 disabled_flags8 |= PCRE_UCP; 949 printf(" in 8 bit mode with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp8 ? "enabled" : "disabled"); 950#endif 951#ifdef SUPPORT_PCRE16 952 pcre16_config(PCRE_CONFIG_UTF16, &utf16); 953 pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp16); 954 if (!utf16) 955 disabled_flags16 |= PCRE_UTF8; 956 if (!ucp16) 957 disabled_flags16 |= PCRE_UCP; 958 printf(" in 16 bit mode with utf16 %s and ucp %s:\n", utf16 ? "enabled" : "disabled", ucp16 ? "enabled" : "disabled"); 959#endif 960 961 while (current->pattern) { 962 /* printf("\nPattern: %s :\n", current->pattern); */ 963 total++; 964 if (current->start_offset & F_PROPERTY) { 965 is_ascii_pattern = 0; 966 is_ascii_input = 0; 967 } else { 968 is_ascii_pattern = check_ascii(current->pattern); 969 is_ascii_input = check_ascii(current->input); 970 } 971 972 if (current->flags & PCRE_PARTIAL_SOFT) 973 study_mode = PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE; 974 else if (current->flags & PCRE_PARTIAL_HARD) 975 study_mode = PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE; 976 else 977 study_mode = PCRE_STUDY_JIT_COMPILE; 978 error = NULL; 979#ifdef SUPPORT_PCRE8 980 re8 = NULL; 981 if (!(current->start_offset & F_NO8)) 982 re8 = pcre_compile(current->pattern, 983 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags8), 984 &error, &err_offs, tables(0)); 985 986 extra8 = NULL; 987 if (re8) { 988 error = NULL; 989 extra8 = pcre_study(re8, study_mode, &error); 990 if (!extra8) { 991 printf("\n8 bit: Cannot study pattern: %s\n", current->pattern); 992 pcre_free(re8); 993 re8 = NULL; 994 } 995 if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) { 996 printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern); 997 pcre_free_study(extra8); 998 pcre_free(re8); 999 re8 = NULL; 1000 } 1001 extra8->flags |= PCRE_EXTRA_MARK; 1002 } else if (((utf8 && ucp8) || is_ascii_pattern) && !(current->start_offset & F_NO8)) 1003 printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern); 1004#endif 1005#ifdef SUPPORT_PCRE16 1006 if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV)) 1007 convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH); 1008 else 1009 copy_char8_to_char16(current->pattern, regtest_buf, REGTEST_MAX_LENGTH); 1010 1011 re16 = NULL; 1012 if (!(current->start_offset & F_NO16)) 1013 re16 = pcre16_compile(regtest_buf, 1014 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags16), 1015 &error, &err_offs, tables(0)); 1016 1017 extra16 = NULL; 1018 if (re16) { 1019 error = NULL; 1020 extra16 = pcre16_study(re16, study_mode, &error); 1021 if (!extra16) { 1022 printf("\n16 bit: Cannot study pattern: %s\n", current->pattern); 1023 pcre16_free(re16); 1024 re16 = NULL; 1025 } 1026 if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) { 1027 printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern); 1028 pcre16_free_study(extra16); 1029 pcre16_free(re16); 1030 re16 = NULL; 1031 } 1032 extra16->flags |= PCRE_EXTRA_MARK; 1033 } else if (((utf16 && ucp16) || is_ascii_pattern) && !(current->start_offset & F_NO16)) 1034 printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern); 1035#endif 1036 1037 counter++; 1038 if ((counter & 0x3) != 0) { 1039#ifdef SUPPORT_PCRE8 1040 setstack8(NULL); 1041#endif 1042#ifdef SUPPORT_PCRE16 1043 setstack16(NULL); 1044#endif 1045 } 1046 1047#ifdef SUPPORT_PCRE8 1048 return_value8_1 = -1000; 1049 return_value8_2 = -1000; 1050 for (i = 0; i < 32; ++i) 1051 ovector8_1[i] = -2; 1052 for (i = 0; i < 32; ++i) 1053 ovector8_2[i] = -2; 1054 if (re8) { 1055 mark8_1 = NULL; 1056 mark8_2 = NULL; 1057 setstack8(extra8); 1058 extra8->mark = &mark8_1; 1059 return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK, 1060 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32); 1061 memset(&dummy_extra8, 0, sizeof(pcre_extra)); 1062 dummy_extra8.flags = PCRE_EXTRA_MARK; 1063 dummy_extra8.mark = &mark8_2; 1064 return_value8_2 = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK, 1065 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_2, 32); 1066 } 1067#endif 1068 1069#ifdef SUPPORT_PCRE16 1070 return_value16_1 = -1000; 1071 return_value16_2 = -1000; 1072 for (i = 0; i < 32; ++i) 1073 ovector16_1[i] = -2; 1074 for (i = 0; i < 32; ++i) 1075 ovector16_2[i] = -2; 1076 if (re16) { 1077 mark16_1 = NULL; 1078 mark16_2 = NULL; 1079 setstack16(extra16); 1080 if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV)) 1081 length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH); 1082 else 1083 length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH); 1084 extra16->mark = &mark16_1; 1085 return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK, 1086 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32); 1087 memset(&dummy_extra16, 0, sizeof(pcre16_extra)); 1088 dummy_extra16.flags = PCRE_EXTRA_MARK; 1089 dummy_extra16.mark = &mark16_2; 1090 return_value16_2 = pcre16_exec(re16, &dummy_extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK, 1091 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_2, 32); 1092 } 1093#endif 1094 1095 /* printf("[%d-%d|%d-%d|%d-%d]%s", return_value8_1, return_value16_1, ovector8_1[0], ovector8_1[1], ovector16_1[0], ovector16_1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */ 1096 1097 /* If F_DIFF is set, just run the test, but do not compare the results. 1098 Segfaults can still be captured. */ 1099 1100 is_successful = 1; 1101 if (!(current->start_offset & F_DIFF)) { 1102#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16 1103 if (utf8 == utf16 && !(current->start_offset & F_FORCECONV)) { 1104 /* All results must be the same. */ 1105 if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) { 1106 printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n", 1107 return_value8_1, return_value8_2, return_value16_1, return_value16_2, 1108 total, current->pattern, current->input); 1109 is_successful = 0; 1110 } else if (return_value8_1 >= 0 || return_value8_1 == PCRE_ERROR_PARTIAL) { 1111 if (return_value8_1 == PCRE_ERROR_PARTIAL) { 1112 return_value8_1 = 2; 1113 return_value16_1 = 2; 1114 } else { 1115 return_value8_1 *= 2; 1116 return_value16_1 *= 2; 1117 } 1118 1119 /* Transform back the results. */ 1120 if (current->flags & PCRE_UTF8) { 1121 for (i = 0; i < return_value8_1; ++i) { 1122 if (ovector16_1[i] >= 0) 1123 ovector16_1[i] = regtest_offsetmap[ovector16_1[i]]; 1124 if (ovector16_2[i] >= 0) 1125 ovector16_2[i] = regtest_offsetmap[ovector16_2[i]]; 1126 } 1127 } 1128 1129 for (i = 0; i < return_value8_1; ++i) 1130 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) { 1131 printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n", 1132 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i], 1133 total, current->pattern, current->input); 1134 is_successful = 0; 1135 } 1136 } 1137 } else { 1138#endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */ 1139 /* Only the 8 bit and 16 bit results must be equal. */ 1140#ifdef SUPPORT_PCRE8 1141 if (return_value8_1 != return_value8_2) { 1142 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n", 1143 return_value8_1, return_value8_2, total, current->pattern, current->input); 1144 is_successful = 0; 1145 } else if (return_value8_1 >= 0 || return_value8_1 == PCRE_ERROR_PARTIAL) { 1146 if (return_value8_1 == PCRE_ERROR_PARTIAL) 1147 return_value8_1 = 2; 1148 else 1149 return_value8_1 *= 2; 1150 1151 for (i = 0; i < return_value8_1; ++i) 1152 if (ovector8_1[i] != ovector8_2[i]) { 1153 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n", 1154 i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input); 1155 is_successful = 0; 1156 } 1157 } 1158#endif 1159 1160#ifdef SUPPORT_PCRE16 1161 if (return_value16_1 != return_value16_2) { 1162 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n", 1163 return_value16_1, return_value16_2, total, current->pattern, current->input); 1164 is_successful = 0; 1165 } else if (return_value16_1 >= 0 || return_value16_1 == PCRE_ERROR_PARTIAL) { 1166 if (return_value16_1 == PCRE_ERROR_PARTIAL) 1167 return_value16_1 = 2; 1168 else 1169 return_value16_1 *= 2; 1170 1171 for (i = 0; i < return_value16_1; ++i) 1172 if (ovector16_1[i] != ovector16_2[i]) { 1173 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n", 1174 i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input); 1175 is_successful = 0; 1176 } 1177 } 1178#endif 1179 1180#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16 1181 } 1182#endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */ 1183 } 1184 1185 if (is_successful) { 1186#ifdef SUPPORT_PCRE8 1187 if (!(current->start_offset & F_NO8) && ((utf8 && ucp8) || is_ascii_input)) { 1188 if (return_value8_1 < 0 && !(current->start_offset & F_NOMATCH)) { 1189 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n", 1190 total, current->pattern, current->input); 1191 is_successful = 0; 1192 } 1193 1194 if (return_value8_1 >= 0 && (current->start_offset & F_NOMATCH)) { 1195 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n", 1196 total, current->pattern, current->input); 1197 is_successful = 0; 1198 } 1199 } 1200#endif 1201#ifdef SUPPORT_PCRE16 1202 if (!(current->start_offset & F_NO16) && ((utf16 && ucp16) || is_ascii_input)) { 1203 if (return_value16_1 < 0 && !(current->start_offset & F_NOMATCH)) { 1204 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n", 1205 total, current->pattern, current->input); 1206 is_successful = 0; 1207 } 1208 1209 if (return_value16_1 >= 0 && (current->start_offset & F_NOMATCH)) { 1210 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n", 1211 total, current->pattern, current->input); 1212 is_successful = 0; 1213 } 1214 } 1215#endif 1216 } 1217 1218 if (is_successful) { 1219#ifdef SUPPORT_PCRE8 1220 if (mark8_1 != mark8_2) { 1221 printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n", 1222 total, current->pattern, current->input); 1223 is_successful = 0; 1224 } 1225#endif 1226#ifdef SUPPORT_PCRE16 1227 if (mark16_1 != mark16_2) { 1228 printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n", 1229 total, current->pattern, current->input); 1230 is_successful = 0; 1231 } 1232#endif 1233 } 1234 1235#ifdef SUPPORT_PCRE8 1236 if (re8) { 1237 pcre_free_study(extra8); 1238 pcre_free(re8); 1239 } 1240#endif 1241#ifdef SUPPORT_PCRE16 1242 if (re16) { 1243 pcre16_free_study(extra16); 1244 pcre16_free(re16); 1245 } 1246#endif 1247 1248 if (is_successful) { 1249 successful++; 1250 successful_row++; 1251 printf("."); 1252 if (successful_row >= 60) { 1253 successful_row = 0; 1254 printf("\n"); 1255 } 1256 } else 1257 successful_row = 0; 1258 1259 fflush(stdout); 1260 current++; 1261 } 1262 tables(1); 1263#ifdef SUPPORT_PCRE8 1264 setstack8(NULL); 1265#endif 1266#ifdef SUPPORT_PCRE16 1267 setstack16(NULL); 1268#endif 1269 1270 if (total == successful) { 1271 printf("\nAll JIT regression tests are successfully passed.\n"); 1272 return 0; 1273 } else { 1274 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful); 1275 return 1; 1276 } 1277} 1278 1279/* End of pcre_jit_test.c */ 1280