1/*************************************************
2*      Perl-Compatible Regular Expressions       *
3*************************************************/
4
5/* PCRE is a library of functions to support regular expressions whose syntax
6and semantics are as close as possible to those of the Perl 5 language.
7
8                  Main Library written by Philip Hazel
9           Copyright (c) 1997-2012 University of Cambridge
10
11  This JIT compiler regression test program was written by Zoltan Herczeg
12                      Copyright (c) 2010-2012
13
14-----------------------------------------------------------------------------
15Redistribution and use in source and binary forms, with or without
16modification, are permitted provided that the following conditions are met:
17
18    * Redistributions of source code must retain the above copyright notice,
19      this list of conditions and the following disclaimer.
20
21    * Redistributions in binary form must reproduce the above copyright
22      notice, this list of conditions and the following disclaimer in the
23      documentation and/or other materials provided with the distribution.
24
25    * Neither the name of the University of Cambridge nor the names of its
26      contributors may be used to endorse or promote products derived from
27      this software without specific prior written permission.
28
29THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39POSSIBILITY OF SUCH DAMAGE.
40-----------------------------------------------------------------------------
41*/
42
43#ifdef HAVE_CONFIG_H
44#include "config.h"
45#endif
46
47#include <stdio.h>
48#include <string.h>
49#include "pcre.h"
50
/*
 * Marker value with only bit 31 set. It is not referenced in the visible part
 * of this file.
 * NOTE(review): the name suggests it tags test cases tied to a known PCRE
 * bug -- confirm against the code that consumes it.
 */
#define PCRE_BUG 0x80000000
52
53/*
54 Letter characters:
55   \xe6\x92\xad = 0x64ad = 25773 (kanji)
56 Non-letter characters:
   \xc2\xa1 = 0xa1 = 161 (Inverted Exclamation Mark)
58   \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
59   \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
60   \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
61 Newlines:
62   \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
63   \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
64 Othercase pairs:
65   \xc3\xa9 = 0xe9 = 233 (e')
66      \xc3\x89 = 0xc9 = 201 (E')
67   \xc3\xa1 = 0xe1 = 225 (a')
68      \xc3\x81 = 0xc1 = 193 (A')
69   \xc8\xba = 0x23a = 570
70      \xe2\xb1\xa5 = 0x2c65 = 11365
71   \xe1\xbd\xb8 = 0x1f78 = 8056
72      \xe1\xbf\xb8 = 0x1ff8 = 8184
73   \xf0\x90\x90\x80 = 0x10400 = 66560
74      \xf0\x90\x90\xa8 = 0x10428 = 66600
75 Mark property:
76   \xcc\x8d = 0x30d = 781
77 Special:
78   \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
   \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
80   \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
81   \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
82   \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
83*/
84
85static int regression_tests(void);
86
87int main(void)
88{
89	int jit = 0;
90#ifdef SUPPORT_PCRE8
91	pcre_config(PCRE_CONFIG_JIT, &jit);
92#else
93	pcre16_config(PCRE_CONFIG_JIT, &jit);
94#endif
95	if (!jit) {
96		printf("JIT must be enabled to run pcre_jit_test\n");
97		return 1;
98	}
99	return regression_tests();
100}
101
102/* --------------------------------------------------------------------------------------- */
103
104#if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16)
105#error SUPPORT_PCRE8 or SUPPORT_PCRE16 must be defined
106#endif
107
/*
 * Shorthand option sets used in the "flags" column of the test table.
 * Each letter of a name stands for one PCRE option:
 *   C = PCRE_CASELESS, M = PCRE_MULTILINE, U = PCRE_UTF8,
 *   A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP.
 * The sets are built on top of each other, starting from MA.
 */
#define MA	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
#define MAP	(MA | PCRE_UCP)
#define CMA	(PCRE_CASELESS | MA)
#define MUA	(MA | PCRE_UTF8)
#define MUAP	(MUA | PCRE_UCP)
#define CMUA	(PCRE_CASELESS | MUA)
#define CMUAP	(PCRE_CASELESS | MUAP)

/*
 * Layout of the "start_offset" column: the table ORs a small offset with
 * F_* marker bits (e.g. "1 | F_NOMATCH"). OFFSET_MASK covers the low 16 bits
 * and each F_* marker is a single bit above them.
 * NOTE(review): the code that interprets these bits is outside the visible
 * part of the file; the names suggest "skip in 8-bit mode", "skip in 16-bit
 * mode", "no match expected", and so on -- confirm there.
 */
#define OFFSET_MASK	((1 << 16) - 1)
#define F_NO8		(1 << 16)
#define F_NO16		(1 << 17)
#define F_NOMATCH	(1 << 18)
#define F_DIFF		(1 << 19)
#define F_FORCECONV	(1 << 20)
#define F_PROPERTY	(1 << 21)
123
/*
 * One row of the regression table: a pattern, a subject string and the two
 * option words that accompany them. Rows are written as positional
 * initializers ({ flags, start_offset, pattern, input }), so the member
 * order below must not change.
 */
struct regression_test_case {
	/* PCRE option bits: raw PCRE_* values or the MUA/CMA-style shorthands. */
	int flags;
	/* Start offset OR-ed with F_* marker bits (e.g. "1 | F_NOMATCH"). */
	int start_offset;
	/* Regular expression under test. */
	const char *pattern;
	/* NOTE(review): presumably the subject text the pattern is run against. */
	const char *input;
};
130
131static struct regression_test_case regression_test_cases[] = {
132	/* Constant strings. */
133	{ MUA, 0, "AbC", "AbAbC" },
134	{ MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
135	{ CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
136	{ MA, 0, "[^a]", "aAbB" },
137	{ CMA, 0, "[^m]", "mMnN" },
138	{ MA, 0, "a[^b][^#]", "abacd" },
139	{ CMA, 0, "A[^B][^E]", "abacd" },
140	{ CMUA, 0, "[^x][^#]", "XxBll" },
141	{ MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
142	{ CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
143	{ MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
144	{ MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
145	{ MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
146	{ MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
147	{ MUA, 0, "[axd]", "sAXd" },
148	{ CMUA, 0, "[axd]", "sAXd" },
149	{ CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
150	{ MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
151	{ MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
152	{ CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
153	{ MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
154	{ MUA, 0, "[^a]", "\xc2\x80[]" },
155	{ CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
156	{ CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
157	{ PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
158	{ PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
159	{ PCRE_CASELESS, 0, "a1", "Aa1" },
160	{ MA, 0, "\\Ca", "cda" },
161	{ CMA, 0, "\\Ca", "CDA" },
162	{ MA, 0 | F_NOMATCH, "\\Cx", "cda" },
163	{ CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
164	{ CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
165	{ CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
166	{ CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
167	{ CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
168
169	/* Assertions. */
170	{ MUA, 0, "\\b[^A]", "A_B#" },
171	{ MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
172	{ MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
173	{ MAP, 0, "\\B", "_\xa1" },
174	{ MAP, 0, "\\b_\\b[,A]\\B", "_," },
175	{ MUAP, 0, "\\b", "\xe6\x92\xad!" },
176	{ MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
177	{ MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
178	{ MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
179	{ MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
180	{ CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
181	{ MA, 0 | F_NOMATCH, "\\R^", "\n" },
182	{ MA, 1 | F_NOMATCH, "^", "\n" },
183	{ 0, 0, "^ab", "ab" },
184	{ 0, 0 | F_NOMATCH, "^ab", "aab" },
185	{ PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
186	{ PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
187	{ PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
188	{ PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
189	{ PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
190	{ 0, 0, "ab$", "ab" },
191	{ 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
192	{ PCRE_DOLLAR_ENDONLY, 0 | F_NOMATCH, "ab$", "abab\r\n" },
193	{ PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
194	{ PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
195	{ PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
196	{ PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
197	{ PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
198	{ PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
199	{ PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
200	{ PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
201	{ PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
202	{ PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
203	{ PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
204	{ PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
205	{ PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
206	{ PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
207	{ PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
208	{ PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
209	{ PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
210	{ PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
211	{ PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
212	{ PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
213	{ PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
214	{ PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
215	{ PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
216	{ PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
217	{ PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
218	{ PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
219	{ PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
220	{ MA, 0, "\\Aa", "aaa" },
221	{ MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
222	{ MA, 1, "\\Ga", "aaa" },
223	{ MA, 1 | F_NOMATCH, "\\Ga", "aba" },
224	{ MA, 0, "a\\z", "aaa" },
225	{ MA, 0 | F_NOMATCH, "a\\z", "aab" },
226
227	/* Brackets. */
228	{ MUA, 0, "(ab|bb|cd)", "bacde" },
229	{ MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
230	{ MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
231	{ CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
232	{ MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
233	{ MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
234
235	/* Greedy and non-greedy ? operators. */
236	{ MUA, 0, "(?:a)?a", "laab" },
237	{ CMUA, 0, "(A)?A", "llaab" },
	{ MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
239	{ MUA, 0, "(a)?a", "manm" },
240	{ CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
241	{ MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
242	{ MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
243
244	/* Greedy and non-greedy + operators */
245	{ MUA, 0, "(aa)+aa", "aaaaaaa" },
246	{ MUA, 0, "(aa)+?aa", "aaaaaaa" },
247	{ MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
248	{ MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
249	{ MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
250	{ MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
251	{ MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
252
253	/* Greedy and non-greedy * operators */
254	{ CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
255	{ MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
256	{ MUA, 0, "(aa|ab)*ab", "aaabaaab" },
257	{ CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
258	{ MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
259	{ MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
260	{ MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
261	{ MA, 0, "((?:a|)*){0}a", "a" },
262
263	/* Combining ? + * operators */
264	{ MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
265	{ MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
266	{ MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
267	{ MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
268	{ MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
269
270	/* Single character iterators. */
271	{ MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
272	{ MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
273	{ MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
274	{ MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
275	{ MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
276	{ MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
277	{ MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
278	{ MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
279	{ MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
280	{ MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
281	{ MUA, 0, "(a?+[^b])+", "babaacacb" },
282	{ MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
283	{ CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
284	{ CMUA, 0, "[c-f]+k", "DemmFke" },
285	{ MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
286	{ MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
287	{ CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
288	{ CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
289	{ CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
290	{ CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
291	{ MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
292	{ CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
293	{ MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
294	{ MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
295	{ MUA, 0, "\\b\\w+\\B", "x,a_cd" },
296	{ MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
297	{ CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
298	{ CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
299	{ CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
300	{ CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
301	{ MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
302	{ MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
303
304	/* Basic character sets. */
305	{ MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
306	{ MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
307	{ MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
308	{ MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
309	{ MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
310	{ MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
311
312	/* Unicode properties. */
313	{ MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
314	{ MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
315	{ MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
316	{ MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
317	{ MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
318	{ MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
319	{ MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
320	{ MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
321	{ MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
322	{ MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
323	{ MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
324	{ MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
325	{ CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
326	{ MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
327	{ MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
328	{ MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
329	{ CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
330	{ MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
331	{ MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
332	{ PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB  baaa" },
333
334	/* Possible empty brackets. */
335	{ MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
336	{ MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
337	{ MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
338	{ MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
339	{ MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
340	{ MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
341	{ MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
342	{ MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
343	{ MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
344	{ MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
345
346	/* Start offset. */
347	{ MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
348	{ MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
349	{ MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
350	{ MUA, 1, "(\\w\\W\\w)+", "ab#d" },
351
352	/* Newline. */
353	{ PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
354	{ PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
355	{ PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
356
357	/* Any character except newline or any newline. */
358	{ PCRE_NEWLINE_CRLF, 0, ".", "\r" },
359	{ PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
360	{ PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
361	{ PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
362	{ PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
363	{ PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
364	{ PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
365	{ PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
366	{ PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
367	{ PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
368	{ PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
369	{ PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
370	{ PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
371	{ MUA, 0 | F_NOMATCH, "\\R+", "ab" },
372	{ MUA, 0, "\\R+", "ab\r\n\r" },
373	{ MUA, 0, "\\R*", "ab\r\n\r" },
374	{ MUA, 0, "\\R*", "\r\n\r" },
375	{ MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
376	{ MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
377	{ MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
378	{ MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
379	{ MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
380	{ MUA, 0, "\\R+\\R\\R", "\r\r\r" },
381	{ MUA, 0, "\\R*\\R\\R", "\n\r" },
382	{ MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
383	{ MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
384
385	/* Atomic groups (no fallback from "next" direction). */
386	{ MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
387	{ MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
388	{ MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
389			"bababcdedefgheijijklmlmnop" },
390	{ MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
391	{ MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
392	{ MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
393	{ MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
394	{ MUA, 0, "((?>a|)+?)b", "aaacaaab" },
395	{ MUA, 0, "(?>x|)*$", "aaa" },
396	{ MUA, 0, "(?>(x)|)*$", "aaa" },
397	{ MUA, 0, "(?>x|())*$", "aaa" },
398	{ MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
399	{ MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
400	{ MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
401	{ MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
402	{ MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
403	{ MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
404	{ MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
405	{ MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
406	{ MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
407	{ MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
408	{ MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
409	{ MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
410	{ MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
411	{ MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
412	{ CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
413	{ MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
414	{ MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
415	{ MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
416	{ MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
417	{ MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
418	{ MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
419	{ MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
420	{ MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
421	{ MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
422	{ MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
423	{ MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
424
425	/* Possessive quantifiers. */
426	{ MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
427	{ MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
428	{ MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
429	{ MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
430	{ MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
431	{ MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
432	{ MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
433	{ MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
434	{ MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
435	{ MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
436	{ MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
437	{ MUA, 0, "(b*)++m", "bxbbxbbbxm" },
438	{ MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
439	{ MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
440	{ MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
441	{ MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
442	{ MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
443	{ MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
444	{ MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
445	{ MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
446	{ MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
447	{ MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
448	{ MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
449	{ MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
450	{ MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
451	{ MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
452	{ MUA, 0, "((b*))++m", "bxbbxbbbxm" },
453	{ MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
454	{ MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
455	{ MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
456	{ MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
457	{ MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
458	{ MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
459	{ MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
460	{ MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
461
462	/* Back references. */
463	{ MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
464	{ CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
465	{ CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
466	{ MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
467	{ MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
468	{ MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
469	{ MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
470	{ MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
471	{ MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
472	{ CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
473	{ MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
474	{ CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
475	{ MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
476	{ CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
477	{ MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
478	{ MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
479	{ MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
480	{ MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
481	{ MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
482	{ MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
483	{ MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
484	{ PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
485	{ CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
486
487	/* Assertions. */
488	{ MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
489	{ MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
490	{ MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
491	{ MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
492	{ MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
493	{ MA, 0, "(?<=aaa|aa|a)a", "aaa" },
494	{ MA, 2, "(?<=aaa|aa|a)a", "aaa" },
495	{ MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
496	{ MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
497	{ MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
498	{ MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
499	{ MUA, 0, "((?(?=(a))a)+k)", "bbak" },
500	{ MUA, 0, "((?(?=a)a)+k)", "bbak" },
501	{ MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
502	{ MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
503	{ MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
504	{ MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
505	{ MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
506	{ MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
507	{ MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
508	{ MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
509	{ MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
510	{ MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
511	{ MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
512
513	/* Not empty, ACCEPT, FAIL */
514	{ MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
515	{ MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
516	{ MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
517	{ MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
518	{ MUA, 0, "a(*ACCEPT)b", "ab" },
519	{ MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
520	{ MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
521	{ MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
522	{ MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
523	{ MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
524	{ MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
525	{ MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
526	{ MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
527	{ MUA, 0, "((a(*ACCEPT)b))", "ab" },
528	{ MUA, 0, "(a(*FAIL)a|a)", "aaa" },
529	{ MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
530	{ MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
531	{ MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
532	{ MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
533
534	/* Conditional blocks. */
535	{ MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
536	{ MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
537	{ MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
538	{ MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
539	{ MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
540	{ MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
541	{ MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
542	{ MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
543	{ MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
544	{ MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
545	{ MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
546	{ MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
547	{ MUA, 0, "(?(?=a)ab)", "a" },
548	{ MUA, 0, "(?(?<!b)c)", "b" },
549	{ MUA, 0, "(?(DEFINE)a(b))", "a" },
550	{ MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
551	{ MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
552	{ MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
553	{ MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
554	{ MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
555	{ MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
556	{ MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
557	{ MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
558	{ MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
559	{ MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
560	{ MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
561	{ MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
562	{ MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
563	{ MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
564	{ MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
565	{ MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
566	{ MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
567	{ MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
568	{ MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
569
570	/* Set start of match. */
571	{ MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
572	{ MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
573	{ MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
574	{ MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
575	{ MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
576
577	/* First line. */
578	{ MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
579	{ MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
580	{ MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
581	{ MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
582	{ MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
583	{ MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
584	{ MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
585	{ MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
586	{ PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
587	{ PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
588	{ PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
589	{ PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
590	{ PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
591	{ PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
592	{ PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
593	{ PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
594	{ PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
595
596	/* Recurse. */
597	{ MUA, 0, "(a)(?1)", "aa" },
598	{ MUA, 0, "((a))(?1)", "aa" },
599	{ MUA, 0, "(b|a)(?1)", "aa" },
600	{ MUA, 0, "(b|(a))(?1)", "aa" },
601	{ MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
602	{ MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
603	{ MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
604	{ MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
605	{ MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
606	{ MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
607	{ MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
608	{ MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
609	{ MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
610	{ MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
611	{ MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
612	{ MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
613	{ MUA, 0, "b|<(?R)*>", "<<b>" },
614	{ MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
615	{ MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
616	{ MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
617	{ MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
618	{ MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
619	{ MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
620	{ MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
621	{ MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
622
623	/* 16 bit specific tests. */
624	{ CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
625	{ CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
626	{ CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
627	{ CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
628	{ CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
629	{ CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
630	{ CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
631	{ CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
632	{ CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
633	{ CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
634	{ CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
635	{ CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
636	{ CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
637	{ CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
638	{ CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
639	{ CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
640	{ MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
641	{ MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
642	{ CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
643	{ CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
644	{ CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
645	{ CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
646	{ CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
647	{ CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
648	{ CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
649	{ PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
650	{ PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
651	{ 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
652	{ 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
653	{ 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
654	{ 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
655
656	/* Partial matching. */
657	{ MUA | PCRE_PARTIAL_SOFT, 0, "ab", "a" },
658	{ MUA | PCRE_PARTIAL_SOFT, 0, "ab|a", "a" },
659	{ MUA | PCRE_PARTIAL_HARD, 0, "ab|a", "a" },
660	{ MUA | PCRE_PARTIAL_SOFT, 0, "\\b#", "a" },
661	{ MUA | PCRE_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
662	{ MUA | PCRE_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
663	{ MUA | PCRE_PARTIAL_SOFT, 0, "a\\B", "a" },
664	{ MUA | PCRE_PARTIAL_HARD, 0, "a\\b", "a" },
665
666	/* (*MARK) verb. */
667	{ MUA, 0, "a(*MARK:aa)a", "ababaa" },
668	{ MUA, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
669	{ MUA, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
670	{ MUA, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
671	{ MUA, 0, "(?>a(*:aa))b|ac", "ac" },
672	{ MUA, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
673	{ MUA, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
674	{ MUA, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
675	{ MUA, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
676	{ MUA, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
677	{ MUA, 0 | F_NOMATCH, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
678	{ MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
679	{ MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
680	{ MUA, 0 | F_NOMATCH, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
681
682	/* (*COMMIT) verb. */
683	{ MUA, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
684	{ MUA, 0, "aa(*COMMIT)b", "xaxaab" },
685	{ MUA, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
686	{ MUA, 0, "(?=a(*COMMIT)b|ac)ac|(*:m)(a)c", "ac" },
687	{ MUA, 0, "(?!a(*COMMIT)(*:msg)b)a(c)|cd", "acd" },
688
689	/* Deep recursion. */
690	{ MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
691	{ MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
692	{ MUA, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
693
694	/* Deep recursion: Stack limit reached. */
695	{ MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
696	{ MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
697	{ MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
698	{ MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
699	{ MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
700
701	{ 0, 0, NULL, NULL }
702};
703
704static const unsigned char *tables(int mode)
705{
706	/* The purpose of this function to allow valgrind
707	for reporting invalid reads and writes. */
708	static unsigned char *tables_copy;
709	const char *errorptr;
710	int erroroffset;
711	unsigned char *default_tables;
712#ifdef SUPPORT_PCRE8
713	pcre *regex;
714	char null_str[1] = { 0 };
715#else
716	pcre16 *regex;
717	PCRE_UCHAR16 null_str[1] = { 0 };
718#endif
719
720	if (mode) {
721		if (tables_copy)
722			free(tables_copy);
723		tables_copy = NULL;
724		return NULL;
725	}
726
727	if (tables_copy)
728		return tables_copy;
729
730	default_tables = NULL;
731#ifdef SUPPORT_PCRE8
732	regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
733	if (regex) {
734		pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
735		pcre_free(regex);
736	}
737#else
738	regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
739	if (regex) {
740		pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
741		pcre16_free(regex);
742	}
743#endif
744	/* Shouldn't ever happen. */
745	if (!default_tables)
746		return NULL;
747
748	/* Unfortunately this value cannot get from pcre_fullinfo.
749	Since this is a test program, this is acceptable at the moment. */
750	tables_copy = (unsigned char *)malloc(1088);
751	if (!tables_copy)
752		return NULL;
753
754	memcpy(tables_copy, default_tables, 1088);
755	return tables_copy;
756}
757
#ifdef SUPPORT_PCRE8
/* JIT stack callback for the 8 bit library: the opaque argument registered
by setstack8 is the JIT stack itself, so it is simply handed back. */
static pcre_jit_stack* callback8(void *arg)
{
	pcre_jit_stack *jit_stack = (pcre_jit_stack *)arg;

	return jit_stack;
}
#endif
764
#ifdef SUPPORT_PCRE16
/* JIT stack callback for the 16 bit library: the opaque argument registered
by setstack16 is the JIT stack itself, so it is simply handed back. */
static pcre16_jit_stack* callback16(void *arg)
{
	pcre16_jit_stack *jit_stack = (pcre16_jit_stack *)arg;

	return jit_stack;
}
#endif
771
#ifdef SUPPORT_PCRE8
/* Manages the single JIT stack shared by all 8 bit tests.

  extra != NULL: allocate the shared stack on first use and attach it to
                 'extra' through callback8.
  extra == NULL: free the shared stack, if one exists. */
static void setstack8(pcre_extra *extra)
{
	static pcre_jit_stack *shared_stack;

	if (extra) {
		if (!shared_stack)
			shared_stack = pcre_jit_stack_alloc(1, 1024 * 1024);
		/* The stack pointer travels as the callback argument. */
		pcre_assign_jit_stack(extra, callback8, shared_stack);
		return;
	}

	/* Release request. */
	if (shared_stack)
		pcre_jit_stack_free(shared_stack);
	shared_stack = NULL;
}
#endif /* SUPPORT_PCRE8 */
790
#ifdef SUPPORT_PCRE16
/* Manages the single JIT stack shared by all 16 bit tests.

  extra != NULL: allocate the shared stack on first use and attach it to
                 'extra' through callback16.
  extra == NULL: free the shared stack, if one exists. */
static void setstack16(pcre16_extra *extra)
{
	static pcre16_jit_stack *shared_stack;

	if (extra) {
		if (!shared_stack)
			shared_stack = pcre16_jit_stack_alloc(1, 1024 * 1024);
		/* The stack pointer travels as the callback argument. */
		pcre16_assign_jit_stack(extra, callback16, shared_stack);
		return;
	}

	/* Release request. */
	if (shared_stack)
		pcre16_jit_stack_free(shared_stack);
	shared_stack = NULL;
}
#endif /* SUPPORT_PCRE16 */
809
810#ifdef SUPPORT_PCRE16
811
812static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
813{
814	unsigned char *iptr = (unsigned char*)input;
815	unsigned short *optr = (unsigned short *)output;
816	unsigned int c;
817
818	if (max_length == 0)
819		return 0;
820
821	while (*iptr && max_length > 1) {
822		c = 0;
823		if (offsetmap)
824			*offsetmap++ = (int)(iptr - (unsigned char*)input);
825
826		if (!(*iptr & 0x80))
827			c = *iptr++;
828		else if (!(*iptr & 0x20)) {
829			c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
830			iptr += 2;
831		} else if (!(*iptr & 0x10)) {
832			c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
833			iptr += 3;
834		} else if (!(*iptr & 0x08)) {
835			c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
836			iptr += 4;
837		}
838
839		if (c < 65536) {
840			*optr++ = c;
841			max_length--;
842		} else if (max_length <= 2) {
843			*optr = '\0';
844			return (int)(optr - (unsigned short *)output);
845		} else {
846			c -= 0x10000;
847			*optr++ = 0xd800 | ((c >> 10) & 0x3ff);
848			*optr++ = 0xdc00 | (c & 0x3ff);
849			max_length -= 2;
850			if (offsetmap)
851				offsetmap++;
852		}
853	}
854	if (offsetmap)
855		*offsetmap = (int)(iptr - (unsigned char*)input);
856	*optr = '\0';
857	return (int)(optr - (unsigned short *)output);
858}
859
860static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
861{
862	unsigned char *iptr = (unsigned char*)input;
863	unsigned short *optr = (unsigned short *)output;
864
865	if (max_length == 0)
866		return 0;
867
868	while (*iptr && max_length > 1) {
869		*optr++ = *iptr++;
870		max_length--;
871	}
872	*optr = '\0';
873	return (int)(optr - (unsigned short *)output);
874}
875
/* Number of elements in each of the two scratch arrays below. */
#define REGTEST_MAX_LENGTH 4096
/* Scratch buffer that receives the 16 bit form of the current pattern and,
later, of the current subject string. */
static PCRE_UCHAR16 regtest_buf[REGTEST_MAX_LENGTH];
/* Filled by convert_utf8_to_utf16 for the subject string: maps an index in
regtest_buf back to a byte offset in the original 8 bit subject. */
static int regtest_offsetmap[REGTEST_MAX_LENGTH];
879
880#endif /* SUPPORT_PCRE16 */
881
/* Returns 1 when every byte of the zero terminated string 'input' is in the
0..127 range (an empty string qualifies), and 0 as soon as a byte above 127
is found. */
static int check_ascii(const char *input)
{
	const unsigned char *cursor;

	for (cursor = (unsigned char *)input; *cursor != 0; cursor++) {
		if (*cursor >= 128)
			return 0;
	}
	return 1;
}
892
893static int regression_tests(void)
894{
895	struct regression_test_case *current = regression_test_cases;
896	const char *error;
897	char *cpu_info;
898	int i, err_offs;
899	int is_successful, is_ascii_pattern, is_ascii_input;
900	int total = 0;
901	int successful = 0;
902	int successful_row = 0;
903	int counter = 0;
904	int study_mode;
905#ifdef SUPPORT_PCRE8
906	pcre *re8;
907	pcre_extra *extra8;
908	pcre_extra dummy_extra8;
909	int ovector8_1[32];
910	int ovector8_2[32];
911	int return_value8_1, return_value8_2;
912	unsigned char *mark8_1, *mark8_2;
913	int utf8 = 0, ucp8 = 0;
914	int disabled_flags8 = 0;
915#endif
916#ifdef SUPPORT_PCRE16
917	pcre16 *re16;
918	pcre16_extra *extra16;
919	pcre16_extra dummy_extra16;
920	int ovector16_1[32];
921	int ovector16_2[32];
922	int return_value16_1, return_value16_2;
923	PCRE_UCHAR16 *mark16_1, *mark16_2;
924	int utf16 = 0, ucp16 = 0;
925	int disabled_flags16 = 0;
926	int length16;
927#endif
928
929	/* This test compares the behaviour of interpreter and JIT. Although disabling
930	utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
931	still considered successful from pcre_jit_test point of view. */
932
933#ifdef SUPPORT_PCRE8
934	pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info);
935#else
936	pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info);
937#endif
938
939	printf("Running JIT regression tests\n");
940	printf("  target CPU of SLJIT compiler: %s\n", cpu_info);
941
942#ifdef SUPPORT_PCRE8
943	pcre_config(PCRE_CONFIG_UTF8, &utf8);
944	pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp8);
945	if (!utf8)
946		disabled_flags8 |= PCRE_UTF8;
947	if (!ucp8)
948		disabled_flags8 |= PCRE_UCP;
949	printf("  in  8 bit mode with utf8  %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp8 ? "enabled" : "disabled");
950#endif
951#ifdef SUPPORT_PCRE16
952	pcre16_config(PCRE_CONFIG_UTF16, &utf16);
953	pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp16);
954	if (!utf16)
955		disabled_flags16 |= PCRE_UTF8;
956	if (!ucp16)
957		disabled_flags16 |= PCRE_UCP;
958	printf("  in 16 bit mode with utf16 %s and ucp %s:\n", utf16 ? "enabled" : "disabled", ucp16 ? "enabled" : "disabled");
959#endif
960
961	while (current->pattern) {
962		/* printf("\nPattern: %s :\n", current->pattern); */
963		total++;
964		if (current->start_offset & F_PROPERTY) {
965			is_ascii_pattern = 0;
966			is_ascii_input = 0;
967		} else {
968			is_ascii_pattern = check_ascii(current->pattern);
969			is_ascii_input = check_ascii(current->input);
970		}
971
972		if (current->flags & PCRE_PARTIAL_SOFT)
973			study_mode = PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE;
974		else if (current->flags & PCRE_PARTIAL_HARD)
975			study_mode = PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
976		else
977			study_mode = PCRE_STUDY_JIT_COMPILE;
978		error = NULL;
979#ifdef SUPPORT_PCRE8
980		re8 = NULL;
981		if (!(current->start_offset & F_NO8))
982			re8 = pcre_compile(current->pattern,
983				current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags8),
984				&error, &err_offs, tables(0));
985
986		extra8 = NULL;
987		if (re8) {
988			error = NULL;
989			extra8 = pcre_study(re8, study_mode, &error);
990			if (!extra8) {
991				printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
992				pcre_free(re8);
993				re8 = NULL;
994			}
995			if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
996				printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
997				pcre_free_study(extra8);
998				pcre_free(re8);
999				re8 = NULL;
1000			}
1001			extra8->flags |= PCRE_EXTRA_MARK;
1002		} else if (((utf8 && ucp8) || is_ascii_pattern) && !(current->start_offset & F_NO8))
1003			printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern);
1004#endif
1005#ifdef SUPPORT_PCRE16
1006		if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
1007			convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH);
1008		else
1009			copy_char8_to_char16(current->pattern, regtest_buf, REGTEST_MAX_LENGTH);
1010
1011		re16 = NULL;
1012		if (!(current->start_offset & F_NO16))
1013			re16 = pcre16_compile(regtest_buf,
1014				current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags16),
1015				&error, &err_offs, tables(0));
1016
1017		extra16 = NULL;
1018		if (re16) {
1019			error = NULL;
1020			extra16 = pcre16_study(re16, study_mode, &error);
1021			if (!extra16) {
1022				printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
1023				pcre16_free(re16);
1024				re16 = NULL;
1025			}
1026			if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1027				printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
1028				pcre16_free_study(extra16);
1029				pcre16_free(re16);
1030				re16 = NULL;
1031			}
1032			extra16->flags |= PCRE_EXTRA_MARK;
1033		} else if (((utf16 && ucp16) || is_ascii_pattern) && !(current->start_offset & F_NO16))
1034			printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern);
1035#endif
1036
1037		counter++;
1038		if ((counter & 0x3) != 0) {
1039#ifdef SUPPORT_PCRE8
1040			setstack8(NULL);
1041#endif
1042#ifdef SUPPORT_PCRE16
1043			setstack16(NULL);
1044#endif
1045		}
1046
1047#ifdef SUPPORT_PCRE8
1048		return_value8_1 = -1000;
1049		return_value8_2 = -1000;
1050		for (i = 0; i < 32; ++i)
1051			ovector8_1[i] = -2;
1052		for (i = 0; i < 32; ++i)
1053			ovector8_2[i] = -2;
1054		if (re8) {
1055			mark8_1 = NULL;
1056			mark8_2 = NULL;
1057			setstack8(extra8);
1058			extra8->mark = &mark8_1;
1059			return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1060				current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32);
1061			memset(&dummy_extra8, 0, sizeof(pcre_extra));
1062			dummy_extra8.flags = PCRE_EXTRA_MARK;
1063			dummy_extra8.mark = &mark8_2;
1064			return_value8_2 = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1065				current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_2, 32);
1066		}
1067#endif
1068
1069#ifdef SUPPORT_PCRE16
1070		return_value16_1 = -1000;
1071		return_value16_2 = -1000;
1072		for (i = 0; i < 32; ++i)
1073			ovector16_1[i] = -2;
1074		for (i = 0; i < 32; ++i)
1075			ovector16_2[i] = -2;
1076		if (re16) {
1077			mark16_1 = NULL;
1078			mark16_2 = NULL;
1079			setstack16(extra16);
1080			if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
1081				length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH);
1082			else
1083				length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH);
1084			extra16->mark = &mark16_1;
1085			return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1086				current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32);
1087			memset(&dummy_extra16, 0, sizeof(pcre16_extra));
1088			dummy_extra16.flags = PCRE_EXTRA_MARK;
1089			dummy_extra16.mark = &mark16_2;
1090			return_value16_2 = pcre16_exec(re16, &dummy_extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1091				current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_2, 32);
1092		}
1093#endif
1094
1095		/* printf("[%d-%d|%d-%d|%d-%d]%s", return_value8_1, return_value16_1, ovector8_1[0], ovector8_1[1], ovector16_1[0], ovector16_1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
1096
1097		/* If F_DIFF is set, just run the test, but do not compare the results.
1098		Segfaults can still be captured. */
1099
1100		is_successful = 1;
1101		if (!(current->start_offset & F_DIFF)) {
1102#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1103			if (utf8 == utf16 && !(current->start_offset & F_FORCECONV)) {
1104				/* All results must be the same. */
1105				if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) {
1106					printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n",
1107						return_value8_1, return_value8_2, return_value16_1, return_value16_2,
1108						total, current->pattern, current->input);
1109					is_successful = 0;
1110				} else if (return_value8_1 >= 0 || return_value8_1 == PCRE_ERROR_PARTIAL) {
1111					if (return_value8_1 == PCRE_ERROR_PARTIAL) {
1112						return_value8_1 = 2;
1113						return_value16_1 = 2;
1114					} else {
1115						return_value8_1 *= 2;
1116						return_value16_1 *= 2;
1117					}
1118
1119					/* Transform back the results. */
1120					if (current->flags & PCRE_UTF8) {
1121						for (i = 0; i < return_value8_1; ++i) {
1122							if (ovector16_1[i] >= 0)
1123								ovector16_1[i] = regtest_offsetmap[ovector16_1[i]];
1124							if (ovector16_2[i] >= 0)
1125								ovector16_2[i] = regtest_offsetmap[ovector16_2[i]];
1126						}
1127					}
1128
1129					for (i = 0; i < return_value8_1; ++i)
1130						if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1131							printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n",
1132								i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1133								total, current->pattern, current->input);
1134							is_successful = 0;
1135						}
1136				}
1137			} else {
1138#endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1139				/* Only the 8 bit and 16 bit results must be equal. */
1140#ifdef SUPPORT_PCRE8
1141				if (return_value8_1 != return_value8_2) {
1142					printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1143						return_value8_1, return_value8_2, total, current->pattern, current->input);
1144					is_successful = 0;
1145				} else if (return_value8_1 >= 0 || return_value8_1 == PCRE_ERROR_PARTIAL) {
1146					if (return_value8_1 == PCRE_ERROR_PARTIAL)
1147						return_value8_1 = 2;
1148					else
1149						return_value8_1 *= 2;
1150
1151					for (i = 0; i < return_value8_1; ++i)
1152						if (ovector8_1[i] != ovector8_2[i]) {
1153							printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1154								i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1155							is_successful = 0;
1156						}
1157				}
1158#endif
1159
1160#ifdef SUPPORT_PCRE16
1161				if (return_value16_1 != return_value16_2) {
1162					printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1163						return_value16_1, return_value16_2, total, current->pattern, current->input);
1164					is_successful = 0;
1165				} else if (return_value16_1 >= 0 || return_value16_1 == PCRE_ERROR_PARTIAL) {
1166					if (return_value16_1 == PCRE_ERROR_PARTIAL)
1167						return_value16_1 = 2;
1168					else
1169						return_value16_1 *= 2;
1170
1171					for (i = 0; i < return_value16_1; ++i)
1172						if (ovector16_1[i] != ovector16_2[i]) {
1173							printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1174								i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1175							is_successful = 0;
1176						}
1177				}
1178#endif
1179
1180#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1181			}
1182#endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1183		}
1184
1185		if (is_successful) {
1186#ifdef SUPPORT_PCRE8
1187			if (!(current->start_offset & F_NO8) && ((utf8 && ucp8) || is_ascii_input)) {
1188				if (return_value8_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1189					printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1190						total, current->pattern, current->input);
1191					is_successful = 0;
1192				}
1193
1194				if (return_value8_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1195					printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1196						total, current->pattern, current->input);
1197					is_successful = 0;
1198				}
1199			}
1200#endif
1201#ifdef SUPPORT_PCRE16
1202			if (!(current->start_offset & F_NO16) && ((utf16 && ucp16) || is_ascii_input)) {
1203				if (return_value16_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1204					printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1205						total, current->pattern, current->input);
1206					is_successful = 0;
1207				}
1208
1209				if (return_value16_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1210					printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1211						total, current->pattern, current->input);
1212					is_successful = 0;
1213				}
1214			}
1215#endif
1216		}
1217
1218		if (is_successful) {
1219#ifdef SUPPORT_PCRE8
1220			if (mark8_1 != mark8_2) {
1221				printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1222					total, current->pattern, current->input);
1223				is_successful = 0;
1224			}
1225#endif
1226#ifdef SUPPORT_PCRE16
1227			if (mark16_1 != mark16_2) {
1228				printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1229					total, current->pattern, current->input);
1230				is_successful = 0;
1231			}
1232#endif
1233		}
1234
1235#ifdef SUPPORT_PCRE8
1236		if (re8) {
1237			pcre_free_study(extra8);
1238			pcre_free(re8);
1239		}
1240#endif
1241#ifdef SUPPORT_PCRE16
1242		if (re16) {
1243			pcre16_free_study(extra16);
1244			pcre16_free(re16);
1245		}
1246#endif
1247
1248		if (is_successful) {
1249			successful++;
1250			successful_row++;
1251			printf(".");
1252			if (successful_row >= 60) {
1253				successful_row = 0;
1254				printf("\n");
1255			}
1256		} else
1257			successful_row = 0;
1258
1259		fflush(stdout);
1260		current++;
1261	}
1262	tables(1);
1263#ifdef SUPPORT_PCRE8
1264	setstack8(NULL);
1265#endif
1266#ifdef SUPPORT_PCRE16
1267	setstack16(NULL);
1268#endif
1269
1270	if (total == successful) {
1271		printf("\nAll JIT regression tests are successfully passed.\n");
1272		return 0;
1273	} else {
1274		printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1275		return 1;
1276	}
1277}
1278
1279/* End of pcre_jit_test.c */
1280