1/*
    Regular expressions package test suite.
3*/
4module std.regex.internal.tests;
5
6package(std.regex):
7
8import std.conv, std.exception, std.meta, std.range,
9    std.typecons, std.regex;
10
11import std.uni : Escapables; // characters that need escaping
12
13debug(std_regex_test) import std.stdio;
14
@safe unittest
{
    // Smoke test: a handful of patterns must compile, and both matching
    // entry points must agree on trivial hit / no-hit inputs.
    foreach (pattern; ["(a|b)*",
                       `(?:([0-9A-F]+)\.\.([0-9A-F]+)|([0-9A-F]+))\s*;\s*(.*)\s*#`,
                       "abc|edf|ighrg"])
    {
        regex(pattern);
    }

    auto literalRe = regex("abc");
    auto groupRe = regex("(gylba)");

    // match()
    assert(match("abcdef", literalRe).hit == "abc");
    assert(!match("wida", groupRe));
    assert(match("abc", "abc".dup)); // pattern supplied as a mutable char[]

    // bmatch()
    assert(bmatch("abcdef", literalRe).hit == "abc");
    assert(!bmatch("wida", groupRe));
    assert(bmatch("abc", "abc".dup)); // pattern supplied as a mutable char[]

    // A default-initialised Regex reports empty until a pattern is assigned.
    Regex!char compiled;
    assert(compiled.empty);
    compiled = regex("test");
    assert(!compiled.empty);
}
33
34/* The test vectors in this file are altered from Henry Spencer's regexp
35   test code. His copyright notice is:
36
37        Copyright (c) 1986 by University of Toronto.
38        Written by Henry Spencer.  Not derived from licensed software.
39
40        Permission is granted to anyone to use this software for any
41        purpose on any computer system, and to redistribute it freely,
42        subject to the following restrictions:
43
44        1. The author is not responsible for the consequences of use of
45                this software, no matter how awful, even if they arise
46                from defects in it.
47
48        2. The origin of this software must not be misrepresented, either
49                by explicit claim or by omission.
50
51        3. Altered versions must be plainly marked as such, and must not
52                be misrepresented as being the original software.
53
54
55 */
56
57@safe unittest
58{
59    struct TestVectors
60    {
61        string pattern;
62        string input;
63        string result;
64        string format;
65        string replace;
66        string flags;
67    }
68
69    static immutable TestVectors[] tv = [
70        TestVectors(  "a\\b",       "a",  "y",    "$&",    "a" ),
71        TestVectors(  "(a)b\\1",   "abaab","y",    "$&",    "aba" ),
72        TestVectors(  "()b\\1",     "aaab", "y",    "$&",    "b" ),
73        TestVectors(  "abc",       "abc",  "y",    "$&",    "abc" ),
74        TestVectors(  "abc",       "xbc",  "n",    "-",    "-" ),
75        TestVectors(  "abc",       "axc",  "n",    "-",    "-" ),
76        TestVectors(  "abc",       "abx",  "n",    "-",    "-" ),
77        TestVectors(  "abc",       "xabcy","y",    "$&",    "abc" ),
78        TestVectors(  "abc",       "ababc","y",    "$&",    "abc" ),
79        TestVectors(  "ab*c",      "abc",  "y",    "$&",    "abc" ),
80        TestVectors(  "ab*bc",     "abc",  "y",    "$&",    "abc" ),
81        TestVectors(  "ab*bc",     "abbc", "y",    "$&",    "abbc" ),
82        TestVectors(  "ab*bc",     "abbbbc","y",   "$&",    "abbbbc" ),
83        TestVectors(  "ab+bc",     "abbc", "y",    "$&",    "abbc" ),
84        TestVectors(  "ab+bc",     "abc",  "n",    "-",    "-" ),
85        TestVectors(  "ab+bc",     "abq",  "n",    "-",    "-" ),
86        TestVectors(  "ab+bc",     "abbbbc","y",   "$&",    "abbbbc" ),
87        TestVectors(  "ab?bc",     "abbc", "y",    "$&",    "abbc" ),
88        TestVectors(  "ab?bc",     "abc",  "y",    "$&",    "abc" ),
89        TestVectors(  "ab?bc",     "abbbbc","n",   "-",    "-" ),
90        TestVectors(  "ab?c",      "abc",  "y",    "$&",    "abc" ),
91        TestVectors(  "^abc$",     "abc",  "y",    "$&",    "abc" ),
92        TestVectors(  "^abc$",     "abcc", "n",    "-",    "-" ),
93        TestVectors(  "^abc",      "abcc", "y",    "$&",    "abc" ),
94        TestVectors(  "^abc$",     "aabc", "n",    "-",    "-" ),
95        TestVectors(  "abc$",      "aabc", "y",    "$&",    "abc" ),
96        TestVectors(  "^",         "abc",  "y",    "$&",    "" ),
97        TestVectors(  "$",         "abc",  "y",    "$&",    "" ),
98        TestVectors(  "a.c",       "abc",  "y",    "$&",    "abc" ),
99        TestVectors(  "a.c",       "axc",  "y",    "$&",    "axc" ),
100        TestVectors(  "a.*c",      "axyzc","y",    "$&",    "axyzc" ),
101        TestVectors(  "a.*c",      "axyzd","n",    "-",    "-" ),
102        TestVectors(  "a[bc]d",    "abc",  "n",    "-",    "-" ),
103        TestVectors(  "a[bc]d",    "abd",  "y",    "$&",    "abd" ),
104        TestVectors(  "a[b-d]e",   "abd",  "n",    "-",    "-" ),
105        TestVectors(  "a[b-d]e",   "ace",  "y",    "$&",    "ace" ),
106        TestVectors(  "a[b-d]",    "aac",  "y",    "$&",    "ac" ),
107        TestVectors(  "a[-b]",     "a-",   "y",    "$&",    "a-" ),
108        TestVectors(  "a[b-]",     "a-",   "y",    "$&",    "a-" ),
109        TestVectors(  "a[b-a]",    "-",    "c",    "-",    "-" ),
110        TestVectors(  "a[]b",      "-",    "c",    "-",    "-" ),
111        TestVectors(  "a[",        "-",    "c",    "-",    "-" ),
112        TestVectors(  "a]",        "a]",   "y",    "$&",    "a]" ),
113        TestVectors(  "a[\\]]b",     "a]b",  "y",  "$&",    "a]b" ),
114        TestVectors(  "a[^bc]d",   "aed",  "y",    "$&",    "aed" ),
115        TestVectors(  "a[^bc]d",   "abd",  "n",    "-",    "-" ),
116        TestVectors(  "a[^-b]c",   "adc",  "y",    "$&",    "adc" ),
117        TestVectors(  "a[^-b]c",   "a-c",  "n",    "-",    "-" ),
118        TestVectors(  "a[^\\]b]c",   "adc",  "y",  "$&",    "adc" ),
119        TestVectors(  "ab|cd",     "abc",  "y",    "$&",    "ab" ),
120        TestVectors(  "ab|cd",     "abcd", "y",    "$&",    "ab" ),
121        TestVectors(  "()ef",      "def",  "y",    "$&-$1",        "ef-" ),
122        TestVectors(  "()*",       "-",    "y",    "-",    "-" ),
123        TestVectors(  "*a",        "-",    "c",    "-",    "-" ),
124        TestVectors(  "^*",        "-",    "y",    "-",    "-" ),
125        TestVectors(  "$*",        "-",    "y",    "-",    "-" ),
126        TestVectors(  "(*)b",      "-",    "c",    "-",    "-" ),
127        TestVectors(  "$b",        "b",    "n",    "-",    "-" ),
128        TestVectors(  "a\\",       "-",    "c",    "-",    "-" ),
129        TestVectors(  "a\\(b",     "a(b",  "y",    "$&-$1",        "a(b-" ),
130        TestVectors(  "a\\(*b",    "ab",   "y",    "$&",    "ab" ),
131        TestVectors(  "a\\(*b",    "a((b", "y",    "$&",    "a((b" ),
132        TestVectors(  "a\\\\b",    "a\\b", "y",    "$&",    "a\\b" ),
133        TestVectors(  "abc)",      "-",    "c",    "-",    "-" ),
134        TestVectors(  "(abc",      "-",    "c",    "-",    "-" ),
135        TestVectors(  "((a))",     "abc",  "y",    "$&-$1-$2",    "a-a-a" ),
136        TestVectors(  "(a)b(c)",   "abc",  "y",    "$&-$1-$2",    "abc-a-c" ),
137        TestVectors(  "a+b+c",     "aabbabc","y",  "$&",    "abc" ),
138        TestVectors(  "a**",       "-",    "c",    "-",    "-" ),
139        TestVectors(  "a*?a",      "aa",   "y",    "$&",    "a" ),
140        TestVectors(  "(a*)*",     "aaa",  "y",    "-",    "-" ),
141        TestVectors(  "(a*)+",     "aaa",  "y",    "-",    "-" ),
142        TestVectors(  "(a|)*",     "-",    "y",    "-",    "-" ),
143        TestVectors(  "(a*|b)*",   "aabb", "y",    "-",    "-" ),
144        TestVectors(  "(a|b)*",    "ab",   "y",    "$&-$1",        "ab-b" ),
145        TestVectors(  "(a+|b)*",   "ab",   "y",    "$&-$1",        "ab-b" ),
146        TestVectors(  "(a+|b)+",   "ab",   "y",    "$&-$1",        "ab-b" ),
147        TestVectors(  "(a+|b)?",   "ab",   "y",    "$&-$1",        "a-a" ),
148        TestVectors(  "[^ab]*",    "cde",  "y",    "$&",    "cde" ),
149        TestVectors(  "(^)*",      "-",    "y",    "-",    "-" ),
150        TestVectors(  "(ab|)*",    "-",    "y",    "-",    "-" ),
151        TestVectors(  ")(",        "-",    "c",    "-",    "-" ),
152        TestVectors(  "",  "abc",  "y",    "$&",    "" ),
153        TestVectors(  "abc",       "",     "n",    "-",    "-" ),
154        TestVectors(  "a*",        "",     "y",    "$&",    "" ),
155        TestVectors(  "([abc])*d", "abbbcd",       "y",    "$&-$1",        "abbbcd-c" ),
156        TestVectors(  "([abc])*bcd", "abcd",       "y",    "$&-$1",        "abcd-a" ),
157        TestVectors(  "a|b|c|d|e", "e",    "y",    "$&",    "e" ),
158        TestVectors(  "(a|b|c|d|e)f", "ef",        "y",    "$&-$1",        "ef-e" ),
159        TestVectors(  "((a*|b))*", "aabb", "y",    "-",    "-" ),
160        TestVectors(  "abcd*efg",  "abcdefg",      "y",    "$&",    "abcdefg" ),
161        TestVectors(  "ab*",       "xabyabbbz",    "y",    "$&",    "ab" ),
162        TestVectors(  "ab*",       "xayabbbz",     "y",    "$&",    "a" ),
163        TestVectors(  "(ab|cd)e",  "abcde",        "y",    "$&-$1",        "cde-cd" ),
164        TestVectors(  "[abhgefdc]ij",      "hij",  "y",    "$&",    "hij" ),
165        TestVectors(  "^(ab|cd)e", "abcde",        "n",    "x$1y",        "xy" ),
166        TestVectors(  "(abc|)ef",  "abcdef",       "y",    "$&-$1",        "ef-" ),
167        TestVectors(  "(a|b)c*d",  "abcd",         "y",    "$&-$1",        "bcd-b" ),
168        TestVectors(  "(ab|ab*)bc",        "abc",  "y",    "$&-$1",        "abc-a" ),
169        TestVectors(  "a([bc]*)c*",        "abc",  "y",    "$&-$1",        "abc-bc" ),
170        TestVectors(  "a([bc]*)(c*d)",     "abcd", "y",    "$&-$1-$2",    "abcd-bc-d" ),
171        TestVectors(  "a([bc]+)(c*d)",     "abcd", "y",    "$&-$1-$2",    "abcd-bc-d" ),
172        TestVectors(  "a([bc]*)(c+d)",     "abcd", "y",    "$&-$1-$2",    "abcd-b-cd" ),
173        TestVectors(  "a[bcd]*dcdcde",     "adcdcde",      "y",    "$&",    "adcdcde" ),
174        TestVectors(  "a[bcd]+dcdcde",     "adcdcde",      "n",    "-",    "-" ),
175        TestVectors(  "(ab|a)b*c", "abc",           "y",    "$&-$1",        "abc-ab" ),
176        TestVectors(  "((a)(b)c)(d)",      "abcd",  "y",    "$1-$2-$3-$4",      "abc-a-b-d" ),
177        TestVectors(  "[a-zA-Z_][a-zA-Z0-9_]*",    "alpha",        "y",    "$&",    "alpha" ),
178        TestVectors(  "^a(bc+|b[eh])g|.h$",        "abh",  "y",    "$&-$1",        "bh-" ),
179        TestVectors(  "(bc+d$|ef*g.|h?i(j|k))",    "effgz",        "y",    "$&-$1-$2",    "effgz-effgz-" ),
180        TestVectors(  "(bc+d$|ef*g.|h?i(j|k))",    "ij",   "y",    "$&-$1-$2",    "ij-ij-j" ),
181        TestVectors(  "(bc+d$|ef*g.|h?i(j|k))",    "effg", "n",    "-",    "-" ),
182        TestVectors(  "(bc+d$|ef*g.|h?i(j|k))",    "bcdd", "n",    "-",    "-" ),
183        TestVectors(  "(bc+d$|ef*g.|h?i(j|k))",    "reffgz",       "y",    "$&-$1-$2",    "effgz-effgz-" ),
184        TestVectors(  "(((((((((a)))))))))",       "a",    "y",    "$&",    "a" ),
185        TestVectors(  "multiple words of text",    "uh-uh",        "n",    "-",    "-" ),
186        TestVectors(  "multiple words",    "multiple words, yeah", "y",    "$&",    "multiple words" ),
187        TestVectors(  "(.*)c(.*)", "abcde",                "y",    "$&-$1-$2",    "abcde-ab-de" ),
188        TestVectors(  "\\((.*), (.*)\\)",  "(a, b)",       "y",    "($2, $1)",   "(b, a)" ),
189        TestVectors(  "abcd",      "abcd",                   "y",    "$&-&-$$$&",  "abcd-&-$abcd" ),
190        TestVectors(  "a(bc)d",    "abcd",                 "y",    "$1-$$1-$$$1",    "bc-$1-$bc" ),
191        TestVectors(  "[k]",                       "ab",   "n",    "-",    "-" ),
192        TestVectors(  "[ -~]*",                    "abc",  "y",    "$&",    "abc" ),
193        TestVectors(  "[ -~ -~]*",                 "abc",  "y",    "$&",    "abc" ),
194        TestVectors(  "[ -~ -~ -~]*",              "abc",  "y",    "$&",    "abc" ),
195        TestVectors(  "[ -~ -~ -~ -~]*",           "abc",  "y",    "$&",    "abc" ),
196        TestVectors(  "[ -~ -~ -~ -~ -~]*",        "abc",  "y",    "$&",    "abc" ),
197        TestVectors(  "[ -~ -~ -~ -~ -~ -~]*",     "abc",  "y",    "$&",    "abc" ),
198        TestVectors(  "[ -~ -~ -~ -~ -~ -~ -~]*",  "abc",  "y",    "$&",    "abc" ),
199        TestVectors(  "a{2}",      "candy",                "n",    "",     "" ),
200        TestVectors(  "a{2}",      "caandy",               "y",    "$&",    "aa" ),
201        TestVectors(  "a{2}",      "caaandy",              "y",    "$&",    "aa" ),
202        TestVectors(  "a{2,}",     "candy",                "n",    "",     "" ),
203        TestVectors(  "a{2,}",     "caandy",               "y",    "$&",    "aa" ),
204        TestVectors(  "a{2,}",     "caaaaaandy",           "y",    "$&",    "aaaaaa" ),
205        TestVectors(  "a{1,3}",    "cndy",                 "n",    "",     "" ),
206        TestVectors(  "a{1,3}",    "candy",                "y",    "$&",    "a" ),
207        TestVectors(  "a{1,3}",    "caandy",               "y",    "$&",    "aa" ),
208        TestVectors(  "a{1,3}",    "caaaaaandy",           "y",    "$&",    "aaa" ),
209        TestVectors(  "e?le?",     "angel",                "y",    "$&",    "el" ),
210        TestVectors(  "e?le?",     "angle",                "y",    "$&",    "le" ),
211        TestVectors(  "\\bn\\w",   "noonday",              "y",    "$&",    "no" ),
212        TestVectors(  "\\wy\\b",   "possibly yesterday",   "y",    "$&",    "ly" ),
213        TestVectors(  "\\w\\Bn",   "noonday",              "y",    "$&",    "on" ),
214        TestVectors(  "y\\B\\w",   "possibly yesterday",   "y",    "$&",    "ye" ),
215        TestVectors(  "\\cJ",      "abc\ndef",             "y",    "$&",    "\n" ),
216        TestVectors(  "\\d",       "B2 is",                "y",    "$&",    "2" ),
217        TestVectors(  "\\D",       "B2 is",                "y",    "$&",    "B" ),
218        TestVectors(  "\\s\\w*",   "foo bar",              "y",    "$&",    " bar" ),
219        TestVectors(  "\\S\\w*",   "foo bar",              "y",    "$&",    "foo" ),
220        TestVectors(  "abc",       "ababc",                "y",    "$&",    "abc" ),
221        TestVectors(  "apple(,)\\sorange\\1",      "apple, orange, cherry, peach", "y", "$&", "apple, orange," ),
222        TestVectors(  "(\\w+)\\s(\\w+)",           "John Smith", "y", "$2, $1", "Smith, John" ),
223        TestVectors(  "\\n\\f\\r\\t\\v",           "abc\n\f\r\t\vdef", "y", "$&", "\n\f\r\t\v" ),
224        TestVectors(  ".*c",       "abcde",                        "y",    "$&",    "abc" ),
225        TestVectors(  "^\\w+((;|=)\\w+)+$", "some=host=tld",    "y", "$&-$1-$2", "some=host=tld-=tld-=" ),
226        TestVectors(  "^\\w+((\\.|-)\\w+)+$", "some.host.tld",    "y", "$&-$1-$2", "some.host.tld-.tld-." ),
227        TestVectors(  "q(a|b)*q",  "xxqababqyy",                "y",    "$&-$1",        "qababq-b" ),
228        TestVectors(  "^(a)(b){0,1}(c*)",   "abcc", "y", "$1 $2 $3", "a b cc" ),
229        TestVectors(  "^(a)((b){0,1})(c*)", "abcc", "y", "$1 $2 $3", "a b b" ),
230        TestVectors(  "^(a)(b)?(c*)",       "abcc", "y", "$1 $2 $3", "a b cc" ),
231        TestVectors(  "^(a)((b)?)(c*)",     "abcc", "y", "$1 $2 $3", "a b b" ),
232        TestVectors(  "^(a)(b){0,1}(c*)",   "acc",  "y", "$1 $2 $3", "a  cc" ),
233        TestVectors(  "^(a)((b){0,1})(c*)", "acc",  "y", "$1 $2 $3", "a  " ),
234        TestVectors(  "^(a)(b)?(c*)",       "acc",  "y", "$1 $2 $3", "a  cc" ),
235        TestVectors(  "^(a)((b)?)(c*)",     "acc",  "y", "$1 $2 $3", "a  " ),
236        TestVectors(  "(?:ab){3}",       "_abababc","y", "$&-$1",    "ababab-" ),
237        TestVectors(  "(?:a(?:x)?)+",    "aaxaxx",  "y", "$&-$1-$2", "aaxax--" ),
238        TestVectors(  `\W\w\W`,         "aa b!ca",  "y", "$&",       " b!"),
239//more repetitions:
240        TestVectors(  "(?:a{2,4}b{1,3}){1,2}",  "aaabaaaabbb", "y", "$&", "aaabaaaabbb" ),
241        TestVectors(  "(?:a{2,4}b{1,3}){1,2}?", "aaabaaaabbb", "y", "$&", "aaab" ),
242//groups:
243        TestVectors(  "(abc)|(edf)|(xyz)",     "xyz",             "y",   "$1-$2-$3","--xyz"),
244        TestVectors(  "(?P<q>\\d+)/(?P<d>\\d+)",     "2/3",       "y",     "${d}/${q}",    "3/2"),
245//set operations:
246        TestVectors(  "[a-z--d-f]",                  " dfa",      "y",   "$&",     "a"),
247        TestVectors(  "[abc[pq--acq]]{2}",           "bqpaca",    "y",   "$&",     "pa"),
248        TestVectors(  "[a-z9&&abc0-9]{3}",           "z90a0abc",  "y",   "$&",     "abc"),
249        TestVectors(  "[0-9a-f~~0-5a-z]{2}",         "g0a58x",    "y",   "$&",     "8x"),
250        TestVectors(  "[abc[pq]xyz[rs]]{4}",         "cqxr",      "y",   "$&",     "cqxr"),
251        TestVectors(  "[abcdf--[ab&&[bcd]][acd]]",   "abcdefgh",  "y",   "$&",     "f"),
252        TestVectors(  "[a-c||d-f]+",    "abcdef", "y", "$&", "abcdef"),
253        TestVectors(  "[a-f--a-c]+",    "abcdef", "y", "$&", "def"),
254        TestVectors(  "[a-c&&b-f]+",    "abcdef", "y", "$&", "bc"),
255        TestVectors(  "[a-c~~b-f]+",    "abcdef", "y", "$&", "a"),
256//unicode blocks & properties:
257        TestVectors(  `\P{Inlatin1suppl ement}`, "\u00c2!", "y", "$&", "!"),
258        TestVectors(  `\p{InLatin-1 Supplement}\p{in-mathematical-operators}\P{Inlatin1suppl ement}`,
259            "\u00c2\u2200\u00c3\u2203.", "y", "$&", "\u00c3\u2203."),
260        TestVectors(  `[-+*/\p{in-mathematical-operators}]{2}`,    "a+\u2212",    "y",    "$&",    "+\u2212"),
261        TestVectors(  `\p{Ll}+`,                      "XabcD",    "y",  "$&",      "abc"),
262        TestVectors(  `\p{Lu}+`,                      "������������",   "y",  "$&",      "������"),
263        TestVectors(  `^\p{Currency Symbol}\p{Sc}`,   "$���",       "y",  "$&",      "$���"),
264        TestVectors(  `\p{Common}\p{Thai}`,           "!���",       "y",  "$&",      "!���"),
265        TestVectors(  `[\d\s]*\D`,  "12 \t3\U00001680\u0F20_2",   "y",  "$&", "12 \t3\U00001680\u0F20_"),
266        TestVectors(  `[c-w��]����`, "������", "y", "$&", "������"),
267//case insensitive:
268        TestVectors(   `^abcdEf$`,           "AbCdEF",              "y",   "$&", "AbCdEF",      "i"),
269        TestVectors(   `�������������� ��������`, "�������������� ��������", "y", "$&", "�������������� ��������", "i"),
270        TestVectors(   `���������` ,        "���������",                   "y",   "$&", "���������",      "i"),
271        TestVectors(   "\U00010400{2}",  "\U00010428\U00010400 ",   "y",   "$&", "\U00010428\U00010400", "i"),
272        TestVectors(   `[adz��-��]{4}`,    "Dz����",                   "y",   "$&", "Dz����", "i"),
273        TestVectors(   `\p{L}\p{Lu}{10}`, "����������������������", "y",   "$&", "����������������������", "i"),
274        TestVectors(   `(?:D��b){3}`,  "D��bD��Bd��b",                  "y",   "$&", "D��bD��Bd��b", "i"),
275//escapes:
276        TestVectors(    `\u0041\u005a\U00000065\u0001`,         "AZe\u0001",       "y",   "$&", "AZe\u0001"),
277        TestVectors(    `\u`,               "",   "c",   "-",  "-"),
278        TestVectors(    `\U`,               "",   "c",   "-",  "-"),
279        TestVectors(    `\u003`,            "",   "c",   "-",  "-"),
280        TestVectors(    `[\x00-\x7f]{4}`,        "\x00\x09ab",   "y", "$&", "\x00\x09ab"),
281        TestVectors(    `[\cJ\cK\cA-\cD]{3}\cQ`, "\x01\x0B\x0A\x11", "y", "$&", "\x01\x0B\x0A\x11"),
282        TestVectors(    `\r\n\v\t\f\\`,     "\r\n\v\t\f\\",   "y",   "$&", "\r\n\v\t\f\\"),
283        TestVectors(    `[\u0003\u0001]{2}`,  "\u0001\u0003",         "y",   "$&", "\u0001\u0003"),
284        TestVectors(    `^[\u0020-\u0080\u0001\n-\r]{8}`,  "abc\u0001\v\f\r\n",  "y",   "$&", "abc\u0001\v\f\r\n"),
285        TestVectors(    `\w+\S\w+`, "ab7!44c",  "y", "$&", "ab7!44c"),
286        TestVectors(    `\b\w+\b`,  " abde4 ",  "y", "$&", "abde4"),
287        TestVectors(    `\b\w+\b`,  " abde4",   "y", "$&", "abde4"),
288        TestVectors(    `\b\w+\b`,  "abde4 ",   "y", "$&", "abde4"),
289        TestVectors(    `\pL\pS`,   "a\u02DA",  "y", "$&", "a\u02DA"),
290        TestVectors(    `\pX`,      "",         "c", "-",  "-"),
291// ^, $, \b, \B, multiline :
292        TestVectors(    `\r.*?$`,    "abc\r\nxy", "y", "$&", "\r\nxy", "sm"),
293        TestVectors(    `^a$^b$`,    "a\r\nb\n",  "n", "$&", "-", "m"),
294        TestVectors(    `^a$\r\n^b$`,"a\r\nb\n",  "y", "$&", "a\r\nb", "m"),
295        TestVectors(    `^$`,        "\r\n",      "y", "$&", "", "m"),
296        TestVectors(    `^a$\nx$`,   "a\nx\u2028","y", "$&", "a\nx", "m"),
297        TestVectors(    `^a$\nx$`,   "a\nx\u2029","y", "$&", "a\nx", "m"),
298        TestVectors(    `^a$\nx$`,   "a\nx\u0085","y", "$&", "a\nx","m"),
299        TestVectors(    `^x$`,       "\u2028x",   "y", "$&", "x", "m"),
300        TestVectors(    `^x$`,       "\u2029x",   "y", "$&", "x", "m"),
301        TestVectors(    `^x$`,       "\u0085x",   "y", "$&", "x", "m"),
302        TestVectors(    `\b^.`,      "ab",        "y", "$&", "a"),
303        TestVectors(    `\B^.`,      "ab",        "n", "-",  "-"),
304        TestVectors(    `^ab\Bc\B`,  "\r\nabcd",  "y", "$&", "abc", "m"),
305        TestVectors(    `^.*$`,      "12345678",  "y", "$&", "12345678"),
306
307// luckily obtained regression on incremental matching in backtracker
308        TestVectors(  `^(?:(?:([0-9A-F]+)\.\.([0-9A-F]+)|([0-9A-F]+))\s*;\s*([^ ]*)\s*#|# (?:\w|_)+=((?:\w|_)+))`,
309            "0020  ; White_Space # ", "y", "$1-$2-$3", "--0020"),
310//lookahead
311        TestVectors(    "(foo.)(?=(bar))",     "foobar foodbar", "y", "$&-$1-$2", "food-food-bar" ),
312        TestVectors(    `\b(\d+)[a-z](?=\1)`,  "123a123",        "y", "$&-$1", "123a-123" ),
313        TestVectors(    `\$(?!\d{3})\w+`,      "$123 $abc",      "y", "$&", "$abc"),
314        TestVectors(    `(abc)(?=(ed(f))\3)`,    "abcedff",      "y", "-", "-"),
315        TestVectors(    `\b[A-Za-z0-9.]+(?=(@(?!gmail)))`, "a@gmail,x@com",  "y", "$&-$1", "x-@"),
316        TestVectors(    `x()(abc)(?=(d)(e)(f)\2)`,   "xabcdefabc", "y", "$&", "xabc"),
317        TestVectors(    `x()(abc)(?=(d)(e)(f)()\3\4\5)`,   "xabcdefdef", "y", "$&", "xabc"),
318//lookback
319        TestVectors(    `(?<=(ab))\d`,    "12ba3ab4",    "y",   "$&-$1", "4-ab",  "i"),
320        TestVectors(    `\w(?<!\d)\w`,   "123ab24",  "y",   "$&", "ab"),
321        TestVectors(    `(?<=D��b)x\w`,  "D��bD��Bxd��b",  "y",   "$&", "xd", "i"),
322        TestVectors(    `(?<=(ab*c))x`,   "abbbbcxac",  "y",   "$&-$1", "x-abbbbc"),
323        TestVectors(    `(?<=(ab*?c))x`,   "abbbbcxac",  "y",   "$&-$1", "x-abbbbc"),
324        TestVectors(    `(?<=(a.*?c))x`,   "ababbcxac",  "y",   "$&-$1", "x-abbc"),
325        TestVectors(    `(?<=(a{2,4}b{1,3}))x`,   "yyaaaabx",  "y",   "$&-$1", "x-aaaab"),
326        TestVectors(    `(?<=((?:a{2,4}b{1,3}){1,2}))x`,   "aabbbaaaabx",  "y",   "$&-$1", "x-aabbbaaaab"),
327        TestVectors(    `(?<=((?:a{2,4}b{1,3}){1,2}?))x`,   "aabbbaaaabx",  "y",   "$&-$1", "x-aaaab"),
328        TestVectors(    `(?<=(abc|def|aef))x`,    "abcx", "y",        "$&-$1",  "x-abc"),
329        TestVectors(    `(?<=(abc|def|aef))x`,    "aefx", "y",        "$&-$1",  "x-aef"),
330        TestVectors(    `(?<=(abc|dabc))(x)`,    "dabcx", "y",        "$&-$1-$2",  "x-abc-x"),
331        TestVectors(    `(?<=(|abc))x`,        "dabcx", "y",        "$&-$1",  "x-"),
332        TestVectors(    `(?<=((ab|da)*))x`,    "abdaabx", "y",        "$&-$2-$1",  "x-ab-abdaab"),
333        TestVectors(    `a(?<=(ba(?<=(aba)(?<=aaba))))`, "aabaa", "y", "$&-$1-$2", "a-ba-aba"),
334        TestVectors(    `.(?<!b).`,   "bax",  "y", "$&", "ax"),
335        TestVectors(    `(?<=b(?<!ab)).`,   "abbx",  "y",  "$&", "x"),
336        TestVectors(    `(?<=\.|[!?]+)X`,   "Hey?!X", "y", "$&", "X"),
337        TestVectors(    `(?<=\.|[!?]+)a{3}`,   ".Nope.aaaX", "y", "$&", "aaa"),
338//mixed lookaround
339        TestVectors(   `a(?<=a(?=b))b`,    "ab", "y",      "$&", "ab"),
340        TestVectors(   `a(?<=a(?!b))c`,    "ac", "y",      "$&", "ac"),
341        TestVectors(   `a(?i)bc`,         "aBc", "y",      "$&", "aBc"),
342        TestVectors(   `a(?i)bc`,         "Abc", "n",      "$&", "-"),
343        TestVectors(   `(?i)a(?-i)bc`, "aBcAbc", "y",      "$&", "Abc"),
344        TestVectors(   `(?s).(?-s).`, "\n\n\na", "y",      "$&", "\na"),
345        TestVectors(   `(?m)^a(?-m)$`,  "\na",   "y",      "$&", "a")
346        ];
347    string produceExpected(M,String)(auto ref M m, String fmt)
348    {
349        auto app = appender!(String)();
350        replaceFmt(fmt, m.captures, app, true);
351        return app.data;
352    }
353    void run_tests(alias matchFn)()
354    {
355        int i;
356        static foreach (Char; AliasSeq!( char, wchar, dchar))
357        {{
358            alias String = immutable(Char)[];
359            String produceExpected(M,Range)(auto ref M m, Range fmt)
360            {
361                auto app = appender!(String)();
362                replaceFmt(fmt, m.captures, app, true);
363                return app.data;
364            }
365            Regex!(Char) r;
366            foreach (a, tvd; tv)
367            {
368                uint c = tvd.result[0];
369                debug(std_regex_test) writeln(" Test #", a, " pattern: ", tvd.pattern, " with Char = ", Char.stringof);
370                try
371                {
372                    i = 1;
373                    r = regex(to!(String)(tvd.pattern), tvd.flags);
374                }
375                catch (RegexException e)
376                {
377                    i = 0;
378                    debug(std_regex_test) writeln(e.msg);
379                }
380
381                assert((c == 'c') ? !i : i, "failed to compile pattern "~tvd.pattern);
382
383                if (c != 'c')
384                {
385                    auto m = matchFn(to!(String)(tvd.input), r);
386                    i = !m.empty;
387                    assert(
388                        (c == 'y') ? i : !i,
389                        text(matchFn.stringof ~": failed to match pattern #", a ,": ", tvd.pattern)
390                    );
391                    if (c == 'y')
392                    {
393                        auto result = produceExpected(m, to!(String)(tvd.format));
394                        assert(result == to!String(tvd.replace),
395                            text(matchFn.stringof ~": mismatch pattern #", a, ": ", tvd.pattern," expected: ",
396                                    tvd.replace, " vs ", result));
397                    }
398                }
399            }
400        }}
401        debug(std_regex_test) writeln("!!! FReD bulk test done "~matchFn.stringof~" !!!");
402    }
403
404
405    void ct_tests()
406    {
407        import std.algorithm.comparison : equal;
408        version (std_regex_ct1)
409        {
410            pragma(msg, "Testing 1st part of ctRegex");
411            enum Tests = iota(0, 155);
412        }
413        else version (std_regex_ct2)
414        {
415            pragma(msg, "Testing 2nd part of ctRegex");
416            enum Tests = iota(155, 174);
417        }
418        //FIXME: #174-178 contains CTFE parser bug
419        else version (std_regex_ct3)
420        {
421            pragma(msg, "Testing 3rd part of ctRegex");
422            enum Tests = iota(178, 220);
423        }
424        else version (std_regex_ct4)
425        {
426            pragma(msg, "Testing 4th part of ctRegex");
427            enum Tests = iota(220, tv.length);
428        }
429        else
430            enum Tests = chain(iota(0, 30), iota(235, tv.length-5));
431        static foreach (v; Tests)
432        {{
433            enum tvd = tv[v];
434            static if (tvd.result == "c")
435            {
436                static assert(!__traits(compiles, (){
437                    enum r = regex(tvd.pattern, tvd.flags);
438                }), "errornously compiles regex pattern: " ~ tvd.pattern);
439            }
440            else
441            {
442                //BUG: tv[v] is fine but tvd is not known at compile time?!
443                auto r = ctRegex!(tv[v].pattern, tv[v].flags);
444                auto nr = regex(tvd.pattern, tvd.flags);
445                assert(equal(r.ir, nr.ir),
446                    text("!C-T regex! failed to compile pattern #", v ,": ", tvd.pattern));
447                auto m = match(tvd.input, r);
448                auto c = tvd.result[0];
449                bool ok = (c == 'y') ^ m.empty;
450                assert(ok, text("ctRegex: failed to match pattern #",
451                    v ,": ", tvd.pattern));
452                if (c == 'y')
453                {
454                    auto result = produceExpected(m, tvd.format);
455                    assert(result == tvd.replace, text("ctRegex mismatch pattern #", v,
456                        ": ", tvd.pattern," expected: ", tvd.replace, " vs ", result));
457                }
458            }
459        }}
460        debug(std_regex_test) writeln("!!! FReD C-T test done !!!");
461    }
462
    // Drive the suite: the ctRegex subset first, then the full table through
    // each run-time matching entry point.
    ct_tests();
    run_tests!bmatch(); //backtracker
    run_tests!match(); //thompson VM
466}
467
468