1/*
    Regular expressions package test suite.
3*/
4module std.regex.internal.tests;
5
6package(std.regex):
7
8import std.conv, std.exception, std.meta, std.range,
9    std.typecons, std.regex;
10
11import std.regex.internal.parser : Escapables; // characters that need escaping
12
// Shorthand used by ct_tests to pick index ranges into the test-vector table.
// NOTE(review): presumably staticIota!(B, E) yields the compile-time integers
// B .. E with E excluded (ct_tests passes tv.length as E) -- confirm.
alias Sequence(int B, int E) = staticIota!(B, E);
14
@safe unittest
{
    // Sanity checks. The first three patterns only have to compile.
    regex("(a|b)*");
    regex(`(?:([0-9A-F]+)\.\.([0-9A-F]+)|([0-9A-F]+))\s*;\s*(.*)\s*#`);
    regex("abc|edf|ighrg");

    auto literal = regex("abc");
    auto grouped = regex("(gylba)");

    // match(): hit on the literal, no hit for the group pattern.
    assert(match("abcdef", literal).hit == "abc");
    assert(!match("wida", grouped));

    // bmatch(): same expectations.
    assert(bmatch("abcdef", literal).hit == "abc");
    assert(!bmatch("wida", grouped));

    // A mutable char[] is accepted as the pattern argument too.
    assert(match("abc", "abc".dup));
    assert(bmatch("abc", "abc".dup));

    // A default-initialized Regex reports empty until a pattern is assigned.
    Regex!char compiled;
    assert(compiled.empty);
    compiled = regex("test");
    assert(!compiled.empty);
}
33
34/* The test vectors in this file are altered from Henry Spencer's regexp
35   test code. His copyright notice is:
36
37        Copyright (c) 1986 by University of Toronto.
38        Written by Henry Spencer.  Not derived from licensed software.
39
40        Permission is granted to anyone to use this software for any
41        purpose on any computer system, and to redistribute it freely,
42        subject to the following restrictions:
43
44        1. The author is not responsible for the consequences of use of
45                this software, no matter how awful, even if they arise
46                from defects in it.
47
48        2. The origin of this software must not be misrepresented, either
49                by explicit claim or by omission.
50
51        3. Altered versions must be plainly marked as such, and must not
52                be misrepresented as being the original software.
53
54
55 */
56
57@safe unittest
58{
    // One row of the test-vector table consumed by run_tests and ct_tests.
    struct TestVectors
    {
        string pattern; // regex source that gets compiled
        string input;   // text the compiled pattern is matched against
        string result;  // first char: 'c' = must fail to compile, 'y' = must match, otherwise must not match
        string format;  // replacement format expanded against the captures of a 'y' vector
        string replace; // expected expansion of `format`
        string flags;   // flag string passed along with the pattern; left at its default when omitted
    }
68
69    static immutable TestVectors[] tv = [
70        TestVectors(  "a\\b",       "a",  "y",    "$&",    "a" ),
71        TestVectors(  "(a)b\\1",   "abaab","y",    "$&",    "aba" ),
72        TestVectors(  "()b\\1",     "aaab", "y",    "$&",    "b" ),
73        TestVectors(  "abc",       "abc",  "y",    "$&",    "abc" ),
74        TestVectors(  "abc",       "xbc",  "n",    "-",    "-" ),
75        TestVectors(  "abc",       "axc",  "n",    "-",    "-" ),
76        TestVectors(  "abc",       "abx",  "n",    "-",    "-" ),
77        TestVectors(  "abc",       "xabcy","y",    "$&",    "abc" ),
78        TestVectors(  "abc",       "ababc","y",    "$&",    "abc" ),
79        TestVectors(  "ab*c",      "abc",  "y",    "$&",    "abc" ),
80        TestVectors(  "ab*bc",     "abc",  "y",    "$&",    "abc" ),
81        TestVectors(  "ab*bc",     "abbc", "y",    "$&",    "abbc" ),
82        TestVectors(  "ab*bc",     "abbbbc","y",   "$&",    "abbbbc" ),
83        TestVectors(  "ab+bc",     "abbc", "y",    "$&",    "abbc" ),
84        TestVectors(  "ab+bc",     "abc",  "n",    "-",    "-" ),
85        TestVectors(  "ab+bc",     "abq",  "n",    "-",    "-" ),
86        TestVectors(  "ab+bc",     "abbbbc","y",   "$&",    "abbbbc" ),
87        TestVectors(  "ab?bc",     "abbc", "y",    "$&",    "abbc" ),
88        TestVectors(  "ab?bc",     "abc",  "y",    "$&",    "abc" ),
89        TestVectors(  "ab?bc",     "abbbbc","n",   "-",    "-" ),
90        TestVectors(  "ab?c",      "abc",  "y",    "$&",    "abc" ),
91        TestVectors(  "^abc$",     "abc",  "y",    "$&",    "abc" ),
92        TestVectors(  "^abc$",     "abcc", "n",    "-",    "-" ),
93        TestVectors(  "^abc",      "abcc", "y",    "$&",    "abc" ),
94        TestVectors(  "^abc$",     "aabc", "n",    "-",    "-" ),
95        TestVectors(  "abc$",      "aabc", "y",    "$&",    "abc" ),
96        TestVectors(  "^",         "abc",  "y",    "$&",    "" ),
97        TestVectors(  "$",         "abc",  "y",    "$&",    "" ),
98        TestVectors(  "a.c",       "abc",  "y",    "$&",    "abc" ),
99        TestVectors(  "a.c",       "axc",  "y",    "$&",    "axc" ),
100        TestVectors(  "a.*c",      "axyzc","y",    "$&",    "axyzc" ),
101        TestVectors(  "a.*c",      "axyzd","n",    "-",    "-" ),
102        TestVectors(  "a[bc]d",    "abc",  "n",    "-",    "-" ),
103        TestVectors(  "a[bc]d",    "abd",  "y",    "$&",    "abd" ),
104        TestVectors(  "a[b-d]e",   "abd",  "n",    "-",    "-" ),
105        TestVectors(  "a[b-d]e",   "ace",  "y",    "$&",    "ace" ),
106        TestVectors(  "a[b-d]",    "aac",  "y",    "$&",    "ac" ),
107        TestVectors(  "a[-b]",     "a-",   "y",    "$&",    "a-" ),
108        TestVectors(  "a[b-]",     "a-",   "y",    "$&",    "a-" ),
109        TestVectors(  "a[b-a]",    "-",    "c",    "-",    "-" ),
110        TestVectors(  "a[]b",      "-",    "c",    "-",    "-" ),
111        TestVectors(  "a[",        "-",    "c",    "-",    "-" ),
112        TestVectors(  "a]",        "a]",   "y",    "$&",    "a]" ),
113        TestVectors(  "a[\\]]b",     "a]b",  "y",  "$&",    "a]b" ),
114        TestVectors(  "a[^bc]d",   "aed",  "y",    "$&",    "aed" ),
115        TestVectors(  "a[^bc]d",   "abd",  "n",    "-",    "-" ),
116        TestVectors(  "a[^-b]c",   "adc",  "y",    "$&",    "adc" ),
117        TestVectors(  "a[^-b]c",   "a-c",  "n",    "-",    "-" ),
118        TestVectors(  "a[^\\]b]c",   "adc",  "y",  "$&",    "adc" ),
119        TestVectors(  "ab|cd",     "abc",  "y",    "$&",    "ab" ),
120        TestVectors(  "ab|cd",     "abcd", "y",    "$&",    "ab" ),
121        TestVectors(  "()ef",      "def",  "y",    "$&-$1",        "ef-" ),
122        TestVectors(  "()*",       "-",    "y",    "-",    "-" ),
123        TestVectors(  "*a",        "-",    "c",    "-",    "-" ),
124        TestVectors(  "^*",        "-",    "y",    "-",    "-" ),
125        TestVectors(  "$*",        "-",    "y",    "-",    "-" ),
126        TestVectors(  "(*)b",      "-",    "c",    "-",    "-" ),
127        TestVectors(  "$b",        "b",    "n",    "-",    "-" ),
128        TestVectors(  "a\\",       "-",    "c",    "-",    "-" ),
129        TestVectors(  "a\\(b",     "a(b",  "y",    "$&-$1",        "a(b-" ),
130        TestVectors(  "a\\(*b",    "ab",   "y",    "$&",    "ab" ),
131        TestVectors(  "a\\(*b",    "a((b", "y",    "$&",    "a((b" ),
132        TestVectors(  "a\\\\b",    "a\\b", "y",    "$&",    "a\\b" ),
133        TestVectors(  "abc)",      "-",    "c",    "-",    "-" ),
134        TestVectors(  "(abc",      "-",    "c",    "-",    "-" ),
135        TestVectors(  "((a))",     "abc",  "y",    "$&-$1-$2",    "a-a-a" ),
136        TestVectors(  "(a)b(c)",   "abc",  "y",    "$&-$1-$2",    "abc-a-c" ),
137        TestVectors(  "a+b+c",     "aabbabc","y",  "$&",    "abc" ),
138        TestVectors(  "a**",       "-",    "c",    "-",    "-" ),
139        TestVectors(  "a*?a",      "aa",   "y",    "$&",    "a" ),
140        TestVectors(  "(a*)*",     "aaa",  "y",    "-",    "-" ),
141        TestVectors(  "(a*)+",     "aaa",  "y",    "-",    "-" ),
142        TestVectors(  "(a|)*",     "-",    "y",    "-",    "-" ),
143        TestVectors(  "(a*|b)*",   "aabb", "y",    "-",    "-" ),
144        TestVectors(  "(a|b)*",    "ab",   "y",    "$&-$1",        "ab-b" ),
145        TestVectors(  "(a+|b)*",   "ab",   "y",    "$&-$1",        "ab-b" ),
146        TestVectors(  "(a+|b)+",   "ab",   "y",    "$&-$1",        "ab-b" ),
147        TestVectors(  "(a+|b)?",   "ab",   "y",    "$&-$1",        "a-a" ),
148        TestVectors(  "[^ab]*",    "cde",  "y",    "$&",    "cde" ),
149        TestVectors(  "(^)*",      "-",    "y",    "-",    "-" ),
150        TestVectors(  "(ab|)*",    "-",    "y",    "-",    "-" ),
151        TestVectors(  ")(",        "-",    "c",    "-",    "-" ),
152        TestVectors(  "",  "abc",  "y",    "$&",    "" ),
153        TestVectors(  "abc",       "",     "n",    "-",    "-" ),
154        TestVectors(  "a*",        "",     "y",    "$&",    "" ),
155        TestVectors(  "([abc])*d", "abbbcd",       "y",    "$&-$1",        "abbbcd-c" ),
156        TestVectors(  "([abc])*bcd", "abcd",       "y",    "$&-$1",        "abcd-a" ),
157        TestVectors(  "a|b|c|d|e", "e",    "y",    "$&",    "e" ),
158        TestVectors(  "(a|b|c|d|e)f", "ef",        "y",    "$&-$1",        "ef-e" ),
159        TestVectors(  "((a*|b))*", "aabb", "y",    "-",    "-" ),
160        TestVectors(  "abcd*efg",  "abcdefg",      "y",    "$&",    "abcdefg" ),
161        TestVectors(  "ab*",       "xabyabbbz",    "y",    "$&",    "ab" ),
162        TestVectors(  "ab*",       "xayabbbz",     "y",    "$&",    "a" ),
163        TestVectors(  "(ab|cd)e",  "abcde",        "y",    "$&-$1",        "cde-cd" ),
164        TestVectors(  "[abhgefdc]ij",      "hij",  "y",    "$&",    "hij" ),
165        TestVectors(  "^(ab|cd)e", "abcde",        "n",    "x$1y",        "xy" ),
166        TestVectors(  "(abc|)ef",  "abcdef",       "y",    "$&-$1",        "ef-" ),
167        TestVectors(  "(a|b)c*d",  "abcd",         "y",    "$&-$1",        "bcd-b" ),
168        TestVectors(  "(ab|ab*)bc",        "abc",  "y",    "$&-$1",        "abc-a" ),
169        TestVectors(  "a([bc]*)c*",        "abc",  "y",    "$&-$1",        "abc-bc" ),
170        TestVectors(  "a([bc]*)(c*d)",     "abcd", "y",    "$&-$1-$2",    "abcd-bc-d" ),
171        TestVectors(  "a([bc]+)(c*d)",     "abcd", "y",    "$&-$1-$2",    "abcd-bc-d" ),
172        TestVectors(  "a([bc]*)(c+d)",     "abcd", "y",    "$&-$1-$2",    "abcd-b-cd" ),
173        TestVectors(  "a[bcd]*dcdcde",     "adcdcde",      "y",    "$&",    "adcdcde" ),
174        TestVectors(  "a[bcd]+dcdcde",     "adcdcde",      "n",    "-",    "-" ),
175        TestVectors(  "(ab|a)b*c", "abc",           "y",    "$&-$1",        "abc-ab" ),
176        TestVectors(  "((a)(b)c)(d)",      "abcd",  "y",    "$1-$2-$3-$4",      "abc-a-b-d" ),
177        TestVectors(  "[a-zA-Z_][a-zA-Z0-9_]*",    "alpha",        "y",    "$&",    "alpha" ),
178        TestVectors(  "^a(bc+|b[eh])g|.h$",        "abh",  "y",    "$&-$1",        "bh-" ),
179        TestVectors(  "(bc+d$|ef*g.|h?i(j|k))",    "effgz",        "y",    "$&-$1-$2",    "effgz-effgz-" ),
180        TestVectors(  "(bc+d$|ef*g.|h?i(j|k))",    "ij",   "y",    "$&-$1-$2",    "ij-ij-j" ),
181        TestVectors(  "(bc+d$|ef*g.|h?i(j|k))",    "effg", "n",    "-",    "-" ),
182        TestVectors(  "(bc+d$|ef*g.|h?i(j|k))",    "bcdd", "n",    "-",    "-" ),
183        TestVectors(  "(bc+d$|ef*g.|h?i(j|k))",    "reffgz",       "y",    "$&-$1-$2",    "effgz-effgz-" ),
184        TestVectors(  "(((((((((a)))))))))",       "a",    "y",    "$&",    "a" ),
185        TestVectors(  "multiple words of text",    "uh-uh",        "n",    "-",    "-" ),
186        TestVectors(  "multiple words",    "multiple words, yeah", "y",    "$&",    "multiple words" ),
187        TestVectors(  "(.*)c(.*)", "abcde",                "y",    "$&-$1-$2",    "abcde-ab-de" ),
188        TestVectors(  "\\((.*), (.*)\\)",  "(a, b)",       "y",    "($2, $1)",   "(b, a)" ),
189        TestVectors(  "abcd",      "abcd",                   "y",    "$&-&-$$$&",  "abcd-&-$abcd" ),
190        TestVectors(  "a(bc)d",    "abcd",                 "y",    "$1-$$1-$$$1",    "bc-$1-$bc" ),
191        TestVectors(  "[k]",                       "ab",   "n",    "-",    "-" ),
192        TestVectors(  "[ -~]*",                    "abc",  "y",    "$&",    "abc" ),
193        TestVectors(  "[ -~ -~]*",                 "abc",  "y",    "$&",    "abc" ),
194        TestVectors(  "[ -~ -~ -~]*",              "abc",  "y",    "$&",    "abc" ),
195        TestVectors(  "[ -~ -~ -~ -~]*",           "abc",  "y",    "$&",    "abc" ),
196        TestVectors(  "[ -~ -~ -~ -~ -~]*",        "abc",  "y",    "$&",    "abc" ),
197        TestVectors(  "[ -~ -~ -~ -~ -~ -~]*",     "abc",  "y",    "$&",    "abc" ),
198        TestVectors(  "[ -~ -~ -~ -~ -~ -~ -~]*",  "abc",  "y",    "$&",    "abc" ),
199        TestVectors(  "a{2}",      "candy",                "n",    "",     "" ),
200        TestVectors(  "a{2}",      "caandy",               "y",    "$&",    "aa" ),
201        TestVectors(  "a{2}",      "caaandy",              "y",    "$&",    "aa" ),
202        TestVectors(  "a{2,}",     "candy",                "n",    "",     "" ),
203        TestVectors(  "a{2,}",     "caandy",               "y",    "$&",    "aa" ),
204        TestVectors(  "a{2,}",     "caaaaaandy",           "y",    "$&",    "aaaaaa" ),
205        TestVectors(  "a{1,3}",    "cndy",                 "n",    "",     "" ),
206        TestVectors(  "a{1,3}",    "candy",                "y",    "$&",    "a" ),
207        TestVectors(  "a{1,3}",    "caandy",               "y",    "$&",    "aa" ),
208        TestVectors(  "a{1,3}",    "caaaaaandy",           "y",    "$&",    "aaa" ),
209        TestVectors(  "e?le?",     "angel",                "y",    "$&",    "el" ),
210        TestVectors(  "e?le?",     "angle",                "y",    "$&",    "le" ),
211        TestVectors(  "\\bn\\w",   "noonday",              "y",    "$&",    "no" ),
212        TestVectors(  "\\wy\\b",   "possibly yesterday",   "y",    "$&",    "ly" ),
213        TestVectors(  "\\w\\Bn",   "noonday",              "y",    "$&",    "on" ),
214        TestVectors(  "y\\B\\w",   "possibly yesterday",   "y",    "$&",    "ye" ),
215        TestVectors(  "\\cJ",      "abc\ndef",             "y",    "$&",    "\n" ),
216        TestVectors(  "\\d",       "B2 is",                "y",    "$&",    "2" ),
217        TestVectors(  "\\D",       "B2 is",                "y",    "$&",    "B" ),
218        TestVectors(  "\\s\\w*",   "foo bar",              "y",    "$&",    " bar" ),
219        TestVectors(  "\\S\\w*",   "foo bar",              "y",    "$&",    "foo" ),
220        TestVectors(  "abc",       "ababc",                "y",    "$&",    "abc" ),
221        TestVectors(  "apple(,)\\sorange\\1",      "apple, orange, cherry, peach", "y", "$&", "apple, orange," ),
222        TestVectors(  "(\\w+)\\s(\\w+)",           "John Smith", "y", "$2, $1", "Smith, John" ),
223        TestVectors(  "\\n\\f\\r\\t\\v",           "abc\n\f\r\t\vdef", "y", "$&", "\n\f\r\t\v" ),
224        TestVectors(  ".*c",       "abcde",                        "y",    "$&",    "abc" ),
225        TestVectors(  "^\\w+((;|=)\\w+)+$", "some=host=tld",    "y", "$&-$1-$2", "some=host=tld-=tld-=" ),
226        TestVectors(  "^\\w+((\\.|-)\\w+)+$", "some.host.tld",    "y", "$&-$1-$2", "some.host.tld-.tld-." ),
227        TestVectors(  "q(a|b)*q",  "xxqababqyy",                "y",    "$&-$1",        "qababq-b" ),
228        TestVectors(  "^(a)(b){0,1}(c*)",   "abcc", "y", "$1 $2 $3", "a b cc" ),
229        TestVectors(  "^(a)((b){0,1})(c*)", "abcc", "y", "$1 $2 $3", "a b b" ),
230        TestVectors(  "^(a)(b)?(c*)",       "abcc", "y", "$1 $2 $3", "a b cc" ),
231        TestVectors(  "^(a)((b)?)(c*)",     "abcc", "y", "$1 $2 $3", "a b b" ),
232        TestVectors(  "^(a)(b){0,1}(c*)",   "acc",  "y", "$1 $2 $3", "a  cc" ),
233        TestVectors(  "^(a)((b){0,1})(c*)", "acc",  "y", "$1 $2 $3", "a  " ),
234        TestVectors(  "^(a)(b)?(c*)",       "acc",  "y", "$1 $2 $3", "a  cc" ),
235        TestVectors(  "^(a)((b)?)(c*)",     "acc",  "y", "$1 $2 $3", "a  " ),
236        TestVectors(  "(?:ab){3}",       "_abababc","y", "$&-$1",    "ababab-" ),
237        TestVectors(  "(?:a(?:x)?)+",    "aaxaxx",  "y", "$&-$1-$2", "aaxax--" ),
238        TestVectors(  `\W\w\W`,         "aa b!ca",  "y", "$&",       " b!"),
239//more repetitions:
240        TestVectors(  "(?:a{2,4}b{1,3}){1,2}",  "aaabaaaabbb", "y", "$&", "aaabaaaabbb" ),
241        TestVectors(  "(?:a{2,4}b{1,3}){1,2}?", "aaabaaaabbb", "y", "$&", "aaab" ),
242//groups:
243        TestVectors(  "(abc)|(edf)|(xyz)",     "xyz",             "y",   "$1-$2-$3","--xyz"),
244        TestVectors(  "(?P<q>\\d+)/(?P<d>\\d+)",     "2/3",       "y",     "${d}/${q}",    "3/2"),
245//set operations:
246        TestVectors(  "[a-z--d-f]",                  " dfa",      "y",   "$&",     "a"),
247        TestVectors(  "[abc[pq--acq]]{2}",           "bqpaca",    "y",   "$&",     "pa"),
248        TestVectors(  "[a-z9&&abc0-9]{3}",           "z90a0abc",  "y",   "$&",     "abc"),
249        TestVectors(  "[0-9a-f~~0-5a-z]{2}",         "g0a58x",    "y",   "$&",     "8x"),
250        TestVectors(  "[abc[pq]xyz[rs]]{4}",         "cqxr",      "y",   "$&",     "cqxr"),
251        TestVectors(  "[abcdf--[ab&&[bcd]][acd]]",   "abcdefgh",  "y",   "$&",     "f"),
252        TestVectors(  "[a-c||d-f]+",    "abcdef", "y", "$&", "abcdef"),
253        TestVectors(  "[a-f--a-c]+",    "abcdef", "y", "$&", "def"),
254        TestVectors(  "[a-c&&b-f]+",    "abcdef", "y", "$&", "bc"),
255        TestVectors(  "[a-c~~b-f]+",    "abcdef", "y", "$&", "a"),
256//unicode blocks & properties:
257        TestVectors(  `\P{Inlatin1suppl ement}`, "\u00c2!", "y", "$&", "!"),
258        TestVectors(  `\p{InLatin-1 Supplement}\p{in-mathematical-operators}\P{Inlatin1suppl ement}`,
259            "\u00c2\u2200\u00c3\u2203.", "y", "$&", "\u00c3\u2203."),
260        TestVectors(  `[-+*/\p{in-mathematical-operators}]{2}`,    "a+\u2212",    "y",    "$&",    "+\u2212"),
261        TestVectors(  `\p{Ll}+`,                      "XabcD",    "y",  "$&",      "abc"),
262        TestVectors(  `\p{Lu}+`,                      "������������",   "y",  "$&",      "������"),
263        TestVectors(  `^\p{Currency Symbol}\p{Sc}`,   "$���",       "y",  "$&",      "$���"),
264        TestVectors(  `\p{Common}\p{Thai}`,           "!���",       "y",  "$&",      "!���"),
265        TestVectors(  `[\d\s]*\D`,  "12 \t3\U00001680\u0F20_2",   "y",  "$&", "12 \t3\U00001680\u0F20_"),
266        TestVectors(  `[c-w��]����`, "������", "y", "$&", "������"),
267//case insensitive:
268        TestVectors(   `^abcdEf$`,           "AbCdEF",              "y",   "$&", "AbCdEF",      "i"),
269        TestVectors(   `�������������� ��������`, "�������������� ��������", "y", "$&", "�������������� ��������", "i"),
270        TestVectors(   `���������` ,        "���������",                   "y",   "$&", "���������",      "i"),
271        TestVectors(   "\U00010400{2}",  "\U00010428\U00010400 ",   "y",   "$&", "\U00010428\U00010400", "i"),
272        TestVectors(   `[adz��-��]{4}`,    "Dz����",                   "y",   "$&", "Dz����", "i"),
273        TestVectors(   `\p{L}\p{Lu}{10}`, "����������������������", "y",   "$&", "����������������������", "i"),
274        TestVectors(   `(?:D��b){3}`,  "D��bD��Bd��b",                  "y",   "$&", "D��bD��Bd��b", "i"),
275//escapes:
276        TestVectors(    `\u0041\u005a\U00000065\u0001`,         "AZe\u0001",       "y",   "$&", "AZe\u0001"),
277        TestVectors(    `\u`,               "",   "c",   "-",  "-"),
278        TestVectors(    `\U`,               "",   "c",   "-",  "-"),
279        TestVectors(    `\u003`,            "",   "c",   "-",  "-"),
280        TestVectors(    `[\x00-\x7f]{4}`,        "\x00\x09ab",   "y", "$&", "\x00\x09ab"),
281        TestVectors(    `[\cJ\cK\cA-\cD]{3}\cQ`, "\x01\x0B\x0A\x11", "y", "$&", "\x01\x0B\x0A\x11"),
282        TestVectors(    `\r\n\v\t\f\\`,     "\r\n\v\t\f\\",   "y",   "$&", "\r\n\v\t\f\\"),
283        TestVectors(    `[\u0003\u0001]{2}`,  "\u0001\u0003",         "y",   "$&", "\u0001\u0003"),
284        TestVectors(    `^[\u0020-\u0080\u0001\n-\r]{8}`,  "abc\u0001\v\f\r\n",  "y",   "$&", "abc\u0001\v\f\r\n"),
285        TestVectors(    `\w+\S\w+`, "ab7!44c",  "y", "$&", "ab7!44c"),
286        TestVectors(    `\b\w+\b`,  " abde4 ",  "y", "$&", "abde4"),
287        TestVectors(    `\b\w+\b`,  " abde4",   "y", "$&", "abde4"),
288        TestVectors(    `\b\w+\b`,  "abde4 ",   "y", "$&", "abde4"),
289        TestVectors(    `\pL\pS`,   "a\u02DA",  "y", "$&", "a\u02DA"),
290        TestVectors(    `\pX`,      "",         "c", "-",  "-"),
291// ^, $, \b, \B, multiline :
292        TestVectors(    `\r.*?$`,    "abc\r\nxy", "y", "$&", "\r\nxy", "sm"),
293        TestVectors(    `^a$^b$`,    "a\r\nb\n",  "n", "$&", "-", "m"),
294        TestVectors(    `^a$\r\n^b$`,"a\r\nb\n",  "y", "$&", "a\r\nb", "m"),
295        TestVectors(    `^$`,        "\r\n",      "y", "$&", "", "m"),
296        TestVectors(    `^a$\nx$`,   "a\nx\u2028","y", "$&", "a\nx", "m"),
297        TestVectors(    `^a$\nx$`,   "a\nx\u2029","y", "$&", "a\nx", "m"),
298        TestVectors(    `^a$\nx$`,   "a\nx\u0085","y", "$&", "a\nx","m"),
299        TestVectors(    `^x$`,       "\u2028x",   "y", "$&", "x", "m"),
300        TestVectors(    `^x$`,       "\u2029x",   "y", "$&", "x", "m"),
301        TestVectors(    `^x$`,       "\u0085x",   "y", "$&", "x", "m"),
302        TestVectors(    `\b^.`,      "ab",        "y", "$&", "a"),
303        TestVectors(    `\B^.`,      "ab",        "n", "-",  "-"),
304        TestVectors(    `^ab\Bc\B`,  "\r\nabcd",  "y", "$&", "abc", "m"),
305        TestVectors(    `^.*$`,      "12345678",  "y", "$&", "12345678"),
306
307// luckily obtained regression on incremental matching in backtracker
308        TestVectors(  `^(?:(?:([0-9A-F]+)\.\.([0-9A-F]+)|([0-9A-F]+))\s*;\s*([^ ]*)\s*#|# (?:\w|_)+=((?:\w|_)+))`,
309            "0020  ; White_Space # ", "y", "$1-$2-$3", "--0020"),
310//lookahead
311        TestVectors(    "(foo.)(?=(bar))",     "foobar foodbar", "y", "$&-$1-$2", "food-food-bar" ),
312        TestVectors(    `\b(\d+)[a-z](?=\1)`,  "123a123",        "y", "$&-$1", "123a-123" ),
313        TestVectors(    `\$(?!\d{3})\w+`,      "$123 $abc",      "y", "$&", "$abc"),
314        TestVectors(    `(abc)(?=(ed(f))\3)`,    "abcedff",      "y", "-", "-"),
315        TestVectors(    `\b[A-Za-z0-9.]+(?=(@(?!gmail)))`, "a@gmail,x@com",  "y", "$&-$1", "x-@"),
316        TestVectors(    `x()(abc)(?=(d)(e)(f)\2)`,   "xabcdefabc", "y", "$&", "xabc"),
317        TestVectors(    `x()(abc)(?=(d)(e)(f)()\3\4\5)`,   "xabcdefdef", "y", "$&", "xabc"),
318//lookback
319        TestVectors(    `(?<=(ab))\d`,    "12ba3ab4",    "y",   "$&-$1", "4-ab",  "i"),
320        TestVectors(    `\w(?<!\d)\w`,   "123ab24",  "y",   "$&", "ab"),
321        TestVectors(    `(?<=D��b)x\w`,  "D��bD��Bxd��b",  "y",   "$&", "xd", "i"),
322        TestVectors(    `(?<=(ab*c))x`,   "abbbbcxac",  "y",   "$&-$1", "x-abbbbc"),
323        TestVectors(    `(?<=(ab*?c))x`,   "abbbbcxac",  "y",   "$&-$1", "x-abbbbc"),
324        TestVectors(    `(?<=(a.*?c))x`,   "ababbcxac",  "y",   "$&-$1", "x-abbc"),
325        TestVectors(    `(?<=(a{2,4}b{1,3}))x`,   "yyaaaabx",  "y",   "$&-$1", "x-aaaab"),
326        TestVectors(    `(?<=((?:a{2,4}b{1,3}){1,2}))x`,   "aabbbaaaabx",  "y",   "$&-$1", "x-aabbbaaaab"),
327        TestVectors(    `(?<=((?:a{2,4}b{1,3}){1,2}?))x`,   "aabbbaaaabx",  "y",   "$&-$1", "x-aaaab"),
328        TestVectors(    `(?<=(abc|def|aef))x`,    "abcx", "y",        "$&-$1",  "x-abc"),
329        TestVectors(    `(?<=(abc|def|aef))x`,    "aefx", "y",        "$&-$1",  "x-aef"),
330        TestVectors(    `(?<=(abc|dabc))(x)`,    "dabcx", "y",        "$&-$1-$2",  "x-abc-x"),
331        TestVectors(    `(?<=(|abc))x`,        "dabcx", "y",        "$&-$1",  "x-"),
332        TestVectors(    `(?<=((ab|da)*))x`,    "abdaabx", "y",        "$&-$2-$1",  "x-ab-abdaab"),
333        TestVectors(    `a(?<=(ba(?<=(aba)(?<=aaba))))`, "aabaa", "y", "$&-$1-$2", "a-ba-aba"),
334        TestVectors(    `.(?<!b).`,   "bax",  "y", "$&", "ax"),
335        TestVectors(    `(?<=b(?<!ab)).`,   "abbx",  "y",  "$&", "x"),
336        TestVectors(    `(?<=\.|[!?]+)X`,   "Hey?!X", "y", "$&", "X"),
337        TestVectors(    `(?<=\.|[!?]+)a{3}`,   ".Nope.aaaX", "y", "$&", "aaa"),
338//mixed lookaround
339        TestVectors(   `a(?<=a(?=b))b`,    "ab", "y",      "$&", "ab"),
340        TestVectors(   `a(?<=a(?!b))c`,    "ac", "y",      "$&", "ac"),
341        TestVectors(   `a(?i)bc`,         "aBc", "y",      "$&", "aBc"),
342        TestVectors(   `a(?i)bc`,         "Abc", "n",      "$&", "-"),
343        TestVectors(   `(?i)a(?-i)bc`, "aBcAbc", "y",      "$&", "Abc"),
344        TestVectors(   `(?s).(?-s).`, "\n\n\na", "y",      "$&", "\na"),
345        TestVectors(   `(?m)^a(?-m)$`,  "\na",   "y",      "$&", "a")
346        ];
347    string produceExpected(M,String)(auto ref M m, String fmt)
348    {
349        auto app = appender!(String)();
350        replaceFmt(fmt, m.captures, app, true);
351        return app.data;
352    }
353    void run_tests(alias matchFn)()
354    {
355        int i;
356        foreach (Char; AliasSeq!( char, wchar, dchar))
357        (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
358            alias String = immutable(Char)[];
359            String produceExpected(M,Range)(auto ref M m, Range fmt)
360            {
361                auto app = appender!(String)();
362                replaceFmt(fmt, m.captures, app, true);
363                return app.data;
364            }
365            Regex!(Char) r;
366            foreach (a, tvd; tv)
367            {
368                uint c = tvd.result[0];
369                debug(std_regex_test) writeln(" Test #", a, " pattern: ", tvd.pattern, " with Char = ", Char.stringof);
370                try
371                {
372                    i = 1;
373                    r = regex(to!(String)(tvd.pattern), tvd.flags);
374                }
375                catch (RegexException e)
376                {
377                    i = 0;
378                    debug(std_regex_test) writeln(e.msg);
379                }
380
381                assert((c == 'c') ? !i : i, "failed to compile pattern "~tvd.pattern);
382
383                if (c != 'c')
384                {
385                    auto m = matchFn(to!(String)(tvd.input), r);
386                    i = !m.empty;
387                    assert(
388                        (c == 'y') ? i : !i,
389                        text(matchFn.stringof ~": failed to match pattern #", a ,": ", tvd.pattern)
390                    );
391                    if (c == 'y')
392                    {
393                        auto result = produceExpected(m, to!(String)(tvd.format));
394                        assert(result == to!String(tvd.replace),
395                            text(matchFn.stringof ~": mismatch pattern #", a, ": ", tvd.pattern," expected: ",
396                                    tvd.replace, " vs ", result));
397                    }
398                }
399            }
400        }();
401        debug(std_regex_test) writeln("!!! FReD bulk test done "~matchFn.stringof~" !!!");
402    }
403
404
405    void ct_tests()
406    {
407        import std.algorithm.comparison : equal;
408        version (std_regex_ct1)
409        {
410            pragma(msg, "Testing 1st part of ctRegex");
411            alias Tests = Sequence!(0, 155);
412        }
413        else version (std_regex_ct2)
414        {
415            pragma(msg, "Testing 2nd part of ctRegex");
416            alias Tests = Sequence!(155, 174);
417        }
418        //FIXME: #174-178 contains CTFE parser bug
419        else version (std_regex_ct3)
420        {
421            pragma(msg, "Testing 3rd part of ctRegex");
422            alias Tests = Sequence!(178, 220);
423        }
424        else version (std_regex_ct4)
425        {
426            pragma(msg, "Testing 4th part of ctRegex");
427            alias Tests = Sequence!(220, tv.length);
428        }
429        else
430            alias Tests = AliasSeq!(Sequence!(0, 30), Sequence!(235, tv.length-5));
431        foreach (a, v; Tests)
432        (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
433            enum tvd = tv[v];
434            static if (tvd.result == "c")
435            {
436                static assert(!__traits(compiles, (){
437                    enum r = regex(tvd.pattern, tvd.flags);
438                }), "errornously compiles regex pattern: " ~ tvd.pattern);
439            }
440            else
441            {
442                //BUG: tv[v] is fine but tvd is not known at compile time?!
443                auto r = ctRegex!(tv[v].pattern, tv[v].flags);
444                auto nr = regex(tvd.pattern, tvd.flags);
445                assert(equal(r.ir, nr.ir),
446                    text("!C-T regex! failed to compile pattern #", a ,": ", tvd.pattern));
447                auto m = match(tvd.input, r);
448                auto c = tvd.result[0];
449                bool ok = (c == 'y') ^ m.empty;
450                assert(ok, text("ctRegex: failed to match pattern #",
451                    a ,": ", tvd.pattern));
452                if (c == 'y')
453                {
454                    import std.stdio;
455                    auto result = produceExpected(m, tvd.format);
456                    if (result != tvd.replace)
457                        writeln("ctRegex mismatch pattern #", a, ": ", tvd.pattern," expected: ",
458                                tvd.replace, " vs ", result);
459                }
460            }
461        }();
462        debug(std_regex_test) writeln("!!! FReD C-T test done !!!");
463    }
464
465    ct_tests();
466    run_tests!bmatch(); //backtracker
467    run_tests!match(); //thompson VM
468}
469
@safe unittest
{
    // Two compile-time patterns: a plain literal and one with a starred atom.
    auto literalRe = ctRegex!("abc");
    auto literalHit = bmatch("abc", literalRe).hit;
    assert(literalHit == "abc");

    auto starRe = ctRegex!("ab*c");
    auto starHit = bmatch("abbbbc", starRe).hit;
    assert(starHit == "abbbbc");
}
@safe unittest
{
    // Anchored literal.
    auto anchoredRe = ctRegex!("^abc$");
    auto anchoredHit = bmatch("abc", anchoredRe).hit;
    assert(anchoredHit == "abc");

    // Word boundaries around a group: whole match and group 1 coincide.
    auto boundaryRe = ctRegex!(`\b(a\B[a-z]b)\b`);
    auto caps = array(match("azb", boundaryRe).captures);
    assert(caps == ["azb", "azb"]);
}
484
@safe unittest
{
    // Greedy outer repetition on narrow strings.
    auto greedyRe = ctRegex!("(?:a{2,4}b{1,3}){1,2}");
    auto greedyHit = bmatch("aaabaaaabbb", greedyRe).hit;
    assert(greedyHit == "aaabaaaabbb");

    // Lazy outer repetition, with pattern and input given as wstring.
    auto lazyRe = ctRegex!("(?:a{2,4}b{1,3}){1,2}?"w);
    auto lazyHit = bmatch("aaabaaaabbb"w, lazyRe).hit;
    assert(lazyHit == "aaab"w);
}
492
@safe unittest
{
    // Lazy `.*?` up to `$` with the "sm" flags.
    auto tailRe = ctRegex!(`\r.*?$`,"sm");
    auto tailHit = bmatch("abc\r\nxy", tailRe).hit;
    assert(tailHit == "\r\nxy");

    // Lazy quantifier stops at the first closing tag.
    auto packetRe = ctRegex!("<packet.*?/packet>");
    auto packetHit = bmatch("<packet>text</packet><packet>text</packet>", packetRe).hit;
    assert(packetHit == "<packet>text</packet>");
}
501
@safe unittest
{
    import std.algorithm.comparison : equal;

    // Optional middle group: all three groups participate for "abcc".
    auto optionalRe = ctRegex!("^(a)(b)?(c*)");
    auto optionalHit = bmatch("abcc", optionalRe);
    assert(optionalHit);
    foreach (idx, expected; ["a", "b", "cc"])
        assert(optionalHit.captures[idx + 1] == expected);

    // Starred group keeps its last iteration.
    auto starredRe = ctRegex!("q(a|b)*q");
    assert(match("xxqababqyy", starredRe));
    assert(equal(bmatch("xxqababqyy", starredRe).captures, ["qababq", "b"]));
}
516
@safe unittest
{
    import std.algorithm.comparison : equal;

    // The same pattern parsed at run time and during CTFE must yield equal IR.
    enum flatCT = regex("a|b|c");
    auto flatRT = regex("a|b|c");
    assert(equal(flatRT.ir,flatCT.ir));

    //CTFE parser BUG is triggered by group
    //in the middle of alternation (at least not first and not last)
    auto groupRT = regex(`abc|(edf)|xyz`);
    enum groupCT = regex(`abc|(edf)|xyz`);
    assert(equal(groupCT.ir,groupRT.ir));
}
529
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;

    // Capturing alternation.
    enum altRe = ctRegex!"(A|B|C)";
    auto altHit = match("B",altRe);
    assert(altHit);
    assert(equal(altHit.captures, [ "B", "B"]));

    // Starred alternation.
    enum starRe = ctRegex!"(A|B)*";
    assert(match("BAAA",starRe));

    // Counted repetition with the "i" flag.
    enum countedRe = ctRegex!("a{3,4}","i");
    auto countedHit = match("AaA",countedRe);
    assert(countedHit);
    assert(countedHit.captures[0] == "AaA");

    // Lazy counted repetition followed by a character class.
    enum lazyRe = ctRegex!(`^a{3,4}?[a-zA-Z0-9~]{1,2}`,"i");
    auto lazyHit = match("aaaabc", lazyRe);
    assert(lazyHit);
    assert(lazyHit.captures[0] == "aaaab");

    // Optional middle group.
    auto optionalRe = ctRegex!("(a)(b)?(c*)");
    auto optionalHit = bmatch("abcc",optionalRe);
    assert(optionalHit);
    foreach (idx, expected; ["a", "b", "cc"])
        assert(optionalHit.captures[idx + 1] == expected);

    // Global multiline match over "\r"-separated text.
    auto lineRe = ctRegex!(".*$", "gm");
    auto lineHits = match("First\rSecond", lineRe);
    assert(lineHits);
    assert(equal(map!"a.hit"(lineHits), ["First", "", "Second"]));
}
560
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;
    // writeln is only needed for the optional progress message, so the
    // import is compiled in under the same debug identifier.
    debug(std_regex_test) import std.stdio : writeln;
//global matching
    // Runs every check below with the matching function under test, so
    // that each engine is exercised by the same assertions.
    void test_body(alias matchFn)()
    {
        string s = "a quick brown fox jumps over a lazy dog";
        auto r1 = regex("\\b[a-z]+\\b","g");
        string[] test;
        foreach (m; matchFn(s, r1))
            test ~= m.hit;
        assert(equal(test, [ "a", "quick", "brown", "fox", "jumps", "over", "a", "lazy", "dog"]));
        // free-form ("x" flag) pattern
        auto free_reg = regex(`

            abc
            \s+
            "
            (
                    [^"]+
                |   \\ "
            )+
            "
            z
        `, "x");
        // use matchFn rather than a hard-coded match so that the free-form
        // pattern is checked with every engine this body is instantiated for
        auto m = matchFn(`abc  "quoted string with \" inside"z`,free_reg);
        assert(m);
        // look-behind inside a global search
        string mails = " hey@you.com no@spam.net ";
        auto rm = regex(`@(?<=\S+@)\S+`,"g");
        assert(equal(map!"a[0]"(matchFn(mails, rm)), ["@you.com", "@spam.net"]));
        // multi-line `$` with greedy, non-empty and lazy repetitions
        auto m2 = matchFn("First line\nSecond line",regex(".*$","gm"));
        assert(equal(map!"a[0]"(m2), ["First line", "", "Second line"]));
        auto m2a = matchFn("First line\nSecond line",regex(".+$","gm"));
        assert(equal(map!"a[0]"(m2a), ["First line", "Second line"]));
        auto m2b = matchFn("First line\nSecond line",regex(".+?$","gm"));
        assert(equal(map!"a[0]"(m2b), ["First line", "Second line"]));
        debug(std_regex_test) writeln("!!! FReD FLAGS test done "~matchFn.stringof~" !!!");
    }
    test_body!bmatch();
    test_body!match();
}
602
603//tests for accumulated std.regex issues and other regressions
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;
    // writeln is only needed for the optional progress message, so the
    // import is compiled in under the same debug identifier.
    debug(std_regex_test) import std.stdio : writeln;
    // Runs every regression check below with the matching function under
    // test, so that each engine is exercised by the same assertions.
    void test_body(alias matchFn)()
    {
        //issue 5857
        //matching goes out of control if ... in (...){x} has .*/.+
        auto c = matchFn("axxxzayyyyyzd",regex("(a.*z){2}d")).captures;
        assert(c[0] == "axxxzayyyyyzd");
        assert(c[1] == "ayyyyyz");
        auto c2 = matchFn("axxxayyyyyd",regex("(a.*){2}d")).captures;
        assert(c2[0] == "axxxayyyyyd");
        assert(c2[1] == "ayyyyy");
        //issue 2108
        //greedy vs non-greedy
        auto nogreed = regex("<packet.*?/packet>");
        assert(matchFn("<packet>text</packet><packet>text</packet>", nogreed).hit
               == "<packet>text</packet>");
        auto greed =  regex("<packet.*/packet>");
        assert(matchFn("<packet>text</packet><packet>text</packet>", greed).hit
               == "<packet>text</packet><packet>text</packet>");
        //issue 4574
        //empty successful match still advances the input
        string[] pres, posts;
        foreach (m; matchFn("abcabc", regex("","g")))
        {
            pres ~= m.pre;
            posts ~= m.post;
            assert(m.hit.empty);

        }
        auto heads = [
            "abcabc",
            "abcab",
            "abca",
            "abc",
            "ab",
            "a",
            ""
        ];
        auto tails = [
            "abcabc",
             "bcabc",
              "cabc",
               "abc",
                "bc",
                 "c",
                  ""
        ];
        assert(pres == array(retro(heads)));
        assert(posts == tails);
        //issue 6076
        //regression on .*
        auto re = regex("c.*|d");
        auto m = matchFn("mm", re);
        assert(!m);
        debug(std_regex_test) writeln("!!! FReD REGRESSION test done "~matchFn.stringof~" !!!");
        // nested counted repetitions on a 100-character input
        auto rprealloc = regex(`((.){5}.{1,10}){5}`);
        auto arr = array(repeat('0',100));
        auto m2 = matchFn(arr, rprealloc);
        assert(m2);
        // this pattern must compile without throwing
        assert(collectException(
                regex(r"^(import|file|binary|config)\s+([^\(]+)\(?([^\)]*)\)?\s*$")
                ) is null);
        // every escapable character must work escaped inside a character
        // class; matchFn is used so that each engine is covered
        foreach (ch; [Escapables])
        {
            assert(matchFn(to!string(ch),regex(`[\`~ch~`]`)));
            assert(!matchFn(to!string(ch),regex(`[^\`~ch~`]`)));
            assert(matchFn(to!string(ch),regex(`[\`~ch~`-\`~ch~`]`)));
        }
        //bugzilla 7718
        string strcmd = "./myApp.rb -os OSX -path \"/GIT/Ruby Apps/sec\" -conf 'notimer'";
        auto reStrCmd = regex (`(".*")|('.*')`, "g");
        assert(equal(map!"a[0]"(matchFn(strcmd, reStrCmd)),
                     [`"/GIT/Ruby Apps/sec"`, `'notimer'`]));
    }
    test_body!bmatch();
    test_body!match();
}
684
685// tests for replace
@safe unittest
{
    // writeln is only needed for the optional progress message, so the
    // import is compiled in under the same debug identifier.
    debug(std_regex_test) import std.stdio : writeln;
    // Runs the replace checks for every string width with the matching
    // function under test.
    void test(alias matchFn)()
    {
        import std.uni : toUpper;

        foreach (String; AliasSeq!(string, wstring, dstring))
        {
            // callback used by the functional form of replace:
            // upper-cases whatever was matched
            auto baz(Cap)(Cap m)
            if (is(Cap == Captures!(Cap.String)))
            {
                return toUpper(m.hit);
            }
            // without "g" only the first occurrence is replaced
            assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r")), to!String("c"))
                   == to!String("ack rapacity"));
            // with "g" every occurrence is replaced
            assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r"), "g"), to!String("c"))
                   == to!String("ack capacity"));
            // $& expands to the whole match
            assert(std.regex.replace!(matchFn)(to!String("noon"), regex(to!String("^n")), to!String("[$&]"))
                   == to!String("[n]oon"));
            // $` and $' expand to the text before and after the match
            assert(std.regex.replace!(matchFn)(
                to!String("test1 test2"), regex(to!String(`\w+`),"g"), to!String("$`:$'")
            ) == to!String(": test2 test1 :"));
            auto s = std.regex.replace!(baz!(Captures!(String)))(to!String("Strap a rocket engine on a chicken."),
                    regex(to!String("[ar]"), "g"));
            assert(s == "StRAp A Rocket engine on A chicken.");
        }
        debug(std_regex_test) writeln("!!! Replace test done "~matchFn.stringof~"  !!!");
    }
    test!(bmatch)();
    test!(match)();
}
718
719// tests for splitter
@safe unittest
{
    import std.algorithm.comparison : equal;

    // Separator at both ends: an empty piece is produced on each side.
    auto bothEnds = ", abc, de,     fg, hi, ";
    auto expectedBoth = ["", "abc", "de", "fg", "hi", ""];
    auto piecesBoth = splitter(bothEnds, regex(", *"));
    assert(equal(piecesBoth, expectedBoth));

    // Separator only at the front: walk the range by hand first, then
    // compare it as a whole.
    auto frontOnly = ", abc, de,  fg, hi";
    auto expectedFront = ["", "abc", "de", "fg", "hi"];
    auto piecesFront = splitter(frontOnly, regex(", *"));

    uint idx;
    foreach (piece; piecesFront)
    {
        assert(expectedFront[idx++] == piece);
    }
    assert(equal(piecesFront, expectedFront));
}
739
@safe unittest
{
    // splitter must also accept mutable character arrays; this block only
    // checks that the call compiles and runs.
    char[] mutableInput = ", abc, de,  fg, hi, ".dup;
    auto separator = regex(", *");
    auto pieces = splitter(mutableInput, separator);
}
745
@safe unittest
{
    import std.algorithm.comparison : equal;

    // split produces the same pieces as splitter, including the empty
    // pieces at both ends.
    auto input = ", abc, de,  fg, hi, ";
    auto expected = ["", "abc", "de", "fg", "hi", ""];
    auto pieces = split(input, regex(", *"));
    assert(equal(pieces, expected[]));
}
753
@safe unittest
{ // bugzilla 7141
    // escaped '-' used as the lower bound of a range
    string escapedDash = `[a\--b]`;
    foreach (input; ["-", "b"])
        assert(match(input, escapedDash));

    // range that starts at '&'
    string fromAmpersand = `[&-z]`;
    assert(match("b", fromAmpersand));
}
@safe unittest
{//bugzilla 7111
    // `^` has to match on an empty input
    auto startAnchor = regex("^");
    assert(match("", startAnchor));
}
@safe unittest
{//bugzilla 7300
    // a pattern longer than the dstring input must simply fail to match
    auto attempt = match("a"d, "aa"d);
    assert(!attempt);
}
770
771// bugzilla 7551
@safe unittest
{
    // a leading ']' inside a character class is a literal
    auto leadingBracket = regex("[]abc]*");
    auto firstHit = "]ab".matchFirst(leadingBracket).hit;
    assert(firstHit == "]ab");

    // "[]" alone is rejected
    assertThrown(regex("[]"));

    // the same class with a set difference applied
    auto withDifference = regex("[]abc--ab]*");
    auto secondHit = "]ac".matchFirst(withDifference).hit;
    assert(secondHit == "]");
}
780
@safe unittest
{//bugzilla 7674
    // "$$" in the format string produces a single '$'
    auto dollar = "1234".replace(regex("^"), "$$");
    assert(dollar == "$1234");

    // backslashes in the format string are checked against the expected output
    auto questionMark = regex(r"\?", "g");
    auto singleSlash = "hello?".replace(questionMark, r"\?");
    assert(singleSlash == r"hello\?");
    auto doubleSlash = "hello?".replace(questionMark, r"\\?");
    assert(doubleSlash != r"hello\?");
}
@safe unittest
{// bugzilla 7679
    import std.algorithm.comparison : equal;
    // splitter and split with a ctRegex for every string width
    foreach (S; AliasSeq!(string, wstring, dstring))
    (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
        enum dot = ctRegex!(to!S(r"\."));
        auto input = to!S("a.b");
        auto expected = [to!S("a"), to!S("b")];
        assert(equal(std.regex.splitter(input, dot), expected));
        assert(split(input, dot) == expected);
    }();
}
@safe unittest
{//bugzilla 8203
    string data = "
    NAME   = XPAW01_STA:STATION
    NAME   = XPAW01_STA
    ";
    auto uniFileOld = data;
    auto r = regex(
       r"^NAME   = (?P<comp>[a-zA-Z0-9_]+):*(?P<blk>[a-zA-Z0-9_]*)","gm");
    auto uniCapturesNew = match(uniFileOld, r);
    // iterating the same match range many times must keep working
    for (int i = 0; i < 20; i++)
        foreach (matchNew; uniCapturesNew) {}
    //a second issue with same symptoms
    // The Cyrillic text in the two literals below had been damaged into
    // replacement characters; it is restored here so that the test
    // exercises non-ASCII ranges and input again.
    auto r2 = regex(`([а-яА-Я\-_]+\s*)+(?<=[\s\.,\^])`);
    match("аллея Театральная", r2);
}
@safe unittest
{// bugzilla 8637 purity of enforce
    // a match result has to be usable directly as the condition of enforce
    auto needle = regex("world");
    auto found = match("hello world", needle);
    enforce(found);
}
819
820// bugzilla 8725
@safe unittest
{
    // free-form global pattern with negative look-aheads around a lazy group
    static emphasis = regex( r"\*
                (?!\s+)
                (.*?)
                (?!\s+)
                \*", "gx" );
    string text = "this * is* interesting, *very* interesting";
    auto rewritten = replace(text, emphasis, "<i>$1</i>");
    assert(rewritten == "this * is* interesting, <i>very</i> interesting");
}
832
833// bugzilla 8349
@safe unittest
{
    // alternation of non-capturing groups inside a capturing group,
    // compiled with ctRegex
    enum peakPattern = r"\>(wgEncode.*Tfbs.*\.(?:narrow)|(?:broad)Peak.gz)</a>";
    enum peakCompiled = ctRegex!(peakPattern);
    //note that the regex pattern itself is probably bogus
    auto found = match(r"\>wgEncode-blah-Tfbs.narrow</a>", peakCompiled);
    assert(found);
}
841
842// bugzilla 9211
@safe unittest
{
    import std.algorithm.comparison : equal;

    // a starred group followed by a digit: the group keeps its last
    // iteration, the trailing digit goes to the second group
    auto expected = ["1234", "3", "4"];

    auto wordClass = regex(r"^(\w)*(\d)");
    auto wordMatch = match("1234", wordClass);
    assert(equal(wordMatch.front, expected));

    auto digitClass = regex(r"^([0-9])*(\d)");
    auto digitMatch = match("1234", digitClass);
    assert(equal(digitMatch.front, expected));
}
853
854// bugzilla 9280
@safe unittest
{
    // named groups of a statically initialised regex are reachable both
    // by index and by name
    static ircPrefix = regex(r"^(?P<nick>.*?)!(?P<ident>.*?)@(?P<host>.*?)$");
    string line = "a!b@c";
    auto found = match(line, ircPrefix);
    assert(found);
    auto groups = found.captures;
    assert(groups[1] == "a");
    assert(groups["nick"] == "a");
}
865
866
867// bugzilla 9579
@safe unittest
{
    // replace with a mutable char[] input and an immutable format string
    string fmt = "($1)";
    char[] text = ['a', 'b', 'c'];
    // used to give a compile error:
    auto firstLetter = regex(`(a)`, "g");
    auto replaced = replace(text, firstLetter, fmt);
    assert(replaced == "(a)bc");
}
877
878// bugzilla 9634
@safe unittest
{
    // a non-capturing group with a repetition inside, compiled with ctRegex
    auto nonCapturing = ctRegex!"(?:a+)";
    auto hit = match("aaaa", nonCapturing).hit;
    assert(hit == "aaaa");
}
884
885//bugzilla 10798
@safe unittest
{
    // set difference inside a character class, compiled with ctRegex
    auto withoutC = ctRegex!("[abcd--c]*");
    auto found = "abc".match(withoutC);
    assert(found);
    assert(found.hit == "ab");
}
893
894// bugzilla 10913
@system unittest
{
    // The functional form of replace must accept a @system callback from
    // @system code and a @safe callback from @safe code.
    @system static string systemCopy(const(char)[] text)
    {
        return text.dup;
    }
    @safe static string safeCopy(const(char)[] text)
    {
        return text.dup;
    }
    () @system {
        replace!((caps) => systemCopy(caps.hit))("blah", regex(`a`));
    }();
    () @safe {
        replace!((caps) => safeCopy(caps.hit))("blah", regex(`a`));
    }();
}
912
913// bugzilla 11262
@safe unittest
{
    // UFCS replace with a global ctRegex stored in an enum
    enum comma = ctRegex!(r",", "g");
    auto text = "This,List";
    auto dashed = text.replace(comma, "-");
    text = dashed;
    assert(text == "This-List");
}
921
922// bugzilla 11775
@safe unittest
{
    // a counted repetition whose lower bound exceeds the upper bound must
    // be rejected when the pattern is compiled
    auto failure = collectException(regex("a{1,0}"));
    assert(failure);
}
927
928// bugzilla 11839
@safe unittest
{
    import std.algorithm.comparison : equal;
    // valid group names are reported by namedCaptures
    assert(regex(`(?P<var1>\w+)`).namedCaptures.equal(["var1"]));
    // a name that starts with a digit is rejected
    assert(collectException(regex(`(?P<1>\w+)`)));
    assert(regex(`(?P<v1>\w+)`).namedCaptures.equal(["v1"]));
    assert(regex(`(?P<__>\w+)`).namedCaptures.equal(["__"]));
    // non-ASCII group name; the Cyrillic letter had been damaged into
    // replacement characters and is restored here
    assert(regex(`(?P<я>\w+)`).namedCaptures.equal(["я"]));
}
938
939// bugzilla 12076
@safe unittest
{
    // a variable-length negative look-behind in a ctRegex only has to
    // compile and run here; the result is not inspected
    string text = "one two";
    auto lookBehind = ctRegex!(r"(?<!x[a-z]+)\s([a-z]+)");
    auto result = match(text, lookBehind);
}
946
947// bugzilla 12105
@safe unittest
{
    // lazy repetition followed by a negative look-ahead
    auto lazyForm = ctRegex!`.*?(?!a)`;
    auto lazyHit = "aaab".matchFirst(lazyForm).hit;
    assert(lazyHit == "aaa");

    // greedy repetition followed by the same look-ahead
    auto greedyForm = ctRegex!`.*(?!a)`;
    auto greedyHit = "aaab".matchFirst(greedyForm).hit;
    assert(greedyHit == "aaab");
}
955
956//bugzilla 11784
@safe unittest
{
    // set intersection with a negated nested class: the first letter that
    // is not one of "aeiuo" is expected
    auto alphabet = "abcdefghijklmnopqrstuvwxyz";
    auto first = alphabet.matchFirst("[a-z&&[^aeiuo]]");
    assert(first.hit == "b");
}
962
963//bugzilla 12366
@safe unittest
{
    // nested look-aheads with back-references, compiled with ctRegex
    auto nested = ctRegex!(`^((?=(xx+?)\2+$)((?=\2+$)(?=(x+)(\4+$))\5){2})*x?$`);
    auto eightX = "xxxxxxxx".match(nested);
    assert(eightX.empty);
    auto fourX = "xxxx".match(nested);
    assert(!fourX.empty);
}
970
971// bugzilla 12582
@safe unittest
{
    // looking up a group name that the pattern does not define must throw
    auto named = regex(`(?P<a>abc)`);
    auto failure = collectException("abc".matchFirst(named)["b"]);
    assert(failure);
}
977
978// bugzilla 12691
@safe unittest
{
    // a starred group with an empty alternative must not match "e@",
    // neither as a run-time pattern nor as a ctRegex
    auto runTime = bmatch("e@", "^([a-z]|)*$");
    assert(runTime.empty);
    auto compileTime = bmatch("e@", ctRegex!`^([a-z]|)*$`);
    assert(compileTime.empty);
}
984
985//bugzilla  12713
@safe unittest
{
    // unbalanced brackets and parentheses must be rejected with an
    // exception
    string malformed = "[[a-z]([a-z]|(([[a-z])))";
    assertThrown(regex(malformed));
}
990
991//bugzilla 12747
@safe unittest
{
    // each of these back-references to group 1 must be rejected when the
    // pattern is compiled
    foreach (invalid; [`^x(\1)`, `^(x(\1))`, `^((x)(?=\1))`])
        assertThrown(regex(invalid));
}
998
999// bugzilla 14504
@safe unittest
{
    // a long run of optional atoms followed by a long literal only has to
    // compile as a ctRegex
    enum optionalRun = "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?";
    enum literalRun = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
    auto p = ctRegex!(optionalRun ~ literalRun);
}
1005
1006// bugzilla 14529
@safe unittest
{
    // a case-insensitive character class must accept both cases of every
    // listed letter
    auto caseless = regex(r"^[CDF]$", "i");
    foreach (letter; ["C", "c", "D", "d", "F", "f"])
    {
        auto first = matchAll(letter, caseless).front;
        assert(first.hit == letter);
    }
}
1013
1014// bugzilla 14615
@safe unittest
{
    import std.array : appender;
    // replaceAllInto is used below, so it is listed here as well; the
    // unused std.stdio import was dropped.
    import std.regex : regex, replaceAllInto, replaceFirst, replaceFirstInto;

    auto example = "Hello, world!";
    auto pattern = regex("^Hello, (bug)");  // won't find this one
    // with no match the input is returned unchanged
    auto result = replaceFirst(example, pattern, "$1 Sponge Bob");
    assert(result == "Hello, world!");  // Ok.

    // the *Into variants must still copy the unmatched input into the sink
    auto sink = appender!string;
    replaceFirstInto(sink, example, pattern, "$1 Sponge Bob");
    assert(sink.data == "Hello, world!");
    replaceAllInto(sink, example, pattern, "$1 Sponge Bob");
    assert(sink.data == "Hello, world!Hello, world!");
}
1032
1033// bugzilla 15573
@safe unittest
{
    // with the "x" flag, a space inside a character class still matches
    // the space of the input
    auto spacedClass = regex("[c d]", "x");
    auto found = "a b".matchFirst(spacedClass);
    assert(found);
}
1039
1040// bugzilla 15864
@safe unittest
{
    // this pattern only has to compile; the result is not used
    string anchorPattern = `(<a (?:(?:\w+=\"[^"]*\")?\s*)*href="\.\.?)"`;
    regex(anchorPattern);
}
1045
@safe unittest
{
    // (?# ...) groups are comments and do not take part in matching
    auto commented = regex("(?# comment)abc(?# comment2)");
    auto found = "abc".matchFirst(commented);
    assert(found);

    // an unterminated comment group is an error
    assertThrown(regex("(?#..."));
}
1052
1053// bugzilla 17075
@safe unittest
{
    // a 100_000-character tail without a closing tag must yield an empty
    // match
    enum tagPattern = `<title>(.+)</title>`;
    static tagRegex = ctRegex!tagPattern;
    string filler = "<".repeat(100_000).join;
    string input = "<title>" ~ filler;
    auto found = input.matchFirst(tagRegex);
    assert(found.empty);
}
1061
1062// bugzilla 17212
@safe unittest
{
    // with the "x" flag, spaces around a character class are ignored
    auto padded = regex(" [a] ", "x");
    auto found = "a".matchFirst(padded);
    assert(found);
}
1068
1069// bugzilla 17157
@safe unittest
{
    import std.algorithm.comparison : equal;

    // For an alternation of groups, every match reports the text of the
    // group that took part and an empty string for the other groups.
    auto input = "--a--b--c--d--";
    auto expected = [
        ["a", "a", "", "", ""],
        ["b", "", "b", "", ""],
        ["c", "", "", "c", ""],
        ["d", "", "", "", "d"]
    ];

    auto compileTime = ctRegex!"(a)|(b)|(c)|(d)";
    auto runTime = regex("(a)|(b)|(c)|(d)", "g");

    auto viaMatchAll = input.matchAll(compileTime);
    assert(equal!equal(viaMatchAll, expected));
    auto viaBmatch = input.bmatch(runTime);
    assert(equal!equal(viaBmatch, expected));
}
1085
1086// bugzilla 17667
@safe unittest
{
    import std.algorithm.searching : canFind;
    // Checks that compiling `arg` throws and that the exception message
    // contains `msg`. `line` defaults to the caller's line number and is
    // put in front of the failure message.
    void willThrow(T, size_t line = __LINE__)(T arg, string msg)
    {
        auto e = collectException(regex(arg));
        // collectException yields null when nothing was thrown; fail with
        // a readable message instead of dereferencing null below
        assert(e !is null, to!string(line) ~ ": no exception was thrown");
        assert(e.msg.canFind(msg), to!string(line) ~ ": " ~ e.msg);
    }
    willThrow([r".", r"[\(\{[\]\}\)]"], "no matching ']' found while parsing character class");
    willThrow([r"[\", r"123"], "no matching ']' found while parsing character class");
    willThrow([r"[a-", r"123"], "no matching ']' found while parsing character class");
    willThrow([r"[a-\", r"123"], "invalid escape sequence");
    willThrow([r"\", r"123"], "invalid escape sequence");
}
1101
1102// bugzilla 17668
@safe unittest
{
    import std.algorithm.searching : canFind;
    // "[^]" has no operand for '^' and must be rejected with a
    // RegexException that says so
    auto e = collectException!RegexException(regex(q"<[^]>"));
    // collectException yields null when nothing was thrown; fail with a
    // readable message instead of dereferencing null below
    assert(e !is null, "no RegexException was thrown");
    assert(e.msg.canFind("no operand for '^'"));
}
1109
1110// bugzilla 17673
@safe unittest
{
    // a regex built from several patterns reports which one matched
    string[] alternatives = ["abc", "\"|x"];
    auto combined = regex(alternatives);
    string input = `<">`;
    auto found = matchFirst(input, combined);
    assert(found);
    assert(found.whichPattern == 2);
}
1120
1121