/*
    Regular expressions package test suite.
*/
module std.regex.internal.tests;

package(std.regex):

import std.conv, std.exception, std.meta, std.range,
    std.typecons, std.regex;

import std.regex.internal.parser : Escapables; // characters that need escaping

// Compile-time sequence of the integers B .. E, used to pick test-vector indices.
alias Sequence(int B, int E) = staticIota!(B, E);

@safe unittest
{//sanity checks
    regex("(a|b)*");
    regex(`(?:([0-9A-F]+)\.\.([0-9A-F]+)|([0-9A-F]+))\s*;\s*(.*)\s*#`);
    regex("abc|edf|ighrg");
    auto r1 = regex("abc");
    auto r2 = regex("(gylba)");
    assert(match("abcdef", r1).hit == "abc");
    assert(!match("wida",r2));
    assert(bmatch("abcdef", r1).hit == "abc");
    assert(!bmatch("wida", r2));
    assert(match("abc", "abc".dup));
    assert(bmatch("abc", "abc".dup));
    Regex!char rc;
    assert(rc.empty);
    rc = regex("test");
    assert(!rc.empty);
}

/* The test vectors in this file are altered from Henry Spencer's regexp
   test code. His copyright notice is:

        Copyright (c) 1986 by University of Toronto.
        Written by Henry Spencer.  Not derived from licensed software.

        Permission is granted to anyone to use this software for any
        purpose on any computer system, and to redistribute it freely,
        subject to the following restrictions:

        1. The author is not responsible for the consequences of use of
                this software, no matter how awful, even if they arise
                from defects in it.

        2. The origin of this software must not be misrepresented, either
                by explicit claim or by omission.

        3. Altered versions must be plainly marked as such, and must not
                be misrepresented as being the original software.


 */

// Table-driven test: every vector is run through both run-time engines
// (bmatch, match) for char/wchar/dchar, and a subset through ctRegex.
@safe unittest
{
    // One test case. `result` is "y" (must match), "n" (must not match)
    // or "c" (pattern must fail to compile). For "y" cases `format` is
    // expanded against the captures and compared with `replace`.
    struct TestVectors
    {
        string pattern;
        string input;
        string result;
        string format;
        string replace;
        string flags;
    }

    // NOTE: ct_tests below selects entries by index, so the order and the
    // number of entries in this table must not change.
    static immutable TestVectors[] tv = [
        TestVectors( "a\\b", "a", "y", "$&", "a" ),
        TestVectors( "(a)b\\1", "abaab","y", "$&", "aba" ),
        TestVectors( "()b\\1", "aaab", "y", "$&", "b" ),
        TestVectors( "abc", "abc", "y", "$&", "abc" ),
        TestVectors( "abc", "xbc", "n", "-", "-" ),
        TestVectors( "abc", "axc", "n", "-", "-" ),
        TestVectors( "abc", "abx", "n", "-", "-" ),
        TestVectors( "abc", "xabcy","y", "$&", "abc" ),
        TestVectors( "abc", "ababc","y", "$&", "abc" ),
        TestVectors( "ab*c", "abc", "y", "$&", "abc" ),
        TestVectors( "ab*bc", "abc", "y", "$&", "abc" ),
        TestVectors( "ab*bc", "abbc", "y", "$&", "abbc" ),
        TestVectors( "ab*bc", "abbbbc","y", "$&", "abbbbc" ),
        TestVectors( "ab+bc", "abbc", "y", "$&", "abbc" ),
        TestVectors( "ab+bc", "abc", "n", "-", "-" ),
        TestVectors( "ab+bc", "abq", "n", "-", "-" ),
        TestVectors( "ab+bc", "abbbbc","y", "$&", "abbbbc" ),
        TestVectors( "ab?bc", "abbc", "y", "$&", "abbc" ),
        TestVectors( "ab?bc", "abc", "y", "$&", "abc" ),
        TestVectors( "ab?bc", "abbbbc","n", "-", "-" ),
        TestVectors( "ab?c", "abc", "y", "$&", "abc" ),
        TestVectors( "^abc$", "abc", "y", "$&", "abc" ),
        TestVectors( "^abc$", "abcc", "n", "-", "-" ),
        TestVectors( "^abc", "abcc", "y", "$&", "abc" ),
        TestVectors( "^abc$", "aabc", "n", "-", "-" ),
        TestVectors( "abc$", "aabc", "y", "$&", "abc" ),
        TestVectors( "^", "abc", "y", "$&", "" ),
        TestVectors( "$", "abc", "y", "$&", "" ),
        TestVectors( "a.c", "abc", "y", "$&", "abc" ),
        TestVectors( "a.c", "axc", "y", "$&", "axc" ),
        TestVectors( "a.*c", "axyzc","y", "$&", "axyzc" ),
        TestVectors( "a.*c", "axyzd","n", "-", "-" ),
        TestVectors( "a[bc]d", "abc", "n", "-", "-" ),
        TestVectors( "a[bc]d", "abd", "y", "$&", "abd" ),
        TestVectors( "a[b-d]e", "abd", "n", "-", "-" ),
        TestVectors( "a[b-d]e", "ace", "y", "$&", "ace" ),
        TestVectors( "a[b-d]", "aac", "y", "$&", "ac" ),
        TestVectors( "a[-b]", "a-", "y", "$&", "a-" ),
        TestVectors( "a[b-]", "a-", "y", "$&", "a-" ),
        TestVectors( "a[b-a]", "-", "c", "-", "-" ),
        TestVectors( "a[]b", "-", "c", "-", "-" ),
        TestVectors( "a[", "-", "c", "-", "-" ),
        TestVectors( "a]", "a]", "y", "$&", "a]" ),
        TestVectors( "a[\\]]b", "a]b", "y", "$&", "a]b" ),
        TestVectors( "a[^bc]d", "aed", "y", "$&", "aed" ),
        TestVectors( "a[^bc]d", "abd", "n", "-", "-" ),
        TestVectors( "a[^-b]c", "adc", "y", "$&", "adc" ),
        TestVectors( "a[^-b]c", "a-c", "n", "-", "-" ),
        TestVectors( "a[^\\]b]c", "adc", "y", "$&", "adc" ),
        TestVectors( "ab|cd", "abc", "y", "$&", "ab" ),
        TestVectors( "ab|cd", "abcd", "y", "$&", "ab" ),
        TestVectors( "()ef", "def", "y", "$&-$1", "ef-" ),
        TestVectors( "()*", "-", "y", "-", "-" ),
        TestVectors( "*a", "-", "c", "-", "-" ),
        TestVectors( "^*", "-", "y", "-", "-" ),
        TestVectors( "$*", "-", "y", "-", "-" ),
        TestVectors( "(*)b", "-", "c", "-", "-" ),
        TestVectors( "$b", "b", "n", "-", "-" ),
        TestVectors( "a\\", "-", "c", "-", "-" ),
        TestVectors( "a\\(b", "a(b", "y", "$&-$1", "a(b-" ),
        TestVectors( "a\\(*b", "ab", "y", "$&", "ab" ),
        TestVectors( "a\\(*b", "a((b", "y", "$&", "a((b" ),
        TestVectors( "a\\\\b", "a\\b", "y", "$&", "a\\b" ),
        TestVectors( "abc)", "-", "c", "-", "-" ),
        TestVectors( "(abc", "-", "c", "-", "-" ),
        TestVectors( "((a))", "abc", "y", "$&-$1-$2", "a-a-a" ),
        TestVectors( "(a)b(c)", "abc", "y", "$&-$1-$2", "abc-a-c" ),
        TestVectors( "a+b+c", "aabbabc","y", "$&", "abc" ),
        TestVectors( "a**", "-", "c", "-", "-" ),
        TestVectors( "a*?a", "aa", "y", "$&", "a" ),
        TestVectors( "(a*)*", "aaa", "y", "-", "-" ),
        TestVectors( "(a*)+", "aaa", "y", "-", "-" ),
        TestVectors( "(a|)*", "-", "y", "-", "-" ),
        TestVectors( "(a*|b)*", "aabb", "y", "-", "-" ),
        TestVectors( "(a|b)*", "ab", "y", "$&-$1", "ab-b" ),
        TestVectors( "(a+|b)*", "ab", "y", "$&-$1", "ab-b" ),
        TestVectors( "(a+|b)+", "ab", "y", "$&-$1", "ab-b" ),
        TestVectors( "(a+|b)?", "ab", "y", "$&-$1", "a-a" ),
        TestVectors( "[^ab]*", "cde", "y", "$&", "cde" ),
        TestVectors( "(^)*", "-", "y", "-", "-" ),
        TestVectors( "(ab|)*", "-", "y", "-", "-" ),
        TestVectors( ")(", "-", "c", "-", "-" ),
        TestVectors( "", "abc", "y", "$&", "" ),
        TestVectors( "abc", "", "n", "-", "-" ),
        TestVectors( "a*", "", "y", "$&", "" ),
        TestVectors( "([abc])*d", "abbbcd", "y", "$&-$1", "abbbcd-c" ),
        TestVectors( "([abc])*bcd", "abcd", "y", "$&-$1", "abcd-a" ),
        TestVectors( "a|b|c|d|e", "e", "y", "$&", "e" ),
        TestVectors( "(a|b|c|d|e)f", "ef", "y", "$&-$1", "ef-e" ),
        TestVectors( "((a*|b))*", "aabb", "y", "-", "-" ),
        TestVectors( "abcd*efg", "abcdefg", "y", "$&", "abcdefg" ),
        TestVectors( "ab*", "xabyabbbz", "y", "$&", "ab" ),
        TestVectors( "ab*", "xayabbbz", "y", "$&", "a" ),
        TestVectors( "(ab|cd)e", "abcde", "y", "$&-$1", "cde-cd" ),
        TestVectors( "[abhgefdc]ij", "hij", "y", "$&", "hij" ),
        TestVectors( "^(ab|cd)e", "abcde", "n", "x$1y", "xy" ),
        TestVectors( "(abc|)ef", "abcdef", "y", "$&-$1", "ef-" ),
        TestVectors( "(a|b)c*d", "abcd", "y", "$&-$1", "bcd-b" ),
        TestVectors( "(ab|ab*)bc", "abc", "y", "$&-$1", "abc-a" ),
        TestVectors( "a([bc]*)c*", "abc", "y", "$&-$1", "abc-bc" ),
        TestVectors( "a([bc]*)(c*d)", "abcd", "y", "$&-$1-$2", "abcd-bc-d" ),
        TestVectors( "a([bc]+)(c*d)", "abcd", "y", "$&-$1-$2", "abcd-bc-d" ),
        TestVectors( "a([bc]*)(c+d)", "abcd", "y", "$&-$1-$2", "abcd-b-cd" ),
        TestVectors( "a[bcd]*dcdcde", "adcdcde", "y", "$&", "adcdcde" ),
        TestVectors( "a[bcd]+dcdcde", "adcdcde", "n", "-", "-" ),
        TestVectors( "(ab|a)b*c", "abc", "y", "$&-$1", "abc-ab" ),
        TestVectors( "((a)(b)c)(d)", "abcd", "y", "$1-$2-$3-$4", "abc-a-b-d" ),
        TestVectors( "[a-zA-Z_][a-zA-Z0-9_]*", "alpha", "y", "$&", "alpha" ),
        TestVectors( "^a(bc+|b[eh])g|.h$", "abh", "y", "$&-$1", "bh-" ),
        TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "effgz", "y", "$&-$1-$2", "effgz-effgz-" ),
        TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "ij", "y", "$&-$1-$2", "ij-ij-j" ),
        TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "effg", "n", "-", "-" ),
        TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "bcdd", "n", "-", "-" ),
        TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "reffgz", "y", "$&-$1-$2", "effgz-effgz-" ),
        TestVectors( "(((((((((a)))))))))", "a", "y", "$&", "a" ),
        TestVectors( "multiple words of text", "uh-uh", "n", "-", "-" ),
        TestVectors( "multiple words", "multiple words, yeah", "y", "$&", "multiple words" ),
        TestVectors( "(.*)c(.*)", "abcde", "y", "$&-$1-$2", "abcde-ab-de" ),
        TestVectors( "\\((.*), (.*)\\)", "(a, b)", "y", "($2, $1)", "(b, a)" ),
        TestVectors( "abcd", "abcd", "y", "$&-&-$$$&", "abcd-&-$abcd" ),
        TestVectors( "a(bc)d", "abcd", "y", "$1-$$1-$$$1", "bc-$1-$bc" ),
        TestVectors( "[k]", "ab", "n", "-", "-" ),
        TestVectors( "[ -~]*", "abc", "y", "$&", "abc" ),
        TestVectors( "[ -~ -~]*", "abc", "y", "$&", "abc" ),
        TestVectors( "[ -~ -~ -~]*", "abc", "y", "$&", "abc" ),
        TestVectors( "[ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ),
        TestVectors( "[ -~ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ),
        TestVectors( "[ -~ -~ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ),
        TestVectors( "[ -~ -~ -~ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ),
        TestVectors( "a{2}", "candy", "n", "", "" ),
        TestVectors( "a{2}", "caandy", "y", "$&", "aa" ),
        TestVectors( "a{2}", "caaandy", "y", "$&", "aa" ),
        TestVectors( "a{2,}", "candy", "n", "", "" ),
        TestVectors( "a{2,}", "caandy", "y", "$&", "aa" ),
        TestVectors( "a{2,}", "caaaaaandy", "y", "$&", "aaaaaa" ),
        TestVectors( "a{1,3}", "cndy", "n", "", "" ),
        TestVectors( "a{1,3}", "candy", "y", "$&", "a" ),
        TestVectors( "a{1,3}", "caandy", "y", "$&", "aa" ),
        TestVectors( "a{1,3}", "caaaaaandy", "y", "$&", "aaa" ),
        TestVectors( "e?le?", "angel", "y", "$&", "el" ),
        TestVectors( "e?le?", "angle", "y", "$&", "le" ),
        TestVectors( "\\bn\\w", "noonday", "y", "$&", "no" ),
        TestVectors( "\\wy\\b", "possibly yesterday", "y", "$&", "ly" ),
        TestVectors( "\\w\\Bn", "noonday", "y", "$&", "on" ),
        TestVectors( "y\\B\\w", "possibly yesterday", "y", "$&", "ye" ),
        TestVectors( "\\cJ", "abc\ndef", "y", "$&", "\n" ),
        TestVectors( "\\d", "B2 is", "y", "$&", "2" ),
        TestVectors( "\\D", "B2 is", "y", "$&", "B" ),
        TestVectors( "\\s\\w*", "foo bar", "y", "$&", " bar" ),
        TestVectors( "\\S\\w*", "foo bar", "y", "$&", "foo" ),
        TestVectors( "abc", "ababc", "y", "$&", "abc" ),
        TestVectors( "apple(,)\\sorange\\1", "apple, orange, cherry, peach", "y", "$&", "apple, orange," ),
        TestVectors( "(\\w+)\\s(\\w+)", "John Smith", "y", "$2, $1", "Smith, John" ),
        TestVectors( "\\n\\f\\r\\t\\v", "abc\n\f\r\t\vdef", "y", "$&", "\n\f\r\t\v" ),
        TestVectors( ".*c", "abcde", "y", "$&", "abc" ),
        TestVectors( "^\\w+((;|=)\\w+)+$", "some=host=tld", "y", "$&-$1-$2", "some=host=tld-=tld-=" ),
        TestVectors( "^\\w+((\\.|-)\\w+)+$", "some.host.tld", "y", "$&-$1-$2", "some.host.tld-.tld-." ),
        TestVectors( "q(a|b)*q", "xxqababqyy", "y", "$&-$1", "qababq-b" ),
        TestVectors( "^(a)(b){0,1}(c*)", "abcc", "y", "$1 $2 $3", "a b cc" ),
        TestVectors( "^(a)((b){0,1})(c*)", "abcc", "y", "$1 $2 $3", "a b b" ),
        TestVectors( "^(a)(b)?(c*)", "abcc", "y", "$1 $2 $3", "a b cc" ),
        TestVectors( "^(a)((b)?)(c*)", "abcc", "y", "$1 $2 $3", "a b b" ),
        TestVectors( "^(a)(b){0,1}(c*)", "acc", "y", "$1 $2 $3", "a cc" ),
        TestVectors( "^(a)((b){0,1})(c*)", "acc", "y", "$1 $2 $3", "a " ),
        TestVectors( "^(a)(b)?(c*)", "acc", "y", "$1 $2 $3", "a cc" ),
        TestVectors( "^(a)((b)?)(c*)", "acc", "y", "$1 $2 $3", "a " ),
        TestVectors( "(?:ab){3}", "_abababc","y", "$&-$1", "ababab-" ),
        TestVectors( "(?:a(?:x)?)+", "aaxaxx", "y", "$&-$1-$2", "aaxax--" ),
        TestVectors( `\W\w\W`, "aa b!ca", "y", "$&", " b!"),
//more repetitions:
        TestVectors( "(?:a{2,4}b{1,3}){1,2}", "aaabaaaabbb", "y", "$&", "aaabaaaabbb" ),
        TestVectors( "(?:a{2,4}b{1,3}){1,2}?", "aaabaaaabbb", "y", "$&", "aaab" ),
//groups:
        TestVectors( "(abc)|(edf)|(xyz)", "xyz", "y", "$1-$2-$3","--xyz"),
        TestVectors( "(?P<q>\\d+)/(?P<d>\\d+)", "2/3", "y", "${d}/${q}", "3/2"),
//set operations:
        TestVectors( "[a-z--d-f]", " dfa", "y", "$&", "a"),
        TestVectors( "[abc[pq--acq]]{2}", "bqpaca", "y", "$&", "pa"),
        TestVectors( "[a-z9&&abc0-9]{3}", "z90a0abc", "y", "$&", "abc"),
        TestVectors( "[0-9a-f~~0-5a-z]{2}", "g0a58x", "y", "$&", "8x"),
        TestVectors( "[abc[pq]xyz[rs]]{4}", "cqxr", "y", "$&", "cqxr"),
        TestVectors( "[abcdf--[ab&&[bcd]][acd]]", "abcdefgh", "y", "$&", "f"),
        TestVectors( "[a-c||d-f]+", "abcdef", "y", "$&", "abcdef"),
        TestVectors( "[a-f--a-c]+", "abcdef", "y", "$&", "def"),
        TestVectors( "[a-c&&b-f]+", "abcdef", "y", "$&", "bc"),
        TestVectors( "[a-c~~b-f]+", "abcdef", "y", "$&", "a"),
//unicode blocks & properties:
// NOTE(review): several non-ASCII literals in the entries below show up as
// U+FFFD replacement characters in this copy of the file (encoding damage);
// they are kept verbatim here and must be restored from a correctly encoded
// revision before these vectors can be trusted.
        TestVectors( `\P{Inlatin1suppl ement}`, "\u00c2!", "y", "$&", "!"),
        TestVectors( `\p{InLatin-1 Supplement}\p{in-mathematical-operators}\P{Inlatin1suppl ement}`,
            "\u00c2\u2200\u00c3\u2203.", "y", "$&", "\u00c3\u2203."),
        TestVectors( `[-+*/\p{in-mathematical-operators}]{2}`, "a+\u2212", "y", "$&", "+\u2212"),
        TestVectors( `\p{Ll}+`, "XabcD", "y", "$&", "abc"),
        TestVectors( `\p{Lu}+`, "������������", "y", "$&", "������"),
        TestVectors( `^\p{Currency Symbol}\p{Sc}`, "$���", "y", "$&", "$���"),
        TestVectors( `\p{Common}\p{Thai}`, "!���", "y", "$&", "!���"),
        TestVectors( `[\d\s]*\D`, "12 \t3\U00001680\u0F20_2", "y", "$&", "12 \t3\U00001680\u0F20_"),
        TestVectors( `[c-w��]����`, "������", "y", "$&", "������"),
//case insensitive:
        TestVectors( `^abcdEf$`, "AbCdEF", "y", "$&", "AbCdEF", "i"),
        TestVectors( `�������������� ��������`, "�������������� ��������", "y", "$&", "�������������� ��������", "i"),
        TestVectors( `���������` , "���������", "y", "$&", "���������", "i"),
        TestVectors( "\U00010400{2}", "\U00010428\U00010400 ", "y", "$&", "\U00010428\U00010400", "i"),
        TestVectors( `[adz��-��]{4}`, "Dz����", "y", "$&", "Dz����", "i"),
        TestVectors( `\p{L}\p{Lu}{10}`, "����������������������", "y", "$&", "����������������������", "i"),
        TestVectors( `(?:D��b){3}`, "D��bD��Bd��b", "y", "$&", "D��bD��Bd��b", "i"),
//escapes:
        TestVectors( `\u0041\u005a\U00000065\u0001`, "AZe\u0001", "y", "$&", "AZe\u0001"),
        TestVectors( `\u`, "", "c", "-", "-"),
        TestVectors( `\U`, "", "c", "-", "-"),
        TestVectors( `\u003`, "", "c", "-", "-"),
        TestVectors( `[\x00-\x7f]{4}`, "\x00\x09ab", "y", "$&", "\x00\x09ab"),
        TestVectors( `[\cJ\cK\cA-\cD]{3}\cQ`, "\x01\x0B\x0A\x11", "y", "$&", "\x01\x0B\x0A\x11"),
        TestVectors( `\r\n\v\t\f\\`, "\r\n\v\t\f\\", "y", "$&", "\r\n\v\t\f\\"),
        TestVectors( `[\u0003\u0001]{2}`, "\u0001\u0003", "y", "$&", "\u0001\u0003"),
        TestVectors( `^[\u0020-\u0080\u0001\n-\r]{8}`, "abc\u0001\v\f\r\n", "y", "$&", "abc\u0001\v\f\r\n"),
        TestVectors( `\w+\S\w+`, "ab7!44c", "y", "$&", "ab7!44c"),
        TestVectors( `\b\w+\b`, " abde4 ", "y", "$&", "abde4"),
        TestVectors( `\b\w+\b`, " abde4", "y", "$&", "abde4"),
        TestVectors( `\b\w+\b`, "abde4 ", "y", "$&", "abde4"),
        TestVectors( `\pL\pS`, "a\u02DA", "y", "$&", "a\u02DA"),
        TestVectors( `\pX`, "", "c", "-", "-"),
// ^, $, \b, \B, multiline :
        TestVectors( `\r.*?$`, "abc\r\nxy", "y", "$&", "\r\nxy", "sm"),
        TestVectors( `^a$^b$`, "a\r\nb\n", "n", "$&", "-", "m"),
        TestVectors( `^a$\r\n^b$`,"a\r\nb\n", "y", "$&", "a\r\nb", "m"),
        TestVectors( `^$`, "\r\n", "y", "$&", "", "m"),
        TestVectors( `^a$\nx$`, "a\nx\u2028","y", "$&", "a\nx", "m"),
        TestVectors( `^a$\nx$`, "a\nx\u2029","y", "$&", "a\nx", "m"),
        TestVectors( `^a$\nx$`, "a\nx\u0085","y", "$&", "a\nx","m"),
        TestVectors( `^x$`, "\u2028x", "y", "$&", "x", "m"),
        TestVectors( `^x$`, "\u2029x", "y", "$&", "x", "m"),
        TestVectors( `^x$`, "\u0085x", "y", "$&", "x", "m"),
        TestVectors( `\b^.`, "ab", "y", "$&", "a"),
        TestVectors( `\B^.`, "ab", "n", "-", "-"),
        TestVectors( `^ab\Bc\B`, "\r\nabcd", "y", "$&", "abc", "m"),
        TestVectors( `^.*$`, "12345678", "y", "$&", "12345678"),

// luckily obtained regression on incremental matching in backtracker
        TestVectors( `^(?:(?:([0-9A-F]+)\.\.([0-9A-F]+)|([0-9A-F]+))\s*;\s*([^ ]*)\s*#|# (?:\w|_)+=((?:\w|_)+))`,
            "0020 ; White_Space # ", "y", "$1-$2-$3", "--0020"),
//lookahead
        TestVectors( "(foo.)(?=(bar))", "foobar foodbar", "y", "$&-$1-$2", "food-food-bar" ),
        TestVectors( `\b(\d+)[a-z](?=\1)`, "123a123", "y", "$&-$1", "123a-123" ),
        TestVectors( `\$(?!\d{3})\w+`, "$123 $abc", "y", "$&", "$abc"),
        TestVectors( `(abc)(?=(ed(f))\3)`, "abcedff", "y", "-", "-"),
        TestVectors( `\b[A-Za-z0-9.]+(?=(@(?!gmail)))`, "a@gmail,x@com", "y", "$&-$1", "x-@"),
        TestVectors( `x()(abc)(?=(d)(e)(f)\2)`, "xabcdefabc", "y", "$&", "xabc"),
        TestVectors( `x()(abc)(?=(d)(e)(f)()\3\4\5)`, "xabcdefdef", "y", "$&", "xabc"),
//lookback
        TestVectors( `(?<=(ab))\d`, "12ba3ab4", "y", "$&-$1", "4-ab", "i"),
        TestVectors( `\w(?<!\d)\w`, "123ab24", "y", "$&", "ab"),
        TestVectors( `(?<=D��b)x\w`, "D��bD��Bxd��b", "y", "$&", "xd", "i"),
        TestVectors( `(?<=(ab*c))x`, "abbbbcxac", "y", "$&-$1", "x-abbbbc"),
        TestVectors( `(?<=(ab*?c))x`, "abbbbcxac", "y", "$&-$1", "x-abbbbc"),
        TestVectors( `(?<=(a.*?c))x`, "ababbcxac", "y", "$&-$1", "x-abbc"),
        TestVectors( `(?<=(a{2,4}b{1,3}))x`, "yyaaaabx", "y", "$&-$1", "x-aaaab"),
        TestVectors( `(?<=((?:a{2,4}b{1,3}){1,2}))x`, "aabbbaaaabx", "y", "$&-$1", "x-aabbbaaaab"),
        TestVectors( `(?<=((?:a{2,4}b{1,3}){1,2}?))x`, "aabbbaaaabx", "y", "$&-$1", "x-aaaab"),
        TestVectors( `(?<=(abc|def|aef))x`, "abcx", "y", "$&-$1", "x-abc"),
        TestVectors( `(?<=(abc|def|aef))x`, "aefx", "y", "$&-$1", "x-aef"),
        TestVectors( `(?<=(abc|dabc))(x)`, "dabcx", "y", "$&-$1-$2", "x-abc-x"),
        TestVectors( `(?<=(|abc))x`, "dabcx", "y", "$&-$1", "x-"),
        TestVectors( `(?<=((ab|da)*))x`, "abdaabx", "y", "$&-$2-$1", "x-ab-abdaab"),
        TestVectors( `a(?<=(ba(?<=(aba)(?<=aaba))))`, "aabaa", "y", "$&-$1-$2", "a-ba-aba"),
        TestVectors( `.(?<!b).`, "bax", "y", "$&", "ax"),
        TestVectors( `(?<=b(?<!ab)).`, "abbx", "y", "$&", "x"),
        TestVectors( `(?<=\.|[!?]+)X`, "Hey?!X", "y", "$&", "X"),
        TestVectors( `(?<=\.|[!?]+)a{3}`, ".Nope.aaaX", "y", "$&", "aaa"),
//mixed lookaround
        TestVectors( `a(?<=a(?=b))b`, "ab", "y", "$&", "ab"),
        TestVectors( `a(?<=a(?!b))c`, "ac", "y", "$&", "ac"),
        TestVectors( `a(?i)bc`, "aBc", "y", "$&", "aBc"),
        TestVectors( `a(?i)bc`, "Abc", "n", "$&", "-"),
        TestVectors( `(?i)a(?-i)bc`, "aBcAbc", "y", "$&", "Abc"),
        TestVectors( `(?s).(?-s).`, "\n\n\na", "y", "$&", "\na"),
        TestVectors( `(?m)^a(?-m)$`, "\na", "y", "$&", "a")
        ];

    // Expands `fmt` against the captures of `m`; used by ct_tests.
    string produceExpected(M,String)(auto ref M m, String fmt)
    {
        auto app = appender!(String)();
        replaceFmt(fmt, m.captures, app, true);
        return app.data;
    }

    // Runs the whole table through `matchFn` for char, wchar and dchar.
    void run_tests(alias matchFn)()
    {
        int i;
        foreach (Char; AliasSeq!( char, wchar, dchar))
        (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
            alias String = immutable(Char)[];
            // Local variant producing the string type under test.
            String produceExpected(M,Range)(auto ref M m, Range fmt)
            {
                auto app = appender!(String)();
                replaceFmt(fmt, m.captures, app, true);
                return app.data;
            }
            Regex!(Char) r;
            foreach (a, tvd; tv)
            {
                uint c = tvd.result[0];
                debug(std_regex_test) writeln(" Test #", a, " pattern: ", tvd.pattern, " with Char = ", Char.stringof);
                // i == 1 means the pattern compiled, 0 means it threw.
                try
                {
                    i = 1;
                    r = regex(to!(String)(tvd.pattern), tvd.flags);
                }
                catch (RegexException e)
                {
                    i = 0;
                    debug(std_regex_test) writeln(e.msg);
                }

                assert((c == 'c') ? !i : i, "failed to compile pattern "~tvd.pattern);

                if (c != 'c')
                {
                    auto m = matchFn(to!(String)(tvd.input), r);
                    i = !m.empty;
                    assert(
                        (c == 'y') ? i : !i,
                        text(matchFn.stringof ~": failed to match pattern #", a ,": ", tvd.pattern)
                    );
                    if (c == 'y')
                    {
                        auto result = produceExpected(m, to!(String)(tvd.format));
                        assert(result == to!String(tvd.replace),
                            text(matchFn.stringof ~": mismatch pattern #", a, ": ", tvd.pattern," expected: ",
                                    tvd.replace, " vs ", result));
                    }
                }
            }
        }();
        debug(std_regex_test) writeln("!!! FReD bulk test done "~matchFn.stringof~" !!!");
    }


    // Checks a version-selected slice of the table against ctRegex:
    // the compile-time IR must equal the run-time IR and matching must agree.
    void ct_tests()
    {
        import std.algorithm.comparison : equal;
        version (std_regex_ct1)
        {
            pragma(msg, "Testing 1st part of ctRegex");
            alias Tests = Sequence!(0, 155);
        }
        else version (std_regex_ct2)
        {
            pragma(msg, "Testing 2nd part of ctRegex");
            alias Tests = Sequence!(155, 174);
        }
        //FIXME: #174-178 contains CTFE parser bug
        else version (std_regex_ct3)
        {
            pragma(msg, "Testing 3rd part of ctRegex");
            alias Tests = Sequence!(178, 220);
        }
        else version (std_regex_ct4)
        {
            pragma(msg, "Testing 4th part of ctRegex");
            alias Tests = Sequence!(220, tv.length);
        }
        else
            alias Tests = AliasSeq!(Sequence!(0, 30), Sequence!(235, tv.length-5));
        foreach (a, v; Tests)
        (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
            enum tvd = tv[v];
            static if (tvd.result == "c")
            {
                static assert(!__traits(compiles, (){
                    enum r = regex(tvd.pattern, tvd.flags);
                }), "erroneously compiles regex pattern: " ~ tvd.pattern);
            }
            else
            {
                //BUG: tv[v] is fine but tvd is not known at compile time?!
                auto r = ctRegex!(tv[v].pattern, tv[v].flags);
                auto nr = regex(tvd.pattern, tvd.flags);
                assert(equal(r.ir, nr.ir),
                    text("!C-T regex! failed to compile pattern #", a ,": ", tvd.pattern));
                auto m = match(tvd.input, r);
                auto c = tvd.result[0];
                bool ok = (c == 'y') ^ m.empty;
                assert(ok, text("ctRegex: failed to match pattern #",
                    a ,": ", tvd.pattern));
                if (c == 'y')
                {
                    import std.stdio;
                    auto result = produceExpected(m, tvd.format);
                    // A replacement mismatch is only reported here, it does
                    // not fail the test.
                    if (result != tvd.replace)
                        writeln("ctRegex mismatch pattern #", a, ": ", tvd.pattern," expected: ",
                                tvd.replace, " vs ", result);
                }
            }
        }();
        debug(std_regex_test) writeln("!!! FReD C-T test done !!!");
    }

    ct_tests();
    run_tests!bmatch(); //backtracker
    run_tests!match(); //thompson VM
}

@safe unittest
{
    auto cr = ctRegex!("abc");
    assert(bmatch("abc",cr).hit == "abc");
    auto cr2 = ctRegex!("ab*c");
    assert(bmatch("abbbbc",cr2).hit == "abbbbc");
}
@safe unittest
{
    auto cr3 = ctRegex!("^abc$");
    assert(bmatch("abc",cr3).hit == "abc");
    auto cr4 = ctRegex!(`\b(a\B[a-z]b)\b`);
    assert(array(match("azb",cr4).captures) == ["azb", "azb"]);
}

@safe unittest
{
    auto cr5 = ctRegex!("(?:a{2,4}b{1,3}){1,2}");
    assert(bmatch("aaabaaaabbb", cr5).hit == "aaabaaaabbb");
    auto cr6 = ctRegex!("(?:a{2,4}b{1,3}){1,2}?"w);
    assert(bmatch("aaabaaaabbb"w, cr6).hit == "aaab"w);
}

@safe unittest
{
    auto cr7 = ctRegex!(`\r.*?$`,"sm");
    assert(bmatch("abc\r\nxy", cr7).hit == "\r\nxy");
    auto greed = ctRegex!("<packet.*?/packet>");
    assert(bmatch("<packet>text</packet><packet>text</packet>", greed).hit
            == "<packet>text</packet>");
}

@safe unittest
{
    import std.algorithm.comparison : equal;
    auto cr8 = ctRegex!("^(a)(b)?(c*)");
    auto m8 = bmatch("abcc",cr8);
    assert(m8);
    assert(m8.captures[1] == "a");
    assert(m8.captures[2] == "b");
    assert(m8.captures[3] == "cc");
    auto cr9 = ctRegex!("q(a|b)*q");
    auto m9 = match("xxqababqyy",cr9);
    assert(m9);
    assert(equal(bmatch("xxqababqyy",cr9).captures, ["qababq", "b"]));
}

@safe unittest
{
    import std.algorithm.comparison : equal;
    auto rtr = regex("a|b|c");
    enum ctr = regex("a|b|c");
    assert(equal(rtr.ir,ctr.ir));
    //CTFE parser BUG is triggered by group
    //in the middle of alternation (at least not first and not last)
    enum testCT = regex(`abc|(edf)|xyz`);
    auto testRT = regex(`abc|(edf)|xyz`);
    assert(equal(testCT.ir,testRT.ir));
}

@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;
    enum cx = ctRegex!"(A|B|C)";
    auto mx = match("B",cx);
    assert(mx);
    assert(equal(mx.captures, [ "B", "B"]));
    enum cx2 = ctRegex!"(A|B)*";
    assert(match("BAAA",cx2));

    enum cx3 = ctRegex!("a{3,4}","i");
    auto mx3 = match("AaA",cx3);
    assert(mx3);
    assert(mx3.captures[0] == "AaA");
    enum cx4 = ctRegex!(`^a{3,4}?[a-zA-Z0-9~]{1,2}`,"i");
    auto mx4 = match("aaaabc", cx4);
    assert(mx4);
    assert(mx4.captures[0] == "aaaab");
    auto cr8 = ctRegex!("(a)(b)?(c*)");
    auto m8 = bmatch("abcc",cr8);
    assert(m8);
    assert(m8.captures[1] == "a");
    assert(m8.captures[2] == "b");
    assert(m8.captures[3] == "cc");
    auto cr9 = ctRegex!(".*$", "gm");
    auto m9 = match("First\rSecond", cr9);
    assert(m9);
    assert(equal(map!"a.hit"(m9), ["First", "", "Second"]));
}

@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;
//global matching
    void test_body(alias matchFn)()
    {
        string s = "a quick brown fox jumps over a lazy dog";
        auto r1 = regex("\\b[a-z]+\\b","g");
        string[] test;
        foreach (m; matchFn(s, r1))
            test ~= m.hit;
        assert(equal(test, [ "a", "quick", "brown", "fox", "jumps", "over", "a", "lazy", "dog"]));
        auto free_reg = regex(`

            abc
            \s+
            "
            (
                    [^"]+
                |   \\ "
            )+
            "
            z
        `, "x");
        // Use the engine under test (was hard-coded to `match`, so the
        // backtracker never ran this free-form pattern).
        auto m = matchFn(`abc "quoted string with \" inside"z`,free_reg);
        assert(m);
        string mails = " hey@you.com no@spam.net ";
        auto rm = regex(`@(?<=\S+@)\S+`,"g");
        assert(equal(map!"a[0]"(matchFn(mails, rm)), ["@you.com", "@spam.net"]));
        auto m2 = matchFn("First line\nSecond line",regex(".*$","gm"));
        assert(equal(map!"a[0]"(m2), ["First line", "", "Second line"]));
        auto m2a = matchFn("First line\nSecond line",regex(".+$","gm"));
        assert(equal(map!"a[0]"(m2a), ["First line", "Second line"]));
        auto m2b = matchFn("First line\nSecond line",regex(".+?$","gm"));
        assert(equal(map!"a[0]"(m2b), ["First line", "Second line"]));
        debug(std_regex_test) writeln("!!! FReD FLAGS test done "~matchFn.stringof~" !!!");
    }
    test_body!bmatch();
    test_body!match();
}

//tests for accumulated std.regex issues and other regressions
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;
    void test_body(alias matchFn)()
    {
        //issue 5857
        //matching goes out of control if ... in (...){x} has .*/.+
        auto c = matchFn("axxxzayyyyyzd",regex("(a.*z){2}d")).captures;
        assert(c[0] == "axxxzayyyyyzd");
        assert(c[1] == "ayyyyyz");
        auto c2 = matchFn("axxxayyyyyd",regex("(a.*){2}d")).captures;
        assert(c2[0] == "axxxayyyyyd");
        assert(c2[1] == "ayyyyy");
        //issue 2108
        //greedy vs non-greedy
        auto nogreed = regex("<packet.*?/packet>");
        assert(matchFn("<packet>text</packet><packet>text</packet>", nogreed).hit
               == "<packet>text</packet>");
        auto greed = regex("<packet.*/packet>");
        assert(matchFn("<packet>text</packet><packet>text</packet>", greed).hit
               == "<packet>text</packet><packet>text</packet>");
        //issue 4574
        //empty successful match still advances the input
        string[] pres, posts, hits;
        foreach (m; matchFn("abcabc", regex("","g")))
        {
            pres ~= m.pre;
            posts ~= m.post;
            assert(m.hit.empty);

        }
        auto heads = [
            "abcabc",
            "abcab",
            "abca",
            "abc",
            "ab",
            "a",
            ""
        ];
        auto tails = [
            "abcabc",
            "bcabc",
            "cabc",
            "abc",
            "bc",
            "c",
            ""
        ];
        assert(pres == array(retro(heads)));
        assert(posts == tails);
        //issue 6076
        //regression on .*
        auto re = regex("c.*|d");
        auto m = matchFn("mm", re);
        assert(!m);
        debug(std_regex_test) writeln("!!! FReD REGRESSION test done "~matchFn.stringof~" !!!");
        auto rprealloc = regex(`((.){5}.{1,10}){5}`);
        auto arr = array(repeat('0',100));
        auto m2 = matchFn(arr, rprealloc);
        assert(m2);
        assert(collectException(
                regex(r"^(import|file|binary|config)\s+([^\(]+)\(?([^\)]*)\)?\s*$")
                ) is null);
        // Every escapable character must work inside a set, a negated set
        // and a one-element range. Use the engine under test (was hard-coded
        // to `match`).
        foreach (ch; [Escapables])
        {
            assert(matchFn(to!string(ch),regex(`[\`~ch~`]`)));
            assert(!matchFn(to!string(ch),regex(`[^\`~ch~`]`)));
            assert(matchFn(to!string(ch),regex(`[\`~ch~`-\`~ch~`]`)));
        }
        //bugzilla 7718
        string strcmd = "./myApp.rb -os OSX -path \"/GIT/Ruby Apps/sec\" -conf 'notimer'";
        auto reStrCmd = regex (`(".*")|('.*')`, "g");
        assert(equal(map!"a[0]"(matchFn(strcmd, reStrCmd)),
                     [`"/GIT/Ruby Apps/sec"`, `'notimer'`]));
    }
    test_body!bmatch();
    test_body!match();
}

// tests for replace
@safe unittest
{
    void test(alias matchFn)()
    {
        import std.uni : toUpper;

        foreach (i, v; AliasSeq!(string, wstring, dstring))
        {
            // Replacement callback: upper-cases the matched text.
            auto baz(Cap)(Cap m)
            if (is(Cap == Captures!(Cap.String)))
            {
                return toUpper(m.hit);
            }
            alias String = v;
            assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r")), to!String("c"))
                   == to!String("ack rapacity"));
            assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r"), "g"), to!String("c"))
                   == to!String("ack capacity"));
            assert(std.regex.replace!(matchFn)(to!String("noon"), regex(to!String("^n")), to!String("[$&]"))
                   == to!String("[n]oon"));
            assert(std.regex.replace!(matchFn)(
                to!String("test1 test2"), regex(to!String(`\w+`),"g"), to!String("$`:$'")
            ) == to!String(": test2 test1 :"));
            auto s = std.regex.replace!(baz!(Captures!(String)))(to!String("Strap a rocket engine on a chicken."),
                    regex(to!String("[ar]"), "g"));
            assert(s == "StRAp A Rocket engine on A chicken.");
        }
        debug(std_regex_test) writeln("!!! Replace test done "~matchFn.stringof~" !!!");
    }
    test!(bmatch)();
    test!(match)();
}

// tests for splitter
@safe unittest
{
    import std.algorithm.comparison : equal;
    auto s1 = ", abc, de, fg, hi, ";
    auto sp1 = splitter(s1, regex(", *"));
    auto w1 = ["", "abc", "de", "fg", "hi", ""];
    assert(equal(sp1, w1));

    auto s2 = ", abc, de, fg, hi";
    auto sp2 = splitter(s2, regex(", *"));
    auto w2 = ["", "abc", "de", "fg", "hi"];

    uint cnt;
    foreach (e; sp2)
    {
        assert(w2[cnt++] == e);
    }
    assert(equal(sp2, w2));
}

// splitter must accept a mutable char[] input (compile check only)
@safe unittest
{
    char[] s1 = ", abc, de, fg, hi, ".dup;
    auto sp2 = splitter(s1, regex(", *"));
}

@safe unittest
{
    import std.algorithm.comparison : equal;
    auto s1 = ", abc, de, fg, hi, ";
    auto w1 = ["", "abc", "de", "fg", "hi", ""];
    assert(equal(split(s1, regex(", *")), w1[]));
}

@safe unittest
{ // bugzilla 7141
    string pattern = `[a\--b]`;
    assert(match("-", pattern));
    assert(match("b", pattern));
    string pattern2 = `[&-z]`;
    assert(match("b", pattern2));
}
@safe unittest
{//bugzilla 7111
    assert(match("", regex("^")));
}
@safe unittest
{//bugzilla 7300
    assert(!match("a"d, "aa"d));
}

// bugzilla 7551
@safe unittest
{
    auto r = regex("[]abc]*");
    assert("]ab".matchFirst(r).hit == "]ab");
    assertThrown(regex("[]"));
    auto r2 = regex("[]abc--ab]*");
    assert("]ac".matchFirst(r2).hit == "]");
}

@safe unittest
{//bugzilla 7674
    assert("1234".replace(regex("^"), "$$") == "$1234");
    assert("hello?".replace(regex(r"\?", "g"), r"\?") == r"hello\?");
    assert("hello?".replace(regex(r"\?", "g"), r"\\?") != r"hello\?");
}
@safe unittest
{// bugzilla 7679
    import std.algorithm.comparison : equal;
    foreach (S; AliasSeq!(string, wstring, dstring))
    (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
        enum re = ctRegex!(to!S(r"\."));
        auto str = to!S("a.b");
        assert(equal(std.regex.splitter(str, re), [to!S("a"), to!S("b")]));
        assert(split(str, re) == [to!S("a"), to!S("b")]);
    }();
}
@safe unittest
{//bugzilla 8203
    string data = "
    NAME = XPAW01_STA:STATION
    NAME = XPAW01_STA
    ";
    auto uniFileOld = data;
    auto r = regex(
       r"^NAME = (?P<comp>[a-zA-Z0-9_]+):*(?P<blk>[a-zA-Z0-9_]*)","gm");
    auto uniCapturesNew = match(uniFileOld, r);
    for (int i = 0; i < 20; i++)
        foreach (matchNew; uniCapturesNew) {}
    //a second issue with same symptoms
    // NOTE(review): the non-ASCII characters in the next two literals are
    // U+FFFD in this copy of the file (encoding damage); kept verbatim.
    auto r2 = regex(`([��-����-��\-_]+\s*)+(?<=[\s\.,\^])`);
    match("���������� ����������������������", r2);
}
@safe unittest
{// bugzilla 8637 purity of enforce
    auto m = match("hello world", regex("world"));
    enforce(m);
}

// bugzilla 8725
@safe unittest
{
  static italic = regex( r"\*
                (?!\s+)
                (.*?)
                (?!\s+)
                \*", "gx" );
  string input = "this * is* interesting, *very* interesting";
  assert(replace(input, italic, "<i>$1</i>") ==
      "this * is* interesting, <i>very</i> interesting");
}

// bugzilla 8349
@safe unittest
{
    enum peakRegexStr = r"\>(wgEncode.*Tfbs.*\.(?:narrow)|(?:broad)Peak.gz)</a>";
    enum peakRegex = ctRegex!(peakRegexStr);
    //note that the regex pattern itself is probably bogus
    assert(match(r"\>wgEncode-blah-Tfbs.narrow</a>", peakRegex));
}

// bugzilla 9211
@safe unittest
{
    import std.algorithm.comparison : equal;
    auto rx_1 = regex(r"^(\w)*(\d)");
    auto m = match("1234", rx_1);
    assert(equal(m.front, ["1234", "3", "4"]));
    auto rx_2 = regex(r"^([0-9])*(\d)");
    auto m2 = match("1234", rx_2);
    assert(equal(m2.front, ["1234", "3", "4"]));
}

// bugzilla 9280
@safe unittest
{
    string tomatch = "a!b@c";
    static r = regex(r"^(?P<nick>.*?)!(?P<ident>.*?)@(?P<host>.*?)$");
    auto nm = match(tomatch, r);
    assert(nm);
    auto c = nm.captures;
    assert(c[1] == "a");
    assert(c["nick"] == "a");
}


// bugzilla 9579
@safe unittest
{
    char[] input = ['a', 'b', 'c'];
    string format = "($1)";
    // used to give a compile error:
    auto re = regex(`(a)`, "g");
    auto r = replace(input, re, format);
    assert(r == "(a)bc");
}

// bugzilla 9634
@safe unittest
{
    auto re = ctRegex!"(?:a+)";
    assert(match("aaaa", re).hit == "aaaa");
}

//bugzilla 10798
@safe unittest
{
    auto cr = ctRegex!("[abcd--c]*");
    auto m  = "abc".match(cr);
    assert(m);
    assert(m.hit == "ab");
}

// bugzilla 10913
@system unittest
{
    @system static string foo(const(char)[] s)
    {
        return s.dup;
    }
    @safe static string bar(const(char)[] s)
    {
        return s.dup;
    }
    () @system {
        replace!((a) => foo(a.hit))("blah", regex(`a`));
    }();
    () @safe {
        replace!((a) => bar(a.hit))("blah", regex(`a`));
    }();
}

// bugzilla 11262
@safe unittest
{
    enum reg = ctRegex!(r",", "g");
    auto str = "This,List";
    str = str.replace(reg, "-");
    assert(str == "This-List");
}

// bugzilla 11775
@safe unittest
{
    assert(collectException(regex("a{1,0}")));
}

// bugzilla 11839
@safe unittest
{
    import std.algorithm.comparison : equal;
    assert(regex(`(?P<var1>\w+)`).namedCaptures.equal(["var1"]));
    assert(collectException(regex(`(?P<1>\w+)`)));
    assert(regex(`(?P<v1>\w+)`).namedCaptures.equal(["v1"]));
    assert(regex(`(?P<__>\w+)`).namedCaptures.equal(["__"]));
    // NOTE(review): the group name below is U+FFFD in this copy of the
    // file (encoding damage); kept verbatim.
    assert(regex(`(?P<��>\w+)`).namedCaptures.equal(["��"]));
}

// bugzilla 12076
@safe unittest
{
    auto RE = ctRegex!(r"(?<!x[a-z]+)\s([a-z]+)");
    string s = "one two";
    auto m = match(s, RE);
}

// bugzilla 12105
@safe unittest
{
    auto r = ctRegex!`.*?(?!a)`;
    assert("aaab".matchFirst(r).hit == "aaa");
    auto r2 = ctRegex!`.*(?!a)`;
    assert("aaab".matchFirst(r2).hit == "aaab");
}

//bugzilla 11784
@safe unittest
{
    assert("abcdefghijklmnopqrstuvwxyz"
        .matchFirst("[a-z&&[^aeiuo]]").hit == "b");
}

//bugzilla 12366
@safe unittest
{
     auto re = ctRegex!(`^((?=(xx+?)\2+$)((?=\2+$)(?=(x+)(\4+$))\5){2})*x?$`);
     assert("xxxxxxxx".match(re).empty);
     assert(!"xxxx".match(re).empty);
}

// bugzilla 12582
@safe unittest
{
    auto r = regex(`(?P<a>abc)`);
    assert(collectException("abc".matchFirst(r)["b"]));
}

// bugzilla 12691
@safe unittest
{
    assert(bmatch("e@", "^([a-z]|)*$").empty);
    assert(bmatch("e@", ctRegex!`^([a-z]|)*$`).empty);
}

//bugzilla 12713
@safe unittest
{
    assertThrown(regex("[[a-z]([a-z]|(([[a-z])))"));
}

//bugzilla 12747
@safe unittest
{
    assertThrown(regex(`^x(\1)`));
    assertThrown(regex(`^(x(\1))`));
    assertThrown(regex(`^((x)(?=\1))`));
}

// bugzilla 14504
@safe unittest
{
    auto p = ctRegex!("a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?" ~
            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
}

// bugzilla 14529
@safe unittest
{
    auto ctPat2 = regex(r"^[CDF]$", "i");
    foreach (v; ["C", "c", "D", "d", "F", "f"])
        assert(matchAll(v, ctPat2).front.hit == v);
}

// bugzilla 14615
@safe unittest
{
    import std.array : appender;
    import std.regex : replaceFirst, replaceFirstInto, regex;
    import std.stdio : writeln;

    auto example = "Hello, world!";
    auto pattern = regex("^Hello, (bug)"); // won't find this one
    auto result = replaceFirst(example, pattern, "$1 Sponge Bob");
    assert(result == "Hello, world!"); // Ok.
1025 1026 auto sink = appender!string; 1027 replaceFirstInto(sink, example, pattern, "$1 Sponge Bob"); 1028 assert(sink.data == "Hello, world!"); 1029 replaceAllInto(sink, example, pattern, "$1 Sponge Bob"); 1030 assert(sink.data == "Hello, world!Hello, world!"); 1031} 1032 1033// bugzilla 15573 1034@safe unittest 1035{ 1036 auto rx = regex("[c d]", "x"); 1037 assert("a b".matchFirst(rx)); 1038} 1039 1040// bugzilla 15864 1041@safe unittest 1042{ 1043 regex(`(<a (?:(?:\w+=\"[^"]*\")?\s*)*href="\.\.?)"`); 1044} 1045 1046@safe unittest 1047{ 1048 auto r = regex("(?# comment)abc(?# comment2)"); 1049 assert("abc".matchFirst(r)); 1050 assertThrown(regex("(?#...")); 1051} 1052 1053// bugzilla 17075 1054@safe unittest 1055{ 1056 enum titlePattern = `<title>(.+)</title>`; 1057 static titleRegex = ctRegex!titlePattern; 1058 string input = "<title>" ~ "<".repeat(100_000).join; 1059 assert(input.matchFirst(titleRegex).empty); 1060} 1061 1062// bugzilla 17212 1063@safe unittest 1064{ 1065 auto r = regex(" [a] ", "x"); 1066 assert("a".matchFirst(r)); 1067} 1068 1069// bugzilla 17157 1070@safe unittest 1071{ 1072 import std.algorithm.comparison : equal; 1073 auto ctr = ctRegex!"(a)|(b)|(c)|(d)"; 1074 auto r = regex("(a)|(b)|(c)|(d)", "g"); 1075 auto s = "--a--b--c--d--"; 1076 auto outcomes = [ 1077 ["a", "a", "", "", ""], 1078 ["b", "", "b", "", ""], 1079 ["c", "", "", "c", ""], 1080 ["d", "", "", "", "d"] 1081 ]; 1082 assert(equal!equal(s.matchAll(ctr), outcomes)); 1083 assert(equal!equal(s.bmatch(r), outcomes)); 1084} 1085 1086// bugzilla 17667 1087@safe unittest 1088{ 1089 import std.algorithm.searching : canFind; 1090 void willThrow(T, size_t line = __LINE__)(T arg, string msg) 1091 { 1092 auto e = collectException(regex(arg)); 1093 assert(e.msg.canFind(msg), to!string(line) ~ ": " ~ e.msg); 1094 } 1095 willThrow([r".", r"[\(\{[\]\}\)]"], "no matching ']' found while parsing character class"); 1096 willThrow([r"[\", r"123"], "no matching ']' found while parsing character 
class"); 1097 willThrow([r"[a-", r"123"], "no matching ']' found while parsing character class"); 1098 willThrow([r"[a-\", r"123"], "invalid escape sequence"); 1099 willThrow([r"\", r"123"], "invalid escape sequence"); 1100} 1101 1102// bugzilla 17668 1103@safe unittest 1104{ 1105 import std.algorithm.searching; 1106 auto e = collectException!RegexException(regex(q"<[^]>")); 1107 assert(e.msg.canFind("no operand for '^'")); 1108} 1109 1110// bugzilla 17673 1111@safe unittest 1112{ 1113 string str = `<">`; 1114 string[] regexps = ["abc", "\"|x"]; 1115 auto regexp = regex(regexps); 1116 auto c = matchFirst(str, regexp); 1117 assert(c); 1118 assert(c.whichPattern == 2); 1119} 1120 1121