/*
    Regular expressions package test suite.
*/
module std.regex.internal.tests;

package(std.regex):

import std.conv, std.exception, std.meta, std.range,
    std.typecons, std.regex;

import std.uni : Escapables; // characters that need escaping

debug(std_regex_test) import std.stdio;

// Sanity checks: pattern construction, match/bmatch on simple inputs,
// and the `empty` state of a Regex object.
@safe unittest
{
    // Construction only; the returned objects are discarded.
    foreach (pattern; [
        "(a|b)*",
        `(?:([0-9A-F]+)\.\.([0-9A-F]+)|([0-9A-F]+))\s*;\s*(.*)\s*#`,
        "abc|edf|ighrg",
    ])
    {
        regex(pattern);
    }

    auto plainRe = regex("abc");
    auto groupRe = regex("(gylba)");

    // match: the literal is found, the parenthesized word is not.
    assert(match("abcdef", plainRe).hit == "abc");
    assert(!match("wida", groupRe));

    // Same expectations when going through bmatch.
    assert(bmatch("abcdef", plainRe).hit == "abc");
    assert(!bmatch("wida", groupRe));

    // The pattern may also be supplied as a mutable character array.
    assert(match("abc", "abc".dup));
    assert(bmatch("abc", "abc".dup));

    // A default-initialized Regex is empty until a pattern is assigned to it.
    Regex!char compiled;
    assert(compiled.empty);
    compiled = regex("test");
    assert(!compiled.empty);
}

/* The test vectors in this file are altered from Henry Spencer's regexp
   test code. His copyright notice is:

        Copyright (c) 1986 by University of Toronto.
        Written by Henry Spencer.  Not derived from licensed software.

        Permission is granted to anyone to use this software for any
        purpose on any computer system, and to redistribute it freely,
        subject to the following restrictions:

        1. The author is not responsible for the consequences of use of
                this software, no matter how awful, even if they arise
                from defects in it.

        2. The origin of this software must not be misrepresented, either
                by explicit claim or by omission.

        3. Altered versions must be plainly marked as such, and must not
                be misrepresented as being the original software.
*/ @safe unittest { struct TestVectors { string pattern; string input; string result; string format; string replace; string flags; } static immutable TestVectors[] tv = [ TestVectors( "a\\b", "a", "y", "$&", "a" ), TestVectors( "(a)b\\1", "abaab","y", "$&", "aba" ), TestVectors( "()b\\1", "aaab", "y", "$&", "b" ), TestVectors( "abc", "abc", "y", "$&", "abc" ), TestVectors( "abc", "xbc", "n", "-", "-" ), TestVectors( "abc", "axc", "n", "-", "-" ), TestVectors( "abc", "abx", "n", "-", "-" ), TestVectors( "abc", "xabcy","y", "$&", "abc" ), TestVectors( "abc", "ababc","y", "$&", "abc" ), TestVectors( "ab*c", "abc", "y", "$&", "abc" ), TestVectors( "ab*bc", "abc", "y", "$&", "abc" ), TestVectors( "ab*bc", "abbc", "y", "$&", "abbc" ), TestVectors( "ab*bc", "abbbbc","y", "$&", "abbbbc" ), TestVectors( "ab+bc", "abbc", "y", "$&", "abbc" ), TestVectors( "ab+bc", "abc", "n", "-", "-" ), TestVectors( "ab+bc", "abq", "n", "-", "-" ), TestVectors( "ab+bc", "abbbbc","y", "$&", "abbbbc" ), TestVectors( "ab?bc", "abbc", "y", "$&", "abbc" ), TestVectors( "ab?bc", "abc", "y", "$&", "abc" ), TestVectors( "ab?bc", "abbbbc","n", "-", "-" ), TestVectors( "ab?c", "abc", "y", "$&", "abc" ), TestVectors( "^abc$", "abc", "y", "$&", "abc" ), TestVectors( "^abc$", "abcc", "n", "-", "-" ), TestVectors( "^abc", "abcc", "y", "$&", "abc" ), TestVectors( "^abc$", "aabc", "n", "-", "-" ), TestVectors( "abc$", "aabc", "y", "$&", "abc" ), TestVectors( "^", "abc", "y", "$&", "" ), TestVectors( "$", "abc", "y", "$&", "" ), TestVectors( "a.c", "abc", "y", "$&", "abc" ), TestVectors( "a.c", "axc", "y", "$&", "axc" ), TestVectors( "a.*c", "axyzc","y", "$&", "axyzc" ), TestVectors( "a.*c", "axyzd","n", "-", "-" ), TestVectors( "a[bc]d", "abc", "n", "-", "-" ), TestVectors( "a[bc]d", "abd", "y", "$&", "abd" ), TestVectors( "a[b-d]e", "abd", "n", "-", "-" ), TestVectors( "a[b-d]e", "ace", "y", "$&", "ace" ), TestVectors( "a[b-d]", "aac", "y", "$&", "ac" ), TestVectors( "a[-b]", "a-", "y", "$&", "a-" ), 
TestVectors( "a[b-]", "a-", "y", "$&", "a-" ), TestVectors( "a[b-a]", "-", "c", "-", "-" ), TestVectors( "a[]b", "-", "c", "-", "-" ), TestVectors( "a[", "-", "c", "-", "-" ), TestVectors( "a]", "a]", "y", "$&", "a]" ), TestVectors( "a[\\]]b", "a]b", "y", "$&", "a]b" ), TestVectors( "a[^bc]d", "aed", "y", "$&", "aed" ), TestVectors( "a[^bc]d", "abd", "n", "-", "-" ), TestVectors( "a[^-b]c", "adc", "y", "$&", "adc" ), TestVectors( "a[^-b]c", "a-c", "n", "-", "-" ), TestVectors( "a[^\\]b]c", "adc", "y", "$&", "adc" ), TestVectors( "ab|cd", "abc", "y", "$&", "ab" ), TestVectors( "ab|cd", "abcd", "y", "$&", "ab" ), TestVectors( "()ef", "def", "y", "$&-$1", "ef-" ), TestVectors( "()*", "-", "y", "-", "-" ), TestVectors( "*a", "-", "c", "-", "-" ), TestVectors( "^*", "-", "y", "-", "-" ), TestVectors( "$*", "-", "y", "-", "-" ), TestVectors( "(*)b", "-", "c", "-", "-" ), TestVectors( "$b", "b", "n", "-", "-" ), TestVectors( "a\\", "-", "c", "-", "-" ), TestVectors( "a\\(b", "a(b", "y", "$&-$1", "a(b-" ), TestVectors( "a\\(*b", "ab", "y", "$&", "ab" ), TestVectors( "a\\(*b", "a((b", "y", "$&", "a((b" ), TestVectors( "a\\\\b", "a\\b", "y", "$&", "a\\b" ), TestVectors( "abc)", "-", "c", "-", "-" ), TestVectors( "(abc", "-", "c", "-", "-" ), TestVectors( "((a))", "abc", "y", "$&-$1-$2", "a-a-a" ), TestVectors( "(a)b(c)", "abc", "y", "$&-$1-$2", "abc-a-c" ), TestVectors( "a+b+c", "aabbabc","y", "$&", "abc" ), TestVectors( "a**", "-", "c", "-", "-" ), TestVectors( "a*?a", "aa", "y", "$&", "a" ), TestVectors( "(a*)*", "aaa", "y", "-", "-" ), TestVectors( "(a*)+", "aaa", "y", "-", "-" ), TestVectors( "(a|)*", "-", "y", "-", "-" ), TestVectors( "(a*|b)*", "aabb", "y", "-", "-" ), TestVectors( "(a|b)*", "ab", "y", "$&-$1", "ab-b" ), TestVectors( "(a+|b)*", "ab", "y", "$&-$1", "ab-b" ), TestVectors( "(a+|b)+", "ab", "y", "$&-$1", "ab-b" ), TestVectors( "(a+|b)?", "ab", "y", "$&-$1", "a-a" ), TestVectors( "[^ab]*", "cde", "y", "$&", "cde" ), TestVectors( "(^)*", "-", "y", "-", "-" 
), TestVectors( "(ab|)*", "-", "y", "-", "-" ), TestVectors( ")(", "-", "c", "-", "-" ), TestVectors( "", "abc", "y", "$&", "" ), TestVectors( "abc", "", "n", "-", "-" ), TestVectors( "a*", "", "y", "$&", "" ), TestVectors( "([abc])*d", "abbbcd", "y", "$&-$1", "abbbcd-c" ), TestVectors( "([abc])*bcd", "abcd", "y", "$&-$1", "abcd-a" ), TestVectors( "a|b|c|d|e", "e", "y", "$&", "e" ), TestVectors( "(a|b|c|d|e)f", "ef", "y", "$&-$1", "ef-e" ), TestVectors( "((a*|b))*", "aabb", "y", "-", "-" ), TestVectors( "abcd*efg", "abcdefg", "y", "$&", "abcdefg" ), TestVectors( "ab*", "xabyabbbz", "y", "$&", "ab" ), TestVectors( "ab*", "xayabbbz", "y", "$&", "a" ), TestVectors( "(ab|cd)e", "abcde", "y", "$&-$1", "cde-cd" ), TestVectors( "[abhgefdc]ij", "hij", "y", "$&", "hij" ), TestVectors( "^(ab|cd)e", "abcde", "n", "x$1y", "xy" ), TestVectors( "(abc|)ef", "abcdef", "y", "$&-$1", "ef-" ), TestVectors( "(a|b)c*d", "abcd", "y", "$&-$1", "bcd-b" ), TestVectors( "(ab|ab*)bc", "abc", "y", "$&-$1", "abc-a" ), TestVectors( "a([bc]*)c*", "abc", "y", "$&-$1", "abc-bc" ), TestVectors( "a([bc]*)(c*d)", "abcd", "y", "$&-$1-$2", "abcd-bc-d" ), TestVectors( "a([bc]+)(c*d)", "abcd", "y", "$&-$1-$2", "abcd-bc-d" ), TestVectors( "a([bc]*)(c+d)", "abcd", "y", "$&-$1-$2", "abcd-b-cd" ), TestVectors( "a[bcd]*dcdcde", "adcdcde", "y", "$&", "adcdcde" ), TestVectors( "a[bcd]+dcdcde", "adcdcde", "n", "-", "-" ), TestVectors( "(ab|a)b*c", "abc", "y", "$&-$1", "abc-ab" ), TestVectors( "((a)(b)c)(d)", "abcd", "y", "$1-$2-$3-$4", "abc-a-b-d" ), TestVectors( "[a-zA-Z_][a-zA-Z0-9_]*", "alpha", "y", "$&", "alpha" ), TestVectors( "^a(bc+|b[eh])g|.h$", "abh", "y", "$&-$1", "bh-" ), TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "effgz", "y", "$&-$1-$2", "effgz-effgz-" ), TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "ij", "y", "$&-$1-$2", "ij-ij-j" ), TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "effg", "n", "-", "-" ), TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "bcdd", "n", "-", "-" ), TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "reffgz", 
"y", "$&-$1-$2", "effgz-effgz-" ), TestVectors( "(((((((((a)))))))))", "a", "y", "$&", "a" ), TestVectors( "multiple words of text", "uh-uh", "n", "-", "-" ), TestVectors( "multiple words", "multiple words, yeah", "y", "$&", "multiple words" ), TestVectors( "(.*)c(.*)", "abcde", "y", "$&-$1-$2", "abcde-ab-de" ), TestVectors( "\\((.*), (.*)\\)", "(a, b)", "y", "($2, $1)", "(b, a)" ), TestVectors( "abcd", "abcd", "y", "$&-&-$$$&", "abcd-&-$abcd" ), TestVectors( "a(bc)d", "abcd", "y", "$1-$$1-$$$1", "bc-$1-$bc" ), TestVectors( "[k]", "ab", "n", "-", "-" ), TestVectors( "[ -~]*", "abc", "y", "$&", "abc" ), TestVectors( "[ -~ -~]*", "abc", "y", "$&", "abc" ), TestVectors( "[ -~ -~ -~]*", "abc", "y", "$&", "abc" ), TestVectors( "[ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ), TestVectors( "[ -~ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ), TestVectors( "[ -~ -~ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ), TestVectors( "[ -~ -~ -~ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ), TestVectors( "a{2}", "candy", "n", "", "" ), TestVectors( "a{2}", "caandy", "y", "$&", "aa" ), TestVectors( "a{2}", "caaandy", "y", "$&", "aa" ), TestVectors( "a{2,}", "candy", "n", "", "" ), TestVectors( "a{2,}", "caandy", "y", "$&", "aa" ), TestVectors( "a{2,}", "caaaaaandy", "y", "$&", "aaaaaa" ), TestVectors( "a{1,3}", "cndy", "n", "", "" ), TestVectors( "a{1,3}", "candy", "y", "$&", "a" ), TestVectors( "a{1,3}", "caandy", "y", "$&", "aa" ), TestVectors( "a{1,3}", "caaaaaandy", "y", "$&", "aaa" ), TestVectors( "e?le?", "angel", "y", "$&", "el" ), TestVectors( "e?le?", "angle", "y", "$&", "le" ), TestVectors( "\\bn\\w", "noonday", "y", "$&", "no" ), TestVectors( "\\wy\\b", "possibly yesterday", "y", "$&", "ly" ), TestVectors( "\\w\\Bn", "noonday", "y", "$&", "on" ), TestVectors( "y\\B\\w", "possibly yesterday", "y", "$&", "ye" ), TestVectors( "\\cJ", "abc\ndef", "y", "$&", "\n" ), TestVectors( "\\d", "B2 is", "y", "$&", "2" ), TestVectors( "\\D", "B2 is", "y", "$&", "B" ), TestVectors( "\\s\\w*", "foo bar", 
"y", "$&", " bar" ), TestVectors( "\\S\\w*", "foo bar", "y", "$&", "foo" ), TestVectors( "abc", "ababc", "y", "$&", "abc" ), TestVectors( "apple(,)\\sorange\\1", "apple, orange, cherry, peach", "y", "$&", "apple, orange," ), TestVectors( "(\\w+)\\s(\\w+)", "John Smith", "y", "$2, $1", "Smith, John" ), TestVectors( "\\n\\f\\r\\t\\v", "abc\n\f\r\t\vdef", "y", "$&", "\n\f\r\t\v" ), TestVectors( ".*c", "abcde", "y", "$&", "abc" ), TestVectors( "^\\w+((;|=)\\w+)+$", "some=host=tld", "y", "$&-$1-$2", "some=host=tld-=tld-=" ), TestVectors( "^\\w+((\\.|-)\\w+)+$", "some.host.tld", "y", "$&-$1-$2", "some.host.tld-.tld-." ), TestVectors( "q(a|b)*q", "xxqababqyy", "y", "$&-$1", "qababq-b" ), TestVectors( "^(a)(b){0,1}(c*)", "abcc", "y", "$1 $2 $3", "a b cc" ), TestVectors( "^(a)((b){0,1})(c*)", "abcc", "y", "$1 $2 $3", "a b b" ), TestVectors( "^(a)(b)?(c*)", "abcc", "y", "$1 $2 $3", "a b cc" ), TestVectors( "^(a)((b)?)(c*)", "abcc", "y", "$1 $2 $3", "a b b" ), TestVectors( "^(a)(b){0,1}(c*)", "acc", "y", "$1 $2 $3", "a cc" ), TestVectors( "^(a)((b){0,1})(c*)", "acc", "y", "$1 $2 $3", "a " ), TestVectors( "^(a)(b)?(c*)", "acc", "y", "$1 $2 $3", "a cc" ), TestVectors( "^(a)((b)?)(c*)", "acc", "y", "$1 $2 $3", "a " ), TestVectors( "(?:ab){3}", "_abababc","y", "$&-$1", "ababab-" ), TestVectors( "(?:a(?:x)?)+", "aaxaxx", "y", "$&-$1-$2", "aaxax--" ), TestVectors( `\W\w\W`, "aa b!ca", "y", "$&", " b!"), //more repetitions: TestVectors( "(?:a{2,4}b{1,3}){1,2}", "aaabaaaabbb", "y", "$&", "aaabaaaabbb" ), TestVectors( "(?:a{2,4}b{1,3}){1,2}?", "aaabaaaabbb", "y", "$&", "aaab" ), //groups: TestVectors( "(abc)|(edf)|(xyz)", "xyz", "y", "$1-$2-$3","--xyz"), TestVectors( "(?P\\d+)/(?P\\d+)", "2/3", "y", "${d}/${q}", "3/2"), //set operations: TestVectors( "[a-z--d-f]", " dfa", "y", "$&", "a"), TestVectors( "[abc[pq--acq]]{2}", "bqpaca", "y", "$&", "pa"), TestVectors( "[a-z9&&abc0-9]{3}", "z90a0abc", "y", "$&", "abc"), TestVectors( "[0-9a-f~~0-5a-z]{2}", "g0a58x", "y", "$&", "8x"), 
TestVectors( "[abc[pq]xyz[rs]]{4}", "cqxr", "y", "$&", "cqxr"), TestVectors( "[abcdf--[ab&&[bcd]][acd]]", "abcdefgh", "y", "$&", "f"), TestVectors( "[a-c||d-f]+", "abcdef", "y", "$&", "abcdef"), TestVectors( "[a-f--a-c]+", "abcdef", "y", "$&", "def"), TestVectors( "[a-c&&b-f]+", "abcdef", "y", "$&", "bc"), TestVectors( "[a-c~~b-f]+", "abcdef", "y", "$&", "a"), //unicode blocks & properties: TestVectors( `\P{Inlatin1suppl ement}`, "\u00c2!", "y", "$&", "!"), TestVectors( `\p{InLatin-1 Supplement}\p{in-mathematical-operators}\P{Inlatin1suppl ement}`, "\u00c2\u2200\u00c3\u2203.", "y", "$&", "\u00c3\u2203."), TestVectors( `[-+*/\p{in-mathematical-operators}]{2}`, "a+\u2212", "y", "$&", "+\u2212"), TestVectors( `\p{Ll}+`, "XabcD", "y", "$&", "abc"), TestVectors( `\p{Lu}+`, "абвГДЕ", "y", "$&", "ГДЕ"), TestVectors( `^\p{Currency Symbol}\p{Sc}`, "$₤", "y", "$&", "$₤"), TestVectors( `\p{Common}\p{Thai}`, "!ฆ", "y", "$&", "!ฆ"), TestVectors( `[\d\s]*\D`, "12 \t3\U00001680\u0F20_2", "y", "$&", "12 \t3\U00001680\u0F20_"), TestVectors( `[c-wф]фф`, "ффф", "y", "$&", "ффф"), //case insensitive: TestVectors( `^abcdEf$`, "AbCdEF", "y", "$&", "AbCdEF", "i"), TestVectors( `Русский язык`, "рУсскИй ЯзЫк", "y", "$&", "рУсскИй ЯзЫк", "i"), TestVectors( `ⒶⒷⓒ` , "ⓐⓑⒸ", "y", "$&", "ⓐⓑⒸ", "i"), TestVectors( "\U00010400{2}", "\U00010428\U00010400 ", "y", "$&", "\U00010428\U00010400", "i"), TestVectors( `[adzУ-Я]{4}`, "DzюЯ", "y", "$&", "DzюЯ", "i"), TestVectors( `\p{L}\p{Lu}{10}`, "абвгдеЖЗИКЛ", "y", "$&", "абвгдеЖЗИКЛ", "i"), TestVectors( `(?:Dåb){3}`, "DåbDÅBdÅb", "y", "$&", "DåbDÅBdÅb", "i"), //escapes: TestVectors( `\u0041\u005a\U00000065\u0001`, "AZe\u0001", "y", "$&", "AZe\u0001"), TestVectors( `\u`, "", "c", "-", "-"), TestVectors( `\U`, "", "c", "-", "-"), TestVectors( `\u003`, "", "c", "-", "-"), TestVectors( `[\x00-\x7f]{4}`, "\x00\x09ab", "y", "$&", "\x00\x09ab"), TestVectors( `[\cJ\cK\cA-\cD]{3}\cQ`, "\x01\x0B\x0A\x11", "y", "$&", "\x01\x0B\x0A\x11"), TestVectors( `\r\n\v\t\f\\`, 
"\r\n\v\t\f\\", "y", "$&", "\r\n\v\t\f\\"), TestVectors( `[\u0003\u0001]{2}`, "\u0001\u0003", "y", "$&", "\u0001\u0003"), TestVectors( `^[\u0020-\u0080\u0001\n-\r]{8}`, "abc\u0001\v\f\r\n", "y", "$&", "abc\u0001\v\f\r\n"), TestVectors( `\w+\S\w+`, "ab7!44c", "y", "$&", "ab7!44c"), TestVectors( `\b\w+\b`, " abde4 ", "y", "$&", "abde4"), TestVectors( `\b\w+\b`, " abde4", "y", "$&", "abde4"), TestVectors( `\b\w+\b`, "abde4 ", "y", "$&", "abde4"), TestVectors( `\pL\pS`, "a\u02DA", "y", "$&", "a\u02DA"), TestVectors( `\pX`, "", "c", "-", "-"), // ^, $, \b, \B, multiline : TestVectors( `\r.*?$`, "abc\r\nxy", "y", "$&", "\r\nxy", "sm"), TestVectors( `^a$^b$`, "a\r\nb\n", "n", "$&", "-", "m"), TestVectors( `^a$\r\n^b$`,"a\r\nb\n", "y", "$&", "a\r\nb", "m"), TestVectors( `^$`, "\r\n", "y", "$&", "", "m"), TestVectors( `^a$\nx$`, "a\nx\u2028","y", "$&", "a\nx", "m"), TestVectors( `^a$\nx$`, "a\nx\u2029","y", "$&", "a\nx", "m"), TestVectors( `^a$\nx$`, "a\nx\u0085","y", "$&", "a\nx","m"), TestVectors( `^x$`, "\u2028x", "y", "$&", "x", "m"), TestVectors( `^x$`, "\u2029x", "y", "$&", "x", "m"), TestVectors( `^x$`, "\u0085x", "y", "$&", "x", "m"), TestVectors( `\b^.`, "ab", "y", "$&", "a"), TestVectors( `\B^.`, "ab", "n", "-", "-"), TestVectors( `^ab\Bc\B`, "\r\nabcd", "y", "$&", "abc", "m"), TestVectors( `^.*$`, "12345678", "y", "$&", "12345678"), // luckily obtained regression on incremental matching in backtracker TestVectors( `^(?:(?:([0-9A-F]+)\.\.([0-9A-F]+)|([0-9A-F]+))\s*;\s*([^ ]*)\s*#|# (?:\w|_)+=((?:\w|_)+))`, "0020 ; White_Space # ", "y", "$1-$2-$3", "--0020"), //lookahead TestVectors( "(foo.)(?=(bar))", "foobar foodbar", "y", "$&-$1-$2", "food-food-bar" ), TestVectors( `\b(\d+)[a-z](?=\1)`, "123a123", "y", "$&-$1", "123a-123" ), TestVectors( `\$(?!\d{3})\w+`, "$123 $abc", "y", "$&", "$abc"), TestVectors( `(abc)(?=(ed(f))\3)`, "abcedff", "y", "-", "-"), TestVectors( `\b[A-Za-z0-9.]+(?=(@(?!gmail)))`, "a@gmail,x@com", "y", "$&-$1", "x-@"), TestVectors( 
`x()(abc)(?=(d)(e)(f)\2)`, "xabcdefabc", "y", "$&", "xabc"), TestVectors( `x()(abc)(?=(d)(e)(f)()\3\4\5)`, "xabcdefdef", "y", "$&", "xabc"), //lookback TestVectors( `(?<=(ab))\d`, "12ba3ab4", "y", "$&-$1", "4-ab", "i"), TestVectors( `\w(?