/*
    Regular expressions package test suite.
*/
module std.regex.internal.tests;

package(std.regex):

import std.conv, std.exception, std.meta, std.range,
    std.typecons, std.regex;

import std.regex.internal.parser : Escapables; // characters that need escaping

// Compile-time integer sequence helper; forwards to staticIota!(B, E).
alias Sequence(int B, int E) = staticIota!(B, E);

// Sanity checks: patterns compile, match/bmatch agree on a trivial hit and
// miss, and a default-initialized Regex reports itself as empty.
@safe unittest
{
    // these only need to compile without throwing
    regex("(a|b)*");
    regex(`(?:([0-9A-F]+)\.\.([0-9A-F]+)|([0-9A-F]+))\s*;\s*(.*)\s*#`);
    regex("abc|edf|ighrg");

    auto r1 = regex("abc");
    auto r2 = regex("(gylba)");

    assert(match("abcdef", r1).hit == "abc");
    assert(!match("wida",r2));
    assert(bmatch("abcdef", r1).hit == "abc");
    assert(!bmatch("wida", r2));

    // a mutable char[] is accepted directly as the pattern argument
    assert(match("abc", "abc".dup));
    assert(bmatch("abc", "abc".dup));

    Regex!char rc;
    assert(rc.empty);
    rc = regex("test");
    assert(!rc.empty);
}

/* The test vectors in this file are altered from Henry Spencer's regexp
   test code. His copyright notice is:

        Copyright (c) 1986 by University of Toronto.
        Written by Henry Spencer. Not derived from licensed software.

        Permission is granted to anyone to use this software for any
        purpose on any computer system, and to redistribute it freely,
        subject to the following restrictions:

        1. The author is not responsible for the consequences of use of
                this software, no matter how awful, even if they arise
                from defects in it.

        2. The origin of this software must not be misrepresented, either
                by explicit claim or by omission.

        3. Altered versions must be plainly marked as such, and must not
                be misrepresented as being the original software.
*/ @safe unittest { struct TestVectors { string pattern; string input; string result; string format; string replace; string flags; } static immutable TestVectors[] tv = [ TestVectors( "a\\b", "a", "y", "$&", "a" ), TestVectors( "(a)b\\1", "abaab","y", "$&", "aba" ), TestVectors( "()b\\1", "aaab", "y", "$&", "b" ), TestVectors( "abc", "abc", "y", "$&", "abc" ), TestVectors( "abc", "xbc", "n", "-", "-" ), TestVectors( "abc", "axc", "n", "-", "-" ), TestVectors( "abc", "abx", "n", "-", "-" ), TestVectors( "abc", "xabcy","y", "$&", "abc" ), TestVectors( "abc", "ababc","y", "$&", "abc" ), TestVectors( "ab*c", "abc", "y", "$&", "abc" ), TestVectors( "ab*bc", "abc", "y", "$&", "abc" ), TestVectors( "ab*bc", "abbc", "y", "$&", "abbc" ), TestVectors( "ab*bc", "abbbbc","y", "$&", "abbbbc" ), TestVectors( "ab+bc", "abbc", "y", "$&", "abbc" ), TestVectors( "ab+bc", "abc", "n", "-", "-" ), TestVectors( "ab+bc", "abq", "n", "-", "-" ), TestVectors( "ab+bc", "abbbbc","y", "$&", "abbbbc" ), TestVectors( "ab?bc", "abbc", "y", "$&", "abbc" ), TestVectors( "ab?bc", "abc", "y", "$&", "abc" ), TestVectors( "ab?bc", "abbbbc","n", "-", "-" ), TestVectors( "ab?c", "abc", "y", "$&", "abc" ), TestVectors( "^abc$", "abc", "y", "$&", "abc" ), TestVectors( "^abc$", "abcc", "n", "-", "-" ), TestVectors( "^abc", "abcc", "y", "$&", "abc" ), TestVectors( "^abc$", "aabc", "n", "-", "-" ), TestVectors( "abc$", "aabc", "y", "$&", "abc" ), TestVectors( "^", "abc", "y", "$&", "" ), TestVectors( "$", "abc", "y", "$&", "" ), TestVectors( "a.c", "abc", "y", "$&", "abc" ), TestVectors( "a.c", "axc", "y", "$&", "axc" ), TestVectors( "a.*c", "axyzc","y", "$&", "axyzc" ), TestVectors( "a.*c", "axyzd","n", "-", "-" ), TestVectors( "a[bc]d", "abc", "n", "-", "-" ), TestVectors( "a[bc]d", "abd", "y", "$&", "abd" ), TestVectors( "a[b-d]e", "abd", "n", "-", "-" ), TestVectors( "a[b-d]e", "ace", "y", "$&", "ace" ), TestVectors( "a[b-d]", "aac", "y", "$&", "ac" ), TestVectors( "a[-b]", "a-", "y", "$&", "a-" ), 
TestVectors( "a[b-]", "a-", "y", "$&", "a-" ), TestVectors( "a[b-a]", "-", "c", "-", "-" ), TestVectors( "a[]b", "-", "c", "-", "-" ), TestVectors( "a[", "-", "c", "-", "-" ), TestVectors( "a]", "a]", "y", "$&", "a]" ), TestVectors( "a[\\]]b", "a]b", "y", "$&", "a]b" ), TestVectors( "a[^bc]d", "aed", "y", "$&", "aed" ), TestVectors( "a[^bc]d", "abd", "n", "-", "-" ), TestVectors( "a[^-b]c", "adc", "y", "$&", "adc" ), TestVectors( "a[^-b]c", "a-c", "n", "-", "-" ), TestVectors( "a[^\\]b]c", "adc", "y", "$&", "adc" ), TestVectors( "ab|cd", "abc", "y", "$&", "ab" ), TestVectors( "ab|cd", "abcd", "y", "$&", "ab" ), TestVectors( "()ef", "def", "y", "$&-$1", "ef-" ), TestVectors( "()*", "-", "y", "-", "-" ), TestVectors( "*a", "-", "c", "-", "-" ), TestVectors( "^*", "-", "y", "-", "-" ), TestVectors( "$*", "-", "y", "-", "-" ), TestVectors( "(*)b", "-", "c", "-", "-" ), TestVectors( "$b", "b", "n", "-", "-" ), TestVectors( "a\\", "-", "c", "-", "-" ), TestVectors( "a\\(b", "a(b", "y", "$&-$1", "a(b-" ), TestVectors( "a\\(*b", "ab", "y", "$&", "ab" ), TestVectors( "a\\(*b", "a((b", "y", "$&", "a((b" ), TestVectors( "a\\\\b", "a\\b", "y", "$&", "a\\b" ), TestVectors( "abc)", "-", "c", "-", "-" ), TestVectors( "(abc", "-", "c", "-", "-" ), TestVectors( "((a))", "abc", "y", "$&-$1-$2", "a-a-a" ), TestVectors( "(a)b(c)", "abc", "y", "$&-$1-$2", "abc-a-c" ), TestVectors( "a+b+c", "aabbabc","y", "$&", "abc" ), TestVectors( "a**", "-", "c", "-", "-" ), TestVectors( "a*?a", "aa", "y", "$&", "a" ), TestVectors( "(a*)*", "aaa", "y", "-", "-" ), TestVectors( "(a*)+", "aaa", "y", "-", "-" ), TestVectors( "(a|)*", "-", "y", "-", "-" ), TestVectors( "(a*|b)*", "aabb", "y", "-", "-" ), TestVectors( "(a|b)*", "ab", "y", "$&-$1", "ab-b" ), TestVectors( "(a+|b)*", "ab", "y", "$&-$1", "ab-b" ), TestVectors( "(a+|b)+", "ab", "y", "$&-$1", "ab-b" ), TestVectors( "(a+|b)?", "ab", "y", "$&-$1", "a-a" ), TestVectors( "[^ab]*", "cde", "y", "$&", "cde" ), TestVectors( "(^)*", "-", "y", "-", "-" 
),
        // Each row: pattern, input, result ("y" / "n" / "c"),
        // format string, expected expansion of the format [, flags].
        TestVectors( "(ab|)*", "-", "y", "-", "-" ),
        TestVectors( ")(", "-", "c", "-", "-" ),
        TestVectors( "", "abc", "y", "$&", "" ),
        TestVectors( "abc", "", "n", "-", "-" ),
        TestVectors( "a*", "", "y", "$&", "" ),
        TestVectors( "([abc])*d", "abbbcd", "y", "$&-$1", "abbbcd-c" ),
        TestVectors( "([abc])*bcd", "abcd", "y", "$&-$1", "abcd-a" ),
        TestVectors( "a|b|c|d|e", "e", "y", "$&", "e" ),
        TestVectors( "(a|b|c|d|e)f", "ef", "y", "$&-$1", "ef-e" ),
        TestVectors( "((a*|b))*", "aabb", "y", "-", "-" ),
        TestVectors( "abcd*efg", "abcdefg", "y", "$&", "abcdefg" ),
        TestVectors( "ab*", "xabyabbbz", "y", "$&", "ab" ),
        TestVectors( "ab*", "xayabbbz", "y", "$&", "a" ),
        TestVectors( "(ab|cd)e", "abcde", "y", "$&-$1", "cde-cd" ),
        TestVectors( "[abhgefdc]ij", "hij", "y", "$&", "hij" ),
        TestVectors( "^(ab|cd)e", "abcde", "n", "x$1y", "xy" ),
        TestVectors( "(abc|)ef", "abcdef", "y", "$&-$1", "ef-" ),
        TestVectors( "(a|b)c*d", "abcd", "y", "$&-$1", "bcd-b" ),
        TestVectors( "(ab|ab*)bc", "abc", "y", "$&-$1", "abc-a" ),
        TestVectors( "a([bc]*)c*", "abc", "y", "$&-$1", "abc-bc" ),
        TestVectors( "a([bc]*)(c*d)", "abcd", "y", "$&-$1-$2", "abcd-bc-d" ),
        TestVectors( "a([bc]+)(c*d)", "abcd", "y", "$&-$1-$2", "abcd-bc-d" ),
        TestVectors( "a([bc]*)(c+d)", "abcd", "y", "$&-$1-$2", "abcd-b-cd" ),
        TestVectors( "a[bcd]*dcdcde", "adcdcde", "y", "$&", "adcdcde" ),
        TestVectors( "a[bcd]+dcdcde", "adcdcde", "n", "-", "-" ),
        TestVectors( "(ab|a)b*c", "abc", "y", "$&-$1", "abc-ab" ),
        TestVectors( "((a)(b)c)(d)", "abcd", "y", "$1-$2-$3-$4", "abc-a-b-d" ),
        TestVectors( "[a-zA-Z_][a-zA-Z0-9_]*", "alpha", "y", "$&", "alpha" ),
        TestVectors( "^a(bc+|b[eh])g|.h$", "abh", "y", "$&-$1", "bh-" ),
        TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "effgz", "y", "$&-$1-$2", "effgz-effgz-" ),
        TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "ij", "y", "$&-$1-$2", "ij-ij-j" ),
        TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "effg", "n", "-", "-" ),
        TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "bcdd", "n", "-", "-" ),
        TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "reffgz", "y", "$&-$1-$2", "effgz-effgz-" ),
        TestVectors( "(((((((((a)))))))))", "a", "y", "$&", "a" ),
        TestVectors( "multiple words of text", "uh-uh", "n", "-", "-" ),
        TestVectors( "multiple words", "multiple words, yeah", "y", "$&", "multiple words" ),
        TestVectors( "(.*)c(.*)", "abcde", "y", "$&-$1-$2", "abcde-ab-de" ),
        TestVectors( "\\((.*), (.*)\\)", "(a, b)", "y", "($2, $1)", "(b, a)" ),
        TestVectors( "abcd", "abcd", "y", "$&-&-$$$&", "abcd-&-$abcd" ),
        TestVectors( "a(bc)d", "abcd", "y", "$1-$$1-$$$1", "bc-$1-$bc" ),
        TestVectors( "[k]", "ab", "n", "-", "-" ),
        TestVectors( "[ -~]*", "abc", "y", "$&", "abc" ),
        TestVectors( "[ -~ -~]*", "abc", "y", "$&", "abc" ),
        TestVectors( "[ -~ -~ -~]*", "abc", "y", "$&", "abc" ),
        TestVectors( "[ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ),
        TestVectors( "[ -~ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ),
        TestVectors( "[ -~ -~ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ),
        TestVectors( "[ -~ -~ -~ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ),
        TestVectors( "a{2}", "candy", "n", "", "" ),
        TestVectors( "a{2}", "caandy", "y", "$&", "aa" ),
        TestVectors( "a{2}", "caaandy", "y", "$&", "aa" ),
        TestVectors( "a{2,}", "candy", "n", "", "" ),
        TestVectors( "a{2,}", "caandy", "y", "$&", "aa" ),
        TestVectors( "a{2,}", "caaaaaandy", "y", "$&", "aaaaaa" ),
        TestVectors( "a{1,3}", "cndy", "n", "", "" ),
        TestVectors( "a{1,3}", "candy", "y", "$&", "a" ),
        TestVectors( "a{1,3}", "caandy", "y", "$&", "aa" ),
        TestVectors( "a{1,3}", "caaaaaandy", "y", "$&", "aaa" ),
        TestVectors( "e?le?", "angel", "y", "$&", "el" ),
        TestVectors( "e?le?", "angle", "y", "$&", "le" ),
        TestVectors( "\\bn\\w", "noonday", "y", "$&", "no" ),
        TestVectors( "\\wy\\b", "possibly yesterday", "y", "$&", "ly" ),
        TestVectors( "\\w\\Bn", "noonday", "y", "$&", "on" ),
        TestVectors( "y\\B\\w", "possibly yesterday", "y", "$&", "ye" ),
        TestVectors( "\\cJ", "abc\ndef", "y", "$&", "\n" ),
        TestVectors( "\\d", "B2 is", "y", "$&", "2" ),
        TestVectors( "\\D", "B2 is", "y", "$&", "B" ),
        TestVectors( "\\s\\w*", "foo bar", "y", "$&", " bar" ),
        TestVectors( "\\S\\w*", "foo bar", "y", "$&", "foo" ),
        TestVectors( "abc", "ababc", "y", "$&", "abc" ),
        TestVectors( "apple(,)\\sorange\\1", "apple, orange, cherry, peach", "y", "$&", "apple, orange," ),
        TestVectors( "(\\w+)\\s(\\w+)", "John Smith", "y", "$2, $1", "Smith, John" ),
        TestVectors( "\\n\\f\\r\\t\\v", "abc\n\f\r\t\vdef", "y", "$&", "\n\f\r\t\v" ),
        TestVectors( ".*c", "abcde", "y", "$&", "abc" ),
        TestVectors( "^\\w+((;|=)\\w+)+$", "some=host=tld", "y", "$&-$1-$2", "some=host=tld-=tld-=" ),
        TestVectors( "^\\w+((\\.|-)\\w+)+$", "some.host.tld", "y", "$&-$1-$2", "some.host.tld-.tld-." ),
        TestVectors( "q(a|b)*q", "xxqababqyy", "y", "$&-$1", "qababq-b" ),
        TestVectors( "^(a)(b){0,1}(c*)", "abcc", "y", "$1 $2 $3", "a b cc" ),
        TestVectors( "^(a)((b){0,1})(c*)", "abcc", "y", "$1 $2 $3", "a b b" ),
        TestVectors( "^(a)(b)?(c*)", "abcc", "y", "$1 $2 $3", "a b cc" ),
        TestVectors( "^(a)((b)?)(c*)", "abcc", "y", "$1 $2 $3", "a b b" ),
        // In the next four rows the optional group does not participate in
        // the match and expands to an empty string, so the two separators of
        // "$1 $2 $3" end up adjacent: the expected text has TWO spaces.
        TestVectors( "^(a)(b){0,1}(c*)", "acc", "y", "$1 $2 $3", "a  cc" ),
        TestVectors( "^(a)((b){0,1})(c*)", "acc", "y", "$1 $2 $3", "a  " ),
        TestVectors( "^(a)(b)?(c*)", "acc", "y", "$1 $2 $3", "a  cc" ),
        TestVectors( "^(a)((b)?)(c*)", "acc", "y", "$1 $2 $3", "a  " ),
        TestVectors( "(?:ab){3}", "_abababc","y", "$&-$1", "ababab-" ),
        TestVectors( "(?:a(?:x)?)+", "aaxaxx", "y", "$&-$1-$2", "aaxax--" ),
        TestVectors( `\W\w\W`, "aa b!ca", "y", "$&", " b!"),
//more repetitions:
        TestVectors( "(?:a{2,4}b{1,3}){1,2}", "aaabaaaabbb", "y", "$&", "aaabaaaabbb" ),
        TestVectors( "(?:a{2,4}b{1,3}){1,2}?", "aaabaaaabbb", "y", "$&", "aaab" ),
//groups:
        TestVectors( "(abc)|(edf)|(xyz)", "xyz", "y", "$1-$2-$3","--xyz"),
        // named groups: the format swaps them, so "2/3" must expand to "3/2"
        TestVectors( "(?P<q>\\d+)/(?P<d>\\d+)", "2/3", "y", "${d}/${q}", "3/2"),
//set operations:
        TestVectors( "[a-z--d-f]", " dfa", "y", "$&", "a"),
        TestVectors( "[abc[pq--acq]]{2}", "bqpaca", "y", "$&", "pa"),
        TestVectors( "[a-z9&&abc0-9]{3}", "z90a0abc", "y", "$&", "abc"),
        TestVectors( "[0-9a-f~~0-5a-z]{2}", "g0a58x", "y", "$&", "8x"),
TestVectors( "[abc[pq]xyz[rs]]{4}", "cqxr", "y", "$&", "cqxr"), TestVectors( "[abcdf--[ab&&[bcd]][acd]]", "abcdefgh", "y", "$&", "f"), TestVectors( "[a-c||d-f]+", "abcdef", "y", "$&", "abcdef"), TestVectors( "[a-f--a-c]+", "abcdef", "y", "$&", "def"), TestVectors( "[a-c&&b-f]+", "abcdef", "y", "$&", "bc"), TestVectors( "[a-c~~b-f]+", "abcdef", "y", "$&", "a"), //unicode blocks & properties: TestVectors( `\P{Inlatin1suppl ement}`, "\u00c2!", "y", "$&", "!"), TestVectors( `\p{InLatin-1 Supplement}\p{in-mathematical-operators}\P{Inlatin1suppl ement}`, "\u00c2\u2200\u00c3\u2203.", "y", "$&", "\u00c3\u2203."), TestVectors( `[-+*/\p{in-mathematical-operators}]{2}`, "a+\u2212", "y", "$&", "+\u2212"), TestVectors( `\p{Ll}+`, "XabcD", "y", "$&", "abc"), TestVectors( `\p{Lu}+`, "абвГДЕ", "y", "$&", "ГДЕ"), TestVectors( `^\p{Currency Symbol}\p{Sc}`, "$₤", "y", "$&", "$₤"), TestVectors( `\p{Common}\p{Thai}`, "!ฆ", "y", "$&", "!ฆ"), TestVectors( `[\d\s]*\D`, "12 \t3\U00001680\u0F20_2", "y", "$&", "12 \t3\U00001680\u0F20_"), TestVectors( `[c-wф]фф`, "ффф", "y", "$&", "ффф"), //case insensitive: TestVectors( `^abcdEf$`, "AbCdEF", "y", "$&", "AbCdEF", "i"), TestVectors( `Русский язык`, "рУсскИй ЯзЫк", "y", "$&", "рУсскИй ЯзЫк", "i"), TestVectors( `ⒶⒷⓒ` , "ⓐⓑⒸ", "y", "$&", "ⓐⓑⒸ", "i"), TestVectors( "\U00010400{2}", "\U00010428\U00010400 ", "y", "$&", "\U00010428\U00010400", "i"), TestVectors( `[adzУ-Я]{4}`, "DzюЯ", "y", "$&", "DzюЯ", "i"), TestVectors( `\p{L}\p{Lu}{10}`, "абвгдеЖЗИКЛ", "y", "$&", "абвгдеЖЗИКЛ", "i"), TestVectors( `(?:Dåb){3}`, "DåbDÅBdÅb", "y", "$&", "DåbDÅBdÅb", "i"), //escapes: TestVectors( `\u0041\u005a\U00000065\u0001`, "AZe\u0001", "y", "$&", "AZe\u0001"), TestVectors( `\u`, "", "c", "-", "-"), TestVectors( `\U`, "", "c", "-", "-"), TestVectors( `\u003`, "", "c", "-", "-"), TestVectors( `[\x00-\x7f]{4}`, "\x00\x09ab", "y", "$&", "\x00\x09ab"), TestVectors( `[\cJ\cK\cA-\cD]{3}\cQ`, "\x01\x0B\x0A\x11", "y", "$&", "\x01\x0B\x0A\x11"), TestVectors( `\r\n\v\t\f\\`, 
"\r\n\v\t\f\\", "y", "$&", "\r\n\v\t\f\\"), TestVectors( `[\u0003\u0001]{2}`, "\u0001\u0003", "y", "$&", "\u0001\u0003"), TestVectors( `^[\u0020-\u0080\u0001\n-\r]{8}`, "abc\u0001\v\f\r\n", "y", "$&", "abc\u0001\v\f\r\n"), TestVectors( `\w+\S\w+`, "ab7!44c", "y", "$&", "ab7!44c"), TestVectors( `\b\w+\b`, " abde4 ", "y", "$&", "abde4"), TestVectors( `\b\w+\b`, " abde4", "y", "$&", "abde4"), TestVectors( `\b\w+\b`, "abde4 ", "y", "$&", "abde4"), TestVectors( `\pL\pS`, "a\u02DA", "y", "$&", "a\u02DA"), TestVectors( `\pX`, "", "c", "-", "-"), // ^, $, \b, \B, multiline : TestVectors( `\r.*?$`, "abc\r\nxy", "y", "$&", "\r\nxy", "sm"), TestVectors( `^a$^b$`, "a\r\nb\n", "n", "$&", "-", "m"), TestVectors( `^a$\r\n^b$`,"a\r\nb\n", "y", "$&", "a\r\nb", "m"), TestVectors( `^$`, "\r\n", "y", "$&", "", "m"), TestVectors( `^a$\nx$`, "a\nx\u2028","y", "$&", "a\nx", "m"), TestVectors( `^a$\nx$`, "a\nx\u2029","y", "$&", "a\nx", "m"), TestVectors( `^a$\nx$`, "a\nx\u0085","y", "$&", "a\nx","m"), TestVectors( `^x$`, "\u2028x", "y", "$&", "x", "m"), TestVectors( `^x$`, "\u2029x", "y", "$&", "x", "m"), TestVectors( `^x$`, "\u0085x", "y", "$&", "x", "m"), TestVectors( `\b^.`, "ab", "y", "$&", "a"), TestVectors( `\B^.`, "ab", "n", "-", "-"), TestVectors( `^ab\Bc\B`, "\r\nabcd", "y", "$&", "abc", "m"), TestVectors( `^.*$`, "12345678", "y", "$&", "12345678"), // luckily obtained regression on incremental matching in backtracker TestVectors( `^(?:(?:([0-9A-F]+)\.\.([0-9A-F]+)|([0-9A-F]+))\s*;\s*([^ ]*)\s*#|# (?:\w|_)+=((?:\w|_)+))`, "0020 ; White_Space # ", "y", "$1-$2-$3", "--0020"), //lookahead TestVectors( "(foo.)(?=(bar))", "foobar foodbar", "y", "$&-$1-$2", "food-food-bar" ), TestVectors( `\b(\d+)[a-z](?=\1)`, "123a123", "y", "$&-$1", "123a-123" ), TestVectors( `\$(?!\d{3})\w+`, "$123 $abc", "y", "$&", "$abc"), TestVectors( `(abc)(?=(ed(f))\3)`, "abcedff", "y", "-", "-"), TestVectors( `\b[A-Za-z0-9.]+(?=(@(?!gmail)))`, "a@gmail,x@com", "y", "$&-$1", "x-@"), TestVectors( 
`x()(abc)(?=(d)(e)(f)\2)`, "xabcdefabc", "y", "$&", "xabc"), TestVectors( `x()(abc)(?=(d)(e)(f)()\3\4\5)`, "xabcdefdef", "y", "$&", "xabc"), //lookback TestVectors( `(?<=(ab))\d`, "12ba3ab4", "y", "$&-$1", "4-ab", "i"), TestVectors( `\w(?"); assert(bmatch("texttext", greed).hit == "text"); } @safe unittest { import std.algorithm.comparison : equal; auto cr8 = ctRegex!("^(a)(b)?(c*)"); auto m8 = bmatch("abcc",cr8); assert(m8); assert(m8.captures[1] == "a"); assert(m8.captures[2] == "b"); assert(m8.captures[3] == "cc"); auto cr9 = ctRegex!("q(a|b)*q"); auto m9 = match("xxqababqyy",cr9); assert(m9); assert(equal(bmatch("xxqababqyy",cr9).captures, ["qababq", "b"])); } @safe unittest { import std.algorithm.comparison : equal; auto rtr = regex("a|b|c"); enum ctr = regex("a|b|c"); assert(equal(rtr.ir,ctr.ir)); //CTFE parser BUG is triggered by group //in the middle of alternation (at least not first and not last) enum testCT = regex(`abc|(edf)|xyz`); auto testRT = regex(`abc|(edf)|xyz`); assert(equal(testCT.ir,testRT.ir)); } @safe unittest { import std.algorithm.comparison : equal; import std.algorithm.iteration : map; enum cx = ctRegex!"(A|B|C)"; auto mx = match("B",cx); assert(mx); assert(equal(mx.captures, [ "B", "B"])); enum cx2 = ctRegex!"(A|B)*"; assert(match("BAAA",cx2)); enum cx3 = ctRegex!("a{3,4}","i"); auto mx3 = match("AaA",cx3); assert(mx3); assert(mx3.captures[0] == "AaA"); enum cx4 = ctRegex!(`^a{3,4}?[a-zA-Z0-9~]{1,2}`,"i"); auto mx4 = match("aaaabc", cx4); assert(mx4); assert(mx4.captures[0] == "aaaab"); auto cr8 = ctRegex!("(a)(b)?(c*)"); auto m8 = bmatch("abcc",cr8); assert(m8); assert(m8.captures[1] == "a"); assert(m8.captures[2] == "b"); assert(m8.captures[3] == "cc"); auto cr9 = ctRegex!(".*$", "gm"); auto m9 = match("First\rSecond", cr9); assert(m9); assert(equal(map!"a.hit"(m9), ["First", "", "Second"])); } @safe unittest { import std.algorithm.comparison : equal; import std.algorithm.iteration : map; //global matching void test_body(alias 
matchFn)() { string s = "a quick brown fox jumps over a lazy dog"; auto r1 = regex("\\b[a-z]+\\b","g"); string[] test; foreach (m; matchFn(s, r1)) test ~= m.hit; assert(equal(test, [ "a", "quick", "brown", "fox", "jumps", "over", "a", "lazy", "dog"])); auto free_reg = regex(` abc \s+ " ( [^"]+ | \\ " )+ " z `, "x"); auto m = match(`abc "quoted string with \" inside"z`,free_reg); assert(m); string mails = " hey@you.com no@spam.net "; auto rm = regex(`@(?<=\S+@)\S+`,"g"); assert(equal(map!"a[0]"(matchFn(mails, rm)), ["@you.com", "@spam.net"])); auto m2 = matchFn("First line\nSecond line",regex(".*$","gm")); assert(equal(map!"a[0]"(m2), ["First line", "", "Second line"])); auto m2a = matchFn("First line\nSecond line",regex(".+$","gm")); assert(equal(map!"a[0]"(m2a), ["First line", "Second line"])); auto m2b = matchFn("First line\nSecond line",regex(".+?$","gm")); assert(equal(map!"a[0]"(m2b), ["First line", "Second line"])); debug(std_regex_test) writeln("!!! FReD FLAGS test done "~matchFn.stringof~" !!!"); } test_body!bmatch(); test_body!match(); } //tests for accumulated std.regex issues and other regressions @safe unittest { import std.algorithm.comparison : equal; import std.algorithm.iteration : map; void test_body(alias matchFn)() { //issue 5857 //matching goes out of control if ... 
in (...){x} has .*/.+ auto c = matchFn("axxxzayyyyyzd",regex("(a.*z){2}d")).captures; assert(c[0] == "axxxzayyyyyzd"); assert(c[1] == "ayyyyyz"); auto c2 = matchFn("axxxayyyyyd",regex("(a.*){2}d")).captures; assert(c2[0] == "axxxayyyyyd"); assert(c2[1] == "ayyyyy"); //issue 2108 //greedy vs non-greedy auto nogreed = regex(""); assert(matchFn("texttext", nogreed).hit == "text"); auto greed = regex(""); assert(matchFn("texttext", greed).hit == "texttext"); //issue 4574 //empty successful match still advances the input string[] pres, posts, hits; foreach (m; matchFn("abcabc", regex("","g"))) { pres ~= m.pre; posts ~= m.post; assert(m.hit.empty); } auto heads = [ "abcabc", "abcab", "abca", "abc", "ab", "a", "" ]; auto tails = [ "abcabc", "bcabc", "cabc", "abc", "bc", "c", "" ]; assert(pres == array(retro(heads))); assert(posts == tails); //issue 6076 //regression on .* auto re = regex("c.*|d"); auto m = matchFn("mm", re); assert(!m); debug(std_regex_test) writeln("!!! FReD REGRESSION test done "~matchFn.stringof~" !!!"); auto rprealloc = regex(`((.){5}.{1,10}){5}`); auto arr = array(repeat('0',100)); auto m2 = matchFn(arr, rprealloc); assert(m2); assert(collectException( regex(r"^(import|file|binary|config)\s+([^\(]+)\(?([^\)]*)\)?\s*$") ) is null); foreach (ch; [Escapables]) { assert(match(to!string(ch),regex(`[\`~ch~`]`))); assert(!match(to!string(ch),regex(`[^\`~ch~`]`))); assert(match(to!string(ch),regex(`[\`~ch~`-\`~ch~`]`))); } //bugzilla 7718 string strcmd = "./myApp.rb -os OSX -path \"/GIT/Ruby Apps/sec\" -conf 'notimer'"; auto reStrCmd = regex (`(".*")|('.*')`, "g"); assert(equal(map!"a[0]"(matchFn(strcmd, reStrCmd)), [`"/GIT/Ruby Apps/sec"`, `'notimer'`])); } test_body!bmatch(); test_body!match(); } // tests for replace @safe unittest { void test(alias matchFn)() { import std.uni : toUpper; foreach (i, v; AliasSeq!(string, wstring, dstring)) { auto baz(Cap)(Cap m) if (is(Cap == Captures!(Cap.String))) { return toUpper(m.hit); } alias String = v; 
assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r")), to!String("c")) == to!String("ack rapacity")); assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r"), "g"), to!String("c")) == to!String("ack capacity")); assert(std.regex.replace!(matchFn)(to!String("noon"), regex(to!String("^n")), to!String("[$&]")) == to!String("[n]oon")); assert(std.regex.replace!(matchFn)( to!String("test1 test2"), regex(to!String(`\w+`),"g"), to!String("$`:$'") ) == to!String(": test2 test1 :")); auto s = std.regex.replace!(baz!(Captures!(String)))(to!String("Strap a rocket engine on a chicken."), regex(to!String("[ar]"), "g")); assert(s == "StRAp A Rocket engine on A chicken."); } debug(std_regex_test) writeln("!!! Replace test done "~matchFn.stringof~" !!!"); } test!(bmatch)(); test!(match)(); } // tests for splitter @safe unittest { import std.algorithm.comparison : equal; auto s1 = ", abc, de, fg, hi, "; auto sp1 = splitter(s1, regex(", *")); auto w1 = ["", "abc", "de", "fg", "hi", ""]; assert(equal(sp1, w1)); auto s2 = ", abc, de, fg, hi"; auto sp2 = splitter(s2, regex(", *")); auto w2 = ["", "abc", "de", "fg", "hi"]; uint cnt; foreach (e; sp2) { assert(w2[cnt++] == e); } assert(equal(sp2, w2)); } @safe unittest { char[] s1 = ", abc, de, fg, hi, ".dup; auto sp2 = splitter(s1, regex(", *")); } @safe unittest { import std.algorithm.comparison : equal; auto s1 = ", abc, de, fg, hi, "; auto w1 = ["", "abc", "de", "fg", "hi", ""]; assert(equal(split(s1, regex(", *")), w1[])); } @safe unittest { // bugzilla 7141 string pattern = `[a\--b]`; assert(match("-", pattern)); assert(match("b", pattern)); string pattern2 = `[&-z]`; assert(match("b", pattern2)); } @safe unittest {//bugzilla 7111 assert(match("", regex("^"))); } @safe unittest {//bugzilla 7300 assert(!match("a"d, "aa"d)); } // bugzilla 7551 @safe unittest { auto r = regex("[]abc]*"); assert("]ab".matchFirst(r).hit == "]ab"); assertThrown(regex("[]")); auto r2 = 
regex("[]abc--ab]*"); assert("]ac".matchFirst(r2).hit == "]"); } @safe unittest {//bugzilla 7674 assert("1234".replace(regex("^"), "$$") == "$1234"); assert("hello?".replace(regex(r"\?", "g"), r"\?") == r"hello\?"); assert("hello?".replace(regex(r"\?", "g"), r"\\?") != r"hello\?"); } @safe unittest {// bugzilla 7679 import std.algorithm.comparison : equal; foreach (S; AliasSeq!(string, wstring, dstring)) (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396 enum re = ctRegex!(to!S(r"\.")); auto str = to!S("a.b"); assert(equal(std.regex.splitter(str, re), [to!S("a"), to!S("b")])); assert(split(str, re) == [to!S("a"), to!S("b")]); }(); } @safe unittest {//bugzilla 8203 string data = " NAME = XPAW01_STA:STATION NAME = XPAW01_STA "; auto uniFileOld = data; auto r = regex( r"^NAME = (?P[a-zA-Z0-9_]+):*(?P[a-zA-Z0-9_]*)","gm"); auto uniCapturesNew = match(uniFileOld, r); for (int i = 0; i < 20; i++) foreach (matchNew; uniCapturesNew) {} //a second issue with same symptoms auto r2 = regex(`([а-яА-Я\-_]+\s*)+(?<=[\s\.,\^])`); match("аллея Театральная", r2); } @safe unittest {// bugzilla 8637 purity of enforce auto m = match("hello world", regex("world")); enforce(m); } // bugzilla 8725 @safe unittest { static italic = regex( r"\* (?!\s+) (.*?) 
(?!\s+) \*", "gx" ); string input = "this * is* interesting, *very* interesting"; assert(replace(input, italic, "$1") == "this * is* interesting, very interesting"); } // bugzilla 8349 @safe unittest { enum peakRegexStr = r"\>(wgEncode.*Tfbs.*\.(?:narrow)|(?:broad)Peak.gz)"; enum peakRegex = ctRegex!(peakRegexStr); //note that the regex pattern itself is probably bogus assert(match(r"\>wgEncode-blah-Tfbs.narrow", peakRegex)); } // bugzilla 9211 @safe unittest { import std.algorithm.comparison : equal; auto rx_1 = regex(r"^(\w)*(\d)"); auto m = match("1234", rx_1); assert(equal(m.front, ["1234", "3", "4"])); auto rx_2 = regex(r"^([0-9])*(\d)"); auto m2 = match("1234", rx_2); assert(equal(m2.front, ["1234", "3", "4"])); } // bugzilla 9280 @safe unittest { string tomatch = "a!b@c"; static r = regex(r"^(?P.*?)!(?P.*?)@(?P.*?)$"); auto nm = match(tomatch, r); assert(nm); auto c = nm.captures; assert(c[1] == "a"); assert(c["nick"] == "a"); } // bugzilla 9579 @safe unittest { char[] input = ['a', 'b', 'c']; string format = "($1)"; // used to give a compile error: auto re = regex(`(a)`, "g"); auto r = replace(input, re, format); assert(r == "(a)bc"); } // bugzilla 9634 @safe unittest { auto re = ctRegex!"(?:a+)"; assert(match("aaaa", re).hit == "aaaa"); } //bugzilla 10798 @safe unittest { auto cr = ctRegex!("[abcd--c]*"); auto m = "abc".match(cr); assert(m); assert(m.hit == "ab"); } // bugzilla 10913 @system unittest { @system static string foo(const(char)[] s) { return s.dup; } @safe static string bar(const(char)[] s) { return s.dup; } () @system { replace!((a) => foo(a.hit))("blah", regex(`a`)); }(); () @safe { replace!((a) => bar(a.hit))("blah", regex(`a`)); }(); } // bugzilla 11262 @safe unittest { enum reg = ctRegex!(r",", "g"); auto str = "This,List"; str = str.replace(reg, "-"); assert(str == "This-List"); } // bugzilla 11775 @safe unittest { assert(collectException(regex("a{1,0}"))); } // bugzilla 11839 @safe unittest { import std.algorithm.comparison : equal; 
assert(regex(`(?P\w+)`).namedCaptures.equal(["var1"])); assert(collectException(regex(`(?P<1>\w+)`))); assert(regex(`(?P\w+)`).namedCaptures.equal(["v1"])); assert(regex(`(?P<__>\w+)`).namedCaptures.equal(["__"])); assert(regex(`(?P<я>\w+)`).namedCaptures.equal(["я"])); } // bugzilla 12076 @safe unittest { auto RE = ctRegex!(r"(?abc)`); assert(collectException("abc".matchFirst(r)["b"])); } // bugzilla 12691 @safe unittest { assert(bmatch("e@", "^([a-z]|)*$").empty); assert(bmatch("e@", ctRegex!`^([a-z]|)*$`).empty); } //bugzilla 12713 @safe unittest { assertThrown(regex("[[a-z]([a-z]|(([[a-z])))")); } //bugzilla 12747 @safe unittest { assertThrown(regex(`^x(\1)`)); assertThrown(regex(`^(x(\1))`)); assertThrown(regex(`^((x)(?=\1))`)); } // bugzilla 14504 @safe unittest { auto p = ctRegex!("a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?" ~ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); } // bugzilla 14529 @safe unittest { auto ctPat2 = regex(r"^[CDF]$", "i"); foreach (v; ["C", "c", "D", "d", "F", "f"]) assert(matchAll(v, ctPat2).front.hit == v); } // bugzilla 14615 @safe unittest { import std.array : appender; import std.regex : replaceFirst, replaceFirstInto, regex; import std.stdio : writeln; auto example = "Hello, world!"; auto pattern = regex("^Hello, (bug)"); // won't find this one auto result = replaceFirst(example, pattern, "$1 Sponge Bob"); assert(result == "Hello, world!"); // Ok. 
auto sink = appender!string;
    // No match: the *Into variants must copy the input through unchanged,
    // appending to whatever the sink already holds.
    replaceFirstInto(sink, example, pattern, "$1 Sponge Bob");
    assert(sink.data == "Hello, world!");
    replaceAllInto(sink, example, pattern, "$1 Sponge Bob");
    assert(sink.data == "Hello, world!Hello, world!");
}

// bugzilla 15573
@safe unittest
{
    auto rx = regex("[c d]", "x");
    assert("a b".matchFirst(rx));
}

// bugzilla 15864
// NOTE(review): the text of this test looks truncated - the first statement
// is missing its closing parenthesis and `titlePattern` is not declared
// anywhere in the test. The tokens are kept exactly as found; restore the
// missing pieces from the upstream source before relying on this test.
@safe unittest
{
    regex(`((.+)`;
    static titleRegex = ctRegex!titlePattern;
    string input = "" ~ "<".repeat(100_000).join;
    assert(input.matchFirst(titleRegex).empty);
}

// bugzilla 17212
@safe unittest
{
    auto r = regex(" [a] ", "x");
    assert("a".matchFirst(r));
}

// bugzilla 17157
@safe unittest
{
    import std.algorithm.comparison : equal;
    auto ctr = ctRegex!"(a)|(b)|(c)|(d)";
    auto r = regex("(a)|(b)|(c)|(d)", "g");
    auto s = "--a--b--c--d--";
    // one row per match: whole hit followed by the four alternation groups
    auto outcomes = [
        ["a", "a", "", "", ""],
        ["b", "", "b", "", ""],
        ["c", "", "", "c", ""],
        ["d", "", "", "", "d"]
    ];
    assert(equal!equal(s.matchAll(ctr), outcomes));
    assert(equal!equal(s.bmatch(r), outcomes));
}

// bugzilla 17667
@safe unittest
{
    import std.algorithm.searching : canFind;

    // Compiles `arg` with regex() and checks that it is rejected with an
    // exception whose message contains `msg`. `line` defaults to the call
    // site so a failure points at the offending willThrow call.
    void willThrow(T, size_t line = __LINE__)(T arg, string msg)
    {
        auto e = collectException(regex(arg));
        // collectException yields null when nothing was thrown; report that
        // as a readable assertion failure instead of dereferencing null.
        assert(e !is null,
            to!string(line) ~ ": no exception thrown, expected: " ~ msg);
        assert(e.msg.canFind(msg), to!string(line) ~ ": " ~ e.msg);
    }

    willThrow([r".", r"[\(\{[\]\}\)]"], "no matching ']' found while parsing character class");
    willThrow([r"[\", r"123"], "no matching ']' found while parsing character class");
    willThrow([r"[a-", r"123"], "no matching ']' found while parsing character class");
    willThrow([r"[a-\", r"123"], "invalid escape sequence");
    willThrow([r"\", r"123"], "invalid escape sequence");
}

// bugzilla 17668
@safe unittest
{
    import std.algorithm.searching;
    auto e = collectException!RegexException(regex(q"<[^]>"));
    // guard against a null result so a missing exception fails cleanly
    assert(e !is null, "expected RegexException for pattern [^]");
    assert(e.msg.canFind("no operand for '^'"));
}

// bugzilla 17673
@safe unittest
{
    string str = `<">`;
    string[] regexps = ["abc", "\"|x"];
    auto regexp = regex(regexps);
    auto c = matchFirst(str, regexp);
    assert(c);
    // whichPattern is 1-based: the second alternative is the one that hit
    assert(c.whichPattern == 2);
}