1/* 2 Generators - components that generate strings for a given regex pattern. 3 4 For the moment undocumented, and is subject to change. 5*/ 6module std.regex.internal.generator; 7 8/* 9 Useful utility for self-testing, an infinite range of string samples 10 that _have_ to match given compiled regex. 11 Caveats: supports only a simple subset of bytecode. 12*/ 13@trusted private struct SampleGenerator(Char) 14{ 15 import std.array : appender, Appender; 16 import std.format : formattedWrite; 17 import std.random : Xorshift; 18 import std.regex.internal.ir : Regex, IR, IRL; 19 import std.utf : isValidDchar, byChar; 20 Regex!Char re; 21 Appender!(char[]) app; 22 uint limit, seed; 23 Xorshift gen; 24 //generator for pattern r, with soft maximum of threshold elements 25 //and a given random seed 26 this(ref Regex!Char r, uint threshold, uint randomSeed) 27 { 28 re = r; 29 limit = threshold; 30 seed = randomSeed; 31 app = appender!(Char[])(); 32 compose(); 33 } 34 35 uint rand(uint x) 36 { 37 uint r = gen.front % x; 38 gen.popFront(); 39 return r; 40 } 41 42 void compose() 43 { 44 uint pc = 0, counter = 0, dataLenOld = uint.max; 45 for (;;) 46 { 47 switch (re.ir[pc].code) 48 { 49 case IR.Char: 50 formattedWrite(app,"%s", cast(dchar) re.ir[pc].data); 51 pc += IRL!(IR.Char); 52 break; 53 case IR.OrChar: 54 uint len = re.ir[pc].sequence; 55 formattedWrite(app, "%s", cast(dchar) re.ir[pc + rand(len)].data); 56 pc += len; 57 break; 58 case IR.CodepointSet: 59 case IR.Trie: 60 auto set = re.charsets[re.ir[pc].data]; 61 auto x = rand(cast(uint) set.byInterval.length); 62 auto y = rand(set.byInterval[x].b - set.byInterval[x].a); 63 formattedWrite(app, "%s", cast(dchar)(set.byInterval[x].a+y)); 64 pc += IRL!(IR.CodepointSet); 65 break; 66 case IR.Any: 67 uint x; 68 do 69 { 70 x = rand(0x11_000); 71 }while (x == '\r' || x == '\n' || !isValidDchar(x)); 72 formattedWrite(app, "%s", cast(dchar) x); 73 pc += IRL!(IR.Any); 74 break; 75 case IR.GotoEndOr: 76 pc += IRL!(IR.GotoEndOr)+re.ir[pc].data; 77 assert(re.ir[pc].code == IR.OrEnd); 78 goto case; 79 case IR.OrEnd: 80 pc += IRL!(IR.OrEnd); 81 break; 82 case IR.OrStart: 83 pc += IRL!(IR.OrStart); 84 goto case; 85 case IR.Option: 86 uint next = pc + re.ir[pc].data + IRL!(IR.Option); 87 uint nOpt = 0; 88 //queue next Option 89 while (re.ir[next].code == IR.Option) 90 { 91 nOpt++; 92 next += re.ir[next].data + IRL!(IR.Option); 93 } 94 nOpt++; 95 nOpt = rand(nOpt); 96 for (;nOpt; nOpt--) 97 { 98 pc += re.ir[pc].data + IRL!(IR.Option); 99 } 100 assert(re.ir[pc].code == IR.Option); 101 pc += IRL!(IR.Option); 102 break; 103 case IR.RepeatStart:case IR.RepeatQStart: 104 pc += IRL!(IR.RepeatStart)+re.ir[pc].data; 105 goto case IR.RepeatEnd; 106 case IR.RepeatEnd: 107 case IR.RepeatQEnd: 108 uint len = re.ir[pc].data; 109 uint step = re.ir[pc+2].raw; 110 uint min = re.ir[pc+3].raw; 111 if (counter < min) 112 { 113 counter += step; 114 pc -= len; 115 break; 116 } 117 uint max = re.ir[pc+4].raw; 118 if (counter < max) 119 { 120 if (app.data.length < limit && rand(3) > 0) 121 { 122 pc -= len; 123 counter += step; 124 } 125 else 126 { 127 counter = counter%step; 128 pc += IRL!(IR.RepeatEnd); 129 } 130 } 131 else 132 { 133 counter = counter%step; 134 pc += IRL!(IR.RepeatEnd); 135 } 136 break; 137 case IR.InfiniteStart, IR.InfiniteBloomStart, IR.InfiniteQStart: 138 pc += re.ir[pc].data + IRL!(IR.InfiniteStart); 139 goto case IR.InfiniteEnd; //both Q and non-Q 140 case IR.InfiniteEnd, IR.InfiniteBloomEnd, IR.InfiniteQEnd: 141 uint len = re.ir[pc].data; 142 if (app.data.length == dataLenOld) 143 { 144 pc += IRL!(IR.InfiniteEnd); 145 break; 146 } 147 dataLenOld = cast(uint) app.data.length; 148 if (app.data.length < limit && rand(3) > 0) 149 pc = pc - len; 150 else 151 pc = pc + re.ir[pc].length; 152 break; 153 case IR.GroupStart, IR.GroupEnd: 154 pc += IRL!(IR.GroupStart); 155 break; 156 case IR.Bol, IR.Wordboundary, IR.Notwordboundary: 157 case IR.LookaheadStart, IR.NeglookaheadStart, IR.LookbehindStart, IR.NeglookbehindStart: 158 default: 159 return; 160 } 161 } 162 } 163 164 @property Char[] front() 165 { 166 return app.data; 167 } 168 169 enum empty = false; 170 171 void popFront() 172 { 173 app.shrinkTo(0); 174 compose(); 175 } 176} 177 178@system unittest 179{ 180 import std.range, std.regex; 181 auto re = regex(`P[a-z]{3,}q`); 182 auto gen = SampleGenerator!char(re, 20, 3141592); 183 static assert(isInputRange!(typeof(gen))); 184 //@@@BUG@@@ somehow gen.take(1_000) doesn't work 185 foreach (v; take(gen, 1_000)) 186 assert(v.match(re)); 187} 188