1/*
2    Generators - components that generate strings for a given regex pattern.
3
4    For the moment undocumented, and is subject to change.
5*/
6module std.regex.internal.generator;
7
8/*
9    Useful utility for self-testing, an infinite range of string samples
10    that _have_ to match given compiled regex.
11    Caveats: supports only a simple subset of bytecode.
12*/
13@trusted private struct SampleGenerator(Char)
14{
15    import std.array : appender, Appender;
16    import std.format : formattedWrite;
17    import std.random : Xorshift;
18    import std.regex.internal.ir : Regex, IR, IRL;
19    import std.utf : isValidDchar, byChar;
20    Regex!Char re;
21    Appender!(char[]) app;
22    uint limit, seed;
23    Xorshift gen;
24    //generator for pattern r, with soft maximum of threshold elements
25    //and a given random seed
26    this(ref Regex!Char r, uint threshold, uint randomSeed)
27    {
28        re = r;
29        limit = threshold;
30        seed = randomSeed;
31        app = appender!(Char[])();
32        compose();
33    }
34
35    uint rand(uint x)
36    {
37        uint r = gen.front % x;
38        gen.popFront();
39        return r;
40    }
41
42    void compose()
43    {
44        uint pc = 0, counter = 0, dataLenOld = uint.max;
45        for (;;)
46        {
47            switch (re.ir[pc].code)
48            {
49            case IR.Char:
50                    formattedWrite(app,"%s", cast(dchar) re.ir[pc].data);
51                    pc += IRL!(IR.Char);
52                    break;
53                case IR.OrChar:
54                    uint len = re.ir[pc].sequence;
55                    formattedWrite(app, "%s", cast(dchar) re.ir[pc + rand(len)].data);
56                    pc += len;
57                    break;
58                case IR.CodepointSet:
59                case IR.Trie:
60                    auto set = re.charsets[re.ir[pc].data];
61                    auto x = rand(cast(uint) set.byInterval.length);
62                    auto y = rand(set.byInterval[x].b - set.byInterval[x].a);
63                    formattedWrite(app, "%s", cast(dchar)(set.byInterval[x].a+y));
64                    pc += IRL!(IR.CodepointSet);
65                    break;
66                case IR.Any:
67                    uint x;
68                    do
69                    {
70                        x = rand(0x11_000);
71                    }while (x == '\r' || x == '\n' || !isValidDchar(x));
72                    formattedWrite(app, "%s", cast(dchar) x);
73                    pc += IRL!(IR.Any);
74                    break;
75                case IR.GotoEndOr:
76                    pc += IRL!(IR.GotoEndOr)+re.ir[pc].data;
77                    assert(re.ir[pc].code == IR.OrEnd);
78                    goto case;
79                case IR.OrEnd:
80                    pc += IRL!(IR.OrEnd);
81                    break;
82                case IR.OrStart:
83                    pc += IRL!(IR.OrStart);
84                    goto case;
85                case IR.Option:
86                    uint next = pc + re.ir[pc].data + IRL!(IR.Option);
87                    uint nOpt = 0;
88                    //queue next Option
89                    while (re.ir[next].code == IR.Option)
90                    {
91                        nOpt++;
92                        next += re.ir[next].data + IRL!(IR.Option);
93                    }
94                    nOpt++;
95                    nOpt = rand(nOpt);
96                    for (;nOpt; nOpt--)
97                    {
98                        pc += re.ir[pc].data + IRL!(IR.Option);
99                    }
100                    assert(re.ir[pc].code == IR.Option);
101                    pc += IRL!(IR.Option);
102                    break;
103                case IR.RepeatStart:case IR.RepeatQStart:
104                    pc += IRL!(IR.RepeatStart)+re.ir[pc].data;
105                    goto case IR.RepeatEnd;
106                case IR.RepeatEnd:
107                case IR.RepeatQEnd:
108                    uint len = re.ir[pc].data;
109                    uint step = re.ir[pc+2].raw;
110                    uint min = re.ir[pc+3].raw;
111                    if (counter < min)
112                    {
113                        counter += step;
114                        pc -= len;
115                        break;
116                    }
117                    uint max = re.ir[pc+4].raw;
118                    if (counter < max)
119                    {
120                        if (app.data.length < limit && rand(3) > 0)
121                        {
122                            pc -= len;
123                            counter += step;
124                        }
125                        else
126                        {
127                            counter = counter%step;
128                            pc += IRL!(IR.RepeatEnd);
129                        }
130                    }
131                    else
132                    {
133                        counter = counter%step;
134                        pc += IRL!(IR.RepeatEnd);
135                    }
136                    break;
137                case IR.InfiniteStart, IR.InfiniteBloomStart, IR.InfiniteQStart:
138                    pc += re.ir[pc].data + IRL!(IR.InfiniteStart);
139                    goto case IR.InfiniteEnd; //both Q and non-Q
140                case IR.InfiniteEnd, IR.InfiniteBloomEnd, IR.InfiniteQEnd:
141                    uint len = re.ir[pc].data;
142                    if (app.data.length == dataLenOld)
143                    {
144                        pc += IRL!(IR.InfiniteEnd);
145                        break;
146                    }
147                    dataLenOld = cast(uint) app.data.length;
148                    if (app.data.length < limit && rand(3) > 0)
149                        pc = pc - len;
150                    else
151                        pc = pc + re.ir[pc].length;
152                    break;
153                case IR.GroupStart, IR.GroupEnd:
154                    pc += IRL!(IR.GroupStart);
155                    break;
156                case IR.Bol, IR.Wordboundary, IR.Notwordboundary:
157                case IR.LookaheadStart, IR.NeglookaheadStart, IR.LookbehindStart, IR.NeglookbehindStart:
158                default:
159                    return;
160            }
161        }
162    }
163
164    @property Char[] front()
165    {
166        return app.data;
167    }
168
169    enum empty = false;
170
171    void popFront()
172    {
173        app.shrinkTo(0);
174        compose();
175    }
176}
177
178@system unittest
179{
180    import std.range, std.regex;
181    auto re = regex(`P[a-z]{3,}q`);
182    auto gen = SampleGenerator!char(re, 20, 3141592);
183    static assert(isInputRange!(typeof(gen)));
184    //@@@BUG@@@ somehow gen.take(1_000) doesn't work
185    foreach (v; take(gen, 1_000))
186        assert(v.match(re));
187}
188