1/*
2 * Permission is hereby granted, free of charge, to any person obtaining a copy of
3 * this software and associated documentation files (the "Software"), to deal in
4 * the Software without restriction, including without limitation the rights to
5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
6 * of the Software, and to permit persons to whom the Software is furnished to do
7 * so, subject to the following conditions:
8 *
9 * The above copyright notice and this permission notice shall be included in all
10 * copies or substantial portions of the Software.
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
18 * SOFTWARE.
19 */
20
21package jdk.nashorn.internal.runtime.regexp.joni;
22
23import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindLongest;
24import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
25import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder;
26
27@SuppressWarnings("javadoc")
28public abstract class Matcher extends IntHolder {
29    protected final Regex regex;
30
31    protected final char[] chars;
32    protected final int str;
33    protected final int end;
34
35    protected int msaStart;
36    protected int msaOptions;
37    protected final Region msaRegion;
38    protected int msaBestLen;
39    protected int msaBestS;
40
41    protected int msaBegin;
42    protected int msaEnd;
43
44    public Matcher(final Regex regex, final char[] chars) {
45        this(regex, chars, 0, chars.length);
46    }
47
48    public Matcher(final Regex regex, final char[] chars, final int p, final int end) {
49        this.regex = regex;
50
51        this.chars = chars;
52        this.str = p;
53        this.end = end;
54
55        this.msaRegion = regex.numMem == 0 ? null : new Region(regex.numMem + 1);
56    }
57
58    // main matching method
59    protected abstract int matchAt(int range, int sstart, int sprev);
60
61    public final Region getRegion() {
62        return msaRegion;
63    }
64
65    public final int getBegin() {
66        return msaBegin;
67    }
68
69    public final int getEnd() {
70        return msaEnd;
71    }
72
73    protected final void msaInit(final int option, final int start) {
74        msaOptions = option;
75        msaStart = start;
76        if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
77            msaBestLen = -1;
78        }
79    }
80
81    public final int match(final int at, final int range, final int option) {
82        msaInit(option, at);
83
84        final int prev = EncodingHelper.prevCharHead(str, at);
85
86        if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) {
87            return matchAt(end /*range*/, at, prev);
88        }
89        return matchAt(range /*range*/, at, prev);
90    }
91
92    int low, high; // these are the return values
93    private boolean forwardSearchRange(final char[] ch, final int string, final int e, final int s, final int range, final IntHolder lowPrev) {
94        int pprev = -1;
95        int p = s;
96
97        if (Config.DEBUG_SEARCH) {
98            Config.log.println("forward_search_range: "+
99                                "str: " + string +
100                                ", end: " + e +
101                                ", s: " + s +
102                                ", range: " + range);
103        }
104
105        if (regex.dMin > 0) {
106            p += regex.dMin;
107        }
108
109        retry:while (true) {
110            p = regex.searchAlgorithm.search(regex, ch, p, e, range);
111
112            if (p != -1 && p < range) {
113                if (p - regex.dMin < s) {
114                    // retry_gate:
115                    pprev = p;
116                    p++;
117                    continue retry;
118                }
119
120                if (regex.subAnchor != 0) {
121                    switch (regex.subAnchor) {
122                    case AnchorType.BEGIN_LINE:
123                        if (p != string) {
124                            final int prev = EncodingHelper.prevCharHead((pprev != -1) ? pprev : string, p);
125                            if (!EncodingHelper.isNewLine(ch, prev, e)) {
126                                // goto retry_gate;
127                                pprev = p;
128                                p++;
129                                continue retry;
130                            }
131                        }
132                        break;
133
134                    case AnchorType.END_LINE:
135                        if (p == e) {
136                            if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
137                                final int prev = EncodingHelper.prevCharHead((pprev != -1) ? pprev : string, p);
138                                if (prev != -1 && EncodingHelper.isNewLine(ch, prev, e)) {
139                                    // goto retry_gate;
140                                    pprev = p;
141                                    p++;
142                                    continue retry;
143                                }
144                            }
145                        } else if (!EncodingHelper.isNewLine(ch, p, e)) {
146                            //if () break;
147                            // goto retry_gate;
148                            pprev = p;
149                            p++;
150                            continue retry;
151                        }
152                        break;
153
154                    default:
155                        break;
156                    } // switch
157                }
158
159                if (regex.dMax == 0) {
160                    low = p;
161                    if (lowPrev != null) { // ??? // remove null checks
162                        if (low > s) {
163                            lowPrev.value = EncodingHelper.prevCharHead(s, p);
164                        } else {
165                            lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : string, p);
166                        }
167                    }
168                } else {
169                    if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) {
170                        low = p - regex.dMax;
171
172                        if (low > s) {
173                            low = EncodingHelper.rightAdjustCharHeadWithPrev(low, lowPrev);
174                            if (lowPrev != null && lowPrev.value == -1) {
175                                lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : s, low);
176                            }
177                        } else {
178                            if (lowPrev != null) {
179                                lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : string, low);
180                            }
181                        }
182                    }
183                }
184                /* no needs to adjust *high, *high is used as range check only */
185                high = p - regex.dMin;
186
187                if (Config.DEBUG_SEARCH) {
188                    Config.log.println("forward_search_range success: "+
189                                        "low: " + (low - string) +
190                                        ", high: " + (high - string) +
191                                        ", dmin: " + regex.dMin +
192                                        ", dmax: " + regex.dMax);
193                }
194
195                return true;    /* success */
196            }
197
198            return false;   /* fail */
199        } //while
200    }
201
202    // low, high
203    private boolean backwardSearchRange(final char[] ch, final int string, final int e, final int s, final int range, final int adjrange) {
204        int r = range;
205        r += regex.dMin;
206        int p = s;
207
208        retry:while (true) {
209            p = regex.searchAlgorithm.searchBackward(regex, ch, r, adjrange, e, p, s, r);
210
211            if (p != -1) {
212                if (regex.subAnchor != 0) {
213                    switch (regex.subAnchor) {
214                    case AnchorType.BEGIN_LINE:
215                        if (p != string) {
216                            final int prev = EncodingHelper.prevCharHead(string, p);
217                            if (!EncodingHelper.isNewLine(ch, prev, e)) {
218                                p = prev;
219                                continue retry;
220                            }
221                        }
222                        break;
223
224                    case AnchorType.END_LINE:
225                        if (p == e) {
226                            if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
227                                final int prev = EncodingHelper.prevCharHead(adjrange, p);
228                                if (prev == -1) {
229                                    return false;
230                                }
231                                if (EncodingHelper.isNewLine(ch, prev, e)) {
232                                    p = prev;
233                                    continue retry;
234                                }
235                            }
236                        } else if (!EncodingHelper.isNewLine(ch, p, e)) {
237                            p = EncodingHelper.prevCharHead(adjrange, p);
238                            if (p == -1) {
239                                return false;
240                            }
241                            continue retry;
242                        }
243                        break;
244
245                    default:
246                        break;
247                    } // switch
248                }
249
250                /* no needs to adjust *high, *high is used as range check only */
251                if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) {
252                    low = p - regex.dMax;
253                    high = p - regex.dMin;
254                }
255
256                if (Config.DEBUG_SEARCH) {
257                    Config.log.println("backward_search_range: "+
258                                        "low: " + (low - string) +
259                                        ", high: " + (high - string));
260                }
261
262                return true;
263            }
264
265            if (Config.DEBUG_SEARCH) {
266                Config.log.println("backward_search_range: fail.");
267            }
268            return false;
269        } // while
270    }
271
272    // MATCH_AND_RETURN_CHECK
273    private boolean matchCheck(final int upperRange, final int s, final int prev) {
274        if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) {
275            if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
276                //range = upperRange;
277                if (matchAt(upperRange, s, prev) != -1) {
278                    if (!isFindLongest(regex.options)) {
279                        return true;
280                    }
281                }
282            } else {
283                //range = upperRange;
284                if (matchAt(upperRange, s, prev) != -1) {
285                    return true;
286                }
287            }
288        } else {
289            if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
290                if (matchAt(end, s, prev) != -1) {
291                    //range = upperRange;
292                    if (!isFindLongest(regex.options)) {
293                        return true;
294                    }
295                }
296            } else {
297                //range = upperRange;
298                if (matchAt(end, s, prev) != -1) {
299                    return true;
300                }
301            }
302        }
303        return false;
304    }
305
306    public final int search(final int startp, final int rangep, final int option) {
307        int start = startp, range = rangep;
308        int s, prev;
309        int origStart = start;
310        final int origRange = range;
311
312        if (Config.DEBUG_SEARCH) {
313            Config.log.println("onig_search (entry point): "+
314                    "str: " + str +
315                    ", end: " + (end - str) +
316                    ", start: " + (start - str) +
317                    ", range " + (range - str));
318        }
319
320        if (start > end || start < str) {
321            return -1;
322        }
323
324        /* anchor optimize: resume search range */
325        if (regex.anchor != 0 && str < end) {
326            int minSemiEnd, maxSemiEnd;
327
328            if ((regex.anchor & AnchorType.BEGIN_POSITION) != 0) {
329                /* search start-position only */
330                // !begin_position:!
331                if (range > start) {
332                    range = start + 1;
333                } else {
334                    range = start;
335                }
336            } else if ((regex.anchor & AnchorType.BEGIN_BUF) != 0) {
337                /* search str-position only */
338                if (range > start) {
339                    if (start != str)
340                     {
341                        return -1; // mismatch_no_msa;
342                    }
343                    range = str + 1;
344                } else {
345                    if (range <= str) {
346                        start = str;
347                        range = str;
348                    } else {
349                        return -1; // mismatch_no_msa;
350                    }
351                }
352            } else if ((regex.anchor & AnchorType.END_BUF) != 0) {
353                minSemiEnd = maxSemiEnd = end;
354                // !end_buf:!
355                if (endBuf(start, range, minSemiEnd, maxSemiEnd))
356                 {
357                    return -1; // mismatch_no_msa;
358                }
359            } else if ((regex.anchor & AnchorType.SEMI_END_BUF) != 0) {
360                final int preEnd = EncodingHelper.stepBack(str, end, 1);
361                maxSemiEnd = end;
362                if (EncodingHelper.isNewLine(chars, preEnd, end)) {
363                    minSemiEnd = preEnd;
364                    if (minSemiEnd > str && start <= minSemiEnd) {
365                        // !goto end_buf;!
366                        if (endBuf(start, range, minSemiEnd, maxSemiEnd))
367                         {
368                            return -1; // mismatch_no_msa;
369                        }
370                    }
371                } else {
372                    minSemiEnd = end;
373                    // !goto end_buf;!
374                    if (endBuf(start, range, minSemiEnd, maxSemiEnd))
375                     {
376                        return -1; // mismatch_no_msa;
377                    }
378                }
379            } else if ((regex.anchor & AnchorType.ANYCHAR_STAR_ML) != 0) {
380                // goto !begin_position;!
381                if (range > start) {
382                    range = start + 1;
383                } else {
384                    range = start;
385                }
386            }
387
388        } else if (str == end) { /* empty string */
389            // empty address ?
390            if (Config.DEBUG_SEARCH) {
391                Config.log.println("onig_search: empty string.");
392            }
393
394            if (regex.thresholdLength == 0) {
395                s = start = str;
396                prev = -1;
397                msaInit(option, start);
398
399                if (matchCheck(end, s, prev)) {
400                    return match(s);
401                }
402                return mismatch();
403            }
404            return -1; // goto mismatch_no_msa;
405        }
406
407        if (Config.DEBUG_SEARCH) {
408            Config.log.println("onig_search(apply anchor): " +
409                                "end: " + (end - str) +
410                                ", start " + (start - str) +
411                                ", range " + (range - str));
412        }
413
414        msaInit(option, origStart);
415
416        s = start;
417        if (range > start) {    /* forward search */
418            if (s > str) {
419                prev = EncodingHelper.prevCharHead(str, s);
420            } else {
421                prev = 0; // -1
422            }
423
424            if (regex.searchAlgorithm != SearchAlgorithm.NONE) {
425                int schRange = range;
426                if (regex.dMax != 0) {
427                    if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) {
428                        schRange = end;
429                    } else {
430                        schRange += regex.dMax;
431                        if (schRange > end) {
432                            schRange = end;
433                        }
434                    }
435                }
436                if ((end - start) < regex.thresholdLength) {
437                    return mismatch();
438                }
439
440                if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) {
441                    do {
442                        if (!forwardSearchRange(chars, str, end, s, schRange, this)) {
443                            return mismatch(); // low, high, lowPrev
444                        }
445                        if (s < low) {
446                            s = low;
447                            prev = value;
448                        }
449                        while (s <= high) {
450                            if (matchCheck(origRange, s, prev)) {
451                                return match(s); // ???
452                            }
453                            prev = s;
454                            s++;
455                        }
456                    } while (s < range);
457                }
458                /* check only. */
459                if (!forwardSearchRange(chars, str, end, s, schRange, null)) {
460                    return mismatch();
461                }
462
463                if ((regex.anchor & AnchorType.ANYCHAR_STAR) != 0) {
464                    do {
465                        if (matchCheck(origRange, s, prev)) {
466                            return match(s);
467                        }
468                        prev = s;
469                        s++;
470                    } while (s < range);
471                    return mismatch();
472                }
473            }
474
475            do {
476                if (matchCheck(origRange, s, prev)) {
477                    return match(s);
478                }
479                prev = s;
480                s++;
481            } while (s < range);
482
483            if (s == range) { /* because empty match with /$/. */
484                if (matchCheck(origRange, s, prev)) {
485                    return match(s);
486                }
487            }
488        } else { /* backward search */
489            if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) {
490                if (origStart < end) {
491                    origStart++; // /* is upper range */
492                }
493            }
494
495            if (regex.searchAlgorithm != SearchAlgorithm.NONE) {
496                int adjrange;
497                if (range < end) {
498                    adjrange = range;
499                } else {
500                    adjrange = end;
501                }
502                if (regex.dMax != MinMaxLen.INFINITE_DISTANCE && (end - range) >= regex.thresholdLength) {
503                    do {
504                        int schStart = s + regex.dMax;
505                        if (schStart > end) {
506                            schStart = end;
507                        }
508                        if (!backwardSearchRange(chars, str, end, schStart, range, adjrange))
509                         {
510                            return mismatch(); // low, high
511                        }
512                        if (s > high) {
513                            s = high;
514                        }
515                        while (s != -1 && s >= low) {
516                            prev = EncodingHelper.prevCharHead(str, s);
517                            if (matchCheck(origStart, s, prev)) {
518                                return match(s);
519                            }
520                            s = prev;
521                        }
522                    } while (s >= range);
523                    return mismatch();
524                }
525                if ((end - range) < regex.thresholdLength) {
526                    return mismatch();
527                }
528
529                int schStart = s;
530                if (regex.dMax != 0) {
531                    if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) {
532                        schStart = end;
533                    } else {
534                        schStart += regex.dMax;
535                        if (schStart > end) {
536                            schStart = end;
537                        }
538                    }
539                }
540                if (!backwardSearchRange(chars, str, end, schStart, range, adjrange)) {
541                    return mismatch();
542                }
543            }
544
545            do {
546                prev = EncodingHelper.prevCharHead(str, s);
547                if (matchCheck(origStart, s, prev)) {
548                    return match(s);
549                }
550                s = prev;
551            } while (s >= range);
552
553        }
554        return mismatch();
555    }
556
557    private boolean endBuf(final int startp, final int rangep, final int minSemiEnd, final int maxSemiEnd) {
558        int start = startp;
559        int range = rangep;
560
561        if ((maxSemiEnd - str) < regex.anchorDmin) {
562            return true; // mismatch_no_msa;
563        }
564
565        if (range > start) {
566            if ((minSemiEnd - start) > regex.anchorDmax) {
567                start = minSemiEnd - regex.anchorDmax;
568                if (start >= end) {
569                    /* match with empty at end */
570                    start = EncodingHelper.prevCharHead(str, end);
571                }
572            }
573            if ((maxSemiEnd - (range - 1)) < regex.anchorDmin) {
574                range = maxSemiEnd - regex.anchorDmin + 1;
575            }
576            if (start >= range)
577             {
578                return true; // mismatch_no_msa;
579            }
580        } else {
581            if ((minSemiEnd - range) > regex.anchorDmax) {
582                range = minSemiEnd - regex.anchorDmax;
583            }
584            if ((maxSemiEnd - start) < regex.anchorDmin) {
585                start = maxSemiEnd - regex.anchorDmin;
586            }
587            if (range > start)
588             {
589                return true; // mismatch_no_msa;
590            }
591        }
592        return false;
593    }
594
595    private int match(final int s) {
596        return s - str; // sstart ???
597    }
598
599    private int mismatch() {
600        if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
601            if (msaBestLen >= 0) {
602                final int s = msaBestS;
603                return match(s);
604            }
605        }
606        // falls through finish:
607        return -1;
608    }
609}
610