1/* 2 * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 * this software and associated documentation files (the "Software"), to deal in 4 * the Software without restriction, including without limitation the rights to 5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 * of the Software, and to permit persons to whom the Software is furnished to do 7 * so, subject to the following conditions: 8 * 9 * The above copyright notice and this permission notice shall be included in all 10 * copies or substantial portions of the Software. 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 * SOFTWARE. 19 */ 20 21package jdk.nashorn.internal.runtime.regexp.joni; 22 23import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindLongest; 24import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType; 25import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder; 26 27@SuppressWarnings("javadoc") 28public abstract class Matcher extends IntHolder { 29 protected final Regex regex; 30 31 protected final char[] chars; 32 protected final int str; 33 protected final int end; 34 35 protected int msaStart; 36 protected int msaOptions; 37 protected final Region msaRegion; 38 protected int msaBestLen; 39 protected int msaBestS; 40 41 protected int msaBegin; 42 protected int msaEnd; 43 44 public Matcher(final Regex regex, final char[] chars) { 45 this(regex, chars, 0, chars.length); 46 } 47 48 public Matcher(final Regex regex, final char[] chars, final int p, final int end) { 49 this.regex = regex; 50 51 this.chars = chars; 52 this.str = p; 53 this.end = end; 54 55 this.msaRegion = regex.numMem == 0 ? null : new Region(regex.numMem + 1); 56 } 57 58 // main matching method 59 protected abstract int matchAt(int range, int sstart, int sprev); 60 61 public final Region getRegion() { 62 return msaRegion; 63 } 64 65 public final int getBegin() { 66 return msaBegin; 67 } 68 69 public final int getEnd() { 70 return msaEnd; 71 } 72 73 protected final void msaInit(final int option, final int start) { 74 msaOptions = option; 75 msaStart = start; 76 if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { 77 msaBestLen = -1; 78 } 79 } 80 81 public final int match(final int at, final int range, final int option) { 82 msaInit(option, at); 83 84 final int prev = EncodingHelper.prevCharHead(str, at); 85 86 if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) { 87 return matchAt(end /*range*/, at, prev); 88 } 89 return matchAt(range /*range*/, at, prev); 90 } 91 92 int low, high; // these are the return values 93 private boolean forwardSearchRange(final char[] ch, final int string, final int e, final int s, final int range, final IntHolder lowPrev) { 94 int pprev = -1; 95 int p = s; 96 97 if (Config.DEBUG_SEARCH) { 98 Config.log.println("forward_search_range: "+ 99 "str: " + string + 100 ", end: " + e + 101 ", s: " + s + 102 ", range: " + range); 103 } 104 105 if (regex.dMin > 0) { 106 p += regex.dMin; 107 } 108 109 retry:while (true) { 110 p = regex.searchAlgorithm.search(regex, ch, p, e, range); 111 112 if (p != -1 && p < range) { 113 if (p - regex.dMin < s) { 114 // retry_gate: 115 pprev = p; 116 p++; 117 continue retry; 118 } 119 120 if (regex.subAnchor != 0) { 121 switch (regex.subAnchor) { 122 case AnchorType.BEGIN_LINE: 123 if (p != string) { 124 final int prev = EncodingHelper.prevCharHead((pprev != -1) ? pprev : string, p); 125 if (!EncodingHelper.isNewLine(ch, prev, e)) { 126 // goto retry_gate; 127 pprev = p; 128 p++; 129 continue retry; 130 } 131 } 132 break; 133 134 case AnchorType.END_LINE: 135 if (p == e) { 136 if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { 137 final int prev = EncodingHelper.prevCharHead((pprev != -1) ? pprev : string, p); 138 if (prev != -1 && EncodingHelper.isNewLine(ch, prev, e)) { 139 // goto retry_gate; 140 pprev = p; 141 p++; 142 continue retry; 143 } 144 } 145 } else if (!EncodingHelper.isNewLine(ch, p, e)) { 146 //if () break; 147 // goto retry_gate; 148 pprev = p; 149 p++; 150 continue retry; 151 } 152 break; 153 154 default: 155 break; 156 } // switch 157 } 158 159 if (regex.dMax == 0) { 160 low = p; 161 if (lowPrev != null) { // ??? // remove null checks 162 if (low > s) { 163 lowPrev.value = EncodingHelper.prevCharHead(s, p); 164 } else { 165 lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : string, p); 166 } 167 } 168 } else { 169 if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) { 170 low = p - regex.dMax; 171 172 if (low > s) { 173 low = EncodingHelper.rightAdjustCharHeadWithPrev(low, lowPrev); 174 if (lowPrev != null && lowPrev.value == -1) { 175 lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : s, low); 176 } 177 } else { 178 if (lowPrev != null) { 179 lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : string, low); 180 } 181 } 182 } 183 } 184 /* no needs to adjust *high, *high is used as range check only */ 185 high = p - regex.dMin; 186 187 if (Config.DEBUG_SEARCH) { 188 Config.log.println("forward_search_range success: "+ 189 "low: " + (low - string) + 190 ", high: " + (high - string) + 191 ", dmin: " + regex.dMin + 192 ", dmax: " + regex.dMax); 193 } 194 195 return true; /* success */ 196 } 197 198 return false; /* fail */ 199 } //while 200 } 201 202 // low, high 203 private boolean backwardSearchRange(final char[] ch, final int string, final int e, final int s, final int range, final int adjrange) { 204 int r = range; 205 r += regex.dMin; 206 int p = s; 207 208 retry:while (true) { 209 p = regex.searchAlgorithm.searchBackward(regex, ch, r, adjrange, e, p, s, r); 210 211 if (p != -1) { 212 if (regex.subAnchor != 0) { 213 switch (regex.subAnchor) { 214 case AnchorType.BEGIN_LINE: 215 if (p != string) { 216 final int prev = EncodingHelper.prevCharHead(string, p); 217 if (!EncodingHelper.isNewLine(ch, prev, e)) { 218 p = prev; 219 continue retry; 220 } 221 } 222 break; 223 224 case AnchorType.END_LINE: 225 if (p == e) { 226 if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { 227 final int prev = EncodingHelper.prevCharHead(adjrange, p); 228 if (prev == -1) { 229 return false; 230 } 231 if (EncodingHelper.isNewLine(ch, prev, e)) { 232 p = prev; 233 continue retry; 234 } 235 } 236 } else if (!EncodingHelper.isNewLine(ch, p, e)) { 237 p = EncodingHelper.prevCharHead(adjrange, p); 238 if (p == -1) { 239 return false; 240 } 241 continue retry; 242 } 243 break; 244 245 default: 246 break; 247 } // switch 248 } 249 250 /* no needs to adjust *high, *high is used as range check only */ 251 if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) { 252 low = p - regex.dMax; 253 high = p - regex.dMin; 254 } 255 256 if (Config.DEBUG_SEARCH) { 257 Config.log.println("backward_search_range: "+ 258 "low: " + (low - string) + 259 ", high: " + (high - string)); 260 } 261 262 return true; 263 } 264 265 if (Config.DEBUG_SEARCH) { 266 Config.log.println("backward_search_range: fail."); 267 } 268 return false; 269 } // while 270 } 271 272 // MATCH_AND_RETURN_CHECK 273 private boolean matchCheck(final int upperRange, final int s, final int prev) { 274 if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) { 275 if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { 276 //range = upperRange; 277 if (matchAt(upperRange, s, prev) != -1) { 278 if (!isFindLongest(regex.options)) { 279 return true; 280 } 281 } 282 } else { 283 //range = upperRange; 284 if (matchAt(upperRange, s, prev) != -1) { 285 return true; 286 } 287 } 288 } else { 289 if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { 290 if (matchAt(end, s, prev) != -1) { 291 //range = upperRange; 292 if (!isFindLongest(regex.options)) { 293 return true; 294 } 295 } 296 } else { 297 //range = upperRange; 298 if (matchAt(end, s, prev) != -1) { 299 return true; 300 } 301 } 302 } 303 return false; 304 } 305 306 public final int search(final int startp, final int rangep, final int option) { 307 int start = startp, range = rangep; 308 int s, prev; 309 int origStart = start; 310 final int origRange = range; 311 312 if (Config.DEBUG_SEARCH) { 313 Config.log.println("onig_search (entry point): "+ 314 "str: " + str + 315 ", end: " + (end - str) + 316 ", start: " + (start - str) + 317 ", range " + (range - str)); 318 } 319 320 if (start > end || start < str) { 321 return -1; 322 } 323 324 /* anchor optimize: resume search range */ 325 if (regex.anchor != 0 && str < end) { 326 int minSemiEnd, maxSemiEnd; 327 328 if ((regex.anchor & AnchorType.BEGIN_POSITION) != 0) { 329 /* search start-position only */ 330 // !begin_position:! 331 if (range > start) { 332 range = start + 1; 333 } else { 334 range = start; 335 } 336 } else if ((regex.anchor & AnchorType.BEGIN_BUF) != 0) { 337 /* search str-position only */ 338 if (range > start) { 339 if (start != str) 340 { 341 return -1; // mismatch_no_msa; 342 } 343 range = str + 1; 344 } else { 345 if (range <= str) { 346 start = str; 347 range = str; 348 } else { 349 return -1; // mismatch_no_msa; 350 } 351 } 352 } else if ((regex.anchor & AnchorType.END_BUF) != 0) { 353 minSemiEnd = maxSemiEnd = end; 354 // !end_buf:! 355 if (endBuf(start, range, minSemiEnd, maxSemiEnd)) 356 { 357 return -1; // mismatch_no_msa; 358 } 359 } else if ((regex.anchor & AnchorType.SEMI_END_BUF) != 0) { 360 final int preEnd = EncodingHelper.stepBack(str, end, 1); 361 maxSemiEnd = end; 362 if (EncodingHelper.isNewLine(chars, preEnd, end)) { 363 minSemiEnd = preEnd; 364 if (minSemiEnd > str && start <= minSemiEnd) { 365 // !goto end_buf;! 366 if (endBuf(start, range, minSemiEnd, maxSemiEnd)) 367 { 368 return -1; // mismatch_no_msa; 369 } 370 } 371 } else { 372 minSemiEnd = end; 373 // !goto end_buf;! 374 if (endBuf(start, range, minSemiEnd, maxSemiEnd)) 375 { 376 return -1; // mismatch_no_msa; 377 } 378 } 379 } else if ((regex.anchor & AnchorType.ANYCHAR_STAR_ML) != 0) { 380 // goto !begin_position;! 381 if (range > start) { 382 range = start + 1; 383 } else { 384 range = start; 385 } 386 } 387 388 } else if (str == end) { /* empty string */ 389 // empty address ? 390 if (Config.DEBUG_SEARCH) { 391 Config.log.println("onig_search: empty string."); 392 } 393 394 if (regex.thresholdLength == 0) { 395 s = start = str; 396 prev = -1; 397 msaInit(option, start); 398 399 if (matchCheck(end, s, prev)) { 400 return match(s); 401 } 402 return mismatch(); 403 } 404 return -1; // goto mismatch_no_msa; 405 } 406 407 if (Config.DEBUG_SEARCH) { 408 Config.log.println("onig_search(apply anchor): " + 409 "end: " + (end - str) + 410 ", start " + (start - str) + 411 ", range " + (range - str)); 412 } 413 414 msaInit(option, origStart); 415 416 s = start; 417 if (range > start) { /* forward search */ 418 if (s > str) { 419 prev = EncodingHelper.prevCharHead(str, s); 420 } else { 421 prev = 0; // -1 422 } 423 424 if (regex.searchAlgorithm != SearchAlgorithm.NONE) { 425 int schRange = range; 426 if (regex.dMax != 0) { 427 if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) { 428 schRange = end; 429 } else { 430 schRange += regex.dMax; 431 if (schRange > end) { 432 schRange = end; 433 } 434 } 435 } 436 if ((end - start) < regex.thresholdLength) { 437 return mismatch(); 438 } 439 440 if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) { 441 do { 442 if (!forwardSearchRange(chars, str, end, s, schRange, this)) { 443 return mismatch(); // low, high, lowPrev 444 } 445 if (s < low) { 446 s = low; 447 prev = value; 448 } 449 while (s <= high) { 450 if (matchCheck(origRange, s, prev)) { 451 return match(s); // ??? 452 } 453 prev = s; 454 s++; 455 } 456 } while (s < range); 457 } 458 /* check only. */ 459 if (!forwardSearchRange(chars, str, end, s, schRange, null)) { 460 return mismatch(); 461 } 462 463 if ((regex.anchor & AnchorType.ANYCHAR_STAR) != 0) { 464 do { 465 if (matchCheck(origRange, s, prev)) { 466 return match(s); 467 } 468 prev = s; 469 s++; 470 } while (s < range); 471 return mismatch(); 472 } 473 } 474 475 do { 476 if (matchCheck(origRange, s, prev)) { 477 return match(s); 478 } 479 prev = s; 480 s++; 481 } while (s < range); 482 483 if (s == range) { /* because empty match with /$/. */ 484 if (matchCheck(origRange, s, prev)) { 485 return match(s); 486 } 487 } 488 } else { /* backward search */ 489 if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) { 490 if (origStart < end) { 491 origStart++; // /* is upper range */ 492 } 493 } 494 495 if (regex.searchAlgorithm != SearchAlgorithm.NONE) { 496 int adjrange; 497 if (range < end) { 498 adjrange = range; 499 } else { 500 adjrange = end; 501 } 502 if (regex.dMax != MinMaxLen.INFINITE_DISTANCE && (end - range) >= regex.thresholdLength) { 503 do { 504 int schStart = s + regex.dMax; 505 if (schStart > end) { 506 schStart = end; 507 } 508 if (!backwardSearchRange(chars, str, end, schStart, range, adjrange)) 509 { 510 return mismatch(); // low, high 511 } 512 if (s > high) { 513 s = high; 514 } 515 while (s != -1 && s >= low) { 516 prev = EncodingHelper.prevCharHead(str, s); 517 if (matchCheck(origStart, s, prev)) { 518 return match(s); 519 } 520 s = prev; 521 } 522 } while (s >= range); 523 return mismatch(); 524 } 525 if ((end - range) < regex.thresholdLength) { 526 return mismatch(); 527 } 528 529 int schStart = s; 530 if (regex.dMax != 0) { 531 if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) { 532 schStart = end; 533 } else { 534 schStart += regex.dMax; 535 if (schStart > end) { 536 schStart = end; 537 } 538 } 539 } 540 if (!backwardSearchRange(chars, str, end, schStart, range, adjrange)) { 541 return mismatch(); 542 } 543 } 544 545 do { 546 prev = EncodingHelper.prevCharHead(str, s); 547 if (matchCheck(origStart, s, prev)) { 548 return match(s); 549 } 550 s = prev; 551 } while (s >= range); 552 553 } 554 return mismatch(); 555 } 556 557 private boolean endBuf(final int startp, final int rangep, final int minSemiEnd, final int maxSemiEnd) { 558 int start = startp; 559 int range = rangep; 560 561 if ((maxSemiEnd - str) < regex.anchorDmin) { 562 return true; // mismatch_no_msa; 563 } 564 565 if (range > start) { 566 if ((minSemiEnd - start) > regex.anchorDmax) { 567 start = minSemiEnd - regex.anchorDmax; 568 if (start >= end) { 569 /* match with empty at end */ 570 start = EncodingHelper.prevCharHead(str, end); 571 } 572 } 573 if ((maxSemiEnd - (range - 1)) < regex.anchorDmin) { 574 range = maxSemiEnd - regex.anchorDmin + 1; 575 } 576 if (start >= range) 577 { 578 return true; // mismatch_no_msa; 579 } 580 } else { 581 if ((minSemiEnd - range) > regex.anchorDmax) { 582 range = minSemiEnd - regex.anchorDmax; 583 } 584 if ((maxSemiEnd - start) < regex.anchorDmin) { 585 start = maxSemiEnd - regex.anchorDmin; 586 } 587 if (range > start) 588 { 589 return true; // mismatch_no_msa; 590 } 591 } 592 return false; 593 } 594 595 private int match(final int s) { 596 return s - str; // sstart ??? 597 } 598 599 private int mismatch() { 600 if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { 601 if (msaBestLen >= 0) { 602 final int s = msaBestS; 603 return match(s); 604 } 605 } 606 // falls through finish: 607 return -1; 608 } 609} 610