1/* 2 tre-match-backtrack.c - TRE backtracking regex matching engine 3 4 This software is released under a BSD-style license. 5 See the file LICENSE for details and copyright. 6 7*/ 8 9/* 10 This matcher is for regexps that use back referencing. Regexp matching 11 with back referencing is an NP-complete problem on the number of back 12 references. The easiest way to match them is to use a backtracking 13 routine which basically goes through all possible paths in the TNFA 14 and chooses the one which results in the best (leftmost and longest) 15 match. This can be spectacularly expensive and may run out of stack 16 space, but there really is no better known generic algorithm. Quoting 17 Henry Spencer from comp.compilers: 18 <URL: http://compilers.iecc.com/comparch/article/93-03-102> 19 20 POSIX.2 REs require longest match, which is really exciting to 21 implement since the obsolete ("basic") variant also includes 22 \<digit>. I haven't found a better way of tackling this than doing 23 a preliminary match using a DFA (or simulation) on a modified RE 24 that just replicates subREs for \<digit>, and then doing a 25 backtracking match to determine whether the subRE matches were 26 right. This can be rather slow, but I console myself with the 27 thought that people who use \<digit> deserve very slow execution. 28 (Pun unintentional but very appropriate.) 29 30*/ 31 32 33#ifdef HAVE_CONFIG_H 34#include <config.h> 35#endif /* HAVE_CONFIG_H */ 36 37/* Unset TRE_USE_ALLOCA to avoid using the stack to hold all the state 38 info while running */ 39#undef TRE_USE_ALLOCA 40 41#ifdef TRE_USE_ALLOCA 42/* AIX requires this to be the first thing in the file. */ 43#ifndef __GNUC__ 44# if HAVE_ALLOCA_H 45# include <alloca.h> 46# else 47# ifdef _AIX 48 #pragma alloca 49# else 50# ifndef alloca /* predefined by HP cc +Olibcalls */ 51char *alloca (); 52# endif 53# endif 54# endif 55#endif 56#endif /* TRE_USE_ALLOCA */ 57 58#include <assert.h> 59#include <stdlib.h> 60#include <string.h> 61#ifdef HAVE_WCHAR_H 62#include <wchar.h> 63#endif /* HAVE_WCHAR_H */ 64#ifdef HAVE_WCTYPE_H 65#include <wctype.h> 66#endif /* HAVE_WCTYPE_H */ 67#ifndef TRE_WCHAR 68#include <ctype.h> 69#endif /* !TRE_WCHAR */ 70#ifdef HAVE_MALLOC_H 71#include <malloc.h> 72#endif /* HAVE_MALLOC_H */ 73 74#include "tre-internal.h" 75#include "tre-mem.h" 76#include "tre-match-utils.h" 77#include "tre.h" 78#include "xmalloc.h" 79 80typedef struct { 81 int pos; 82 unsigned int pos_add_next; 83 const char *str_byte; 84#ifdef TRE_WCHAR 85 const wchar_t *str_wide; 86#endif /* TRE_WCHAR */ 87 tre_tnfa_transition_t *state; 88 int state_id; 89 int next_c; 90 tre_tag_t *tags; 91#ifdef TRE_MBSTATE 92 mbstate_t mbstate; 93#endif /* TRE_MBSTATE */ 94} tre_backtrack_item_t; 95 96typedef struct tre_backtrack_struct { 97 tre_backtrack_item_t item; 98 struct tre_backtrack_struct *prev; 99 struct tre_backtrack_struct *next; 100} *tre_backtrack_t; 101 102#ifdef TRE_WCHAR 103#define BT_STACK_WIDE_IN(_str_wide) stack->item.str_wide = (_str_wide) 104#define BT_STACK_WIDE_OUT (str_wide) = stack->item.str_wide 105#else /* !TRE_WCHAR */ 106#define BT_STACK_WIDE_IN(_str_wide) 107#define BT_STACK_WIDE_OUT 108#endif /* !TRE_WCHAR */ 109 110#ifdef TRE_MBSTATE 111#define BT_STACK_MBSTATE_IN stack->item.mbstate = (mbstate) 112#define BT_STACK_MBSTATE_OUT (mbstate) = stack->item.mbstate 113#else /* !TRE_MBSTATE */ 114#define BT_STACK_MBSTATE_IN 115#define BT_STACK_MBSTATE_OUT 116#endif /* !TRE_MBSTATE */ 117 118 119#ifdef TRE_USE_ALLOCA 120#define tre_bt_mem_new tre_mem_newa 121#define tre_bt_mem_alloc tre_mem_alloca 122#define tre_bt_mem_destroy(obj) do { } while (0) 123#else /* !TRE_USE_ALLOCA */ 124#define tre_bt_mem_new tre_mem_new 125#define tre_bt_mem_alloc tre_mem_alloc 126#define tre_bt_mem_destroy tre_mem_destroy 127#endif /* !TRE_USE_ALLOCA */ 128 129 130#define BT_STACK_PUSH(_pos, _pos_add_next, _str_byte, _str_wide, _state, _state_id, _next_c, _tags, _mbstate) \ 131 do \ 132 { \ 133 if (!stack->next) \ 134 { \ 135 tre_backtrack_t s; \ 136 s = tre_bt_mem_alloc(mem, sizeof(*s)); \ 137 if (!s) \ 138 { \ 139 tre_bt_mem_destroy(mem); \ 140 if (tags) \ 141 xfree(tags); \ 142 if (pmatch) \ 143 xfree(pmatch); \ 144 if (states_seen) \ 145 xfree(states_seen); \ 146 return REG_ESPACE; \ 147 } \ 148 s->prev = stack; \ 149 s->next = NULL; \ 150 s->item.tags = tre_bt_mem_alloc(mem, \ 151 num_tags * sizeof(*tags)); \ 152 if (!s->item.tags) \ 153 { \ 154 tre_bt_mem_destroy(mem); \ 155 if (tags) \ 156 xfree(tags); \ 157 if (pmatch) \ 158 xfree(pmatch); \ 159 if (states_seen) \ 160 xfree(states_seen); \ 161 return REG_ESPACE; \ 162 } \ 163 stack->next = s; \ 164 stack = s; \ 165 } \ 166 else \ 167 stack = stack->next; \ 168 stack->item.pos = (_pos); \ 169 stack->item.pos_add_next = (_pos_add_next); \ 170 stack->item.str_byte = (_str_byte); \ 171 BT_STACK_WIDE_IN(_str_wide); \ 172 stack->item.state = (_state); \ 173 stack->item.state_id = (_state_id); \ 174 stack->item.next_c = (_next_c); \ 175 memcpy(stack->item.tags, (_tags), num_tags * sizeof(*(_tags))); \ 176 BT_STACK_MBSTATE_IN; \ 177 } \ 178 while (/*CONSTCOND*/0) 179 180#ifdef TRE_STR_USER 181#define BT_STACK_POP() \ 182 do \ 183 { \ 184 assert(stack->prev); \ 185 pos = stack->item.pos; \ 186 pos_add_next = stack->item.pos_add_next; \ 187 if (type == STR_USER) \ 188 str_source->rewind(pos + pos_add_next, str_source->context); \ 189 str_byte = stack->item.str_byte; \ 190 BT_STACK_WIDE_OUT; \ 191 state = stack->item.state; \ 192 next_c = stack->item.next_c; \ 193 memcpy(tags, stack->item.tags, num_tags * sizeof(*tags)); \ 194 BT_STACK_MBSTATE_OUT; \ 195 stack = stack->prev; \ 196 } \ 197 while (/*CONSTCOND*/0) 198#else /* !TRE_STR_USER */ 199#define BT_STACK_POP() \ 200 do \ 201 { \ 202 assert(stack->prev); \ 203 pos = stack->item.pos; \ 204 pos_add_next = stack->item.pos_add_next; \ 205 str_byte = stack->item.str_byte; \ 206 BT_STACK_WIDE_OUT; \ 207 state = stack->item.state; \ 208 next_c = stack->item.next_c; \ 209 memcpy(tags, stack->item.tags, num_tags * sizeof(*tags)); \ 210 BT_STACK_MBSTATE_OUT; \ 211 stack = stack->prev; \ 212 } \ 213 while (/*CONSTCOND*/0) 214#endif /* !TRE_STR_USER */ 215 216#undef MIN 217#define MIN(a, b) ((a) <= (b) ? (a) : (b)) 218 219reg_errcode_t 220tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string, 221 int len, tre_str_type_t type, tre_tag_t *match_tags, 222 int eflags, int *match_end_ofs) 223{ 224 /* State variables required by GET_NEXT_WCHAR. */ 225 tre_char_t prev_c = 0, next_c = 0; 226 const char *str_byte = string; 227 int pos = 0; 228 unsigned int pos_add_next = 1; 229#ifdef TRE_WCHAR 230 const wchar_t *str_wide = string; 231#ifdef TRE_MBSTATE 232 mbstate_t mbstate; 233#endif /* TRE_MBSTATE */ 234#endif /* TRE_WCHAR */ 235 int reg_notbol = eflags & REG_NOTBOL; 236 int reg_noteol = eflags & REG_NOTEOL; 237 int reg_newline = tnfa->cflags & REG_NEWLINE; 238#ifdef TRE_STR_USER 239 int str_user_end = 0; 240#endif /* TRE_STR_USER */ 241 int i; 242 243 /* These are used to remember the necessary values of the above 244 variables to return to the position where the current search 245 started from. */ 246 int next_c_start; 247 const char *str_byte_start; 248 int pos_start = -1; 249#ifdef TRE_WCHAR 250 const wchar_t *str_wide_start; 251#endif /* TRE_WCHAR */ 252#ifdef TRE_MBSTATE 253 mbstate_t mbstate_start; 254#endif /* TRE_MBSTATE */ 255 256 /* End offset of best match so far, or -1 if no match found yet. */ 257 int match_eo = -1; 258 /* Tag arrays. */ 259 int *next_tags; 260 tre_tag_t *tags = NULL; 261 /* Current TNFA state. */ 262 tre_tnfa_transition_t *state; 263 int *states_seen = NULL; 264 265 /* Memory allocator to for allocating the backtracking stack. */ 266 tre_mem_t mem = tre_bt_mem_new(); 267 268 /* The backtracking stack. */ 269 tre_backtrack_t stack; 270 271 tre_tnfa_transition_t *trans_i; 272 regmatch_t *pmatch = NULL; 273 reg_errcode_t ret; 274 275 int num_tags = tnfa->num_tags; 276 int touch = 1; 277 char *buf; 278 int tbytes; 279 280#ifdef TRE_MBSTATE 281 memset(&mbstate, '\0', sizeof(mbstate)); 282#endif /* TRE_MBSTATE */ 283 284 if (!mem) 285 return REG_ESPACE; 286 stack = tre_bt_mem_alloc(mem, sizeof(*stack)); 287 if (!stack) 288 { 289 ret = REG_ESPACE; 290 goto error_exit; 291 } 292 stack->prev = NULL; 293 stack->next = NULL; 294 295 DPRINT(("tnfa_execute_backtrack, input type %d\n", type)); 296 DPRINT(("len = %d\n", len)); 297 298 { 299 int pbytes, sbytes, total_bytes; 300 char *tmp_buf; 301 /* Compute the length of the block we need. */ 302 tbytes = sizeof(*tags) * num_tags; 303 pbytes = sizeof(*pmatch) * tnfa->num_submatches; 304 sbytes = sizeof(*states_seen) * tnfa->num_states; 305 total_bytes = 306 (sizeof(long) - 1) * 2 /* for alignment paddings */ 307 + tbytes + pbytes + sbytes; 308 309 DPRINT(("tre_tnfa_run_backtrack, allocate %d bytes\n", total_bytes)); 310 /* Allocate the memory. */ 311#ifdef TRE_USE_ALLOCA 312 buf = alloca(total_bytes); 313#else /* !TRE_USE_ALLOCA */ 314 buf = xmalloc((unsigned)total_bytes); 315#endif /* !TRE_USE_ALLOCA */ 316 if (buf == NULL) 317 return REG_ESPACE; 318 319 /* Get the various pointers within tmp_buf (properly aligned). */ 320 tags = (void *)buf; 321 tmp_buf = buf + tbytes; 322 tmp_buf += ALIGN(tmp_buf, long); 323 pmatch = (void *)tmp_buf; 324 tmp_buf += pbytes; 325 tmp_buf += ALIGN(tmp_buf, long); 326 states_seen = (void *)tmp_buf; 327 } 328 329 retry: 330 { 331 memset(tags, 0, num_tags * sizeof(*tags)); 332 if (match_tags) 333 memset(match_tags, 0, num_tags * sizeof(*match_tags)); 334 for (i = 0; i < tnfa->num_states; i++) 335 states_seen[i] = 0; 336 } 337 338 state = NULL; 339 pos = pos_start; 340#ifdef TRE_STR_USER 341 if (type == STR_USER) 342 str_source->rewind(pos + pos_add_next, str_source->context); 343#endif /* TRE_STR_USER */ 344 GET_NEXT_WCHAR(); 345 pos_start = pos; 346 next_c_start = next_c; 347 str_byte_start = str_byte; 348#ifdef TRE_WCHAR 349 str_wide_start = str_wide; 350#endif /* TRE_WCHAR */ 351#ifdef TRE_MBSTATE 352 mbstate_start = mbstate; 353#endif /* TRE_MBSTATE */ 354 355 /* Handle initial states. */ 356 next_tags = NULL; 357 for (trans_i = tnfa->initial; trans_i->state; trans_i++) 358 { 359 DPRINT(("> init %p, prev_c %lc\n", trans_i->state, (tre_cint_t)prev_c)); 360 if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions)) 361 { 362 DPRINT(("assert failed\n")); 363 continue; 364 } 365 if (state == NULL) 366 { 367 /* Start from this state. */ 368 state = trans_i->state; 369 next_tags = trans_i->tags; 370 } 371 else 372 { 373 /* Backtrack to this state. */ 374 DPRINT(("saving state %d for backtracking\n", trans_i->state_id)); 375 BT_STACK_PUSH(pos, pos_add_next, str_byte, str_wide, trans_i->state, 376 trans_i->state_id, next_c, tags, mbstate); 377 { 378 int *tmp = trans_i->tags; 379 if (tmp) 380 { 381 while (*tmp >= 0) 382 tre_tag_set(stack->item.tags, *tmp++, pos, touch); 383 touch++; 384 } 385 } 386 } 387 } 388 389 if (next_tags) 390 { 391 for (; *next_tags >= 0; next_tags++) 392 tre_tag_set(tags, *next_tags, pos, touch); 393 touch++; 394 } 395 396 397 DPRINT(("entering match loop, pos %d, str_byte %p\n", pos, str_byte)); 398 DPRINT(("pos:chr/code | state and tags\n")); 399 DPRINT(("-------------+------------------------------------------------\n")); 400 401 if (state == NULL) 402 goto backtrack; 403 404 while (/*CONSTCOND*/1) 405 { 406 tre_tnfa_transition_t *next_state; 407 int empty_br_match; 408 409 DPRINT(("start loop\n")); 410 411 if (match_eo >= 0 && tnfa->num_minimals) 412 { 413 int skip = 0; 414#ifdef TRE_DEBUG 415 DPRINT(("Checking minimal conditions: match_eo=%d match_tags=", 416 match_eo)); 417 tre_print_tags(match_tags, tnfa->num_tags); 418 DPRINT(("\n")); 419#endif /* TRE_DEBUG */ 420 for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2) 421 { 422 int end = tnfa->minimal_tags[i]; 423 int start = tnfa->minimal_tags[i + 1]; 424 DPRINT((" Minimal start %d, end %d\n", start, end)); 425 if (tre_minimal_tag_order(start, end, match_tags, tags) > 0) 426 { 427 skip = 1; 428 break; 429 } 430 } 431 if (!skip) 432 { 433#ifdef TRE_DEBUG 434 DPRINT((" Keeping tags=")); 435 tre_print_tags(tags, tnfa->num_tags); 436 DPRINT(("\n")); 437#endif /* TRE_DEBUG */ 438 } 439 else 440 { 441#ifdef TRE_DEBUG 442 DPRINT((" Throwing out tags=")); 443 tre_print_tags(tags, tnfa->num_tags); 444 DPRINT(("\n")); 445#endif /* TRE_DEBUG */ 446 goto backtrack; 447 } 448 } 449 450 if (state == tnfa->final) 451 { 452 DPRINT((" match found, match_eo=%d pos=%d\n", match_eo, pos)); 453 454 if (match_eo >= 0 && tnfa->num_minimals) 455 { 456 int compare = 0; 457#ifdef TRE_DEBUG 458 DPRINT(("Checking minimal conditions: match_eo=%d " 459 "match_tags=", match_eo)); 460 tre_print_tags(match_tags, tnfa->num_tags); 461 DPRINT(("\n")); 462#endif /* TRE_DEBUG */ 463 for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2) 464 { 465 int end = tnfa->minimal_tags[i]; 466 int start = tnfa->minimal_tags[i + 1]; 467 DPRINT((" Minimal start %d, end %d\n", start, end)); 468 if ((compare = tre_minimal_tag_order(start, end, 469 match_tags, tags)) != 0) 470 break; 471 } 472 if (compare > 0) 473 { 474#ifdef TRE_DEBUG 475 DPRINT((" Throwing out new match, tags=")); 476 tre_print_tags(tags, tnfa->num_tags); 477 DPRINT(("\n")); 478#endif /* TRE_DEBUG */ 479 goto backtrack; 480 } 481 else if (compare < 0) 482 { 483#ifdef TRE_DEBUG 484 DPRINT((" Throwing out old match, tags=")); 485 tre_print_tags(match_tags, tnfa->num_tags); 486 DPRINT(("\n")); 487#endif /* TRE_DEBUG */ 488 match_eo = -1; 489 } 490 } 491 492 if (match_eo < pos 493 || (match_eo == pos 494 && match_tags 495 && tre_tag_order(tnfa->num_tags, tnfa->tag_directions, 496 tags, match_tags))) 497 { 498 /* This match wins the previous match. */ 499#ifdef TRE_DEBUG 500 DPRINT((" win previous tags=")); 501 tre_print_tags(tags, tnfa->num_tags); 502 DPRINT(("\n")); 503#endif /* TRE_DEBUG */ 504 match_eo = pos; 505 if (match_tags) 506 memcpy(match_tags, tags, num_tags * sizeof(*tags)); 507 } 508 /* Our TNFAs never have transitions leaving from the final state, 509 so we jump right to backtracking. */ 510 goto backtrack; 511 } 512 513#ifdef TRE_DEBUG 514 DPRINT(("%3d:%2lc/%05d | %p ", pos, (tre_cint_t)next_c, (int)next_c, 515 state)); 516 tre_print_tags(tags, tnfa->num_tags); 517 DPRINT(("\n")); 518#endif /* TRE_DEBUG */ 519 520 /* Go to the next character in the input string. */ 521 empty_br_match = 0; 522 trans_i = state; 523 if (trans_i->state && trans_i->assertions & ASSERT_BACKREF) 524 { 525 /* This is a back reference state. All transitions leaving from 526 this state have the same back reference "assertion". Instead 527 of reading the next character, we match the back reference. */ 528 int so, eo, bt = trans_i->u.backref; 529 int bt_len; 530 int result; 531 532 DPRINT((" should match back reference %d\n", bt)); 533 /* Get the substring we need to match against. Remember to 534 turn off REG_NOSUB temporarily. */ 535 ret = tre_fill_pmatch(bt + 1, pmatch, tnfa->cflags & ~REG_NOSUB, 536 tnfa, tags, pos); 537 if (ret != REG_OK) goto error_exit; 538 so = pmatch[bt].rm_so; 539 eo = pmatch[bt].rm_eo; 540 bt_len = eo - so; 541 542#ifdef TRE_DEBUG 543 { 544 int slen; 545 if (len < 0) 546 slen = bt_len; 547 else 548 slen = MIN(bt_len, len - pos); 549 550 if (type == STR_BYTE) 551 { 552 DPRINT((" substring (len %d) is [%d, %d]: '%.*s'\n", 553 bt_len, so, eo, bt_len, (char*)string + so)); 554 DPRINT((" current string is '%.*s'\n", slen, str_byte - 1)); 555 } 556#ifdef TRE_WCHAR 557 else if (type == STR_WIDE) 558 { 559 DPRINT((" substring (len %d) is [%d, %d]: '%.*" STRF "'\n", 560 bt_len, so, eo, bt_len, (wchar_t*)string + so)); 561 DPRINT((" current string is '%.*" STRF "'\n", 562 slen, str_wide - 1)); 563 } 564#endif /* TRE_WCHAR */ 565 } 566#endif 567 568 if (so < 0) 569 { 570 result = 1; /* Back reference of nomatch doesn't match */ 571 } 572 else if (len < 0) 573 { 574#ifdef TRE_STR_USER 575 if (type == STR_USER) 576 result = str_source->compare((unsigned)so, (unsigned)pos, 577 (unsigned)bt_len, 578 str_source->context); 579 else 580#endif /* TRE_STR_USER */ 581#ifdef TRE_WCHAR 582 if (type == STR_WIDE) 583 result = wcsncmp((const wchar_t*)string + so, str_wide - 1, 584 (size_t)bt_len); 585 else 586#endif /* TRE_WCHAR */ 587 result = strncmp((const char*)string + so, str_byte - 1, 588 (size_t)bt_len); 589 } 590 else if (len - pos < bt_len) 591 result = 1; 592#ifdef TRE_WCHAR 593 else if (type == STR_WIDE) 594 result = wmemcmp((const wchar_t*)string + so, str_wide - 1, 595 (size_t)bt_len); 596#endif /* TRE_WCHAR */ 597 else 598 result = memcmp((const char*)string + so, str_byte - 1, 599 (size_t)bt_len); 600 601 if (result == 0) 602 { 603 /* Back reference matched. Check for infinite loop. */ 604 if (bt_len == 0) 605 empty_br_match = 1; 606 if (empty_br_match && states_seen[trans_i->state_id]) 607 { 608 DPRINT((" avoid loop\n")); 609 goto backtrack; 610 } 611 612 states_seen[trans_i->state_id] = empty_br_match; 613 614 /* Advance in input string and resync `prev_c', `next_c' 615 and pos. */ 616 DPRINT((" back reference matched\n")); 617 str_byte += bt_len - 1; 618#ifdef TRE_WCHAR 619 str_wide += bt_len - 1; 620#endif /* TRE_WCHAR */ 621 pos += bt_len - 1; 622 GET_NEXT_WCHAR(); 623 DPRINT((" pos now %d\n", pos)); 624 } 625 else 626 { 627 DPRINT((" back reference did not match\n")); 628 goto backtrack; 629 } 630 } 631 else 632 { 633 /* Check for end of string. */ 634 if (len < 0) 635 { 636#ifdef TRE_STR_USER 637 if (type == STR_USER) 638 { 639 if (str_user_end) 640 goto backtrack; 641 } 642 else 643#endif /* TRE_STR_USER */ 644 if (next_c == L'\0') 645 goto backtrack; 646 } 647 else 648 { 649 if (pos >= len) 650 goto backtrack; 651 } 652 653 /* Read the next character. */ 654 GET_NEXT_WCHAR(); 655 } 656 657 next_state = NULL; 658 for (trans_i = state; trans_i->state; trans_i++) 659 { 660 DPRINT((" transition %d-%d (%c-%c) %d to %d\n", 661 trans_i->code_min, trans_i->code_max, 662 trans_i->code_min, trans_i->code_max, 663 trans_i->assertions, trans_i->state_id)); 664 if (trans_i->code_min <= (tre_cint_t)prev_c 665 && trans_i->code_max >= (tre_cint_t)prev_c) 666 { 667 if (trans_i->assertions 668 && (CHECK_ASSERTIONS(trans_i->assertions) 669 || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags))) 670 { 671 DPRINT((" assertion failed\n")); 672 continue; 673 } 674 675 if (next_state == NULL) 676 { 677 /* First matching transition. */ 678 DPRINT((" Next state is %d\n", trans_i->state_id)); 679 next_state = trans_i->state; 680 next_tags = trans_i->tags; 681 } 682 else 683 { 684 /* Second matching transition. We may need to backtrack here 685 to take this transition instead of the first one, so we 686 push this transition in the backtracking stack so we can 687 jump back here if needed. */ 688 DPRINT((" saving state %d for backtracking\n", 689 trans_i->state_id)); 690 BT_STACK_PUSH(pos, pos_add_next, str_byte, str_wide, 691 trans_i->state, trans_i->state_id, next_c, 692 tags, mbstate); 693 { 694 int *tmp; 695 for (tmp = trans_i->tags; tmp && *tmp >= 0; tmp++) 696 tre_tag_set(stack->item.tags, *tmp, pos, touch); 697 touch++; 698 } 699#if 0 /* XXX - it's important not to look at all transitions here to keep 700 the stack small! */ 701 break; 702#endif 703 } 704 } 705 } 706 707 if (next_state != NULL) 708 { 709 /* Matching transitions were found. Take the first one. */ 710 state = next_state; 711 712 /* Update the tag values. */ 713 if (next_tags) 714 { 715 while (*next_tags >= 0) 716 tre_tag_set(tags, *next_tags++, pos, touch); 717 touch++; 718 } 719 } 720 else 721 { 722 backtrack: 723 /* A matching transition was not found. Try to backtrack. */ 724 if (stack->prev) 725 { 726 DPRINT((" backtracking\n")); 727 if (stack->item.state->assertions & ASSERT_BACKREF) 728 { 729 DPRINT((" states_seen[%d] = 0\n", 730 stack->item.state_id)); 731 states_seen[stack->item.state_id] = 0; 732 } 733 734 BT_STACK_POP(); 735 } 736 else if (match_eo < 0) 737 { 738 /* Try starting from a later position in the input string. */ 739 /* Check for end of string. */ 740 if (pos == pos_start) 741 { 742 if (len < 0) 743 { 744 if (next_c == L'\0') 745 { 746 DPRINT(("end of string.\n")); 747 break; 748 } 749 } 750 else 751 { 752 if (pos >= len) 753 { 754 DPRINT(("end of string.\n")); 755 break; 756 } 757 } 758 } 759 DPRINT(("restarting from next start position\n")); 760 next_c = next_c_start; 761#ifdef TRE_MBSTATE 762 mbstate = mbstate_start; 763#endif /* TRE_MBSTATE */ 764 str_byte = str_byte_start; 765#ifdef TRE_WCHAR 766 str_wide = str_wide_start; 767#endif /* TRE_WCHAR */ 768 goto retry; 769 } 770 else 771 { 772 DPRINT(("finished\n")); 773 break; 774 } 775 } 776 } 777 778 ret = match_eo >= 0 ? REG_OK : REG_NOMATCH; 779 *match_end_ofs = match_eo; 780 781 error_exit: 782 tre_bt_mem_destroy(mem); 783#ifndef TRE_USE_ALLOCA 784 if (buf) 785 xfree(buf); 786#endif /* !TRE_USE_ALLOCA */ 787 788 return ret; 789} 790