1251881Speter/* 2251881Speter tre-match-approx.c - TRE approximate regex matching engine 3251881Speter 4251881Speter This software is released under a BSD-style license. 5251881Speter See the file LICENSE for details and copyright. 6251881Speter 7251881Speter*/ 8251881Speter#ifdef HAVE_CONFIG_H 9251881Speter#include <config.h> 10251881Speter#endif /* HAVE_CONFIG_H */ 11251881Speter 12251881Speter#include "tre-alloca.h" 13251881Speter 14251881Speter#define __USE_STRING_INLINES 15251881Speter#undef __NO_INLINE__ 16251881Speter 17251881Speter#include <assert.h> 18251881Speter#include <stdlib.h> 19251881Speter#include <string.h> 20251881Speter#include <limits.h> 21251881Speter#ifdef HAVE_WCHAR_H 22251881Speter#include <wchar.h> 23251881Speter#endif /* HAVE_WCHAR_H */ 24251881Speter#ifdef HAVE_WCTYPE_H 25251881Speter#include <wctype.h> 26251881Speter#endif /* HAVE_WCTYPE_H */ 27251881Speter#ifndef TRE_WCHAR 28251881Speter#include <ctype.h> 29251881Speter#endif /* !TRE_WCHAR */ 30251881Speter#ifdef HAVE_MALLOC_H 31251881Speter#include <malloc.h> 32251881Speter#endif /* HAVE_MALLOC_H */ 33251881Speter 34251881Speter#include "tre-internal.h" 35251881Speter#include "tre-match-utils.h" 36251881Speter#include "tre.h" 37251881Speter#include "xmalloc.h" 38251881Speter 39251881Speter#define TRE_M_COST 0 40251881Speter#define TRE_M_NUM_INS 1 41251881Speter#define TRE_M_NUM_DEL 2 42251881Speter#define TRE_M_NUM_SUBST 3 43251881Speter#define TRE_M_NUM_ERR 4 44251881Speter#define TRE_M_LAST 5 45251881Speter 46251881Speter#define TRE_M_MAX_DEPTH 3 47251881Speter 48251881Spetertypedef struct { 49251881Speter /* State in the TNFA transition table. */ 50251881Speter tre_tnfa_transition_t *state; 51251881Speter /* Position in input string. */ 52251881Speter int pos; 53251881Speter /* Tag values. */ 54251881Speter int *tags; 55251881Speter /* Matching parameters. */ 56251881Speter regaparams_t params; 57251881Speter /* Nesting depth of parameters. This is used as an index in 58251881Speter the `costs' array. */ 59251881Speter int depth; 60251881Speter /* Costs and counter values for different parameter nesting depths. */ 61251881Speter int costs[TRE_M_MAX_DEPTH + 1][TRE_M_LAST]; 62251881Speter} tre_tnfa_approx_reach_t; 63251881Speter 64251881Speter 65251881Speter#ifdef TRE_DEBUG 66251881Speter/* Prints the `reach' array in a readable fashion with DPRINT. */ 67251881Speterstatic void 68251881Spetertre_print_reach(const tre_tnfa_t *tnfa, tre_tnfa_approx_reach_t *reach, 69251881Speter int pos, size_t num_tags) 70251881Speter{ 71251881Speter int id; 72251881Speter 73251881Speter /* Print each state on one line. */ 74251881Speter DPRINT((" reach:\n")); 75251881Speter for (id = 0; id < tnfa->num_states; id++) 76251881Speter { 77251881Speter int i, j; 78251881Speter if (reach[id].pos < pos) 79251881Speter continue; /* Not reached. */ 80251881Speter DPRINT((" %03d, costs ", id)); 81251881Speter for (i = 0; i <= reach[id].depth; i++) 82251881Speter { 83251881Speter DPRINT(("[")); 84251881Speter for (j = 0; j < TRE_M_LAST; j++) 85251881Speter { 86251881Speter DPRINT(("%2d", reach[id].costs[i][j])); 87251881Speter if (j + 1 < TRE_M_LAST) 88251881Speter DPRINT((",")); 89251881Speter } 90251881Speter DPRINT(("]")); 91251881Speter if (i + 1 <= reach[id].depth) 92251881Speter DPRINT((", ")); 93251881Speter } 94251881Speter DPRINT(("\n tags ")); 95251881Speter for (i = 0; i < num_tags; i++) 96251881Speter { 97251881Speter DPRINT(("%02d", reach[id].tags[i])); 98251881Speter if (i + 1 < num_tags) 99251881Speter DPRINT((",")); 100251881Speter } 101251881Speter DPRINT(("\n")); 102251881Speter } 103251881Speter DPRINT(("\n")); 104251881Speter} 105251881Speter#endif /* TRE_DEBUG */ 106251881Speter 107251881Speter 108251881Speter/* Sets the matching parameters in `reach' to the ones defined in the `pa' 109251881Speter array. If `pa' specifies default values, they are taken from 110251881Speter `default_params'. */ 111251881Speterinline static void 112251881Spetertre_set_params(tre_tnfa_approx_reach_t *reach, 113251881Speter int *pa, regaparams_t default_params) 114251881Speter{ 115251881Speter int value; 116251881Speter 117251881Speter /* If depth is increased reset costs and counters to zero for the 118251881Speter new levels. */ 119251881Speter value = pa[TRE_PARAM_DEPTH]; 120251881Speter assert(value <= TRE_M_MAX_DEPTH); 121251881Speter if (value > reach->depth) 122251881Speter { 123251881Speter int i, j; 124251881Speter for (i = reach->depth + 1; i <= value; i++) 125251881Speter for (j = 0; j < TRE_M_LAST; j++) 126251881Speter reach->costs[i][j] = 0; 127251881Speter } 128251881Speter reach->depth = value; 129251881Speter 130251881Speter /* Set insert cost. */ 131251881Speter value = pa[TRE_PARAM_COST_INS]; 132251881Speter if (value == TRE_PARAM_DEFAULT) 133251881Speter reach->params.cost_ins = default_params.cost_ins; 134251881Speter else if (value != TRE_PARAM_UNSET) 135251881Speter reach->params.cost_ins = value; 136251881Speter 137251881Speter /* Set delete cost. */ 138251881Speter value = pa[TRE_PARAM_COST_DEL]; 139251881Speter if (value == TRE_PARAM_DEFAULT) 140251881Speter reach->params.cost_del = default_params.cost_del; 141251881Speter else if (value != TRE_PARAM_UNSET) 142251881Speter reach->params.cost_del = value; 143251881Speter 144251881Speter /* Set substitute cost. */ 145251881Speter value = pa[TRE_PARAM_COST_SUBST]; 146 if (value == TRE_PARAM_DEFAULT) 147 reach->params.cost_subst = default_params.cost_subst; 148 else 149 reach->params.cost_subst = value; 150 151 /* Set maximum cost. */ 152 value = pa[TRE_PARAM_COST_MAX]; 153 if (value == TRE_PARAM_DEFAULT) 154 reach->params.max_cost = default_params.max_cost; 155 else if (value != TRE_PARAM_UNSET) 156 reach->params.max_cost = value; 157 158 /* Set maximum inserts. */ 159 value = pa[TRE_PARAM_MAX_INS]; 160 if (value == TRE_PARAM_DEFAULT) 161 reach->params.max_ins = default_params.max_ins; 162 else if (value != TRE_PARAM_UNSET) 163 reach->params.max_ins = value; 164 165 /* Set maximum deletes. */ 166 value = pa[TRE_PARAM_MAX_DEL]; 167 if (value == TRE_PARAM_DEFAULT) 168 reach->params.max_del = default_params.max_del; 169 else if (value != TRE_PARAM_UNSET) 170 reach->params.max_del = value; 171 172 /* Set maximum substitutes. */ 173 value = pa[TRE_PARAM_MAX_SUBST]; 174 if (value == TRE_PARAM_DEFAULT) 175 reach->params.max_subst = default_params.max_subst; 176 else if (value != TRE_PARAM_UNSET) 177 reach->params.max_subst = value; 178 179 /* Set maximum number of errors. */ 180 value = pa[TRE_PARAM_MAX_ERR]; 181 if (value == TRE_PARAM_DEFAULT) 182 reach->params.max_err = default_params.max_err; 183 else if (value != TRE_PARAM_UNSET) 184 reach->params.max_err = value; 185} 186 187reg_errcode_t 188tre_tnfa_run_approx(const tre_tnfa_t *tnfa, const void *string, int len, 189 tre_str_type_t type, int *match_tags, 190 regamatch_t *match, regaparams_t default_params, 191 int eflags, int *match_end_ofs) 192{ 193 /* State variables required by GET_NEXT_WCHAR. */ 194 tre_char_t prev_c = 0, next_c = 0; 195 const char *str_byte = string; 196 int pos = -1; 197 unsigned int pos_add_next = 1; 198#ifdef TRE_WCHAR 199 const wchar_t *str_wide = string; 200#ifdef TRE_MBSTATE 201 mbstate_t mbstate; 202#endif /* !TRE_WCHAR */ 203#endif /* TRE_WCHAR */ 204 int reg_notbol = eflags & REG_NOTBOL; 205 int reg_noteol = eflags & REG_NOTEOL; 206 int reg_newline = tnfa->cflags & REG_NEWLINE; 207 size_t str_user_end = 0; 208 209 int prev_pos; 210 211 /* Number of tags. */ 212 size_t num_tags; 213 /* The reach tables. */ 214 tre_tnfa_approx_reach_t *reach, *reach_next; 215 /* Tag array for temporary use. */ 216 int *tmp_tags; 217 218 /* End offset of best match so far, or -1 if no match found yet. */ 219 int match_eo = -1; 220 /* Costs of the match. */ 221 int match_costs[TRE_M_LAST]; 222 223 /* Space for temporary data required for matching. */ 224 unsigned char *buf; 225 226 size_t i, id; 227 228 if (!match_tags) 229 num_tags = 0; 230 else 231 num_tags = tnfa->num_tags; 232 233#ifdef TRE_MBSTATE 234 memset(&mbstate, '\0', sizeof(mbstate)); 235#endif /* TRE_MBSTATE */ 236 237 DPRINT(("tre_tnfa_run_approx, input type %d, len %d, eflags %d, " 238 "match_tags %p\n", 239 type, len, eflags, 240 match_tags)); 241 DPRINT(("max cost %d, ins %d, del %d, subst %d\n", 242 default_params.max_cost, 243 default_params.cost_ins, 244 default_params.cost_del, 245 default_params.cost_subst)); 246 247 /* Allocate memory for temporary data required for matching. This needs to 248 be done for every matching operation to be thread safe. This allocates 249 everything in a single large block from the stack frame using alloca() 250 or with malloc() if alloca is unavailable. */ 251 { 252 unsigned char *buf_cursor; 253 /* Space needed for one array of tags. */ 254 size_t tag_bytes = sizeof(*tmp_tags) * num_tags; 255 /* Space needed for one reach table. */ 256 size_t reach_bytes = sizeof(*reach_next) * tnfa->num_states; 257 /* Total space needed. */ 258 size_t total_bytes = reach_bytes * 2 + (tnfa->num_states * 2 + 1 ) * tag_bytes; 259 /* Add some extra to make sure we can align the pointers. The multiplier 260 used here must be equal to the number of ALIGN calls below. */ 261 total_bytes += (sizeof(long) - 1) * 3; 262 263 /* Allocate the memory. */ 264#ifdef TRE_USE_ALLOCA 265 buf = alloca(total_bytes); 266#else /* !TRE_USE_ALLOCA */ 267 buf = xmalloc((unsigned)total_bytes); 268#endif /* !TRE_USE_ALLOCA */ 269 if (!buf) 270 return REG_ESPACE; 271 memset(buf, 0, (size_t)total_bytes); 272 273 /* Allocate `tmp_tags' from `buf'. */ 274 tmp_tags = (void *)buf; 275 buf_cursor = buf + tag_bytes; 276 buf_cursor += ALIGN(buf_cursor, long); 277 278 /* Allocate `reach' from `buf'. */ 279 reach = (void *)buf_cursor; 280 buf_cursor += reach_bytes; 281 buf_cursor += ALIGN(buf_cursor, long); 282 283 /* Allocate `reach_next' from `buf'. */ 284 reach_next = (void *)buf_cursor; 285 buf_cursor += reach_bytes; 286 buf_cursor += ALIGN(buf_cursor, long); 287 288 /* Allocate tag arrays for `reach' and `reach_next' from `buf'. */ 289 for (i = 0; i < tnfa->num_states; i++) 290 { 291 reach[i].tags = (void *)buf_cursor; 292 buf_cursor += tag_bytes; 293 reach_next[i].tags = (void *)buf_cursor; 294 buf_cursor += tag_bytes; 295 } 296 assert(buf_cursor <= buf + total_bytes); 297 } 298 299 for (i = 0; i < TRE_M_LAST; i++) 300 match_costs[i] = INT_MAX; 301 302 /* Mark the reach arrays empty. */ 303 for (i = 0; i < tnfa->num_states; i++) 304 reach[i].pos = reach_next[i].pos = -2; 305 306 prev_pos = pos; 307 GET_NEXT_WCHAR(); 308 pos = 0; 309 310 while (/*CONSTCOND*/1) 311 { 312 DPRINT(("%03d:%2lc/%05d\n", pos, (tre_cint_t)next_c, (int)next_c)); 313 314 /* Add initial states to `reach_next' if an exact match has not yet 315 been found. */ 316 if (match_costs[TRE_M_COST] > 0) 317 { 318 tre_tnfa_transition_t *trans; 319 DPRINT((" init")); 320 for (trans = tnfa->initial; trans->state; trans++) 321 { 322 int stateid = trans->state_id; 323 324 /* If this state is not currently in `reach_next', add it 325 there. */ 326 if (reach_next[stateid].pos < pos) 327 { 328 if (trans->assertions && CHECK_ASSERTIONS(trans->assertions)) 329 { 330 /* Assertions failed, don't add this state. */ 331 DPRINT((" !%d (assert)", stateid)); 332 continue; 333 } 334 DPRINT((" %d", stateid)); 335 reach_next[stateid].state = trans->state; 336 reach_next[stateid].pos = pos; 337 338 /* Compute tag values after this transition. */ 339 for (i = 0; i < num_tags; i++) 340 reach_next[stateid].tags[i] = -1; 341 342 if (trans->tags) 343 for (i = 0; trans->tags[i] >= 0; i++) 344 if ((size_t)trans->tags[i] < num_tags) 345 reach_next[stateid].tags[trans->tags[i]] = pos; 346 347 /* Set the parameters, depth, and costs. */ 348 reach_next[stateid].params = default_params; 349 reach_next[stateid].depth = 0; 350 for (i = 0; i < TRE_M_LAST; i++) 351 reach_next[stateid].costs[0][i] = 0; 352 if (trans->params) 353 tre_set_params(&reach_next[stateid], trans->params, 354 default_params); 355 356 /* If this is the final state, mark the exact match. */ 357 if (trans->state == tnfa->final) 358 { 359 match_eo = pos; 360 for (i = 0; i < num_tags; i++) 361 match_tags[i] = reach_next[stateid].tags[i]; 362 for (i = 0; i < TRE_M_LAST; i++) 363 match_costs[i] = 0; 364 } 365 } 366 } 367 DPRINT(("\n")); 368 } 369 370 371 /* Handle inserts. This is done by pretending there's an epsilon 372 transition from each state in `reach' back to the same state. 373 We don't need to worry about the final state here; this will never 374 give a better match than what we already have. */ 375 for (id = 0; id < tnfa->num_states; id++) 376 { 377 int depth; 378 int cost, cost0; 379 380 if (reach[id].pos != prev_pos) 381 { 382 DPRINT((" insert: %d not reached\n", id)); 383 continue; /* Not reached. */ 384 } 385 386 depth = reach[id].depth; 387 388 /* Compute and check cost at current depth. */ 389 cost = reach[id].costs[depth][TRE_M_COST]; 390 if (reach[id].params.cost_ins != TRE_PARAM_UNSET) 391 cost += reach[id].params.cost_ins; 392 if (cost > reach[id].params.max_cost) 393 continue; /* Cost too large. */ 394 395 /* Check number of inserts at current depth. */ 396 if (reach[id].costs[depth][TRE_M_NUM_INS] + 1 397 > reach[id].params.max_ins) 398 continue; /* Too many inserts. */ 399 400 /* Check total number of errors at current depth. */ 401 if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1 402 > reach[id].params.max_err) 403 continue; /* Too many errors. */ 404 405 /* Compute overall cost. */ 406 cost0 = cost; 407 if (depth > 0) 408 { 409 cost0 = reach[id].costs[0][TRE_M_COST]; 410 if (reach[id].params.cost_ins != TRE_PARAM_UNSET) 411 cost0 += reach[id].params.cost_ins; 412 else 413 cost0 += default_params.cost_ins; 414 } 415 416 DPRINT((" insert: from %d to %d, cost %d: ", id, id, 417 reach[id].costs[depth][TRE_M_COST])); 418 if (reach_next[id].pos == pos 419 && (cost0 >= reach_next[id].costs[0][TRE_M_COST])) 420 { 421 DPRINT(("lose\n")); 422 continue; 423 } 424 DPRINT(("win\n")); 425 426 /* Copy state, position, tags, parameters, and depth. */ 427 reach_next[id].state = reach[id].state; 428 reach_next[id].pos = pos; 429 for (i = 0; i < num_tags; i++) 430 reach_next[id].tags[i] = reach[id].tags[i]; 431 reach_next[id].params = reach[id].params; 432 reach_next[id].depth = reach[id].depth; 433 434 /* Set the costs after this transition. */ 435 memcpy(reach_next[id].costs, reach[id].costs, 436 sizeof(reach_next[id].costs[0][0]) 437 * TRE_M_LAST * (depth + 1)); 438 reach_next[id].costs[depth][TRE_M_COST] = cost; 439 reach_next[id].costs[depth][TRE_M_NUM_INS]++; 440 reach_next[id].costs[depth][TRE_M_NUM_ERR]++; 441 if (depth > 0) 442 { 443 reach_next[id].costs[0][TRE_M_COST] = cost0; 444 reach_next[id].costs[0][TRE_M_NUM_INS]++; 445 reach_next[id].costs[0][TRE_M_NUM_ERR]++; 446 } 447 448 } 449 450 451 /* Handle deletes. This is done by traversing through the whole TNFA 452 pretending that all transitions are epsilon transitions, until 453 no more states can be reached with better costs. */ 454 { 455 /* XXX - dynamic ringbuffer size */ 456 tre_tnfa_approx_reach_t *ringbuffer[512]; 457 tre_tnfa_approx_reach_t **deque_start, **deque_end; 458 459 deque_start = deque_end = ringbuffer; 460 461 /* Add all states in `reach_next' to the deque. */ 462 for (id = 0; id < tnfa->num_states; id++) 463 { 464 if (reach_next[id].pos != pos) 465 continue; 466 *deque_end = &reach_next[id]; 467 deque_end++; 468 assert(deque_end != deque_start); 469 } 470 471 /* Repeat until the deque is empty. */ 472 while (deque_end != deque_start) 473 { 474 tre_tnfa_approx_reach_t *reach_p; 475 int depth; 476 int cost, cost0; 477 tre_tnfa_transition_t *trans; 478 479 /* Pop the first item off the deque. */ 480 reach_p = *deque_start; 481 id = reach_p - reach_next; 482 depth = reach_p->depth; 483 484 /* Compute cost at current depth. */ 485 cost = reach_p->costs[depth][TRE_M_COST]; 486 if (reach_p->params.cost_del != TRE_PARAM_UNSET) 487 cost += reach_p->params.cost_del; 488 489 /* Check cost, number of deletes, and total number of errors 490 at current depth. */ 491 if (cost > reach_p->params.max_cost 492 || (reach_p->costs[depth][TRE_M_NUM_DEL] + 1 493 > reach_p->params.max_del) 494 || (reach_p->costs[depth][TRE_M_NUM_ERR] + 1 495 > reach_p->params.max_err)) 496 { 497 /* Too many errors or cost too large. */ 498 DPRINT((" delete: from %03d: cost too large\n", id)); 499 deque_start++; 500 if (deque_start >= (ringbuffer + 512)) 501 deque_start = ringbuffer; 502 continue; 503 } 504 505 /* Compute overall cost. */ 506 cost0 = cost; 507 if (depth > 0) 508 { 509 cost0 = reach_p->costs[0][TRE_M_COST]; 510 if (reach_p->params.cost_del != TRE_PARAM_UNSET) 511 cost0 += reach_p->params.cost_del; 512 else 513 cost0 += default_params.cost_del; 514 } 515 516 for (trans = reach_p->state; trans->state; trans++) 517 { 518 int dest_id = trans->state_id; 519 DPRINT((" delete: from %03d to %03d, cost %d (%d): ", 520 id, dest_id, cost0, reach_p->params.max_cost)); 521 522 if (trans->assertions && CHECK_ASSERTIONS(trans->assertions)) 523 { 524 DPRINT(("assertion failed\n")); 525 continue; 526 } 527 528 /* Compute tag values after this transition. */ 529 for (i = 0; i < num_tags; i++) 530 tmp_tags[i] = reach_p->tags[i]; 531 if (trans->tags) 532 for (i = 0; trans->tags[i] >= 0; i++) 533 if ((size_t)trans->tags[i] < num_tags) 534 tmp_tags[trans->tags[i]] = pos; 535 536 /* If another path has also reached this state, choose the one 537 with the smallest cost or best tags if costs are equal. */ 538 if (reach_next[dest_id].pos == pos 539 && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST] 540 || (cost0 == reach_next[dest_id].costs[0][TRE_M_COST] 541 && (!match_tags 542 || !tre_tag_order(num_tags, 543 tnfa->tag_directions, 544 tmp_tags, 545 reach_next[dest_id].tags))))) 546 { 547 DPRINT(("lose, cost0 %d, have %d\n", 548 cost0, reach_next[dest_id].costs[0][TRE_M_COST])); 549 continue; 550 } 551 DPRINT(("win\n")); 552 553 /* Set state, position, tags, parameters, depth, and costs. */ 554 reach_next[dest_id].state = trans->state; 555 reach_next[dest_id].pos = pos; 556 for (i = 0; i < num_tags; i++) 557 reach_next[dest_id].tags[i] = tmp_tags[i]; 558 559 reach_next[dest_id].params = reach_p->params; 560 if (trans->params) 561 tre_set_params(&reach_next[dest_id], trans->params, 562 default_params); 563 564 reach_next[dest_id].depth = reach_p->depth; 565 memcpy(&reach_next[dest_id].costs, 566 reach_p->costs, 567 sizeof(reach_p->costs[0][0]) 568 * TRE_M_LAST * (depth + 1)); 569 reach_next[dest_id].costs[depth][TRE_M_COST] = cost; 570 reach_next[dest_id].costs[depth][TRE_M_NUM_DEL]++; 571 reach_next[dest_id].costs[depth][TRE_M_NUM_ERR]++; 572 if (depth > 0) 573 { 574 reach_next[dest_id].costs[0][TRE_M_COST] = cost0; 575 reach_next[dest_id].costs[0][TRE_M_NUM_DEL]++; 576 reach_next[dest_id].costs[0][TRE_M_NUM_ERR]++; 577 } 578 579 if (trans->state == tnfa->final 580 && (match_eo < 0 581 || match_costs[TRE_M_COST] > cost0 582 || (match_costs[TRE_M_COST] == cost0 583 && (num_tags > 0 584 && tmp_tags[0] <= match_tags[0])))) 585 { 586 DPRINT((" setting new match at %d, cost %d\n", 587 pos, cost0)); 588 match_eo = pos; 589 memcpy(match_costs, reach_next[dest_id].costs[0], 590 sizeof(match_costs[0]) * TRE_M_LAST); 591 for (i = 0; i < num_tags; i++) 592 match_tags[i] = tmp_tags[i]; 593 } 594 595 /* Add to the end of the deque. */ 596 *deque_end = &reach_next[dest_id]; 597 deque_end++; 598 if (deque_end >= (ringbuffer + 512)) 599 deque_end = ringbuffer; 600 assert(deque_end != deque_start); 601 } 602 deque_start++; 603 if (deque_start >= (ringbuffer + 512)) 604 deque_start = ringbuffer; 605 } 606 607 } 608 609#ifdef TRE_DEBUG 610 tre_print_reach(tnfa, reach_next, pos, num_tags); 611#endif /* TRE_DEBUG */ 612 613 /* Check for end of string. */ 614 if (len < 0) 615 { 616 if (type == STR_USER) 617 { 618 if (str_user_end) 619 break; 620 } 621 else if (next_c == L'\0') 622 break; 623 } 624 else 625 { 626 if (pos >= len) 627 break; 628 } 629 630 prev_pos = pos; 631 GET_NEXT_WCHAR(); 632 633 /* Swap `reach' and `reach_next'. */ 634 { 635 tre_tnfa_approx_reach_t *tmp; 636 tmp = reach; 637 reach = reach_next; 638 reach_next = tmp; 639 } 640 641 /* Handle exact matches and substitutions. */ 642 for (id = 0; id < tnfa->num_states; id++) 643 { 644 tre_tnfa_transition_t *trans; 645 646 if (reach[id].pos < prev_pos) 647 continue; /* Not reached. */ 648 for (trans = reach[id].state; trans->state; trans++) 649 { 650 int dest_id; 651 int depth; 652 int cost, cost0, err; 653 654 if (trans->assertions 655 && (CHECK_ASSERTIONS(trans->assertions) 656 || CHECK_CHAR_CLASSES(trans, tnfa, eflags))) 657 { 658 DPRINT((" exact, from %d: assert failed\n", id)); 659 continue; 660 } 661 662 depth = reach[id].depth; 663 dest_id = trans->state_id; 664 665 cost = reach[id].costs[depth][TRE_M_COST]; 666 cost0 = reach[id].costs[0][TRE_M_COST]; 667 err = 0; 668 669 if (trans->code_min > (tre_cint_t)prev_c 670 || trans->code_max < (tre_cint_t)prev_c) 671 { 672 /* Handle substitutes. The required character was not in 673 the string, so match it in place of whatever was supposed 674 to be there and increase costs accordingly. */ 675 err = 1; 676 677 /* Compute and check cost at current depth. */ 678 cost = reach[id].costs[depth][TRE_M_COST]; 679 if (reach[id].params.cost_subst != TRE_PARAM_UNSET) 680 cost += reach[id].params.cost_subst; 681 if (cost > reach[id].params.max_cost) 682 continue; /* Cost too large. */ 683 684 /* Check number of substitutes at current depth. */ 685 if (reach[id].costs[depth][TRE_M_NUM_SUBST] + 1 686 > reach[id].params.max_subst) 687 continue; /* Too many substitutes. */ 688 689 /* Check total number of errors at current depth. */ 690 if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1 691 > reach[id].params.max_err) 692 continue; /* Too many errors. */ 693 694 /* Compute overall cost. */ 695 cost0 = cost; 696 if (depth > 0) 697 { 698 cost0 = reach[id].costs[0][TRE_M_COST]; 699 if (reach[id].params.cost_subst != TRE_PARAM_UNSET) 700 cost0 += reach[id].params.cost_subst; 701 else 702 cost0 += default_params.cost_subst; 703 } 704 DPRINT((" subst, from %03d to %03d, cost %d: ", 705 id, dest_id, cost0)); 706 } 707 else 708 DPRINT((" exact, from %03d to %03d, cost %d: ", 709 id, dest_id, cost0)); 710 711 /* Compute tag values after this transition. */ 712 for (i = 0; i < num_tags; i++) 713 tmp_tags[i] = reach[id].tags[i]; 714 if (trans->tags) 715 for (i = 0; trans->tags[i] >= 0; i++) 716 if ((size_t)trans->tags[i] < num_tags) 717 tmp_tags[trans->tags[i]] = pos; 718 719 /* If another path has also reached this state, choose the 720 one with the smallest cost or best tags if costs are equal. */ 721 if (reach_next[dest_id].pos == pos 722 && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST] 723 || (cost0 == reach_next[dest_id].costs[0][TRE_M_COST] 724 && !tre_tag_order(num_tags, tnfa->tag_directions, 725 tmp_tags, 726 reach_next[dest_id].tags)))) 727 { 728 DPRINT(("lose\n")); 729 continue; 730 } 731 DPRINT(("win %d %d\n", 732 reach_next[dest_id].pos, 733 reach_next[dest_id].costs[0][TRE_M_COST])); 734 735 /* Set state, position, tags, and depth. */ 736 reach_next[dest_id].state = trans->state; 737 reach_next[dest_id].pos = pos; 738 for (i = 0; i < num_tags; i++) 739 reach_next[dest_id].tags[i] = tmp_tags[i]; 740 reach_next[dest_id].depth = reach[id].depth; 741 742 /* Set parameters. */ 743 reach_next[dest_id].params = reach[id].params; 744 if (trans->params) 745 tre_set_params(&reach_next[dest_id], trans->params, 746 default_params); 747 748 /* Set the costs after this transition. */ 749 memcpy(&reach_next[dest_id].costs, 750 reach[id].costs, 751 sizeof(reach[id].costs[0][0]) 752 * TRE_M_LAST * (depth + 1)); 753 reach_next[dest_id].costs[depth][TRE_M_COST] = cost; 754 reach_next[dest_id].costs[depth][TRE_M_NUM_SUBST] += err; 755 reach_next[dest_id].costs[depth][TRE_M_NUM_ERR] += err; 756 if (depth > 0) 757 { 758 reach_next[dest_id].costs[0][TRE_M_COST] = cost0; 759 reach_next[dest_id].costs[0][TRE_M_NUM_SUBST] += err; 760 reach_next[dest_id].costs[0][TRE_M_NUM_ERR] += err; 761 } 762 763 if (trans->state == tnfa->final 764 && (match_eo < 0 765 || cost0 < match_costs[TRE_M_COST] 766 || (cost0 == match_costs[TRE_M_COST] 767 && num_tags > 0 && tmp_tags[0] <= match_tags[0]))) 768 { 769 DPRINT((" setting new match at %d, cost %d\n", 770 pos, cost0)); 771 match_eo = pos; 772 for (i = 0; i < TRE_M_LAST; i++) 773 match_costs[i] = reach_next[dest_id].costs[0][i]; 774 for (i = 0; i < num_tags; i++) 775 match_tags[i] = tmp_tags[i]; 776 } 777 } 778 } 779 } 780 781 DPRINT(("match end offset = %d, match cost = %d\n", match_eo, 782 match_costs[TRE_M_COST])); 783 784#ifndef TRE_USE_ALLOCA 785 if (buf) 786 xfree(buf); 787#endif /* !TRE_USE_ALLOCA */ 788 789 match->cost = match_costs[TRE_M_COST]; 790 match->num_ins = match_costs[TRE_M_NUM_INS]; 791 match->num_del = match_costs[TRE_M_NUM_DEL]; 792 match->num_subst = match_costs[TRE_M_NUM_SUBST]; 793 *match_end_ofs = match_eo; 794 795 return match_eo >= 0 ? REG_OK : REG_NOMATCH; 796} 797