1/* 2 tre-compile.c - TRE regex compiler 3 4 This software is released under a BSD-style license. 5 See the file LICENSE for details and copyright. 6 7*/ 8 9/* 10 TODO: 11 - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive 12 function calls. 13*/ 14 15 16#ifdef HAVE_CONFIG_H 17#include <config.h> 18#endif /* HAVE_CONFIG_H */ 19#include <stdio.h> 20#include <assert.h> 21#include <string.h> 22 23#include "tre-internal.h" 24#include "tre-mem.h" 25#include "tre-stack.h" 26#include "tre-ast.h" 27#include "tre-parse.h" 28#include "tre-compile.h" 29#include "tre.h" 30#include "xmalloc.h" 31 32/* 33 Algorithms to setup tags so that submatch addressing can be done. 34*/ 35 36 37/* Inserts a catenation node to the root of the tree given in `node'. 38 As the left child a new tag with number `tag_id' to `node' is added, 39 and the right child is the old root. */ 40static reg_errcode_t 41tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id) 42{ 43 tre_catenation_t *c; 44 45 DPRINT(("add_tag_left: tag %d\n", tag_id)); 46 47 c = tre_mem_alloc(mem, sizeof(*c)); 48 if (c == NULL) 49 return REG_ESPACE; 50 c->left = tre_ast_new_literal(mem, TAG, tag_id, -1); 51 if (c->left == NULL) 52 return REG_ESPACE; 53 c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t)); 54 if (c->right == NULL) 55 return REG_ESPACE; 56 57 c->right->obj = node->obj; 58 c->right->type = node->type; 59 c->right->nullable = -1; 60 c->right->submatch_id = -1; 61 c->right->firstpos = NULL; 62 c->right->lastpos = NULL; 63 c->right->num_tags = 0; 64 node->obj = c; 65 node->type = CATENATION; 66 return REG_OK; 67} 68 69/* Inserts a catenation node to the root of the tree given in `node'. 70 As the right child a new tag with number `tag_id' to `node' is added, 71 and the left child is the old root. */ 72static reg_errcode_t 73tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id) 74{ 75 tre_catenation_t *c; 76 77 DPRINT(("tre_add_tag_right: tag %d\n", tag_id)); 78 79 c = tre_mem_alloc(mem, sizeof(*c)); 80 if (c == NULL) 81 return REG_ESPACE; 82 c->right = tre_ast_new_literal(mem, TAG, tag_id, -1); 83 if (c->right == NULL) 84 return REG_ESPACE; 85 c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t)); 86 if (c->left == NULL) 87 return REG_ESPACE; 88 89 c->left->obj = node->obj; 90 c->left->type = node->type; 91 c->left->nullable = -1; 92 c->left->submatch_id = -1; 93 c->left->firstpos = NULL; 94 c->left->lastpos = NULL; 95 c->left->num_tags = 0; 96 node->obj = c; 97 node->type = CATENATION; 98 return REG_OK; 99} 100 101typedef enum { 102 ADDTAGS_RECURSE, 103 ADDTAGS_AFTER_ITERATION, 104 ADDTAGS_AFTER_UNION_LEFT, 105 ADDTAGS_AFTER_UNION_RIGHT, 106 ADDTAGS_AFTER_CAT_LEFT, 107 ADDTAGS_AFTER_CAT_RIGHT, 108 ADDTAGS_SET_SUBMATCH_END 109} tre_addtags_symbol_t; 110 111 112typedef struct { 113 int tag; 114 int next_tag; 115} tre_tag_states_t; 116 117 118/* Go through `regset' and set submatch data for submatches that are 119 using this tag. */ 120static void 121tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag) 122{ 123 int i; 124 125 for (i = 0; regset[i] >= 0; i++) 126 { 127 int id = regset[i] / 2; 128 int start = !(regset[i] % 2); 129 DPRINT((" Using tag %d for %s offset of " 130 "submatch %d\n", tag, 131 start ? "start" : "end", id)); 132 if (start) 133 tnfa->submatch_data[id].so_tag = tag; 134 else 135 tnfa->submatch_data[id].eo_tag = tag; 136 } 137 regset[0] = -1; 138} 139 140 141/* Adds tags to appropriate locations in the parse tree in `tree', so that 142 subexpressions marked for submatch addressing can be traced. */ 143static reg_errcode_t 144tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree, 145 tre_tnfa_t *tnfa) 146{ 147 reg_errcode_t status = REG_OK; 148 tre_addtags_symbol_t symbol; 149 tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */ 150 int bottom = tre_stack_num_objects(stack); 151 /* True for first pass (counting number of needed tags) */ 152 int first_pass = (mem == NULL || tnfa == NULL); 153 int *regset, *orig_regset; 154 int num_tags = 0; /* Total number of tags. */ 155 int num_minimals = 0; /* Number of special minimal tags. */ 156 int tag = 0; /* The tag that is to be added next. */ 157 int next_tag = 1; /* Next tag to use after this one. */ 158 int *parents; /* Stack of submatches the current submatch is 159 contained in. */ 160 int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */ 161 tre_tag_states_t *saved_states; 162 163 tre_tag_direction_t direction = TRE_TAG_MINIMIZE; 164 if (!first_pass) 165 { 166 tnfa->end_tag = 0; 167 tnfa->minimal_tags[0] = -1; 168 } 169 170 regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2)); 171 if (regset == NULL) 172 return REG_ESPACE; 173 regset[0] = -1; 174 orig_regset = regset; 175 176 parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1)); 177 if (parents == NULL) 178 { 179 xfree(regset); 180 return REG_ESPACE; 181 } 182 parents[0] = -1; 183 184 saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1)); 185 if (saved_states == NULL) 186 { 187 xfree(regset); 188 xfree(parents); 189 return REG_ESPACE; 190 } 191 else 192 { 193 unsigned int i; 194 for (i = 0; i <= tnfa->num_submatches; i++) 195 saved_states[i].tag = -1; 196 } 197 198 STACK_PUSH(stack, voidptr, node); 199 STACK_PUSH(stack, long, ADDTAGS_RECURSE); 200 201 while (tre_stack_num_objects(stack) > bottom) 202 { 203 if (status != REG_OK) 204 break; 205 206 symbol = (tre_addtags_symbol_t)tre_stack_pop_long(stack); 207 switch (symbol) 208 { 209 210 case ADDTAGS_SET_SUBMATCH_END: 211 { 212 int id = (int)tre_stack_pop_long(stack); 213 int i; 214 215 /* Add end of this submatch to regset. */ 216 for (i = 0; regset[i] >= 0; i++); 217 regset[i] = id * 2 + 1; 218 regset[i + 1] = -1; 219 220 /* Pop this submatch from the parents stack. */ 221 for (i = 0; parents[i] >= 0; i++); 222 parents[i - 1] = -1; 223 break; 224 } 225 226 case ADDTAGS_RECURSE: 227 node = tre_stack_pop_voidptr(stack); 228 229 if (node->submatch_id >= 0) 230 { 231 int id = node->submatch_id; 232 int i; 233 234 235 /* Add start of this submatch to regset. */ 236 for (i = 0; regset[i] >= 0; i++); 237 regset[i] = id * 2; 238 regset[i + 1] = -1; 239 240 if (!first_pass) 241 { 242 for (i = 0; parents[i] >= 0; i++); 243 tnfa->submatch_data[id].parents = NULL; 244 if (i > 0) 245 { 246 int *p = xmalloc(sizeof(*p) * (i + 1)); 247 if (p == NULL) 248 { 249 status = REG_ESPACE; 250 break; 251 } 252 assert(tnfa->submatch_data[id].parents == NULL); 253 tnfa->submatch_data[id].parents = p; 254 for (i = 0; parents[i] >= 0; i++) 255 p[i] = parents[i]; 256 p[i] = -1; 257 } 258 } 259 260 /* Add end of this submatch to regset after processing this 261 node. */ 262 STACK_PUSHX(stack, long, node->submatch_id); 263 STACK_PUSHX(stack, long, ADDTAGS_SET_SUBMATCH_END); 264 } 265 266 switch (node->type) 267 { 268 case LITERAL: 269 { 270 tre_literal_t *lit = node->obj; 271 272 if (!IS_SPECIAL(lit) || IS_BACKREF(lit)) 273 { 274 int i; 275 DPRINT(("Literal %d-%d\n", 276 (int)lit->code_min, (int)lit->code_max)); 277 if (regset[0] >= 0) 278 { 279 /* Regset is not empty, so add a tag before the 280 literal or backref. */ 281 if (!first_pass) 282 { 283 status = tre_add_tag_left(mem, node, tag); 284 tnfa->tag_directions[tag] = direction; 285 if (minimal_tag >= 0) 286 { 287 DPRINT(("Minimal %d, %d\n", minimal_tag, tag)); 288 for (i = 0; tnfa->minimal_tags[i] >= 0; i++); 289 tnfa->minimal_tags[i] = tag; 290 tnfa->minimal_tags[i + 1] = minimal_tag; 291 tnfa->minimal_tags[i + 2] = -1; 292 minimal_tag = -1; 293 num_minimals++; 294 } 295 tre_purge_regset(regset, tnfa, tag); 296 } 297 else 298 { 299 DPRINT((" num_tags = 1\n")); 300 node->num_tags = 1; 301 } 302 303 DPRINT((" num_tags++\n")); 304 regset[0] = -1; 305 tag = next_tag; 306 num_tags++; 307 next_tag++; 308 } 309 } 310 else 311 { 312 assert(!IS_TAG(lit)); 313 } 314 break; 315 } 316 case CATENATION: 317 { 318 tre_catenation_t *cat = node->obj; 319 tre_ast_node_t *left = cat->left; 320 tre_ast_node_t *right = cat->right; 321 int reserved_tag = -1; 322 DPRINT(("Catenation, next_tag = %d\n", next_tag)); 323 324 325 /* After processing right child. */ 326 STACK_PUSHX(stack, voidptr, node); 327 STACK_PUSHX(stack, long, ADDTAGS_AFTER_CAT_RIGHT); 328 329 /* Process right child. */ 330 STACK_PUSHX(stack, voidptr, right); 331 STACK_PUSHX(stack, long, ADDTAGS_RECURSE); 332 333 /* After processing left child. */ 334 STACK_PUSHX(stack, long, (long)(next_tag + left->num_tags)); 335 DPRINT((" Pushing %d for after left\n", 336 next_tag + left->num_tags)); 337 if (left->num_tags > 0 && right->num_tags > 0) 338 { 339 /* Reserve the next tag to the right child. */ 340 DPRINT((" Reserving next_tag %d to right child\n", 341 next_tag)); 342 reserved_tag = next_tag; 343 next_tag++; 344 } 345 STACK_PUSHX(stack, long, reserved_tag); 346 STACK_PUSHX(stack, long, ADDTAGS_AFTER_CAT_LEFT); 347 348 /* Process left child. */ 349 STACK_PUSHX(stack, voidptr, left); 350 STACK_PUSHX(stack, long, ADDTAGS_RECURSE); 351 352 } 353 break; 354 case ITERATION: 355 { 356 tre_iteration_t *iter = node->obj; 357 DPRINT(("Iteration\n")); 358 359 if (first_pass) 360 { 361 STACK_PUSHX(stack, long, regset[0] >= 0 || iter->minimal); 362 } 363 else 364 { 365 STACK_PUSHX(stack, long, tag); 366 STACK_PUSHX(stack, long, iter->minimal); 367 } 368 STACK_PUSHX(stack, voidptr, node); 369 STACK_PUSHX(stack, long, ADDTAGS_AFTER_ITERATION); 370 371 STACK_PUSHX(stack, voidptr, iter->arg); 372 STACK_PUSHX(stack, long, ADDTAGS_RECURSE); 373 374 /* Regset is not empty, so add a tag here. */ 375 if (regset[0] >= 0 || iter->minimal) 376 { 377 if (!first_pass) 378 { 379 int i; 380 status = tre_add_tag_left(mem, node, tag); 381 if (iter->minimal) 382 tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE; 383 else 384 tnfa->tag_directions[tag] = direction; 385 if (minimal_tag >= 0) 386 { 387 DPRINT(("Minimal %d, %d\n", minimal_tag, tag)); 388 for (i = 0; tnfa->minimal_tags[i] >= 0; i++); 389 tnfa->minimal_tags[i] = tag; 390 tnfa->minimal_tags[i + 1] = minimal_tag; 391 tnfa->minimal_tags[i + 2] = -1; 392 minimal_tag = -1; 393 num_minimals++; 394 } 395 tre_purge_regset(regset, tnfa, tag); 396 } 397 398 DPRINT((" num_tags++\n")); 399 regset[0] = -1; 400 tag = next_tag; 401 num_tags++; 402 next_tag++; 403 } 404 direction = TRE_TAG_MINIMIZE; 405 } 406 break; 407 case UNION: 408 { 409 tre_union_t *uni = node->obj; 410 tre_ast_node_t *left = uni->left; 411 tre_ast_node_t *right = uni->right; 412 int left_tag; 413 int right_tag; 414 415 if (regset[0] >= 0) 416 { 417 left_tag = next_tag; 418 right_tag = next_tag + 1; 419 } 420 else 421 { 422 left_tag = tag; 423 right_tag = next_tag; 424 } 425 426 DPRINT(("Union\n")); 427 428 /* After processing right child. */ 429 STACK_PUSHX(stack, long, right_tag); 430 STACK_PUSHX(stack, long, left_tag); 431 STACK_PUSHX(stack, voidptr, regset); 432 STACK_PUSHX(stack, long, regset[0] >= 0); 433 STACK_PUSHX(stack, voidptr, node); 434 STACK_PUSHX(stack, voidptr, right); 435 STACK_PUSHX(stack, voidptr, left); 436 STACK_PUSHX(stack, long, ADDTAGS_AFTER_UNION_RIGHT); 437 438 /* Process right child. */ 439 STACK_PUSHX(stack, voidptr, right); 440 STACK_PUSHX(stack, long, ADDTAGS_RECURSE); 441 442 /* After processing left child. */ 443 STACK_PUSHX(stack, long, ADDTAGS_AFTER_UNION_LEFT); 444 445 /* Process left child. */ 446 STACK_PUSHX(stack, voidptr, left); 447 STACK_PUSHX(stack, long, ADDTAGS_RECURSE); 448 449 /* Regset is not empty, so add a tag here. */ 450 if (regset[0] >= 0) 451 { 452 if (!first_pass) 453 { 454 int i; 455 status = tre_add_tag_left(mem, node, tag); 456 tnfa->tag_directions[tag] = direction; 457 if (minimal_tag >= 0) 458 { 459 DPRINT(("Minimal %d, %d\n", minimal_tag, tag)); 460 for (i = 0; tnfa->minimal_tags[i] >= 0; i++); 461 tnfa->minimal_tags[i] = tag; 462 tnfa->minimal_tags[i + 1] = minimal_tag; 463 tnfa->minimal_tags[i + 2] = -1; 464 minimal_tag = -1; 465 num_minimals++; 466 } 467 tre_purge_regset(regset, tnfa, tag); 468 } 469 470 DPRINT((" num_tags++\n")); 471 regset[0] = -1; 472 tag = next_tag; 473 num_tags++; 474 next_tag++; 475 } 476 477 if (node->num_submatches > 0) 478 { 479 /* The next two tags are reserved for markers. */ 480 next_tag++; 481 tag = next_tag; 482 next_tag++; 483 } 484 485 break; 486 } 487 } 488 489 if (node->submatch_id >= 0) 490 { 491 int i; 492 /* Push this submatch on the parents stack. */ 493 for (i = 0; parents[i] >= 0; i++); 494 parents[i] = node->submatch_id; 495 parents[i + 1] = -1; 496 } 497 498 break; /* end case: ADDTAGS_RECURSE */ 499 500 case ADDTAGS_AFTER_ITERATION: 501 { 502 int minimal = 0; 503 int enter_tag; 504 node = tre_stack_pop_voidptr(stack); 505 if (first_pass) 506 { 507 node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags 508 + tre_stack_pop_long(stack); 509 minimal_tag = -1; 510 } 511 else 512 { 513 minimal = tre_stack_pop_int(stack); 514 enter_tag = tre_stack_pop_int(stack); 515 if (minimal) 516 minimal_tag = enter_tag; 517 } 518 519 DPRINT(("After iteration\n")); 520 if (!first_pass) 521 { 522 DPRINT((" Setting direction to %s\n", 523 minimal ? "minimize" : "maximize")); 524 if (minimal) 525 direction = TRE_TAG_MINIMIZE; 526 else 527 direction = TRE_TAG_MAXIMIZE; 528 } 529 break; 530 } 531 532 case ADDTAGS_AFTER_CAT_LEFT: 533 { 534 int new_tag = tre_stack_pop_int(stack); 535 next_tag = tre_stack_pop_int(stack); 536 DPRINT(("After cat left, tag = %d, next_tag = %d\n", 537 tag, next_tag)); 538 if (new_tag >= 0) 539 { 540 DPRINT((" Setting tag to %d\n", new_tag)); 541 tag = new_tag; 542 } 543 break; 544 } 545 546 case ADDTAGS_AFTER_CAT_RIGHT: 547 DPRINT(("After cat right\n")); 548 node = tre_stack_pop_voidptr(stack); 549 if (first_pass) 550 node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags 551 + ((tre_catenation_t *)node->obj)->right->num_tags; 552 break; 553 554 case ADDTAGS_AFTER_UNION_LEFT: 555 DPRINT(("After union left\n")); 556 /* Lift the bottom of the `regset' array so that when processing 557 the right operand the items currently in the array are 558 invisible. The original bottom was saved at ADDTAGS_UNION and 559 will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */ 560 while (*regset >= 0) 561 regset++; 562 break; 563 564 case ADDTAGS_AFTER_UNION_RIGHT: 565 { 566 int added_tags, tag_left, tag_right; 567 tre_ast_node_t *left = tre_stack_pop_voidptr(stack); 568 tre_ast_node_t *right = tre_stack_pop_voidptr(stack); 569 DPRINT(("After union right\n")); 570 node = tre_stack_pop_voidptr(stack); 571 added_tags = tre_stack_pop_int(stack); 572 if (first_pass) 573 { 574 node->num_tags = ((tre_union_t *)node->obj)->left->num_tags 575 + ((tre_union_t *)node->obj)->right->num_tags + added_tags 576 + ((node->num_submatches > 0) ? 2 : 0); 577 } 578 regset = tre_stack_pop_voidptr(stack); 579 tag_left = tre_stack_pop_int(stack); 580 tag_right = tre_stack_pop_int(stack); 581 582 /* Add tags after both children, the left child gets a smaller 583 tag than the right child. This guarantees that we prefer 584 the left child over the right child. */ 585 /* XXX - This is not always necessary (if the children have 586 tags which must be seen for every match of that child). */ 587 /* XXX - Check if this is the only place where tre_add_tag_right 588 is used. If so, use tre_add_tag_left (putting the tag before 589 the child as opposed after the child) and throw away 590 tre_add_tag_right. */ 591 if (node->num_submatches > 0) 592 { 593 if (!first_pass) 594 { 595 status = tre_add_tag_right(mem, left, tag_left); 596 tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE; 597 status = tre_add_tag_right(mem, right, tag_right); 598 tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE; 599 } 600 DPRINT((" num_tags += 2\n")); 601 num_tags += 2; 602 } 603 direction = TRE_TAG_MAXIMIZE; 604 break; 605 } 606 607 default: 608 assert(0); 609 break; 610 611 } /* end switch(symbol) */ 612 } /* end while(tre_stack_num_objects(stack) > bottom) */ 613 614 if (!first_pass) 615 tre_purge_regset(regset, tnfa, tag); 616 617 if (!first_pass && minimal_tag >= 0) 618 { 619 int i; 620 DPRINT(("Minimal %d, %d\n", minimal_tag, tag)); 621 for (i = 0; tnfa->minimal_tags[i] >= 0; i++); 622 tnfa->minimal_tags[i] = tag; 623 tnfa->minimal_tags[i + 1] = minimal_tag; 624 tnfa->minimal_tags[i + 2] = -1; 625 minimal_tag = -1; 626 num_minimals++; 627 } 628 629 DPRINT(("tre_add_tags: %s complete. Number of tags %d.\n", 630 first_pass? "First pass" : "Second pass", num_tags)); 631 632 assert(tree->num_tags == num_tags); 633 tnfa->end_tag = num_tags; 634 tnfa->num_tags = num_tags; 635 tnfa->num_minimals = num_minimals; 636 xfree(orig_regset); 637 xfree(parents); 638 xfree(saved_states); 639 return status; 640} 641 642 643 644/* 645 AST to TNFA compilation routines. 646*/ 647 648typedef enum { 649 COPY_RECURSE, 650 COPY_SET_RESULT_PTR 651} tre_copyast_symbol_t; 652 653/* Flags for tre_copy_ast(). */ 654#define COPY_REMOVE_TAGS 1 655#define COPY_MAXIMIZE_FIRST_TAG 2 656 657static reg_errcode_t 658tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast, 659 int flags, int *pos_add, tre_tag_direction_t *tag_directions, 660 tre_ast_node_t **copy, int *max_pos) 661{ 662 reg_errcode_t status = REG_OK; 663 int bottom = tre_stack_num_objects(stack); 664 int num_copied = 0; 665 int first_tag = 1; 666 tre_ast_node_t **result = copy; 667 tre_copyast_symbol_t symbol; 668 669 STACK_PUSH(stack, voidptr, ast); 670 STACK_PUSH(stack, long, COPY_RECURSE); 671 672 while (status == REG_OK && tre_stack_num_objects(stack) > bottom) 673 { 674 tre_ast_node_t *node; 675 if (status != REG_OK) 676 break; 677 678 symbol = (tre_copyast_symbol_t)tre_stack_pop_long(stack); 679 switch (symbol) 680 { 681 case COPY_SET_RESULT_PTR: 682 result = tre_stack_pop_voidptr(stack); 683 break; 684 case COPY_RECURSE: 685 node = tre_stack_pop_voidptr(stack); 686 switch (node->type) 687 { 688 case LITERAL: 689 { 690 tre_literal_t *lit = node->obj; 691 int pos = lit->position; 692 int min = lit->code_min; 693 int max = lit->code_max; 694 if (!IS_SPECIAL(lit) || IS_BACKREF(lit)) 695 { 696 /* XXX - e.g. [ab] has only one position but two 697 nodes, so we are creating holes in the state space 698 here. Not fatal, just wastes memory. */ 699 pos += *pos_add; 700 num_copied++; 701 } 702 else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS)) 703 { 704 /* Change this tag to empty. */ 705 min = EMPTY; 706 max = pos = -1; 707 } 708 else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG) 709 && first_tag) 710 { 711 /* Maximize the first tag. */ 712 tag_directions[max] = TRE_TAG_MAXIMIZE; 713 first_tag = 0; 714 } 715 *result = tre_ast_new_literal(mem, min, max, pos); 716 if (*result == NULL) 717 status = REG_ESPACE; 718 719 if (pos > *max_pos) 720 *max_pos = pos; 721 break; 722 } 723 case UNION: 724 { 725 tre_union_t *uni = node->obj; 726 tre_union_t *tmp; 727 *result = tre_ast_new_union(mem, uni->left, uni->right); 728 if (*result == NULL) 729 { 730 status = REG_ESPACE; 731 break; 732 } 733 tmp = (*result)->obj; 734 result = &tmp->left; 735 STACK_PUSHX(stack, voidptr, uni->right); 736 STACK_PUSHX(stack, long, COPY_RECURSE); 737 STACK_PUSHX(stack, voidptr, &tmp->right); 738 STACK_PUSHX(stack, long, COPY_SET_RESULT_PTR); 739 STACK_PUSHX(stack, voidptr, uni->left); 740 STACK_PUSHX(stack, long, COPY_RECURSE); 741 break; 742 } 743 case CATENATION: 744 { 745 tre_catenation_t *cat = node->obj; 746 tre_catenation_t *tmp; 747 *result = tre_ast_new_catenation(mem, cat->left, cat->right); 748 if (*result == NULL) 749 { 750 status = REG_ESPACE; 751 break; 752 } 753 tmp = (*result)->obj; 754 tmp->left = NULL; 755 tmp->right = NULL; 756 result = &tmp->left; 757 758 STACK_PUSHX(stack, voidptr, cat->right); 759 STACK_PUSHX(stack, long, COPY_RECURSE); 760 STACK_PUSHX(stack, voidptr, &tmp->right); 761 STACK_PUSHX(stack, long, COPY_SET_RESULT_PTR); 762 STACK_PUSHX(stack, voidptr, cat->left); 763 STACK_PUSHX(stack, long, COPY_RECURSE); 764 break; 765 } 766 case ITERATION: 767 { 768 tre_iteration_t *iter = node->obj; 769 STACK_PUSHX(stack, voidptr, iter->arg); 770 STACK_PUSHX(stack, long, COPY_RECURSE); 771 *result = tre_ast_new_iter(mem, iter->arg, iter->min, 772 iter->max, iter->minimal); 773 if (*result == NULL) 774 { 775 status = REG_ESPACE; 776 break; 777 } 778 iter = (*result)->obj; 779 result = &iter->arg; 780 break; 781 } 782 default: 783 assert(0); 784 break; 785 } 786 break; 787 } 788 } 789 *pos_add += num_copied; 790 return status; 791} 792 793typedef enum { 794 EXPAND_RECURSE, 795 EXPAND_AFTER_ITER 796} tre_expand_ast_symbol_t; 797 798/* Expands each iteration node that has a finite nonzero minimum or maximum 799 iteration count to a catenated sequence of copies of the node. */ 800static reg_errcode_t 801tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast, 802 int *position, tre_tag_direction_t *tag_directions, 803 int *max_depth) 804{ 805 reg_errcode_t status = REG_OK; 806 int bottom = tre_stack_num_objects(stack); 807 int pos_add = 0; 808 int pos_add_total = 0; 809 int max_pos = 0; 810 /* Current approximate matching parameters. */ 811 int params[TRE_PARAM_LAST]; 812 /* Approximate parameter nesting level. */ 813 int params_depth = 0; 814 int iter_depth = 0; 815 int i; 816 817 for (i = 0; i < TRE_PARAM_LAST; i++) 818 params[i] = TRE_PARAM_DEFAULT; 819 820 STACK_PUSHR(stack, voidptr, ast); 821 STACK_PUSHR(stack, long, EXPAND_RECURSE); 822 while (status == REG_OK && tre_stack_num_objects(stack) > bottom) 823 { 824 tre_ast_node_t *node; 825 tre_expand_ast_symbol_t symbol; 826 827 if (status != REG_OK) 828 break; 829 830 DPRINT(("pos_add %d\n", pos_add)); 831 832 symbol = (tre_expand_ast_symbol_t)tre_stack_pop_long(stack); 833 node = tre_stack_pop_voidptr(stack); 834 switch (symbol) 835 { 836 case EXPAND_RECURSE: 837 switch (node->type) 838 { 839 case LITERAL: 840 { 841 tre_literal_t *lit= node->obj; 842 if (!IS_SPECIAL(lit) || IS_BACKREF(lit)) 843 { 844 lit->position += pos_add; 845 if (lit->position > max_pos) 846 max_pos = lit->position; 847 } 848 break; 849 } 850 case UNION: 851 { 852 tre_union_t *uni = node->obj; 853 STACK_PUSHX(stack, voidptr, uni->right); 854 STACK_PUSHX(stack, long, EXPAND_RECURSE); 855 STACK_PUSHX(stack, voidptr, uni->left); 856 STACK_PUSHX(stack, long, EXPAND_RECURSE); 857 break; 858 } 859 case CATENATION: 860 { 861 tre_catenation_t *cat = node->obj; 862 STACK_PUSHX(stack, voidptr, cat->right); 863 STACK_PUSHX(stack, long, EXPAND_RECURSE); 864 STACK_PUSHX(stack, voidptr, cat->left); 865 STACK_PUSHX(stack, long, EXPAND_RECURSE); 866 break; 867 } 868 case ITERATION: 869 { 870 tre_iteration_t *iter = node->obj; 871 STACK_PUSHX(stack, long, pos_add); 872 STACK_PUSHX(stack, voidptr, node); 873 STACK_PUSHX(stack, long, EXPAND_AFTER_ITER); 874 STACK_PUSHX(stack, voidptr, iter->arg); 875 STACK_PUSHX(stack, long, EXPAND_RECURSE); 876 /* If we are going to expand this node at EXPAND_AFTER_ITER 877 then don't increase the `pos' fields of the nodes now, it 878 will get done when expanding. */ 879 if (iter->min > 1 || iter->max > 1) 880 pos_add = 0; 881 iter_depth++; 882 DPRINT(("iter\n")); 883 break; 884 } 885 default: 886 assert(0); 887 break; 888 } 889 break; 890 case EXPAND_AFTER_ITER: 891 { 892 tre_iteration_t *iter = node->obj; 893 int pos_add_last; 894 pos_add = tre_stack_pop_int(stack); 895 pos_add_last = pos_add; 896 if (iter->min > 1 || iter->max > 1) 897 { 898 tre_ast_node_t *seq1 = NULL, *seq2 = NULL; 899 int j; 900 int pos_add_save = pos_add; 901 902 /* Create a catenated sequence of copies of the node. */ 903 for (j = 0; j < iter->min; j++) 904 { 905 tre_ast_node_t *copy; 906 /* Remove tags from all but the last copy. */ 907 int flags = ((j + 1 < iter->min) 908 ? COPY_REMOVE_TAGS 909 : COPY_MAXIMIZE_FIRST_TAG); 910 DPRINT((" pos_add %d\n", pos_add)); 911 pos_add_save = pos_add; 912 status = tre_copy_ast(mem, stack, iter->arg, flags, 913 &pos_add, tag_directions, ©, 914 &max_pos); 915 if (status != REG_OK) 916 return status; 917 if (seq1 != NULL) 918 seq1 = tre_ast_new_catenation(mem, seq1, copy); 919 else 920 seq1 = copy; 921 if (seq1 == NULL) 922 return REG_ESPACE; 923 } 924 925 if (iter->max == -1) 926 { 927 /* No upper limit. */ 928 pos_add_save = pos_add; 929 status = tre_copy_ast(mem, stack, iter->arg, 0, 930 &pos_add, NULL, &seq2, &max_pos); 931 if (status != REG_OK) 932 return status; 933 seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0); 934 if (seq2 == NULL) 935 return REG_ESPACE; 936 } 937 else 938 { 939 for (j = iter->min; j < iter->max; j++) 940 { 941 tre_ast_node_t *tmp, *copy; 942 pos_add_save = pos_add; 943 status = tre_copy_ast(mem, stack, iter->arg, 0, 944 &pos_add, NULL, ©, &max_pos); 945 if (status != REG_OK) 946 return status; 947 if (seq2 != NULL) 948 seq2 = tre_ast_new_catenation(mem, copy, seq2); 949 else 950 seq2 = copy; 951 if (seq2 == NULL) 952 return REG_ESPACE; 953 tmp = tre_ast_new_literal(mem, EMPTY, -1, -1); 954 if (tmp == NULL) 955 return REG_ESPACE; 956 seq2 = tre_ast_new_union(mem, tmp, seq2); 957 if (seq2 == NULL) 958 return REG_ESPACE; 959 } 960 } 961 962 pos_add = pos_add_save; 963 if (seq1 == NULL) 964 seq1 = seq2; 965 else if (seq2 != NULL) 966 seq1 = tre_ast_new_catenation(mem, seq1, seq2); 967 if (seq1 == NULL) 968 return REG_ESPACE; 969 node->obj = seq1->obj; 970 node->type = seq1->type; 971 } 972 973 iter_depth--; 974 pos_add_total += pos_add - pos_add_last; 975 if (iter_depth == 0) 976 pos_add = pos_add_total; 977 978 /* If approximate parameters are specified, surround the result 979 with two parameter setting nodes. The one on the left sets 980 the specified parameters, and the one on the right restores 981 the old parameters. */ 982 if (iter->params) 983 { 984 tre_ast_node_t *tmp_l, *tmp_r, *tmp_node, *node_copy; 985 int *old_params; 986 987 tmp_l = tre_ast_new_literal(mem, PARAMETER, 0, -1); 988 if (!tmp_l) 989 return REG_ESPACE; 990 ((tre_literal_t *)tmp_l->obj)->u.params = iter->params; 991 iter->params[TRE_PARAM_DEPTH] = params_depth + 1; 992 tmp_r = tre_ast_new_literal(mem, PARAMETER, 0, -1); 993 if (!tmp_r) 994 return REG_ESPACE; 995 old_params = tre_mem_alloc(mem, sizeof(*old_params) 996 * TRE_PARAM_LAST); 997 if (!old_params) 998 return REG_ESPACE; 999 for (i = 0; i < TRE_PARAM_LAST; i++) 1000 old_params[i] = params[i]; 1001 ((tre_literal_t *)tmp_r->obj)->u.params = old_params; 1002 old_params[TRE_PARAM_DEPTH] = params_depth; 1003 /* XXX - this is the only place where ast_new_node is 1004 needed -- should be moved inside AST module. */ 1005 node_copy = tre_ast_new_node(mem, ITERATION, 1006 sizeof(tre_iteration_t)); 1007 if (!node_copy) 1008 return REG_ESPACE; 1009 node_copy->obj = node->obj; 1010 tmp_node = tre_ast_new_catenation(mem, tmp_l, node_copy); 1011 if (!tmp_node) 1012 return REG_ESPACE; 1013 tmp_node = tre_ast_new_catenation(mem, tmp_node, tmp_r); 1014 if (!tmp_node) 1015 return REG_ESPACE; 1016 /* Replace the contents of `node' with `tmp_node'. */ 1017 memcpy(node, tmp_node, sizeof(*node)); 1018 node->obj = tmp_node->obj; 1019 node->type = tmp_node->type; 1020 params_depth++; 1021 if (params_depth > *max_depth) 1022 *max_depth = params_depth; 1023 } 1024 break; 1025 } 1026 default: 1027 assert(0); 1028 break; 1029 } 1030 } 1031 1032 *position += pos_add_total; 1033 1034 /* `max_pos' should never be larger than `*position' if the above 1035 code works, but just an extra safeguard let's make sure 1036 `*position' is set large enough so enough memory will be 1037 allocated for the transition table. */ 1038 if (max_pos > *position) 1039 *position = max_pos; 1040 1041#ifdef TRE_DEBUG 1042 DPRINT(("Expanded AST:\n")); 1043 tre_ast_print(ast); 1044 DPRINT(("*position %d, max_pos %d\n", *position, max_pos)); 1045#endif 1046 1047 return status; 1048} 1049 1050static tre_pos_and_tags_t * 1051tre_set_empty(tre_mem_t mem) 1052{ 1053 tre_pos_and_tags_t *new_set; 1054 1055 new_set = tre_mem_calloc(mem, sizeof(*new_set)); 1056 if (new_set == NULL) 1057 return NULL; 1058 1059 new_set[0].position = -1; 1060 new_set[0].code_min = -1; 1061 new_set[0].code_max = -1; 1062 1063 return new_set; 1064} 1065 1066static tre_pos_and_tags_t * 1067tre_set_one(tre_mem_t mem, int position, int code_min, int code_max, 1068 tre_ctype_t class, tre_ctype_t *neg_classes, int backref) 1069{ 1070 tre_pos_and_tags_t *new_set; 1071 1072 new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2); 1073 if (new_set == NULL) 1074 return NULL; 1075 1076 new_set[0].position = position; 1077 new_set[0].code_min = code_min; 1078 new_set[0].code_max = code_max; 1079 new_set[0].class = class; 1080 new_set[0].neg_classes = neg_classes; 1081 new_set[0].backref = backref; 1082 new_set[1].position = -1; 1083 new_set[1].code_min = -1; 1084 new_set[1].code_max = -1; 1085 1086 return new_set; 1087} 1088 1089static tre_pos_and_tags_t * 1090tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2, 1091 int *tags, int assertions, int *params) 1092{ 1093 int s1, s2, i, j; 1094 tre_pos_and_tags_t *new_set; 1095 int *new_tags; 1096 int num_tags; 1097 1098 for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++); 1099 for (s1 = 0; set1[s1].position >= 0; s1++); 1100 for (s2 = 0; set2[s2].position >= 0; s2++); 1101 new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1)); 1102 if (!new_set ) 1103 return NULL; 1104 1105 for (s1 = 0; set1[s1].position >= 0; s1++) 1106 { 1107 new_set[s1].position = set1[s1].position; 1108 new_set[s1].code_min = set1[s1].code_min; 1109 new_set[s1].code_max = set1[s1].code_max; 1110 new_set[s1].assertions = set1[s1].assertions | assertions; 1111 new_set[s1].class = set1[s1].class; 1112 new_set[s1].neg_classes = set1[s1].neg_classes; 1113 new_set[s1].backref = set1[s1].backref; 1114 if (set1[s1].tags == NULL && tags == NULL) 1115 new_set[s1].tags = NULL; 1116 else 1117 { 1118 for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++); 1119 new_tags = tre_mem_alloc(mem, (sizeof(*new_tags) 1120 * (i + num_tags + 1))); 1121 if (new_tags == NULL) 1122 return NULL; 1123 for (j = 0; j < i; j++) 1124 new_tags[j] = set1[s1].tags[j]; 1125 for (i = 0; i < num_tags; i++) 1126 new_tags[j + i] = tags[i]; 1127 new_tags[j + i] = -1; 1128 new_set[s1].tags = new_tags; 1129 } 1130 if (set1[s1].params) 1131 new_set[s1].params = set1[s1].params; 1132 if (params) 1133 { 1134 if (!new_set[s1].params) 1135 new_set[s1].params = params; 1136 else 1137 { 1138 new_set[s1].params = tre_mem_alloc(mem, sizeof(*params) * 1139 TRE_PARAM_LAST); 1140 if (!new_set[s1].params) 1141 return NULL; 1142 for (i = 0; i < TRE_PARAM_LAST; i++) 1143 if (params[i] != TRE_PARAM_UNSET) 1144 new_set[s1].params[i] = params[i]; 1145 } 1146 } 1147 } 1148 1149 for (s2 = 0; set2[s2].position >= 0; s2++) 1150 { 1151 new_set[s1 + s2].position = set2[s2].position; 1152 new_set[s1 + s2].code_min = set2[s2].code_min; 1153 new_set[s1 + s2].code_max = set2[s2].code_max; 1154 /* XXX - why not | assertions here as well? */ 1155 new_set[s1 + s2].assertions = set2[s2].assertions; 1156 new_set[s1 + s2].class = set2[s2].class; 1157 new_set[s1 + s2].neg_classes = set2[s2].neg_classes; 1158 new_set[s1 + s2].backref = set2[s2].backref; 1159 if (set2[s2].tags == NULL) 1160 new_set[s1 + s2].tags = NULL; 1161 else 1162 { 1163 for (i = 0; set2[s2].tags[i] >= 0; i++); 1164 new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1)); 1165 if (new_tags == NULL) 1166 return NULL; 1167 for (j = 0; j < i; j++) 1168 new_tags[j] = set2[s2].tags[j]; 1169 new_tags[j] = -1; 1170 new_set[s1 + s2].tags = new_tags; 1171 } 1172 if (set2[s2].params) 1173 new_set[s1 + s2].params = set2[s2].params; 1174 if (params) 1175 { 1176 if (!new_set[s1 + s2].params) 1177 new_set[s1 + s2].params = params; 1178 else 1179 { 1180 new_set[s1 + s2].params = tre_mem_alloc(mem, sizeof(*params) * 1181 TRE_PARAM_LAST); 1182 if (!new_set[s1 + s2].params) 1183 return NULL; 1184 for (i = 0; i < TRE_PARAM_LAST; i++) 1185 if (params[i] != TRE_PARAM_UNSET) 1186 new_set[s1 + s2].params[i] = params[i]; 1187 } 1188 } 1189 } 1190 new_set[s1 + s2].position = -1; 1191 return new_set; 1192} 1193 1194/* Finds the empty path through `node' which is the one that should be 1195 taken according to POSIX.2 rules, and adds the tags on that path to 1196 `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is 1197 set to the number of tags seen on the path. */ 1198static reg_errcode_t 1199tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags, 1200 int *assertions, int *params, int *num_tags_seen, 1201 int *params_seen) 1202{ 1203 tre_literal_t *lit; 1204 tre_union_t *uni; 1205 tre_catenation_t *cat; 1206 tre_iteration_t *iter; 1207 int i; 1208 int bottom = tre_stack_num_objects(stack); 1209 reg_errcode_t status = REG_OK; 1210 if (num_tags_seen) 1211 *num_tags_seen = 0; 1212 if (params_seen) 1213 *params_seen = 0; 1214 1215 status = tre_stack_push_voidptr(stack, node); 1216 1217 /* Walk through the tree recursively. */ 1218 while (status == REG_OK && tre_stack_num_objects(stack) > bottom) 1219 { 1220 node = tre_stack_pop_voidptr(stack); 1221 1222 switch (node->type) 1223 { 1224 case LITERAL: 1225 lit = (tre_literal_t *)node->obj; 1226 switch (lit->code_min) 1227 { 1228 case TAG: 1229 if (lit->code_max >= 0) 1230 { 1231 if (tags != NULL) 1232 { 1233 /* Add the tag to `tags'. */ 1234 for (i = 0; tags[i] >= 0; i++) 1235 if (tags[i] == lit->code_max) 1236 break; 1237 if (tags[i] < 0) 1238 { 1239 tags[i] = lit->code_max; 1240 tags[i + 1] = -1; 1241 } 1242 } 1243 if (num_tags_seen) 1244 (*num_tags_seen)++; 1245 } 1246 break; 1247 case ASSERTION: 1248 assert(lit->code_max >= 1 1249 || lit->code_max <= ASSERT_LAST); 1250 if (assertions != NULL) 1251 *assertions |= lit->code_max; 1252 break; 1253 case PARAMETER: 1254 if (params != NULL) 1255 for (i = 0; i < TRE_PARAM_LAST; i++) 1256 params[i] = lit->u.params[i]; 1257 if (params_seen != NULL) 1258 *params_seen = 1; 1259 break; 1260 case EMPTY: 1261 break; 1262 default: 1263 assert(0); 1264 break; 1265 } 1266 break; 1267 1268 case UNION: 1269 /* Subexpressions starting earlier take priority over ones 1270 starting later, so we prefer the left subexpression over the 1271 right subexpression. */ 1272 uni = (tre_union_t *)node->obj; 1273 if (uni->left->nullable) 1274 STACK_PUSHX(stack, voidptr, uni->left) 1275 else if (uni->right->nullable) 1276 STACK_PUSHX(stack, voidptr, uni->right) 1277 else 1278 assert(0); 1279 break; 1280 1281 case CATENATION: 1282 /* The path must go through both children. */ 1283 cat = (tre_catenation_t *)node->obj; 1284 assert(cat->left->nullable); 1285 assert(cat->right->nullable); 1286 STACK_PUSHX(stack, voidptr, cat->left); 1287 STACK_PUSHX(stack, voidptr, cat->right); 1288 break; 1289 1290 case ITERATION: 1291 /* A match with an empty string is preferred over no match at 1292 all, so we go through the argument if possible. */ 1293 iter = (tre_iteration_t *)node->obj; 1294 if (iter->arg->nullable) 1295 STACK_PUSHX(stack, voidptr, iter->arg); 1296 break; 1297 1298 default: 1299 assert(0); 1300 break; 1301 } 1302 } 1303 1304 return status; 1305} 1306 1307 1308typedef enum { 1309 NFL_RECURSE, 1310 NFL_POST_UNION, 1311 NFL_POST_CATENATION, 1312 NFL_POST_ITERATION 1313} tre_nfl_stack_symbol_t; 1314 1315 1316/* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for 1317 the nodes of the AST `tree'. */ 1318static reg_errcode_t 1319tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree) 1320{ 1321 int bottom = tre_stack_num_objects(stack); 1322 1323 STACK_PUSHR(stack, voidptr, tree); 1324 STACK_PUSHR(stack, long, NFL_RECURSE); 1325 1326 while (tre_stack_num_objects(stack) > bottom) 1327 { 1328 tre_nfl_stack_symbol_t symbol; 1329 tre_ast_node_t *node; 1330 1331 symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_long(stack); 1332 node = tre_stack_pop_voidptr(stack); 1333 switch (symbol) 1334 { 1335 case NFL_RECURSE: 1336 switch (node->type) 1337 { 1338 case LITERAL: 1339 { 1340 tre_literal_t *lit = (tre_literal_t *)node->obj; 1341 if (IS_BACKREF(lit)) 1342 { 1343 /* Back references: nullable = false, firstpos = {i}, 1344 lastpos = {i}. */ 1345 node->nullable = 0; 1346 node->firstpos = tre_set_one(mem, lit->position, 0, 1347 TRE_CHAR_MAX, 0, NULL, -1); 1348 if (!node->firstpos) 1349 return REG_ESPACE; 1350 node->lastpos = tre_set_one(mem, lit->position, 0, 1351 TRE_CHAR_MAX, 0, NULL, 1352 (int)lit->code_max); 1353 if (!node->lastpos) 1354 return REG_ESPACE; 1355 } 1356 else if (lit->code_min < 0) 1357 { 1358 /* Tags, empty strings, params, and zero width assertions: 1359 nullable = true, firstpos = {}, and lastpos = {}. */ 1360 node->nullable = 1; 1361 node->firstpos = tre_set_empty(mem); 1362 if (!node->firstpos) 1363 return REG_ESPACE; 1364 node->lastpos = tre_set_empty(mem); 1365 if (!node->lastpos) 1366 return REG_ESPACE; 1367 } 1368 else 1369 { 1370 /* Literal at position i: nullable = false, firstpos = {i}, 1371 lastpos = {i}. */ 1372 node->nullable = 0; 1373 node->firstpos = 1374 tre_set_one(mem, lit->position, (int)lit->code_min, 1375 (int)lit->code_max, 0, NULL, -1); 1376 if (!node->firstpos) 1377 return REG_ESPACE; 1378 node->lastpos = tre_set_one(mem, lit->position, 1379 (int)lit->code_min, 1380 (int)lit->code_max, 1381 lit->u.class, lit->neg_classes, 1382 -1); 1383 if (!node->lastpos) 1384 return REG_ESPACE; 1385 } 1386 break; 1387 } 1388 1389 case UNION: 1390 /* Compute the attributes for the two subtrees, and after that 1391 for this node. */ 1392 STACK_PUSHR(stack, voidptr, node); 1393 STACK_PUSHR(stack, long, NFL_POST_UNION); 1394 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right); 1395 STACK_PUSHR(stack, long, NFL_RECURSE); 1396 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left); 1397 STACK_PUSHR(stack, long, NFL_RECURSE); 1398 break; 1399 1400 case CATENATION: 1401 /* Compute the attributes for the two subtrees, and after that 1402 for this node. */ 1403 STACK_PUSHR(stack, voidptr, node); 1404 STACK_PUSHR(stack, long, NFL_POST_CATENATION); 1405 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right); 1406 STACK_PUSHR(stack, long, NFL_RECURSE); 1407 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left); 1408 STACK_PUSHR(stack, long, NFL_RECURSE); 1409 break; 1410 1411 case ITERATION: 1412 /* Compute the attributes for the subtree, and after that for 1413 this node. */ 1414 STACK_PUSHR(stack, voidptr, node); 1415 STACK_PUSHR(stack, long, NFL_POST_ITERATION); 1416 STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg); 1417 STACK_PUSHR(stack, long, NFL_RECURSE); 1418 break; 1419 } 1420 break; /* end case: NFL_RECURSE */ 1421 1422 case NFL_POST_UNION: 1423 { 1424 tre_union_t *uni = (tre_union_t *)node->obj; 1425 node->nullable = uni->left->nullable || uni->right->nullable; 1426 node->firstpos = tre_set_union(mem, uni->left->firstpos, 1427 uni->right->firstpos, NULL, 0, NULL); 1428 if (!node->firstpos) 1429 return REG_ESPACE; 1430 node->lastpos = tre_set_union(mem, uni->left->lastpos, 1431 uni->right->lastpos, NULL, 0, NULL); 1432 if (!node->lastpos) 1433 return REG_ESPACE; 1434 break; 1435 } 1436 1437 case NFL_POST_ITERATION: 1438 { 1439 tre_iteration_t *iter = (tre_iteration_t *)node->obj; 1440 1441 if (iter->min == 0 || iter->arg->nullable) 1442 node->nullable = 1; 1443 else 1444 node->nullable = 0; 1445 node->firstpos = iter->arg->firstpos; 1446 node->lastpos = iter->arg->lastpos; 1447 break; 1448 } 1449 1450 case NFL_POST_CATENATION: 1451 { 1452 int num_tags, *tags, assertions, params_seen; 1453 int *params; 1454 reg_errcode_t status; 1455 tre_catenation_t *cat = node->obj; 1456 node->nullable = cat->left->nullable && cat->right->nullable; 1457 1458 /* Compute firstpos. */ 1459 if (cat->left->nullable) 1460 { 1461 /* The left side matches the empty string. Make a first pass 1462 with tre_match_empty() to get the number of tags and 1463 parameters. */ 1464 status = tre_match_empty(stack, cat->left, 1465 NULL, NULL, NULL, &num_tags, 1466 ¶ms_seen); 1467 if (status != REG_OK) 1468 return status; 1469 /* Allocate arrays for the tags and parameters. */ 1470 tags = xmalloc(sizeof(*tags) * (num_tags + 1)); 1471 if (!tags) 1472 return REG_ESPACE; 1473 tags[0] = -1; 1474 assertions = 0; 1475 params = NULL; 1476 if (params_seen) 1477 { 1478 params = tre_mem_alloc(mem, sizeof(*params) 1479 * TRE_PARAM_LAST); 1480 if (!params) 1481 { 1482 xfree(tags); 1483 return REG_ESPACE; 1484 } 1485 } 1486 /* Second pass with tre_mach_empty() to get the list of 1487 tags and parameters. */ 1488 status = tre_match_empty(stack, cat->left, tags, 1489 &assertions, params, NULL, NULL); 1490 if (status != REG_OK) 1491 { 1492 xfree(tags); 1493 return status; 1494 } 1495 node->firstpos = 1496 tre_set_union(mem, cat->right->firstpos, cat->left->firstpos, 1497 tags, assertions, params); 1498 xfree(tags); 1499 if (!node->firstpos) 1500 return REG_ESPACE; 1501 } 1502 else 1503 { 1504 node->firstpos = cat->left->firstpos; 1505 } 1506 1507 /* Compute lastpos. */ 1508 if (cat->right->nullable) 1509 { 1510 /* The right side matches the empty string. Make a first pass 1511 with tre_match_empty() to get the number of tags and 1512 parameters. */ 1513 status = tre_match_empty(stack, cat->right, 1514 NULL, NULL, NULL, &num_tags, 1515 ¶ms_seen); 1516 if (status != REG_OK) 1517 return status; 1518 /* Allocate arrays for the tags and parameters. */ 1519 tags = xmalloc(sizeof(int) * (num_tags + 1)); 1520 if (!tags) 1521 return REG_ESPACE; 1522 tags[0] = -1; 1523 assertions = 0; 1524 params = NULL; 1525 if (params_seen) 1526 { 1527 params = tre_mem_alloc(mem, sizeof(*params) 1528 * TRE_PARAM_LAST); 1529 if (!params) 1530 { 1531 xfree(tags); 1532 return REG_ESPACE; 1533 } 1534 } 1535 /* Second pass with tre_mach_empty() to get the list of 1536 tags and parameters. */ 1537 status = tre_match_empty(stack, cat->right, tags, 1538 &assertions, params, NULL, NULL); 1539 if (status != REG_OK) 1540 { 1541 xfree(tags); 1542 return status; 1543 } 1544 node->lastpos = 1545 tre_set_union(mem, cat->left->lastpos, cat->right->lastpos, 1546 tags, assertions, params); 1547 xfree(tags); 1548 if (!node->lastpos) 1549 return REG_ESPACE; 1550 } 1551 else 1552 { 1553 node->lastpos = cat->right->lastpos; 1554 } 1555 break; 1556 } 1557 1558 default: 1559 assert(0); 1560 break; 1561 } 1562 } 1563 1564 return REG_OK; 1565} 1566 1567 1568/* Adds a transition from each position in `p1' to each position in `p2'. */ 1569static reg_errcode_t 1570tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2, 1571 tre_tnfa_transition_t *transitions, 1572 int *counts, int *offs) 1573{ 1574 tre_pos_and_tags_t *orig_p2 = p2; 1575 tre_tnfa_transition_t *trans; 1576 int i, j, k, l, dup, prev_p2_pos; 1577 1578 if (transitions != NULL) 1579 while (p1->position >= 0) 1580 { 1581 p2 = orig_p2; 1582 prev_p2_pos = -1; 1583 while (p2->position >= 0) 1584 { 1585 /* Optimization: if this position was already handled, skip it. */ 1586 if (p2->position == prev_p2_pos) 1587 { 1588 p2++; 1589 continue; 1590 } 1591 prev_p2_pos = p2->position; 1592 /* Set `trans' to point to the next unused transition from 1593 position `p1->position'. */ 1594 trans = transitions + offs[p1->position]; 1595 while (trans->state != NULL) 1596 { 1597#if 0 1598 /* If we find a previous transition from `p1->position' to 1599 `p2->position', it is overwritten. This can happen only 1600 if there are nested loops in the regexp, like in "((a)*)*". 1601 In POSIX.2 repetition using the outer loop is always 1602 preferred over using the inner loop. Therefore the 1603 transition for the inner loop is useless and can be thrown 1604 away. */ 1605 /* XXX - The same position is used for all nodes in a bracket 1606 expression, so this optimization cannot be used (it will 1607 break bracket expressions) unless I figure out a way to 1608 detect it here. */ 1609 if (trans->state_id == p2->position) 1610 { 1611 DPRINT(("*")); 1612 break; 1613 } 1614#endif 1615 trans++; 1616 } 1617 1618 if (trans->state == NULL) 1619 (trans + 1)->state = NULL; 1620 /* Use the character ranges, assertions, etc. from `p1' for 1621 the transition from `p1' to `p2'. */ 1622 trans->code_min = p1->code_min; 1623 trans->code_max = p1->code_max; 1624 trans->state = transitions + offs[p2->position]; 1625 trans->state_id = p2->position; 1626 trans->assertions = p1->assertions | p2->assertions 1627 | (p1->class ? ASSERT_CHAR_CLASS : 0) 1628 | (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0); 1629 if (p1->backref >= 0) 1630 { 1631 assert((trans->assertions & ASSERT_CHAR_CLASS) == 0); 1632 assert(p2->backref < 0); 1633 trans->u.backref = p1->backref; 1634 trans->assertions |= ASSERT_BACKREF; 1635 } 1636 else 1637 trans->u.class = p1->class; 1638 if (p1->neg_classes != NULL) 1639 { 1640 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++); 1641 trans->neg_classes = 1642 xmalloc(sizeof(*trans->neg_classes) * (i + 1)); 1643 if (trans->neg_classes == NULL) 1644 return REG_ESPACE; 1645 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++) 1646 trans->neg_classes[i] = p1->neg_classes[i]; 1647 trans->neg_classes[i] = (tre_ctype_t)0; 1648 } 1649 else 1650 trans->neg_classes = NULL; 1651 1652 /* Find out how many tags this transition has. */ 1653 i = 0; 1654 if (p1->tags != NULL) 1655 while(p1->tags[i] >= 0) 1656 i++; 1657 j = 0; 1658 if (p2->tags != NULL) 1659 while(p2->tags[j] >= 0) 1660 j++; 1661 1662 /* If we are overwriting a transition, free the old tag array. */ 1663 if (trans->tags != NULL) 1664 xfree(trans->tags); 1665 trans->tags = NULL; 1666 1667 /* If there were any tags, allocate an array and fill it. */ 1668 if (i + j > 0) 1669 { 1670 trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1)); 1671 if (!trans->tags) 1672 return REG_ESPACE; 1673 i = 0; 1674 if (p1->tags != NULL) 1675 while(p1->tags[i] >= 0) 1676 { 1677 trans->tags[i] = p1->tags[i]; 1678 i++; 1679 } 1680 l = i; 1681 j = 0; 1682 if (p2->tags != NULL) 1683 while (p2->tags[j] >= 0) 1684 { 1685 /* Don't add duplicates. */ 1686 dup = 0; 1687 for (k = 0; k < i; k++) 1688 if (trans->tags[k] == p2->tags[j]) 1689 { 1690 dup = 1; 1691 break; 1692 } 1693 if (!dup) 1694 trans->tags[l++] = p2->tags[j]; 1695 j++; 1696 } 1697 trans->tags[l] = -1; 1698 } 1699 1700 /* Set the parameter array. If both `p2' and `p1' have same 1701 parameters, the values in `p2' override those in `p1'. */ 1702 if (p1->params || p2->params) 1703 { 1704 if (!trans->params) 1705 trans->params = xmalloc(sizeof(*trans->params) 1706 * TRE_PARAM_LAST); 1707 if (!trans->params) 1708 return REG_ESPACE; 1709 for (i = 0; i < TRE_PARAM_LAST; i++) 1710 { 1711 trans->params[i] = TRE_PARAM_UNSET; 1712 if (p1->params && p1->params[i] != TRE_PARAM_UNSET) 1713 trans->params[i] = p1->params[i]; 1714 if (p2->params && p2->params[i] != TRE_PARAM_UNSET) 1715 trans->params[i] = p2->params[i]; 1716 } 1717 } 1718 else 1719 { 1720 if (trans->params) 1721 xfree(trans->params); 1722 trans->params = NULL; 1723 } 1724 1725 1726#ifdef TRE_DEBUG 1727 { 1728 int *tags; 1729 1730 DPRINT((" %2d -> %2d on %3d", p1->position, p2->position, 1731 p1->code_min)); 1732 if (p1->code_max != p1->code_min) 1733 DPRINT(("-%3d", p1->code_max)); 1734 tags = trans->tags; 1735 if (tags) 1736 { 1737 DPRINT((", tags [")); 1738 while (*tags >= 0) 1739 { 1740 DPRINT(("%d", *tags)); 1741 tags++; 1742 if (*tags >= 0) 1743 DPRINT((",")); 1744 } 1745 DPRINT(("]")); 1746 } 1747 if (trans->assertions) 1748 DPRINT((", assert %d", trans->assertions)); 1749 if (trans->assertions & ASSERT_BACKREF) 1750 DPRINT((", backref %d", trans->u.backref)); 1751 else if (trans->u.class) 1752 DPRINT((", class %ld", (long)trans->u.class)); 1753 if (trans->neg_classes) 1754 DPRINT((", neg_classes %p", trans->neg_classes)); 1755 if (trans->params) 1756 { 1757 DPRINT((", ")); 1758 tre_print_params(trans->params); 1759 } 1760 DPRINT(("\n")); 1761 } 1762#endif /* TRE_DEBUG */ 1763 p2++; 1764 } 1765 p1++; 1766 } 1767 else 1768 /* Compute a maximum limit for the number of transitions leaving 1769 from each state. */ 1770 while (p1->position >= 0) 1771 { 1772 p2 = orig_p2; 1773 while (p2->position >= 0) 1774 { 1775 counts[p1->position]++; 1776 p2++; 1777 } 1778 p1++; 1779 } 1780 return REG_OK; 1781} 1782 1783/* Converts the syntax tree to a TNFA. All the transitions in the TNFA are 1784 labelled with one character range (there are no transitions on empty 1785 strings). The TNFA takes O(n^2) space in the worst case, `n' is size of 1786 the regexp. */ 1787static reg_errcode_t 1788tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions, 1789 int *counts, int *offs) 1790{ 1791 tre_union_t *uni; 1792 tre_catenation_t *cat; 1793 tre_iteration_t *iter; 1794 reg_errcode_t errcode = REG_OK; 1795 1796 /* XXX - recurse using a stack!. */ 1797 switch (node->type) 1798 { 1799 case LITERAL: 1800 break; 1801 case UNION: 1802 uni = (tre_union_t *)node->obj; 1803 errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs); 1804 if (errcode != REG_OK) 1805 return errcode; 1806 errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs); 1807 break; 1808 1809 case CATENATION: 1810 cat = (tre_catenation_t *)node->obj; 1811 /* Add a transition from each position in cat->left->lastpos 1812 to each position in cat->right->firstpos. */ 1813 errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos, 1814 transitions, counts, offs); 1815 if (errcode != REG_OK) 1816 return errcode; 1817 errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs); 1818 if (errcode != REG_OK) 1819 return errcode; 1820 errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs); 1821 break; 1822 1823 case ITERATION: 1824 iter = (tre_iteration_t *)node->obj; 1825 assert(iter->max == -1 || iter->max == 1); 1826 1827 if (iter->max == -1) 1828 { 1829 assert(iter->min == 0 || iter->min == 1); 1830 /* Add a transition from each last position in the iterated 1831 expression to each first position. */ 1832 errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos, 1833 transitions, counts, offs); 1834 if (errcode != REG_OK) 1835 return errcode; 1836 } 1837 errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs); 1838 break; 1839 } 1840 return errcode; 1841} 1842 1843 1844#define ERROR_EXIT(err) \ 1845 do \ 1846 { \ 1847 errcode = err; \ 1848 if (/*CONSTCOND*/1) \ 1849 goto error_exit; \ 1850 } \ 1851 while (/*CONSTCOND*/0) 1852 1853 1854int 1855tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags) 1856{ 1857 tre_stack_t *stack; 1858 tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r; 1859 tre_pos_and_tags_t *p; 1860 int *counts = NULL, *offs = NULL; 1861 int i, add = 0; 1862 tre_tnfa_transition_t *transitions, *initial; 1863 tre_tnfa_t *tnfa = NULL; 1864 tre_submatch_data_t *submatch_data; 1865 tre_tag_direction_t *tag_directions = NULL; 1866 reg_errcode_t errcode; 1867 tre_mem_t mem; 1868 1869 /* Parse context. */ 1870 tre_parse_ctx_t parse_ctx; 1871 1872 /* Allocate a stack used throughout the compilation process for various 1873 purposes. */ 1874 stack = tre_stack_new(512, 10240, 128); 1875 if (!stack) 1876 return REG_ESPACE; 1877 /* Allocate a fast memory allocator. */ 1878 mem = tre_mem_new(); 1879 if (!mem) 1880 { 1881 tre_stack_destroy(stack); 1882 return REG_ESPACE; 1883 } 1884 1885 /* Parse the regexp. */ 1886 memset(&parse_ctx, 0, sizeof(parse_ctx)); 1887 parse_ctx.mem = mem; 1888 parse_ctx.stack = stack; 1889 parse_ctx.re = regex; 1890 parse_ctx.len = n; 1891 parse_ctx.cflags = cflags; 1892 parse_ctx.max_backref = -1; 1893 DPRINT(("tre_compile: parsing '%.*" STRF "'\n", (int)n, regex)); 1894 errcode = tre_parse(&parse_ctx); 1895 if (errcode != REG_OK) 1896 ERROR_EXIT(errcode); 1897 preg->re_nsub = parse_ctx.submatch_id - 1; 1898 tree = parse_ctx.result; 1899 1900 /* Back references and approximate matching cannot currently be used 1901 in the same regexp. */ 1902 if (parse_ctx.max_backref >= 0 && parse_ctx.have_approx) 1903 ERROR_EXIT(REG_BADPAT); 1904 1905#ifdef TRE_DEBUG 1906 tre_ast_print(tree); 1907#endif /* TRE_DEBUG */ 1908 1909 /* Referring to nonexistent subexpressions is illegal. */ 1910 if (parse_ctx.max_backref > (int)preg->re_nsub) 1911 ERROR_EXIT(REG_ESUBREG); 1912 1913 /* Allocate the TNFA struct. */ 1914 tnfa = xcalloc(1, sizeof(tre_tnfa_t)); 1915 if (tnfa == NULL) 1916 ERROR_EXIT(REG_ESPACE); 1917 tnfa->have_backrefs = parse_ctx.max_backref >= 0; 1918 tnfa->have_approx = parse_ctx.have_approx; 1919 tnfa->num_submatches = parse_ctx.submatch_id; 1920 1921 /* Set up tags for submatch addressing. If REG_NOSUB is set and the 1922 regexp does not have back references, this can be skipped. */ 1923 if (tnfa->have_backrefs || !(cflags & REG_NOSUB)) 1924 { 1925 DPRINT(("tre_compile: setting up tags\n")); 1926 1927 /* Figure out how many tags we will need. */ 1928 errcode = tre_add_tags(NULL, stack, tree, tnfa); 1929 if (errcode != REG_OK) 1930 ERROR_EXIT(errcode); 1931#ifdef TRE_DEBUG 1932 tre_ast_print(tree); 1933#endif /* TRE_DEBUG */ 1934 1935 if (tnfa->num_tags > 0) 1936 { 1937 tag_directions = xmalloc(sizeof(*tag_directions) 1938 * (tnfa->num_tags + 1)); 1939 if (tag_directions == NULL) 1940 ERROR_EXIT(REG_ESPACE); 1941 tnfa->tag_directions = tag_directions; 1942 memset(tag_directions, -1, 1943 sizeof(*tag_directions) * (tnfa->num_tags + 1)); 1944 } 1945 tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1, 1946 sizeof(tnfa->minimal_tags)); 1947 if (tnfa->minimal_tags == NULL) 1948 ERROR_EXIT(REG_ESPACE); 1949 1950 submatch_data = xcalloc((unsigned)parse_ctx.submatch_id, 1951 sizeof(*submatch_data)); 1952 if (submatch_data == NULL) 1953 ERROR_EXIT(REG_ESPACE); 1954 tnfa->submatch_data = submatch_data; 1955 1956 errcode = tre_add_tags(mem, stack, tree, tnfa); 1957 if (errcode != REG_OK) 1958 ERROR_EXIT(errcode); 1959 1960#ifdef TRE_DEBUG 1961 for (i = 0; i < parse_ctx.submatch_id; i++) 1962 DPRINT(("pmatch[%d] = {t%d, t%d}\n", 1963 i, submatch_data[i].so_tag, submatch_data[i].eo_tag)); 1964 for (i = 0; i < tnfa->num_tags; i++) 1965 DPRINT(("t%d is %s\n", i, 1966 tag_directions[i] == TRE_TAG_MINIMIZE ? 1967 "minimized" : "maximized")); 1968#endif /* TRE_DEBUG */ 1969 } 1970 1971 /* Expand iteration nodes. */ 1972 errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position, 1973 tag_directions, &tnfa->params_depth); 1974 if (errcode != REG_OK) 1975 ERROR_EXIT(errcode); 1976 1977 /* Add a dummy node for the final state. 1978 XXX - For certain patterns this dummy node can be optimized away, 1979 for example "a*" or "ab*". Figure out a simple way to detect 1980 this possibility. */ 1981 tmp_ast_l = tree; 1982 tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++); 1983 if (tmp_ast_r == NULL) 1984 ERROR_EXIT(REG_ESPACE); 1985 1986 tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r); 1987 if (tree == NULL) 1988 ERROR_EXIT(REG_ESPACE); 1989 1990#ifdef TRE_DEBUG 1991 tre_ast_print(tree); 1992 DPRINT(("Number of states: %d\n", parse_ctx.position)); 1993#endif /* TRE_DEBUG */ 1994 1995 errcode = tre_compute_nfl(mem, stack, tree); 1996 if (errcode != REG_OK) 1997 ERROR_EXIT(errcode); 1998 1999 counts = xmalloc(sizeof(int) * parse_ctx.position); 2000 if (counts == NULL) 2001 ERROR_EXIT(REG_ESPACE); 2002 2003 offs = xmalloc(sizeof(int) * parse_ctx.position); 2004 if (offs == NULL) 2005 ERROR_EXIT(REG_ESPACE); 2006 2007 for (i = 0; i < parse_ctx.position; i++) 2008 counts[i] = 0; 2009 tre_ast_to_tnfa(tree, NULL, counts, NULL); 2010 2011 add = 0; 2012 for (i = 0; i < parse_ctx.position; i++) 2013 { 2014 offs[i] = add; 2015 add += counts[i] + 1; 2016 counts[i] = 0; 2017 } 2018 transitions = xcalloc((unsigned)add + 1, sizeof(*transitions)); 2019 if (transitions == NULL) 2020 ERROR_EXIT(REG_ESPACE); 2021 tnfa->transitions = transitions; 2022 tnfa->num_transitions = add; 2023 2024 DPRINT(("Converting to TNFA:\n")); 2025 errcode = tre_ast_to_tnfa(tree, transitions, counts, offs); 2026 if (errcode != REG_OK) 2027 ERROR_EXIT(errcode); 2028 2029 /* If in eight bit mode, compute a table of characters that can be the 2030 first character of a match. */ 2031 tnfa->first_char = -1; 2032 if (TRE_MB_CUR_MAX == 1 && !tmp_ast_l->nullable) 2033 { 2034 int count = 0; 2035 tre_cint_t k; 2036 DPRINT(("Characters that can start a match:")); 2037 tnfa->firstpos_chars = xcalloc(256, sizeof(char)); 2038 if (tnfa->firstpos_chars == NULL) 2039 ERROR_EXIT(REG_ESPACE); 2040 for (p = tree->firstpos; p->position >= 0; p++) 2041 { 2042 tre_tnfa_transition_t *j = transitions + offs[p->position]; 2043 while (j->state != NULL) 2044 { 2045 for (k = j->code_min; k <= j->code_max && k < 256; k++) 2046 { 2047 DPRINT((" %d", k)); 2048 tnfa->firstpos_chars[k] = 1; 2049 count++; 2050 } 2051 j++; 2052 } 2053 } 2054 DPRINT(("\n")); 2055#define TRE_OPTIMIZE_FIRST_CHAR 1 2056#if TRE_OPTIMIZE_FIRST_CHAR 2057 if (count == 1) 2058 { 2059 for (k = 0; k < 256; k++) 2060 if (tnfa->firstpos_chars[k]) 2061 { 2062 DPRINT(("first char must be %d\n", k)); 2063 tnfa->first_char = k; 2064 xfree(tnfa->firstpos_chars); 2065 tnfa->firstpos_chars = NULL; 2066 break; 2067 } 2068 } 2069#endif 2070 2071 } 2072 else 2073 tnfa->firstpos_chars = NULL; 2074 2075 2076 p = tree->firstpos; 2077 i = 0; 2078 while (p->position >= 0) 2079 { 2080 i++; 2081 2082#ifdef TRE_DEBUG 2083 { 2084 int *tags; 2085 DPRINT(("initial: %d", p->position)); 2086 tags = p->tags; 2087 if (tags != NULL) 2088 { 2089 if (*tags >= 0) 2090 DPRINT(("/")); 2091 while (*tags >= 0) 2092 { 2093 DPRINT(("%d", *tags)); 2094 tags++; 2095 if (*tags >= 0) 2096 DPRINT((",")); 2097 } 2098 } 2099 DPRINT((", assert %d", p->assertions)); 2100 if (p->params) 2101 { 2102 DPRINT((", ")); 2103 tre_print_params(p->params); 2104 } 2105 DPRINT(("\n")); 2106 } 2107#endif /* TRE_DEBUG */ 2108 2109 p++; 2110 } 2111 2112 initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t)); 2113 if (initial == NULL) 2114 ERROR_EXIT(REG_ESPACE); 2115 tnfa->initial = initial; 2116 2117 i = 0; 2118 for (p = tree->firstpos; p->position >= 0; p++) 2119 { 2120 initial[i].state = transitions + offs[p->position]; 2121 initial[i].state_id = p->position; 2122 initial[i].tags = NULL; 2123 /* Copy the arrays p->tags, and p->params, they are allocated 2124 from a tre_mem object. */ 2125 if (p->tags) 2126 { 2127 int j; 2128 for (j = 0; p->tags[j] >= 0; j++); 2129 initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1)); 2130 if (!initial[i].tags) 2131 ERROR_EXIT(REG_ESPACE); 2132 memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1)); 2133 } 2134 initial[i].params = NULL; 2135 if (p->params) 2136 { 2137 initial[i].params = xmalloc(sizeof(*p->params) * TRE_PARAM_LAST); 2138 if (!initial[i].params) 2139 ERROR_EXIT(REG_ESPACE); 2140 memcpy(initial[i].params, p->params, 2141 sizeof(*p->params) * TRE_PARAM_LAST); 2142 } 2143 initial[i].assertions = p->assertions; 2144 i++; 2145 } 2146 initial[i].state = NULL; 2147 2148 tnfa->num_transitions = add; 2149 tnfa->final = transitions + offs[tree->lastpos[0].position]; 2150 tnfa->num_states = parse_ctx.position; 2151 tnfa->cflags = cflags; 2152 2153 DPRINT(("final state %p\n", (void *)tnfa->final)); 2154 2155 tre_mem_destroy(mem); 2156 tre_stack_destroy(stack); 2157 xfree(counts); 2158 xfree(offs); 2159 2160 preg->TRE_REGEX_T_FIELD = (void *)tnfa; 2161 return REG_OK; 2162 2163 error_exit: 2164 /* Free everything that was allocated and return the error code. */ 2165 tre_mem_destroy(mem); 2166 if (stack != NULL) 2167 tre_stack_destroy(stack); 2168 if (counts != NULL) 2169 xfree(counts); 2170 if (offs != NULL) 2171 xfree(offs); 2172 preg->TRE_REGEX_T_FIELD = (void *)tnfa; 2173 tre_free(preg); 2174 return errcode; 2175} 2176 2177 2178 2179 2180void 2181tre_free(regex_t *preg) 2182{ 2183 tre_tnfa_t *tnfa; 2184 unsigned int i; 2185 tre_tnfa_transition_t *trans; 2186 2187 tnfa = (void *)preg->TRE_REGEX_T_FIELD; 2188 if (!tnfa) 2189 return; 2190 2191 for (i = 0; i < tnfa->num_transitions; i++) 2192 if (tnfa->transitions[i].state) 2193 { 2194 if (tnfa->transitions[i].tags) 2195 xfree(tnfa->transitions[i].tags); 2196 if (tnfa->transitions[i].neg_classes) 2197 xfree(tnfa->transitions[i].neg_classes); 2198 if (tnfa->transitions[i].params) 2199 xfree(tnfa->transitions[i].params); 2200 } 2201 if (tnfa->transitions) 2202 xfree(tnfa->transitions); 2203 2204 if (tnfa->initial) 2205 { 2206 for (trans = tnfa->initial; trans->state; trans++) 2207 { 2208 if (trans->tags) 2209 xfree(trans->tags); 2210 if (trans->params) 2211 xfree(trans->params); 2212 } 2213 xfree(tnfa->initial); 2214 } 2215 2216 if (tnfa->submatch_data) 2217 { 2218 for (i = 0; i < tnfa->num_submatches; i++) 2219 if (tnfa->submatch_data[i].parents) 2220 xfree(tnfa->submatch_data[i].parents); 2221 xfree(tnfa->submatch_data); 2222 } 2223 2224 if (tnfa->tag_directions) 2225 xfree(tnfa->tag_directions); 2226 if (tnfa->firstpos_chars) 2227 xfree(tnfa->firstpos_chars); 2228 if (tnfa->minimal_tags) 2229 xfree(tnfa->minimal_tags); 2230 xfree(tnfa); 2231} 2232 2233char * 2234tre_version(void) 2235{ 2236 static char str[256]; 2237 char *version; 2238 2239 if (str[0] == 0) 2240 { 2241 (void) tre_config(TRE_CONFIG_VERSION, &version); 2242 (void) snprintf(str, sizeof(str), "TRE %s (BSD)", version); 2243 } 2244 return str; 2245} 2246 2247int 2248tre_config(int query, void *result) 2249{ 2250 int *int_result = result; 2251 const char **string_result = result; 2252 2253 switch (query) 2254 { 2255 case TRE_CONFIG_APPROX: 2256#ifdef TRE_APPROX 2257 *int_result = 1; 2258#else /* !TRE_APPROX */ 2259 *int_result = 0; 2260#endif /* !TRE_APPROX */ 2261 return REG_OK; 2262 2263 case TRE_CONFIG_WCHAR: 2264#ifdef TRE_WCHAR 2265 *int_result = 1; 2266#else /* !TRE_WCHAR */ 2267 *int_result = 0; 2268#endif /* !TRE_WCHAR */ 2269 return REG_OK; 2270 2271 case TRE_CONFIG_MULTIBYTE: 2272#ifdef TRE_MULTIBYTE 2273 *int_result = 1; 2274#else /* !TRE_MULTIBYTE */ 2275 *int_result = 0; 2276#endif /* !TRE_MULTIBYTE */ 2277 return REG_OK; 2278 2279 case TRE_CONFIG_SYSTEM_ABI: 2280#ifdef TRE_CONFIG_SYSTEM_ABI 2281 *int_result = 1; 2282#else /* !TRE_CONFIG_SYSTEM_ABI */ 2283 *int_result = 0; 2284#endif /* !TRE_CONFIG_SYSTEM_ABI */ 2285 return REG_OK; 2286 2287 case TRE_CONFIG_VERSION: 2288 *string_result = TRE_VERSION; 2289 return REG_OK; 2290 } 2291 2292 return REG_NOMATCH; 2293} 2294 2295 2296/* EOF */ 2297