1/* dfa - DFA construction routines */ 2 3/*- 4 * Copyright (c) 1990 The Regents of the University of California. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Vern Paxson. 9 * 10 * The United States Government has rights in this work pursuant 11 * to contract no. DE-AC03-76SF00098 between the United States 12 * Department of Energy and the University of California. 13 * 14 * Redistribution and use in source and binary forms with or without 15 * modification are permitted provided that: (1) source distributions retain 16 * this entire copyright notice and comment, and (2) distributions including 17 * binaries display the following acknowledgement: ``This product includes 18 * software developed by the University of California, Berkeley and its 19 * contributors'' in the documentation or other materials provided with the 20 * distribution and in all advertising materials mentioning features or use 21 * of this software. Neither the name of the University nor the names of 22 * its contributors may be used to endorse or promote products derived from 23 * this software without specific prior written permission. 24 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED 25 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 26 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 27 */ 28 29/* $Header: /ramdisk/repositories/20_cvs_clean_up/2011-02-11_sj/src/router/flex/dfa.c,v 1.1.1.1 2001-04-08 23:53:37 mhuang Exp $ */ 30 31#include "flexdef.h" 32 33 34/* declare functions that have forward references */ 35 36void dump_associated_rules PROTO((FILE*, int)); 37void dump_transitions PROTO((FILE*, int[])); 38void sympartition PROTO((int[], int, int[], int[])); 39int symfollowset PROTO((int[], int, int, int[])); 40 41 42/* check_for_backing_up - check a DFA state for backing up 43 * 44 * synopsis 45 * void check_for_backing_up( int ds, int state[numecs] ); 46 * 47 * ds is the number of the state to check and state[] is its out-transitions, 48 * indexed by equivalence class. 49 */ 50 51void check_for_backing_up( ds, state ) 52int ds; 53int state[]; 54 { 55 if ( (reject && ! dfaacc[ds].dfaacc_set) || 56 (! reject && ! dfaacc[ds].dfaacc_state) ) 57 { /* state is non-accepting */ 58 ++num_backing_up; 59 60 if ( backing_up_report ) 61 { 62 fprintf( backing_up_file, 63 _( "State #%d is non-accepting -\n" ), ds ); 64 65 /* identify the state */ 66 dump_associated_rules( backing_up_file, ds ); 67 68 /* Now identify it further using the out- and 69 * jam-transitions. 70 */ 71 dump_transitions( backing_up_file, state ); 72 73 putc( '\n', backing_up_file ); 74 } 75 } 76 } 77 78 79/* check_trailing_context - check to see if NFA state set constitutes 80 * "dangerous" trailing context 81 * 82 * synopsis 83 * void check_trailing_context( int nfa_states[num_states+1], int num_states, 84 * int accset[nacc+1], int nacc ); 85 * 86 * NOTES 87 * Trailing context is "dangerous" if both the head and the trailing 88 * part are of variable size \and/ there's a DFA state which contains 89 * both an accepting state for the head part of the rule and NFA states 90 * which occur after the beginning of the trailing context. 91 * 92 * When such a rule is matched, it's impossible to tell if having been 93 * in the DFA state indicates the beginning of the trailing context or 94 * further-along scanning of the pattern. In these cases, a warning 95 * message is issued. 96 * 97 * nfa_states[1 .. num_states] is the list of NFA states in the DFA. 98 * accset[1 .. nacc] is the list of accepting numbers for the DFA state. 99 */ 100 101void check_trailing_context( nfa_states, num_states, accset, nacc ) 102int *nfa_states, num_states; 103int *accset; 104int nacc; 105 { 106 register int i, j; 107 108 for ( i = 1; i <= num_states; ++i ) 109 { 110 int ns = nfa_states[i]; 111 register int type = state_type[ns]; 112 register int ar = assoc_rule[ns]; 113 114 if ( type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE ) 115 { /* do nothing */ 116 } 117 118 else if ( type == STATE_TRAILING_CONTEXT ) 119 { 120 /* Potential trouble. Scan set of accepting numbers 121 * for the one marking the end of the "head". We 122 * assume that this looping will be fairly cheap 123 * since it's rare that an accepting number set 124 * is large. 125 */ 126 for ( j = 1; j <= nacc; ++j ) 127 if ( accset[j] & YY_TRAILING_HEAD_MASK ) 128 { 129 line_warning( 130 _( "dangerous trailing context" ), 131 rule_linenum[ar] ); 132 return; 133 } 134 } 135 } 136 } 137 138 139/* dump_associated_rules - list the rules associated with a DFA state 140 * 141 * Goes through the set of NFA states associated with the DFA and 142 * extracts the first MAX_ASSOC_RULES unique rules, sorts them, 143 * and writes a report to the given file. 144 */ 145 146void dump_associated_rules( file, ds ) 147FILE *file; 148int ds; 149 { 150 register int i, j; 151 register int num_associated_rules = 0; 152 int rule_set[MAX_ASSOC_RULES + 1]; 153 int *dset = dss[ds]; 154 int size = dfasiz[ds]; 155 156 for ( i = 1; i <= size; ++i ) 157 { 158 register int rule_num = rule_linenum[assoc_rule[dset[i]]]; 159 160 for ( j = 1; j <= num_associated_rules; ++j ) 161 if ( rule_num == rule_set[j] ) 162 break; 163 164 if ( j > num_associated_rules ) 165 { /* new rule */ 166 if ( num_associated_rules < MAX_ASSOC_RULES ) 167 rule_set[++num_associated_rules] = rule_num; 168 } 169 } 170 171 bubble( rule_set, num_associated_rules ); 172 173 fprintf( file, _( " associated rule line numbers:" ) ); 174 175 for ( i = 1; i <= num_associated_rules; ++i ) 176 { 177 if ( i % 8 == 1 ) 178 putc( '\n', file ); 179 180 fprintf( file, "\t%d", rule_set[i] ); 181 } 182 183 putc( '\n', file ); 184 } 185 186 187/* dump_transitions - list the transitions associated with a DFA state 188 * 189 * synopsis 190 * dump_transitions( FILE *file, int state[numecs] ); 191 * 192 * Goes through the set of out-transitions and lists them in human-readable 193 * form (i.e., not as equivalence classes); also lists jam transitions 194 * (i.e., all those which are not out-transitions, plus EOF). The dump 195 * is done to the given file. 196 */ 197 198void dump_transitions( file, state ) 199FILE *file; 200int state[]; 201 { 202 register int i, ec; 203 int out_char_set[CSIZE]; 204 205 for ( i = 0; i < csize; ++i ) 206 { 207 ec = ABS( ecgroup[i] ); 208 out_char_set[i] = state[ec]; 209 } 210 211 fprintf( file, _( " out-transitions: " ) ); 212 213 list_character_set( file, out_char_set ); 214 215 /* now invert the members of the set to get the jam transitions */ 216 for ( i = 0; i < csize; ++i ) 217 out_char_set[i] = ! out_char_set[i]; 218 219 fprintf( file, _( "\n jam-transitions: EOF " ) ); 220 221 list_character_set( file, out_char_set ); 222 223 putc( '\n', file ); 224 } 225 226 227/* epsclosure - construct the epsilon closure of a set of ndfa states 228 * 229 * synopsis 230 * int *epsclosure( int t[num_states], int *numstates_addr, 231 * int accset[num_rules+1], int *nacc_addr, 232 * int *hashval_addr ); 233 * 234 * NOTES 235 * The epsilon closure is the set of all states reachable by an arbitrary 236 * number of epsilon transitions, which themselves do not have epsilon 237 * transitions going out, unioned with the set of states which have non-null 238 * accepting numbers. t is an array of size numstates of nfa state numbers. 239 * Upon return, t holds the epsilon closure and *numstates_addr is updated. 240 * accset holds a list of the accepting numbers, and the size of accset is 241 * given by *nacc_addr. t may be subjected to reallocation if it is not 242 * large enough to hold the epsilon closure. 243 * 244 * hashval is the hash value for the dfa corresponding to the state set. 245 */ 246 247int *epsclosure( t, ns_addr, accset, nacc_addr, hv_addr ) 248int *t, *ns_addr, accset[], *nacc_addr, *hv_addr; 249 { 250 register int stkpos, ns, tsp; 251 int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum; 252 int stkend, nstate; 253 static int did_stk_init = false, *stk; 254 255#define MARK_STATE(state) \ 256trans1[state] = trans1[state] - MARKER_DIFFERENCE; 257 258#define IS_MARKED(state) (trans1[state] < 0) 259 260#define UNMARK_STATE(state) \ 261trans1[state] = trans1[state] + MARKER_DIFFERENCE; 262 263#define CHECK_ACCEPT(state) \ 264{ \ 265nfaccnum = accptnum[state]; \ 266if ( nfaccnum != NIL ) \ 267accset[++nacc] = nfaccnum; \ 268} 269 270#define DO_REALLOCATION \ 271{ \ 272current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \ 273++num_reallocs; \ 274t = reallocate_integer_array( t, current_max_dfa_size ); \ 275stk = reallocate_integer_array( stk, current_max_dfa_size ); \ 276} \ 277 278#define PUT_ON_STACK(state) \ 279{ \ 280if ( ++stkend >= current_max_dfa_size ) \ 281DO_REALLOCATION \ 282stk[stkend] = state; \ 283MARK_STATE(state) \ 284} 285 286#define ADD_STATE(state) \ 287{ \ 288if ( ++numstates >= current_max_dfa_size ) \ 289DO_REALLOCATION \ 290t[numstates] = state; \ 291hashval += state; \ 292} 293 294#define STACK_STATE(state) \ 295{ \ 296PUT_ON_STACK(state) \ 297CHECK_ACCEPT(state) \ 298if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \ 299ADD_STATE(state) \ 300} 301 302 303 if ( ! did_stk_init ) 304 { 305 stk = allocate_integer_array( current_max_dfa_size ); 306 did_stk_init = true; 307 } 308 309 nacc = stkend = hashval = 0; 310 311 for ( nstate = 1; nstate <= numstates; ++nstate ) 312 { 313 ns = t[nstate]; 314 315 /* The state could be marked if we've already pushed it onto 316 * the stack. 317 */ 318 if ( ! IS_MARKED(ns) ) 319 { 320 PUT_ON_STACK(ns) 321 CHECK_ACCEPT(ns) 322 hashval += ns; 323 } 324 } 325 326 for ( stkpos = 1; stkpos <= stkend; ++stkpos ) 327 { 328 ns = stk[stkpos]; 329 transsym = transchar[ns]; 330 331 if ( transsym == SYM_EPSILON ) 332 { 333 tsp = trans1[ns] + MARKER_DIFFERENCE; 334 335 if ( tsp != NO_TRANSITION ) 336 { 337 if ( ! IS_MARKED(tsp) ) 338 STACK_STATE(tsp) 339 340 tsp = trans2[ns]; 341 342 if ( tsp != NO_TRANSITION && ! IS_MARKED(tsp) ) 343 STACK_STATE(tsp) 344 } 345 } 346 } 347 348 /* Clear out "visit" markers. */ 349 350 for ( stkpos = 1; stkpos <= stkend; ++stkpos ) 351 { 352 if ( IS_MARKED(stk[stkpos]) ) 353 UNMARK_STATE(stk[stkpos]) 354 else 355 flexfatal( 356 _( "consistency check failed in epsclosure()" ) ); 357 } 358 359 *ns_addr = numstates; 360 *hv_addr = hashval; 361 *nacc_addr = nacc; 362 363 return t; 364 } 365 366 367/* increase_max_dfas - increase the maximum number of DFAs */ 368 369void increase_max_dfas() 370 { 371 current_max_dfas += MAX_DFAS_INCREMENT; 372 373 ++num_reallocs; 374 375 base = reallocate_integer_array( base, current_max_dfas ); 376 def = reallocate_integer_array( def, current_max_dfas ); 377 dfasiz = reallocate_integer_array( dfasiz, current_max_dfas ); 378 accsiz = reallocate_integer_array( accsiz, current_max_dfas ); 379 dhash = reallocate_integer_array( dhash, current_max_dfas ); 380 dss = reallocate_int_ptr_array( dss, current_max_dfas ); 381 dfaacc = reallocate_dfaacc_union( dfaacc, current_max_dfas ); 382 383 if ( nultrans ) 384 nultrans = 385 reallocate_integer_array( nultrans, current_max_dfas ); 386 } 387 388 389/* ntod - convert an ndfa to a dfa 390 * 391 * Creates the dfa corresponding to the ndfa we've constructed. The 392 * dfa starts out in state #1. 393 */ 394 395void ntod() 396 { 397 int *accset, ds, nacc, newds; 398 int sym, hashval, numstates, dsize; 399 int num_full_table_rows; /* used only for -f */ 400 int *nset, *dset; 401 int targptr, totaltrans, i, comstate, comfreq, targ; 402 int symlist[CSIZE + 1]; 403 int num_start_states; 404 int todo_head, todo_next; 405 406 /* Note that the following are indexed by *equivalence classes* 407 * and not by characters. Since equivalence classes are indexed 408 * beginning with 1, even if the scanner accepts NUL's, this 409 * means that (since every character is potentially in its own 410 * equivalence class) these arrays must have room for indices 411 * from 1 to CSIZE, so their size must be CSIZE + 1. 412 */ 413 int duplist[CSIZE + 1], state[CSIZE + 1]; 414 int targfreq[CSIZE + 1], targstate[CSIZE + 1]; 415 416 accset = allocate_integer_array( num_rules + 1 ); 417 nset = allocate_integer_array( current_max_dfa_size ); 418 419 /* The "todo" queue is represented by the head, which is the DFA 420 * state currently being processed, and the "next", which is the 421 * next DFA state number available (not in use). We depend on the 422 * fact that snstods() returns DFA's \in increasing order/, and thus 423 * need only know the bounds of the dfas to be processed. 424 */ 425 todo_head = todo_next = 0; 426 427 for ( i = 0; i <= csize; ++i ) 428 { 429 duplist[i] = NIL; 430 symlist[i] = false; 431 } 432 433 for ( i = 0; i <= num_rules; ++i ) 434 accset[i] = NIL; 435 436 if ( trace ) 437 { 438 dumpnfa( scset[1] ); 439 fputs( _( "\n\nDFA Dump:\n\n" ), stderr ); 440 } 441 442 inittbl(); 443 444 /* Check to see whether we should build a separate table for 445 * transitions on NUL characters. We don't do this for full-speed 446 * (-F) scanners, since for them we don't have a simple state 447 * number lying around with which to index the table. We also 448 * don't bother doing it for scanners unless (1) NUL is in its own 449 * equivalence class (indicated by a positive value of 450 * ecgroup[NUL]), (2) NUL's equivalence class is the last 451 * equivalence class, and (3) the number of equivalence classes is 452 * the same as the number of characters. This latter case comes 453 * about when useecs is false or when it's true but every character 454 * still manages to land in its own class (unlikely, but it's 455 * cheap to check for). If all these things are true then the 456 * character code needed to represent NUL's equivalence class for 457 * indexing the tables is going to take one more bit than the 458 * number of characters, and therefore we won't be assured of 459 * being able to fit it into a YY_CHAR variable. This rules out 460 * storing the transitions in a compressed table, since the code 461 * for interpreting them uses a YY_CHAR variable (perhaps it 462 * should just use an integer, though; this is worth pondering ... 463 * ###). 464 * 465 * Finally, for full tables, we want the number of entries in the 466 * table to be a power of two so the array references go fast (it 467 * will just take a shift to compute the major index). If 468 * encoding NUL's transitions in the table will spoil this, we 469 * give it its own table (note that this will be the case if we're 470 * not using equivalence classes). 471 */ 472 473 /* Note that the test for ecgroup[0] == numecs below accomplishes 474 * both (1) and (2) above 475 */ 476 if ( ! fullspd && ecgroup[0] == numecs ) 477 { 478 /* NUL is alone in its equivalence class, which is the 479 * last one. 480 */ 481 int use_NUL_table = (numecs == csize); 482 483 if ( fulltbl && ! use_NUL_table ) 484 { 485 /* We still may want to use the table if numecs 486 * is a power of 2. 487 */ 488 int power_of_two; 489 490 for ( power_of_two = 1; power_of_two <= csize; 491 power_of_two *= 2 ) 492 if ( numecs == power_of_two ) 493 { 494 use_NUL_table = true; 495 break; 496 } 497 } 498 499 if ( use_NUL_table ) 500 nultrans = allocate_integer_array( current_max_dfas ); 501 502 /* From now on, nultrans != nil indicates that we're 503 * saving null transitions for later, separate encoding. 504 */ 505 } 506 507 508 if ( fullspd ) 509 { 510 for ( i = 0; i <= numecs; ++i ) 511 state[i] = 0; 512 513 place_state( state, 0, 0 ); 514 dfaacc[0].dfaacc_state = 0; 515 } 516 517 else if ( fulltbl ) 518 { 519 if ( nultrans ) 520 /* We won't be including NUL's transitions in the 521 * table, so build it for entries from 0 .. numecs - 1. 522 */ 523 num_full_table_rows = numecs; 524 525 else 526 /* Take into account the fact that we'll be including 527 * the NUL entries in the transition table. Build it 528 * from 0 .. numecs. 529 */ 530 num_full_table_rows = numecs + 1; 531 532 /* Unless -Ca, declare it "short" because it's a real 533 * long-shot that that won't be large enough. 534 */ 535 out_str_dec( "static yyconst %s yy_nxt[][%d] =\n {\n", 536 /* '}' so vi doesn't get too confused */ 537 long_align ? "long" : "short", num_full_table_rows ); 538 539 outn( " {" ); 540 541 /* Generate 0 entries for state #0. */ 542 for ( i = 0; i < num_full_table_rows; ++i ) 543 mk2data( 0 ); 544 545 dataflush(); 546 outn( " },\n" ); 547 } 548 549 /* Create the first states. */ 550 551 num_start_states = lastsc * 2; 552 553 for ( i = 1; i <= num_start_states; ++i ) 554 { 555 numstates = 1; 556 557 /* For each start condition, make one state for the case when 558 * we're at the beginning of the line (the '^' operator) and 559 * one for the case when we're not. 560 */ 561 if ( i % 2 == 1 ) 562 nset[numstates] = scset[(i / 2) + 1]; 563 else 564 nset[numstates] = 565 mkbranch( scbol[i / 2], scset[i / 2] ); 566 567 nset = epsclosure( nset, &numstates, accset, &nacc, &hashval ); 568 569 if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) ) 570 { 571 numas += nacc; 572 totnst += numstates; 573 ++todo_next; 574 575 if ( variable_trailing_context_rules && nacc > 0 ) 576 check_trailing_context( nset, numstates, 577 accset, nacc ); 578 } 579 } 580 581 if ( ! fullspd ) 582 { 583 if ( ! snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) ) 584 flexfatal( 585 _( "could not create unique end-of-buffer state" ) ); 586 587 ++numas; 588 ++num_start_states; 589 ++todo_next; 590 } 591 592 while ( todo_head < todo_next ) 593 { 594 targptr = 0; 595 totaltrans = 0; 596 597 for ( i = 1; i <= numecs; ++i ) 598 state[i] = 0; 599 600 ds = ++todo_head; 601 602 dset = dss[ds]; 603 dsize = dfasiz[ds]; 604 605 if ( trace ) 606 fprintf( stderr, _( "state # %d:\n" ), ds ); 607 608 sympartition( dset, dsize, symlist, duplist ); 609 610 for ( sym = 1; sym <= numecs; ++sym ) 611 { 612 if ( symlist[sym] ) 613 { 614 symlist[sym] = 0; 615 616 if ( duplist[sym] == NIL ) 617 { 618 /* Symbol has unique out-transitions. */ 619 numstates = symfollowset( dset, dsize, 620 sym, nset ); 621 nset = epsclosure( nset, &numstates, 622 accset, &nacc, &hashval ); 623 624 if ( snstods( nset, numstates, accset, 625 nacc, hashval, &newds ) ) 626 { 627 totnst = totnst + numstates; 628 ++todo_next; 629 numas += nacc; 630 631 if ( 632 variable_trailing_context_rules && 633 nacc > 0 ) 634 check_trailing_context( 635 nset, numstates, 636 accset, nacc ); 637 } 638 639 state[sym] = newds; 640 641 if ( trace ) 642 fprintf( stderr, "\t%d\t%d\n", 643 sym, newds ); 644 645 targfreq[++targptr] = 1; 646 targstate[targptr] = newds; 647 ++numuniq; 648 } 649 650 else 651 { 652 /* sym's equivalence class has the same 653 * transitions as duplist(sym)'s 654 * equivalence class. 655 */ 656 targ = state[duplist[sym]]; 657 state[sym] = targ; 658 659 if ( trace ) 660 fprintf( stderr, "\t%d\t%d\n", 661 sym, targ ); 662 663 /* Update frequency count for 664 * destination state. 665 */ 666 667 i = 0; 668 while ( targstate[++i] != targ ) 669 ; 670 671 ++targfreq[i]; 672 ++numdup; 673 } 674 675 ++totaltrans; 676 duplist[sym] = NIL; 677 } 678 } 679 680 if ( caseins && ! useecs ) 681 { 682 register int j; 683 684 for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j ) 685 { 686 if ( state[i] == 0 && state[j] != 0 ) 687 /* We're adding a transition. */ 688 ++totaltrans; 689 690 else if ( state[i] != 0 && state[j] == 0 ) 691 /* We're taking away a transition. */ 692 --totaltrans; 693 694 state[i] = state[j]; 695 } 696 } 697 698 numsnpairs += totaltrans; 699 700 if ( ds > num_start_states ) 701 check_for_backing_up( ds, state ); 702 703 if ( nultrans ) 704 { 705 nultrans[ds] = state[NUL_ec]; 706 state[NUL_ec] = 0; /* remove transition */ 707 } 708 709 if ( fulltbl ) 710 { 711 outn( " {" ); 712 713 /* Supply array's 0-element. */ 714 if ( ds == end_of_buffer_state ) 715 mk2data( -end_of_buffer_state ); 716 else 717 mk2data( end_of_buffer_state ); 718 719 for ( i = 1; i < num_full_table_rows; ++i ) 720 /* Jams are marked by negative of state 721 * number. 722 */ 723 mk2data( state[i] ? state[i] : -ds ); 724 725 dataflush(); 726 outn( " },\n" ); 727 } 728 729 else if ( fullspd ) 730 place_state( state, ds, totaltrans ); 731 732 else if ( ds == end_of_buffer_state ) 733 /* Special case this state to make sure it does what 734 * it's supposed to, i.e., jam on end-of-buffer. 735 */ 736 stack1( ds, 0, 0, JAMSTATE ); 737 738 else /* normal, compressed state */ 739 { 740 /* Determine which destination state is the most 741 * common, and how many transitions to it there are. 742 */ 743 744 comfreq = 0; 745 comstate = 0; 746 747 for ( i = 1; i <= targptr; ++i ) 748 if ( targfreq[i] > comfreq ) 749 { 750 comfreq = targfreq[i]; 751 comstate = targstate[i]; 752 } 753 754 bldtbl( state, ds, totaltrans, comstate, comfreq ); 755 } 756 } 757 758 if ( fulltbl ) 759 dataend(); 760 761 else if ( ! fullspd ) 762 { 763 cmptmps(); /* create compressed template entries */ 764 765 /* Create tables for all the states with only one 766 * out-transition. 767 */ 768 while ( onesp > 0 ) 769 { 770 mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp], 771 onedef[onesp] ); 772 --onesp; 773 } 774 775 mkdeftbl(); 776 } 777 778 flex_free( (void *) accset ); 779 flex_free( (void *) nset ); 780 } 781 782 783/* snstods - converts a set of ndfa states into a dfa state 784 * 785 * synopsis 786 * is_new_state = snstods( int sns[numstates], int numstates, 787 * int accset[num_rules+1], int nacc, 788 * int hashval, int *newds_addr ); 789 * 790 * On return, the dfa state number is in newds. 791 */ 792 793int snstods( sns, numstates, accset, nacc, hashval, newds_addr ) 794int sns[], numstates, accset[], nacc, hashval, *newds_addr; 795 { 796 int didsort = 0; 797 register int i, j; 798 int newds, *oldsns; 799 800 for ( i = 1; i <= lastdfa; ++i ) 801 if ( hashval == dhash[i] ) 802 { 803 if ( numstates == dfasiz[i] ) 804 { 805 oldsns = dss[i]; 806 807 if ( ! didsort ) 808 { 809 /* We sort the states in sns so we 810 * can compare it to oldsns quickly. 811 * We use bubble because there probably 812 * aren't very many states. 813 */ 814 bubble( sns, numstates ); 815 didsort = 1; 816 } 817 818 for ( j = 1; j <= numstates; ++j ) 819 if ( sns[j] != oldsns[j] ) 820 break; 821 822 if ( j > numstates ) 823 { 824 ++dfaeql; 825 *newds_addr = i; 826 return 0; 827 } 828 829 ++hshcol; 830 } 831 832 else 833 ++hshsave; 834 } 835 836 /* Make a new dfa. */ 837 838 if ( ++lastdfa >= current_max_dfas ) 839 increase_max_dfas(); 840 841 newds = lastdfa; 842 843 dss[newds] = allocate_integer_array( numstates + 1 ); 844 845 /* If we haven't already sorted the states in sns, we do so now, 846 * so that future comparisons with it can be made quickly. 847 */ 848 849 if ( ! didsort ) 850 bubble( sns, numstates ); 851 852 for ( i = 1; i <= numstates; ++i ) 853 dss[newds][i] = sns[i]; 854 855 dfasiz[newds] = numstates; 856 dhash[newds] = hashval; 857 858 if ( nacc == 0 ) 859 { 860 if ( reject ) 861 dfaacc[newds].dfaacc_set = (int *) 0; 862 else 863 dfaacc[newds].dfaacc_state = 0; 864 865 accsiz[newds] = 0; 866 } 867 868 else if ( reject ) 869 { 870 /* We sort the accepting set in increasing order so the 871 * disambiguating rule that the first rule listed is considered 872 * match in the event of ties will work. We use a bubble 873 * sort since the list is probably quite small. 874 */ 875 876 bubble( accset, nacc ); 877 878 dfaacc[newds].dfaacc_set = allocate_integer_array( nacc + 1 ); 879 880 /* Save the accepting set for later */ 881 for ( i = 1; i <= nacc; ++i ) 882 { 883 dfaacc[newds].dfaacc_set[i] = accset[i]; 884 885 if ( accset[i] <= num_rules ) 886 /* Who knows, perhaps a REJECT can yield 887 * this rule. 888 */ 889 rule_useful[accset[i]] = true; 890 } 891 892 accsiz[newds] = nacc; 893 } 894 895 else 896 { 897 /* Find lowest numbered rule so the disambiguating rule 898 * will work. 899 */ 900 j = num_rules + 1; 901 902 for ( i = 1; i <= nacc; ++i ) 903 if ( accset[i] < j ) 904 j = accset[i]; 905 906 dfaacc[newds].dfaacc_state = j; 907 908 if ( j <= num_rules ) 909 rule_useful[j] = true; 910 } 911 912 *newds_addr = newds; 913 914 return 1; 915 } 916 917 918/* symfollowset - follow the symbol transitions one step 919 * 920 * synopsis 921 * numstates = symfollowset( int ds[current_max_dfa_size], int dsize, 922 * int transsym, int nset[current_max_dfa_size] ); 923 */ 924 925int symfollowset( ds, dsize, transsym, nset ) 926int ds[], dsize, transsym, nset[]; 927 { 928 int ns, tsp, sym, i, j, lenccl, ch, numstates, ccllist; 929 930 numstates = 0; 931 932 for ( i = 1; i <= dsize; ++i ) 933 { /* for each nfa state ns in the state set of ds */ 934 ns = ds[i]; 935 sym = transchar[ns]; 936 tsp = trans1[ns]; 937 938 if ( sym < 0 ) 939 { /* it's a character class */ 940 sym = -sym; 941 ccllist = cclmap[sym]; 942 lenccl = ccllen[sym]; 943 944 if ( cclng[sym] ) 945 { 946 for ( j = 0; j < lenccl; ++j ) 947 { 948 /* Loop through negated character 949 * class. 950 */ 951 ch = ccltbl[ccllist + j]; 952 953 if ( ch == 0 ) 954 ch = NUL_ec; 955 956 if ( ch > transsym ) 957 /* Transsym isn't in negated 958 * ccl. 959 */ 960 break; 961 962 else if ( ch == transsym ) 963 /* next 2 */ goto bottom; 964 } 965 966 /* Didn't find transsym in ccl. */ 967 nset[++numstates] = tsp; 968 } 969 970 else 971 for ( j = 0; j < lenccl; ++j ) 972 { 973 ch = ccltbl[ccllist + j]; 974 975 if ( ch == 0 ) 976 ch = NUL_ec; 977 978 if ( ch > transsym ) 979 break; 980 else if ( ch == transsym ) 981 { 982 nset[++numstates] = tsp; 983 break; 984 } 985 } 986 } 987 988 else if ( sym >= 'A' && sym <= 'Z' && caseins ) 989 flexfatal( 990 _( "consistency check failed in symfollowset" ) ); 991 992 else if ( sym == SYM_EPSILON ) 993 { /* do nothing */ 994 } 995 996 else if ( ABS( ecgroup[sym] ) == transsym ) 997 nset[++numstates] = tsp; 998 999 bottom: ; 1000 } 1001 1002 return numstates; 1003 } 1004 1005 1006/* sympartition - partition characters with same out-transitions 1007 * 1008 * synopsis 1009 * sympartition( int ds[current_max_dfa_size], int numstates, 1010 * int symlist[numecs], int duplist[numecs] ); 1011 */ 1012 1013void sympartition( ds, numstates, symlist, duplist ) 1014int ds[], numstates; 1015int symlist[], duplist[]; 1016 { 1017 int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich; 1018 1019 /* Partitioning is done by creating equivalence classes for those 1020 * characters which have out-transitions from the given state. Thus 1021 * we are really creating equivalence classes of equivalence classes. 1022 */ 1023 1024 for ( i = 1; i <= numecs; ++i ) 1025 { /* initialize equivalence class list */ 1026 duplist[i] = i - 1; 1027 dupfwd[i] = i + 1; 1028 } 1029 1030 duplist[1] = NIL; 1031 dupfwd[numecs] = NIL; 1032 1033 for ( i = 1; i <= numstates; ++i ) 1034 { 1035 ns = ds[i]; 1036 tch = transchar[ns]; 1037 1038 if ( tch != SYM_EPSILON ) 1039 { 1040 if ( tch < -lastccl || tch >= csize ) 1041 { 1042 flexfatal( 1043 _( "bad transition character detected in sympartition()" ) ); 1044 } 1045 1046 if ( tch >= 0 ) 1047 { /* character transition */ 1048 int ec = ecgroup[tch]; 1049 1050 mkechar( ec, dupfwd, duplist ); 1051 symlist[ec] = 1; 1052 } 1053 1054 else 1055 { /* character class */ 1056 tch = -tch; 1057 1058 lenccl = ccllen[tch]; 1059 cclp = cclmap[tch]; 1060 mkeccl( ccltbl + cclp, lenccl, dupfwd, 1061 duplist, numecs, NUL_ec ); 1062 1063 if ( cclng[tch] ) 1064 { 1065 j = 0; 1066 1067 for ( k = 0; k < lenccl; ++k ) 1068 { 1069 ich = ccltbl[cclp + k]; 1070 1071 if ( ich == 0 ) 1072 ich = NUL_ec; 1073 1074 for ( ++j; j < ich; ++j ) 1075 symlist[j] = 1; 1076 } 1077 1078 for ( ++j; j <= numecs; ++j ) 1079 symlist[j] = 1; 1080 } 1081 1082 else 1083 for ( k = 0; k < lenccl; ++k ) 1084 { 1085 ich = ccltbl[cclp + k]; 1086 1087 if ( ich == 0 ) 1088 ich = NUL_ec; 1089 1090 symlist[ich] = 1; 1091 } 1092 } 1093 } 1094 } 1095 } 1096