1/* dfa - DFA construction routines */
2
3/*-
4 * Copyright (c) 1990 The Regents of the University of California.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Vern Paxson.
9 *
10 * The United States Government has rights in this work pursuant
11 * to contract no. DE-AC03-76SF00098 between the United States
12 * Department of Energy and the University of California.
13 *
14 * Redistribution and use in source and binary forms with or without
15 * modification are permitted provided that: (1) source distributions retain
16 * this entire copyright notice and comment, and (2) distributions including
17 * binaries display the following acknowledgement:  ``This product includes
18 * software developed by the University of California, Berkeley and its
19 * contributors'' in the documentation or other materials provided with the
20 * distribution and in all advertising materials mentioning features or use
21 * of this software.  Neither the name of the University nor the names of
22 * its contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
25 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
26 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
27 */
28
29/* $Header: /projects/cvsroot/src/router/flex/dfa.c,v 1.1.1.1 2001/04/08 23:53:37 mhuang Exp $ */
30
31#include "flexdef.h"
32
33
34/* declare functions that have forward references */
35
36void dump_associated_rules PROTO((FILE*, int));
37void dump_transitions PROTO((FILE*, int[]));
38void sympartition PROTO((int[], int, int[], int[]));
39int symfollowset PROTO((int[], int, int, int[]));
40
41
42/* check_for_backing_up - check a DFA state for backing up
43 *
44 * synopsis
45 *     void check_for_backing_up( int ds, int state[numecs] );
46 *
47 * ds is the number of the state to check and state[] is its out-transitions,
48 * indexed by equivalence class.
49 */
50
51void check_for_backing_up( ds, state )
52int ds;
53int state[];
54	{
55	if ( (reject && ! dfaacc[ds].dfaacc_set) ||
56	     (! reject && ! dfaacc[ds].dfaacc_state) )
57		{ /* state is non-accepting */
58		++num_backing_up;
59
60		if ( backing_up_report )
61			{
62			fprintf( backing_up_file,
63				_( "State #%d is non-accepting -\n" ), ds );
64
65			/* identify the state */
66			dump_associated_rules( backing_up_file, ds );
67
68			/* Now identify it further using the out- and
69			 * jam-transitions.
70			 */
71			dump_transitions( backing_up_file, state );
72
73			putc( '\n', backing_up_file );
74			}
75		}
76	}
77
78
79/* check_trailing_context - check to see if NFA state set constitutes
80 *                          "dangerous" trailing context
81 *
82 * synopsis
83 *    void check_trailing_context( int nfa_states[num_states+1], int num_states,
84 *				int accset[nacc+1], int nacc );
85 *
86 * NOTES
87 *  Trailing context is "dangerous" if both the head and the trailing
88 *  part are of variable size \and/ there's a DFA state which contains
89 *  both an accepting state for the head part of the rule and NFA states
90 *  which occur after the beginning of the trailing context.
91 *
92 *  When such a rule is matched, it's impossible to tell if having been
93 *  in the DFA state indicates the beginning of the trailing context or
94 *  further-along scanning of the pattern.  In these cases, a warning
95 *  message is issued.
96 *
97 *    nfa_states[1 .. num_states] is the list of NFA states in the DFA.
98 *    accset[1 .. nacc] is the list of accepting numbers for the DFA state.
99 */
100
101void check_trailing_context( nfa_states, num_states, accset, nacc )
102int *nfa_states, num_states;
103int *accset;
104int nacc;
105	{
106	register int i, j;
107
108	for ( i = 1; i <= num_states; ++i )
109		{
110		int ns = nfa_states[i];
111		register int type = state_type[ns];
112		register int ar = assoc_rule[ns];
113
114		if ( type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE )
115			{ /* do nothing */
116			}
117
118		else if ( type == STATE_TRAILING_CONTEXT )
119			{
120			/* Potential trouble.  Scan set of accepting numbers
121			 * for the one marking the end of the "head".  We
122			 * assume that this looping will be fairly cheap
123			 * since it's rare that an accepting number set
124			 * is large.
125			 */
126			for ( j = 1; j <= nacc; ++j )
127				if ( accset[j] & YY_TRAILING_HEAD_MASK )
128					{
129					line_warning(
130					_( "dangerous trailing context" ),
131						rule_linenum[ar] );
132					return;
133					}
134			}
135		}
136	}
137
138
139/* dump_associated_rules - list the rules associated with a DFA state
140 *
141 * Goes through the set of NFA states associated with the DFA and
142 * extracts the first MAX_ASSOC_RULES unique rules, sorts them,
143 * and writes a report to the given file.
144 */
145
146void dump_associated_rules( file, ds )
147FILE *file;
148int ds;
149	{
150	register int i, j;
151	register int num_associated_rules = 0;
152	int rule_set[MAX_ASSOC_RULES + 1];
153	int *dset = dss[ds];
154	int size = dfasiz[ds];
155
156	for ( i = 1; i <= size; ++i )
157		{
158		register int rule_num = rule_linenum[assoc_rule[dset[i]]];
159
160		for ( j = 1; j <= num_associated_rules; ++j )
161			if ( rule_num == rule_set[j] )
162				break;
163
164		if ( j > num_associated_rules )
165			{ /* new rule */
166			if ( num_associated_rules < MAX_ASSOC_RULES )
167				rule_set[++num_associated_rules] = rule_num;
168			}
169		}
170
171	bubble( rule_set, num_associated_rules );
172
173	fprintf( file, _( " associated rule line numbers:" ) );
174
175	for ( i = 1; i <= num_associated_rules; ++i )
176		{
177		if ( i % 8 == 1 )
178			putc( '\n', file );
179
180		fprintf( file, "\t%d", rule_set[i] );
181		}
182
183	putc( '\n', file );
184	}
185
186
187/* dump_transitions - list the transitions associated with a DFA state
188 *
189 * synopsis
190 *     dump_transitions( FILE *file, int state[numecs] );
191 *
192 * Goes through the set of out-transitions and lists them in human-readable
193 * form (i.e., not as equivalence classes); also lists jam transitions
194 * (i.e., all those which are not out-transitions, plus EOF).  The dump
195 * is done to the given file.
196 */
197
198void dump_transitions( file, state )
199FILE *file;
200int state[];
201	{
202	register int i, ec;
203	int out_char_set[CSIZE];
204
205	for ( i = 0; i < csize; ++i )
206		{
207		ec = ABS( ecgroup[i] );
208		out_char_set[i] = state[ec];
209		}
210
211	fprintf( file, _( " out-transitions: " ) );
212
213	list_character_set( file, out_char_set );
214
215	/* now invert the members of the set to get the jam transitions */
216	for ( i = 0; i < csize; ++i )
217		out_char_set[i] = ! out_char_set[i];
218
219	fprintf( file, _( "\n jam-transitions: EOF " ) );
220
221	list_character_set( file, out_char_set );
222
223	putc( '\n', file );
224	}
225
226
227/* epsclosure - construct the epsilon closure of a set of ndfa states
228 *
229 * synopsis
230 *    int *epsclosure( int t[num_states], int *numstates_addr,
231 *			int accset[num_rules+1], int *nacc_addr,
232 *			int *hashval_addr );
233 *
234 * NOTES
235 *  The epsilon closure is the set of all states reachable by an arbitrary
236 *  number of epsilon transitions, which themselves do not have epsilon
237 *  transitions going out, unioned with the set of states which have non-null
238 *  accepting numbers.  t is an array of size numstates of nfa state numbers.
239 *  Upon return, t holds the epsilon closure and *numstates_addr is updated.
240 *  accset holds a list of the accepting numbers, and the size of accset is
241 *  given by *nacc_addr.  t may be subjected to reallocation if it is not
242 *  large enough to hold the epsilon closure.
243 *
244 *  hashval is the hash value for the dfa corresponding to the state set.
245 */
246
247int *epsclosure( t, ns_addr, accset, nacc_addr, hv_addr )
248int *t, *ns_addr, accset[], *nacc_addr, *hv_addr;
249	{
250	register int stkpos, ns, tsp;
251	int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum;
252	int stkend, nstate;
253	static int did_stk_init = false, *stk;
254
255#define MARK_STATE(state) \
256trans1[state] = trans1[state] - MARKER_DIFFERENCE;
257
258#define IS_MARKED(state) (trans1[state] < 0)
259
260#define UNMARK_STATE(state) \
261trans1[state] = trans1[state] + MARKER_DIFFERENCE;
262
263#define CHECK_ACCEPT(state) \
264{ \
265nfaccnum = accptnum[state]; \
266if ( nfaccnum != NIL ) \
267accset[++nacc] = nfaccnum; \
268}
269
270#define DO_REALLOCATION \
271{ \
272current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \
273++num_reallocs; \
274t = reallocate_integer_array( t, current_max_dfa_size ); \
275stk = reallocate_integer_array( stk, current_max_dfa_size ); \
276} \
277
278#define PUT_ON_STACK(state) \
279{ \
280if ( ++stkend >= current_max_dfa_size ) \
281DO_REALLOCATION \
282stk[stkend] = state; \
283MARK_STATE(state) \
284}
285
286#define ADD_STATE(state) \
287{ \
288if ( ++numstates >= current_max_dfa_size ) \
289DO_REALLOCATION \
290t[numstates] = state; \
291hashval += state; \
292}
293
294#define STACK_STATE(state) \
295{ \
296PUT_ON_STACK(state) \
297CHECK_ACCEPT(state) \
298if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \
299ADD_STATE(state) \
300}
301
302
303	if ( ! did_stk_init )
304		{
305		stk = allocate_integer_array( current_max_dfa_size );
306		did_stk_init = true;
307		}
308
309	nacc = stkend = hashval = 0;
310
311	for ( nstate = 1; nstate <= numstates; ++nstate )
312		{
313		ns = t[nstate];
314
315		/* The state could be marked if we've already pushed it onto
316		 * the stack.
317		 */
318		if ( ! IS_MARKED(ns) )
319			{
320			PUT_ON_STACK(ns)
321			CHECK_ACCEPT(ns)
322			hashval += ns;
323			}
324		}
325
326	for ( stkpos = 1; stkpos <= stkend; ++stkpos )
327		{
328		ns = stk[stkpos];
329		transsym = transchar[ns];
330
331		if ( transsym == SYM_EPSILON )
332			{
333			tsp = trans1[ns] + MARKER_DIFFERENCE;
334
335			if ( tsp != NO_TRANSITION )
336				{
337				if ( ! IS_MARKED(tsp) )
338					STACK_STATE(tsp)
339
340				tsp = trans2[ns];
341
342				if ( tsp != NO_TRANSITION && ! IS_MARKED(tsp) )
343					STACK_STATE(tsp)
344				}
345			}
346		}
347
348	/* Clear out "visit" markers. */
349
350	for ( stkpos = 1; stkpos <= stkend; ++stkpos )
351		{
352		if ( IS_MARKED(stk[stkpos]) )
353			UNMARK_STATE(stk[stkpos])
354		else
355			flexfatal(
356			_( "consistency check failed in epsclosure()" ) );
357		}
358
359	*ns_addr = numstates;
360	*hv_addr = hashval;
361	*nacc_addr = nacc;
362
363	return t;
364	}
365
366
367/* increase_max_dfas - increase the maximum number of DFAs */
368
369void increase_max_dfas()
370	{
371	current_max_dfas += MAX_DFAS_INCREMENT;
372
373	++num_reallocs;
374
375	base = reallocate_integer_array( base, current_max_dfas );
376	def = reallocate_integer_array( def, current_max_dfas );
377	dfasiz = reallocate_integer_array( dfasiz, current_max_dfas );
378	accsiz = reallocate_integer_array( accsiz, current_max_dfas );
379	dhash = reallocate_integer_array( dhash, current_max_dfas );
380	dss = reallocate_int_ptr_array( dss, current_max_dfas );
381	dfaacc = reallocate_dfaacc_union( dfaacc, current_max_dfas );
382
383	if ( nultrans )
384		nultrans =
385			reallocate_integer_array( nultrans, current_max_dfas );
386	}
387
388
389/* ntod - convert an ndfa to a dfa
390 *
391 * Creates the dfa corresponding to the ndfa we've constructed.  The
392 * dfa starts out in state #1.
393 */
394
395void ntod()
396	{
397	int *accset, ds, nacc, newds;
398	int sym, hashval, numstates, dsize;
399	int num_full_table_rows;	/* used only for -f */
400	int *nset, *dset;
401	int targptr, totaltrans, i, comstate, comfreq, targ;
402	int symlist[CSIZE + 1];
403	int num_start_states;
404	int todo_head, todo_next;
405
406	/* Note that the following are indexed by *equivalence classes*
407	 * and not by characters.  Since equivalence classes are indexed
408	 * beginning with 1, even if the scanner accepts NUL's, this
409	 * means that (since every character is potentially in its own
410	 * equivalence class) these arrays must have room for indices
411	 * from 1 to CSIZE, so their size must be CSIZE + 1.
412	 */
413	int duplist[CSIZE + 1], state[CSIZE + 1];
414	int targfreq[CSIZE + 1], targstate[CSIZE + 1];
415
416	accset = allocate_integer_array( num_rules + 1 );
417	nset = allocate_integer_array( current_max_dfa_size );
418
419	/* The "todo" queue is represented by the head, which is the DFA
420	 * state currently being processed, and the "next", which is the
421	 * next DFA state number available (not in use).  We depend on the
422	 * fact that snstods() returns DFA's \in increasing order/, and thus
423	 * need only know the bounds of the dfas to be processed.
424	 */
425	todo_head = todo_next = 0;
426
427	for ( i = 0; i <= csize; ++i )
428		{
429		duplist[i] = NIL;
430		symlist[i] = false;
431		}
432
433	for ( i = 0; i <= num_rules; ++i )
434		accset[i] = NIL;
435
436	if ( trace )
437		{
438		dumpnfa( scset[1] );
439		fputs( _( "\n\nDFA Dump:\n\n" ), stderr );
440		}
441
442	inittbl();
443
444	/* Check to see whether we should build a separate table for
445	 * transitions on NUL characters.  We don't do this for full-speed
446	 * (-F) scanners, since for them we don't have a simple state
447	 * number lying around with which to index the table.  We also
448	 * don't bother doing it for scanners unless (1) NUL is in its own
449	 * equivalence class (indicated by a positive value of
450	 * ecgroup[NUL]), (2) NUL's equivalence class is the last
451	 * equivalence class, and (3) the number of equivalence classes is
452	 * the same as the number of characters.  This latter case comes
453	 * about when useecs is false or when it's true but every character
454	 * still manages to land in its own class (unlikely, but it's
455	 * cheap to check for).  If all these things are true then the
456	 * character code needed to represent NUL's equivalence class for
457	 * indexing the tables is going to take one more bit than the
458	 * number of characters, and therefore we won't be assured of
459	 * being able to fit it into a YY_CHAR variable.  This rules out
460	 * storing the transitions in a compressed table, since the code
461	 * for interpreting them uses a YY_CHAR variable (perhaps it
462	 * should just use an integer, though; this is worth pondering ...
463	 * ###).
464	 *
465	 * Finally, for full tables, we want the number of entries in the
466	 * table to be a power of two so the array references go fast (it
467	 * will just take a shift to compute the major index).  If
468	 * encoding NUL's transitions in the table will spoil this, we
469	 * give it its own table (note that this will be the case if we're
470	 * not using equivalence classes).
471	 */
472
473	/* Note that the test for ecgroup[0] == numecs below accomplishes
474	 * both (1) and (2) above
475	 */
476	if ( ! fullspd && ecgroup[0] == numecs )
477		{
478		/* NUL is alone in its equivalence class, which is the
479		 * last one.
480		 */
481		int use_NUL_table = (numecs == csize);
482
483		if ( fulltbl && ! use_NUL_table )
484			{
485			/* We still may want to use the table if numecs
486			 * is a power of 2.
487			 */
488			int power_of_two;
489
490			for ( power_of_two = 1; power_of_two <= csize;
491			      power_of_two *= 2 )
492				if ( numecs == power_of_two )
493					{
494					use_NUL_table = true;
495					break;
496					}
497			}
498
499		if ( use_NUL_table )
500			nultrans = allocate_integer_array( current_max_dfas );
501
502		/* From now on, nultrans != nil indicates that we're
503		 * saving null transitions for later, separate encoding.
504		 */
505		}
506
507
508	if ( fullspd )
509		{
510		for ( i = 0; i <= numecs; ++i )
511			state[i] = 0;
512
513		place_state( state, 0, 0 );
514		dfaacc[0].dfaacc_state = 0;
515		}
516
517	else if ( fulltbl )
518		{
519		if ( nultrans )
520			/* We won't be including NUL's transitions in the
521			 * table, so build it for entries from 0 .. numecs - 1.
522			 */
523			num_full_table_rows = numecs;
524
525		else
526			/* Take into account the fact that we'll be including
527			 * the NUL entries in the transition table.  Build it
528			 * from 0 .. numecs.
529			 */
530			num_full_table_rows = numecs + 1;
531
532		/* Unless -Ca, declare it "short" because it's a real
533		 * long-shot that that won't be large enough.
534		 */
535		out_str_dec( "static yyconst %s yy_nxt[][%d] =\n    {\n",
536			/* '}' so vi doesn't get too confused */
537			long_align ? "long" : "short", num_full_table_rows );
538
539		outn( "    {" );
540
541		/* Generate 0 entries for state #0. */
542		for ( i = 0; i < num_full_table_rows; ++i )
543			mk2data( 0 );
544
545		dataflush();
546		outn( "    },\n" );
547		}
548
549	/* Create the first states. */
550
551	num_start_states = lastsc * 2;
552
553	for ( i = 1; i <= num_start_states; ++i )
554		{
555		numstates = 1;
556
557		/* For each start condition, make one state for the case when
558		 * we're at the beginning of the line (the '^' operator) and
559		 * one for the case when we're not.
560		 */
561		if ( i % 2 == 1 )
562			nset[numstates] = scset[(i / 2) + 1];
563		else
564			nset[numstates] =
565				mkbranch( scbol[i / 2], scset[i / 2] );
566
567		nset = epsclosure( nset, &numstates, accset, &nacc, &hashval );
568
569		if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) )
570			{
571			numas += nacc;
572			totnst += numstates;
573			++todo_next;
574
575			if ( variable_trailing_context_rules && nacc > 0 )
576				check_trailing_context( nset, numstates,
577							accset, nacc );
578			}
579		}
580
581	if ( ! fullspd )
582		{
583		if ( ! snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) )
584			flexfatal(
585			_( "could not create unique end-of-buffer state" ) );
586
587		++numas;
588		++num_start_states;
589		++todo_next;
590		}
591
592	while ( todo_head < todo_next )
593		{
594		targptr = 0;
595		totaltrans = 0;
596
597		for ( i = 1; i <= numecs; ++i )
598			state[i] = 0;
599
600		ds = ++todo_head;
601
602		dset = dss[ds];
603		dsize = dfasiz[ds];
604
605		if ( trace )
606			fprintf( stderr, _( "state # %d:\n" ), ds );
607
608		sympartition( dset, dsize, symlist, duplist );
609
610		for ( sym = 1; sym <= numecs; ++sym )
611			{
612			if ( symlist[sym] )
613				{
614				symlist[sym] = 0;
615
616				if ( duplist[sym] == NIL )
617					{
618					/* Symbol has unique out-transitions. */
619					numstates = symfollowset( dset, dsize,
620								sym, nset );
621					nset = epsclosure( nset, &numstates,
622						accset, &nacc, &hashval );
623
624					if ( snstods( nset, numstates, accset,
625						nacc, hashval, &newds ) )
626						{
627						totnst = totnst + numstates;
628						++todo_next;
629						numas += nacc;
630
631						if (
632					variable_trailing_context_rules &&
633							nacc > 0 )
634							check_trailing_context(
635								nset, numstates,
636								accset, nacc );
637						}
638
639					state[sym] = newds;
640
641					if ( trace )
642						fprintf( stderr, "\t%d\t%d\n",
643							sym, newds );
644
645					targfreq[++targptr] = 1;
646					targstate[targptr] = newds;
647					++numuniq;
648					}
649
650				else
651					{
652					/* sym's equivalence class has the same
653					 * transitions as duplist(sym)'s
654					 * equivalence class.
655					 */
656					targ = state[duplist[sym]];
657					state[sym] = targ;
658
659					if ( trace )
660						fprintf( stderr, "\t%d\t%d\n",
661							sym, targ );
662
663					/* Update frequency count for
664					 * destination state.
665					 */
666
667					i = 0;
668					while ( targstate[++i] != targ )
669						;
670
671					++targfreq[i];
672					++numdup;
673					}
674
675				++totaltrans;
676				duplist[sym] = NIL;
677				}
678			}
679
680		if ( caseins && ! useecs )
681			{
682			register int j;
683
684			for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j )
685				{
686				if ( state[i] == 0 && state[j] != 0 )
687					/* We're adding a transition. */
688					++totaltrans;
689
690				else if ( state[i] != 0 && state[j] == 0 )
691					/* We're taking away a transition. */
692					--totaltrans;
693
694				state[i] = state[j];
695				}
696			}
697
698		numsnpairs += totaltrans;
699
700		if ( ds > num_start_states )
701			check_for_backing_up( ds, state );
702
703		if ( nultrans )
704			{
705			nultrans[ds] = state[NUL_ec];
706			state[NUL_ec] = 0;	/* remove transition */
707			}
708
709		if ( fulltbl )
710			{
711			outn( "    {" );
712
713			/* Supply array's 0-element. */
714			if ( ds == end_of_buffer_state )
715				mk2data( -end_of_buffer_state );
716			else
717				mk2data( end_of_buffer_state );
718
719			for ( i = 1; i < num_full_table_rows; ++i )
720				/* Jams are marked by negative of state
721				 * number.
722				 */
723				mk2data( state[i] ? state[i] : -ds );
724
725			dataflush();
726			outn( "    },\n" );
727			}
728
729		else if ( fullspd )
730			place_state( state, ds, totaltrans );
731
732		else if ( ds == end_of_buffer_state )
733			/* Special case this state to make sure it does what
734			 * it's supposed to, i.e., jam on end-of-buffer.
735			 */
736			stack1( ds, 0, 0, JAMSTATE );
737
738		else /* normal, compressed state */
739			{
740			/* Determine which destination state is the most
741			 * common, and how many transitions to it there are.
742			 */
743
744			comfreq = 0;
745			comstate = 0;
746
747			for ( i = 1; i <= targptr; ++i )
748				if ( targfreq[i] > comfreq )
749					{
750					comfreq = targfreq[i];
751					comstate = targstate[i];
752					}
753
754			bldtbl( state, ds, totaltrans, comstate, comfreq );
755			}
756		}
757
758	if ( fulltbl )
759		dataend();
760
761	else if ( ! fullspd )
762		{
763		cmptmps();  /* create compressed template entries */
764
765		/* Create tables for all the states with only one
766		 * out-transition.
767		 */
768		while ( onesp > 0 )
769			{
770			mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp],
771			onedef[onesp] );
772			--onesp;
773			}
774
775		mkdeftbl();
776		}
777
778	flex_free( (void *) accset );
779	flex_free( (void *) nset );
780	}
781
782
783/* snstods - converts a set of ndfa states into a dfa state
784 *
785 * synopsis
786 *    is_new_state = snstods( int sns[numstates], int numstates,
787 *				int accset[num_rules+1], int nacc,
788 *				int hashval, int *newds_addr );
789 *
790 * On return, the dfa state number is in newds.
791 */
792
793int snstods( sns, numstates, accset, nacc, hashval, newds_addr )
794int sns[], numstates, accset[], nacc, hashval, *newds_addr;
795	{
796	int didsort = 0;
797	register int i, j;
798	int newds, *oldsns;
799
800	for ( i = 1; i <= lastdfa; ++i )
801		if ( hashval == dhash[i] )
802			{
803			if ( numstates == dfasiz[i] )
804				{
805				oldsns = dss[i];
806
807				if ( ! didsort )
808					{
809					/* We sort the states in sns so we
810					 * can compare it to oldsns quickly.
811					 * We use bubble because there probably
812					 * aren't very many states.
813					 */
814					bubble( sns, numstates );
815					didsort = 1;
816					}
817
818				for ( j = 1; j <= numstates; ++j )
819					if ( sns[j] != oldsns[j] )
820						break;
821
822				if ( j > numstates )
823					{
824					++dfaeql;
825					*newds_addr = i;
826					return 0;
827					}
828
829				++hshcol;
830				}
831
832			else
833				++hshsave;
834			}
835
836	/* Make a new dfa. */
837
838	if ( ++lastdfa >= current_max_dfas )
839		increase_max_dfas();
840
841	newds = lastdfa;
842
843	dss[newds] = allocate_integer_array( numstates + 1 );
844
845	/* If we haven't already sorted the states in sns, we do so now,
846	 * so that future comparisons with it can be made quickly.
847	 */
848
849	if ( ! didsort )
850		bubble( sns, numstates );
851
852	for ( i = 1; i <= numstates; ++i )
853		dss[newds][i] = sns[i];
854
855	dfasiz[newds] = numstates;
856	dhash[newds] = hashval;
857
858	if ( nacc == 0 )
859		{
860		if ( reject )
861			dfaacc[newds].dfaacc_set = (int *) 0;
862		else
863			dfaacc[newds].dfaacc_state = 0;
864
865		accsiz[newds] = 0;
866		}
867
868	else if ( reject )
869		{
870		/* We sort the accepting set in increasing order so the
871		 * disambiguating rule that the first rule listed is considered
872		 * match in the event of ties will work.  We use a bubble
873		 * sort since the list is probably quite small.
874		 */
875
876		bubble( accset, nacc );
877
878		dfaacc[newds].dfaacc_set = allocate_integer_array( nacc + 1 );
879
880		/* Save the accepting set for later */
881		for ( i = 1; i <= nacc; ++i )
882			{
883			dfaacc[newds].dfaacc_set[i] = accset[i];
884
885			if ( accset[i] <= num_rules )
886				/* Who knows, perhaps a REJECT can yield
887				 * this rule.
888				 */
889				rule_useful[accset[i]] = true;
890			}
891
892		accsiz[newds] = nacc;
893		}
894
895	else
896		{
897		/* Find lowest numbered rule so the disambiguating rule
898		 * will work.
899		 */
900		j = num_rules + 1;
901
902		for ( i = 1; i <= nacc; ++i )
903			if ( accset[i] < j )
904				j = accset[i];
905
906		dfaacc[newds].dfaacc_state = j;
907
908		if ( j <= num_rules )
909			rule_useful[j] = true;
910		}
911
912	*newds_addr = newds;
913
914	return 1;
915	}
916
917
918/* symfollowset - follow the symbol transitions one step
919 *
920 * synopsis
921 *    numstates = symfollowset( int ds[current_max_dfa_size], int dsize,
922 *				int transsym, int nset[current_max_dfa_size] );
923 */
924
925int symfollowset( ds, dsize, transsym, nset )
926int ds[], dsize, transsym, nset[];
927	{
928	int ns, tsp, sym, i, j, lenccl, ch, numstates, ccllist;
929
930	numstates = 0;
931
932	for ( i = 1; i <= dsize; ++i )
933		{ /* for each nfa state ns in the state set of ds */
934		ns = ds[i];
935		sym = transchar[ns];
936		tsp = trans1[ns];
937
938		if ( sym < 0 )
939			{ /* it's a character class */
940			sym = -sym;
941			ccllist = cclmap[sym];
942			lenccl = ccllen[sym];
943
944			if ( cclng[sym] )
945				{
946				for ( j = 0; j < lenccl; ++j )
947					{
948					/* Loop through negated character
949					 * class.
950					 */
951					ch = ccltbl[ccllist + j];
952
953					if ( ch == 0 )
954						ch = NUL_ec;
955
956					if ( ch > transsym )
957						/* Transsym isn't in negated
958						 * ccl.
959						 */
960						break;
961
962					else if ( ch == transsym )
963						/* next 2 */ goto bottom;
964					}
965
966				/* Didn't find transsym in ccl. */
967				nset[++numstates] = tsp;
968				}
969
970			else
971				for ( j = 0; j < lenccl; ++j )
972					{
973					ch = ccltbl[ccllist + j];
974
975					if ( ch == 0 )
976						ch = NUL_ec;
977
978					if ( ch > transsym )
979						break;
980					else if ( ch == transsym )
981						{
982						nset[++numstates] = tsp;
983						break;
984						}
985					}
986			}
987
988		else if ( sym >= 'A' && sym <= 'Z' && caseins )
989			flexfatal(
990			_( "consistency check failed in symfollowset" ) );
991
992		else if ( sym == SYM_EPSILON )
993			{ /* do nothing */
994			}
995
996		else if ( ABS( ecgroup[sym] ) == transsym )
997			nset[++numstates] = tsp;
998
999		bottom: ;
1000		}
1001
1002	return numstates;
1003	}
1004
1005
1006/* sympartition - partition characters with same out-transitions
1007 *
1008 * synopsis
1009 *    sympartition( int ds[current_max_dfa_size], int numstates,
1010 *			int symlist[numecs], int duplist[numecs] );
1011 */
1012
1013void sympartition( ds, numstates, symlist, duplist )
1014int ds[], numstates;
1015int symlist[], duplist[];
1016	{
1017	int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich;
1018
1019	/* Partitioning is done by creating equivalence classes for those
1020	 * characters which have out-transitions from the given state.  Thus
1021	 * we are really creating equivalence classes of equivalence classes.
1022	 */
1023
1024	for ( i = 1; i <= numecs; ++i )
1025		{ /* initialize equivalence class list */
1026		duplist[i] = i - 1;
1027		dupfwd[i] = i + 1;
1028		}
1029
1030	duplist[1] = NIL;
1031	dupfwd[numecs] = NIL;
1032
1033	for ( i = 1; i <= numstates; ++i )
1034		{
1035		ns = ds[i];
1036		tch = transchar[ns];
1037
1038		if ( tch != SYM_EPSILON )
1039			{
1040			if ( tch < -lastccl || tch >= csize )
1041				{
1042				flexfatal(
1043		_( "bad transition character detected in sympartition()" ) );
1044				}
1045
1046			if ( tch >= 0 )
1047				{ /* character transition */
1048				int ec = ecgroup[tch];
1049
1050				mkechar( ec, dupfwd, duplist );
1051				symlist[ec] = 1;
1052				}
1053
1054			else
1055				{ /* character class */
1056				tch = -tch;
1057
1058				lenccl = ccllen[tch];
1059				cclp = cclmap[tch];
1060				mkeccl( ccltbl + cclp, lenccl, dupfwd,
1061					duplist, numecs, NUL_ec );
1062
1063				if ( cclng[tch] )
1064					{
1065					j = 0;
1066
1067					for ( k = 0; k < lenccl; ++k )
1068						{
1069						ich = ccltbl[cclp + k];
1070
1071						if ( ich == 0 )
1072							ich = NUL_ec;
1073
1074						for ( ++j; j < ich; ++j )
1075							symlist[j] = 1;
1076						}
1077
1078					for ( ++j; j <= numecs; ++j )
1079						symlist[j] = 1;
1080					}
1081
1082				else
1083					for ( k = 0; k < lenccl; ++k )
1084						{
1085						ich = ccltbl[cclp + k];
1086
1087						if ( ich == 0 )
1088							ich = NUL_ec;
1089
1090						symlist[ich] = 1;
1091						}
1092				}
1093			}
1094		}
1095	}
1096