1228072Sbapt/* nfa - NFA construction routines */
2228072Sbapt
3228072Sbapt/*  Copyright (c) 1990 The Regents of the University of California. */
4228072Sbapt/*  All rights reserved. */
5228072Sbapt
6228072Sbapt/*  This code is derived from software contributed to Berkeley by */
7228072Sbapt/*  Vern Paxson. */
8228072Sbapt
9228072Sbapt/*  The United States Government has rights in this work pursuant */
10228072Sbapt/*  to contract no. DE-AC03-76SF00098 between the United States */
11228072Sbapt/*  Department of Energy and the University of California. */
12228072Sbapt
13228072Sbapt/*  This file is part of flex. */
14228072Sbapt
15228072Sbapt/*  Redistribution and use in source and binary forms, with or without */
16228072Sbapt/*  modification, are permitted provided that the following conditions */
17228072Sbapt/*  are met: */
18228072Sbapt
19228072Sbapt/*  1. Redistributions of source code must retain the above copyright */
20228072Sbapt/*     notice, this list of conditions and the following disclaimer. */
21228072Sbapt/*  2. Redistributions in binary form must reproduce the above copyright */
22228072Sbapt/*     notice, this list of conditions and the following disclaimer in the */
23228072Sbapt/*     documentation and/or other materials provided with the distribution. */
24228072Sbapt
25228072Sbapt/*  Neither the name of the University nor the names of its contributors */
26228072Sbapt/*  may be used to endorse or promote products derived from this software */
27228072Sbapt/*  without specific prior written permission. */
28228072Sbapt
29228072Sbapt/*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
30228072Sbapt/*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
31228072Sbapt/*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
32228072Sbapt/*  PURPOSE. */
33228072Sbapt
34228072Sbapt#include "flexdef.h"
35228072Sbapt
36228072Sbapt
37228072Sbapt/* declare functions that have forward references */
38228072Sbapt
39228072Sbaptint dupmachine PROTO ((int));
40228072Sbaptvoid mkxtion PROTO ((int, int));
41228072Sbapt
42228072Sbapt
43228072Sbapt/* add_accept - add an accepting state to a machine
44228072Sbapt *
45228072Sbapt * accepting_number becomes mach's accepting number.
46228072Sbapt */
47228072Sbapt
48228072Sbaptvoid    add_accept (mach, accepting_number)
49228072Sbapt     int     mach, accepting_number;
50228072Sbapt{
51228072Sbapt	/* Hang the accepting number off an epsilon state.  if it is associated
52228072Sbapt	 * with a state that has a non-epsilon out-transition, then the state
53228072Sbapt	 * will accept BEFORE it makes that transition, i.e., one character
54228072Sbapt	 * too soon.
55228072Sbapt	 */
56228072Sbapt
57228072Sbapt	if (transchar[finalst[mach]] == SYM_EPSILON)
58228072Sbapt		accptnum[finalst[mach]] = accepting_number;
59228072Sbapt
60228072Sbapt	else {
61228072Sbapt		int     astate = mkstate (SYM_EPSILON);
62228072Sbapt
63228072Sbapt		accptnum[astate] = accepting_number;
64228072Sbapt		(void) link_machines (mach, astate);
65228072Sbapt	}
66228072Sbapt}
67228072Sbapt
68228072Sbapt
69228072Sbapt/* copysingl - make a given number of copies of a singleton machine
70228072Sbapt *
71228072Sbapt * synopsis
72228072Sbapt *
73228072Sbapt *   newsng = copysingl( singl, num );
74228072Sbapt *
75228072Sbapt *     newsng - a new singleton composed of num copies of singl
76228072Sbapt *     singl  - a singleton machine
77228072Sbapt *     num    - the number of copies of singl to be present in newsng
78228072Sbapt */
79228072Sbapt
80228072Sbaptint     copysingl (singl, num)
81228072Sbapt     int     singl, num;
82228072Sbapt{
83228072Sbapt	int     copy, i;
84228072Sbapt
85228072Sbapt	copy = mkstate (SYM_EPSILON);
86228072Sbapt
87228072Sbapt	for (i = 1; i <= num; ++i)
88228072Sbapt		copy = link_machines (copy, dupmachine (singl));
89228072Sbapt
90228072Sbapt	return copy;
91228072Sbapt}
92228072Sbapt
93228072Sbapt
94228072Sbapt/* dumpnfa - debugging routine to write out an nfa */
95228072Sbapt
96228072Sbaptvoid    dumpnfa (state1)
97228072Sbapt     int     state1;
98228072Sbapt
99228072Sbapt{
100228072Sbapt	int     sym, tsp1, tsp2, anum, ns;
101228072Sbapt
102228072Sbapt	fprintf (stderr,
103228072Sbapt		 _
104228072Sbapt		 ("\n\n********** beginning dump of nfa with start state %d\n"),
105228072Sbapt		 state1);
106228072Sbapt
107228072Sbapt	/* We probably should loop starting at firstst[state1] and going to
108228072Sbapt	 * lastst[state1], but they're not maintained properly when we "or"
109228072Sbapt	 * all of the rules together.  So we use our knowledge that the machine
110228072Sbapt	 * starts at state 1 and ends at lastnfa.
111228072Sbapt	 */
112228072Sbapt
113228072Sbapt	/* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */
114228072Sbapt	for (ns = 1; ns <= lastnfa; ++ns) {
115228072Sbapt		fprintf (stderr, _("state # %4d\t"), ns);
116228072Sbapt
117228072Sbapt		sym = transchar[ns];
118228072Sbapt		tsp1 = trans1[ns];
119228072Sbapt		tsp2 = trans2[ns];
120228072Sbapt		anum = accptnum[ns];
121228072Sbapt
122228072Sbapt		fprintf (stderr, "%3d:  %4d, %4d", sym, tsp1, tsp2);
123228072Sbapt
124228072Sbapt		if (anum != NIL)
125228072Sbapt			fprintf (stderr, "  [%d]", anum);
126228072Sbapt
127228072Sbapt		fprintf (stderr, "\n");
128228072Sbapt	}
129228072Sbapt
130228072Sbapt	fprintf (stderr, _("********** end of dump\n"));
131228072Sbapt}
132228072Sbapt
133228072Sbapt
134228072Sbapt/* dupmachine - make a duplicate of a given machine
135228072Sbapt *
136228072Sbapt * synopsis
137228072Sbapt *
138228072Sbapt *   copy = dupmachine( mach );
139228072Sbapt *
140228072Sbapt *     copy - holds duplicate of mach
141228072Sbapt *     mach - machine to be duplicated
142228072Sbapt *
143228072Sbapt * note that the copy of mach is NOT an exact duplicate; rather, all the
144228072Sbapt * transition states values are adjusted so that the copy is self-contained,
145228072Sbapt * as the original should have been.
146228072Sbapt *
147228072Sbapt * also note that the original MUST be contiguous, with its low and high
148228072Sbapt * states accessible by the arrays firstst and lastst
149228072Sbapt */
150228072Sbapt
151228072Sbaptint     dupmachine (mach)
152228072Sbapt     int     mach;
153228072Sbapt{
154228072Sbapt	int     i, init, state_offset;
155228072Sbapt	int     state = 0;
156228072Sbapt	int     last = lastst[mach];
157228072Sbapt
158228072Sbapt	for (i = firstst[mach]; i <= last; ++i) {
159228072Sbapt		state = mkstate (transchar[i]);
160228072Sbapt
161228072Sbapt		if (trans1[i] != NO_TRANSITION) {
162228072Sbapt			mkxtion (finalst[state], trans1[i] + state - i);
163228072Sbapt
164228072Sbapt			if (transchar[i] == SYM_EPSILON &&
165228072Sbapt			    trans2[i] != NO_TRANSITION)
166228072Sbapt					mkxtion (finalst[state],
167228072Sbapt						 trans2[i] + state - i);
168228072Sbapt		}
169228072Sbapt
170228072Sbapt		accptnum[state] = accptnum[i];
171228072Sbapt	}
172228072Sbapt
173228072Sbapt	if (state == 0)
174228072Sbapt		flexfatal (_("empty machine in dupmachine()"));
175228072Sbapt
176228072Sbapt	state_offset = state - i + 1;
177228072Sbapt
178228072Sbapt	init = mach + state_offset;
179228072Sbapt	firstst[init] = firstst[mach] + state_offset;
180228072Sbapt	finalst[init] = finalst[mach] + state_offset;
181228072Sbapt	lastst[init] = lastst[mach] + state_offset;
182228072Sbapt
183228072Sbapt	return init;
184228072Sbapt}
185228072Sbapt
186228072Sbapt
187228072Sbapt/* finish_rule - finish up the processing for a rule
188228072Sbapt *
189228072Sbapt * An accepting number is added to the given machine.  If variable_trail_rule
190228072Sbapt * is true then the rule has trailing context and both the head and trail
191228072Sbapt * are variable size.  Otherwise if headcnt or trailcnt is non-zero then
192228072Sbapt * the machine recognizes a pattern with trailing context and headcnt is
193228072Sbapt * the number of characters in the matched part of the pattern, or zero
194228072Sbapt * if the matched part has variable length.  trailcnt is the number of
195228072Sbapt * trailing context characters in the pattern, or zero if the trailing
196228072Sbapt * context has variable length.
197228072Sbapt */
198228072Sbapt
199228072Sbaptvoid    finish_rule (mach, variable_trail_rule, headcnt, trailcnt,
200228072Sbapt		     pcont_act)
201228072Sbapt     int     mach, variable_trail_rule, headcnt, trailcnt, pcont_act;
202228072Sbapt{
203228072Sbapt	char    action_text[MAXLINE];
204228072Sbapt
205228072Sbapt	add_accept (mach, num_rules);
206228072Sbapt
207228072Sbapt	/* We did this in new_rule(), but it often gets the wrong
208228072Sbapt	 * number because we do it before we start parsing the current rule.
209228072Sbapt	 */
210228072Sbapt	rule_linenum[num_rules] = linenum;
211228072Sbapt
212228072Sbapt	/* If this is a continued action, then the line-number has already
213228072Sbapt	 * been updated, giving us the wrong number.
214228072Sbapt	 */
215228072Sbapt	if (continued_action)
216228072Sbapt		--rule_linenum[num_rules];
217228072Sbapt
218228072Sbapt
219228072Sbapt	/* If the previous rule was continued action, then we inherit the
220228072Sbapt	 * previous newline flag, possibly overriding the current one.
221228072Sbapt	 */
222228072Sbapt	if (pcont_act && rule_has_nl[num_rules - 1])
223228072Sbapt		rule_has_nl[num_rules] = true;
224228072Sbapt
225228072Sbapt	snprintf (action_text, sizeof(action_text), "case %d:\n", num_rules);
226228072Sbapt	add_action (action_text);
227228072Sbapt	if (rule_has_nl[num_rules]) {
228228072Sbapt		snprintf (action_text, sizeof(action_text), "/* rule %d can match eol */\n",
229228072Sbapt			 num_rules);
230228072Sbapt		add_action (action_text);
231228072Sbapt	}
232228072Sbapt
233228072Sbapt
234228072Sbapt	if (variable_trail_rule) {
235228072Sbapt		rule_type[num_rules] = RULE_VARIABLE;
236228072Sbapt
237228072Sbapt		if (performance_report > 0)
238228072Sbapt			fprintf (stderr,
239228072Sbapt				 _
240228072Sbapt				 ("Variable trailing context rule at line %d\n"),
241228072Sbapt				 rule_linenum[num_rules]);
242228072Sbapt
243228072Sbapt		variable_trailing_context_rules = true;
244228072Sbapt	}
245228072Sbapt
246228072Sbapt	else {
247228072Sbapt		rule_type[num_rules] = RULE_NORMAL;
248228072Sbapt
249228072Sbapt		if (headcnt > 0 || trailcnt > 0) {
250228072Sbapt			/* Do trailing context magic to not match the trailing
251228072Sbapt			 * characters.
252228072Sbapt			 */
253228072Sbapt			char   *scanner_cp = "YY_G(yy_c_buf_p) = yy_cp";
254228072Sbapt			char   *scanner_bp = "yy_bp";
255228072Sbapt
256228072Sbapt			add_action
257228072Sbapt				("*yy_cp = YY_G(yy_hold_char); /* undo effects of setting up yytext */\n");
258228072Sbapt
259228072Sbapt			if (headcnt > 0) {
260228072Sbapt				snprintf (action_text, sizeof(action_text), "%s = %s + %d;\n",
261228072Sbapt					 scanner_cp, scanner_bp, headcnt);
262228072Sbapt				add_action (action_text);
263228072Sbapt			}
264228072Sbapt
265228072Sbapt			else {
266228072Sbapt				snprintf (action_text, sizeof(action_text), "%s -= %d;\n",
267228072Sbapt					 scanner_cp, trailcnt);
268228072Sbapt				add_action (action_text);
269228072Sbapt			}
270228072Sbapt
271228072Sbapt			add_action
272228072Sbapt				("YY_DO_BEFORE_ACTION; /* set up yytext again */\n");
273228072Sbapt		}
274228072Sbapt	}
275228072Sbapt
276228072Sbapt	/* Okay, in the action code at this point yytext and yyleng have
277228072Sbapt	 * their proper final values for this rule, so here's the point
278228072Sbapt	 * to do any user action.  But don't do it for continued actions,
279228072Sbapt	 * as that'll result in multiple YY_RULE_SETUP's.
280228072Sbapt	 */
281228072Sbapt	if (!continued_action)
282228072Sbapt		add_action ("YY_RULE_SETUP\n");
283228072Sbapt
284228072Sbapt	line_directive_out ((FILE *) 0, 1);
285228072Sbapt}
286228072Sbapt
287228072Sbapt
288228072Sbapt/* link_machines - connect two machines together
289228072Sbapt *
290228072Sbapt * synopsis
291228072Sbapt *
292228072Sbapt *   new = link_machines( first, last );
293228072Sbapt *
294228072Sbapt *     new    - a machine constructed by connecting first to last
295228072Sbapt *     first  - the machine whose successor is to be last
296228072Sbapt *     last   - the machine whose predecessor is to be first
297228072Sbapt *
298228072Sbapt * note: this routine concatenates the machine first with the machine
299228072Sbapt *  last to produce a machine new which will pattern-match first first
300228072Sbapt *  and then last, and will fail if either of the sub-patterns fails.
301228072Sbapt *  FIRST is set to new by the operation.  last is unmolested.
302228072Sbapt */
303228072Sbapt
304228072Sbaptint     link_machines (first, last)
305228072Sbapt     int     first, last;
306228072Sbapt{
307228072Sbapt	if (first == NIL)
308228072Sbapt		return last;
309228072Sbapt
310228072Sbapt	else if (last == NIL)
311228072Sbapt		return first;
312228072Sbapt
313228072Sbapt	else {
314228072Sbapt		mkxtion (finalst[first], last);
315228072Sbapt		finalst[first] = finalst[last];
316228072Sbapt		lastst[first] = MAX (lastst[first], lastst[last]);
317228072Sbapt		firstst[first] = MIN (firstst[first], firstst[last]);
318228072Sbapt
319228072Sbapt		return first;
320228072Sbapt	}
321228072Sbapt}
322228072Sbapt
323228072Sbapt
324228072Sbapt/* mark_beginning_as_normal - mark each "beginning" state in a machine
325228072Sbapt *                            as being a "normal" (i.e., not trailing context-
326228072Sbapt *                            associated) states
327228072Sbapt *
328228072Sbapt * The "beginning" states are the epsilon closure of the first state
329228072Sbapt */
330228072Sbapt
331228072Sbaptvoid    mark_beginning_as_normal (mach)
332250874Sjkim     int mach;
333228072Sbapt{
334228072Sbapt	switch (state_type[mach]) {
335228072Sbapt	case STATE_NORMAL:
336228072Sbapt		/* Oh, we've already visited here. */
337228072Sbapt		return;
338228072Sbapt
339228072Sbapt	case STATE_TRAILING_CONTEXT:
340228072Sbapt		state_type[mach] = STATE_NORMAL;
341228072Sbapt
342228072Sbapt		if (transchar[mach] == SYM_EPSILON) {
343228072Sbapt			if (trans1[mach] != NO_TRANSITION)
344228072Sbapt				mark_beginning_as_normal (trans1[mach]);
345228072Sbapt
346228072Sbapt			if (trans2[mach] != NO_TRANSITION)
347228072Sbapt				mark_beginning_as_normal (trans2[mach]);
348228072Sbapt		}
349228072Sbapt		break;
350228072Sbapt
351228072Sbapt	default:
352228072Sbapt		flexerror (_
353228072Sbapt			   ("bad state type in mark_beginning_as_normal()"));
354228072Sbapt		break;
355228072Sbapt	}
356228072Sbapt}
357228072Sbapt
358228072Sbapt
359228072Sbapt/* mkbranch - make a machine that branches to two machines
360228072Sbapt *
361228072Sbapt * synopsis
362228072Sbapt *
363228072Sbapt *   branch = mkbranch( first, second );
364228072Sbapt *
365228072Sbapt *     branch - a machine which matches either first's pattern or second's
366228072Sbapt *     first, second - machines whose patterns are to be or'ed (the | operator)
367228072Sbapt *
368228072Sbapt * Note that first and second are NEITHER destroyed by the operation.  Also,
369228072Sbapt * the resulting machine CANNOT be used with any other "mk" operation except
370228072Sbapt * more mkbranch's.  Compare with mkor()
371228072Sbapt */
372228072Sbapt
373228072Sbaptint     mkbranch (first, second)
374228072Sbapt     int     first, second;
375228072Sbapt{
376228072Sbapt	int     eps;
377228072Sbapt
378228072Sbapt	if (first == NO_TRANSITION)
379228072Sbapt		return second;
380228072Sbapt
381228072Sbapt	else if (second == NO_TRANSITION)
382228072Sbapt		return first;
383228072Sbapt
384228072Sbapt	eps = mkstate (SYM_EPSILON);
385228072Sbapt
386228072Sbapt	mkxtion (eps, first);
387228072Sbapt	mkxtion (eps, second);
388228072Sbapt
389228072Sbapt	return eps;
390228072Sbapt}
391228072Sbapt
392228072Sbapt
393228072Sbapt/* mkclos - convert a machine into a closure
394228072Sbapt *
395228072Sbapt * synopsis
396228072Sbapt *   new = mkclos( state );
397228072Sbapt *
398228072Sbapt * new - a new state which matches the closure of "state"
399228072Sbapt */
400228072Sbapt
401228072Sbaptint     mkclos (state)
402228072Sbapt     int     state;
403228072Sbapt{
404228072Sbapt	return mkopt (mkposcl (state));
405228072Sbapt}
406228072Sbapt
407228072Sbapt
408228072Sbapt/* mkopt - make a machine optional
409228072Sbapt *
410228072Sbapt * synopsis
411228072Sbapt *
412228072Sbapt *   new = mkopt( mach );
413228072Sbapt *
414228072Sbapt *     new  - a machine which optionally matches whatever mach matched
415228072Sbapt *     mach - the machine to make optional
416228072Sbapt *
417228072Sbapt * notes:
418228072Sbapt *     1. mach must be the last machine created
419228072Sbapt *     2. mach is destroyed by the call
420228072Sbapt */
421228072Sbapt
422228072Sbaptint     mkopt (mach)
423228072Sbapt     int     mach;
424228072Sbapt{
425228072Sbapt	int     eps;
426228072Sbapt
427228072Sbapt	if (!SUPER_FREE_EPSILON (finalst[mach])) {
428228072Sbapt		eps = mkstate (SYM_EPSILON);
429228072Sbapt		mach = link_machines (mach, eps);
430228072Sbapt	}
431228072Sbapt
432228072Sbapt	/* Can't skimp on the following if FREE_EPSILON(mach) is true because
433228072Sbapt	 * some state interior to "mach" might point back to the beginning
434228072Sbapt	 * for a closure.
435228072Sbapt	 */
436228072Sbapt	eps = mkstate (SYM_EPSILON);
437228072Sbapt	mach = link_machines (eps, mach);
438228072Sbapt
439228072Sbapt	mkxtion (mach, finalst[mach]);
440228072Sbapt
441228072Sbapt	return mach;
442228072Sbapt}
443228072Sbapt
444228072Sbapt
445228072Sbapt/* mkor - make a machine that matches either one of two machines
446228072Sbapt *
447228072Sbapt * synopsis
448228072Sbapt *
449228072Sbapt *   new = mkor( first, second );
450228072Sbapt *
451228072Sbapt *     new - a machine which matches either first's pattern or second's
452228072Sbapt *     first, second - machines whose patterns are to be or'ed (the | operator)
453228072Sbapt *
454228072Sbapt * note that first and second are both destroyed by the operation
455228072Sbapt * the code is rather convoluted because an attempt is made to minimize
456228072Sbapt * the number of epsilon states needed
457228072Sbapt */
458228072Sbapt
459228072Sbaptint     mkor (first, second)
460228072Sbapt     int     first, second;
461228072Sbapt{
462228072Sbapt	int     eps, orend;
463228072Sbapt
464228072Sbapt	if (first == NIL)
465228072Sbapt		return second;
466228072Sbapt
467228072Sbapt	else if (second == NIL)
468228072Sbapt		return first;
469228072Sbapt
470228072Sbapt	else {
471228072Sbapt		/* See comment in mkopt() about why we can't use the first
472228072Sbapt		 * state of "first" or "second" if they satisfy "FREE_EPSILON".
473228072Sbapt		 */
474228072Sbapt		eps = mkstate (SYM_EPSILON);
475228072Sbapt
476228072Sbapt		first = link_machines (eps, first);
477228072Sbapt
478228072Sbapt		mkxtion (first, second);
479228072Sbapt
480228072Sbapt		if (SUPER_FREE_EPSILON (finalst[first]) &&
481228072Sbapt		    accptnum[finalst[first]] == NIL) {
482228072Sbapt			orend = finalst[first];
483228072Sbapt			mkxtion (finalst[second], orend);
484228072Sbapt		}
485228072Sbapt
486228072Sbapt		else if (SUPER_FREE_EPSILON (finalst[second]) &&
487228072Sbapt			 accptnum[finalst[second]] == NIL) {
488228072Sbapt			orend = finalst[second];
489228072Sbapt			mkxtion (finalst[first], orend);
490228072Sbapt		}
491228072Sbapt
492228072Sbapt		else {
493228072Sbapt			eps = mkstate (SYM_EPSILON);
494228072Sbapt
495228072Sbapt			first = link_machines (first, eps);
496228072Sbapt			orend = finalst[first];
497228072Sbapt
498228072Sbapt			mkxtion (finalst[second], orend);
499228072Sbapt		}
500228072Sbapt	}
501228072Sbapt
502228072Sbapt	finalst[first] = orend;
503228072Sbapt	return first;
504228072Sbapt}
505228072Sbapt
506228072Sbapt
507228072Sbapt/* mkposcl - convert a machine into a positive closure
508228072Sbapt *
509228072Sbapt * synopsis
510228072Sbapt *   new = mkposcl( state );
511228072Sbapt *
512228072Sbapt *    new - a machine matching the positive closure of "state"
513228072Sbapt */
514228072Sbapt
515228072Sbaptint     mkposcl (state)
516228072Sbapt     int     state;
517228072Sbapt{
518228072Sbapt	int     eps;
519228072Sbapt
520228072Sbapt	if (SUPER_FREE_EPSILON (finalst[state])) {
521228072Sbapt		mkxtion (finalst[state], state);
522228072Sbapt		return state;
523228072Sbapt	}
524228072Sbapt
525228072Sbapt	else {
526228072Sbapt		eps = mkstate (SYM_EPSILON);
527228072Sbapt		mkxtion (eps, state);
528228072Sbapt		return link_machines (state, eps);
529228072Sbapt	}
530228072Sbapt}
531228072Sbapt
532228072Sbapt
533228072Sbapt/* mkrep - make a replicated machine
534228072Sbapt *
535228072Sbapt * synopsis
536228072Sbapt *   new = mkrep( mach, lb, ub );
537228072Sbapt *
538228072Sbapt *    new - a machine that matches whatever "mach" matched from "lb"
539228072Sbapt *          number of times to "ub" number of times
540228072Sbapt *
541228072Sbapt * note
542228072Sbapt *   if "ub" is INFINITE_REPEAT then "new" matches "lb" or more occurrences of "mach"
543228072Sbapt */
544228072Sbapt
545228072Sbaptint     mkrep (mach, lb, ub)
546228072Sbapt     int     mach, lb, ub;
547228072Sbapt{
548228072Sbapt	int     base_mach, tail, copy, i;
549228072Sbapt
550228072Sbapt	base_mach = copysingl (mach, lb - 1);
551228072Sbapt
552228072Sbapt	if (ub == INFINITE_REPEAT) {
553228072Sbapt		copy = dupmachine (mach);
554228072Sbapt		mach = link_machines (mach,
555228072Sbapt				      link_machines (base_mach,
556228072Sbapt						     mkclos (copy)));
557228072Sbapt	}
558228072Sbapt
559228072Sbapt	else {
560228072Sbapt		tail = mkstate (SYM_EPSILON);
561228072Sbapt
562228072Sbapt		for (i = lb; i < ub; ++i) {
563228072Sbapt			copy = dupmachine (mach);
564228072Sbapt			tail = mkopt (link_machines (copy, tail));
565228072Sbapt		}
566228072Sbapt
567228072Sbapt		mach =
568228072Sbapt			link_machines (mach,
569228072Sbapt				       link_machines (base_mach, tail));
570228072Sbapt	}
571228072Sbapt
572228072Sbapt	return mach;
573228072Sbapt}
574228072Sbapt
575228072Sbapt
576228072Sbapt/* mkstate - create a state with a transition on a given symbol
577228072Sbapt *
578228072Sbapt * synopsis
579228072Sbapt *
580228072Sbapt *   state = mkstate( sym );
581228072Sbapt *
582228072Sbapt *     state - a new state matching sym
583228072Sbapt *     sym   - the symbol the new state is to have an out-transition on
584228072Sbapt *
585228072Sbapt * note that this routine makes new states in ascending order through the
586228072Sbapt * state array (and increments LASTNFA accordingly).  The routine DUPMACHINE
587228072Sbapt * relies on machines being made in ascending order and that they are
588228072Sbapt * CONTIGUOUS.  Change it and you will have to rewrite DUPMACHINE (kludge
589228072Sbapt * that it admittedly is)
590228072Sbapt */
591228072Sbapt
592228072Sbaptint     mkstate (sym)
593228072Sbapt     int     sym;
594228072Sbapt{
595228072Sbapt	if (++lastnfa >= current_mns) {
596228072Sbapt		if ((current_mns += MNS_INCREMENT) >= maximum_mns)
597228072Sbapt			lerrif (_
598228072Sbapt				("input rules are too complicated (>= %d NFA states)"),
599228072Sbaptcurrent_mns);
600228072Sbapt
601228072Sbapt		++num_reallocs;
602228072Sbapt
603228072Sbapt		firstst = reallocate_integer_array (firstst, current_mns);
604228072Sbapt		lastst = reallocate_integer_array (lastst, current_mns);
605228072Sbapt		finalst = reallocate_integer_array (finalst, current_mns);
606228072Sbapt		transchar =
607228072Sbapt			reallocate_integer_array (transchar, current_mns);
608228072Sbapt		trans1 = reallocate_integer_array (trans1, current_mns);
609228072Sbapt		trans2 = reallocate_integer_array (trans2, current_mns);
610228072Sbapt		accptnum =
611228072Sbapt			reallocate_integer_array (accptnum, current_mns);
612228072Sbapt		assoc_rule =
613228072Sbapt			reallocate_integer_array (assoc_rule, current_mns);
614228072Sbapt		state_type =
615228072Sbapt			reallocate_integer_array (state_type, current_mns);
616228072Sbapt	}
617228072Sbapt
618228072Sbapt	firstst[lastnfa] = lastnfa;
619228072Sbapt	finalst[lastnfa] = lastnfa;
620228072Sbapt	lastst[lastnfa] = lastnfa;
621228072Sbapt	transchar[lastnfa] = sym;
622228072Sbapt	trans1[lastnfa] = NO_TRANSITION;
623228072Sbapt	trans2[lastnfa] = NO_TRANSITION;
624228072Sbapt	accptnum[lastnfa] = NIL;
625228072Sbapt	assoc_rule[lastnfa] = num_rules;
626228072Sbapt	state_type[lastnfa] = current_state_type;
627228072Sbapt
628228072Sbapt	/* Fix up equivalence classes base on this transition.  Note that any
629228072Sbapt	 * character which has its own transition gets its own equivalence
630228072Sbapt	 * class.  Thus only characters which are only in character classes
631228072Sbapt	 * have a chance at being in the same equivalence class.  E.g. "a|b"
632228072Sbapt	 * puts 'a' and 'b' into two different equivalence classes.  "[ab]"
633228072Sbapt	 * puts them in the same equivalence class (barring other differences
634228072Sbapt	 * elsewhere in the input).
635228072Sbapt	 */
636228072Sbapt
637228072Sbapt	if (sym < 0) {
638228072Sbapt		/* We don't have to update the equivalence classes since
639228072Sbapt		 * that was already done when the ccl was created for the
640228072Sbapt		 * first time.
641228072Sbapt		 */
642228072Sbapt	}
643228072Sbapt
644228072Sbapt	else if (sym == SYM_EPSILON)
645228072Sbapt		++numeps;
646228072Sbapt
647228072Sbapt	else {
648228072Sbapt		check_char (sym);
649228072Sbapt
650228072Sbapt		if (useecs)
651228072Sbapt			/* Map NUL's to csize. */
652228072Sbapt			mkechar (sym ? sym : csize, nextecm, ecgroup);
653228072Sbapt	}
654228072Sbapt
655228072Sbapt	return lastnfa;
656228072Sbapt}
657228072Sbapt
658228072Sbapt
659228072Sbapt/* mkxtion - make a transition from one state to another
660228072Sbapt *
661228072Sbapt * synopsis
662228072Sbapt *
663228072Sbapt *   mkxtion( statefrom, stateto );
664228072Sbapt *
665228072Sbapt *     statefrom - the state from which the transition is to be made
666228072Sbapt *     stateto   - the state to which the transition is to be made
667228072Sbapt */
668228072Sbapt
669228072Sbaptvoid    mkxtion (statefrom, stateto)
670228072Sbapt     int     statefrom, stateto;
671228072Sbapt{
672228072Sbapt	if (trans1[statefrom] == NO_TRANSITION)
673228072Sbapt		trans1[statefrom] = stateto;
674228072Sbapt
675228072Sbapt	else if ((transchar[statefrom] != SYM_EPSILON) ||
676228072Sbapt		 (trans2[statefrom] != NO_TRANSITION))
677228072Sbapt		flexfatal (_("found too many transitions in mkxtion()"));
678228072Sbapt
679228072Sbapt	else {			/* second out-transition for an epsilon state */
680228072Sbapt		++eps2;
681228072Sbapt		trans2[statefrom] = stateto;
682228072Sbapt	}
683228072Sbapt}
684228072Sbapt
685228072Sbapt/* new_rule - initialize for a new rule */
686228072Sbapt
687228072Sbaptvoid    new_rule ()
688228072Sbapt{
689228072Sbapt	if (++num_rules >= current_max_rules) {
690228072Sbapt		++num_reallocs;
691228072Sbapt		current_max_rules += MAX_RULES_INCREMENT;
692228072Sbapt		rule_type = reallocate_integer_array (rule_type,
693228072Sbapt						      current_max_rules);
694228072Sbapt		rule_linenum = reallocate_integer_array (rule_linenum,
695228072Sbapt							 current_max_rules);
696228072Sbapt		rule_useful = reallocate_integer_array (rule_useful,
697228072Sbapt							current_max_rules);
698228072Sbapt		rule_has_nl = reallocate_bool_array (rule_has_nl,
699228072Sbapt						     current_max_rules);
700228072Sbapt	}
701228072Sbapt
702228072Sbapt	if (num_rules > MAX_RULE)
703228072Sbapt		lerrif (_("too many rules (> %d)!"), MAX_RULE);
704228072Sbapt
705228072Sbapt	rule_linenum[num_rules] = linenum;
706228072Sbapt	rule_useful[num_rules] = false;
707228072Sbapt	rule_has_nl[num_rules] = false;
708228072Sbapt}
709