tblcmp.c revision 16519
12258Scsgr/* tblcmp - table compression routines */
22258Scsgr
32258Scsgr/*-
42258Scsgr * Copyright (c) 1990 The Regents of the University of California.
52258Scsgr * All rights reserved.
62258Scsgr *
72258Scsgr * This code is derived from software contributed to Berkeley by
82258Scsgr * Vern Paxson.
916519Snate *
102258Scsgr * The United States Government has rights in this work pursuant
112258Scsgr * to contract no. DE-AC03-76SF00098 between the United States
122258Scsgr * Department of Energy and the University of California.
132258Scsgr *
142258Scsgr * Redistribution and use in source and binary forms are permitted provided
152258Scsgr * that: (1) source distributions retain this entire copyright notice and
162258Scsgr * comment, and (2) distributions including binaries display the following
172258Scsgr * acknowledgement:  ``This product includes software developed by the
182258Scsgr * University of California, Berkeley and its contributors'' in the
192258Scsgr * documentation or other materials provided with the distribution and in
202258Scsgr * all advertising materials mentioning features or use of this software.
212258Scsgr * Neither the name of the University nor the names of its contributors may
222258Scsgr * be used to endorse or promote products derived from this software without
232258Scsgr * specific prior written permission.
242258Scsgr * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
252258Scsgr * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
262258Scsgr * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
272258Scsgr */
282258Scsgr
2916519Snate/* $Header: /home/ncvs/src/usr.bin/lex/tblcmp.c,v 1.1.1.2 1996/06/19 20:26:43 nate Exp $ */
302258Scsgr
312258Scsgr#include "flexdef.h"
322258Scsgr
332258Scsgr
342258Scsgr/* declarations for functions that have forward references */
352258Scsgr
362258Scsgrvoid mkentry PROTO((register int*, int, int, int, int));
372258Scsgrvoid mkprot PROTO((int[], int, int));
382258Scsgrvoid mktemplate PROTO((int[], int, int));
392258Scsgrvoid mv2front PROTO((int));
402258Scsgrint tbldiff PROTO((int[], int, int[]));
412258Scsgr
422258Scsgr
432258Scsgr/* bldtbl - build table entries for dfa state
442258Scsgr *
452258Scsgr * synopsis
462258Scsgr *   int state[numecs], statenum, totaltrans, comstate, comfreq;
472258Scsgr *   bldtbl( state, statenum, totaltrans, comstate, comfreq );
482258Scsgr *
492258Scsgr * State is the statenum'th dfa state.  It is indexed by equivalence class and
502258Scsgr * gives the number of the state to enter for a given equivalence class.
512258Scsgr * totaltrans is the total number of transitions out of the state.  Comstate
522258Scsgr * is that state which is the destination of the most transitions out of State.
532258Scsgr * Comfreq is how many transitions there are out of State to Comstate.
542258Scsgr *
552258Scsgr * A note on terminology:
562258Scsgr *    "protos" are transition tables which have a high probability of
572258Scsgr * either being redundant (a state processed later will have an identical
582258Scsgr * transition table) or nearly redundant (a state processed later will have
592258Scsgr * many of the same out-transitions).  A "most recently used" queue of
602258Scsgr * protos is kept around with the hope that most states will find a proto
612258Scsgr * which is similar enough to be usable, and therefore compacting the
622258Scsgr * output tables.
632258Scsgr *    "templates" are a special type of proto.  If a transition table is
642258Scsgr * homogeneous or nearly homogeneous (all transitions go to the same
652258Scsgr * destination) then the odds are good that future states will also go
662258Scsgr * to the same destination state on basically the same character set.
672258Scsgr * These homogeneous states are so common when dealing with large rule
682258Scsgr * sets that they merit special attention.  If the transition table were
692258Scsgr * simply made into a proto, then (typically) each subsequent, similar
702258Scsgr * state will differ from the proto for two out-transitions.  One of these
712258Scsgr * out-transitions will be that character on which the proto does not go
722258Scsgr * to the common destination, and one will be that character on which the
732258Scsgr * state does not go to the common destination.  Templates, on the other
742258Scsgr * hand, go to the common state on EVERY transition character, and therefore
752258Scsgr * cost only one difference.
762258Scsgr */
772258Scsgr
782258Scsgrvoid bldtbl( state, statenum, totaltrans, comstate, comfreq )
792258Scsgrint state[], statenum, totaltrans, comstate, comfreq;
802258Scsgr	{
812258Scsgr	int extptr, extrct[2][CSIZE + 1];
822258Scsgr	int mindiff, minprot, i, d;
832258Scsgr
842258Scsgr	/* If extptr is 0 then the first array of extrct holds the result
852258Scsgr	 * of the "best difference" to date, which is those transitions
862258Scsgr	 * which occur in "state" but not in the proto which, to date,
872258Scsgr	 * has the fewest differences between itself and "state".  If
882258Scsgr	 * extptr is 1 then the second array of extrct hold the best
892258Scsgr	 * difference.  The two arrays are toggled between so that the
902258Scsgr	 * best difference to date can be kept around and also a difference
912258Scsgr	 * just created by checking against a candidate "best" proto.
922258Scsgr	 */
932258Scsgr
942258Scsgr	extptr = 0;
952258Scsgr
962258Scsgr	/* If the state has too few out-transitions, don't bother trying to
972258Scsgr	 * compact its tables.
982258Scsgr	 */
992258Scsgr
1002258Scsgr	if ( (totaltrans * 100) < (numecs * PROTO_SIZE_PERCENTAGE) )
1012258Scsgr		mkentry( state, numecs, statenum, JAMSTATE, totaltrans );
1022258Scsgr
1032258Scsgr	else
1042258Scsgr		{
1052258Scsgr		/* "checkcom" is true if we should only check "state" against
1062258Scsgr		 * protos which have the same "comstate" value.
1072258Scsgr		 */
1082258Scsgr		int checkcom =
1092258Scsgr			comfreq * 100 > totaltrans * CHECK_COM_PERCENTAGE;
1102258Scsgr
1112258Scsgr		minprot = firstprot;
1122258Scsgr		mindiff = totaltrans;
1132258Scsgr
1142258Scsgr		if ( checkcom )
1152258Scsgr			{
1162258Scsgr			/* Find first proto which has the same "comstate". */
1172258Scsgr			for ( i = firstprot; i != NIL; i = protnext[i] )
1182258Scsgr				if ( protcomst[i] == comstate )
1192258Scsgr					{
1202258Scsgr					minprot = i;
1212258Scsgr					mindiff = tbldiff( state, minprot,
1222258Scsgr							extrct[extptr] );
1232258Scsgr					break;
1242258Scsgr					}
1252258Scsgr			}
1262258Scsgr
1272258Scsgr		else
1282258Scsgr			{
1292258Scsgr			/* Since we've decided that the most common destination
1302258Scsgr			 * out of "state" does not occur with a high enough
1312258Scsgr			 * frequency, we set the "comstate" to zero, assuring
1322258Scsgr			 * that if this state is entered into the proto list,
1332258Scsgr			 * it will not be considered a template.
1342258Scsgr			 */
1352258Scsgr			comstate = 0;
1362258Scsgr
1372258Scsgr			if ( firstprot != NIL )
1382258Scsgr				{
1392258Scsgr				minprot = firstprot;
1402258Scsgr				mindiff = tbldiff( state, minprot,
1412258Scsgr						extrct[extptr] );
1422258Scsgr				}
1432258Scsgr			}
1442258Scsgr
1452258Scsgr		/* We now have the first interesting proto in "minprot".  If
1462258Scsgr		 * it matches within the tolerances set for the first proto,
1472258Scsgr		 * we don't want to bother scanning the rest of the proto list
1482258Scsgr		 * to see if we have any other reasonable matches.
1492258Scsgr		 */
1502258Scsgr
1512258Scsgr		if ( mindiff * 100 > totaltrans * FIRST_MATCH_DIFF_PERCENTAGE )
1522258Scsgr			{
1532258Scsgr			/* Not a good enough match.  Scan the rest of the
1542258Scsgr			 * protos.
1552258Scsgr			 */
1562258Scsgr			for ( i = minprot; i != NIL; i = protnext[i] )
1572258Scsgr				{
1582258Scsgr				d = tbldiff( state, i, extrct[1 - extptr] );
1592258Scsgr				if ( d < mindiff )
1602258Scsgr					{
1612258Scsgr					extptr = 1 - extptr;
1622258Scsgr					mindiff = d;
1632258Scsgr					minprot = i;
1642258Scsgr					}
1652258Scsgr				}
1662258Scsgr			}
1672258Scsgr
1682258Scsgr		/* Check if the proto we've decided on as our best bet is close
1692258Scsgr		 * enough to the state we want to match to be usable.
1702258Scsgr		 */
1712258Scsgr
1722258Scsgr		if ( mindiff * 100 > totaltrans * ACCEPTABLE_DIFF_PERCENTAGE )
1732258Scsgr			{
1742258Scsgr			/* No good.  If the state is homogeneous enough,
1752258Scsgr			 * we make a template out of it.  Otherwise, we
1762258Scsgr			 * make a proto.
1772258Scsgr			 */
1782258Scsgr
1792258Scsgr			if ( comfreq * 100 >=
1802258Scsgr			     totaltrans * TEMPLATE_SAME_PERCENTAGE )
1812258Scsgr				mktemplate( state, statenum, comstate );
1822258Scsgr
1832258Scsgr			else
1842258Scsgr				{
1852258Scsgr				mkprot( state, statenum, comstate );
1862258Scsgr				mkentry( state, numecs, statenum,
1872258Scsgr					JAMSTATE, totaltrans );
1882258Scsgr				}
1892258Scsgr			}
1902258Scsgr
1912258Scsgr		else
1922258Scsgr			{ /* use the proto */
1932258Scsgr			mkentry( extrct[extptr], numecs, statenum,
1942258Scsgr				prottbl[minprot], mindiff );
1952258Scsgr
1962258Scsgr			/* If this state was sufficiently different from the
1972258Scsgr			 * proto we built it from, make it, too, a proto.
1982258Scsgr			 */
1992258Scsgr
2002258Scsgr			if ( mindiff * 100 >=
2012258Scsgr			     totaltrans * NEW_PROTO_DIFF_PERCENTAGE )
2022258Scsgr				mkprot( state, statenum, comstate );
2032258Scsgr
2042258Scsgr			/* Since mkprot added a new proto to the proto queue,
2052258Scsgr			 * it's possible that "minprot" is no longer on the
2062258Scsgr			 * proto queue (if it happened to have been the last
2072258Scsgr			 * entry, it would have been bumped off).  If it's
2082258Scsgr			 * not there, then the new proto took its physical
2092258Scsgr			 * place (though logically the new proto is at the
2102258Scsgr			 * beginning of the queue), so in that case the
2112258Scsgr			 * following call will do nothing.
2122258Scsgr			 */
2132258Scsgr
2142258Scsgr			mv2front( minprot );
2152258Scsgr			}
2162258Scsgr		}
2172258Scsgr	}
2182258Scsgr
2192258Scsgr
2202258Scsgr/* cmptmps - compress template table entries
2212258Scsgr *
2222258Scsgr * Template tables are compressed by using the 'template equivalence
2232258Scsgr * classes', which are collections of transition character equivalence
2242258Scsgr * classes which always appear together in templates - really meta-equivalence
2252258Scsgr * classes.
2262258Scsgr */
2272258Scsgr
2282258Scsgrvoid cmptmps()
2292258Scsgr	{
2302258Scsgr	int tmpstorage[CSIZE + 1];
2312258Scsgr	register int *tmp = tmpstorage, i, j;
2322258Scsgr	int totaltrans, trans;
2332258Scsgr
2342258Scsgr	peakpairs = numtemps * numecs + tblend;
2352258Scsgr
2362258Scsgr	if ( usemecs )
2372258Scsgr		{
2382258Scsgr		/* Create equivalence classes based on data gathered on
2392258Scsgr		 * template transitions.
2402258Scsgr		 */
2412258Scsgr		nummecs = cre8ecs( tecfwd, tecbck, numecs );
2422258Scsgr		}
2432258Scsgr
2442258Scsgr	else
2452258Scsgr		nummecs = numecs;
2462258Scsgr
2472258Scsgr	while ( lastdfa + numtemps + 1 >= current_max_dfas )
2482258Scsgr		increase_max_dfas();
2492258Scsgr
2502258Scsgr	/* Loop through each template. */
2512258Scsgr
2522258Scsgr	for ( i = 1; i <= numtemps; ++i )
2532258Scsgr		{
2542258Scsgr		/* Number of non-jam transitions out of this template. */
2552258Scsgr		totaltrans = 0;
2562258Scsgr
2572258Scsgr		for ( j = 1; j <= numecs; ++j )
2582258Scsgr			{
2592258Scsgr			trans = tnxt[numecs * i + j];
2602258Scsgr
2612258Scsgr			if ( usemecs )
2622258Scsgr				{
2632258Scsgr				/* The absolute value of tecbck is the
2642258Scsgr				 * meta-equivalence class of a given
2652258Scsgr				 * equivalence class, as set up by cre8ecs().
2662258Scsgr				 */
2672258Scsgr				if ( tecbck[j] > 0 )
2682258Scsgr					{
2692258Scsgr					tmp[tecbck[j]] = trans;
2702258Scsgr
2712258Scsgr					if ( trans > 0 )
2722258Scsgr						++totaltrans;
2732258Scsgr					}
2742258Scsgr				}
2752258Scsgr
2762258Scsgr			else
2772258Scsgr				{
2782258Scsgr				tmp[j] = trans;
2792258Scsgr
2802258Scsgr				if ( trans > 0 )
2812258Scsgr					++totaltrans;
2822258Scsgr				}
2832258Scsgr			}
2842258Scsgr
2852258Scsgr		/* It is assumed (in a rather subtle way) in the skeleton
2862258Scsgr		 * that if we're using meta-equivalence classes, the def[]
2872258Scsgr		 * entry for all templates is the jam template, i.e.,
2882258Scsgr		 * templates never default to other non-jam table entries
2892258Scsgr		 * (e.g., another template)
2902258Scsgr		 */
2912258Scsgr
2922258Scsgr		/* Leave room for the jam-state after the last real state. */
2932258Scsgr		mkentry( tmp, nummecs, lastdfa + i + 1, JAMSTATE, totaltrans );
2942258Scsgr		}
2952258Scsgr	}
2962258Scsgr
2972258Scsgr
2982258Scsgr
2992258Scsgr/* expand_nxt_chk - expand the next check arrays */
3002258Scsgr
3012258Scsgrvoid expand_nxt_chk()
3022258Scsgr	{
3032258Scsgr	register int old_max = current_max_xpairs;
3042258Scsgr
3052258Scsgr	current_max_xpairs += MAX_XPAIRS_INCREMENT;
3062258Scsgr
3072258Scsgr	++num_reallocs;
3082258Scsgr
3092258Scsgr	nxt = reallocate_integer_array( nxt, current_max_xpairs );
3102258Scsgr	chk = reallocate_integer_array( chk, current_max_xpairs );
3112258Scsgr
3122258Scsgr	zero_out( (char *) (chk + old_max),
31316519Snate		(size_t) (MAX_XPAIRS_INCREMENT * sizeof( int )) );
3142258Scsgr	}
3152258Scsgr
3162258Scsgr
3172258Scsgr/* find_table_space - finds a space in the table for a state to be placed
3182258Scsgr *
3192258Scsgr * synopsis
3202258Scsgr *     int *state, numtrans, block_start;
3212258Scsgr *     int find_table_space();
3222258Scsgr *
3232258Scsgr *     block_start = find_table_space( state, numtrans );
3242258Scsgr *
3252258Scsgr * State is the state to be added to the full speed transition table.
3262258Scsgr * Numtrans is the number of out-transitions for the state.
3272258Scsgr *
3282258Scsgr * find_table_space() returns the position of the start of the first block (in
3292258Scsgr * chk) able to accommodate the state
3302258Scsgr *
3312258Scsgr * In determining if a state will or will not fit, find_table_space() must take
3322258Scsgr * into account the fact that an end-of-buffer state will be added at [0],
3332258Scsgr * and an action number will be added in [-1].
3342258Scsgr */
3352258Scsgr
3362258Scsgrint find_table_space( state, numtrans )
3372258Scsgrint *state, numtrans;
3382258Scsgr	{
3392258Scsgr	/* Firstfree is the position of the first possible occurrence of two
3402258Scsgr	 * consecutive unused records in the chk and nxt arrays.
3412258Scsgr	 */
3422258Scsgr	register int i;
3432258Scsgr	register int *state_ptr, *chk_ptr;
3442258Scsgr	register int *ptr_to_last_entry_in_state;
3452258Scsgr
3462258Scsgr	/* If there are too many out-transitions, put the state at the end of
3472258Scsgr	 * nxt and chk.
3482258Scsgr	 */
3492258Scsgr	if ( numtrans > MAX_XTIONS_FULL_INTERIOR_FIT )
3502258Scsgr		{
3512258Scsgr		/* If table is empty, return the first available spot in
3522258Scsgr		 * chk/nxt, which should be 1.
3532258Scsgr		 */
3542258Scsgr		if ( tblend < 2 )
3552258Scsgr			return 1;
3562258Scsgr
3572258Scsgr		/* Start searching for table space near the end of
3582258Scsgr		 * chk/nxt arrays.
3592258Scsgr		 */
3602258Scsgr		i = tblend - numecs;
3612258Scsgr		}
3622258Scsgr
3632258Scsgr	else
3642258Scsgr		/* Start searching for table space from the beginning
3652258Scsgr		 * (skipping only the elements which will definitely not
3662258Scsgr		 * hold the new state).
3672258Scsgr		 */
3682258Scsgr		i = firstfree;
3692258Scsgr
3702258Scsgr	while ( 1 )	/* loops until a space is found */
3712258Scsgr		{
3722258Scsgr		while ( i + numecs >= current_max_xpairs )
3732258Scsgr			expand_nxt_chk();
3742258Scsgr
3752258Scsgr		/* Loops until space for end-of-buffer and action number
3762258Scsgr		 * are found.
3772258Scsgr		 */
3782258Scsgr		while ( 1 )
3792258Scsgr			{
3802258Scsgr			/* Check for action number space. */
3812258Scsgr			if ( chk[i - 1] == 0 )
3822258Scsgr				{
3832258Scsgr				/* Check for end-of-buffer space. */
3842258Scsgr				if ( chk[i] == 0 )
3852258Scsgr					break;
3862258Scsgr
3872258Scsgr				else
3882258Scsgr					/* Since i != 0, there is no use
3892258Scsgr					 * checking to see if (++i) - 1 == 0,
3902258Scsgr					 * because that's the same as i == 0,
3912258Scsgr					 * so we skip a space.
3922258Scsgr					 */
3932258Scsgr					i += 2;
3942258Scsgr				}
3952258Scsgr
3962258Scsgr			else
3972258Scsgr				++i;
3982258Scsgr
3992258Scsgr			while ( i + numecs >= current_max_xpairs )
4002258Scsgr				expand_nxt_chk();
4012258Scsgr			}
4022258Scsgr
4032258Scsgr		/* If we started search from the beginning, store the new
4042258Scsgr		 * firstfree for the next call of find_table_space().
4052258Scsgr		 */
4062258Scsgr		if ( numtrans <= MAX_XTIONS_FULL_INTERIOR_FIT )
4072258Scsgr			firstfree = i + 1;
4082258Scsgr
4092258Scsgr		/* Check to see if all elements in chk (and therefore nxt)
4102258Scsgr		 * that are needed for the new state have not yet been taken.
4112258Scsgr		 */
4122258Scsgr
4132258Scsgr		state_ptr = &state[1];
4142258Scsgr		ptr_to_last_entry_in_state = &chk[i + numecs + 1];
4152258Scsgr
4162258Scsgr		for ( chk_ptr = &chk[i + 1];
4172258Scsgr		      chk_ptr != ptr_to_last_entry_in_state; ++chk_ptr )
4182258Scsgr			if ( *(state_ptr++) != 0 && *chk_ptr != 0 )
4192258Scsgr				break;
4202258Scsgr
4212258Scsgr		if ( chk_ptr == ptr_to_last_entry_in_state )
4222258Scsgr			return i;
4232258Scsgr
4242258Scsgr		else
4252258Scsgr		++i;
4262258Scsgr		}
4272258Scsgr	}
4282258Scsgr
4292258Scsgr
4302258Scsgr/* inittbl - initialize transition tables
4312258Scsgr *
4322258Scsgr * Initializes "firstfree" to be one beyond the end of the table.  Initializes
4332258Scsgr * all "chk" entries to be zero.
4342258Scsgr */
4352258Scsgrvoid inittbl()
4362258Scsgr	{
4372258Scsgr	register int i;
4382258Scsgr
43916519Snate	zero_out( (char *) chk, (size_t) (current_max_xpairs * sizeof( int )) );
4402258Scsgr
4412258Scsgr	tblend = 0;
4422258Scsgr	firstfree = tblend + 1;
4432258Scsgr	numtemps = 0;
4442258Scsgr
4452258Scsgr	if ( usemecs )
4462258Scsgr		{
4472258Scsgr		/* Set up doubly-linked meta-equivalence classes; these
4482258Scsgr		 * are sets of equivalence classes which all have identical
4492258Scsgr		 * transitions out of TEMPLATES.
4502258Scsgr		 */
4512258Scsgr
4522258Scsgr		tecbck[1] = NIL;
4532258Scsgr
4542258Scsgr		for ( i = 2; i <= numecs; ++i )
4552258Scsgr			{
4562258Scsgr			tecbck[i] = i - 1;
4572258Scsgr			tecfwd[i - 1] = i;
4582258Scsgr			}
4592258Scsgr
4602258Scsgr		tecfwd[numecs] = NIL;
4612258Scsgr		}
4622258Scsgr	}
4632258Scsgr
4642258Scsgr
4652258Scsgr/* mkdeftbl - make the default, "jam" table entries */
4662258Scsgr
4672258Scsgrvoid mkdeftbl()
4682258Scsgr	{
4692258Scsgr	int i;
4702258Scsgr
4712258Scsgr	jamstate = lastdfa + 1;
4722258Scsgr
4732258Scsgr	++tblend; /* room for transition on end-of-buffer character */
4742258Scsgr
4752258Scsgr	while ( tblend + numecs >= current_max_xpairs )
4762258Scsgr		expand_nxt_chk();
4772258Scsgr
4782258Scsgr	/* Add in default end-of-buffer transition. */
4792258Scsgr	nxt[tblend] = end_of_buffer_state;
4802258Scsgr	chk[tblend] = jamstate;
4812258Scsgr
4822258Scsgr	for ( i = 1; i <= numecs; ++i )
4832258Scsgr		{
4842258Scsgr		nxt[tblend + i] = 0;
4852258Scsgr		chk[tblend + i] = jamstate;
4862258Scsgr		}
4872258Scsgr
4882258Scsgr	jambase = tblend;
4892258Scsgr
4902258Scsgr	base[jamstate] = jambase;
4912258Scsgr	def[jamstate] = 0;
4922258Scsgr
4932258Scsgr	tblend += numecs;
4942258Scsgr	++numtemps;
4952258Scsgr	}
4962258Scsgr
4972258Scsgr
4982258Scsgr/* mkentry - create base/def and nxt/chk entries for transition array
4992258Scsgr *
5002258Scsgr * synopsis
5012258Scsgr *   int state[numchars + 1], numchars, statenum, deflink, totaltrans;
5022258Scsgr *   mkentry( state, numchars, statenum, deflink, totaltrans );
5032258Scsgr *
5042258Scsgr * "state" is a transition array "numchars" characters in size, "statenum"
5052258Scsgr * is the offset to be used into the base/def tables, and "deflink" is the
5062258Scsgr * entry to put in the "def" table entry.  If "deflink" is equal to
5072258Scsgr * "JAMSTATE", then no attempt will be made to fit zero entries of "state"
5082258Scsgr * (i.e., jam entries) into the table.  It is assumed that by linking to
5092258Scsgr * "JAMSTATE" they will be taken care of.  In any case, entries in "state"
5102258Scsgr * marking transitions to "SAME_TRANS" are treated as though they will be
5112258Scsgr * taken care of by whereever "deflink" points.  "totaltrans" is the total
5122258Scsgr * number of transitions out of the state.  If it is below a certain threshold,
5132258Scsgr * the tables are searched for an interior spot that will accommodate the
5142258Scsgr * state array.
5152258Scsgr */
5162258Scsgr
5172258Scsgrvoid mkentry( state, numchars, statenum, deflink, totaltrans )
5182258Scsgrregister int *state;
5192258Scsgrint numchars, statenum, deflink, totaltrans;
5202258Scsgr	{
5212258Scsgr	register int minec, maxec, i, baseaddr;
5222258Scsgr	int tblbase, tbllast;
5232258Scsgr
5242258Scsgr	if ( totaltrans == 0 )
5252258Scsgr		{ /* there are no out-transitions */
5262258Scsgr		if ( deflink == JAMSTATE )
5272258Scsgr			base[statenum] = JAMSTATE;
5282258Scsgr		else
5292258Scsgr			base[statenum] = 0;
5302258Scsgr
5312258Scsgr		def[statenum] = deflink;
5322258Scsgr		return;
5332258Scsgr		}
5342258Scsgr
5352258Scsgr	for ( minec = 1; minec <= numchars; ++minec )
5362258Scsgr		{
5372258Scsgr		if ( state[minec] != SAME_TRANS )
5382258Scsgr			if ( state[minec] != 0 || deflink != JAMSTATE )
5392258Scsgr				break;
5402258Scsgr		}
5412258Scsgr
5422258Scsgr	if ( totaltrans == 1 )
5432258Scsgr		{
5442258Scsgr		/* There's only one out-transition.  Save it for later to fill
5452258Scsgr		 * in holes in the tables.
5462258Scsgr		 */
5472258Scsgr		stack1( statenum, minec, state[minec], deflink );
5482258Scsgr		return;
5492258Scsgr		}
5502258Scsgr
5512258Scsgr	for ( maxec = numchars; maxec > 0; --maxec )
5522258Scsgr		{
5532258Scsgr		if ( state[maxec] != SAME_TRANS )
5542258Scsgr			if ( state[maxec] != 0 || deflink != JAMSTATE )
5552258Scsgr				break;
5562258Scsgr		}
5572258Scsgr
5582258Scsgr	/* Whether we try to fit the state table in the middle of the table
5592258Scsgr	 * entries we have already generated, or if we just take the state
5602258Scsgr	 * table at the end of the nxt/chk tables, we must make sure that we
5612258Scsgr	 * have a valid base address (i.e., non-negative).  Note that
5622258Scsgr	 * negative base addresses dangerous at run-time (because indexing
5632258Scsgr	 * the nxt array with one and a low-valued character will access
5642258Scsgr	 * memory before the start of the array.
5652258Scsgr	 */
5662258Scsgr
5672258Scsgr	/* Find the first transition of state that we need to worry about. */
5682258Scsgr	if ( totaltrans * 100 <= numchars * INTERIOR_FIT_PERCENTAGE )
5692258Scsgr		{
5702258Scsgr		/* Attempt to squeeze it into the middle of the tables. */
5712258Scsgr		baseaddr = firstfree;
5722258Scsgr
5732258Scsgr		while ( baseaddr < minec )
5742258Scsgr			{
5752258Scsgr			/* Using baseaddr would result in a negative base
5762258Scsgr			 * address below; find the next free slot.
5772258Scsgr			 */
5782258Scsgr			for ( ++baseaddr; chk[baseaddr] != 0; ++baseaddr )
5792258Scsgr				;
5802258Scsgr			}
5812258Scsgr
5822258Scsgr		while ( baseaddr + maxec - minec + 1 >= current_max_xpairs )
5832258Scsgr			expand_nxt_chk();
5842258Scsgr
5852258Scsgr		for ( i = minec; i <= maxec; ++i )
5862258Scsgr			if ( state[i] != SAME_TRANS &&
5872258Scsgr			     (state[i] != 0 || deflink != JAMSTATE) &&
5882258Scsgr			     chk[baseaddr + i - minec] != 0 )
5892258Scsgr				{ /* baseaddr unsuitable - find another */
5902258Scsgr				for ( ++baseaddr;
5912258Scsgr				      baseaddr < current_max_xpairs &&
5922258Scsgr				      chk[baseaddr] != 0; ++baseaddr )
5932258Scsgr					;
5942258Scsgr
5952258Scsgr				while ( baseaddr + maxec - minec + 1 >=
5962258Scsgr					current_max_xpairs )
5972258Scsgr					expand_nxt_chk();
5982258Scsgr
5992258Scsgr				/* Reset the loop counter so we'll start all
6002258Scsgr				 * over again next time it's incremented.
6012258Scsgr				 */
6022258Scsgr
6032258Scsgr				i = minec - 1;
6042258Scsgr				}
6052258Scsgr		}
6062258Scsgr
6072258Scsgr	else
6082258Scsgr		{
6092258Scsgr		/* Ensure that the base address we eventually generate is
6102258Scsgr		 * non-negative.
6112258Scsgr		 */
6122258Scsgr		baseaddr = MAX( tblend + 1, minec );
6132258Scsgr		}
6142258Scsgr
6152258Scsgr	tblbase = baseaddr - minec;
6162258Scsgr	tbllast = tblbase + maxec;
6172258Scsgr
6182258Scsgr	while ( tbllast + 1 >= current_max_xpairs )
6192258Scsgr		expand_nxt_chk();
6202258Scsgr
6212258Scsgr	base[statenum] = tblbase;
6222258Scsgr	def[statenum] = deflink;
6232258Scsgr
6242258Scsgr	for ( i = minec; i <= maxec; ++i )
6252258Scsgr		if ( state[i] != SAME_TRANS )
6262258Scsgr			if ( state[i] != 0 || deflink != JAMSTATE )
6272258Scsgr				{
6282258Scsgr				nxt[tblbase + i] = state[i];
6292258Scsgr				chk[tblbase + i] = statenum;
6302258Scsgr				}
6312258Scsgr
6322258Scsgr	if ( baseaddr == firstfree )
6332258Scsgr		/* Find next free slot in tables. */
6342258Scsgr		for ( ++firstfree; chk[firstfree] != 0; ++firstfree )
6352258Scsgr			;
6362258Scsgr
6372258Scsgr	tblend = MAX( tblend, tbllast );
6382258Scsgr	}
6392258Scsgr
6402258Scsgr
6412258Scsgr/* mk1tbl - create table entries for a state (or state fragment) which
6422258Scsgr *            has only one out-transition
6432258Scsgr */
6442258Scsgr
6452258Scsgrvoid mk1tbl( state, sym, onenxt, onedef )
6462258Scsgrint state, sym, onenxt, onedef;
6472258Scsgr	{
6482258Scsgr	if ( firstfree < sym )
6492258Scsgr		firstfree = sym;
6502258Scsgr
6512258Scsgr	while ( chk[firstfree] != 0 )
6522258Scsgr		if ( ++firstfree >= current_max_xpairs )
6532258Scsgr			expand_nxt_chk();
6542258Scsgr
6552258Scsgr	base[state] = firstfree - sym;
6562258Scsgr	def[state] = onedef;
6572258Scsgr	chk[firstfree] = state;
6582258Scsgr	nxt[firstfree] = onenxt;
6592258Scsgr
6602258Scsgr	if ( firstfree > tblend )
6612258Scsgr		{
6622258Scsgr		tblend = firstfree++;
6632258Scsgr
6642258Scsgr		if ( firstfree >= current_max_xpairs )
6652258Scsgr			expand_nxt_chk();
6662258Scsgr		}
6672258Scsgr	}
6682258Scsgr
6692258Scsgr
6702258Scsgr/* mkprot - create new proto entry */
6712258Scsgr
6722258Scsgrvoid mkprot( state, statenum, comstate )
6732258Scsgrint state[], statenum, comstate;
6742258Scsgr	{
6752258Scsgr	int i, slot, tblbase;
6762258Scsgr
6772258Scsgr	if ( ++numprots >= MSP || numecs * numprots >= PROT_SAVE_SIZE )
6782258Scsgr		{
6792258Scsgr		/* Gotta make room for the new proto by dropping last entry in
6802258Scsgr		 * the queue.
6812258Scsgr		 */
6822258Scsgr		slot = lastprot;
6832258Scsgr		lastprot = protprev[lastprot];
6842258Scsgr		protnext[lastprot] = NIL;
6852258Scsgr		}
6862258Scsgr
6872258Scsgr	else
6882258Scsgr		slot = numprots;
6892258Scsgr
6902258Scsgr	protnext[slot] = firstprot;
6912258Scsgr
6922258Scsgr	if ( firstprot != NIL )
6932258Scsgr		protprev[firstprot] = slot;
6942258Scsgr
6952258Scsgr	firstprot = slot;
6962258Scsgr	prottbl[slot] = statenum;
6972258Scsgr	protcomst[slot] = comstate;
6982258Scsgr
6992258Scsgr	/* Copy state into save area so it can be compared with rapidly. */
7002258Scsgr	tblbase = numecs * (slot - 1);
7012258Scsgr
7022258Scsgr	for ( i = 1; i <= numecs; ++i )
7032258Scsgr		protsave[tblbase + i] = state[i];
7042258Scsgr	}
7052258Scsgr
7062258Scsgr
7072258Scsgr/* mktemplate - create a template entry based on a state, and connect the state
7082258Scsgr *              to it
7092258Scsgr */
7102258Scsgr
7112258Scsgrvoid mktemplate( state, statenum, comstate )
7122258Scsgrint state[], statenum, comstate;
7132258Scsgr	{
7142258Scsgr	int i, numdiff, tmpbase, tmp[CSIZE + 1];
7152258Scsgr	Char transset[CSIZE + 1];
7162258Scsgr	int tsptr;
7172258Scsgr
7182258Scsgr	++numtemps;
7192258Scsgr
7202258Scsgr	tsptr = 0;
7212258Scsgr
7222258Scsgr	/* Calculate where we will temporarily store the transition table
7232258Scsgr	 * of the template in the tnxt[] array.  The final transition table
7242258Scsgr	 * gets created by cmptmps().
7252258Scsgr	 */
7262258Scsgr
7272258Scsgr	tmpbase = numtemps * numecs;
7282258Scsgr
7292258Scsgr	if ( tmpbase + numecs >= current_max_template_xpairs )
7302258Scsgr		{
7312258Scsgr		current_max_template_xpairs += MAX_TEMPLATE_XPAIRS_INCREMENT;
7322258Scsgr
7332258Scsgr		++num_reallocs;
7342258Scsgr
7352258Scsgr		tnxt = reallocate_integer_array( tnxt,
7362258Scsgr			current_max_template_xpairs );
7372258Scsgr		}
7382258Scsgr
7392258Scsgr	for ( i = 1; i <= numecs; ++i )
7402258Scsgr		if ( state[i] == 0 )
7412258Scsgr			tnxt[tmpbase + i] = 0;
7422258Scsgr		else
7432258Scsgr			{
7442258Scsgr			transset[tsptr++] = i;
7452258Scsgr			tnxt[tmpbase + i] = comstate;
7462258Scsgr			}
7472258Scsgr
7482258Scsgr	if ( usemecs )
7492258Scsgr		mkeccl( transset, tsptr, tecfwd, tecbck, numecs, 0 );
7502258Scsgr
7512258Scsgr	mkprot( tnxt + tmpbase, -numtemps, comstate );
7522258Scsgr
7532258Scsgr	/* We rely on the fact that mkprot adds things to the beginning
7542258Scsgr	 * of the proto queue.
7552258Scsgr	 */
7562258Scsgr
7572258Scsgr	numdiff = tbldiff( state, firstprot, tmp );
7582258Scsgr	mkentry( tmp, numecs, statenum, -numtemps, numdiff );
7592258Scsgr	}
7602258Scsgr
7612258Scsgr
7622258Scsgr/* mv2front - move proto queue element to front of queue */
7632258Scsgr
7642258Scsgrvoid mv2front( qelm )
7652258Scsgrint qelm;
7662258Scsgr	{
7672258Scsgr	if ( firstprot != qelm )
7682258Scsgr		{
7692258Scsgr		if ( qelm == lastprot )
7702258Scsgr			lastprot = protprev[lastprot];
7712258Scsgr
7722258Scsgr		protnext[protprev[qelm]] = protnext[qelm];
7732258Scsgr
7742258Scsgr		if ( protnext[qelm] != NIL )
7752258Scsgr			protprev[protnext[qelm]] = protprev[qelm];
7762258Scsgr
7772258Scsgr		protprev[qelm] = NIL;
7782258Scsgr		protnext[qelm] = firstprot;
7792258Scsgr		protprev[firstprot] = qelm;
7802258Scsgr		firstprot = qelm;
7812258Scsgr		}
7822258Scsgr	}
7832258Scsgr
7842258Scsgr
7852258Scsgr/* place_state - place a state into full speed transition table
7862258Scsgr *
7872258Scsgr * State is the statenum'th state.  It is indexed by equivalence class and
7882258Scsgr * gives the number of the state to enter for a given equivalence class.
7892258Scsgr * Transnum is the number of out-transitions for the state.
7902258Scsgr */
7912258Scsgr
7922258Scsgrvoid place_state( state, statenum, transnum )
7932258Scsgrint *state, statenum, transnum;
7942258Scsgr	{
7952258Scsgr	register int i;
7962258Scsgr	register int *state_ptr;
7972258Scsgr	int position = find_table_space( state, transnum );
7982258Scsgr
7992258Scsgr	/* "base" is the table of start positions. */
8002258Scsgr	base[statenum] = position;
8012258Scsgr
8022258Scsgr	/* Put in action number marker; this non-zero number makes sure that
8032258Scsgr	 * find_table_space() knows that this position in chk/nxt is taken
8042258Scsgr	 * and should not be used for another accepting number in another
8052258Scsgr	 * state.
8062258Scsgr	 */
8072258Scsgr	chk[position - 1] = 1;
8082258Scsgr
8092258Scsgr	/* Put in end-of-buffer marker; this is for the same purposes as
8102258Scsgr	 * above.
8112258Scsgr	 */
8122258Scsgr	chk[position] = 1;
8132258Scsgr
8142258Scsgr	/* Place the state into chk and nxt. */
8152258Scsgr	state_ptr = &state[1];
8162258Scsgr
8172258Scsgr	for ( i = 1; i <= numecs; ++i, ++state_ptr )
8182258Scsgr		if ( *state_ptr != 0 )
8192258Scsgr			{
8202258Scsgr			chk[position + i] = i;
8212258Scsgr			nxt[position + i] = *state_ptr;
8222258Scsgr			}
8232258Scsgr
8242258Scsgr	if ( position + numecs > tblend )
8252258Scsgr		tblend = position + numecs;
8262258Scsgr	}
8272258Scsgr
8282258Scsgr
8292258Scsgr/* stack1 - save states with only one out-transition to be processed later
8302258Scsgr *
8312258Scsgr * If there's room for another state on the "one-transition" stack, the
8322258Scsgr * state is pushed onto it, to be processed later by mk1tbl.  If there's
8332258Scsgr * no room, we process the sucker right now.
8342258Scsgr */
8352258Scsgr
8362258Scsgrvoid stack1( statenum, sym, nextstate, deflink )
8372258Scsgrint statenum, sym, nextstate, deflink;
8382258Scsgr	{
8392258Scsgr	if ( onesp >= ONE_STACK_SIZE - 1 )
8402258Scsgr		mk1tbl( statenum, sym, nextstate, deflink );
8412258Scsgr
8422258Scsgr	else
8432258Scsgr		{
8442258Scsgr		++onesp;
8452258Scsgr		onestate[onesp] = statenum;
8462258Scsgr		onesym[onesp] = sym;
8472258Scsgr		onenext[onesp] = nextstate;
8482258Scsgr		onedef[onesp] = deflink;
8492258Scsgr		}
8502258Scsgr	}
8512258Scsgr
8522258Scsgr
8532258Scsgr/* tbldiff - compute differences between two state tables
8542258Scsgr *
8552258Scsgr * "state" is the state array which is to be extracted from the pr'th
8562258Scsgr * proto.  "pr" is both the number of the proto we are extracting from
8572258Scsgr * and an index into the save area where we can find the proto's complete
8582258Scsgr * state table.  Each entry in "state" which differs from the corresponding
8592258Scsgr * entry of "pr" will appear in "ext".
8602258Scsgr *
8612258Scsgr * Entries which are the same in both "state" and "pr" will be marked
8622258Scsgr * as transitions to "SAME_TRANS" in "ext".  The total number of differences
8632258Scsgr * between "state" and "pr" is returned as function value.  Note that this
8642258Scsgr * number is "numecs" minus the number of "SAME_TRANS" entries in "ext".
8652258Scsgr */
8662258Scsgr
8672258Scsgrint tbldiff( state, pr, ext )
8682258Scsgrint state[], pr, ext[];
8692258Scsgr	{
8702258Scsgr	register int i, *sp = state, *ep = ext, *protp;
8712258Scsgr	register int numdiff = 0;
8722258Scsgr
8732258Scsgr	protp = &protsave[numecs * (pr - 1)];
8742258Scsgr
8752258Scsgr	for ( i = numecs; i > 0; --i )
8762258Scsgr		{
8772258Scsgr		if ( *++protp == *++sp )
8782258Scsgr			*++ep = SAME_TRANS;
8792258Scsgr		else
8802258Scsgr			{
8812258Scsgr			*++ep = *sp;
8822258Scsgr			++numdiff;
8832258Scsgr			}
8842258Scsgr		}
8852258Scsgr
8862258Scsgr	return numdiff;
8872258Scsgr	}
888