12258Scsgr/* tblcmp - table compression routines */
22258Scsgr
32258Scsgr/*-
42258Scsgr * Copyright (c) 1990 The Regents of the University of California.
52258Scsgr * All rights reserved.
62258Scsgr *
72258Scsgr * This code is derived from software contributed to Berkeley by
82258Scsgr * Vern Paxson.
916519Snate *
102258Scsgr * The United States Government has rights in this work pursuant
112258Scsgr * to contract no. DE-AC03-76SF00098 between the United States
122258Scsgr * Department of Energy and the University of California.
132258Scsgr *
142258Scsgr * Redistribution and use in source and binary forms are permitted provided
152258Scsgr * that: (1) source distributions retain this entire copyright notice and
162258Scsgr * comment, and (2) distributions including binaries display the following
172258Scsgr * acknowledgement:  ``This product includes software developed by the
182258Scsgr * University of California, Berkeley and its contributors'' in the
192258Scsgr * documentation or other materials provided with the distribution and in
202258Scsgr * all advertising materials mentioning features or use of this software.
212258Scsgr * Neither the name of the University nor the names of its contributors may
222258Scsgr * be used to endorse or promote products derived from this software without
232258Scsgr * specific prior written permission.
242258Scsgr * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
252258Scsgr * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
262258Scsgr * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
272258Scsgr */
282258Scsgr
2952555Sobrien/* $Header: /home/daffy/u0/vern/flex/RCS/tblcmp.c,v 2.11 94/11/05 17:08:28 vern Exp $ */
3099112Sobrien#include <sys/cdefs.h>
3199112Sobrien__FBSDID("$FreeBSD$");
322258Scsgr
332258Scsgr#include "flexdef.h"
342258Scsgr
352258Scsgr
362258Scsgr/* declarations for functions that have forward references */
372258Scsgr
38179549Sdwmalonevoid mkentry PROTO((int*, int, int, int, int));
392258Scsgrvoid mkprot PROTO((int[], int, int));
402258Scsgrvoid mktemplate PROTO((int[], int, int));
412258Scsgrvoid mv2front PROTO((int));
422258Scsgrint tbldiff PROTO((int[], int, int[]));
432258Scsgr
442258Scsgr
452258Scsgr/* bldtbl - build table entries for dfa state
462258Scsgr *
472258Scsgr * synopsis
482258Scsgr *   int state[numecs], statenum, totaltrans, comstate, comfreq;
492258Scsgr *   bldtbl( state, statenum, totaltrans, comstate, comfreq );
502258Scsgr *
512258Scsgr * State is the statenum'th dfa state.  It is indexed by equivalence class and
522258Scsgr * gives the number of the state to enter for a given equivalence class.
532258Scsgr * totaltrans is the total number of transitions out of the state.  Comstate
542258Scsgr * is that state which is the destination of the most transitions out of State.
552258Scsgr * Comfreq is how many transitions there are out of State to Comstate.
562258Scsgr *
572258Scsgr * A note on terminology:
582258Scsgr *    "protos" are transition tables which have a high probability of
592258Scsgr * either being redundant (a state processed later will have an identical
602258Scsgr * transition table) or nearly redundant (a state processed later will have
612258Scsgr * many of the same out-transitions).  A "most recently used" queue of
622258Scsgr * protos is kept around with the hope that most states will find a proto
632258Scsgr * which is similar enough to be usable, and therefore compacting the
642258Scsgr * output tables.
652258Scsgr *    "templates" are a special type of proto.  If a transition table is
662258Scsgr * homogeneous or nearly homogeneous (all transitions go to the same
672258Scsgr * destination) then the odds are good that future states will also go
682258Scsgr * to the same destination state on basically the same character set.
692258Scsgr * These homogeneous states are so common when dealing with large rule
702258Scsgr * sets that they merit special attention.  If the transition table were
712258Scsgr * simply made into a proto, then (typically) each subsequent, similar
722258Scsgr * state will differ from the proto for two out-transitions.  One of these
732258Scsgr * out-transitions will be that character on which the proto does not go
742258Scsgr * to the common destination, and one will be that character on which the
752258Scsgr * state does not go to the common destination.  Templates, on the other
762258Scsgr * hand, go to the common state on EVERY transition character, and therefore
772258Scsgr * cost only one difference.
782258Scsgr */
792258Scsgr
802258Scsgrvoid bldtbl( state, statenum, totaltrans, comstate, comfreq )
812258Scsgrint state[], statenum, totaltrans, comstate, comfreq;
822258Scsgr	{
832258Scsgr	int extptr, extrct[2][CSIZE + 1];
842258Scsgr	int mindiff, minprot, i, d;
852258Scsgr
862258Scsgr	/* If extptr is 0 then the first array of extrct holds the result
872258Scsgr	 * of the "best difference" to date, which is those transitions
882258Scsgr	 * which occur in "state" but not in the proto which, to date,
892258Scsgr	 * has the fewest differences between itself and "state".  If
902258Scsgr	 * extptr is 1 then the second array of extrct hold the best
912258Scsgr	 * difference.  The two arrays are toggled between so that the
922258Scsgr	 * best difference to date can be kept around and also a difference
932258Scsgr	 * just created by checking against a candidate "best" proto.
942258Scsgr	 */
952258Scsgr
962258Scsgr	extptr = 0;
972258Scsgr
982258Scsgr	/* If the state has too few out-transitions, don't bother trying to
992258Scsgr	 * compact its tables.
1002258Scsgr	 */
1012258Scsgr
1022258Scsgr	if ( (totaltrans * 100) < (numecs * PROTO_SIZE_PERCENTAGE) )
1032258Scsgr		mkentry( state, numecs, statenum, JAMSTATE, totaltrans );
1042258Scsgr
1052258Scsgr	else
1062258Scsgr		{
1072258Scsgr		/* "checkcom" is true if we should only check "state" against
1082258Scsgr		 * protos which have the same "comstate" value.
1092258Scsgr		 */
1102258Scsgr		int checkcom =
1112258Scsgr			comfreq * 100 > totaltrans * CHECK_COM_PERCENTAGE;
1122258Scsgr
1132258Scsgr		minprot = firstprot;
1142258Scsgr		mindiff = totaltrans;
1152258Scsgr
1162258Scsgr		if ( checkcom )
1172258Scsgr			{
1182258Scsgr			/* Find first proto which has the same "comstate". */
1192258Scsgr			for ( i = firstprot; i != NIL; i = protnext[i] )
1202258Scsgr				if ( protcomst[i] == comstate )
1212258Scsgr					{
1222258Scsgr					minprot = i;
1232258Scsgr					mindiff = tbldiff( state, minprot,
1242258Scsgr							extrct[extptr] );
1252258Scsgr					break;
1262258Scsgr					}
1272258Scsgr			}
1282258Scsgr
1292258Scsgr		else
1302258Scsgr			{
1312258Scsgr			/* Since we've decided that the most common destination
1322258Scsgr			 * out of "state" does not occur with a high enough
1332258Scsgr			 * frequency, we set the "comstate" to zero, assuring
1342258Scsgr			 * that if this state is entered into the proto list,
1352258Scsgr			 * it will not be considered a template.
1362258Scsgr			 */
1372258Scsgr			comstate = 0;
1382258Scsgr
1392258Scsgr			if ( firstprot != NIL )
1402258Scsgr				{
1412258Scsgr				minprot = firstprot;
1422258Scsgr				mindiff = tbldiff( state, minprot,
1432258Scsgr						extrct[extptr] );
1442258Scsgr				}
1452258Scsgr			}
1462258Scsgr
1472258Scsgr		/* We now have the first interesting proto in "minprot".  If
1482258Scsgr		 * it matches within the tolerances set for the first proto,
1492258Scsgr		 * we don't want to bother scanning the rest of the proto list
1502258Scsgr		 * to see if we have any other reasonable matches.
1512258Scsgr		 */
1522258Scsgr
1532258Scsgr		if ( mindiff * 100 > totaltrans * FIRST_MATCH_DIFF_PERCENTAGE )
1542258Scsgr			{
1552258Scsgr			/* Not a good enough match.  Scan the rest of the
1562258Scsgr			 * protos.
1572258Scsgr			 */
1582258Scsgr			for ( i = minprot; i != NIL; i = protnext[i] )
1592258Scsgr				{
1602258Scsgr				d = tbldiff( state, i, extrct[1 - extptr] );
1612258Scsgr				if ( d < mindiff )
1622258Scsgr					{
1632258Scsgr					extptr = 1 - extptr;
1642258Scsgr					mindiff = d;
1652258Scsgr					minprot = i;
1662258Scsgr					}
1672258Scsgr				}
1682258Scsgr			}
1692258Scsgr
1702258Scsgr		/* Check if the proto we've decided on as our best bet is close
1712258Scsgr		 * enough to the state we want to match to be usable.
1722258Scsgr		 */
1732258Scsgr
1742258Scsgr		if ( mindiff * 100 > totaltrans * ACCEPTABLE_DIFF_PERCENTAGE )
1752258Scsgr			{
1762258Scsgr			/* No good.  If the state is homogeneous enough,
1772258Scsgr			 * we make a template out of it.  Otherwise, we
1782258Scsgr			 * make a proto.
1792258Scsgr			 */
1802258Scsgr
1812258Scsgr			if ( comfreq * 100 >=
1822258Scsgr			     totaltrans * TEMPLATE_SAME_PERCENTAGE )
1832258Scsgr				mktemplate( state, statenum, comstate );
1842258Scsgr
1852258Scsgr			else
1862258Scsgr				{
1872258Scsgr				mkprot( state, statenum, comstate );
1882258Scsgr				mkentry( state, numecs, statenum,
1892258Scsgr					JAMSTATE, totaltrans );
1902258Scsgr				}
1912258Scsgr			}
1922258Scsgr
1932258Scsgr		else
1942258Scsgr			{ /* use the proto */
1952258Scsgr			mkentry( extrct[extptr], numecs, statenum,
1962258Scsgr				prottbl[minprot], mindiff );
1972258Scsgr
1982258Scsgr			/* If this state was sufficiently different from the
1992258Scsgr			 * proto we built it from, make it, too, a proto.
2002258Scsgr			 */
2012258Scsgr
2022258Scsgr			if ( mindiff * 100 >=
2032258Scsgr			     totaltrans * NEW_PROTO_DIFF_PERCENTAGE )
2042258Scsgr				mkprot( state, statenum, comstate );
2052258Scsgr
2062258Scsgr			/* Since mkprot added a new proto to the proto queue,
2072258Scsgr			 * it's possible that "minprot" is no longer on the
2082258Scsgr			 * proto queue (if it happened to have been the last
2092258Scsgr			 * entry, it would have been bumped off).  If it's
2102258Scsgr			 * not there, then the new proto took its physical
2112258Scsgr			 * place (though logically the new proto is at the
2122258Scsgr			 * beginning of the queue), so in that case the
2132258Scsgr			 * following call will do nothing.
2142258Scsgr			 */
2152258Scsgr
2162258Scsgr			mv2front( minprot );
2172258Scsgr			}
2182258Scsgr		}
2192258Scsgr	}
2202258Scsgr
2212258Scsgr
2222258Scsgr/* cmptmps - compress template table entries
2232258Scsgr *
2242258Scsgr * Template tables are compressed by using the 'template equivalence
2252258Scsgr * classes', which are collections of transition character equivalence
2262258Scsgr * classes which always appear together in templates - really meta-equivalence
2272258Scsgr * classes.
2282258Scsgr */
2292258Scsgr
2302258Scsgrvoid cmptmps()
2312258Scsgr	{
2322258Scsgr	int tmpstorage[CSIZE + 1];
233179549Sdwmalone	int *tmp = tmpstorage, i, j;
2342258Scsgr	int totaltrans, trans;
2352258Scsgr
2362258Scsgr	peakpairs = numtemps * numecs + tblend;
2372258Scsgr
2382258Scsgr	if ( usemecs )
2392258Scsgr		{
2402258Scsgr		/* Create equivalence classes based on data gathered on
2412258Scsgr		 * template transitions.
2422258Scsgr		 */
2432258Scsgr		nummecs = cre8ecs( tecfwd, tecbck, numecs );
2442258Scsgr		}
2452258Scsgr
2462258Scsgr	else
2472258Scsgr		nummecs = numecs;
2482258Scsgr
2492258Scsgr	while ( lastdfa + numtemps + 1 >= current_max_dfas )
2502258Scsgr		increase_max_dfas();
2512258Scsgr
2522258Scsgr	/* Loop through each template. */
2532258Scsgr
2542258Scsgr	for ( i = 1; i <= numtemps; ++i )
2552258Scsgr		{
2562258Scsgr		/* Number of non-jam transitions out of this template. */
2572258Scsgr		totaltrans = 0;
2582258Scsgr
2592258Scsgr		for ( j = 1; j <= numecs; ++j )
2602258Scsgr			{
2612258Scsgr			trans = tnxt[numecs * i + j];
2622258Scsgr
2632258Scsgr			if ( usemecs )
2642258Scsgr				{
2652258Scsgr				/* The absolute value of tecbck is the
2662258Scsgr				 * meta-equivalence class of a given
2672258Scsgr				 * equivalence class, as set up by cre8ecs().
2682258Scsgr				 */
2692258Scsgr				if ( tecbck[j] > 0 )
2702258Scsgr					{
2712258Scsgr					tmp[tecbck[j]] = trans;
2722258Scsgr
2732258Scsgr					if ( trans > 0 )
2742258Scsgr						++totaltrans;
2752258Scsgr					}
2762258Scsgr				}
2772258Scsgr
2782258Scsgr			else
2792258Scsgr				{
2802258Scsgr				tmp[j] = trans;
2812258Scsgr
2822258Scsgr				if ( trans > 0 )
2832258Scsgr					++totaltrans;
2842258Scsgr				}
2852258Scsgr			}
2862258Scsgr
2872258Scsgr		/* It is assumed (in a rather subtle way) in the skeleton
2882258Scsgr		 * that if we're using meta-equivalence classes, the def[]
2892258Scsgr		 * entry for all templates is the jam template, i.e.,
2902258Scsgr		 * templates never default to other non-jam table entries
2912258Scsgr		 * (e.g., another template)
2922258Scsgr		 */
2932258Scsgr
2942258Scsgr		/* Leave room for the jam-state after the last real state. */
2952258Scsgr		mkentry( tmp, nummecs, lastdfa + i + 1, JAMSTATE, totaltrans );
2962258Scsgr		}
2972258Scsgr	}
2982258Scsgr
2992258Scsgr
3002258Scsgr
3012258Scsgr/* expand_nxt_chk - expand the next check arrays */
3022258Scsgr
3032258Scsgrvoid expand_nxt_chk()
3042258Scsgr	{
305179549Sdwmalone	int old_max = current_max_xpairs;
3062258Scsgr
3072258Scsgr	current_max_xpairs += MAX_XPAIRS_INCREMENT;
3082258Scsgr
3092258Scsgr	++num_reallocs;
3102258Scsgr
3112258Scsgr	nxt = reallocate_integer_array( nxt, current_max_xpairs );
3122258Scsgr	chk = reallocate_integer_array( chk, current_max_xpairs );
3132258Scsgr
3142258Scsgr	zero_out( (char *) (chk + old_max),
31516519Snate		(size_t) (MAX_XPAIRS_INCREMENT * sizeof( int )) );
3162258Scsgr	}
3172258Scsgr
3182258Scsgr
3192258Scsgr/* find_table_space - finds a space in the table for a state to be placed
3202258Scsgr *
3212258Scsgr * synopsis
3222258Scsgr *     int *state, numtrans, block_start;
3232258Scsgr *     int find_table_space();
3242258Scsgr *
3252258Scsgr *     block_start = find_table_space( state, numtrans );
3262258Scsgr *
3272258Scsgr * State is the state to be added to the full speed transition table.
3282258Scsgr * Numtrans is the number of out-transitions for the state.
3292258Scsgr *
3302258Scsgr * find_table_space() returns the position of the start of the first block (in
3312258Scsgr * chk) able to accommodate the state
3322258Scsgr *
3332258Scsgr * In determining if a state will or will not fit, find_table_space() must take
3342258Scsgr * into account the fact that an end-of-buffer state will be added at [0],
3352258Scsgr * and an action number will be added in [-1].
3362258Scsgr */
3372258Scsgr
3382258Scsgrint find_table_space( state, numtrans )
3392258Scsgrint *state, numtrans;
3402258Scsgr	{
3412258Scsgr	/* Firstfree is the position of the first possible occurrence of two
3422258Scsgr	 * consecutive unused records in the chk and nxt arrays.
3432258Scsgr	 */
344179549Sdwmalone	int i;
345179549Sdwmalone	int *state_ptr, *chk_ptr;
346179549Sdwmalone	int *ptr_to_last_entry_in_state;
3472258Scsgr
3482258Scsgr	/* If there are too many out-transitions, put the state at the end of
3492258Scsgr	 * nxt and chk.
3502258Scsgr	 */
3512258Scsgr	if ( numtrans > MAX_XTIONS_FULL_INTERIOR_FIT )
3522258Scsgr		{
3532258Scsgr		/* If table is empty, return the first available spot in
3542258Scsgr		 * chk/nxt, which should be 1.
3552258Scsgr		 */
3562258Scsgr		if ( tblend < 2 )
3572258Scsgr			return 1;
3582258Scsgr
3592258Scsgr		/* Start searching for table space near the end of
3602258Scsgr		 * chk/nxt arrays.
3612258Scsgr		 */
3622258Scsgr		i = tblend - numecs;
3632258Scsgr		}
3642258Scsgr
3652258Scsgr	else
3662258Scsgr		/* Start searching for table space from the beginning
3672258Scsgr		 * (skipping only the elements which will definitely not
3682258Scsgr		 * hold the new state).
3692258Scsgr		 */
3702258Scsgr		i = firstfree;
3712258Scsgr
3722258Scsgr	while ( 1 )	/* loops until a space is found */
3732258Scsgr		{
3742258Scsgr		while ( i + numecs >= current_max_xpairs )
3752258Scsgr			expand_nxt_chk();
3762258Scsgr
3772258Scsgr		/* Loops until space for end-of-buffer and action number
3782258Scsgr		 * are found.
3792258Scsgr		 */
3802258Scsgr		while ( 1 )
3812258Scsgr			{
3822258Scsgr			/* Check for action number space. */
3832258Scsgr			if ( chk[i - 1] == 0 )
3842258Scsgr				{
3852258Scsgr				/* Check for end-of-buffer space. */
3862258Scsgr				if ( chk[i] == 0 )
3872258Scsgr					break;
3882258Scsgr
3892258Scsgr				else
3902258Scsgr					/* Since i != 0, there is no use
3912258Scsgr					 * checking to see if (++i) - 1 == 0,
3922258Scsgr					 * because that's the same as i == 0,
3932258Scsgr					 * so we skip a space.
3942258Scsgr					 */
3952258Scsgr					i += 2;
3962258Scsgr				}
3972258Scsgr
3982258Scsgr			else
3992258Scsgr				++i;
4002258Scsgr
4012258Scsgr			while ( i + numecs >= current_max_xpairs )
4022258Scsgr				expand_nxt_chk();
4032258Scsgr			}
4042258Scsgr
4052258Scsgr		/* If we started search from the beginning, store the new
4062258Scsgr		 * firstfree for the next call of find_table_space().
4072258Scsgr		 */
4082258Scsgr		if ( numtrans <= MAX_XTIONS_FULL_INTERIOR_FIT )
4092258Scsgr			firstfree = i + 1;
4102258Scsgr
4112258Scsgr		/* Check to see if all elements in chk (and therefore nxt)
4122258Scsgr		 * that are needed for the new state have not yet been taken.
4132258Scsgr		 */
4142258Scsgr
4152258Scsgr		state_ptr = &state[1];
4162258Scsgr		ptr_to_last_entry_in_state = &chk[i + numecs + 1];
4172258Scsgr
4182258Scsgr		for ( chk_ptr = &chk[i + 1];
4192258Scsgr		      chk_ptr != ptr_to_last_entry_in_state; ++chk_ptr )
4202258Scsgr			if ( *(state_ptr++) != 0 && *chk_ptr != 0 )
4212258Scsgr				break;
4222258Scsgr
4232258Scsgr		if ( chk_ptr == ptr_to_last_entry_in_state )
4242258Scsgr			return i;
4252258Scsgr
4262258Scsgr		else
4272258Scsgr		++i;
4282258Scsgr		}
4292258Scsgr	}
4302258Scsgr
4312258Scsgr
4322258Scsgr/* inittbl - initialize transition tables
4332258Scsgr *
4342258Scsgr * Initializes "firstfree" to be one beyond the end of the table.  Initializes
4352258Scsgr * all "chk" entries to be zero.
4362258Scsgr */
4372258Scsgrvoid inittbl()
4382258Scsgr	{
439179549Sdwmalone	int i;
4402258Scsgr
44116519Snate	zero_out( (char *) chk, (size_t) (current_max_xpairs * sizeof( int )) );
4422258Scsgr
4432258Scsgr	tblend = 0;
4442258Scsgr	firstfree = tblend + 1;
4452258Scsgr	numtemps = 0;
4462258Scsgr
4472258Scsgr	if ( usemecs )
4482258Scsgr		{
4492258Scsgr		/* Set up doubly-linked meta-equivalence classes; these
4502258Scsgr		 * are sets of equivalence classes which all have identical
4512258Scsgr		 * transitions out of TEMPLATES.
4522258Scsgr		 */
4532258Scsgr
4542258Scsgr		tecbck[1] = NIL;
4552258Scsgr
4562258Scsgr		for ( i = 2; i <= numecs; ++i )
4572258Scsgr			{
4582258Scsgr			tecbck[i] = i - 1;
4592258Scsgr			tecfwd[i - 1] = i;
4602258Scsgr			}
4612258Scsgr
4622258Scsgr		tecfwd[numecs] = NIL;
4632258Scsgr		}
4642258Scsgr	}
4652258Scsgr
4662258Scsgr
4672258Scsgr/* mkdeftbl - make the default, "jam" table entries */
4682258Scsgr
4692258Scsgrvoid mkdeftbl()
4702258Scsgr	{
4712258Scsgr	int i;
4722258Scsgr
4732258Scsgr	jamstate = lastdfa + 1;
4742258Scsgr
4752258Scsgr	++tblend; /* room for transition on end-of-buffer character */
4762258Scsgr
4772258Scsgr	while ( tblend + numecs >= current_max_xpairs )
4782258Scsgr		expand_nxt_chk();
4792258Scsgr
4802258Scsgr	/* Add in default end-of-buffer transition. */
4812258Scsgr	nxt[tblend] = end_of_buffer_state;
4822258Scsgr	chk[tblend] = jamstate;
4832258Scsgr
4842258Scsgr	for ( i = 1; i <= numecs; ++i )
4852258Scsgr		{
4862258Scsgr		nxt[tblend + i] = 0;
4872258Scsgr		chk[tblend + i] = jamstate;
4882258Scsgr		}
4892258Scsgr
4902258Scsgr	jambase = tblend;
4912258Scsgr
4922258Scsgr	base[jamstate] = jambase;
4932258Scsgr	def[jamstate] = 0;
4942258Scsgr
4952258Scsgr	tblend += numecs;
4962258Scsgr	++numtemps;
4972258Scsgr	}
4982258Scsgr
4992258Scsgr
5002258Scsgr/* mkentry - create base/def and nxt/chk entries for transition array
5012258Scsgr *
5022258Scsgr * synopsis
5032258Scsgr *   int state[numchars + 1], numchars, statenum, deflink, totaltrans;
5042258Scsgr *   mkentry( state, numchars, statenum, deflink, totaltrans );
5052258Scsgr *
5062258Scsgr * "state" is a transition array "numchars" characters in size, "statenum"
5072258Scsgr * is the offset to be used into the base/def tables, and "deflink" is the
5082258Scsgr * entry to put in the "def" table entry.  If "deflink" is equal to
5092258Scsgr * "JAMSTATE", then no attempt will be made to fit zero entries of "state"
5102258Scsgr * (i.e., jam entries) into the table.  It is assumed that by linking to
5112258Scsgr * "JAMSTATE" they will be taken care of.  In any case, entries in "state"
5122258Scsgr * marking transitions to "SAME_TRANS" are treated as though they will be
5132258Scsgr * taken care of by whereever "deflink" points.  "totaltrans" is the total
5142258Scsgr * number of transitions out of the state.  If it is below a certain threshold,
5152258Scsgr * the tables are searched for an interior spot that will accommodate the
5162258Scsgr * state array.
5172258Scsgr */
5182258Scsgr
5192258Scsgrvoid mkentry( state, numchars, statenum, deflink, totaltrans )
520179549Sdwmaloneint *state;
5212258Scsgrint numchars, statenum, deflink, totaltrans;
5222258Scsgr	{
523179549Sdwmalone	int minec, maxec, i, baseaddr;
5242258Scsgr	int tblbase, tbllast;
5252258Scsgr
5262258Scsgr	if ( totaltrans == 0 )
5272258Scsgr		{ /* there are no out-transitions */
5282258Scsgr		if ( deflink == JAMSTATE )
5292258Scsgr			base[statenum] = JAMSTATE;
5302258Scsgr		else
5312258Scsgr			base[statenum] = 0;
5322258Scsgr
5332258Scsgr		def[statenum] = deflink;
5342258Scsgr		return;
5352258Scsgr		}
5362258Scsgr
5372258Scsgr	for ( minec = 1; minec <= numchars; ++minec )
5382258Scsgr		{
5392258Scsgr		if ( state[minec] != SAME_TRANS )
5402258Scsgr			if ( state[minec] != 0 || deflink != JAMSTATE )
5412258Scsgr				break;
5422258Scsgr		}
5432258Scsgr
5442258Scsgr	if ( totaltrans == 1 )
5452258Scsgr		{
5462258Scsgr		/* There's only one out-transition.  Save it for later to fill
5472258Scsgr		 * in holes in the tables.
5482258Scsgr		 */
5492258Scsgr		stack1( statenum, minec, state[minec], deflink );
5502258Scsgr		return;
5512258Scsgr		}
5522258Scsgr
5532258Scsgr	for ( maxec = numchars; maxec > 0; --maxec )
5542258Scsgr		{
5552258Scsgr		if ( state[maxec] != SAME_TRANS )
5562258Scsgr			if ( state[maxec] != 0 || deflink != JAMSTATE )
5572258Scsgr				break;
5582258Scsgr		}
5592258Scsgr
5602258Scsgr	/* Whether we try to fit the state table in the middle of the table
5612258Scsgr	 * entries we have already generated, or if we just take the state
5622258Scsgr	 * table at the end of the nxt/chk tables, we must make sure that we
5632258Scsgr	 * have a valid base address (i.e., non-negative).  Note that
5642258Scsgr	 * negative base addresses dangerous at run-time (because indexing
5652258Scsgr	 * the nxt array with one and a low-valued character will access
5662258Scsgr	 * memory before the start of the array.
5672258Scsgr	 */
5682258Scsgr
5692258Scsgr	/* Find the first transition of state that we need to worry about. */
5702258Scsgr	if ( totaltrans * 100 <= numchars * INTERIOR_FIT_PERCENTAGE )
5712258Scsgr		{
5722258Scsgr		/* Attempt to squeeze it into the middle of the tables. */
5732258Scsgr		baseaddr = firstfree;
5742258Scsgr
5752258Scsgr		while ( baseaddr < minec )
5762258Scsgr			{
5772258Scsgr			/* Using baseaddr would result in a negative base
5782258Scsgr			 * address below; find the next free slot.
5792258Scsgr			 */
5802258Scsgr			for ( ++baseaddr; chk[baseaddr] != 0; ++baseaddr )
5812258Scsgr				;
5822258Scsgr			}
5832258Scsgr
5842258Scsgr		while ( baseaddr + maxec - minec + 1 >= current_max_xpairs )
5852258Scsgr			expand_nxt_chk();
5862258Scsgr
5872258Scsgr		for ( i = minec; i <= maxec; ++i )
5882258Scsgr			if ( state[i] != SAME_TRANS &&
5892258Scsgr			     (state[i] != 0 || deflink != JAMSTATE) &&
5902258Scsgr			     chk[baseaddr + i - minec] != 0 )
5912258Scsgr				{ /* baseaddr unsuitable - find another */
5922258Scsgr				for ( ++baseaddr;
5932258Scsgr				      baseaddr < current_max_xpairs &&
5942258Scsgr				      chk[baseaddr] != 0; ++baseaddr )
5952258Scsgr					;
5962258Scsgr
5972258Scsgr				while ( baseaddr + maxec - minec + 1 >=
5982258Scsgr					current_max_xpairs )
5992258Scsgr					expand_nxt_chk();
6002258Scsgr
6012258Scsgr				/* Reset the loop counter so we'll start all
6022258Scsgr				 * over again next time it's incremented.
6032258Scsgr				 */
6042258Scsgr
6052258Scsgr				i = minec - 1;
6062258Scsgr				}
6072258Scsgr		}
6082258Scsgr
6092258Scsgr	else
6102258Scsgr		{
6112258Scsgr		/* Ensure that the base address we eventually generate is
6122258Scsgr		 * non-negative.
6132258Scsgr		 */
6142258Scsgr		baseaddr = MAX( tblend + 1, minec );
6152258Scsgr		}
6162258Scsgr
6172258Scsgr	tblbase = baseaddr - minec;
6182258Scsgr	tbllast = tblbase + maxec;
6192258Scsgr
6202258Scsgr	while ( tbllast + 1 >= current_max_xpairs )
6212258Scsgr		expand_nxt_chk();
6222258Scsgr
6232258Scsgr	base[statenum] = tblbase;
6242258Scsgr	def[statenum] = deflink;
6252258Scsgr
6262258Scsgr	for ( i = minec; i <= maxec; ++i )
6272258Scsgr		if ( state[i] != SAME_TRANS )
6282258Scsgr			if ( state[i] != 0 || deflink != JAMSTATE )
6292258Scsgr				{
6302258Scsgr				nxt[tblbase + i] = state[i];
6312258Scsgr				chk[tblbase + i] = statenum;
6322258Scsgr				}
6332258Scsgr
6342258Scsgr	if ( baseaddr == firstfree )
6352258Scsgr		/* Find next free slot in tables. */
6362258Scsgr		for ( ++firstfree; chk[firstfree] != 0; ++firstfree )
6372258Scsgr			;
6382258Scsgr
6392258Scsgr	tblend = MAX( tblend, tbllast );
6402258Scsgr	}
6412258Scsgr
6422258Scsgr
6432258Scsgr/* mk1tbl - create table entries for a state (or state fragment) which
6442258Scsgr *            has only one out-transition
6452258Scsgr */
6462258Scsgr
6472258Scsgrvoid mk1tbl( state, sym, onenxt, onedef )
6482258Scsgrint state, sym, onenxt, onedef;
6492258Scsgr	{
6502258Scsgr	if ( firstfree < sym )
6512258Scsgr		firstfree = sym;
6522258Scsgr
6532258Scsgr	while ( chk[firstfree] != 0 )
6542258Scsgr		if ( ++firstfree >= current_max_xpairs )
6552258Scsgr			expand_nxt_chk();
6562258Scsgr
6572258Scsgr	base[state] = firstfree - sym;
6582258Scsgr	def[state] = onedef;
6592258Scsgr	chk[firstfree] = state;
6602258Scsgr	nxt[firstfree] = onenxt;
6612258Scsgr
6622258Scsgr	if ( firstfree > tblend )
6632258Scsgr		{
6642258Scsgr		tblend = firstfree++;
6652258Scsgr
6662258Scsgr		if ( firstfree >= current_max_xpairs )
6672258Scsgr			expand_nxt_chk();
6682258Scsgr		}
6692258Scsgr	}
6702258Scsgr
6712258Scsgr
6722258Scsgr/* mkprot - create new proto entry */
6732258Scsgr
6742258Scsgrvoid mkprot( state, statenum, comstate )
6752258Scsgrint state[], statenum, comstate;
6762258Scsgr	{
6772258Scsgr	int i, slot, tblbase;
6782258Scsgr
6792258Scsgr	if ( ++numprots >= MSP || numecs * numprots >= PROT_SAVE_SIZE )
6802258Scsgr		{
6812258Scsgr		/* Gotta make room for the new proto by dropping last entry in
6822258Scsgr		 * the queue.
6832258Scsgr		 */
6842258Scsgr		slot = lastprot;
6852258Scsgr		lastprot = protprev[lastprot];
6862258Scsgr		protnext[lastprot] = NIL;
6872258Scsgr		}
6882258Scsgr
6892258Scsgr	else
6902258Scsgr		slot = numprots;
6912258Scsgr
6922258Scsgr	protnext[slot] = firstprot;
6932258Scsgr
6942258Scsgr	if ( firstprot != NIL )
6952258Scsgr		protprev[firstprot] = slot;
6962258Scsgr
6972258Scsgr	firstprot = slot;
6982258Scsgr	prottbl[slot] = statenum;
6992258Scsgr	protcomst[slot] = comstate;
7002258Scsgr
7012258Scsgr	/* Copy state into save area so it can be compared with rapidly. */
7022258Scsgr	tblbase = numecs * (slot - 1);
7032258Scsgr
7042258Scsgr	for ( i = 1; i <= numecs; ++i )
7052258Scsgr		protsave[tblbase + i] = state[i];
7062258Scsgr	}
7072258Scsgr
7082258Scsgr
7092258Scsgr/* mktemplate - create a template entry based on a state, and connect the state
7102258Scsgr *              to it
7112258Scsgr */
7122258Scsgr
7132258Scsgrvoid mktemplate( state, statenum, comstate )
7142258Scsgrint state[], statenum, comstate;
7152258Scsgr	{
7162258Scsgr	int i, numdiff, tmpbase, tmp[CSIZE + 1];
7172258Scsgr	Char transset[CSIZE + 1];
7182258Scsgr	int tsptr;
7192258Scsgr
7202258Scsgr	++numtemps;
7212258Scsgr
7222258Scsgr	tsptr = 0;
7232258Scsgr
7242258Scsgr	/* Calculate where we will temporarily store the transition table
7252258Scsgr	 * of the template in the tnxt[] array.  The final transition table
7262258Scsgr	 * gets created by cmptmps().
7272258Scsgr	 */
7282258Scsgr
7292258Scsgr	tmpbase = numtemps * numecs;
7302258Scsgr
7312258Scsgr	if ( tmpbase + numecs >= current_max_template_xpairs )
7322258Scsgr		{
7332258Scsgr		current_max_template_xpairs += MAX_TEMPLATE_XPAIRS_INCREMENT;
7342258Scsgr
7352258Scsgr		++num_reallocs;
7362258Scsgr
7372258Scsgr		tnxt = reallocate_integer_array( tnxt,
7382258Scsgr			current_max_template_xpairs );
7392258Scsgr		}
7402258Scsgr
7412258Scsgr	for ( i = 1; i <= numecs; ++i )
7422258Scsgr		if ( state[i] == 0 )
7432258Scsgr			tnxt[tmpbase + i] = 0;
7442258Scsgr		else
7452258Scsgr			{
7462258Scsgr			transset[tsptr++] = i;
7472258Scsgr			tnxt[tmpbase + i] = comstate;
7482258Scsgr			}
7492258Scsgr
7502258Scsgr	if ( usemecs )
7512258Scsgr		mkeccl( transset, tsptr, tecfwd, tecbck, numecs, 0 );
7522258Scsgr
7532258Scsgr	mkprot( tnxt + tmpbase, -numtemps, comstate );
7542258Scsgr
7552258Scsgr	/* We rely on the fact that mkprot adds things to the beginning
7562258Scsgr	 * of the proto queue.
7572258Scsgr	 */
7582258Scsgr
7592258Scsgr	numdiff = tbldiff( state, firstprot, tmp );
7602258Scsgr	mkentry( tmp, numecs, statenum, -numtemps, numdiff );
7612258Scsgr	}
7622258Scsgr
7632258Scsgr
7642258Scsgr/* mv2front - move proto queue element to front of queue */
7652258Scsgr
7662258Scsgrvoid mv2front( qelm )
7672258Scsgrint qelm;
7682258Scsgr	{
7692258Scsgr	if ( firstprot != qelm )
7702258Scsgr		{
7712258Scsgr		if ( qelm == lastprot )
7722258Scsgr			lastprot = protprev[lastprot];
7732258Scsgr
7742258Scsgr		protnext[protprev[qelm]] = protnext[qelm];
7752258Scsgr
7762258Scsgr		if ( protnext[qelm] != NIL )
7772258Scsgr			protprev[protnext[qelm]] = protprev[qelm];
7782258Scsgr
7792258Scsgr		protprev[qelm] = NIL;
7802258Scsgr		protnext[qelm] = firstprot;
7812258Scsgr		protprev[firstprot] = qelm;
7822258Scsgr		firstprot = qelm;
7832258Scsgr		}
7842258Scsgr	}
7852258Scsgr
7862258Scsgr
7872258Scsgr/* place_state - place a state into full speed transition table
7882258Scsgr *
7892258Scsgr * State is the statenum'th state.  It is indexed by equivalence class and
7902258Scsgr * gives the number of the state to enter for a given equivalence class.
7912258Scsgr * Transnum is the number of out-transitions for the state.
7922258Scsgr */
7932258Scsgr
7942258Scsgrvoid place_state( state, statenum, transnum )
7952258Scsgrint *state, statenum, transnum;
7962258Scsgr	{
797179549Sdwmalone	int i;
798179549Sdwmalone	int *state_ptr;
7992258Scsgr	int position = find_table_space( state, transnum );
8002258Scsgr
8012258Scsgr	/* "base" is the table of start positions. */
8022258Scsgr	base[statenum] = position;
8032258Scsgr
8042258Scsgr	/* Put in action number marker; this non-zero number makes sure that
8052258Scsgr	 * find_table_space() knows that this position in chk/nxt is taken
8062258Scsgr	 * and should not be used for another accepting number in another
8072258Scsgr	 * state.
8082258Scsgr	 */
8092258Scsgr	chk[position - 1] = 1;
8102258Scsgr
8112258Scsgr	/* Put in end-of-buffer marker; this is for the same purposes as
8122258Scsgr	 * above.
8132258Scsgr	 */
8142258Scsgr	chk[position] = 1;
8152258Scsgr
8162258Scsgr	/* Place the state into chk and nxt. */
8172258Scsgr	state_ptr = &state[1];
8182258Scsgr
8192258Scsgr	for ( i = 1; i <= numecs; ++i, ++state_ptr )
8202258Scsgr		if ( *state_ptr != 0 )
8212258Scsgr			{
8222258Scsgr			chk[position + i] = i;
8232258Scsgr			nxt[position + i] = *state_ptr;
8242258Scsgr			}
8252258Scsgr
8262258Scsgr	if ( position + numecs > tblend )
8272258Scsgr		tblend = position + numecs;
8282258Scsgr	}
8292258Scsgr
8302258Scsgr
8312258Scsgr/* stack1 - save states with only one out-transition to be processed later
8322258Scsgr *
8332258Scsgr * If there's room for another state on the "one-transition" stack, the
8342258Scsgr * state is pushed onto it, to be processed later by mk1tbl.  If there's
8352258Scsgr * no room, we process the sucker right now.
8362258Scsgr */
8372258Scsgr
8382258Scsgrvoid stack1( statenum, sym, nextstate, deflink )
8392258Scsgrint statenum, sym, nextstate, deflink;
8402258Scsgr	{
8412258Scsgr	if ( onesp >= ONE_STACK_SIZE - 1 )
8422258Scsgr		mk1tbl( statenum, sym, nextstate, deflink );
8432258Scsgr
8442258Scsgr	else
8452258Scsgr		{
8462258Scsgr		++onesp;
8472258Scsgr		onestate[onesp] = statenum;
8482258Scsgr		onesym[onesp] = sym;
8492258Scsgr		onenext[onesp] = nextstate;
8502258Scsgr		onedef[onesp] = deflink;
8512258Scsgr		}
8522258Scsgr	}
8532258Scsgr
8542258Scsgr
8552258Scsgr/* tbldiff - compute differences between two state tables
8562258Scsgr *
8572258Scsgr * "state" is the state array which is to be extracted from the pr'th
8582258Scsgr * proto.  "pr" is both the number of the proto we are extracting from
8592258Scsgr * and an index into the save area where we can find the proto's complete
8602258Scsgr * state table.  Each entry in "state" which differs from the corresponding
8612258Scsgr * entry of "pr" will appear in "ext".
8622258Scsgr *
8632258Scsgr * Entries which are the same in both "state" and "pr" will be marked
8642258Scsgr * as transitions to "SAME_TRANS" in "ext".  The total number of differences
8652258Scsgr * between "state" and "pr" is returned as function value.  Note that this
8662258Scsgr * number is "numecs" minus the number of "SAME_TRANS" entries in "ext".
8672258Scsgr */
8682258Scsgr
8692258Scsgrint tbldiff( state, pr, ext )
8702258Scsgrint state[], pr, ext[];
8712258Scsgr	{
872179549Sdwmalone	int i, *sp = state, *ep = ext, *protp;
873179549Sdwmalone	int numdiff = 0;
8742258Scsgr
8752258Scsgr	protp = &protsave[numecs * (pr - 1)];
8762258Scsgr
8772258Scsgr	for ( i = numecs; i > 0; --i )
8782258Scsgr		{
8792258Scsgr		if ( *++protp == *++sp )
8802258Scsgr			*++ep = SAME_TRANS;
8812258Scsgr		else
8822258Scsgr			{
8832258Scsgr			*++ep = *sp;
8842258Scsgr			++numdiff;
8852258Scsgr			}
8862258Scsgr		}
8872258Scsgr
8882258Scsgr	return numdiff;
8892258Scsgr	}
890