tblcmp.c revision 16519
12258Scsgr/* tblcmp - table compression routines */ 22258Scsgr 32258Scsgr/*- 42258Scsgr * Copyright (c) 1990 The Regents of the University of California. 52258Scsgr * All rights reserved. 62258Scsgr * 72258Scsgr * This code is derived from software contributed to Berkeley by 82258Scsgr * Vern Paxson. 916519Snate * 102258Scsgr * The United States Government has rights in this work pursuant 112258Scsgr * to contract no. DE-AC03-76SF00098 between the United States 122258Scsgr * Department of Energy and the University of California. 132258Scsgr * 142258Scsgr * Redistribution and use in source and binary forms are permitted provided 152258Scsgr * that: (1) source distributions retain this entire copyright notice and 162258Scsgr * comment, and (2) distributions including binaries display the following 172258Scsgr * acknowledgement: ``This product includes software developed by the 182258Scsgr * University of California, Berkeley and its contributors'' in the 192258Scsgr * documentation or other materials provided with the distribution and in 202258Scsgr * all advertising materials mentioning features or use of this software. 212258Scsgr * Neither the name of the University nor the names of its contributors may 222258Scsgr * be used to endorse or promote products derived from this software without 232258Scsgr * specific prior written permission. 242258Scsgr * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED 252258Scsgr * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 262258Scsgr * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 272258Scsgr */ 282258Scsgr 2916519Snate/* $Header: /home/ncvs/src/usr.bin/lex/tblcmp.c,v 1.1.1.2 1996/06/19 20:26:43 nate Exp $ */ 302258Scsgr 312258Scsgr#include "flexdef.h" 322258Scsgr 332258Scsgr 342258Scsgr/* declarations for functions that have forward references */ 352258Scsgr 362258Scsgrvoid mkentry PROTO((register int*, int, int, int, int)); 372258Scsgrvoid mkprot PROTO((int[], int, int)); 382258Scsgrvoid mktemplate PROTO((int[], int, int)); 392258Scsgrvoid mv2front PROTO((int)); 402258Scsgrint tbldiff PROTO((int[], int, int[])); 412258Scsgr 422258Scsgr 432258Scsgr/* bldtbl - build table entries for dfa state 442258Scsgr * 452258Scsgr * synopsis 462258Scsgr * int state[numecs], statenum, totaltrans, comstate, comfreq; 472258Scsgr * bldtbl( state, statenum, totaltrans, comstate, comfreq ); 482258Scsgr * 492258Scsgr * State is the statenum'th dfa state. It is indexed by equivalence class and 502258Scsgr * gives the number of the state to enter for a given equivalence class. 512258Scsgr * totaltrans is the total number of transitions out of the state. Comstate 522258Scsgr * is that state which is the destination of the most transitions out of State. 532258Scsgr * Comfreq is how many transitions there are out of State to Comstate. 542258Scsgr * 552258Scsgr * A note on terminology: 562258Scsgr * "protos" are transition tables which have a high probability of 572258Scsgr * either being redundant (a state processed later will have an identical 582258Scsgr * transition table) or nearly redundant (a state processed later will have 592258Scsgr * many of the same out-transitions). A "most recently used" queue of 602258Scsgr * protos is kept around with the hope that most states will find a proto 612258Scsgr * which is similar enough to be usable, and therefore compacting the 622258Scsgr * output tables. 632258Scsgr * "templates" are a special type of proto. If a transition table is 642258Scsgr * homogeneous or nearly homogeneous (all transitions go to the same 652258Scsgr * destination) then the odds are good that future states will also go 662258Scsgr * to the same destination state on basically the same character set. 672258Scsgr * These homogeneous states are so common when dealing with large rule 682258Scsgr * sets that they merit special attention. If the transition table were 692258Scsgr * simply made into a proto, then (typically) each subsequent, similar 702258Scsgr * state will differ from the proto for two out-transitions. One of these 712258Scsgr * out-transitions will be that character on which the proto does not go 722258Scsgr * to the common destination, and one will be that character on which the 732258Scsgr * state does not go to the common destination. Templates, on the other 742258Scsgr * hand, go to the common state on EVERY transition character, and therefore 752258Scsgr * cost only one difference. 762258Scsgr */ 772258Scsgr 782258Scsgrvoid bldtbl( state, statenum, totaltrans, comstate, comfreq ) 792258Scsgrint state[], statenum, totaltrans, comstate, comfreq; 802258Scsgr { 812258Scsgr int extptr, extrct[2][CSIZE + 1]; 822258Scsgr int mindiff, minprot, i, d; 832258Scsgr 842258Scsgr /* If extptr is 0 then the first array of extrct holds the result 852258Scsgr * of the "best difference" to date, which is those transitions 862258Scsgr * which occur in "state" but not in the proto which, to date, 872258Scsgr * has the fewest differences between itself and "state". If 882258Scsgr * extptr is 1 then the second array of extrct hold the best 892258Scsgr * difference. The two arrays are toggled between so that the 902258Scsgr * best difference to date can be kept around and also a difference 912258Scsgr * just created by checking against a candidate "best" proto. 922258Scsgr */ 932258Scsgr 942258Scsgr extptr = 0; 952258Scsgr 962258Scsgr /* If the state has too few out-transitions, don't bother trying to 972258Scsgr * compact its tables. 982258Scsgr */ 992258Scsgr 1002258Scsgr if ( (totaltrans * 100) < (numecs * PROTO_SIZE_PERCENTAGE) ) 1012258Scsgr mkentry( state, numecs, statenum, JAMSTATE, totaltrans ); 1022258Scsgr 1032258Scsgr else 1042258Scsgr { 1052258Scsgr /* "checkcom" is true if we should only check "state" against 1062258Scsgr * protos which have the same "comstate" value. 1072258Scsgr */ 1082258Scsgr int checkcom = 1092258Scsgr comfreq * 100 > totaltrans * CHECK_COM_PERCENTAGE; 1102258Scsgr 1112258Scsgr minprot = firstprot; 1122258Scsgr mindiff = totaltrans; 1132258Scsgr 1142258Scsgr if ( checkcom ) 1152258Scsgr { 1162258Scsgr /* Find first proto which has the same "comstate". */ 1172258Scsgr for ( i = firstprot; i != NIL; i = protnext[i] ) 1182258Scsgr if ( protcomst[i] == comstate ) 1192258Scsgr { 1202258Scsgr minprot = i; 1212258Scsgr mindiff = tbldiff( state, minprot, 1222258Scsgr extrct[extptr] ); 1232258Scsgr break; 1242258Scsgr } 1252258Scsgr } 1262258Scsgr 1272258Scsgr else 1282258Scsgr { 1292258Scsgr /* Since we've decided that the most common destination 1302258Scsgr * out of "state" does not occur with a high enough 1312258Scsgr * frequency, we set the "comstate" to zero, assuring 1322258Scsgr * that if this state is entered into the proto list, 1332258Scsgr * it will not be considered a template. 1342258Scsgr */ 1352258Scsgr comstate = 0; 1362258Scsgr 1372258Scsgr if ( firstprot != NIL ) 1382258Scsgr { 1392258Scsgr minprot = firstprot; 1402258Scsgr mindiff = tbldiff( state, minprot, 1412258Scsgr extrct[extptr] ); 1422258Scsgr } 1432258Scsgr } 1442258Scsgr 1452258Scsgr /* We now have the first interesting proto in "minprot". If 1462258Scsgr * it matches within the tolerances set for the first proto, 1472258Scsgr * we don't want to bother scanning the rest of the proto list 1482258Scsgr * to see if we have any other reasonable matches. 1492258Scsgr */ 1502258Scsgr 1512258Scsgr if ( mindiff * 100 > totaltrans * FIRST_MATCH_DIFF_PERCENTAGE ) 1522258Scsgr { 1532258Scsgr /* Not a good enough match. Scan the rest of the 1542258Scsgr * protos. 1552258Scsgr */ 1562258Scsgr for ( i = minprot; i != NIL; i = protnext[i] ) 1572258Scsgr { 1582258Scsgr d = tbldiff( state, i, extrct[1 - extptr] ); 1592258Scsgr if ( d < mindiff ) 1602258Scsgr { 1612258Scsgr extptr = 1 - extptr; 1622258Scsgr mindiff = d; 1632258Scsgr minprot = i; 1642258Scsgr } 1652258Scsgr } 1662258Scsgr } 1672258Scsgr 1682258Scsgr /* Check if the proto we've decided on as our best bet is close 1692258Scsgr * enough to the state we want to match to be usable. 1702258Scsgr */ 1712258Scsgr 1722258Scsgr if ( mindiff * 100 > totaltrans * ACCEPTABLE_DIFF_PERCENTAGE ) 1732258Scsgr { 1742258Scsgr /* No good. If the state is homogeneous enough, 1752258Scsgr * we make a template out of it. Otherwise, we 1762258Scsgr * make a proto. 1772258Scsgr */ 1782258Scsgr 1792258Scsgr if ( comfreq * 100 >= 1802258Scsgr totaltrans * TEMPLATE_SAME_PERCENTAGE ) 1812258Scsgr mktemplate( state, statenum, comstate ); 1822258Scsgr 1832258Scsgr else 1842258Scsgr { 1852258Scsgr mkprot( state, statenum, comstate ); 1862258Scsgr mkentry( state, numecs, statenum, 1872258Scsgr JAMSTATE, totaltrans ); 1882258Scsgr } 1892258Scsgr } 1902258Scsgr 1912258Scsgr else 1922258Scsgr { /* use the proto */ 1932258Scsgr mkentry( extrct[extptr], numecs, statenum, 1942258Scsgr prottbl[minprot], mindiff ); 1952258Scsgr 1962258Scsgr /* If this state was sufficiently different from the 1972258Scsgr * proto we built it from, make it, too, a proto. 1982258Scsgr */ 1992258Scsgr 2002258Scsgr if ( mindiff * 100 >= 2012258Scsgr totaltrans * NEW_PROTO_DIFF_PERCENTAGE ) 2022258Scsgr mkprot( state, statenum, comstate ); 2032258Scsgr 2042258Scsgr /* Since mkprot added a new proto to the proto queue, 2052258Scsgr * it's possible that "minprot" is no longer on the 2062258Scsgr * proto queue (if it happened to have been the last 2072258Scsgr * entry, it would have been bumped off). If it's 2082258Scsgr * not there, then the new proto took its physical 2092258Scsgr * place (though logically the new proto is at the 2102258Scsgr * beginning of the queue), so in that case the 2112258Scsgr * following call will do nothing. 2122258Scsgr */ 2132258Scsgr 2142258Scsgr mv2front( minprot ); 2152258Scsgr } 2162258Scsgr } 2172258Scsgr } 2182258Scsgr 2192258Scsgr 2202258Scsgr/* cmptmps - compress template table entries 2212258Scsgr * 2222258Scsgr * Template tables are compressed by using the 'template equivalence 2232258Scsgr * classes', which are collections of transition character equivalence 2242258Scsgr * classes which always appear together in templates - really meta-equivalence 2252258Scsgr * classes. 2262258Scsgr */ 2272258Scsgr 2282258Scsgrvoid cmptmps() 2292258Scsgr { 2302258Scsgr int tmpstorage[CSIZE + 1]; 2312258Scsgr register int *tmp = tmpstorage, i, j; 2322258Scsgr int totaltrans, trans; 2332258Scsgr 2342258Scsgr peakpairs = numtemps * numecs + tblend; 2352258Scsgr 2362258Scsgr if ( usemecs ) 2372258Scsgr { 2382258Scsgr /* Create equivalence classes based on data gathered on 2392258Scsgr * template transitions. 2402258Scsgr */ 2412258Scsgr nummecs = cre8ecs( tecfwd, tecbck, numecs ); 2422258Scsgr } 2432258Scsgr 2442258Scsgr else 2452258Scsgr nummecs = numecs; 2462258Scsgr 2472258Scsgr while ( lastdfa + numtemps + 1 >= current_max_dfas ) 2482258Scsgr increase_max_dfas(); 2492258Scsgr 2502258Scsgr /* Loop through each template. */ 2512258Scsgr 2522258Scsgr for ( i = 1; i <= numtemps; ++i ) 2532258Scsgr { 2542258Scsgr /* Number of non-jam transitions out of this template. */ 2552258Scsgr totaltrans = 0; 2562258Scsgr 2572258Scsgr for ( j = 1; j <= numecs; ++j ) 2582258Scsgr { 2592258Scsgr trans = tnxt[numecs * i + j]; 2602258Scsgr 2612258Scsgr if ( usemecs ) 2622258Scsgr { 2632258Scsgr /* The absolute value of tecbck is the 2642258Scsgr * meta-equivalence class of a given 2652258Scsgr * equivalence class, as set up by cre8ecs(). 2662258Scsgr */ 2672258Scsgr if ( tecbck[j] > 0 ) 2682258Scsgr { 2692258Scsgr tmp[tecbck[j]] = trans; 2702258Scsgr 2712258Scsgr if ( trans > 0 ) 2722258Scsgr ++totaltrans; 2732258Scsgr } 2742258Scsgr } 2752258Scsgr 2762258Scsgr else 2772258Scsgr { 2782258Scsgr tmp[j] = trans; 2792258Scsgr 2802258Scsgr if ( trans > 0 ) 2812258Scsgr ++totaltrans; 2822258Scsgr } 2832258Scsgr } 2842258Scsgr 2852258Scsgr /* It is assumed (in a rather subtle way) in the skeleton 2862258Scsgr * that if we're using meta-equivalence classes, the def[] 2872258Scsgr * entry for all templates is the jam template, i.e., 2882258Scsgr * templates never default to other non-jam table entries 2892258Scsgr * (e.g., another template) 2902258Scsgr */ 2912258Scsgr 2922258Scsgr /* Leave room for the jam-state after the last real state. */ 2932258Scsgr mkentry( tmp, nummecs, lastdfa + i + 1, JAMSTATE, totaltrans ); 2942258Scsgr } 2952258Scsgr } 2962258Scsgr 2972258Scsgr 2982258Scsgr 2992258Scsgr/* expand_nxt_chk - expand the next check arrays */ 3002258Scsgr 3012258Scsgrvoid expand_nxt_chk() 3022258Scsgr { 3032258Scsgr register int old_max = current_max_xpairs; 3042258Scsgr 3052258Scsgr current_max_xpairs += MAX_XPAIRS_INCREMENT; 3062258Scsgr 3072258Scsgr ++num_reallocs; 3082258Scsgr 3092258Scsgr nxt = reallocate_integer_array( nxt, current_max_xpairs ); 3102258Scsgr chk = reallocate_integer_array( chk, current_max_xpairs ); 3112258Scsgr 3122258Scsgr zero_out( (char *) (chk + old_max), 31316519Snate (size_t) (MAX_XPAIRS_INCREMENT * sizeof( int )) ); 3142258Scsgr } 3152258Scsgr 3162258Scsgr 3172258Scsgr/* find_table_space - finds a space in the table for a state to be placed 3182258Scsgr * 3192258Scsgr * synopsis 3202258Scsgr * int *state, numtrans, block_start; 3212258Scsgr * int find_table_space(); 3222258Scsgr * 3232258Scsgr * block_start = find_table_space( state, numtrans ); 3242258Scsgr * 3252258Scsgr * State is the state to be added to the full speed transition table. 3262258Scsgr * Numtrans is the number of out-transitions for the state. 3272258Scsgr * 3282258Scsgr * find_table_space() returns the position of the start of the first block (in 3292258Scsgr * chk) able to accommodate the state 3302258Scsgr * 3312258Scsgr * In determining if a state will or will not fit, find_table_space() must take 3322258Scsgr * into account the fact that an end-of-buffer state will be added at [0], 3332258Scsgr * and an action number will be added in [-1]. 3342258Scsgr */ 3352258Scsgr 3362258Scsgrint find_table_space( state, numtrans ) 3372258Scsgrint *state, numtrans; 3382258Scsgr { 3392258Scsgr /* Firstfree is the position of the first possible occurrence of two 3402258Scsgr * consecutive unused records in the chk and nxt arrays. 3412258Scsgr */ 3422258Scsgr register int i; 3432258Scsgr register int *state_ptr, *chk_ptr; 3442258Scsgr register int *ptr_to_last_entry_in_state; 3452258Scsgr 3462258Scsgr /* If there are too many out-transitions, put the state at the end of 3472258Scsgr * nxt and chk. 3482258Scsgr */ 3492258Scsgr if ( numtrans > MAX_XTIONS_FULL_INTERIOR_FIT ) 3502258Scsgr { 3512258Scsgr /* If table is empty, return the first available spot in 3522258Scsgr * chk/nxt, which should be 1. 3532258Scsgr */ 3542258Scsgr if ( tblend < 2 ) 3552258Scsgr return 1; 3562258Scsgr 3572258Scsgr /* Start searching for table space near the end of 3582258Scsgr * chk/nxt arrays. 3592258Scsgr */ 3602258Scsgr i = tblend - numecs; 3612258Scsgr } 3622258Scsgr 3632258Scsgr else 3642258Scsgr /* Start searching for table space from the beginning 3652258Scsgr * (skipping only the elements which will definitely not 3662258Scsgr * hold the new state). 3672258Scsgr */ 3682258Scsgr i = firstfree; 3692258Scsgr 3702258Scsgr while ( 1 ) /* loops until a space is found */ 3712258Scsgr { 3722258Scsgr while ( i + numecs >= current_max_xpairs ) 3732258Scsgr expand_nxt_chk(); 3742258Scsgr 3752258Scsgr /* Loops until space for end-of-buffer and action number 3762258Scsgr * are found. 3772258Scsgr */ 3782258Scsgr while ( 1 ) 3792258Scsgr { 3802258Scsgr /* Check for action number space. */ 3812258Scsgr if ( chk[i - 1] == 0 ) 3822258Scsgr { 3832258Scsgr /* Check for end-of-buffer space. */ 3842258Scsgr if ( chk[i] == 0 ) 3852258Scsgr break; 3862258Scsgr 3872258Scsgr else 3882258Scsgr /* Since i != 0, there is no use 3892258Scsgr * checking to see if (++i) - 1 == 0, 3902258Scsgr * because that's the same as i == 0, 3912258Scsgr * so we skip a space. 3922258Scsgr */ 3932258Scsgr i += 2; 3942258Scsgr } 3952258Scsgr 3962258Scsgr else 3972258Scsgr ++i; 3982258Scsgr 3992258Scsgr while ( i + numecs >= current_max_xpairs ) 4002258Scsgr expand_nxt_chk(); 4012258Scsgr } 4022258Scsgr 4032258Scsgr /* If we started search from the beginning, store the new 4042258Scsgr * firstfree for the next call of find_table_space(). 4052258Scsgr */ 4062258Scsgr if ( numtrans <= MAX_XTIONS_FULL_INTERIOR_FIT ) 4072258Scsgr firstfree = i + 1; 4082258Scsgr 4092258Scsgr /* Check to see if all elements in chk (and therefore nxt) 4102258Scsgr * that are needed for the new state have not yet been taken. 4112258Scsgr */ 4122258Scsgr 4132258Scsgr state_ptr = &state[1]; 4142258Scsgr ptr_to_last_entry_in_state = &chk[i + numecs + 1]; 4152258Scsgr 4162258Scsgr for ( chk_ptr = &chk[i + 1]; 4172258Scsgr chk_ptr != ptr_to_last_entry_in_state; ++chk_ptr ) 4182258Scsgr if ( *(state_ptr++) != 0 && *chk_ptr != 0 ) 4192258Scsgr break; 4202258Scsgr 4212258Scsgr if ( chk_ptr == ptr_to_last_entry_in_state ) 4222258Scsgr return i; 4232258Scsgr 4242258Scsgr else 4252258Scsgr ++i; 4262258Scsgr } 4272258Scsgr } 4282258Scsgr 4292258Scsgr 4302258Scsgr/* inittbl - initialize transition tables 4312258Scsgr * 4322258Scsgr * Initializes "firstfree" to be one beyond the end of the table. Initializes 4332258Scsgr * all "chk" entries to be zero. 4342258Scsgr */ 4352258Scsgrvoid inittbl() 4362258Scsgr { 4372258Scsgr register int i; 4382258Scsgr 43916519Snate zero_out( (char *) chk, (size_t) (current_max_xpairs * sizeof( int )) ); 4402258Scsgr 4412258Scsgr tblend = 0; 4422258Scsgr firstfree = tblend + 1; 4432258Scsgr numtemps = 0; 4442258Scsgr 4452258Scsgr if ( usemecs ) 4462258Scsgr { 4472258Scsgr /* Set up doubly-linked meta-equivalence classes; these 4482258Scsgr * are sets of equivalence classes which all have identical 4492258Scsgr * transitions out of TEMPLATES. 4502258Scsgr */ 4512258Scsgr 4522258Scsgr tecbck[1] = NIL; 4532258Scsgr 4542258Scsgr for ( i = 2; i <= numecs; ++i ) 4552258Scsgr { 4562258Scsgr tecbck[i] = i - 1; 4572258Scsgr tecfwd[i - 1] = i; 4582258Scsgr } 4592258Scsgr 4602258Scsgr tecfwd[numecs] = NIL; 4612258Scsgr } 4622258Scsgr } 4632258Scsgr 4642258Scsgr 4652258Scsgr/* mkdeftbl - make the default, "jam" table entries */ 4662258Scsgr 4672258Scsgrvoid mkdeftbl() 4682258Scsgr { 4692258Scsgr int i; 4702258Scsgr 4712258Scsgr jamstate = lastdfa + 1; 4722258Scsgr 4732258Scsgr ++tblend; /* room for transition on end-of-buffer character */ 4742258Scsgr 4752258Scsgr while ( tblend + numecs >= current_max_xpairs ) 4762258Scsgr expand_nxt_chk(); 4772258Scsgr 4782258Scsgr /* Add in default end-of-buffer transition. */ 4792258Scsgr nxt[tblend] = end_of_buffer_state; 4802258Scsgr chk[tblend] = jamstate; 4812258Scsgr 4822258Scsgr for ( i = 1; i <= numecs; ++i ) 4832258Scsgr { 4842258Scsgr nxt[tblend + i] = 0; 4852258Scsgr chk[tblend + i] = jamstate; 4862258Scsgr } 4872258Scsgr 4882258Scsgr jambase = tblend; 4892258Scsgr 4902258Scsgr base[jamstate] = jambase; 4912258Scsgr def[jamstate] = 0; 4922258Scsgr 4932258Scsgr tblend += numecs; 4942258Scsgr ++numtemps; 4952258Scsgr } 4962258Scsgr 4972258Scsgr 4982258Scsgr/* mkentry - create base/def and nxt/chk entries for transition array 4992258Scsgr * 5002258Scsgr * synopsis 5012258Scsgr * int state[numchars + 1], numchars, statenum, deflink, totaltrans; 5022258Scsgr * mkentry( state, numchars, statenum, deflink, totaltrans ); 5032258Scsgr * 5042258Scsgr * "state" is a transition array "numchars" characters in size, "statenum" 5052258Scsgr * is the offset to be used into the base/def tables, and "deflink" is the 5062258Scsgr * entry to put in the "def" table entry. If "deflink" is equal to 5072258Scsgr * "JAMSTATE", then no attempt will be made to fit zero entries of "state" 5082258Scsgr * (i.e., jam entries) into the table. It is assumed that by linking to 5092258Scsgr * "JAMSTATE" they will be taken care of. In any case, entries in "state" 5102258Scsgr * marking transitions to "SAME_TRANS" are treated as though they will be 5112258Scsgr * taken care of by whereever "deflink" points. "totaltrans" is the total 5122258Scsgr * number of transitions out of the state. If it is below a certain threshold, 5132258Scsgr * the tables are searched for an interior spot that will accommodate the 5142258Scsgr * state array. 5152258Scsgr */ 5162258Scsgr 5172258Scsgrvoid mkentry( state, numchars, statenum, deflink, totaltrans ) 5182258Scsgrregister int *state; 5192258Scsgrint numchars, statenum, deflink, totaltrans; 5202258Scsgr { 5212258Scsgr register int minec, maxec, i, baseaddr; 5222258Scsgr int tblbase, tbllast; 5232258Scsgr 5242258Scsgr if ( totaltrans == 0 ) 5252258Scsgr { /* there are no out-transitions */ 5262258Scsgr if ( deflink == JAMSTATE ) 5272258Scsgr base[statenum] = JAMSTATE; 5282258Scsgr else 5292258Scsgr base[statenum] = 0; 5302258Scsgr 5312258Scsgr def[statenum] = deflink; 5322258Scsgr return; 5332258Scsgr } 5342258Scsgr 5352258Scsgr for ( minec = 1; minec <= numchars; ++minec ) 5362258Scsgr { 5372258Scsgr if ( state[minec] != SAME_TRANS ) 5382258Scsgr if ( state[minec] != 0 || deflink != JAMSTATE ) 5392258Scsgr break; 5402258Scsgr } 5412258Scsgr 5422258Scsgr if ( totaltrans == 1 ) 5432258Scsgr { 5442258Scsgr /* There's only one out-transition. Save it for later to fill 5452258Scsgr * in holes in the tables. 5462258Scsgr */ 5472258Scsgr stack1( statenum, minec, state[minec], deflink ); 5482258Scsgr return; 5492258Scsgr } 5502258Scsgr 5512258Scsgr for ( maxec = numchars; maxec > 0; --maxec ) 5522258Scsgr { 5532258Scsgr if ( state[maxec] != SAME_TRANS ) 5542258Scsgr if ( state[maxec] != 0 || deflink != JAMSTATE ) 5552258Scsgr break; 5562258Scsgr } 5572258Scsgr 5582258Scsgr /* Whether we try to fit the state table in the middle of the table 5592258Scsgr * entries we have already generated, or if we just take the state 5602258Scsgr * table at the end of the nxt/chk tables, we must make sure that we 5612258Scsgr * have a valid base address (i.e., non-negative). Note that 5622258Scsgr * negative base addresses dangerous at run-time (because indexing 5632258Scsgr * the nxt array with one and a low-valued character will access 5642258Scsgr * memory before the start of the array. 5652258Scsgr */ 5662258Scsgr 5672258Scsgr /* Find the first transition of state that we need to worry about. */ 5682258Scsgr if ( totaltrans * 100 <= numchars * INTERIOR_FIT_PERCENTAGE ) 5692258Scsgr { 5702258Scsgr /* Attempt to squeeze it into the middle of the tables. */ 5712258Scsgr baseaddr = firstfree; 5722258Scsgr 5732258Scsgr while ( baseaddr < minec ) 5742258Scsgr { 5752258Scsgr /* Using baseaddr would result in a negative base 5762258Scsgr * address below; find the next free slot. 5772258Scsgr */ 5782258Scsgr for ( ++baseaddr; chk[baseaddr] != 0; ++baseaddr ) 5792258Scsgr ; 5802258Scsgr } 5812258Scsgr 5822258Scsgr while ( baseaddr + maxec - minec + 1 >= current_max_xpairs ) 5832258Scsgr expand_nxt_chk(); 5842258Scsgr 5852258Scsgr for ( i = minec; i <= maxec; ++i ) 5862258Scsgr if ( state[i] != SAME_TRANS && 5872258Scsgr (state[i] != 0 || deflink != JAMSTATE) && 5882258Scsgr chk[baseaddr + i - minec] != 0 ) 5892258Scsgr { /* baseaddr unsuitable - find another */ 5902258Scsgr for ( ++baseaddr; 5912258Scsgr baseaddr < current_max_xpairs && 5922258Scsgr chk[baseaddr] != 0; ++baseaddr ) 5932258Scsgr ; 5942258Scsgr 5952258Scsgr while ( baseaddr + maxec - minec + 1 >= 5962258Scsgr current_max_xpairs ) 5972258Scsgr expand_nxt_chk(); 5982258Scsgr 5992258Scsgr /* Reset the loop counter so we'll start all 6002258Scsgr * over again next time it's incremented. 6012258Scsgr */ 6022258Scsgr 6032258Scsgr i = minec - 1; 6042258Scsgr } 6052258Scsgr } 6062258Scsgr 6072258Scsgr else 6082258Scsgr { 6092258Scsgr /* Ensure that the base address we eventually generate is 6102258Scsgr * non-negative. 6112258Scsgr */ 6122258Scsgr baseaddr = MAX( tblend + 1, minec ); 6132258Scsgr } 6142258Scsgr 6152258Scsgr tblbase = baseaddr - minec; 6162258Scsgr tbllast = tblbase + maxec; 6172258Scsgr 6182258Scsgr while ( tbllast + 1 >= current_max_xpairs ) 6192258Scsgr expand_nxt_chk(); 6202258Scsgr 6212258Scsgr base[statenum] = tblbase; 6222258Scsgr def[statenum] = deflink; 6232258Scsgr 6242258Scsgr for ( i = minec; i <= maxec; ++i ) 6252258Scsgr if ( state[i] != SAME_TRANS ) 6262258Scsgr if ( state[i] != 0 || deflink != JAMSTATE ) 6272258Scsgr { 6282258Scsgr nxt[tblbase + i] = state[i]; 6292258Scsgr chk[tblbase + i] = statenum; 6302258Scsgr } 6312258Scsgr 6322258Scsgr if ( baseaddr == firstfree ) 6332258Scsgr /* Find next free slot in tables. */ 6342258Scsgr for ( ++firstfree; chk[firstfree] != 0; ++firstfree ) 6352258Scsgr ; 6362258Scsgr 6372258Scsgr tblend = MAX( tblend, tbllast ); 6382258Scsgr } 6392258Scsgr 6402258Scsgr 6412258Scsgr/* mk1tbl - create table entries for a state (or state fragment) which 6422258Scsgr * has only one out-transition 6432258Scsgr */ 6442258Scsgr 6452258Scsgrvoid mk1tbl( state, sym, onenxt, onedef ) 6462258Scsgrint state, sym, onenxt, onedef; 6472258Scsgr { 6482258Scsgr if ( firstfree < sym ) 6492258Scsgr firstfree = sym; 6502258Scsgr 6512258Scsgr while ( chk[firstfree] != 0 ) 6522258Scsgr if ( ++firstfree >= current_max_xpairs ) 6532258Scsgr expand_nxt_chk(); 6542258Scsgr 6552258Scsgr base[state] = firstfree - sym; 6562258Scsgr def[state] = onedef; 6572258Scsgr chk[firstfree] = state; 6582258Scsgr nxt[firstfree] = onenxt; 6592258Scsgr 6602258Scsgr if ( firstfree > tblend ) 6612258Scsgr { 6622258Scsgr tblend = firstfree++; 6632258Scsgr 6642258Scsgr if ( firstfree >= current_max_xpairs ) 6652258Scsgr expand_nxt_chk(); 6662258Scsgr } 6672258Scsgr } 6682258Scsgr 6692258Scsgr 6702258Scsgr/* mkprot - create new proto entry */ 6712258Scsgr 6722258Scsgrvoid mkprot( state, statenum, comstate ) 6732258Scsgrint state[], statenum, comstate; 6742258Scsgr { 6752258Scsgr int i, slot, tblbase; 6762258Scsgr 6772258Scsgr if ( ++numprots >= MSP || numecs * numprots >= PROT_SAVE_SIZE ) 6782258Scsgr { 6792258Scsgr /* Gotta make room for the new proto by dropping last entry in 6802258Scsgr * the queue. 6812258Scsgr */ 6822258Scsgr slot = lastprot; 6832258Scsgr lastprot = protprev[lastprot]; 6842258Scsgr protnext[lastprot] = NIL; 6852258Scsgr } 6862258Scsgr 6872258Scsgr else 6882258Scsgr slot = numprots; 6892258Scsgr 6902258Scsgr protnext[slot] = firstprot; 6912258Scsgr 6922258Scsgr if ( firstprot != NIL ) 6932258Scsgr protprev[firstprot] = slot; 6942258Scsgr 6952258Scsgr firstprot = slot; 6962258Scsgr prottbl[slot] = statenum; 6972258Scsgr protcomst[slot] = comstate; 6982258Scsgr 6992258Scsgr /* Copy state into save area so it can be compared with rapidly. */ 7002258Scsgr tblbase = numecs * (slot - 1); 7012258Scsgr 7022258Scsgr for ( i = 1; i <= numecs; ++i ) 7032258Scsgr protsave[tblbase + i] = state[i]; 7042258Scsgr } 7052258Scsgr 7062258Scsgr 7072258Scsgr/* mktemplate - create a template entry based on a state, and connect the state 7082258Scsgr * to it 7092258Scsgr */ 7102258Scsgr 7112258Scsgrvoid mktemplate( state, statenum, comstate ) 7122258Scsgrint state[], statenum, comstate; 7132258Scsgr { 7142258Scsgr int i, numdiff, tmpbase, tmp[CSIZE + 1]; 7152258Scsgr Char transset[CSIZE + 1]; 7162258Scsgr int tsptr; 7172258Scsgr 7182258Scsgr ++numtemps; 7192258Scsgr 7202258Scsgr tsptr = 0; 7212258Scsgr 7222258Scsgr /* Calculate where we will temporarily store the transition table 7232258Scsgr * of the template in the tnxt[] array. The final transition table 7242258Scsgr * gets created by cmptmps(). 7252258Scsgr */ 7262258Scsgr 7272258Scsgr tmpbase = numtemps * numecs; 7282258Scsgr 7292258Scsgr if ( tmpbase + numecs >= current_max_template_xpairs ) 7302258Scsgr { 7312258Scsgr current_max_template_xpairs += MAX_TEMPLATE_XPAIRS_INCREMENT; 7322258Scsgr 7332258Scsgr ++num_reallocs; 7342258Scsgr 7352258Scsgr tnxt = reallocate_integer_array( tnxt, 7362258Scsgr current_max_template_xpairs ); 7372258Scsgr } 7382258Scsgr 7392258Scsgr for ( i = 1; i <= numecs; ++i ) 7402258Scsgr if ( state[i] == 0 ) 7412258Scsgr tnxt[tmpbase + i] = 0; 7422258Scsgr else 7432258Scsgr { 7442258Scsgr transset[tsptr++] = i; 7452258Scsgr tnxt[tmpbase + i] = comstate; 7462258Scsgr } 7472258Scsgr 7482258Scsgr if ( usemecs ) 7492258Scsgr mkeccl( transset, tsptr, tecfwd, tecbck, numecs, 0 ); 7502258Scsgr 7512258Scsgr mkprot( tnxt + tmpbase, -numtemps, comstate ); 7522258Scsgr 7532258Scsgr /* We rely on the fact that mkprot adds things to the beginning 7542258Scsgr * of the proto queue. 7552258Scsgr */ 7562258Scsgr 7572258Scsgr numdiff = tbldiff( state, firstprot, tmp ); 7582258Scsgr mkentry( tmp, numecs, statenum, -numtemps, numdiff ); 7592258Scsgr } 7602258Scsgr 7612258Scsgr 7622258Scsgr/* mv2front - move proto queue element to front of queue */ 7632258Scsgr 7642258Scsgrvoid mv2front( qelm ) 7652258Scsgrint qelm; 7662258Scsgr { 7672258Scsgr if ( firstprot != qelm ) 7682258Scsgr { 7692258Scsgr if ( qelm == lastprot ) 7702258Scsgr lastprot = protprev[lastprot]; 7712258Scsgr 7722258Scsgr protnext[protprev[qelm]] = protnext[qelm]; 7732258Scsgr 7742258Scsgr if ( protnext[qelm] != NIL ) 7752258Scsgr protprev[protnext[qelm]] = protprev[qelm]; 7762258Scsgr 7772258Scsgr protprev[qelm] = NIL; 7782258Scsgr protnext[qelm] = firstprot; 7792258Scsgr protprev[firstprot] = qelm; 7802258Scsgr firstprot = qelm; 7812258Scsgr } 7822258Scsgr } 7832258Scsgr 7842258Scsgr 7852258Scsgr/* place_state - place a state into full speed transition table 7862258Scsgr * 7872258Scsgr * State is the statenum'th state. It is indexed by equivalence class and 7882258Scsgr * gives the number of the state to enter for a given equivalence class. 7892258Scsgr * Transnum is the number of out-transitions for the state. 7902258Scsgr */ 7912258Scsgr 7922258Scsgrvoid place_state( state, statenum, transnum ) 7932258Scsgrint *state, statenum, transnum; 7942258Scsgr { 7952258Scsgr register int i; 7962258Scsgr register int *state_ptr; 7972258Scsgr int position = find_table_space( state, transnum ); 7982258Scsgr 7992258Scsgr /* "base" is the table of start positions. */ 8002258Scsgr base[statenum] = position; 8012258Scsgr 8022258Scsgr /* Put in action number marker; this non-zero number makes sure that 8032258Scsgr * find_table_space() knows that this position in chk/nxt is taken 8042258Scsgr * and should not be used for another accepting number in another 8052258Scsgr * state. 8062258Scsgr */ 8072258Scsgr chk[position - 1] = 1; 8082258Scsgr 8092258Scsgr /* Put in end-of-buffer marker; this is for the same purposes as 8102258Scsgr * above. 8112258Scsgr */ 8122258Scsgr chk[position] = 1; 8132258Scsgr 8142258Scsgr /* Place the state into chk and nxt. */ 8152258Scsgr state_ptr = &state[1]; 8162258Scsgr 8172258Scsgr for ( i = 1; i <= numecs; ++i, ++state_ptr ) 8182258Scsgr if ( *state_ptr != 0 ) 8192258Scsgr { 8202258Scsgr chk[position + i] = i; 8212258Scsgr nxt[position + i] = *state_ptr; 8222258Scsgr } 8232258Scsgr 8242258Scsgr if ( position + numecs > tblend ) 8252258Scsgr tblend = position + numecs; 8262258Scsgr } 8272258Scsgr 8282258Scsgr 8292258Scsgr/* stack1 - save states with only one out-transition to be processed later 8302258Scsgr * 8312258Scsgr * If there's room for another state on the "one-transition" stack, the 8322258Scsgr * state is pushed onto it, to be processed later by mk1tbl. If there's 8332258Scsgr * no room, we process the sucker right now. 8342258Scsgr */ 8352258Scsgr 8362258Scsgrvoid stack1( statenum, sym, nextstate, deflink ) 8372258Scsgrint statenum, sym, nextstate, deflink; 8382258Scsgr { 8392258Scsgr if ( onesp >= ONE_STACK_SIZE - 1 ) 8402258Scsgr mk1tbl( statenum, sym, nextstate, deflink ); 8412258Scsgr 8422258Scsgr else 8432258Scsgr { 8442258Scsgr ++onesp; 8452258Scsgr onestate[onesp] = statenum; 8462258Scsgr onesym[onesp] = sym; 8472258Scsgr onenext[onesp] = nextstate; 8482258Scsgr onedef[onesp] = deflink; 8492258Scsgr } 8502258Scsgr } 8512258Scsgr 8522258Scsgr 8532258Scsgr/* tbldiff - compute differences between two state tables 8542258Scsgr * 8552258Scsgr * "state" is the state array which is to be extracted from the pr'th 8562258Scsgr * proto. "pr" is both the number of the proto we are extracting from 8572258Scsgr * and an index into the save area where we can find the proto's complete 8582258Scsgr * state table. Each entry in "state" which differs from the corresponding 8592258Scsgr * entry of "pr" will appear in "ext". 8602258Scsgr * 8612258Scsgr * Entries which are the same in both "state" and "pr" will be marked 8622258Scsgr * as transitions to "SAME_TRANS" in "ext". The total number of differences 8632258Scsgr * between "state" and "pr" is returned as function value. Note that this 8642258Scsgr * number is "numecs" minus the number of "SAME_TRANS" entries in "ext". 8652258Scsgr */ 8662258Scsgr 8672258Scsgrint tbldiff( state, pr, ext ) 8682258Scsgrint state[], pr, ext[]; 8692258Scsgr { 8702258Scsgr register int i, *sp = state, *ep = ext, *protp; 8712258Scsgr register int numdiff = 0; 8722258Scsgr 8732258Scsgr protp = &protsave[numecs * (pr - 1)]; 8742258Scsgr 8752258Scsgr for ( i = numecs; i > 0; --i ) 8762258Scsgr { 8772258Scsgr if ( *++protp == *++sp ) 8782258Scsgr *++ep = SAME_TRANS; 8792258Scsgr else 8802258Scsgr { 8812258Scsgr *++ep = *sp; 8822258Scsgr ++numdiff; 8832258Scsgr } 8842258Scsgr } 8852258Scsgr 8862258Scsgr return numdiff; 8872258Scsgr } 888