12258Scsgr/* tblcmp - table compression routines */ 22258Scsgr 32258Scsgr/*- 42258Scsgr * Copyright (c) 1990 The Regents of the University of California. 52258Scsgr * All rights reserved. 62258Scsgr * 72258Scsgr * This code is derived from software contributed to Berkeley by 82258Scsgr * Vern Paxson. 916519Snate * 102258Scsgr * The United States Government has rights in this work pursuant 112258Scsgr * to contract no. DE-AC03-76SF00098 between the United States 122258Scsgr * Department of Energy and the University of California. 132258Scsgr * 142258Scsgr * Redistribution and use in source and binary forms are permitted provided 152258Scsgr * that: (1) source distributions retain this entire copyright notice and 162258Scsgr * comment, and (2) distributions including binaries display the following 172258Scsgr * acknowledgement: ``This product includes software developed by the 182258Scsgr * University of California, Berkeley and its contributors'' in the 192258Scsgr * documentation or other materials provided with the distribution and in 202258Scsgr * all advertising materials mentioning features or use of this software. 212258Scsgr * Neither the name of the University nor the names of its contributors may 222258Scsgr * be used to endorse or promote products derived from this software without 232258Scsgr * specific prior written permission. 242258Scsgr * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED 252258Scsgr * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 262258Scsgr * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 272258Scsgr */ 282258Scsgr 2952555Sobrien/* $Header: /home/daffy/u0/vern/flex/RCS/tblcmp.c,v 2.11 94/11/05 17:08:28 vern Exp $ */ 3099112Sobrien#include <sys/cdefs.h> 3199112Sobrien__FBSDID("$FreeBSD$"); 322258Scsgr 332258Scsgr#include "flexdef.h" 342258Scsgr 352258Scsgr 362258Scsgr/* declarations for functions that have forward references */ 372258Scsgr 38179549Sdwmalonevoid mkentry PROTO((int*, int, int, int, int)); 392258Scsgrvoid mkprot PROTO((int[], int, int)); 402258Scsgrvoid mktemplate PROTO((int[], int, int)); 412258Scsgrvoid mv2front PROTO((int)); 422258Scsgrint tbldiff PROTO((int[], int, int[])); 432258Scsgr 442258Scsgr 452258Scsgr/* bldtbl - build table entries for dfa state 462258Scsgr * 472258Scsgr * synopsis 482258Scsgr * int state[numecs], statenum, totaltrans, comstate, comfreq; 492258Scsgr * bldtbl( state, statenum, totaltrans, comstate, comfreq ); 502258Scsgr * 512258Scsgr * State is the statenum'th dfa state. It is indexed by equivalence class and 522258Scsgr * gives the number of the state to enter for a given equivalence class. 532258Scsgr * totaltrans is the total number of transitions out of the state. Comstate 542258Scsgr * is that state which is the destination of the most transitions out of State. 552258Scsgr * Comfreq is how many transitions there are out of State to Comstate. 562258Scsgr * 572258Scsgr * A note on terminology: 582258Scsgr * "protos" are transition tables which have a high probability of 592258Scsgr * either being redundant (a state processed later will have an identical 602258Scsgr * transition table) or nearly redundant (a state processed later will have 612258Scsgr * many of the same out-transitions). A "most recently used" queue of 622258Scsgr * protos is kept around with the hope that most states will find a proto 632258Scsgr * which is similar enough to be usable, and therefore compacting the 642258Scsgr * output tables. 652258Scsgr * "templates" are a special type of proto. If a transition table is 662258Scsgr * homogeneous or nearly homogeneous (all transitions go to the same 672258Scsgr * destination) then the odds are good that future states will also go 682258Scsgr * to the same destination state on basically the same character set. 692258Scsgr * These homogeneous states are so common when dealing with large rule 702258Scsgr * sets that they merit special attention. If the transition table were 712258Scsgr * simply made into a proto, then (typically) each subsequent, similar 722258Scsgr * state will differ from the proto for two out-transitions. One of these 732258Scsgr * out-transitions will be that character on which the proto does not go 742258Scsgr * to the common destination, and one will be that character on which the 752258Scsgr * state does not go to the common destination. Templates, on the other 762258Scsgr * hand, go to the common state on EVERY transition character, and therefore 772258Scsgr * cost only one difference. 782258Scsgr */ 792258Scsgr 802258Scsgrvoid bldtbl( state, statenum, totaltrans, comstate, comfreq ) 812258Scsgrint state[], statenum, totaltrans, comstate, comfreq; 822258Scsgr { 832258Scsgr int extptr, extrct[2][CSIZE + 1]; 842258Scsgr int mindiff, minprot, i, d; 852258Scsgr 862258Scsgr /* If extptr is 0 then the first array of extrct holds the result 872258Scsgr * of the "best difference" to date, which is those transitions 882258Scsgr * which occur in "state" but not in the proto which, to date, 892258Scsgr * has the fewest differences between itself and "state". If 902258Scsgr * extptr is 1 then the second array of extrct hold the best 912258Scsgr * difference. The two arrays are toggled between so that the 922258Scsgr * best difference to date can be kept around and also a difference 932258Scsgr * just created by checking against a candidate "best" proto. 942258Scsgr */ 952258Scsgr 962258Scsgr extptr = 0; 972258Scsgr 982258Scsgr /* If the state has too few out-transitions, don't bother trying to 992258Scsgr * compact its tables. 1002258Scsgr */ 1012258Scsgr 1022258Scsgr if ( (totaltrans * 100) < (numecs * PROTO_SIZE_PERCENTAGE) ) 1032258Scsgr mkentry( state, numecs, statenum, JAMSTATE, totaltrans ); 1042258Scsgr 1052258Scsgr else 1062258Scsgr { 1072258Scsgr /* "checkcom" is true if we should only check "state" against 1082258Scsgr * protos which have the same "comstate" value. 1092258Scsgr */ 1102258Scsgr int checkcom = 1112258Scsgr comfreq * 100 > totaltrans * CHECK_COM_PERCENTAGE; 1122258Scsgr 1132258Scsgr minprot = firstprot; 1142258Scsgr mindiff = totaltrans; 1152258Scsgr 1162258Scsgr if ( checkcom ) 1172258Scsgr { 1182258Scsgr /* Find first proto which has the same "comstate". */ 1192258Scsgr for ( i = firstprot; i != NIL; i = protnext[i] ) 1202258Scsgr if ( protcomst[i] == comstate ) 1212258Scsgr { 1222258Scsgr minprot = i; 1232258Scsgr mindiff = tbldiff( state, minprot, 1242258Scsgr extrct[extptr] ); 1252258Scsgr break; 1262258Scsgr } 1272258Scsgr } 1282258Scsgr 1292258Scsgr else 1302258Scsgr { 1312258Scsgr /* Since we've decided that the most common destination 1322258Scsgr * out of "state" does not occur with a high enough 1332258Scsgr * frequency, we set the "comstate" to zero, assuring 1342258Scsgr * that if this state is entered into the proto list, 1352258Scsgr * it will not be considered a template. 1362258Scsgr */ 1372258Scsgr comstate = 0; 1382258Scsgr 1392258Scsgr if ( firstprot != NIL ) 1402258Scsgr { 1412258Scsgr minprot = firstprot; 1422258Scsgr mindiff = tbldiff( state, minprot, 1432258Scsgr extrct[extptr] ); 1442258Scsgr } 1452258Scsgr } 1462258Scsgr 1472258Scsgr /* We now have the first interesting proto in "minprot". If 1482258Scsgr * it matches within the tolerances set for the first proto, 1492258Scsgr * we don't want to bother scanning the rest of the proto list 1502258Scsgr * to see if we have any other reasonable matches. 1512258Scsgr */ 1522258Scsgr 1532258Scsgr if ( mindiff * 100 > totaltrans * FIRST_MATCH_DIFF_PERCENTAGE ) 1542258Scsgr { 1552258Scsgr /* Not a good enough match. Scan the rest of the 1562258Scsgr * protos. 1572258Scsgr */ 1582258Scsgr for ( i = minprot; i != NIL; i = protnext[i] ) 1592258Scsgr { 1602258Scsgr d = tbldiff( state, i, extrct[1 - extptr] ); 1612258Scsgr if ( d < mindiff ) 1622258Scsgr { 1632258Scsgr extptr = 1 - extptr; 1642258Scsgr mindiff = d; 1652258Scsgr minprot = i; 1662258Scsgr } 1672258Scsgr } 1682258Scsgr } 1692258Scsgr 1702258Scsgr /* Check if the proto we've decided on as our best bet is close 1712258Scsgr * enough to the state we want to match to be usable. 1722258Scsgr */ 1732258Scsgr 1742258Scsgr if ( mindiff * 100 > totaltrans * ACCEPTABLE_DIFF_PERCENTAGE ) 1752258Scsgr { 1762258Scsgr /* No good. If the state is homogeneous enough, 1772258Scsgr * we make a template out of it. Otherwise, we 1782258Scsgr * make a proto. 1792258Scsgr */ 1802258Scsgr 1812258Scsgr if ( comfreq * 100 >= 1822258Scsgr totaltrans * TEMPLATE_SAME_PERCENTAGE ) 1832258Scsgr mktemplate( state, statenum, comstate ); 1842258Scsgr 1852258Scsgr else 1862258Scsgr { 1872258Scsgr mkprot( state, statenum, comstate ); 1882258Scsgr mkentry( state, numecs, statenum, 1892258Scsgr JAMSTATE, totaltrans ); 1902258Scsgr } 1912258Scsgr } 1922258Scsgr 1932258Scsgr else 1942258Scsgr { /* use the proto */ 1952258Scsgr mkentry( extrct[extptr], numecs, statenum, 1962258Scsgr prottbl[minprot], mindiff ); 1972258Scsgr 1982258Scsgr /* If this state was sufficiently different from the 1992258Scsgr * proto we built it from, make it, too, a proto. 2002258Scsgr */ 2012258Scsgr 2022258Scsgr if ( mindiff * 100 >= 2032258Scsgr totaltrans * NEW_PROTO_DIFF_PERCENTAGE ) 2042258Scsgr mkprot( state, statenum, comstate ); 2052258Scsgr 2062258Scsgr /* Since mkprot added a new proto to the proto queue, 2072258Scsgr * it's possible that "minprot" is no longer on the 2082258Scsgr * proto queue (if it happened to have been the last 2092258Scsgr * entry, it would have been bumped off). If it's 2102258Scsgr * not there, then the new proto took its physical 2112258Scsgr * place (though logically the new proto is at the 2122258Scsgr * beginning of the queue), so in that case the 2132258Scsgr * following call will do nothing. 2142258Scsgr */ 2152258Scsgr 2162258Scsgr mv2front( minprot ); 2172258Scsgr } 2182258Scsgr } 2192258Scsgr } 2202258Scsgr 2212258Scsgr 2222258Scsgr/* cmptmps - compress template table entries 2232258Scsgr * 2242258Scsgr * Template tables are compressed by using the 'template equivalence 2252258Scsgr * classes', which are collections of transition character equivalence 2262258Scsgr * classes which always appear together in templates - really meta-equivalence 2272258Scsgr * classes. 2282258Scsgr */ 2292258Scsgr 2302258Scsgrvoid cmptmps() 2312258Scsgr { 2322258Scsgr int tmpstorage[CSIZE + 1]; 233179549Sdwmalone int *tmp = tmpstorage, i, j; 2342258Scsgr int totaltrans, trans; 2352258Scsgr 2362258Scsgr peakpairs = numtemps * numecs + tblend; 2372258Scsgr 2382258Scsgr if ( usemecs ) 2392258Scsgr { 2402258Scsgr /* Create equivalence classes based on data gathered on 2412258Scsgr * template transitions. 2422258Scsgr */ 2432258Scsgr nummecs = cre8ecs( tecfwd, tecbck, numecs ); 2442258Scsgr } 2452258Scsgr 2462258Scsgr else 2472258Scsgr nummecs = numecs; 2482258Scsgr 2492258Scsgr while ( lastdfa + numtemps + 1 >= current_max_dfas ) 2502258Scsgr increase_max_dfas(); 2512258Scsgr 2522258Scsgr /* Loop through each template. */ 2532258Scsgr 2542258Scsgr for ( i = 1; i <= numtemps; ++i ) 2552258Scsgr { 2562258Scsgr /* Number of non-jam transitions out of this template. */ 2572258Scsgr totaltrans = 0; 2582258Scsgr 2592258Scsgr for ( j = 1; j <= numecs; ++j ) 2602258Scsgr { 2612258Scsgr trans = tnxt[numecs * i + j]; 2622258Scsgr 2632258Scsgr if ( usemecs ) 2642258Scsgr { 2652258Scsgr /* The absolute value of tecbck is the 2662258Scsgr * meta-equivalence class of a given 2672258Scsgr * equivalence class, as set up by cre8ecs(). 2682258Scsgr */ 2692258Scsgr if ( tecbck[j] > 0 ) 2702258Scsgr { 2712258Scsgr tmp[tecbck[j]] = trans; 2722258Scsgr 2732258Scsgr if ( trans > 0 ) 2742258Scsgr ++totaltrans; 2752258Scsgr } 2762258Scsgr } 2772258Scsgr 2782258Scsgr else 2792258Scsgr { 2802258Scsgr tmp[j] = trans; 2812258Scsgr 2822258Scsgr if ( trans > 0 ) 2832258Scsgr ++totaltrans; 2842258Scsgr } 2852258Scsgr } 2862258Scsgr 2872258Scsgr /* It is assumed (in a rather subtle way) in the skeleton 2882258Scsgr * that if we're using meta-equivalence classes, the def[] 2892258Scsgr * entry for all templates is the jam template, i.e., 2902258Scsgr * templates never default to other non-jam table entries 2912258Scsgr * (e.g., another template) 2922258Scsgr */ 2932258Scsgr 2942258Scsgr /* Leave room for the jam-state after the last real state. */ 2952258Scsgr mkentry( tmp, nummecs, lastdfa + i + 1, JAMSTATE, totaltrans ); 2962258Scsgr } 2972258Scsgr } 2982258Scsgr 2992258Scsgr 3002258Scsgr 3012258Scsgr/* expand_nxt_chk - expand the next check arrays */ 3022258Scsgr 3032258Scsgrvoid expand_nxt_chk() 3042258Scsgr { 305179549Sdwmalone int old_max = current_max_xpairs; 3062258Scsgr 3072258Scsgr current_max_xpairs += MAX_XPAIRS_INCREMENT; 3082258Scsgr 3092258Scsgr ++num_reallocs; 3102258Scsgr 3112258Scsgr nxt = reallocate_integer_array( nxt, current_max_xpairs ); 3122258Scsgr chk = reallocate_integer_array( chk, current_max_xpairs ); 3132258Scsgr 3142258Scsgr zero_out( (char *) (chk + old_max), 31516519Snate (size_t) (MAX_XPAIRS_INCREMENT * sizeof( int )) ); 3162258Scsgr } 3172258Scsgr 3182258Scsgr 3192258Scsgr/* find_table_space - finds a space in the table for a state to be placed 3202258Scsgr * 3212258Scsgr * synopsis 3222258Scsgr * int *state, numtrans, block_start; 3232258Scsgr * int find_table_space(); 3242258Scsgr * 3252258Scsgr * block_start = find_table_space( state, numtrans ); 3262258Scsgr * 3272258Scsgr * State is the state to be added to the full speed transition table. 3282258Scsgr * Numtrans is the number of out-transitions for the state. 3292258Scsgr * 3302258Scsgr * find_table_space() returns the position of the start of the first block (in 3312258Scsgr * chk) able to accommodate the state 3322258Scsgr * 3332258Scsgr * In determining if a state will or will not fit, find_table_space() must take 3342258Scsgr * into account the fact that an end-of-buffer state will be added at [0], 3352258Scsgr * and an action number will be added in [-1]. 3362258Scsgr */ 3372258Scsgr 3382258Scsgrint find_table_space( state, numtrans ) 3392258Scsgrint *state, numtrans; 3402258Scsgr { 3412258Scsgr /* Firstfree is the position of the first possible occurrence of two 3422258Scsgr * consecutive unused records in the chk and nxt arrays. 3432258Scsgr */ 344179549Sdwmalone int i; 345179549Sdwmalone int *state_ptr, *chk_ptr; 346179549Sdwmalone int *ptr_to_last_entry_in_state; 3472258Scsgr 3482258Scsgr /* If there are too many out-transitions, put the state at the end of 3492258Scsgr * nxt and chk. 3502258Scsgr */ 3512258Scsgr if ( numtrans > MAX_XTIONS_FULL_INTERIOR_FIT ) 3522258Scsgr { 3532258Scsgr /* If table is empty, return the first available spot in 3542258Scsgr * chk/nxt, which should be 1. 3552258Scsgr */ 3562258Scsgr if ( tblend < 2 ) 3572258Scsgr return 1; 3582258Scsgr 3592258Scsgr /* Start searching for table space near the end of 3602258Scsgr * chk/nxt arrays. 3612258Scsgr */ 3622258Scsgr i = tblend - numecs; 3632258Scsgr } 3642258Scsgr 3652258Scsgr else 3662258Scsgr /* Start searching for table space from the beginning 3672258Scsgr * (skipping only the elements which will definitely not 3682258Scsgr * hold the new state). 3692258Scsgr */ 3702258Scsgr i = firstfree; 3712258Scsgr 3722258Scsgr while ( 1 ) /* loops until a space is found */ 3732258Scsgr { 3742258Scsgr while ( i + numecs >= current_max_xpairs ) 3752258Scsgr expand_nxt_chk(); 3762258Scsgr 3772258Scsgr /* Loops until space for end-of-buffer and action number 3782258Scsgr * are found. 3792258Scsgr */ 3802258Scsgr while ( 1 ) 3812258Scsgr { 3822258Scsgr /* Check for action number space. */ 3832258Scsgr if ( chk[i - 1] == 0 ) 3842258Scsgr { 3852258Scsgr /* Check for end-of-buffer space. */ 3862258Scsgr if ( chk[i] == 0 ) 3872258Scsgr break; 3882258Scsgr 3892258Scsgr else 3902258Scsgr /* Since i != 0, there is no use 3912258Scsgr * checking to see if (++i) - 1 == 0, 3922258Scsgr * because that's the same as i == 0, 3932258Scsgr * so we skip a space. 3942258Scsgr */ 3952258Scsgr i += 2; 3962258Scsgr } 3972258Scsgr 3982258Scsgr else 3992258Scsgr ++i; 4002258Scsgr 4012258Scsgr while ( i + numecs >= current_max_xpairs ) 4022258Scsgr expand_nxt_chk(); 4032258Scsgr } 4042258Scsgr 4052258Scsgr /* If we started search from the beginning, store the new 4062258Scsgr * firstfree for the next call of find_table_space(). 4072258Scsgr */ 4082258Scsgr if ( numtrans <= MAX_XTIONS_FULL_INTERIOR_FIT ) 4092258Scsgr firstfree = i + 1; 4102258Scsgr 4112258Scsgr /* Check to see if all elements in chk (and therefore nxt) 4122258Scsgr * that are needed for the new state have not yet been taken. 4132258Scsgr */ 4142258Scsgr 4152258Scsgr state_ptr = &state[1]; 4162258Scsgr ptr_to_last_entry_in_state = &chk[i + numecs + 1]; 4172258Scsgr 4182258Scsgr for ( chk_ptr = &chk[i + 1]; 4192258Scsgr chk_ptr != ptr_to_last_entry_in_state; ++chk_ptr ) 4202258Scsgr if ( *(state_ptr++) != 0 && *chk_ptr != 0 ) 4212258Scsgr break; 4222258Scsgr 4232258Scsgr if ( chk_ptr == ptr_to_last_entry_in_state ) 4242258Scsgr return i; 4252258Scsgr 4262258Scsgr else 4272258Scsgr ++i; 4282258Scsgr } 4292258Scsgr } 4302258Scsgr 4312258Scsgr 4322258Scsgr/* inittbl - initialize transition tables 4332258Scsgr * 4342258Scsgr * Initializes "firstfree" to be one beyond the end of the table. Initializes 4352258Scsgr * all "chk" entries to be zero. 4362258Scsgr */ 4372258Scsgrvoid inittbl() 4382258Scsgr { 439179549Sdwmalone int i; 4402258Scsgr 44116519Snate zero_out( (char *) chk, (size_t) (current_max_xpairs * sizeof( int )) ); 4422258Scsgr 4432258Scsgr tblend = 0; 4442258Scsgr firstfree = tblend + 1; 4452258Scsgr numtemps = 0; 4462258Scsgr 4472258Scsgr if ( usemecs ) 4482258Scsgr { 4492258Scsgr /* Set up doubly-linked meta-equivalence classes; these 4502258Scsgr * are sets of equivalence classes which all have identical 4512258Scsgr * transitions out of TEMPLATES. 4522258Scsgr */ 4532258Scsgr 4542258Scsgr tecbck[1] = NIL; 4552258Scsgr 4562258Scsgr for ( i = 2; i <= numecs; ++i ) 4572258Scsgr { 4582258Scsgr tecbck[i] = i - 1; 4592258Scsgr tecfwd[i - 1] = i; 4602258Scsgr } 4612258Scsgr 4622258Scsgr tecfwd[numecs] = NIL; 4632258Scsgr } 4642258Scsgr } 4652258Scsgr 4662258Scsgr 4672258Scsgr/* mkdeftbl - make the default, "jam" table entries */ 4682258Scsgr 4692258Scsgrvoid mkdeftbl() 4702258Scsgr { 4712258Scsgr int i; 4722258Scsgr 4732258Scsgr jamstate = lastdfa + 1; 4742258Scsgr 4752258Scsgr ++tblend; /* room for transition on end-of-buffer character */ 4762258Scsgr 4772258Scsgr while ( tblend + numecs >= current_max_xpairs ) 4782258Scsgr expand_nxt_chk(); 4792258Scsgr 4802258Scsgr /* Add in default end-of-buffer transition. */ 4812258Scsgr nxt[tblend] = end_of_buffer_state; 4822258Scsgr chk[tblend] = jamstate; 4832258Scsgr 4842258Scsgr for ( i = 1; i <= numecs; ++i ) 4852258Scsgr { 4862258Scsgr nxt[tblend + i] = 0; 4872258Scsgr chk[tblend + i] = jamstate; 4882258Scsgr } 4892258Scsgr 4902258Scsgr jambase = tblend; 4912258Scsgr 4922258Scsgr base[jamstate] = jambase; 4932258Scsgr def[jamstate] = 0; 4942258Scsgr 4952258Scsgr tblend += numecs; 4962258Scsgr ++numtemps; 4972258Scsgr } 4982258Scsgr 4992258Scsgr 5002258Scsgr/* mkentry - create base/def and nxt/chk entries for transition array 5012258Scsgr * 5022258Scsgr * synopsis 5032258Scsgr * int state[numchars + 1], numchars, statenum, deflink, totaltrans; 5042258Scsgr * mkentry( state, numchars, statenum, deflink, totaltrans ); 5052258Scsgr * 5062258Scsgr * "state" is a transition array "numchars" characters in size, "statenum" 5072258Scsgr * is the offset to be used into the base/def tables, and "deflink" is the 5082258Scsgr * entry to put in the "def" table entry. If "deflink" is equal to 5092258Scsgr * "JAMSTATE", then no attempt will be made to fit zero entries of "state" 5102258Scsgr * (i.e., jam entries) into the table. It is assumed that by linking to 5112258Scsgr * "JAMSTATE" they will be taken care of. In any case, entries in "state" 5122258Scsgr * marking transitions to "SAME_TRANS" are treated as though they will be 5132258Scsgr * taken care of by whereever "deflink" points. "totaltrans" is the total 5142258Scsgr * number of transitions out of the state. If it is below a certain threshold, 5152258Scsgr * the tables are searched for an interior spot that will accommodate the 5162258Scsgr * state array. 5172258Scsgr */ 5182258Scsgr 5192258Scsgrvoid mkentry( state, numchars, statenum, deflink, totaltrans ) 520179549Sdwmaloneint *state; 5212258Scsgrint numchars, statenum, deflink, totaltrans; 5222258Scsgr { 523179549Sdwmalone int minec, maxec, i, baseaddr; 5242258Scsgr int tblbase, tbllast; 5252258Scsgr 5262258Scsgr if ( totaltrans == 0 ) 5272258Scsgr { /* there are no out-transitions */ 5282258Scsgr if ( deflink == JAMSTATE ) 5292258Scsgr base[statenum] = JAMSTATE; 5302258Scsgr else 5312258Scsgr base[statenum] = 0; 5322258Scsgr 5332258Scsgr def[statenum] = deflink; 5342258Scsgr return; 5352258Scsgr } 5362258Scsgr 5372258Scsgr for ( minec = 1; minec <= numchars; ++minec ) 5382258Scsgr { 5392258Scsgr if ( state[minec] != SAME_TRANS ) 5402258Scsgr if ( state[minec] != 0 || deflink != JAMSTATE ) 5412258Scsgr break; 5422258Scsgr } 5432258Scsgr 5442258Scsgr if ( totaltrans == 1 ) 5452258Scsgr { 5462258Scsgr /* There's only one out-transition. Save it for later to fill 5472258Scsgr * in holes in the tables. 5482258Scsgr */ 5492258Scsgr stack1( statenum, minec, state[minec], deflink ); 5502258Scsgr return; 5512258Scsgr } 5522258Scsgr 5532258Scsgr for ( maxec = numchars; maxec > 0; --maxec ) 5542258Scsgr { 5552258Scsgr if ( state[maxec] != SAME_TRANS ) 5562258Scsgr if ( state[maxec] != 0 || deflink != JAMSTATE ) 5572258Scsgr break; 5582258Scsgr } 5592258Scsgr 5602258Scsgr /* Whether we try to fit the state table in the middle of the table 5612258Scsgr * entries we have already generated, or if we just take the state 5622258Scsgr * table at the end of the nxt/chk tables, we must make sure that we 5632258Scsgr * have a valid base address (i.e., non-negative). Note that 5642258Scsgr * negative base addresses dangerous at run-time (because indexing 5652258Scsgr * the nxt array with one and a low-valued character will access 5662258Scsgr * memory before the start of the array. 5672258Scsgr */ 5682258Scsgr 5692258Scsgr /* Find the first transition of state that we need to worry about. */ 5702258Scsgr if ( totaltrans * 100 <= numchars * INTERIOR_FIT_PERCENTAGE ) 5712258Scsgr { 5722258Scsgr /* Attempt to squeeze it into the middle of the tables. */ 5732258Scsgr baseaddr = firstfree; 5742258Scsgr 5752258Scsgr while ( baseaddr < minec ) 5762258Scsgr { 5772258Scsgr /* Using baseaddr would result in a negative base 5782258Scsgr * address below; find the next free slot. 5792258Scsgr */ 5802258Scsgr for ( ++baseaddr; chk[baseaddr] != 0; ++baseaddr ) 5812258Scsgr ; 5822258Scsgr } 5832258Scsgr 5842258Scsgr while ( baseaddr + maxec - minec + 1 >= current_max_xpairs ) 5852258Scsgr expand_nxt_chk(); 5862258Scsgr 5872258Scsgr for ( i = minec; i <= maxec; ++i ) 5882258Scsgr if ( state[i] != SAME_TRANS && 5892258Scsgr (state[i] != 0 || deflink != JAMSTATE) && 5902258Scsgr chk[baseaddr + i - minec] != 0 ) 5912258Scsgr { /* baseaddr unsuitable - find another */ 5922258Scsgr for ( ++baseaddr; 5932258Scsgr baseaddr < current_max_xpairs && 5942258Scsgr chk[baseaddr] != 0; ++baseaddr ) 5952258Scsgr ; 5962258Scsgr 5972258Scsgr while ( baseaddr + maxec - minec + 1 >= 5982258Scsgr current_max_xpairs ) 5992258Scsgr expand_nxt_chk(); 6002258Scsgr 6012258Scsgr /* Reset the loop counter so we'll start all 6022258Scsgr * over again next time it's incremented. 6032258Scsgr */ 6042258Scsgr 6052258Scsgr i = minec - 1; 6062258Scsgr } 6072258Scsgr } 6082258Scsgr 6092258Scsgr else 6102258Scsgr { 6112258Scsgr /* Ensure that the base address we eventually generate is 6122258Scsgr * non-negative. 6132258Scsgr */ 6142258Scsgr baseaddr = MAX( tblend + 1, minec ); 6152258Scsgr } 6162258Scsgr 6172258Scsgr tblbase = baseaddr - minec; 6182258Scsgr tbllast = tblbase + maxec; 6192258Scsgr 6202258Scsgr while ( tbllast + 1 >= current_max_xpairs ) 6212258Scsgr expand_nxt_chk(); 6222258Scsgr 6232258Scsgr base[statenum] = tblbase; 6242258Scsgr def[statenum] = deflink; 6252258Scsgr 6262258Scsgr for ( i = minec; i <= maxec; ++i ) 6272258Scsgr if ( state[i] != SAME_TRANS ) 6282258Scsgr if ( state[i] != 0 || deflink != JAMSTATE ) 6292258Scsgr { 6302258Scsgr nxt[tblbase + i] = state[i]; 6312258Scsgr chk[tblbase + i] = statenum; 6322258Scsgr } 6332258Scsgr 6342258Scsgr if ( baseaddr == firstfree ) 6352258Scsgr /* Find next free slot in tables. */ 6362258Scsgr for ( ++firstfree; chk[firstfree] != 0; ++firstfree ) 6372258Scsgr ; 6382258Scsgr 6392258Scsgr tblend = MAX( tblend, tbllast ); 6402258Scsgr } 6412258Scsgr 6422258Scsgr 6432258Scsgr/* mk1tbl - create table entries for a state (or state fragment) which 6442258Scsgr * has only one out-transition 6452258Scsgr */ 6462258Scsgr 6472258Scsgrvoid mk1tbl( state, sym, onenxt, onedef ) 6482258Scsgrint state, sym, onenxt, onedef; 6492258Scsgr { 6502258Scsgr if ( firstfree < sym ) 6512258Scsgr firstfree = sym; 6522258Scsgr 6532258Scsgr while ( chk[firstfree] != 0 ) 6542258Scsgr if ( ++firstfree >= current_max_xpairs ) 6552258Scsgr expand_nxt_chk(); 6562258Scsgr 6572258Scsgr base[state] = firstfree - sym; 6582258Scsgr def[state] = onedef; 6592258Scsgr chk[firstfree] = state; 6602258Scsgr nxt[firstfree] = onenxt; 6612258Scsgr 6622258Scsgr if ( firstfree > tblend ) 6632258Scsgr { 6642258Scsgr tblend = firstfree++; 6652258Scsgr 6662258Scsgr if ( firstfree >= current_max_xpairs ) 6672258Scsgr expand_nxt_chk(); 6682258Scsgr } 6692258Scsgr } 6702258Scsgr 6712258Scsgr 6722258Scsgr/* mkprot - create new proto entry */ 6732258Scsgr 6742258Scsgrvoid mkprot( state, statenum, comstate ) 6752258Scsgrint state[], statenum, comstate; 6762258Scsgr { 6772258Scsgr int i, slot, tblbase; 6782258Scsgr 6792258Scsgr if ( ++numprots >= MSP || numecs * numprots >= PROT_SAVE_SIZE ) 6802258Scsgr { 6812258Scsgr /* Gotta make room for the new proto by dropping last entry in 6822258Scsgr * the queue. 6832258Scsgr */ 6842258Scsgr slot = lastprot; 6852258Scsgr lastprot = protprev[lastprot]; 6862258Scsgr protnext[lastprot] = NIL; 6872258Scsgr } 6882258Scsgr 6892258Scsgr else 6902258Scsgr slot = numprots; 6912258Scsgr 6922258Scsgr protnext[slot] = firstprot; 6932258Scsgr 6942258Scsgr if ( firstprot != NIL ) 6952258Scsgr protprev[firstprot] = slot; 6962258Scsgr 6972258Scsgr firstprot = slot; 6982258Scsgr prottbl[slot] = statenum; 6992258Scsgr protcomst[slot] = comstate; 7002258Scsgr 7012258Scsgr /* Copy state into save area so it can be compared with rapidly. */ 7022258Scsgr tblbase = numecs * (slot - 1); 7032258Scsgr 7042258Scsgr for ( i = 1; i <= numecs; ++i ) 7052258Scsgr protsave[tblbase + i] = state[i]; 7062258Scsgr } 7072258Scsgr 7082258Scsgr 7092258Scsgr/* mktemplate - create a template entry based on a state, and connect the state 7102258Scsgr * to it 7112258Scsgr */ 7122258Scsgr 7132258Scsgrvoid mktemplate( state, statenum, comstate ) 7142258Scsgrint state[], statenum, comstate; 7152258Scsgr { 7162258Scsgr int i, numdiff, tmpbase, tmp[CSIZE + 1]; 7172258Scsgr Char transset[CSIZE + 1]; 7182258Scsgr int tsptr; 7192258Scsgr 7202258Scsgr ++numtemps; 7212258Scsgr 7222258Scsgr tsptr = 0; 7232258Scsgr 7242258Scsgr /* Calculate where we will temporarily store the transition table 7252258Scsgr * of the template in the tnxt[] array. The final transition table 7262258Scsgr * gets created by cmptmps(). 7272258Scsgr */ 7282258Scsgr 7292258Scsgr tmpbase = numtemps * numecs; 7302258Scsgr 7312258Scsgr if ( tmpbase + numecs >= current_max_template_xpairs ) 7322258Scsgr { 7332258Scsgr current_max_template_xpairs += MAX_TEMPLATE_XPAIRS_INCREMENT; 7342258Scsgr 7352258Scsgr ++num_reallocs; 7362258Scsgr 7372258Scsgr tnxt = reallocate_integer_array( tnxt, 7382258Scsgr current_max_template_xpairs ); 7392258Scsgr } 7402258Scsgr 7412258Scsgr for ( i = 1; i <= numecs; ++i ) 7422258Scsgr if ( state[i] == 0 ) 7432258Scsgr tnxt[tmpbase + i] = 0; 7442258Scsgr else 7452258Scsgr { 7462258Scsgr transset[tsptr++] = i; 7472258Scsgr tnxt[tmpbase + i] = comstate; 7482258Scsgr } 7492258Scsgr 7502258Scsgr if ( usemecs ) 7512258Scsgr mkeccl( transset, tsptr, tecfwd, tecbck, numecs, 0 ); 7522258Scsgr 7532258Scsgr mkprot( tnxt + tmpbase, -numtemps, comstate ); 7542258Scsgr 7552258Scsgr /* We rely on the fact that mkprot adds things to the beginning 7562258Scsgr * of the proto queue. 7572258Scsgr */ 7582258Scsgr 7592258Scsgr numdiff = tbldiff( state, firstprot, tmp ); 7602258Scsgr mkentry( tmp, numecs, statenum, -numtemps, numdiff ); 7612258Scsgr } 7622258Scsgr 7632258Scsgr 7642258Scsgr/* mv2front - move proto queue element to front of queue */ 7652258Scsgr 7662258Scsgrvoid mv2front( qelm ) 7672258Scsgrint qelm; 7682258Scsgr { 7692258Scsgr if ( firstprot != qelm ) 7702258Scsgr { 7712258Scsgr if ( qelm == lastprot ) 7722258Scsgr lastprot = protprev[lastprot]; 7732258Scsgr 7742258Scsgr protnext[protprev[qelm]] = protnext[qelm]; 7752258Scsgr 7762258Scsgr if ( protnext[qelm] != NIL ) 7772258Scsgr protprev[protnext[qelm]] = protprev[qelm]; 7782258Scsgr 7792258Scsgr protprev[qelm] = NIL; 7802258Scsgr protnext[qelm] = firstprot; 7812258Scsgr protprev[firstprot] = qelm; 7822258Scsgr firstprot = qelm; 7832258Scsgr } 7842258Scsgr } 7852258Scsgr 7862258Scsgr 7872258Scsgr/* place_state - place a state into full speed transition table 7882258Scsgr * 7892258Scsgr * State is the statenum'th state. It is indexed by equivalence class and 7902258Scsgr * gives the number of the state to enter for a given equivalence class. 7912258Scsgr * Transnum is the number of out-transitions for the state. 7922258Scsgr */ 7932258Scsgr 7942258Scsgrvoid place_state( state, statenum, transnum ) 7952258Scsgrint *state, statenum, transnum; 7962258Scsgr { 797179549Sdwmalone int i; 798179549Sdwmalone int *state_ptr; 7992258Scsgr int position = find_table_space( state, transnum ); 8002258Scsgr 8012258Scsgr /* "base" is the table of start positions. */ 8022258Scsgr base[statenum] = position; 8032258Scsgr 8042258Scsgr /* Put in action number marker; this non-zero number makes sure that 8052258Scsgr * find_table_space() knows that this position in chk/nxt is taken 8062258Scsgr * and should not be used for another accepting number in another 8072258Scsgr * state. 8082258Scsgr */ 8092258Scsgr chk[position - 1] = 1; 8102258Scsgr 8112258Scsgr /* Put in end-of-buffer marker; this is for the same purposes as 8122258Scsgr * above. 8132258Scsgr */ 8142258Scsgr chk[position] = 1; 8152258Scsgr 8162258Scsgr /* Place the state into chk and nxt. */ 8172258Scsgr state_ptr = &state[1]; 8182258Scsgr 8192258Scsgr for ( i = 1; i <= numecs; ++i, ++state_ptr ) 8202258Scsgr if ( *state_ptr != 0 ) 8212258Scsgr { 8222258Scsgr chk[position + i] = i; 8232258Scsgr nxt[position + i] = *state_ptr; 8242258Scsgr } 8252258Scsgr 8262258Scsgr if ( position + numecs > tblend ) 8272258Scsgr tblend = position + numecs; 8282258Scsgr } 8292258Scsgr 8302258Scsgr 8312258Scsgr/* stack1 - save states with only one out-transition to be processed later 8322258Scsgr * 8332258Scsgr * If there's room for another state on the "one-transition" stack, the 8342258Scsgr * state is pushed onto it, to be processed later by mk1tbl. If there's 8352258Scsgr * no room, we process the sucker right now. 8362258Scsgr */ 8372258Scsgr 8382258Scsgrvoid stack1( statenum, sym, nextstate, deflink ) 8392258Scsgrint statenum, sym, nextstate, deflink; 8402258Scsgr { 8412258Scsgr if ( onesp >= ONE_STACK_SIZE - 1 ) 8422258Scsgr mk1tbl( statenum, sym, nextstate, deflink ); 8432258Scsgr 8442258Scsgr else 8452258Scsgr { 8462258Scsgr ++onesp; 8472258Scsgr onestate[onesp] = statenum; 8482258Scsgr onesym[onesp] = sym; 8492258Scsgr onenext[onesp] = nextstate; 8502258Scsgr onedef[onesp] = deflink; 8512258Scsgr } 8522258Scsgr } 8532258Scsgr 8542258Scsgr 8552258Scsgr/* tbldiff - compute differences between two state tables 8562258Scsgr * 8572258Scsgr * "state" is the state array which is to be extracted from the pr'th 8582258Scsgr * proto. "pr" is both the number of the proto we are extracting from 8592258Scsgr * and an index into the save area where we can find the proto's complete 8602258Scsgr * state table. Each entry in "state" which differs from the corresponding 8612258Scsgr * entry of "pr" will appear in "ext". 8622258Scsgr * 8632258Scsgr * Entries which are the same in both "state" and "pr" will be marked 8642258Scsgr * as transitions to "SAME_TRANS" in "ext". The total number of differences 8652258Scsgr * between "state" and "pr" is returned as function value. Note that this 8662258Scsgr * number is "numecs" minus the number of "SAME_TRANS" entries in "ext". 8672258Scsgr */ 8682258Scsgr 8692258Scsgrint tbldiff( state, pr, ext ) 8702258Scsgrint state[], pr, ext[]; 8712258Scsgr { 872179549Sdwmalone int i, *sp = state, *ep = ext, *protp; 873179549Sdwmalone int numdiff = 0; 8742258Scsgr 8752258Scsgr protp = &protsave[numecs * (pr - 1)]; 8762258Scsgr 8772258Scsgr for ( i = numecs; i > 0; --i ) 8782258Scsgr { 8792258Scsgr if ( *++protp == *++sp ) 8802258Scsgr *++ep = SAME_TRANS; 8812258Scsgr else 8822258Scsgr { 8832258Scsgr *++ep = *sp; 8842258Scsgr ++numdiff; 8852258Scsgr } 8862258Scsgr } 8872258Scsgr 8882258Scsgr return numdiff; 8892258Scsgr } 890