ecs.c revision 52555
12258Scsgr/* ecs - equivalence class routines */ 22258Scsgr 32258Scsgr/*- 42258Scsgr * Copyright (c) 1990 The Regents of the University of California. 52258Scsgr * All rights reserved. 62258Scsgr * 72258Scsgr * This code is derived from software contributed to Berkeley by 82258Scsgr * Vern Paxson. 916519Snate * 102258Scsgr * The United States Government has rights in this work pursuant 112258Scsgr * to contract no. DE-AC03-76SF00098 between the United States 122258Scsgr * Department of Energy and the University of California. 132258Scsgr * 142258Scsgr * Redistribution and use in source and binary forms are permitted provided 152258Scsgr * that: (1) source distributions retain this entire copyright notice and 162258Scsgr * comment, and (2) distributions including binaries display the following 172258Scsgr * acknowledgement: ``This product includes software developed by the 182258Scsgr * University of California, Berkeley and its contributors'' in the 192258Scsgr * documentation or other materials provided with the distribution and in 202258Scsgr * all advertising materials mentioning features or use of this software. 212258Scsgr * Neither the name of the University nor the names of its contributors may 222258Scsgr * be used to endorse or promote products derived from this software without 232258Scsgr * specific prior written permission. 242258Scsgr * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED 252258Scsgr * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 262258Scsgr * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 272258Scsgr */ 282258Scsgr 2952555Sobrien/* $Header: /home/daffy/u0/vern/flex/RCS/ecs.c,v 2.9 93/12/07 10:18:20 vern Exp $ */ 3050488Speter/* $FreeBSD: head/usr.bin/lex/ecs.c 52555 1999-10-27 07:56:49Z obrien $ */ 312258Scsgr 322258Scsgr#include "flexdef.h" 332258Scsgr 342258Scsgr/* ccl2ecl - convert character classes to set of equivalence classes */ 352258Scsgr 362258Scsgrvoid ccl2ecl() 372258Scsgr { 382258Scsgr int i, ich, newlen, cclp, ccls, cclmec; 392258Scsgr 402258Scsgr for ( i = 1; i <= lastccl; ++i ) 412258Scsgr { 422258Scsgr /* We loop through each character class, and for each character 432258Scsgr * in the class, add the character's equivalence class to the 442258Scsgr * new "character" class we are creating. Thus when we are all 452258Scsgr * done, character classes will really consist of collections 462258Scsgr * of equivalence classes 472258Scsgr */ 482258Scsgr 492258Scsgr newlen = 0; 502258Scsgr cclp = cclmap[i]; 512258Scsgr 522258Scsgr for ( ccls = 0; ccls < ccllen[i]; ++ccls ) 532258Scsgr { 542258Scsgr ich = ccltbl[cclp + ccls]; 552258Scsgr cclmec = ecgroup[ich]; 562258Scsgr 572258Scsgr if ( cclmec > 0 ) 582258Scsgr { 592258Scsgr ccltbl[cclp + newlen] = cclmec; 602258Scsgr ++newlen; 612258Scsgr } 622258Scsgr } 632258Scsgr 642258Scsgr ccllen[i] = newlen; 652258Scsgr } 662258Scsgr } 672258Scsgr 682258Scsgr 692258Scsgr/* cre8ecs - associate equivalence class numbers with class members 702258Scsgr * 712258Scsgr * fwd is the forward linked-list of equivalence class members. bck 722258Scsgr * is the backward linked-list, and num is the number of class members. 732258Scsgr * 742258Scsgr * Returned is the number of classes. 752258Scsgr */ 762258Scsgr 772258Scsgrint cre8ecs( fwd, bck, num ) 782258Scsgrint fwd[], bck[], num; 792258Scsgr { 802258Scsgr int i, j, numcl; 812258Scsgr 822258Scsgr numcl = 0; 832258Scsgr 842258Scsgr /* Create equivalence class numbers. From now on, ABS( bck(x) ) 852258Scsgr * is the equivalence class number for object x. If bck(x) 862258Scsgr * is positive, then x is the representative of its equivalence 872258Scsgr * class. 882258Scsgr */ 892258Scsgr for ( i = 1; i <= num; ++i ) 902258Scsgr if ( bck[i] == NIL ) 912258Scsgr { 922258Scsgr bck[i] = ++numcl; 932258Scsgr for ( j = fwd[i]; j != NIL; j = fwd[j] ) 942258Scsgr bck[j] = -numcl; 952258Scsgr } 962258Scsgr 972258Scsgr return numcl; 982258Scsgr } 992258Scsgr 1002258Scsgr 1012258Scsgr/* mkeccl - update equivalence classes based on character class xtions 1022258Scsgr * 1032258Scsgr * synopsis 1042258Scsgr * Char ccls[]; 1052258Scsgr * int lenccl, fwd[llsiz], bck[llsiz], llsiz, NUL_mapping; 1062258Scsgr * void mkeccl( Char ccls[], int lenccl, int fwd[llsiz], int bck[llsiz], 1072258Scsgr * int llsiz, int NUL_mapping ); 1082258Scsgr * 1092258Scsgr * ccls contains the elements of the character class, lenccl is the 1102258Scsgr * number of elements in the ccl, fwd is the forward link-list of equivalent 1112258Scsgr * characters, bck is the backward link-list, and llsiz size of the link-list. 1122258Scsgr * 1132258Scsgr * NUL_mapping is the value which NUL (0) should be mapped to. 1142258Scsgr */ 1152258Scsgr 1162258Scsgrvoid mkeccl( ccls, lenccl, fwd, bck, llsiz, NUL_mapping ) 1172258ScsgrChar ccls[]; 1182258Scsgrint lenccl, fwd[], bck[], llsiz, NUL_mapping; 1192258Scsgr { 1202258Scsgr int cclp, oldec, newec; 1212258Scsgr int cclm, i, j; 1222258Scsgr static unsigned char cclflags[CSIZE]; /* initialized to all '\0' */ 1232258Scsgr 1242258Scsgr /* Note that it doesn't matter whether or not the character class is 1252258Scsgr * negated. The same results will be obtained in either case. 1262258Scsgr */ 1272258Scsgr 1282258Scsgr cclp = 0; 1292258Scsgr 1302258Scsgr while ( cclp < lenccl ) 1312258Scsgr { 1322258Scsgr cclm = ccls[cclp]; 1332258Scsgr 1342258Scsgr if ( NUL_mapping && cclm == 0 ) 1352258Scsgr cclm = NUL_mapping; 1362258Scsgr 1372258Scsgr oldec = bck[cclm]; 1382258Scsgr newec = cclm; 1392258Scsgr 1402258Scsgr j = cclp + 1; 1412258Scsgr 1422258Scsgr for ( i = fwd[cclm]; i != NIL && i <= llsiz; i = fwd[i] ) 1432258Scsgr { /* look for the symbol in the character class */ 1442258Scsgr for ( ; j < lenccl; ++j ) 1452258Scsgr { 1462258Scsgr register int ccl_char; 1472258Scsgr 1482258Scsgr if ( NUL_mapping && ccls[j] == 0 ) 1492258Scsgr ccl_char = NUL_mapping; 1502258Scsgr else 1512258Scsgr ccl_char = ccls[j]; 1522258Scsgr 1532258Scsgr if ( ccl_char > i ) 1542258Scsgr break; 1552258Scsgr 1562258Scsgr if ( ccl_char == i && ! cclflags[j] ) 1572258Scsgr { 1582258Scsgr /* We found an old companion of cclm 1592258Scsgr * in the ccl. Link it into the new 1602258Scsgr * equivalence class and flag it as 1612258Scsgr * having been processed. 1622258Scsgr */ 1632258Scsgr 1642258Scsgr bck[i] = newec; 1652258Scsgr fwd[newec] = i; 1662258Scsgr newec = i; 1672258Scsgr /* Set flag so we don't reprocess. */ 1682258Scsgr cclflags[j] = 1; 1692258Scsgr 1702258Scsgr /* Get next equivalence class member. */ 1712258Scsgr /* continue 2 */ 1722258Scsgr goto next_pt; 1732258Scsgr } 1742258Scsgr } 1752258Scsgr 1762258Scsgr /* Symbol isn't in character class. Put it in the old 1772258Scsgr * equivalence class. 1782258Scsgr */ 1792258Scsgr 1802258Scsgr bck[i] = oldec; 1812258Scsgr 1822258Scsgr if ( oldec != NIL ) 1832258Scsgr fwd[oldec] = i; 1842258Scsgr 1852258Scsgr oldec = i; 1862258Scsgr 1872258Scsgr next_pt: ; 1882258Scsgr } 1892258Scsgr 1902258Scsgr if ( bck[cclm] != NIL || oldec != bck[cclm] ) 1912258Scsgr { 1922258Scsgr bck[cclm] = NIL; 1932258Scsgr fwd[oldec] = NIL; 1942258Scsgr } 1952258Scsgr 1962258Scsgr fwd[newec] = NIL; 1972258Scsgr 1982258Scsgr /* Find next ccl member to process. */ 1992258Scsgr 2002258Scsgr for ( ++cclp; cclflags[cclp] && cclp < lenccl; ++cclp ) 2012258Scsgr { 2022258Scsgr /* Reset "doesn't need processing" flag. */ 2032258Scsgr cclflags[cclp] = 0; 2042258Scsgr } 2052258Scsgr } 2062258Scsgr } 2072258Scsgr 2082258Scsgr 2092258Scsgr/* mkechar - create equivalence class for single character */ 2102258Scsgr 2112258Scsgrvoid mkechar( tch, fwd, bck ) 2122258Scsgrint tch, fwd[], bck[]; 2132258Scsgr { 2142258Scsgr /* If until now the character has been a proper subset of 2152258Scsgr * an equivalence class, break it away to create a new ec 2162258Scsgr */ 2172258Scsgr 2182258Scsgr if ( fwd[tch] != NIL ) 2192258Scsgr bck[fwd[tch]] = bck[tch]; 2202258Scsgr 2212258Scsgr if ( bck[tch] != NIL ) 2222258Scsgr fwd[bck[tch]] = fwd[tch]; 2232258Scsgr 2242258Scsgr fwd[tch] = NIL; 2252258Scsgr bck[tch] = NIL; 2262258Scsgr } 227