ecs.c revision 52555
12258Scsgr/* ecs - equivalence class routines */
22258Scsgr
32258Scsgr/*-
42258Scsgr * Copyright (c) 1990 The Regents of the University of California.
52258Scsgr * All rights reserved.
62258Scsgr *
72258Scsgr * This code is derived from software contributed to Berkeley by
82258Scsgr * Vern Paxson.
916519Snate *
102258Scsgr * The United States Government has rights in this work pursuant
112258Scsgr * to contract no. DE-AC03-76SF00098 between the United States
122258Scsgr * Department of Energy and the University of California.
132258Scsgr *
142258Scsgr * Redistribution and use in source and binary forms are permitted provided
152258Scsgr * that: (1) source distributions retain this entire copyright notice and
162258Scsgr * comment, and (2) distributions including binaries display the following
172258Scsgr * acknowledgement:  ``This product includes software developed by the
182258Scsgr * University of California, Berkeley and its contributors'' in the
192258Scsgr * documentation or other materials provided with the distribution and in
202258Scsgr * all advertising materials mentioning features or use of this software.
212258Scsgr * Neither the name of the University nor the names of its contributors may
222258Scsgr * be used to endorse or promote products derived from this software without
232258Scsgr * specific prior written permission.
242258Scsgr * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
252258Scsgr * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
262258Scsgr * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
272258Scsgr */
282258Scsgr
2952555Sobrien/* $Header: /home/daffy/u0/vern/flex/RCS/ecs.c,v 2.9 93/12/07 10:18:20 vern Exp $ */
3050488Speter/* $FreeBSD: head/usr.bin/lex/ecs.c 52555 1999-10-27 07:56:49Z obrien $ */
312258Scsgr
322258Scsgr#include "flexdef.h"
332258Scsgr
342258Scsgr/* ccl2ecl - convert character classes to set of equivalence classes */
352258Scsgr
362258Scsgrvoid ccl2ecl()
372258Scsgr	{
382258Scsgr	int i, ich, newlen, cclp, ccls, cclmec;
392258Scsgr
402258Scsgr	for ( i = 1; i <= lastccl; ++i )
412258Scsgr		{
422258Scsgr		/* We loop through each character class, and for each character
432258Scsgr		 * in the class, add the character's equivalence class to the
442258Scsgr		 * new "character" class we are creating.  Thus when we are all
452258Scsgr		 * done, character classes will really consist of collections
462258Scsgr		 * of equivalence classes
472258Scsgr		 */
482258Scsgr
492258Scsgr		newlen = 0;
502258Scsgr		cclp = cclmap[i];
512258Scsgr
522258Scsgr		for ( ccls = 0; ccls < ccllen[i]; ++ccls )
532258Scsgr			{
542258Scsgr			ich = ccltbl[cclp + ccls];
552258Scsgr			cclmec = ecgroup[ich];
562258Scsgr
572258Scsgr			if ( cclmec > 0 )
582258Scsgr				{
592258Scsgr				ccltbl[cclp + newlen] = cclmec;
602258Scsgr				++newlen;
612258Scsgr				}
622258Scsgr			}
632258Scsgr
642258Scsgr		ccllen[i] = newlen;
652258Scsgr		}
662258Scsgr	}
672258Scsgr
682258Scsgr
692258Scsgr/* cre8ecs - associate equivalence class numbers with class members
702258Scsgr *
712258Scsgr * fwd is the forward linked-list of equivalence class members.  bck
722258Scsgr * is the backward linked-list, and num is the number of class members.
732258Scsgr *
742258Scsgr * Returned is the number of classes.
752258Scsgr */
762258Scsgr
772258Scsgrint cre8ecs( fwd, bck, num )
782258Scsgrint fwd[], bck[], num;
792258Scsgr	{
802258Scsgr	int i, j, numcl;
812258Scsgr
822258Scsgr	numcl = 0;
832258Scsgr
842258Scsgr	/* Create equivalence class numbers.  From now on, ABS( bck(x) )
852258Scsgr	 * is the equivalence class number for object x.  If bck(x)
862258Scsgr	 * is positive, then x is the representative of its equivalence
872258Scsgr	 * class.
882258Scsgr	 */
892258Scsgr	for ( i = 1; i <= num; ++i )
902258Scsgr		if ( bck[i] == NIL )
912258Scsgr			{
922258Scsgr			bck[i] = ++numcl;
932258Scsgr			for ( j = fwd[i]; j != NIL; j = fwd[j] )
942258Scsgr				bck[j] = -numcl;
952258Scsgr			}
962258Scsgr
972258Scsgr	return numcl;
982258Scsgr	}
992258Scsgr
1002258Scsgr
1012258Scsgr/* mkeccl - update equivalence classes based on character class xtions
1022258Scsgr *
1032258Scsgr * synopsis
1042258Scsgr *    Char ccls[];
1052258Scsgr *    int lenccl, fwd[llsiz], bck[llsiz], llsiz, NUL_mapping;
1062258Scsgr *    void mkeccl( Char ccls[], int lenccl, int fwd[llsiz], int bck[llsiz],
1072258Scsgr *			int llsiz, int NUL_mapping );
1082258Scsgr *
1092258Scsgr * ccls contains the elements of the character class, lenccl is the
1102258Scsgr * number of elements in the ccl, fwd is the forward link-list of equivalent
1112258Scsgr * characters, bck is the backward link-list, and llsiz size of the link-list.
1122258Scsgr *
1132258Scsgr * NUL_mapping is the value which NUL (0) should be mapped to.
1142258Scsgr */
1152258Scsgr
1162258Scsgrvoid mkeccl( ccls, lenccl, fwd, bck, llsiz, NUL_mapping )
1172258ScsgrChar ccls[];
1182258Scsgrint lenccl, fwd[], bck[], llsiz, NUL_mapping;
1192258Scsgr	{
1202258Scsgr	int cclp, oldec, newec;
1212258Scsgr	int cclm, i, j;
1222258Scsgr	static unsigned char cclflags[CSIZE];	/* initialized to all '\0' */
1232258Scsgr
1242258Scsgr	/* Note that it doesn't matter whether or not the character class is
1252258Scsgr	 * negated.  The same results will be obtained in either case.
1262258Scsgr	 */
1272258Scsgr
1282258Scsgr	cclp = 0;
1292258Scsgr
1302258Scsgr	while ( cclp < lenccl )
1312258Scsgr		{
1322258Scsgr		cclm = ccls[cclp];
1332258Scsgr
1342258Scsgr		if ( NUL_mapping && cclm == 0 )
1352258Scsgr			cclm = NUL_mapping;
1362258Scsgr
1372258Scsgr		oldec = bck[cclm];
1382258Scsgr		newec = cclm;
1392258Scsgr
1402258Scsgr		j = cclp + 1;
1412258Scsgr
1422258Scsgr		for ( i = fwd[cclm]; i != NIL && i <= llsiz; i = fwd[i] )
1432258Scsgr			{ /* look for the symbol in the character class */
1442258Scsgr			for ( ; j < lenccl; ++j )
1452258Scsgr				{
1462258Scsgr				register int ccl_char;
1472258Scsgr
1482258Scsgr				if ( NUL_mapping && ccls[j] == 0 )
1492258Scsgr					ccl_char = NUL_mapping;
1502258Scsgr				else
1512258Scsgr					ccl_char = ccls[j];
1522258Scsgr
1532258Scsgr				if ( ccl_char > i )
1542258Scsgr					break;
1552258Scsgr
1562258Scsgr				if ( ccl_char == i && ! cclflags[j] )
1572258Scsgr					{
1582258Scsgr					/* We found an old companion of cclm
1592258Scsgr					 * in the ccl.  Link it into the new
1602258Scsgr					 * equivalence class and flag it as
1612258Scsgr					 * having been processed.
1622258Scsgr					 */
1632258Scsgr
1642258Scsgr					bck[i] = newec;
1652258Scsgr					fwd[newec] = i;
1662258Scsgr					newec = i;
1672258Scsgr					/* Set flag so we don't reprocess. */
1682258Scsgr					cclflags[j] = 1;
1692258Scsgr
1702258Scsgr					/* Get next equivalence class member. */
1712258Scsgr					/* continue 2 */
1722258Scsgr					goto next_pt;
1732258Scsgr					}
1742258Scsgr				}
1752258Scsgr
1762258Scsgr			/* Symbol isn't in character class.  Put it in the old
1772258Scsgr			 * equivalence class.
1782258Scsgr			 */
1792258Scsgr
1802258Scsgr			bck[i] = oldec;
1812258Scsgr
1822258Scsgr			if ( oldec != NIL )
1832258Scsgr				fwd[oldec] = i;
1842258Scsgr
1852258Scsgr			oldec = i;
1862258Scsgr
1872258Scsgr			next_pt: ;
1882258Scsgr			}
1892258Scsgr
1902258Scsgr		if ( bck[cclm] != NIL || oldec != bck[cclm] )
1912258Scsgr			{
1922258Scsgr			bck[cclm] = NIL;
1932258Scsgr			fwd[oldec] = NIL;
1942258Scsgr			}
1952258Scsgr
1962258Scsgr		fwd[newec] = NIL;
1972258Scsgr
1982258Scsgr		/* Find next ccl member to process. */
1992258Scsgr
2002258Scsgr		for ( ++cclp; cclflags[cclp] && cclp < lenccl; ++cclp )
2012258Scsgr			{
2022258Scsgr			/* Reset "doesn't need processing" flag. */
2032258Scsgr			cclflags[cclp] = 0;
2042258Scsgr			}
2052258Scsgr		}
2062258Scsgr	}
2072258Scsgr
2082258Scsgr
2092258Scsgr/* mkechar - create equivalence class for single character */
2102258Scsgr
2112258Scsgrvoid mkechar( tch, fwd, bck )
2122258Scsgrint tch, fwd[], bck[];
2132258Scsgr	{
2142258Scsgr	/* If until now the character has been a proper subset of
2152258Scsgr	 * an equivalence class, break it away to create a new ec
2162258Scsgr	 */
2172258Scsgr
2182258Scsgr	if ( fwd[tch] != NIL )
2192258Scsgr		bck[fwd[tch]] = bck[tch];
2202258Scsgr
2212258Scsgr	if ( bck[tch] != NIL )
2222258Scsgr		fwd[bck[tch]] = fwd[tch];
2232258Scsgr
2242258Scsgr	fwd[tch] = NIL;
2252258Scsgr	bck[tch] = NIL;
2262258Scsgr	}
227