ecs.c revision 16519
1224090Sdougb/* ecs - equivalence class routines */
2262706Serwin
3224090Sdougb/*-
4224090Sdougb * Copyright (c) 1990 The Regents of the University of California.
5224090Sdougb * All rights reserved.
6224090Sdougb *
7224090Sdougb * This code is derived from software contributed to Berkeley by
8224090Sdougb * Vern Paxson.
9224090Sdougb *
10224090Sdougb * The United States Government has rights in this work pursuant
11224090Sdougb * to contract no. DE-AC03-76SF00098 between the United States
12224090Sdougb * Department of Energy and the University of California.
13224090Sdougb *
14224090Sdougb * Redistribution and use in source and binary forms are permitted provided
15224090Sdougb * that: (1) source distributions retain this entire copyright notice and
16224090Sdougb * comment, and (2) distributions including binaries display the following
17234010Sdougb * acknowledgement:  ``This product includes software developed by the
18224090Sdougb * University of California, Berkeley and its contributors'' in the
19224090Sdougb * documentation or other materials provided with the distribution and in
20224090Sdougb * all advertising materials mentioning features or use of this software.
21224090Sdougb * Neither the name of the University nor the names of its contributors may
22224090Sdougb * be used to endorse or promote products derived from this software without
23224090Sdougb * specific prior written permission.
24224090Sdougb * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
25224090Sdougb * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
26224090Sdougb * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
27224090Sdougb */
28224090Sdougb
29224090Sdougb/* $Header: /home/daffy/u0/vern/flex/RCS/ecs.c,v 2.9 93/12/07 10:18:20 vern Exp $ */
30224090Sdougb
31224090Sdougb#include "flexdef.h"
32224090Sdougb
33224090Sdougb/* ccl2ecl - convert character classes to set of equivalence classes */
34224090Sdougb
35224090Sdougbvoid ccl2ecl()
36224090Sdougb	{
37224090Sdougb	int i, ich, newlen, cclp, ccls, cclmec;
38224090Sdougb
39224090Sdougb	for ( i = 1; i <= lastccl; ++i )
40224090Sdougb		{
41224090Sdougb		/* We loop through each character class, and for each character
42224090Sdougb		 * in the class, add the character's equivalence class to the
43224090Sdougb		 * new "character" class we are creating.  Thus when we are all
44224090Sdougb		 * done, character classes will really consist of collections
45224090Sdougb		 * of equivalence classes
46224090Sdougb		 */
47224090Sdougb
48224090Sdougb		newlen = 0;
49224090Sdougb		cclp = cclmap[i];
50224090Sdougb
51224090Sdougb		for ( ccls = 0; ccls < ccllen[i]; ++ccls )
52224090Sdougb			{
53224090Sdougb			ich = ccltbl[cclp + ccls];
54224090Sdougb			cclmec = ecgroup[ich];
55224090Sdougb
56224090Sdougb			if ( cclmec > 0 )
57224090Sdougb				{
58224090Sdougb				ccltbl[cclp + newlen] = cclmec;
59224090Sdougb				++newlen;
60224090Sdougb				}
61224090Sdougb			}
62224090Sdougb
63224090Sdougb		ccllen[i] = newlen;
64224090Sdougb		}
65224090Sdougb	}
66224090Sdougb
67224090Sdougb
68224090Sdougb/* cre8ecs - associate equivalence class numbers with class members
69224090Sdougb *
70224090Sdougb * fwd is the forward linked-list of equivalence class members.  bck
71224090Sdougb * is the backward linked-list, and num is the number of class members.
72224090Sdougb *
73224090Sdougb * Returned is the number of classes.
74224090Sdougb */
75224090Sdougb
76224090Sdougbint cre8ecs( fwd, bck, num )
77224090Sdougbint fwd[], bck[], num;
78224090Sdougb	{
79224090Sdougb	int i, j, numcl;
80224090Sdougb
81224090Sdougb	numcl = 0;
82224090Sdougb
83224090Sdougb	/* Create equivalence class numbers.  From now on, ABS( bck(x) )
84224090Sdougb	 * is the equivalence class number for object x.  If bck(x)
85224090Sdougb	 * is positive, then x is the representative of its equivalence
86224090Sdougb	 * class.
87224090Sdougb	 */
88224090Sdougb	for ( i = 1; i <= num; ++i )
89224090Sdougb		if ( bck[i] == NIL )
90224090Sdougb			{
91224090Sdougb			bck[i] = ++numcl;
92224090Sdougb			for ( j = fwd[i]; j != NIL; j = fwd[j] )
93224090Sdougb				bck[j] = -numcl;
94224090Sdougb			}
95224090Sdougb
96224090Sdougb	return numcl;
97224090Sdougb	}
98224090Sdougb
99224090Sdougb
100224090Sdougb/* mkeccl - update equivalence classes based on character class xtions
101224090Sdougb *
102224090Sdougb * synopsis
103224090Sdougb *    Char ccls[];
104224090Sdougb *    int lenccl, fwd[llsiz], bck[llsiz], llsiz, NUL_mapping;
105224090Sdougb *    void mkeccl( Char ccls[], int lenccl, int fwd[llsiz], int bck[llsiz],
106224090Sdougb *			int llsiz, int NUL_mapping );
107224090Sdougb *
108224090Sdougb * ccls contains the elements of the character class, lenccl is the
109224090Sdougb * number of elements in the ccl, fwd is the forward link-list of equivalent
110224090Sdougb * characters, bck is the backward link-list, and llsiz size of the link-list.
111224090Sdougb *
112224090Sdougb * NUL_mapping is the value which NUL (0) should be mapped to.
113224090Sdougb */
114224090Sdougb
115224090Sdougbvoid mkeccl( ccls, lenccl, fwd, bck, llsiz, NUL_mapping )
116224090SdougbChar ccls[];
117224090Sdougbint lenccl, fwd[], bck[], llsiz, NUL_mapping;
118224090Sdougb	{
119224090Sdougb	int cclp, oldec, newec;
120224090Sdougb	int cclm, i, j;
121224090Sdougb	static unsigned char cclflags[CSIZE];	/* initialized to all '\0' */
122224090Sdougb
123224090Sdougb	/* Note that it doesn't matter whether or not the character class is
124224090Sdougb	 * negated.  The same results will be obtained in either case.
125224090Sdougb	 */
126224090Sdougb
127224090Sdougb	cclp = 0;
128224090Sdougb
129224090Sdougb	while ( cclp < lenccl )
130224090Sdougb		{
131224090Sdougb		cclm = ccls[cclp];
132224090Sdougb
133224090Sdougb		if ( NUL_mapping && cclm == 0 )
134224090Sdougb			cclm = NUL_mapping;
135224090Sdougb
136224090Sdougb		oldec = bck[cclm];
137224090Sdougb		newec = cclm;
138224090Sdougb
139224090Sdougb		j = cclp + 1;
140224090Sdougb
141224090Sdougb		for ( i = fwd[cclm]; i != NIL && i <= llsiz; i = fwd[i] )
142224090Sdougb			{ /* look for the symbol in the character class */
143224090Sdougb			for ( ; j < lenccl; ++j )
144224090Sdougb				{
145224090Sdougb				register int ccl_char;
146224090Sdougb
147224090Sdougb				if ( NUL_mapping && ccls[j] == 0 )
148224090Sdougb					ccl_char = NUL_mapping;
149224090Sdougb				else
150224090Sdougb					ccl_char = ccls[j];
151224090Sdougb
152262706Serwin				if ( ccl_char > i )
153262706Serwin					break;
154262706Serwin
155262706Serwin				if ( ccl_char == i && ! cclflags[j] )
156262706Serwin					{
157262706Serwin					/* We found an old companion of cclm
158262706Serwin					 * in the ccl.  Link it into the new
159224090Sdougb					 * equivalence class and flag it as
160224090Sdougb					 * having been processed.
161224090Sdougb					 */
162224090Sdougb
163224090Sdougb					bck[i] = newec;
164224090Sdougb					fwd[newec] = i;
165224090Sdougb					newec = i;
166224090Sdougb					/* Set flag so we don't reprocess. */
167224090Sdougb					cclflags[j] = 1;
168224090Sdougb
169224090Sdougb					/* Get next equivalence class member. */
170224090Sdougb					/* continue 2 */
171262706Serwin					goto next_pt;
172262706Serwin					}
173262706Serwin				}
174262706Serwin
175262706Serwin			/* Symbol isn't in character class.  Put it in the old
176262706Serwin			 * equivalence class.
177224090Sdougb			 */
178224090Sdougb
179224090Sdougb			bck[i] = oldec;
180224090Sdougb
181224090Sdougb			if ( oldec != NIL )
182224090Sdougb				fwd[oldec] = i;
183224090Sdougb
184224090Sdougb			oldec = i;
185224090Sdougb
186224090Sdougb			next_pt: ;
187224090Sdougb			}
188224090Sdougb
189224090Sdougb		if ( bck[cclm] != NIL || oldec != bck[cclm] )
190224090Sdougb			{
191224090Sdougb			bck[cclm] = NIL;
192224090Sdougb			fwd[oldec] = NIL;
193224090Sdougb			}
194224090Sdougb
195224090Sdougb		fwd[newec] = NIL;
196224090Sdougb
197224090Sdougb		/* Find next ccl member to process. */
198224090Sdougb
199224090Sdougb		for ( ++cclp; cclflags[cclp] && cclp < lenccl; ++cclp )
200224090Sdougb			{
201224090Sdougb			/* Reset "doesn't need processing" flag. */
202224090Sdougb			cclflags[cclp] = 0;
203224090Sdougb			}
204224090Sdougb		}
205224090Sdougb	}
206224090Sdougb
207224090Sdougb
208224090Sdougb/* mkechar - create equivalence class for single character */
209224090Sdougb
210224090Sdougbvoid mkechar( tch, fwd, bck )
211224090Sdougbint tch, fwd[], bck[];
212224090Sdougb	{
213224090Sdougb	/* If until now the character has been a proper subset of
214224090Sdougb	 * an equivalence class, break it away to create a new ec
215224090Sdougb	 */
216224090Sdougb
217224090Sdougb	if ( fwd[tch] != NIL )
218224090Sdougb		bck[fwd[tch]] = bck[tch];
219224090Sdougb
220224090Sdougb	if ( bck[tch] != NIL )
221224090Sdougb		fwd[bck[tch]] = fwd[tch];
222224090Sdougb
223224090Sdougb	fwd[tch] = NIL;
224224090Sdougb	bck[tch] = NIL;
225224090Sdougb	}
226224090Sdougb