ecs.c revision 16519
1224090Sdougb/* ecs - equivalence class routines */ 2262706Serwin 3224090Sdougb/*- 4224090Sdougb * Copyright (c) 1990 The Regents of the University of California. 5224090Sdougb * All rights reserved. 6224090Sdougb * 7224090Sdougb * This code is derived from software contributed to Berkeley by 8224090Sdougb * Vern Paxson. 9224090Sdougb * 10224090Sdougb * The United States Government has rights in this work pursuant 11224090Sdougb * to contract no. DE-AC03-76SF00098 between the United States 12224090Sdougb * Department of Energy and the University of California. 13224090Sdougb * 14224090Sdougb * Redistribution and use in source and binary forms are permitted provided 15224090Sdougb * that: (1) source distributions retain this entire copyright notice and 16224090Sdougb * comment, and (2) distributions including binaries display the following 17234010Sdougb * acknowledgement: ``This product includes software developed by the 18224090Sdougb * University of California, Berkeley and its contributors'' in the 19224090Sdougb * documentation or other materials provided with the distribution and in 20224090Sdougb * all advertising materials mentioning features or use of this software. 21224090Sdougb * Neither the name of the University nor the names of its contributors may 22224090Sdougb * be used to endorse or promote products derived from this software without 23224090Sdougb * specific prior written permission. 24224090Sdougb * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED 25224090Sdougb * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 26224090Sdougb * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 27224090Sdougb */ 28224090Sdougb 29224090Sdougb/* $Header: /home/daffy/u0/vern/flex/RCS/ecs.c,v 2.9 93/12/07 10:18:20 vern Exp $ */ 30224090Sdougb 31224090Sdougb#include "flexdef.h" 32224090Sdougb 33224090Sdougb/* ccl2ecl - convert character classes to set of equivalence classes */ 34224090Sdougb 35224090Sdougbvoid ccl2ecl() 36224090Sdougb { 37224090Sdougb int i, ich, newlen, cclp, ccls, cclmec; 38224090Sdougb 39224090Sdougb for ( i = 1; i <= lastccl; ++i ) 40224090Sdougb { 41224090Sdougb /* We loop through each character class, and for each character 42224090Sdougb * in the class, add the character's equivalence class to the 43224090Sdougb * new "character" class we are creating. Thus when we are all 44224090Sdougb * done, character classes will really consist of collections 45224090Sdougb * of equivalence classes 46224090Sdougb */ 47224090Sdougb 48224090Sdougb newlen = 0; 49224090Sdougb cclp = cclmap[i]; 50224090Sdougb 51224090Sdougb for ( ccls = 0; ccls < ccllen[i]; ++ccls ) 52224090Sdougb { 53224090Sdougb ich = ccltbl[cclp + ccls]; 54224090Sdougb cclmec = ecgroup[ich]; 55224090Sdougb 56224090Sdougb if ( cclmec > 0 ) 57224090Sdougb { 58224090Sdougb ccltbl[cclp + newlen] = cclmec; 59224090Sdougb ++newlen; 60224090Sdougb } 61224090Sdougb } 62224090Sdougb 63224090Sdougb ccllen[i] = newlen; 64224090Sdougb } 65224090Sdougb } 66224090Sdougb 67224090Sdougb 68224090Sdougb/* cre8ecs - associate equivalence class numbers with class members 69224090Sdougb * 70224090Sdougb * fwd is the forward linked-list of equivalence class members. bck 71224090Sdougb * is the backward linked-list, and num is the number of class members. 72224090Sdougb * 73224090Sdougb * Returned is the number of classes. 74224090Sdougb */ 75224090Sdougb 76224090Sdougbint cre8ecs( fwd, bck, num ) 77224090Sdougbint fwd[], bck[], num; 78224090Sdougb { 79224090Sdougb int i, j, numcl; 80224090Sdougb 81224090Sdougb numcl = 0; 82224090Sdougb 83224090Sdougb /* Create equivalence class numbers. From now on, ABS( bck(x) ) 84224090Sdougb * is the equivalence class number for object x. If bck(x) 85224090Sdougb * is positive, then x is the representative of its equivalence 86224090Sdougb * class. 87224090Sdougb */ 88224090Sdougb for ( i = 1; i <= num; ++i ) 89224090Sdougb if ( bck[i] == NIL ) 90224090Sdougb { 91224090Sdougb bck[i] = ++numcl; 92224090Sdougb for ( j = fwd[i]; j != NIL; j = fwd[j] ) 93224090Sdougb bck[j] = -numcl; 94224090Sdougb } 95224090Sdougb 96224090Sdougb return numcl; 97224090Sdougb } 98224090Sdougb 99224090Sdougb 100224090Sdougb/* mkeccl - update equivalence classes based on character class xtions 101224090Sdougb * 102224090Sdougb * synopsis 103224090Sdougb * Char ccls[]; 104224090Sdougb * int lenccl, fwd[llsiz], bck[llsiz], llsiz, NUL_mapping; 105224090Sdougb * void mkeccl( Char ccls[], int lenccl, int fwd[llsiz], int bck[llsiz], 106224090Sdougb * int llsiz, int NUL_mapping ); 107224090Sdougb * 108224090Sdougb * ccls contains the elements of the character class, lenccl is the 109224090Sdougb * number of elements in the ccl, fwd is the forward link-list of equivalent 110224090Sdougb * characters, bck is the backward link-list, and llsiz size of the link-list. 111224090Sdougb * 112224090Sdougb * NUL_mapping is the value which NUL (0) should be mapped to. 113224090Sdougb */ 114224090Sdougb 115224090Sdougbvoid mkeccl( ccls, lenccl, fwd, bck, llsiz, NUL_mapping ) 116224090SdougbChar ccls[]; 117224090Sdougbint lenccl, fwd[], bck[], llsiz, NUL_mapping; 118224090Sdougb { 119224090Sdougb int cclp, oldec, newec; 120224090Sdougb int cclm, i, j; 121224090Sdougb static unsigned char cclflags[CSIZE]; /* initialized to all '\0' */ 122224090Sdougb 123224090Sdougb /* Note that it doesn't matter whether or not the character class is 124224090Sdougb * negated. The same results will be obtained in either case. 125224090Sdougb */ 126224090Sdougb 127224090Sdougb cclp = 0; 128224090Sdougb 129224090Sdougb while ( cclp < lenccl ) 130224090Sdougb { 131224090Sdougb cclm = ccls[cclp]; 132224090Sdougb 133224090Sdougb if ( NUL_mapping && cclm == 0 ) 134224090Sdougb cclm = NUL_mapping; 135224090Sdougb 136224090Sdougb oldec = bck[cclm]; 137224090Sdougb newec = cclm; 138224090Sdougb 139224090Sdougb j = cclp + 1; 140224090Sdougb 141224090Sdougb for ( i = fwd[cclm]; i != NIL && i <= llsiz; i = fwd[i] ) 142224090Sdougb { /* look for the symbol in the character class */ 143224090Sdougb for ( ; j < lenccl; ++j ) 144224090Sdougb { 145224090Sdougb register int ccl_char; 146224090Sdougb 147224090Sdougb if ( NUL_mapping && ccls[j] == 0 ) 148224090Sdougb ccl_char = NUL_mapping; 149224090Sdougb else 150224090Sdougb ccl_char = ccls[j]; 151224090Sdougb 152262706Serwin if ( ccl_char > i ) 153262706Serwin break; 154262706Serwin 155262706Serwin if ( ccl_char == i && ! cclflags[j] ) 156262706Serwin { 157262706Serwin /* We found an old companion of cclm 158262706Serwin * in the ccl. Link it into the new 159224090Sdougb * equivalence class and flag it as 160224090Sdougb * having been processed. 161224090Sdougb */ 162224090Sdougb 163224090Sdougb bck[i] = newec; 164224090Sdougb fwd[newec] = i; 165224090Sdougb newec = i; 166224090Sdougb /* Set flag so we don't reprocess. */ 167224090Sdougb cclflags[j] = 1; 168224090Sdougb 169224090Sdougb /* Get next equivalence class member. */ 170224090Sdougb /* continue 2 */ 171262706Serwin goto next_pt; 172262706Serwin } 173262706Serwin } 174262706Serwin 175262706Serwin /* Symbol isn't in character class. Put it in the old 176262706Serwin * equivalence class. 177224090Sdougb */ 178224090Sdougb 179224090Sdougb bck[i] = oldec; 180224090Sdougb 181224090Sdougb if ( oldec != NIL ) 182224090Sdougb fwd[oldec] = i; 183224090Sdougb 184224090Sdougb oldec = i; 185224090Sdougb 186224090Sdougb next_pt: ; 187224090Sdougb } 188224090Sdougb 189224090Sdougb if ( bck[cclm] != NIL || oldec != bck[cclm] ) 190224090Sdougb { 191224090Sdougb bck[cclm] = NIL; 192224090Sdougb fwd[oldec] = NIL; 193224090Sdougb } 194224090Sdougb 195224090Sdougb fwd[newec] = NIL; 196224090Sdougb 197224090Sdougb /* Find next ccl member to process. */ 198224090Sdougb 199224090Sdougb for ( ++cclp; cclflags[cclp] && cclp < lenccl; ++cclp ) 200224090Sdougb { 201224090Sdougb /* Reset "doesn't need processing" flag. */ 202224090Sdougb cclflags[cclp] = 0; 203224090Sdougb } 204224090Sdougb } 205224090Sdougb } 206224090Sdougb 207224090Sdougb 208224090Sdougb/* mkechar - create equivalence class for single character */ 209224090Sdougb 210224090Sdougbvoid mkechar( tch, fwd, bck ) 211224090Sdougbint tch, fwd[], bck[]; 212224090Sdougb { 213224090Sdougb /* If until now the character has been a proper subset of 214224090Sdougb * an equivalence class, break it away to create a new ec 215224090Sdougb */ 216224090Sdougb 217224090Sdougb if ( fwd[tch] != NIL ) 218224090Sdougb bck[fwd[tch]] = bck[tch]; 219224090Sdougb 220224090Sdougb if ( bck[tch] != NIL ) 221224090Sdougb fwd[bck[tch]] = fwd[tch]; 222224090Sdougb 223224090Sdougb fwd[tch] = NIL; 224224090Sdougb bck[tch] = NIL; 225224090Sdougb } 226224090Sdougb