1/* ccl - routines for character classes */
2
3/*  Copyright (c) 1990 The Regents of the University of California. */
4/*  All rights reserved. */
5
6/*  This code is derived from software contributed to Berkeley by */
7/*  Vern Paxson. */
8
9/*  The United States Government has rights in this work pursuant */
10/*  to contract no. DE-AC03-76SF00098 between the United States */
11 /*  Department of Energy and the University of California. */
12
13/*  This file is part of flex. */
14
15/*  Redistribution and use in source and binary forms, with or without */
16/*  modification, are permitted provided that the following conditions */
17/*  are met: */
18
19/*  1. Redistributions of source code must retain the above copyright */
20/*     notice, this list of conditions and the following disclaimer. */
21/*  2. Redistributions in binary form must reproduce the above copyright */
22/*     notice, this list of conditions and the following disclaimer in the */
23/*     documentation and/or other materials provided with the distribution. */
24
25/*  Neither the name of the University nor the names of its contributors */
26/*  may be used to endorse or promote products derived from this software */
27/*  without specific prior written permission. */
28
29/*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
30/*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
31/*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
32/*  PURPOSE. */
33
34#include "flexdef.h"
35
36/* return true if the chr is in the ccl. Takes negation into account. */
37static bool
38ccl_contains (const int cclp, const int ch)
39{
40	int     ind, len, i;
41
42	len = ccllen[cclp];
43	ind = cclmap[cclp];
44
45	for (i = 0; i < len; ++i)
46		if (ccltbl[ind + i] == ch)
47			return !cclng[cclp];
48
49    return cclng[cclp];
50}
51
52
53/* ccladd - add a single character to a ccl */
54
55void    ccladd (cclp, ch)
56     int     cclp;
57     int     ch;
58{
59	int     ind, len, newpos, i;
60
61	check_char (ch);
62
63	len = ccllen[cclp];
64	ind = cclmap[cclp];
65
66	/* check to see if the character is already in the ccl */
67
68	for (i = 0; i < len; ++i)
69		if (ccltbl[ind + i] == ch)
70			return;
71
72	/* mark newlines */
73	if (ch == nlch)
74		ccl_has_nl[cclp] = true;
75
76	newpos = ind + len;
77
78	if (newpos >= current_max_ccl_tbl_size) {
79		current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;
80
81		++num_reallocs;
82
83		ccltbl = reallocate_Character_array (ccltbl,
84						     current_max_ccl_tbl_size);
85	}
86
87	ccllen[cclp] = len + 1;
88	ccltbl[newpos] = ch;
89}
90
91/* dump_cclp - same thing as list_character_set, but for cclps.  */
92
93static void    dump_cclp (FILE* file, int cclp)
94{
95	int i;
96
97	putc ('[', file);
98
99	for (i = 0; i < csize; ++i) {
100		if (ccl_contains(cclp, i)){
101			int start_char = i;
102
103			putc (' ', file);
104
105			fputs (readable_form (i), file);
106
107			while (++i < csize && ccl_contains(cclp,i)) ;
108
109			if (i - 1 > start_char)
110				/* this was a run */
111				fprintf (file, "-%s",
112					 readable_form (i - 1));
113
114			putc (' ', file);
115		}
116	}
117
118	putc (']', file);
119}
120
121
122
123/* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */
124int
125ccl_set_diff (int a, int b)
126{
127    int  d, ch;
128
129    /* create new class  */
130    d = cclinit();
131
132    /* In order to handle negation, we spin through all possible chars,
133     * addding each char in a that is not in b.
134     * (This could be O(n^2), but n is small and bounded.)
135     */
136	for ( ch = 0; ch < csize; ++ch )
137        if (ccl_contains (a, ch) && !ccl_contains(b, ch))
138            ccladd (d, ch);
139
140    /* debug */
141    if (0){
142        fprintf(stderr, "ccl_set_diff (");
143            fprintf(stderr, "\n    ");
144            dump_cclp (stderr, a);
145            fprintf(stderr, "\n    ");
146            dump_cclp (stderr, b);
147            fprintf(stderr, "\n    ");
148            dump_cclp (stderr, d);
149        fprintf(stderr, "\n)\n");
150    }
151    return d;
152}
153
154/* ccl_set_union - create a new ccl as the set union of the two given ccls. */
155int
156ccl_set_union (int a, int b)
157{
158    int  d, i;
159
160    /* create new class  */
161    d = cclinit();
162
163    /* Add all of a */
164    for (i = 0; i < ccllen[a]; ++i)
165		ccladd (d, ccltbl[cclmap[a] + i]);
166
167    /* Add all of b */
168    for (i = 0; i < ccllen[b]; ++i)
169		ccladd (d, ccltbl[cclmap[b] + i]);
170
171    /* debug */
172    if (0){
173        fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d);
174            fprintf(stderr, "\n    ");
175            dump_cclp (stderr, a);
176            fprintf(stderr, "\n    ");
177            dump_cclp (stderr, b);
178            fprintf(stderr, "\n    ");
179            dump_cclp (stderr, d);
180        fprintf(stderr, "\n)\n");
181    }
182    return d;
183}
184
185
186/* cclinit - return an empty ccl */
187
188int     cclinit ()
189{
190	if (++lastccl >= current_maxccls) {
191		current_maxccls += MAX_CCLS_INCREMENT;
192
193		++num_reallocs;
194
195		cclmap =
196			reallocate_integer_array (cclmap, current_maxccls);
197		ccllen =
198			reallocate_integer_array (ccllen, current_maxccls);
199		cclng = reallocate_integer_array (cclng, current_maxccls);
200		ccl_has_nl =
201			reallocate_bool_array (ccl_has_nl,
202					       current_maxccls);
203	}
204
205	if (lastccl == 1)
206		/* we're making the first ccl */
207		cclmap[lastccl] = 0;
208
209	else
210		/* The new pointer is just past the end of the last ccl.
211		 * Since the cclmap points to the \first/ character of a
212		 * ccl, adding the length of the ccl to the cclmap pointer
213		 * will produce a cursor to the first free space.
214		 */
215		cclmap[lastccl] =
216			cclmap[lastccl - 1] + ccllen[lastccl - 1];
217
218	ccllen[lastccl] = 0;
219	cclng[lastccl] = 0;	/* ccl's start out life un-negated */
220	ccl_has_nl[lastccl] = false;
221
222	return lastccl;
223}
224
225
226/* cclnegate - negate the given ccl */
227
228void    cclnegate (cclp)
229     int     cclp;
230{
231	cclng[cclp] = 1;
232	ccl_has_nl[cclp] = !ccl_has_nl[cclp];
233}
234
235
236/* list_character_set - list the members of a set of characters in CCL form
237 *
238 * Writes to the given file a character-class representation of those
239 * characters present in the given CCL.  A character is present if it
240 * has a non-zero value in the cset array.
241 */
242
243void    list_character_set (file, cset)
244     FILE   *file;
245     int     cset[];
246{
247	int i;
248
249	putc ('[', file);
250
251	for (i = 0; i < csize; ++i) {
252		if (cset[i]) {
253			int start_char = i;
254
255			putc (' ', file);
256
257			fputs (readable_form (i), file);
258
259			while (++i < csize && cset[i]) ;
260
261			if (i - 1 > start_char)
262				/* this was a run */
263				fprintf (file, "-%s",
264					 readable_form (i - 1));
265
266			putc (' ', file);
267		}
268	}
269
270	putc (']', file);
271}
272
273/** Determines if the range [c1-c2] is unambiguous in a case-insensitive
274 * scanner.  Specifically, if a lowercase or uppercase character, x, is in the
275 * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also
276 * be in the range. If not, then this range is ambiguous, and the function
277 * returns false.  For example, [@-_] spans [a-z] but not [A-Z].  Beware that
278 * [a-z] will be labeled ambiguous because it does not include [A-Z].
279 *
280 * @param c1 the lower end of the range
281 * @param c2 the upper end of the range
282 * @return true if [c1-c2] is not ambiguous for a caseless scanner.
283 */
284bool range_covers_case (int c1, int c2)
285{
286	int     i, o;
287
288	for (i = c1; i <= c2; i++) {
289		if (has_case (i)) {
290			o = reverse_case (i);
291			if (o < c1 || c2 < o)
292				return false;
293		}
294	}
295	return true;
296}
297
298/** Reverse the case of a character, if possible.
299 * @return c if case-reversal does not apply.
300 */
301int reverse_case (int c)
302{
303	return isupper (c) ? tolower (c) : (islower (c) ? toupper (c) : c);
304}
305
306/** Return true if c is uppercase or lowercase. */
307bool has_case (int c)
308{
309	return (isupper (c) || islower (c)) ? true : false;
310}
311