Deleted Added
full compact
cset.c (132142) cset.c (200420)
1/*-
2 * Copyright (c) 2004 Tim J. Robbins.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26/*
27 * "Set of characters" ADT implemented as a splay tree of extents, with
28 * a lookup table cache to simplify looking up the first bunch of
29 * characters (which are presumably more common than others).
30 */
31
32#include <sys/cdefs.h>
1/*-
2 * Copyright (c) 2004 Tim J. Robbins.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26/*
27 * "Set of characters" ADT implemented as a splay tree of extents, with
28 * a lookup table cache to simplify looking up the first bunch of
29 * characters (which are presumably more common than others).
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/usr.bin/tr/cset.c 132142 2004-07-14 08:33:14Z tjr $");
33__FBSDID("$FreeBSD: head/usr.bin/tr/cset.c 200420 2009-12-11 23:35:38Z delphij $");
34
35#include <assert.h>
34
35#include <assert.h>
36#include <stdbool.h>
37#include <stdlib.h>
36#include <stdlib.h>
38#include <wchar.h>
39#include <wctype.h>
40#include "cset.h"
41
42static struct csnode * cset_delete(struct csnode *, wchar_t);
43static __inline int cset_rangecmp(struct csnode *, wchar_t);
44static struct csnode * cset_splay(struct csnode *, wchar_t);
45
46/*
47 * cset_alloc --
48 * Allocate a set of characters.
49 */
50struct cset *
51cset_alloc(void)
52{
53 struct cset *cs;
54
55 if ((cs = malloc(sizeof(*cs))) == NULL)
56 return (NULL);
57 cs->cs_root = NULL;
58 cs->cs_classes = NULL;
59 cs->cs_havecache = false;
60 cs->cs_invert = false;
61 return (cs);
62}
63
64/*
65 * cset_add --
66 * Add a character to the set.
67 */
68bool
69cset_add(struct cset *cs, wchar_t ch)
70{
71 struct csnode *csn, *ncsn;
72 wchar_t oval;
73
74 cs->cs_havecache = false;
75
76 /*
77 * Inserting into empty tree; new item becomes the root.
78 */
79 if (cs->cs_root == NULL) {
80 csn = malloc(sizeof(*cs->cs_root));
81 if (csn == NULL)
82 return (false);
83 csn->csn_left = csn->csn_right = NULL;
84 csn->csn_min = csn->csn_max = ch;
85 cs->cs_root = csn;
86 return (true);
87 }
88
89 /*
90 * Splay to check whether the item already exists, and otherwise,
91 * where we should put it.
92 */
93 csn = cs->cs_root = cset_splay(cs->cs_root, ch);
94
95 /*
96 * Avoid adding duplicate nodes.
97 */
98 if (cset_rangecmp(csn, ch) == 0)
99 return (true);
100
101 /*
102 * Allocate a new node and make it the new root.
103 */
104 ncsn = malloc(sizeof(*ncsn));
105 if (ncsn == NULL)
106 return (false);
107 ncsn->csn_min = ncsn->csn_max = ch;
108 if (cset_rangecmp(csn, ch) < 0) {
109 ncsn->csn_left = csn->csn_left;
110 ncsn->csn_right = csn;
111 csn->csn_left = NULL;
112 } else {
113 ncsn->csn_right = csn->csn_right;
114 ncsn->csn_left = csn;
115 csn->csn_right = NULL;
116 }
117 cs->cs_root = ncsn;
118
119 /*
120 * Coalesce with left and right neighbours if possible.
121 */
122 if (ncsn->csn_left != NULL) {
123 ncsn->csn_left = cset_splay(ncsn->csn_left, ncsn->csn_min - 1);
124 if (ncsn->csn_left->csn_max == ncsn->csn_min - 1) {
125 oval = ncsn->csn_left->csn_min;
126 ncsn->csn_left = cset_delete(ncsn->csn_left,
127 ncsn->csn_left->csn_min);
128 ncsn->csn_min = oval;
129 }
130 }
131 if (ncsn->csn_right != NULL) {
132 ncsn->csn_right = cset_splay(ncsn->csn_right,
133 ncsn->csn_max + 1);
134 if (ncsn->csn_right->csn_min == ncsn->csn_max + 1) {
135 oval = ncsn->csn_right->csn_max;
136 ncsn->csn_right = cset_delete(ncsn->csn_right,
137 ncsn->csn_right->csn_min);
138 ncsn->csn_max = oval;
139 }
140 }
141
142 return (true);
143}
144
145/*
146 * cset_in_hard --
147 * Determine whether a character is in the set without using
148 * the cache.
149 */
150bool
151cset_in_hard(struct cset *cs, wchar_t ch)
152{
153 struct csclass *csc;
154
155 for (csc = cs->cs_classes; csc != NULL; csc = csc->csc_next)
156 if (csc->csc_invert ^ iswctype(ch, csc->csc_type) != 0)
157 return (cs->cs_invert ^ true);
158 if (cs->cs_root != NULL) {
159 cs->cs_root = cset_splay(cs->cs_root, ch);
160 return (cs->cs_invert ^ cset_rangecmp(cs->cs_root, ch) == 0);
161 }
162 return (cs->cs_invert ^ false);
163}
164
165/*
166 * cset_cache --
167 * Update the cache.
168 */
169void
170cset_cache(struct cset *cs)
171{
172 wchar_t i;
173
174 for (i = 0; i < CS_CACHE_SIZE; i++)
175 cs->cs_cache[i] = cset_in_hard(cs, i);
176
177 cs->cs_havecache = true;
178}
179
180/*
181 * cset_invert --
182 * Invert the character set.
183 */
184void
185cset_invert(struct cset *cs)
186{
187
188 cs->cs_invert ^= true;
189 cs->cs_havecache = false;
190}
191
192/*
193 * cset_addclass --
194 * Add a wctype()-style character class to the set, optionally
195 * inverting it.
196 */
197bool
198cset_addclass(struct cset *cs, wctype_t type, bool invert)
199{
200 struct csclass *csc;
201
202 csc = malloc(sizeof(*csc));
203 if (csc == NULL)
204 return (false);
205 csc->csc_type = type;
206 csc->csc_invert = invert;
207 csc->csc_next = cs->cs_classes;
208 cs->cs_classes = csc;
209 cs->cs_havecache = false;
210 return (true);
211}
212
213static __inline int
214cset_rangecmp(struct csnode *t, wchar_t ch)
215{
216
217 if (ch < t->csn_min)
218 return (-1);
219 if (ch > t->csn_max)
220 return (1);
221 return (0);
222}
223
224static struct csnode *
225cset_splay(struct csnode *t, wchar_t ch)
226{
227 struct csnode N, *l, *r, *y;
228
229 /*
230 * Based on public domain code from Sleator.
231 */
232
233 assert(t != NULL);
234
235 N.csn_left = N.csn_right = NULL;
236 l = r = &N;
237 for (;;) {
238 if (cset_rangecmp(t, ch) < 0) {
239 if (t->csn_left != NULL &&
240 cset_rangecmp(t->csn_left, ch) < 0) {
241 y = t->csn_left;
242 t->csn_left = y->csn_right;
243 y->csn_right = t;
244 t = y;
245 }
246 if (t->csn_left == NULL)
247 break;
248 r->csn_left = t;
249 r = t;
250 t = t->csn_left;
251 } else if (cset_rangecmp(t, ch) > 0) {
252 if (t->csn_right != NULL &&
253 cset_rangecmp(t->csn_right, ch) > 0) {
254 y = t->csn_right;
255 t->csn_right = y->csn_left;
256 y->csn_left = t;
257 t = y;
258 }
259 if (t->csn_right == NULL)
260 break;
261 l->csn_right = t;
262 l = t;
263 t = t->csn_right;
264 } else
265 break;
266 }
267 l->csn_right = t->csn_left;
268 r->csn_left = t->csn_right;
269 t->csn_left = N.csn_right;
270 t->csn_right = N.csn_left;
271 return (t);
272}
273
274static struct csnode *
275cset_delete(struct csnode *t, wchar_t ch)
276{
277 struct csnode *x;
278
279 assert(t != NULL);
280 t = cset_splay(t, ch);
281 assert(cset_rangecmp(t, ch) == 0);
282 if (t->csn_left == NULL)
283 x = t->csn_right;
284 else {
285 x = cset_splay(t->csn_left, ch);
286 x->csn_right = t->csn_right;
287 }
288 free(t);
289 return x;
290}
37#include "cset.h"
38
39static struct csnode * cset_delete(struct csnode *, wchar_t);
40static __inline int cset_rangecmp(struct csnode *, wchar_t);
41static struct csnode * cset_splay(struct csnode *, wchar_t);
42
43/*
44 * cset_alloc --
45 * Allocate a set of characters.
46 */
47struct cset *
48cset_alloc(void)
49{
50 struct cset *cs;
51
52 if ((cs = malloc(sizeof(*cs))) == NULL)
53 return (NULL);
54 cs->cs_root = NULL;
55 cs->cs_classes = NULL;
56 cs->cs_havecache = false;
57 cs->cs_invert = false;
58 return (cs);
59}
60
61/*
62 * cset_add --
63 * Add a character to the set.
64 */
65bool
66cset_add(struct cset *cs, wchar_t ch)
67{
68 struct csnode *csn, *ncsn;
69 wchar_t oval;
70
71 cs->cs_havecache = false;
72
73 /*
74 * Inserting into empty tree; new item becomes the root.
75 */
76 if (cs->cs_root == NULL) {
77 csn = malloc(sizeof(*cs->cs_root));
78 if (csn == NULL)
79 return (false);
80 csn->csn_left = csn->csn_right = NULL;
81 csn->csn_min = csn->csn_max = ch;
82 cs->cs_root = csn;
83 return (true);
84 }
85
86 /*
87 * Splay to check whether the item already exists, and otherwise,
88 * where we should put it.
89 */
90 csn = cs->cs_root = cset_splay(cs->cs_root, ch);
91
92 /*
93 * Avoid adding duplicate nodes.
94 */
95 if (cset_rangecmp(csn, ch) == 0)
96 return (true);
97
98 /*
99 * Allocate a new node and make it the new root.
100 */
101 ncsn = malloc(sizeof(*ncsn));
102 if (ncsn == NULL)
103 return (false);
104 ncsn->csn_min = ncsn->csn_max = ch;
105 if (cset_rangecmp(csn, ch) < 0) {
106 ncsn->csn_left = csn->csn_left;
107 ncsn->csn_right = csn;
108 csn->csn_left = NULL;
109 } else {
110 ncsn->csn_right = csn->csn_right;
111 ncsn->csn_left = csn;
112 csn->csn_right = NULL;
113 }
114 cs->cs_root = ncsn;
115
116 /*
117 * Coalesce with left and right neighbours if possible.
118 */
119 if (ncsn->csn_left != NULL) {
120 ncsn->csn_left = cset_splay(ncsn->csn_left, ncsn->csn_min - 1);
121 if (ncsn->csn_left->csn_max == ncsn->csn_min - 1) {
122 oval = ncsn->csn_left->csn_min;
123 ncsn->csn_left = cset_delete(ncsn->csn_left,
124 ncsn->csn_left->csn_min);
125 ncsn->csn_min = oval;
126 }
127 }
128 if (ncsn->csn_right != NULL) {
129 ncsn->csn_right = cset_splay(ncsn->csn_right,
130 ncsn->csn_max + 1);
131 if (ncsn->csn_right->csn_min == ncsn->csn_max + 1) {
132 oval = ncsn->csn_right->csn_max;
133 ncsn->csn_right = cset_delete(ncsn->csn_right,
134 ncsn->csn_right->csn_min);
135 ncsn->csn_max = oval;
136 }
137 }
138
139 return (true);
140}
141
142/*
143 * cset_in_hard --
144 * Determine whether a character is in the set without using
145 * the cache.
146 */
147bool
148cset_in_hard(struct cset *cs, wchar_t ch)
149{
150 struct csclass *csc;
151
152 for (csc = cs->cs_classes; csc != NULL; csc = csc->csc_next)
153 if (csc->csc_invert ^ iswctype(ch, csc->csc_type) != 0)
154 return (cs->cs_invert ^ true);
155 if (cs->cs_root != NULL) {
156 cs->cs_root = cset_splay(cs->cs_root, ch);
157 return (cs->cs_invert ^ cset_rangecmp(cs->cs_root, ch) == 0);
158 }
159 return (cs->cs_invert ^ false);
160}
161
162/*
163 * cset_cache --
164 * Update the cache.
165 */
166void
167cset_cache(struct cset *cs)
168{
169 wchar_t i;
170
171 for (i = 0; i < CS_CACHE_SIZE; i++)
172 cs->cs_cache[i] = cset_in_hard(cs, i);
173
174 cs->cs_havecache = true;
175}
176
177/*
178 * cset_invert --
179 * Invert the character set.
180 */
181void
182cset_invert(struct cset *cs)
183{
184
185 cs->cs_invert ^= true;
186 cs->cs_havecache = false;
187}
188
189/*
190 * cset_addclass --
191 * Add a wctype()-style character class to the set, optionally
192 * inverting it.
193 */
194bool
195cset_addclass(struct cset *cs, wctype_t type, bool invert)
196{
197 struct csclass *csc;
198
199 csc = malloc(sizeof(*csc));
200 if (csc == NULL)
201 return (false);
202 csc->csc_type = type;
203 csc->csc_invert = invert;
204 csc->csc_next = cs->cs_classes;
205 cs->cs_classes = csc;
206 cs->cs_havecache = false;
207 return (true);
208}
209
210static __inline int
211cset_rangecmp(struct csnode *t, wchar_t ch)
212{
213
214 if (ch < t->csn_min)
215 return (-1);
216 if (ch > t->csn_max)
217 return (1);
218 return (0);
219}
220
221static struct csnode *
222cset_splay(struct csnode *t, wchar_t ch)
223{
224 struct csnode N, *l, *r, *y;
225
226 /*
227 * Based on public domain code from Sleator.
228 */
229
230 assert(t != NULL);
231
232 N.csn_left = N.csn_right = NULL;
233 l = r = &N;
234 for (;;) {
235 if (cset_rangecmp(t, ch) < 0) {
236 if (t->csn_left != NULL &&
237 cset_rangecmp(t->csn_left, ch) < 0) {
238 y = t->csn_left;
239 t->csn_left = y->csn_right;
240 y->csn_right = t;
241 t = y;
242 }
243 if (t->csn_left == NULL)
244 break;
245 r->csn_left = t;
246 r = t;
247 t = t->csn_left;
248 } else if (cset_rangecmp(t, ch) > 0) {
249 if (t->csn_right != NULL &&
250 cset_rangecmp(t->csn_right, ch) > 0) {
251 y = t->csn_right;
252 t->csn_right = y->csn_left;
253 y->csn_left = t;
254 t = y;
255 }
256 if (t->csn_right == NULL)
257 break;
258 l->csn_right = t;
259 l = t;
260 t = t->csn_right;
261 } else
262 break;
263 }
264 l->csn_right = t->csn_left;
265 r->csn_left = t->csn_right;
266 t->csn_left = N.csn_right;
267 t->csn_right = N.csn_left;
268 return (t);
269}
270
271static struct csnode *
272cset_delete(struct csnode *t, wchar_t ch)
273{
274 struct csnode *x;
275
276 assert(t != NULL);
277 t = cset_splay(t, ch);
278 assert(cset_rangecmp(t, ch) == 0);
279 if (t->csn_left == NULL)
280 x = t->csn_right;
281 else {
282 x = cset_splay(t->csn_left, ch);
283 x->csn_right = t->csn_right;
284 }
285 free(t);
286 return x;
287}