1/*- 2 * Copyright (c) 2004 Tim J. Robbins. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26/* 27 * "Set of characters" ADT implemented as a splay tree of extents, with 28 * a lookup table cache to simplify looking up the first bunch of 29 * characters (which are presumably more common than others). 30 */ 31 32#include <sys/cdefs.h>
| 1/*- 2 * Copyright (c) 2004 Tim J. Robbins. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26/* 27 * "Set of characters" ADT implemented as a splay tree of extents, with 28 * a lookup table cache to simplify looking up the first bunch of 29 * characters (which are presumably more common than others). 30 */ 31 32#include <sys/cdefs.h>
|
40#include "cset.h" 41 42static struct csnode * cset_delete(struct csnode *, wchar_t); 43static __inline int cset_rangecmp(struct csnode *, wchar_t); 44static struct csnode * cset_splay(struct csnode *, wchar_t); 45 46/* 47 * cset_alloc -- 48 * Allocate a set of characters. 49 */ 50struct cset * 51cset_alloc(void) 52{ 53 struct cset *cs; 54 55 if ((cs = malloc(sizeof(*cs))) == NULL) 56 return (NULL); 57 cs->cs_root = NULL; 58 cs->cs_classes = NULL; 59 cs->cs_havecache = false; 60 cs->cs_invert = false; 61 return (cs); 62} 63 64/* 65 * cset_add -- 66 * Add a character to the set. 67 */ 68bool 69cset_add(struct cset *cs, wchar_t ch) 70{ 71 struct csnode *csn, *ncsn; 72 wchar_t oval; 73 74 cs->cs_havecache = false; 75 76 /* 77 * Inserting into empty tree; new item becomes the root. 78 */ 79 if (cs->cs_root == NULL) { 80 csn = malloc(sizeof(*cs->cs_root)); 81 if (csn == NULL) 82 return (false); 83 csn->csn_left = csn->csn_right = NULL; 84 csn->csn_min = csn->csn_max = ch; 85 cs->cs_root = csn; 86 return (true); 87 } 88 89 /* 90 * Splay to check whether the item already exists, and otherwise, 91 * where we should put it. 92 */ 93 csn = cs->cs_root = cset_splay(cs->cs_root, ch); 94 95 /* 96 * Avoid adding duplicate nodes. 97 */ 98 if (cset_rangecmp(csn, ch) == 0) 99 return (true); 100 101 /* 102 * Allocate a new node and make it the new root. 103 */ 104 ncsn = malloc(sizeof(*ncsn)); 105 if (ncsn == NULL) 106 return (false); 107 ncsn->csn_min = ncsn->csn_max = ch; 108 if (cset_rangecmp(csn, ch) < 0) { 109 ncsn->csn_left = csn->csn_left; 110 ncsn->csn_right = csn; 111 csn->csn_left = NULL; 112 } else { 113 ncsn->csn_right = csn->csn_right; 114 ncsn->csn_left = csn; 115 csn->csn_right = NULL; 116 } 117 cs->cs_root = ncsn; 118 119 /* 120 * Coalesce with left and right neighbours if possible. 121 */ 122 if (ncsn->csn_left != NULL) { 123 ncsn->csn_left = cset_splay(ncsn->csn_left, ncsn->csn_min - 1); 124 if (ncsn->csn_left->csn_max == ncsn->csn_min - 1) { 125 oval = ncsn->csn_left->csn_min; 126 ncsn->csn_left = cset_delete(ncsn->csn_left, 127 ncsn->csn_left->csn_min); 128 ncsn->csn_min = oval; 129 } 130 } 131 if (ncsn->csn_right != NULL) { 132 ncsn->csn_right = cset_splay(ncsn->csn_right, 133 ncsn->csn_max + 1); 134 if (ncsn->csn_right->csn_min == ncsn->csn_max + 1) { 135 oval = ncsn->csn_right->csn_max; 136 ncsn->csn_right = cset_delete(ncsn->csn_right, 137 ncsn->csn_right->csn_min); 138 ncsn->csn_max = oval; 139 } 140 } 141 142 return (true); 143} 144 145/* 146 * cset_in_hard -- 147 * Determine whether a character is in the set without using 148 * the cache. 149 */ 150bool 151cset_in_hard(struct cset *cs, wchar_t ch) 152{ 153 struct csclass *csc; 154 155 for (csc = cs->cs_classes; csc != NULL; csc = csc->csc_next) 156 if (csc->csc_invert ^ iswctype(ch, csc->csc_type) != 0) 157 return (cs->cs_invert ^ true); 158 if (cs->cs_root != NULL) { 159 cs->cs_root = cset_splay(cs->cs_root, ch); 160 return (cs->cs_invert ^ cset_rangecmp(cs->cs_root, ch) == 0); 161 } 162 return (cs->cs_invert ^ false); 163} 164 165/* 166 * cset_cache -- 167 * Update the cache. 168 */ 169void 170cset_cache(struct cset *cs) 171{ 172 wchar_t i; 173 174 for (i = 0; i < CS_CACHE_SIZE; i++) 175 cs->cs_cache[i] = cset_in_hard(cs, i); 176 177 cs->cs_havecache = true; 178} 179 180/* 181 * cset_invert -- 182 * Invert the character set. 183 */ 184void 185cset_invert(struct cset *cs) 186{ 187 188 cs->cs_invert ^= true; 189 cs->cs_havecache = false; 190} 191 192/* 193 * cset_addclass -- 194 * Add a wctype()-style character class to the set, optionally 195 * inverting it. 196 */ 197bool 198cset_addclass(struct cset *cs, wctype_t type, bool invert) 199{ 200 struct csclass *csc; 201 202 csc = malloc(sizeof(*csc)); 203 if (csc == NULL) 204 return (false); 205 csc->csc_type = type; 206 csc->csc_invert = invert; 207 csc->csc_next = cs->cs_classes; 208 cs->cs_classes = csc; 209 cs->cs_havecache = false; 210 return (true); 211} 212 213static __inline int 214cset_rangecmp(struct csnode *t, wchar_t ch) 215{ 216 217 if (ch < t->csn_min) 218 return (-1); 219 if (ch > t->csn_max) 220 return (1); 221 return (0); 222} 223 224static struct csnode * 225cset_splay(struct csnode *t, wchar_t ch) 226{ 227 struct csnode N, *l, *r, *y; 228 229 /* 230 * Based on public domain code from Sleator. 231 */ 232 233 assert(t != NULL); 234 235 N.csn_left = N.csn_right = NULL; 236 l = r = &N; 237 for (;;) { 238 if (cset_rangecmp(t, ch) < 0) { 239 if (t->csn_left != NULL && 240 cset_rangecmp(t->csn_left, ch) < 0) { 241 y = t->csn_left; 242 t->csn_left = y->csn_right; 243 y->csn_right = t; 244 t = y; 245 } 246 if (t->csn_left == NULL) 247 break; 248 r->csn_left = t; 249 r = t; 250 t = t->csn_left; 251 } else if (cset_rangecmp(t, ch) > 0) { 252 if (t->csn_right != NULL && 253 cset_rangecmp(t->csn_right, ch) > 0) { 254 y = t->csn_right; 255 t->csn_right = y->csn_left; 256 y->csn_left = t; 257 t = y; 258 } 259 if (t->csn_right == NULL) 260 break; 261 l->csn_right = t; 262 l = t; 263 t = t->csn_right; 264 } else 265 break; 266 } 267 l->csn_right = t->csn_left; 268 r->csn_left = t->csn_right; 269 t->csn_left = N.csn_right; 270 t->csn_right = N.csn_left; 271 return (t); 272} 273 274static struct csnode * 275cset_delete(struct csnode *t, wchar_t ch) 276{ 277 struct csnode *x; 278 279 assert(t != NULL); 280 t = cset_splay(t, ch); 281 assert(cset_rangecmp(t, ch) == 0); 282 if (t->csn_left == NULL) 283 x = t->csn_right; 284 else { 285 x = cset_splay(t->csn_left, ch); 286 x->csn_right = t->csn_right; 287 } 288 free(t); 289 return x; 290}
| 37#include "cset.h" 38 39static struct csnode * cset_delete(struct csnode *, wchar_t); 40static __inline int cset_rangecmp(struct csnode *, wchar_t); 41static struct csnode * cset_splay(struct csnode *, wchar_t); 42 43/* 44 * cset_alloc -- 45 * Allocate a set of characters. 46 */ 47struct cset * 48cset_alloc(void) 49{ 50 struct cset *cs; 51 52 if ((cs = malloc(sizeof(*cs))) == NULL) 53 return (NULL); 54 cs->cs_root = NULL; 55 cs->cs_classes = NULL; 56 cs->cs_havecache = false; 57 cs->cs_invert = false; 58 return (cs); 59} 60 61/* 62 * cset_add -- 63 * Add a character to the set. 64 */ 65bool 66cset_add(struct cset *cs, wchar_t ch) 67{ 68 struct csnode *csn, *ncsn; 69 wchar_t oval; 70 71 cs->cs_havecache = false; 72 73 /* 74 * Inserting into empty tree; new item becomes the root. 75 */ 76 if (cs->cs_root == NULL) { 77 csn = malloc(sizeof(*cs->cs_root)); 78 if (csn == NULL) 79 return (false); 80 csn->csn_left = csn->csn_right = NULL; 81 csn->csn_min = csn->csn_max = ch; 82 cs->cs_root = csn; 83 return (true); 84 } 85 86 /* 87 * Splay to check whether the item already exists, and otherwise, 88 * where we should put it. 89 */ 90 csn = cs->cs_root = cset_splay(cs->cs_root, ch); 91 92 /* 93 * Avoid adding duplicate nodes. 94 */ 95 if (cset_rangecmp(csn, ch) == 0) 96 return (true); 97 98 /* 99 * Allocate a new node and make it the new root. 100 */ 101 ncsn = malloc(sizeof(*ncsn)); 102 if (ncsn == NULL) 103 return (false); 104 ncsn->csn_min = ncsn->csn_max = ch; 105 if (cset_rangecmp(csn, ch) < 0) { 106 ncsn->csn_left = csn->csn_left; 107 ncsn->csn_right = csn; 108 csn->csn_left = NULL; 109 } else { 110 ncsn->csn_right = csn->csn_right; 111 ncsn->csn_left = csn; 112 csn->csn_right = NULL; 113 } 114 cs->cs_root = ncsn; 115 116 /* 117 * Coalesce with left and right neighbours if possible. 118 */ 119 if (ncsn->csn_left != NULL) { 120 ncsn->csn_left = cset_splay(ncsn->csn_left, ncsn->csn_min - 1); 121 if (ncsn->csn_left->csn_max == ncsn->csn_min - 1) { 122 oval = ncsn->csn_left->csn_min; 123 ncsn->csn_left = cset_delete(ncsn->csn_left, 124 ncsn->csn_left->csn_min); 125 ncsn->csn_min = oval; 126 } 127 } 128 if (ncsn->csn_right != NULL) { 129 ncsn->csn_right = cset_splay(ncsn->csn_right, 130 ncsn->csn_max + 1); 131 if (ncsn->csn_right->csn_min == ncsn->csn_max + 1) { 132 oval = ncsn->csn_right->csn_max; 133 ncsn->csn_right = cset_delete(ncsn->csn_right, 134 ncsn->csn_right->csn_min); 135 ncsn->csn_max = oval; 136 } 137 } 138 139 return (true); 140} 141 142/* 143 * cset_in_hard -- 144 * Determine whether a character is in the set without using 145 * the cache. 146 */ 147bool 148cset_in_hard(struct cset *cs, wchar_t ch) 149{ 150 struct csclass *csc; 151 152 for (csc = cs->cs_classes; csc != NULL; csc = csc->csc_next) 153 if (csc->csc_invert ^ iswctype(ch, csc->csc_type) != 0) 154 return (cs->cs_invert ^ true); 155 if (cs->cs_root != NULL) { 156 cs->cs_root = cset_splay(cs->cs_root, ch); 157 return (cs->cs_invert ^ cset_rangecmp(cs->cs_root, ch) == 0); 158 } 159 return (cs->cs_invert ^ false); 160} 161 162/* 163 * cset_cache -- 164 * Update the cache. 165 */ 166void 167cset_cache(struct cset *cs) 168{ 169 wchar_t i; 170 171 for (i = 0; i < CS_CACHE_SIZE; i++) 172 cs->cs_cache[i] = cset_in_hard(cs, i); 173 174 cs->cs_havecache = true; 175} 176 177/* 178 * cset_invert -- 179 * Invert the character set. 180 */ 181void 182cset_invert(struct cset *cs) 183{ 184 185 cs->cs_invert ^= true; 186 cs->cs_havecache = false; 187} 188 189/* 190 * cset_addclass -- 191 * Add a wctype()-style character class to the set, optionally 192 * inverting it. 193 */ 194bool 195cset_addclass(struct cset *cs, wctype_t type, bool invert) 196{ 197 struct csclass *csc; 198 199 csc = malloc(sizeof(*csc)); 200 if (csc == NULL) 201 return (false); 202 csc->csc_type = type; 203 csc->csc_invert = invert; 204 csc->csc_next = cs->cs_classes; 205 cs->cs_classes = csc; 206 cs->cs_havecache = false; 207 return (true); 208} 209 210static __inline int 211cset_rangecmp(struct csnode *t, wchar_t ch) 212{ 213 214 if (ch < t->csn_min) 215 return (-1); 216 if (ch > t->csn_max) 217 return (1); 218 return (0); 219} 220 221static struct csnode * 222cset_splay(struct csnode *t, wchar_t ch) 223{ 224 struct csnode N, *l, *r, *y; 225 226 /* 227 * Based on public domain code from Sleator. 228 */ 229 230 assert(t != NULL); 231 232 N.csn_left = N.csn_right = NULL; 233 l = r = &N; 234 for (;;) { 235 if (cset_rangecmp(t, ch) < 0) { 236 if (t->csn_left != NULL && 237 cset_rangecmp(t->csn_left, ch) < 0) { 238 y = t->csn_left; 239 t->csn_left = y->csn_right; 240 y->csn_right = t; 241 t = y; 242 } 243 if (t->csn_left == NULL) 244 break; 245 r->csn_left = t; 246 r = t; 247 t = t->csn_left; 248 } else if (cset_rangecmp(t, ch) > 0) { 249 if (t->csn_right != NULL && 250 cset_rangecmp(t->csn_right, ch) > 0) { 251 y = t->csn_right; 252 t->csn_right = y->csn_left; 253 y->csn_left = t; 254 t = y; 255 } 256 if (t->csn_right == NULL) 257 break; 258 l->csn_right = t; 259 l = t; 260 t = t->csn_right; 261 } else 262 break; 263 } 264 l->csn_right = t->csn_left; 265 r->csn_left = t->csn_right; 266 t->csn_left = N.csn_right; 267 t->csn_right = N.csn_left; 268 return (t); 269} 270 271static struct csnode * 272cset_delete(struct csnode *t, wchar_t ch) 273{ 274 struct csnode *x; 275 276 assert(t != NULL); 277 t = cset_splay(t, ch); 278 assert(cset_rangecmp(t, ch) == 0); 279 if (t->csn_left == NULL) 280 x = t->csn_right; 281 else { 282 x = cset_splay(t->csn_left, ch); 283 x->csn_right = t->csn_right; 284 } 285 free(t); 286 return x; 287}
|