1/* 2 * Copyright (C) 2013 Internet Systems Consortium, Inc. ("ISC") 3 * 4 * Permission to use, copy, modify, and/or distribute this software for any 5 * purpose with or without fee is hereby granted, provided that the above 6 * copyright notice and this permission notice appear in all copies. 7 * 8 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH 9 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 10 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, 11 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 12 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE 13 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 14 * PERFORMANCE OF THIS SOFTWARE. 15 */ 16 17#include <config.h> 18 19#include <isc/file.h> 20#include <isc/regex.h> 21#include <isc/string.h> 22 23#if VALREGEX_REPORT_REASON 24#define FAIL(x) do { reason = (x); goto error; } while(0) 25#else 26#define FAIL(x) goto error 27#endif 28 29/* 30 * Validate the regular expression 'C' locale. 31 */ 32int 33isc_regex_validate(const char *c) { 34 enum { 35 none, parse_bracket, parse_bound, 36 parse_ce, parse_ec, parse_cc 37 } state = none; 38 /* Well known character classes. */ 39 const char *cc[] = { 40 ":alnum:", ":digit:", ":punct:", ":alpha:", ":graph:", 41 ":space:", ":blank:", ":lower:", ":upper:", ":cntrl:", 42 ":print:", ":xdigit:" 43 }; 44 isc_boolean_t seen_comma = ISC_FALSE; 45 isc_boolean_t seen_high = ISC_FALSE; 46 isc_boolean_t seen_char = ISC_FALSE; 47 isc_boolean_t seen_ec = ISC_FALSE; 48 isc_boolean_t seen_ce = ISC_FALSE; 49 isc_boolean_t have_atom = ISC_FALSE; 50 int group = 0; 51 int range = 0; 52 int sub = 0; 53 isc_boolean_t empty_ok = ISC_FALSE; 54 isc_boolean_t neg = ISC_FALSE; 55 isc_boolean_t was_multiple = ISC_FALSE; 56 unsigned int low = 0; 57 unsigned int high = 0; 58 const char *ccname = NULL; 59 int range_start = 0; 60#if VALREGEX_REPORT_REASON 61 const char *reason = ""; 62#endif 63 64 if (c == NULL || *c == 0) 65 FAIL("empty string"); 66 67 while (c != NULL && *c != 0) { 68 switch (state) { 69 case none: 70 switch (*c) { 71 case '\\': /* make literal */ 72 ++c; 73 switch (*c) { 74 case '1': case '2': case '3': 75 case '4': case '5': case '6': 76 case '7': case '8': case '9': 77 if ((*c - '0') > sub) 78 FAIL("bad back reference"); 79 have_atom = ISC_TRUE; 80 was_multiple = ISC_FALSE; 81 break; 82 case 0: 83 FAIL("escaped end-of-string"); 84 default: 85 goto literal; 86 } 87 ++c; 88 break; 89 case '[': /* bracket start */ 90 ++c; 91 neg = ISC_FALSE; 92 was_multiple = ISC_FALSE; 93 seen_char = ISC_FALSE; 94 state = parse_bracket; 95 break; 96 case '{': /* bound start */ 97 switch (c[1]) { 98 case '0': case '1': case '2': case '3': 99 case '4': case '5': case '6': case '7': 100 case '8': case '9': 101 if (!have_atom) 102 FAIL("no atom"); 103 if (was_multiple) 104 FAIL("was multiple"); 105 seen_comma = ISC_FALSE; 106 seen_high = ISC_FALSE; 107 low = high = 0; 108 state = parse_bound; 109 break; 110 default: 111 goto literal; 112 } 113 ++c; 114 have_atom = ISC_TRUE; 115 was_multiple = ISC_TRUE; 116 break; 117 case '}': 118 goto literal; 119 case '(': /* group start */ 120 have_atom = ISC_FALSE; 121 was_multiple = ISC_FALSE; 122 empty_ok = ISC_TRUE; 123 ++group; 124 ++sub; 125 ++c; 126 break; 127 case ')': /* group end */ 128 if (group && !have_atom && !empty_ok) 129 FAIL("empty alternative"); 130 have_atom = ISC_TRUE; 131 was_multiple = ISC_FALSE; 132 if (group != 0) 133 --group; 134 ++c; 135 break; 136 case '|': /* alternative seperator */ 137 if (!have_atom) 138 FAIL("no atom"); 139 have_atom = ISC_FALSE; 140 empty_ok = ISC_FALSE; 141 was_multiple = ISC_FALSE; 142 ++c; 143 break; 144 case '^': 145 case '$': 146 have_atom = ISC_TRUE; 147 was_multiple = ISC_TRUE; 148 ++c; 149 break; 150 case '+': 151 case '*': 152 case '?': 153 if (was_multiple) 154 FAIL("was multiple"); 155 if (!have_atom) 156 FAIL("no atom"); 157 have_atom = ISC_TRUE; 158 was_multiple = ISC_TRUE; 159 ++c; 160 break; 161 case '.': 162 default: 163 literal: 164 have_atom = ISC_TRUE; 165 was_multiple = ISC_FALSE; 166 ++c; 167 break; 168 } 169 break; 170 case parse_bound: 171 switch (*c) { 172 case '0': case '1': case '2': case '3': case '4': 173 case '5': case '6': case '7': case '8': case '9': 174 if (!seen_comma) { 175 low = low * 10 + *c - '0'; 176 if (low > 255) 177 FAIL("lower bound too big"); 178 } else { 179 seen_high = ISC_TRUE; 180 high = high * 10 + *c - '0'; 181 if (high > 255) 182 FAIL("upper bound too big"); 183 } 184 ++c; 185 break; 186 case ',': 187 if (seen_comma) 188 FAIL("multiple commas"); 189 seen_comma = ISC_TRUE; 190 ++c; 191 break; 192 default: 193 case '{': 194 FAIL("non digit/comma"); 195 case '}': 196 if (seen_high && low > high) 197 FAIL("bad parse bound"); 198 seen_comma = ISC_FALSE; 199 state = none; 200 ++c; 201 break; 202 } 203 break; 204 case parse_bracket: 205 switch (*c) { 206 case '^': 207 if (seen_char || neg) goto inside; 208 neg = ISC_TRUE; 209 ++c; 210 break; 211 case '-': 212 if (range == 2) goto inside; 213 if (!seen_char) goto inside; 214 if (range == 1) 215 FAIL("bad range"); 216 range = 2; 217 ++c; 218 break; 219 case '[': 220 ++c; 221 switch (*c) { 222 case '.': /* collating element */ 223 if (range) --range; 224 ++c; 225 state = parse_ce; 226 seen_ce = ISC_FALSE; 227 break; 228 case '=': /* equivalence class */ 229 if (range == 2) 230 FAIL("equivalence class in range"); 231 ++c; 232 state = parse_ec; 233 seen_ec = ISC_FALSE; 234 break; 235 case ':': /* character class */ 236 if (range == 2) 237 FAIL("character class in range"); 238 ccname = c; 239 ++c; 240 state = parse_cc; 241 break; 242 } 243 seen_char = ISC_TRUE; 244 break; 245 case ']': 246 if (!c[1] && !seen_char) 247 FAIL("unfinished brace"); 248 if (!seen_char) 249 goto inside; 250 ++c; 251 range = 0; 252 have_atom = ISC_TRUE; 253 state = none; 254 break; 255 default: 256 inside: 257 seen_char = ISC_TRUE; 258 if (range == 2 && *c < range_start) 259 FAIL("out of order range"); 260 if (range != 0) 261 --range; 262 range_start = *c; 263 ++c; 264 break; 265 }; 266 break; 267 case parse_ce: 268 switch (*c) { 269 case '.': 270 ++c; 271 switch (*c) { 272 case ']': 273 if (!seen_ce) 274 FAIL("empty ce"); 275 ++c; 276 state = parse_bracket; 277 break; 278 default: 279 if (seen_ce) 280 range_start = 256; 281 else 282 range_start = '.'; 283 seen_ce = ISC_TRUE; 284 break; 285 } 286 break; 287 default: 288 if (seen_ce) 289 range_start = 256; 290 else 291 range_start = *c; 292 seen_ce = ISC_TRUE; 293 ++c; 294 break; 295 } 296 break; 297 case parse_ec: 298 switch (*c) { 299 case '=': 300 ++c; 301 switch (*c) { 302 case ']': 303 if (!seen_ec) 304 FAIL("no ec"); 305 ++c; 306 state = parse_bracket; 307 break; 308 default: 309 seen_ec = ISC_TRUE; 310 break; 311 } 312 break; 313 default: 314 seen_ec = ISC_TRUE; 315 ++c; 316 break; 317 } 318 break; 319 case parse_cc: 320 switch (*c) { 321 case ':': 322 ++c; 323 switch (*c) { 324 case ']': { 325 unsigned int i; 326 isc_boolean_t found = ISC_FALSE; 327 for (i = 0; 328 i < sizeof(cc)/sizeof(*cc); 329 i++) 330 { 331 unsigned int len; 332 len = strlen(cc[i]); 333 if (len != 334 (unsigned int)(c - ccname)) 335 continue; 336 if (strncmp(cc[i], ccname, len)) 337 continue; 338 found = ISC_TRUE; 339 } 340 if (!found) 341 FAIL("unknown cc"); 342 ++c; 343 state = parse_bracket; 344 break; 345 } 346 default: 347 break; 348 } 349 break; 350 default: 351 ++c; 352 break; 353 } 354 break; 355 } 356 } 357 if (group != 0) 358 FAIL("group open"); 359 if (state != none) 360 FAIL("incomplete"); 361 if (!have_atom) 362 FAIL("no atom"); 363 return (sub); 364 365 error: 366#if VALREGEX_REPORT_REASON 367 fprintf(stderr, "%s\n", reason); 368#endif 369 return (-1); 370} 371