1/* $NetBSD: att.c,v 1.6 2011/11/06 16:43:25 christos Exp $ */ 2 3/*- 4 * Copyright (c) 2011 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Christos Zoulas. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39#include <sys/cdefs.h> 40__RCSID("$NetBSD: att.c,v 1.6 2011/11/06 16:43:25 christos Exp $"); 41 42#include <stdio.h> 43#include <regex.h> 44#include <string.h> 45#include <stdlib.h> 46#include <vis.h> 47#include <ctype.h> 48#include <atf-c.h> 49 50ATF_TC(regex_att); 51 52ATF_TC_HEAD(regex_att, tc) 53{ 54 55 atf_tc_set_md_var(tc, "descr", "Driver for parsing AT&T format" 56 " input files available from:" 57 " http://www2.research.att.com/~gsf/testregex/"); 58} 59 60static const char sep[] = "\r\n\t"; 61static const char delim[3] = "\\\\\0"; 62 63 64static void 65fail(const char *pattern, const char *input, size_t lineno) { 66 fprintf(stderr, 67 "skipping failed test at line %zu (pattern=%s, input=%s)\n", 68 lineno, pattern, input); 69} 70 71static int 72bug(const char *pattern, const char *input, size_t lineno) { 73 static const struct { 74 const char *p; 75 const char *i; 76 } b[] = { 77#if defined(REGEX_SPENCER) 78 /* 79 * The default libc implementation by Henry Spencer 80 */ 81 { "a[-]?c", "ac" }, // basic.dat 82 { "(a*)*", "a" }, // categorization.dat 83 { "(aba|a*b)*", "ababa" }, // categorization.dat 84 { "\\(a\\(b\\)*\\)*\\2", "abab" }, // categorization.dat 85 { "(a*)*", "aaaaaa" }, // nullsubexpression.dat 86 { "(a*)*", "aaaaaax" }, // nullsubexpression.dat 87 { "(a*)+", "a" }, // nullsubexpression.dat 88 { "(a*)+", "aaaaaa" }, // nullsubexpression.dat 89 { "(a*)+", "aaaaaax" }, // nullsubexpression.dat 90 { "([a]*)*", "a" }, // nullsubexpression.dat 91 { "([a]*)*", "aaaaaa" }, // nullsubexpression.dat 92 { "([a]*)*", "aaaaaax" }, // nullsubexpression.dat 93 { "([a]*)+", "a" }, // nullsubexpression.dat 94 { "([a]*)+", "aaaaaa" }, // nullsubexpression.dat 95 { "([a]*)+", "aaaaaax" }, // nullsubexpression.dat 96 { "([^b]*)*", "a" }, // nullsubexpression.dat 97 { "([^b]*)*", "aaaaaa" }, // nullsubexpression.dat 98 { "([^b]*)*", "aaaaaab" }, // nullsubexpression.dat 99 { "([ab]*)*", "a" }, // nullsubexpression.dat 100 { "([ab]*)*", "aaaaaa" }, // nullsubexpression.dat 101 { "([ab]*)*", "ababab" }, // nullsubexpression.dat 102 { "([ab]*)*", "bababa" }, // nullsubexpression.dat 103 { "([ab]*)*", "b" }, // nullsubexpression.dat 104 { "([ab]*)*", "bbbbbb" }, // nullsubexpression.dat 105 { "([ab]*)*", "aaaabcde" }, // nullsubexpression.dat 106 { "([^a]*)*", "b" }, // nullsubexpression.dat 107 { "([^a]*)*", "bbbbbb" }, // nullsubexpression.dat 108 { "([^ab]*)*", "ccccxx" }, // nullsubexpression.dat 109 { "\\(a*\\)*\\(x\\)", "ax" }, // nullsubexpression.dat 110 { "\\(a*\\)*\\(x\\)", "axa" }, // nullsubexpression.dat 111 { "\\(a*\\)*\\(x\\)\\(\\1\\)", "x" }, // nullsubexpression.dat 112/* crash! */ { "\\(a*\\)*\\(x\\)\\(\\1\\)", "ax" }, // nullsubexpression.dat 113/* crash! */ { "\\(a*\\)*\\(x\\)\\(\\1\\)\\(x\\)", "axxa" }, // "" 114 { "(a*)*(x)", "ax" }, // nullsubexpression.dat 115 { "(a*)*(x)", "axa" }, // nullsubexpression.dat 116 { "(a*)+(x)", "ax" }, // nullsubexpression.dat 117 { "(a*)+(x)", "axa" }, // nullsubexpression.dat 118 { "((a|ab)(c|bcd))(d*)", "abcd" }, // forcedassoc.dat 119 { "((a|ab)(bcd|c))(d*)", "abcd" }, // forcedassoc.dat 120 { "((ab|a)(c|bcd))(d*)", "abcd" }, // forcedassoc.dat 121 { "((ab|a)(bcd|c))(d*)", "abcd" }, // forcedassoc.dat 122 { "((a*)(b|abc))(c*)", "abc" }, // forcedassoc.dat 123 { "((a*)(abc|b))(c*)", "abc" }, // forcedassoc.dat 124 { "((..)|(.)){2}", "aaa" }, // repetition.dat 125 { "((..)|(.)){3}", "aaa" }, // repetition.dat 126 { "((..)|(.)){3}", "aaaa" }, // repetition.dat 127 { "((..)|(.)){3}", "aaaaa" }, // repetition.dat 128 { "X(.?){0,}Y", "X1234567Y" }, // repetition.dat 129 { "X(.?){1,}Y", "X1234567Y" }, // repetition.dat 130 { "X(.?){2,}Y", "X1234567Y" }, // repetition.dat 131 { "X(.?){3,}Y", "X1234567Y" }, // repetition.dat 132 { "X(.?){4,}Y", "X1234567Y" }, // repetition.dat 133 { "X(.?){5,}Y", "X1234567Y" }, // repetition.dat 134 { "X(.?){6,}Y", "X1234567Y" }, // repetition.dat 135 { "X(.?){7,}Y", "X1234567Y" }, // repetition.dat 136 { "X(.?){0,8}Y", "X1234567Y" }, // repetition.dat 137 { "X(.?){1,8}Y", "X1234567Y" }, // repetition.dat 138 { "X(.?){2,8}Y", "X1234567Y" }, // repetition.dat 139 { "X(.?){3,8}Y", "X1234567Y" }, // repetition.dat 140 { "X(.?){4,8}Y", "X1234567Y" }, // repetition.dat 141 { "X(.?){5,8}Y", "X1234567Y" }, // repetition.dat 142 { "X(.?){6,8}Y", "X1234567Y" }, // repetition.dat 143 { "X(.?){7,8}Y", "X1234567Y" }, // repetition.dat 144 { "(a|ab|c|bcd){0,}(d*)", "ababcd" }, // repetition.dat 145 { "(a|ab|c|bcd){1,}(d*)", "ababcd" }, // repetition.dat 146 { "(a|ab|c|bcd){2,}(d*)", "ababcd" }, // repetition.dat 147 { "(a|ab|c|bcd){3,}(d*)", "ababcd" }, // repetition.dat 148 { "(a|ab|c|bcd){1,10}(d*)", "ababcd" }, // repetition.dat 149 { "(a|ab|c|bcd){2,10}(d*)", "ababcd" }, // repetition.dat 150 { "(a|ab|c|bcd){3,10}(d*)", "ababcd" }, // repetition.dat 151 { "(a|ab|c|bcd)*(d*)", "ababcd" }, // repetition.dat 152 { "(a|ab|c|bcd)+(d*)", "ababcd" }, // repetition.dat 153 { "(ab|a|c|bcd){0,}(d*)", "ababcd" }, // repetition.dat 154 { "(ab|a|c|bcd){1,}(d*)", "ababcd" }, // repetition.dat 155 { "(ab|a|c|bcd){2,}(d*)", "ababcd" }, // repetition.dat 156 { "(ab|a|c|bcd){3,}(d*)", "ababcd" }, // repetition.dat 157 { "(ab|a|c|bcd){1,10}(d*)", "ababcd" }, // repetition.dat 158 { "(ab|a|c|bcd){2,10}(d*)", "ababcd" }, // repetition.dat 159 { "(ab|a|c|bcd){3,10}(d*)", "ababcd" }, // repetition.dat 160 { "(ab|a|c|bcd)*(d*)", "ababcd" }, // repetition.dat 161 { "(ab|a|c|bcd)+(d*)", "ababcd" }, // repetition.dat 162#elif defined(REGEX_TRE) 163 { "a[-]?c", "ac" }, // basic.dat 164 { "a\\(b\\)*\\1", "a" }, // categorization.dat 165 { "a\\(b\\)*\\1", "abab" }, // categorization.dat 166 { "\\(a\\(b\\)*\\)*\\2", "abab" }, // categorization.dat 167 { "\\(a*\\)*\\(x\\)\\(\\1\\)", "ax" }, // categorization.dat 168 { "\\(a*\\)*\\(x\\)\\(\\1\\)\\(x\\)", "axxa" }, // "" 169 { "((..)|(.))*", "aa" }, // repetition.dat 170 { "((..)|(.))*", "aaa" }, // repetition.dat 171 { "((..)|(.))*", "aaaaa" }, // repetition.dat 172 { "X(.?){7,}Y", "X1234567Y" }, // repetition.dat 173#else 174 { "", "" } 175#endif 176 }; 177 178 for (size_t i = 0; i < __arraycount(b); i++) { 179 if (strcmp(pattern, b[i].p) == 0 && 180 strcmp(input, b[i].i) == 0) { 181 fail(pattern, input, lineno); 182 return 1; 183 } 184 } 185 return 0; 186} 187 188#ifdef REGEX_SPENCER 189#define HAVE_BRACES 1 190#define HAVE_MINIMAL 0 191#endif 192#ifndef HAVE_BRACES 193#define HAVE_BRACES 1 194#endif 195#ifndef HAVE_MINIMAL 196#define HAVE_MINIMAL 1 197#endif 198 199static int 200optional(const char *s) 201{ 202 static const struct{ 203 const char *n; 204 int v; 205 } nv[]= { 206 { "[[<element>]] not supported", HAVE_BRACES }, 207 { "no *? +? mimimal match ops", HAVE_MINIMAL }, 208 }; 209 210 for (size_t i = 0; i < __arraycount(nv); i++) 211 if (strcmp(nv[i].n, s) == 0) { 212 if (nv[i].v) 213 return 0; 214 fprintf(stderr, "skipping unsupported [%s] tests\n", s); 215 return 1; 216 } 217 218 ATF_REQUIRE_MSG(0, "Unknown feature: %s", s); 219 return 0; 220} 221 222static int 223unsupported(const char *s) 224{ 225 static const char *we[] = { 226#if defined(REGEX_SPENCER) 227 "ASSOCIATIVITY=left", // have right associativity 228 "SUBEXPRESSION=precedence", // have grouping subexpression 229 "REPEAT_LONGEST=last", // have first repeat longest 230 "BUG=alternation-order", // don't have it 231 "BUG=first-match", // don't have it 232 "BUG=nomatch-match", // don't have it 233 "BUG=repeat-any", // don't have it 234 "BUG=range-null", // don't have it 235 "BUG=repeat-null-unknown", // don't have it 236 "BUG=repeat-null", // don't have it 237 "BUG=repeat-artifact", // don't have it 238 "BUG=subexpression-first", // don't have it 239#elif defined(REGEX_TRE) 240 "ASSOCIATIVITY=right", // have left associativity 241 "SUBEXPRESSION=grouping", // have precedence subexpression 242 "REPEAT_LONGEST=first", // have last repeat longest 243 "LENGTH=first", // have last length 244 "BUG=alternation-order", // don't have it 245 "BUG=first-match", // don't have it 246 "BUG=range-null", // don't have it 247 "BUG=repeat-null", // don't have it 248 "BUG=repeat-artifact", // don't have it 249 "BUG=subexpression-first", // don't have it 250 "BUG=repeat-short", // don't have it 251#endif 252 }; 253 254 if (s == NULL) 255 return 0; 256 257 while (*s == '#' || isspace((unsigned char)*s)) 258 s++; 259 260 for (size_t i = 0; i < __arraycount(we); i++) 261 if (strcmp(we[i], s) == 0) 262 return 1; 263 return 0; 264} 265 266static void 267geterror(const char *s, int *comp, int *exec) 268{ 269 static const struct { 270 const char *n; 271 int v; 272 int ce; 273 } nv[] = { 274#define COMP 1 275#define EXEC 2 276 { "OK", 0, COMP|EXEC }, 277#define _DO(a, b) { # a, REG_ ## a, b }, 278 _DO(NOMATCH, EXEC) 279 _DO(BADPAT, COMP) 280 _DO(ECOLLATE, COMP) 281 _DO(ECTYPE, COMP) 282 _DO(EESCAPE, COMP) 283 _DO(ESUBREG, COMP) 284 _DO(EBRACK, COMP) 285 _DO(EPAREN, COMP) 286 _DO(EBRACE, COMP) 287 _DO(BADBR, COMP) 288 _DO(ERANGE, COMP) 289 _DO(ESPACE, EXEC) 290 _DO(BADRPT, COMP) 291 _DO(EMPTY, COMP) 292 _DO(ASSERT, COMP) 293 _DO(INVARG, COMP) 294 _DO(ENOSYS, COMP) 295#undef _DO 296 }; 297 *comp = 0; 298 *exec = 0; 299 for (size_t i = 0; i < __arraycount(nv); i++) 300 if (strcmp(s, nv[i].n) == 0) { 301 if (nv[i].ce & COMP) 302 *comp = nv[i].v; 303 if (nv[i].ce & EXEC) 304 *exec = nv[i].v; 305 return; 306 } 307 ATF_REQUIRE_MSG(0, "Unknown error %s", s); 308 return; 309} 310 311static int 312getflags(char *s) 313{ 314 int flags = 0; 315 316 for (;; s++) 317 switch (*s) { 318 case '0': case '1': case '2': case '3': case '4': 319 case '5': case '6': case '7': case '8': case '9': 320 *s = '\0'; 321 break; 322 case '\0': 323 return flags; 324 case 'B': 325 case 'E': 326 case 'F': 327 case 'L': 328 break; 329 case 'i': 330 flags |= REG_ICASE; 331 *s = '\0'; 332 break; 333 case '$': 334 *s = '\0'; 335 break; 336 case 'n': 337 *s = '\0'; 338 break; 339 default: 340 ATF_REQUIRE_MSG(0, "Unknown char %c", *s); 341 break; 342 } 343} 344 345static size_t 346getmatches(const char *s) 347{ 348 size_t i; 349 char *q; 350 for (i = 0; (q = strchr(s, '(')) != NULL; i++, s = q + 1) 351 continue; 352 ATF_REQUIRE_MSG(i != 0, "No parentheses found"); 353 return i; 354} 355 356static void 357checkcomment(const char *s, size_t lineno) 358{ 359 if (s && strstr(s, "BUG") != NULL) 360 fprintf(stderr, "Expected %s at line %zu\n", s, lineno); 361} 362 363static void 364checkmatches(const char *matches, size_t nm, const regmatch_t *pm, 365 size_t lineno) 366{ 367 if (nm == 0) 368 return; 369 370 char *res; 371 size_t len = strlen(matches) + 1, off = 0; 372 373 ATF_REQUIRE((res = strdup(matches)) != NULL); 374 for (size_t i = 0; i < nm; i++) { 375 int l; 376 if (pm[i].rm_so == -1 && pm[i].rm_eo == -1) 377 l = snprintf(res + off, len - off, "(?,?)"); 378 else 379 l = snprintf(res + off, len - off, "(%lld,%lld)", 380 (long long)pm[i].rm_so, (long long)pm[i].rm_eo); 381 ATF_REQUIRE_MSG((size_t) l < len - off, "String too long %s" 382 " cur=%d, max=%zu", res, l, len - off); 383 off += l; 384 } 385 ATF_REQUIRE_STREQ_MSG(res, matches, " at line %zu", lineno); 386 free(res); 387} 388 389ATF_TC_BODY(regex_att, tc) 390{ 391 regex_t re; 392 char *line, *lastpattern = NULL; 393 size_t len, lineno = 0; 394 int skipping = 0; 395 396 for (; (line = fparseln(stdin, &len, &lineno, delim, 0)) 397 != NULL; free(line)) { 398 char *name, *pattern, *input, *matches, *comment; 399 regmatch_t *pm; 400 size_t nm; 401#ifdef DEBUG 402 fprintf(stderr, "[%s]\n", line); 403#endif 404 if ((name = strtok(line, sep)) == NULL) 405 continue; 406 407 /* 408 * We check these early so that we skip the lines quickly 409 * in order to do more strict testing on the other arguments 410 * The same characters are also tested in the switch below 411 */ 412 if (*name == '}') { 413 skipping = 0; 414 continue; 415 } 416 if (skipping) 417 continue; 418 if (*name == ';' || *name == '#' || strcmp(name, "NOTE") == 0) 419 continue; 420 if (*name == ':') { 421 /* Skip ":HA#???:" prefix */ 422 while (*++name && *name != ':') 423 continue; 424 if (*name) 425 name++; 426 } 427 428 ATF_REQUIRE_MSG((pattern = strtok(NULL, sep)) != NULL, 429 "Missing pattern at line %zu", lineno); 430 ATF_REQUIRE_MSG((input = strtok(NULL, sep)) != NULL, 431 "Missing input at line %zu", lineno); 432 433 if (strchr(name, '$')) { 434 ATF_REQUIRE(strunvis(pattern, pattern) != -1); 435 ATF_REQUIRE(strunvis(input, input) != -1); 436 } 437 438 439 if (strcmp(input, "NULL") == 0) 440 *input = '\0'; 441 442 if (strcmp(pattern, "SAME") == 0) { 443 ATF_REQUIRE(lastpattern != NULL); 444 pattern = lastpattern; 445 } else { 446 free(lastpattern); 447 ATF_REQUIRE((lastpattern = strdup(pattern)) != NULL); 448 } 449 450 ATF_REQUIRE_MSG((matches = strtok(NULL, sep)) != NULL, 451 "Missing matches at line %zu", lineno); 452 453 comment = strtok(NULL, sep); 454 switch (*name) { 455 case '{': /* Begin optional implementation */ 456 if (optional(comment)) { 457 skipping++; 458 continue; 459 } 460 name++; /* We have it, so ignore */ 461 break; 462 case '}': /* End optional implementation */ 463 skipping = 0; 464 continue; 465 case '?': /* Optional */ 466 case '|': /* Alternative */ 467 if (unsupported(comment)) 468 continue; 469 name++; /* We have it, so ignore */ 470 break; 471 case '#': /* Comment */ 472 case ';': /* Skip */ 473 continue; 474 default: 475 break; 476 } 477 478 /* XXX: Our bug */ 479 if (bug(pattern, input, lineno)) 480 continue; 481 482 int comp, exec; 483 if (*matches != '(') { 484 geterror(matches, &comp, &exec); 485 pm = NULL; 486 nm = 0; 487 } else { 488 comp = exec = 0; 489 nm = getmatches(matches); 490 ATF_REQUIRE((pm = calloc(nm, sizeof(*pm))) != NULL); 491 } 492 493 494 495 int iflags = getflags(name); 496 for (; *name; name++) { 497 int flags; 498 switch (*name) { 499 case 'B': 500 flags = REG_BASIC; 501 break; 502 case 'E': 503 flags = REG_EXTENDED; 504 break; 505 case 'L': 506 flags = REG_NOSPEC; 507 break; 508 default: 509 ATF_REQUIRE_MSG(0, "Bad name %c", *name); 510 continue; 511 } 512 int c = regcomp(&re, pattern, flags | iflags); 513 ATF_REQUIRE_MSG(c == comp, 514 "regcomp returned %d for pattern %s at line %zu", 515 c, pattern, lineno); 516 if (c) 517 continue; 518 int e = regexec(&re, input, nm, pm, 0); 519 ATF_REQUIRE_MSG(e == exec, "Expected error %d," 520 " got %d at line %zu", exec, e, lineno); 521 checkmatches(matches, nm, pm, lineno); 522 checkcomment(comment, lineno); 523 regfree(&re); 524 } 525 free(pm); 526 } 527} 528 529ATF_TP_ADD_TCS(tp) 530{ 531 532 ATF_TP_ADD_TC(tp, regex_att); 533 return atf_no_error(); 534} 535