1/*********************************************************************** 2* * 3* This software is part of the ast package * 4* Copyright (c) 1985-2011 AT&T Intellectual Property * 5* and is licensed under the * 6* Common Public License, Version 1.0 * 7* by AT&T Intellectual Property * 8* * 9* A copy of the License is available at * 10* http://www.opensource.org/licenses/cpl1.0.txt * 11* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12* * 13* Information and Software Systems Research * 14* AT&T Research * 15* Florham Park NJ * 16* * 17* Glenn Fowler <gsf@research.att.com> * 18* David Korn <dgk@research.att.com> * 19* Phong Vo <kpv@research.att.com> * 20* * 21***********************************************************************/ 22#pragma prototyped 23 24/* 25 * D. G. Korn 26 * G. S. Fowler 27 * AT&T Research 28 * 29 * match shell file patterns -- derived from Bourne and Korn shell gmatch() 30 * 31 * sh pattern egrep RE description 32 * ---------- -------- ----------- 33 * * .* 0 or more chars 34 * ? . any single char 35 * [.] [.] char class 36 * [!.] [^.] negated char class 37 * [[:.:]] [[:.:]] ctype class 38 * [[=.=]] [[=.=]] equivalence class 39 * [[...]] [[...]] collation element 40 * *(.) (.)* 0 or more of 41 * +(.) (.)+ 1 or more of 42 * ?(.) (.)? 0 or 1 of 43 * (.) (.) 1 of 44 * @(.) (.) 1 of 45 * a|b a|b a or b 46 * \# () subgroup back reference [1-9] 47 * a&b a and b 48 * !(.) none of 49 * 50 * \ used to escape metacharacters 51 * 52 * *, ?, (, |, &, ), [, \ must be \'d outside of [...] 53 * only ] must be \'d inside [...] 54 * 55 * BUG: unbalanced ) terminates top level pattern 56 */ 57 58#include <ast.h> 59#include <ctype.h> 60#include <hashkey.h> 61 62#ifndef isblank 63#define isblank(x) ((x)==' '||(x)=='\t') 64#endif 65 66#ifndef isgraph 67#define isgraph(x) (isprint(x)&&!isblank(x)) 68#endif 69 70#define MAXGROUP 10 71 72typedef struct 73{ 74 char* beg[MAXGROUP]; 75 char* end[MAXGROUP]; 76 char* next_s; 77 short groups; 78} Group_t; 79 80typedef struct 81{ 82 Group_t current; 83 Group_t best; 84 char* last_s; 85 char* next_p; 86} Match_t; 87 88#define mbgetchar(p) (*p++) 89 90#ifndef isxdigit 91#define isxdigit(c) ((c)>='0'&&(c)<='9'||(c)>='a'&&(c)<='f'||(c)>='A'&&(c)<='F') 92#endif 93 94#define getsource(s,e) (((s)>=(e))?0:mbgetchar(s)) 95 96#define COLL_MAX 3 97 98/* 99 * gobble chars up to <sub> or ) keeping track of (...) and [...] 100 * sub must be one of { '|', '&', 0 } 101 * 0 returned if s runs out 102 */ 103 104static char* 105gobble(Match_t* mp, register char* s, register int sub, int* g, int clear) 106{ 107 register int p = 0; 108 register char* b = 0; 109 int c = 0; 110 int n; 111 112 for (;;) 113 switch (mbgetchar(s)) 114 { 115 case '\\': 116 if (mbgetchar(s)) 117 break; 118 /*FALLTHROUGH*/ 119 case 0: 120 return 0; 121 case '[': 122 if (!b) 123 { 124 if (*s == '!') 125 mbgetchar(s); 126 b = s; 127 } 128 else if (*s == '.' || *s == '=' || *s == ':') 129 c = *s; 130 break; 131 case ']': 132 if (b) 133 { 134 if (*(s - 2) == c) 135 c = 0; 136 else if (b != (s - 1)) 137 b = 0; 138 } 139 break; 140 case '(': 141 if (!b) 142 { 143 p++; 144 n = (*g)++; 145 if (clear) 146 { 147 if (!sub) 148 n++; 149 if (n < MAXGROUP) 150 mp->current.beg[n] = mp->current.end[n] = 0; 151 } 152 } 153 break; 154 case ')': 155 if (!b && p-- <= 0) 156 return sub ? 0 : s; 157 break; 158 case '|': 159 if (!b && !p && sub == '|') 160 return s; 161 break; 162 } 163} 164 165static int grpmatch(Match_t*, int, char*, register char*, char*, int); 166 167/* 168 * match a single pattern 169 * e is the end (0) of the substring in s 170 * r marks the start of a repeated subgroup pattern 171 */ 172 173static int 174onematch(Match_t* mp, int g, char* s, char* p, char* e, char* r, int flags) 175{ 176 register int pc; 177 register int sc; 178 register int n; 179 register int icase; 180 char* olds; 181 char* oldp; 182 183 icase = flags & STR_ICASE; 184 do 185 { 186 olds = s; 187 sc = getsource(s, e); 188 if (icase && isupper(sc)) 189 sc = tolower(sc); 190 oldp = p; 191 switch (pc = mbgetchar(p)) 192 { 193 case '(': 194 case '*': 195 case '?': 196 case '+': 197 case '@': 198 case '!': 199 if (pc == '(' || *p == '(') 200 { 201 char* subp; 202 int oldg; 203 204 s = olds; 205 subp = p + (pc != '('); 206 oldg = g; 207 n = ++g; 208 if (g < MAXGROUP && (!r || g > mp->current.groups)) 209 mp->current.beg[g] = mp->current.end[g] = 0; 210 if (!(p = gobble(mp, subp, 0, &g, !r))) 211 return 0; 212 if (pc == '*' || pc == '?' || pc == '+' && oldp == r) 213 { 214 if (onematch(mp, g, s, p, e, NiL, flags)) 215 return 1; 216 if (!sc || !getsource(s, e)) 217 { 218 mp->current.groups = oldg; 219 return 0; 220 } 221 } 222 if (pc == '*' || pc == '+') 223 { 224 p = oldp; 225 sc = n - 1; 226 } 227 else 228 sc = g; 229 pc = (pc != '!'); 230 do 231 { 232 if (grpmatch(mp, n, olds, subp, s, flags) == pc) 233 { 234 if (n < MAXGROUP) 235 { 236 if (!mp->current.beg[n] || mp->current.beg[n] > olds) 237 mp->current.beg[n] = olds; 238 if (s > mp->current.end[n]) 239 mp->current.end[n] = s; 240 } 241 if (onematch(mp, sc, s, p, e, oldp, flags)) 242 { 243 if (p == oldp && n < MAXGROUP) 244 { 245 if (!mp->current.beg[n] || mp->current.beg[n] > olds) 246 mp->current.beg[n] = olds; 247 if (s > mp->current.end[n]) 248 mp->current.end[n] = s; 249 } 250 return 1; 251 } 252 } 253 } while (s < e && mbgetchar(s)); 254 mp->current.groups = oldg; 255 return 0; 256 } 257 else if (pc == '*') 258 { 259 /* 260 * several stars are the same as one 261 */ 262 263 while (*p == '*' && *(p + 1) != '(') 264 p++; 265 oldp = p; 266 switch (pc = mbgetchar(p)) 267 { 268 case '@': 269 case '!': 270 case '+': 271 n = *p == '('; 272 break; 273 case '(': 274 case '[': 275 case '?': 276 case '*': 277 n = 1; 278 break; 279 case 0: 280 case '|': 281 case '&': 282 case ')': 283 mp->current.next_s = (flags & STR_MAXIMAL) ? e : olds; 284 mp->next_p = oldp; 285 mp->current.groups = g; 286 if (!pc && (!mp->best.next_s || (flags & STR_MAXIMAL) && mp->current.next_s > mp->best.next_s || !(flags & STR_MAXIMAL) && mp->current.next_s < mp->best.next_s)) 287 mp->best = mp->current; 288 return 1; 289 case '\\': 290 if (!(pc = mbgetchar(p))) 291 return 0; 292 if (pc >= '0' && pc <= '9') 293 { 294 n = pc - '0'; 295 if (n <= g && mp->current.beg[n]) 296 pc = *mp->current.beg[n]; 297 } 298 /*FALLTHROUGH*/ 299 default: 300 if (icase && isupper(pc)) 301 pc = tolower(pc); 302 n = 0; 303 break; 304 } 305 p = oldp; 306 for (;;) 307 { 308 if ((n || pc == sc) && onematch(mp, g, olds, p, e, NiL, flags)) 309 return 1; 310 if (!sc) 311 return 0; 312 olds = s; 313 sc = getsource(s, e); 314 if ((flags & STR_ICASE) && isupper(sc)) 315 sc = tolower(sc); 316 } 317 } 318 else if (pc != '?' && pc != sc) 319 return 0; 320 break; 321 case 0: 322 if (!(flags & STR_MAXIMAL)) 323 sc = 0; 324 /*FALLTHROUGH*/ 325 case '|': 326 case '&': 327 case ')': 328 if (!sc) 329 { 330 mp->current.next_s = olds; 331 mp->next_p = oldp; 332 mp->current.groups = g; 333 } 334 if (!pc && (!mp->best.next_s || (flags & STR_MAXIMAL) && olds > mp->best.next_s || !(flags & STR_MAXIMAL) && olds < mp->best.next_s)) 335 { 336 mp->best = mp->current; 337 mp->best.next_s = olds; 338 mp->best.groups = g; 339 } 340 return !sc; 341 case '[': 342 { 343 /*UNDENT...*/ 344 345 int invert; 346 int x; 347 int ok = 0; 348 char* range; 349 350 if (!sc) 351 return 0; 352 range = 0; 353 n = 0; 354 if (invert = *p == '!') 355 p++; 356 for (;;) 357 { 358 oldp = p; 359 if (!(pc = mbgetchar(p))) 360 return 0; 361 else if (pc == '[' && (*p == ':' || *p == '=' || *p == '.')) 362 { 363 x = 0; 364 n = mbgetchar(p); 365 oldp = p; 366 for (;;) 367 { 368 if (!(pc = mbgetchar(p))) 369 return 0; 370 if (pc == n && *p == ']') 371 break; 372 x++; 373 } 374 mbgetchar(p); 375 if (ok) 376 /*NOP*/; 377 else if (n == ':') 378 { 379 switch (HASHNKEY5(x, oldp[0], oldp[1], oldp[2], oldp[3], oldp[4])) 380 { 381 case HASHNKEY5(5,'a','l','n','u','m'): 382 if (isalnum(sc)) 383 ok = 1; 384 break; 385 case HASHNKEY5(5,'a','l','p','h','a'): 386 if (isalpha(sc)) 387 ok = 1; 388 break; 389 case HASHNKEY5(5,'b','l','a','n','k'): 390 if (isblank(sc)) 391 ok = 1; 392 break; 393 case HASHNKEY5(5,'c','n','t','r','l'): 394 if (iscntrl(sc)) 395 ok = 1; 396 break; 397 case HASHNKEY5(5,'d','i','g','i','t'): 398 if (isdigit(sc)) 399 ok = 1; 400 break; 401 case HASHNKEY5(5,'g','r','a','p','h'): 402 if (isgraph(sc)) 403 ok = 1; 404 break; 405 case HASHNKEY5(5,'l','o','w','e','r'): 406 if (islower(sc)) 407 ok = 1; 408 break; 409 case HASHNKEY5(5,'p','r','i','n','t'): 410 if (isprint(sc)) 411 ok = 1; 412 break; 413 case HASHNKEY5(5,'p','u','n','c','t'): 414 if (ispunct(sc)) 415 ok = 1; 416 break; 417 case HASHNKEY5(5,'s','p','a','c','e'): 418 if (isspace(sc)) 419 ok = 1; 420 break; 421 case HASHNKEY5(5,'u','p','p','e','r'): 422 if (icase ? islower(sc) : isupper(sc)) 423 ok = 1; 424 break; 425 case HASHNKEY5(6,'x','d','i','g','i'): 426 if (oldp[5] == 't' && isxdigit(sc)) 427 ok = 1; 428 break; 429 } 430 } 431 else if (range) 432 goto getrange; 433 else if (*p == '-' && *(p + 1) != ']') 434 { 435 mbgetchar(p); 436 range = oldp; 437 } 438 else if (isalpha(*oldp) && isalpha(*olds) && tolower(*oldp) == tolower(*olds) || sc == mbgetchar(oldp)) 439 ok = 1; 440 n = 1; 441 } 442 else if (pc == ']' && n) 443 { 444 if (ok != invert) 445 break; 446 return 0; 447 } 448 else if (pc == '\\' && (oldp = p, !(pc = mbgetchar(p)))) 449 return 0; 450 else if (ok) 451 /*NOP*/; 452 else if (range) 453 { 454 getrange: 455 if (icase && isupper(pc)) 456 pc = tolower(pc); 457 x = mbgetchar(range); 458 if (icase && isupper(x)) 459 x = tolower(x); 460 if (sc == x || sc == pc || sc > x && sc < pc) 461 ok = 1; 462 if (*p == '-' && *(p + 1) != ']') 463 { 464 mbgetchar(p); 465 range = oldp; 466 } 467 else 468 range = 0; 469 n = 1; 470 } 471 else if (*p == '-' && *(p + 1) != ']') 472 { 473 mbgetchar(p); 474 range = oldp; 475 n = 1; 476 } 477 else 478 { 479 if (icase && isupper(pc)) 480 pc = tolower(pc); 481 if (sc == pc) 482 ok = 1; 483 n = pc; 484 } 485 } 486 487 /*...INDENT*/ 488 } 489 break; 490 case '\\': 491 if (!(pc = mbgetchar(p))) 492 return 0; 493 if (pc >= '0' && pc <= '9') 494 { 495 n = pc - '0'; 496 if (n <= g && (oldp = mp->current.beg[n])) 497 { 498 while (oldp < mp->current.end[n]) 499 if (!*olds || *olds++ != *oldp++) 500 return 0; 501 s = olds; 502 break; 503 } 504 } 505 /*FALLTHROUGH*/ 506 default: 507 if (icase && isupper(pc)) 508 pc = tolower(pc); 509 if (pc != sc) 510 return 0; 511 break; 512 } 513 } while (sc); 514 return 0; 515} 516 517/* 518 * match any pattern in a group 519 * | and & subgroups are parsed here 520 */ 521 522static int 523grpmatch(Match_t* mp, int g, char* s, register char* p, char* e, int flags) 524{ 525 register char* a; 526 527 do 528 { 529 for (a = p; onematch(mp, g, s, a, e, NiL, flags); a++) 530 if (*(a = mp->next_p) != '&') 531 return 1; 532 } while (p = gobble(mp, p, '|', &g, 1)); 533 return 0; 534} 535 536/* 537 * subgroup match 538 * 0 returned if no match 539 * otherwise number of subgroups matched returned 540 * match group begin offsets are even elements of sub 541 * match group end offsets are odd elements of sub 542 * the matched string is from s+sub[0] up to but not 543 * including s+sub[1] 544 */ 545 546int 547strgrpmatch(const char* b, const char* p, int* sub, int n, int flags) 548{ 549 register int i; 550 register char* s; 551 char* e; 552 Match_t match; 553 554 s = (char*)b; 555 match.last_s = e = s + strlen(s); 556 for (;;) 557 { 558 match.best.next_s = 0; 559 match.current.groups = 0; 560 if ((i = grpmatch(&match, 0, s, (char*)p, e, flags)) || match.best.next_s) 561 { 562 if (!i) 563 match.current = match.best; 564 match.current.groups++; 565 match.current.end[0] = match.current.next_s; 566 break; 567 } 568 if ((flags & STR_LEFT) || s >= e) 569 return 0; 570 s++; 571 } 572 if ((flags & STR_RIGHT) && match.current.next_s != e) 573 return 0; 574 if (!sub) 575 return 1; 576 match.current.beg[0] = s; 577 s = (char*)b; 578 if (n > match.current.groups) 579 n = match.current.groups; 580 for (i = 0; i < n; i++) 581 { 582 sub[i * 2] = match.current.end[i] ? match.current.beg[i] - s : 0; 583 sub[i * 2 + 1] = match.current.end[i] ? match.current.end[i] - s : 0; 584 } 585 return n; 586} 587 588/* 589 * compare the string s with the shell pattern p 590 * returns 1 for match 0 otherwise 591 */ 592 593int 594strmatch(const char* s, const char* p) 595{ 596 return strgrpmatch(s, p, NiL, 0, STR_MAXIMAL|STR_LEFT|STR_RIGHT); 597} 598