glob.c revision 59243
1/* 2 * Copyright (c) 1989 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Guido van Rossum. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36#if defined(LIBC_SCCS) && !defined(lint) 37static char sccsid[] = "@(#)glob.c 5.12 (Berkeley) 6/24/91"; 38#endif /* LIBC_SCCS and not lint */ 39/* 40 * Glob: the interface is a superset of the one defined in POSIX 1003.2, 41 * draft 9. 42 * 43 * The [!...] convention to negate a range is supported (SysV, Posix, ksh). 44 * 45 * Optional extra services, controlled by flags not defined by POSIX: 46 * 47 * GLOB_QUOTE: 48 * Escaping convention: \ inhibits any special meaning the following 49 * character might have (except \ at end of string is retained). 50 * GLOB_MAGCHAR: 51 * Set in gl_flags if pattern contained a globbing character. 52 * GLOB_ALTNOT: 53 * Use ^ instead of ! for "not". 54 * gl_matchc: 55 * Number of matches in the current invocation of glob. 56 */ 57 58#ifdef notdef 59#include <sys/types.h> 60#include <sys/param.h> 61#include <sys/stat.h> 62#include <dirent.h> 63#include <ctype.h> 64typedef void * ptr_t; 65#endif 66#ifdef WINNT 67 #pragma warning(disable:4244) 68#endif /* WINNT */ 69 70#define Char __Char 71#include "sh.h" 72#undef Char 73#undef QUOTE 74#undef TILDE 75#undef META 76#undef CHAR 77#undef ismeta 78#undef Strchr 79 80#include "glob.h" 81 82#ifndef S_ISDIR 83#define S_ISDIR(a) (((a) & S_IFMT) == S_IFDIR) 84#endif 85 86#if !defined(S_ISLNK) && defined(S_IFLNK) 87#define S_ISLNK(a) (((a) & S_IFMT) == S_IFLNK) 88#endif 89 90#if !defined(S_ISLNK) && !defined(lstat) 91#define lstat stat 92#endif 93 94typedef unsigned short Char; 95 96static int glob1 __P((Char *, glob_t *, int)); 97static int glob2 __P((Char *, Char *, Char *, glob_t *, int)); 98static int glob3 __P((Char *, Char *, Char *, Char *, 99 glob_t *, int)); 100static int globextend __P((Char *, glob_t *)); 101static int match __P((Char *, Char *, Char *, int)); 102#ifndef __clipper__ 103static int compare __P((const ptr_t, const ptr_t)); 104#endif 105static DIR *Opendir __P((Char *)); 106#ifdef S_IFLNK 107static int Lstat __P((Char *, struct stat *)); 108#endif 109static int Stat __P((Char *, struct stat *sb)); 110static Char *Strchr __P((Char *, int)); 111#ifdef DEBUG 112static void qprintf __P((Char *)); 113#endif 114 115#define DOLLAR '$' 116#define DOT '.' 117#define EOS '\0' 118#define LBRACKET '[' 119#define NOT '!' 120#define ALTNOT '^' 121#define QUESTION '?' 122#define QUOTE '\\' 123#define RANGE '-' 124#define RBRACKET ']' 125#define SEP '/' 126#define STAR '*' 127#define TILDE '~' 128#define UNDERSCORE '_' 129 130#define M_META 0x8000 131#define M_PROTECT 0x4000 132#define M_MASK 0xffff 133#define M_ASCII 0x00ff 134 135#define CHAR(c) ((c)&M_ASCII) 136#define META(c) ((c)|M_META) 137#define M_ALL META('*') 138#define M_END META(']') 139#define M_NOT META('!') 140#define M_ALTNOT META('^') 141#define M_ONE META('?') 142#define M_RNG META('-') 143#define M_SET META('[') 144#define ismeta(c) (((c)&M_META) != 0) 145 146#ifndef BUFSIZE 147#define GLOBBUFLEN MAXPATHLEN 148#else 149#define GLOBBUFLEN BUFSIZE 150#endif 151 152int 153globcharcoll(c1, c2) 154 int c1, c2; 155{ 156#if defined(NLS) && defined(LC_COLLATE) && !defined(NOSTRCOLL) 157 char s1[2], s2[2]; 158 159 if (c1 == c2) 160 return (0); 161 s1[0] = c1; 162 s2[0] = c2; 163 s1[1] = s2[1] = '\0'; 164 return strcoll(s1, s2); 165#else 166 return (c1 - c2); 167#endif 168} 169 170/* 171 * Need to dodge two kernel bugs: 172 * opendir("") != opendir(".") 173 * NAMEI_BUG: on plain files trailing slashes are ignored in some kernels. 174 * POSIX specifies that they should be ignored in directories. 175 */ 176 177static DIR * 178Opendir(str) 179 register Char *str; 180{ 181 char buf[GLOBBUFLEN]; 182 register char *dc = buf; 183#if defined(hpux) || defined(__hpux) 184 struct stat st; 185#endif 186 187 if (!*str) 188 return (opendir(".")); 189 while ((*dc++ = *str++) != '\0') 190 continue; 191#if defined(hpux) || defined(__hpux) 192 /* 193 * Opendir on some device files hangs, so avoid it 194 */ 195 if (stat(buf, &st) == -1 || !S_ISDIR(st.st_mode)) 196 return NULL; 197#endif 198 return (opendir(buf)); 199} 200 201#ifdef S_IFLNK 202static int 203Lstat(fn, sb) 204 register Char *fn; 205 struct stat *sb; 206{ 207 char buf[GLOBBUFLEN]; 208 register char *dc = buf; 209 210 while ((*dc++ = *fn++) != '\0') 211 continue; 212# ifdef NAMEI_BUG 213 { 214 int st; 215 216 st = lstat(buf, sb); 217 if (*buf) 218 dc--; 219 return (*--dc == '/' && !S_ISDIR(sb->st_mode) ? -1 : st); 220 } 221# else 222 return (lstat(buf, sb)); 223# endif /* NAMEI_BUG */ 224} 225#else 226#define Lstat Stat 227#endif /* S_IFLNK */ 228 229static int 230Stat(fn, sb) 231 register Char *fn; 232 struct stat *sb; 233{ 234 char buf[GLOBBUFLEN]; 235 register char *dc = buf; 236 237 while ((*dc++ = *fn++) != '\0') 238 continue; 239#ifdef NAMEI_BUG 240 { 241 int st; 242 243 st = stat(buf, sb); 244 if (*buf) 245 dc--; 246 return (*--dc == '/' && !S_ISDIR(sb->st_mode) ? -1 : st); 247 } 248#else 249 return (stat(buf, sb)); 250#endif /* NAMEI_BUG */ 251} 252 253static Char * 254Strchr(str, ch) 255 Char *str; 256 int ch; 257{ 258 do 259 if (*str == ch) 260 return (str); 261 while (*str++); 262 return (NULL); 263} 264 265#ifdef DEBUG 266static void 267qprintf(s) 268Char *s; 269{ 270 Char *p; 271 272 for (p = s; *p; p++) 273 printf("%c", *p & 0xff); 274 printf("\n"); 275 for (p = s; *p; p++) 276 printf("%c", *p & M_PROTECT ? '"' : ' '); 277 printf("\n"); 278 for (p = s; *p; p++) 279 printf("%c", *p & M_META ? '_' : ' '); 280 printf("\n"); 281} 282#endif /* DEBUG */ 283 284static int 285compare(p, q) 286 const ptr_t p, q; 287{ 288#if defined(NLS) && !defined(NOSTRCOLL) 289 errno = 0; /* strcoll sets errno, another brain-damage */ 290 291 return (strcoll(*(char **) p, *(char **) q)); 292#else 293 return (strcmp(*(char **) p, *(char **) q)); 294#endif /* NLS && !NOSTRCOLL */ 295} 296 297/* 298 * The main glob() routine: compiles the pattern (optionally processing 299 * quotes), calls glob1() to do the real pattern matching, and finally 300 * sorts the list (unless unsorted operation is requested). Returns 0 301 * if things went well, nonzero if errors occurred. It is not an error 302 * to find no matches. 303 */ 304int 305glob(pattern, flags, errfunc, pglob) 306 const char *pattern; 307 int flags; 308 int (*errfunc) __P((const char *, int)); 309 glob_t *pglob; 310{ 311 int err, oldpathc; 312 Char *bufnext, *bufend, *compilebuf, m_not; 313 const unsigned char *compilepat, *patnext; 314 int c, not; 315 Char patbuf[GLOBBUFLEN + 1], *qpatnext; 316 int no_match; 317 318 patnext = (unsigned char *) pattern; 319 if (!(flags & GLOB_APPEND)) { 320 pglob->gl_pathc = 0; 321 pglob->gl_pathv = NULL; 322 if (!(flags & GLOB_DOOFFS)) 323 pglob->gl_offs = 0; 324 } 325 pglob->gl_flags = flags & ~GLOB_MAGCHAR; 326 pglob->gl_errfunc = errfunc; 327 oldpathc = pglob->gl_pathc; 328 pglob->gl_matchc = 0; 329 330 if (pglob->gl_flags & GLOB_ALTNOT) { 331 not = ALTNOT; 332 m_not = M_ALTNOT; 333 } 334 else { 335 not = NOT; 336 m_not = M_NOT; 337 } 338 339 bufnext = patbuf; 340 bufend = bufnext + GLOBBUFLEN; 341 compilebuf = bufnext; 342 compilepat = patnext; 343 344 no_match = *patnext == not; 345 if (no_match) 346 patnext++; 347 348 if (flags & GLOB_QUOTE) { 349 /* Protect the quoted characters */ 350 while (bufnext < bufend && (c = *patnext++) != EOS) 351 if (c == QUOTE) { 352 if ((c = *patnext++) == EOS) { 353 c = QUOTE; 354 --patnext; 355 } 356 *bufnext++ = (Char) (c | M_PROTECT); 357 } 358 else 359 *bufnext++ = (Char) c; 360 } 361 else 362 while (bufnext < bufend && (c = *patnext++) != EOS) 363 *bufnext++ = (Char) c; 364 *bufnext = EOS; 365 366 bufnext = patbuf; 367 qpatnext = patbuf; 368 /* we don't need to check for buffer overflow any more */ 369 while ((c = *qpatnext++) != EOS) { 370 switch (c) { 371 case LBRACKET: 372 c = *qpatnext; 373 if (c == not) 374 ++qpatnext; 375 if (*qpatnext == EOS || 376 Strchr(qpatnext + 1, RBRACKET) == NULL) { 377 *bufnext++ = LBRACKET; 378 if (c == not) 379 --qpatnext; 380 break; 381 } 382 pglob->gl_flags |= GLOB_MAGCHAR; 383 *bufnext++ = M_SET; 384 if (c == not) 385 *bufnext++ = m_not; 386 c = *qpatnext++; 387 do { 388 *bufnext++ = CHAR(c); 389 if (*qpatnext == RANGE && 390 (c = qpatnext[1]) != RBRACKET) { 391 *bufnext++ = M_RNG; 392 *bufnext++ = CHAR(c); 393 qpatnext += 2; 394 } 395 } while ((c = *qpatnext++) != RBRACKET); 396 *bufnext++ = M_END; 397 break; 398 case QUESTION: 399 pglob->gl_flags |= GLOB_MAGCHAR; 400 *bufnext++ = M_ONE; 401 break; 402 case STAR: 403 pglob->gl_flags |= GLOB_MAGCHAR; 404 /* collapse adjacent stars to one, to avoid 405 * exponential behavior 406 */ 407 if (bufnext == patbuf || bufnext[-1] != M_ALL) 408 *bufnext++ = M_ALL; 409 break; 410 default: 411 *bufnext++ = CHAR(c); 412 break; 413 } 414 } 415 *bufnext = EOS; 416#ifdef DEBUG 417 qprintf(patbuf); 418#endif 419 420 if ((err = glob1(patbuf, pglob, no_match)) != 0) 421 return (err); 422 423 /* 424 * If there was no match we are going to append the pattern 425 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified 426 * and the pattern did not contain any magic characters 427 * GLOB_NOMAGIC is there just for compatibility with csh. 428 */ 429 if (pglob->gl_pathc == oldpathc && 430 ((flags & GLOB_NOCHECK) || 431 ((flags & GLOB_NOMAGIC) && !(pglob->gl_flags & GLOB_MAGCHAR)))) { 432 if (!(flags & GLOB_QUOTE)) { 433 Char *dp = compilebuf; 434 const unsigned char *sp = compilepat; 435 436 while ((*dp++ = *sp++) != '\0') 437 continue; 438 } 439 else { 440 /* 441 * copy pattern, interpreting quotes; this is slightly different 442 * than the interpretation of quotes above -- which should prevail? 443 */ 444 while (*compilepat != EOS) { 445 if (*compilepat == QUOTE) { 446 if (*++compilepat == EOS) 447 --compilepat; 448 } 449 *compilebuf++ = (unsigned char) *compilepat++; 450 } 451 *compilebuf = EOS; 452 } 453 return (globextend(patbuf, pglob)); 454 } 455 else if (!(flags & GLOB_NOSORT)) 456 qsort((char *) (pglob->gl_pathv + pglob->gl_offs + oldpathc), 457 pglob->gl_pathc - oldpathc, sizeof(char *), 458 (int (*) __P((const void *, const void *))) compare); 459 return (0); 460} 461 462static int 463glob1(pattern, pglob, no_match) 464 Char *pattern; 465 glob_t *pglob; 466 int no_match; 467{ 468 Char pathbuf[GLOBBUFLEN + 1]; 469 470 /* 471 * a null pathname is invalid -- POSIX 1003.1 sect. 2.4. 472 */ 473 if (*pattern == EOS) 474 return (0); 475 return (glob2(pathbuf, pathbuf, pattern, pglob, no_match)); 476} 477 478/* 479 * functions glob2 and glob3 are mutually recursive; there is one level 480 * of recursion for each segment in the pattern that contains one or 481 * more meta characters. 482 */ 483static int 484glob2(pathbuf, pathend, pattern, pglob, no_match) 485 Char *pathbuf, *pathend, *pattern; 486 glob_t *pglob; 487 int no_match; 488{ 489 struct stat sbuf; 490 int anymeta; 491 Char *p, *q; 492 493 /* 494 * loop over pattern segments until end of pattern or until segment with 495 * meta character found. 496 */ 497 anymeta = 0; 498 for (;;) { 499 if (*pattern == EOS) { /* end of pattern? */ 500 *pathend = EOS; 501 502 if (Lstat(pathbuf, &sbuf)) 503 return (0); 504 505 if (((pglob->gl_flags & GLOB_MARK) && 506 pathend[-1] != SEP) && 507 (S_ISDIR(sbuf.st_mode) 508#ifdef S_IFLNK 509 || (S_ISLNK(sbuf.st_mode) && 510 (Stat(pathbuf, &sbuf) == 0) && 511 S_ISDIR(sbuf.st_mode)) 512#endif 513 )) { 514 *pathend++ = SEP; 515 *pathend = EOS; 516 } 517 ++pglob->gl_matchc; 518 return (globextend(pathbuf, pglob)); 519 } 520 521 /* find end of next segment, copy tentatively to pathend */ 522 q = pathend; 523 p = pattern; 524 while (*p != EOS && *p != SEP) { 525 if (ismeta(*p)) 526 anymeta = 1; 527 *q++ = *p++; 528 } 529 530 if (!anymeta) { /* no expansion, do next segment */ 531 pathend = q; 532 pattern = p; 533 while (*pattern == SEP) 534 *pathend++ = *pattern++; 535 } 536 else /* need expansion, recurse */ 537 return (glob3(pathbuf, pathend, pattern, p, pglob, no_match)); 538 } 539 /* NOTREACHED */ 540} 541 542 543static int 544glob3(pathbuf, pathend, pattern, restpattern, pglob, no_match) 545 Char *pathbuf, *pathend, *pattern, *restpattern; 546 glob_t *pglob; 547 int no_match; 548{ 549 extern int errno; 550 DIR *dirp; 551 struct dirent *dp; 552 int err; 553 Char m_not = (pglob->gl_flags & GLOB_ALTNOT) ? M_ALTNOT : M_NOT; 554 char cpathbuf[GLOBBUFLEN], *ptr;; 555 556 *pathend = EOS; 557 errno = 0; 558 559 if (!(dirp = Opendir(pathbuf))) { 560 /* todo: don't call for ENOENT or ENOTDIR? */ 561 for (ptr = cpathbuf; (*ptr++ = (char) *pathbuf++) != EOS;) 562 continue; 563 if ((pglob->gl_errfunc && (*pglob->gl_errfunc) (cpathbuf, errno)) || 564 (pglob->gl_flags & GLOB_ERR)) 565 return (GLOB_ABEND); 566 else 567 return (0); 568 } 569 570 err = 0; 571 572 /* search directory for matching names */ 573 while ((dp = readdir(dirp)) != NULL) { 574 register unsigned char *sc; 575 register Char *dc; 576 577 /* initial DOT must be matched literally */ 578 if (dp->d_name[0] == DOT && *pattern != DOT) 579 continue; 580 for (sc = (unsigned char *) dp->d_name, dc = pathend; 581 (*dc++ = *sc++) != '\0';) 582 continue; 583 if (match(pathend, pattern, restpattern, (int) m_not) == no_match) { 584 *pathend = EOS; 585 continue; 586 } 587 err = glob2(pathbuf, --dc, restpattern, pglob, no_match); 588 if (err) 589 break; 590 } 591 /* todo: check error from readdir? */ 592 (void) closedir(dirp); 593 return (err); 594} 595 596 597/* 598 * Extend the gl_pathv member of a glob_t structure to accomodate a new item, 599 * add the new item, and update gl_pathc. 600 * 601 * This assumes the BSD realloc, which only copies the block when its size 602 * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic 603 * behavior. 604 * 605 * Return 0 if new item added, error code if memory couldn't be allocated. 606 * 607 * Invariant of the glob_t structure: 608 * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and 609 * gl_pathv points to (gl_offs + gl_pathc + 1) items. 610 */ 611static int 612globextend(path, pglob) 613 Char *path; 614 glob_t *pglob; 615{ 616 register char **pathv; 617 register int i; 618 unsigned int newsize; 619 char *copy; 620 Char *p; 621 622 newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs); 623 pathv = (char **) (pglob->gl_pathv ? 624 xrealloc((ptr_t) pglob->gl_pathv, (size_t) newsize) : 625 xmalloc((size_t) newsize)); 626 if (pathv == NULL) 627 return (GLOB_NOSPACE); 628 629 if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) { 630 /* first time around -- clear initial gl_offs items */ 631 pathv += pglob->gl_offs; 632 for (i = pglob->gl_offs; --i >= 0;) 633 *--pathv = NULL; 634 } 635 pglob->gl_pathv = pathv; 636 637 for (p = path; *p++;) 638 continue; 639 if ((copy = (char *) xmalloc((size_t) (p - path))) != NULL) { 640 register char *dc = copy; 641 register Char *sc = path; 642 643 while ((*dc++ = *sc++) != '\0') 644 continue; 645 pathv[pglob->gl_offs + pglob->gl_pathc++] = copy; 646 } 647 pathv[pglob->gl_offs + pglob->gl_pathc] = NULL; 648 return ((copy == NULL) ? GLOB_NOSPACE : 0); 649} 650 651 652/* 653 * pattern matching function for filenames. Each occurrence of the * 654 * pattern causes a recursion level. 655 */ 656static int 657match(name, pat, patend, m_not) 658 register Char *name, *pat, *patend; 659 int m_not; 660{ 661 int ok, negate_range; 662 Char c, k; 663 664 while (pat < patend) { 665 c = *pat++; 666 switch (c & M_MASK) { 667 case M_ALL: 668 if (pat == patend) 669 return (1); 670 do 671 if (match(name, pat, patend, m_not)) 672 return (1); 673 while (*name++ != EOS); 674 return (0); 675 case M_ONE: 676 if (*name++ == EOS) 677 return (0); 678 break; 679 case M_SET: 680 ok = 0; 681 if ((k = *name++) == EOS) 682 return (0); 683 if ((negate_range = ((*pat & M_MASK) == m_not)) != 0) 684 ++pat; 685 while (((c = *pat++) & M_MASK) != M_END) { 686 if ((*pat & M_MASK) == M_RNG) { 687 if (globcharcoll(CHAR(c), CHAR(k)) <= 0 && 688 globcharcoll(CHAR(k), CHAR(pat[1])) <= 0) 689 ok = 1; 690 pat += 2; 691 } 692 else if (c == k) 693 ok = 1; 694 } 695 if (ok == negate_range) 696 return (0); 697 break; 698 default: 699 k = *name++; 700 if (samecase(k) != samecase(c)) 701 return (0); 702 break; 703 } 704 } 705 return (*name == EOS); 706} 707 708/* free allocated data belonging to a glob_t structure */ 709void 710globfree(pglob) 711 glob_t *pglob; 712{ 713 register int i; 714 register char **pp; 715 716 if (pglob->gl_pathv != NULL) { 717 pp = pglob->gl_pathv + pglob->gl_offs; 718 for (i = pglob->gl_pathc; i--; ++pp) 719 if (*pp) 720 xfree((ptr_t) *pp), *pp = NULL; 721 xfree((ptr_t) pglob->gl_pathv), pglob->gl_pathv = NULL; 722 } 723} 724