1/* 2 * glob.c - filename generation 3 * 4 * This file is part of zsh, the Z shell. 5 * 6 * Copyright (c) 1992-1997 Paul Falstad 7 * All rights reserved. 8 * 9 * Permission is hereby granted, without written agreement and without 10 * license or royalty fees, to use, copy, modify, and distribute this 11 * software and to distribute modified versions of this software for any 12 * purpose, provided that the above copyright notice and the following 13 * two paragraphs appear in all copies of this software. 14 * 15 * In no event shall Paul Falstad or the Zsh Development Group be liable 16 * to any party for direct, indirect, special, incidental, or consequential 17 * damages arising out of the use of this software and its documentation, 18 * even if Paul Falstad and the Zsh Development Group have been advised of 19 * the possibility of such damage. 20 * 21 * Paul Falstad and the Zsh Development Group specifically disclaim any 22 * warranties, including, but not limited to, the implied warranties of 23 * merchantability and fitness for a particular purpose. The software 24 * provided hereunder is on an "as is" basis, and Paul Falstad and the 25 * Zsh Development Group have no obligation to provide maintenance, 26 * support, updates, enhancements, or modifications. 27 * 28 */ 29 30#include "zsh.mdh" 31#include "glob.pro" 32 33#if defined(OFF_T_IS_64_BIT) && defined(__GNUC__) 34# define ALIGN64 __attribute__((aligned(8))) 35#else 36# define ALIGN64 37#endif 38 39/* flag for CSHNULLGLOB */ 40 41typedef struct gmatch *Gmatch; 42 43struct gmatch { 44 char *name; 45 /* 46 * Array of sort strings: one for each GS_EXEC sort type in 47 * the glob qualifiers. 48 */ 49 char **sortstrs; 50 off_t size ALIGN64; 51 long atime; 52 long mtime; 53 long ctime; 54 long links; 55 off_t _size ALIGN64; 56 long _atime; 57 long _mtime; 58 long _ctime; 59 long _links; 60#ifdef GET_ST_ATIME_NSEC 61 long ansec; 62 long _ansec; 63#endif 64#ifdef GET_ST_MTIME_NSEC 65 long mnsec; 66 long _mnsec; 67#endif 68#ifdef GET_ST_CTIME_NSEC 69 long cnsec; 70 long _cnsec; 71#endif 72}; 73 74#define GS_NAME 1 75#define GS_DEPTH 2 76#define GS_EXEC 4 77 78#define GS_SHIFT_BASE 8 79 80#define GS_SIZE (GS_SHIFT_BASE) 81#define GS_ATIME (GS_SHIFT_BASE << 1) 82#define GS_MTIME (GS_SHIFT_BASE << 2) 83#define GS_CTIME (GS_SHIFT_BASE << 3) 84#define GS_LINKS (GS_SHIFT_BASE << 4) 85 86#define GS_SHIFT 5 87#define GS__SIZE (GS_SIZE << GS_SHIFT) 88#define GS__ATIME (GS_ATIME << GS_SHIFT) 89#define GS__MTIME (GS_MTIME << GS_SHIFT) 90#define GS__CTIME (GS_CTIME << GS_SHIFT) 91#define GS__LINKS (GS_LINKS << GS_SHIFT) 92 93#define GS_DESC (GS_SHIFT_BASE << (2*GS_SHIFT)) 94#define GS_NONE (GS_SHIFT_BASE << (2*GS_SHIFT+1)) 95 96#define GS_NORMAL (GS_SIZE | GS_ATIME | GS_MTIME | GS_CTIME | GS_LINKS) 97#define GS_LINKED (GS_NORMAL << GS_SHIFT) 98 99/**/ 100int badcshglob; 101 102/**/ 103int pathpos; /* position in pathbuf (needed by pattern code) */ 104 105/**/ 106char *pathbuf; /* pathname buffer (needed by pattern code) */ 107 108typedef struct stat *Statptr; /* This makes the Ultrix compiler happy. Go figure. */ 109 110/* modifier for unit conversions */ 111 112#define TT_DAYS 0 113#define TT_HOURS 1 114#define TT_MINS 2 115#define TT_WEEKS 3 116#define TT_MONTHS 4 117#define TT_SECONDS 5 118 119#define TT_BYTES 0 120#define TT_POSIX_BLOCKS 1 121#define TT_KILOBYTES 2 122#define TT_MEGABYTES 3 123 124 125typedef int (*TestMatchFunc) _((char *, struct stat *, off_t, char *)); 126 127struct qual { 128 struct qual *next; /* Next qualifier, must match */ 129 struct qual *or; /* Alternative set of qualifiers to match */ 130 TestMatchFunc func; /* Function to call to test match */ 131 off_t data ALIGN64; /* Argument passed to function */ 132 int sense; /* Whether asserting or negating */ 133 int amc; /* Flag for which time to test (a, m, c) */ 134 int range; /* Whether to test <, > or = (as per signum) */ 135 int units; /* Multiplier for time or size, respectively */ 136 char *sdata; /* currently only: expression to eval */ 137}; 138 139/* Prefix, suffix for doing zle trickery */ 140 141/**/ 142mod_export char *glob_pre, *glob_suf; 143 144/* Element of a glob sort */ 145struct globsort { 146 /* Sort type */ 147 int tp; 148 /* Sort code to eval, if type is GS_EXEC */ 149 char *exec; 150}; 151 152/* Maximum entries in sort array */ 153#define MAX_SORTS (12) 154 155/* struct to easily save/restore current state */ 156 157struct globdata { 158 int gd_pathpos; 159 char *gd_pathbuf; 160 161 int gd_matchsz; /* size of matchbuf */ 162 int gd_matchct; /* number of matches found */ 163 int gd_pathbufsz; /* size of pathbuf */ 164 int gd_pathbufcwd; /* where did we chdir()'ed */ 165 Gmatch gd_matchbuf; /* array of matches */ 166 Gmatch gd_matchptr; /* &matchbuf[matchct] */ 167 char *gd_colonmod; /* colon modifiers in qualifier list */ 168 169 /* Qualifiers pertaining to current pattern */ 170 struct qual *gd_quals; 171 172 /* Other state values for current pattern */ 173 int gd_qualct, gd_qualorct; 174 int gd_range, gd_amc, gd_units; 175 int gd_gf_nullglob, gd_gf_markdirs, gd_gf_noglobdots, gd_gf_listtypes; 176 int gd_gf_numsort; 177 int gd_gf_follow, gd_gf_sorts, gd_gf_nsorts; 178 struct globsort gd_gf_sortlist[MAX_SORTS]; 179 LinkList gd_gf_pre_words; 180 181 char *gd_glob_pre, *gd_glob_suf; 182}; 183 184/* The variable with the current globbing state and convenience macros */ 185 186static struct globdata curglobdata; 187 188#define matchsz (curglobdata.gd_matchsz) 189#define matchct (curglobdata.gd_matchct) 190#define pathbufsz (curglobdata.gd_pathbufsz) 191#define pathbufcwd (curglobdata.gd_pathbufcwd) 192#define matchbuf (curglobdata.gd_matchbuf) 193#define matchptr (curglobdata.gd_matchptr) 194#define colonmod (curglobdata.gd_colonmod) 195#define quals (curglobdata.gd_quals) 196#define qualct (curglobdata.gd_qualct) 197#define qualorct (curglobdata.gd_qualorct) 198#define g_range (curglobdata.gd_range) 199#define g_amc (curglobdata.gd_amc) 200#define g_units (curglobdata.gd_units) 201#define gf_nullglob (curglobdata.gd_gf_nullglob) 202#define gf_markdirs (curglobdata.gd_gf_markdirs) 203#define gf_noglobdots (curglobdata.gd_gf_noglobdots) 204#define gf_listtypes (curglobdata.gd_gf_listtypes) 205#define gf_numsort (curglobdata.gd_gf_numsort) 206#define gf_follow (curglobdata.gd_gf_follow) 207#define gf_sorts (curglobdata.gd_gf_sorts) 208#define gf_nsorts (curglobdata.gd_gf_nsorts) 209#define gf_sortlist (curglobdata.gd_gf_sortlist) 210#define gf_pre_words (curglobdata.gd_gf_pre_words) 211 212/* and macros for save/restore */ 213 214#define save_globstate(N) \ 215 do { \ 216 memcpy(&(N), &curglobdata, sizeof(struct globdata)); \ 217 (N).gd_pathpos = pathpos; \ 218 (N).gd_pathbuf = pathbuf; \ 219 (N).gd_glob_pre = glob_pre; \ 220 (N).gd_glob_suf = glob_suf; \ 221 pathbuf = NULL; \ 222 } while (0) 223 224#define restore_globstate(N) \ 225 do { \ 226 zfree(pathbuf, pathbufsz); \ 227 memcpy(&curglobdata, &(N), sizeof(struct globdata)); \ 228 pathpos = (N).gd_pathpos; \ 229 pathbuf = (N).gd_pathbuf; \ 230 glob_pre = (N).gd_glob_pre; \ 231 glob_suf = (N).gd_glob_suf; \ 232 } while (0) 233 234/* pathname component in filename patterns */ 235 236struct complist { 237 Complist next; 238 Patprog pat; 239 int closure; /* 1 if this is a (foo/)# */ 240 int follow; /* 1 to go thru symlinks */ 241}; 242 243/* Add a component to pathbuf: This keeps track of how * 244 * far we are into a file name, since each path component * 245 * must be matched separately. */ 246 247/**/ 248static void 249addpath(char *s, int l) 250{ 251 DPUTS(!pathbuf, "BUG: pathbuf not initialised"); 252 while (pathpos + l + 1 >= pathbufsz) 253 pathbuf = realloc(pathbuf, pathbufsz *= 2); 254 while (l--) 255 pathbuf[pathpos++] = *s++; 256 pathbuf[pathpos++] = '/'; 257 pathbuf[pathpos] = '\0'; 258} 259 260/* stat the filename s appended to pathbuf. l should be true for lstat, * 261 * false for stat. If st is NULL, the file is only checked for existance. * 262 * s == "" is treated as s == ".". This is necessary since on most systems * 263 * foo/ can be used to reference a non-directory foo. Returns nonzero if * 264 * the file does not exists. */ 265 266/**/ 267static int 268statfullpath(const char *s, struct stat *st, int l) 269{ 270 char buf[PATH_MAX]; 271 272 DPUTS(strlen(s) + !*s + pathpos - pathbufcwd >= PATH_MAX, 273 "BUG: statfullpath(): pathname too long"); 274 strcpy(buf, pathbuf + pathbufcwd); 275 strcpy(buf + pathpos - pathbufcwd, s); 276 if (!*s && *buf) { 277 /* 278 * Don't add the '.' if the path so far is empty, since 279 * then we get bogus empty strings inserted as files. 280 */ 281 buf[pathpos - pathbufcwd] = '.'; 282 buf[pathpos - pathbufcwd + 1] = '\0'; 283 l = 0; 284 } 285 unmetafy(buf, NULL); 286 if (!st) { 287 char lbuf[1]; 288 return access(buf, F_OK) && (!l || readlink(buf, lbuf, 1) < 0); 289 } 290 return l ? lstat(buf, st) : stat(buf, st); 291} 292 293/* This may be set by qualifier functions to an array of strings to insert 294 * into the list instead of the original string. */ 295 296char **inserts; 297 298/* add a match to the list */ 299 300/**/ 301static void 302insert(char *s, int checked) 303{ 304 struct stat buf, buf2, *bp; 305 char *news = s; 306 int statted = 0; 307 308 queue_signals(); 309 inserts = NULL; 310 311 if (gf_listtypes || gf_markdirs) { 312 /* Add the type marker to the end of the filename */ 313 mode_t mode; 314 checked = statted = 1; 315 if (statfullpath(s, &buf, 1)) { 316 unqueue_signals(); 317 return; 318 } 319 mode = buf.st_mode; 320 if (gf_follow) { 321 if (!S_ISLNK(mode) || statfullpath(s, &buf2, 0)) 322 memcpy(&buf2, &buf, sizeof(buf)); 323 statted |= 2; 324 mode = buf2.st_mode; 325 } 326 if (gf_listtypes || S_ISDIR(mode)) { 327 int ll = strlen(s); 328 329 news = (char *) hcalloc(ll + 2); 330 strcpy(news, s); 331 news[ll] = file_type(mode); 332 news[ll + 1] = '\0'; 333 } 334 } 335 if (qualct || qualorct) { 336 /* Go through the qualifiers, rejecting the file if appropriate */ 337 struct qual *qo, *qn; 338 339 if (!statted && statfullpath(s, &buf, 1)) { 340 unqueue_signals(); 341 return; 342 } 343 news = dyncat(pathbuf, news); 344 345 statted = 1; 346 qo = quals; 347 for (qn = qo; qn && qn->func;) { 348 g_range = qn->range; 349 g_amc = qn->amc; 350 g_units = qn->units; 351 if ((qn->sense & 2) && !(statted & 2)) { 352 /* If (sense & 2), we're following links */ 353 if (!S_ISLNK(buf.st_mode) || statfullpath(s, &buf2, 0)) 354 memcpy(&buf2, &buf, sizeof(buf)); 355 statted |= 2; 356 } 357 bp = (qn->sense & 2) ? &buf2 : &buf; 358 /* Reject the file if the function returned zero * 359 * and the sense was positive (sense&1 == 0), or * 360 * vice versa. */ 361 if ((!((qn->func) (news, bp, qn->data, qn->sdata)) 362 ^ qn->sense) & 1) { 363 /* Try next alternative, or return if there are no more */ 364 if (!(qo = qo->or)) { 365 unqueue_signals(); 366 return; 367 } 368 qn = qo; 369 continue; 370 } 371 qn = qn->next; 372 } 373 } else if (!checked) { 374 if (statfullpath(s, NULL, 1)) { 375 unqueue_signals(); 376 return; 377 } 378 statted = 1; 379 news = dyncat(pathbuf, news); 380 } else 381 news = dyncat(pathbuf, news); 382 383 while (!inserts || (news = dupstring(*inserts++))) { 384 if (colonmod) { 385 /* Handle the remainder of the qualifier: e.g. (:r:s/foo/bar/). */ 386 s = colonmod; 387 modify(&news, &s); 388 } 389 if (!statted && (gf_sorts & GS_NORMAL)) { 390 statfullpath(s, &buf, 1); 391 statted = 1; 392 } 393 if (!(statted & 2) && (gf_sorts & GS_LINKED)) { 394 if (statted) { 395 if (!S_ISLNK(buf.st_mode) || statfullpath(s, &buf2, 0)) 396 memcpy(&buf2, &buf, sizeof(buf)); 397 } else if (statfullpath(s, &buf2, 0)) 398 statfullpath(s, &buf2, 1); 399 statted |= 2; 400 } 401 matchptr->name = news; 402 if (statted & 1) { 403 matchptr->size = buf.st_size; 404 matchptr->atime = buf.st_atime; 405 matchptr->mtime = buf.st_mtime; 406 matchptr->ctime = buf.st_ctime; 407 matchptr->links = buf.st_nlink; 408#ifdef GET_ST_ATIME_NSEC 409 matchptr->ansec = GET_ST_ATIME_NSEC(buf); 410#endif 411#ifdef GET_ST_MTIME_NSEC 412 matchptr->mnsec = GET_ST_MTIME_NSEC(buf); 413#endif 414#ifdef GET_ST_CTIME_NSEC 415 matchptr->cnsec = GET_ST_CTIME_NSEC(buf); 416#endif 417 } 418 if (statted & 2) { 419 matchptr->_size = buf2.st_size; 420 matchptr->_atime = buf2.st_atime; 421 matchptr->_mtime = buf2.st_mtime; 422 matchptr->_ctime = buf2.st_ctime; 423 matchptr->_links = buf2.st_nlink; 424#ifdef GET_ST_ATIME_NSEC 425 matchptr->_ansec = GET_ST_ATIME_NSEC(buf2); 426#endif 427#ifdef GET_ST_MTIME_NSEC 428 matchptr->_mnsec = GET_ST_MTIME_NSEC(buf2); 429#endif 430#ifdef GET_ST_CTIME_NSEC 431 matchptr->_cnsec = GET_ST_CTIME_NSEC(buf2); 432#endif 433 } 434 matchptr++; 435 436 if (++matchct == matchsz) { 437 matchbuf = (Gmatch )realloc((char *)matchbuf, 438 sizeof(struct gmatch) * (matchsz *= 2)); 439 440 matchptr = matchbuf + matchct; 441 } 442 if (!inserts) 443 break; 444 } 445 unqueue_signals(); 446} 447 448/* Do the globbing: scanner is called recursively * 449 * with successive bits of the path until we've * 450 * tried all of it. */ 451 452/**/ 453static void 454scanner(Complist q) 455{ 456 Patprog p; 457 int closure; 458 int pbcwdsav = pathbufcwd; 459 int errssofar = errsfound; 460 struct dirsav ds; 461 462 init_dirsav(&ds); 463 if (!q) 464 return; 465 466 if ((closure = q->closure)) { 467 /* (foo/)# - match zero or more dirs */ 468 if (q->closure == 2) /* (foo/)## - match one or more dirs */ 469 q->closure = 1; 470 else 471 scanner(q->next); 472 } 473 p = q->pat; 474 /* Now the actual matching for the current path section. */ 475 if (p->flags & PAT_PURES) { 476 /* 477 * It's a straight string to the end of the path section. 478 */ 479 char *str = (char *)p + p->startoff; 480 int l = p->patmlen; 481 482 if (l + !l + pathpos - pathbufcwd >= PATH_MAX) { 483 int err; 484 485 if (l >= PATH_MAX) 486 return; 487 err = lchdir(pathbuf + pathbufcwd, &ds, 0); 488 if (err == -1) 489 return; 490 if (err) { 491 zerr("current directory lost during glob"); 492 return; 493 } 494 pathbufcwd = pathpos; 495 } 496 if (q->next) { 497 /* Not the last path section. Just add it to the path. */ 498 int oppos = pathpos; 499 500 if (!errflag) { 501 int add = 1; 502 503 if (q->closure && *pathbuf) { 504 if (!strcmp(str, ".")) 505 add = 0; 506 else if (!strcmp(str, "..")) { 507 struct stat sc, sr; 508 509 add = (stat("/", &sr) || stat(pathbuf, &sc) || 510 sr.st_ino != sc.st_ino || 511 sr.st_dev != sc.st_dev); 512 } 513 } 514 if (add) { 515 addpath(str, l); 516 if (!closure || !statfullpath("", NULL, 1)) 517 scanner((q->closure) ? q : q->next); 518 pathbuf[pathpos = oppos] = '\0'; 519 } 520 } 521 } else { 522 if (str[l]) 523 str = dupstrpfx(str, l); 524 insert(str, 0); 525 } 526 } else { 527 /* Do pattern matching on current path section. */ 528 char *fn = pathbuf[pathbufcwd] ? unmeta(pathbuf + pathbufcwd) : "."; 529 int dirs = !!q->next; 530 DIR *lock = opendir(fn); 531 char *subdirs = NULL; 532 int subdirlen = 0; 533 534 if (lock == NULL) 535 return; 536 while ((fn = zreaddir(lock, 1)) && !errflag) { 537 /* prefix and suffix are zle trickery */ 538 if (!dirs && !colonmod && 539 ((glob_pre && !strpfx(glob_pre, fn)) 540 || (glob_suf && !strsfx(glob_suf, fn)))) 541 continue; 542 errsfound = errssofar; 543 if (pattry(p, fn)) { 544 /* if this name matchs the pattern... */ 545 if (pbcwdsav == pathbufcwd && 546 strlen(fn) + pathpos - pathbufcwd >= PATH_MAX) { 547 int err; 548 549 DPUTS(pathpos == pathbufcwd, 550 "BUG: filename longer than PATH_MAX"); 551 err = lchdir(pathbuf + pathbufcwd, &ds, 0); 552 if (err == -1) 553 break; 554 if (err) { 555 zerr("current directory lost during glob"); 556 break; 557 } 558 pathbufcwd = pathpos; 559 } 560 if (dirs) { 561 int l; 562 563 /* 564 * If not the last component in the path: 565 * 566 * If we made an approximation in the new path segment, 567 * then it is possible we made too many errors. For 568 * example, (ab)#(cb)# will match the directory abcb 569 * with one error if allowed to, even though it can 570 * match with none. This will stop later parts of the 571 * path matching, so we need to check by reducing the 572 * maximum number of errors and seeing if the directory 573 * still matches. Luckily, this is not a terribly 574 * common case, since complex patterns typically occur 575 * in the last part of the path which is not affected 576 * by this problem. 577 */ 578 if (errsfound > errssofar) { 579 forceerrs = errsfound - 1; 580 while (forceerrs >= errssofar) { 581 errsfound = errssofar; 582 if (!pattry(p, fn)) 583 break; 584 forceerrs = errsfound - 1; 585 } 586 errsfound = forceerrs + 1; 587 forceerrs = -1; 588 } 589 if (closure) { 590 /* if matching multiple directories */ 591 struct stat buf; 592 593 if (statfullpath(fn, &buf, !q->follow)) { 594 if (errno != ENOENT && errno != EINTR && 595 errno != ENOTDIR && !errflag) { 596 zwarn("%e: %s", errno, fn); 597 } 598 continue; 599 } 600 if (!S_ISDIR(buf.st_mode)) 601 continue; 602 } 603 l = strlen(fn) + 1; 604 subdirs = hrealloc(subdirs, subdirlen, subdirlen + l 605 + sizeof(int)); 606 strcpy(subdirs + subdirlen, fn); 607 subdirlen += l; 608 /* store the count of errors made so far, too */ 609 memcpy(subdirs + subdirlen, (char *)&errsfound, 610 sizeof(int)); 611 subdirlen += sizeof(int); 612 } else 613 /* if the last filename component, just add it */ 614 insert(fn, 1); 615 } 616 } 617 closedir(lock); 618 if (subdirs) { 619 int oppos = pathpos; 620 621 for (fn = subdirs; fn < subdirs+subdirlen; ) { 622 int l = strlen(fn); 623 addpath(fn, l); 624 fn += l + 1; 625 memcpy((char *)&errsfound, fn, sizeof(int)); 626 fn += sizeof(int); 627 scanner((q->closure) ? q : q->next); /* scan next level */ 628 pathbuf[pathpos = oppos] = '\0'; 629 } 630 hrealloc(subdirs, subdirlen, 0); 631 } 632 } 633 if (pbcwdsav < pathbufcwd) { 634 if (restoredir(&ds)) 635 zerr("current directory lost during glob"); 636 zsfree(ds.dirname); 637 if (ds.dirfd >= 0) 638 close(ds.dirfd); 639 pathbufcwd = pbcwdsav; 640 } 641} 642 643/* This function tokenizes a zsh glob pattern */ 644 645/**/ 646static Complist 647parsecomplist(char *instr) 648{ 649 Patprog p1; 650 Complist l1; 651 char *str; 652 int compflags = gf_noglobdots ? (PAT_FILE|PAT_NOGLD) : PAT_FILE; 653 654 if (instr[0] == Star && instr[1] == Star && 655 (instr[2] == '/' || (instr[2] == Star && instr[3] == '/'))) { 656 /* Match any number of directories. */ 657 int follow; 658 659 /* with three stars, follow symbolic links */ 660 follow = (instr[2] == Star); 661 instr += (3 + follow); 662 663 /* Now get the next path component if there is one. */ 664 l1 = (Complist) zhalloc(sizeof *l1); 665 if ((l1->next = parsecomplist(instr)) == NULL) { 666 errflag = 1; 667 return NULL; 668 } 669 l1->pat = patcompile(NULL, compflags | PAT_ANY, NULL); 670 l1->closure = 1; /* ...zero or more times. */ 671 l1->follow = follow; 672 return l1; 673 } 674 675 /* Parse repeated directories such as (dir/)# and (dir/)## */ 676 if (*(str = instr) == zpc_special[ZPC_INPAR] && 677 !skipparens(Inpar, Outpar, (char **)&str) && 678 *str == zpc_special[ZPC_HASH] && str[-2] == '/') { 679 instr++; 680 if (!(p1 = patcompile(instr, compflags, &instr))) 681 return NULL; 682 if (instr[0] == '/' && instr[1] == Outpar && instr[2] == Pound) { 683 int pdflag = 0; 684 685 instr += 3; 686 if (*instr == Pound) { 687 pdflag = 1; 688 instr++; 689 } 690 l1 = (Complist) zhalloc(sizeof *l1); 691 l1->pat = p1; 692 l1->closure = 1 + pdflag; 693 l1->follow = 0; 694 l1->next = parsecomplist(instr); 695 return (l1->pat) ? l1 : NULL; 696 } 697 } else { 698 /* parse single path component */ 699 if (!(p1 = patcompile(instr, compflags|PAT_FILET, &instr))) 700 return NULL; 701 /* then do the remaining path components */ 702 if (*instr == '/' || !*instr) { 703 int ef = *instr == '/'; 704 705 l1 = (Complist) zhalloc(sizeof *l1); 706 l1->pat = p1; 707 l1->closure = 0; 708 l1->next = ef ? parsecomplist(instr+1) : NULL; 709 return (ef && !l1->next) ? NULL : l1; 710 } 711 } 712 errflag = 1; 713 return NULL; 714} 715 716/* turn a string into a Complist struct: this has path components */ 717 718/**/ 719static Complist 720parsepat(char *str) 721{ 722 long assert; 723 int ignore; 724 725 patcompstart(); 726 /* 727 * Check for initial globbing flags, so that they don't form 728 * a bogus path component. 729 */ 730 if ((*str == zpc_special[ZPC_INPAR] && str[1] == zpc_special[ZPC_HASH]) || 731 (*str == zpc_special[ZPC_KSH_AT] && str[1] == Inpar && 732 str[2] == zpc_special[ZPC_HASH])) { 733 str += (*str == Inpar) ? 2 : 3; 734 if (!patgetglobflags(&str, &assert, &ignore)) 735 return NULL; 736 } 737 738 /* Now there is no (#X) in front, we can check the path. */ 739 if (!pathbuf) 740 pathbuf = zalloc(pathbufsz = PATH_MAX); 741 DPUTS(pathbufcwd, "BUG: glob changed directory"); 742 if (*str == '/') { /* pattern has absolute path */ 743 str++; 744 pathbuf[0] = '/'; 745 pathbuf[pathpos = 1] = '\0'; 746 } else /* pattern is relative to pwd */ 747 pathbuf[pathpos = 0] = '\0'; 748 749 return parsecomplist(str); 750} 751 752/* get number after qualifier */ 753 754/**/ 755static off_t 756qgetnum(char **s) 757{ 758 off_t v = 0; 759 760 if (!idigit(**s)) { 761 zerr("number expected"); 762 return 0; 763 } 764 while (idigit(**s)) 765 v = v * 10 + *(*s)++ - '0'; 766 return v; 767} 768 769/* get mode spec after qualifier */ 770 771/**/ 772static zlong 773qgetmodespec(char **s) 774{ 775 zlong yes = 0, no = 0, val, mask, t; 776 char *p = *s, c, how, end; 777 778 if ((c = *p) == '=' || c == Equals || c == '+' || c == '-' || 779 c == '?' || c == Quest || (c >= '0' && c <= '7')) { 780 end = 0; 781 c = 0; 782 } else { 783 end = (c == '<' ? '>' : 784 (c == '[' ? ']' : 785 (c == '{' ? '}' : 786 (c == Inang ? Outang : 787 (c == Inbrack ? Outbrack : 788 (c == Inbrace ? Outbrace : c)))))); 789 p++; 790 } 791 do { 792 mask = 0; 793 while (((c = *p) == 'u' || c == 'g' || c == 'o' || c == 'a') && end) { 794 switch (c) { 795 case 'o': mask |= 01007; break; 796 case 'g': mask |= 02070; break; 797 case 'u': mask |= 04700; break; 798 case 'a': mask |= 07777; break; 799 } 800 p++; 801 } 802 how = ((c == '+' || c == '-') ? c : '='); 803 if (c == '+' || c == '-' || c == '=' || c == Equals) 804 p++; 805 val = 0; 806 if (mask) { 807 while ((c = *p++) != ',' && c != end) { 808 switch (c) { 809 case 'x': val |= 00111; break; 810 case 'w': val |= 00222; break; 811 case 'r': val |= 00444; break; 812 case 's': val |= 06000; break; 813 case 't': val |= 01000; break; 814 case '0': case '1': case '2': case '3': 815 case '4': case '5': case '6': case '7': 816 t = ((zlong) c - '0'); 817 val |= t | (t << 3) | (t << 6); 818 break; 819 default: 820 zerr("invalid mode specification"); 821 return 0; 822 } 823 } 824 if (how == '=' || how == '+') { 825 yes |= val & mask; 826 val = ~val; 827 } 828 if (how == '=' || how == '-') 829 no |= val & mask; 830 } else if (!(end && c == end) && c != ',' && c) { 831 t = 07777; 832 while ((c = *p) == '?' || c == Quest || 833 (c >= '0' && c <= '7')) { 834 if (c == '?' || c == Quest) { 835 t = (t << 3) | 7; 836 val <<= 3; 837 } else { 838 t <<= 3; 839 val = (val << 3) | ((zlong) c - '0'); 840 } 841 p++; 842 } 843 if (end && c != end && c != ',') { 844 zerr("invalid mode specification"); 845 return 0; 846 } 847 if (how == '=') { 848 yes = (yes & ~t) | val; 849 no = (no & ~t) | (~val & ~t); 850 } else if (how == '+') 851 yes |= val; 852 else 853 no |= val; 854 } else { 855 zerr("invalid mode specification"); 856 return 0; 857 } 858 } while (end && c != end); 859 860 *s = p; 861 return ((yes & 07777) | ((no & 07777) << 12)); 862} 863 864static int 865gmatchcmp(Gmatch a, Gmatch b) 866{ 867 int i; 868 off_t r = 0L; 869 struct globsort *s; 870 char **asortstrp = NULL, **bsortstrp = NULL; 871 872 for (i = gf_nsorts, s = gf_sortlist; i; i--, s++) { 873 switch (s->tp & ~GS_DESC) { 874 case GS_NAME: 875 r = zstrcmp(b->name, a->name, gf_numsort ? SORTIT_NUMERICALLY : 0); 876 break; 877 case GS_DEPTH: 878 { 879 char *aptr = a->name, *bptr = b->name; 880 int slasha = 0, slashb = 0; 881 /* Count slashes. Trailing slashes don't count. */ 882 while (*aptr && *aptr == *bptr) 883 aptr++, bptr++; 884 if (*aptr) 885 for (; aptr[1]; aptr++) 886 if (*aptr == '/') { 887 slasha = 1; 888 break; 889 } 890 if (*bptr) 891 for (; bptr[1]; bptr++) 892 if (*bptr == '/') { 893 slashb = 1; 894 break; 895 } 896 r = slasha - slashb; 897 } 898 break; 899 case GS_EXEC: 900 if (!asortstrp) { 901 asortstrp = a->sortstrs; 902 bsortstrp = b->sortstrs; 903 } else { 904 asortstrp++; 905 bsortstrp++; 906 } 907 r = zstrcmp(*bsortstrp, *asortstrp, 908 gf_numsort ? SORTIT_NUMERICALLY : 0); 909 break; 910 case GS_SIZE: 911 r = b->size - a->size; 912 break; 913 case GS_ATIME: 914 r = a->atime - b->atime; 915#ifdef GET_ST_ATIME_NSEC 916 if (!r) 917 r = a->ansec - b->ansec; 918#endif 919 break; 920 case GS_MTIME: 921 r = a->mtime - b->mtime; 922#ifdef GET_ST_MTIME_NSEC 923 if (!r) 924 r = a->mnsec - b->mnsec; 925#endif 926 break; 927 case GS_CTIME: 928 r = a->ctime - b->ctime; 929#ifdef GET_ST_CTIME_NSEC 930 if (!r) 931 r = a->cnsec - b->cnsec; 932#endif 933 break; 934 case GS_LINKS: 935 r = b->links - a->links; 936 break; 937 case GS__SIZE: 938 r = b->_size - a->_size; 939 break; 940 case GS__ATIME: 941 r = a->_atime - b->_atime; 942#ifdef GET_ST_ATIME_NSEC 943 if (!r) 944 r = a->_ansec - b->_ansec; 945#endif 946 break; 947 case GS__MTIME: 948 r = a->_mtime - b->_mtime; 949#ifdef GET_ST_MTIME_NSEC 950 if (!r) 951 r = a->_mnsec - b->_mnsec; 952#endif 953 break; 954 case GS__CTIME: 955 r = a->_ctime - b->_ctime; 956#ifdef GET_ST_CTIME_NSEC 957 if (!r) 958 r = a->_cnsec - b->_cnsec; 959#endif 960 break; 961 case GS__LINKS: 962 r = b->_links - a->_links; 963 break; 964 } 965 if (r) 966 return (s->tp & GS_DESC) ? 967 (r < 0L ? 1 : -1) : 968 (r > 0L ? 1 : -1); 969 } 970 return 0; 971} 972 973/* 974 * Duplicate a list of qualifiers using the `next' linkage (not the 975 * `or' linkage). Return the head element and set *last (if last non-NULL) 976 * to point to the last element of the new list. All allocation is on the 977 * heap (or off the heap?) 978 */ 979static struct qual *dup_qual_list(struct qual *orig, struct qual **lastp) 980{ 981 struct qual *qfirst = NULL, *qlast = NULL; 982 983 while (orig) { 984 struct qual *qnew = (struct qual *)zhalloc(sizeof(struct qual)); 985 *qnew = *orig; 986 qnew->next = qnew->or = NULL; 987 988 if (!qfirst) 989 qfirst = qnew; 990 if (qlast) 991 qlast->next = qnew; 992 qlast = qnew; 993 994 orig = orig->next; 995 } 996 997 if (lastp) 998 *lastp = qlast; 999 return qfirst; 1000} 1001 1002 1003/* 1004 * Get a glob string for execution, following e, P or + qualifiers. 1005 * Pointer is character after the e, P or +. 1006 */ 1007 1008/**/ 1009static char * 1010glob_exec_string(char **sp) 1011{ 1012 char sav, *tt, *sdata, *s = *sp; 1013 int plus; 1014 1015 if (s[-1] == '+') { 1016 plus = 0; 1017 tt = itype_end(s, IIDENT, 0); 1018 if (tt == s) 1019 { 1020 zerr("missing identifier after `+'"); 1021 return NULL; 1022 } 1023 } else { 1024 tt = get_strarg(s, &plus); 1025 if (!*tt) 1026 { 1027 zerr("missing end of string"); 1028 return NULL; 1029 } 1030 } 1031 1032 sav = *tt; 1033 *tt = '\0'; 1034 sdata = dupstring(s + plus); 1035 untokenize(sdata); 1036 *tt = sav; 1037 if (sav) 1038 *sp = tt + plus; 1039 else 1040 *sp = tt; 1041 1042 return sdata; 1043} 1044 1045/* 1046 * Insert a glob match. 1047 * If there were words to prepend given by the P glob qualifier, do so. 1048 */ 1049static void 1050insert_glob_match(LinkList list, LinkNode next, char *data) 1051{ 1052 if (gf_pre_words) { 1053 LinkNode added; 1054 for (added = firstnode(gf_pre_words); added; incnode(added)) { 1055 next = insertlinknode(list, next, dupstring(getdata(added))); 1056 } 1057 } 1058 1059 insertlinknode(list, next, data); 1060} 1061 1062/* Main entry point to the globbing code for filename globbing. * 1063 * np points to a node in the list list which will be expanded * 1064 * into a series of nodes. */ 1065 1066/**/ 1067void 1068zglob(LinkList list, LinkNode np, int nountok) 1069{ 1070 struct qual *qo, *qn, *ql; 1071 LinkNode node = prevnode(np); 1072 char *str; /* the pattern */ 1073 int sl; /* length of the pattern */ 1074 Complist q; /* pattern after parsing */ 1075 char *ostr = (char *)getdata(np); /* the pattern before the parser */ 1076 /* chops it up */ 1077 int first = 0, end = -1; /* index of first match to return */ 1078 /* and index+1 of the last match */ 1079 struct globdata saved; /* saved glob state */ 1080 int nobareglob = !isset(BAREGLOBQUAL); 1081 1082 if (unset(GLOBOPT) || !haswilds(ostr) || unset(EXECOPT)) { 1083 if (!nountok) 1084 untokenize(ostr); 1085 return; 1086 } 1087 save_globstate(saved); 1088 1089 str = dupstring(ostr); 1090 uremnode(list, np); 1091 1092 /* quals will hold the complete list of qualifiers (file static). */ 1093 quals = NULL; 1094 /* 1095 * qualct and qualorct indicate we have qualifiers in the last 1096 * alternative, or a set of alternatives, respectively. They 1097 * are not necessarily an accurate count, however. 1098 */ 1099 qualct = qualorct = 0; 1100 /* 1101 * colonmod is a concatenated list of all colon modifiers found in 1102 * all sets of qualifiers. 1103 */ 1104 colonmod = NULL; 1105 /* The gf_* flags are qualifiers which are applied globally. */ 1106 gf_nullglob = isset(NULLGLOB); 1107 gf_markdirs = isset(MARKDIRS); 1108 gf_listtypes = gf_follow = 0; 1109 gf_noglobdots = unset(GLOBDOTS); 1110 gf_numsort = isset(NUMERICGLOBSORT); 1111 gf_sorts = gf_nsorts = 0; 1112 gf_pre_words = NULL; 1113 1114 /* Check for qualifiers */ 1115 while (!nobareglob || 1116 (isset(EXTENDEDGLOB) && !zpc_disables[ZPC_HASH])) { 1117 struct qual *newquals; 1118 char *s; 1119 int sense, paren; 1120 off_t data; 1121 char *sdata, *newcolonmod; 1122 int (*func) _((char *, Statptr, off_t, char *)); 1123 1124 /* 1125 * Initialise state variables for current file pattern. 1126 * newquals is the root for the linked list of all qualifiers. 1127 * qo is the root of the current list of alternatives. 1128 * ql is the end of the current alternative where the `next' will go. 1129 * qn is the current qualifier node to be added. 1130 * 1131 * Here is an attempt at a diagram. An `or' is added horizontally 1132 * to the top line, a `next' at the bottom of the right hand line. 1133 * `qn' is usually NULL unless a new `or' has just been added. 1134 * 1135 * quals -> x -> x -> qo 1136 * | | | 1137 * x x x 1138 * | | 1139 * x ql 1140 * 1141 * In fact, after each loop the complete set is in the file static 1142 * `quals'. Then, if we have a second set of qualifiers, we merge 1143 * the lists together. This is only tricky if one or both have an 1144 * `or' in them; then we need to distribute over all alternatives. 1145 */ 1146 newquals = qo = qn = ql = NULL; 1147 1148 sl = strlen(str); 1149 if (str[sl - 1] != Outpar) 1150 break; 1151 1152 /* Check these are really qualifiers, not a set of * 1153 * alternatives or exclusions. We can be more * 1154 * lenient with an explicit (#q) than with a bare * 1155 * set of qualifiers. */ 1156 paren = 0; 1157 for (s = str + sl - 2; *s && (*s != Inpar || paren); s--) { 1158 switch (*s) { 1159 case Outpar: 1160 paren++; /*FALLTHROUGH*/ 1161 case Bar: 1162 if (!zpc_disables[ZPC_BAR]) 1163 nobareglob = 1; 1164 break; 1165 case Tilde: 1166 if (isset(EXTENDEDGLOB) && !zpc_disables[ZPC_TILDE]) 1167 nobareglob = 1; 1168 break; 1169 case Inpar: 1170 paren--; 1171 break; 1172 } 1173 } 1174 if (*s != Inpar) 1175 break; 1176 if (isset(EXTENDEDGLOB) && !zpc_disables[ZPC_HASH] && s[1] == Pound) { 1177 if (s[2] == 'q') { 1178 *s = 0; 1179 s += 2; 1180 } else 1181 break; 1182 } else if (nobareglob) 1183 break; 1184 1185 /* Real qualifiers found. */ 1186 nobareglob = 1; 1187 sense = 0; /* bit 0 for match (0)/don't match (1) */ 1188 /* bit 1 for follow links (2), don't (0) */ 1189 data = 0; /* Any numerical argument required */ 1190 sdata = NULL; /* Any list argument required */ 1191 newcolonmod = NULL; /* Contains trailing colon modifiers */ 1192 1193 str[sl-1] = 0; 1194 *s++ = 0; 1195 while (*s && !newcolonmod) { 1196 func = (int (*) _((char *, Statptr, off_t, char *)))0; 1197 if (idigit(*s)) { 1198 /* Store numeric argument for qualifier */ 1199 func = qualflags; 1200 data = 0; 1201 sdata = NULL; 1202 while (idigit(*s)) 1203 data = data * 010 + (*s++ - '0'); 1204 } else if (*s == ',') { 1205 /* A comma separates alternative sets of qualifiers */ 1206 s++; 1207 sense = 0; 1208 if (qualct) { 1209 qn = (struct qual *)hcalloc(sizeof *qn); 1210 qo->or = qn; 1211 qo = qn; 1212 qualorct++; 1213 qualct = 0; 1214 ql = NULL; 1215 } 1216 } else { 1217 switch (*s++) { 1218 case ':': 1219 /* Remaining arguments are history-type * 1220 * colon substitutions, handled separately. */ 1221 newcolonmod = s - 1; 1222 untokenize(newcolonmod); 1223 if (colonmod) { 1224 /* remember we're searching backwards */ 1225 colonmod = dyncat(newcolonmod, colonmod); 1226 } else 1227 colonmod = newcolonmod; 1228 break; 1229 case Hat: 1230 case '^': 1231 /* Toggle sense: go from positive to * 1232 * negative match and vice versa. */ 1233 sense ^= 1; 1234 break; 1235 case '-': 1236 /* Toggle matching of symbolic links */ 1237 sense ^= 2; 1238 break; 1239 case '@': 1240 /* Match symbolic links */ 1241 func = qualislnk; 1242 break; 1243 case Equals: 1244 case '=': 1245 /* Match sockets */ 1246 func = qualissock; 1247 break; 1248 case 'p': 1249 /* Match named pipes */ 1250 func = qualisfifo; 1251 break; 1252 case '/': 1253 /* Match directories */ 1254 func = qualisdir; 1255 break; 1256 case '.': 1257 /* Match regular files */ 1258 func = qualisreg; 1259 break; 1260 case '%': 1261 /* Match special files: block, * 1262 * character or any device */ 1263 if (*s == 'b') 1264 s++, func = qualisblk; 1265 else if (*s == 'c') 1266 s++, func = qualischr; 1267 else 1268 func = qualisdev; 1269 break; 1270 case Star: 1271 /* Match executable plain files */ 1272 func = qualiscom; 1273 break; 1274 case 'R': 1275 /* Match world-readable files */ 1276 func = qualflags; 1277 data = 0004; 1278 break; 1279 case 'W': 1280 /* Match world-writeable files */ 1281 func = qualflags; 1282 data = 0002; 1283 break; 1284 case 'X': 1285 /* Match world-executable files */ 1286 func = qualflags; 1287 data = 0001; 1288 break; 1289 case 'A': 1290 func = qualflags; 1291 data = 0040; 1292 break; 1293 case 'I': 1294 func = qualflags; 1295 data = 0020; 1296 break; 1297 case 'E': 1298 func = qualflags; 1299 data = 0010; 1300 break; 1301 case 'r': 1302 /* Match files readable by current process */ 1303 func = qualflags; 1304 data = 0400; 1305 break; 1306 case 'w': 1307 /* Match files writeable by current process */ 1308 func = qualflags; 1309 data = 0200; 1310 break; 1311 case 'x': 1312 /* Match files executable by current process */ 1313 func = qualflags; 1314 data = 0100; 1315 break; 1316 case 's': 1317 /* Match setuid files */ 1318 func = qualflags; 1319 data = 04000; 1320 break; 1321 case 'S': 1322 /* Match setgid files */ 1323 func = qualflags; 1324 data = 02000; 1325 break; 1326 case 't': 1327 func = qualflags; 1328 data = 01000; 1329 break; 1330 case 'd': 1331 /* Match device files by device number * 1332 * (as given by stat's st_dev element). */ 1333 func = qualdev; 1334 data = qgetnum(&s); 1335 break; 1336 case 'l': 1337 /* Match files with the given no. of hard links */ 1338 func = qualnlink; 1339 g_amc = -1; 1340 goto getrange; 1341 case 'U': 1342 /* Match files owned by effective user ID */ 1343 func = qualuid; 1344 data = geteuid(); 1345 break; 1346 case 'G': 1347 /* Match files owned by effective group ID */ 1348 func = qualgid; 1349 data = getegid(); 1350 break; 1351 case 'u': 1352 /* Match files owned by given user id */ 1353 func = qualuid; 1354 /* either the actual uid... */ 1355 if (idigit(*s)) 1356 data = qgetnum(&s); 1357 else { 1358 /* ... or a user name */ 1359 char sav, *tt; 1360 int arglen; 1361 1362 /* Find matching delimiters */ 1363 tt = get_strarg(s, &arglen); 1364 if (!*tt) { 1365 zerr("missing end of name"); 1366 data = 0; 1367 } else { 1368#ifdef USE_GETPWNAM 1369 struct passwd *pw; 1370 sav = *tt; 1371 *tt = '\0'; 1372 1373 if ((pw = getpwnam(s + arglen))) 1374 data = pw->pw_uid; 1375 else { 1376 zerr("unknown user"); 1377 data = 0; 1378 } 1379 *tt = sav; 1380#else /* !USE_GETPWNAM */ 1381 sav = *tt; 1382 zerr("unknown user"); 1383 data = 0; 1384#endif /* !USE_GETPWNAM */ 1385 if (sav) 1386 s = tt + arglen; 1387 else 1388 s = tt; 1389 } 1390 } 1391 break; 1392 case 'g': 1393 /* Given gid or group id... works like `u' */ 1394 func = qualgid; 1395 /* either the actual gid... */ 1396 if (idigit(*s)) 1397 data = qgetnum(&s); 1398 else { 1399 /* ...or a delimited group name. */ 1400 char sav, *tt; 1401 int arglen; 1402 1403 tt = get_strarg(s, &arglen); 1404 if (!*tt) { 1405 zerr("missing end of name"); 1406 data = 0; 1407 } else { 1408#ifdef USE_GETGRNAM 1409 struct group *gr; 1410 sav = *tt; 1411 *tt = '\0'; 1412 1413 if ((gr = getgrnam(s + arglen))) 1414 data = gr->gr_gid; 1415 else { 1416 zerr("unknown group"); 1417 data = 0; 1418 } 1419 *tt = sav; 1420#else /* !USE_GETGRNAM */ 1421 sav = *tt; 1422 zerr("unknown group"); 1423 data = 0; 1424#endif /* !USE_GETGRNAM */ 1425 if (sav) 1426 s = tt + arglen; 1427 else 1428 s = tt; 1429 } 1430 } 1431 break; 1432 case 'f': 1433 /* Match modes with chmod-spec. */ 1434 func = qualmodeflags; 1435 data = qgetmodespec(&s); 1436 break; 1437 case 'F': 1438 func = qualnonemptydir; 1439 break; 1440 case 'M': 1441 /* Mark directories with a / */ 1442 if ((gf_markdirs = !(sense & 1))) 1443 gf_follow = sense & 2; 1444 break; 1445 case 'T': 1446 /* Mark types in a `ls -F' type fashion */ 1447 if ((gf_listtypes = !(sense & 1))) 1448 gf_follow = sense & 2; 1449 break; 1450 case 'N': 1451 /* Nullglob: remove unmatched patterns. */ 1452 gf_nullglob = !(sense & 1); 1453 break; 1454 case 'D': 1455 /* Glob dots: match leading dots implicitly */ 1456 gf_noglobdots = sense & 1; 1457 break; 1458 case 'n': 1459 /* Numeric glob sort */ 1460 gf_numsort = !(sense & 1); 1461 break; 1462 case 'a': 1463 /* Access time in given range */ 1464 g_amc = 0; 1465 func = qualtime; 1466 goto getrange; 1467 case 'm': 1468 /* Modification time in given range */ 1469 g_amc = 1; 1470 func = qualtime; 1471 goto getrange; 1472 case 'c': 1473 /* Inode creation time in given range */ 1474 g_amc = 2; 1475 func = qualtime; 1476 goto getrange; 1477 case 'L': 1478 /* File size (Length) in given range */ 1479 func = qualsize; 1480 g_amc = -1; 1481 /* Get size multiplier */ 1482 g_units = TT_BYTES; 1483 if (*s == 'p' || *s == 'P') 1484 g_units = TT_POSIX_BLOCKS, ++s; 1485 else if (*s == 'k' || *s == 'K') 1486 g_units = TT_KILOBYTES, ++s; 1487 else if (*s == 'm' || *s == 'M') 1488 g_units = TT_MEGABYTES, ++s; 1489 getrange: 1490 /* Get time multiplier */ 1491 if (g_amc >= 0) { 1492 g_units = TT_DAYS; 1493 if (*s == 'h') 1494 g_units = TT_HOURS, ++s; 1495 else if (*s == 'm') 1496 g_units = TT_MINS, ++s; 1497 else if (*s == 'w') 1498 g_units = TT_WEEKS, ++s; 1499 else if (*s == 'M') 1500 g_units = TT_MONTHS, ++s; 1501 else if (*s == 's') 1502 g_units = TT_SECONDS, ++s; 1503 else if (*s == 'd') 1504 ++s; 1505 } 1506 /* See if it's greater than, equal to, or less than */ 1507 if ((g_range = *s == '+' ? 1 : *s == '-' ? -1 : 0)) 1508 ++s; 1509 data = qgetnum(&s); 1510 break; 1511 1512 case 'o': 1513 case 'O': 1514 { 1515 int t; 1516 char *send; 1517 1518 if (gf_nsorts == MAX_SORTS) { 1519 zerr("too many glob sort specifiers"); 1520 restore_globstate(saved); 1521 return; 1522 } 1523 1524 /* usually just one character */ 1525 send = s+1; 1526 switch (*s) { 1527 case 'n': t = GS_NAME; break; 1528 case 'L': t = GS_SIZE; break; 1529 case 'l': t = GS_LINKS; break; 1530 case 'a': t = GS_ATIME; break; 1531 case 'm': t = GS_MTIME; break; 1532 case 'c': t = GS_CTIME; break; 1533 case 'd': t = GS_DEPTH; break; 1534 case 'N': t = GS_NONE; break; 1535 case 'e': 1536 case '+': 1537 { 1538 t = GS_EXEC; 1539 if ((gf_sortlist[gf_nsorts].exec = 1540 glob_exec_string(&send)) == NULL) 1541 { 1542 restore_globstate(saved); 1543 return; 1544 } 1545 break; 1546 } 1547 default: 1548 zerr("unknown sort specifier"); 1549 restore_globstate(saved); 1550 return; 1551 } 1552 if (t != GS_EXEC) { 1553 if ((sense & 2) && !(t & (GS_NAME|GS_DEPTH))) 1554 t <<= GS_SHIFT; /* HERE: GS_EXEC? */ 1555 if (gf_sorts & t) { 1556 zerr("doubled sort specifier"); 1557 restore_globstate(saved); 1558 return; 1559 } 1560 } 1561 gf_sorts |= t; 1562 gf_sortlist[gf_nsorts++].tp = t | 1563 (((sense & 1) ^ (s[-1] == 'O')) ? GS_DESC : 0); 1564 s = send; 1565 break; 1566 } 1567 case '+': 1568 case 'e': 1569 { 1570 char *tt; 1571 1572 tt = glob_exec_string(&s); 1573 1574 if (tt == NULL) { 1575 data = 0; 1576 } else { 1577 func = qualsheval; 1578 sdata = tt; 1579 } 1580 break; 1581 } 1582 case '[': 1583 case Inbrack: 1584 { 1585 char *os = --s; 1586 struct value v; 1587 1588 v.isarr = SCANPM_WANTVALS; 1589 v.pm = NULL; 1590 v.end = -1; 1591 v.flags = 0; 1592 if (getindex(&s, &v, 0) || s == os) { 1593 zerr("invalid subscript"); 1594 restore_globstate(saved); 1595 return; 1596 } 1597 first = v.start; 1598 end = v.end; 1599 break; 1600 } 1601 case 'P': 1602 { 1603 char *tt; 1604 tt = glob_exec_string(&s); 1605 1606 if (tt != NULL) 1607 { 1608 if (!gf_pre_words) 1609 gf_pre_words = newlinklist(); 1610 addlinknode(gf_pre_words, tt); 1611 } 1612 break; 1613 } 1614 default: 1615 untokenize(--s); 1616 zerr("unknown file attribute: %c", *s); 1617 restore_globstate(saved); 1618 return; 1619 } 1620 } 1621 if (func) { 1622 /* Requested test is performed by function func */ 1623 if (!qn) 1624 qn = (struct qual *)hcalloc(sizeof *qn); 1625 if (ql) 1626 ql->next = qn; 1627 ql = qn; 1628 if (!newquals) 1629 newquals = qo = qn; 1630 qn->func = func; 1631 qn->sense = sense; 1632 qn->data = data; 1633 qn->sdata = sdata; 1634 qn->range = g_range; 1635 qn->units = g_units; 1636 qn->amc = g_amc; 1637 1638 qn = NULL; 1639 qualct++; 1640 } 1641 if (errflag) { 1642 restore_globstate(saved); 1643 return; 1644 } 1645 } 1646 1647 if (quals && newquals) { 1648 /* Merge previous group of qualifiers with new set. */ 1649 if (quals->or || newquals->or) { 1650 /* The hard case. */ 1651 struct qual *qorhead = NULL, *qortail = NULL; 1652 /* 1653 * Distribute in the most trivial way, by creating 1654 * all possible combinations of the two sets and chaining 1655 * these into one long set of alternatives given 1656 * by qorhead and qortail. 1657 */ 1658 for (qn = newquals; qn; qn = qn->or) { 1659 for (qo = quals; qo; qo = qo->or) { 1660 struct qual *qfirst, *qlast; 1661 int islast = !qn->or && !qo->or; 1662 /* Generate first set of qualifiers... */ 1663 if (islast) { 1664 /* Last time round: don't bother copying. */ 1665 qfirst = qn; 1666 for (qlast = qfirst; qlast->next; 1667 qlast = qlast->next) 1668 ; 1669 } else 1670 qfirst = dup_qual_list(qn, &qlast); 1671 /* ... link into new `or' chain ... */ 1672 if (!qorhead) 1673 qorhead = qfirst; 1674 if (qortail) 1675 qortail->or = qfirst; 1676 qortail = qfirst; 1677 /* ... and concatenate second set. */ 1678 qlast->next = islast ? qo : dup_qual_list(qo, NULL); 1679 } 1680 } 1681 quals = qorhead; 1682 } else { 1683 /* 1684 * Easy: we can just chain the qualifiers together. 1685 * This is an optimisation; the code above will work, too. 1686 * We retain the original left to right ordering --- remember 1687 * we are searching for sets of qualifiers from the right. 1688 */ 1689 qn = newquals; 1690 for ( ; newquals->next; newquals = newquals->next) 1691 ; 1692 newquals->next = quals; 1693 quals = qn; 1694 } 1695 } else if (newquals) 1696 quals = newquals; 1697 } 1698 q = parsepat(str); 1699 if (!q || errflag) { /* if parsing failed */ 1700 restore_globstate(saved); 1701 if (unset(BADPATTERN)) { 1702 if (!nountok) 1703 untokenize(ostr); 1704 insertlinknode(list, node, ostr); 1705 return; 1706 } 1707 errflag = 0; 1708 zerr("bad pattern: %s", ostr); 1709 return; 1710 } 1711 if (!gf_nsorts) { 1712 gf_sortlist[0].tp = gf_sorts = GS_NAME; 1713 gf_nsorts = 1; 1714 } 1715 /* Initialise receptacle for matched files, * 1716 * expanded by insert() where necessary. */ 1717 matchptr = matchbuf = (Gmatch)zalloc((matchsz = 16) * 1718 sizeof(struct gmatch)); 1719 matchct = 0; 1720 pattrystart(); 1721 1722 /* The actual processing takes place here: matches go into * 1723 * matchbuf. This is the only top-level call to scanner(). */ 1724 scanner(q); 1725 1726 /* Deal with failures to match depending on options */ 1727 if (matchct) 1728 badcshglob |= 2; /* at least one cmd. line expansion O.K. */ 1729 else if (!gf_nullglob) { 1730 if (isset(CSHNULLGLOB)) { 1731 badcshglob |= 1; /* at least one cmd. line expansion failed */ 1732 } else if (isset(NOMATCH)) { 1733 zerr("no matches found: %s", ostr); 1734 free(matchbuf); 1735 restore_globstate(saved); 1736 return; 1737 } else { 1738 /* treat as an ordinary string */ 1739 untokenize(matchptr->name = dupstring(ostr)); 1740 matchptr++; 1741 matchct = 1; 1742 } 1743 } 1744 1745 if (!(gf_sortlist[0].tp & GS_NONE)) { 1746 /* 1747 * Get the strings to use for sorting by executing 1748 * the code chunk. We allow more than one of these. 1749 */ 1750 int nexecs = 0; 1751 struct globsort *sortp; 1752 struct globsort *lastsortp = gf_sortlist + gf_nsorts; 1753 1754 /* First find out if there are any GS_EXECs, counting them. */ 1755 for (sortp = gf_sortlist; sortp < lastsortp; sortp++) 1756 { 1757 if (sortp->tp & GS_EXEC) 1758 nexecs++; 1759 } 1760 1761 if (nexecs) { 1762 Gmatch tmpptr; 1763 int iexec = 0; 1764 1765 /* Yes; allocate enough space for strings for each */ 1766 for (tmpptr = matchbuf; tmpptr < matchptr; tmpptr++) 1767 tmpptr->sortstrs = (char **)zhalloc(nexecs*sizeof(char*)); 1768 1769 /* Loop over each one, incrementing iexec */ 1770 for (sortp = gf_sortlist; sortp < lastsortp; sortp++) 1771 { 1772 /* Ignore unless this is a GS_EXEC */ 1773 if (sortp->tp & GS_EXEC) { 1774 Eprog prog; 1775 1776 if ((prog = parse_string(sortp->exec, 0))) { 1777 int ef = errflag, lv = lastval; 1778 1779 /* Parsed OK, execute for each name */ 1780 for (tmpptr = matchbuf; tmpptr < matchptr; tmpptr++) { 1781 setsparam("REPLY", ztrdup(tmpptr->name)); 1782 execode(prog, 1, 0, "globsort"); 1783 if (!errflag) 1784 tmpptr->sortstrs[iexec] = 1785 dupstring(getsparam("REPLY")); 1786 else 1787 tmpptr->sortstrs[iexec] = tmpptr->name; 1788 } 1789 1790 errflag = ef; 1791 lastval = lv; 1792 } else { 1793 /* Failed, let's be safe */ 1794 for (tmpptr = matchbuf; tmpptr < matchptr; tmpptr++) 1795 tmpptr->sortstrs[iexec] = tmpptr->name; 1796 } 1797 1798 iexec++; 1799 } 1800 } 1801 } 1802 1803 /* Sort arguments in to lexical (and possibly numeric) order. * 1804 * This is reversed to facilitate insertion into the list. */ 1805 qsort((void *) & matchbuf[0], matchct, sizeof(struct gmatch), 1806 (int (*) _((const void *, const void *)))gmatchcmp); 1807 } 1808 1809 if (first < 0) { 1810 first += matchct; 1811 if (first < 0) 1812 first = 0; 1813 } 1814 if (end < 0) 1815 end += matchct + 1; 1816 else if (end > matchct) 1817 end = matchct; 1818 if ((end -= first) > 0) { 1819 if (gf_sortlist[0].tp & GS_NONE) { 1820 /* Match list was never reversed, so insert back to front. */ 1821 matchptr = matchbuf + matchct - first - 1; 1822 while (end-- > 0) { 1823 /* insert matches in the arg list */ 1824 insert_glob_match(list, node, matchptr->name); 1825 matchptr--; 1826 } 1827 } else { 1828 matchptr = matchbuf + matchct - first - end; 1829 while (end-- > 0) { 1830 /* insert matches in the arg list */ 1831 insert_glob_match(list, node, matchptr->name); 1832 matchptr++; 1833 } 1834 } 1835 } 1836 free(matchbuf); 1837 1838 restore_globstate(saved); 1839} 1840 1841/* Return the trailing character for marking file types */ 1842 1843/**/ 1844mod_export char 1845file_type(mode_t filemode) 1846{ 1847 if(S_ISBLK(filemode)) 1848 return '#'; 1849 else if(S_ISCHR(filemode)) 1850 return '%'; 1851 else if(S_ISDIR(filemode)) 1852 return '/'; 1853 else if(S_ISFIFO(filemode)) 1854 return '|'; 1855 else if(S_ISLNK(filemode)) 1856 return '@'; 1857 else if(S_ISREG(filemode)) 1858 return (filemode & S_IXUGO) ? '*' : ' '; 1859 else if(S_ISSOCK(filemode)) 1860 return '='; 1861 else 1862 return '?'; 1863} 1864 1865/* check to see if str is eligible for brace expansion */ 1866 1867/**/ 1868mod_export int 1869hasbraces(char *str) 1870{ 1871 char *lbr, *mbr, *comma; 1872 1873 if (isset(BRACECCL)) { 1874 /* In this case, any properly formed brace expression * 1875 * will match and expand to the characters in between. */ 1876 int bc, c; 1877 1878 for (bc = 0; (c = *str); ++str) 1879 if (c == Inbrace) { 1880 if (!bc && str[1] == Outbrace) 1881 *str++ = '{', *str = '}'; 1882 else 1883 bc++; 1884 } else if (c == Outbrace) { 1885 if (!bc) 1886 *str = '}'; 1887 else if (!--bc) 1888 return 1; 1889 } 1890 return 0; 1891 } 1892 /* Otherwise we need to look for... */ 1893 lbr = mbr = comma = NULL; 1894 for (;;) { 1895 switch (*str++) { 1896 case Inbrace: 1897 if (!lbr) { 1898 lbr = str - 1; 1899 if (*str == '-') 1900 str++; 1901 while (idigit(*str)) 1902 str++; 1903 if (*str == '.' && str[1] == '.') { 1904 str++; str++; 1905 if (*str == '-') 1906 str++; 1907 while (idigit(*str)) 1908 str++; 1909 if (*str == Outbrace && 1910 (idigit(lbr[1]) || idigit(str[-1]))) 1911 return 1; 1912 else if (*str == '.' && str[1] == '.') { 1913 str++; str++; 1914 if (*str == '-') 1915 str++; 1916 while (idigit(*str)) 1917 str++; 1918 if (*str == Outbrace && 1919 (idigit(lbr[1]) || idigit(str[-1]))) 1920 return 1; 1921 } 1922 } 1923 } else { 1924 char *s = --str; 1925 1926 if (skipparens(Inbrace, Outbrace, &str)) { 1927 *lbr = *s = '{'; 1928 if (comma) 1929 str = comma; 1930 if (mbr && mbr < str) 1931 str = mbr; 1932 lbr = mbr = comma = NULL; 1933 } else if (!mbr) 1934 mbr = s; 1935 } 1936 break; 1937 case Outbrace: 1938 if (!lbr) 1939 str[-1] = '}'; 1940 else if (comma) 1941 return 1; 1942 else { 1943 *lbr = '{'; 1944 str[-1] = '}'; 1945 if (mbr) 1946 str = mbr; 1947 mbr = lbr = NULL; 1948 } 1949 break; 1950 case Comma: 1951 if (!lbr) 1952 str[-1] = ','; 1953 else if (!comma) 1954 comma = str - 1; 1955 break; 1956 case '\0': 1957 if (lbr) 1958 *lbr = '{'; 1959 if (!mbr && !comma) 1960 return 0; 1961 if (comma) 1962 str = comma; 1963 if (mbr && mbr < str) 1964 str = mbr; 1965 lbr = mbr = comma = NULL; 1966 break; 1967 } 1968 } 1969} 1970 1971/* expand stuff like >>*.c */ 1972 1973/**/ 1974int 1975xpandredir(struct redir *fn, LinkList redirtab) 1976{ 1977 char *nam; 1978 struct redir *ff; 1979 int ret = 0; 1980 local_list1(fake); 1981 1982 /* Stick the name in a list... */ 1983 init_list1(fake, fn->name); 1984 /* ...which undergoes all the usual shell expansions */ 1985 prefork(&fake, isset(MULTIOS) ? 0 : PREFORK_SINGLE); 1986 /* Globbing is only done for multios. */ 1987 if (!errflag && isset(MULTIOS)) 1988 globlist(&fake, 0); 1989 if (errflag) 1990 return 0; 1991 if (nonempty(&fake) && !nextnode(firstnode(&fake))) { 1992 /* Just one match, the usual case. */ 1993 char *s = peekfirst(&fake); 1994 fn->name = s; 1995 untokenize(s); 1996 if (fn->type == REDIR_MERGEIN || fn->type == REDIR_MERGEOUT) { 1997 if (s[0] == '-' && !s[1]) 1998 fn->type = REDIR_CLOSE; 1999 else if (s[0] == 'p' && !s[1]) 2000 fn->fd2 = -2; 2001 else { 2002 while (idigit(*s)) 2003 s++; 2004 if (!*s && s > fn->name) 2005 fn->fd2 = zstrtol(fn->name, NULL, 10); 2006 else if (fn->type == REDIR_MERGEIN) 2007 zerr("file number expected"); 2008 else 2009 fn->type = REDIR_ERRWRITE; 2010 } 2011 } 2012 } else if (fn->type == REDIR_MERGEIN) 2013 zerr("file number expected"); 2014 else { 2015 if (fn->type == REDIR_MERGEOUT) 2016 fn->type = REDIR_ERRWRITE; 2017 while ((nam = (char *)ugetnode(&fake))) { 2018 /* Loop over matches, duplicating the * 2019 * redirection for each file found. */ 2020 ff = (struct redir *) zhalloc(sizeof *ff); 2021 *ff = *fn; 2022 ff->name = nam; 2023 addlinknode(redirtab, ff); 2024 ret = 1; 2025 } 2026 } 2027 return ret; 2028} 2029 2030/* brace expansion */ 2031 2032/**/ 2033mod_export void 2034xpandbraces(LinkList list, LinkNode *np) 2035{ 2036 LinkNode node = (*np), last = prevnode(node); 2037 char *str = (char *)getdata(node), *str3 = str, *str2; 2038 int prev, bc, comma, dotdot; 2039 2040 for (; *str != Inbrace; str++); 2041 /* First, match up braces and see what we have. */ 2042 for (str2 = str, bc = comma = dotdot = 0; *str2; ++str2) 2043 if (*str2 == Inbrace) 2044 ++bc; 2045 else if (*str2 == Outbrace) { 2046 if (--bc == 0) 2047 break; 2048 } else if (bc == 1) { 2049 if (*str2 == Comma) 2050 ++comma; /* we have {foo,bar} */ 2051 else if (*str2 == '.' && str2[1] == '.') { 2052 dotdot++; /* we have {num1..num2} */ 2053 ++str2; 2054 } 2055 } 2056 DPUTS(bc, "BUG: unmatched brace in xpandbraces()"); 2057 if (!comma && dotdot) { 2058 /* Expand range like 0..10 numerically: comma or recursive 2059 brace expansion take precedence. */ 2060 char *dots, *p, *dots2 = NULL; 2061 LinkNode olast = last; 2062 /* Get the first number of the range */ 2063 zlong rstart = zstrtol(str+1,&dots,10), rend = 0; 2064 int err = 0, rev = 0, rincr = 1; 2065 int wid1 = (dots - str) - 1, wid2 = (str2 - dots) - 2, wid3 = 0; 2066 int strp = str - str3; 2067 2068 if (dots == str + 1 || *dots != '.' || dots[1] != '.') 2069 err++; 2070 else { 2071 /* Get the last number of the range */ 2072 rend = zstrtol(dots+2,&p,10); 2073 if (p == dots+2) 2074 err++; 2075 /* check for {num1..num2..incr} */ 2076 if (p != str2) { 2077 wid2 = (p - dots) - 2; 2078 dots2 = p; 2079 if (dotdot == 2 && *p == '.' && p[1] == '.') { 2080 rincr = zstrtol(p+2, &p, 10); 2081 wid3 = p - dots2 - 2; 2082 if (p != str2 || !rincr) 2083 err++; 2084 } else 2085 err++; 2086 } 2087 } 2088 if (!err) { 2089 /* If either no. begins with a zero, pad the output with * 2090 * zeroes. Otherwise, set min width to 0 to suppress them. 2091 * str+1 is the first number in the range, dots+2 the last, 2092 * and dots2+2 is the increment if that's given. */ 2093 /* TODO: sorry about this */ 2094 int minw = (str[1] == '0' || (str[1] == '-' && str[2] == '0')) 2095 ? wid1 2096 : (dots[2] == '0' || (dots[2] == '-' && dots[3] == '0')) 2097 ? wid2 2098 : (dots2 && (dots2[2] == '0' || 2099 (dots2[2] == '-' && dots2[3] == '0'))) 2100 ? wid3 2101 : 0; 2102 if (rincr < 0) { 2103 /* Handle negative increment */ 2104 rincr = -rincr; 2105 rev = !rev; 2106 } 2107 if (rstart > rend) { 2108 /* Handle decreasing ranges correctly. */ 2109 zlong rt = rend; 2110 rend = rstart; 2111 rstart = rt; 2112 rev = !rev; 2113 } else if (rincr > 1) { 2114 /* when incr > 1, range is aligned to the highest number of str1, 2115 * compensate for this so that it is aligned to the first number */ 2116 rend -= (rend - rstart) % rincr; 2117 } 2118 uremnode(list, node); 2119 for (; rend >= rstart; rend -= rincr) { 2120 /* Node added in at end, so do highest first */ 2121 p = dupstring(str3); 2122#if defined(ZLONG_IS_LONG_LONG) && defined(PRINTF_HAS_LLD) 2123 sprintf(p + strp, "%0*lld", minw, rend); 2124#else 2125 sprintf(p + strp, "%0*ld", minw, (long)rend); 2126#endif 2127 strcat(p + strp, str2 + 1); 2128 insertlinknode(list, last, p); 2129 if (rev) /* decreasing: add in reverse order. */ 2130 last = nextnode(last); 2131 } 2132 *np = nextnode(olast); 2133 return; 2134 } 2135 } 2136 if (!comma && isset(BRACECCL)) { /* {a-mnop} */ 2137 /* Here we expand each character to a separate node, * 2138 * but also ranges of characters like a-m. ccl is a * 2139 * set of flags saying whether each character is present; * 2140 * the final list is in lexical order. */ 2141 char ccl[256], *p; 2142 unsigned char c1, c2; 2143 unsigned int len, pl; 2144 int lastch = -1; 2145 2146 uremnode(list, node); 2147 memset(ccl, 0, sizeof(ccl) / sizeof(ccl[0])); 2148 for (p = str + 1; p < str2;) { 2149 if (itok(c1 = *p++)) 2150 c1 = ztokens[c1 - STOUC(Pound)]; 2151 if ((char) c1 == Meta) 2152 c1 = 32 ^ *p++; 2153 if (itok(c2 = *p)) 2154 c2 = ztokens[c2 - STOUC(Pound)]; 2155 if ((char) c2 == Meta) 2156 c2 = 32 ^ p[1]; 2157 if (c1 == '-' && lastch >= 0 && p < str2 && lastch <= (int)c2) { 2158 while (lastch < (int)c2) 2159 ccl[lastch++] = 1; 2160 lastch = -1; 2161 } else 2162 ccl[lastch = c1] = 1; 2163 } 2164 pl = str - str3; 2165 len = pl + strlen(++str2) + 2; 2166 for (p = ccl + 256; p-- > ccl;) 2167 if (*p) { 2168 c1 = p - ccl; 2169 if (imeta(c1)) { 2170 str = hcalloc(len + 1); 2171 str[pl] = Meta; 2172 str[pl+1] = c1 ^ 32; 2173 strcpy(str + pl + 2, str2); 2174 } else { 2175 str = hcalloc(len); 2176 str[pl] = c1; 2177 strcpy(str + pl + 1, str2); 2178 } 2179 memcpy(str, str3, pl); 2180 insertlinknode(list, last, str); 2181 } 2182 *np = nextnode(last); 2183 return; 2184 } 2185 prev = str++ - str3; 2186 str2++; 2187 uremnode(list, node); 2188 node = last; 2189 /* Finally, normal comma expansion * 2190 * str1{foo,bar}str2 -> str1foostr2 str1barstr2. * 2191 * Any number of intervening commas is allowed. */ 2192 for (;;) { 2193 char *zz, *str4; 2194 int cnt; 2195 2196 for (str4 = str, cnt = 0; cnt || (*str != Comma && *str != 2197 Outbrace); str++) { 2198 if (*str == Inbrace) 2199 cnt++; 2200 else if (*str == Outbrace) 2201 cnt--; 2202 DPUTS(!*str, "BUG: illegal brace expansion"); 2203 } 2204 /* Concatenate the string before the braces (str3), the section * 2205 * just found (str4) and the text after the braces (str2) */ 2206 zz = (char *) hcalloc(prev + (str - str4) + strlen(str2) + 1); 2207 ztrncpy(zz, str3, prev); 2208 strncat(zz, str4, str - str4); 2209 strcat(zz, str2); 2210 /* and add this text to the argument list. */ 2211 insertlinknode(list, node, zz); 2212 incnode(node); 2213 if (*str != Outbrace) 2214 str++; 2215 else 2216 break; 2217 } 2218 *np = nextnode(last); 2219} 2220 2221/* check to see if a matches b (b is not a filename pattern) */ 2222 2223/**/ 2224int 2225matchpat(char *a, char *b) 2226{ 2227 Patprog p = patcompile(b, PAT_STATIC, NULL); 2228 2229 if (!p) { 2230 zerr("bad pattern: %s", b); 2231 return 0; 2232 } 2233 return pattry(p, a); 2234} 2235 2236/* do the ${foo%%bar}, ${foo#bar} stuff */ 2237/* please do not laugh at this code. */ 2238 2239/* Having found a match in getmatch, decide what part of string 2240 * to return. The matched part starts b characters into string s 2241 * and finishes e characters in: 0 <= b <= e <= strlen(s) 2242 * (yes, empty matches should work). 2243 * fl is a set of the SUB_* matches defined in zsh.h from SUB_MATCH onwards; 2244 * the lower parts are ignored. 2245 * replstr is the replacement string for a substitution 2246 */ 2247 2248/**/ 2249static char * 2250get_match_ret(char *s, int b, int e, int fl, char *replstr, 2251 LinkList repllist) 2252{ 2253 char buf[80], *r, *p, *rr; 2254 int ll = 0, l = strlen(s), bl = 0, t = 0, i; 2255 2256 if (replstr || (fl & SUB_LIST)) { 2257 if (fl & SUB_DOSUBST) { 2258 replstr = dupstring(replstr); 2259 singsub(&replstr); 2260 untokenize(replstr); 2261 } 2262 if ((fl & (SUB_GLOBAL|SUB_LIST)) && repllist) { 2263 /* We are replacing the chunk, just add this to the list */ 2264 Repldata rd = (Repldata) 2265 ((fl & SUB_LIST) ? zalloc(sizeof(*rd)) : zhalloc(sizeof(*rd))); 2266 rd->b = b; 2267 rd->e = e; 2268 rd->replstr = replstr; 2269 if (fl & SUB_LIST) 2270 zaddlinknode(repllist, rd); 2271 else 2272 addlinknode(repllist, rd); 2273 return s; 2274 } 2275 ll += strlen(replstr); 2276 } 2277 if (fl & SUB_MATCH) /* matched portion */ 2278 ll += 1 + (e - b); 2279 if (fl & SUB_REST) /* unmatched portion */ 2280 ll += 1 + (l - (e - b)); 2281 if (fl & SUB_BIND) { 2282 /* position of start of matched portion */ 2283 sprintf(buf, "%d ", b + 1); 2284 ll += (bl = strlen(buf)); 2285 } 2286 if (fl & SUB_EIND) { 2287 /* position of end of matched portion */ 2288 sprintf(buf + bl, "%d ", e + 1); 2289 ll += (bl = strlen(buf)); 2290 } 2291 if (fl & SUB_LEN) { 2292 /* length of matched portion */ 2293 sprintf(buf + bl, "%d ", e - b); 2294 ll += (bl = strlen(buf)); 2295 } 2296 if (bl) 2297 buf[bl - 1] = '\0'; 2298 2299 rr = r = (char *) hcalloc(ll); 2300 2301 if (fl & SUB_MATCH) { 2302 /* copy matched portion to new buffer */ 2303 for (i = b, p = s + b; i < e; i++) 2304 *rr++ = *p++; 2305 t = 1; 2306 } 2307 if (fl & SUB_REST) { 2308 /* Copy unmatched portion to buffer. If both portions * 2309 * requested, put a space in between (why?) */ 2310 if (t) 2311 *rr++ = ' '; 2312 /* there may be unmatched bits at both beginning and end of string */ 2313 for (i = 0, p = s; i < b; i++) 2314 *rr++ = *p++; 2315 if (replstr) 2316 for (p = replstr; *p; ) 2317 *rr++ = *p++; 2318 for (i = e, p = s + e; i < l; i++) 2319 *rr++ = *p++; 2320 t = 1; 2321 } 2322 *rr = '\0'; 2323 if (bl) { 2324 /* if there was a buffer (with a numeric result), add it; * 2325 * if there was other stuff too, stick in a space first. */ 2326 if (t) 2327 *rr++ = ' '; 2328 strcpy(rr, buf); 2329 } 2330 return r; 2331} 2332 2333static Patprog 2334compgetmatch(char *pat, int *flp, char **replstrp) 2335{ 2336 Patprog p; 2337 /* 2338 * Flags to pattern compiler: use static buffer since we only 2339 * have one pattern at a time; we will try the must-match test ourselves, 2340 * so tell the pattern compiler we are scanning. 2341 */ 2342 2343 /* int patflags = PAT_STATIC|PAT_SCAN|PAT_NOANCH;*/ 2344 2345 /* Unfortunately, PAT_STATIC doesn't work if we have a replstr with 2346 * something like ${x#...} in it which will be singsub()ed below because 2347 * that would overwrite the pattern buffer. */ 2348 2349 int patflags = PAT_SCAN|PAT_NOANCH | (*replstrp ? 0 : PAT_STATIC); 2350 2351 /* 2352 * Search is anchored to the end of the string if we want to match 2353 * it all, or if we are matching at the end of the string and not 2354 * using substrings. 2355 */ 2356 if ((*flp & SUB_ALL) || ((*flp & SUB_END) && !(*flp & SUB_SUBSTR))) 2357 patflags &= ~PAT_NOANCH; 2358 p = patcompile(pat, patflags, NULL); 2359 if (!p) { 2360 zerr("bad pattern: %s", pat); 2361 return NULL; 2362 } 2363 if (*replstrp) { 2364 if (p->patnpar || (p->globend & GF_MATCHREF)) { 2365 /* 2366 * Either backreferences or match references, so we 2367 * need to re-substitute replstr each time round. 2368 */ 2369 *flp |= SUB_DOSUBST; 2370 } else { 2371 singsub(replstrp); 2372 untokenize(*replstrp); 2373 } 2374 } 2375 2376 return p; 2377} 2378 2379/* 2380 * This is called from paramsubst to get the match for ${foo#bar} etc. 2381 * fl is a set of the SUB_* flags defined in zsh.h 2382 * *sp points to the string we have to modify. The n'th match will be 2383 * returned in *sp. The heap is used to get memory for the result string. 2384 * replstr is the replacement string from a ${.../orig/repl}, in 2385 * which case pat is the original. 2386 * 2387 * n is now ignored unless we are looking for a substring, in 2388 * which case the n'th match from the start is counted such that 2389 * there is no more than one match from each position. 2390 */ 2391 2392/**/ 2393int 2394getmatch(char **sp, char *pat, int fl, int n, char *replstr) 2395{ 2396 Patprog p; 2397 2398 if (!(p = compgetmatch(pat, &fl, &replstr))) 2399 return 1; 2400 2401 return igetmatch(sp, p, fl, n, replstr, NULL); 2402} 2403 2404/* 2405 * This is the corresponding function for array variables. 2406 * Matching is done with the same pattern on each element. 2407 */ 2408 2409/**/ 2410void 2411getmatcharr(char ***ap, char *pat, int fl, int n, char *replstr) 2412{ 2413 char **arr = *ap, **pp; 2414 Patprog p; 2415 2416 if (!(p = compgetmatch(pat, &fl, &replstr))) 2417 return; 2418 2419 *ap = pp = hcalloc(sizeof(char *) * (arrlen(arr) + 1)); 2420 while ((*pp = *arr++)) 2421 if (igetmatch(pp, p, fl, n, replstr, NULL)) 2422 pp++; 2423} 2424 2425/* 2426 * Match against str using pattern pp; return a list of 2427 * Repldata matches in the linked list *repllistp; this is 2428 * in permanent storage and to be freed by freematchlist() 2429 */ 2430 2431/**/ 2432mod_export int 2433getmatchlist(char *str, Patprog p, LinkList *repllistp) 2434{ 2435 char **sp = &str; 2436 2437 /* 2438 * We don't care if we have longest or shortest match, but SUB_LONG 2439 * is cheaper since the pattern code does that by default. 2440 * We need SUB_GLOBAL to get all matches. 2441 * We need SUB_SUBSTR to scan through for substrings. 2442 * We need SUB_LIST to activate the special handling of the list 2443 * passed in. 2444 */ 2445 return igetmatch(sp, p, SUB_LONG|SUB_GLOBAL|SUB_SUBSTR|SUB_LIST, 2446 0, NULL, repllistp); 2447} 2448 2449static void 2450freerepldata(void *ptr) 2451{ 2452 zfree(ptr, sizeof(struct repldata)); 2453} 2454 2455/**/ 2456mod_export void 2457freematchlist(LinkList repllist) 2458{ 2459 freelinklist(repllist, freerepldata); 2460} 2461 2462/**/ 2463static void 2464set_pat_start(Patprog p, int offs) 2465{ 2466 /* 2467 * If we are messing around with the test string by advancing up 2468 * it from the start, we need to tell the pattern matcher that 2469 * a start-of-string assertion, i.e. (#s), should fail. Hence 2470 * we test whether the offset of the real start of string from 2471 * the actual start, passed as offs, is zero. 2472 */ 2473 if (offs) 2474 p->flags |= PAT_NOTSTART; 2475 else 2476 p->flags &= ~PAT_NOTSTART; 2477} 2478 2479/**/ 2480static void 2481set_pat_end(Patprog p, char null_me) 2482{ 2483 /* 2484 * If we are messing around with the string by shortening it at the 2485 * tail, we need to tell the pattern matcher that an end-of-string 2486 * assertion, i.e. (#e), should fail. Hence we test whether 2487 * the character null_me about to be zapped is or is not already a null. 2488 */ 2489 if (null_me) 2490 p->flags |= PAT_NOTEND; 2491 else 2492 p->flags &= ~PAT_NOTEND; 2493} 2494 2495/**/ 2496#ifdef MULTIBYTE_SUPPORT 2497 2498/* 2499 * Increment *tp over character which may be multibyte. 2500 * Return number of bytes that remain in the character after unmetafication. 2501 */ 2502 2503/**/ 2504static int iincchar(char **tp) 2505{ 2506 char *t = *tp; 2507 int mbclen = mb_metacharlenconv(t, NULL); 2508 int umlen = 0; 2509 2510 while (mbclen--) { 2511 umlen++; 2512 if (*t++ == Meta) { 2513 t++; 2514 mbclen--; 2515 } 2516 } 2517 *tp = t; 2518 2519 return umlen; 2520} 2521 2522/**/ 2523static int 2524igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, 2525 LinkList *repllistp) 2526{ 2527 char *s = *sp, *t, *tmatch; 2528 /* 2529 * Note that ioff counts (possibly multibyte) characters in the 2530 * character set (Meta's are not included), while l counts characters in 2531 * the metafied string. 2532 * 2533 * umlen is a counter for (unmetafied) byte lengths---neither characters 2534 * nor raw byte indices; this is simply an optimisation for allocation. 2535 * umltot is the full length of the string in this scheme. 2536 * 2537 * l is the raw string length, used together with any pointers into 2538 * the string (typically t). 2539 */ 2540 int ioff, l = strlen(*sp), matched = 1, umltot = ztrlen(*sp); 2541 int umlen, nmatches; 2542 /* 2543 * List of bits of matches to concatenate with replacement string. 2544 * The data is a struct repldata. It is not used in cases like 2545 * ${...//#foo/bar} even though SUB_GLOBAL is set, since the match 2546 * is anchored. It goes on the heap. 2547 */ 2548 LinkList repllist = NULL; 2549 2550 /* perform must-match test for complex closures */ 2551 if (p->mustoff) 2552 { 2553 /* 2554 * Yuk. Probably we should rewrite this whole function to 2555 * use an unmetafied test string. 2556 * 2557 * Use META_HEAPDUP because we need a terminating NULL. 2558 */ 2559 char *muststr = metafy((char *)p + p->mustoff, 2560 p->patmlen, META_HEAPDUP); 2561 2562 if (!strstr(s, muststr)) 2563 matched = 0; 2564 } 2565 2566 /* in case we used the prog before... */ 2567 p->flags &= ~(PAT_NOTSTART|PAT_NOTEND); 2568 2569 if (fl & SUB_ALL) { 2570 int i = matched && pattry(p, s); 2571 *sp = get_match_ret(*sp, 0, i ? l : 0, fl, i ? replstr : 0, NULL); 2572 if (! **sp && (((fl & SUB_MATCH) && !i) || ((fl & SUB_REST) && i))) 2573 return 0; 2574 return 1; 2575 } 2576 if (matched) { 2577 /* 2578 * The default behaviour is to match at the start; this 2579 * is modified by SUB_END and SUB_SUBSTR. SUB_END matches 2580 * at the end of the string instead of the start. SUB_SUBSTR 2581 * without SUB_END matches substrings searching from the start; 2582 * with SUB_END it matches substrings searching from the end. 2583 * 2584 * The possibilities are further modified by whether we want the 2585 * longest (SUB_LONG) or shortest possible match. 2586 * 2587 * SUB_START is only used in the case where we are also 2588 * forcing a match at the end (SUB_END with no SUB_SUBSTR, 2589 * with or without SUB_LONG), to indicate we should match 2590 * the entire string. 2591 */ 2592 switch (fl & (SUB_END|SUB_LONG|SUB_SUBSTR)) { 2593 case 0: 2594 case SUB_LONG: 2595 /* 2596 * Largest/smallest possible match at head of string. 2597 * First get the longest match... 2598 */ 2599 if (pattry(p, s)) { 2600 /* patmatchlen returns metafied length, as we need */ 2601 int mlen = patmatchlen(); 2602 if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) { 2603 /* 2604 * ... now we know whether it's worth looking for the 2605 * shortest, which we do by brute force. 2606 */ 2607 mb_metacharinit(); 2608 for (t = s, umlen = 0; t < s + mlen; ) { 2609 set_pat_end(p, *t); 2610 if (pattrylen(p, s, t - s, umlen, 0)) { 2611 mlen = patmatchlen(); 2612 break; 2613 } 2614 umlen += iincchar(&t); 2615 } 2616 } 2617 *sp = get_match_ret(*sp, 0, mlen, fl, replstr, NULL); 2618 return 1; 2619 } 2620 break; 2621 2622 case SUB_END: 2623 /* 2624 * Smallest possible match at tail of string. 2625 * As we can only be sure we've got wide characters right 2626 * when going forwards, we need to match at every point 2627 * until we fail and record the last successful match. 2628 * 2629 * It's important that we return the last successful match 2630 * so that match, mbegin, mend and MATCH, MBEGIN, MEND are 2631 * correct. 2632 */ 2633 mb_metacharinit(); 2634 tmatch = NULL; 2635 for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) { 2636 set_pat_start(p, t-s); 2637 if (pattrylen(p, t, s + l - t, umlen, ioff)) 2638 tmatch = t; 2639 if (fl & SUB_START) 2640 break; 2641 umlen -= iincchar(&t); 2642 } 2643 if (tmatch) { 2644 *sp = get_match_ret(*sp, tmatch - s, l, fl, replstr, NULL); 2645 return 1; 2646 } 2647 if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, ioff)) { 2648 *sp = get_match_ret(*sp, l, l, fl, replstr, NULL); 2649 return 1; 2650 } 2651 break; 2652 2653 case (SUB_END|SUB_LONG): 2654 /* Largest possible match at tail of string: * 2655 * move forward along string until we get a match. * 2656 * Again there's no optimisation. */ 2657 mb_metacharinit(); 2658 for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) { 2659 set_pat_start(p, t-s); 2660 if (pattrylen(p, t, s + l - t, umlen, ioff)) { 2661 *sp = get_match_ret(*sp, t-s, l, fl, replstr, NULL); 2662 return 1; 2663 } 2664 if (fl & SUB_START) 2665 break; 2666 umlen -= iincchar(&t); 2667 } 2668 if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, ioff)) { 2669 *sp = get_match_ret(*sp, l, l, fl, replstr, NULL); 2670 return 1; 2671 } 2672 break; 2673 2674 case SUB_SUBSTR: 2675 /* Smallest at start, but matching substrings. */ 2676 set_pat_start(p, l); 2677 if (!(fl & SUB_GLOBAL) && pattry(p, s + l) && !--n) { 2678 *sp = get_match_ret(*sp, 0, 0, fl, replstr, NULL); 2679 return 1; 2680 } /* fall through */ 2681 case (SUB_SUBSTR|SUB_LONG): 2682 /* longest or smallest at start with substrings */ 2683 t = s; 2684 if (fl & SUB_GLOBAL) { 2685 repllist = (fl & SUB_LIST) ? znewlinklist() : newlinklist(); 2686 if (repllistp) 2687 *repllistp = repllist; 2688 } 2689 ioff = 0; /* offset into string */ 2690 umlen = umltot; 2691 mb_metacharinit(); 2692 do { 2693 /* loop over all matches for global substitution */ 2694 matched = 0; 2695 for (; t < s + l; ioff++) { 2696 /* Find the longest match from this position. */ 2697 set_pat_start(p, t-s); 2698 if (pattrylen(p, t, s + l - t, umlen, ioff)) { 2699 char *mpos = t + patmatchlen(); 2700 if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) { 2701 char *ptr; 2702 int umlen2; 2703 /* 2704 * If searching for the shortest match, 2705 * start with a zero length and increase 2706 * it until we reach the longest possible 2707 * match, accepting the first successful 2708 * match. 2709 */ 2710 for (ptr = t, umlen2 = 0; ptr < mpos;) { 2711 set_pat_end(p, *ptr); 2712 if (pattrylen(p, t, ptr - t, umlen2, ioff)) { 2713 mpos = t + patmatchlen(); 2714 break; 2715 } 2716 umlen2 += iincchar(&ptr); 2717 } 2718 } 2719 if (!--n || (n <= 0 && (fl & SUB_GLOBAL))) { 2720 *sp = get_match_ret(*sp, t-s, mpos-s, fl, 2721 replstr, repllist); 2722 if (mpos == t) 2723 mpos += mb_metacharlenconv(mpos, NULL); 2724 } 2725 if (!(fl & SUB_GLOBAL)) { 2726 if (n) { 2727 /* 2728 * Looking for a later match: in this case, 2729 * we can continue looking for matches from 2730 * the next character, even if it overlaps 2731 * with what we just found. 2732 */ 2733 umlen -= iincchar(&t); 2734 continue; 2735 } else { 2736 return 1; 2737 } 2738 } 2739 /* 2740 * For a global match, we need to skip the stuff 2741 * which is already marked for replacement. 2742 */ 2743 matched = 1; 2744 while (t < mpos) { 2745 ioff++; 2746 umlen -= iincchar(&t); 2747 } 2748 break; 2749 } 2750 umlen -= iincchar(&t); 2751 } 2752 } while (matched); 2753 /* 2754 * check if we can match a blank string, if so do it 2755 * at the start. Goodness knows if this is a good idea 2756 * with global substitution, so it doesn't happen. 2757 */ 2758 set_pat_start(p, l); 2759 if ((fl & (SUB_LONG|SUB_GLOBAL)) == SUB_LONG && 2760 pattry(p, s + l) && !--n) { 2761 *sp = get_match_ret(*sp, 0, 0, fl, replstr, repllist); 2762 return 1; 2763 } 2764 break; 2765 2766 case (SUB_END|SUB_SUBSTR): 2767 case (SUB_END|SUB_LONG|SUB_SUBSTR): 2768 /* Longest/shortest at end, matching substrings. */ 2769 if (!(fl & SUB_LONG)) { 2770 set_pat_start(p, l); 2771 if (pattrylen(p, s + l, 0, 0, umltot) && !--n) { 2772 *sp = get_match_ret(*sp, l, l, fl, replstr, NULL); 2773 return 1; 2774 } 2775 } 2776 /* 2777 * If multibyte characters are present we need to start from the 2778 * beginning. This is a bit unpleasant because we can't tell in 2779 * advance how many times it will match and from where, so if n is 2780 * greater then 1 we will need to count the number of times it 2781 * matched and then go through again until we reach the right 2782 * point. (Either that or record every single match in a list, 2783 * which isn't stupid; it involves more memory management at this 2784 * level but less use of the pattern matcher.) 2785 */ 2786 nmatches = 0; 2787 tmatch = NULL; 2788 mb_metacharinit(); 2789 for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) { 2790 set_pat_start(p, t-s); 2791 if (pattrylen(p, t, s + l - t, umlen, ioff)) { 2792 nmatches++; 2793 tmatch = t; 2794 } 2795 umlen -= iincchar(&t); 2796 } 2797 if (nmatches) { 2798 char *mpos; 2799 if (n > 1) { 2800 /* 2801 * We need to find the n'th last match. 2802 */ 2803 n = nmatches - n; 2804 mb_metacharinit(); 2805 for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) { 2806 set_pat_start(p, t-s); 2807 if (pattrylen(p, t, s + l - t, umlen, ioff) && 2808 !n--) { 2809 tmatch = t; 2810 break; 2811 } 2812 umlen -= iincchar(&t); 2813 } 2814 } 2815 mpos = tmatch + patmatchlen(); 2816 /* Look for the shortest match if necessary */ 2817 if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) { 2818 for (t = tmatch, umlen = 0; t < mpos; ) { 2819 set_pat_end(p, *t); 2820 if (pattrylen(p, tmatch, t - tmatch, umlen, ioff)) { 2821 mpos = tmatch + patmatchlen(); 2822 break; 2823 } 2824 umlen += iincchar(&t); 2825 } 2826 } 2827 *sp = get_match_ret(*sp, tmatch-s, mpos-s, fl, 2828 replstr, NULL); 2829 return 1; 2830 } 2831 set_pat_start(p, l); 2832 if ((fl & SUB_LONG) && pattrylen(p, s + l, 0, 0, umltot) && !--n) { 2833 *sp = get_match_ret(*sp, l, l, fl, replstr, NULL); 2834 return 1; 2835 } 2836 break; 2837 } 2838 } 2839 2840 if (repllist && nonempty(repllist)) { 2841 /* Put all the bits of a global search and replace together. */ 2842 LinkNode nd; 2843 Repldata rd; 2844 int lleft; 2845 char *ptr, *start; 2846 int i; 2847 2848 if (!(fl & SUB_LIST)) { 2849 lleft = 0; /* size of returned string */ 2850 i = 0; /* start of last chunk we got from *sp */ 2851 for (nd = firstnode(repllist); nd; incnode(nd)) { 2852 rd = (Repldata) getdata(nd); 2853 lleft += rd->b - i; /* previous chunk of *sp */ 2854 lleft += strlen(rd->replstr); /* the replaced bit */ 2855 i = rd->e; /* start of next chunk of *sp */ 2856 } 2857 lleft += l - i; /* final chunk from *sp */ 2858 start = t = zhalloc(lleft+1); 2859 i = 0; 2860 for (nd = firstnode(repllist); nd; incnode(nd)) { 2861 rd = (Repldata) getdata(nd); 2862 memcpy(t, s + i, rd->b - i); 2863 t += rd->b - i; 2864 ptr = rd->replstr; 2865 while (*ptr) 2866 *t++ = *ptr++; 2867 i = rd->e; 2868 } 2869 memcpy(t, s + i, l - i); 2870 start[lleft] = '\0'; 2871 *sp = (char *)start; 2872 } 2873 return 1; 2874 } 2875 if (fl & SUB_LIST) /* safety: don't think this can happen */ 2876 return 0; 2877 2878 /* munge the whole string: no match, so no replstr */ 2879 *sp = get_match_ret(*sp, 0, 0, fl, 0, 0); 2880 return (fl & SUB_RETFAIL) ? 0 : 1; 2881} 2882 2883/**/ 2884#else 2885 2886/* 2887 * Increment pointer which may be on a Meta (x is a pointer variable), 2888 * returning the incremented value (i.e. like pre-increment). 2889 */ 2890#define METAINC(x) ((x) += (*(x) == Meta) ? 2 : 1) 2891 2892/**/ 2893static int 2894igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, 2895 LinkList *repllistp) 2896{ 2897 char *s = *sp, *t; 2898 /* 2899 * Note that ioff and uml count characters in the character 2900 * set (Meta's are not included), while l counts characters in the 2901 * metafied string. umlen is a counter for (unmetafied) character 2902 * lengths. 2903 */ 2904 int ioff, l = strlen(*sp), uml = ztrlen(*sp), matched = 1, umlen; 2905 /* 2906 * List of bits of matches to concatenate with replacement string. 2907 * The data is a struct repldata. It is not used in cases like 2908 * ${...//#foo/bar} even though SUB_GLOBAL is set, since the match 2909 * is anchored. It goes on the heap. 2910 */ 2911 LinkList repllist = NULL; 2912 2913 /* perform must-match test for complex closures */ 2914 if (p->mustoff) 2915 { 2916 /* 2917 * Yuk. Probably we should rewrite this whole function to 2918 * use an unmetafied test string. 2919 * 2920 * Use META_HEAPDUP because we need a terminating NULL. 2921 */ 2922 char *muststr = metafy((char *)p + p->mustoff, 2923 p->patmlen, META_HEAPDUP); 2924 2925 if (!strstr(s, muststr)) 2926 matched = 0; 2927 } 2928 2929 /* in case we used the prog before... */ 2930 p->flags &= ~(PAT_NOTSTART|PAT_NOTEND); 2931 2932 if (fl & SUB_ALL) { 2933 int i = matched && pattry(p, s); 2934 *sp = get_match_ret(*sp, 0, i ? l : 0, fl, i ? replstr : 0, NULL); 2935 if (! **sp && (((fl & SUB_MATCH) && !i) || ((fl & SUB_REST) && i))) 2936 return 0; 2937 return 1; 2938 } 2939 if (matched) { 2940 switch (fl & (SUB_END|SUB_LONG|SUB_SUBSTR)) { 2941 case 0: 2942 case SUB_LONG: 2943 /* 2944 * Largest/smallest possible match at head of string. 2945 * First get the longest match... 2946 */ 2947 if (pattry(p, s)) { 2948 /* patmatchlen returns metafied length, as we need */ 2949 int mlen = patmatchlen(); 2950 if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) { 2951 /* 2952 * ... now we know whether it's worth looking for the 2953 * shortest, which we do by brute force. 2954 */ 2955 for (t = s, umlen = 0; t < s + mlen; METAINC(t), umlen++) { 2956 set_pat_end(p, *t); 2957 if (pattrylen(p, s, t - s, umlen, 0)) { 2958 mlen = patmatchlen(); 2959 break; 2960 } 2961 } 2962 } 2963 *sp = get_match_ret(*sp, 0, mlen, fl, replstr, NULL); 2964 return 1; 2965 } 2966 break; 2967 2968 case SUB_END: 2969 /* Smallest possible match at tail of string: * 2970 * move back down string until we get a match. * 2971 * There's no optimization here. */ 2972 for (ioff = uml, t = s + l, umlen = 0; t >= s; 2973 t--, ioff--, umlen++) { 2974 if (t > s && t[-1] == Meta) 2975 t--; 2976 set_pat_start(p, t-s); 2977 if (pattrylen(p, t, s + l - t, umlen, ioff)) { 2978 *sp = get_match_ret(*sp, t - s, l, fl, replstr, NULL); 2979 return 1; 2980 } 2981 if (t > s+1 && t[-2] == Meta) 2982 t--; 2983 } 2984 break; 2985 2986 case (SUB_END|SUB_LONG): 2987 /* Largest possible match at tail of string: * 2988 * move forward along string until we get a match. * 2989 * Again there's no optimisation. */ 2990 for (ioff = 0, t = s, umlen = uml; t < s + l; 2991 ioff++, METAINC(t), umlen--) { 2992 set_pat_start(p, t-s); 2993 if (pattrylen(p, t, s + l - t, umlen, ioff)) { 2994 *sp = get_match_ret(*sp, t-s, l, fl, replstr, NULL); 2995 return 1; 2996 } 2997 if (*t == Meta) 2998 t++; 2999 } 3000 break; 3001 3002 case SUB_SUBSTR: 3003 /* Smallest at start, but matching substrings. */ 3004 set_pat_start(p, l); 3005 if (!(fl & SUB_GLOBAL) && pattry(p, s + l) && !--n) { 3006 *sp = get_match_ret(*sp, 0, 0, fl, replstr, NULL); 3007 return 1; 3008 } /* fall through */ 3009 case (SUB_SUBSTR|SUB_LONG): 3010 /* longest or smallest at start with substrings */ 3011 t = s; 3012 if (fl & SUB_GLOBAL) { 3013 repllist = newlinklist(); 3014 if (repllistp) 3015 *repllistp = repllist; 3016 } 3017 ioff = 0; /* offset into string */ 3018 umlen = uml; 3019 do { 3020 /* loop over all matches for global substitution */ 3021 matched = 0; 3022 for (; t < s + l; METAINC(t), ioff++, umlen--) { 3023 /* Find the longest match from this position. */ 3024 set_pat_start(p, t-s); 3025 if (pattrylen(p, t, s + l - t, umlen, ioff)) { 3026 char *mpos = t + patmatchlen(); 3027 if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) { 3028 char *ptr; 3029 int umlen2; 3030 for (ptr = t, umlen2 = 0; ptr < mpos; 3031 METAINC(ptr), umlen2++) { 3032 set_pat_end(p, *ptr); 3033 if (pattrylen(p, t, ptr - t, umlen2, ioff)) { 3034 mpos = t + patmatchlen(); 3035 break; 3036 } 3037 } 3038 } 3039 if (!--n || (n <= 0 && (fl & SUB_GLOBAL))) { 3040 *sp = get_match_ret(*sp, t-s, mpos-s, fl, 3041 replstr, repllist); 3042 if (mpos == t) 3043 METAINC(mpos); 3044 } 3045 if (!(fl & SUB_GLOBAL)) { 3046 if (n) { 3047 /* 3048 * Looking for a later match: in this case, 3049 * we can continue looking for matches from 3050 * the next character, even if it overlaps 3051 * with what we just found. 3052 */ 3053 continue; 3054 } else { 3055 return 1; 3056 } 3057 } 3058 /* 3059 * For a global match, we need to skip the stuff 3060 * which is already marked for replacement. 3061 */ 3062 matched = 1; 3063 for ( ; t < mpos; t++, ioff++, umlen--) 3064 if (*t == Meta) 3065 t++; 3066 break; 3067 } 3068 if (*t == Meta) 3069 t++; 3070 } 3071 } while (matched); 3072 /* 3073 * check if we can match a blank string, if so do it 3074 * at the start. Goodness knows if this is a good idea 3075 * with global substitution, so it doesn't happen. 3076 */ 3077 set_pat_start(p, l); 3078 if ((fl & (SUB_LONG|SUB_GLOBAL)) == SUB_LONG && 3079 pattry(p, s + l) && !--n) { 3080 *sp = get_match_ret(*sp, 0, 0, fl, replstr, repllist); 3081 return 1; 3082 } 3083 break; 3084 3085 case (SUB_END|SUB_SUBSTR): 3086 case (SUB_END|SUB_LONG|SUB_SUBSTR): 3087 /* Longest/shortest at end, matching substrings. */ 3088 if (!(fl & SUB_LONG)) { 3089 set_pat_start(p, l); 3090 if (pattrylen(p, s + l, 0, 0, uml) && !--n) { 3091 *sp = get_match_ret(*sp, l, l, fl, replstr, NULL); 3092 return 1; 3093 } 3094 } 3095 for (ioff = uml - 1, t = s + l - 1, umlen = 1; t >= s; 3096 t--, ioff--, umlen++) { 3097 if (t > s && t[-1] == Meta) 3098 t--; 3099 set_pat_start(p, t-s); 3100 if (pattrylen(p, t, s + l - t, umlen, ioff) && !--n) { 3101 /* Found the longest match */ 3102 char *mpos = t + patmatchlen(); 3103 if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) { 3104 char *ptr; 3105 int umlen2; 3106 for (ptr = t, umlen2 = 0; ptr < mpos; 3107 METAINC(ptr), umlen2++) { 3108 set_pat_end(p, *ptr); 3109 if (pattrylen(p, t, ptr - t, umlen2, ioff)) { 3110 mpos = t + patmatchlen(); 3111 break; 3112 } 3113 } 3114 } 3115 *sp = get_match_ret(*sp, t-s, mpos-s, fl, 3116 replstr, NULL); 3117 return 1; 3118 } 3119 } 3120 set_pat_start(p, l); 3121 if ((fl & SUB_LONG) && pattrylen(p, s + l, 0, 0, uml) && !--n) { 3122 *sp = get_match_ret(*sp, l, l, fl, replstr, NULL); 3123 return 1; 3124 } 3125 break; 3126 } 3127 } 3128 3129 if (repllist && nonempty(repllist)) { 3130 /* Put all the bits of a global search and replace together. */ 3131 LinkNode nd; 3132 Repldata rd; 3133 int lleft = 0; /* size of returned string */ 3134 char *ptr, *start; 3135 int i; 3136 3137 i = 0; /* start of last chunk we got from *sp */ 3138 for (nd = firstnode(repllist); nd; incnode(nd)) { 3139 rd = (Repldata) getdata(nd); 3140 lleft += rd->b - i; /* previous chunk of *sp */ 3141 lleft += strlen(rd->replstr); /* the replaced bit */ 3142 i = rd->e; /* start of next chunk of *sp */ 3143 } 3144 lleft += l - i; /* final chunk from *sp */ 3145 start = t = zhalloc(lleft+1); 3146 i = 0; 3147 for (nd = firstnode(repllist); nd; incnode(nd)) { 3148 rd = (Repldata) getdata(nd); 3149 memcpy(t, s + i, rd->b - i); 3150 t += rd->b - i; 3151 ptr = rd->replstr; 3152 while (*ptr) 3153 *t++ = *ptr++; 3154 i = rd->e; 3155 } 3156 memcpy(t, s + i, l - i); 3157 start[lleft] = '\0'; 3158 *sp = (char *)start; 3159 return 1; 3160 } 3161 3162 /* munge the whole string: no match, so no replstr */ 3163 *sp = get_match_ret(*sp, 0, 0, fl, 0, 0); 3164 return 1; 3165} 3166 3167/**/ 3168#endif /* MULTIBYTE_SUPPORT */ 3169 3170/* blindly turn a string into a tokenised expression without lexing */ 3171 3172/**/ 3173mod_export void 3174tokenize(char *s) 3175{ 3176 zshtokenize(s, 0); 3177} 3178 3179/* 3180 * shtokenize is used when we tokenize a string with GLOB_SUBST set. 3181 * In that case we need to retain backslashes when we turn the 3182 * pattern back into a string, so that the string is not 3183 * modified if it failed to match a pattern. 3184 * 3185 * It may be modified by the effect of SH_GLOB which turns off 3186 * various zsh-specific options. 3187 */ 3188 3189/**/ 3190mod_export void 3191shtokenize(char *s) 3192{ 3193 int flags = ZSHTOK_SUBST; 3194 if (isset(SHGLOB)) 3195 flags |= ZSHTOK_SHGLOB; 3196 zshtokenize(s, flags); 3197} 3198 3199/**/ 3200static void 3201zshtokenize(char *s, int flags) 3202{ 3203 char *t; 3204 int bslash = 0; 3205 3206 for (; *s; s++) { 3207 cont: 3208 switch (*s) { 3209 case Bnull: 3210 case Bnullkeep: 3211 case '\\': 3212 if (bslash) { 3213 s[-1] = (flags & ZSHTOK_SUBST) ? Bnullkeep : Bnull; 3214 break; 3215 } 3216 bslash = 1; 3217 continue; 3218 case '<': 3219 if (flags & ZSHTOK_SHGLOB) 3220 break; 3221 if (bslash) { 3222 s[-1] = (flags & ZSHTOK_SUBST) ? Bnullkeep : Bnull; 3223 break; 3224 } 3225 t = s; 3226 while (idigit(*++s)); 3227 if (*s != '-') 3228 goto cont; 3229 while (idigit(*++s)); 3230 if (*s != '>') 3231 goto cont; 3232 *t = Inang; 3233 *s = Outang; 3234 break; 3235 case '(': 3236 case '|': 3237 case ')': 3238 if (flags & ZSHTOK_SHGLOB) 3239 break; 3240 case '>': 3241 case '^': 3242 case '#': 3243 case '~': 3244 case '[': 3245 case ']': 3246 case '*': 3247 case '?': 3248 case '=': 3249 for (t = ztokens; *t; t++) 3250 if (*t == *s) { 3251 if (bslash) 3252 s[-1] = (flags & ZSHTOK_SUBST) ? Bnullkeep : Bnull; 3253 else 3254 *s = (t - ztokens) + Pound; 3255 break; 3256 } 3257 } 3258 bslash = 0; 3259 } 3260} 3261 3262/* remove unnecessary Nulargs */ 3263 3264/**/ 3265mod_export void 3266remnulargs(char *s) 3267{ 3268 if (*s) { 3269 char *o = s, c; 3270 3271 while ((c = *s++)) 3272 if (c == Bnullkeep) { 3273 /* 3274 * An active backslash that needs to be turned back into 3275 * a real backslash for output. However, we don't 3276 * do that yet since we need to ignore it during 3277 * pattern matching. 3278 */ 3279 continue; 3280 } else if (inull(c)) { 3281 char *t = s - 1; 3282 3283 while ((c = *s++)) { 3284 if (c == Bnullkeep) 3285 *t++ = '\\'; 3286 else if (!inull(c)) 3287 *t++ = c; 3288 } 3289 *t = '\0'; 3290 if (!*o) { 3291 o[0] = Nularg; 3292 o[1] = '\0'; 3293 } 3294 break; 3295 } 3296 } 3297} 3298 3299/* qualifier functions: mostly self-explanatory, see glob(). */ 3300 3301/* device number */ 3302 3303/**/ 3304static int 3305qualdev(UNUSED(char *name), struct stat *buf, off_t dv, UNUSED(char *dummy)) 3306{ 3307 return (off_t)buf->st_dev == dv; 3308} 3309 3310/* number of hard links to file */ 3311 3312/**/ 3313static int 3314qualnlink(UNUSED(char *name), struct stat *buf, off_t ct, UNUSED(char *dummy)) 3315{ 3316 return (g_range < 0 ? buf->st_nlink < ct : 3317 g_range > 0 ? buf->st_nlink > ct : 3318 buf->st_nlink == ct); 3319} 3320 3321/* user ID */ 3322 3323/**/ 3324static int 3325qualuid(UNUSED(char *name), struct stat *buf, off_t uid, UNUSED(char *dummy)) 3326{ 3327 return buf->st_uid == uid; 3328} 3329 3330/* group ID */ 3331 3332/**/ 3333static int 3334qualgid(UNUSED(char *name), struct stat *buf, off_t gid, UNUSED(char *dummy)) 3335{ 3336 return buf->st_gid == gid; 3337} 3338 3339/* device special file? */ 3340 3341/**/ 3342static int 3343qualisdev(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy)) 3344{ 3345 return S_ISBLK(buf->st_mode) || S_ISCHR(buf->st_mode); 3346} 3347 3348/* block special file? */ 3349 3350/**/ 3351static int 3352qualisblk(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy)) 3353{ 3354 return S_ISBLK(buf->st_mode); 3355} 3356 3357/* character special file? */ 3358 3359/**/ 3360static int 3361qualischr(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy)) 3362{ 3363 return S_ISCHR(buf->st_mode); 3364} 3365 3366/* directory? */ 3367 3368/**/ 3369static int 3370qualisdir(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy)) 3371{ 3372 return S_ISDIR(buf->st_mode); 3373} 3374 3375/* FIFO? */ 3376 3377/**/ 3378static int 3379qualisfifo(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy)) 3380{ 3381 return S_ISFIFO(buf->st_mode); 3382} 3383 3384/* symbolic link? */ 3385 3386/**/ 3387static int 3388qualislnk(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy)) 3389{ 3390 return S_ISLNK(buf->st_mode); 3391} 3392 3393/* regular file? */ 3394 3395/**/ 3396static int 3397qualisreg(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy)) 3398{ 3399 return S_ISREG(buf->st_mode); 3400} 3401 3402/* socket? */ 3403 3404/**/ 3405static int 3406qualissock(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy)) 3407{ 3408 return S_ISSOCK(buf->st_mode); 3409} 3410 3411/* given flag is set in mode */ 3412 3413/**/ 3414static int 3415qualflags(UNUSED(char *name), struct stat *buf, off_t mod, UNUSED(char *dummy)) 3416{ 3417 return mode_to_octal(buf->st_mode) & mod; 3418} 3419 3420/* mode matches specification */ 3421 3422/**/ 3423static int 3424qualmodeflags(UNUSED(char *name), struct stat *buf, off_t mod, UNUSED(char *dummy)) 3425{ 3426 long v = mode_to_octal(buf->st_mode), y = mod & 07777, n = mod >> 12; 3427 3428 return ((v & y) == y && !(v & n)); 3429} 3430 3431/* regular executable file? */ 3432 3433/**/ 3434static int 3435qualiscom(UNUSED(char *name), struct stat *buf, UNUSED(off_t mod), UNUSED(char *dummy)) 3436{ 3437 return S_ISREG(buf->st_mode) && (buf->st_mode & S_IXUGO); 3438} 3439 3440/* size in required range? */ 3441 3442/**/ 3443static int 3444qualsize(UNUSED(char *name), struct stat *buf, off_t size, UNUSED(char *dummy)) 3445{ 3446#if defined(LONG_IS_64_BIT) || defined(OFF_T_IS_64_BIT) 3447# define QS_CAST_SIZE() 3448 off_t scaled = buf->st_size; 3449#else 3450# define QS_CAST_SIZE() (unsigned long) 3451 unsigned long scaled = (unsigned long)buf->st_size; 3452#endif 3453 3454 switch (g_units) { 3455 case TT_POSIX_BLOCKS: 3456 scaled += 511l; 3457 scaled /= 512l; 3458 break; 3459 case TT_KILOBYTES: 3460 scaled += 1023l; 3461 scaled /= 1024l; 3462 break; 3463 case TT_MEGABYTES: 3464 scaled += 1048575l; 3465 scaled /= 1048576l; 3466 break; 3467 } 3468 3469 return (g_range < 0 ? scaled < QS_CAST_SIZE() size : 3470 g_range > 0 ? scaled > QS_CAST_SIZE() size : 3471 scaled == QS_CAST_SIZE() size); 3472#undef QS_CAST_SIZE 3473} 3474 3475/* time in required range? */ 3476 3477/**/ 3478static int 3479qualtime(UNUSED(char *name), struct stat *buf, off_t days, UNUSED(char *dummy)) 3480{ 3481 time_t now, diff; 3482 3483 time(&now); 3484 diff = now - (g_amc == 0 ? buf->st_atime : g_amc == 1 ? buf->st_mtime : 3485 buf->st_ctime); 3486 /* handle multipliers indicating units */ 3487 switch (g_units) { 3488 case TT_DAYS: 3489 diff /= 86400l; 3490 break; 3491 case TT_HOURS: 3492 diff /= 3600l; 3493 break; 3494 case TT_MINS: 3495 diff /= 60l; 3496 break; 3497 case TT_WEEKS: 3498 diff /= 604800l; 3499 break; 3500 case TT_MONTHS: 3501 diff /= 2592000l; 3502 break; 3503 } 3504 3505 return (g_range < 0 ? diff < days : 3506 g_range > 0 ? diff > days : 3507 diff == days); 3508} 3509 3510/* evaluate a string */ 3511 3512/**/ 3513static int 3514qualsheval(char *name, UNUSED(struct stat *buf), UNUSED(off_t days), char *str) 3515{ 3516 Eprog prog; 3517 3518 if ((prog = parse_string(str, 0))) { 3519 int ef = errflag, lv = lastval, ret; 3520 3521 unsetparam("reply"); 3522 setsparam("REPLY", ztrdup(name)); 3523 3524 execode(prog, 1, 0, "globqual"); 3525 3526 ret = lastval; 3527 errflag = ef; 3528 lastval = lv; 3529 3530 if (!(inserts = getaparam("reply")) && 3531 !(inserts = gethparam("reply"))) { 3532 char *tmp; 3533 3534 if ((tmp = getsparam("reply")) || (tmp = getsparam("REPLY"))) { 3535 static char *tmparr[2]; 3536 3537 tmparr[0] = tmp; 3538 tmparr[1] = NULL; 3539 3540 inserts = tmparr; 3541 } 3542 } 3543 3544 return !ret; 3545 } 3546 return 0; 3547} 3548 3549/**/ 3550static int 3551qualnonemptydir(char *name, struct stat *buf, UNUSED(off_t days), UNUSED(char *str)) 3552{ 3553 DIR *dirh; 3554 struct dirent *de; 3555 int unamelen; 3556 char *uname = unmetafy(dupstring(name), &unamelen); 3557 3558 if (!S_ISDIR(buf->st_mode)) 3559 return 0; 3560 3561 if (buf->st_nlink > 2) 3562 return 1; 3563 3564 if (!(dirh = opendir(uname))) 3565 return 0; 3566 3567 while ((de = readdir(dirh))) { 3568 if (strcmp(de->d_name, ".") && strcmp(de->d_name, "..")) { 3569 closedir(dirh); 3570 return 1; 3571 } 3572 } 3573 3574 closedir(dirh); 3575 return 0; 3576} 3577