1/*- 2 * Copyright (c) 2003-2007 Tim Kientzle 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26/*- 27 * This is a new directory-walking system that addresses a number 28 * of problems I've had with fts(3). In particular, it has no 29 * pathname-length limits (other than the size of 'int'), handles 30 * deep logical traversals, uses considerably less memory, and has 31 * an opaque interface (easier to modify in the future). 32 * 33 * Internally, it keeps a single list of "tree_entry" items that 34 * represent filesystem objects that require further attention. 35 * Non-directories are not kept in memory: they are pulled from 36 * readdir(), returned to the client, then freed as soon as possible. 37 * Any directory entry to be traversed gets pushed onto the stack. 38 * 39 * There is surprisingly little information that needs to be kept for 40 * each item on the stack. Just the name, depth (represented here as the 41 * string length of the parent directory's pathname), and some markers 42 * indicating how to get back to the parent (via chdir("..") for a 43 * regular dir or via fchdir(2) for a symlink). 44 */ 45#include "tree_config.h" 46__FBSDID("$FreeBSD$"); 47 48#ifdef HAVE_SYS_STAT_H 49#include <sys/stat.h> 50#endif 51#ifdef HAVE_DIRENT_H 52#include <dirent.h> 53#endif 54#ifdef HAVE_ERRNO_H 55#include <errno.h> 56#endif 57#ifdef HAVE_FCNTL_H 58#include <fcntl.h> 59#endif 60#ifdef HAVE_STDLIB_H 61#include <stdlib.h> 62#endif 63#ifdef HAVE_STRING_H 64#include <string.h> 65#endif 66#ifdef HAVE_UNISTD_H 67#include <unistd.h> 68#endif 69 70#include "tree.h" 71 72/* 73 * TODO: 74 * 1) Loop checking. 75 * 3) Arbitrary logical traversals by closing/reopening intermediate fds. 76 */ 77 78struct tree_entry { 79 struct tree_entry *next; 80 struct tree_entry *parent; 81 char *name; 82 size_t dirname_length; 83 dev_t dev; 84 ino_t ino; 85 int fd; 86 int flags; 87}; 88 89/* Definitions for tree_entry.flags bitmap. */ 90#define isDir 1 /* This entry is a regular directory. */ 91#define isDirLink 2 /* This entry is a symbolic link to a directory. */ 92#define needsPreVisit 4 /* This entry needs to be previsited. */ 93#define needsPostVisit 8 /* This entry needs to be postvisited. */ 94 95/* 96 * Local data for this package. 97 */ 98struct tree { 99 struct tree_entry *stack; 100 struct tree_entry *current; 101 DIR *d; 102 int initialDirFd; 103 int flags; 104 int visit_type; 105 int tree_errno; /* Error code from last failed operation. */ 106 107 char *buff; 108 const char *basename; 109 size_t buff_length; 110 size_t path_length; 111 size_t dirname_length; 112 113 int depth; 114 int openCount; 115 int maxOpenCount; 116 117 struct stat lst; 118 struct stat st; 119}; 120 121/* Definitions for tree.flags bitmap. */ 122#define needsReturn 8 /* Marks first entry as not having been returned yet. */ 123#define hasStat 16 /* The st entry is set. */ 124#define hasLstat 32 /* The lst entry is set. */ 125 126 127#ifdef HAVE_DIRENT_D_NAMLEN 128/* BSD extension; avoids need for a strlen() call. */ 129#define D_NAMELEN(dp) (dp)->d_namlen 130#else 131#define D_NAMELEN(dp) (strlen((dp)->d_name)) 132#endif 133 134#if 0 135#include <stdio.h> 136void 137tree_dump(struct tree *t, FILE *out) 138{ 139 struct tree_entry *te; 140 141 fprintf(out, "\tdepth: %d\n", t->depth); 142 fprintf(out, "\tbuff: %s\n", t->buff); 143 fprintf(out, "\tpwd: "); fflush(stdout); system("pwd"); 144 fprintf(out, "\taccess: %s\n", t->basename); 145 fprintf(out, "\tstack:\n"); 146 for (te = t->stack; te != NULL; te = te->next) { 147 fprintf(out, "\t\tte->name: %s%s%s\n", te->name, 148 te->flags & needsPreVisit ? "" : " *", 149 t->current == te ? " (current)" : ""); 150 } 151} 152#endif 153 154/* 155 * Add a directory path to the current stack. 156 */ 157static void 158tree_push(struct tree *t, const char *path) 159{ 160 struct tree_entry *te; 161 162 te = malloc(sizeof(*te)); 163 memset(te, 0, sizeof(*te)); 164 te->next = t->stack; 165 t->stack = te; 166 te->fd = -1; 167 te->name = strdup(path); 168 te->flags = needsPreVisit | needsPostVisit; 169 te->dirname_length = t->dirname_length; 170} 171 172/* 173 * Append a name to the current path. 174 */ 175static void 176tree_append(struct tree *t, const char *name, size_t name_length) 177{ 178 char *p; 179 180 if (t->buff != NULL) 181 t->buff[t->dirname_length] = '\0'; 182 /* Strip trailing '/' from name, unless entire name is "/". */ 183 while (name_length > 1 && name[name_length - 1] == '/') 184 name_length--; 185 186 /* Resize pathname buffer as needed. */ 187 while (name_length + 1 + t->dirname_length >= t->buff_length) { 188 t->buff_length *= 2; 189 if (t->buff_length < 1024) 190 t->buff_length = 1024; 191 t->buff = realloc(t->buff, t->buff_length); 192 } 193 p = t->buff + t->dirname_length; 194 t->path_length = t->dirname_length + name_length; 195 /* Add a separating '/' if it's needed. */ 196 if (t->dirname_length > 0 && p[-1] != '/') { 197 *p++ = '/'; 198 t->path_length ++; 199 } 200 strncpy(p, name, name_length); 201 p[name_length] = '\0'; 202 t->basename = p; 203} 204 205/* 206 * Open a directory tree for traversal. 207 */ 208struct tree * 209tree_open(const char *path) 210{ 211 struct tree *t; 212 213 t = malloc(sizeof(*t)); 214 memset(t, 0, sizeof(*t)); 215 tree_append(t, path, strlen(path)); 216 t->initialDirFd = open(".", O_RDONLY); 217 /* 218 * During most of the traversal, items are set up and then 219 * returned immediately from tree_next(). That doesn't work 220 * for the very first entry, so we set a flag for this special 221 * case. 222 */ 223 t->flags = needsReturn; 224 return (t); 225} 226 227/* 228 * We've finished a directory; ascend back to the parent. 229 */ 230static void 231tree_ascend(struct tree *t) 232{ 233 struct tree_entry *te; 234 235 te = t->stack; 236 t->depth--; 237 if (te->flags & isDirLink) { 238 fchdir(te->fd); 239 close(te->fd); 240 t->openCount--; 241 } else { 242 chdir(".."); 243 } 244} 245 246/* 247 * Pop the working stack. 248 */ 249static void 250tree_pop(struct tree *t) 251{ 252 struct tree_entry *te; 253 254 t->buff[t->dirname_length] = '\0'; 255 if (t->stack == t->current && t->current != NULL) 256 t->current = t->current->parent; 257 te = t->stack; 258 t->stack = te->next; 259 t->dirname_length = te->dirname_length; 260 t->basename = t->buff + t->dirname_length; 261 /* Special case: starting dir doesn't skip leading '/'. */ 262 if (t->dirname_length > 0) 263 t->basename++; 264 free(te->name); 265 free(te); 266} 267 268/* 269 * Get the next item in the tree traversal. 270 */ 271int 272tree_next(struct tree *t) 273{ 274 struct dirent *de = NULL; 275 276 /* Handle the startup case by returning the initial entry. */ 277 if (t->flags & needsReturn) { 278 t->flags &= ~needsReturn; 279 return (t->visit_type = TREE_REGULAR); 280 } 281 282 while (t->stack != NULL) { 283 /* If there's an open dir, get the next entry from there. */ 284 while (t->d != NULL) { 285 de = readdir(t->d); 286 if (de == NULL) { 287 closedir(t->d); 288 t->d = NULL; 289 } else if (de->d_name[0] == '.' 290 && de->d_name[1] == '\0') { 291 /* Skip '.' */ 292 } else if (de->d_name[0] == '.' 293 && de->d_name[1] == '.' 294 && de->d_name[2] == '\0') { 295 /* Skip '..' */ 296 } else { 297 /* 298 * Append the path to the current path 299 * and return it. 300 */ 301 tree_append(t, de->d_name, D_NAMELEN(de)); 302 t->flags &= ~hasLstat; 303 t->flags &= ~hasStat; 304 return (t->visit_type = TREE_REGULAR); 305 } 306 } 307 308 /* If the current dir needs to be visited, set it up. */ 309 if (t->stack->flags & needsPreVisit) { 310 t->current = t->stack; 311 tree_append(t, t->stack->name, strlen(t->stack->name)); 312 t->stack->flags &= ~needsPreVisit; 313 /* If it is a link, set up fd for the ascent. */ 314 if (t->stack->flags & isDirLink) { 315 t->stack->fd = open(".", O_RDONLY); 316 t->openCount++; 317 if (t->openCount > t->maxOpenCount) 318 t->maxOpenCount = t->openCount; 319 } 320 t->dirname_length = t->path_length; 321 if (chdir(t->stack->name) != 0) { 322 /* chdir() failed; return error */ 323 tree_pop(t); 324 t->tree_errno = errno; 325 return (t->visit_type = TREE_ERROR_DIR); 326 } 327 t->depth++; 328 t->d = opendir("."); 329 if (t->d == NULL) { 330 tree_ascend(t); /* Undo "chdir" */ 331 tree_pop(t); 332 t->tree_errno = errno; 333 return (t->visit_type = TREE_ERROR_DIR); 334 } 335 t->flags &= ~hasLstat; 336 t->flags &= ~hasStat; 337 t->basename = "."; 338 return (t->visit_type = TREE_POSTDESCENT); 339 } 340 341 /* We've done everything necessary for the top stack entry. */ 342 if (t->stack->flags & needsPostVisit) { 343 tree_ascend(t); 344 tree_pop(t); 345 t->flags &= ~hasLstat; 346 t->flags &= ~hasStat; 347 return (t->visit_type = TREE_POSTASCENT); 348 } 349 } 350 return (t->visit_type = 0); 351} 352 353/* 354 * Return error code. 355 */ 356int 357tree_errno(struct tree *t) 358{ 359 return (t->tree_errno); 360} 361 362/* 363 * Called by the client to mark the directory just returned from 364 * tree_next() as needing to be visited. 365 */ 366void 367tree_descend(struct tree *t) 368{ 369 if (t->visit_type != TREE_REGULAR) 370 return; 371 372 if (tree_current_is_physical_dir(t)) { 373 tree_push(t, t->basename); 374 t->stack->flags |= isDir; 375 } else if (tree_current_is_dir(t)) { 376 tree_push(t, t->basename); 377 t->stack->flags |= isDirLink; 378 } 379} 380 381/* 382 * Get the stat() data for the entry just returned from tree_next(). 383 */ 384const struct stat * 385tree_current_stat(struct tree *t) 386{ 387 if (!(t->flags & hasStat)) { 388 if (stat(t->basename, &t->st) != 0) 389 return NULL; 390 t->flags |= hasStat; 391 } 392 return (&t->st); 393} 394 395/* 396 * Get the lstat() data for the entry just returned from tree_next(). 397 */ 398const struct stat * 399tree_current_lstat(struct tree *t) 400{ 401 if (!(t->flags & hasLstat)) { 402 if (lstat(t->basename, &t->lst) != 0) 403 return NULL; 404 t->flags |= hasLstat; 405 } 406 return (&t->lst); 407} 408 409/* 410 * Test whether current entry is a dir or link to a dir. 411 */ 412int 413tree_current_is_dir(struct tree *t) 414{ 415 const struct stat *st; 416 417 /* 418 * If we already have lstat() info, then try some 419 * cheap tests to determine if this is a dir. 420 */ 421 if (t->flags & hasLstat) { 422 /* If lstat() says it's a dir, it must be a dir. */ 423 if (S_ISDIR(tree_current_lstat(t)->st_mode)) 424 return 1; 425 /* Not a dir; might be a link to a dir. */ 426 /* If it's not a link, then it's not a link to a dir. */ 427 if (!S_ISLNK(tree_current_lstat(t)->st_mode)) 428 return 0; 429 /* 430 * It's a link, but we don't know what it's a link to, 431 * so we'll have to use stat(). 432 */ 433 } 434 435 st = tree_current_stat(t); 436 /* If we can't stat it, it's not a dir. */ 437 if (st == NULL) 438 return 0; 439 /* Use the definitive test. Hopefully this is cached. */ 440 return (S_ISDIR(st->st_mode)); 441} 442 443/* 444 * Test whether current entry is a physical directory. Usually, we 445 * already have at least one of stat() or lstat() in memory, so we 446 * use tricks to try to avoid an extra trip to the disk. 447 */ 448int 449tree_current_is_physical_dir(struct tree *t) 450{ 451 const struct stat *st; 452 453 /* 454 * If stat() says it isn't a dir, then it's not a dir. 455 * If stat() data is cached, this check is free, so do it first. 456 */ 457 if ((t->flags & hasStat) 458 && (!S_ISDIR(tree_current_stat(t)->st_mode))) 459 return 0; 460 461 /* 462 * Either stat() said it was a dir (in which case, we have 463 * to determine whether it's really a link to a dir) or 464 * stat() info wasn't available. So we use lstat(), which 465 * hopefully is already cached. 466 */ 467 468 st = tree_current_lstat(t); 469 /* If we can't stat it, it's not a dir. */ 470 if (st == NULL) 471 return 0; 472 /* Use the definitive test. Hopefully this is cached. */ 473 return (S_ISDIR(st->st_mode)); 474} 475 476/* 477 * Test whether current entry is a symbolic link. 478 */ 479int 480tree_current_is_physical_link(struct tree *t) 481{ 482 const struct stat *st = tree_current_lstat(t); 483 if (st == NULL) 484 return 0; 485 return (S_ISLNK(st->st_mode)); 486} 487 488/* 489 * Return the access path for the entry just returned from tree_next(). 490 */ 491const char * 492tree_current_access_path(struct tree *t) 493{ 494 return (t->basename); 495} 496 497/* 498 * Return the full path for the entry just returned from tree_next(). 499 */ 500const char * 501tree_current_path(struct tree *t) 502{ 503 return (t->buff); 504} 505 506/* 507 * Return the length of the path for the entry just returned from tree_next(). 508 */ 509size_t 510tree_current_pathlen(struct tree *t) 511{ 512 return (t->path_length); 513} 514 515/* 516 * Return the nesting depth of the entry just returned from tree_next(). 517 */ 518int 519tree_current_depth(struct tree *t) 520{ 521 return (t->depth); 522} 523 524/* 525 * Terminate the traversal and release any resources. 526 */ 527void 528tree_close(struct tree *t) 529{ 530 /* Release anything remaining in the stack. */ 531 while (t->stack != NULL) 532 tree_pop(t); 533 if (t->buff) 534 free(t->buff); 535 /* chdir() back to where we started. */ 536 if (t->initialDirFd >= 0) { 537 fchdir(t->initialDirFd); 538 close(t->initialDirFd); 539 t->initialDirFd = -1; 540 } 541 free(t); 542} 543