1/*-
2 * Copyright (c) 2003-2007 Tim Kientzle
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26/*-
27 * This is a new directory-walking system that addresses a number
28 * of problems I've had with fts(3).  In particular, it has no
29 * pathname-length limits (other than the size of 'int'), handles
30 * deep logical traversals, uses considerably less memory, and has
31 * an opaque interface (easier to modify in the future).
32 *
33 * Internally, it keeps a single list of "tree_entry" items that
34 * represent filesystem objects that require further attention.
35 * Non-directories are not kept in memory: they are pulled from
36 * readdir(), returned to the client, then freed as soon as possible.
37 * Any directory entry to be traversed gets pushed onto the stack.
38 *
39 * There is surprisingly little information that needs to be kept for
40 * each item on the stack.  Just the name, depth (represented here as the
41 * string length of the parent directory's pathname), and some markers
42 * indicating how to get back to the parent (via chdir("..") for a
43 * regular dir or via fchdir(2) for a symlink).
44 */
45#include "tree_config.h"
46__FBSDID("$FreeBSD$");
47
48#ifdef HAVE_SYS_STAT_H
49#include <sys/stat.h>
50#endif
51#ifdef HAVE_DIRENT_H
52#include <dirent.h>
53#endif
54#ifdef HAVE_ERRNO_H
55#include <errno.h>
56#endif
57#ifdef HAVE_FCNTL_H
58#include <fcntl.h>
59#endif
60#ifdef HAVE_STDLIB_H
61#include <stdlib.h>
62#endif
63#ifdef HAVE_STRING_H
64#include <string.h>
65#endif
66#ifdef HAVE_UNISTD_H
67#include <unistd.h>
68#endif
69
70#include "tree.h"
71
72/*
73 * TODO:
74 *    1) Loop checking.
75 *    3) Arbitrary logical traversals by closing/reopening intermediate fds.
76 */
77
78struct tree_entry {
79	struct tree_entry *next;
80	struct tree_entry *parent;
81	char *name;
82	size_t dirname_length;
83	dev_t dev;
84	ino_t ino;
85	int fd;
86	int flags;
87};
88
89/* Definitions for tree_entry.flags bitmap. */
90#define	isDir 1 /* This entry is a regular directory. */
91#define	isDirLink 2 /* This entry is a symbolic link to a directory. */
92#define	needsPreVisit 4 /* This entry needs to be previsited. */
93#define	needsPostVisit 8 /* This entry needs to be postvisited. */
94
95/*
96 * Local data for this package.
97 */
98struct tree {
99	struct tree_entry	*stack;
100	struct tree_entry	*current;
101	DIR	*d;
102	int	 initialDirFd;
103	int	 flags;
104	int	 visit_type;
105	int	 tree_errno; /* Error code from last failed operation. */
106
107	char	*buff;
108	const char	*basename;
109	size_t	 buff_length;
110	size_t	 path_length;
111	size_t	 dirname_length;
112
113	int	 depth;
114	int	 openCount;
115	int	 maxOpenCount;
116
117	struct stat	lst;
118	struct stat	st;
119};
120
121/* Definitions for tree.flags bitmap. */
122#define needsReturn 8  /* Marks first entry as not having been returned yet. */
123#define hasStat 16  /* The st entry is set. */
124#define hasLstat 32 /* The lst entry is set. */
125
126
127#ifdef HAVE_DIRENT_D_NAMLEN
128/* BSD extension; avoids need for a strlen() call. */
129#define D_NAMELEN(dp)	(dp)->d_namlen
130#else
131#define D_NAMELEN(dp)	(strlen((dp)->d_name))
132#endif
133
134#if 0
135#include <stdio.h>
136void
137tree_dump(struct tree *t, FILE *out)
138{
139	struct tree_entry *te;
140
141	fprintf(out, "\tdepth: %d\n", t->depth);
142	fprintf(out, "\tbuff: %s\n", t->buff);
143	fprintf(out, "\tpwd: "); fflush(stdout); system("pwd");
144	fprintf(out, "\taccess: %s\n", t->basename);
145	fprintf(out, "\tstack:\n");
146	for (te = t->stack; te != NULL; te = te->next) {
147		fprintf(out, "\t\tte->name: %s%s%s\n", te->name,
148		    te->flags & needsPreVisit ? "" : " *",
149		    t->current == te ? " (current)" : "");
150	}
151}
152#endif
153
154/*
155 * Add a directory path to the current stack.
156 */
157static void
158tree_push(struct tree *t, const char *path)
159{
160	struct tree_entry *te;
161
162	te = malloc(sizeof(*te));
163	memset(te, 0, sizeof(*te));
164	te->next = t->stack;
165	t->stack = te;
166	te->fd = -1;
167	te->name = strdup(path);
168	te->flags = needsPreVisit | needsPostVisit;
169	te->dirname_length = t->dirname_length;
170}
171
172/*
173 * Append a name to the current path.
174 */
175static void
176tree_append(struct tree *t, const char *name, size_t name_length)
177{
178	char *p;
179
180	if (t->buff != NULL)
181		t->buff[t->dirname_length] = '\0';
182	/* Strip trailing '/' from name, unless entire name is "/". */
183	while (name_length > 1 && name[name_length - 1] == '/')
184		name_length--;
185
186	/* Resize pathname buffer as needed. */
187	while (name_length + 1 + t->dirname_length >= t->buff_length) {
188		t->buff_length *= 2;
189		if (t->buff_length < 1024)
190			t->buff_length = 1024;
191		t->buff = realloc(t->buff, t->buff_length);
192	}
193	p = t->buff + t->dirname_length;
194	t->path_length = t->dirname_length + name_length;
195	/* Add a separating '/' if it's needed. */
196	if (t->dirname_length > 0 && p[-1] != '/') {
197		*p++ = '/';
198		t->path_length ++;
199	}
200	strncpy(p, name, name_length);
201	p[name_length] = '\0';
202	t->basename = p;
203}
204
205/*
206 * Open a directory tree for traversal.
207 */
208struct tree *
209tree_open(const char *path)
210{
211	struct tree *t;
212
213	t = malloc(sizeof(*t));
214	memset(t, 0, sizeof(*t));
215	tree_append(t, path, strlen(path));
216	t->initialDirFd = open(".", O_RDONLY);
217	/*
218	 * During most of the traversal, items are set up and then
219	 * returned immediately from tree_next().  That doesn't work
220	 * for the very first entry, so we set a flag for this special
221	 * case.
222	 */
223	t->flags = needsReturn;
224	return (t);
225}
226
227/*
228 * We've finished a directory; ascend back to the parent.
229 */
230static void
231tree_ascend(struct tree *t)
232{
233	struct tree_entry *te;
234
235	te = t->stack;
236	t->depth--;
237	if (te->flags & isDirLink) {
238		fchdir(te->fd);
239		close(te->fd);
240		t->openCount--;
241	} else {
242		chdir("..");
243	}
244}
245
246/*
247 * Pop the working stack.
248 */
249static void
250tree_pop(struct tree *t)
251{
252	struct tree_entry *te;
253
254	t->buff[t->dirname_length] = '\0';
255	if (t->stack == t->current && t->current != NULL)
256		t->current = t->current->parent;
257	te = t->stack;
258	t->stack = te->next;
259	t->dirname_length = te->dirname_length;
260	t->basename = t->buff + t->dirname_length;
261	/* Special case: starting dir doesn't skip leading '/'. */
262	if (t->dirname_length > 0)
263		t->basename++;
264	free(te->name);
265	free(te);
266}
267
268/*
269 * Get the next item in the tree traversal.
270 */
271int
272tree_next(struct tree *t)
273{
274	struct dirent *de = NULL;
275
276	/* Handle the startup case by returning the initial entry. */
277	if (t->flags & needsReturn) {
278		t->flags &= ~needsReturn;
279		return (t->visit_type = TREE_REGULAR);
280	}
281
282	while (t->stack != NULL) {
283		/* If there's an open dir, get the next entry from there. */
284		while (t->d != NULL) {
285			de = readdir(t->d);
286			if (de == NULL) {
287				closedir(t->d);
288				t->d = NULL;
289			} else if (de->d_name[0] == '.'
290			    && de->d_name[1] == '\0') {
291				/* Skip '.' */
292			} else if (de->d_name[0] == '.'
293			    && de->d_name[1] == '.'
294			    && de->d_name[2] == '\0') {
295				/* Skip '..' */
296			} else {
297				/*
298				 * Append the path to the current path
299				 * and return it.
300				 */
301				tree_append(t, de->d_name, D_NAMELEN(de));
302				t->flags &= ~hasLstat;
303				t->flags &= ~hasStat;
304				return (t->visit_type = TREE_REGULAR);
305			}
306		}
307
308		/* If the current dir needs to be visited, set it up. */
309		if (t->stack->flags & needsPreVisit) {
310			t->current = t->stack;
311			tree_append(t, t->stack->name, strlen(t->stack->name));
312			t->stack->flags &= ~needsPreVisit;
313			/* If it is a link, set up fd for the ascent. */
314			if (t->stack->flags & isDirLink) {
315				t->stack->fd = open(".", O_RDONLY);
316				t->openCount++;
317				if (t->openCount > t->maxOpenCount)
318					t->maxOpenCount = t->openCount;
319			}
320			t->dirname_length = t->path_length;
321			if (chdir(t->stack->name) != 0) {
322				/* chdir() failed; return error */
323				tree_pop(t);
324				t->tree_errno = errno;
325				return (t->visit_type = TREE_ERROR_DIR);
326			}
327			t->depth++;
328			t->d = opendir(".");
329			if (t->d == NULL) {
330				tree_ascend(t); /* Undo "chdir" */
331				tree_pop(t);
332				t->tree_errno = errno;
333				return (t->visit_type = TREE_ERROR_DIR);
334			}
335			t->flags &= ~hasLstat;
336			t->flags &= ~hasStat;
337			t->basename = ".";
338			return (t->visit_type = TREE_POSTDESCENT);
339		}
340
341		/* We've done everything necessary for the top stack entry. */
342		if (t->stack->flags & needsPostVisit) {
343			tree_ascend(t);
344			tree_pop(t);
345			t->flags &= ~hasLstat;
346			t->flags &= ~hasStat;
347			return (t->visit_type = TREE_POSTASCENT);
348		}
349	}
350	return (t->visit_type = 0);
351}
352
353/*
354 * Return error code.
355 */
356int
357tree_errno(struct tree *t)
358{
359	return (t->tree_errno);
360}
361
362/*
363 * Called by the client to mark the directory just returned from
364 * tree_next() as needing to be visited.
365 */
366void
367tree_descend(struct tree *t)
368{
369	if (t->visit_type != TREE_REGULAR)
370		return;
371
372	if (tree_current_is_physical_dir(t)) {
373		tree_push(t, t->basename);
374		t->stack->flags |= isDir;
375	} else if (tree_current_is_dir(t)) {
376		tree_push(t, t->basename);
377		t->stack->flags |= isDirLink;
378	}
379}
380
381/*
382 * Get the stat() data for the entry just returned from tree_next().
383 */
384const struct stat *
385tree_current_stat(struct tree *t)
386{
387	if (!(t->flags & hasStat)) {
388		if (stat(t->basename, &t->st) != 0)
389			return NULL;
390		t->flags |= hasStat;
391	}
392	return (&t->st);
393}
394
395/*
396 * Get the lstat() data for the entry just returned from tree_next().
397 */
398const struct stat *
399tree_current_lstat(struct tree *t)
400{
401	if (!(t->flags & hasLstat)) {
402		if (lstat(t->basename, &t->lst) != 0)
403			return NULL;
404		t->flags |= hasLstat;
405	}
406	return (&t->lst);
407}
408
409/*
410 * Test whether current entry is a dir or link to a dir.
411 */
412int
413tree_current_is_dir(struct tree *t)
414{
415	const struct stat *st;
416
417	/*
418	 * If we already have lstat() info, then try some
419	 * cheap tests to determine if this is a dir.
420	 */
421	if (t->flags & hasLstat) {
422		/* If lstat() says it's a dir, it must be a dir. */
423		if (S_ISDIR(tree_current_lstat(t)->st_mode))
424			return 1;
425		/* Not a dir; might be a link to a dir. */
426		/* If it's not a link, then it's not a link to a dir. */
427		if (!S_ISLNK(tree_current_lstat(t)->st_mode))
428			return 0;
429		/*
430		 * It's a link, but we don't know what it's a link to,
431		 * so we'll have to use stat().
432		 */
433	}
434
435	st = tree_current_stat(t);
436	/* If we can't stat it, it's not a dir. */
437	if (st == NULL)
438		return 0;
439	/* Use the definitive test.  Hopefully this is cached. */
440	return (S_ISDIR(st->st_mode));
441}
442
443/*
444 * Test whether current entry is a physical directory.  Usually, we
445 * already have at least one of stat() or lstat() in memory, so we
446 * use tricks to try to avoid an extra trip to the disk.
447 */
448int
449tree_current_is_physical_dir(struct tree *t)
450{
451	const struct stat *st;
452
453	/*
454	 * If stat() says it isn't a dir, then it's not a dir.
455	 * If stat() data is cached, this check is free, so do it first.
456	 */
457	if ((t->flags & hasStat)
458	    && (!S_ISDIR(tree_current_stat(t)->st_mode)))
459		return 0;
460
461	/*
462	 * Either stat() said it was a dir (in which case, we have
463	 * to determine whether it's really a link to a dir) or
464	 * stat() info wasn't available.  So we use lstat(), which
465	 * hopefully is already cached.
466	 */
467
468	st = tree_current_lstat(t);
469	/* If we can't stat it, it's not a dir. */
470	if (st == NULL)
471		return 0;
472	/* Use the definitive test.  Hopefully this is cached. */
473	return (S_ISDIR(st->st_mode));
474}
475
476/*
477 * Test whether current entry is a symbolic link.
478 */
479int
480tree_current_is_physical_link(struct tree *t)
481{
482	const struct stat *st = tree_current_lstat(t);
483	if (st == NULL)
484		return 0;
485	return (S_ISLNK(st->st_mode));
486}
487
488/*
489 * Return the access path for the entry just returned from tree_next().
490 */
491const char *
492tree_current_access_path(struct tree *t)
493{
494	return (t->basename);
495}
496
497/*
498 * Return the full path for the entry just returned from tree_next().
499 */
500const char *
501tree_current_path(struct tree *t)
502{
503	return (t->buff);
504}
505
506/*
507 * Return the length of the path for the entry just returned from tree_next().
508 */
509size_t
510tree_current_pathlen(struct tree *t)
511{
512	return (t->path_length);
513}
514
515/*
516 * Return the nesting depth of the entry just returned from tree_next().
517 */
518int
519tree_current_depth(struct tree *t)
520{
521	return (t->depth);
522}
523
524/*
525 * Terminate the traversal and release any resources.
526 */
527void
528tree_close(struct tree *t)
529{
530	/* Release anything remaining in the stack. */
531	while (t->stack != NULL)
532		tree_pop(t);
533	if (t->buff)
534		free(t->buff);
535	/* chdir() back to where we started. */
536	if (t->initialDirFd >= 0) {
537		fchdir(t->initialDirFd);
538		close(t->initialDirFd);
539		t->initialDirFd = -1;
540	}
541	free(t);
542}
543