1/*-
2 * Copyright (c) 2003-2004 Tim Kientzle
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27/*-
28 * There is a single list of "tree_entry" items that represent
29 * filesystem objects that require further attention.  Non-directories
30 * are not kept in memory: they are pulled from readdir(), returned to
31 * the client, then freed as soon as possible.  Any directory entry to
32 * be traversed gets pushed onto the stack.
33 *
34 * There is surprisingly little information that needs to be kept for
35 * each item on the stack.  Just the name, depth (represented here as the
36 * string length of the parent directory's pathname), and some markers
37 * indicating how to get back to the parent (via chdir("..") for a
38 * regular dir or via fchdir(2) for a symlink).
39 */
40
41#include <sys/stat.h>
42#include <dirent.h>
43#include <fcntl.h>
44#include <stdlib.h>
45#include <string.h>
46#include <unistd.h>
47
48#include "tree.h"
49
50/*
51 * TODO:
52 *    1) Loop checking.
53 *    3) Arbitrary logical traversals by closing/reopening intermediate fds.
54 */
55
56struct tree_entry {
57	struct tree_entry *next;
58	char *name;
59	size_t dirname_length;
60	int fd;
61	int flags;
62};
63
64/* Definitions for tree_entry.flags bitmap. */
65#define isDir 1 /* This entry is a regular directory. */
66#define isDirLink 2 /* This entry is a symbolic link to a directory. */
67#define needsTraversal 4 /* This entry hasn't yet been traversed. */
68
69/*
70 * Local data for this package.
71 */
72struct tree {
73	struct tree_entry	*stack;
74	DIR	*d;
75	int	 initialDirFd;
76	int	 flags;
77
78	char	*buff;
79	char	*basename;
80	size_t	 buff_length;
81	size_t	 path_length;
82	size_t	 dirname_length;
83
84	int	 depth;
85	int	 openCount;
86	int	 maxOpenCount;
87
88	struct stat	lst;
89	struct stat	st;
90};
91
92/* Definitions for tree.flags bitmap. */
93#define needsReturn 8  /* Marks first entry as not having been returned yet. */
94#define hasStat 16  /* The st entry is set. */
95#define hasLstat 32 /* The lst entry is set. */
96
97
98#define HAVE_DIRENT_D_NAMLEN 1
99#ifdef HAVE_DIRENT_D_NAMLEN
100/* BSD extension; avoids need for a strlen() call. */
101#define D_NAMELEN(dp)	(dp)->d_namlen
102#else
103#define D_NAMELEN(dp)	(strlen((dp)->d_name))
104#endif
105
106#if 0
107static void
108dumpStack(struct tree *t)
109{
110	struct tree_entry *te;
111
112	printf("\tbuff: %s\n", t->buff);
113	printf("\tpwd: "); fflush(stdout); system("pwd");
114	printf("\tstack:\n");
115	for (te = t->stack; te != NULL; te = te->next) {
116		printf("\t\tte->name: %s %s\n", te->name, te->flags & needsTraversal ? "" : "*");
117	}
118}
119#endif
120
121/*
122 * Add a directory path to the current stack.
123 */
124static void
125tree_add(struct tree *t, const char *path)
126{
127	struct tree_entry *te;
128
129	te = malloc(sizeof(*te));
130	memset(te, 0, sizeof(*te));
131	te->next = t->stack;
132	t->stack = te;
133	te->fd = -1;
134	te->name = strdup(path);
135	te->flags = needsTraversal;
136	te->dirname_length = t->dirname_length;
137}
138
139/*
140 * Append a name to the current path.
141 */
142static void
143tree_append(struct tree *t, const char *name, size_t name_length)
144{
145	if (t->buff != NULL)
146		t->buff[t->dirname_length] = '\0';
147
148	/* Resize pathname buffer as needed. */
149	while (name_length + 1 + t->dirname_length >= t->buff_length) {
150		t->buff_length *= 2;
151		if (t->buff_length < 1024)
152			t->buff_length = 1024;
153		t->buff = realloc(t->buff, t->buff_length);
154	}
155	t->basename = t->buff + t->dirname_length;
156	t->path_length = t->dirname_length + name_length;
157	if (t->dirname_length > 0) {
158		*t->basename++ = '/';
159		t->path_length ++;
160	}
161	strcpy(t->basename, name);
162}
163
164/*
165 * Open a directory tree for traversal.
166 */
167struct tree *
168tree_open(const char *path)
169{
170	struct tree *t;
171
172	t = malloc(sizeof(*t));
173	memset(t, 0, sizeof(*t));
174	tree_append(t, path, strlen(path));
175	t->initialDirFd = open(".", O_RDONLY);
176	/*
177	 * During most of the traversal, items are set up and then
178	 * returned immediately from tree_next().  That doesn't work
179	 * for the very first entry, so we set a flag for this special
180	 * case.
181	 */
182	t->flags = needsReturn;
183	return (t);
184}
185
186/*
187 * We've finished a directory; ascend back to the parent.
188 */
189static void
190tree_ascend(struct tree *t)
191{
192	struct tree_entry *te;
193
194	te = t->stack;
195	t->depth--;
196	if (te->flags & isDirLink) {
197		fchdir(te->fd);
198		close(te->fd);
199		t->openCount--;
200	} else {
201		chdir("..");
202	}
203}
204
205/*
206 * Pop the working stack.
207 */
208static void
209tree_pop(struct tree *t)
210{
211	struct tree_entry *te;
212
213	te = t->stack;
214	t->stack = te->next;
215	t->dirname_length = te->dirname_length;
216	free(te->name);
217	free(te);
218}
219
220/*
221 * Get the next item in the tree traversal.
222 */
223int
224tree_next(struct tree *t)
225{
226	struct dirent *de = NULL;
227
228	/* Handle the startup case by returning the initial entry. */
229	if (t->flags & needsReturn) {
230		t->flags &= ~needsReturn;
231		return (1);
232	}
233
234	while (t->stack != NULL) {
235		/* If there's an open dir, get the next entry from there. */
236		while (t->d != NULL) {
237			de = readdir(t->d);
238			if (de == NULL) {
239				closedir(t->d);
240				t->d = NULL;
241			} else if (de->d_name[0] == '.'
242			    && de->d_name[1] == '\0') {
243				/* Skip '.' */
244			} else if (de->d_name[0] == '.'
245			    && de->d_name[1] == '.'
246			    && de->d_name[2] == '\0') {
247				/* Skip '..' */
248			} else {
249				/*
250				 * Append the path to the current path
251				 * and return it.
252				 */
253				tree_append(t, de->d_name, D_NAMELEN(de));
254				t->flags &= ~hasLstat;
255				t->flags &= ~hasStat;
256				return (1);
257			}
258		}
259
260		/* If the current dir needs to be traversed, set it up. */
261		if (t->stack->flags & needsTraversal) {
262			tree_append(t, t->stack->name, strlen(t->stack->name));
263			t->stack->flags &= ~needsTraversal;
264			/* If it is a link, set up fd for the ascent. */
265			if (t->stack->flags & isDirLink) {
266				t->stack->fd = open(".", O_RDONLY);
267				t->openCount++;
268				if (t->openCount > t->maxOpenCount)
269					t->maxOpenCount = t->openCount;
270			}
271			if (chdir(t->stack->name) == 0) {
272				t->depth++;
273				t->dirname_length = t->path_length;
274				t->d = opendir(".");
275			} else
276				tree_pop(t);
277			continue;
278		}
279
280		/* We've done everything necessary for the top stack entry. */
281		tree_ascend(t);
282		tree_pop(t);
283	}
284	return (0);
285}
286
287/*
288 * Called by the client to mark the directory just returned from
289 * tree_next() as needing to be visited.
290 */
291void
292tree_descend(struct tree *t)
293{
294	const struct stat *s = tree_current_lstat(t);
295
296	if (S_ISDIR(s->st_mode)) {
297		tree_add(t, t->basename);
298		t->stack->flags |= isDir;
299	}
300
301	if (S_ISLNK(s->st_mode) && S_ISDIR(tree_current_stat(t)->st_mode)) {
302		tree_add(t, t->basename);
303		t->stack->flags |= isDirLink;
304	}
305}
306
307/*
308 * Get the stat() data for the entry just returned from tree_next().
309 */
310const struct stat *
311tree_current_stat(struct tree *t)
312{
313	if (!(t->flags & hasStat)) {
314		stat(t->basename, &t->st);
315		t->flags |= hasStat;
316	}
317	return (&t->st);
318}
319
320/*
321 * Get the lstat() data for the entry just returned from tree_next().
322 */
323const struct stat *
324tree_current_lstat(struct tree *t)
325{
326	if (!(t->flags & hasLstat)) {
327		lstat(t->basename, &t->lst);
328		t->flags |= hasLstat;
329	}
330	return (&t->lst);
331}
332
333/*
334 * Return the access path for the entry just returned from tree_next().
335 */
336const char *
337tree_current_access_path(struct tree *t)
338{
339	return (t->basename);
340}
341
342/*
343 * Return the full path for the entry just returned from tree_next().
344 */
345const char *
346tree_current_path(struct tree *t)
347{
348	return (t->buff);
349}
350
351/*
352 * Return the length of the path for the entry just returned from tree_next().
353 */
354size_t
355tree_current_pathlen(struct tree *t)
356{
357	return (t->path_length);
358}
359
360/*
361 * Return the nesting depth of the entry just returned from tree_next().
362 */
363int
364tree_current_depth(struct tree *t)
365{
366	return (t->depth);
367}
368
369/*
370 * Terminate the traversal and release any resources.
371 */
372void
373tree_close(struct tree *t)
374{
375	/* Release anything remaining in the stack. */
376	while (t->stack != NULL)
377		tree_pop(t);
378	if (t->buff)
379		free(t->buff);
380	/* chdir() back to where we started. */
381	if (t->initialDirFd >= 0) {
382		fchdir(t->initialDirFd);
383		close(t->initialDirFd);
384		t->initialDirFd = -1;
385	}
386	free(t);
387}
388
389
390#if 0
391/* Main function for testing. */
392#include <stdio.h>
393
394int main(int argc, char **argv)
395{
396	size_t max_path_len = 0;
397	int max_depth = 0;
398
399	system("pwd");
400	while (*++argv) {
401		struct tree *t = tree_open(*argv);
402		while (tree_next(t)) {
403			size_t path_len = tree_current_pathlen(t);
404			int depth = tree_current_depth(t);
405			if (path_len > max_path_len)
406				max_path_len = path_len;
407			if (depth > max_depth)
408				max_depth = depth;
409			printf("%s\n", tree_current_path(t));
410			if (S_ISDIR(tree_current_lstat(t)->st_mode))
411				tree_descend(t); /* Descend into every dir. */
412		}
413		tree_close(t);
414		printf("Max path length: %d\n", max_path_len);
415		printf("Max depth: %d\n", max_depth);
416		printf("Final open count: %d\n", t->openCount);
417		printf("Max open count: %d\n", t->maxOpenCount);
418		fflush(stdout);
419		system("pwd");
420	}
421	return (0);
422}
423#endif
424