1/*	$Id: compat_fts.c,v 1.17 2020/06/15 01:37:14 schwarze Exp $	*/
2/*	$OpenBSD: fts.c,v 1.59 2019/06/28 13:32:41 deraadt Exp $	*/
3
4/*-
5 * Copyright (c) 1990, 1993, 1994
6 *	The Regents of the University of California.  All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32#include "config.h"
33
34#include <sys/stat.h>
35#include <sys/types.h>
36
37#include <dirent.h>
38#include <errno.h>
39#include <fcntl.h>
40#include <limits.h>
41#include <stdlib.h>
42#include <string.h>
43#include <unistd.h>
44#include "compat_fts.h"
45
46#define MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))
47
48static FTSENT	*fts_alloc(FTS *, const char *, size_t);
49static FTSENT	*fts_build(FTS *);
50static void	 fts_lfree(FTSENT *);
51static void	 fts_load(FTS *, FTSENT *);
52static size_t	 fts_maxarglen(char * const *);
53static void	 fts_padjust(FTS *, FTSENT *);
54static int	 fts_palloc(FTS *, size_t);
55static FTSENT	*fts_sort(FTS *, FTSENT *, int);
56static unsigned short	 fts_stat(FTS *, FTSENT *);
57
58typedef int (*qsort_compar_proto)(const void *, const void *);
59
60#define	ISDOT(a)	(a[0] == '.' && (!a[1] || (a[1] == '.' && !a[2])))
61#ifndef	O_CLOEXEC
62#define	O_CLOEXEC	0
63#endif
64
65#define	CLR(opt)	(sp->fts_options &= ~(opt))
66#define	ISSET(opt)	(sp->fts_options & (opt))
67#define	SET(opt)	(sp->fts_options |= (opt))
68
69FTS *
70fts_open(char * const *argv, int options,
71    int (*compar)(const FTSENT **, const FTSENT **))
72{
73	FTS *sp;
74	FTSENT *p, *root;
75	int nitems;
76	FTSENT *parent, *prev;
77
78	/* Options check. */
79	if (options & ~FTS_OPTIONMASK) {
80		errno = EINVAL;
81		return (NULL);
82	}
83
84	/* At least one path must be specified. */
85	if (*argv == NULL) {
86		errno = EINVAL;
87		return (NULL);
88	}
89
90	/* Allocate/initialize the stream */
91	if ((sp = calloc(1, sizeof(FTS))) == NULL)
92		return (NULL);
93	sp->fts_compar = compar;
94	sp->fts_options = options;
95
96	/*
97	 * Start out with 1K of path space, and enough, in any case,
98	 * to hold the user's paths.
99	 */
100	if (fts_palloc(sp, MAXIMUM(fts_maxarglen(argv), PATH_MAX)))
101		goto mem1;
102
103	/* Allocate/initialize root's parent. */
104	if ((parent = fts_alloc(sp, "", 0)) == NULL)
105		goto mem2;
106	parent->fts_level = FTS_ROOTPARENTLEVEL;
107
108	/* Allocate/initialize root(s). */
109	for (root = prev = NULL, nitems = 0; *argv; ++argv, ++nitems) {
110		if ((p = fts_alloc(sp, *argv, strlen(*argv))) == NULL)
111			goto mem3;
112		p->fts_level = FTS_ROOTLEVEL;
113		p->fts_parent = parent;
114		p->fts_accpath = p->fts_name;
115		p->fts_info = fts_stat(sp, p);
116
117		/* Command-line "." and ".." are real directories. */
118		if (p->fts_info == FTS_DOT)
119			p->fts_info = FTS_D;
120
121		/*
122		 * If comparison routine supplied, traverse in sorted
123		 * order; otherwise traverse in the order specified.
124		 */
125		if (compar) {
126			p->fts_link = root;
127			root = p;
128		} else {
129			p->fts_link = NULL;
130			if (root == NULL)
131				root = p;
132			else
133				prev->fts_link = p;
134			prev = p;
135		}
136	}
137	if (compar && nitems > 1)
138		root = fts_sort(sp, root, nitems);
139
140	/*
141	 * Allocate a dummy pointer and make fts_read think that we've just
142	 * finished the node before the root(s); set p->fts_info to FTS_INIT
143	 * so that everything about the "current" node is ignored.
144	 */
145	if ((sp->fts_cur = fts_alloc(sp, "", 0)) == NULL)
146		goto mem3;
147	sp->fts_cur->fts_link = root;
148	sp->fts_cur->fts_info = FTS_INIT;
149
150	if (nitems == 0)
151		free(parent);
152
153	return (sp);
154
155mem3:	fts_lfree(root);
156	free(parent);
157mem2:	free(sp->fts_path);
158mem1:	free(sp);
159	return (NULL);
160}
161
162static void
163fts_load(FTS *sp, FTSENT *p)
164{
165	size_t len;
166	char *cp;
167
168	/*
169	 * Load the stream structure for the next traversal.  Since we don't
170	 * actually enter the directory until after the preorder visit, set
171	 * the fts_accpath field specially so the chdir gets done to the right
172	 * place and the user can access the first node.  From fts_open it's
173	 * known that the path will fit.
174	 */
175	len = p->fts_pathlen = p->fts_namelen;
176	memmove(sp->fts_path, p->fts_name, len + 1);
177	if ((cp = strrchr(p->fts_name, '/')) && (cp != p->fts_name || cp[1])) {
178		len = strlen(++cp);
179		memmove(p->fts_name, cp, len + 1);
180		p->fts_namelen = len;
181	}
182	p->fts_accpath = p->fts_path = sp->fts_path;
183	sp->fts_dev = p->fts_dev;
184}
185
186int
187fts_close(FTS *sp)
188{
189	FTSENT *freep, *p;
190
191	/*
192	 * This still works if we haven't read anything -- the dummy structure
193	 * points to the root list, so we step through to the end of the root
194	 * list which has a valid parent pointer.
195	 */
196	if (sp->fts_cur) {
197		for (p = sp->fts_cur; p->fts_level >= FTS_ROOTLEVEL;) {
198			freep = p;
199			p = p->fts_link ? p->fts_link : p->fts_parent;
200			free(freep);
201		}
202		free(p);
203	}
204
205	/* Free up child linked list, sort array, path buffer, stream ptr.*/
206	if (sp->fts_child)
207		fts_lfree(sp->fts_child);
208	free(sp->fts_array);
209	free(sp->fts_path);
210	free(sp);
211
212	return (0);
213}
214
215/*
216 * Special case of "/" at the end of the path so that slashes aren't
217 * appended which would cause paths to be written as "....//foo".
218 */
219#define	NAPPEND(p)							\
220	(p->fts_path[p->fts_pathlen - 1] == '/'				\
221	    ? p->fts_pathlen - 1 : p->fts_pathlen)
222
223FTSENT *
224fts_read(FTS *sp)
225{
226	FTSENT *p, *tmp;
227	int instr;
228	char *t;
229
230	/* If finished or unrecoverable error, return NULL. */
231	if (sp->fts_cur == NULL || ISSET(FTS_STOP))
232		return (NULL);
233
234	/* Set current node pointer. */
235	p = sp->fts_cur;
236
237	/* Save and zero out user instructions. */
238	instr = p->fts_instr;
239	p->fts_instr = FTS_NOINSTR;
240
241	/* Directory in pre-order. */
242	if (p->fts_info == FTS_D) {
243		/* If skipped or crossed mount point, do post-order visit. */
244		if (instr == FTS_SKIP ||
245		    (ISSET(FTS_XDEV) && p->fts_dev != sp->fts_dev)) {
246			if (sp->fts_child) {
247				fts_lfree(sp->fts_child);
248				sp->fts_child = NULL;
249			}
250			p->fts_info = FTS_DP;
251			return (p);
252		}
253
254		/*
255		 * If haven't read do so.  If the read fails, fts_build sets
256		 * FTS_STOP or the fts_info field of the node.
257		 */
258		if (sp->fts_child) {
259			/* nothing */
260		} else if ((sp->fts_child = fts_build(sp)) == NULL) {
261			if (ISSET(FTS_STOP))
262				return (NULL);
263			return (p);
264		}
265		p = sp->fts_child;
266		sp->fts_child = NULL;
267		goto name;
268	}
269
270	/* Move to the next node on this level. */
271next:	tmp = p;
272	if ((p = p->fts_link)) {
273		free(tmp);
274
275		/*
276		 * If reached the top, return to the original directory (or
277		 * the root of the tree), and load the paths for the next root.
278		 */
279		if (p->fts_level == FTS_ROOTLEVEL) {
280			fts_load(sp, p);
281			return (sp->fts_cur = p);
282		}
283
284		/*
285		 * User may have called fts_set on the node.  If skipped,
286		 * ignore.  If followed, get a file descriptor so we can
287		 * get back if necessary.
288		 */
289		if (p->fts_instr == FTS_SKIP)
290			goto next;
291
292name:		t = sp->fts_path + NAPPEND(p->fts_parent);
293		*t++ = '/';
294		memmove(t, p->fts_name, p->fts_namelen + 1);
295		return (sp->fts_cur = p);
296	}
297
298	/* Move up to the parent node. */
299	p = tmp->fts_parent;
300	free(tmp);
301
302	if (p->fts_level == FTS_ROOTPARENTLEVEL) {
303		/*
304		 * Done; free everything up and set errno to 0 so the user
305		 * can distinguish between error and EOF.
306		 */
307		free(p);
308		errno = 0;
309		return (sp->fts_cur = NULL);
310	}
311
312	/* NUL terminate the pathname. */
313	sp->fts_path[p->fts_pathlen] = '\0';
314
315	p->fts_info = p->fts_errno ? FTS_ERR : FTS_DP;
316	return (sp->fts_cur = p);
317}
318
319/*
320 * Fts_set takes the stream as an argument although it's not used in this
321 * implementation; it would be necessary if anyone wanted to add global
322 * semantics to fts using fts_set.  An error return is allowed for similar
323 * reasons.
324 */
325int
326fts_set(FTS *sp, FTSENT *p, int instr)
327{
328	if (instr && instr != FTS_NOINSTR && instr != FTS_SKIP) {
329		errno = EINVAL;
330		return (1);
331	}
332	p->fts_instr = instr;
333	return (0);
334}
335
336/*
337 * This is the tricky part -- do not casually change *anything* in here.  The
338 * idea is to build the linked list of entries that are used by fts_children
339 * and fts_read.  There are lots of special cases.
340 *
341 * The real slowdown in walking the tree is the stat calls.  If FTS_NOSTAT is
342 * set and it's a physical walk (so that symbolic links can't be directories),
343 * we can do things quickly.  First, if it's a 4.4BSD file system, the type
344 * of the file is in the directory entry.  Otherwise, we assume that the number
345 * of subdirectories in a node is equal to the number of links to the parent.
346 * The former skips all stat calls.  The latter skips stat calls in any leaf
347 * directories and for any files after the subdirectories in the directory have
348 * been found, cutting the stat calls by about 2/3.
349 */
350static FTSENT *
351fts_build(FTS *sp)
352{
353	struct dirent *dp;
354	FTSENT *p, *head;
355	FTSENT *cur, *tail;
356	DIR *dirp;
357	void *oldaddr;
358	size_t dlen, len, maxlen;
359	int nitems, level, doadjust;
360	int saved_errno;
361	char *cp;
362
363	/* Set current node pointer. */
364	cur = sp->fts_cur;
365
366	/*
367	 * Open the directory for reading.  If this fails, we're done.
368	 * If being called from fts_read, set the fts_info field.
369	 */
370	if ((dirp = opendir(cur->fts_accpath)) == NULL) {
371		cur->fts_info = FTS_DNR;
372		cur->fts_errno = errno;
373		return (NULL);
374	}
375
376	/*
377	 * Figure out the max file name length that can be stored in the
378	 * current path -- the inner loop allocates more path as necessary.
379	 * We really wouldn't have to do the maxlen calculations here, we
380	 * could do them in fts_read before returning the path, but it's a
381	 * lot easier here since the length is part of the dirent structure.
382	 *
383	 * If not changing directories set a pointer so that can just append
384	 * each new name into the path.
385	 */
386	len = NAPPEND(cur);
387	cp = sp->fts_path + len;
388	*cp++ = '/';
389	len++;
390	maxlen = sp->fts_pathlen - len;
391
392	/*
393	 * fts_level is signed so we must prevent it from wrapping
394	 * around to FTS_ROOTLEVEL and FTS_ROOTPARENTLEVEL.
395	 */
396	level = cur->fts_level;
397	if (level < FTS_MAXLEVEL)
398	    level++;
399
400	/* Read the directory, attaching each entry to the `link' pointer. */
401	doadjust = 0;
402	for (head = tail = NULL, nitems = 0; dirp && (dp = readdir(dirp));) {
403		if (ISDOT(dp->d_name))
404			continue;
405
406#if HAVE_DIRENT_NAMLEN
407		dlen = dp->d_namlen;
408#else
409		dlen = strlen(dp->d_name);
410#endif
411
412		if (!(p = fts_alloc(sp, dp->d_name, dlen)))
413			goto mem1;
414		if (dlen >= maxlen) {	/* include space for NUL */
415			oldaddr = sp->fts_path;
416			if (fts_palloc(sp, dlen + len + 1)) {
417				/*
418				 * No more memory for path or structures.  Save
419				 * errno, free up the current structure and the
420				 * structures already allocated.
421				 */
422mem1:				saved_errno = errno;
423				free(p);
424				fts_lfree(head);
425				(void)closedir(dirp);
426				cur->fts_info = FTS_ERR;
427				SET(FTS_STOP);
428				errno = saved_errno;
429				return (NULL);
430			}
431			/* Did realloc() change the pointer? */
432			if (oldaddr != sp->fts_path) {
433				doadjust = 1;
434				cp = sp->fts_path + len;
435			}
436			maxlen = sp->fts_pathlen - len;
437		}
438
439		p->fts_level = level;
440		p->fts_parent = sp->fts_cur;
441		p->fts_pathlen = len + dlen;
442		if (p->fts_pathlen < len) {
443			/*
444			 * If we wrap, free up the current structure and
445			 * the structures already allocated, then error
446			 * out with ENAMETOOLONG.
447			 */
448			free(p);
449			fts_lfree(head);
450			(void)closedir(dirp);
451			cur->fts_info = FTS_ERR;
452			SET(FTS_STOP);
453			errno = ENAMETOOLONG;
454			return (NULL);
455		}
456
457		/* Build a file name for fts_stat to stat. */
458		p->fts_accpath = p->fts_path;
459		memmove(cp, p->fts_name, p->fts_namelen + 1);
460		/* Stat it. */
461		p->fts_info = fts_stat(sp, p);
462
463		/* We walk in directory order so "ls -f" doesn't get upset. */
464		p->fts_link = NULL;
465		if (head == NULL)
466			head = tail = p;
467		else {
468			tail->fts_link = p;
469			tail = p;
470		}
471		++nitems;
472	}
473	if (dirp)
474		(void)closedir(dirp);
475
476	/*
477	 * If realloc() changed the address of the path, adjust the
478	 * addresses for the rest of the tree and the dir list.
479	 */
480	if (doadjust)
481		fts_padjust(sp, head);
482
483	/*
484	 * If not changing directories, reset the path back to original
485	 * state.
486	 */
487	if (len == sp->fts_pathlen || nitems == 0)
488		--cp;
489	*cp = '\0';
490
491	/* If didn't find anything, return NULL. */
492	if (!nitems) {
493		cur->fts_info = FTS_DP;
494		return (NULL);
495	}
496
497	/* Sort the entries. */
498	if (sp->fts_compar && nitems > 1)
499		head = fts_sort(sp, head, nitems);
500	return (head);
501}
502
503static unsigned short
504fts_stat(FTS *sp, FTSENT *p)
505{
506	FTSENT *t;
507	dev_t dev;
508	ino_t ino;
509	struct stat *sbp;
510
511	/* If user needs stat info, stat buffer already allocated. */
512	sbp = p->fts_statp;
513
514	if (lstat(p->fts_accpath, sbp)) {
515		p->fts_errno = errno;
516		memset(sbp, 0, sizeof(struct stat));
517		return (FTS_NS);
518	}
519
520	if (S_ISDIR(sbp->st_mode)) {
521		/*
522		 * Set the device/inode.  Used to find cycles and check for
523		 * crossing mount points.  Also remember the link count, used
524		 * in fts_build to limit the number of stat calls.  It is
525		 * understood that these fields are only referenced if fts_info
526		 * is set to FTS_D.
527		 */
528		dev = p->fts_dev = sbp->st_dev;
529		ino = p->fts_ino = sbp->st_ino;
530		p->fts_nlink = sbp->st_nlink;
531
532		if (ISDOT(p->fts_name))
533			return (FTS_DOT);
534
535		/*
536		 * Cycle detection is done by brute force when the directory
537		 * is first encountered.  If the tree gets deep enough or the
538		 * number of symbolic links to directories is high enough,
539		 * something faster might be worthwhile.
540		 */
541		for (t = p->fts_parent;
542		    t->fts_level >= FTS_ROOTLEVEL; t = t->fts_parent)
543			if (ino == t->fts_ino && dev == t->fts_dev) {
544				p->fts_cycle = t;
545				return (FTS_DC);
546			}
547		return (FTS_D);
548	}
549	if (S_ISLNK(sbp->st_mode))
550		return (FTS_SL);
551	if (S_ISREG(sbp->st_mode))
552		return (FTS_F);
553	return (FTS_DEFAULT);
554}
555
556static FTSENT *
557fts_sort(FTS *sp, FTSENT *head, int nitems)
558{
559	FTSENT **ap, *p;
560
561	/*
562	 * Construct an array of pointers to the structures and call qsort(3).
563	 * Reassemble the array in the order returned by qsort.  If unable to
564	 * sort for memory reasons, return the directory entries in their
565	 * current order.  Allocate enough space for the current needs plus
566	 * 40 so don't realloc one entry at a time.
567	 */
568	if (nitems > sp->fts_nitems) {
569		struct _ftsent **a;
570
571		if ((a = reallocarray(sp->fts_array,
572		    nitems + 40, sizeof(FTSENT *))) == NULL) {
573			free(sp->fts_array);
574			sp->fts_array = NULL;
575			sp->fts_nitems = 0;
576			return (head);
577		}
578		sp->fts_nitems = nitems + 40;
579		sp->fts_array = a;
580	}
581	for (ap = sp->fts_array, p = head; p; p = p->fts_link)
582		*ap++ = p;
583	qsort(sp->fts_array, nitems, sizeof(FTSENT *),
584	    (qsort_compar_proto)sp->fts_compar);
585	for (head = *(ap = sp->fts_array); --nitems; ++ap)
586		ap[0]->fts_link = ap[1];
587	ap[0]->fts_link = NULL;
588	return (head);
589}
590
591static FTSENT *
592fts_alloc(FTS *sp, const char *name, size_t namelen)
593{
594	FTSENT *p;
595	size_t len;
596
597	len = sizeof(FTSENT) + namelen;
598	if ((p = calloc(1, len)) == NULL)
599		return (NULL);
600
601	p->fts_path = sp->fts_path;
602	p->fts_namelen = namelen;
603	p->fts_instr = FTS_NOINSTR;
604	p->fts_statp = malloc(sizeof(struct stat));
605	if (p->fts_statp == NULL) {
606		free(p);
607		return (NULL);
608	}
609	memcpy(p->fts_name, name, namelen);
610
611	return (p);
612}
613
614static void
615fts_lfree(FTSENT *head)
616{
617	FTSENT *p;
618
619	/* Free a linked list of structures. */
620	while ((p = head)) {
621		head = head->fts_link;
622		free(p);
623	}
624}
625
626/*
627 * Allow essentially unlimited paths; find, rm, ls should all work on any tree.
628 * Most systems will allow creation of paths much longer than PATH_MAX, even
629 * though the kernel won't resolve them.  Add the size (not just what's needed)
630 * plus 256 bytes so don't realloc the path 2 bytes at a time.
631 */
632static int
633fts_palloc(FTS *sp, size_t more)
634{
635	char *p;
636
637	/*
638	 * Check for possible wraparound.
639	 */
640	more += 256;
641	if (sp->fts_pathlen + more < sp->fts_pathlen) {
642		free(sp->fts_path);
643		sp->fts_path = NULL;
644		errno = ENAMETOOLONG;
645		return (1);
646	}
647	p = recallocarray(sp->fts_path, sp->fts_pathlen,
648	    sp->fts_pathlen + more, 1);
649	if (p == NULL) {
650		free(sp->fts_path);
651		sp->fts_path = NULL;
652		return (1);
653	}
654	sp->fts_pathlen += more;
655	sp->fts_path = p;
656	return (0);
657}
658
659/*
660 * When the path is realloc'd, have to fix all of the pointers in structures
661 * already returned.
662 */
663static void
664fts_padjust(FTS *sp, FTSENT *head)
665{
666	FTSENT *p;
667	char *addr = sp->fts_path;
668
669#define	ADJUST(p) {							\
670	if ((p)->fts_accpath != (p)->fts_name) {			\
671		(p)->fts_accpath =					\
672		    (char *)addr + ((p)->fts_accpath - (p)->fts_path);	\
673	}								\
674	(p)->fts_path = addr;						\
675}
676	/* Adjust the current set of children. */
677	for (p = sp->fts_child; p; p = p->fts_link)
678		ADJUST(p);
679
680	/* Adjust the rest of the tree, including the current level. */
681	for (p = head; p->fts_level >= FTS_ROOTLEVEL;) {
682		ADJUST(p);
683		p = p->fts_link ? p->fts_link : p->fts_parent;
684	}
685}
686
687static size_t
688fts_maxarglen(char * const *argv)
689{
690	size_t len, max;
691
692	for (max = 0; *argv; ++argv)
693		if ((len = strlen(*argv)) > max)
694			max = len;
695	return (max + 1);
696}
697