1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1989, 1993, 1994
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Chris Newcomb.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35#include <sys/param.h>
36#include <sys/queue.h>
37#include <sys/stat.h>
38#include <err.h>
39#include <errno.h>
40#include <fnmatch.h>
41#include <fts.h>
42#include <getopt.h>
43#include <libutil.h>
44#include <locale.h>
45#include <stdint.h>
46#include <stdio.h>
47#include <stdlib.h>
48#include <string.h>
49#include <sysexits.h>
50#include <unistd.h>
51#include <libxo/xo.h>
52
53#define SI_OPT	(CHAR_MAX + 1)
54
55#define UNITS_2		1
56#define UNITS_SI	2
57
58static SLIST_HEAD(ignhead, ignentry) ignores;
59struct ignentry {
60	char			*mask;
61	SLIST_ENTRY(ignentry)	next;
62};
63
64static int	linkchk(FTSENT *);
65static void	usage(void);
66static void	prthumanval(const char *, int64_t);
67static void	ignoreadd(const char *);
68static void	ignoreclean(void);
69static int	ignorep(FTSENT *);
70static void	siginfo(int __unused);
71
72static int	nodumpflag = 0;
73static int	Aflag, hflag;
74static long	blocksize, cblocksize;
75static volatile sig_atomic_t info;
76
77static const struct option long_options[] =
78{
79	{ "si", no_argument, NULL, SI_OPT },
80	{ NULL, no_argument, NULL, 0 },
81};
82
83int
84main(int argc, char *argv[])
85{
86	FTS		*fts;
87	FTSENT		*p;
88	off_t		savednumber, curblocks;
89	off_t		threshold, threshold_sign;
90	int		ftsoptions;
91	int		depth;
92	int		Hflag, Lflag, aflag, sflag, dflag, cflag;
93	int		lflag, ch, notused, rval;
94	char 		**save;
95	static char	dot[] = ".";
96
97	setlocale(LC_ALL, "");
98
99	Hflag = Lflag = aflag = sflag = dflag = cflag = lflag = Aflag = 0;
100
101	save = argv;
102	ftsoptions = FTS_PHYSICAL;
103	savednumber = 0;
104	threshold = 0;
105	threshold_sign = 1;
106	cblocksize = DEV_BSIZE;
107	blocksize = 0;
108	depth = INT_MAX;
109	SLIST_INIT(&ignores);
110
111	argc = xo_parse_args(argc, argv);
112	if (argc < 0)
113		exit(EX_USAGE);
114
115	while ((ch = getopt_long(argc, argv, "+AB:HI:LPasd:cghklmnrt:x",
116	    long_options, NULL)) != -1)
117		switch (ch) {
118		case 'A':
119			Aflag = 1;
120			break;
121		case 'B':
122			errno = 0;
123			cblocksize = atoi(optarg);
124			if (errno == ERANGE || cblocksize <= 0) {
125				xo_warnx("invalid argument to option B: %s",
126				    optarg);
127				usage();
128			}
129			break;
130		case 'H':
131			Hflag = 1;
132			Lflag = 0;
133			break;
134		case 'I':
135			ignoreadd(optarg);
136			break;
137		case 'L':
138			Lflag = 1;
139			Hflag = 0;
140			break;
141		case 'P':
142			Hflag = Lflag = 0;
143			break;
144		case 'a':
145			aflag = 1;
146			break;
147		case 's':
148			sflag = 1;
149			break;
150		case 'd':
151			dflag = 1;
152			errno = 0;
153			depth = atoi(optarg);
154			if (errno == ERANGE || depth < 0) {
155				xo_warnx("invalid argument to option d: %s",
156				    optarg);
157				usage();
158			}
159			break;
160		case 'c':
161			cflag = 1;
162			break;
163		case 'g':
164			hflag = 0;
165			blocksize = 1073741824;
166			break;
167		case 'h':
168			hflag = UNITS_2;
169			break;
170		case 'k':
171			hflag = 0;
172			blocksize = 1024;
173			break;
174		case 'l':
175			lflag = 1;
176			break;
177		case 'm':
178			hflag = 0;
179			blocksize = 1048576;
180			break;
181		case 'n':
182			nodumpflag = 1;
183			break;
184		case 'r':		 /* Compatibility. */
185			break;
186		case 't' :
187			if (expand_number(optarg, &threshold) != 0 ||
188			    threshold == 0) {
189				xo_warnx("invalid threshold: %s", optarg);
190				usage();
191			} else if (threshold < 0)
192				threshold_sign = -1;
193			break;
194		case 'x':
195			ftsoptions |= FTS_XDEV;
196			break;
197		case SI_OPT:
198			hflag = UNITS_SI;
199			break;
200		case '?':
201		default:
202			usage();
203			/* NOTREACHED */
204		}
205
206	argc -= optind;
207	argv += optind;
208
209	/*
210	 * XXX
211	 * Because of the way that fts(3) works, logical walks will not count
212	 * the blocks actually used by symbolic links.  We rationalize this by
213	 * noting that users computing logical sizes are likely to do logical
214	 * copies, so not counting the links is correct.  The real reason is
215	 * that we'd have to re-implement the kernel's symbolic link traversing
216	 * algorithm to get this right.  If, for example, you have relative
217	 * symbolic links referencing other relative symbolic links, it gets
218	 * very nasty, very fast.  The bottom line is that it's documented in
219	 * the man page, so it's a feature.
220	 */
221
222	if (Hflag)
223		ftsoptions |= FTS_COMFOLLOW;
224	if (Lflag) {
225		ftsoptions &= ~FTS_PHYSICAL;
226		ftsoptions |= FTS_LOGICAL;
227	}
228
229	if (!Aflag && (cblocksize % DEV_BSIZE) != 0)
230		cblocksize = howmany(cblocksize, DEV_BSIZE) * DEV_BSIZE;
231
232	if (aflag + dflag + sflag > 1)
233		usage();
234	if (sflag)
235		depth = 0;
236
237	if (!*argv) {
238		argv = save;
239		argv[0] = dot;
240		argv[1] = NULL;
241	}
242
243	if (blocksize == 0)
244		(void)getbsize(&notused, &blocksize);
245
246	if (!Aflag) {
247		cblocksize /= DEV_BSIZE;
248		blocksize /= DEV_BSIZE;
249	}
250
251	if (threshold != 0)
252		threshold = howmany(threshold / DEV_BSIZE * cblocksize,
253		    blocksize);
254
255	rval = 0;
256
257	(void)signal(SIGINFO, siginfo);
258
259	if ((fts = fts_open(argv, ftsoptions, NULL)) == NULL)
260		err(1, "fts_open");
261
262	xo_open_container("disk-usage-information");
263	xo_open_list("paths");
264	while (errno = 0, (p = fts_read(fts)) != NULL) {
265		switch (p->fts_info) {
266		case FTS_D:			/* Ignore. */
267			if (ignorep(p))
268				fts_set(fts, p, FTS_SKIP);
269			break;
270		case FTS_DP:
271			if (ignorep(p))
272				break;
273
274			curblocks = Aflag ?
275			    howmany(p->fts_statp->st_size, cblocksize) :
276			    howmany(p->fts_statp->st_blocks, cblocksize);
277			p->fts_parent->fts_bignum += p->fts_bignum +=
278			    curblocks;
279
280			if (p->fts_level <= depth && threshold <=
281			    threshold_sign * howmany(p->fts_bignum *
282			    cblocksize, blocksize)) {
283				xo_open_instance("paths");
284				if (hflag > 0) {
285					prthumanval("{:blocks/%4s}",
286					    p->fts_bignum);
287					xo_emit("\t{:path/%s}\n", p->fts_path);
288				} else {
289					xo_emit("{:blocks/%jd}\t{:path/%s}\n",
290					    (intmax_t)howmany(p->fts_bignum *
291					    cblocksize, blocksize),
292					    p->fts_path);
293				}
294				xo_close_instance("paths");
295			}
296			if (info) {
297				info = 0;
298				(void)printf("\t%s\n", p->fts_path);
299			}
300			break;
301		case FTS_DC:			/* Ignore. */
302			break;
303		case FTS_DNR:			/* Warn, continue. */
304		case FTS_ERR:
305		case FTS_NS:
306			xo_warnx("%s: %s", p->fts_path, strerror(p->fts_errno));
307			rval = 1;
308			break;
309		default:
310			if (ignorep(p))
311				break;
312
313			if (lflag == 0 && p->fts_statp->st_nlink > 1 &&
314			    linkchk(p))
315				break;
316
317			curblocks = Aflag ?
318			    howmany(p->fts_statp->st_size, cblocksize) :
319			    howmany(p->fts_statp->st_blocks, cblocksize);
320
321			if (aflag || p->fts_level == 0) {
322				xo_open_instance("paths");
323				if (hflag > 0) {
324					prthumanval("{:blocks/%4s}", curblocks);
325					xo_emit("\t{:path/%s}\n", p->fts_path);
326				} else {
327					xo_emit("{:blocks/%jd}\t{:path/%s}\n",
328					    (intmax_t)howmany(curblocks *
329					    cblocksize, blocksize),
330					    p->fts_path);
331				}
332				xo_close_instance("paths");
333			}
334
335			p->fts_parent->fts_bignum += curblocks;
336		}
337		savednumber = p->fts_parent->fts_bignum;
338	}
339	xo_close_list("paths");
340
341	if (errno)
342		xo_err(1, "fts_read");
343
344	if (cflag) {
345		if (hflag > 0) {
346			prthumanval("{:total-blocks/%4s}\ttotal\n",
347			    savednumber);
348		} else {
349			xo_emit("{:total-blocks/%jd}\ttotal\n",
350			    (intmax_t)howmany(
351			    savednumber * cblocksize, blocksize));
352		}
353	}
354
355	ignoreclean();
356	xo_close_container("disk-usage-information");
357	if (xo_finish() < 0)
358		xo_err(1, "stdout");
359	exit(rval);
360}
361
362static int
363linkchk(FTSENT *p)
364{
365	struct links_entry {
366		struct links_entry *next;
367		struct links_entry *previous;
368		int	 links;
369		dev_t	 dev;
370		ino_t	 ino;
371	};
372	static const size_t links_hash_initial_size = 8192;
373	static struct links_entry **buckets;
374	static struct links_entry *free_list;
375	static size_t number_buckets;
376	static unsigned long number_entries;
377	static char stop_allocating;
378	struct links_entry *le, **new_buckets;
379	struct stat *st;
380	size_t i, new_size;
381	int hash;
382
383	st = p->fts_statp;
384
385	/* If necessary, initialize the hash table. */
386	if (buckets == NULL) {
387		number_buckets = links_hash_initial_size;
388		buckets = malloc(number_buckets * sizeof(buckets[0]));
389		if (buckets == NULL)
390			errx(1, "No memory for hardlink detection");
391		for (i = 0; i < number_buckets; i++)
392			buckets[i] = NULL;
393	}
394
395	/* If the hash table is getting too full, enlarge it. */
396	if (number_entries > number_buckets * 10 && !stop_allocating) {
397		new_size = number_buckets * 2;
398		new_buckets = calloc(new_size, sizeof(struct links_entry *));
399
400		/* Try releasing the free list to see if that helps. */
401		if (new_buckets == NULL && free_list != NULL) {
402			while (free_list != NULL) {
403				le = free_list;
404				free_list = le->next;
405				free(le);
406			}
407			new_buckets = calloc(new_size, sizeof(new_buckets[0]));
408		}
409
410		if (new_buckets == NULL) {
411			stop_allocating = 1;
412			xo_warnx("No more memory for tracking hard links");
413		} else {
414			for (i = 0; i < number_buckets; i++) {
415				while (buckets[i] != NULL) {
416					/* Remove entry from old bucket. */
417					le = buckets[i];
418					buckets[i] = le->next;
419
420					/* Add entry to new bucket. */
421					hash = (le->dev ^ le->ino) % new_size;
422
423					if (new_buckets[hash] != NULL)
424						new_buckets[hash]->previous =
425						    le;
426					le->next = new_buckets[hash];
427					le->previous = NULL;
428					new_buckets[hash] = le;
429				}
430			}
431			free(buckets);
432			buckets = new_buckets;
433			number_buckets = new_size;
434		}
435	}
436
437	/* Try to locate this entry in the hash table. */
438	hash = ( st->st_dev ^ st->st_ino ) % number_buckets;
439	for (le = buckets[hash]; le != NULL; le = le->next) {
440		if (le->dev == st->st_dev && le->ino == st->st_ino) {
441			/*
442			 * Save memory by releasing an entry when we've seen
443			 * all of its links.
444			 */
445			if (--le->links <= 0) {
446				if (le->previous != NULL)
447					le->previous->next = le->next;
448				if (le->next != NULL)
449					le->next->previous = le->previous;
450				if (buckets[hash] == le)
451					buckets[hash] = le->next;
452				number_entries--;
453				/* Recycle this node through the free list */
454				if (stop_allocating) {
455					free(le);
456				} else {
457					le->next = free_list;
458					free_list = le;
459				}
460			}
461			return (1);
462		}
463	}
464
465	if (stop_allocating)
466		return (0);
467
468	/* Add this entry to the links cache. */
469	if (free_list != NULL) {
470		/* Pull a node from the free list if we can. */
471		le = free_list;
472		free_list = le->next;
473	} else
474		/* Malloc one if we have to. */
475		le = malloc(sizeof(struct links_entry));
476	if (le == NULL) {
477		stop_allocating = 1;
478		xo_warnx("No more memory for tracking hard links");
479		return (0);
480	}
481	le->dev = st->st_dev;
482	le->ino = st->st_ino;
483	le->links = st->st_nlink - 1;
484	number_entries++;
485	le->next = buckets[hash];
486	le->previous = NULL;
487	if (buckets[hash] != NULL)
488		buckets[hash]->previous = le;
489	buckets[hash] = le;
490	return (0);
491}
492
493static void
494prthumanval(const char *fmt, int64_t bytes)
495{
496	char buf[5];
497	int flags;
498
499	bytes *= cblocksize;
500	flags = HN_B | HN_NOSPACE | HN_DECIMAL;
501	if (!Aflag)
502		bytes *= DEV_BSIZE;
503	if (hflag == UNITS_SI)
504		flags |= HN_DIVISOR_1000;
505
506	humanize_number(buf, sizeof(buf), bytes, "", HN_AUTOSCALE, flags);
507
508	xo_emit(fmt, buf);
509}
510
511static void
512usage(void)
513{
514	xo_error(
515		"usage: du [-Aclnx] [-H | -L | -P] [-g | -h | -k | -m] "
516		"[-a | -s | -d depth] [-B blocksize] [-I mask] "
517		"[-t threshold] [file ...]\n");
518	exit(EX_USAGE);
519}
520
521static void
522ignoreadd(const char *mask)
523{
524	struct ignentry *ign;
525
526	ign = calloc(1, sizeof(*ign));
527	if (ign == NULL)
528		errx(1, "cannot allocate memory");
529	ign->mask = strdup(mask);
530	if (ign->mask == NULL)
531		errx(1, "cannot allocate memory");
532	SLIST_INSERT_HEAD(&ignores, ign, next);
533}
534
535static void
536ignoreclean(void)
537{
538	struct ignentry *ign;
539
540	while (!SLIST_EMPTY(&ignores)) {
541		ign = SLIST_FIRST(&ignores);
542		SLIST_REMOVE_HEAD(&ignores, next);
543		free(ign->mask);
544		free(ign);
545	}
546}
547
548static int
549ignorep(FTSENT *ent)
550{
551	struct ignentry *ign;
552
553	if (nodumpflag && (ent->fts_statp->st_flags & UF_NODUMP))
554		return 1;
555	SLIST_FOREACH(ign, &ignores, next)
556		if (fnmatch(ign->mask, ent->fts_name, 0) != FNM_NOMATCH)
557			return 1;
558	return 0;
559}
560
561static void
562siginfo(int sig __unused)
563{
564
565	info = 1;
566}
567