flist.c revision 1.5
1/*	$Id: flist.c,v 1.5 2019/02/11 21:41:22 deraadt Exp $ */
2/*
3 * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17#include <sys/param.h>
18#include <sys/stat.h>
19
20#include <assert.h>
21#include <errno.h>
22#include <fcntl.h>
23#include <inttypes.h>
24#include <fts.h>
25#include <search.h>
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29#include <unistd.h>
30
31#include "extern.h"
32
33/*
34 * We allocate our file list in chunk sizes so as not to do it one by
35 * one.
36 * Preferrably we get one or two allocation.
37 */
38#define	FLIST_CHUNK_SIZE (1024)
39
40/*
41 * These flags are part of the rsync protocol.
42 * They are sent as the first byte for a file transmission and encode
43 * information that affects subsequent transmissions.
44 */
45#define FLIST_MODE_SAME  0x0002 /* mode is repeat */
46#define	FLIST_NAME_SAME  0x0020 /* name is repeat */
47#define FLIST_NAME_LONG	 0x0040 /* name >255 bytes */
48#define FLIST_TIME_SAME  0x0080 /* time is repeat */
49
50/*
51 * Requied way to sort a filename list.
52 */
53static int
54flist_cmp(const void *p1, const void *p2)
55{
56	const struct flist *f1 = p1, *f2 = p2;
57
58	return strcmp(f1->wpath, f2->wpath);
59}
60
61/*
62 * Deduplicate our file list (which may be zero-length).
63 * Returns zero on failure, non-zero on success.
64 */
65static int
66flist_dedupe(struct sess *sess, struct flist **fl, size_t *sz)
67{
68	size_t		 i, j;
69	struct flist	*new;
70	struct flist	*f, *fnext;
71
72	if (*sz == 0)
73		return 1;
74
75	/* Create a new buffer, "new", and copy. */
76
77	new = calloc(*sz, sizeof(struct flist));
78	if (new == NULL) {
79		ERR(sess, "calloc");
80		return 0;
81	}
82
83	for (i = j = 0; i < *sz - 1; i++) {
84		f = &(*fl)[i];
85		fnext = &(*fl)[i + 1];
86
87		if (strcmp(f->wpath, fnext->wpath)) {
88			new[j++] = *f;
89			continue;
90		}
91
92		/*
93		 * Our working (destination) paths are the same.
94		 * If the actual file is the same (as given on the
95		 * command-line), then we can just discard the first.
96		 * Otherwise, we need to bail out: it means we have two
97		 * different files with the relative path on the
98		 * destination side.
99		 */
100
101		if (strcmp(f->path, fnext->path) == 0) {
102			new[j++] = *f;
103			i++;
104			WARNX(sess, "%s: duplicate path: %s",
105			    f->wpath, f->path);
106			free(fnext->path);
107			free(fnext->link);
108			fnext->path = fnext->link = NULL;
109			continue;
110		}
111
112		ERRX(sess, "%s: duplicate working path for "
113		    "possibly different file: %s, %s",
114		    f->wpath, f->path, fnext->path);
115		free(new);
116		return 0;
117	}
118
119	/* Don't forget the last entry. */
120
121	if (i == *sz - 1)
122		new[j++] = (*fl)[i];
123
124	/*
125	 * Reassign to the deduplicated array.
126	 * If we started out with *sz > 0, which we check for at the
127	 * beginning, then we'll always continue having *sz > 0.
128	 */
129
130	free(*fl);
131	*fl = new;
132	*sz = j;
133	assert(*sz);
134	return 1;
135}
136
137/*
138 * We're now going to find our top-level directories.
139 * This only applies to recursive mode.
140 * If we have the first element as the ".", then that's the "top
141 * directory" of our transfer.
142 * Otherwise, mark up all top-level directories in the set.
143 */
144static void
145flist_topdirs(struct sess *sess, struct flist *fl, size_t flsz)
146{
147	size_t		 i;
148	const char	*cp;
149
150	if (!sess->opts->recursive)
151		return;
152
153	if (flsz && strcmp(fl[0].wpath, ".")) {
154		for (i = 0; i < flsz; i++) {
155			if (!S_ISDIR(fl[i].st.mode))
156				continue;
157			cp = strchr(fl[i].wpath, '/');
158			if (cp != NULL && cp[1] != '\0')
159				continue;
160			fl[i].st.flags |= FLSTAT_TOP_DIR;
161			LOG4(sess, "%s: top-level", fl[i].wpath);
162		}
163	} else if (flsz) {
164		fl[0].st.flags |= FLSTAT_TOP_DIR;
165		LOG4(sess, "%s: top-level", fl[0].wpath);
166	}
167}
168
169/*
170 * Filter through the fts() file information.
171 * We want directories (pre-order), regular files, and symlinks.
172 * Everything else is skipped and possibly warned about.
173 * Return zero to skip, non-zero to examine.
174 */
175static int
176flist_fts_check(struct sess *sess, FTSENT *ent)
177{
178
179	if (ent->fts_info == FTS_F  ||
180	    ent->fts_info == FTS_D ||
181	    ent->fts_info == FTS_SL ||
182	    ent->fts_info == FTS_SLNONE)
183		return 1;
184
185	if (ent->fts_info == FTS_DC) {
186		WARNX(sess, "%s: directory cycle", ent->fts_path);
187	} else if (ent->fts_info == FTS_DNR) {
188		errno = ent->fts_errno;
189		WARN(sess, "%s: unreadable directory", ent->fts_path);
190	} else if (ent->fts_info == FTS_DOT) {
191		WARNX(sess, "%s: skipping dot-file", ent->fts_path);
192	} else if (ent->fts_info == FTS_ERR) {
193		errno = ent->fts_errno;
194		WARN(sess, "%s", ent->fts_path);
195	} else if (ent->fts_info == FTS_DEFAULT) {
196		WARNX(sess, "%s: skipping special", ent->fts_path);
197	} else if (ent->fts_info == FTS_NS) {
198		errno = ent->fts_errno;
199		WARN(sess, "%s: could not stat", ent->fts_path);
200	}
201
202	return 0;
203}
204
205/*
206 * Copy necessary elements in "st" into the fields of "f".
207 */
208static void
209flist_copy_stat(struct flist *f, const struct stat *st)
210{
211	f->st.mode = st->st_mode;
212	f->st.uid = st->st_uid;
213	f->st.gid = st->st_gid;
214	f->st.size = st->st_size;
215	f->st.mtime = st->st_mtime;
216}
217
218void
219flist_free(struct flist *f, size_t sz)
220{
221	size_t	 i;
222
223	if (f == NULL)
224		return;
225
226	for (i = 0; i < sz; i++) {
227		free(f[i].path);
228		free(f[i].link);
229	}
230	free(f);
231}
232
233/*
234 * Serialise our file list (which may be zero-length) to the wire.
235 * Makes sure that the receiver isn't going to block on sending us
236 * return messages on the log channel.
237 * Return zero on failure, non-zero on success.
238 */
239int
240flist_send(struct sess *sess, int fdin, int fdout, const struct flist *fl,
241    size_t flsz)
242{
243	size_t		 i, fnlen;
244	uint8_t		 flag;
245	const struct flist *f;
246	const char	*fn;
247
248	/* Double-check that we've no pending multiplexed data. */
249
250	LOG2(sess, "sending file metadata list: %zu", flsz);
251
252	for (i = 0; i < flsz; i++) {
253		f = &fl[i];
254		fn = f->wpath;
255		fnlen = strlen(f->wpath);
256		assert(fnlen > 0);
257
258		/*
259		 * If applicable, unclog the read buffer.
260		 * This happens when the receiver has a lot of log
261		 * messages and all we're doing is sending our file list
262		 * without checking for messages.
263		 */
264
265		if (sess->mplex_reads &&
266		    io_read_check(sess, fdin) &&
267		     !io_read_flush(sess, fdin)) {
268			ERRX1(sess, "io_read_flush");
269			return 0;
270		}
271
272		/*
273		 * For ease, make all of our filenames be "long"
274		 * regardless their actual length.
275		 * This also makes sure that we don't transmit a zero
276		 * byte unintentionally.
277		 */
278
279		flag = FLIST_NAME_LONG;
280
281		LOG3(sess, "%s: sending file metadata: "
282			"size %jd, mtime %jd, mode %o",
283			fn, (intmax_t)f->st.size,
284			(intmax_t)f->st.mtime, f->st.mode);
285
286		/* Now write to the wire. */
287		/* FIXME: buffer this. */
288
289		if (!io_write_byte(sess, fdout, flag)) {
290			ERRX1(sess, "io_write_byte");
291			return 0;
292		} else if (!io_write_int(sess, fdout, fnlen)) {
293			ERRX1(sess, "io_write_int");
294			return 0;
295		} else if (!io_write_buf(sess, fdout, fn, fnlen)) {
296			ERRX1(sess, "io_write_buf");
297			return 0;
298		} else if (!io_write_long(sess, fdout, f->st.size)) {
299			ERRX1(sess, "io_write_long");
300			return 0;
301		} else if (!io_write_int(sess, fdout, f->st.mtime)) {
302			ERRX1(sess, "io_write_int");
303			return 0;
304		} else if (!io_write_int(sess, fdout, f->st.mode)) {
305			ERRX1(sess, "io_write_int");
306			return 0;
307		}
308
309		/* Optional link information. */
310
311		if (S_ISLNK(f->st.mode) &&
312		    sess->opts->preserve_links) {
313			fn = f->link;
314			fnlen = strlen(f->link);
315			if (!io_write_int(sess, fdout, fnlen)) {
316				ERRX1(sess, "io_write_int");
317				return 0;
318			}
319			if (!io_write_buf(sess, fdout, fn, fnlen)) {
320				ERRX1(sess, "io_write_int");
321				return 0;
322			}
323		}
324
325		if (S_ISREG(f->st.mode))
326			sess->total_size += f->st.size;
327	}
328
329	if (!io_write_byte(sess, fdout, 0)) {
330		ERRX1(sess, "io_write_byte");
331		return 0;
332	}
333
334	return 1;
335}
336
337/*
338 * Read the filename of a file list.
339 * This is the most expensive part of the file list transfer, so a lot
340 * of attention has gone into transmitting as little as possible.
341 * Micro-optimisation, but whatever.
342 * Fills in "f" with the full path on success.
343 * Returns zero on failure, non-zero on success.
344 */
345static int
346flist_recv_name(struct sess *sess, int fd, struct flist *f, uint8_t flags,
347    char last[MAXPATHLEN])
348{
349	uint8_t		 bval;
350	size_t		 partial = 0;
351	size_t		 pathlen = 0, len;
352
353	/*
354	 * Read our filename.
355	 * If we have FLIST_NAME_SAME, we inherit some of the last
356	 * transmitted name.
357	 * If we have FLIST_NAME_LONG, then the string length is greater
358	 * than byte-size.
359	 */
360
361	if (FLIST_NAME_SAME & flags) {
362		if (!io_read_byte(sess, fd, &bval)) {
363			ERRX1(sess, "io_read_byte");
364			return 0;
365		}
366		partial = bval;
367	}
368
369	/* Get the (possibly-remaining) filename length. */
370
371	if (FLIST_NAME_LONG & flags) {
372		if (!io_read_size(sess, fd, &pathlen)) {
373			ERRX1(sess, "io_read_size");
374			return 0;
375		}
376	} else {
377		if (!io_read_byte(sess, fd, &bval)) {
378			ERRX1(sess, "io_read_byte");
379			return 0;
380		}
381		pathlen = bval;
382	}
383
384	/* Allocate our full filename length. */
385	/* FIXME: maximum pathname length. */
386
387	if ((len = pathlen + partial) == 0) {
388		ERRX(sess, "security violation: "
389			"zero-length pathname");
390		return 0;
391	}
392
393	if ((f->path = malloc(len + 1)) == NULL) {
394		ERR(sess, "malloc");
395		return 0;
396	}
397	f->path[len] = '\0';
398
399	if (FLIST_NAME_SAME & flags)
400		memcpy(f->path, last, partial);
401
402	if (!io_read_buf(sess, fd, f->path + partial, pathlen)) {
403		ERRX1(sess, "io_read_buf");
404		return 0;
405	}
406
407	if (f->path[0] == '/') {
408		ERRX(sess, "security violation: "
409			"absolute pathname: %s", f->path);
410		return 0;
411	}
412
413	if (strstr(f->path, "/../") != NULL ||
414	    (len > 2 && strcmp(f->path + len - 3, "/..") == 0) ||
415	    (len > 2 && strncmp(f->path, "../", 3) == 0) ||
416	    strcmp(f->path, "..") == 0) {
417		ERRX(sess, "%s: security violation: "
418			"backtracking pathname", f->path);
419		return 0;
420	}
421
422	/* Record our last path and construct our filename. */
423
424	strlcpy(last, f->path, MAXPATHLEN);
425	f->wpath = f->path;
426	return 1;
427}
428
429/*
430 * Reallocate a file list in chunks of FLIST_CHUNK_SIZE;
431 * Returns zero on failure, non-zero on success.
432 */
433static int
434flist_realloc(struct sess *sess, struct flist **fl, size_t *sz, size_t *max)
435{
436	void	*pp;
437
438	if (*sz + 1 <= *max)  {
439		(*sz)++;
440		return 1;
441	}
442
443	pp = recallocarray(*fl, *max,
444		*max + FLIST_CHUNK_SIZE, sizeof(struct flist));
445	if (pp == NULL) {
446		ERR(sess, "recallocarray");
447		return 0;
448	}
449	*fl = pp;
450	*max += FLIST_CHUNK_SIZE;
451	(*sz)++;
452	return 1;
453}
454
455/*
456 * Copy a regular or symbolic link file "path" into "f".
457 * This handles the correct path creation and symbolic linking.
458 * Returns zero on failure, non-zero on success.
459 */
460static int
461flist_append(struct sess *sess, struct flist *f, struct stat *st,
462    const char *path)
463{
464
465	/*
466	 * Copy the full path for local addressing and transmit
467	 * only the filename part for the receiver.
468	 */
469
470	if ((f->path = strdup(path)) == NULL) {
471		ERR(sess, "strdup");
472		return 0;
473	}
474
475	if ((f->wpath = strrchr(f->path, '/')) == NULL)
476		f->wpath = f->path;
477	else
478		f->wpath++;
479
480	/*
481	 * On the receiving end, we'll strip out all bits on the
482	 * mode except for the file permissions.
483	 * No need to warn about it here.
484	 */
485
486	flist_copy_stat(f, st);
487
488	/* Optionally copy link information. */
489
490	if (S_ISLNK(st->st_mode)) {
491		f->link = symlink_read(sess, f->path);
492		if (f->link == NULL) {
493			ERRX1(sess, "symlink_read");
494			return 0;
495		}
496	}
497
498	return 1;
499}
500
501/*
502 * Receive a file list from the wire, filling in length "sz" (which may
503 * possibly be zero) and list "flp" on success.
504 * Return zero on failure, non-zero on success.
505 */
506int
507flist_recv(struct sess *sess, int fd, struct flist **flp, size_t *sz)
508{
509	struct flist	*fl = NULL;
510	struct flist	*ff;
511	const struct flist *fflast = NULL;
512	size_t		 flsz = 0, flmax = 0, lsz;
513	uint8_t		 flag;
514	char		 last[MAXPATHLEN];
515	uint64_t	 lval; /* temporary values... */
516	int32_t		 ival;
517
518	last[0] = '\0';
519
520	for (;;) {
521		if (!io_read_byte(sess, fd, &flag)) {
522			ERRX1(sess, "io_read_byte");
523			goto out;
524		} else if (flag == 0)
525			break;
526
527		if (!flist_realloc(sess, &fl, &flsz, &flmax)) {
528			ERRX1(sess, "flist_realloc");
529			goto out;
530		}
531
532		ff = &fl[flsz - 1];
533		fflast = flsz > 1 ? &fl[flsz - 2] : NULL;
534
535		/* Filename first. */
536
537		if (!flist_recv_name(sess, fd, ff, flag, last)) {
538			ERRX1(sess, "flist_recv_name");
539			goto out;
540		}
541
542		/* Read the file size. */
543
544		if (!io_read_ulong(sess, fd, &lval)) {
545			ERRX1(sess, "io_read_ulong");
546			goto out;
547		}
548		ff->st.size = lval;
549
550		/* Read the modification time. */
551
552		if (!(FLIST_TIME_SAME & flag)) {
553			if (!io_read_int(sess, fd, &ival)) {
554				ERRX1(sess, "io_read_int");
555				goto out;
556			}
557			ff->st.mtime = ival;
558		} else if (fflast == NULL) {
559			ERRX(sess, "same time without last entry");
560			goto out;
561		}  else
562			ff->st.mtime = fflast->st.mtime;
563
564		/* Read the file mode. */
565
566		if (!(FLIST_MODE_SAME & flag)) {
567			if (!io_read_int(sess, fd, &ival)) {
568				ERRX1(sess, "io_read_int");
569				goto out;
570			}
571			ff->st.mode = ival;
572		} else if (fflast == NULL) {
573			ERRX(sess, "same mode without last entry");
574			goto out;
575		} else
576			ff->st.mode = fflast->st.mode;
577
578		/* Optionally read the link information. */
579
580		if (S_ISLNK(ff->st.mode) &&
581		    sess->opts->preserve_links) {
582			if (!io_read_size(sess, fd, &lsz)) {
583				ERRX1(sess, "io_read_size");
584				goto out;
585			} else if (lsz == 0) {
586				ERRX(sess, "empty link name");
587				goto out;
588			}
589			ff->link = calloc(lsz + 1, 1);
590			if (ff->link == NULL) {
591				ERR(sess, "calloc");
592				goto out;
593			}
594			if (!io_read_buf(sess, fd, ff->link, lsz)) {
595				ERRX1(sess, "io_read_buf");
596				goto out;
597			}
598		}
599
600		LOG3(sess, "%s: received file metadata: "
601			"size %jd, mtime %jd, mode %o",
602			ff->path, (intmax_t)ff->st.size,
603			(intmax_t)ff->st.mtime, ff->st.mode);
604
605		if (S_ISREG(ff->st.mode))
606			sess->total_size += ff->st.size;
607	}
608
609	/* Remember to order the received list. */
610
611	LOG2(sess, "received file metadata list: %zu", flsz);
612	qsort(fl, flsz, sizeof(struct flist), flist_cmp);
613	flist_topdirs(sess, fl, flsz);
614	*sz = flsz;
615	*flp = fl;
616	return 1;
617out:
618	flist_free(fl, flsz);
619	*sz = 0;
620	*flp = NULL;
621	return 0;
622}
623
624/*
625 * Generate a flist possibly-recursively given a file root, which may
626 * also be a regular file or symlink.
627 * On success, augments the generated list in "flp" of length "sz".
628 * Returns zero on failure, non-zero on success.
629 */
630static int
631flist_gen_dirent(struct sess *sess, char *root, struct flist **fl, size_t *sz,
632    size_t *max)
633{
634	char		*cargv[2], *cp;
635	int		 rc = 0;
636	FTS		*fts;
637	FTSENT		*ent;
638	struct flist	*f;
639	size_t		 flsz = 0, stripdir;
640	struct stat	 st;
641
642	cargv[0] = root;
643	cargv[1] = NULL;
644
645	/*
646	 * If we're a file, then revert to the same actions we use for
647	 * the non-recursive scan.
648	 */
649
650	if (lstat(root, &st) == -1) {
651		ERR(sess, "%s: lstat", root);
652		return 0;
653	} else if (S_ISREG(st.st_mode)) {
654		if (!flist_realloc(sess, fl, sz, max)) {
655			ERRX1(sess, "flist_realloc");
656			return 0;
657		}
658		f = &(*fl)[(*sz) - 1];
659		assert(f != NULL);
660
661		if (!flist_append(sess, f, &st, root)) {
662			ERRX1(sess, "flist_append");
663			return 0;
664		} else if (unveil(root, "r") == -1) {
665			ERR(sess, "%s: unveil", root);
666			return 0;
667		}
668		return 1;
669	} else if (S_ISLNK(st.st_mode)) {
670		if (!sess->opts->preserve_links) {
671			WARNX(sess, "%s: skipping symlink", root);
672			return 1;
673		} else if (!flist_realloc(sess, fl, sz, max)) {
674			ERRX1(sess, "flist_realloc");
675			return 0;
676		}
677		f = &(*fl)[(*sz) - 1];
678		assert(f != NULL);
679
680		if (!flist_append(sess, f, &st, root)) {
681			ERRX1(sess, "flist_append");
682			return 0;
683		} else if (unveil(root, "r") == -1) {
684			ERR(sess, "%s: unveil", root);
685			return 0;
686		}
687		return 1;
688	} else if (!S_ISDIR(st.st_mode)) {
689		WARNX(sess, "%s: skipping special", root);
690		return 1;
691	}
692
693	/*
694	 * If we end with a slash, it means that we're not supposed to
695	 * copy the directory part itself---only the contents.
696	 * So set "stripdir" to be what we take out.
697	 */
698
699	stripdir = strlen(root);
700	assert(stripdir > 0);
701	if (root[stripdir - 1] != '/')
702		stripdir = 0;
703
704	/*
705	 * If we're not stripping anything, then see if we need to strip
706	 * out the leading material in the path up to and including the
707	 * last directory component.
708	 */
709
710	if (stripdir == 0)
711		if ((cp = strrchr(root, '/')) != NULL)
712			stripdir = cp - root + 1;
713
714	/*
715	 * If we're recursive, then we need to take down all of the
716	 * files and directory components, so use fts(3).
717	 * Copying the information file-by-file into the flstat.
718	 * We'll make sense of it in flist_send.
719	 */
720
721	if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) {
722		ERR(sess, "fts_open");
723		return 0;
724	}
725
726	errno = 0;
727	while ((ent = fts_read(fts)) != NULL) {
728		if (!flist_fts_check(sess, ent)) {
729			errno = 0;
730			continue;
731		}
732
733		/* We don't allow symlinks without -l. */
734
735		assert(ent->fts_statp != NULL);
736		if (S_ISLNK(ent->fts_statp->st_mode) &&
737		    !sess->opts->preserve_links) {
738			WARNX(sess, "%s: skipping "
739				"symlink", ent->fts_path);
740			continue;
741		}
742
743		/* Allocate a new file entry. */
744
745		if (!flist_realloc(sess, fl, sz, max)) {
746			ERRX1(sess, "flist_realloc");
747			goto out;
748		}
749		flsz++;
750		f = &(*fl)[*sz - 1];
751
752		/* Our path defaults to "." for the root. */
753
754		if ('\0' == ent->fts_path[stripdir]) {
755			if (asprintf(&f->path, "%s.", ent->fts_path) < 0) {
756				ERR(sess, "asprintf");
757				f->path = NULL;
758				goto out;
759			}
760		} else {
761			if ((f->path = strdup(ent->fts_path)) == NULL) {
762				ERR(sess, "strdup");
763				goto out;
764			}
765		}
766
767		f->wpath = f->path + stripdir;
768		flist_copy_stat(f, ent->fts_statp);
769
770		/* Optionally copy link information. */
771
772		if (S_ISLNK(ent->fts_statp->st_mode)) {
773			f->link = symlink_read(sess, f->path);
774			if (f->link == NULL) {
775				ERRX1(sess, "symlink_read");
776				goto out;
777			}
778		}
779
780		/* Reset errno for next fts_read() call. */
781		errno = 0;
782	}
783	if (errno) {
784		ERR(sess, "fts_read");
785		goto out;
786	} else if (unveil(root, "r") == -1) {
787		ERR(sess, "%s: unveil", root);
788		goto out;
789	}
790
791	LOG3(sess, "generated %zu filenames: %s", flsz, root);
792	rc = 1;
793out:
794	fts_close(fts);
795	return rc;
796}
797
798/*
799 * Generate a flist recursively given the array of directories (or
800 * files, symlinks, doesn't matter) specified in argv (argc >0).
801 * On success, stores the generated list in "flp" with length "sz",
802 * which may be zero.
803 * Returns zero on failure, non-zero on success.
804 */
805static int
806flist_gen_dirs(struct sess *sess, size_t argc, char **argv, struct flist **flp,
807    size_t *sz)
808{
809	size_t		 i, max = 0;
810
811	for (i = 0; i < argc; i++)
812		if (!flist_gen_dirent(sess, argv[i], flp, sz, &max))
813			break;
814
815	if (i == argc) {
816		LOG2(sess, "recursively generated %zu filenames", *sz);
817		return 1;
818	}
819
820	ERRX1(sess, "flist_gen_dirent");
821	flist_free(*flp, max);
822	*flp = NULL;
823	*sz = 0;
824	return 0;
825}
826
827/*
828 * Generate list of files from the command-line argc (>0) and argv.
829 * On success, stores the generated list in "flp" with length "sz",
830 * which may be zero.
831 * Returns zero on failure, non-zero on success.
832 */
833static int
834flist_gen_files(struct sess *sess, size_t argc, char **argv,
835    struct flist **flp, size_t *sz)
836{
837	struct flist	*fl = NULL, *f;
838	size_t		 i, flsz = 0;
839	struct stat	 st;
840
841	assert(argc);
842
843	if ((fl = calloc(argc, sizeof(struct flist))) == NULL) {
844		ERR(sess, "calloc");
845		return 0;
846	}
847
848	for (i = 0; i < argc; i++) {
849		if ('\0' == argv[i][0])
850			continue;
851		if (lstat(argv[i], &st) == -1) {
852			ERR(sess, "%s: lstat", argv[i]);
853			goto out;
854		}
855
856		/*
857		 * File type checks.
858		 * In non-recursive mode, we don't accept directories.
859		 * We also skip symbolic links without -l.
860		 * Beyond that, we only accept regular files.
861		 */
862
863		if (S_ISDIR(st.st_mode)) {
864			WARNX(sess, "%s: skipping directory", argv[i]);
865			continue;
866		} else if (S_ISLNK(st.st_mode)) {
867			if (!sess->opts->preserve_links) {
868				WARNX(sess, "%s: skipping "
869					"symlink", argv[i]);
870				continue;
871			}
872		} else if (!S_ISREG(st.st_mode)) {
873			WARNX(sess, "%s: skipping special", argv[i]);
874			continue;
875		}
876
877
878		f = &fl[flsz++];
879		assert(f != NULL);
880
881		/* Add this file to our file-system worldview. */
882
883		if (unveil(argv[i], "r") == -1) {
884			ERR(sess, "%s: unveil", argv[i]);
885			goto out;
886		} else if (!flist_append(sess, f, &st, argv[i])) {
887			ERRX1(sess, "flist_append");
888			goto out;
889		}
890	}
891
892	LOG2(sess, "non-recursively generated %zu filenames", flsz);
893	*sz = flsz;
894	*flp = fl;
895	return 1;
896out:
897	flist_free(fl, argc);
898	*sz = 0;
899	*flp = NULL;
900	return 0;
901}
902
903/*
904 * Generate a sorted, de-duplicated list of file metadata.
905 * In non-recursive mode (the default), we use only the files we're
906 * given.
907 * Otherwise, directories are recursively examined.
908 * Returns zero on failure, non-zero on success.
909 * On success, "fl" will need to be freed with flist_free().
910 */
911int
912flist_gen(struct sess *sess, size_t argc, char **argv, struct flist **flp,
913    size_t *sz)
914{
915	int	 rc;
916
917	assert(argc > 0);
918	rc = sess->opts->recursive ?
919		flist_gen_dirs(sess, argc, argv, flp, sz) :
920		flist_gen_files(sess, argc, argv, flp, sz);
921
922	/* After scanning, lock our file-system view. */
923
924	if (unveil(NULL, NULL) == -1) {
925		ERR(sess, "unveil");
926		return 0;
927	} else if (!rc)
928		return 0;
929
930	qsort(*flp, *sz, sizeof(struct flist), flist_cmp);
931
932	if (flist_dedupe(sess, flp, sz)) {
933		flist_topdirs(sess, *flp, *sz);
934		return 1;
935	}
936
937	ERRX1(sess, "flist_dedupe");
938	flist_free(*flp, *sz);
939	*flp = NULL;
940	*sz = 0;
941	return 0;
942}
943
944/*
945 * Generate a list of files in root to delete that are within the
946 * top-level directories stipulated by "wfl".
947 * Only handles symbolic links, directories, and regular files.
948 * Returns zero on failure (fl and flsz will be NULL and zero), non-zero
949 * on success.
950 * On success, "fl" will need to be freed with flist_free().
951 */
952int
953flist_gen_dels(struct sess *sess, const char *root, struct flist **fl,
954    size_t *sz,	const struct flist *wfl, size_t wflsz)
955{
956	char		**cargv = NULL;
957	int		  rc = 0, c;
958	FTS		 *fts = NULL;
959	FTSENT		 *ent;
960	struct flist	 *f;
961	size_t		  cargvs = 0, i, j, max = 0, stripdir;
962	ENTRY		  hent;
963	ENTRY		 *hentp;
964
965	*fl = NULL;
966	*sz = 0;
967
968	/* Only run this code when we're recursive. */
969
970	if (!sess->opts->recursive)
971		return 1;
972
973	/*
974	 * Gather up all top-level directories for scanning.
975	 * This is stipulated by rsync's --delete behaviour, where we
976	 * only delete things in the top-level directories given on the
977	 * command line.
978	 */
979
980	assert(wflsz > 0);
981	for (i = 0; i < wflsz; i++)
982		if (FLSTAT_TOP_DIR & wfl[i].st.flags)
983			cargvs++;
984	if (cargvs == 0)
985		return 1;
986
987	if ((cargv = calloc(cargvs + 1, sizeof(char *))) == NULL) {
988		ERR(sess, "calloc");
989		return 0;
990	}
991
992	/*
993	 * If we're given just a "." as the first entry, that means
994	 * we're doing a relative copy with a trailing slash.
995	 * Special-case this just for the sake of simplicity.
996	 * Otherwise, look through all top-levels.
997	 */
998
999	if (wflsz && strcmp(wfl[0].wpath, ".") == 0) {
1000		assert(cargvs == 1);
1001		assert(S_ISDIR(wfl[0].st.mode));
1002		if (asprintf(&cargv[0], "%s/", root) < 0) {
1003			ERR(sess, "asprintf");
1004			cargv[0] = NULL;
1005			goto out;
1006		}
1007		cargv[1] = NULL;
1008	} else {
1009		for (i = j = 0; i < wflsz; i++) {
1010			if (!(FLSTAT_TOP_DIR & wfl[i].st.flags))
1011				continue;
1012			assert(S_ISDIR(wfl[i].st.mode));
1013			assert(strcmp(wfl[i].wpath, "."));
1014			c = asprintf(&cargv[j], "%s/%s", root, wfl[i].wpath);
1015			if (c < 0) {
1016				ERR(sess, "asprintf");
1017				cargv[j] = NULL;
1018				goto out;
1019			}
1020			LOG4(sess, "%s: will scan "
1021				"for deletions", cargv[j]);
1022			j++;
1023		}
1024		assert(j == cargvs);
1025		cargv[j] = NULL;
1026	}
1027
1028	LOG2(sess, "delete from %zu directories", cargvs);
1029
1030	/*
1031	 * Next, use the standard hcreate(3) hashtable interface to hash
1032	 * all of the files that we want to synchronise.
1033	 * This way, we'll be able to determine which files we want to
1034	 * delete in O(n) time instead of O(n * search) time.
1035	 * Plus, we can do the scan in-band and only allocate the files
1036	 * we want to delete.
1037	 */
1038
1039	if (!hcreate(wflsz)) {
1040		ERR(sess, "hcreate");
1041		goto out;
1042	}
1043
1044	for (i = 0; i < wflsz; i++) {
1045		memset(&hent, 0, sizeof(ENTRY));
1046		if ((hent.key = strdup(wfl[i].wpath)) == NULL) {
1047			ERR(sess, "strdup");
1048			goto out;
1049		}
1050		if ((hentp = hsearch(hent, ENTER)) == NULL) {
1051			ERR(sess, "hsearch");
1052			goto out;
1053		} else if (hentp->key != hent.key) {
1054			ERRX(sess, "%s: duplicate", wfl[i].wpath);
1055			free(hent.key);
1056			goto out;
1057		}
1058	}
1059
1060	/*
1061	 * Now we're going to try to descend into all of the top-level
1062	 * directories stipulated by the file list.
1063	 * If the directories don't exist, it's ok.
1064	 */
1065
1066	if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) {
1067		ERR(sess, "fts_open");
1068		goto out;
1069	}
1070
1071	stripdir = strlen(root) + 1;
1072	errno = 0;
1073	while ((ent = fts_read(fts)) != NULL) {
1074		if (ent->fts_info == FTS_NS)
1075			continue;
1076		if (!flist_fts_check(sess, ent)) {
1077			errno = 0;
1078			continue;
1079		} else if (stripdir >= ent->fts_pathlen)
1080			continue;
1081
1082		/* Look up in hashtable. */
1083
1084		memset(&hent, 0, sizeof(ENTRY));
1085		hent.key = ent->fts_path + stripdir;
1086		if (hsearch(hent, FIND) != NULL)
1087			continue;
1088
1089		/* Not found: we'll delete it. */
1090
1091		if (!flist_realloc(sess, fl, sz, &max)) {
1092			ERRX1(sess, "flist_realloc");
1093			goto out;
1094		}
1095		f = &(*fl)[*sz - 1];
1096
1097		if ((f->path = strdup(ent->fts_path)) == NULL) {
1098			ERR(sess, "strdup");
1099			goto out;
1100		}
1101		f->wpath = f->path + stripdir;
1102		assert(ent->fts_statp != NULL);
1103		flist_copy_stat(f, ent->fts_statp);
1104		errno = 0;
1105	}
1106
1107	if (errno) {
1108		ERR(sess, "fts_read");
1109		goto out;
1110	}
1111
1112	qsort(*fl, *sz, sizeof(struct flist), flist_cmp);
1113	rc = 1;
1114out:
1115	if (fts != NULL)
1116		fts_close(fts);
1117	for (i = 0; i < cargvs; i++)
1118		free(cargv[i]);
1119	free(cargv);
1120	hdestroy();
1121	return rc;
1122}
1123
1124/*
1125 * Delete all files and directories in "fl".
1126 * If called with a zero-length "fl", does nothing.
1127 * If dry_run is specified, simply write what would be done.
1128 * Return zero on failure, non-zero on success.
1129 */
1130int
1131flist_del(struct sess *sess, int root, const struct flist *fl, size_t flsz)
1132{
1133	ssize_t	 i;
1134	int	 flag;
1135
1136	if (flsz == 0)
1137		return 1;
1138
1139	assert(sess->opts->del);
1140	assert(sess->opts->recursive);
1141
1142	for (i = flsz - 1; i >= 0; i--) {
1143		LOG1(sess, "%s: deleting", fl[i].wpath);
1144		if (sess->opts->dry_run)
1145			continue;
1146		assert(root != -1);
1147		flag = S_ISDIR(fl[i].st.mode) ? AT_REMOVEDIR : 0;
1148		if (unlinkat(root, fl[i].wpath, flag) == -1 &&
1149		    errno != ENOENT) {
1150			ERR(sess, "%s: unlinkat", fl[i].wpath);
1151			return 0;
1152		}
1153	}
1154
1155	return 1;
1156}
1157