flist.c revision 1.18
1/*	$Id: flist.c,v 1.18 2019/02/21 22:07:44 benno Exp $ */
2/*
3 * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2019 Florian Obser <florian@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18#include <sys/param.h>
19#include <sys/stat.h>
20
21#include <assert.h>
22#include <errno.h>
23#include <fcntl.h>
24#include <fts.h>
25#include <inttypes.h>
26#include <search.h>
27#include <stdio.h>
28#include <stdlib.h>
29#include <string.h>
30#include <unistd.h>
31
32#include "extern.h"
33
34/*
35 * We allocate our file list in chunk sizes so as not to do it one by
36 * one.
37 * Preferrably we get one or two allocation.
38 */
39#define	FLIST_CHUNK_SIZE (1024)
40
41/*
42 * These flags are part of the rsync protocol.
43 * They are sent as the first byte for a file transmission and encode
44 * information that affects subsequent transmissions.
45 */
46#define FLIST_MODE_SAME  0x0002 /* mode is repeat */
47#define	FLIST_RDEV_SAME  0x0004 /* rdev is repeat */
48#define	FLIST_UID_SAME	 0x0008 /* uid is repeat */
49#define	FLIST_GID_SAME	 0x0010 /* gid is repeat */
50#define	FLIST_NAME_SAME  0x0020 /* name is repeat */
51#define FLIST_NAME_LONG	 0x0040 /* name >255 bytes */
52#define FLIST_TIME_SAME  0x0080 /* time is repeat */
53
54/*
55 * Requied way to sort a filename list.
56 */
57static int
58flist_cmp(const void *p1, const void *p2)
59{
60	const struct flist *f1 = p1, *f2 = p2;
61
62	return strcmp(f1->wpath, f2->wpath);
63}
64
65/*
66 * Deduplicate our file list (which may be zero-length).
67 * Returns zero on failure, non-zero on success.
68 */
69static int
70flist_dedupe(struct sess *sess, struct flist **fl, size_t *sz)
71{
72	size_t		 i, j;
73	struct flist	*new;
74	struct flist	*f, *fnext;
75
76	if (*sz == 0)
77		return 1;
78
79	/* Create a new buffer, "new", and copy. */
80
81	new = calloc(*sz, sizeof(struct flist));
82	if (new == NULL) {
83		ERR(sess, "calloc");
84		return 0;
85	}
86
87	for (i = j = 0; i < *sz - 1; i++) {
88		f = &(*fl)[i];
89		fnext = &(*fl)[i + 1];
90
91		if (strcmp(f->wpath, fnext->wpath)) {
92			new[j++] = *f;
93			continue;
94		}
95
96		/*
97		 * Our working (destination) paths are the same.
98		 * If the actual file is the same (as given on the
99		 * command-line), then we can just discard the first.
100		 * Otherwise, we need to bail out: it means we have two
101		 * different files with the relative path on the
102		 * destination side.
103		 */
104
105		if (strcmp(f->path, fnext->path) == 0) {
106			new[j++] = *f;
107			i++;
108			WARNX(sess, "%s: duplicate path: %s",
109			    f->wpath, f->path);
110			free(fnext->path);
111			free(fnext->link);
112			fnext->path = fnext->link = NULL;
113			continue;
114		}
115
116		ERRX(sess, "%s: duplicate working path for "
117		    "possibly different file: %s, %s",
118		    f->wpath, f->path, fnext->path);
119		free(new);
120		return 0;
121	}
122
123	/* Don't forget the last entry. */
124
125	if (i == *sz - 1)
126		new[j++] = (*fl)[i];
127
128	/*
129	 * Reassign to the deduplicated array.
130	 * If we started out with *sz > 0, which we check for at the
131	 * beginning, then we'll always continue having *sz > 0.
132	 */
133
134	free(*fl);
135	*fl = new;
136	*sz = j;
137	assert(*sz);
138	return 1;
139}
140
141/*
142 * We're now going to find our top-level directories.
143 * This only applies to recursive mode.
144 * If we have the first element as the ".", then that's the "top
145 * directory" of our transfer.
146 * Otherwise, mark up all top-level directories in the set.
147 */
148static void
149flist_topdirs(struct sess *sess, struct flist *fl, size_t flsz)
150{
151	size_t		 i;
152	const char	*cp;
153
154	if (!sess->opts->recursive)
155		return;
156
157	if (flsz && strcmp(fl[0].wpath, ".")) {
158		for (i = 0; i < flsz; i++) {
159			if (!S_ISDIR(fl[i].st.mode))
160				continue;
161			cp = strchr(fl[i].wpath, '/');
162			if (cp != NULL && cp[1] != '\0')
163				continue;
164			fl[i].st.flags |= FLSTAT_TOP_DIR;
165			LOG4(sess, "%s: top-level", fl[i].wpath);
166		}
167	} else if (flsz) {
168		fl[0].st.flags |= FLSTAT_TOP_DIR;
169		LOG4(sess, "%s: top-level", fl[0].wpath);
170	}
171}
172
173/*
174 * Filter through the fts() file information.
175 * We want directories (pre-order), regular files, and symlinks.
176 * Everything else is skipped and possibly warned about.
177 * Return zero to skip, non-zero to examine.
178 */
179static int
180flist_fts_check(struct sess *sess, FTSENT *ent)
181{
182
183	if (ent->fts_info == FTS_F  ||
184	    ent->fts_info == FTS_D ||
185	    ent->fts_info == FTS_SL ||
186	    ent->fts_info == FTS_SLNONE)
187		return 1;
188
189	if (ent->fts_info == FTS_DC) {
190		WARNX(sess, "%s: directory cycle", ent->fts_path);
191	} else if (ent->fts_info == FTS_DNR) {
192		errno = ent->fts_errno;
193		WARN(sess, "%s: unreadable directory", ent->fts_path);
194	} else if (ent->fts_info == FTS_DOT) {
195		WARNX(sess, "%s: skipping dot-file", ent->fts_path);
196	} else if (ent->fts_info == FTS_ERR) {
197		errno = ent->fts_errno;
198		WARN(sess, "%s", ent->fts_path);
199	} else if (ent->fts_info == FTS_DEFAULT) {
200		if ((sess->opts->devices && (S_ISBLK(ent->fts_statp->st_mode) ||
201		    S_ISCHR(ent->fts_statp->st_mode))) ||
202		    (sess->opts->specials &&
203		    (S_ISFIFO(ent->fts_statp->st_mode) ||
204		    S_ISSOCK(ent->fts_statp->st_mode)))) {
205			return 1;
206		}
207		WARNX(sess, "%s: skipping special", ent->fts_path);
208	} else if (ent->fts_info == FTS_NS) {
209		errno = ent->fts_errno;
210		WARN(sess, "%s: could not stat", ent->fts_path);
211	}
212
213	return 0;
214}
215
216/*
217 * Copy necessary elements in "st" into the fields of "f".
218 */
219static void
220flist_copy_stat(struct flist *f, const struct stat *st)
221{
222	f->st.mode = st->st_mode;
223	f->st.uid = st->st_uid;
224	f->st.gid = st->st_gid;
225	f->st.size = st->st_size;
226	f->st.mtime = st->st_mtime;
227	f->st.rdev = st->st_rdev;
228}
229
230void
231flist_free(struct flist *f, size_t sz)
232{
233	size_t	 i;
234
235	if (f == NULL)
236		return;
237
238	for (i = 0; i < sz; i++) {
239		free(f[i].path);
240		free(f[i].link);
241	}
242	free(f);
243}
244
245/*
246 * Serialise our file list (which may be zero-length) to the wire.
247 * Makes sure that the receiver isn't going to block on sending us
248 * return messages on the log channel.
249 * Return zero on failure, non-zero on success.
250 */
251int
252flist_send(struct sess *sess, int fdin, int fdout, const struct flist *fl,
253    size_t flsz)
254{
255	size_t		 i, sz, gidsz = 0, uidsz = 0;
256	uint8_t		 flag;
257	const struct flist *f;
258	const char	*fn;
259	struct ident	*gids = NULL, *uids = NULL;
260	int		 rc = 0;
261
262	/* Double-check that we've no pending multiplexed data. */
263
264	LOG2(sess, "sending file metadata list: %zu", flsz);
265
266	for (i = 0; i < flsz; i++) {
267		f = &fl[i];
268		fn = f->wpath;
269		sz = strlen(f->wpath);
270		assert(sz > 0);
271
272		/*
273		 * If applicable, unclog the read buffer.
274		 * This happens when the receiver has a lot of log
275		 * messages and all we're doing is sending our file list
276		 * without checking for messages.
277		 */
278
279		if (sess->mplex_reads &&
280		    io_read_check(sess, fdin) &&
281		     !io_read_flush(sess, fdin)) {
282			ERRX1(sess, "io_read_flush");
283			goto out;
284		}
285
286		/*
287		 * For ease, make all of our filenames be "long"
288		 * regardless their actual length.
289		 * This also makes sure that we don't transmit a zero
290		 * byte unintentionally.
291		 */
292
293		flag = FLIST_NAME_LONG;
294
295		LOG3(sess, "%s: sending file metadata: "
296			"size %jd, mtime %jd, mode %o",
297			fn, (intmax_t)f->st.size,
298			(intmax_t)f->st.mtime, f->st.mode);
299
300		/* Now write to the wire. */
301		/* FIXME: buffer this. */
302
303		if (!io_write_byte(sess, fdout, flag)) {
304			ERRX1(sess, "io_write_byte");
305			goto out;
306		} else if (!io_write_int(sess, fdout, sz)) {
307			ERRX1(sess, "io_write_int");
308			goto out;
309		} else if (!io_write_buf(sess, fdout, fn, sz)) {
310			ERRX1(sess, "io_write_buf");
311			goto out;
312		} else if (!io_write_long(sess, fdout, f->st.size)) {
313			ERRX1(sess, "io_write_long");
314			goto out;
315		} else if (!io_write_int(sess, fdout, f->st.mtime)) {
316			ERRX1(sess, "io_write_int");
317			goto out;
318		} else if (!io_write_int(sess, fdout, f->st.mode)) {
319			ERRX1(sess, "io_write_int");
320			goto out;
321		}
322
323		/* Conditional part: uid. */
324
325		if (sess->opts->preserve_uids) {
326			if (!io_write_int(sess, fdout, f->st.uid)) {
327				ERRX1(sess, "io_write_int");
328				goto out;
329			}
330			if (!idents_add(sess, 0, &uids, &uidsz, f->st.uid)) {
331				ERRX1(sess, "idents_add");
332				goto out;
333			}
334		}
335
336		/* Conditional part: gid. */
337
338		if (sess->opts->preserve_gids) {
339			if (!io_write_int(sess, fdout, f->st.gid)) {
340				ERRX1(sess, "io_write_int");
341				goto out;
342			}
343			if (!idents_add(sess, 1, &gids, &gidsz, f->st.gid)) {
344				ERRX1(sess, "idents_add");
345				goto out;
346			}
347		}
348
349		/* Conditional part: devices & special files. */
350
351		if ((sess->opts->devices && (S_ISBLK(f->st.mode) ||
352		     S_ISCHR(f->st.mode))) ||
353		    (sess->opts->specials && (S_ISFIFO(f->st.mode) ||
354		    S_ISSOCK(f->st.mode)))) {
355			if (!io_write_int(sess, fdout, f->st.rdev)) {
356				ERRX1(sess, "io_write_int");
357				goto out;
358			}
359		}
360
361		/* Conditional part: link. */
362
363		if (S_ISLNK(f->st.mode) &&
364		    sess->opts->preserve_links) {
365			fn = f->link;
366			sz = strlen(f->link);
367			if (!io_write_int(sess, fdout, sz)) {
368				ERRX1(sess, "io_write_int");
369				goto out;
370			}
371			if (!io_write_buf(sess, fdout, fn, sz)) {
372				ERRX1(sess, "io_write_int");
373				goto out;
374			}
375		}
376
377		if (S_ISREG(f->st.mode))
378			sess->total_size += f->st.size;
379	}
380
381	/* Signal end of file list. */
382
383	if (!io_write_byte(sess, fdout, 0)) {
384		ERRX1(sess, "io_write_byte");
385		goto out;
386	}
387
388	/* Conditionally write identifier lists. */
389
390	if (sess->opts->preserve_uids && !sess->opts->numeric_ids) {
391		LOG2(sess, "sending uid list: %zu", uidsz);
392		if (!idents_send(sess, fdout, uids, uidsz)) {
393			ERRX1(sess, "idents_send");
394			goto out;
395		}
396	}
397
398	if (sess->opts->preserve_gids && !sess->opts->numeric_ids) {
399		LOG2(sess, "sending gid list: %zu", gidsz);
400		if (!idents_send(sess, fdout, gids, gidsz)) {
401			ERRX1(sess, "idents_send");
402			goto out;
403		}
404	}
405
406	rc = 1;
407out:
408	idents_free(gids, gidsz);
409	idents_free(uids, uidsz);
410	return rc;
411}
412
413/*
414 * Read the filename of a file list.
415 * This is the most expensive part of the file list transfer, so a lot
416 * of attention has gone into transmitting as little as possible.
417 * Micro-optimisation, but whatever.
418 * Fills in "f" with the full path on success.
419 * Returns zero on failure, non-zero on success.
420 */
421static int
422flist_recv_name(struct sess *sess, int fd, struct flist *f, uint8_t flags,
423    char last[MAXPATHLEN])
424{
425	uint8_t		 bval;
426	size_t		 partial = 0;
427	size_t		 pathlen = 0, len;
428
429	/*
430	 * Read our filename.
431	 * If we have FLIST_NAME_SAME, we inherit some of the last
432	 * transmitted name.
433	 * If we have FLIST_NAME_LONG, then the string length is greater
434	 * than byte-size.
435	 */
436
437	if (FLIST_NAME_SAME & flags) {
438		if (!io_read_byte(sess, fd, &bval)) {
439			ERRX1(sess, "io_read_byte");
440			return 0;
441		}
442		partial = bval;
443	}
444
445	/* Get the (possibly-remaining) filename length. */
446
447	if (FLIST_NAME_LONG & flags) {
448		if (!io_read_size(sess, fd, &pathlen)) {
449			ERRX1(sess, "io_read_size");
450			return 0;
451		}
452	} else {
453		if (!io_read_byte(sess, fd, &bval)) {
454			ERRX1(sess, "io_read_byte");
455			return 0;
456		}
457		pathlen = bval;
458	}
459
460	/* Allocate our full filename length. */
461	/* FIXME: maximum pathname length. */
462
463	if ((len = pathlen + partial) == 0) {
464		ERRX(sess, "security violation: "
465			"zero-length pathname");
466		return 0;
467	}
468
469	if ((f->path = malloc(len + 1)) == NULL) {
470		ERR(sess, "malloc");
471		return 0;
472	}
473	f->path[len] = '\0';
474
475	if (FLIST_NAME_SAME & flags)
476		memcpy(f->path, last, partial);
477
478	if (!io_read_buf(sess, fd, f->path + partial, pathlen)) {
479		ERRX1(sess, "io_read_buf");
480		return 0;
481	}
482
483	if (f->path[0] == '/') {
484		ERRX(sess, "security violation: "
485			"absolute pathname: %s", f->path);
486		return 0;
487	}
488
489	if (strstr(f->path, "/../") != NULL ||
490	    (len > 2 && strcmp(f->path + len - 3, "/..") == 0) ||
491	    (len > 2 && strncmp(f->path, "../", 3) == 0) ||
492	    strcmp(f->path, "..") == 0) {
493		ERRX(sess, "%s: security violation: "
494			"backtracking pathname", f->path);
495		return 0;
496	}
497
498	/* Record our last path and construct our filename. */
499
500	strlcpy(last, f->path, MAXPATHLEN);
501	f->wpath = f->path;
502	return 1;
503}
504
505/*
506 * Reallocate a file list in chunks of FLIST_CHUNK_SIZE;
507 * Returns zero on failure, non-zero on success.
508 */
509static int
510flist_realloc(struct sess *sess, struct flist **fl, size_t *sz, size_t *max)
511{
512	void	*pp;
513
514	if (*sz + 1 <= *max)  {
515		(*sz)++;
516		return 1;
517	}
518
519	pp = recallocarray(*fl, *max,
520		*max + FLIST_CHUNK_SIZE, sizeof(struct flist));
521	if (pp == NULL) {
522		ERR(sess, "recallocarray");
523		return 0;
524	}
525	*fl = pp;
526	*max += FLIST_CHUNK_SIZE;
527	(*sz)++;
528	return 1;
529}
530
531/*
532 * Copy a regular or symbolic link file "path" into "f".
533 * This handles the correct path creation and symbolic linking.
534 * Returns zero on failure, non-zero on success.
535 */
536static int
537flist_append(struct sess *sess, struct flist *f, struct stat *st,
538    const char *path)
539{
540
541	/*
542	 * Copy the full path for local addressing and transmit
543	 * only the filename part for the receiver.
544	 */
545
546	if ((f->path = strdup(path)) == NULL) {
547		ERR(sess, "strdup");
548		return 0;
549	}
550
551	if ((f->wpath = strrchr(f->path, '/')) == NULL)
552		f->wpath = f->path;
553	else
554		f->wpath++;
555
556	/*
557	 * On the receiving end, we'll strip out all bits on the
558	 * mode except for the file permissions.
559	 * No need to warn about it here.
560	 */
561
562	flist_copy_stat(f, st);
563
564	/* Optionally copy link information. */
565
566	if (S_ISLNK(st->st_mode)) {
567		f->link = symlink_read(sess, f->path);
568		if (f->link == NULL) {
569			ERRX1(sess, "symlink_read");
570			return 0;
571		}
572	}
573
574	return 1;
575}
576
577/*
578 * Receive a file list from the wire, filling in length "sz" (which may
579 * possibly be zero) and list "flp" on success.
580 * Return zero on failure, non-zero on success.
581 */
582int
583flist_recv(struct sess *sess, int fd, struct flist **flp, size_t *sz)
584{
585	struct flist	*fl = NULL;
586	struct flist	*ff;
587	const struct flist *fflast = NULL;
588	size_t		 flsz = 0, flmax = 0, lsz, gidsz = 0, uidsz = 0;
589	uint8_t		 flag;
590	char		 last[MAXPATHLEN];
591	uint64_t	 lval; /* temporary values... */
592	int32_t		 ival;
593	struct ident	*gids = NULL, *uids = NULL;
594
595	last[0] = '\0';
596
597	for (;;) {
598		if (!io_read_byte(sess, fd, &flag)) {
599			ERRX1(sess, "io_read_byte");
600			goto out;
601		} else if (flag == 0)
602			break;
603
604		if (!flist_realloc(sess, &fl, &flsz, &flmax)) {
605			ERRX1(sess, "flist_realloc");
606			goto out;
607		}
608
609		ff = &fl[flsz - 1];
610		fflast = flsz > 1 ? &fl[flsz - 2] : NULL;
611
612		/* Filename first. */
613
614		if (!flist_recv_name(sess, fd, ff, flag, last)) {
615			ERRX1(sess, "flist_recv_name");
616			goto out;
617		}
618
619		/* Read the file size. */
620
621		if (!io_read_ulong(sess, fd, &lval)) {
622			ERRX1(sess, "io_read_ulong");
623			goto out;
624		}
625		ff->st.size = lval;
626
627		/* Read the modification time. */
628
629		if (!(FLIST_TIME_SAME & flag)) {
630			if (!io_read_int(sess, fd, &ival)) {
631				ERRX1(sess, "io_read_int");
632				goto out;
633			}
634			ff->st.mtime = ival;
635		} else if (fflast == NULL) {
636			ERRX(sess, "same time without last entry");
637			goto out;
638		}  else
639			ff->st.mtime = fflast->st.mtime;
640
641		/* Read the file mode. */
642
643		if (!(FLIST_MODE_SAME & flag)) {
644			if (!io_read_int(sess, fd, &ival)) {
645				ERRX1(sess, "io_read_int");
646				goto out;
647			}
648			ff->st.mode = ival;
649		} else if (fflast == NULL) {
650			ERRX(sess, "same mode without last entry");
651			goto out;
652		} else
653			ff->st.mode = fflast->st.mode;
654
655		/* Conditional part: uid. */
656
657		if (sess->opts->preserve_uids) {
658			if (!(FLIST_UID_SAME & flag)) {
659				if (!io_read_int(sess, fd, &ival)) {
660					ERRX1(sess, "io_read_int");
661					goto out;
662				}
663				ff->st.uid = ival;
664			} else if (fflast == NULL) {
665				ERRX(sess, "same uid "
666					"without last entry");
667				goto out;
668			} else
669				ff->st.uid = fflast->st.uid;
670		}
671
672		/* Conditional part: gid. */
673
674		if (sess->opts->preserve_gids) {
675			if (!(FLIST_GID_SAME & flag)) {
676				if (!io_read_int(sess, fd, &ival)) {
677					ERRX1(sess, "io_read_int");
678					goto out;
679				}
680				ff->st.gid = ival;
681			} else if (fflast == NULL) {
682				ERRX(sess, "same gid "
683					"without last entry");
684				goto out;
685			} else
686				ff->st.gid = fflast->st.gid;
687		}
688
689		/* Conditional part: devices & special files. */
690
691		if ((sess->opts->devices && (S_ISBLK(ff->st.mode) ||
692		     S_ISCHR(ff->st.mode))) ||
693		    (sess->opts->specials && (S_ISFIFO(ff->st.mode) ||
694		    S_ISSOCK(ff->st.mode)))) {
695			if (!(FLIST_RDEV_SAME & flag)) {
696				if (!io_read_int(sess, fd, &ival)) {
697					ERRX1(sess, "io_read_int");
698					goto out;
699				}
700				ff->st.rdev = ival;
701			} else if (fflast == NULL) {
702				ERRX(sess, "same device without last entry");
703				goto out;
704			} else
705				ff->st.rdev = fflast->st.rdev;
706		}
707
708		/* Conditional part: link. */
709
710		if (S_ISLNK(ff->st.mode) &&
711		    sess->opts->preserve_links) {
712			if (!io_read_size(sess, fd, &lsz)) {
713				ERRX1(sess, "io_read_size");
714				goto out;
715			} else if (lsz == 0) {
716				ERRX(sess, "empty link name");
717				goto out;
718			}
719			ff->link = calloc(lsz + 1, 1);
720			if (ff->link == NULL) {
721				ERR(sess, "calloc");
722				goto out;
723			}
724			if (!io_read_buf(sess, fd, ff->link, lsz)) {
725				ERRX1(sess, "io_read_buf");
726				goto out;
727			}
728		}
729
730		LOG3(sess, "%s: received file metadata: "
731			"size %jd, mtime %jd, mode %o, rdev (%d, %d)",
732			ff->path, (intmax_t)ff->st.size,
733			(intmax_t)ff->st.mtime, ff->st.mode,
734			major(ff->st.rdev), minor(ff->st.rdev));
735
736		if (S_ISREG(ff->st.mode))
737			sess->total_size += ff->st.size;
738	}
739
740	/* Conditionally read the user/group list. */
741
742	if (sess->opts->preserve_uids && !sess->opts->numeric_ids) {
743		if (!idents_recv(sess, fd, &uids, &uidsz)) {
744			ERRX1(sess, "idents_recv");
745			goto out;
746		}
747		LOG2(sess, "received uid list: %zu", uidsz);
748	}
749
750	if (sess->opts->preserve_gids && !sess->opts->numeric_ids) {
751		if (!idents_recv(sess, fd, &gids, &gidsz)) {
752			ERRX1(sess, "idents_recv");
753			goto out;
754		}
755		LOG2(sess, "received gid list: %zu", gidsz);
756	}
757
758	/* Remember to order the received list. */
759
760	LOG2(sess, "received file metadata list: %zu", flsz);
761	qsort(fl, flsz, sizeof(struct flist), flist_cmp);
762	flist_topdirs(sess, fl, flsz);
763	*sz = flsz;
764	*flp = fl;
765
766	/* Conditionally remap and reassign identifiers. */
767
768	if (sess->opts->preserve_uids && !sess->opts->numeric_ids) {
769		idents_remap(sess, 0, uids, uidsz);
770		idents_assign_uid(sess, fl, flsz, uids, uidsz);
771	}
772
773	if (sess->opts->preserve_gids && !sess->opts->numeric_ids) {
774		idents_remap(sess, 1, gids, gidsz);
775		idents_assign_gid(sess, fl, flsz, gids, gidsz);
776	}
777
778	idents_free(gids, gidsz);
779	idents_free(uids, uidsz);
780	return 1;
781out:
782	flist_free(fl, flsz);
783	idents_free(gids, gidsz);
784	idents_free(uids, uidsz);
785	*sz = 0;
786	*flp = NULL;
787	return 0;
788}
789
790/*
791 * Generate a flist possibly-recursively given a file root, which may
792 * also be a regular file or symlink.
793 * On success, augments the generated list in "flp" of length "sz".
794 * Returns zero on failure, non-zero on success.
795 */
796static int
797flist_gen_dirent(struct sess *sess, char *root, struct flist **fl, size_t *sz,
798    size_t *max)
799{
800	char		*cargv[2], *cp;
801	int		 rc = 0;
802	FTS		*fts;
803	FTSENT		*ent;
804	struct flist	*f;
805	size_t		 flsz = 0, stripdir;
806	struct stat	 st;
807
808	cargv[0] = root;
809	cargv[1] = NULL;
810
811	/*
812	 * If we're a file, then revert to the same actions we use for
813	 * the non-recursive scan.
814	 */
815
816	if (lstat(root, &st) == -1) {
817		ERR(sess, "%s: lstat", root);
818		return 0;
819	} else if (S_ISREG(st.st_mode)) {
820		if (!flist_realloc(sess, fl, sz, max)) {
821			ERRX1(sess, "flist_realloc");
822			return 0;
823		}
824		f = &(*fl)[(*sz) - 1];
825		assert(f != NULL);
826
827		if (!flist_append(sess, f, &st, root)) {
828			ERRX1(sess, "flist_append");
829			return 0;
830		}
831		if (unveil(root, "r") == -1) {
832			ERR(sess, "%s: unveil", root);
833			return 0;
834		}
835		return 1;
836	} else if (S_ISLNK(st.st_mode)) {
837		if (!sess->opts->preserve_links) {
838			WARNX(sess, "%s: skipping symlink", root);
839			return 1;
840		} else if (!flist_realloc(sess, fl, sz, max)) {
841			ERRX1(sess, "flist_realloc");
842			return 0;
843		}
844		f = &(*fl)[(*sz) - 1];
845		assert(f != NULL);
846
847		if (!flist_append(sess, f, &st, root)) {
848			ERRX1(sess, "flist_append");
849			return 0;
850		}
851		if (unveil(root, "r") == -1) {
852			ERR(sess, "%s: unveil", root);
853			return 0;
854		}
855		return 1;
856	} else if (!S_ISDIR(st.st_mode)) {
857		WARNX(sess, "%s: skipping special", root);
858		return 1;
859	}
860
861	/*
862	 * If we end with a slash, it means that we're not supposed to
863	 * copy the directory part itself---only the contents.
864	 * So set "stripdir" to be what we take out.
865	 */
866
867	stripdir = strlen(root);
868	assert(stripdir > 0);
869	if (root[stripdir - 1] != '/')
870		stripdir = 0;
871
872	/*
873	 * If we're not stripping anything, then see if we need to strip
874	 * out the leading material in the path up to and including the
875	 * last directory component.
876	 */
877
878	if (stripdir == 0)
879		if ((cp = strrchr(root, '/')) != NULL)
880			stripdir = cp - root + 1;
881
882	/*
883	 * If we're recursive, then we need to take down all of the
884	 * files and directory components, so use fts(3).
885	 * Copying the information file-by-file into the flstat.
886	 * We'll make sense of it in flist_send.
887	 */
888
889	if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) {
890		ERR(sess, "fts_open");
891		return 0;
892	}
893
894	errno = 0;
895	while ((ent = fts_read(fts)) != NULL) {
896		if (!flist_fts_check(sess, ent)) {
897			errno = 0;
898			continue;
899		}
900
901		/* We don't allow symlinks without -l. */
902
903		assert(ent->fts_statp != NULL);
904		if (S_ISLNK(ent->fts_statp->st_mode) &&
905		    !sess->opts->preserve_links) {
906			WARNX(sess, "%s: skipping "
907				"symlink", ent->fts_path);
908			continue;
909		}
910
911		/* Allocate a new file entry. */
912
913		if (!flist_realloc(sess, fl, sz, max)) {
914			ERRX1(sess, "flist_realloc");
915			goto out;
916		}
917		flsz++;
918		f = &(*fl)[*sz - 1];
919
920		/* Our path defaults to "." for the root. */
921
922		if ('\0' == ent->fts_path[stripdir]) {
923			if (asprintf(&f->path, "%s.", ent->fts_path) < 0) {
924				ERR(sess, "asprintf");
925				f->path = NULL;
926				goto out;
927			}
928		} else {
929			if ((f->path = strdup(ent->fts_path)) == NULL) {
930				ERR(sess, "strdup");
931				goto out;
932			}
933		}
934
935		f->wpath = f->path + stripdir;
936		flist_copy_stat(f, ent->fts_statp);
937
938		/* Optionally copy link information. */
939
940		if (S_ISLNK(ent->fts_statp->st_mode)) {
941			f->link = symlink_read(sess, f->path);
942			if (f->link == NULL) {
943				ERRX1(sess, "symlink_read");
944				goto out;
945			}
946		}
947
948		/* Reset errno for next fts_read() call. */
949		errno = 0;
950	}
951	if (errno) {
952		ERR(sess, "fts_read");
953		goto out;
954	}
955	if (unveil(root, "r") == -1) {
956		ERR(sess, "%s: unveil", root);
957		goto out;
958	}
959
960	LOG3(sess, "generated %zu filenames: %s", flsz, root);
961	rc = 1;
962out:
963	fts_close(fts);
964	return rc;
965}
966
967/*
968 * Generate a flist recursively given the array of directories (or
969 * files, symlinks, doesn't matter) specified in argv (argc >0).
970 * On success, stores the generated list in "flp" with length "sz",
971 * which may be zero.
972 * Returns zero on failure, non-zero on success.
973 */
974static int
975flist_gen_dirs(struct sess *sess, size_t argc, char **argv, struct flist **flp,
976    size_t *sz)
977{
978	size_t		 i, max = 0;
979
980	for (i = 0; i < argc; i++)
981		if (!flist_gen_dirent(sess, argv[i], flp, sz, &max))
982			break;
983
984	if (i == argc) {
985		LOG2(sess, "recursively generated %zu filenames", *sz);
986		return 1;
987	}
988
989	ERRX1(sess, "flist_gen_dirent");
990	flist_free(*flp, max);
991	*flp = NULL;
992	*sz = 0;
993	return 0;
994}
995
996/*
997 * Generate list of files from the command-line argc (>0) and argv.
998 * On success, stores the generated list in "flp" with length "sz",
999 * which may be zero.
1000 * Returns zero on failure, non-zero on success.
1001 */
1002static int
1003flist_gen_files(struct sess *sess, size_t argc, char **argv,
1004    struct flist **flp, size_t *sz)
1005{
1006	struct flist	*fl = NULL, *f;
1007	size_t		 i, flsz = 0;
1008	struct stat	 st;
1009
1010	assert(argc);
1011
1012	if ((fl = calloc(argc, sizeof(struct flist))) == NULL) {
1013		ERR(sess, "calloc");
1014		return 0;
1015	}
1016
1017	for (i = 0; i < argc; i++) {
1018		if ('\0' == argv[i][0])
1019			continue;
1020		if (lstat(argv[i], &st) == -1) {
1021			ERR(sess, "%s: lstat", argv[i]);
1022			goto out;
1023		}
1024
1025		/*
1026		 * File type checks.
1027		 * In non-recursive mode, we don't accept directories.
1028		 * We also skip symbolic links without -l.
1029		 * Beyond that, we only accept regular files.
1030		 */
1031
1032		if (S_ISDIR(st.st_mode)) {
1033			WARNX(sess, "%s: skipping directory", argv[i]);
1034			continue;
1035		} else if (S_ISLNK(st.st_mode)) {
1036			if (!sess->opts->preserve_links) {
1037				WARNX(sess, "%s: skipping "
1038					"symlink", argv[i]);
1039				continue;
1040			}
1041		} else if (!S_ISREG(st.st_mode)) {
1042			WARNX(sess, "%s: skipping special", argv[i]);
1043			continue;
1044		}
1045
1046
1047		f = &fl[flsz++];
1048		assert(f != NULL);
1049
1050		/* Add this file to our file-system worldview. */
1051
1052		if (unveil(argv[i], "r") == -1) {
1053			ERR(sess, "%s: unveil", argv[i]);
1054			goto out;
1055		}
1056		if (!flist_append(sess, f, &st, argv[i])) {
1057			ERRX1(sess, "flist_append");
1058			goto out;
1059		}
1060	}
1061
1062	LOG2(sess, "non-recursively generated %zu filenames", flsz);
1063	*sz = flsz;
1064	*flp = fl;
1065	return 1;
1066out:
1067	flist_free(fl, argc);
1068	*sz = 0;
1069	*flp = NULL;
1070	return 0;
1071}
1072
1073/*
1074 * Generate a sorted, de-duplicated list of file metadata.
1075 * In non-recursive mode (the default), we use only the files we're
1076 * given.
1077 * Otherwise, directories are recursively examined.
1078 * Returns zero on failure, non-zero on success.
1079 * On success, "fl" will need to be freed with flist_free().
1080 */
1081int
1082flist_gen(struct sess *sess, size_t argc, char **argv, struct flist **flp,
1083    size_t *sz)
1084{
1085	int	 rc;
1086
1087	assert(argc > 0);
1088	rc = sess->opts->recursive ?
1089		flist_gen_dirs(sess, argc, argv, flp, sz) :
1090		flist_gen_files(sess, argc, argv, flp, sz);
1091
1092	/* After scanning, lock our file-system view. */
1093
1094	if (unveil(NULL, NULL) == -1) {
1095		ERR(sess, "unveil");
1096		return 0;
1097	}
1098	if (!rc)
1099		return 0;
1100
1101	qsort(*flp, *sz, sizeof(struct flist), flist_cmp);
1102
1103	if (flist_dedupe(sess, flp, sz)) {
1104		flist_topdirs(sess, *flp, *sz);
1105		return 1;
1106	}
1107
1108	ERRX1(sess, "flist_dedupe");
1109	flist_free(*flp, *sz);
1110	*flp = NULL;
1111	*sz = 0;
1112	return 0;
1113}
1114
1115/*
1116 * Generate a list of files in root to delete that are within the
1117 * top-level directories stipulated by "wfl".
1118 * Only handles symbolic links, directories, and regular files.
1119 * Returns zero on failure (fl and flsz will be NULL and zero), non-zero
1120 * on success.
1121 * On success, "fl" will need to be freed with flist_free().
1122 */
1123int
1124flist_gen_dels(struct sess *sess, const char *root, struct flist **fl,
1125    size_t *sz,	const struct flist *wfl, size_t wflsz)
1126{
1127	char		**cargv = NULL;
1128	int		  rc = 0, c;
1129	FTS		 *fts = NULL;
1130	FTSENT		 *ent;
1131	struct flist	 *f;
1132	size_t		  cargvs = 0, i, j, max = 0, stripdir;
1133	ENTRY		  hent;
1134	ENTRY		 *hentp;
1135
1136	*fl = NULL;
1137	*sz = 0;
1138
1139	/* Only run this code when we're recursive. */
1140
1141	if (!sess->opts->recursive)
1142		return 1;
1143
1144	/*
1145	 * Gather up all top-level directories for scanning.
1146	 * This is stipulated by rsync's --delete behaviour, where we
1147	 * only delete things in the top-level directories given on the
1148	 * command line.
1149	 */
1150
1151	assert(wflsz > 0);
1152	for (i = 0; i < wflsz; i++)
1153		if (FLSTAT_TOP_DIR & wfl[i].st.flags)
1154			cargvs++;
1155	if (cargvs == 0)
1156		return 1;
1157
1158	if ((cargv = calloc(cargvs + 1, sizeof(char *))) == NULL) {
1159		ERR(sess, "calloc");
1160		return 0;
1161	}
1162
1163	/*
1164	 * If we're given just a "." as the first entry, that means
1165	 * we're doing a relative copy with a trailing slash.
1166	 * Special-case this just for the sake of simplicity.
1167	 * Otherwise, look through all top-levels.
1168	 */
1169
1170	if (wflsz && strcmp(wfl[0].wpath, ".") == 0) {
1171		assert(cargvs == 1);
1172		assert(S_ISDIR(wfl[0].st.mode));
1173		if (asprintf(&cargv[0], "%s/", root) < 0) {
1174			ERR(sess, "asprintf");
1175			cargv[0] = NULL;
1176			goto out;
1177		}
1178		cargv[1] = NULL;
1179	} else {
1180		for (i = j = 0; i < wflsz; i++) {
1181			if (!(FLSTAT_TOP_DIR & wfl[i].st.flags))
1182				continue;
1183			assert(S_ISDIR(wfl[i].st.mode));
1184			assert(strcmp(wfl[i].wpath, "."));
1185			c = asprintf(&cargv[j], "%s/%s", root, wfl[i].wpath);
1186			if (c < 0) {
1187				ERR(sess, "asprintf");
1188				cargv[j] = NULL;
1189				goto out;
1190			}
1191			LOG4(sess, "%s: will scan for deletions", cargv[j]);
1192			j++;
1193		}
1194		assert(j == cargvs);
1195		cargv[j] = NULL;
1196	}
1197
1198	LOG2(sess, "delete from %zu directories", cargvs);
1199
1200	/*
1201	 * Next, use the standard hcreate(3) hashtable interface to hash
1202	 * all of the files that we want to synchronise.
1203	 * This way, we'll be able to determine which files we want to
1204	 * delete in O(n) time instead of O(n * search) time.
1205	 * Plus, we can do the scan in-band and only allocate the files
1206	 * we want to delete.
1207	 */
1208
1209	if (!hcreate(wflsz)) {
1210		ERR(sess, "hcreate");
1211		goto out;
1212	}
1213
1214	for (i = 0; i < wflsz; i++) {
1215		memset(&hent, 0, sizeof(ENTRY));
1216		if ((hent.key = strdup(wfl[i].wpath)) == NULL) {
1217			ERR(sess, "strdup");
1218			goto out;
1219		}
1220		if ((hentp = hsearch(hent, ENTER)) == NULL) {
1221			ERR(sess, "hsearch");
1222			goto out;
1223		} else if (hentp->key != hent.key) {
1224			ERRX(sess, "%s: duplicate", wfl[i].wpath);
1225			free(hent.key);
1226			goto out;
1227		}
1228	}
1229
1230	/*
1231	 * Now we're going to try to descend into all of the top-level
1232	 * directories stipulated by the file list.
1233	 * If the directories don't exist, it's ok.
1234	 */
1235
1236	if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) {
1237		ERR(sess, "fts_open");
1238		goto out;
1239	}
1240
1241	stripdir = strlen(root) + 1;
1242	errno = 0;
1243	while ((ent = fts_read(fts)) != NULL) {
1244		if (ent->fts_info == FTS_NS)
1245			continue;
1246		if (!flist_fts_check(sess, ent)) {
1247			errno = 0;
1248			continue;
1249		} else if (stripdir >= ent->fts_pathlen)
1250			continue;
1251
1252		/* Look up in hashtable. */
1253
1254		memset(&hent, 0, sizeof(ENTRY));
1255		hent.key = ent->fts_path + stripdir;
1256		if (hsearch(hent, FIND) != NULL)
1257			continue;
1258
1259		/* Not found: we'll delete it. */
1260
1261		if (!flist_realloc(sess, fl, sz, &max)) {
1262			ERRX1(sess, "flist_realloc");
1263			goto out;
1264		}
1265		f = &(*fl)[*sz - 1];
1266
1267		if ((f->path = strdup(ent->fts_path)) == NULL) {
1268			ERR(sess, "strdup");
1269			goto out;
1270		}
1271		f->wpath = f->path + stripdir;
1272		assert(ent->fts_statp != NULL);
1273		flist_copy_stat(f, ent->fts_statp);
1274		errno = 0;
1275	}
1276
1277	if (errno) {
1278		ERR(sess, "fts_read");
1279		goto out;
1280	}
1281
1282	qsort(*fl, *sz, sizeof(struct flist), flist_cmp);
1283	rc = 1;
1284out:
1285	if (fts != NULL)
1286		fts_close(fts);
1287	for (i = 0; i < cargvs; i++)
1288		free(cargv[i]);
1289	free(cargv);
1290	hdestroy();
1291	return rc;
1292}
1293
1294/*
1295 * Delete all files and directories in "fl".
1296 * If called with a zero-length "fl", does nothing.
1297 * If dry_run is specified, simply write what would be done.
1298 * Return zero on failure, non-zero on success.
1299 */
1300int
1301flist_del(struct sess *sess, int root, const struct flist *fl, size_t flsz)
1302{
1303	ssize_t	 i;
1304	int	 flag;
1305
1306	if (flsz == 0)
1307		return 1;
1308
1309	assert(sess->opts->del);
1310	assert(sess->opts->recursive);
1311
1312	for (i = flsz - 1; i >= 0; i--) {
1313		LOG1(sess, "%s: deleting", fl[i].wpath);
1314		if (sess->opts->dry_run)
1315			continue;
1316		assert(root != -1);
1317		flag = S_ISDIR(fl[i].st.mode) ? AT_REMOVEDIR : 0;
1318		if (unlinkat(root, fl[i].wpath, flag) == -1 &&
1319		    errno != ENOENT) {
1320			ERR(sess, "%s: unlinkat", fl[i].wpath);
1321			return 0;
1322		}
1323	}
1324
1325	return 1;
1326}
1327