flist.c revision 1.15
1/*	$Id: flist.c,v 1.15 2019/02/16 10:48:05 florian Exp $ */
2/*
3 * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17#include <sys/param.h>
18#include <sys/stat.h>
19
20#include <assert.h>
21#include <errno.h>
22#include <fcntl.h>
23#include <fts.h>
24#include <inttypes.h>
25#include <search.h>
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29#include <unistd.h>
30
31#include "extern.h"
32
33/*
34 * We allocate our file list in chunk sizes so as not to do it one by
35 * one.
36 * Preferably we get one or two allocations.
37 */
38#define	FLIST_CHUNK_SIZE (1024)
39
40/*
41 * These flags are part of the rsync protocol.
42 * They are sent as the first byte for a file transmission and encode
43 * information that affects subsequent transmissions.
44 */
45#define FLIST_MODE_SAME  0x0002 /* mode is repeat */
46#define	FLIST_RDEV_SAME  0x0004 /* rdev is repeat */
47#define	FLIST_UID_SAME	 0x0008 /* uid is repeat */
48#define	FLIST_GID_SAME	 0x0010 /* gid is repeat */
49#define	FLIST_NAME_SAME  0x0020 /* name is repeat */
50#define FLIST_NAME_LONG	 0x0040 /* name >255 bytes */
51#define FLIST_TIME_SAME  0x0080 /* time is repeat */
52
53/*
54 * Requied way to sort a filename list.
55 */
56static int
57flist_cmp(const void *p1, const void *p2)
58{
59	const struct flist *f1 = p1, *f2 = p2;
60
61	return strcmp(f1->wpath, f2->wpath);
62}
63
64/*
65 * Deduplicate our file list (which may be zero-length).
66 * Returns zero on failure, non-zero on success.
67 */
68static int
69flist_dedupe(struct sess *sess, struct flist **fl, size_t *sz)
70{
71	size_t		 i, j;
72	struct flist	*new;
73	struct flist	*f, *fnext;
74
75	if (*sz == 0)
76		return 1;
77
78	/* Create a new buffer, "new", and copy. */
79
80	new = calloc(*sz, sizeof(struct flist));
81	if (new == NULL) {
82		ERR(sess, "calloc");
83		return 0;
84	}
85
86	for (i = j = 0; i < *sz - 1; i++) {
87		f = &(*fl)[i];
88		fnext = &(*fl)[i + 1];
89
90		if (strcmp(f->wpath, fnext->wpath)) {
91			new[j++] = *f;
92			continue;
93		}
94
95		/*
96		 * Our working (destination) paths are the same.
97		 * If the actual file is the same (as given on the
98		 * command-line), then we can just discard the first.
99		 * Otherwise, we need to bail out: it means we have two
100		 * different files with the relative path on the
101		 * destination side.
102		 */
103
104		if (strcmp(f->path, fnext->path) == 0) {
105			new[j++] = *f;
106			i++;
107			WARNX(sess, "%s: duplicate path: %s",
108			    f->wpath, f->path);
109			free(fnext->path);
110			free(fnext->link);
111			fnext->path = fnext->link = NULL;
112			continue;
113		}
114
115		ERRX(sess, "%s: duplicate working path for "
116		    "possibly different file: %s, %s",
117		    f->wpath, f->path, fnext->path);
118		free(new);
119		return 0;
120	}
121
122	/* Don't forget the last entry. */
123
124	if (i == *sz - 1)
125		new[j++] = (*fl)[i];
126
127	/*
128	 * Reassign to the deduplicated array.
129	 * If we started out with *sz > 0, which we check for at the
130	 * beginning, then we'll always continue having *sz > 0.
131	 */
132
133	free(*fl);
134	*fl = new;
135	*sz = j;
136	assert(*sz);
137	return 1;
138}
139
140/*
141 * We're now going to find our top-level directories.
142 * This only applies to recursive mode.
143 * If we have the first element as the ".", then that's the "top
144 * directory" of our transfer.
145 * Otherwise, mark up all top-level directories in the set.
146 */
147static void
148flist_topdirs(struct sess *sess, struct flist *fl, size_t flsz)
149{
150	size_t		 i;
151	const char	*cp;
152
153	if (!sess->opts->recursive)
154		return;
155
156	if (flsz && strcmp(fl[0].wpath, ".")) {
157		for (i = 0; i < flsz; i++) {
158			if (!S_ISDIR(fl[i].st.mode))
159				continue;
160			cp = strchr(fl[i].wpath, '/');
161			if (cp != NULL && cp[1] != '\0')
162				continue;
163			fl[i].st.flags |= FLSTAT_TOP_DIR;
164			LOG4(sess, "%s: top-level", fl[i].wpath);
165		}
166	} else if (flsz) {
167		fl[0].st.flags |= FLSTAT_TOP_DIR;
168		LOG4(sess, "%s: top-level", fl[0].wpath);
169	}
170}
171
172/*
173 * Filter through the fts() file information.
174 * We want directories (pre-order), regular files, and symlinks.
175 * Everything else is skipped and possibly warned about.
176 * Return zero to skip, non-zero to examine.
177 */
178static int
179flist_fts_check(struct sess *sess, FTSENT *ent)
180{
181
182	if (ent->fts_info == FTS_F  ||
183	    ent->fts_info == FTS_D ||
184	    ent->fts_info == FTS_SL ||
185	    ent->fts_info == FTS_SLNONE)
186		return 1;
187
188	if (ent->fts_info == FTS_DC) {
189		WARNX(sess, "%s: directory cycle", ent->fts_path);
190	} else if (ent->fts_info == FTS_DNR) {
191		errno = ent->fts_errno;
192		WARN(sess, "%s: unreadable directory", ent->fts_path);
193	} else if (ent->fts_info == FTS_DOT) {
194		WARNX(sess, "%s: skipping dot-file", ent->fts_path);
195	} else if (ent->fts_info == FTS_ERR) {
196		errno = ent->fts_errno;
197		WARN(sess, "%s", ent->fts_path);
198	} else if (ent->fts_info == FTS_DEFAULT) {
199		if ((sess->opts->devices && (S_ISBLK(ent->fts_statp->st_mode) ||
200		    S_ISCHR(ent->fts_statp->st_mode))) ||
201		    (sess->opts->specials &&
202		    (S_ISFIFO(ent->fts_statp->st_mode) ||
203		    S_ISSOCK(ent->fts_statp->st_mode)))) {
204			return 1;
205		}
206		WARNX(sess, "%s: skipping special", ent->fts_path);
207	} else if (ent->fts_info == FTS_NS) {
208		errno = ent->fts_errno;
209		WARN(sess, "%s: could not stat", ent->fts_path);
210	}
211
212	return 0;
213}
214
215/*
216 * Copy necessary elements in "st" into the fields of "f".
217 */
218static void
219flist_copy_stat(struct flist *f, const struct stat *st)
220{
221	f->st.mode = st->st_mode;
222	f->st.uid = st->st_uid;
223	f->st.gid = st->st_gid;
224	f->st.size = st->st_size;
225	f->st.mtime = st->st_mtime;
226	f->st.rdev = st->st_rdev;
227}
228
229void
230flist_free(struct flist *f, size_t sz)
231{
232	size_t	 i;
233
234	if (f == NULL)
235		return;
236
237	for (i = 0; i < sz; i++) {
238		free(f[i].path);
239		free(f[i].link);
240	}
241	free(f);
242}
243
244/*
245 * Serialise our file list (which may be zero-length) to the wire.
246 * Makes sure that the receiver isn't going to block on sending us
247 * return messages on the log channel.
248 * Return zero on failure, non-zero on success.
249 */
250int
251flist_send(struct sess *sess, int fdin, int fdout, const struct flist *fl,
252    size_t flsz)
253{
254	size_t		 i, sz, gidsz = 0, uidsz = 0;
255	uint8_t		 flag;
256	const struct flist *f;
257	const char	*fn;
258	struct ident	*gids = NULL, *uids = NULL;
259	int		 rc = 0;
260
261	/* Double-check that we've no pending multiplexed data. */
262
263	LOG2(sess, "sending file metadata list: %zu", flsz);
264
265	for (i = 0; i < flsz; i++) {
266		f = &fl[i];
267		fn = f->wpath;
268		sz = strlen(f->wpath);
269		assert(sz > 0);
270
271		/*
272		 * If applicable, unclog the read buffer.
273		 * This happens when the receiver has a lot of log
274		 * messages and all we're doing is sending our file list
275		 * without checking for messages.
276		 */
277
278		if (sess->mplex_reads &&
279		    io_read_check(sess, fdin) &&
280		     !io_read_flush(sess, fdin)) {
281			ERRX1(sess, "io_read_flush");
282			goto out;
283		}
284
285		/*
286		 * For ease, make all of our filenames be "long"
287		 * regardless their actual length.
288		 * This also makes sure that we don't transmit a zero
289		 * byte unintentionally.
290		 */
291
292		flag = FLIST_NAME_LONG;
293
294		LOG3(sess, "%s: sending file metadata: "
295			"size %jd, mtime %jd, mode %o",
296			fn, (intmax_t)f->st.size,
297			(intmax_t)f->st.mtime, f->st.mode);
298
299		/* Now write to the wire. */
300		/* FIXME: buffer this. */
301
302		if (!io_write_byte(sess, fdout, flag)) {
303			ERRX1(sess, "io_write_byte");
304			goto out;
305		} else if (!io_write_int(sess, fdout, sz)) {
306			ERRX1(sess, "io_write_int");
307			goto out;
308		} else if (!io_write_buf(sess, fdout, fn, sz)) {
309			ERRX1(sess, "io_write_buf");
310			goto out;
311		} else if (!io_write_long(sess, fdout, f->st.size)) {
312			ERRX1(sess, "io_write_long");
313			goto out;
314		} else if (!io_write_int(sess, fdout, f->st.mtime)) {
315			ERRX1(sess, "io_write_int");
316			goto out;
317		} else if (!io_write_int(sess, fdout, f->st.mode)) {
318			ERRX1(sess, "io_write_int");
319			goto out;
320		}
321
322		/* Conditional part: uid. */
323
324		if (sess->opts->preserve_uids) {
325			if (!io_write_int(sess, fdout, f->st.uid)) {
326				ERRX1(sess, "io_write_int");
327				goto out;
328			}
329			if (!idents_add(sess, 0, &uids, &uidsz, f->st.uid)) {
330				ERRX1(sess, "idents_add");
331				goto out;
332			}
333		}
334
335		/* Conditional part: gid. */
336
337		if (sess->opts->preserve_gids) {
338			if (!io_write_int(sess, fdout, f->st.gid)) {
339				ERRX1(sess, "io_write_int");
340				goto out;
341			}
342			if (!idents_add(sess, 1, &gids, &gidsz, f->st.gid)) {
343				ERRX1(sess, "idents_add");
344				goto out;
345			}
346		}
347
348		/* Conditional part: link. */
349
350		if (S_ISLNK(f->st.mode) &&
351		    sess->opts->preserve_links) {
352			fn = f->link;
353			sz = strlen(f->link);
354			if (!io_write_int(sess, fdout, sz)) {
355				ERRX1(sess, "io_write_int");
356				goto out;
357			}
358			if (!io_write_buf(sess, fdout, fn, sz)) {
359				ERRX1(sess, "io_write_int");
360				goto out;
361			}
362		}
363
364		if (S_ISBLK(f->st.mode) || S_ISCHR(f->st.mode) ||
365		    S_ISFIFO(f->st.mode) || S_ISSOCK(f->st.mode)) {
366			if (!io_write_int(sess, fdout, f->st.rdev)) {
367				ERRX1(sess, "io_write_int");
368				goto out;
369			}
370		}
371
372		if (S_ISREG(f->st.mode))
373			sess->total_size += f->st.size;
374	}
375
376	/* Signal end of file list. */
377
378	if (!io_write_byte(sess, fdout, 0)) {
379		ERRX1(sess, "io_write_byte");
380		goto out;
381	}
382
383	/* Conditionally write identifier lists. */
384
385	if (sess->opts->preserve_uids) {
386		LOG2(sess, "sending uid list: %zu", uidsz);
387		if (!idents_send(sess, fdout, uids, uidsz)) {
388			ERRX1(sess, "idents_send");
389			goto out;
390		}
391	}
392
393	if (sess->opts->preserve_gids) {
394		LOG2(sess, "sending gid list: %zu", gidsz);
395		if (!idents_send(sess, fdout, gids, gidsz)) {
396			ERRX1(sess, "idents_send");
397			goto out;
398		}
399	}
400
401	rc = 1;
402out:
403	idents_free(gids, gidsz);
404	idents_free(uids, uidsz);
405	return rc;
406}
407
408/*
409 * Read the filename of a file list.
410 * This is the most expensive part of the file list transfer, so a lot
411 * of attention has gone into transmitting as little as possible.
412 * Micro-optimisation, but whatever.
413 * Fills in "f" with the full path on success.
414 * Returns zero on failure, non-zero on success.
415 */
416static int
417flist_recv_name(struct sess *sess, int fd, struct flist *f, uint8_t flags,
418    char last[MAXPATHLEN])
419{
420	uint8_t		 bval;
421	size_t		 partial = 0;
422	size_t		 pathlen = 0, len;
423
424	/*
425	 * Read our filename.
426	 * If we have FLIST_NAME_SAME, we inherit some of the last
427	 * transmitted name.
428	 * If we have FLIST_NAME_LONG, then the string length is greater
429	 * than byte-size.
430	 */
431
432	if (FLIST_NAME_SAME & flags) {
433		if (!io_read_byte(sess, fd, &bval)) {
434			ERRX1(sess, "io_read_byte");
435			return 0;
436		}
437		partial = bval;
438	}
439
440	/* Get the (possibly-remaining) filename length. */
441
442	if (FLIST_NAME_LONG & flags) {
443		if (!io_read_size(sess, fd, &pathlen)) {
444			ERRX1(sess, "io_read_size");
445			return 0;
446		}
447	} else {
448		if (!io_read_byte(sess, fd, &bval)) {
449			ERRX1(sess, "io_read_byte");
450			return 0;
451		}
452		pathlen = bval;
453	}
454
455	/* Allocate our full filename length. */
456	/* FIXME: maximum pathname length. */
457
458	if ((len = pathlen + partial) == 0) {
459		ERRX(sess, "security violation: "
460			"zero-length pathname");
461		return 0;
462	}
463
464	if ((f->path = malloc(len + 1)) == NULL) {
465		ERR(sess, "malloc");
466		return 0;
467	}
468	f->path[len] = '\0';
469
470	if (FLIST_NAME_SAME & flags)
471		memcpy(f->path, last, partial);
472
473	if (!io_read_buf(sess, fd, f->path + partial, pathlen)) {
474		ERRX1(sess, "io_read_buf");
475		return 0;
476	}
477
478	if (f->path[0] == '/') {
479		ERRX(sess, "security violation: "
480			"absolute pathname: %s", f->path);
481		return 0;
482	}
483
484	if (strstr(f->path, "/../") != NULL ||
485	    (len > 2 && strcmp(f->path + len - 3, "/..") == 0) ||
486	    (len > 2 && strncmp(f->path, "../", 3) == 0) ||
487	    strcmp(f->path, "..") == 0) {
488		ERRX(sess, "%s: security violation: "
489			"backtracking pathname", f->path);
490		return 0;
491	}
492
493	/* Record our last path and construct our filename. */
494
495	strlcpy(last, f->path, MAXPATHLEN);
496	f->wpath = f->path;
497	return 1;
498}
499
500/*
501 * Reallocate a file list in chunks of FLIST_CHUNK_SIZE;
502 * Returns zero on failure, non-zero on success.
503 */
504static int
505flist_realloc(struct sess *sess, struct flist **fl, size_t *sz, size_t *max)
506{
507	void	*pp;
508
509	if (*sz + 1 <= *max)  {
510		(*sz)++;
511		return 1;
512	}
513
514	pp = recallocarray(*fl, *max,
515		*max + FLIST_CHUNK_SIZE, sizeof(struct flist));
516	if (pp == NULL) {
517		ERR(sess, "recallocarray");
518		return 0;
519	}
520	*fl = pp;
521	*max += FLIST_CHUNK_SIZE;
522	(*sz)++;
523	return 1;
524}
525
526/*
527 * Copy a regular or symbolic link file "path" into "f".
528 * This handles the correct path creation and symbolic linking.
529 * Returns zero on failure, non-zero on success.
530 */
531static int
532flist_append(struct sess *sess, struct flist *f, struct stat *st,
533    const char *path)
534{
535
536	/*
537	 * Copy the full path for local addressing and transmit
538	 * only the filename part for the receiver.
539	 */
540
541	if ((f->path = strdup(path)) == NULL) {
542		ERR(sess, "strdup");
543		return 0;
544	}
545
546	if ((f->wpath = strrchr(f->path, '/')) == NULL)
547		f->wpath = f->path;
548	else
549		f->wpath++;
550
551	/*
552	 * On the receiving end, we'll strip out all bits on the
553	 * mode except for the file permissions.
554	 * No need to warn about it here.
555	 */
556
557	flist_copy_stat(f, st);
558
559	/* Optionally copy link information. */
560
561	if (S_ISLNK(st->st_mode)) {
562		f->link = symlink_read(sess, f->path);
563		if (f->link == NULL) {
564			ERRX1(sess, "symlink_read");
565			return 0;
566		}
567	}
568
569	return 1;
570}
571
572/*
573 * Receive a file list from the wire, filling in length "sz" (which may
574 * possibly be zero) and list "flp" on success.
575 * Return zero on failure, non-zero on success.
576 */
577int
578flist_recv(struct sess *sess, int fd, struct flist **flp, size_t *sz)
579{
580	struct flist	*fl = NULL;
581	struct flist	*ff;
582	const struct flist *fflast = NULL;
583	size_t		 flsz = 0, flmax = 0, lsz, gidsz = 0, uidsz = 0;
584	uint8_t		 flag;
585	char		 last[MAXPATHLEN];
586	uint64_t	 lval; /* temporary values... */
587	int32_t		 ival;
588	struct ident	*gids = NULL, *uids = NULL;
589
590	last[0] = '\0';
591
592	for (;;) {
593		if (!io_read_byte(sess, fd, &flag)) {
594			ERRX1(sess, "io_read_byte");
595			goto out;
596		} else if (flag == 0)
597			break;
598
599		if (!flist_realloc(sess, &fl, &flsz, &flmax)) {
600			ERRX1(sess, "flist_realloc");
601			goto out;
602		}
603
604		ff = &fl[flsz - 1];
605		fflast = flsz > 1 ? &fl[flsz - 2] : NULL;
606
607		/* Filename first. */
608
609		if (!flist_recv_name(sess, fd, ff, flag, last)) {
610			ERRX1(sess, "flist_recv_name");
611			goto out;
612		}
613
614		/* Read the file size. */
615
616		if (!io_read_ulong(sess, fd, &lval)) {
617			ERRX1(sess, "io_read_ulong");
618			goto out;
619		}
620		ff->st.size = lval;
621
622		/* Read the modification time. */
623
624		if (!(FLIST_TIME_SAME & flag)) {
625			if (!io_read_int(sess, fd, &ival)) {
626				ERRX1(sess, "io_read_int");
627				goto out;
628			}
629			ff->st.mtime = ival;
630		} else if (fflast == NULL) {
631			ERRX(sess, "same time without last entry");
632			goto out;
633		}  else
634			ff->st.mtime = fflast->st.mtime;
635
636		/* Read the file mode. */
637
638		if (!(FLIST_MODE_SAME & flag)) {
639			if (!io_read_int(sess, fd, &ival)) {
640				ERRX1(sess, "io_read_int");
641				goto out;
642			}
643			ff->st.mode = ival;
644		} else if (fflast == NULL) {
645			ERRX(sess, "same mode without last entry");
646			goto out;
647		} else
648			ff->st.mode = fflast->st.mode;
649
650		/* Conditional part: uid. */
651
652		if (sess->opts->preserve_uids) {
653			if (!(FLIST_UID_SAME & flag)) {
654				if (!io_read_int(sess, fd, &ival)) {
655					ERRX1(sess, "io_read_int");
656					goto out;
657				}
658				ff->st.uid = ival;
659			} else if (fflast == NULL) {
660				ERRX(sess, "same uid "
661					"without last entry");
662				goto out;
663			} else
664				ff->st.uid = fflast->st.uid;
665		}
666
667		/* Conditional part: gid. */
668
669		if (sess->opts->preserve_gids) {
670			if (!(FLIST_GID_SAME & flag)) {
671				if (!io_read_int(sess, fd, &ival)) {
672					ERRX1(sess, "io_read_int");
673					goto out;
674				}
675				ff->st.gid = ival;
676			} else if (fflast == NULL) {
677				ERRX(sess, "same gid "
678					"without last entry");
679				goto out;
680			} else
681				ff->st.gid = fflast->st.gid;
682		}
683
684		/* handle devices & special files*/
685
686		if ((sess->opts->devices && (S_ISBLK(ff->st.mode) ||
687		    S_ISCHR(ff->st.mode))) ||
688		    (sess->opts->specials && (S_ISFIFO(ff->st.mode) ||
689		    S_ISSOCK(ff->st.mode)))) {
690			if (!(FLIST_RDEV_SAME & flag)) {
691				if (!io_read_int(sess, fd, &ival)) {
692					ERRX1(sess, "io_read_int");
693					goto out;
694				}
695				ff->st.rdev = ival;
696			} else if (fflast == NULL) {
697				ERRX(sess, "same mode without last entry");
698				goto out;
699			} else
700				ff->st.rdev = fflast->st.rdev;
701		}
702
703		/* Conditional part: link. */
704
705		if (S_ISLNK(ff->st.mode) &&
706		    sess->opts->preserve_links) {
707			if (!io_read_size(sess, fd, &lsz)) {
708				ERRX1(sess, "io_read_size");
709				goto out;
710			} else if (lsz == 0) {
711				ERRX(sess, "empty link name");
712				goto out;
713			}
714			ff->link = calloc(lsz + 1, 1);
715			if (ff->link == NULL) {
716				ERR(sess, "calloc");
717				goto out;
718			}
719			if (!io_read_buf(sess, fd, ff->link, lsz)) {
720				ERRX1(sess, "io_read_buf");
721				goto out;
722			}
723		}
724
725		LOG3(sess, "%s: received file metadata: "
726			"size %jd, mtime %jd, mode %o, rdev (%d, %d)",
727			ff->path, (intmax_t)ff->st.size,
728			(intmax_t)ff->st.mtime, ff->st.mode,
729			major(ff->st.rdev), minor(ff->st.rdev));
730
731		if (S_ISREG(ff->st.mode))
732			sess->total_size += ff->st.size;
733	}
734
735	/* Conditionally read the user/group list. */
736
737	if (sess->opts->preserve_uids) {
738		if (!idents_recv(sess, fd, &uids, &uidsz)) {
739			ERRX1(sess, "idents_recv");
740			goto out;
741		}
742		LOG2(sess, "received uid list: %zu", uidsz);
743	}
744
745	if (sess->opts->preserve_gids) {
746		if (!idents_recv(sess, fd, &gids, &gidsz)) {
747			ERRX1(sess, "idents_recv");
748			goto out;
749		}
750		LOG2(sess, "received gid list: %zu", gidsz);
751	}
752
753	/* Remember to order the received list. */
754
755	LOG2(sess, "received file metadata list: %zu", flsz);
756	qsort(fl, flsz, sizeof(struct flist), flist_cmp);
757	flist_topdirs(sess, fl, flsz);
758	*sz = flsz;
759	*flp = fl;
760
761	/* Conditionally remap and reassign identifiers. */
762
763	if (sess->opts->preserve_uids) {
764		idents_remap(sess, 0, uids, uidsz);
765		idents_assign_uid(sess, fl, flsz, uids, uidsz);
766	}
767
768	if (sess->opts->preserve_gids) {
769		idents_remap(sess, 1, gids, gidsz);
770		idents_assign_gid(sess, fl, flsz, gids, gidsz);
771	}
772
773	idents_free(gids, gidsz);
774	idents_free(uids, uidsz);
775	return 1;
776out:
777	flist_free(fl, flsz);
778	idents_free(gids, gidsz);
779	idents_free(uids, uidsz);
780	*sz = 0;
781	*flp = NULL;
782	return 0;
783}
784
785/*
786 * Generate a flist possibly-recursively given a file root, which may
787 * also be a regular file or symlink.
788 * On success, augments the generated list in "flp" of length "sz".
789 * Returns zero on failure, non-zero on success.
790 */
791static int
792flist_gen_dirent(struct sess *sess, char *root, struct flist **fl, size_t *sz,
793    size_t *max)
794{
795	char		*cargv[2], *cp;
796	int		 rc = 0;
797	FTS		*fts;
798	FTSENT		*ent;
799	struct flist	*f;
800	size_t		 flsz = 0, stripdir;
801	struct stat	 st;
802
803	cargv[0] = root;
804	cargv[1] = NULL;
805
806	/*
807	 * If we're a file, then revert to the same actions we use for
808	 * the non-recursive scan.
809	 */
810
811	if (lstat(root, &st) == -1) {
812		ERR(sess, "%s: lstat", root);
813		return 0;
814	} else if (S_ISREG(st.st_mode)) {
815		if (!flist_realloc(sess, fl, sz, max)) {
816			ERRX1(sess, "flist_realloc");
817			return 0;
818		}
819		f = &(*fl)[(*sz) - 1];
820		assert(f != NULL);
821
822		if (!flist_append(sess, f, &st, root)) {
823			ERRX1(sess, "flist_append");
824			return 0;
825		}
826		if (unveil(root, "r") == -1) {
827			ERR(sess, "%s: unveil", root);
828			return 0;
829		}
830		return 1;
831	} else if (S_ISLNK(st.st_mode)) {
832		if (!sess->opts->preserve_links) {
833			WARNX(sess, "%s: skipping symlink", root);
834			return 1;
835		} else if (!flist_realloc(sess, fl, sz, max)) {
836			ERRX1(sess, "flist_realloc");
837			return 0;
838		}
839		f = &(*fl)[(*sz) - 1];
840		assert(f != NULL);
841
842		if (!flist_append(sess, f, &st, root)) {
843			ERRX1(sess, "flist_append");
844			return 0;
845		}
846		if (unveil(root, "r") == -1) {
847			ERR(sess, "%s: unveil", root);
848			return 0;
849		}
850		return 1;
851	} else if (!S_ISDIR(st.st_mode)) {
852		WARNX(sess, "%s: skipping special", root);
853		return 1;
854	}
855
856	/*
857	 * If we end with a slash, it means that we're not supposed to
858	 * copy the directory part itself---only the contents.
859	 * So set "stripdir" to be what we take out.
860	 */
861
862	stripdir = strlen(root);
863	assert(stripdir > 0);
864	if (root[stripdir - 1] != '/')
865		stripdir = 0;
866
867	/*
868	 * If we're not stripping anything, then see if we need to strip
869	 * out the leading material in the path up to and including the
870	 * last directory component.
871	 */
872
873	if (stripdir == 0)
874		if ((cp = strrchr(root, '/')) != NULL)
875			stripdir = cp - root + 1;
876
877	/*
878	 * If we're recursive, then we need to take down all of the
879	 * files and directory components, so use fts(3).
880	 * Copying the information file-by-file into the flstat.
881	 * We'll make sense of it in flist_send.
882	 */
883
884	if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) {
885		ERR(sess, "fts_open");
886		return 0;
887	}
888
889	errno = 0;
890	while ((ent = fts_read(fts)) != NULL) {
891		if (!flist_fts_check(sess, ent)) {
892			errno = 0;
893			continue;
894		}
895
896		/* We don't allow symlinks without -l. */
897
898		assert(ent->fts_statp != NULL);
899		if (S_ISLNK(ent->fts_statp->st_mode) &&
900		    !sess->opts->preserve_links) {
901			WARNX(sess, "%s: skipping "
902				"symlink", ent->fts_path);
903			continue;
904		}
905
906		/* Allocate a new file entry. */
907
908		if (!flist_realloc(sess, fl, sz, max)) {
909			ERRX1(sess, "flist_realloc");
910			goto out;
911		}
912		flsz++;
913		f = &(*fl)[*sz - 1];
914
915		/* Our path defaults to "." for the root. */
916
917		if ('\0' == ent->fts_path[stripdir]) {
918			if (asprintf(&f->path, "%s.", ent->fts_path) < 0) {
919				ERR(sess, "asprintf");
920				f->path = NULL;
921				goto out;
922			}
923		} else {
924			if ((f->path = strdup(ent->fts_path)) == NULL) {
925				ERR(sess, "strdup");
926				goto out;
927			}
928		}
929
930		f->wpath = f->path + stripdir;
931		flist_copy_stat(f, ent->fts_statp);
932
933		/* Optionally copy link information. */
934
935		if (S_ISLNK(ent->fts_statp->st_mode)) {
936			f->link = symlink_read(sess, f->path);
937			if (f->link == NULL) {
938				ERRX1(sess, "symlink_read");
939				goto out;
940			}
941		}
942
943		/* Reset errno for next fts_read() call. */
944		errno = 0;
945	}
946	if (errno) {
947		ERR(sess, "fts_read");
948		goto out;
949	}
950	if (unveil(root, "r") == -1) {
951		ERR(sess, "%s: unveil", root);
952		goto out;
953	}
954
955	LOG3(sess, "generated %zu filenames: %s", flsz, root);
956	rc = 1;
957out:
958	fts_close(fts);
959	return rc;
960}
961
/*
 * Generate a flist recursively given the array of directories (or
 * files, symlinks, doesn't matter) specified in argv (argc >0).
 * Any previous contents of "flp" and "sz" are ignored: the list is
 * built from scratch.
 * On success, stores the generated list in "flp" with length "sz",
 * which may be zero.
 * On failure, the partial list is freed and "flp" and "sz" are zeroed.
 * Returns zero on failure, non-zero on success.
 */
static int
flist_gen_dirs(struct sess *sess, size_t argc, char **argv, struct flist **flp,
    size_t *sz)
{
	size_t		 i, max = 0;

	/*
	 * Our allocation count starts at zero, so the list and its
	 * length must as well or growing it would mis-size the array.
	 */

	*flp = NULL;
	*sz = 0;

	for (i = 0; i < argc; i++)
		if (!flist_gen_dirent(sess, argv[i], flp, sz, &max))
			break;

	if (i == argc) {
		LOG2(sess, "recursively generated %zu filenames", *sz);
		return 1;
	}

	/*
	 * Free by allocated size rather than length: entries past the
	 * length are zeroed, so this is harmless.
	 */

	ERRX1(sess, "flist_gen_dirent");
	flist_free(*flp, max);
	*flp = NULL;
	*sz = 0;
	return 0;
}
990
991/*
992 * Generate list of files from the command-line argc (>0) and argv.
993 * On success, stores the generated list in "flp" with length "sz",
994 * which may be zero.
995 * Returns zero on failure, non-zero on success.
996 */
997static int
998flist_gen_files(struct sess *sess, size_t argc, char **argv,
999    struct flist **flp, size_t *sz)
1000{
1001	struct flist	*fl = NULL, *f;
1002	size_t		 i, flsz = 0;
1003	struct stat	 st;
1004
1005	assert(argc);
1006
1007	if ((fl = calloc(argc, sizeof(struct flist))) == NULL) {
1008		ERR(sess, "calloc");
1009		return 0;
1010	}
1011
1012	for (i = 0; i < argc; i++) {
1013		if ('\0' == argv[i][0])
1014			continue;
1015		if (lstat(argv[i], &st) == -1) {
1016			ERR(sess, "%s: lstat", argv[i]);
1017			goto out;
1018		}
1019
1020		/*
1021		 * File type checks.
1022		 * In non-recursive mode, we don't accept directories.
1023		 * We also skip symbolic links without -l.
1024		 * Beyond that, we only accept regular files.
1025		 */
1026
1027		if (S_ISDIR(st.st_mode)) {
1028			WARNX(sess, "%s: skipping directory", argv[i]);
1029			continue;
1030		} else if (S_ISLNK(st.st_mode)) {
1031			if (!sess->opts->preserve_links) {
1032				WARNX(sess, "%s: skipping "
1033					"symlink", argv[i]);
1034				continue;
1035			}
1036		} else if (!S_ISREG(st.st_mode)) {
1037			WARNX(sess, "%s: skipping special", argv[i]);
1038			continue;
1039		}
1040
1041
1042		f = &fl[flsz++];
1043		assert(f != NULL);
1044
1045		/* Add this file to our file-system worldview. */
1046
1047		if (unveil(argv[i], "r") == -1) {
1048			ERR(sess, "%s: unveil", argv[i]);
1049			goto out;
1050		}
1051		if (!flist_append(sess, f, &st, argv[i])) {
1052			ERRX1(sess, "flist_append");
1053			goto out;
1054		}
1055	}
1056
1057	LOG2(sess, "non-recursively generated %zu filenames", flsz);
1058	*sz = flsz;
1059	*flp = fl;
1060	return 1;
1061out:
1062	flist_free(fl, argc);
1063	*sz = 0;
1064	*flp = NULL;
1065	return 0;
1066}
1067
1068/*
1069 * Generate a sorted, de-duplicated list of file metadata.
1070 * In non-recursive mode (the default), we use only the files we're
1071 * given.
1072 * Otherwise, directories are recursively examined.
1073 * Returns zero on failure, non-zero on success.
1074 * On success, "fl" will need to be freed with flist_free().
1075 */
1076int
1077flist_gen(struct sess *sess, size_t argc, char **argv, struct flist **flp,
1078    size_t *sz)
1079{
1080	int	 rc;
1081
1082	assert(argc > 0);
1083	rc = sess->opts->recursive ?
1084		flist_gen_dirs(sess, argc, argv, flp, sz) :
1085		flist_gen_files(sess, argc, argv, flp, sz);
1086
1087	/* After scanning, lock our file-system view. */
1088
1089	if (unveil(NULL, NULL) == -1) {
1090		ERR(sess, "unveil");
1091		return 0;
1092	}
1093	if (!rc)
1094		return 0;
1095
1096	qsort(*flp, *sz, sizeof(struct flist), flist_cmp);
1097
1098	if (flist_dedupe(sess, flp, sz)) {
1099		flist_topdirs(sess, *flp, *sz);
1100		return 1;
1101	}
1102
1103	ERRX1(sess, "flist_dedupe");
1104	flist_free(*flp, *sz);
1105	*flp = NULL;
1106	*sz = 0;
1107	return 0;
1108}
1109
1110/*
1111 * Generate a list of files in root to delete that are within the
1112 * top-level directories stipulated by "wfl".
1113 * Only handles symbolic links, directories, and regular files.
1114 * Returns zero on failure (fl and flsz will be NULL and zero), non-zero
1115 * on success.
1116 * On success, "fl" will need to be freed with flist_free().
1117 */
1118int
1119flist_gen_dels(struct sess *sess, const char *root, struct flist **fl,
1120    size_t *sz,	const struct flist *wfl, size_t wflsz)
1121{
1122	char		**cargv = NULL;
1123	int		  rc = 0, c;
1124	FTS		 *fts = NULL;
1125	FTSENT		 *ent;
1126	struct flist	 *f;
1127	size_t		  cargvs = 0, i, j, max = 0, stripdir;
1128	ENTRY		  hent;
1129	ENTRY		 *hentp;
1130
1131	*fl = NULL;
1132	*sz = 0;
1133
1134	/* Only run this code when we're recursive. */
1135
1136	if (!sess->opts->recursive)
1137		return 1;
1138
1139	/*
1140	 * Gather up all top-level directories for scanning.
1141	 * This is stipulated by rsync's --delete behaviour, where we
1142	 * only delete things in the top-level directories given on the
1143	 * command line.
1144	 */
1145
1146	assert(wflsz > 0);
1147	for (i = 0; i < wflsz; i++)
1148		if (FLSTAT_TOP_DIR & wfl[i].st.flags)
1149			cargvs++;
1150	if (cargvs == 0)
1151		return 1;
1152
1153	if ((cargv = calloc(cargvs + 1, sizeof(char *))) == NULL) {
1154		ERR(sess, "calloc");
1155		return 0;
1156	}
1157
1158	/*
1159	 * If we're given just a "." as the first entry, that means
1160	 * we're doing a relative copy with a trailing slash.
1161	 * Special-case this just for the sake of simplicity.
1162	 * Otherwise, look through all top-levels.
1163	 */
1164
1165	if (wflsz && strcmp(wfl[0].wpath, ".") == 0) {
1166		assert(cargvs == 1);
1167		assert(S_ISDIR(wfl[0].st.mode));
1168		if (asprintf(&cargv[0], "%s/", root) < 0) {
1169			ERR(sess, "asprintf");
1170			cargv[0] = NULL;
1171			goto out;
1172		}
1173		cargv[1] = NULL;
1174	} else {
1175		for (i = j = 0; i < wflsz; i++) {
1176			if (!(FLSTAT_TOP_DIR & wfl[i].st.flags))
1177				continue;
1178			assert(S_ISDIR(wfl[i].st.mode));
1179			assert(strcmp(wfl[i].wpath, "."));
1180			c = asprintf(&cargv[j], "%s/%s", root, wfl[i].wpath);
1181			if (c < 0) {
1182				ERR(sess, "asprintf");
1183				cargv[j] = NULL;
1184				goto out;
1185			}
1186			LOG4(sess, "%s: will scan for deletions", cargv[j]);
1187			j++;
1188		}
1189		assert(j == cargvs);
1190		cargv[j] = NULL;
1191	}
1192
1193	LOG2(sess, "delete from %zu directories", cargvs);
1194
1195	/*
1196	 * Next, use the standard hcreate(3) hashtable interface to hash
1197	 * all of the files that we want to synchronise.
1198	 * This way, we'll be able to determine which files we want to
1199	 * delete in O(n) time instead of O(n * search) time.
1200	 * Plus, we can do the scan in-band and only allocate the files
1201	 * we want to delete.
1202	 */
1203
1204	if (!hcreate(wflsz)) {
1205		ERR(sess, "hcreate");
1206		goto out;
1207	}
1208
1209	for (i = 0; i < wflsz; i++) {
1210		memset(&hent, 0, sizeof(ENTRY));
1211		if ((hent.key = strdup(wfl[i].wpath)) == NULL) {
1212			ERR(sess, "strdup");
1213			goto out;
1214		}
1215		if ((hentp = hsearch(hent, ENTER)) == NULL) {
1216			ERR(sess, "hsearch");
1217			goto out;
1218		} else if (hentp->key != hent.key) {
1219			ERRX(sess, "%s: duplicate", wfl[i].wpath);
1220			free(hent.key);
1221			goto out;
1222		}
1223	}
1224
1225	/*
1226	 * Now we're going to try to descend into all of the top-level
1227	 * directories stipulated by the file list.
1228	 * If the directories don't exist, it's ok.
1229	 */
1230
1231	if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) {
1232		ERR(sess, "fts_open");
1233		goto out;
1234	}
1235
1236	stripdir = strlen(root) + 1;
1237	errno = 0;
1238	while ((ent = fts_read(fts)) != NULL) {
1239		if (ent->fts_info == FTS_NS)
1240			continue;
1241		if (!flist_fts_check(sess, ent)) {
1242			errno = 0;
1243			continue;
1244		} else if (stripdir >= ent->fts_pathlen)
1245			continue;
1246
1247		/* Look up in hashtable. */
1248
1249		memset(&hent, 0, sizeof(ENTRY));
1250		hent.key = ent->fts_path + stripdir;
1251		if (hsearch(hent, FIND) != NULL)
1252			continue;
1253
1254		/* Not found: we'll delete it. */
1255
1256		if (!flist_realloc(sess, fl, sz, &max)) {
1257			ERRX1(sess, "flist_realloc");
1258			goto out;
1259		}
1260		f = &(*fl)[*sz - 1];
1261
1262		if ((f->path = strdup(ent->fts_path)) == NULL) {
1263			ERR(sess, "strdup");
1264			goto out;
1265		}
1266		f->wpath = f->path + stripdir;
1267		assert(ent->fts_statp != NULL);
1268		flist_copy_stat(f, ent->fts_statp);
1269		errno = 0;
1270	}
1271
1272	if (errno) {
1273		ERR(sess, "fts_read");
1274		goto out;
1275	}
1276
1277	qsort(*fl, *sz, sizeof(struct flist), flist_cmp);
1278	rc = 1;
1279out:
1280	if (fts != NULL)
1281		fts_close(fts);
1282	for (i = 0; i < cargvs; i++)
1283		free(cargv[i]);
1284	free(cargv);
1285	hdestroy();
1286	return rc;
1287}
1288
1289/*
1290 * Delete all files and directories in "fl".
1291 * If called with a zero-length "fl", does nothing.
1292 * If dry_run is specified, simply write what would be done.
1293 * Return zero on failure, non-zero on success.
1294 */
1295int
1296flist_del(struct sess *sess, int root, const struct flist *fl, size_t flsz)
1297{
1298	ssize_t	 i;
1299	int	 flag;
1300
1301	if (flsz == 0)
1302		return 1;
1303
1304	assert(sess->opts->del);
1305	assert(sess->opts->recursive);
1306
1307	for (i = flsz - 1; i >= 0; i--) {
1308		LOG1(sess, "%s: deleting", fl[i].wpath);
1309		if (sess->opts->dry_run)
1310			continue;
1311		assert(root != -1);
1312		flag = S_ISDIR(fl[i].st.mode) ? AT_REMOVEDIR : 0;
1313		if (unlinkat(root, fl[i].wpath, flag) == -1 &&
1314		    errno != ENOENT) {
1315			ERR(sess, "%s: unlinkat", fl[i].wpath);
1316			return 0;
1317		}
1318	}
1319
1320	return 1;
1321}
1322