flist.c revision 1.8
1/*	$Id: flist.c,v 1.8 2019/02/12 19:04:52 benno Exp $ */
2/*
3 * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17#include <sys/param.h>
18#include <sys/stat.h>
19
20#include <assert.h>
21#include <errno.h>
22#include <fcntl.h>
23#include <fts.h>
24#include <inttypes.h>
25#include <search.h>
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29#include <unistd.h>
30
31#include "extern.h"
32
/*
 * The file list is grown in chunks of this many entries instead of one
 * entry at a time.
 * Preferably a list needs only one or two allocations.
 */
#define	FLIST_CHUNK_SIZE (1024)
39
/*
 * Per-entry flags of the rsync protocol.
 * They are sent as the first byte of each file entry and describe how
 * the fields that follow are encoded.
 */
#define	FLIST_MODE_SAME	 0x0002 /* mode is repeat */
#define	FLIST_GID_SAME	 0x0010 /* gid is repeat */
#define	FLIST_NAME_SAME	 0x0020 /* name is repeat */
#define	FLIST_NAME_LONG	 0x0040 /* name >255 bytes */
#define	FLIST_TIME_SAME	 0x0080 /* time is repeat */
50
51/*
52 * Requied way to sort a filename list.
53 */
54static int
55flist_cmp(const void *p1, const void *p2)
56{
57	const struct flist *f1 = p1, *f2 = p2;
58
59	return strcmp(f1->wpath, f2->wpath);
60}
61
62/*
63 * Deduplicate our file list (which may be zero-length).
64 * Returns zero on failure, non-zero on success.
65 */
66static int
67flist_dedupe(struct sess *sess, struct flist **fl, size_t *sz)
68{
69	size_t		 i, j;
70	struct flist	*new;
71	struct flist	*f, *fnext;
72
73	if (*sz == 0)
74		return 1;
75
76	/* Create a new buffer, "new", and copy. */
77
78	new = calloc(*sz, sizeof(struct flist));
79	if (new == NULL) {
80		ERR(sess, "calloc");
81		return 0;
82	}
83
84	for (i = j = 0; i < *sz - 1; i++) {
85		f = &(*fl)[i];
86		fnext = &(*fl)[i + 1];
87
88		if (strcmp(f->wpath, fnext->wpath)) {
89			new[j++] = *f;
90			continue;
91		}
92
93		/*
94		 * Our working (destination) paths are the same.
95		 * If the actual file is the same (as given on the
96		 * command-line), then we can just discard the first.
97		 * Otherwise, we need to bail out: it means we have two
98		 * different files with the relative path on the
99		 * destination side.
100		 */
101
102		if (strcmp(f->path, fnext->path) == 0) {
103			new[j++] = *f;
104			i++;
105			WARNX(sess, "%s: duplicate path: %s",
106			    f->wpath, f->path);
107			free(fnext->path);
108			free(fnext->link);
109			fnext->path = fnext->link = NULL;
110			continue;
111		}
112
113		ERRX(sess, "%s: duplicate working path for "
114		    "possibly different file: %s, %s",
115		    f->wpath, f->path, fnext->path);
116		free(new);
117		return 0;
118	}
119
120	/* Don't forget the last entry. */
121
122	if (i == *sz - 1)
123		new[j++] = (*fl)[i];
124
125	/*
126	 * Reassign to the deduplicated array.
127	 * If we started out with *sz > 0, which we check for at the
128	 * beginning, then we'll always continue having *sz > 0.
129	 */
130
131	free(*fl);
132	*fl = new;
133	*sz = j;
134	assert(*sz);
135	return 1;
136}
137
138/*
139 * We're now going to find our top-level directories.
140 * This only applies to recursive mode.
141 * If we have the first element as the ".", then that's the "top
142 * directory" of our transfer.
143 * Otherwise, mark up all top-level directories in the set.
144 */
145static void
146flist_topdirs(struct sess *sess, struct flist *fl, size_t flsz)
147{
148	size_t		 i;
149	const char	*cp;
150
151	if (!sess->opts->recursive)
152		return;
153
154	if (flsz && strcmp(fl[0].wpath, ".")) {
155		for (i = 0; i < flsz; i++) {
156			if (!S_ISDIR(fl[i].st.mode))
157				continue;
158			cp = strchr(fl[i].wpath, '/');
159			if (cp != NULL && cp[1] != '\0')
160				continue;
161			fl[i].st.flags |= FLSTAT_TOP_DIR;
162			LOG4(sess, "%s: top-level", fl[i].wpath);
163		}
164	} else if (flsz) {
165		fl[0].st.flags |= FLSTAT_TOP_DIR;
166		LOG4(sess, "%s: top-level", fl[0].wpath);
167	}
168}
169
/*
 * Decide whether an fts(3) entry should become part of a file list.
 * Accepted are regular files, directories in pre-order, and symbolic
 * links (dangling or not).
 * Directory cycles, unreadable directories, dot entries, errors,
 * special files and stat failures are warned about and skipped; all
 * remaining entry types are skipped silently.
 * Return zero to skip, non-zero to examine.
 */
static int
flist_fts_check(struct sess *sess, FTSENT *ent)
{

	switch (ent->fts_info) {
	case FTS_F:
	case FTS_D:
	case FTS_SL:
	case FTS_SLNONE:
		return 1;
	case FTS_DC:
		WARNX(sess, "%s: directory cycle", ent->fts_path);
		break;
	case FTS_DNR:
		/* WARN() reports errno, so load it from the entry. */
		errno = ent->fts_errno;
		WARN(sess, "%s: unreadable directory", ent->fts_path);
		break;
	case FTS_DOT:
		WARNX(sess, "%s: skipping dot-file", ent->fts_path);
		break;
	case FTS_ERR:
		errno = ent->fts_errno;
		WARN(sess, "%s", ent->fts_path);
		break;
	case FTS_DEFAULT:
		WARNX(sess, "%s: skipping special", ent->fts_path);
		break;
	case FTS_NS:
		errno = ent->fts_errno;
		WARN(sess, "%s: could not stat", ent->fts_path);
		break;
	default:
		break;
	}

	return 0;
}
205
206/*
207 * Copy necessary elements in "st" into the fields of "f".
208 */
209static void
210flist_copy_stat(struct flist *f, const struct stat *st)
211{
212	f->st.mode = st->st_mode;
213	f->st.uid = st->st_uid;
214	f->st.gid = st->st_gid;
215	f->st.size = st->st_size;
216	f->st.mtime = st->st_mtime;
217}
218
219void
220flist_free(struct flist *f, size_t sz)
221{
222	size_t	 i;
223
224	if (f == NULL)
225		return;
226
227	for (i = 0; i < sz; i++) {
228		free(f[i].path);
229		free(f[i].link);
230	}
231	free(f);
232}
233
234/*
235 * Serialise our file list (which may be zero-length) to the wire.
236 * Makes sure that the receiver isn't going to block on sending us
237 * return messages on the log channel.
238 * Return zero on failure, non-zero on success.
239 */
240int
241flist_send(struct sess *sess, int fdin, int fdout, const struct flist *fl,
242    size_t flsz)
243{
244	size_t		 i, sz, gidsz = 0;
245	uint8_t		 flag;
246	const struct flist *f;
247	const char	*fn;
248	struct ident	*gids = NULL;
249	int		 rc = 0;
250
251	/* Double-check that we've no pending multiplexed data. */
252
253	LOG2(sess, "sending file metadata list: %zu", flsz);
254
255	for (i = 0; i < flsz; i++) {
256		f = &fl[i];
257		fn = f->wpath;
258		sz = strlen(f->wpath);
259		assert(sz > 0);
260
261		/*
262		 * If applicable, unclog the read buffer.
263		 * This happens when the receiver has a lot of log
264		 * messages and all we're doing is sending our file list
265		 * without checking for messages.
266		 */
267
268		if (sess->mplex_reads &&
269		    io_read_check(sess, fdin) &&
270		     !io_read_flush(sess, fdin)) {
271			ERRX1(sess, "io_read_flush");
272			goto out;
273		}
274
275		/*
276		 * For ease, make all of our filenames be "long"
277		 * regardless their actual length.
278		 * This also makes sure that we don't transmit a zero
279		 * byte unintentionally.
280		 */
281
282		flag = FLIST_NAME_LONG;
283
284		LOG3(sess, "%s: sending file metadata: "
285			"size %jd, mtime %jd, mode %o",
286			fn, (intmax_t)f->st.size,
287			(intmax_t)f->st.mtime, f->st.mode);
288
289		/* Now write to the wire. */
290		/* FIXME: buffer this. */
291
292		if (!io_write_byte(sess, fdout, flag)) {
293			ERRX1(sess, "io_write_byte");
294			goto out;
295		} else if (!io_write_int(sess, fdout, sz)) {
296			ERRX1(sess, "io_write_int");
297			goto out;
298		} else if (!io_write_buf(sess, fdout, fn, sz)) {
299			ERRX1(sess, "io_write_buf");
300			goto out;
301		} else if (!io_write_long(sess, fdout, f->st.size)) {
302			ERRX1(sess, "io_write_long");
303			goto out;
304		} else if (!io_write_int(sess, fdout, f->st.mtime)) {
305			ERRX1(sess, "io_write_int");
306			goto out;
307		} else if (!io_write_int(sess, fdout, f->st.mode)) {
308			ERRX1(sess, "io_write_int");
309			goto out;
310		}
311
312		/* Conditional part: gid. */
313
314		if (sess->opts->preserve_gids) {
315			if (!io_write_int(sess, fdout, f->st.gid)) {
316				ERRX1(sess, "io_write_int");
317				goto out;
318			}
319			if (!idents_gid_add(sess, &gids, &gidsz, f->st.gid)) {
320				ERRX1(sess, "idents_gid_add");
321				goto out;
322			}
323		}
324
325		/* Conditional part: link. */
326
327		if (S_ISLNK(f->st.mode) &&
328		    sess->opts->preserve_links) {
329			fn = f->link;
330			sz = strlen(f->link);
331			if (!io_write_int(sess, fdout, sz)) {
332				ERRX1(sess, "io_write_int");
333				goto out;
334			}
335			if (!io_write_buf(sess, fdout, fn, sz)) {
336				ERRX1(sess, "io_write_int");
337				goto out;
338			}
339		}
340
341		if (S_ISREG(f->st.mode))
342			sess->total_size += f->st.size;
343	}
344
345	/* Signal end of file list. */
346
347	if (!io_write_byte(sess, fdout, 0)) {
348		ERRX1(sess, "io_write_byte");
349		goto out;
350	}
351
352	/* Conditionally write gid list and terminator. */
353
354	if (sess->opts->preserve_gids) {
355		LOG2(sess, "sending gid list: %zu", gidsz);
356		if (!idents_send(sess, fdout, gids, gidsz)) {
357			ERRX1(sess, "idents_send");
358			goto out;
359		}
360	}
361
362	rc = 1;
363out:
364	idents_free(gids, gidsz);
365	return rc;
366}
367
368/*
369 * Read the filename of a file list.
370 * This is the most expensive part of the file list transfer, so a lot
371 * of attention has gone into transmitting as little as possible.
372 * Micro-optimisation, but whatever.
373 * Fills in "f" with the full path on success.
374 * Returns zero on failure, non-zero on success.
375 */
376static int
377flist_recv_name(struct sess *sess, int fd, struct flist *f, uint8_t flags,
378    char last[MAXPATHLEN])
379{
380	uint8_t		 bval;
381	size_t		 partial = 0;
382	size_t		 pathlen = 0, len;
383
384	/*
385	 * Read our filename.
386	 * If we have FLIST_NAME_SAME, we inherit some of the last
387	 * transmitted name.
388	 * If we have FLIST_NAME_LONG, then the string length is greater
389	 * than byte-size.
390	 */
391
392	if (FLIST_NAME_SAME & flags) {
393		if (!io_read_byte(sess, fd, &bval)) {
394			ERRX1(sess, "io_read_byte");
395			return 0;
396		}
397		partial = bval;
398	}
399
400	/* Get the (possibly-remaining) filename length. */
401
402	if (FLIST_NAME_LONG & flags) {
403		if (!io_read_size(sess, fd, &pathlen)) {
404			ERRX1(sess, "io_read_size");
405			return 0;
406		}
407	} else {
408		if (!io_read_byte(sess, fd, &bval)) {
409			ERRX1(sess, "io_read_byte");
410			return 0;
411		}
412		pathlen = bval;
413	}
414
415	/* Allocate our full filename length. */
416	/* FIXME: maximum pathname length. */
417
418	if ((len = pathlen + partial) == 0) {
419		ERRX(sess, "security violation: "
420			"zero-length pathname");
421		return 0;
422	}
423
424	if ((f->path = malloc(len + 1)) == NULL) {
425		ERR(sess, "malloc");
426		return 0;
427	}
428	f->path[len] = '\0';
429
430	if (FLIST_NAME_SAME & flags)
431		memcpy(f->path, last, partial);
432
433	if (!io_read_buf(sess, fd, f->path + partial, pathlen)) {
434		ERRX1(sess, "io_read_buf");
435		return 0;
436	}
437
438	if (f->path[0] == '/') {
439		ERRX(sess, "security violation: "
440			"absolute pathname: %s", f->path);
441		return 0;
442	}
443
444	if (strstr(f->path, "/../") != NULL ||
445	    (len > 2 && strcmp(f->path + len - 3, "/..") == 0) ||
446	    (len > 2 && strncmp(f->path, "../", 3) == 0) ||
447	    strcmp(f->path, "..") == 0) {
448		ERRX(sess, "%s: security violation: "
449			"backtracking pathname", f->path);
450		return 0;
451	}
452
453	/* Record our last path and construct our filename. */
454
455	strlcpy(last, f->path, MAXPATHLEN);
456	f->wpath = f->path;
457	return 1;
458}
459
460/*
461 * Reallocate a file list in chunks of FLIST_CHUNK_SIZE;
462 * Returns zero on failure, non-zero on success.
463 */
464static int
465flist_realloc(struct sess *sess, struct flist **fl, size_t *sz, size_t *max)
466{
467	void	*pp;
468
469	if (*sz + 1 <= *max)  {
470		(*sz)++;
471		return 1;
472	}
473
474	pp = recallocarray(*fl, *max,
475		*max + FLIST_CHUNK_SIZE, sizeof(struct flist));
476	if (pp == NULL) {
477		ERR(sess, "recallocarray");
478		return 0;
479	}
480	*fl = pp;
481	*max += FLIST_CHUNK_SIZE;
482	(*sz)++;
483	return 1;
484}
485
486/*
487 * Copy a regular or symbolic link file "path" into "f".
488 * This handles the correct path creation and symbolic linking.
489 * Returns zero on failure, non-zero on success.
490 */
491static int
492flist_append(struct sess *sess, struct flist *f, struct stat *st,
493    const char *path)
494{
495
496	/*
497	 * Copy the full path for local addressing and transmit
498	 * only the filename part for the receiver.
499	 */
500
501	if ((f->path = strdup(path)) == NULL) {
502		ERR(sess, "strdup");
503		return 0;
504	}
505
506	if ((f->wpath = strrchr(f->path, '/')) == NULL)
507		f->wpath = f->path;
508	else
509		f->wpath++;
510
511	/*
512	 * On the receiving end, we'll strip out all bits on the
513	 * mode except for the file permissions.
514	 * No need to warn about it here.
515	 */
516
517	flist_copy_stat(f, st);
518
519	/* Optionally copy link information. */
520
521	if (S_ISLNK(st->st_mode)) {
522		f->link = symlink_read(sess, f->path);
523		if (f->link == NULL) {
524			ERRX1(sess, "symlink_read");
525			return 0;
526		}
527	}
528
529	return 1;
530}
531
532/*
533 * Receive a file list from the wire, filling in length "sz" (which may
534 * possibly be zero) and list "flp" on success.
535 * Return zero on failure, non-zero on success.
536 */
537int
538flist_recv(struct sess *sess, int fd, struct flist **flp, size_t *sz)
539{
540	struct flist	*fl = NULL;
541	struct flist	*ff;
542	const struct flist *fflast = NULL;
543	size_t		 i, j, flsz = 0, flmax = 0, lsz, gidsz = 0;
544	uint8_t		 flag;
545	char		 last[MAXPATHLEN];
546	uint64_t	 lval; /* temporary values... */
547	int32_t		 ival;
548	struct ident	*gids = NULL;
549
550	last[0] = '\0';
551
552	for (;;) {
553		if (!io_read_byte(sess, fd, &flag)) {
554			ERRX1(sess, "io_read_byte");
555			goto out;
556		} else if (flag == 0)
557			break;
558
559		if (!flist_realloc(sess, &fl, &flsz, &flmax)) {
560			ERRX1(sess, "flist_realloc");
561			goto out;
562		}
563
564		ff = &fl[flsz - 1];
565		fflast = flsz > 1 ? &fl[flsz - 2] : NULL;
566
567		/* Filename first. */
568
569		if (!flist_recv_name(sess, fd, ff, flag, last)) {
570			ERRX1(sess, "flist_recv_name");
571			goto out;
572		}
573
574		/* Read the file size. */
575
576		if (!io_read_ulong(sess, fd, &lval)) {
577			ERRX1(sess, "io_read_ulong");
578			goto out;
579		}
580		ff->st.size = lval;
581
582		/* Read the modification time. */
583
584		if (!(FLIST_TIME_SAME & flag)) {
585			if (!io_read_int(sess, fd, &ival)) {
586				ERRX1(sess, "io_read_int");
587				goto out;
588			}
589			ff->st.mtime = ival;
590		} else if (fflast == NULL) {
591			ERRX(sess, "same time without last entry");
592			goto out;
593		}  else
594			ff->st.mtime = fflast->st.mtime;
595
596		/* Read the file mode. */
597
598		if (!(FLIST_MODE_SAME & flag)) {
599			if (!io_read_int(sess, fd, &ival)) {
600				ERRX1(sess, "io_read_int");
601				goto out;
602			}
603			ff->st.mode = ival;
604		} else if (fflast == NULL) {
605			ERRX(sess, "same mode without last entry");
606			goto out;
607		} else
608			ff->st.mode = fflast->st.mode;
609
610		/* Conditional part: gid. */
611
612		if (sess->opts->preserve_gids) {
613			if ( ! (FLIST_GID_SAME & flag)) {
614				if ( ! io_read_int(sess, fd, &ival)) {
615					ERRX1(sess, "io_read_int");
616					goto out;
617				}
618				ff->st.gid = ival;
619			} else if (NULL == fflast) {
620				ERRX(sess, "same gid "
621					"without last entry");
622				goto out;
623			} else
624				ff->st.gid = fflast->st.gid;
625		}
626
627		/* Conditional part: link. */
628
629		if (S_ISLNK(ff->st.mode) &&
630		    sess->opts->preserve_links) {
631			if (!io_read_size(sess, fd, &lsz)) {
632				ERRX1(sess, "io_read_size");
633				goto out;
634			} else if (lsz == 0) {
635				ERRX(sess, "empty link name");
636				goto out;
637			}
638			ff->link = calloc(lsz + 1, 1);
639			if (ff->link == NULL) {
640				ERR(sess, "calloc");
641				goto out;
642			}
643			if (!io_read_buf(sess, fd, ff->link, lsz)) {
644				ERRX1(sess, "io_read_buf");
645				goto out;
646			}
647		}
648
649		LOG3(sess, "%s: received file metadata: "
650			"size %jd, mtime %jd, mode %o",
651			ff->path, (intmax_t)ff->st.size,
652			(intmax_t)ff->st.mtime, ff->st.mode);
653
654		if (S_ISREG(ff->st.mode))
655			sess->total_size += ff->st.size;
656	}
657
658	/*
659	 * Now conditionally read the group list.
660	 * We then remap all group identifiers to the local ids.
661	 */
662
663	if (sess->opts->preserve_gids) {
664		if (!idents_recv(sess, fd, &gids, &gidsz)) {
665			ERRX1(sess, "idents_recv");
666			goto out;
667		}
668		LOG2(sess, "received gid list: %zu", gidsz);
669		idents_gid_remap(sess, gids, gidsz);
670	}
671
672	/* Remember to order the received list. */
673
674	LOG2(sess, "received file metadata list: %zu", flsz);
675	qsort(fl, flsz, sizeof(struct flist), flist_cmp);
676	flist_topdirs(sess, fl, flsz);
677	*sz = flsz;
678	*flp = fl;
679
680	/* Lastly, reassign group identifiers. */
681
682	if (sess->opts->preserve_gids) {
683		for (i = 0; i < flsz; i++) {
684			for (j = 0; j < gidsz; j++)
685				if ((int32_t)fl[i].st.gid == gids[j].id)
686					break;
687			assert(j < gidsz);
688			fl[i].st.gid = gids[j].mapped;
689		}
690	}
691
692	idents_free(gids, gidsz);
693	return 1;
694out:
695	flist_free(fl, flsz);
696	idents_free(gids, gidsz);
697	*sz = 0;
698	*flp = NULL;
699	return 0;
700}
701
702/*
703 * Generate a flist possibly-recursively given a file root, which may
704 * also be a regular file or symlink.
705 * On success, augments the generated list in "flp" of length "sz".
706 * Returns zero on failure, non-zero on success.
707 */
708static int
709flist_gen_dirent(struct sess *sess, char *root, struct flist **fl, size_t *sz,
710    size_t *max)
711{
712	char		*cargv[2], *cp;
713	int		 rc = 0;
714	FTS		*fts;
715	FTSENT		*ent;
716	struct flist	*f;
717	size_t		 flsz = 0, stripdir;
718	struct stat	 st;
719
720	cargv[0] = root;
721	cargv[1] = NULL;
722
723	/*
724	 * If we're a file, then revert to the same actions we use for
725	 * the non-recursive scan.
726	 */
727
728	if (lstat(root, &st) == -1) {
729		ERR(sess, "%s: lstat", root);
730		return 0;
731	} else if (S_ISREG(st.st_mode)) {
732		if (!flist_realloc(sess, fl, sz, max)) {
733			ERRX1(sess, "flist_realloc");
734			return 0;
735		}
736		f = &(*fl)[(*sz) - 1];
737		assert(f != NULL);
738
739		if (!flist_append(sess, f, &st, root)) {
740			ERRX1(sess, "flist_append");
741			return 0;
742		} else if (unveil(root, "r") == -1) {
743			ERR(sess, "%s: unveil", root);
744			return 0;
745		}
746		return 1;
747	} else if (S_ISLNK(st.st_mode)) {
748		if (!sess->opts->preserve_links) {
749			WARNX(sess, "%s: skipping symlink", root);
750			return 1;
751		} else if (!flist_realloc(sess, fl, sz, max)) {
752			ERRX1(sess, "flist_realloc");
753			return 0;
754		}
755		f = &(*fl)[(*sz) - 1];
756		assert(f != NULL);
757
758		if (!flist_append(sess, f, &st, root)) {
759			ERRX1(sess, "flist_append");
760			return 0;
761		} else if (unveil(root, "r") == -1) {
762			ERR(sess, "%s: unveil", root);
763			return 0;
764		}
765		return 1;
766	} else if (!S_ISDIR(st.st_mode)) {
767		WARNX(sess, "%s: skipping special", root);
768		return 1;
769	}
770
771	/*
772	 * If we end with a slash, it means that we're not supposed to
773	 * copy the directory part itself---only the contents.
774	 * So set "stripdir" to be what we take out.
775	 */
776
777	stripdir = strlen(root);
778	assert(stripdir > 0);
779	if (root[stripdir - 1] != '/')
780		stripdir = 0;
781
782	/*
783	 * If we're not stripping anything, then see if we need to strip
784	 * out the leading material in the path up to and including the
785	 * last directory component.
786	 */
787
788	if (stripdir == 0)
789		if ((cp = strrchr(root, '/')) != NULL)
790			stripdir = cp - root + 1;
791
792	/*
793	 * If we're recursive, then we need to take down all of the
794	 * files and directory components, so use fts(3).
795	 * Copying the information file-by-file into the flstat.
796	 * We'll make sense of it in flist_send.
797	 */
798
799	if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) {
800		ERR(sess, "fts_open");
801		return 0;
802	}
803
804	errno = 0;
805	while ((ent = fts_read(fts)) != NULL) {
806		if (!flist_fts_check(sess, ent)) {
807			errno = 0;
808			continue;
809		}
810
811		/* We don't allow symlinks without -l. */
812
813		assert(ent->fts_statp != NULL);
814		if (S_ISLNK(ent->fts_statp->st_mode) &&
815		    !sess->opts->preserve_links) {
816			WARNX(sess, "%s: skipping "
817				"symlink", ent->fts_path);
818			continue;
819		}
820
821		/* Allocate a new file entry. */
822
823		if (!flist_realloc(sess, fl, sz, max)) {
824			ERRX1(sess, "flist_realloc");
825			goto out;
826		}
827		flsz++;
828		f = &(*fl)[*sz - 1];
829
830		/* Our path defaults to "." for the root. */
831
832		if ('\0' == ent->fts_path[stripdir]) {
833			if (asprintf(&f->path, "%s.", ent->fts_path) < 0) {
834				ERR(sess, "asprintf");
835				f->path = NULL;
836				goto out;
837			}
838		} else {
839			if ((f->path = strdup(ent->fts_path)) == NULL) {
840				ERR(sess, "strdup");
841				goto out;
842			}
843		}
844
845		f->wpath = f->path + stripdir;
846		flist_copy_stat(f, ent->fts_statp);
847
848		/* Optionally copy link information. */
849
850		if (S_ISLNK(ent->fts_statp->st_mode)) {
851			f->link = symlink_read(sess, f->path);
852			if (f->link == NULL) {
853				ERRX1(sess, "symlink_read");
854				goto out;
855			}
856		}
857
858		/* Reset errno for next fts_read() call. */
859		errno = 0;
860	}
861	if (errno) {
862		ERR(sess, "fts_read");
863		goto out;
864	} else if (unveil(root, "r") == -1) {
865		ERR(sess, "%s: unveil", root);
866		goto out;
867	}
868
869	LOG3(sess, "generated %zu filenames: %s", flsz, root);
870	rc = 1;
871out:
872	fts_close(fts);
873	return rc;
874}
875
/*
 * Generate a flist recursively given the array of directories (or
 * files, symlinks, doesn't matter) specified in argv (argc >0).
 * "flp" and "sz" are pure outputs: they are reset before scanning, so
 * the caller need not initialise them.
 * On success, stores the generated list in "flp" with length "sz",
 * which may be zero.
 * On failure, whatever was generated is freed, "flp" is NULL and "sz"
 * is zero.
 * Returns zero on failure, non-zero on success.
 */
static int
flist_gen_dirs(struct sess *sess, size_t argc, char **argv, struct flist **flp,
    size_t *sz)
{
	size_t		 i, max = 0;

	/*
	 * Start from an empty list to match our zero capacity: the
	 * entries below grow the array from "max" and index it by
	 * "sz", so leftover values would be misread as a list.
	 */

	*flp = NULL;
	*sz = 0;

	for (i = 0; i < argc; i++)
		if (!flist_gen_dirent(sess, argv[i], flp, sz, &max))
			break;

	if (i == argc) {
		LOG2(sess, "recursively generated %zu filenames", *sz);
		return 1;
	}

	/* Free by capacity: unused entries are zeroed. */

	ERRX1(sess, "flist_gen_dirent");
	flist_free(*flp, max);
	*flp = NULL;
	*sz = 0;
	return 0;
}
904
905/*
906 * Generate list of files from the command-line argc (>0) and argv.
907 * On success, stores the generated list in "flp" with length "sz",
908 * which may be zero.
909 * Returns zero on failure, non-zero on success.
910 */
911static int
912flist_gen_files(struct sess *sess, size_t argc, char **argv,
913    struct flist **flp, size_t *sz)
914{
915	struct flist	*fl = NULL, *f;
916	size_t		 i, flsz = 0;
917	struct stat	 st;
918
919	assert(argc);
920
921	if ((fl = calloc(argc, sizeof(struct flist))) == NULL) {
922		ERR(sess, "calloc");
923		return 0;
924	}
925
926	for (i = 0; i < argc; i++) {
927		if ('\0' == argv[i][0])
928			continue;
929		if (lstat(argv[i], &st) == -1) {
930			ERR(sess, "%s: lstat", argv[i]);
931			goto out;
932		}
933
934		/*
935		 * File type checks.
936		 * In non-recursive mode, we don't accept directories.
937		 * We also skip symbolic links without -l.
938		 * Beyond that, we only accept regular files.
939		 */
940
941		if (S_ISDIR(st.st_mode)) {
942			WARNX(sess, "%s: skipping directory", argv[i]);
943			continue;
944		} else if (S_ISLNK(st.st_mode)) {
945			if (!sess->opts->preserve_links) {
946				WARNX(sess, "%s: skipping "
947					"symlink", argv[i]);
948				continue;
949			}
950		} else if (!S_ISREG(st.st_mode)) {
951			WARNX(sess, "%s: skipping special", argv[i]);
952			continue;
953		}
954
955
956		f = &fl[flsz++];
957		assert(f != NULL);
958
959		/* Add this file to our file-system worldview. */
960
961		if (unveil(argv[i], "r") == -1) {
962			ERR(sess, "%s: unveil", argv[i]);
963			goto out;
964		} else if (!flist_append(sess, f, &st, argv[i])) {
965			ERRX1(sess, "flist_append");
966			goto out;
967		}
968	}
969
970	LOG2(sess, "non-recursively generated %zu filenames", flsz);
971	*sz = flsz;
972	*flp = fl;
973	return 1;
974out:
975	flist_free(fl, argc);
976	*sz = 0;
977	*flp = NULL;
978	return 0;
979}
980
981/*
982 * Generate a sorted, de-duplicated list of file metadata.
983 * In non-recursive mode (the default), we use only the files we're
984 * given.
985 * Otherwise, directories are recursively examined.
986 * Returns zero on failure, non-zero on success.
987 * On success, "fl" will need to be freed with flist_free().
988 */
989int
990flist_gen(struct sess *sess, size_t argc, char **argv, struct flist **flp,
991    size_t *sz)
992{
993	int	 rc;
994
995	assert(argc > 0);
996	rc = sess->opts->recursive ?
997		flist_gen_dirs(sess, argc, argv, flp, sz) :
998		flist_gen_files(sess, argc, argv, flp, sz);
999
1000	/* After scanning, lock our file-system view. */
1001
1002	if (unveil(NULL, NULL) == -1) {
1003		ERR(sess, "unveil");
1004		return 0;
1005	} else if (!rc)
1006		return 0;
1007
1008	qsort(*flp, *sz, sizeof(struct flist), flist_cmp);
1009
1010	if (flist_dedupe(sess, flp, sz)) {
1011		flist_topdirs(sess, *flp, *sz);
1012		return 1;
1013	}
1014
1015	ERRX1(sess, "flist_dedupe");
1016	flist_free(*flp, *sz);
1017	*flp = NULL;
1018	*sz = 0;
1019	return 0;
1020}
1021
1022/*
1023 * Generate a list of files in root to delete that are within the
1024 * top-level directories stipulated by "wfl".
1025 * Only handles symbolic links, directories, and regular files.
1026 * Returns zero on failure (fl and flsz will be NULL and zero), non-zero
1027 * on success.
1028 * On success, "fl" will need to be freed with flist_free().
1029 */
1030int
1031flist_gen_dels(struct sess *sess, const char *root, struct flist **fl,
1032    size_t *sz,	const struct flist *wfl, size_t wflsz)
1033{
1034	char		**cargv = NULL;
1035	int		  rc = 0, c;
1036	FTS		 *fts = NULL;
1037	FTSENT		 *ent;
1038	struct flist	 *f;
1039	size_t		  cargvs = 0, i, j, max = 0, stripdir;
1040	ENTRY		  hent;
1041	ENTRY		 *hentp;
1042
1043	*fl = NULL;
1044	*sz = 0;
1045
1046	/* Only run this code when we're recursive. */
1047
1048	if (!sess->opts->recursive)
1049		return 1;
1050
1051	/*
1052	 * Gather up all top-level directories for scanning.
1053	 * This is stipulated by rsync's --delete behaviour, where we
1054	 * only delete things in the top-level directories given on the
1055	 * command line.
1056	 */
1057
1058	assert(wflsz > 0);
1059	for (i = 0; i < wflsz; i++)
1060		if (FLSTAT_TOP_DIR & wfl[i].st.flags)
1061			cargvs++;
1062	if (cargvs == 0)
1063		return 1;
1064
1065	if ((cargv = calloc(cargvs + 1, sizeof(char *))) == NULL) {
1066		ERR(sess, "calloc");
1067		return 0;
1068	}
1069
1070	/*
1071	 * If we're given just a "." as the first entry, that means
1072	 * we're doing a relative copy with a trailing slash.
1073	 * Special-case this just for the sake of simplicity.
1074	 * Otherwise, look through all top-levels.
1075	 */
1076
1077	if (wflsz && strcmp(wfl[0].wpath, ".") == 0) {
1078		assert(cargvs == 1);
1079		assert(S_ISDIR(wfl[0].st.mode));
1080		if (asprintf(&cargv[0], "%s/", root) < 0) {
1081			ERR(sess, "asprintf");
1082			cargv[0] = NULL;
1083			goto out;
1084		}
1085		cargv[1] = NULL;
1086	} else {
1087		for (i = j = 0; i < wflsz; i++) {
1088			if (!(FLSTAT_TOP_DIR & wfl[i].st.flags))
1089				continue;
1090			assert(S_ISDIR(wfl[i].st.mode));
1091			assert(strcmp(wfl[i].wpath, "."));
1092			c = asprintf(&cargv[j], "%s/%s", root, wfl[i].wpath);
1093			if (c < 0) {
1094				ERR(sess, "asprintf");
1095				cargv[j] = NULL;
1096				goto out;
1097			}
1098			LOG4(sess, "%s: will scan "
1099				"for deletions", cargv[j]);
1100			j++;
1101		}
1102		assert(j == cargvs);
1103		cargv[j] = NULL;
1104	}
1105
1106	LOG2(sess, "delete from %zu directories", cargvs);
1107
1108	/*
1109	 * Next, use the standard hcreate(3) hashtable interface to hash
1110	 * all of the files that we want to synchronise.
1111	 * This way, we'll be able to determine which files we want to
1112	 * delete in O(n) time instead of O(n * search) time.
1113	 * Plus, we can do the scan in-band and only allocate the files
1114	 * we want to delete.
1115	 */
1116
1117	if (!hcreate(wflsz)) {
1118		ERR(sess, "hcreate");
1119		goto out;
1120	}
1121
1122	for (i = 0; i < wflsz; i++) {
1123		memset(&hent, 0, sizeof(ENTRY));
1124		if ((hent.key = strdup(wfl[i].wpath)) == NULL) {
1125			ERR(sess, "strdup");
1126			goto out;
1127		}
1128		if ((hentp = hsearch(hent, ENTER)) == NULL) {
1129			ERR(sess, "hsearch");
1130			goto out;
1131		} else if (hentp->key != hent.key) {
1132			ERRX(sess, "%s: duplicate", wfl[i].wpath);
1133			free(hent.key);
1134			goto out;
1135		}
1136	}
1137
1138	/*
1139	 * Now we're going to try to descend into all of the top-level
1140	 * directories stipulated by the file list.
1141	 * If the directories don't exist, it's ok.
1142	 */
1143
1144	if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) {
1145		ERR(sess, "fts_open");
1146		goto out;
1147	}
1148
1149	stripdir = strlen(root) + 1;
1150	errno = 0;
1151	while ((ent = fts_read(fts)) != NULL) {
1152		if (ent->fts_info == FTS_NS)
1153			continue;
1154		if (!flist_fts_check(sess, ent)) {
1155			errno = 0;
1156			continue;
1157		} else if (stripdir >= ent->fts_pathlen)
1158			continue;
1159
1160		/* Look up in hashtable. */
1161
1162		memset(&hent, 0, sizeof(ENTRY));
1163		hent.key = ent->fts_path + stripdir;
1164		if (hsearch(hent, FIND) != NULL)
1165			continue;
1166
1167		/* Not found: we'll delete it. */
1168
1169		if (!flist_realloc(sess, fl, sz, &max)) {
1170			ERRX1(sess, "flist_realloc");
1171			goto out;
1172		}
1173		f = &(*fl)[*sz - 1];
1174
1175		if ((f->path = strdup(ent->fts_path)) == NULL) {
1176			ERR(sess, "strdup");
1177			goto out;
1178		}
1179		f->wpath = f->path + stripdir;
1180		assert(ent->fts_statp != NULL);
1181		flist_copy_stat(f, ent->fts_statp);
1182		errno = 0;
1183	}
1184
1185	if (errno) {
1186		ERR(sess, "fts_read");
1187		goto out;
1188	}
1189
1190	qsort(*fl, *sz, sizeof(struct flist), flist_cmp);
1191	rc = 1;
1192out:
1193	if (fts != NULL)
1194		fts_close(fts);
1195	for (i = 0; i < cargvs; i++)
1196		free(cargv[i]);
1197	free(cargv);
1198	hdestroy();
1199	return rc;
1200}
1201
1202/*
1203 * Delete all files and directories in "fl".
1204 * If called with a zero-length "fl", does nothing.
1205 * If dry_run is specified, simply write what would be done.
1206 * Return zero on failure, non-zero on success.
1207 */
1208int
1209flist_del(struct sess *sess, int root, const struct flist *fl, size_t flsz)
1210{
1211	ssize_t	 i;
1212	int	 flag;
1213
1214	if (flsz == 0)
1215		return 1;
1216
1217	assert(sess->opts->del);
1218	assert(sess->opts->recursive);
1219
1220	for (i = flsz - 1; i >= 0; i--) {
1221		LOG1(sess, "%s: deleting", fl[i].wpath);
1222		if (sess->opts->dry_run)
1223			continue;
1224		assert(root != -1);
1225		flag = S_ISDIR(fl[i].st.mode) ? AT_REMOVEDIR : 0;
1226		if (unlinkat(root, fl[i].wpath, flag) == -1 &&
1227		    errno != ENOENT) {
1228			ERR(sess, "%s: unlinkat", fl[i].wpath);
1229			return 0;
1230		}
1231	}
1232
1233	return 1;
1234}
1235