flist.c revision 1.36
1/*	$OpenBSD: flist.c,v 1.36 2021/11/03 14:42:12 deraadt Exp $ */
2/*
3 * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2019 Florian Obser <florian@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18#include <sys/stat.h>
19
20#include <assert.h>
21#include <errno.h>
22#include <fcntl.h>
23#include <fts.h>
24#include <limits.h>
25#include <inttypes.h>
26#include <search.h>
27#include <stdio.h>
28#include <stdlib.h>
29#include <string.h>
30#include <unistd.h>
31
32#include "extern.h"
33
34/*
35 * We allocate our file list in chunk sizes so as not to do it one by
36 * one.
37 * Preferrably we get one or two allocation.
38 */
39#define	FLIST_CHUNK_SIZE (1024)
40
41/*
42 * These flags are part of the rsync protocol.
43 * They are sent as the first byte for a file transmission and encode
44 * information that affects subsequent transmissions.
45 */
46#define FLIST_TOP_LEVEL	 0x0001 /* needed for remote --delete */
47#define FLIST_MODE_SAME  0x0002 /* mode is repeat */
48#define	FLIST_RDEV_SAME  0x0004 /* rdev is repeat */
49#define	FLIST_UID_SAME	 0x0008 /* uid is repeat */
50#define	FLIST_GID_SAME	 0x0010 /* gid is repeat */
51#define	FLIST_NAME_SAME  0x0020 /* name is repeat */
52#define FLIST_NAME_LONG	 0x0040 /* name >255 bytes */
53#define FLIST_TIME_SAME  0x0080 /* time is repeat */
54
55/*
56 * Required way to sort a filename list.
57 */
58static int
59flist_cmp(const void *p1, const void *p2)
60{
61	const struct flist *f1 = p1, *f2 = p2;
62
63	return strcmp(f1->wpath, f2->wpath);
64}
65
66/*
67 * Deduplicate our file list (which may be zero-length).
68 * Returns zero on failure, non-zero on success.
69 */
70static int
71flist_dedupe(struct flist **fl, size_t *sz)
72{
73	size_t		 i, j;
74	struct flist	*new;
75	struct flist	*f, *fnext;
76
77	if (*sz == 0)
78		return 1;
79
80	/* Create a new buffer, "new", and copy. */
81
82	new = calloc(*sz, sizeof(struct flist));
83	if (new == NULL) {
84		ERR("calloc");
85		return 0;
86	}
87
88	for (i = j = 0; i < *sz - 1; i++) {
89		f = &(*fl)[i];
90		fnext = &(*fl)[i + 1];
91
92		if (strcmp(f->wpath, fnext->wpath)) {
93			new[j++] = *f;
94			continue;
95		}
96
97		/*
98		 * Our working (destination) paths are the same.
99		 * If the actual file is the same (as given on the
100		 * command-line), then we can just discard the first.
101		 * Otherwise, we need to bail out: it means we have two
102		 * different files with the relative path on the
103		 * destination side.
104		 */
105
106		if (strcmp(f->path, fnext->path) == 0) {
107			new[j++] = *f;
108			i++;
109			WARNX("%s: duplicate path: %s",
110			    f->wpath, f->path);
111			free(fnext->path);
112			free(fnext->link);
113			fnext->path = fnext->link = NULL;
114			continue;
115		}
116
117		ERRX("%s: duplicate working path for "
118		    "possibly different file: %s, %s",
119		    f->wpath, f->path, fnext->path);
120		free(new);
121		return 0;
122	}
123
124	/* Don't forget the last entry. */
125
126	if (i == *sz - 1)
127		new[j++] = (*fl)[i];
128
129	/*
130	 * Reassign to the deduplicated array.
131	 * If we started out with *sz > 0, which we check for at the
132	 * beginning, then we'll always continue having *sz > 0.
133	 */
134
135	free(*fl);
136	*fl = new;
137	*sz = j;
138	assert(*sz);
139	return 1;
140}
141
142/*
143 * We're now going to find our top-level directories.
144 * This only applies to recursive mode.
145 * If we have the first element as the ".", then that's the "top
146 * directory" of our transfer.
147 * Otherwise, mark up all top-level directories in the set.
148 * XXX: the FLIST_TOP_LEVEL flag should indicate what is and what isn't
149 * a top-level directory, but I'm not sure if GPL rsync(1) respects it
150 * the same way.
151 */
152static void
153flist_topdirs(struct sess *sess, struct flist *fl, size_t flsz)
154{
155	size_t		 i;
156	const char	*cp;
157
158	if (!sess->opts->recursive)
159		return;
160
161	if (flsz && strcmp(fl[0].wpath, ".")) {
162		for (i = 0; i < flsz; i++) {
163			if (!S_ISDIR(fl[i].st.mode))
164				continue;
165			cp = strchr(fl[i].wpath, '/');
166			if (cp != NULL && cp[1] != '\0')
167				continue;
168			fl[i].st.flags |= FLSTAT_TOP_DIR;
169			LOG4("%s: top-level", fl[i].wpath);
170		}
171	} else if (flsz) {
172		fl[0].st.flags |= FLSTAT_TOP_DIR;
173		LOG4("%s: top-level", fl[0].wpath);
174	}
175}
176
177/*
178 * Filter through the fts() file information.
179 * We want directories (pre-order), regular files, and symlinks.
180 * Everything else is skipped and possibly warned about.
181 * Return zero to skip, non-zero to examine.
182 */
183static int
184flist_fts_check(struct sess *sess, FTSENT *ent)
185{
186
187	if (ent->fts_info == FTS_F  ||
188	    ent->fts_info == FTS_D ||
189	    ent->fts_info == FTS_SL ||
190	    ent->fts_info == FTS_SLNONE)
191		return 1;
192
193	if (ent->fts_info == FTS_DC) {
194		WARNX("%s: directory cycle", ent->fts_path);
195	} else if (ent->fts_info == FTS_DNR) {
196		errno = ent->fts_errno;
197		WARN("%s: unreadable directory", ent->fts_path);
198	} else if (ent->fts_info == FTS_DOT) {
199		WARNX("%s: skipping dot-file", ent->fts_path);
200	} else if (ent->fts_info == FTS_ERR) {
201		errno = ent->fts_errno;
202		WARN("%s", ent->fts_path);
203	} else if (ent->fts_info == FTS_DEFAULT) {
204		if ((sess->opts->devices && (S_ISBLK(ent->fts_statp->st_mode) ||
205		    S_ISCHR(ent->fts_statp->st_mode))) ||
206		    (sess->opts->specials &&
207		    (S_ISFIFO(ent->fts_statp->st_mode) ||
208		    S_ISSOCK(ent->fts_statp->st_mode)))) {
209			return 1;
210		}
211		WARNX("%s: skipping special", ent->fts_path);
212	} else if (ent->fts_info == FTS_NS) {
213		errno = ent->fts_errno;
214		WARN("%s: could not stat", ent->fts_path);
215	}
216
217	return 0;
218}
219
220/*
221 * Copy necessary elements in "st" into the fields of "f".
222 */
223static void
224flist_copy_stat(struct flist *f, const struct stat *st)
225{
226	f->st.mode = st->st_mode;
227	f->st.uid = st->st_uid;
228	f->st.gid = st->st_gid;
229	f->st.size = st->st_size;
230	f->st.mtime = st->st_mtime;
231	f->st.rdev = st->st_rdev;
232}
233
234void
235flist_free(struct flist *f, size_t sz)
236{
237	size_t	 i;
238
239	if (f == NULL)
240		return;
241
242	for (i = 0; i < sz; i++) {
243		free(f[i].path);
244		free(f[i].link);
245	}
246	free(f);
247}
248
249/*
250 * Serialise our file list (which may be zero-length) to the wire.
251 * Makes sure that the receiver isn't going to block on sending us
252 * return messages on the log channel.
253 * Return zero on failure, non-zero on success.
254 */
255int
256flist_send(struct sess *sess, int fdin, int fdout, const struct flist *fl,
257    size_t flsz)
258{
259	size_t		 i, sz, gidsz = 0, uidsz = 0;
260	uint8_t		 flag;
261	const struct flist *f;
262	const char	*fn;
263	struct ident	*gids = NULL, *uids = NULL;
264	int		 rc = 0;
265
266	/* Double-check that we've no pending multiplexed data. */
267
268	LOG2("sending file metadata list: %zu", flsz);
269
270	for (i = 0; i < flsz; i++) {
271		f = &fl[i];
272		fn = f->wpath;
273		sz = strlen(f->wpath);
274		assert(sz > 0);
275		assert(sz < INT32_MAX);
276
277		/*
278		 * If applicable, unclog the read buffer.
279		 * This happens when the receiver has a lot of log
280		 * messages and all we're doing is sending our file list
281		 * without checking for messages.
282		 */
283
284		if (sess->mplex_reads &&
285		    io_read_check(fdin) &&
286		    !io_read_flush(sess, fdin)) {
287			ERRX1("io_read_flush");
288			goto out;
289		}
290
291		/*
292		 * For ease, make all of our filenames be "long"
293		 * regardless their actual length.
294		 * This also makes sure that we don't transmit a zero
295		 * byte unintentionally.
296		 */
297
298		flag = FLIST_NAME_LONG;
299		if ((FLSTAT_TOP_DIR & f->st.flags))
300			flag |= FLIST_TOP_LEVEL;
301
302		LOG3("%s: sending file metadata: "
303			"size %jd, mtime %jd, mode %o",
304			fn, (intmax_t)f->st.size,
305			(intmax_t)f->st.mtime, f->st.mode);
306
307		/* Now write to the wire. */
308		/* FIXME: buffer this. */
309
310		if (!io_write_byte(sess, fdout, flag)) {
311			ERRX1("io_write_byte");
312			goto out;
313		} else if (!io_write_int(sess, fdout, sz)) {
314			ERRX1("io_write_int");
315			goto out;
316		} else if (!io_write_buf(sess, fdout, fn, sz)) {
317			ERRX1("io_write_buf");
318			goto out;
319		} else if (!io_write_long(sess, fdout, f->st.size)) {
320			ERRX1("io_write_long");
321			goto out;
322		} else if (!io_write_uint(sess, fdout, (uint32_t)f->st.mtime)) {
323			ERRX1("io_write_uint");
324			goto out;
325		} else if (!io_write_uint(sess, fdout, f->st.mode)) {
326			ERRX1("io_write_uint");
327			goto out;
328		}
329
330		/* Conditional part: uid. */
331
332		if (sess->opts->preserve_uids) {
333			if (!io_write_uint(sess, fdout, f->st.uid)) {
334				ERRX1("io_write_uint");
335				goto out;
336			}
337			if (!idents_add(0, &uids, &uidsz, f->st.uid)) {
338				ERRX1("idents_add");
339				goto out;
340			}
341		}
342
343		/* Conditional part: gid. */
344
345		if (sess->opts->preserve_gids) {
346			if (!io_write_uint(sess, fdout, f->st.gid)) {
347				ERRX1("io_write_uint");
348				goto out;
349			}
350			if (!idents_add(1, &gids, &gidsz, f->st.gid)) {
351				ERRX1("idents_add");
352				goto out;
353			}
354		}
355
356		/* Conditional part: devices & special files. */
357
358		if ((sess->opts->devices && (S_ISBLK(f->st.mode) ||
359		    S_ISCHR(f->st.mode))) ||
360		    (sess->opts->specials && (S_ISFIFO(f->st.mode) ||
361		    S_ISSOCK(f->st.mode)))) {
362			if (!io_write_int(sess, fdout, f->st.rdev)) {
363				ERRX1("io_write_int");
364				goto out;
365			}
366		}
367
368		/* Conditional part: link. */
369
370		if (S_ISLNK(f->st.mode) &&
371		    sess->opts->preserve_links) {
372			fn = f->link;
373			sz = strlen(f->link);
374			assert(sz < INT32_MAX);
375			if (!io_write_int(sess, fdout, sz)) {
376				ERRX1("io_write_int");
377				goto out;
378			}
379			if (!io_write_buf(sess, fdout, fn, sz)) {
380				ERRX1("io_write_buf");
381				goto out;
382			}
383		}
384
385		if (S_ISREG(f->st.mode))
386			sess->total_size += f->st.size;
387	}
388
389	/* Signal end of file list. */
390
391	if (!io_write_byte(sess, fdout, 0)) {
392		ERRX1("io_write_byte");
393		goto out;
394	}
395
396	/* Conditionally write identifier lists. */
397
398	if (sess->opts->preserve_uids && !sess->opts->numeric_ids) {
399		LOG2("sending uid list: %zu", uidsz);
400		if (!idents_send(sess, fdout, uids, uidsz)) {
401			ERRX1("idents_send");
402			goto out;
403		}
404	}
405
406	if (sess->opts->preserve_gids && !sess->opts->numeric_ids) {
407		LOG2("sending gid list: %zu", gidsz);
408		if (!idents_send(sess, fdout, gids, gidsz)) {
409			ERRX1("idents_send");
410			goto out;
411		}
412	}
413
414	rc = 1;
415out:
416	idents_free(gids, gidsz);
417	idents_free(uids, uidsz);
418	return rc;
419}
420
421/*
422 * Read the filename of a file list.
423 * This is the most expensive part of the file list transfer, so a lot
424 * of attention has gone into transmitting as little as possible.
425 * Micro-optimisation, but whatever.
426 * Fills in "f" with the full path on success.
427 * Returns zero on failure, non-zero on success.
428 */
429static int
430flist_recv_name(struct sess *sess, int fd, struct flist *f, uint8_t flags,
431    char last[PATH_MAX])
432{
433	uint8_t		 bval;
434	size_t		 partial = 0;
435	size_t		 pathlen = 0, len;
436
437	/*
438	 * Read our filename.
439	 * If we have FLIST_NAME_SAME, we inherit some of the last
440	 * transmitted name.
441	 * If we have FLIST_NAME_LONG, then the string length is greater
442	 * than byte-size.
443	 */
444
445	if (FLIST_NAME_SAME & flags) {
446		if (!io_read_byte(sess, fd, &bval)) {
447			ERRX1("io_read_byte");
448			return 0;
449		}
450		partial = bval;
451	}
452
453	/* Get the (possibly-remaining) filename length. */
454
455	if (FLIST_NAME_LONG & flags) {
456		if (!io_read_size(sess, fd, &pathlen)) {
457			ERRX1("io_read_size");
458			return 0;
459		}
460	} else {
461		if (!io_read_byte(sess, fd, &bval)) {
462			ERRX1("io_read_byte");
463			return 0;
464		}
465		pathlen = bval;
466	}
467
468	/* Allocate our full filename length. */
469	/* FIXME: maximum pathname length. */
470
471	if ((len = pathlen + partial) == 0) {
472		ERRX("security violation: zero-length pathname");
473		return 0;
474	}
475
476	if ((f->path = malloc(len + 1)) == NULL) {
477		ERR("malloc");
478		return 0;
479	}
480	f->path[len] = '\0';
481
482	if (FLIST_NAME_SAME & flags)
483		memcpy(f->path, last, partial);
484
485	if (!io_read_buf(sess, fd, f->path + partial, pathlen)) {
486		ERRX1("io_read_buf");
487		return 0;
488	}
489
490	if (f->path[0] == '/') {
491		ERRX("security violation: absolute pathname: %s",
492		    f->path);
493		return 0;
494	}
495
496	if (strstr(f->path, "/../") != NULL ||
497	    (len > 2 && strcmp(f->path + len - 3, "/..") == 0) ||
498	    (len > 2 && strncmp(f->path, "../", 3) == 0) ||
499	    strcmp(f->path, "..") == 0) {
500		ERRX("%s: security violation: backtracking pathname",
501		    f->path);
502		return 0;
503	}
504
505	/* Record our last path and construct our filename. */
506
507	strlcpy(last, f->path, PATH_MAX);
508	f->wpath = f->path;
509	return 1;
510}
511
512/*
513 * Reallocate a file list in chunks of FLIST_CHUNK_SIZE;
514 * Returns zero on failure, non-zero on success.
515 */
516static int
517flist_realloc(struct flist **fl, size_t *sz, size_t *max)
518{
519	void	*pp;
520
521	if (*sz + 1 <= *max)  {
522		(*sz)++;
523		return 1;
524	}
525
526	pp = recallocarray(*fl, *max,
527		*max + FLIST_CHUNK_SIZE, sizeof(struct flist));
528	if (pp == NULL) {
529		ERR("recallocarray");
530		return 0;
531	}
532	*fl = pp;
533	*max += FLIST_CHUNK_SIZE;
534	(*sz)++;
535	return 1;
536}
537
538/*
539 * Copy a regular or symbolic link file "path" into "f".
540 * This handles the correct path creation and symbolic linking.
541 * Returns zero on failure, non-zero on success.
542 */
543static int
544flist_append(struct flist *f, struct stat *st, const char *path)
545{
546
547	/*
548	 * Copy the full path for local addressing and transmit
549	 * only the filename part for the receiver.
550	 */
551
552	if ((f->path = strdup(path)) == NULL) {
553		ERR("strdup");
554		return 0;
555	}
556
557	if ((f->wpath = strrchr(f->path, '/')) == NULL)
558		f->wpath = f->path;
559	else
560		f->wpath++;
561
562	/*
563	 * On the receiving end, we'll strip out all bits on the
564	 * mode except for the file permissions.
565	 * No need to warn about it here.
566	 */
567
568	flist_copy_stat(f, st);
569
570	/* Optionally copy link information. */
571
572	if (S_ISLNK(st->st_mode)) {
573		f->link = symlink_read(f->path);
574		if (f->link == NULL) {
575			ERRX1("symlink_read");
576			return 0;
577		}
578	}
579
580	return 1;
581}
582
583/*
584 * Receive a file list from the wire, filling in length "sz" (which may
585 * possibly be zero) and list "flp" on success.
586 * Return zero on failure, non-zero on success.
587 */
588int
589flist_recv(struct sess *sess, int fd, struct flist **flp, size_t *sz)
590{
591	struct flist	*fl = NULL;
592	struct flist	*ff;
593	const struct flist *fflast = NULL;
594	size_t		 flsz = 0, flmax = 0, lsz, gidsz = 0, uidsz = 0;
595	uint8_t		 flag;
596	char		 last[PATH_MAX];
597	int64_t		 lval; /* temporary values... */
598	int32_t		 ival;
599	uint32_t	 uival;
600	struct ident	*gids = NULL, *uids = NULL;
601
602	last[0] = '\0';
603
604	for (;;) {
605		if (!io_read_byte(sess, fd, &flag)) {
606			ERRX1("io_read_byte");
607			goto out;
608		} else if (flag == 0)
609			break;
610
611		if (!flist_realloc(&fl, &flsz, &flmax)) {
612			ERRX1("flist_realloc");
613			goto out;
614		}
615
616		ff = &fl[flsz - 1];
617		fflast = flsz > 1 ? &fl[flsz - 2] : NULL;
618
619		/* Filename first. */
620
621		if (!flist_recv_name(sess, fd, ff, flag, last)) {
622			ERRX1("flist_recv_name");
623			goto out;
624		}
625
626		/* Read the file size. */
627
628		if (!io_read_long(sess, fd, &lval)) {
629			ERRX1("io_read_long");
630			goto out;
631		}
632		ff->st.size = lval;
633
634		/* Read the modification time. */
635
636		if (!(FLIST_TIME_SAME & flag)) {
637			if (!io_read_uint(sess, fd, &uival)) {
638				ERRX1("io_read_uint");
639				goto out;
640			}
641			ff->st.mtime = uival;	/* beyond 2038 */
642		} else if (fflast == NULL) {
643			ERRX("same time without last entry");
644			goto out;
645		}  else
646			ff->st.mtime = fflast->st.mtime;
647
648		/* Read the file mode. */
649
650		if (!(FLIST_MODE_SAME & flag)) {
651			if (!io_read_uint(sess, fd, &uival)) {
652				ERRX1("io_read_uint");
653				goto out;
654			}
655			ff->st.mode = uival;
656		} else if (fflast == NULL) {
657			ERRX("same mode without last entry");
658			goto out;
659		} else
660			ff->st.mode = fflast->st.mode;
661
662		/* Conditional part: uid. */
663
664		if (sess->opts->preserve_uids) {
665			if (!(FLIST_UID_SAME & flag)) {
666				if (!io_read_uint(sess, fd, &uival)) {
667					ERRX1("io_read_int");
668					goto out;
669				}
670				ff->st.uid = uival;
671			} else if (fflast == NULL) {
672				ERRX("same uid without last entry");
673				goto out;
674			} else
675				ff->st.uid = fflast->st.uid;
676		}
677
678		/* Conditional part: gid. */
679
680		if (sess->opts->preserve_gids) {
681			if (!(FLIST_GID_SAME & flag)) {
682				if (!io_read_uint(sess, fd, &uival)) {
683					ERRX1("io_read_uint");
684					goto out;
685				}
686				ff->st.gid = uival;
687			} else if (fflast == NULL) {
688				ERRX("same gid without last entry");
689				goto out;
690			} else
691				ff->st.gid = fflast->st.gid;
692		}
693
694		/* Conditional part: devices & special files. */
695
696		if ((sess->opts->devices && (S_ISBLK(ff->st.mode) ||
697		    S_ISCHR(ff->st.mode))) ||
698		    (sess->opts->specials && (S_ISFIFO(ff->st.mode) ||
699		    S_ISSOCK(ff->st.mode)))) {
700			if (!(FLIST_RDEV_SAME & flag)) {
701				if (!io_read_int(sess, fd, &ival)) {
702					ERRX1("io_read_int");
703					goto out;
704				}
705				ff->st.rdev = ival;
706			} else if (fflast == NULL) {
707				ERRX("same device without last entry");
708				goto out;
709			} else
710				ff->st.rdev = fflast->st.rdev;
711		}
712
713		/* Conditional part: link. */
714
715		if (S_ISLNK(ff->st.mode) &&
716		    sess->opts->preserve_links) {
717			if (!io_read_size(sess, fd, &lsz)) {
718				ERRX1("io_read_size");
719				goto out;
720			} else if (lsz == 0) {
721				ERRX("empty link name");
722				goto out;
723			}
724			ff->link = calloc(lsz + 1, 1);
725			if (ff->link == NULL) {
726				ERR("calloc");
727				goto out;
728			}
729			if (!io_read_buf(sess, fd, ff->link, lsz)) {
730				ERRX1("io_read_buf");
731				goto out;
732			}
733		}
734
735		LOG3("%s: received file metadata: "
736			"size %jd, mtime %jd, mode %o, rdev (%d, %d)",
737			ff->path, (intmax_t)ff->st.size,
738			(intmax_t)ff->st.mtime, ff->st.mode,
739			major(ff->st.rdev), minor(ff->st.rdev));
740
741		if (S_ISREG(ff->st.mode))
742			sess->total_size += ff->st.size;
743	}
744
745	/* Conditionally read the user/group list. */
746
747	if (sess->opts->preserve_uids && !sess->opts->numeric_ids) {
748		if (!idents_recv(sess, fd, &uids, &uidsz)) {
749			ERRX1("idents_recv");
750			goto out;
751		}
752		LOG2("received uid list: %zu", uidsz);
753	}
754
755	if (sess->opts->preserve_gids && !sess->opts->numeric_ids) {
756		if (!idents_recv(sess, fd, &gids, &gidsz)) {
757			ERRX1("idents_recv");
758			goto out;
759		}
760		LOG2("received gid list: %zu", gidsz);
761	}
762
763	/* Remember to order the received list. */
764
765	LOG2("received file metadata list: %zu", flsz);
766	qsort(fl, flsz, sizeof(struct flist), flist_cmp);
767	flist_topdirs(sess, fl, flsz);
768	*sz = flsz;
769	*flp = fl;
770
771	/* Conditionally remap and reassign identifiers. */
772
773	if (sess->opts->preserve_uids && !sess->opts->numeric_ids) {
774		idents_remap(sess, 0, uids, uidsz);
775		idents_assign_uid(sess, fl, flsz, uids, uidsz);
776	}
777
778	if (sess->opts->preserve_gids && !sess->opts->numeric_ids) {
779		idents_remap(sess, 1, gids, gidsz);
780		idents_assign_gid(sess, fl, flsz, gids, gidsz);
781	}
782
783	idents_free(gids, gidsz);
784	idents_free(uids, uidsz);
785	return 1;
786out:
787	flist_free(fl, flsz);
788	idents_free(gids, gidsz);
789	idents_free(uids, uidsz);
790	*sz = 0;
791	*flp = NULL;
792	return 0;
793}
794
795/*
796 * Generate a flist possibly-recursively given a file root, which may
797 * also be a regular file or symlink.
798 * On success, augments the generated list in "flp" of length "sz".
799 * Returns zero on failure, non-zero on success.
800 */
801static int
802flist_gen_dirent(struct sess *sess, char *root, struct flist **fl, size_t *sz,
803    size_t *max)
804{
805	char		*cargv[2], *cp;
806	int		 rc = 0, flag;
807	FTS		*fts;
808	FTSENT		*ent;
809	struct flist	*f;
810	size_t		 i, flsz = 0, nxdev = 0, stripdir;
811	dev_t		*newxdev, *xdev = NULL;
812	struct stat	 st;
813
814	cargv[0] = root;
815	cargv[1] = NULL;
816
817	/*
818	 * If we're a file, then revert to the same actions we use for
819	 * the non-recursive scan.
820	 */
821
822	if (lstat(root, &st) == -1) {
823		ERR("%s: lstat", root);
824		return 0;
825	} else if (S_ISREG(st.st_mode)) {
826		/* filter files */
827		if (rules_match(root, 0) == -1) {
828			WARNX("%s: skipping excluded file", root);
829			return 1;
830		}
831		if (!flist_realloc(fl, sz, max)) {
832			ERRX1("flist_realloc");
833			return 0;
834		}
835		f = &(*fl)[(*sz) - 1];
836		assert(f != NULL);
837
838		if (!flist_append(f, &st, root)) {
839			ERRX1("flist_append");
840			return 0;
841		}
842		return 1;
843	} else if (S_ISLNK(st.st_mode)) {
844		if (!sess->opts->preserve_links) {
845			WARNX("%s: skipping symlink", root);
846			return 1;
847		}
848		/* filter files */
849		if (rules_match(root, 0) == -1) {
850			WARNX("%s: skipping excluded symlink", root);
851			return 1;
852		}
853		if (!flist_realloc(fl, sz, max)) {
854			ERRX1("flist_realloc");
855			return 0;
856		}
857		f = &(*fl)[(*sz) - 1];
858		assert(f != NULL);
859
860		if (!flist_append(f, &st, root)) {
861			ERRX1("flist_append");
862			return 0;
863		}
864		return 1;
865	} else if (!S_ISDIR(st.st_mode)) {
866		WARNX("%s: skipping special", root);
867		return 1;
868	}
869
870	/*
871	 * If we end with a slash, it means that we're not supposed to
872	 * copy the directory part itself---only the contents.
873	 * So set "stripdir" to be what we take out.
874	 */
875
876	stripdir = strlen(root);
877	assert(stripdir > 0);
878	if (root[stripdir - 1] != '/')
879		stripdir = 0;
880
881	/*
882	 * If we're not stripping anything, then see if we need to strip
883	 * out the leading material in the path up to and including the
884	 * last directory component.
885	 */
886
887	if (stripdir == 0)
888		if ((cp = strrchr(root, '/')) != NULL)
889			stripdir = cp - root + 1;
890
891	/*
892	 * If we're recursive, then we need to take down all of the
893	 * files and directory components, so use fts(3).
894	 * Copying the information file-by-file into the flstat.
895	 * We'll make sense of it in flist_send.
896	 */
897
898	if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) {
899		ERR("fts_open");
900		return 0;
901	}
902
903	errno = 0;
904	while ((ent = fts_read(fts)) != NULL) {
905		if (!flist_fts_check(sess, ent)) {
906			errno = 0;
907			continue;
908		}
909
910		/* We don't allow symlinks without -l. */
911
912		assert(ent->fts_statp != NULL);
913		if (S_ISLNK(ent->fts_statp->st_mode) &&
914		    !sess->opts->preserve_links) {
915			WARNX("%s: skipping symlink", ent->fts_path);
916			continue;
917		}
918
919		/*
920		 * If rsync is told to avoid crossing a filesystem
921		 * boundary when recursing, then replace all mount point
922		 * directories with empty directories.  The latter is
923		 * prevented by telling rsync multiple times to avoid
924		 * crossing a filesystem boundary when recursing.
925		 * Replacing mount point directories is tricky. We need
926		 * to sort out which directories to include.  As such,
927		 * keep track of unique device inodes, and use these for
928		 * comparison.
929		 */
930
931		if (sess->opts->one_file_system &&
932		    ent->fts_statp->st_dev != st.st_dev) {
933			if (sess->opts->one_file_system > 1 ||
934			    !S_ISDIR(ent->fts_statp->st_mode))
935				continue;
936
937			flag = 0;
938			for (i = 0; i < nxdev; i++)
939				if (xdev[i] == ent->fts_statp->st_dev) {
940					flag = 1;
941					break;
942				}
943			if (flag)
944				continue;
945
946			if ((newxdev = reallocarray(xdev, nxdev + 1,
947			    sizeof(dev_t))) == NULL) {
948				ERRX1("reallocarray");
949				goto out;
950			}
951			xdev = newxdev;
952			xdev[nxdev] = ent->fts_statp->st_dev;
953			nxdev++;
954		}
955
956		/* filter files */
957		if (rules_match(ent->fts_path + stripdir,
958		    (ent->fts_info == FTS_D)) == -1) {
959			WARNX("%s: skipping excluded file",
960			    ent->fts_path + stripdir);
961			fts_set(fts, ent, FTS_SKIP);
962			continue;
963		}
964
965		/* Allocate a new file entry. */
966
967		if (!flist_realloc(fl, sz, max)) {
968			ERRX1("flist_realloc");
969			goto out;
970		}
971		flsz++;
972		f = &(*fl)[*sz - 1];
973
974		/* Our path defaults to "." for the root. */
975
976		if (ent->fts_path[stripdir] == '\0') {
977			if (asprintf(&f->path, "%s.", ent->fts_path) == -1) {
978				ERR("asprintf");
979				f->path = NULL;
980				goto out;
981			}
982		} else {
983			if ((f->path = strdup(ent->fts_path)) == NULL) {
984				ERR("strdup");
985				goto out;
986			}
987		}
988
989		f->wpath = f->path + stripdir;
990		flist_copy_stat(f, ent->fts_statp);
991
992		/* Optionally copy link information. */
993
994		if (S_ISLNK(ent->fts_statp->st_mode)) {
995			f->link = symlink_read(ent->fts_accpath);
996			if (f->link == NULL) {
997				ERRX1("symlink_read");
998				goto out;
999			}
1000		}
1001
1002		/* Reset errno for next fts_read() call. */
1003		errno = 0;
1004	}
1005	if (errno) {
1006		ERR("fts_read");
1007		goto out;
1008	}
1009
1010	LOG3("generated %zu filenames: %s", flsz, root);
1011	rc = 1;
1012out:
1013	fts_close(fts);
1014	free(xdev);
1015	return rc;
1016}
1017
1018/*
1019 * Generate a flist recursively given the array of directories (or
1020 * files, symlinks, doesn't matter) specified in argv (argc >0).
1021 * On success, stores the generated list in "flp" with length "sz",
1022 * which may be zero.
1023 * Returns zero on failure, non-zero on success.
1024 */
1025static int
1026flist_gen_dirs(struct sess *sess, size_t argc, char **argv, struct flist **flp,
1027    size_t *sz)
1028{
1029	size_t		 i, max = 0;
1030
1031	for (i = 0; i < argc; i++)
1032		if (!flist_gen_dirent(sess, argv[i], flp, sz, &max))
1033			break;
1034
1035	if (i == argc) {
1036		LOG2("recursively generated %zu filenames", *sz);
1037		return 1;
1038	}
1039
1040	ERRX1("flist_gen_dirent");
1041	flist_free(*flp, max);
1042	*flp = NULL;
1043	*sz = 0;
1044	return 0;
1045}
1046
1047/*
1048 * Generate list of files from the command-line argc (>0) and argv.
1049 * On success, stores the generated list in "flp" with length "sz",
1050 * which may be zero.
1051 * Returns zero on failure, non-zero on success.
1052 */
1053static int
1054flist_gen_files(struct sess *sess, size_t argc, char **argv,
1055    struct flist **flp, size_t *sz)
1056{
1057	struct flist	*fl = NULL, *f;
1058	size_t		 i, flsz = 0;
1059	struct stat	 st;
1060
1061	assert(argc);
1062
1063	if ((fl = calloc(argc, sizeof(struct flist))) == NULL) {
1064		ERR("calloc");
1065		return 0;
1066	}
1067
1068	for (i = 0; i < argc; i++) {
1069		if (argv[i][0] == '\0')
1070			continue;
1071		if (lstat(argv[i], &st) == -1) {
1072			ERR("%s: lstat", argv[i]);
1073			goto out;
1074		}
1075
1076		/*
1077		 * File type checks.
1078		 * In non-recursive mode, we don't accept directories.
1079		 * We also skip symbolic links without -l.
1080		 * Beyond that, we only accept regular files.
1081		 */
1082
1083		if (S_ISDIR(st.st_mode)) {
1084			WARNX("%s: skipping directory", argv[i]);
1085			continue;
1086		} else if (S_ISLNK(st.st_mode)) {
1087			if (!sess->opts->preserve_links) {
1088				WARNX("%s: skipping symlink", argv[i]);
1089				continue;
1090			}
1091		} else if (!S_ISREG(st.st_mode)) {
1092			WARNX("%s: skipping special", argv[i]);
1093			continue;
1094		}
1095
1096		/* filter files */
1097		if (rules_match(argv[i], S_ISDIR(st.st_mode)) == -1) {
1098			WARNX("%s: skipping excluded file", argv[i]);
1099			continue;
1100		}
1101
1102		f = &fl[flsz++];
1103		assert(f != NULL);
1104
1105		/* Add this file to our file-system worldview. */
1106
1107		if (!flist_append(f, &st, argv[i])) {
1108			ERRX1("flist_append");
1109			goto out;
1110		}
1111	}
1112
1113	LOG2("non-recursively generated %zu filenames", flsz);
1114	*sz = flsz;
1115	*flp = fl;
1116	return 1;
1117out:
1118	flist_free(fl, argc);
1119	*sz = 0;
1120	*flp = NULL;
1121	return 0;
1122}
1123
1124/*
1125 * Generate a sorted, de-duplicated list of file metadata.
1126 * In non-recursive mode (the default), we use only the files we're
1127 * given.
1128 * Otherwise, directories are recursively examined.
1129 * Returns zero on failure, non-zero on success.
1130 * On success, "fl" will need to be freed with flist_free().
1131 */
1132int
1133flist_gen(struct sess *sess, size_t argc, char **argv, struct flist **flp,
1134    size_t *sz)
1135{
1136	int	 rc;
1137
1138	assert(argc > 0);
1139	rc = sess->opts->recursive ?
1140		flist_gen_dirs(sess, argc, argv, flp, sz) :
1141		flist_gen_files(sess, argc, argv, flp, sz);
1142
1143	/* After scanning, lock our file-system view. */
1144
1145	if (!rc)
1146		return 0;
1147
1148	qsort(*flp, *sz, sizeof(struct flist), flist_cmp);
1149
1150	if (flist_dedupe(flp, sz)) {
1151		flist_topdirs(sess, *flp, *sz);
1152		return 1;
1153	}
1154
1155	ERRX1("flist_dedupe");
1156	flist_free(*flp, *sz);
1157	*flp = NULL;
1158	*sz = 0;
1159	return 0;
1160}
1161
1162/*
1163 * Generate a list of files in root to delete that are within the
1164 * top-level directories stipulated by "wfl".
1165 * Only handles symbolic links, directories, and regular files.
1166 * Returns zero on failure (fl and flsz will be NULL and zero), non-zero
1167 * on success.
1168 * On success, "fl" will need to be freed with flist_free().
1169 */
1170int
1171flist_gen_dels(struct sess *sess, const char *root, struct flist **fl,
1172    size_t *sz,	const struct flist *wfl, size_t wflsz)
1173{
1174	char		**cargv = NULL;
1175	int		  rc = 0, c, flag;
1176	FTS		 *fts = NULL;
1177	FTSENT		 *ent;
1178	struct flist	 *f;
1179	struct stat	  st;
1180	size_t		  cargvs = 0, i, j, max = 0, stripdir;
1181	ENTRY		  hent;
1182	ENTRY		 *hentp;
1183
1184	*fl = NULL;
1185	*sz = 0;
1186
1187	/* Only run this code when we're recursive. */
1188
1189	if (!sess->opts->recursive)
1190		return 1;
1191
1192	/*
1193	 * Gather up all top-level directories for scanning.
1194	 * This is stipulated by rsync's --delete behaviour, where we
1195	 * only delete things in the top-level directories given on the
1196	 * command line.
1197	 */
1198
1199	assert(wflsz > 0);
1200	for (i = 0; i < wflsz; i++)
1201		if (FLSTAT_TOP_DIR & wfl[i].st.flags)
1202			cargvs++;
1203	if (cargvs == 0)
1204		return 1;
1205
1206	if ((cargv = calloc(cargvs + 1, sizeof(char *))) == NULL) {
1207		ERR("calloc");
1208		return 0;
1209	}
1210
1211	/*
1212	 * If we're given just a "." as the first entry, that means
1213	 * we're doing a relative copy with a trailing slash.
1214	 * Special-case this just for the sake of simplicity.
1215	 * Otherwise, look through all top-levels.
1216	 */
1217
1218	if (wflsz && strcmp(wfl[0].wpath, ".") == 0) {
1219		assert(cargvs == 1);
1220		assert(S_ISDIR(wfl[0].st.mode));
1221		if (asprintf(&cargv[0], "%s/", root) == -1) {
1222			ERR("asprintf");
1223			cargv[0] = NULL;
1224			goto out;
1225		}
1226		cargv[1] = NULL;
1227	} else {
1228		for (i = j = 0; i < wflsz; i++) {
1229			if (!(FLSTAT_TOP_DIR & wfl[i].st.flags))
1230				continue;
1231			assert(S_ISDIR(wfl[i].st.mode));
1232			assert(strcmp(wfl[i].wpath, "."));
1233			c = asprintf(&cargv[j], "%s/%s", root, wfl[i].wpath);
1234			if (c == -1) {
1235				ERR("asprintf");
1236				cargv[j] = NULL;
1237				goto out;
1238			}
1239			LOG4("%s: will scan for deletions", cargv[j]);
1240			j++;
1241		}
1242		assert(j == cargvs);
1243		cargv[j] = NULL;
1244	}
1245
1246	LOG2("delete from %zu directories", cargvs);
1247
1248	/*
1249	 * Next, use the standard hcreate(3) hashtable interface to hash
1250	 * all of the files that we want to synchronise.
1251	 * This way, we'll be able to determine which files we want to
1252	 * delete in O(n) time instead of O(n * search) time.
1253	 * Plus, we can do the scan in-band and only allocate the files
1254	 * we want to delete.
1255	 */
1256
1257	if (!hcreate(wflsz)) {
1258		ERR("hcreate");
1259		goto out;
1260	}
1261
1262	for (i = 0; i < wflsz; i++) {
1263		memset(&hent, 0, sizeof(ENTRY));
1264		if ((hent.key = strdup(wfl[i].wpath)) == NULL) {
1265			ERR("strdup");
1266			goto out;
1267		}
1268		if ((hentp = hsearch(hent, ENTER)) == NULL) {
1269			ERR("hsearch");
1270			goto out;
1271		} else if (hentp->key != hent.key) {
1272			ERRX("%s: duplicate", wfl[i].wpath);
1273			free(hent.key);
1274			goto out;
1275		}
1276	}
1277
1278	/*
1279	 * Now we're going to try to descend into all of the top-level
1280	 * directories stipulated by the file list.
1281	 * If the directories don't exist, it's ok.
1282	 */
1283
1284	if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) {
1285		ERR("fts_open");
1286		goto out;
1287	}
1288
1289	stripdir = strlen(root) + 1;
1290	errno = 0;
1291	while ((ent = fts_read(fts)) != NULL) {
1292		if (ent->fts_info == FTS_NS)
1293			continue;
1294		if (!flist_fts_check(sess, ent)) {
1295			errno = 0;
1296			continue;
1297		} else if (stripdir >= ent->fts_pathlen)
1298			continue;
1299
1300		assert(ent->fts_statp != NULL);
1301
1302		/*
1303		 * If rsync is told to avoid crossing a filesystem
1304		 * boundary when recursing, then exclude all entries
1305		 * from the list with a device inode, which does not
1306		 * match that of one of the top-level directories.
1307		 */
1308
1309		if (sess->opts->one_file_system) {
1310			flag = 0;
1311			for (i = 0; i < wflsz; i++) {
1312				if (stat(wfl[i].path, &st) == -1) {
1313					ERR("%s: stat", wfl[i].path);
1314					goto out;
1315				}
1316				if (ent->fts_statp->st_dev == st.st_dev) {
1317					flag = 1;
1318					break;
1319				}
1320			}
1321			if (!flag)
1322				continue;
1323		}
1324
1325		/* filter files on delete */
1326		/* TODO handle --delete-excluded */
1327		if (rules_match(ent->fts_path + stripdir,
1328		    (ent->fts_info == FTS_D)) == -1) {
1329			WARNX("skip excluded file %s",
1330			    ent->fts_path + stripdir);
1331			fts_set(fts, ent, FTS_SKIP);
1332			continue;
1333		}
1334
1335		/* Look up in hashtable. */
1336
1337		memset(&hent, 0, sizeof(ENTRY));
1338		hent.key = ent->fts_path + stripdir;
1339		if (hsearch(hent, FIND) != NULL)
1340			continue;
1341
1342		/* Not found: we'll delete it. */
1343
1344		if (!flist_realloc(fl, sz, &max)) {
1345			ERRX1("flist_realloc");
1346			goto out;
1347		}
1348		f = &(*fl)[*sz - 1];
1349
1350		if ((f->path = strdup(ent->fts_path)) == NULL) {
1351			ERR("strdup");
1352			goto out;
1353		}
1354		f->wpath = f->path + stripdir;
1355		flist_copy_stat(f, ent->fts_statp);
1356		errno = 0;
1357	}
1358
1359	if (errno) {
1360		ERR("fts_read");
1361		goto out;
1362	}
1363
1364	qsort(*fl, *sz, sizeof(struct flist), flist_cmp);
1365	rc = 1;
1366out:
1367	if (fts != NULL)
1368		fts_close(fts);
1369	for (i = 0; i < cargvs; i++)
1370		free(cargv[i]);
1371	free(cargv);
1372	hdestroy();
1373	return rc;
1374}
1375
1376/*
1377 * Delete all files and directories in "fl".
1378 * If called with a zero-length "fl", does nothing.
1379 * If dry_run is specified, simply write what would be done.
1380 * Return zero on failure, non-zero on success.
1381 */
1382int
1383flist_del(struct sess *sess, int root, const struct flist *fl, size_t flsz)
1384{
1385	ssize_t	 i;
1386	int	 flag;
1387
1388	if (flsz == 0)
1389		return 1;
1390
1391	assert(sess->opts->del);
1392	assert(sess->opts->recursive);
1393
1394	for (i = flsz - 1; i >= 0; i--) {
1395		LOG1("%s: deleting", fl[i].wpath);
1396		if (sess->opts->dry_run)
1397			continue;
1398		assert(root != -1);
1399		flag = S_ISDIR(fl[i].st.mode) ? AT_REMOVEDIR : 0;
1400		if (unlinkat(root, fl[i].wpath, flag) == -1 &&
1401		    errno != ENOENT) {
1402			ERR("%s: unlinkat", fl[i].wpath);
1403			return 0;
1404		}
1405	}
1406
1407	return 1;
1408}
1409