flist.c revision 1.20
1/*	$Id: flist.c,v 1.20 2019/03/18 15:33:21 deraadt Exp $ */
2/*
3 * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2019 Florian Obser <florian@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18#include <sys/param.h>
19#include <sys/stat.h>
20
21#include <assert.h>
22#include <errno.h>
23#include <fcntl.h>
24#include <fts.h>
25#include <inttypes.h>
26#include <search.h>
27#include <stdio.h>
28#include <stdlib.h>
29#include <string.h>
30#include <unistd.h>
31
32#include "extern.h"
33
34/*
35 * We allocate our file list in chunk sizes so as not to do it one by
36 * one.
37 * Preferably we get by with one or two allocations.
38 */
39#define	FLIST_CHUNK_SIZE (1024)
40
41/*
42 * These flags are part of the rsync protocol.
43 * They are sent as the first byte for a file transmission and encode
44 * information that affects subsequent transmissions.
45 */
46#define FLIST_TOP_LEVEL	 0x0001 /* needed for remote --delete */
47#define FLIST_MODE_SAME  0x0002 /* mode is repeat */
48#define	FLIST_RDEV_SAME  0x0004 /* rdev is repeat */
49#define	FLIST_UID_SAME	 0x0008 /* uid is repeat */
50#define	FLIST_GID_SAME	 0x0010 /* gid is repeat */
51#define	FLIST_NAME_SAME  0x0020 /* name is repeat */
52#define FLIST_NAME_LONG	 0x0040 /* name >255 bytes */
53#define FLIST_TIME_SAME  0x0080 /* time is repeat */
54
55/*
56 * Requied way to sort a filename list.
57 */
58static int
59flist_cmp(const void *p1, const void *p2)
60{
61	const struct flist *f1 = p1, *f2 = p2;
62
63	return strcmp(f1->wpath, f2->wpath);
64}
65
66/*
67 * Deduplicate our file list (which may be zero-length).
68 * Returns zero on failure, non-zero on success.
69 */
70static int
71flist_dedupe(struct sess *sess, struct flist **fl, size_t *sz)
72{
73	size_t		 i, j;
74	struct flist	*new;
75	struct flist	*f, *fnext;
76
77	if (*sz == 0)
78		return 1;
79
80	/* Create a new buffer, "new", and copy. */
81
82	new = calloc(*sz, sizeof(struct flist));
83	if (new == NULL) {
84		ERR(sess, "calloc");
85		return 0;
86	}
87
88	for (i = j = 0; i < *sz - 1; i++) {
89		f = &(*fl)[i];
90		fnext = &(*fl)[i + 1];
91
92		if (strcmp(f->wpath, fnext->wpath)) {
93			new[j++] = *f;
94			continue;
95		}
96
97		/*
98		 * Our working (destination) paths are the same.
99		 * If the actual file is the same (as given on the
100		 * command-line), then we can just discard the first.
101		 * Otherwise, we need to bail out: it means we have two
102		 * different files with the relative path on the
103		 * destination side.
104		 */
105
106		if (strcmp(f->path, fnext->path) == 0) {
107			new[j++] = *f;
108			i++;
109			WARNX(sess, "%s: duplicate path: %s",
110			    f->wpath, f->path);
111			free(fnext->path);
112			free(fnext->link);
113			fnext->path = fnext->link = NULL;
114			continue;
115		}
116
117		ERRX(sess, "%s: duplicate working path for "
118		    "possibly different file: %s, %s",
119		    f->wpath, f->path, fnext->path);
120		free(new);
121		return 0;
122	}
123
124	/* Don't forget the last entry. */
125
126	if (i == *sz - 1)
127		new[j++] = (*fl)[i];
128
129	/*
130	 * Reassign to the deduplicated array.
131	 * If we started out with *sz > 0, which we check for at the
132	 * beginning, then we'll always continue having *sz > 0.
133	 */
134
135	free(*fl);
136	*fl = new;
137	*sz = j;
138	assert(*sz);
139	return 1;
140}
141
142/*
143 * We're now going to find our top-level directories.
144 * This only applies to recursive mode.
145 * If we have the first element as the ".", then that's the "top
146 * directory" of our transfer.
147 * Otherwise, mark up all top-level directories in the set.
148 * XXX: the FLIST_TOP_LEVEL flag should indicate what is and what isn't
149 * a top-level directory, but I'm not sure if GPL rsync(1) respects it
150 * the same way.
151 */
152static void
153flist_topdirs(struct sess *sess, struct flist *fl, size_t flsz)
154{
155	size_t		 i;
156	const char	*cp;
157
158	if (!sess->opts->recursive)
159		return;
160
161	if (flsz && strcmp(fl[0].wpath, ".")) {
162		for (i = 0; i < flsz; i++) {
163			if (!S_ISDIR(fl[i].st.mode))
164				continue;
165			cp = strchr(fl[i].wpath, '/');
166			if (cp != NULL && cp[1] != '\0')
167				continue;
168			fl[i].st.flags |= FLSTAT_TOP_DIR;
169			LOG4(sess, "%s: top-level", fl[i].wpath);
170		}
171	} else if (flsz) {
172		fl[0].st.flags |= FLSTAT_TOP_DIR;
173		LOG4(sess, "%s: top-level", fl[0].wpath);
174	}
175}
176
177/*
178 * Filter through the fts() file information.
179 * We want directories (pre-order), regular files, and symlinks.
180 * Everything else is skipped and possibly warned about.
181 * Return zero to skip, non-zero to examine.
182 */
183static int
184flist_fts_check(struct sess *sess, FTSENT *ent)
185{
186
187	if (ent->fts_info == FTS_F  ||
188	    ent->fts_info == FTS_D ||
189	    ent->fts_info == FTS_SL ||
190	    ent->fts_info == FTS_SLNONE)
191		return 1;
192
193	if (ent->fts_info == FTS_DC) {
194		WARNX(sess, "%s: directory cycle", ent->fts_path);
195	} else if (ent->fts_info == FTS_DNR) {
196		errno = ent->fts_errno;
197		WARN(sess, "%s: unreadable directory", ent->fts_path);
198	} else if (ent->fts_info == FTS_DOT) {
199		WARNX(sess, "%s: skipping dot-file", ent->fts_path);
200	} else if (ent->fts_info == FTS_ERR) {
201		errno = ent->fts_errno;
202		WARN(sess, "%s", ent->fts_path);
203	} else if (ent->fts_info == FTS_DEFAULT) {
204		if ((sess->opts->devices && (S_ISBLK(ent->fts_statp->st_mode) ||
205		    S_ISCHR(ent->fts_statp->st_mode))) ||
206		    (sess->opts->specials &&
207		    (S_ISFIFO(ent->fts_statp->st_mode) ||
208		    S_ISSOCK(ent->fts_statp->st_mode)))) {
209			return 1;
210		}
211		WARNX(sess, "%s: skipping special", ent->fts_path);
212	} else if (ent->fts_info == FTS_NS) {
213		errno = ent->fts_errno;
214		WARN(sess, "%s: could not stat", ent->fts_path);
215	}
216
217	return 0;
218}
219
220/*
221 * Copy necessary elements in "st" into the fields of "f".
222 */
223static void
224flist_copy_stat(struct flist *f, const struct stat *st)
225{
226	f->st.mode = st->st_mode;
227	f->st.uid = st->st_uid;
228	f->st.gid = st->st_gid;
229	f->st.size = st->st_size;
230	f->st.mtime = st->st_mtime;
231	f->st.rdev = st->st_rdev;
232}
233
234void
235flist_free(struct flist *f, size_t sz)
236{
237	size_t	 i;
238
239	if (f == NULL)
240		return;
241
242	for (i = 0; i < sz; i++) {
243		free(f[i].path);
244		free(f[i].link);
245	}
246	free(f);
247}
248
249/*
250 * Serialise our file list (which may be zero-length) to the wire.
251 * Makes sure that the receiver isn't going to block on sending us
252 * return messages on the log channel.
253 * Return zero on failure, non-zero on success.
254 */
255int
256flist_send(struct sess *sess, int fdin, int fdout, const struct flist *fl,
257    size_t flsz)
258{
259	size_t		 i, sz, gidsz = 0, uidsz = 0;
260	uint8_t		 flag;
261	const struct flist *f;
262	const char	*fn;
263	struct ident	*gids = NULL, *uids = NULL;
264	int		 rc = 0;
265
266	/* Double-check that we've no pending multiplexed data. */
267
268	LOG2(sess, "sending file metadata list: %zu", flsz);
269
270	for (i = 0; i < flsz; i++) {
271		f = &fl[i];
272		fn = f->wpath;
273		sz = strlen(f->wpath);
274		assert(sz > 0);
275
276		/*
277		 * If applicable, unclog the read buffer.
278		 * This happens when the receiver has a lot of log
279		 * messages and all we're doing is sending our file list
280		 * without checking for messages.
281		 */
282
283		if (sess->mplex_reads &&
284		    io_read_check(sess, fdin) &&
285		     !io_read_flush(sess, fdin)) {
286			ERRX1(sess, "io_read_flush");
287			goto out;
288		}
289
290		/*
291		 * For ease, make all of our filenames be "long"
292		 * regardless their actual length.
293		 * This also makes sure that we don't transmit a zero
294		 * byte unintentionally.
295		 */
296
297		flag = FLIST_NAME_LONG;
298		if ((FLSTAT_TOP_DIR & f->st.flags))
299			flag |= FLIST_TOP_LEVEL;
300
301		LOG3(sess, "%s: sending file metadata: "
302			"size %jd, mtime %jd, mode %o",
303			fn, (intmax_t)f->st.size,
304			(intmax_t)f->st.mtime, f->st.mode);
305
306		/* Now write to the wire. */
307		/* FIXME: buffer this. */
308
309		if (!io_write_byte(sess, fdout, flag)) {
310			ERRX1(sess, "io_write_byte");
311			goto out;
312		} else if (!io_write_int(sess, fdout, sz)) {
313			ERRX1(sess, "io_write_int");
314			goto out;
315		} else if (!io_write_buf(sess, fdout, fn, sz)) {
316			ERRX1(sess, "io_write_buf");
317			goto out;
318		} else if (!io_write_long(sess, fdout, f->st.size)) {
319			ERRX1(sess, "io_write_long");
320			goto out;
321		} else if (!io_write_int(sess, fdout, f->st.mtime)) {
322			ERRX1(sess, "io_write_int");
323			goto out;
324		} else if (!io_write_int(sess, fdout, f->st.mode)) {
325			ERRX1(sess, "io_write_int");
326			goto out;
327		}
328
329		/* Conditional part: uid. */
330
331		if (sess->opts->preserve_uids) {
332			if (!io_write_int(sess, fdout, f->st.uid)) {
333				ERRX1(sess, "io_write_int");
334				goto out;
335			}
336			if (!idents_add(sess, 0, &uids, &uidsz, f->st.uid)) {
337				ERRX1(sess, "idents_add");
338				goto out;
339			}
340		}
341
342		/* Conditional part: gid. */
343
344		if (sess->opts->preserve_gids) {
345			if (!io_write_int(sess, fdout, f->st.gid)) {
346				ERRX1(sess, "io_write_int");
347				goto out;
348			}
349			if (!idents_add(sess, 1, &gids, &gidsz, f->st.gid)) {
350				ERRX1(sess, "idents_add");
351				goto out;
352			}
353		}
354
355		/* Conditional part: devices & special files. */
356
357		if ((sess->opts->devices && (S_ISBLK(f->st.mode) ||
358		     S_ISCHR(f->st.mode))) ||
359		    (sess->opts->specials && (S_ISFIFO(f->st.mode) ||
360		    S_ISSOCK(f->st.mode)))) {
361			if (!io_write_int(sess, fdout, f->st.rdev)) {
362				ERRX1(sess, "io_write_int");
363				goto out;
364			}
365		}
366
367		/* Conditional part: link. */
368
369		if (S_ISLNK(f->st.mode) &&
370		    sess->opts->preserve_links) {
371			fn = f->link;
372			sz = strlen(f->link);
373			if (!io_write_int(sess, fdout, sz)) {
374				ERRX1(sess, "io_write_int");
375				goto out;
376			}
377			if (!io_write_buf(sess, fdout, fn, sz)) {
378				ERRX1(sess, "io_write_int");
379				goto out;
380			}
381		}
382
383		if (S_ISREG(f->st.mode))
384			sess->total_size += f->st.size;
385	}
386
387	/* Signal end of file list. */
388
389	if (!io_write_byte(sess, fdout, 0)) {
390		ERRX1(sess, "io_write_byte");
391		goto out;
392	}
393
394	/* Conditionally write identifier lists. */
395
396	if (sess->opts->preserve_uids && !sess->opts->numeric_ids) {
397		LOG2(sess, "sending uid list: %zu", uidsz);
398		if (!idents_send(sess, fdout, uids, uidsz)) {
399			ERRX1(sess, "idents_send");
400			goto out;
401		}
402	}
403
404	if (sess->opts->preserve_gids && !sess->opts->numeric_ids) {
405		LOG2(sess, "sending gid list: %zu", gidsz);
406		if (!idents_send(sess, fdout, gids, gidsz)) {
407			ERRX1(sess, "idents_send");
408			goto out;
409		}
410	}
411
412	rc = 1;
413out:
414	idents_free(gids, gidsz);
415	idents_free(uids, uidsz);
416	return rc;
417}
418
419/*
420 * Read the filename of a file list.
421 * This is the most expensive part of the file list transfer, so a lot
422 * of attention has gone into transmitting as little as possible.
423 * Micro-optimisation, but whatever.
424 * Fills in "f" with the full path on success.
425 * Returns zero on failure, non-zero on success.
426 */
427static int
428flist_recv_name(struct sess *sess, int fd, struct flist *f, uint8_t flags,
429    char last[MAXPATHLEN])
430{
431	uint8_t		 bval;
432	size_t		 partial = 0;
433	size_t		 pathlen = 0, len;
434
435	/*
436	 * Read our filename.
437	 * If we have FLIST_NAME_SAME, we inherit some of the last
438	 * transmitted name.
439	 * If we have FLIST_NAME_LONG, then the string length is greater
440	 * than byte-size.
441	 */
442
443	if (FLIST_NAME_SAME & flags) {
444		if (!io_read_byte(sess, fd, &bval)) {
445			ERRX1(sess, "io_read_byte");
446			return 0;
447		}
448		partial = bval;
449	}
450
451	/* Get the (possibly-remaining) filename length. */
452
453	if (FLIST_NAME_LONG & flags) {
454		if (!io_read_size(sess, fd, &pathlen)) {
455			ERRX1(sess, "io_read_size");
456			return 0;
457		}
458	} else {
459		if (!io_read_byte(sess, fd, &bval)) {
460			ERRX1(sess, "io_read_byte");
461			return 0;
462		}
463		pathlen = bval;
464	}
465
466	/* Allocate our full filename length. */
467	/* FIXME: maximum pathname length. */
468
469	if ((len = pathlen + partial) == 0) {
470		ERRX(sess, "security violation: "
471			"zero-length pathname");
472		return 0;
473	}
474
475	if ((f->path = malloc(len + 1)) == NULL) {
476		ERR(sess, "malloc");
477		return 0;
478	}
479	f->path[len] = '\0';
480
481	if (FLIST_NAME_SAME & flags)
482		memcpy(f->path, last, partial);
483
484	if (!io_read_buf(sess, fd, f->path + partial, pathlen)) {
485		ERRX1(sess, "io_read_buf");
486		return 0;
487	}
488
489	if (f->path[0] == '/') {
490		ERRX(sess, "security violation: "
491			"absolute pathname: %s", f->path);
492		return 0;
493	}
494
495	if (strstr(f->path, "/../") != NULL ||
496	    (len > 2 && strcmp(f->path + len - 3, "/..") == 0) ||
497	    (len > 2 && strncmp(f->path, "../", 3) == 0) ||
498	    strcmp(f->path, "..") == 0) {
499		ERRX(sess, "%s: security violation: "
500			"backtracking pathname", f->path);
501		return 0;
502	}
503
504	/* Record our last path and construct our filename. */
505
506	strlcpy(last, f->path, MAXPATHLEN);
507	f->wpath = f->path;
508	return 1;
509}
510
511/*
512 * Reallocate a file list in chunks of FLIST_CHUNK_SIZE;
513 * Returns zero on failure, non-zero on success.
514 */
515static int
516flist_realloc(struct sess *sess, struct flist **fl, size_t *sz, size_t *max)
517{
518	void	*pp;
519
520	if (*sz + 1 <= *max)  {
521		(*sz)++;
522		return 1;
523	}
524
525	pp = recallocarray(*fl, *max,
526		*max + FLIST_CHUNK_SIZE, sizeof(struct flist));
527	if (pp == NULL) {
528		ERR(sess, "recallocarray");
529		return 0;
530	}
531	*fl = pp;
532	*max += FLIST_CHUNK_SIZE;
533	(*sz)++;
534	return 1;
535}
536
537/*
538 * Copy a regular or symbolic link file "path" into "f".
539 * This handles the correct path creation and symbolic linking.
540 * Returns zero on failure, non-zero on success.
541 */
542static int
543flist_append(struct sess *sess, struct flist *f, struct stat *st,
544    const char *path)
545{
546
547	/*
548	 * Copy the full path for local addressing and transmit
549	 * only the filename part for the receiver.
550	 */
551
552	if ((f->path = strdup(path)) == NULL) {
553		ERR(sess, "strdup");
554		return 0;
555	}
556
557	if ((f->wpath = strrchr(f->path, '/')) == NULL)
558		f->wpath = f->path;
559	else
560		f->wpath++;
561
562	/*
563	 * On the receiving end, we'll strip out all bits on the
564	 * mode except for the file permissions.
565	 * No need to warn about it here.
566	 */
567
568	flist_copy_stat(f, st);
569
570	/* Optionally copy link information. */
571
572	if (S_ISLNK(st->st_mode)) {
573		f->link = symlink_read(sess, f->path);
574		if (f->link == NULL) {
575			ERRX1(sess, "symlink_read");
576			return 0;
577		}
578	}
579
580	return 1;
581}
582
583/*
584 * Receive a file list from the wire, filling in length "sz" (which may
585 * possibly be zero) and list "flp" on success.
586 * Return zero on failure, non-zero on success.
587 */
588int
589flist_recv(struct sess *sess, int fd, struct flist **flp, size_t *sz)
590{
591	struct flist	*fl = NULL;
592	struct flist	*ff;
593	const struct flist *fflast = NULL;
594	size_t		 flsz = 0, flmax = 0, lsz, gidsz = 0, uidsz = 0;
595	uint8_t		 flag;
596	char		 last[MAXPATHLEN];
597	uint64_t	 lval; /* temporary values... */
598	int32_t		 ival;
599	struct ident	*gids = NULL, *uids = NULL;
600
601	last[0] = '\0';
602
603	for (;;) {
604		if (!io_read_byte(sess, fd, &flag)) {
605			ERRX1(sess, "io_read_byte");
606			goto out;
607		} else if (flag == 0)
608			break;
609
610		if (!flist_realloc(sess, &fl, &flsz, &flmax)) {
611			ERRX1(sess, "flist_realloc");
612			goto out;
613		}
614
615		ff = &fl[flsz - 1];
616		fflast = flsz > 1 ? &fl[flsz - 2] : NULL;
617
618		/* Filename first. */
619
620		if (!flist_recv_name(sess, fd, ff, flag, last)) {
621			ERRX1(sess, "flist_recv_name");
622			goto out;
623		}
624
625		/* Read the file size. */
626
627		if (!io_read_ulong(sess, fd, &lval)) {
628			ERRX1(sess, "io_read_ulong");
629			goto out;
630		}
631		ff->st.size = lval;
632
633		/* Read the modification time. */
634
635		if (!(FLIST_TIME_SAME & flag)) {
636			if (!io_read_int(sess, fd, &ival)) {
637				ERRX1(sess, "io_read_int");
638				goto out;
639			}
640			ff->st.mtime = ival;
641		} else if (fflast == NULL) {
642			ERRX(sess, "same time without last entry");
643			goto out;
644		}  else
645			ff->st.mtime = fflast->st.mtime;
646
647		/* Read the file mode. */
648
649		if (!(FLIST_MODE_SAME & flag)) {
650			if (!io_read_int(sess, fd, &ival)) {
651				ERRX1(sess, "io_read_int");
652				goto out;
653			}
654			ff->st.mode = ival;
655		} else if (fflast == NULL) {
656			ERRX(sess, "same mode without last entry");
657			goto out;
658		} else
659			ff->st.mode = fflast->st.mode;
660
661		/* Conditional part: uid. */
662
663		if (sess->opts->preserve_uids) {
664			if (!(FLIST_UID_SAME & flag)) {
665				if (!io_read_int(sess, fd, &ival)) {
666					ERRX1(sess, "io_read_int");
667					goto out;
668				}
669				ff->st.uid = ival;
670			} else if (fflast == NULL) {
671				ERRX(sess, "same uid "
672					"without last entry");
673				goto out;
674			} else
675				ff->st.uid = fflast->st.uid;
676		}
677
678		/* Conditional part: gid. */
679
680		if (sess->opts->preserve_gids) {
681			if (!(FLIST_GID_SAME & flag)) {
682				if (!io_read_int(sess, fd, &ival)) {
683					ERRX1(sess, "io_read_int");
684					goto out;
685				}
686				ff->st.gid = ival;
687			} else if (fflast == NULL) {
688				ERRX(sess, "same gid "
689					"without last entry");
690				goto out;
691			} else
692				ff->st.gid = fflast->st.gid;
693		}
694
695		/* Conditional part: devices & special files. */
696
697		if ((sess->opts->devices && (S_ISBLK(ff->st.mode) ||
698		     S_ISCHR(ff->st.mode))) ||
699		    (sess->opts->specials && (S_ISFIFO(ff->st.mode) ||
700		    S_ISSOCK(ff->st.mode)))) {
701			if (!(FLIST_RDEV_SAME & flag)) {
702				if (!io_read_int(sess, fd, &ival)) {
703					ERRX1(sess, "io_read_int");
704					goto out;
705				}
706				ff->st.rdev = ival;
707			} else if (fflast == NULL) {
708				ERRX(sess, "same device without last entry");
709				goto out;
710			} else
711				ff->st.rdev = fflast->st.rdev;
712		}
713
714		/* Conditional part: link. */
715
716		if (S_ISLNK(ff->st.mode) &&
717		    sess->opts->preserve_links) {
718			if (!io_read_size(sess, fd, &lsz)) {
719				ERRX1(sess, "io_read_size");
720				goto out;
721			} else if (lsz == 0) {
722				ERRX(sess, "empty link name");
723				goto out;
724			}
725			ff->link = calloc(lsz + 1, 1);
726			if (ff->link == NULL) {
727				ERR(sess, "calloc");
728				goto out;
729			}
730			if (!io_read_buf(sess, fd, ff->link, lsz)) {
731				ERRX1(sess, "io_read_buf");
732				goto out;
733			}
734		}
735
736		LOG3(sess, "%s: received file metadata: "
737			"size %jd, mtime %jd, mode %o, rdev (%d, %d)",
738			ff->path, (intmax_t)ff->st.size,
739			(intmax_t)ff->st.mtime, ff->st.mode,
740			major(ff->st.rdev), minor(ff->st.rdev));
741
742		if (S_ISREG(ff->st.mode))
743			sess->total_size += ff->st.size;
744	}
745
746	/* Conditionally read the user/group list. */
747
748	if (sess->opts->preserve_uids && !sess->opts->numeric_ids) {
749		if (!idents_recv(sess, fd, &uids, &uidsz)) {
750			ERRX1(sess, "idents_recv");
751			goto out;
752		}
753		LOG2(sess, "received uid list: %zu", uidsz);
754	}
755
756	if (sess->opts->preserve_gids && !sess->opts->numeric_ids) {
757		if (!idents_recv(sess, fd, &gids, &gidsz)) {
758			ERRX1(sess, "idents_recv");
759			goto out;
760		}
761		LOG2(sess, "received gid list: %zu", gidsz);
762	}
763
764	/* Remember to order the received list. */
765
766	LOG2(sess, "received file metadata list: %zu", flsz);
767	qsort(fl, flsz, sizeof(struct flist), flist_cmp);
768	flist_topdirs(sess, fl, flsz);
769	*sz = flsz;
770	*flp = fl;
771
772	/* Conditionally remap and reassign identifiers. */
773
774	if (sess->opts->preserve_uids && !sess->opts->numeric_ids) {
775		idents_remap(sess, 0, uids, uidsz);
776		idents_assign_uid(sess, fl, flsz, uids, uidsz);
777	}
778
779	if (sess->opts->preserve_gids && !sess->opts->numeric_ids) {
780		idents_remap(sess, 1, gids, gidsz);
781		idents_assign_gid(sess, fl, flsz, gids, gidsz);
782	}
783
784	idents_free(gids, gidsz);
785	idents_free(uids, uidsz);
786	return 1;
787out:
788	flist_free(fl, flsz);
789	idents_free(gids, gidsz);
790	idents_free(uids, uidsz);
791	*sz = 0;
792	*flp = NULL;
793	return 0;
794}
795
796/*
797 * Generate a flist possibly-recursively given a file root, which may
798 * also be a regular file or symlink.
799 * On success, augments the generated list in "flp" of length "sz".
800 * Returns zero on failure, non-zero on success.
801 */
802static int
803flist_gen_dirent(struct sess *sess, char *root, struct flist **fl, size_t *sz,
804    size_t *max)
805{
806	char		*cargv[2], *cp;
807	int		 rc = 0;
808	FTS		*fts;
809	FTSENT		*ent;
810	struct flist	*f;
811	size_t		 flsz = 0, stripdir;
812	struct stat	 st;
813
814	cargv[0] = root;
815	cargv[1] = NULL;
816
817	/*
818	 * If we're a file, then revert to the same actions we use for
819	 * the non-recursive scan.
820	 */
821
822	if (lstat(root, &st) == -1) {
823		ERR(sess, "%s: lstat", root);
824		return 0;
825	} else if (S_ISREG(st.st_mode)) {
826		if (!flist_realloc(sess, fl, sz, max)) {
827			ERRX1(sess, "flist_realloc");
828			return 0;
829		}
830		f = &(*fl)[(*sz) - 1];
831		assert(f != NULL);
832
833		if (!flist_append(sess, f, &st, root)) {
834			ERRX1(sess, "flist_append");
835			return 0;
836		}
837		if (unveil(root, "r") == -1) {
838			ERR(sess, "%s: unveil", root);
839			return 0;
840		}
841		return 1;
842	} else if (S_ISLNK(st.st_mode)) {
843		if (!sess->opts->preserve_links) {
844			WARNX(sess, "%s: skipping symlink", root);
845			return 1;
846		} else if (!flist_realloc(sess, fl, sz, max)) {
847			ERRX1(sess, "flist_realloc");
848			return 0;
849		}
850		f = &(*fl)[(*sz) - 1];
851		assert(f != NULL);
852
853		if (!flist_append(sess, f, &st, root)) {
854			ERRX1(sess, "flist_append");
855			return 0;
856		}
857		if (unveil(root, "r") == -1) {
858			ERR(sess, "%s: unveil", root);
859			return 0;
860		}
861		return 1;
862	} else if (!S_ISDIR(st.st_mode)) {
863		WARNX(sess, "%s: skipping special", root);
864		return 1;
865	}
866
867	/*
868	 * If we end with a slash, it means that we're not supposed to
869	 * copy the directory part itself---only the contents.
870	 * So set "stripdir" to be what we take out.
871	 */
872
873	stripdir = strlen(root);
874	assert(stripdir > 0);
875	if (root[stripdir - 1] != '/')
876		stripdir = 0;
877
878	/*
879	 * If we're not stripping anything, then see if we need to strip
880	 * out the leading material in the path up to and including the
881	 * last directory component.
882	 */
883
884	if (stripdir == 0)
885		if ((cp = strrchr(root, '/')) != NULL)
886			stripdir = cp - root + 1;
887
888	/*
889	 * If we're recursive, then we need to take down all of the
890	 * files and directory components, so use fts(3).
891	 * Copying the information file-by-file into the flstat.
892	 * We'll make sense of it in flist_send.
893	 */
894
895	if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) {
896		ERR(sess, "fts_open");
897		return 0;
898	}
899
900	errno = 0;
901	while ((ent = fts_read(fts)) != NULL) {
902		if (!flist_fts_check(sess, ent)) {
903			errno = 0;
904			continue;
905		}
906
907		/* We don't allow symlinks without -l. */
908
909		assert(ent->fts_statp != NULL);
910		if (S_ISLNK(ent->fts_statp->st_mode) &&
911		    !sess->opts->preserve_links) {
912			WARNX(sess, "%s: skipping "
913				"symlink", ent->fts_path);
914			continue;
915		}
916
917		/* Allocate a new file entry. */
918
919		if (!flist_realloc(sess, fl, sz, max)) {
920			ERRX1(sess, "flist_realloc");
921			goto out;
922		}
923		flsz++;
924		f = &(*fl)[*sz - 1];
925
926		/* Our path defaults to "." for the root. */
927
928		if (ent->fts_path[stripdir] == '\0') {
929			if (asprintf(&f->path, "%s.", ent->fts_path) < 0) {
930				ERR(sess, "asprintf");
931				f->path = NULL;
932				goto out;
933			}
934		} else {
935			if ((f->path = strdup(ent->fts_path)) == NULL) {
936				ERR(sess, "strdup");
937				goto out;
938			}
939		}
940
941		f->wpath = f->path + stripdir;
942		flist_copy_stat(f, ent->fts_statp);
943
944		/* Optionally copy link information. */
945
946		if (S_ISLNK(ent->fts_statp->st_mode)) {
947			f->link = symlink_read(sess, f->path);
948			if (f->link == NULL) {
949				ERRX1(sess, "symlink_read");
950				goto out;
951			}
952		}
953
954		/* Reset errno for next fts_read() call. */
955		errno = 0;
956	}
957	if (errno) {
958		ERR(sess, "fts_read");
959		goto out;
960	}
961	if (unveil(root, "r") == -1) {
962		ERR(sess, "%s: unveil", root);
963		goto out;
964	}
965
966	LOG3(sess, "generated %zu filenames: %s", flsz, root);
967	rc = 1;
968out:
969	fts_close(fts);
970	return rc;
971}
972
/*
 * Generate a flist recursively given the array of directories (or
 * files, symlinks, doesn't matter) specified in argv (argc >0).
 * The output parameters are reset on entry, so the caller doesn't need
 * to initialise them.
 * On success, stores the generated list in "flp" with length "sz",
 * which may be zero.
 * On failure, whatever was generated is freed and "flp" and "sz" are
 * set to NULL and zero.
 * Returns zero on failure, non-zero on success.
 */
static int
flist_gen_dirs(struct sess *sess, size_t argc, char **argv, struct flist **flp,
    size_t *sz)
{
	size_t		 i, max = 0;

	/*
	 * We start with no allocation ("max" is zero), so the list and
	 * its length must start out empty as well: the list is grown
	 * from these values.
	 */

	*flp = NULL;
	*sz = 0;

	for (i = 0; i < argc; i++)
		if (!flist_gen_dirent(sess, argv[i], flp, sz, &max))
			break;

	if (i == argc) {
		LOG2(sess, "recursively generated %zu filenames", *sz);
		return 1;
	}

	/*
	 * Free over the full allocation: the entry being filled in
	 * when the failure happened is covered this way, too.
	 */

	ERRX1(sess, "flist_gen_dirent");
	flist_free(*flp, max);
	*flp = NULL;
	*sz = 0;
	return 0;
}
1001
1002/*
1003 * Generate list of files from the command-line argc (>0) and argv.
1004 * On success, stores the generated list in "flp" with length "sz",
1005 * which may be zero.
1006 * Returns zero on failure, non-zero on success.
1007 */
1008static int
1009flist_gen_files(struct sess *sess, size_t argc, char **argv,
1010    struct flist **flp, size_t *sz)
1011{
1012	struct flist	*fl = NULL, *f;
1013	size_t		 i, flsz = 0;
1014	struct stat	 st;
1015
1016	assert(argc);
1017
1018	if ((fl = calloc(argc, sizeof(struct flist))) == NULL) {
1019		ERR(sess, "calloc");
1020		return 0;
1021	}
1022
1023	for (i = 0; i < argc; i++) {
1024		if (argv[i][0] == '\0')
1025			continue;
1026		if (lstat(argv[i], &st) == -1) {
1027			ERR(sess, "%s: lstat", argv[i]);
1028			goto out;
1029		}
1030
1031		/*
1032		 * File type checks.
1033		 * In non-recursive mode, we don't accept directories.
1034		 * We also skip symbolic links without -l.
1035		 * Beyond that, we only accept regular files.
1036		 */
1037
1038		if (S_ISDIR(st.st_mode)) {
1039			WARNX(sess, "%s: skipping directory", argv[i]);
1040			continue;
1041		} else if (S_ISLNK(st.st_mode)) {
1042			if (!sess->opts->preserve_links) {
1043				WARNX(sess, "%s: skipping "
1044					"symlink", argv[i]);
1045				continue;
1046			}
1047		} else if (!S_ISREG(st.st_mode)) {
1048			WARNX(sess, "%s: skipping special", argv[i]);
1049			continue;
1050		}
1051
1052
1053		f = &fl[flsz++];
1054		assert(f != NULL);
1055
1056		/* Add this file to our file-system worldview. */
1057
1058		if (unveil(argv[i], "r") == -1) {
1059			ERR(sess, "%s: unveil", argv[i]);
1060			goto out;
1061		}
1062		if (!flist_append(sess, f, &st, argv[i])) {
1063			ERRX1(sess, "flist_append");
1064			goto out;
1065		}
1066	}
1067
1068	LOG2(sess, "non-recursively generated %zu filenames", flsz);
1069	*sz = flsz;
1070	*flp = fl;
1071	return 1;
1072out:
1073	flist_free(fl, argc);
1074	*sz = 0;
1075	*flp = NULL;
1076	return 0;
1077}
1078
1079/*
1080 * Generate a sorted, de-duplicated list of file metadata.
1081 * In non-recursive mode (the default), we use only the files we're
1082 * given.
1083 * Otherwise, directories are recursively examined.
1084 * Returns zero on failure, non-zero on success.
1085 * On success, "fl" will need to be freed with flist_free().
1086 */
1087int
1088flist_gen(struct sess *sess, size_t argc, char **argv, struct flist **flp,
1089    size_t *sz)
1090{
1091	int	 rc;
1092
1093	assert(argc > 0);
1094	rc = sess->opts->recursive ?
1095		flist_gen_dirs(sess, argc, argv, flp, sz) :
1096		flist_gen_files(sess, argc, argv, flp, sz);
1097
1098	/* After scanning, lock our file-system view. */
1099
1100	if (unveil(NULL, NULL) == -1) {
1101		ERR(sess, "unveil");
1102		return 0;
1103	}
1104	if (!rc)
1105		return 0;
1106
1107	qsort(*flp, *sz, sizeof(struct flist), flist_cmp);
1108
1109	if (flist_dedupe(sess, flp, sz)) {
1110		flist_topdirs(sess, *flp, *sz);
1111		return 1;
1112	}
1113
1114	ERRX1(sess, "flist_dedupe");
1115	flist_free(*flp, *sz);
1116	*flp = NULL;
1117	*sz = 0;
1118	return 0;
1119}
1120
1121/*
1122 * Generate a list of files in root to delete that are within the
1123 * top-level directories stipulated by "wfl".
1124 * Only handles symbolic links, directories, and regular files.
1125 * Returns zero on failure (fl and flsz will be NULL and zero), non-zero
1126 * on success.
1127 * On success, "fl" will need to be freed with flist_free().
1128 */
1129int
1130flist_gen_dels(struct sess *sess, const char *root, struct flist **fl,
1131    size_t *sz,	const struct flist *wfl, size_t wflsz)
1132{
1133	char		**cargv = NULL;
1134	int		  rc = 0, c;
1135	FTS		 *fts = NULL;
1136	FTSENT		 *ent;
1137	struct flist	 *f;
1138	size_t		  cargvs = 0, i, j, max = 0, stripdir;
1139	ENTRY		  hent;
1140	ENTRY		 *hentp;
1141
1142	*fl = NULL;
1143	*sz = 0;
1144
1145	/* Only run this code when we're recursive. */
1146
1147	if (!sess->opts->recursive)
1148		return 1;
1149
1150	/*
1151	 * Gather up all top-level directories for scanning.
1152	 * This is stipulated by rsync's --delete behaviour, where we
1153	 * only delete things in the top-level directories given on the
1154	 * command line.
1155	 */
1156
1157	assert(wflsz > 0);
1158	for (i = 0; i < wflsz; i++)
1159		if (FLSTAT_TOP_DIR & wfl[i].st.flags)
1160			cargvs++;
1161	if (cargvs == 0)
1162		return 1;
1163
1164	if ((cargv = calloc(cargvs + 1, sizeof(char *))) == NULL) {
1165		ERR(sess, "calloc");
1166		return 0;
1167	}
1168
1169	/*
1170	 * If we're given just a "." as the first entry, that means
1171	 * we're doing a relative copy with a trailing slash.
1172	 * Special-case this just for the sake of simplicity.
1173	 * Otherwise, look through all top-levels.
1174	 */
1175
1176	if (wflsz && strcmp(wfl[0].wpath, ".") == 0) {
1177		assert(cargvs == 1);
1178		assert(S_ISDIR(wfl[0].st.mode));
1179		if (asprintf(&cargv[0], "%s/", root) < 0) {
1180			ERR(sess, "asprintf");
1181			cargv[0] = NULL;
1182			goto out;
1183		}
1184		cargv[1] = NULL;
1185	} else {
1186		for (i = j = 0; i < wflsz; i++) {
1187			if (!(FLSTAT_TOP_DIR & wfl[i].st.flags))
1188				continue;
1189			assert(S_ISDIR(wfl[i].st.mode));
1190			assert(strcmp(wfl[i].wpath, "."));
1191			c = asprintf(&cargv[j], "%s/%s", root, wfl[i].wpath);
1192			if (c < 0) {
1193				ERR(sess, "asprintf");
1194				cargv[j] = NULL;
1195				goto out;
1196			}
1197			LOG4(sess, "%s: will scan for deletions", cargv[j]);
1198			j++;
1199		}
1200		assert(j == cargvs);
1201		cargv[j] = NULL;
1202	}
1203
1204	LOG2(sess, "delete from %zu directories", cargvs);
1205
1206	/*
1207	 * Next, use the standard hcreate(3) hashtable interface to hash
1208	 * all of the files that we want to synchronise.
1209	 * This way, we'll be able to determine which files we want to
1210	 * delete in O(n) time instead of O(n * search) time.
1211	 * Plus, we can do the scan in-band and only allocate the files
1212	 * we want to delete.
1213	 */
1214
1215	if (!hcreate(wflsz)) {
1216		ERR(sess, "hcreate");
1217		goto out;
1218	}
1219
1220	for (i = 0; i < wflsz; i++) {
1221		memset(&hent, 0, sizeof(ENTRY));
1222		if ((hent.key = strdup(wfl[i].wpath)) == NULL) {
1223			ERR(sess, "strdup");
1224			goto out;
1225		}
1226		if ((hentp = hsearch(hent, ENTER)) == NULL) {
1227			ERR(sess, "hsearch");
1228			goto out;
1229		} else if (hentp->key != hent.key) {
1230			ERRX(sess, "%s: duplicate", wfl[i].wpath);
1231			free(hent.key);
1232			goto out;
1233		}
1234	}
1235
1236	/*
1237	 * Now we're going to try to descend into all of the top-level
1238	 * directories stipulated by the file list.
1239	 * If the directories don't exist, it's ok.
1240	 */
1241
1242	if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) {
1243		ERR(sess, "fts_open");
1244		goto out;
1245	}
1246
1247	stripdir = strlen(root) + 1;
1248	errno = 0;
1249	while ((ent = fts_read(fts)) != NULL) {
1250		if (ent->fts_info == FTS_NS)
1251			continue;
1252		if (!flist_fts_check(sess, ent)) {
1253			errno = 0;
1254			continue;
1255		} else if (stripdir >= ent->fts_pathlen)
1256			continue;
1257
1258		/* Look up in hashtable. */
1259
1260		memset(&hent, 0, sizeof(ENTRY));
1261		hent.key = ent->fts_path + stripdir;
1262		if (hsearch(hent, FIND) != NULL)
1263			continue;
1264
1265		/* Not found: we'll delete it. */
1266
1267		if (!flist_realloc(sess, fl, sz, &max)) {
1268			ERRX1(sess, "flist_realloc");
1269			goto out;
1270		}
1271		f = &(*fl)[*sz - 1];
1272
1273		if ((f->path = strdup(ent->fts_path)) == NULL) {
1274			ERR(sess, "strdup");
1275			goto out;
1276		}
1277		f->wpath = f->path + stripdir;
1278		assert(ent->fts_statp != NULL);
1279		flist_copy_stat(f, ent->fts_statp);
1280		errno = 0;
1281	}
1282
1283	if (errno) {
1284		ERR(sess, "fts_read");
1285		goto out;
1286	}
1287
1288	qsort(*fl, *sz, sizeof(struct flist), flist_cmp);
1289	rc = 1;
1290out:
1291	if (fts != NULL)
1292		fts_close(fts);
1293	for (i = 0; i < cargvs; i++)
1294		free(cargv[i]);
1295	free(cargv);
1296	hdestroy();
1297	return rc;
1298}
1299
1300/*
1301 * Delete all files and directories in "fl".
1302 * If called with a zero-length "fl", does nothing.
1303 * If dry_run is specified, simply write what would be done.
1304 * Return zero on failure, non-zero on success.
1305 */
1306int
1307flist_del(struct sess *sess, int root, const struct flist *fl, size_t flsz)
1308{
1309	ssize_t	 i;
1310	int	 flag;
1311
1312	if (flsz == 0)
1313		return 1;
1314
1315	assert(sess->opts->del);
1316	assert(sess->opts->recursive);
1317
1318	for (i = flsz - 1; i >= 0; i--) {
1319		LOG1(sess, "%s: deleting", fl[i].wpath);
1320		if (sess->opts->dry_run)
1321			continue;
1322		assert(root != -1);
1323		flag = S_ISDIR(fl[i].st.mode) ? AT_REMOVEDIR : 0;
1324		if (unlinkat(root, fl[i].wpath, flag) == -1 &&
1325		    errno != ENOENT) {
1326			ERR(sess, "%s: unlinkat", fl[i].wpath);
1327			return 0;
1328		}
1329	}
1330
1331	return 1;
1332}
1333