flist.c revision 1.7
1/*	$Id: flist.c,v 1.7 2019/02/12 19:02:06 benno Exp $ */
2/*
3 * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17#include <sys/param.h>
18#include <sys/stat.h>
19
20#include <assert.h>
21#include <errno.h>
22#include <fcntl.h>
23#include <fts.h>
24#include <grp.h>
25#include <inttypes.h>
26#include <search.h>
27#include <stdio.h>
28#include <stdlib.h>
29#include <string.h>
30#include <unistd.h>
31
32#include "extern.h"
33
/*
 * We grow our file list in chunks of this many entries so as not to
 * reallocate it one entry at a time (see flist_realloc()).
 * Preferably we get by with one or two allocations.
 */
#define	FLIST_CHUNK_SIZE (1024)
40
/*
 * These flags are part of the rsync protocol.
 * They are sent as the first byte for a file transmission and encode
 * information that affects subsequent transmissions.
 * A flag byte of zero marks the end of the file list, which is why
 * flist_send() always sets at least one bit.
 */
#define FLIST_MODE_SAME  0x0002 /* mode omitted: same as previous entry */
#define	FLIST_GID_SAME	 0x0010 /* gid omitted: same as previous entry */
#define	FLIST_NAME_SAME  0x0020 /* name reuses a prefix of the previous name */
#define FLIST_NAME_LONG	 0x0040 /* name length is an integer, not a byte */
#define FLIST_TIME_SAME  0x0080 /* mtime omitted: same as previous entry */
51
/*
 * Combination of name and numeric id for groups and users.
 * The name is heap-allocated and owned by the structure; arrays of
 * these are released with flist_ident_free().
 */
struct	ident {
	int32_t	 id; /* the gid_t or uid_t as seen by the sender */
	int32_t	 mapped; /* if receiving, the local gid (see flist_gid_remap()) */
	char	*name; /* resolved name, NUL-terminated */
};
60
61/*
62 * Free a list of struct ident previously allocated with flist_gid_add().
63 * Does nothing if the pointer is NULL.
64 */
65static void
66flist_ident_free(struct ident *p, size_t sz)
67{
68	size_t	 i;
69
70	if (NULL == p)
71		return;
72	for (i = 0; i < sz; i++)
73		free(p[i].name);
74	free(p);
75}
76
77/*
78 * Given a list of groups from the remote host, fill in our local
79 * identifiers of the same names.
80 * Use the remote numeric identifier if we can't find the group OR the
81 * group has identifier zero.
82 */
83static void
84flist_gid_remap(struct sess *sess, struct ident *gids, size_t gidsz)
85{
86	size_t	 	 i;
87	struct group	*grp;
88
89	for (i = 0; i < gidsz; i++) {
90		if (NULL == (grp = getgrnam(gids[i].name)))
91			gids[i].mapped = gids[i].id;
92		else if (0 == grp->gr_gid)
93			gids[i].mapped = gids[i].id;
94		else
95			gids[i].mapped = grp->gr_gid;
96		LOG4(sess, "remapped group %s: %" PRId32 " -> %" PRId32,
97			gids[i].name, gids[i].id, gids[i].mapped);
98	}
99}
100
101/*
102 * If "gid" is not part of the list of known groups, add it.
103 * This also verifies that the group name isn't too long.
104 * Return zero on failure, non-zero on success.
105 */
106static int
107flist_gid_add(struct sess *sess, struct ident **gids, size_t *gidsz, gid_t gid)
108{
109	struct group	*grp;
110	size_t		 i, sz;
111	void		*pp;
112
113	for (i = 0; i < *gidsz; i++)
114		if ((*gids)[i].id == (int32_t)gid)
115			return 1;
116
117	/*
118	 * Look us up in /etc/group.
119	 * Make sure that the group name length is sane: we transmit it
120	 * using a single byte.
121	 */
122
123	assert(i == *gidsz);
124	if (NULL == (grp = getgrgid(gid))) {
125		ERR(sess, "%u: unknown gid", gid);
126		return 0;
127	} else if ((sz = strlen(grp->gr_name)) > UINT8_MAX) {
128		ERRX(sess, "%u: group name too long: %s", gid, grp->gr_name);
129		return 0;
130	} else if (0 == sz) {
131		ERRX(sess, "%u: group name zero-length", gid);
132		return 0;
133	}
134
135	/* Add the group to the array. */
136
137	pp = reallocarray(*gids, *gidsz + 1, sizeof(struct ident));
138	if (NULL == pp) {
139		ERR(sess, "reallocarray");
140		return 0;
141	}
142	*gids = pp;
143	(*gids)[*gidsz].id = gid;
144	(*gids)[*gidsz].name = strdup(grp->gr_name);
145	if (NULL == (*gids)[*gidsz].name) {
146		ERR(sess, "strdup");
147		return 0;
148	}
149
150	LOG4(sess, "adding group to list: %s (%u)",
151		(*gids)[*gidsz].name, (*gids)[*gidsz].id);
152	(*gidsz)++;
153	return 1;
154}
155
156/*
157 * Requied way to sort a filename list.
158 */
159static int
160flist_cmp(const void *p1, const void *p2)
161{
162	const struct flist *f1 = p1, *f2 = p2;
163
164	return strcmp(f1->wpath, f2->wpath);
165}
166
167/*
168 * Deduplicate our file list (which may be zero-length).
169 * Returns zero on failure, non-zero on success.
170 */
171static int
172flist_dedupe(struct sess *sess, struct flist **fl, size_t *sz)
173{
174	size_t		 i, j;
175	struct flist	*new;
176	struct flist	*f, *fnext;
177
178	if (*sz == 0)
179		return 1;
180
181	/* Create a new buffer, "new", and copy. */
182
183	new = calloc(*sz, sizeof(struct flist));
184	if (new == NULL) {
185		ERR(sess, "calloc");
186		return 0;
187	}
188
189	for (i = j = 0; i < *sz - 1; i++) {
190		f = &(*fl)[i];
191		fnext = &(*fl)[i + 1];
192
193		if (strcmp(f->wpath, fnext->wpath)) {
194			new[j++] = *f;
195			continue;
196		}
197
198		/*
199		 * Our working (destination) paths are the same.
200		 * If the actual file is the same (as given on the
201		 * command-line), then we can just discard the first.
202		 * Otherwise, we need to bail out: it means we have two
203		 * different files with the relative path on the
204		 * destination side.
205		 */
206
207		if (strcmp(f->path, fnext->path) == 0) {
208			new[j++] = *f;
209			i++;
210			WARNX(sess, "%s: duplicate path: %s",
211			    f->wpath, f->path);
212			free(fnext->path);
213			free(fnext->link);
214			fnext->path = fnext->link = NULL;
215			continue;
216		}
217
218		ERRX(sess, "%s: duplicate working path for "
219		    "possibly different file: %s, %s",
220		    f->wpath, f->path, fnext->path);
221		free(new);
222		return 0;
223	}
224
225	/* Don't forget the last entry. */
226
227	if (i == *sz - 1)
228		new[j++] = (*fl)[i];
229
230	/*
231	 * Reassign to the deduplicated array.
232	 * If we started out with *sz > 0, which we check for at the
233	 * beginning, then we'll always continue having *sz > 0.
234	 */
235
236	free(*fl);
237	*fl = new;
238	*sz = j;
239	assert(*sz);
240	return 1;
241}
242
243/*
244 * We're now going to find our top-level directories.
245 * This only applies to recursive mode.
246 * If we have the first element as the ".", then that's the "top
247 * directory" of our transfer.
248 * Otherwise, mark up all top-level directories in the set.
249 */
250static void
251flist_topdirs(struct sess *sess, struct flist *fl, size_t flsz)
252{
253	size_t		 i;
254	const char	*cp;
255
256	if (!sess->opts->recursive)
257		return;
258
259	if (flsz && strcmp(fl[0].wpath, ".")) {
260		for (i = 0; i < flsz; i++) {
261			if (!S_ISDIR(fl[i].st.mode))
262				continue;
263			cp = strchr(fl[i].wpath, '/');
264			if (cp != NULL && cp[1] != '\0')
265				continue;
266			fl[i].st.flags |= FLSTAT_TOP_DIR;
267			LOG4(sess, "%s: top-level", fl[i].wpath);
268		}
269	} else if (flsz) {
270		fl[0].st.flags |= FLSTAT_TOP_DIR;
271		LOG4(sess, "%s: top-level", fl[0].wpath);
272	}
273}
274
/*
 * Decide whether an entry returned by fts_read(3) belongs in the file
 * list.
 * Regular files, directories in pre-order, and symbolic links (dangling
 * or not) are accepted.
 * Anything else is rejected, with a warning for the kinds we know how
 * to describe.
 * Return zero to skip, non-zero to examine.
 */
static int
flist_fts_check(struct sess *sess, FTSENT *ent)
{

	switch (ent->fts_info) {
	case FTS_F:
	case FTS_D:
	case FTS_SL:
	case FTS_SLNONE:
		return 1;
	case FTS_DC:
		WARNX(sess, "%s: directory cycle", ent->fts_path);
		break;
	case FTS_DNR:
		/* WARN() reports errno, so load the per-entry error. */
		errno = ent->fts_errno;
		WARN(sess, "%s: unreadable directory", ent->fts_path);
		break;
	case FTS_DOT:
		WARNX(sess, "%s: skipping dot-file", ent->fts_path);
		break;
	case FTS_ERR:
		errno = ent->fts_errno;
		WARN(sess, "%s", ent->fts_path);
		break;
	case FTS_DEFAULT:
		WARNX(sess, "%s: skipping special", ent->fts_path);
		break;
	case FTS_NS:
		errno = ent->fts_errno;
		WARN(sess, "%s: could not stat", ent->fts_path);
		break;
	default:
		/* E.g., post-order directories: skipped silently. */
		break;
	}

	return 0;
}
310
311/*
312 * Copy necessary elements in "st" into the fields of "f".
313 */
314static void
315flist_copy_stat(struct flist *f, const struct stat *st)
316{
317	f->st.mode = st->st_mode;
318	f->st.uid = st->st_uid;
319	f->st.gid = st->st_gid;
320	f->st.size = st->st_size;
321	f->st.mtime = st->st_mtime;
322}
323
324void
325flist_free(struct flist *f, size_t sz)
326{
327	size_t	 i;
328
329	if (f == NULL)
330		return;
331
332	for (i = 0; i < sz; i++) {
333		free(f[i].path);
334		free(f[i].link);
335	}
336	free(f);
337}
338
339/*
340 * Send a list of struct ident.
341 * See flist_recv_ident().
342 * We should only do this if we're preserving gids/uids.
343 * Return zero on failure, non-zero on success.
344 */
345static int
346flist_send_ident(struct sess *sess,
347	int fd, const struct ident *ids, size_t idsz)
348{
349	size_t	 i, sz;
350
351	for (i = 0; i < idsz; i++) {
352		assert(NULL != ids[i].name);
353		sz = strlen(ids[i].name);
354		assert(sz > 0 && sz <= UINT8_MAX);
355		if (!io_write_int(sess, fd, ids[i].id)) {
356			ERRX1(sess, "io_write_int");
357			return 0;
358		} else if (!io_write_byte(sess, fd, sz)) {
359			ERRX1(sess, "io_write_byte");
360			return 0;
361		} else if (!io_write_buf(sess, fd, ids[i].name, sz)) {
362			ERRX1(sess, "io_write_byte");
363			return 0;
364		}
365	}
366
367	if (!io_write_int(sess, fd, 0)) {
368		ERRX1(sess, "io_write_int");
369		return 0;
370	}
371
372	return 1;
373}
374
375/*
376 * Serialise our file list (which may be zero-length) to the wire.
377 * Makes sure that the receiver isn't going to block on sending us
378 * return messages on the log channel.
379 * Return zero on failure, non-zero on success.
380 */
381int
382flist_send(struct sess *sess, int fdin, int fdout, const struct flist *fl,
383    size_t flsz)
384{
385	size_t		 i, sz, gidsz = 0;
386	uint8_t		 flag;
387	const struct flist *f;
388	const char	*fn;
389	struct ident	*gids = NULL;
390	int		 rc = 0;
391
392	/* Double-check that we've no pending multiplexed data. */
393
394	LOG2(sess, "sending file metadata list: %zu", flsz);
395
396	for (i = 0; i < flsz; i++) {
397		f = &fl[i];
398		fn = f->wpath;
399		sz = strlen(f->wpath);
400		assert(sz > 0);
401
402		/*
403		 * If applicable, unclog the read buffer.
404		 * This happens when the receiver has a lot of log
405		 * messages and all we're doing is sending our file list
406		 * without checking for messages.
407		 */
408
409		if (sess->mplex_reads &&
410		    io_read_check(sess, fdin) &&
411		     !io_read_flush(sess, fdin)) {
412			ERRX1(sess, "io_read_flush");
413			goto out;
414		}
415
416		/*
417		 * For ease, make all of our filenames be "long"
418		 * regardless their actual length.
419		 * This also makes sure that we don't transmit a zero
420		 * byte unintentionally.
421		 */
422
423		flag = FLIST_NAME_LONG;
424
425		LOG3(sess, "%s: sending file metadata: "
426			"size %jd, mtime %jd, mode %o",
427			fn, (intmax_t)f->st.size,
428			(intmax_t)f->st.mtime, f->st.mode);
429
430		/* Now write to the wire. */
431		/* FIXME: buffer this. */
432
433		if (!io_write_byte(sess, fdout, flag)) {
434			ERRX1(sess, "io_write_byte");
435			goto out;
436		} else if (!io_write_int(sess, fdout, sz)) {
437			ERRX1(sess, "io_write_int");
438			goto out;
439		} else if (!io_write_buf(sess, fdout, fn, sz)) {
440			ERRX1(sess, "io_write_buf");
441			goto out;
442		} else if (!io_write_long(sess, fdout, f->st.size)) {
443			ERRX1(sess, "io_write_long");
444			goto out;
445		} else if (!io_write_int(sess, fdout, f->st.mtime)) {
446			ERRX1(sess, "io_write_int");
447			goto out;
448		} else if (!io_write_int(sess, fdout, f->st.mode)) {
449			ERRX1(sess, "io_write_int");
450			goto out;
451		}
452
453		/* Conditional part: gid. */
454
455		if (sess->opts->preserve_gids) {
456			if (!io_write_int(sess, fdout, f->st.gid)) {
457				ERRX1(sess, "io_write_int");
458				goto out;
459			}
460			if (!flist_gid_add(sess, &gids, &gidsz, f->st.gid)) {
461				ERRX1(sess, "flist_gid_add");
462				goto out;
463			}
464		}
465
466		/* Conditional part: link. */
467
468		if (S_ISLNK(f->st.mode) &&
469		    sess->opts->preserve_links) {
470			fn = f->link;
471			sz = strlen(f->link);
472			if (!io_write_int(sess, fdout, sz)) {
473				ERRX1(sess, "io_write_int");
474				goto out;
475			}
476			if (!io_write_buf(sess, fdout, fn, sz)) {
477				ERRX1(sess, "io_write_int");
478				goto out;
479			}
480		}
481
482		if (S_ISREG(f->st.mode))
483			sess->total_size += f->st.size;
484	}
485
486	/* Signal end of file list. */
487
488	if (!io_write_byte(sess, fdout, 0)) {
489		ERRX1(sess, "io_write_byte");
490		goto out;
491	}
492
493	/* Conditionally write gid list and terminator. */
494
495	if (sess->opts->preserve_gids) {
496		LOG2(sess, "sending gid list: %zu", gidsz);
497		if (!flist_send_ident(sess, fdout, gids, gidsz)) {
498			ERRX1(sess, "flist_send_ident");
499			goto out;
500		}
501	}
502
503	rc = 1;
504out:
505	flist_ident_free(gids, gidsz);
506	return rc;
507}
508
509/*
510 * Read the filename of a file list.
511 * This is the most expensive part of the file list transfer, so a lot
512 * of attention has gone into transmitting as little as possible.
513 * Micro-optimisation, but whatever.
514 * Fills in "f" with the full path on success.
515 * Returns zero on failure, non-zero on success.
516 */
517static int
518flist_recv_name(struct sess *sess, int fd, struct flist *f, uint8_t flags,
519    char last[MAXPATHLEN])
520{
521	uint8_t		 bval;
522	size_t		 partial = 0;
523	size_t		 pathlen = 0, len;
524
525	/*
526	 * Read our filename.
527	 * If we have FLIST_NAME_SAME, we inherit some of the last
528	 * transmitted name.
529	 * If we have FLIST_NAME_LONG, then the string length is greater
530	 * than byte-size.
531	 */
532
533	if (FLIST_NAME_SAME & flags) {
534		if (!io_read_byte(sess, fd, &bval)) {
535			ERRX1(sess, "io_read_byte");
536			return 0;
537		}
538		partial = bval;
539	}
540
541	/* Get the (possibly-remaining) filename length. */
542
543	if (FLIST_NAME_LONG & flags) {
544		if (!io_read_size(sess, fd, &pathlen)) {
545			ERRX1(sess, "io_read_size");
546			return 0;
547		}
548	} else {
549		if (!io_read_byte(sess, fd, &bval)) {
550			ERRX1(sess, "io_read_byte");
551			return 0;
552		}
553		pathlen = bval;
554	}
555
556	/* Allocate our full filename length. */
557	/* FIXME: maximum pathname length. */
558
559	if ((len = pathlen + partial) == 0) {
560		ERRX(sess, "security violation: "
561			"zero-length pathname");
562		return 0;
563	}
564
565	if ((f->path = malloc(len + 1)) == NULL) {
566		ERR(sess, "malloc");
567		return 0;
568	}
569	f->path[len] = '\0';
570
571	if (FLIST_NAME_SAME & flags)
572		memcpy(f->path, last, partial);
573
574	if (!io_read_buf(sess, fd, f->path + partial, pathlen)) {
575		ERRX1(sess, "io_read_buf");
576		return 0;
577	}
578
579	if (f->path[0] == '/') {
580		ERRX(sess, "security violation: "
581			"absolute pathname: %s", f->path);
582		return 0;
583	}
584
585	if (strstr(f->path, "/../") != NULL ||
586	    (len > 2 && strcmp(f->path + len - 3, "/..") == 0) ||
587	    (len > 2 && strncmp(f->path, "../", 3) == 0) ||
588	    strcmp(f->path, "..") == 0) {
589		ERRX(sess, "%s: security violation: "
590			"backtracking pathname", f->path);
591		return 0;
592	}
593
594	/* Record our last path and construct our filename. */
595
596	strlcpy(last, f->path, MAXPATHLEN);
597	f->wpath = f->path;
598	return 1;
599}
600
601/*
602 * Reallocate a file list in chunks of FLIST_CHUNK_SIZE;
603 * Returns zero on failure, non-zero on success.
604 */
605static int
606flist_realloc(struct sess *sess, struct flist **fl, size_t *sz, size_t *max)
607{
608	void	*pp;
609
610	if (*sz + 1 <= *max)  {
611		(*sz)++;
612		return 1;
613	}
614
615	pp = recallocarray(*fl, *max,
616		*max + FLIST_CHUNK_SIZE, sizeof(struct flist));
617	if (pp == NULL) {
618		ERR(sess, "recallocarray");
619		return 0;
620	}
621	*fl = pp;
622	*max += FLIST_CHUNK_SIZE;
623	(*sz)++;
624	return 1;
625}
626
627/*
628 * Copy a regular or symbolic link file "path" into "f".
629 * This handles the correct path creation and symbolic linking.
630 * Returns zero on failure, non-zero on success.
631 */
632static int
633flist_append(struct sess *sess, struct flist *f, struct stat *st,
634    const char *path)
635{
636
637	/*
638	 * Copy the full path for local addressing and transmit
639	 * only the filename part for the receiver.
640	 */
641
642	if ((f->path = strdup(path)) == NULL) {
643		ERR(sess, "strdup");
644		return 0;
645	}
646
647	if ((f->wpath = strrchr(f->path, '/')) == NULL)
648		f->wpath = f->path;
649	else
650		f->wpath++;
651
652	/*
653	 * On the receiving end, we'll strip out all bits on the
654	 * mode except for the file permissions.
655	 * No need to warn about it here.
656	 */
657
658	flist_copy_stat(f, st);
659
660	/* Optionally copy link information. */
661
662	if (S_ISLNK(st->st_mode)) {
663		f->link = symlink_read(sess, f->path);
664		if (f->link == NULL) {
665			ERRX1(sess, "symlink_read");
666			return 0;
667		}
668	}
669
670	return 1;
671}
672
673/*
674 * Receive a list of struct ident.
675 * See flist_send_ident().
676 * We should only do this if we're preserving gids/uids.
677 * Return zero on failure, non-zero on success.
678 */
679static int
680flist_recv_ident(struct sess *sess,
681	int fd, struct ident **ids, size_t *idsz)
682{
683	int32_t	 id;
684	uint8_t	 sz;
685	void	*pp;
686
687	for (;;) {
688		if (!io_read_int(sess, fd, &id)) {
689			ERRX1(sess, "io_read_int");
690			return 0;
691		} else if (0 == id)
692			break;
693
694		pp = reallocarray(*ids,
695			*idsz + 1, sizeof(struct ident));
696		if (NULL == pp) {
697			ERR(sess, "reallocarray");
698			return 0;
699		}
700		*ids = pp;
701		memset(&(*ids)[*idsz], 0, sizeof(struct ident));
702		if (!io_read_byte(sess, fd, &sz)) {
703			ERRX1(sess, "io_read_byte");
704			return 0;
705		}
706		(*ids)[*idsz].id = id;
707		(*ids)[*idsz].name = calloc(sz + 1, 1);
708		if (NULL == (*ids)[*idsz].name) {
709			ERR(sess, "calloc");
710			return 0;
711		}
712		if (!io_read_buf(sess, fd, (*ids)[*idsz].name, sz)) {
713			ERRX1(sess, "io_read_buf");
714			return 0;
715		}
716		(*idsz)++;
717	}
718
719	return 1;
720}
721
722/*
723 * Receive a file list from the wire, filling in length "sz" (which may
724 * possibly be zero) and list "flp" on success.
725 * Return zero on failure, non-zero on success.
726 */
727int
728flist_recv(struct sess *sess, int fd, struct flist **flp, size_t *sz)
729{
730	struct flist	*fl = NULL;
731	struct flist	*ff;
732	const struct flist *fflast = NULL;
733	size_t		 i, j, flsz = 0, flmax = 0, lsz, gidsz = 0;
734	uint8_t		 flag;
735	char		 last[MAXPATHLEN];
736	uint64_t	 lval; /* temporary values... */
737	int32_t		 ival;
738	struct ident	*gids = NULL;
739
740	last[0] = '\0';
741
742	for (;;) {
743		if (!io_read_byte(sess, fd, &flag)) {
744			ERRX1(sess, "io_read_byte");
745			goto out;
746		} else if (flag == 0)
747			break;
748
749		if (!flist_realloc(sess, &fl, &flsz, &flmax)) {
750			ERRX1(sess, "flist_realloc");
751			goto out;
752		}
753
754		ff = &fl[flsz - 1];
755		fflast = flsz > 1 ? &fl[flsz - 2] : NULL;
756
757		/* Filename first. */
758
759		if (!flist_recv_name(sess, fd, ff, flag, last)) {
760			ERRX1(sess, "flist_recv_name");
761			goto out;
762		}
763
764		/* Read the file size. */
765
766		if (!io_read_ulong(sess, fd, &lval)) {
767			ERRX1(sess, "io_read_ulong");
768			goto out;
769		}
770		ff->st.size = lval;
771
772		/* Read the modification time. */
773
774		if (!(FLIST_TIME_SAME & flag)) {
775			if (!io_read_int(sess, fd, &ival)) {
776				ERRX1(sess, "io_read_int");
777				goto out;
778			}
779			ff->st.mtime = ival;
780		} else if (fflast == NULL) {
781			ERRX(sess, "same time without last entry");
782			goto out;
783		}  else
784			ff->st.mtime = fflast->st.mtime;
785
786		/* Read the file mode. */
787
788		if (!(FLIST_MODE_SAME & flag)) {
789			if (!io_read_int(sess, fd, &ival)) {
790				ERRX1(sess, "io_read_int");
791				goto out;
792			}
793			ff->st.mode = ival;
794		} else if (fflast == NULL) {
795			ERRX(sess, "same mode without last entry");
796			goto out;
797		} else
798			ff->st.mode = fflast->st.mode;
799
800		/* Conditional part: gid. */
801
802		if (sess->opts->preserve_gids) {
803			if ( ! (FLIST_GID_SAME & flag)) {
804				if ( ! io_read_int(sess, fd, &ival)) {
805					ERRX1(sess, "io_read_int");
806					goto out;
807				}
808				ff->st.gid = ival;
809			} else if (NULL == fflast) {
810				ERRX(sess, "same gid "
811					"without last entry");
812				goto out;
813			} else
814				ff->st.gid = fflast->st.gid;
815		}
816
817		/* Conditional part: link. */
818
819		if (S_ISLNK(ff->st.mode) &&
820		    sess->opts->preserve_links) {
821			if (!io_read_size(sess, fd, &lsz)) {
822				ERRX1(sess, "io_read_size");
823				goto out;
824			} else if (lsz == 0) {
825				ERRX(sess, "empty link name");
826				goto out;
827			}
828			ff->link = calloc(lsz + 1, 1);
829			if (ff->link == NULL) {
830				ERR(sess, "calloc");
831				goto out;
832			}
833			if (!io_read_buf(sess, fd, ff->link, lsz)) {
834				ERRX1(sess, "io_read_buf");
835				goto out;
836			}
837		}
838
839		LOG3(sess, "%s: received file metadata: "
840			"size %jd, mtime %jd, mode %o",
841			ff->path, (intmax_t)ff->st.size,
842			(intmax_t)ff->st.mtime, ff->st.mode);
843
844		if (S_ISREG(ff->st.mode))
845			sess->total_size += ff->st.size;
846	}
847
848	/*
849	 * Now conditionally read the group list.
850	 * We then remap all group identifiers to the local ids.
851	 */
852
853	if (sess->opts->preserve_gids) {
854		if (!flist_recv_ident(sess, fd, &gids, &gidsz)) {
855			ERRX1(sess, "flist_recv_ident");
856			goto out;
857		}
858		LOG2(sess, "received gid list: %zu", gidsz);
859		flist_gid_remap(sess, gids, gidsz);
860	}
861
862	/* Remember to order the received list. */
863
864	LOG2(sess, "received file metadata list: %zu", flsz);
865	qsort(fl, flsz, sizeof(struct flist), flist_cmp);
866	flist_topdirs(sess, fl, flsz);
867	*sz = flsz;
868	*flp = fl;
869
870	/* Lastly, reassign group identifiers. */
871
872	if (sess->opts->preserve_gids) {
873		for (i = 0; i < flsz; i++) {
874			for (j = 0; j < gidsz; j++)
875				if ((int32_t)fl[i].st.gid == gids[j].id)
876					break;
877			assert(j < gidsz);
878			fl[i].st.gid = gids[j].mapped;
879		}
880	}
881
882	flist_ident_free(gids, gidsz);
883	return 1;
884out:
885	flist_free(fl, flsz);
886	flist_ident_free(gids, gidsz);
887	*sz = 0;
888	*flp = NULL;
889	return 0;
890}
891
892/*
893 * Generate a flist possibly-recursively given a file root, which may
894 * also be a regular file or symlink.
895 * On success, augments the generated list in "flp" of length "sz".
896 * Returns zero on failure, non-zero on success.
897 */
898static int
899flist_gen_dirent(struct sess *sess, char *root, struct flist **fl, size_t *sz,
900    size_t *max)
901{
902	char		*cargv[2], *cp;
903	int		 rc = 0;
904	FTS		*fts;
905	FTSENT		*ent;
906	struct flist	*f;
907	size_t		 flsz = 0, stripdir;
908	struct stat	 st;
909
910	cargv[0] = root;
911	cargv[1] = NULL;
912
913	/*
914	 * If we're a file, then revert to the same actions we use for
915	 * the non-recursive scan.
916	 */
917
918	if (lstat(root, &st) == -1) {
919		ERR(sess, "%s: lstat", root);
920		return 0;
921	} else if (S_ISREG(st.st_mode)) {
922		if (!flist_realloc(sess, fl, sz, max)) {
923			ERRX1(sess, "flist_realloc");
924			return 0;
925		}
926		f = &(*fl)[(*sz) - 1];
927		assert(f != NULL);
928
929		if (!flist_append(sess, f, &st, root)) {
930			ERRX1(sess, "flist_append");
931			return 0;
932		} else if (unveil(root, "r") == -1) {
933			ERR(sess, "%s: unveil", root);
934			return 0;
935		}
936		return 1;
937	} else if (S_ISLNK(st.st_mode)) {
938		if (!sess->opts->preserve_links) {
939			WARNX(sess, "%s: skipping symlink", root);
940			return 1;
941		} else if (!flist_realloc(sess, fl, sz, max)) {
942			ERRX1(sess, "flist_realloc");
943			return 0;
944		}
945		f = &(*fl)[(*sz) - 1];
946		assert(f != NULL);
947
948		if (!flist_append(sess, f, &st, root)) {
949			ERRX1(sess, "flist_append");
950			return 0;
951		} else if (unveil(root, "r") == -1) {
952			ERR(sess, "%s: unveil", root);
953			return 0;
954		}
955		return 1;
956	} else if (!S_ISDIR(st.st_mode)) {
957		WARNX(sess, "%s: skipping special", root);
958		return 1;
959	}
960
961	/*
962	 * If we end with a slash, it means that we're not supposed to
963	 * copy the directory part itself---only the contents.
964	 * So set "stripdir" to be what we take out.
965	 */
966
967	stripdir = strlen(root);
968	assert(stripdir > 0);
969	if (root[stripdir - 1] != '/')
970		stripdir = 0;
971
972	/*
973	 * If we're not stripping anything, then see if we need to strip
974	 * out the leading material in the path up to and including the
975	 * last directory component.
976	 */
977
978	if (stripdir == 0)
979		if ((cp = strrchr(root, '/')) != NULL)
980			stripdir = cp - root + 1;
981
982	/*
983	 * If we're recursive, then we need to take down all of the
984	 * files and directory components, so use fts(3).
985	 * Copying the information file-by-file into the flstat.
986	 * We'll make sense of it in flist_send.
987	 */
988
989	if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) {
990		ERR(sess, "fts_open");
991		return 0;
992	}
993
994	errno = 0;
995	while ((ent = fts_read(fts)) != NULL) {
996		if (!flist_fts_check(sess, ent)) {
997			errno = 0;
998			continue;
999		}
1000
1001		/* We don't allow symlinks without -l. */
1002
1003		assert(ent->fts_statp != NULL);
1004		if (S_ISLNK(ent->fts_statp->st_mode) &&
1005		    !sess->opts->preserve_links) {
1006			WARNX(sess, "%s: skipping "
1007				"symlink", ent->fts_path);
1008			continue;
1009		}
1010
1011		/* Allocate a new file entry. */
1012
1013		if (!flist_realloc(sess, fl, sz, max)) {
1014			ERRX1(sess, "flist_realloc");
1015			goto out;
1016		}
1017		flsz++;
1018		f = &(*fl)[*sz - 1];
1019
1020		/* Our path defaults to "." for the root. */
1021
1022		if ('\0' == ent->fts_path[stripdir]) {
1023			if (asprintf(&f->path, "%s.", ent->fts_path) < 0) {
1024				ERR(sess, "asprintf");
1025				f->path = NULL;
1026				goto out;
1027			}
1028		} else {
1029			if ((f->path = strdup(ent->fts_path)) == NULL) {
1030				ERR(sess, "strdup");
1031				goto out;
1032			}
1033		}
1034
1035		f->wpath = f->path + stripdir;
1036		flist_copy_stat(f, ent->fts_statp);
1037
1038		/* Optionally copy link information. */
1039
1040		if (S_ISLNK(ent->fts_statp->st_mode)) {
1041			f->link = symlink_read(sess, f->path);
1042			if (f->link == NULL) {
1043				ERRX1(sess, "symlink_read");
1044				goto out;
1045			}
1046		}
1047
1048		/* Reset errno for next fts_read() call. */
1049		errno = 0;
1050	}
1051	if (errno) {
1052		ERR(sess, "fts_read");
1053		goto out;
1054	} else if (unveil(root, "r") == -1) {
1055		ERR(sess, "%s: unveil", root);
1056		goto out;
1057	}
1058
1059	LOG3(sess, "generated %zu filenames: %s", flsz, root);
1060	rc = 1;
1061out:
1062	fts_close(fts);
1063	return rc;
1064}
1065
/*
 * Generate a flist recursively by handing each of the "argc" roots in
 * "argv" (directories, files, symlinks, doesn't matter; argc >0) to
 * flist_gen_dirent() in turn.
 * On success, the generated list is in "flp" with length "sz", which
 * may be zero.
 * On the first failure, the list is freed and "flp" and "sz" are set
 * to NULL and zero.
 * Returns zero on failure, non-zero on success.
 */
static int
flist_gen_dirs(struct sess *sess, size_t argc, char **argv, struct flist **flp,
    size_t *sz)
{
	size_t		 n, max = 0;

	for (n = 0; n < argc; n++) {
		if (flist_gen_dirent(sess, argv[n], flp, sz, &max))
			continue;

		/* Free all allocated entries, not just those in use. */

		ERRX1(sess, "flist_gen_dirent");
		flist_free(*flp, max);
		*flp = NULL;
		*sz = 0;
		return 0;
	}

	LOG2(sess, "recursively generated %zu filenames", *sz);
	return 1;
}
1094
1095/*
1096 * Generate list of files from the command-line argc (>0) and argv.
1097 * On success, stores the generated list in "flp" with length "sz",
1098 * which may be zero.
1099 * Returns zero on failure, non-zero on success.
1100 */
1101static int
1102flist_gen_files(struct sess *sess, size_t argc, char **argv,
1103    struct flist **flp, size_t *sz)
1104{
1105	struct flist	*fl = NULL, *f;
1106	size_t		 i, flsz = 0;
1107	struct stat	 st;
1108
1109	assert(argc);
1110
1111	if ((fl = calloc(argc, sizeof(struct flist))) == NULL) {
1112		ERR(sess, "calloc");
1113		return 0;
1114	}
1115
1116	for (i = 0; i < argc; i++) {
1117		if ('\0' == argv[i][0])
1118			continue;
1119		if (lstat(argv[i], &st) == -1) {
1120			ERR(sess, "%s: lstat", argv[i]);
1121			goto out;
1122		}
1123
1124		/*
1125		 * File type checks.
1126		 * In non-recursive mode, we don't accept directories.
1127		 * We also skip symbolic links without -l.
1128		 * Beyond that, we only accept regular files.
1129		 */
1130
1131		if (S_ISDIR(st.st_mode)) {
1132			WARNX(sess, "%s: skipping directory", argv[i]);
1133			continue;
1134		} else if (S_ISLNK(st.st_mode)) {
1135			if (!sess->opts->preserve_links) {
1136				WARNX(sess, "%s: skipping "
1137					"symlink", argv[i]);
1138				continue;
1139			}
1140		} else if (!S_ISREG(st.st_mode)) {
1141			WARNX(sess, "%s: skipping special", argv[i]);
1142			continue;
1143		}
1144
1145
1146		f = &fl[flsz++];
1147		assert(f != NULL);
1148
1149		/* Add this file to our file-system worldview. */
1150
1151		if (unveil(argv[i], "r") == -1) {
1152			ERR(sess, "%s: unveil", argv[i]);
1153			goto out;
1154		} else if (!flist_append(sess, f, &st, argv[i])) {
1155			ERRX1(sess, "flist_append");
1156			goto out;
1157		}
1158	}
1159
1160	LOG2(sess, "non-recursively generated %zu filenames", flsz);
1161	*sz = flsz;
1162	*flp = fl;
1163	return 1;
1164out:
1165	flist_free(fl, argc);
1166	*sz = 0;
1167	*flp = NULL;
1168	return 0;
1169}
1170
1171/*
1172 * Generate a sorted, de-duplicated list of file metadata.
1173 * In non-recursive mode (the default), we use only the files we're
1174 * given.
1175 * Otherwise, directories are recursively examined.
1176 * Returns zero on failure, non-zero on success.
1177 * On success, "fl" will need to be freed with flist_free().
1178 */
1179int
1180flist_gen(struct sess *sess, size_t argc, char **argv, struct flist **flp,
1181    size_t *sz)
1182{
1183	int	 rc;
1184
1185	assert(argc > 0);
1186	rc = sess->opts->recursive ?
1187		flist_gen_dirs(sess, argc, argv, flp, sz) :
1188		flist_gen_files(sess, argc, argv, flp, sz);
1189
1190	/* After scanning, lock our file-system view. */
1191
1192	if (unveil(NULL, NULL) == -1) {
1193		ERR(sess, "unveil");
1194		return 0;
1195	} else if (!rc)
1196		return 0;
1197
1198	qsort(*flp, *sz, sizeof(struct flist), flist_cmp);
1199
1200	if (flist_dedupe(sess, flp, sz)) {
1201		flist_topdirs(sess, *flp, *sz);
1202		return 1;
1203	}
1204
1205	ERRX1(sess, "flist_dedupe");
1206	flist_free(*flp, *sz);
1207	*flp = NULL;
1208	*sz = 0;
1209	return 0;
1210}
1211
1212/*
1213 * Generate a list of files in root to delete that are within the
1214 * top-level directories stipulated by "wfl".
1215 * Only handles symbolic links, directories, and regular files.
1216 * Returns zero on failure (fl and flsz will be NULL and zero), non-zero
1217 * on success.
1218 * On success, "fl" will need to be freed with flist_free().
1219 */
1220int
1221flist_gen_dels(struct sess *sess, const char *root, struct flist **fl,
1222    size_t *sz,	const struct flist *wfl, size_t wflsz)
1223{
1224	char		**cargv = NULL;
1225	int		  rc = 0, c;
1226	FTS		 *fts = NULL;
1227	FTSENT		 *ent;
1228	struct flist	 *f;
1229	size_t		  cargvs = 0, i, j, max = 0, stripdir;
1230	ENTRY		  hent;
1231	ENTRY		 *hentp;
1232
1233	*fl = NULL;
1234	*sz = 0;
1235
1236	/* Only run this code when we're recursive. */
1237
1238	if (!sess->opts->recursive)
1239		return 1;
1240
1241	/*
1242	 * Gather up all top-level directories for scanning.
1243	 * This is stipulated by rsync's --delete behaviour, where we
1244	 * only delete things in the top-level directories given on the
1245	 * command line.
1246	 */
1247
1248	assert(wflsz > 0);
1249	for (i = 0; i < wflsz; i++)
1250		if (FLSTAT_TOP_DIR & wfl[i].st.flags)
1251			cargvs++;
1252	if (cargvs == 0)
1253		return 1;
1254
1255	if ((cargv = calloc(cargvs + 1, sizeof(char *))) == NULL) {
1256		ERR(sess, "calloc");
1257		return 0;
1258	}
1259
1260	/*
1261	 * If we're given just a "." as the first entry, that means
1262	 * we're doing a relative copy with a trailing slash.
1263	 * Special-case this just for the sake of simplicity.
1264	 * Otherwise, look through all top-levels.
1265	 */
1266
1267	if (wflsz && strcmp(wfl[0].wpath, ".") == 0) {
1268		assert(cargvs == 1);
1269		assert(S_ISDIR(wfl[0].st.mode));
1270		if (asprintf(&cargv[0], "%s/", root) < 0) {
1271			ERR(sess, "asprintf");
1272			cargv[0] = NULL;
1273			goto out;
1274		}
1275		cargv[1] = NULL;
1276	} else {
1277		for (i = j = 0; i < wflsz; i++) {
1278			if (!(FLSTAT_TOP_DIR & wfl[i].st.flags))
1279				continue;
1280			assert(S_ISDIR(wfl[i].st.mode));
1281			assert(strcmp(wfl[i].wpath, "."));
1282			c = asprintf(&cargv[j], "%s/%s", root, wfl[i].wpath);
1283			if (c < 0) {
1284				ERR(sess, "asprintf");
1285				cargv[j] = NULL;
1286				goto out;
1287			}
1288			LOG4(sess, "%s: will scan "
1289				"for deletions", cargv[j]);
1290			j++;
1291		}
1292		assert(j == cargvs);
1293		cargv[j] = NULL;
1294	}
1295
1296	LOG2(sess, "delete from %zu directories", cargvs);
1297
1298	/*
1299	 * Next, use the standard hcreate(3) hashtable interface to hash
1300	 * all of the files that we want to synchronise.
1301	 * This way, we'll be able to determine which files we want to
1302	 * delete in O(n) time instead of O(n * search) time.
1303	 * Plus, we can do the scan in-band and only allocate the files
1304	 * we want to delete.
1305	 */
1306
1307	if (!hcreate(wflsz)) {
1308		ERR(sess, "hcreate");
1309		goto out;
1310	}
1311
1312	for (i = 0; i < wflsz; i++) {
1313		memset(&hent, 0, sizeof(ENTRY));
1314		if ((hent.key = strdup(wfl[i].wpath)) == NULL) {
1315			ERR(sess, "strdup");
1316			goto out;
1317		}
1318		if ((hentp = hsearch(hent, ENTER)) == NULL) {
1319			ERR(sess, "hsearch");
1320			goto out;
1321		} else if (hentp->key != hent.key) {
1322			ERRX(sess, "%s: duplicate", wfl[i].wpath);
1323			free(hent.key);
1324			goto out;
1325		}
1326	}
1327
1328	/*
1329	 * Now we're going to try to descend into all of the top-level
1330	 * directories stipulated by the file list.
1331	 * If the directories don't exist, it's ok.
1332	 */
1333
1334	if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) {
1335		ERR(sess, "fts_open");
1336		goto out;
1337	}
1338
1339	stripdir = strlen(root) + 1;
1340	errno = 0;
1341	while ((ent = fts_read(fts)) != NULL) {
1342		if (ent->fts_info == FTS_NS)
1343			continue;
1344		if (!flist_fts_check(sess, ent)) {
1345			errno = 0;
1346			continue;
1347		} else if (stripdir >= ent->fts_pathlen)
1348			continue;
1349
1350		/* Look up in hashtable. */
1351
1352		memset(&hent, 0, sizeof(ENTRY));
1353		hent.key = ent->fts_path + stripdir;
1354		if (hsearch(hent, FIND) != NULL)
1355			continue;
1356
1357		/* Not found: we'll delete it. */
1358
1359		if (!flist_realloc(sess, fl, sz, &max)) {
1360			ERRX1(sess, "flist_realloc");
1361			goto out;
1362		}
1363		f = &(*fl)[*sz - 1];
1364
1365		if ((f->path = strdup(ent->fts_path)) == NULL) {
1366			ERR(sess, "strdup");
1367			goto out;
1368		}
1369		f->wpath = f->path + stripdir;
1370		assert(ent->fts_statp != NULL);
1371		flist_copy_stat(f, ent->fts_statp);
1372		errno = 0;
1373	}
1374
1375	if (errno) {
1376		ERR(sess, "fts_read");
1377		goto out;
1378	}
1379
1380	qsort(*fl, *sz, sizeof(struct flist), flist_cmp);
1381	rc = 1;
1382out:
1383	if (fts != NULL)
1384		fts_close(fts);
1385	for (i = 0; i < cargvs; i++)
1386		free(cargv[i]);
1387	free(cargv);
1388	hdestroy();
1389	return rc;
1390}
1391
1392/*
1393 * Delete all files and directories in "fl".
1394 * If called with a zero-length "fl", does nothing.
1395 * If dry_run is specified, simply write what would be done.
1396 * Return zero on failure, non-zero on success.
1397 */
1398int
1399flist_del(struct sess *sess, int root, const struct flist *fl, size_t flsz)
1400{
1401	ssize_t	 i;
1402	int	 flag;
1403
1404	if (flsz == 0)
1405		return 1;
1406
1407	assert(sess->opts->del);
1408	assert(sess->opts->recursive);
1409
1410	for (i = flsz - 1; i >= 0; i--) {
1411		LOG1(sess, "%s: deleting", fl[i].wpath);
1412		if (sess->opts->dry_run)
1413			continue;
1414		assert(root != -1);
1415		flag = S_ISDIR(fl[i].st.mode) ? AT_REMOVEDIR : 0;
1416		if (unlinkat(root, fl[i].wpath, flag) == -1 &&
1417		    errno != ENOENT) {
1418			ERR(sess, "%s: unlinkat", fl[i].wpath);
1419			return 0;
1420		}
1421	}
1422
1423	return 1;
1424}
1425