flist.c revision 1.7
/*	$Id: flist.c,v 1.7 2019/02/12 19:02:06 benno Exp $ */
/*
 * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include <sys/param.h>
#include <sys/stat.h>

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <fts.h>
#include <grp.h>
#include <inttypes.h>
#include <search.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "extern.h"

/*
 * We allocate our file list in chunk sizes so as not to do it one by
 * one.
 * Preferably we get one or two allocations.
 */
#define	FLIST_CHUNK_SIZE	(1024)

/*
 * These flags are part of the rsync protocol.
 * They are sent as the first byte for a file transmission and encode
 * information that affects subsequent transmissions.
 */
#define FLIST_MODE_SAME  0x0002 /* mode is repeat */
#define	FLIST_GID_SAME	 0x0010 /* gid is repeat */
#define	FLIST_NAME_SAME  0x0020 /* name is repeat */
#define FLIST_NAME_LONG	 0x0040 /* name >255 bytes */
#define FLIST_TIME_SAME  0x0080 /* time is repeat */

/*
 * Combination of name and numeric id for groups and users.
54 */ 55struct ident { 56 int32_t id; /* the gid_t or uid_t */ 57 int32_t mapped; /* if receiving, the mapped gid */ 58 char *name; /* resolved name */ 59}; 60 61/* 62 * Free a list of struct ident previously allocated with flist_gid_add(). 63 * Does nothing if the pointer is NULL. 64 */ 65static void 66flist_ident_free(struct ident *p, size_t sz) 67{ 68 size_t i; 69 70 if (NULL == p) 71 return; 72 for (i = 0; i < sz; i++) 73 free(p[i].name); 74 free(p); 75} 76 77/* 78 * Given a list of groups from the remote host, fill in our local 79 * identifiers of the same names. 80 * Use the remote numeric identifier if we can't find the group OR the 81 * group has identifier zero. 82 */ 83static void 84flist_gid_remap(struct sess *sess, struct ident *gids, size_t gidsz) 85{ 86 size_t i; 87 struct group *grp; 88 89 for (i = 0; i < gidsz; i++) { 90 if (NULL == (grp = getgrnam(gids[i].name))) 91 gids[i].mapped = gids[i].id; 92 else if (0 == grp->gr_gid) 93 gids[i].mapped = gids[i].id; 94 else 95 gids[i].mapped = grp->gr_gid; 96 LOG4(sess, "remapped group %s: %" PRId32 " -> %" PRId32, 97 gids[i].name, gids[i].id, gids[i].mapped); 98 } 99} 100 101/* 102 * If "gid" is not part of the list of known groups, add it. 103 * This also verifies that the group name isn't too long. 104 * Return zero on failure, non-zero on success. 105 */ 106static int 107flist_gid_add(struct sess *sess, struct ident **gids, size_t *gidsz, gid_t gid) 108{ 109 struct group *grp; 110 size_t i, sz; 111 void *pp; 112 113 for (i = 0; i < *gidsz; i++) 114 if ((*gids)[i].id == (int32_t)gid) 115 return 1; 116 117 /* 118 * Look us up in /etc/group. 119 * Make sure that the group name length is sane: we transmit it 120 * using a single byte. 
121 */ 122 123 assert(i == *gidsz); 124 if (NULL == (grp = getgrgid(gid))) { 125 ERR(sess, "%u: unknown gid", gid); 126 return 0; 127 } else if ((sz = strlen(grp->gr_name)) > UINT8_MAX) { 128 ERRX(sess, "%u: group name too long: %s", gid, grp->gr_name); 129 return 0; 130 } else if (0 == sz) { 131 ERRX(sess, "%u: group name zero-length", gid); 132 return 0; 133 } 134 135 /* Add the group to the array. */ 136 137 pp = reallocarray(*gids, *gidsz + 1, sizeof(struct ident)); 138 if (NULL == pp) { 139 ERR(sess, "reallocarray"); 140 return 0; 141 } 142 *gids = pp; 143 (*gids)[*gidsz].id = gid; 144 (*gids)[*gidsz].name = strdup(grp->gr_name); 145 if (NULL == (*gids)[*gidsz].name) { 146 ERR(sess, "strdup"); 147 return 0; 148 } 149 150 LOG4(sess, "adding group to list: %s (%u)", 151 (*gids)[*gidsz].name, (*gids)[*gidsz].id); 152 (*gidsz)++; 153 return 1; 154} 155 156/* 157 * Requied way to sort a filename list. 158 */ 159static int 160flist_cmp(const void *p1, const void *p2) 161{ 162 const struct flist *f1 = p1, *f2 = p2; 163 164 return strcmp(f1->wpath, f2->wpath); 165} 166 167/* 168 * Deduplicate our file list (which may be zero-length). 169 * Returns zero on failure, non-zero on success. 170 */ 171static int 172flist_dedupe(struct sess *sess, struct flist **fl, size_t *sz) 173{ 174 size_t i, j; 175 struct flist *new; 176 struct flist *f, *fnext; 177 178 if (*sz == 0) 179 return 1; 180 181 /* Create a new buffer, "new", and copy. */ 182 183 new = calloc(*sz, sizeof(struct flist)); 184 if (new == NULL) { 185 ERR(sess, "calloc"); 186 return 0; 187 } 188 189 for (i = j = 0; i < *sz - 1; i++) { 190 f = &(*fl)[i]; 191 fnext = &(*fl)[i + 1]; 192 193 if (strcmp(f->wpath, fnext->wpath)) { 194 new[j++] = *f; 195 continue; 196 } 197 198 /* 199 * Our working (destination) paths are the same. 200 * If the actual file is the same (as given on the 201 * command-line), then we can just discard the first. 
202 * Otherwise, we need to bail out: it means we have two 203 * different files with the relative path on the 204 * destination side. 205 */ 206 207 if (strcmp(f->path, fnext->path) == 0) { 208 new[j++] = *f; 209 i++; 210 WARNX(sess, "%s: duplicate path: %s", 211 f->wpath, f->path); 212 free(fnext->path); 213 free(fnext->link); 214 fnext->path = fnext->link = NULL; 215 continue; 216 } 217 218 ERRX(sess, "%s: duplicate working path for " 219 "possibly different file: %s, %s", 220 f->wpath, f->path, fnext->path); 221 free(new); 222 return 0; 223 } 224 225 /* Don't forget the last entry. */ 226 227 if (i == *sz - 1) 228 new[j++] = (*fl)[i]; 229 230 /* 231 * Reassign to the deduplicated array. 232 * If we started out with *sz > 0, which we check for at the 233 * beginning, then we'll always continue having *sz > 0. 234 */ 235 236 free(*fl); 237 *fl = new; 238 *sz = j; 239 assert(*sz); 240 return 1; 241} 242 243/* 244 * We're now going to find our top-level directories. 245 * This only applies to recursive mode. 246 * If we have the first element as the ".", then that's the "top 247 * directory" of our transfer. 248 * Otherwise, mark up all top-level directories in the set. 249 */ 250static void 251flist_topdirs(struct sess *sess, struct flist *fl, size_t flsz) 252{ 253 size_t i; 254 const char *cp; 255 256 if (!sess->opts->recursive) 257 return; 258 259 if (flsz && strcmp(fl[0].wpath, ".")) { 260 for (i = 0; i < flsz; i++) { 261 if (!S_ISDIR(fl[i].st.mode)) 262 continue; 263 cp = strchr(fl[i].wpath, '/'); 264 if (cp != NULL && cp[1] != '\0') 265 continue; 266 fl[i].st.flags |= FLSTAT_TOP_DIR; 267 LOG4(sess, "%s: top-level", fl[i].wpath); 268 } 269 } else if (flsz) { 270 fl[0].st.flags |= FLSTAT_TOP_DIR; 271 LOG4(sess, "%s: top-level", fl[0].wpath); 272 } 273} 274 275/* 276 * Filter through the fts() file information. 277 * We want directories (pre-order), regular files, and symlinks. 278 * Everything else is skipped and possibly warned about. 
279 * Return zero to skip, non-zero to examine. 280 */ 281static int 282flist_fts_check(struct sess *sess, FTSENT *ent) 283{ 284 285 if (ent->fts_info == FTS_F || 286 ent->fts_info == FTS_D || 287 ent->fts_info == FTS_SL || 288 ent->fts_info == FTS_SLNONE) 289 return 1; 290 291 if (ent->fts_info == FTS_DC) { 292 WARNX(sess, "%s: directory cycle", ent->fts_path); 293 } else if (ent->fts_info == FTS_DNR) { 294 errno = ent->fts_errno; 295 WARN(sess, "%s: unreadable directory", ent->fts_path); 296 } else if (ent->fts_info == FTS_DOT) { 297 WARNX(sess, "%s: skipping dot-file", ent->fts_path); 298 } else if (ent->fts_info == FTS_ERR) { 299 errno = ent->fts_errno; 300 WARN(sess, "%s", ent->fts_path); 301 } else if (ent->fts_info == FTS_DEFAULT) { 302 WARNX(sess, "%s: skipping special", ent->fts_path); 303 } else if (ent->fts_info == FTS_NS) { 304 errno = ent->fts_errno; 305 WARN(sess, "%s: could not stat", ent->fts_path); 306 } 307 308 return 0; 309} 310 311/* 312 * Copy necessary elements in "st" into the fields of "f". 313 */ 314static void 315flist_copy_stat(struct flist *f, const struct stat *st) 316{ 317 f->st.mode = st->st_mode; 318 f->st.uid = st->st_uid; 319 f->st.gid = st->st_gid; 320 f->st.size = st->st_size; 321 f->st.mtime = st->st_mtime; 322} 323 324void 325flist_free(struct flist *f, size_t sz) 326{ 327 size_t i; 328 329 if (f == NULL) 330 return; 331 332 for (i = 0; i < sz; i++) { 333 free(f[i].path); 334 free(f[i].link); 335 } 336 free(f); 337} 338 339/* 340 * Send a list of struct ident. 341 * See flist_recv_ident(). 342 * We should only do this if we're preserving gids/uids. 343 * Return zero on failure, non-zero on success. 
344 */ 345static int 346flist_send_ident(struct sess *sess, 347 int fd, const struct ident *ids, size_t idsz) 348{ 349 size_t i, sz; 350 351 for (i = 0; i < idsz; i++) { 352 assert(NULL != ids[i].name); 353 sz = strlen(ids[i].name); 354 assert(sz > 0 && sz <= UINT8_MAX); 355 if (!io_write_int(sess, fd, ids[i].id)) { 356 ERRX1(sess, "io_write_int"); 357 return 0; 358 } else if (!io_write_byte(sess, fd, sz)) { 359 ERRX1(sess, "io_write_byte"); 360 return 0; 361 } else if (!io_write_buf(sess, fd, ids[i].name, sz)) { 362 ERRX1(sess, "io_write_byte"); 363 return 0; 364 } 365 } 366 367 if (!io_write_int(sess, fd, 0)) { 368 ERRX1(sess, "io_write_int"); 369 return 0; 370 } 371 372 return 1; 373} 374 375/* 376 * Serialise our file list (which may be zero-length) to the wire. 377 * Makes sure that the receiver isn't going to block on sending us 378 * return messages on the log channel. 379 * Return zero on failure, non-zero on success. 380 */ 381int 382flist_send(struct sess *sess, int fdin, int fdout, const struct flist *fl, 383 size_t flsz) 384{ 385 size_t i, sz, gidsz = 0; 386 uint8_t flag; 387 const struct flist *f; 388 const char *fn; 389 struct ident *gids = NULL; 390 int rc = 0; 391 392 /* Double-check that we've no pending multiplexed data. */ 393 394 LOG2(sess, "sending file metadata list: %zu", flsz); 395 396 for (i = 0; i < flsz; i++) { 397 f = &fl[i]; 398 fn = f->wpath; 399 sz = strlen(f->wpath); 400 assert(sz > 0); 401 402 /* 403 * If applicable, unclog the read buffer. 404 * This happens when the receiver has a lot of log 405 * messages and all we're doing is sending our file list 406 * without checking for messages. 407 */ 408 409 if (sess->mplex_reads && 410 io_read_check(sess, fdin) && 411 !io_read_flush(sess, fdin)) { 412 ERRX1(sess, "io_read_flush"); 413 goto out; 414 } 415 416 /* 417 * For ease, make all of our filenames be "long" 418 * regardless their actual length. 419 * This also makes sure that we don't transmit a zero 420 * byte unintentionally. 
421 */ 422 423 flag = FLIST_NAME_LONG; 424 425 LOG3(sess, "%s: sending file metadata: " 426 "size %jd, mtime %jd, mode %o", 427 fn, (intmax_t)f->st.size, 428 (intmax_t)f->st.mtime, f->st.mode); 429 430 /* Now write to the wire. */ 431 /* FIXME: buffer this. */ 432 433 if (!io_write_byte(sess, fdout, flag)) { 434 ERRX1(sess, "io_write_byte"); 435 goto out; 436 } else if (!io_write_int(sess, fdout, sz)) { 437 ERRX1(sess, "io_write_int"); 438 goto out; 439 } else if (!io_write_buf(sess, fdout, fn, sz)) { 440 ERRX1(sess, "io_write_buf"); 441 goto out; 442 } else if (!io_write_long(sess, fdout, f->st.size)) { 443 ERRX1(sess, "io_write_long"); 444 goto out; 445 } else if (!io_write_int(sess, fdout, f->st.mtime)) { 446 ERRX1(sess, "io_write_int"); 447 goto out; 448 } else if (!io_write_int(sess, fdout, f->st.mode)) { 449 ERRX1(sess, "io_write_int"); 450 goto out; 451 } 452 453 /* Conditional part: gid. */ 454 455 if (sess->opts->preserve_gids) { 456 if (!io_write_int(sess, fdout, f->st.gid)) { 457 ERRX1(sess, "io_write_int"); 458 goto out; 459 } 460 if (!flist_gid_add(sess, &gids, &gidsz, f->st.gid)) { 461 ERRX1(sess, "flist_gid_add"); 462 goto out; 463 } 464 } 465 466 /* Conditional part: link. */ 467 468 if (S_ISLNK(f->st.mode) && 469 sess->opts->preserve_links) { 470 fn = f->link; 471 sz = strlen(f->link); 472 if (!io_write_int(sess, fdout, sz)) { 473 ERRX1(sess, "io_write_int"); 474 goto out; 475 } 476 if (!io_write_buf(sess, fdout, fn, sz)) { 477 ERRX1(sess, "io_write_int"); 478 goto out; 479 } 480 } 481 482 if (S_ISREG(f->st.mode)) 483 sess->total_size += f->st.size; 484 } 485 486 /* Signal end of file list. */ 487 488 if (!io_write_byte(sess, fdout, 0)) { 489 ERRX1(sess, "io_write_byte"); 490 goto out; 491 } 492 493 /* Conditionally write gid list and terminator. 
*/ 494 495 if (sess->opts->preserve_gids) { 496 LOG2(sess, "sending gid list: %zu", gidsz); 497 if (!flist_send_ident(sess, fdout, gids, gidsz)) { 498 ERRX1(sess, "flist_send_ident"); 499 goto out; 500 } 501 } 502 503 rc = 1; 504out: 505 flist_ident_free(gids, gidsz); 506 return rc; 507} 508 509/* 510 * Read the filename of a file list. 511 * This is the most expensive part of the file list transfer, so a lot 512 * of attention has gone into transmitting as little as possible. 513 * Micro-optimisation, but whatever. 514 * Fills in "f" with the full path on success. 515 * Returns zero on failure, non-zero on success. 516 */ 517static int 518flist_recv_name(struct sess *sess, int fd, struct flist *f, uint8_t flags, 519 char last[MAXPATHLEN]) 520{ 521 uint8_t bval; 522 size_t partial = 0; 523 size_t pathlen = 0, len; 524 525 /* 526 * Read our filename. 527 * If we have FLIST_NAME_SAME, we inherit some of the last 528 * transmitted name. 529 * If we have FLIST_NAME_LONG, then the string length is greater 530 * than byte-size. 531 */ 532 533 if (FLIST_NAME_SAME & flags) { 534 if (!io_read_byte(sess, fd, &bval)) { 535 ERRX1(sess, "io_read_byte"); 536 return 0; 537 } 538 partial = bval; 539 } 540 541 /* Get the (possibly-remaining) filename length. */ 542 543 if (FLIST_NAME_LONG & flags) { 544 if (!io_read_size(sess, fd, &pathlen)) { 545 ERRX1(sess, "io_read_size"); 546 return 0; 547 } 548 } else { 549 if (!io_read_byte(sess, fd, &bval)) { 550 ERRX1(sess, "io_read_byte"); 551 return 0; 552 } 553 pathlen = bval; 554 } 555 556 /* Allocate our full filename length. */ 557 /* FIXME: maximum pathname length. 
*/ 558 559 if ((len = pathlen + partial) == 0) { 560 ERRX(sess, "security violation: " 561 "zero-length pathname"); 562 return 0; 563 } 564 565 if ((f->path = malloc(len + 1)) == NULL) { 566 ERR(sess, "malloc"); 567 return 0; 568 } 569 f->path[len] = '\0'; 570 571 if (FLIST_NAME_SAME & flags) 572 memcpy(f->path, last, partial); 573 574 if (!io_read_buf(sess, fd, f->path + partial, pathlen)) { 575 ERRX1(sess, "io_read_buf"); 576 return 0; 577 } 578 579 if (f->path[0] == '/') { 580 ERRX(sess, "security violation: " 581 "absolute pathname: %s", f->path); 582 return 0; 583 } 584 585 if (strstr(f->path, "/../") != NULL || 586 (len > 2 && strcmp(f->path + len - 3, "/..") == 0) || 587 (len > 2 && strncmp(f->path, "../", 3) == 0) || 588 strcmp(f->path, "..") == 0) { 589 ERRX(sess, "%s: security violation: " 590 "backtracking pathname", f->path); 591 return 0; 592 } 593 594 /* Record our last path and construct our filename. */ 595 596 strlcpy(last, f->path, MAXPATHLEN); 597 f->wpath = f->path; 598 return 1; 599} 600 601/* 602 * Reallocate a file list in chunks of FLIST_CHUNK_SIZE; 603 * Returns zero on failure, non-zero on success. 604 */ 605static int 606flist_realloc(struct sess *sess, struct flist **fl, size_t *sz, size_t *max) 607{ 608 void *pp; 609 610 if (*sz + 1 <= *max) { 611 (*sz)++; 612 return 1; 613 } 614 615 pp = recallocarray(*fl, *max, 616 *max + FLIST_CHUNK_SIZE, sizeof(struct flist)); 617 if (pp == NULL) { 618 ERR(sess, "recallocarray"); 619 return 0; 620 } 621 *fl = pp; 622 *max += FLIST_CHUNK_SIZE; 623 (*sz)++; 624 return 1; 625} 626 627/* 628 * Copy a regular or symbolic link file "path" into "f". 629 * This handles the correct path creation and symbolic linking. 630 * Returns zero on failure, non-zero on success. 631 */ 632static int 633flist_append(struct sess *sess, struct flist *f, struct stat *st, 634 const char *path) 635{ 636 637 /* 638 * Copy the full path for local addressing and transmit 639 * only the filename part for the receiver. 
640 */ 641 642 if ((f->path = strdup(path)) == NULL) { 643 ERR(sess, "strdup"); 644 return 0; 645 } 646 647 if ((f->wpath = strrchr(f->path, '/')) == NULL) 648 f->wpath = f->path; 649 else 650 f->wpath++; 651 652 /* 653 * On the receiving end, we'll strip out all bits on the 654 * mode except for the file permissions. 655 * No need to warn about it here. 656 */ 657 658 flist_copy_stat(f, st); 659 660 /* Optionally copy link information. */ 661 662 if (S_ISLNK(st->st_mode)) { 663 f->link = symlink_read(sess, f->path); 664 if (f->link == NULL) { 665 ERRX1(sess, "symlink_read"); 666 return 0; 667 } 668 } 669 670 return 1; 671} 672 673/* 674 * Receive a list of struct ident. 675 * See flist_send_ident(). 676 * We should only do this if we're preserving gids/uids. 677 * Return zero on failure, non-zero on success. 678 */ 679static int 680flist_recv_ident(struct sess *sess, 681 int fd, struct ident **ids, size_t *idsz) 682{ 683 int32_t id; 684 uint8_t sz; 685 void *pp; 686 687 for (;;) { 688 if (!io_read_int(sess, fd, &id)) { 689 ERRX1(sess, "io_read_int"); 690 return 0; 691 } else if (0 == id) 692 break; 693 694 pp = reallocarray(*ids, 695 *idsz + 1, sizeof(struct ident)); 696 if (NULL == pp) { 697 ERR(sess, "reallocarray"); 698 return 0; 699 } 700 *ids = pp; 701 memset(&(*ids)[*idsz], 0, sizeof(struct ident)); 702 if (!io_read_byte(sess, fd, &sz)) { 703 ERRX1(sess, "io_read_byte"); 704 return 0; 705 } 706 (*ids)[*idsz].id = id; 707 (*ids)[*idsz].name = calloc(sz + 1, 1); 708 if (NULL == (*ids)[*idsz].name) { 709 ERR(sess, "calloc"); 710 return 0; 711 } 712 if (!io_read_buf(sess, fd, (*ids)[*idsz].name, sz)) { 713 ERRX1(sess, "io_read_buf"); 714 return 0; 715 } 716 (*idsz)++; 717 } 718 719 return 1; 720} 721 722/* 723 * Receive a file list from the wire, filling in length "sz" (which may 724 * possibly be zero) and list "flp" on success. 725 * Return zero on failure, non-zero on success. 
726 */ 727int 728flist_recv(struct sess *sess, int fd, struct flist **flp, size_t *sz) 729{ 730 struct flist *fl = NULL; 731 struct flist *ff; 732 const struct flist *fflast = NULL; 733 size_t i, j, flsz = 0, flmax = 0, lsz, gidsz = 0; 734 uint8_t flag; 735 char last[MAXPATHLEN]; 736 uint64_t lval; /* temporary values... */ 737 int32_t ival; 738 struct ident *gids = NULL; 739 740 last[0] = '\0'; 741 742 for (;;) { 743 if (!io_read_byte(sess, fd, &flag)) { 744 ERRX1(sess, "io_read_byte"); 745 goto out; 746 } else if (flag == 0) 747 break; 748 749 if (!flist_realloc(sess, &fl, &flsz, &flmax)) { 750 ERRX1(sess, "flist_realloc"); 751 goto out; 752 } 753 754 ff = &fl[flsz - 1]; 755 fflast = flsz > 1 ? &fl[flsz - 2] : NULL; 756 757 /* Filename first. */ 758 759 if (!flist_recv_name(sess, fd, ff, flag, last)) { 760 ERRX1(sess, "flist_recv_name"); 761 goto out; 762 } 763 764 /* Read the file size. */ 765 766 if (!io_read_ulong(sess, fd, &lval)) { 767 ERRX1(sess, "io_read_ulong"); 768 goto out; 769 } 770 ff->st.size = lval; 771 772 /* Read the modification time. */ 773 774 if (!(FLIST_TIME_SAME & flag)) { 775 if (!io_read_int(sess, fd, &ival)) { 776 ERRX1(sess, "io_read_int"); 777 goto out; 778 } 779 ff->st.mtime = ival; 780 } else if (fflast == NULL) { 781 ERRX(sess, "same time without last entry"); 782 goto out; 783 } else 784 ff->st.mtime = fflast->st.mtime; 785 786 /* Read the file mode. */ 787 788 if (!(FLIST_MODE_SAME & flag)) { 789 if (!io_read_int(sess, fd, &ival)) { 790 ERRX1(sess, "io_read_int"); 791 goto out; 792 } 793 ff->st.mode = ival; 794 } else if (fflast == NULL) { 795 ERRX(sess, "same mode without last entry"); 796 goto out; 797 } else 798 ff->st.mode = fflast->st.mode; 799 800 /* Conditional part: gid. */ 801 802 if (sess->opts->preserve_gids) { 803 if ( ! (FLIST_GID_SAME & flag)) { 804 if ( ! 
io_read_int(sess, fd, &ival)) { 805 ERRX1(sess, "io_read_int"); 806 goto out; 807 } 808 ff->st.gid = ival; 809 } else if (NULL == fflast) { 810 ERRX(sess, "same gid " 811 "without last entry"); 812 goto out; 813 } else 814 ff->st.gid = fflast->st.gid; 815 } 816 817 /* Conditional part: link. */ 818 819 if (S_ISLNK(ff->st.mode) && 820 sess->opts->preserve_links) { 821 if (!io_read_size(sess, fd, &lsz)) { 822 ERRX1(sess, "io_read_size"); 823 goto out; 824 } else if (lsz == 0) { 825 ERRX(sess, "empty link name"); 826 goto out; 827 } 828 ff->link = calloc(lsz + 1, 1); 829 if (ff->link == NULL) { 830 ERR(sess, "calloc"); 831 goto out; 832 } 833 if (!io_read_buf(sess, fd, ff->link, lsz)) { 834 ERRX1(sess, "io_read_buf"); 835 goto out; 836 } 837 } 838 839 LOG3(sess, "%s: received file metadata: " 840 "size %jd, mtime %jd, mode %o", 841 ff->path, (intmax_t)ff->st.size, 842 (intmax_t)ff->st.mtime, ff->st.mode); 843 844 if (S_ISREG(ff->st.mode)) 845 sess->total_size += ff->st.size; 846 } 847 848 /* 849 * Now conditionally read the group list. 850 * We then remap all group identifiers to the local ids. 851 */ 852 853 if (sess->opts->preserve_gids) { 854 if (!flist_recv_ident(sess, fd, &gids, &gidsz)) { 855 ERRX1(sess, "flist_recv_ident"); 856 goto out; 857 } 858 LOG2(sess, "received gid list: %zu", gidsz); 859 flist_gid_remap(sess, gids, gidsz); 860 } 861 862 /* Remember to order the received list. */ 863 864 LOG2(sess, "received file metadata list: %zu", flsz); 865 qsort(fl, flsz, sizeof(struct flist), flist_cmp); 866 flist_topdirs(sess, fl, flsz); 867 *sz = flsz; 868 *flp = fl; 869 870 /* Lastly, reassign group identifiers. 
*/ 871 872 if (sess->opts->preserve_gids) { 873 for (i = 0; i < flsz; i++) { 874 for (j = 0; j < gidsz; j++) 875 if ((int32_t)fl[i].st.gid == gids[j].id) 876 break; 877 assert(j < gidsz); 878 fl[i].st.gid = gids[j].mapped; 879 } 880 } 881 882 flist_ident_free(gids, gidsz); 883 return 1; 884out: 885 flist_free(fl, flsz); 886 flist_ident_free(gids, gidsz); 887 *sz = 0; 888 *flp = NULL; 889 return 0; 890} 891 892/* 893 * Generate a flist possibly-recursively given a file root, which may 894 * also be a regular file or symlink. 895 * On success, augments the generated list in "flp" of length "sz". 896 * Returns zero on failure, non-zero on success. 897 */ 898static int 899flist_gen_dirent(struct sess *sess, char *root, struct flist **fl, size_t *sz, 900 size_t *max) 901{ 902 char *cargv[2], *cp; 903 int rc = 0; 904 FTS *fts; 905 FTSENT *ent; 906 struct flist *f; 907 size_t flsz = 0, stripdir; 908 struct stat st; 909 910 cargv[0] = root; 911 cargv[1] = NULL; 912 913 /* 914 * If we're a file, then revert to the same actions we use for 915 * the non-recursive scan. 
916 */ 917 918 if (lstat(root, &st) == -1) { 919 ERR(sess, "%s: lstat", root); 920 return 0; 921 } else if (S_ISREG(st.st_mode)) { 922 if (!flist_realloc(sess, fl, sz, max)) { 923 ERRX1(sess, "flist_realloc"); 924 return 0; 925 } 926 f = &(*fl)[(*sz) - 1]; 927 assert(f != NULL); 928 929 if (!flist_append(sess, f, &st, root)) { 930 ERRX1(sess, "flist_append"); 931 return 0; 932 } else if (unveil(root, "r") == -1) { 933 ERR(sess, "%s: unveil", root); 934 return 0; 935 } 936 return 1; 937 } else if (S_ISLNK(st.st_mode)) { 938 if (!sess->opts->preserve_links) { 939 WARNX(sess, "%s: skipping symlink", root); 940 return 1; 941 } else if (!flist_realloc(sess, fl, sz, max)) { 942 ERRX1(sess, "flist_realloc"); 943 return 0; 944 } 945 f = &(*fl)[(*sz) - 1]; 946 assert(f != NULL); 947 948 if (!flist_append(sess, f, &st, root)) { 949 ERRX1(sess, "flist_append"); 950 return 0; 951 } else if (unveil(root, "r") == -1) { 952 ERR(sess, "%s: unveil", root); 953 return 0; 954 } 955 return 1; 956 } else if (!S_ISDIR(st.st_mode)) { 957 WARNX(sess, "%s: skipping special", root); 958 return 1; 959 } 960 961 /* 962 * If we end with a slash, it means that we're not supposed to 963 * copy the directory part itself---only the contents. 964 * So set "stripdir" to be what we take out. 965 */ 966 967 stripdir = strlen(root); 968 assert(stripdir > 0); 969 if (root[stripdir - 1] != '/') 970 stripdir = 0; 971 972 /* 973 * If we're not stripping anything, then see if we need to strip 974 * out the leading material in the path up to and including the 975 * last directory component. 976 */ 977 978 if (stripdir == 0) 979 if ((cp = strrchr(root, '/')) != NULL) 980 stripdir = cp - root + 1; 981 982 /* 983 * If we're recursive, then we need to take down all of the 984 * files and directory components, so use fts(3). 985 * Copying the information file-by-file into the flstat. 986 * We'll make sense of it in flist_send. 
987 */ 988 989 if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) { 990 ERR(sess, "fts_open"); 991 return 0; 992 } 993 994 errno = 0; 995 while ((ent = fts_read(fts)) != NULL) { 996 if (!flist_fts_check(sess, ent)) { 997 errno = 0; 998 continue; 999 } 1000 1001 /* We don't allow symlinks without -l. */ 1002 1003 assert(ent->fts_statp != NULL); 1004 if (S_ISLNK(ent->fts_statp->st_mode) && 1005 !sess->opts->preserve_links) { 1006 WARNX(sess, "%s: skipping " 1007 "symlink", ent->fts_path); 1008 continue; 1009 } 1010 1011 /* Allocate a new file entry. */ 1012 1013 if (!flist_realloc(sess, fl, sz, max)) { 1014 ERRX1(sess, "flist_realloc"); 1015 goto out; 1016 } 1017 flsz++; 1018 f = &(*fl)[*sz - 1]; 1019 1020 /* Our path defaults to "." for the root. */ 1021 1022 if ('\0' == ent->fts_path[stripdir]) { 1023 if (asprintf(&f->path, "%s.", ent->fts_path) < 0) { 1024 ERR(sess, "asprintf"); 1025 f->path = NULL; 1026 goto out; 1027 } 1028 } else { 1029 if ((f->path = strdup(ent->fts_path)) == NULL) { 1030 ERR(sess, "strdup"); 1031 goto out; 1032 } 1033 } 1034 1035 f->wpath = f->path + stripdir; 1036 flist_copy_stat(f, ent->fts_statp); 1037 1038 /* Optionally copy link information. */ 1039 1040 if (S_ISLNK(ent->fts_statp->st_mode)) { 1041 f->link = symlink_read(sess, f->path); 1042 if (f->link == NULL) { 1043 ERRX1(sess, "symlink_read"); 1044 goto out; 1045 } 1046 } 1047 1048 /* Reset errno for next fts_read() call. */ 1049 errno = 0; 1050 } 1051 if (errno) { 1052 ERR(sess, "fts_read"); 1053 goto out; 1054 } else if (unveil(root, "r") == -1) { 1055 ERR(sess, "%s: unveil", root); 1056 goto out; 1057 } 1058 1059 LOG3(sess, "generated %zu filenames: %s", flsz, root); 1060 rc = 1; 1061out: 1062 fts_close(fts); 1063 return rc; 1064} 1065 1066/* 1067 * Generate a flist recursively given the array of directories (or 1068 * files, symlinks, doesn't matter) specified in argv (argc >0). 
1069 * On success, stores the generated list in "flp" with length "sz", 1070 * which may be zero. 1071 * Returns zero on failure, non-zero on success. 1072 */ 1073static int 1074flist_gen_dirs(struct sess *sess, size_t argc, char **argv, struct flist **flp, 1075 size_t *sz) 1076{ 1077 size_t i, max = 0; 1078 1079 for (i = 0; i < argc; i++) 1080 if (!flist_gen_dirent(sess, argv[i], flp, sz, &max)) 1081 break; 1082 1083 if (i == argc) { 1084 LOG2(sess, "recursively generated %zu filenames", *sz); 1085 return 1; 1086 } 1087 1088 ERRX1(sess, "flist_gen_dirent"); 1089 flist_free(*flp, max); 1090 *flp = NULL; 1091 *sz = 0; 1092 return 0; 1093} 1094 1095/* 1096 * Generate list of files from the command-line argc (>0) and argv. 1097 * On success, stores the generated list in "flp" with length "sz", 1098 * which may be zero. 1099 * Returns zero on failure, non-zero on success. 1100 */ 1101static int 1102flist_gen_files(struct sess *sess, size_t argc, char **argv, 1103 struct flist **flp, size_t *sz) 1104{ 1105 struct flist *fl = NULL, *f; 1106 size_t i, flsz = 0; 1107 struct stat st; 1108 1109 assert(argc); 1110 1111 if ((fl = calloc(argc, sizeof(struct flist))) == NULL) { 1112 ERR(sess, "calloc"); 1113 return 0; 1114 } 1115 1116 for (i = 0; i < argc; i++) { 1117 if ('\0' == argv[i][0]) 1118 continue; 1119 if (lstat(argv[i], &st) == -1) { 1120 ERR(sess, "%s: lstat", argv[i]); 1121 goto out; 1122 } 1123 1124 /* 1125 * File type checks. 1126 * In non-recursive mode, we don't accept directories. 1127 * We also skip symbolic links without -l. 1128 * Beyond that, we only accept regular files. 
1129 */ 1130 1131 if (S_ISDIR(st.st_mode)) { 1132 WARNX(sess, "%s: skipping directory", argv[i]); 1133 continue; 1134 } else if (S_ISLNK(st.st_mode)) { 1135 if (!sess->opts->preserve_links) { 1136 WARNX(sess, "%s: skipping " 1137 "symlink", argv[i]); 1138 continue; 1139 } 1140 } else if (!S_ISREG(st.st_mode)) { 1141 WARNX(sess, "%s: skipping special", argv[i]); 1142 continue; 1143 } 1144 1145 1146 f = &fl[flsz++]; 1147 assert(f != NULL); 1148 1149 /* Add this file to our file-system worldview. */ 1150 1151 if (unveil(argv[i], "r") == -1) { 1152 ERR(sess, "%s: unveil", argv[i]); 1153 goto out; 1154 } else if (!flist_append(sess, f, &st, argv[i])) { 1155 ERRX1(sess, "flist_append"); 1156 goto out; 1157 } 1158 } 1159 1160 LOG2(sess, "non-recursively generated %zu filenames", flsz); 1161 *sz = flsz; 1162 *flp = fl; 1163 return 1; 1164out: 1165 flist_free(fl, argc); 1166 *sz = 0; 1167 *flp = NULL; 1168 return 0; 1169} 1170 1171/* 1172 * Generate a sorted, de-duplicated list of file metadata. 1173 * In non-recursive mode (the default), we use only the files we're 1174 * given. 1175 * Otherwise, directories are recursively examined. 1176 * Returns zero on failure, non-zero on success. 1177 * On success, "fl" will need to be freed with flist_free(). 1178 */ 1179int 1180flist_gen(struct sess *sess, size_t argc, char **argv, struct flist **flp, 1181 size_t *sz) 1182{ 1183 int rc; 1184 1185 assert(argc > 0); 1186 rc = sess->opts->recursive ? 1187 flist_gen_dirs(sess, argc, argv, flp, sz) : 1188 flist_gen_files(sess, argc, argv, flp, sz); 1189 1190 /* After scanning, lock our file-system view. 
*/ 1191 1192 if (unveil(NULL, NULL) == -1) { 1193 ERR(sess, "unveil"); 1194 return 0; 1195 } else if (!rc) 1196 return 0; 1197 1198 qsort(*flp, *sz, sizeof(struct flist), flist_cmp); 1199 1200 if (flist_dedupe(sess, flp, sz)) { 1201 flist_topdirs(sess, *flp, *sz); 1202 return 1; 1203 } 1204 1205 ERRX1(sess, "flist_dedupe"); 1206 flist_free(*flp, *sz); 1207 *flp = NULL; 1208 *sz = 0; 1209 return 0; 1210} 1211 1212/* 1213 * Generate a list of files in root to delete that are within the 1214 * top-level directories stipulated by "wfl". 1215 * Only handles symbolic links, directories, and regular files. 1216 * Returns zero on failure (fl and flsz will be NULL and zero), non-zero 1217 * on success. 1218 * On success, "fl" will need to be freed with flist_free(). 1219 */ 1220int 1221flist_gen_dels(struct sess *sess, const char *root, struct flist **fl, 1222 size_t *sz, const struct flist *wfl, size_t wflsz) 1223{ 1224 char **cargv = NULL; 1225 int rc = 0, c; 1226 FTS *fts = NULL; 1227 FTSENT *ent; 1228 struct flist *f; 1229 size_t cargvs = 0, i, j, max = 0, stripdir; 1230 ENTRY hent; 1231 ENTRY *hentp; 1232 1233 *fl = NULL; 1234 *sz = 0; 1235 1236 /* Only run this code when we're recursive. */ 1237 1238 if (!sess->opts->recursive) 1239 return 1; 1240 1241 /* 1242 * Gather up all top-level directories for scanning. 1243 * This is stipulated by rsync's --delete behaviour, where we 1244 * only delete things in the top-level directories given on the 1245 * command line. 1246 */ 1247 1248 assert(wflsz > 0); 1249 for (i = 0; i < wflsz; i++) 1250 if (FLSTAT_TOP_DIR & wfl[i].st.flags) 1251 cargvs++; 1252 if (cargvs == 0) 1253 return 1; 1254 1255 if ((cargv = calloc(cargvs + 1, sizeof(char *))) == NULL) { 1256 ERR(sess, "calloc"); 1257 return 0; 1258 } 1259 1260 /* 1261 * If we're given just a "." as the first entry, that means 1262 * we're doing a relative copy with a trailing slash. 1263 * Special-case this just for the sake of simplicity. 
1264 * Otherwise, look through all top-levels. 1265 */ 1266 1267 if (wflsz && strcmp(wfl[0].wpath, ".") == 0) { 1268 assert(cargvs == 1); 1269 assert(S_ISDIR(wfl[0].st.mode)); 1270 if (asprintf(&cargv[0], "%s/", root) < 0) { 1271 ERR(sess, "asprintf"); 1272 cargv[0] = NULL; 1273 goto out; 1274 } 1275 cargv[1] = NULL; 1276 } else { 1277 for (i = j = 0; i < wflsz; i++) { 1278 if (!(FLSTAT_TOP_DIR & wfl[i].st.flags)) 1279 continue; 1280 assert(S_ISDIR(wfl[i].st.mode)); 1281 assert(strcmp(wfl[i].wpath, ".")); 1282 c = asprintf(&cargv[j], "%s/%s", root, wfl[i].wpath); 1283 if (c < 0) { 1284 ERR(sess, "asprintf"); 1285 cargv[j] = NULL; 1286 goto out; 1287 } 1288 LOG4(sess, "%s: will scan " 1289 "for deletions", cargv[j]); 1290 j++; 1291 } 1292 assert(j == cargvs); 1293 cargv[j] = NULL; 1294 } 1295 1296 LOG2(sess, "delete from %zu directories", cargvs); 1297 1298 /* 1299 * Next, use the standard hcreate(3) hashtable interface to hash 1300 * all of the files that we want to synchronise. 1301 * This way, we'll be able to determine which files we want to 1302 * delete in O(n) time instead of O(n * search) time. 1303 * Plus, we can do the scan in-band and only allocate the files 1304 * we want to delete. 1305 */ 1306 1307 if (!hcreate(wflsz)) { 1308 ERR(sess, "hcreate"); 1309 goto out; 1310 } 1311 1312 for (i = 0; i < wflsz; i++) { 1313 memset(&hent, 0, sizeof(ENTRY)); 1314 if ((hent.key = strdup(wfl[i].wpath)) == NULL) { 1315 ERR(sess, "strdup"); 1316 goto out; 1317 } 1318 if ((hentp = hsearch(hent, ENTER)) == NULL) { 1319 ERR(sess, "hsearch"); 1320 goto out; 1321 } else if (hentp->key != hent.key) { 1322 ERRX(sess, "%s: duplicate", wfl[i].wpath); 1323 free(hent.key); 1324 goto out; 1325 } 1326 } 1327 1328 /* 1329 * Now we're going to try to descend into all of the top-level 1330 * directories stipulated by the file list. 1331 * If the directories don't exist, it's ok. 
1332 */ 1333 1334 if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) { 1335 ERR(sess, "fts_open"); 1336 goto out; 1337 } 1338 1339 stripdir = strlen(root) + 1; 1340 errno = 0; 1341 while ((ent = fts_read(fts)) != NULL) { 1342 if (ent->fts_info == FTS_NS) 1343 continue; 1344 if (!flist_fts_check(sess, ent)) { 1345 errno = 0; 1346 continue; 1347 } else if (stripdir >= ent->fts_pathlen) 1348 continue; 1349 1350 /* Look up in hashtable. */ 1351 1352 memset(&hent, 0, sizeof(ENTRY)); 1353 hent.key = ent->fts_path + stripdir; 1354 if (hsearch(hent, FIND) != NULL) 1355 continue; 1356 1357 /* Not found: we'll delete it. */ 1358 1359 if (!flist_realloc(sess, fl, sz, &max)) { 1360 ERRX1(sess, "flist_realloc"); 1361 goto out; 1362 } 1363 f = &(*fl)[*sz - 1]; 1364 1365 if ((f->path = strdup(ent->fts_path)) == NULL) { 1366 ERR(sess, "strdup"); 1367 goto out; 1368 } 1369 f->wpath = f->path + stripdir; 1370 assert(ent->fts_statp != NULL); 1371 flist_copy_stat(f, ent->fts_statp); 1372 errno = 0; 1373 } 1374 1375 if (errno) { 1376 ERR(sess, "fts_read"); 1377 goto out; 1378 } 1379 1380 qsort(*fl, *sz, sizeof(struct flist), flist_cmp); 1381 rc = 1; 1382out: 1383 if (fts != NULL) 1384 fts_close(fts); 1385 for (i = 0; i < cargvs; i++) 1386 free(cargv[i]); 1387 free(cargv); 1388 hdestroy(); 1389 return rc; 1390} 1391 1392/* 1393 * Delete all files and directories in "fl". 1394 * If called with a zero-length "fl", does nothing. 1395 * If dry_run is specified, simply write what would be done. 1396 * Return zero on failure, non-zero on success. 1397 */ 1398int 1399flist_del(struct sess *sess, int root, const struct flist *fl, size_t flsz) 1400{ 1401 ssize_t i; 1402 int flag; 1403 1404 if (flsz == 0) 1405 return 1; 1406 1407 assert(sess->opts->del); 1408 assert(sess->opts->recursive); 1409 1410 for (i = flsz - 1; i >= 0; i--) { 1411 LOG1(sess, "%s: deleting", fl[i].wpath); 1412 if (sess->opts->dry_run) 1413 continue; 1414 assert(root != -1); 1415 flag = S_ISDIR(fl[i].st.mode) ? 
AT_REMOVEDIR : 0; 1416 if (unlinkat(root, fl[i].wpath, flag) == -1 && 1417 errno != ENOENT) { 1418 ERR(sess, "%s: unlinkat", fl[i].wpath); 1419 return 0; 1420 } 1421 } 1422 1423 return 1; 1424} 1425