/* flist.c revision 1.15 */
1/* $Id: flist.c,v 1.15 2019/02/16 10:48:05 florian Exp $ */ 2/* 3 * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17#include <sys/param.h> 18#include <sys/stat.h> 19 20#include <assert.h> 21#include <errno.h> 22#include <fcntl.h> 23#include <fts.h> 24#include <inttypes.h> 25#include <search.h> 26#include <stdio.h> 27#include <stdlib.h> 28#include <string.h> 29#include <unistd.h> 30 31#include "extern.h" 32 33/* 34 * We allocate our file list in chunk sizes so as not to do it one by 35 * one. 36 * Preferrably we get one or two allocation. 37 */ 38#define FLIST_CHUNK_SIZE (1024) 39 40/* 41 * These flags are part of the rsync protocol. 42 * They are sent as the first byte for a file transmission and encode 43 * information that affects subsequent transmissions. 44 */ 45#define FLIST_MODE_SAME 0x0002 /* mode is repeat */ 46#define FLIST_RDEV_SAME 0x0004 /* rdev is repeat */ 47#define FLIST_UID_SAME 0x0008 /* uid is repeat */ 48#define FLIST_GID_SAME 0x0010 /* gid is repeat */ 49#define FLIST_NAME_SAME 0x0020 /* name is repeat */ 50#define FLIST_NAME_LONG 0x0040 /* name >255 bytes */ 51#define FLIST_TIME_SAME 0x0080 /* time is repeat */ 52 53/* 54 * Requied way to sort a filename list. 
55 */ 56static int 57flist_cmp(const void *p1, const void *p2) 58{ 59 const struct flist *f1 = p1, *f2 = p2; 60 61 return strcmp(f1->wpath, f2->wpath); 62} 63 64/* 65 * Deduplicate our file list (which may be zero-length). 66 * Returns zero on failure, non-zero on success. 67 */ 68static int 69flist_dedupe(struct sess *sess, struct flist **fl, size_t *sz) 70{ 71 size_t i, j; 72 struct flist *new; 73 struct flist *f, *fnext; 74 75 if (*sz == 0) 76 return 1; 77 78 /* Create a new buffer, "new", and copy. */ 79 80 new = calloc(*sz, sizeof(struct flist)); 81 if (new == NULL) { 82 ERR(sess, "calloc"); 83 return 0; 84 } 85 86 for (i = j = 0; i < *sz - 1; i++) { 87 f = &(*fl)[i]; 88 fnext = &(*fl)[i + 1]; 89 90 if (strcmp(f->wpath, fnext->wpath)) { 91 new[j++] = *f; 92 continue; 93 } 94 95 /* 96 * Our working (destination) paths are the same. 97 * If the actual file is the same (as given on the 98 * command-line), then we can just discard the first. 99 * Otherwise, we need to bail out: it means we have two 100 * different files with the relative path on the 101 * destination side. 102 */ 103 104 if (strcmp(f->path, fnext->path) == 0) { 105 new[j++] = *f; 106 i++; 107 WARNX(sess, "%s: duplicate path: %s", 108 f->wpath, f->path); 109 free(fnext->path); 110 free(fnext->link); 111 fnext->path = fnext->link = NULL; 112 continue; 113 } 114 115 ERRX(sess, "%s: duplicate working path for " 116 "possibly different file: %s, %s", 117 f->wpath, f->path, fnext->path); 118 free(new); 119 return 0; 120 } 121 122 /* Don't forget the last entry. */ 123 124 if (i == *sz - 1) 125 new[j++] = (*fl)[i]; 126 127 /* 128 * Reassign to the deduplicated array. 129 * If we started out with *sz > 0, which we check for at the 130 * beginning, then we'll always continue having *sz > 0. 131 */ 132 133 free(*fl); 134 *fl = new; 135 *sz = j; 136 assert(*sz); 137 return 1; 138} 139 140/* 141 * We're now going to find our top-level directories. 142 * This only applies to recursive mode. 
143 * If we have the first element as the ".", then that's the "top 144 * directory" of our transfer. 145 * Otherwise, mark up all top-level directories in the set. 146 */ 147static void 148flist_topdirs(struct sess *sess, struct flist *fl, size_t flsz) 149{ 150 size_t i; 151 const char *cp; 152 153 if (!sess->opts->recursive) 154 return; 155 156 if (flsz && strcmp(fl[0].wpath, ".")) { 157 for (i = 0; i < flsz; i++) { 158 if (!S_ISDIR(fl[i].st.mode)) 159 continue; 160 cp = strchr(fl[i].wpath, '/'); 161 if (cp != NULL && cp[1] != '\0') 162 continue; 163 fl[i].st.flags |= FLSTAT_TOP_DIR; 164 LOG4(sess, "%s: top-level", fl[i].wpath); 165 } 166 } else if (flsz) { 167 fl[0].st.flags |= FLSTAT_TOP_DIR; 168 LOG4(sess, "%s: top-level", fl[0].wpath); 169 } 170} 171 172/* 173 * Filter through the fts() file information. 174 * We want directories (pre-order), regular files, and symlinks. 175 * Everything else is skipped and possibly warned about. 176 * Return zero to skip, non-zero to examine. 177 */ 178static int 179flist_fts_check(struct sess *sess, FTSENT *ent) 180{ 181 182 if (ent->fts_info == FTS_F || 183 ent->fts_info == FTS_D || 184 ent->fts_info == FTS_SL || 185 ent->fts_info == FTS_SLNONE) 186 return 1; 187 188 if (ent->fts_info == FTS_DC) { 189 WARNX(sess, "%s: directory cycle", ent->fts_path); 190 } else if (ent->fts_info == FTS_DNR) { 191 errno = ent->fts_errno; 192 WARN(sess, "%s: unreadable directory", ent->fts_path); 193 } else if (ent->fts_info == FTS_DOT) { 194 WARNX(sess, "%s: skipping dot-file", ent->fts_path); 195 } else if (ent->fts_info == FTS_ERR) { 196 errno = ent->fts_errno; 197 WARN(sess, "%s", ent->fts_path); 198 } else if (ent->fts_info == FTS_DEFAULT) { 199 if ((sess->opts->devices && (S_ISBLK(ent->fts_statp->st_mode) || 200 S_ISCHR(ent->fts_statp->st_mode))) || 201 (sess->opts->specials && 202 (S_ISFIFO(ent->fts_statp->st_mode) || 203 S_ISSOCK(ent->fts_statp->st_mode)))) { 204 return 1; 205 } 206 WARNX(sess, "%s: skipping special", 
ent->fts_path); 207 } else if (ent->fts_info == FTS_NS) { 208 errno = ent->fts_errno; 209 WARN(sess, "%s: could not stat", ent->fts_path); 210 } 211 212 return 0; 213} 214 215/* 216 * Copy necessary elements in "st" into the fields of "f". 217 */ 218static void 219flist_copy_stat(struct flist *f, const struct stat *st) 220{ 221 f->st.mode = st->st_mode; 222 f->st.uid = st->st_uid; 223 f->st.gid = st->st_gid; 224 f->st.size = st->st_size; 225 f->st.mtime = st->st_mtime; 226 f->st.rdev = st->st_rdev; 227} 228 229void 230flist_free(struct flist *f, size_t sz) 231{ 232 size_t i; 233 234 if (f == NULL) 235 return; 236 237 for (i = 0; i < sz; i++) { 238 free(f[i].path); 239 free(f[i].link); 240 } 241 free(f); 242} 243 244/* 245 * Serialise our file list (which may be zero-length) to the wire. 246 * Makes sure that the receiver isn't going to block on sending us 247 * return messages on the log channel. 248 * Return zero on failure, non-zero on success. 249 */ 250int 251flist_send(struct sess *sess, int fdin, int fdout, const struct flist *fl, 252 size_t flsz) 253{ 254 size_t i, sz, gidsz = 0, uidsz = 0; 255 uint8_t flag; 256 const struct flist *f; 257 const char *fn; 258 struct ident *gids = NULL, *uids = NULL; 259 int rc = 0; 260 261 /* Double-check that we've no pending multiplexed data. */ 262 263 LOG2(sess, "sending file metadata list: %zu", flsz); 264 265 for (i = 0; i < flsz; i++) { 266 f = &fl[i]; 267 fn = f->wpath; 268 sz = strlen(f->wpath); 269 assert(sz > 0); 270 271 /* 272 * If applicable, unclog the read buffer. 273 * This happens when the receiver has a lot of log 274 * messages and all we're doing is sending our file list 275 * without checking for messages. 276 */ 277 278 if (sess->mplex_reads && 279 io_read_check(sess, fdin) && 280 !io_read_flush(sess, fdin)) { 281 ERRX1(sess, "io_read_flush"); 282 goto out; 283 } 284 285 /* 286 * For ease, make all of our filenames be "long" 287 * regardless their actual length. 
288 * This also makes sure that we don't transmit a zero 289 * byte unintentionally. 290 */ 291 292 flag = FLIST_NAME_LONG; 293 294 LOG3(sess, "%s: sending file metadata: " 295 "size %jd, mtime %jd, mode %o", 296 fn, (intmax_t)f->st.size, 297 (intmax_t)f->st.mtime, f->st.mode); 298 299 /* Now write to the wire. */ 300 /* FIXME: buffer this. */ 301 302 if (!io_write_byte(sess, fdout, flag)) { 303 ERRX1(sess, "io_write_byte"); 304 goto out; 305 } else if (!io_write_int(sess, fdout, sz)) { 306 ERRX1(sess, "io_write_int"); 307 goto out; 308 } else if (!io_write_buf(sess, fdout, fn, sz)) { 309 ERRX1(sess, "io_write_buf"); 310 goto out; 311 } else if (!io_write_long(sess, fdout, f->st.size)) { 312 ERRX1(sess, "io_write_long"); 313 goto out; 314 } else if (!io_write_int(sess, fdout, f->st.mtime)) { 315 ERRX1(sess, "io_write_int"); 316 goto out; 317 } else if (!io_write_int(sess, fdout, f->st.mode)) { 318 ERRX1(sess, "io_write_int"); 319 goto out; 320 } 321 322 /* Conditional part: uid. */ 323 324 if (sess->opts->preserve_uids) { 325 if (!io_write_int(sess, fdout, f->st.uid)) { 326 ERRX1(sess, "io_write_int"); 327 goto out; 328 } 329 if (!idents_add(sess, 0, &uids, &uidsz, f->st.uid)) { 330 ERRX1(sess, "idents_add"); 331 goto out; 332 } 333 } 334 335 /* Conditional part: gid. */ 336 337 if (sess->opts->preserve_gids) { 338 if (!io_write_int(sess, fdout, f->st.gid)) { 339 ERRX1(sess, "io_write_int"); 340 goto out; 341 } 342 if (!idents_add(sess, 1, &gids, &gidsz, f->st.gid)) { 343 ERRX1(sess, "idents_add"); 344 goto out; 345 } 346 } 347 348 /* Conditional part: link. 
*/ 349 350 if (S_ISLNK(f->st.mode) && 351 sess->opts->preserve_links) { 352 fn = f->link; 353 sz = strlen(f->link); 354 if (!io_write_int(sess, fdout, sz)) { 355 ERRX1(sess, "io_write_int"); 356 goto out; 357 } 358 if (!io_write_buf(sess, fdout, fn, sz)) { 359 ERRX1(sess, "io_write_int"); 360 goto out; 361 } 362 } 363 364 if (S_ISBLK(f->st.mode) || S_ISCHR(f->st.mode) || 365 S_ISFIFO(f->st.mode) || S_ISSOCK(f->st.mode)) { 366 if (!io_write_int(sess, fdout, f->st.rdev)) { 367 ERRX1(sess, "io_write_int"); 368 goto out; 369 } 370 } 371 372 if (S_ISREG(f->st.mode)) 373 sess->total_size += f->st.size; 374 } 375 376 /* Signal end of file list. */ 377 378 if (!io_write_byte(sess, fdout, 0)) { 379 ERRX1(sess, "io_write_byte"); 380 goto out; 381 } 382 383 /* Conditionally write identifier lists. */ 384 385 if (sess->opts->preserve_uids) { 386 LOG2(sess, "sending uid list: %zu", uidsz); 387 if (!idents_send(sess, fdout, uids, uidsz)) { 388 ERRX1(sess, "idents_send"); 389 goto out; 390 } 391 } 392 393 if (sess->opts->preserve_gids) { 394 LOG2(sess, "sending gid list: %zu", gidsz); 395 if (!idents_send(sess, fdout, gids, gidsz)) { 396 ERRX1(sess, "idents_send"); 397 goto out; 398 } 399 } 400 401 rc = 1; 402out: 403 idents_free(gids, gidsz); 404 idents_free(uids, uidsz); 405 return rc; 406} 407 408/* 409 * Read the filename of a file list. 410 * This is the most expensive part of the file list transfer, so a lot 411 * of attention has gone into transmitting as little as possible. 412 * Micro-optimisation, but whatever. 413 * Fills in "f" with the full path on success. 414 * Returns zero on failure, non-zero on success. 415 */ 416static int 417flist_recv_name(struct sess *sess, int fd, struct flist *f, uint8_t flags, 418 char last[MAXPATHLEN]) 419{ 420 uint8_t bval; 421 size_t partial = 0; 422 size_t pathlen = 0, len; 423 424 /* 425 * Read our filename. 426 * If we have FLIST_NAME_SAME, we inherit some of the last 427 * transmitted name. 
428 * If we have FLIST_NAME_LONG, then the string length is greater 429 * than byte-size. 430 */ 431 432 if (FLIST_NAME_SAME & flags) { 433 if (!io_read_byte(sess, fd, &bval)) { 434 ERRX1(sess, "io_read_byte"); 435 return 0; 436 } 437 partial = bval; 438 } 439 440 /* Get the (possibly-remaining) filename length. */ 441 442 if (FLIST_NAME_LONG & flags) { 443 if (!io_read_size(sess, fd, &pathlen)) { 444 ERRX1(sess, "io_read_size"); 445 return 0; 446 } 447 } else { 448 if (!io_read_byte(sess, fd, &bval)) { 449 ERRX1(sess, "io_read_byte"); 450 return 0; 451 } 452 pathlen = bval; 453 } 454 455 /* Allocate our full filename length. */ 456 /* FIXME: maximum pathname length. */ 457 458 if ((len = pathlen + partial) == 0) { 459 ERRX(sess, "security violation: " 460 "zero-length pathname"); 461 return 0; 462 } 463 464 if ((f->path = malloc(len + 1)) == NULL) { 465 ERR(sess, "malloc"); 466 return 0; 467 } 468 f->path[len] = '\0'; 469 470 if (FLIST_NAME_SAME & flags) 471 memcpy(f->path, last, partial); 472 473 if (!io_read_buf(sess, fd, f->path + partial, pathlen)) { 474 ERRX1(sess, "io_read_buf"); 475 return 0; 476 } 477 478 if (f->path[0] == '/') { 479 ERRX(sess, "security violation: " 480 "absolute pathname: %s", f->path); 481 return 0; 482 } 483 484 if (strstr(f->path, "/../") != NULL || 485 (len > 2 && strcmp(f->path + len - 3, "/..") == 0) || 486 (len > 2 && strncmp(f->path, "../", 3) == 0) || 487 strcmp(f->path, "..") == 0) { 488 ERRX(sess, "%s: security violation: " 489 "backtracking pathname", f->path); 490 return 0; 491 } 492 493 /* Record our last path and construct our filename. */ 494 495 strlcpy(last, f->path, MAXPATHLEN); 496 f->wpath = f->path; 497 return 1; 498} 499 500/* 501 * Reallocate a file list in chunks of FLIST_CHUNK_SIZE; 502 * Returns zero on failure, non-zero on success. 
503 */ 504static int 505flist_realloc(struct sess *sess, struct flist **fl, size_t *sz, size_t *max) 506{ 507 void *pp; 508 509 if (*sz + 1 <= *max) { 510 (*sz)++; 511 return 1; 512 } 513 514 pp = recallocarray(*fl, *max, 515 *max + FLIST_CHUNK_SIZE, sizeof(struct flist)); 516 if (pp == NULL) { 517 ERR(sess, "recallocarray"); 518 return 0; 519 } 520 *fl = pp; 521 *max += FLIST_CHUNK_SIZE; 522 (*sz)++; 523 return 1; 524} 525 526/* 527 * Copy a regular or symbolic link file "path" into "f". 528 * This handles the correct path creation and symbolic linking. 529 * Returns zero on failure, non-zero on success. 530 */ 531static int 532flist_append(struct sess *sess, struct flist *f, struct stat *st, 533 const char *path) 534{ 535 536 /* 537 * Copy the full path for local addressing and transmit 538 * only the filename part for the receiver. 539 */ 540 541 if ((f->path = strdup(path)) == NULL) { 542 ERR(sess, "strdup"); 543 return 0; 544 } 545 546 if ((f->wpath = strrchr(f->path, '/')) == NULL) 547 f->wpath = f->path; 548 else 549 f->wpath++; 550 551 /* 552 * On the receiving end, we'll strip out all bits on the 553 * mode except for the file permissions. 554 * No need to warn about it here. 555 */ 556 557 flist_copy_stat(f, st); 558 559 /* Optionally copy link information. */ 560 561 if (S_ISLNK(st->st_mode)) { 562 f->link = symlink_read(sess, f->path); 563 if (f->link == NULL) { 564 ERRX1(sess, "symlink_read"); 565 return 0; 566 } 567 } 568 569 return 1; 570} 571 572/* 573 * Receive a file list from the wire, filling in length "sz" (which may 574 * possibly be zero) and list "flp" on success. 575 * Return zero on failure, non-zero on success. 576 */ 577int 578flist_recv(struct sess *sess, int fd, struct flist **flp, size_t *sz) 579{ 580 struct flist *fl = NULL; 581 struct flist *ff; 582 const struct flist *fflast = NULL; 583 size_t flsz = 0, flmax = 0, lsz, gidsz = 0, uidsz = 0; 584 uint8_t flag; 585 char last[MAXPATHLEN]; 586 uint64_t lval; /* temporary values... 
*/ 587 int32_t ival; 588 struct ident *gids = NULL, *uids = NULL; 589 590 last[0] = '\0'; 591 592 for (;;) { 593 if (!io_read_byte(sess, fd, &flag)) { 594 ERRX1(sess, "io_read_byte"); 595 goto out; 596 } else if (flag == 0) 597 break; 598 599 if (!flist_realloc(sess, &fl, &flsz, &flmax)) { 600 ERRX1(sess, "flist_realloc"); 601 goto out; 602 } 603 604 ff = &fl[flsz - 1]; 605 fflast = flsz > 1 ? &fl[flsz - 2] : NULL; 606 607 /* Filename first. */ 608 609 if (!flist_recv_name(sess, fd, ff, flag, last)) { 610 ERRX1(sess, "flist_recv_name"); 611 goto out; 612 } 613 614 /* Read the file size. */ 615 616 if (!io_read_ulong(sess, fd, &lval)) { 617 ERRX1(sess, "io_read_ulong"); 618 goto out; 619 } 620 ff->st.size = lval; 621 622 /* Read the modification time. */ 623 624 if (!(FLIST_TIME_SAME & flag)) { 625 if (!io_read_int(sess, fd, &ival)) { 626 ERRX1(sess, "io_read_int"); 627 goto out; 628 } 629 ff->st.mtime = ival; 630 } else if (fflast == NULL) { 631 ERRX(sess, "same time without last entry"); 632 goto out; 633 } else 634 ff->st.mtime = fflast->st.mtime; 635 636 /* Read the file mode. */ 637 638 if (!(FLIST_MODE_SAME & flag)) { 639 if (!io_read_int(sess, fd, &ival)) { 640 ERRX1(sess, "io_read_int"); 641 goto out; 642 } 643 ff->st.mode = ival; 644 } else if (fflast == NULL) { 645 ERRX(sess, "same mode without last entry"); 646 goto out; 647 } else 648 ff->st.mode = fflast->st.mode; 649 650 /* Conditional part: uid. */ 651 652 if (sess->opts->preserve_uids) { 653 if (!(FLIST_UID_SAME & flag)) { 654 if (!io_read_int(sess, fd, &ival)) { 655 ERRX1(sess, "io_read_int"); 656 goto out; 657 } 658 ff->st.uid = ival; 659 } else if (fflast == NULL) { 660 ERRX(sess, "same uid " 661 "without last entry"); 662 goto out; 663 } else 664 ff->st.uid = fflast->st.uid; 665 } 666 667 /* Conditional part: gid. 
*/ 668 669 if (sess->opts->preserve_gids) { 670 if (!(FLIST_GID_SAME & flag)) { 671 if (!io_read_int(sess, fd, &ival)) { 672 ERRX1(sess, "io_read_int"); 673 goto out; 674 } 675 ff->st.gid = ival; 676 } else if (fflast == NULL) { 677 ERRX(sess, "same gid " 678 "without last entry"); 679 goto out; 680 } else 681 ff->st.gid = fflast->st.gid; 682 } 683 684 /* handle devices & special files*/ 685 686 if ((sess->opts->devices && (S_ISBLK(ff->st.mode) || 687 S_ISCHR(ff->st.mode))) || 688 (sess->opts->specials && (S_ISFIFO(ff->st.mode) || 689 S_ISSOCK(ff->st.mode)))) { 690 if (!(FLIST_RDEV_SAME & flag)) { 691 if (!io_read_int(sess, fd, &ival)) { 692 ERRX1(sess, "io_read_int"); 693 goto out; 694 } 695 ff->st.rdev = ival; 696 } else if (fflast == NULL) { 697 ERRX(sess, "same mode without last entry"); 698 goto out; 699 } else 700 ff->st.rdev = fflast->st.rdev; 701 } 702 703 /* Conditional part: link. */ 704 705 if (S_ISLNK(ff->st.mode) && 706 sess->opts->preserve_links) { 707 if (!io_read_size(sess, fd, &lsz)) { 708 ERRX1(sess, "io_read_size"); 709 goto out; 710 } else if (lsz == 0) { 711 ERRX(sess, "empty link name"); 712 goto out; 713 } 714 ff->link = calloc(lsz + 1, 1); 715 if (ff->link == NULL) { 716 ERR(sess, "calloc"); 717 goto out; 718 } 719 if (!io_read_buf(sess, fd, ff->link, lsz)) { 720 ERRX1(sess, "io_read_buf"); 721 goto out; 722 } 723 } 724 725 LOG3(sess, "%s: received file metadata: " 726 "size %jd, mtime %jd, mode %o, rdev (%d, %d)", 727 ff->path, (intmax_t)ff->st.size, 728 (intmax_t)ff->st.mtime, ff->st.mode, 729 major(ff->st.rdev), minor(ff->st.rdev)); 730 731 if (S_ISREG(ff->st.mode)) 732 sess->total_size += ff->st.size; 733 } 734 735 /* Conditionally read the user/group list. 
*/ 736 737 if (sess->opts->preserve_uids) { 738 if (!idents_recv(sess, fd, &uids, &uidsz)) { 739 ERRX1(sess, "idents_recv"); 740 goto out; 741 } 742 LOG2(sess, "received uid list: %zu", uidsz); 743 } 744 745 if (sess->opts->preserve_gids) { 746 if (!idents_recv(sess, fd, &gids, &gidsz)) { 747 ERRX1(sess, "idents_recv"); 748 goto out; 749 } 750 LOG2(sess, "received gid list: %zu", gidsz); 751 } 752 753 /* Remember to order the received list. */ 754 755 LOG2(sess, "received file metadata list: %zu", flsz); 756 qsort(fl, flsz, sizeof(struct flist), flist_cmp); 757 flist_topdirs(sess, fl, flsz); 758 *sz = flsz; 759 *flp = fl; 760 761 /* Conditionally remap and reassign identifiers. */ 762 763 if (sess->opts->preserve_uids) { 764 idents_remap(sess, 0, uids, uidsz); 765 idents_assign_uid(sess, fl, flsz, uids, uidsz); 766 } 767 768 if (sess->opts->preserve_gids) { 769 idents_remap(sess, 1, gids, gidsz); 770 idents_assign_gid(sess, fl, flsz, gids, gidsz); 771 } 772 773 idents_free(gids, gidsz); 774 idents_free(uids, uidsz); 775 return 1; 776out: 777 flist_free(fl, flsz); 778 idents_free(gids, gidsz); 779 idents_free(uids, uidsz); 780 *sz = 0; 781 *flp = NULL; 782 return 0; 783} 784 785/* 786 * Generate a flist possibly-recursively given a file root, which may 787 * also be a regular file or symlink. 788 * On success, augments the generated list in "flp" of length "sz". 789 * Returns zero on failure, non-zero on success. 790 */ 791static int 792flist_gen_dirent(struct sess *sess, char *root, struct flist **fl, size_t *sz, 793 size_t *max) 794{ 795 char *cargv[2], *cp; 796 int rc = 0; 797 FTS *fts; 798 FTSENT *ent; 799 struct flist *f; 800 size_t flsz = 0, stripdir; 801 struct stat st; 802 803 cargv[0] = root; 804 cargv[1] = NULL; 805 806 /* 807 * If we're a file, then revert to the same actions we use for 808 * the non-recursive scan. 
809 */ 810 811 if (lstat(root, &st) == -1) { 812 ERR(sess, "%s: lstat", root); 813 return 0; 814 } else if (S_ISREG(st.st_mode)) { 815 if (!flist_realloc(sess, fl, sz, max)) { 816 ERRX1(sess, "flist_realloc"); 817 return 0; 818 } 819 f = &(*fl)[(*sz) - 1]; 820 assert(f != NULL); 821 822 if (!flist_append(sess, f, &st, root)) { 823 ERRX1(sess, "flist_append"); 824 return 0; 825 } 826 if (unveil(root, "r") == -1) { 827 ERR(sess, "%s: unveil", root); 828 return 0; 829 } 830 return 1; 831 } else if (S_ISLNK(st.st_mode)) { 832 if (!sess->opts->preserve_links) { 833 WARNX(sess, "%s: skipping symlink", root); 834 return 1; 835 } else if (!flist_realloc(sess, fl, sz, max)) { 836 ERRX1(sess, "flist_realloc"); 837 return 0; 838 } 839 f = &(*fl)[(*sz) - 1]; 840 assert(f != NULL); 841 842 if (!flist_append(sess, f, &st, root)) { 843 ERRX1(sess, "flist_append"); 844 return 0; 845 } 846 if (unveil(root, "r") == -1) { 847 ERR(sess, "%s: unveil", root); 848 return 0; 849 } 850 return 1; 851 } else if (!S_ISDIR(st.st_mode)) { 852 WARNX(sess, "%s: skipping special", root); 853 return 1; 854 } 855 856 /* 857 * If we end with a slash, it means that we're not supposed to 858 * copy the directory part itself---only the contents. 859 * So set "stripdir" to be what we take out. 860 */ 861 862 stripdir = strlen(root); 863 assert(stripdir > 0); 864 if (root[stripdir - 1] != '/') 865 stripdir = 0; 866 867 /* 868 * If we're not stripping anything, then see if we need to strip 869 * out the leading material in the path up to and including the 870 * last directory component. 871 */ 872 873 if (stripdir == 0) 874 if ((cp = strrchr(root, '/')) != NULL) 875 stripdir = cp - root + 1; 876 877 /* 878 * If we're recursive, then we need to take down all of the 879 * files and directory components, so use fts(3). 880 * Copying the information file-by-file into the flstat. 881 * We'll make sense of it in flist_send. 
882 */ 883 884 if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) { 885 ERR(sess, "fts_open"); 886 return 0; 887 } 888 889 errno = 0; 890 while ((ent = fts_read(fts)) != NULL) { 891 if (!flist_fts_check(sess, ent)) { 892 errno = 0; 893 continue; 894 } 895 896 /* We don't allow symlinks without -l. */ 897 898 assert(ent->fts_statp != NULL); 899 if (S_ISLNK(ent->fts_statp->st_mode) && 900 !sess->opts->preserve_links) { 901 WARNX(sess, "%s: skipping " 902 "symlink", ent->fts_path); 903 continue; 904 } 905 906 /* Allocate a new file entry. */ 907 908 if (!flist_realloc(sess, fl, sz, max)) { 909 ERRX1(sess, "flist_realloc"); 910 goto out; 911 } 912 flsz++; 913 f = &(*fl)[*sz - 1]; 914 915 /* Our path defaults to "." for the root. */ 916 917 if ('\0' == ent->fts_path[stripdir]) { 918 if (asprintf(&f->path, "%s.", ent->fts_path) < 0) { 919 ERR(sess, "asprintf"); 920 f->path = NULL; 921 goto out; 922 } 923 } else { 924 if ((f->path = strdup(ent->fts_path)) == NULL) { 925 ERR(sess, "strdup"); 926 goto out; 927 } 928 } 929 930 f->wpath = f->path + stripdir; 931 flist_copy_stat(f, ent->fts_statp); 932 933 /* Optionally copy link information. */ 934 935 if (S_ISLNK(ent->fts_statp->st_mode)) { 936 f->link = symlink_read(sess, f->path); 937 if (f->link == NULL) { 938 ERRX1(sess, "symlink_read"); 939 goto out; 940 } 941 } 942 943 /* Reset errno for next fts_read() call. */ 944 errno = 0; 945 } 946 if (errno) { 947 ERR(sess, "fts_read"); 948 goto out; 949 } 950 if (unveil(root, "r") == -1) { 951 ERR(sess, "%s: unveil", root); 952 goto out; 953 } 954 955 LOG3(sess, "generated %zu filenames: %s", flsz, root); 956 rc = 1; 957out: 958 fts_close(fts); 959 return rc; 960} 961 962/* 963 * Generate a flist recursively given the array of directories (or 964 * files, symlinks, doesn't matter) specified in argv (argc >0). 965 * On success, stores the generated list in "flp" with length "sz", 966 * which may be zero. 967 * Returns zero on failure, non-zero on success. 
968 */ 969static int 970flist_gen_dirs(struct sess *sess, size_t argc, char **argv, struct flist **flp, 971 size_t *sz) 972{ 973 size_t i, max = 0; 974 975 for (i = 0; i < argc; i++) 976 if (!flist_gen_dirent(sess, argv[i], flp, sz, &max)) 977 break; 978 979 if (i == argc) { 980 LOG2(sess, "recursively generated %zu filenames", *sz); 981 return 1; 982 } 983 984 ERRX1(sess, "flist_gen_dirent"); 985 flist_free(*flp, max); 986 *flp = NULL; 987 *sz = 0; 988 return 0; 989} 990 991/* 992 * Generate list of files from the command-line argc (>0) and argv. 993 * On success, stores the generated list in "flp" with length "sz", 994 * which may be zero. 995 * Returns zero on failure, non-zero on success. 996 */ 997static int 998flist_gen_files(struct sess *sess, size_t argc, char **argv, 999 struct flist **flp, size_t *sz) 1000{ 1001 struct flist *fl = NULL, *f; 1002 size_t i, flsz = 0; 1003 struct stat st; 1004 1005 assert(argc); 1006 1007 if ((fl = calloc(argc, sizeof(struct flist))) == NULL) { 1008 ERR(sess, "calloc"); 1009 return 0; 1010 } 1011 1012 for (i = 0; i < argc; i++) { 1013 if ('\0' == argv[i][0]) 1014 continue; 1015 if (lstat(argv[i], &st) == -1) { 1016 ERR(sess, "%s: lstat", argv[i]); 1017 goto out; 1018 } 1019 1020 /* 1021 * File type checks. 1022 * In non-recursive mode, we don't accept directories. 1023 * We also skip symbolic links without -l. 1024 * Beyond that, we only accept regular files. 1025 */ 1026 1027 if (S_ISDIR(st.st_mode)) { 1028 WARNX(sess, "%s: skipping directory", argv[i]); 1029 continue; 1030 } else if (S_ISLNK(st.st_mode)) { 1031 if (!sess->opts->preserve_links) { 1032 WARNX(sess, "%s: skipping " 1033 "symlink", argv[i]); 1034 continue; 1035 } 1036 } else if (!S_ISREG(st.st_mode)) { 1037 WARNX(sess, "%s: skipping special", argv[i]); 1038 continue; 1039 } 1040 1041 1042 f = &fl[flsz++]; 1043 assert(f != NULL); 1044 1045 /* Add this file to our file-system worldview. 
*/ 1046 1047 if (unveil(argv[i], "r") == -1) { 1048 ERR(sess, "%s: unveil", argv[i]); 1049 goto out; 1050 } 1051 if (!flist_append(sess, f, &st, argv[i])) { 1052 ERRX1(sess, "flist_append"); 1053 goto out; 1054 } 1055 } 1056 1057 LOG2(sess, "non-recursively generated %zu filenames", flsz); 1058 *sz = flsz; 1059 *flp = fl; 1060 return 1; 1061out: 1062 flist_free(fl, argc); 1063 *sz = 0; 1064 *flp = NULL; 1065 return 0; 1066} 1067 1068/* 1069 * Generate a sorted, de-duplicated list of file metadata. 1070 * In non-recursive mode (the default), we use only the files we're 1071 * given. 1072 * Otherwise, directories are recursively examined. 1073 * Returns zero on failure, non-zero on success. 1074 * On success, "fl" will need to be freed with flist_free(). 1075 */ 1076int 1077flist_gen(struct sess *sess, size_t argc, char **argv, struct flist **flp, 1078 size_t *sz) 1079{ 1080 int rc; 1081 1082 assert(argc > 0); 1083 rc = sess->opts->recursive ? 1084 flist_gen_dirs(sess, argc, argv, flp, sz) : 1085 flist_gen_files(sess, argc, argv, flp, sz); 1086 1087 /* After scanning, lock our file-system view. */ 1088 1089 if (unveil(NULL, NULL) == -1) { 1090 ERR(sess, "unveil"); 1091 return 0; 1092 } 1093 if (!rc) 1094 return 0; 1095 1096 qsort(*flp, *sz, sizeof(struct flist), flist_cmp); 1097 1098 if (flist_dedupe(sess, flp, sz)) { 1099 flist_topdirs(sess, *flp, *sz); 1100 return 1; 1101 } 1102 1103 ERRX1(sess, "flist_dedupe"); 1104 flist_free(*flp, *sz); 1105 *flp = NULL; 1106 *sz = 0; 1107 return 0; 1108} 1109 1110/* 1111 * Generate a list of files in root to delete that are within the 1112 * top-level directories stipulated by "wfl". 1113 * Only handles symbolic links, directories, and regular files. 1114 * Returns zero on failure (fl and flsz will be NULL and zero), non-zero 1115 * on success. 1116 * On success, "fl" will need to be freed with flist_free(). 
1117 */ 1118int 1119flist_gen_dels(struct sess *sess, const char *root, struct flist **fl, 1120 size_t *sz, const struct flist *wfl, size_t wflsz) 1121{ 1122 char **cargv = NULL; 1123 int rc = 0, c; 1124 FTS *fts = NULL; 1125 FTSENT *ent; 1126 struct flist *f; 1127 size_t cargvs = 0, i, j, max = 0, stripdir; 1128 ENTRY hent; 1129 ENTRY *hentp; 1130 1131 *fl = NULL; 1132 *sz = 0; 1133 1134 /* Only run this code when we're recursive. */ 1135 1136 if (!sess->opts->recursive) 1137 return 1; 1138 1139 /* 1140 * Gather up all top-level directories for scanning. 1141 * This is stipulated by rsync's --delete behaviour, where we 1142 * only delete things in the top-level directories given on the 1143 * command line. 1144 */ 1145 1146 assert(wflsz > 0); 1147 for (i = 0; i < wflsz; i++) 1148 if (FLSTAT_TOP_DIR & wfl[i].st.flags) 1149 cargvs++; 1150 if (cargvs == 0) 1151 return 1; 1152 1153 if ((cargv = calloc(cargvs + 1, sizeof(char *))) == NULL) { 1154 ERR(sess, "calloc"); 1155 return 0; 1156 } 1157 1158 /* 1159 * If we're given just a "." as the first entry, that means 1160 * we're doing a relative copy with a trailing slash. 1161 * Special-case this just for the sake of simplicity. 1162 * Otherwise, look through all top-levels. 
1163 */ 1164 1165 if (wflsz && strcmp(wfl[0].wpath, ".") == 0) { 1166 assert(cargvs == 1); 1167 assert(S_ISDIR(wfl[0].st.mode)); 1168 if (asprintf(&cargv[0], "%s/", root) < 0) { 1169 ERR(sess, "asprintf"); 1170 cargv[0] = NULL; 1171 goto out; 1172 } 1173 cargv[1] = NULL; 1174 } else { 1175 for (i = j = 0; i < wflsz; i++) { 1176 if (!(FLSTAT_TOP_DIR & wfl[i].st.flags)) 1177 continue; 1178 assert(S_ISDIR(wfl[i].st.mode)); 1179 assert(strcmp(wfl[i].wpath, ".")); 1180 c = asprintf(&cargv[j], "%s/%s", root, wfl[i].wpath); 1181 if (c < 0) { 1182 ERR(sess, "asprintf"); 1183 cargv[j] = NULL; 1184 goto out; 1185 } 1186 LOG4(sess, "%s: will scan for deletions", cargv[j]); 1187 j++; 1188 } 1189 assert(j == cargvs); 1190 cargv[j] = NULL; 1191 } 1192 1193 LOG2(sess, "delete from %zu directories", cargvs); 1194 1195 /* 1196 * Next, use the standard hcreate(3) hashtable interface to hash 1197 * all of the files that we want to synchronise. 1198 * This way, we'll be able to determine which files we want to 1199 * delete in O(n) time instead of O(n * search) time. 1200 * Plus, we can do the scan in-band and only allocate the files 1201 * we want to delete. 1202 */ 1203 1204 if (!hcreate(wflsz)) { 1205 ERR(sess, "hcreate"); 1206 goto out; 1207 } 1208 1209 for (i = 0; i < wflsz; i++) { 1210 memset(&hent, 0, sizeof(ENTRY)); 1211 if ((hent.key = strdup(wfl[i].wpath)) == NULL) { 1212 ERR(sess, "strdup"); 1213 goto out; 1214 } 1215 if ((hentp = hsearch(hent, ENTER)) == NULL) { 1216 ERR(sess, "hsearch"); 1217 goto out; 1218 } else if (hentp->key != hent.key) { 1219 ERRX(sess, "%s: duplicate", wfl[i].wpath); 1220 free(hent.key); 1221 goto out; 1222 } 1223 } 1224 1225 /* 1226 * Now we're going to try to descend into all of the top-level 1227 * directories stipulated by the file list. 1228 * If the directories don't exist, it's ok. 
1229 */ 1230 1231 if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) { 1232 ERR(sess, "fts_open"); 1233 goto out; 1234 } 1235 1236 stripdir = strlen(root) + 1; 1237 errno = 0; 1238 while ((ent = fts_read(fts)) != NULL) { 1239 if (ent->fts_info == FTS_NS) 1240 continue; 1241 if (!flist_fts_check(sess, ent)) { 1242 errno = 0; 1243 continue; 1244 } else if (stripdir >= ent->fts_pathlen) 1245 continue; 1246 1247 /* Look up in hashtable. */ 1248 1249 memset(&hent, 0, sizeof(ENTRY)); 1250 hent.key = ent->fts_path + stripdir; 1251 if (hsearch(hent, FIND) != NULL) 1252 continue; 1253 1254 /* Not found: we'll delete it. */ 1255 1256 if (!flist_realloc(sess, fl, sz, &max)) { 1257 ERRX1(sess, "flist_realloc"); 1258 goto out; 1259 } 1260 f = &(*fl)[*sz - 1]; 1261 1262 if ((f->path = strdup(ent->fts_path)) == NULL) { 1263 ERR(sess, "strdup"); 1264 goto out; 1265 } 1266 f->wpath = f->path + stripdir; 1267 assert(ent->fts_statp != NULL); 1268 flist_copy_stat(f, ent->fts_statp); 1269 errno = 0; 1270 } 1271 1272 if (errno) { 1273 ERR(sess, "fts_read"); 1274 goto out; 1275 } 1276 1277 qsort(*fl, *sz, sizeof(struct flist), flist_cmp); 1278 rc = 1; 1279out: 1280 if (fts != NULL) 1281 fts_close(fts); 1282 for (i = 0; i < cargvs; i++) 1283 free(cargv[i]); 1284 free(cargv); 1285 hdestroy(); 1286 return rc; 1287} 1288 1289/* 1290 * Delete all files and directories in "fl". 1291 * If called with a zero-length "fl", does nothing. 1292 * If dry_run is specified, simply write what would be done. 1293 * Return zero on failure, non-zero on success. 1294 */ 1295int 1296flist_del(struct sess *sess, int root, const struct flist *fl, size_t flsz) 1297{ 1298 ssize_t i; 1299 int flag; 1300 1301 if (flsz == 0) 1302 return 1; 1303 1304 assert(sess->opts->del); 1305 assert(sess->opts->recursive); 1306 1307 for (i = flsz - 1; i >= 0; i--) { 1308 LOG1(sess, "%s: deleting", fl[i].wpath); 1309 if (sess->opts->dry_run) 1310 continue; 1311 assert(root != -1); 1312 flag = S_ISDIR(fl[i].st.mode) ? 
AT_REMOVEDIR : 0; 1313 if (unlinkat(root, fl[i].wpath, flag) == -1 && 1314 errno != ENOENT) { 1315 ERR(sess, "%s: unlinkat", fl[i].wpath); 1316 return 0; 1317 } 1318 } 1319 1320 return 1; 1321} 1322