flist.c revision 1.10
1/* $Id: flist.c,v 1.10 2019/02/12 19:19:13 benno Exp $ */ 2/* 3 * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17#include <sys/param.h> 18#include <sys/stat.h> 19 20#include <assert.h> 21#include <errno.h> 22#include <fcntl.h> 23#include <fts.h> 24#include <inttypes.h> 25#include <search.h> 26#include <stdio.h> 27#include <stdlib.h> 28#include <string.h> 29#include <unistd.h> 30 31#include "extern.h" 32 33/* 34 * We allocate our file list in chunk sizes so as not to do it one by 35 * one. 36 * Preferrably we get one or two allocation. 37 */ 38#define FLIST_CHUNK_SIZE (1024) 39 40/* 41 * These flags are part of the rsync protocol. 42 * They are sent as the first byte for a file transmission and encode 43 * information that affects subsequent transmissions. 44 */ 45#define FLIST_MODE_SAME 0x0002 /* mode is repeat */ 46#define FLIST_GID_SAME 0x0010 /* gid is repeat */ 47#define FLIST_NAME_SAME 0x0020 /* name is repeat */ 48#define FLIST_NAME_LONG 0x0040 /* name >255 bytes */ 49#define FLIST_TIME_SAME 0x0080 /* time is repeat */ 50 51/* 52 * Requied way to sort a filename list. 53 */ 54static int 55flist_cmp(const void *p1, const void *p2) 56{ 57 const struct flist *f1 = p1, *f2 = p2; 58 59 return strcmp(f1->wpath, f2->wpath); 60} 61 62/* 63 * Deduplicate our file list (which may be zero-length). 64 * Returns zero on failure, non-zero on success. 65 */ 66static int 67flist_dedupe(struct sess *sess, struct flist **fl, size_t *sz) 68{ 69 size_t i, j; 70 struct flist *new; 71 struct flist *f, *fnext; 72 73 if (*sz == 0) 74 return 1; 75 76 /* Create a new buffer, "new", and copy. */ 77 78 new = calloc(*sz, sizeof(struct flist)); 79 if (new == NULL) { 80 ERR(sess, "calloc"); 81 return 0; 82 } 83 84 for (i = j = 0; i < *sz - 1; i++) { 85 f = &(*fl)[i]; 86 fnext = &(*fl)[i + 1]; 87 88 if (strcmp(f->wpath, fnext->wpath)) { 89 new[j++] = *f; 90 continue; 91 } 92 93 /* 94 * Our working (destination) paths are the same. 95 * If the actual file is the same (as given on the 96 * command-line), then we can just discard the first. 97 * Otherwise, we need to bail out: it means we have two 98 * different files with the relative path on the 99 * destination side. 100 */ 101 102 if (strcmp(f->path, fnext->path) == 0) { 103 new[j++] = *f; 104 i++; 105 WARNX(sess, "%s: duplicate path: %s", 106 f->wpath, f->path); 107 free(fnext->path); 108 free(fnext->link); 109 fnext->path = fnext->link = NULL; 110 continue; 111 } 112 113 ERRX(sess, "%s: duplicate working path for " 114 "possibly different file: %s, %s", 115 f->wpath, f->path, fnext->path); 116 free(new); 117 return 0; 118 } 119 120 /* Don't forget the last entry. */ 121 122 if (i == *sz - 1) 123 new[j++] = (*fl)[i]; 124 125 /* 126 * Reassign to the deduplicated array. 127 * If we started out with *sz > 0, which we check for at the 128 * beginning, then we'll always continue having *sz > 0. 129 */ 130 131 free(*fl); 132 *fl = new; 133 *sz = j; 134 assert(*sz); 135 return 1; 136} 137 138/* 139 * We're now going to find our top-level directories. 140 * This only applies to recursive mode. 141 * If we have the first element as the ".", then that's the "top 142 * directory" of our transfer. 143 * Otherwise, mark up all top-level directories in the set. 144 */ 145static void 146flist_topdirs(struct sess *sess, struct flist *fl, size_t flsz) 147{ 148 size_t i; 149 const char *cp; 150 151 if (!sess->opts->recursive) 152 return; 153 154 if (flsz && strcmp(fl[0].wpath, ".")) { 155 for (i = 0; i < flsz; i++) { 156 if (!S_ISDIR(fl[i].st.mode)) 157 continue; 158 cp = strchr(fl[i].wpath, '/'); 159 if (cp != NULL && cp[1] != '\0') 160 continue; 161 fl[i].st.flags |= FLSTAT_TOP_DIR; 162 LOG4(sess, "%s: top-level", fl[i].wpath); 163 } 164 } else if (flsz) { 165 fl[0].st.flags |= FLSTAT_TOP_DIR; 166 LOG4(sess, "%s: top-level", fl[0].wpath); 167 } 168} 169 170/* 171 * Filter through the fts() file information. 172 * We want directories (pre-order), regular files, and symlinks. 173 * Everything else is skipped and possibly warned about. 174 * Return zero to skip, non-zero to examine. 175 */ 176static int 177flist_fts_check(struct sess *sess, FTSENT *ent) 178{ 179 180 if (ent->fts_info == FTS_F || 181 ent->fts_info == FTS_D || 182 ent->fts_info == FTS_SL || 183 ent->fts_info == FTS_SLNONE) 184 return 1; 185 186 if (ent->fts_info == FTS_DC) { 187 WARNX(sess, "%s: directory cycle", ent->fts_path); 188 } else if (ent->fts_info == FTS_DNR) { 189 errno = ent->fts_errno; 190 WARN(sess, "%s: unreadable directory", ent->fts_path); 191 } else if (ent->fts_info == FTS_DOT) { 192 WARNX(sess, "%s: skipping dot-file", ent->fts_path); 193 } else if (ent->fts_info == FTS_ERR) { 194 errno = ent->fts_errno; 195 WARN(sess, "%s", ent->fts_path); 196 } else if (ent->fts_info == FTS_DEFAULT) { 197 WARNX(sess, "%s: skipping special", ent->fts_path); 198 } else if (ent->fts_info == FTS_NS) { 199 errno = ent->fts_errno; 200 WARN(sess, "%s: could not stat", ent->fts_path); 201 } 202 203 return 0; 204} 205 206/* 207 * Copy necessary elements in "st" into the fields of "f". 208 */ 209static void 210flist_copy_stat(struct flist *f, const struct stat *st) 211{ 212 f->st.mode = st->st_mode; 213 f->st.uid = st->st_uid; 214 f->st.gid = st->st_gid; 215 f->st.size = st->st_size; 216 f->st.mtime = st->st_mtime; 217} 218 219void 220flist_free(struct flist *f, size_t sz) 221{ 222 size_t i; 223 224 if (f == NULL) 225 return; 226 227 for (i = 0; i < sz; i++) { 228 free(f[i].path); 229 free(f[i].link); 230 } 231 free(f); 232} 233 234/* 235 * Serialise our file list (which may be zero-length) to the wire. 236 * Makes sure that the receiver isn't going to block on sending us 237 * return messages on the log channel. 238 * Return zero on failure, non-zero on success. 239 */ 240int 241flist_send(struct sess *sess, int fdin, int fdout, const struct flist *fl, 242 size_t flsz) 243{ 244 size_t i, sz, gidsz = 0; 245 uint8_t flag; 246 const struct flist *f; 247 const char *fn; 248 struct ident *gids = NULL; 249 int rc = 0; 250 251 /* Double-check that we've no pending multiplexed data. */ 252 253 LOG2(sess, "sending file metadata list: %zu", flsz); 254 255 for (i = 0; i < flsz; i++) { 256 f = &fl[i]; 257 fn = f->wpath; 258 sz = strlen(f->wpath); 259 assert(sz > 0); 260 261 /* 262 * If applicable, unclog the read buffer. 263 * This happens when the receiver has a lot of log 264 * messages and all we're doing is sending our file list 265 * without checking for messages. 266 */ 267 268 if (sess->mplex_reads && 269 io_read_check(sess, fdin) && 270 !io_read_flush(sess, fdin)) { 271 ERRX1(sess, "io_read_flush"); 272 goto out; 273 } 274 275 /* 276 * For ease, make all of our filenames be "long" 277 * regardless their actual length. 278 * This also makes sure that we don't transmit a zero 279 * byte unintentionally. 280 */ 281 282 flag = FLIST_NAME_LONG; 283 284 LOG3(sess, "%s: sending file metadata: " 285 "size %jd, mtime %jd, mode %o", 286 fn, (intmax_t)f->st.size, 287 (intmax_t)f->st.mtime, f->st.mode); 288 289 /* Now write to the wire. */ 290 /* FIXME: buffer this. */ 291 292 if (!io_write_byte(sess, fdout, flag)) { 293 ERRX1(sess, "io_write_byte"); 294 goto out; 295 } else if (!io_write_int(sess, fdout, sz)) { 296 ERRX1(sess, "io_write_int"); 297 goto out; 298 } else if (!io_write_buf(sess, fdout, fn, sz)) { 299 ERRX1(sess, "io_write_buf"); 300 goto out; 301 } else if (!io_write_long(sess, fdout, f->st.size)) { 302 ERRX1(sess, "io_write_long"); 303 goto out; 304 } else if (!io_write_int(sess, fdout, f->st.mtime)) { 305 ERRX1(sess, "io_write_int"); 306 goto out; 307 } else if (!io_write_int(sess, fdout, f->st.mode)) { 308 ERRX1(sess, "io_write_int"); 309 goto out; 310 } 311 312 /* Conditional part: gid. */ 313 314 if (sess->opts->preserve_gids) { 315 if (!io_write_int(sess, fdout, f->st.gid)) { 316 ERRX1(sess, "io_write_int"); 317 goto out; 318 } 319 if (!idents_gid_add(sess, &gids, &gidsz, f->st.gid)) { 320 ERRX1(sess, "idents_gid_add"); 321 goto out; 322 } 323 } 324 325 /* Conditional part: link. */ 326 327 if (S_ISLNK(f->st.mode) && 328 sess->opts->preserve_links) { 329 fn = f->link; 330 sz = strlen(f->link); 331 if (!io_write_int(sess, fdout, sz)) { 332 ERRX1(sess, "io_write_int"); 333 goto out; 334 } 335 if (!io_write_buf(sess, fdout, fn, sz)) { 336 ERRX1(sess, "io_write_int"); 337 goto out; 338 } 339 } 340 341 if (S_ISREG(f->st.mode)) 342 sess->total_size += f->st.size; 343 } 344 345 /* Signal end of file list. */ 346 347 if (!io_write_byte(sess, fdout, 0)) { 348 ERRX1(sess, "io_write_byte"); 349 goto out; 350 } 351 352 /* Conditionally write gid list and terminator. */ 353 354 if (sess->opts->preserve_gids) { 355 LOG2(sess, "sending gid list: %zu", gidsz); 356 if (!idents_send(sess, fdout, gids, gidsz)) { 357 ERRX1(sess, "idents_send"); 358 goto out; 359 } 360 } 361 362 rc = 1; 363out: 364 idents_free(gids, gidsz); 365 return rc; 366} 367 368/* 369 * Read the filename of a file list. 370 * This is the most expensive part of the file list transfer, so a lot 371 * of attention has gone into transmitting as little as possible. 372 * Micro-optimisation, but whatever. 373 * Fills in "f" with the full path on success. 374 * Returns zero on failure, non-zero on success. 375 */ 376static int 377flist_recv_name(struct sess *sess, int fd, struct flist *f, uint8_t flags, 378 char last[MAXPATHLEN]) 379{ 380 uint8_t bval; 381 size_t partial = 0; 382 size_t pathlen = 0, len; 383 384 /* 385 * Read our filename. 386 * If we have FLIST_NAME_SAME, we inherit some of the last 387 * transmitted name. 388 * If we have FLIST_NAME_LONG, then the string length is greater 389 * than byte-size. 390 */ 391 392 if (FLIST_NAME_SAME & flags) { 393 if (!io_read_byte(sess, fd, &bval)) { 394 ERRX1(sess, "io_read_byte"); 395 return 0; 396 } 397 partial = bval; 398 } 399 400 /* Get the (possibly-remaining) filename length. */ 401 402 if (FLIST_NAME_LONG & flags) { 403 if (!io_read_size(sess, fd, &pathlen)) { 404 ERRX1(sess, "io_read_size"); 405 return 0; 406 } 407 } else { 408 if (!io_read_byte(sess, fd, &bval)) { 409 ERRX1(sess, "io_read_byte"); 410 return 0; 411 } 412 pathlen = bval; 413 } 414 415 /* Allocate our full filename length. */ 416 /* FIXME: maximum pathname length. */ 417 418 if ((len = pathlen + partial) == 0) { 419 ERRX(sess, "security violation: " 420 "zero-length pathname"); 421 return 0; 422 } 423 424 if ((f->path = malloc(len + 1)) == NULL) { 425 ERR(sess, "malloc"); 426 return 0; 427 } 428 f->path[len] = '\0'; 429 430 if (FLIST_NAME_SAME & flags) 431 memcpy(f->path, last, partial); 432 433 if (!io_read_buf(sess, fd, f->path + partial, pathlen)) { 434 ERRX1(sess, "io_read_buf"); 435 return 0; 436 } 437 438 if (f->path[0] == '/') { 439 ERRX(sess, "security violation: " 440 "absolute pathname: %s", f->path); 441 return 0; 442 } 443 444 if (strstr(f->path, "/../") != NULL || 445 (len > 2 && strcmp(f->path + len - 3, "/..") == 0) || 446 (len > 2 && strncmp(f->path, "../", 3) == 0) || 447 strcmp(f->path, "..") == 0) { 448 ERRX(sess, "%s: security violation: " 449 "backtracking pathname", f->path); 450 return 0; 451 } 452 453 /* Record our last path and construct our filename. */ 454 455 strlcpy(last, f->path, MAXPATHLEN); 456 f->wpath = f->path; 457 return 1; 458} 459 460/* 461 * Reallocate a file list in chunks of FLIST_CHUNK_SIZE; 462 * Returns zero on failure, non-zero on success. 463 */ 464static int 465flist_realloc(struct sess *sess, struct flist **fl, size_t *sz, size_t *max) 466{ 467 void *pp; 468 469 if (*sz + 1 <= *max) { 470 (*sz)++; 471 return 1; 472 } 473 474 pp = recallocarray(*fl, *max, 475 *max + FLIST_CHUNK_SIZE, sizeof(struct flist)); 476 if (pp == NULL) { 477 ERR(sess, "recallocarray"); 478 return 0; 479 } 480 *fl = pp; 481 *max += FLIST_CHUNK_SIZE; 482 (*sz)++; 483 return 1; 484} 485 486/* 487 * Copy a regular or symbolic link file "path" into "f". 488 * This handles the correct path creation and symbolic linking. 489 * Returns zero on failure, non-zero on success. 490 */ 491static int 492flist_append(struct sess *sess, struct flist *f, struct stat *st, 493 const char *path) 494{ 495 496 /* 497 * Copy the full path for local addressing and transmit 498 * only the filename part for the receiver. 499 */ 500 501 if ((f->path = strdup(path)) == NULL) { 502 ERR(sess, "strdup"); 503 return 0; 504 } 505 506 if ((f->wpath = strrchr(f->path, '/')) == NULL) 507 f->wpath = f->path; 508 else 509 f->wpath++; 510 511 /* 512 * On the receiving end, we'll strip out all bits on the 513 * mode except for the file permissions. 514 * No need to warn about it here. 515 */ 516 517 flist_copy_stat(f, st); 518 519 /* Optionally copy link information. */ 520 521 if (S_ISLNK(st->st_mode)) { 522 f->link = symlink_read(sess, f->path); 523 if (f->link == NULL) { 524 ERRX1(sess, "symlink_read"); 525 return 0; 526 } 527 } 528 529 return 1; 530} 531 532/* 533 * Receive a file list from the wire, filling in length "sz" (which may 534 * possibly be zero) and list "flp" on success. 535 * Return zero on failure, non-zero on success. 536 */ 537int 538flist_recv(struct sess *sess, int fd, struct flist **flp, size_t *sz) 539{ 540 struct flist *fl = NULL; 541 struct flist *ff; 542 const struct flist *fflast = NULL; 543 size_t flsz = 0, flmax = 0, lsz, gidsz = 0; 544 uint8_t flag; 545 char last[MAXPATHLEN]; 546 uint64_t lval; /* temporary values... */ 547 int32_t ival; 548 struct ident *gids = NULL; 549 550 last[0] = '\0'; 551 552 for (;;) { 553 if (!io_read_byte(sess, fd, &flag)) { 554 ERRX1(sess, "io_read_byte"); 555 goto out; 556 } else if (flag == 0) 557 break; 558 559 if (!flist_realloc(sess, &fl, &flsz, &flmax)) { 560 ERRX1(sess, "flist_realloc"); 561 goto out; 562 } 563 564 ff = &fl[flsz - 1]; 565 fflast = flsz > 1 ? &fl[flsz - 2] : NULL; 566 567 /* Filename first. */ 568 569 if (!flist_recv_name(sess, fd, ff, flag, last)) { 570 ERRX1(sess, "flist_recv_name"); 571 goto out; 572 } 573 574 /* Read the file size. */ 575 576 if (!io_read_ulong(sess, fd, &lval)) { 577 ERRX1(sess, "io_read_ulong"); 578 goto out; 579 } 580 ff->st.size = lval; 581 582 /* Read the modification time. */ 583 584 if (!(FLIST_TIME_SAME & flag)) { 585 if (!io_read_int(sess, fd, &ival)) { 586 ERRX1(sess, "io_read_int"); 587 goto out; 588 } 589 ff->st.mtime = ival; 590 } else if (fflast == NULL) { 591 ERRX(sess, "same time without last entry"); 592 goto out; 593 } else 594 ff->st.mtime = fflast->st.mtime; 595 596 /* Read the file mode. */ 597 598 if (!(FLIST_MODE_SAME & flag)) { 599 if (!io_read_int(sess, fd, &ival)) { 600 ERRX1(sess, "io_read_int"); 601 goto out; 602 } 603 ff->st.mode = ival; 604 } else if (fflast == NULL) { 605 ERRX(sess, "same mode without last entry"); 606 goto out; 607 } else 608 ff->st.mode = fflast->st.mode; 609 610 /* Conditional part: gid. */ 611 612 if (sess->opts->preserve_gids) { 613 if ( ! (FLIST_GID_SAME & flag)) { 614 if ( ! io_read_int(sess, fd, &ival)) { 615 ERRX1(sess, "io_read_int"); 616 goto out; 617 } 618 ff->st.gid = ival; 619 } else if (NULL == fflast) { 620 ERRX(sess, "same gid " 621 "without last entry"); 622 goto out; 623 } else 624 ff->st.gid = fflast->st.gid; 625 } 626 627 /* Conditional part: link. */ 628 629 if (S_ISLNK(ff->st.mode) && 630 sess->opts->preserve_links) { 631 if (!io_read_size(sess, fd, &lsz)) { 632 ERRX1(sess, "io_read_size"); 633 goto out; 634 } else if (lsz == 0) { 635 ERRX(sess, "empty link name"); 636 goto out; 637 } 638 ff->link = calloc(lsz + 1, 1); 639 if (ff->link == NULL) { 640 ERR(sess, "calloc"); 641 goto out; 642 } 643 if (!io_read_buf(sess, fd, ff->link, lsz)) { 644 ERRX1(sess, "io_read_buf"); 645 goto out; 646 } 647 } 648 649 LOG3(sess, "%s: received file metadata: " 650 "size %jd, mtime %jd, mode %o", 651 ff->path, (intmax_t)ff->st.size, 652 (intmax_t)ff->st.mtime, ff->st.mode); 653 654 if (S_ISREG(ff->st.mode)) 655 sess->total_size += ff->st.size; 656 } 657 658 /* 659 * Now conditionally read the group list. 660 * We then remap all group identifiers to the local ids. 661 */ 662 663 if (sess->opts->preserve_gids) { 664 if (!idents_recv(sess, fd, &gids, &gidsz)) { 665 ERRX1(sess, "idents_recv"); 666 goto out; 667 } 668 LOG2(sess, "received gid list: %zu", gidsz); 669 } 670 671 /* Remember to order the received list. */ 672 673 LOG2(sess, "received file metadata list: %zu", flsz); 674 qsort(fl, flsz, sizeof(struct flist), flist_cmp); 675 flist_topdirs(sess, fl, flsz); 676 *sz = flsz; 677 *flp = fl; 678 679 /* Lastly, remap and reassign group identifiers. */ 680 681 if (sess->opts->preserve_gids) { 682 idents_gid_remap(sess, gids, gidsz); 683 idents_gid_assign(sess, fl, flsz, gids, gidsz); 684 } 685 686 idents_free(gids, gidsz); 687 return 1; 688out: 689 flist_free(fl, flsz); 690 idents_free(gids, gidsz); 691 *sz = 0; 692 *flp = NULL; 693 return 0; 694} 695 696/* 697 * Generate a flist possibly-recursively given a file root, which may 698 * also be a regular file or symlink. 699 * On success, augments the generated list in "flp" of length "sz". 700 * Returns zero on failure, non-zero on success. 701 */ 702static int 703flist_gen_dirent(struct sess *sess, char *root, struct flist **fl, size_t *sz, 704 size_t *max) 705{ 706 char *cargv[2], *cp; 707 int rc = 0; 708 FTS *fts; 709 FTSENT *ent; 710 struct flist *f; 711 size_t flsz = 0, stripdir; 712 struct stat st; 713 714 cargv[0] = root; 715 cargv[1] = NULL; 716 717 /* 718 * If we're a file, then revert to the same actions we use for 719 * the non-recursive scan. 720 */ 721 722 if (lstat(root, &st) == -1) { 723 ERR(sess, "%s: lstat", root); 724 return 0; 725 } else if (S_ISREG(st.st_mode)) { 726 if (!flist_realloc(sess, fl, sz, max)) { 727 ERRX1(sess, "flist_realloc"); 728 return 0; 729 } 730 f = &(*fl)[(*sz) - 1]; 731 assert(f != NULL); 732 733 if (!flist_append(sess, f, &st, root)) { 734 ERRX1(sess, "flist_append"); 735 return 0; 736 } 737 if (unveil(root, "r") == -1) { 738 ERR(sess, "%s: unveil", root); 739 return 0; 740 } 741 return 1; 742 } else if (S_ISLNK(st.st_mode)) { 743 if (!sess->opts->preserve_links) { 744 WARNX(sess, "%s: skipping symlink", root); 745 return 1; 746 } else if (!flist_realloc(sess, fl, sz, max)) { 747 ERRX1(sess, "flist_realloc"); 748 return 0; 749 } 750 f = &(*fl)[(*sz) - 1]; 751 assert(f != NULL); 752 753 if (!flist_append(sess, f, &st, root)) { 754 ERRX1(sess, "flist_append"); 755 return 0; 756 } 757 if (unveil(root, "r") == -1) { 758 ERR(sess, "%s: unveil", root); 759 return 0; 760 } 761 return 1; 762 } else if (!S_ISDIR(st.st_mode)) { 763 WARNX(sess, "%s: skipping special", root); 764 return 1; 765 } 766 767 /* 768 * If we end with a slash, it means that we're not supposed to 769 * copy the directory part itself---only the contents. 770 * So set "stripdir" to be what we take out. 771 */ 772 773 stripdir = strlen(root); 774 assert(stripdir > 0); 775 if (root[stripdir - 1] != '/') 776 stripdir = 0; 777 778 /* 779 * If we're not stripping anything, then see if we need to strip 780 * out the leading material in the path up to and including the 781 * last directory component. 782 */ 783 784 if (stripdir == 0) 785 if ((cp = strrchr(root, '/')) != NULL) 786 stripdir = cp - root + 1; 787 788 /* 789 * If we're recursive, then we need to take down all of the 790 * files and directory components, so use fts(3). 791 * Copying the information file-by-file into the flstat. 792 * We'll make sense of it in flist_send. 793 */ 794 795 if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) { 796 ERR(sess, "fts_open"); 797 return 0; 798 } 799 800 errno = 0; 801 while ((ent = fts_read(fts)) != NULL) { 802 if (!flist_fts_check(sess, ent)) { 803 errno = 0; 804 continue; 805 } 806 807 /* We don't allow symlinks without -l. */ 808 809 assert(ent->fts_statp != NULL); 810 if (S_ISLNK(ent->fts_statp->st_mode) && 811 !sess->opts->preserve_links) { 812 WARNX(sess, "%s: skipping " 813 "symlink", ent->fts_path); 814 continue; 815 } 816 817 /* Allocate a new file entry. */ 818 819 if (!flist_realloc(sess, fl, sz, max)) { 820 ERRX1(sess, "flist_realloc"); 821 goto out; 822 } 823 flsz++; 824 f = &(*fl)[*sz - 1]; 825 826 /* Our path defaults to "." for the root. */ 827 828 if ('\0' == ent->fts_path[stripdir]) { 829 if (asprintf(&f->path, "%s.", ent->fts_path) < 0) { 830 ERR(sess, "asprintf"); 831 f->path = NULL; 832 goto out; 833 } 834 } else { 835 if ((f->path = strdup(ent->fts_path)) == NULL) { 836 ERR(sess, "strdup"); 837 goto out; 838 } 839 } 840 841 f->wpath = f->path + stripdir; 842 flist_copy_stat(f, ent->fts_statp); 843 844 /* Optionally copy link information. */ 845 846 if (S_ISLNK(ent->fts_statp->st_mode)) { 847 f->link = symlink_read(sess, f->path); 848 if (f->link == NULL) { 849 ERRX1(sess, "symlink_read"); 850 goto out; 851 } 852 } 853 854 /* Reset errno for next fts_read() call. */ 855 errno = 0; 856 } 857 if (errno) { 858 ERR(sess, "fts_read"); 859 goto out; 860 } 861 if (unveil(root, "r") == -1) { 862 ERR(sess, "%s: unveil", root); 863 goto out; 864 } 865 866 LOG3(sess, "generated %zu filenames: %s", flsz, root); 867 rc = 1; 868out: 869 fts_close(fts); 870 return rc; 871} 872 873/* 874 * Generate a flist recursively given the array of directories (or 875 * files, symlinks, doesn't matter) specified in argv (argc >0). 876 * On success, stores the generated list in "flp" with length "sz", 877 * which may be zero. 878 * Returns zero on failure, non-zero on success. 879 */ 880static int 881flist_gen_dirs(struct sess *sess, size_t argc, char **argv, struct flist **flp, 882 size_t *sz) 883{ 884 size_t i, max = 0; 885 886 for (i = 0; i < argc; i++) 887 if (!flist_gen_dirent(sess, argv[i], flp, sz, &max)) 888 break; 889 890 if (i == argc) { 891 LOG2(sess, "recursively generated %zu filenames", *sz); 892 return 1; 893 } 894 895 ERRX1(sess, "flist_gen_dirent"); 896 flist_free(*flp, max); 897 *flp = NULL; 898 *sz = 0; 899 return 0; 900} 901 902/* 903 * Generate list of files from the command-line argc (>0) and argv. 904 * On success, stores the generated list in "flp" with length "sz", 905 * which may be zero. 906 * Returns zero on failure, non-zero on success. 907 */ 908static int 909flist_gen_files(struct sess *sess, size_t argc, char **argv, 910 struct flist **flp, size_t *sz) 911{ 912 struct flist *fl = NULL, *f; 913 size_t i, flsz = 0; 914 struct stat st; 915 916 assert(argc); 917 918 if ((fl = calloc(argc, sizeof(struct flist))) == NULL) { 919 ERR(sess, "calloc"); 920 return 0; 921 } 922 923 for (i = 0; i < argc; i++) { 924 if ('\0' == argv[i][0]) 925 continue; 926 if (lstat(argv[i], &st) == -1) { 927 ERR(sess, "%s: lstat", argv[i]); 928 goto out; 929 } 930 931 /* 932 * File type checks. 933 * In non-recursive mode, we don't accept directories. 934 * We also skip symbolic links without -l. 935 * Beyond that, we only accept regular files. 936 */ 937 938 if (S_ISDIR(st.st_mode)) { 939 WARNX(sess, "%s: skipping directory", argv[i]); 940 continue; 941 } else if (S_ISLNK(st.st_mode)) { 942 if (!sess->opts->preserve_links) { 943 WARNX(sess, "%s: skipping " 944 "symlink", argv[i]); 945 continue; 946 } 947 } else if (!S_ISREG(st.st_mode)) { 948 WARNX(sess, "%s: skipping special", argv[i]); 949 continue; 950 } 951 952 953 f = &fl[flsz++]; 954 assert(f != NULL); 955 956 /* Add this file to our file-system worldview. */ 957 958 if (unveil(argv[i], "r") == -1) { 959 ERR(sess, "%s: unveil", argv[i]); 960 goto out; 961 } 962 if (!flist_append(sess, f, &st, argv[i])) { 963 ERRX1(sess, "flist_append"); 964 goto out; 965 } 966 } 967 968 LOG2(sess, "non-recursively generated %zu filenames", flsz); 969 *sz = flsz; 970 *flp = fl; 971 return 1; 972out: 973 flist_free(fl, argc); 974 *sz = 0; 975 *flp = NULL; 976 return 0; 977} 978 979/* 980 * Generate a sorted, de-duplicated list of file metadata. 981 * In non-recursive mode (the default), we use only the files we're 982 * given. 983 * Otherwise, directories are recursively examined. 984 * Returns zero on failure, non-zero on success. 985 * On success, "fl" will need to be freed with flist_free(). 986 */ 987int 988flist_gen(struct sess *sess, size_t argc, char **argv, struct flist **flp, 989 size_t *sz) 990{ 991 int rc; 992 993 assert(argc > 0); 994 rc = sess->opts->recursive ? 995 flist_gen_dirs(sess, argc, argv, flp, sz) : 996 flist_gen_files(sess, argc, argv, flp, sz); 997 998 /* After scanning, lock our file-system view. */ 999 1000 if (unveil(NULL, NULL) == -1) { 1001 ERR(sess, "unveil"); 1002 return 0; 1003 } 1004 if (!rc) 1005 return 0; 1006 1007 qsort(*flp, *sz, sizeof(struct flist), flist_cmp); 1008 1009 if (flist_dedupe(sess, flp, sz)) { 1010 flist_topdirs(sess, *flp, *sz); 1011 return 1; 1012 } 1013 1014 ERRX1(sess, "flist_dedupe"); 1015 flist_free(*flp, *sz); 1016 *flp = NULL; 1017 *sz = 0; 1018 return 0; 1019} 1020 1021/* 1022 * Generate a list of files in root to delete that are within the 1023 * top-level directories stipulated by "wfl". 1024 * Only handles symbolic links, directories, and regular files. 1025 * Returns zero on failure (fl and flsz will be NULL and zero), non-zero 1026 * on success. 1027 * On success, "fl" will need to be freed with flist_free(). 1028 */ 1029int 1030flist_gen_dels(struct sess *sess, const char *root, struct flist **fl, 1031 size_t *sz, const struct flist *wfl, size_t wflsz) 1032{ 1033 char **cargv = NULL; 1034 int rc = 0, c; 1035 FTS *fts = NULL; 1036 FTSENT *ent; 1037 struct flist *f; 1038 size_t cargvs = 0, i, j, max = 0, stripdir; 1039 ENTRY hent; 1040 ENTRY *hentp; 1041 1042 *fl = NULL; 1043 *sz = 0; 1044 1045 /* Only run this code when we're recursive. */ 1046 1047 if (!sess->opts->recursive) 1048 return 1; 1049 1050 /* 1051 * Gather up all top-level directories for scanning. 1052 * This is stipulated by rsync's --delete behaviour, where we 1053 * only delete things in the top-level directories given on the 1054 * command line. 1055 */ 1056 1057 assert(wflsz > 0); 1058 for (i = 0; i < wflsz; i++) 1059 if (FLSTAT_TOP_DIR & wfl[i].st.flags) 1060 cargvs++; 1061 if (cargvs == 0) 1062 return 1; 1063 1064 if ((cargv = calloc(cargvs + 1, sizeof(char *))) == NULL) { 1065 ERR(sess, "calloc"); 1066 return 0; 1067 } 1068 1069 /* 1070 * If we're given just a "." as the first entry, that means 1071 * we're doing a relative copy with a trailing slash. 1072 * Special-case this just for the sake of simplicity. 1073 * Otherwise, look through all top-levels. 1074 */ 1075 1076 if (wflsz && strcmp(wfl[0].wpath, ".") == 0) { 1077 assert(cargvs == 1); 1078 assert(S_ISDIR(wfl[0].st.mode)); 1079 if (asprintf(&cargv[0], "%s/", root) < 0) { 1080 ERR(sess, "asprintf"); 1081 cargv[0] = NULL; 1082 goto out; 1083 } 1084 cargv[1] = NULL; 1085 } else { 1086 for (i = j = 0; i < wflsz; i++) { 1087 if (!(FLSTAT_TOP_DIR & wfl[i].st.flags)) 1088 continue; 1089 assert(S_ISDIR(wfl[i].st.mode)); 1090 assert(strcmp(wfl[i].wpath, ".")); 1091 c = asprintf(&cargv[j], "%s/%s", root, wfl[i].wpath); 1092 if (c < 0) { 1093 ERR(sess, "asprintf"); 1094 cargv[j] = NULL; 1095 goto out; 1096 } 1097 LOG4(sess, "%s: will scan " 1098 "for deletions", cargv[j]); 1099 j++; 1100 } 1101 assert(j == cargvs); 1102 cargv[j] = NULL; 1103 } 1104 1105 LOG2(sess, "delete from %zu directories", cargvs); 1106 1107 /* 1108 * Next, use the standard hcreate(3) hashtable interface to hash 1109 * all of the files that we want to synchronise. 1110 * This way, we'll be able to determine which files we want to 1111 * delete in O(n) time instead of O(n * search) time. 1112 * Plus, we can do the scan in-band and only allocate the files 1113 * we want to delete. 1114 */ 1115 1116 if (!hcreate(wflsz)) { 1117 ERR(sess, "hcreate"); 1118 goto out; 1119 } 1120 1121 for (i = 0; i < wflsz; i++) { 1122 memset(&hent, 0, sizeof(ENTRY)); 1123 if ((hent.key = strdup(wfl[i].wpath)) == NULL) { 1124 ERR(sess, "strdup"); 1125 goto out; 1126 } 1127 if ((hentp = hsearch(hent, ENTER)) == NULL) { 1128 ERR(sess, "hsearch"); 1129 goto out; 1130 } else if (hentp->key != hent.key) { 1131 ERRX(sess, "%s: duplicate", wfl[i].wpath); 1132 free(hent.key); 1133 goto out; 1134 } 1135 } 1136 1137 /* 1138 * Now we're going to try to descend into all of the top-level 1139 * directories stipulated by the file list. 1140 * If the directories don't exist, it's ok. 1141 */ 1142 1143 if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) { 1144 ERR(sess, "fts_open"); 1145 goto out; 1146 } 1147 1148 stripdir = strlen(root) + 1; 1149 errno = 0; 1150 while ((ent = fts_read(fts)) != NULL) { 1151 if (ent->fts_info == FTS_NS) 1152 continue; 1153 if (!flist_fts_check(sess, ent)) { 1154 errno = 0; 1155 continue; 1156 } else if (stripdir >= ent->fts_pathlen) 1157 continue; 1158 1159 /* Look up in hashtable. */ 1160 1161 memset(&hent, 0, sizeof(ENTRY)); 1162 hent.key = ent->fts_path + stripdir; 1163 if (hsearch(hent, FIND) != NULL) 1164 continue; 1165 1166 /* Not found: we'll delete it. */ 1167 1168 if (!flist_realloc(sess, fl, sz, &max)) { 1169 ERRX1(sess, "flist_realloc"); 1170 goto out; 1171 } 1172 f = &(*fl)[*sz - 1]; 1173 1174 if ((f->path = strdup(ent->fts_path)) == NULL) { 1175 ERR(sess, "strdup"); 1176 goto out; 1177 } 1178 f->wpath = f->path + stripdir; 1179 assert(ent->fts_statp != NULL); 1180 flist_copy_stat(f, ent->fts_statp); 1181 errno = 0; 1182 } 1183 1184 if (errno) { 1185 ERR(sess, "fts_read"); 1186 goto out; 1187 } 1188 1189 qsort(*fl, *sz, sizeof(struct flist), flist_cmp); 1190 rc = 1; 1191out: 1192 if (fts != NULL) 1193 fts_close(fts); 1194 for (i = 0; i < cargvs; i++) 1195 free(cargv[i]); 1196 free(cargv); 1197 hdestroy(); 1198 return rc; 1199} 1200 1201/* 1202 * Delete all files and directories in "fl". 1203 * If called with a zero-length "fl", does nothing. 1204 * If dry_run is specified, simply write what would be done. 1205 * Return zero on failure, non-zero on success. 1206 */ 1207int 1208flist_del(struct sess *sess, int root, const struct flist *fl, size_t flsz) 1209{ 1210 ssize_t i; 1211 int flag; 1212 1213 if (flsz == 0) 1214 return 1; 1215 1216 assert(sess->opts->del); 1217 assert(sess->opts->recursive); 1218 1219 for (i = flsz - 1; i >= 0; i--) { 1220 LOG1(sess, "%s: deleting", fl[i].wpath); 1221 if (sess->opts->dry_run) 1222 continue; 1223 assert(root != -1); 1224 flag = S_ISDIR(fl[i].st.mode) ? AT_REMOVEDIR : 0; 1225 if (unlinkat(root, fl[i].wpath, flag) == -1 && 1226 errno != ENOENT) { 1227 ERR(sess, "%s: unlinkat", fl[i].wpath); 1228 return 0; 1229 } 1230 } 1231 1232 return 1; 1233} 1234