flist.c revision 1.9
1/* $Id: flist.c,v 1.9 2019/02/12 19:13:03 benno Exp $ */ 2/* 3 * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17#include <sys/param.h> 18#include <sys/stat.h> 19 20#include <assert.h> 21#include <errno.h> 22#include <fcntl.h> 23#include <fts.h> 24#include <inttypes.h> 25#include <search.h> 26#include <stdio.h> 27#include <stdlib.h> 28#include <string.h> 29#include <unistd.h> 30 31#include "extern.h" 32 33/* 34 * We allocate our file list in chunk sizes so as not to do it one by 35 * one. 36 * Preferrably we get one or two allocation. 37 */ 38#define FLIST_CHUNK_SIZE (1024) 39 40/* 41 * These flags are part of the rsync protocol. 42 * They are sent as the first byte for a file transmission and encode 43 * information that affects subsequent transmissions. 44 */ 45#define FLIST_MODE_SAME 0x0002 /* mode is repeat */ 46#define FLIST_GID_SAME 0x0010 /* gid is repeat */ 47#define FLIST_NAME_SAME 0x0020 /* name is repeat */ 48#define FLIST_NAME_LONG 0x0040 /* name >255 bytes */ 49#define FLIST_TIME_SAME 0x0080 /* time is repeat */ 50 51/* 52 * Requied way to sort a filename list. 53 */ 54static int 55flist_cmp(const void *p1, const void *p2) 56{ 57 const struct flist *f1 = p1, *f2 = p2; 58 59 return strcmp(f1->wpath, f2->wpath); 60} 61 62/* 63 * Deduplicate our file list (which may be zero-length). 64 * Returns zero on failure, non-zero on success. 65 */ 66static int 67flist_dedupe(struct sess *sess, struct flist **fl, size_t *sz) 68{ 69 size_t i, j; 70 struct flist *new; 71 struct flist *f, *fnext; 72 73 if (*sz == 0) 74 return 1; 75 76 /* Create a new buffer, "new", and copy. */ 77 78 new = calloc(*sz, sizeof(struct flist)); 79 if (new == NULL) { 80 ERR(sess, "calloc"); 81 return 0; 82 } 83 84 for (i = j = 0; i < *sz - 1; i++) { 85 f = &(*fl)[i]; 86 fnext = &(*fl)[i + 1]; 87 88 if (strcmp(f->wpath, fnext->wpath)) { 89 new[j++] = *f; 90 continue; 91 } 92 93 /* 94 * Our working (destination) paths are the same. 95 * If the actual file is the same (as given on the 96 * command-line), then we can just discard the first. 97 * Otherwise, we need to bail out: it means we have two 98 * different files with the relative path on the 99 * destination side. 100 */ 101 102 if (strcmp(f->path, fnext->path) == 0) { 103 new[j++] = *f; 104 i++; 105 WARNX(sess, "%s: duplicate path: %s", 106 f->wpath, f->path); 107 free(fnext->path); 108 free(fnext->link); 109 fnext->path = fnext->link = NULL; 110 continue; 111 } 112 113 ERRX(sess, "%s: duplicate working path for " 114 "possibly different file: %s, %s", 115 f->wpath, f->path, fnext->path); 116 free(new); 117 return 0; 118 } 119 120 /* Don't forget the last entry. */ 121 122 if (i == *sz - 1) 123 new[j++] = (*fl)[i]; 124 125 /* 126 * Reassign to the deduplicated array. 127 * If we started out with *sz > 0, which we check for at the 128 * beginning, then we'll always continue having *sz > 0. 129 */ 130 131 free(*fl); 132 *fl = new; 133 *sz = j; 134 assert(*sz); 135 return 1; 136} 137 138/* 139 * We're now going to find our top-level directories. 140 * This only applies to recursive mode. 141 * If we have the first element as the ".", then that's the "top 142 * directory" of our transfer. 143 * Otherwise, mark up all top-level directories in the set. 144 */ 145static void 146flist_topdirs(struct sess *sess, struct flist *fl, size_t flsz) 147{ 148 size_t i; 149 const char *cp; 150 151 if (!sess->opts->recursive) 152 return; 153 154 if (flsz && strcmp(fl[0].wpath, ".")) { 155 for (i = 0; i < flsz; i++) { 156 if (!S_ISDIR(fl[i].st.mode)) 157 continue; 158 cp = strchr(fl[i].wpath, '/'); 159 if (cp != NULL && cp[1] != '\0') 160 continue; 161 fl[i].st.flags |= FLSTAT_TOP_DIR; 162 LOG4(sess, "%s: top-level", fl[i].wpath); 163 } 164 } else if (flsz) { 165 fl[0].st.flags |= FLSTAT_TOP_DIR; 166 LOG4(sess, "%s: top-level", fl[0].wpath); 167 } 168} 169 170/* 171 * Filter through the fts() file information. 172 * We want directories (pre-order), regular files, and symlinks. 173 * Everything else is skipped and possibly warned about. 174 * Return zero to skip, non-zero to examine. 175 */ 176static int 177flist_fts_check(struct sess *sess, FTSENT *ent) 178{ 179 180 if (ent->fts_info == FTS_F || 181 ent->fts_info == FTS_D || 182 ent->fts_info == FTS_SL || 183 ent->fts_info == FTS_SLNONE) 184 return 1; 185 186 if (ent->fts_info == FTS_DC) { 187 WARNX(sess, "%s: directory cycle", ent->fts_path); 188 } else if (ent->fts_info == FTS_DNR) { 189 errno = ent->fts_errno; 190 WARN(sess, "%s: unreadable directory", ent->fts_path); 191 } else if (ent->fts_info == FTS_DOT) { 192 WARNX(sess, "%s: skipping dot-file", ent->fts_path); 193 } else if (ent->fts_info == FTS_ERR) { 194 errno = ent->fts_errno; 195 WARN(sess, "%s", ent->fts_path); 196 } else if (ent->fts_info == FTS_DEFAULT) { 197 WARNX(sess, "%s: skipping special", ent->fts_path); 198 } else if (ent->fts_info == FTS_NS) { 199 errno = ent->fts_errno; 200 WARN(sess, "%s: could not stat", ent->fts_path); 201 } 202 203 return 0; 204} 205 206/* 207 * Copy necessary elements in "st" into the fields of "f". 208 */ 209static void 210flist_copy_stat(struct flist *f, const struct stat *st) 211{ 212 f->st.mode = st->st_mode; 213 f->st.uid = st->st_uid; 214 f->st.gid = st->st_gid; 215 f->st.size = st->st_size; 216 f->st.mtime = st->st_mtime; 217} 218 219void 220flist_free(struct flist *f, size_t sz) 221{ 222 size_t i; 223 224 if (f == NULL) 225 return; 226 227 for (i = 0; i < sz; i++) { 228 free(f[i].path); 229 free(f[i].link); 230 } 231 free(f); 232} 233 234/* 235 * Serialise our file list (which may be zero-length) to the wire. 236 * Makes sure that the receiver isn't going to block on sending us 237 * return messages on the log channel. 238 * Return zero on failure, non-zero on success. 239 */ 240int 241flist_send(struct sess *sess, int fdin, int fdout, const struct flist *fl, 242 size_t flsz) 243{ 244 size_t i, sz, gidsz = 0; 245 uint8_t flag; 246 const struct flist *f; 247 const char *fn; 248 struct ident *gids = NULL; 249 int rc = 0; 250 251 /* Double-check that we've no pending multiplexed data. */ 252 253 LOG2(sess, "sending file metadata list: %zu", flsz); 254 255 for (i = 0; i < flsz; i++) { 256 f = &fl[i]; 257 fn = f->wpath; 258 sz = strlen(f->wpath); 259 assert(sz > 0); 260 261 /* 262 * If applicable, unclog the read buffer. 263 * This happens when the receiver has a lot of log 264 * messages and all we're doing is sending our file list 265 * without checking for messages. 266 */ 267 268 if (sess->mplex_reads && 269 io_read_check(sess, fdin) && 270 !io_read_flush(sess, fdin)) { 271 ERRX1(sess, "io_read_flush"); 272 goto out; 273 } 274 275 /* 276 * For ease, make all of our filenames be "long" 277 * regardless their actual length. 278 * This also makes sure that we don't transmit a zero 279 * byte unintentionally. 280 */ 281 282 flag = FLIST_NAME_LONG; 283 284 LOG3(sess, "%s: sending file metadata: " 285 "size %jd, mtime %jd, mode %o", 286 fn, (intmax_t)f->st.size, 287 (intmax_t)f->st.mtime, f->st.mode); 288 289 /* Now write to the wire. */ 290 /* FIXME: buffer this. */ 291 292 if (!io_write_byte(sess, fdout, flag)) { 293 ERRX1(sess, "io_write_byte"); 294 goto out; 295 } else if (!io_write_int(sess, fdout, sz)) { 296 ERRX1(sess, "io_write_int"); 297 goto out; 298 } else if (!io_write_buf(sess, fdout, fn, sz)) { 299 ERRX1(sess, "io_write_buf"); 300 goto out; 301 } else if (!io_write_long(sess, fdout, f->st.size)) { 302 ERRX1(sess, "io_write_long"); 303 goto out; 304 } else if (!io_write_int(sess, fdout, f->st.mtime)) { 305 ERRX1(sess, "io_write_int"); 306 goto out; 307 } else if (!io_write_int(sess, fdout, f->st.mode)) { 308 ERRX1(sess, "io_write_int"); 309 goto out; 310 } 311 312 /* Conditional part: gid. */ 313 314 if (sess->opts->preserve_gids) { 315 if (!io_write_int(sess, fdout, f->st.gid)) { 316 ERRX1(sess, "io_write_int"); 317 goto out; 318 } 319 if (!idents_gid_add(sess, &gids, &gidsz, f->st.gid)) { 320 ERRX1(sess, "idents_gid_add"); 321 goto out; 322 } 323 } 324 325 /* Conditional part: link. */ 326 327 if (S_ISLNK(f->st.mode) && 328 sess->opts->preserve_links) { 329 fn = f->link; 330 sz = strlen(f->link); 331 if (!io_write_int(sess, fdout, sz)) { 332 ERRX1(sess, "io_write_int"); 333 goto out; 334 } 335 if (!io_write_buf(sess, fdout, fn, sz)) { 336 ERRX1(sess, "io_write_int"); 337 goto out; 338 } 339 } 340 341 if (S_ISREG(f->st.mode)) 342 sess->total_size += f->st.size; 343 } 344 345 /* Signal end of file list. */ 346 347 if (!io_write_byte(sess, fdout, 0)) { 348 ERRX1(sess, "io_write_byte"); 349 goto out; 350 } 351 352 /* Conditionally write gid list and terminator. */ 353 354 if (sess->opts->preserve_gids) { 355 LOG2(sess, "sending gid list: %zu", gidsz); 356 if (!idents_send(sess, fdout, gids, gidsz)) { 357 ERRX1(sess, "idents_send"); 358 goto out; 359 } 360 } 361 362 rc = 1; 363out: 364 idents_free(gids, gidsz); 365 return rc; 366} 367 368/* 369 * Read the filename of a file list. 370 * This is the most expensive part of the file list transfer, so a lot 371 * of attention has gone into transmitting as little as possible. 372 * Micro-optimisation, but whatever. 373 * Fills in "f" with the full path on success. 374 * Returns zero on failure, non-zero on success. 375 */ 376static int 377flist_recv_name(struct sess *sess, int fd, struct flist *f, uint8_t flags, 378 char last[MAXPATHLEN]) 379{ 380 uint8_t bval; 381 size_t partial = 0; 382 size_t pathlen = 0, len; 383 384 /* 385 * Read our filename. 386 * If we have FLIST_NAME_SAME, we inherit some of the last 387 * transmitted name. 388 * If we have FLIST_NAME_LONG, then the string length is greater 389 * than byte-size. 390 */ 391 392 if (FLIST_NAME_SAME & flags) { 393 if (!io_read_byte(sess, fd, &bval)) { 394 ERRX1(sess, "io_read_byte"); 395 return 0; 396 } 397 partial = bval; 398 } 399 400 /* Get the (possibly-remaining) filename length. */ 401 402 if (FLIST_NAME_LONG & flags) { 403 if (!io_read_size(sess, fd, &pathlen)) { 404 ERRX1(sess, "io_read_size"); 405 return 0; 406 } 407 } else { 408 if (!io_read_byte(sess, fd, &bval)) { 409 ERRX1(sess, "io_read_byte"); 410 return 0; 411 } 412 pathlen = bval; 413 } 414 415 /* Allocate our full filename length. */ 416 /* FIXME: maximum pathname length. */ 417 418 if ((len = pathlen + partial) == 0) { 419 ERRX(sess, "security violation: " 420 "zero-length pathname"); 421 return 0; 422 } 423 424 if ((f->path = malloc(len + 1)) == NULL) { 425 ERR(sess, "malloc"); 426 return 0; 427 } 428 f->path[len] = '\0'; 429 430 if (FLIST_NAME_SAME & flags) 431 memcpy(f->path, last, partial); 432 433 if (!io_read_buf(sess, fd, f->path + partial, pathlen)) { 434 ERRX1(sess, "io_read_buf"); 435 return 0; 436 } 437 438 if (f->path[0] == '/') { 439 ERRX(sess, "security violation: " 440 "absolute pathname: %s", f->path); 441 return 0; 442 } 443 444 if (strstr(f->path, "/../") != NULL || 445 (len > 2 && strcmp(f->path + len - 3, "/..") == 0) || 446 (len > 2 && strncmp(f->path, "../", 3) == 0) || 447 strcmp(f->path, "..") == 0) { 448 ERRX(sess, "%s: security violation: " 449 "backtracking pathname", f->path); 450 return 0; 451 } 452 453 /* Record our last path and construct our filename. */ 454 455 strlcpy(last, f->path, MAXPATHLEN); 456 f->wpath = f->path; 457 return 1; 458} 459 460/* 461 * Reallocate a file list in chunks of FLIST_CHUNK_SIZE; 462 * Returns zero on failure, non-zero on success. 463 */ 464static int 465flist_realloc(struct sess *sess, struct flist **fl, size_t *sz, size_t *max) 466{ 467 void *pp; 468 469 if (*sz + 1 <= *max) { 470 (*sz)++; 471 return 1; 472 } 473 474 pp = recallocarray(*fl, *max, 475 *max + FLIST_CHUNK_SIZE, sizeof(struct flist)); 476 if (pp == NULL) { 477 ERR(sess, "recallocarray"); 478 return 0; 479 } 480 *fl = pp; 481 *max += FLIST_CHUNK_SIZE; 482 (*sz)++; 483 return 1; 484} 485 486/* 487 * Copy a regular or symbolic link file "path" into "f". 488 * This handles the correct path creation and symbolic linking. 489 * Returns zero on failure, non-zero on success. 490 */ 491static int 492flist_append(struct sess *sess, struct flist *f, struct stat *st, 493 const char *path) 494{ 495 496 /* 497 * Copy the full path for local addressing and transmit 498 * only the filename part for the receiver. 499 */ 500 501 if ((f->path = strdup(path)) == NULL) { 502 ERR(sess, "strdup"); 503 return 0; 504 } 505 506 if ((f->wpath = strrchr(f->path, '/')) == NULL) 507 f->wpath = f->path; 508 else 509 f->wpath++; 510 511 /* 512 * On the receiving end, we'll strip out all bits on the 513 * mode except for the file permissions. 514 * No need to warn about it here. 515 */ 516 517 flist_copy_stat(f, st); 518 519 /* Optionally copy link information. */ 520 521 if (S_ISLNK(st->st_mode)) { 522 f->link = symlink_read(sess, f->path); 523 if (f->link == NULL) { 524 ERRX1(sess, "symlink_read"); 525 return 0; 526 } 527 } 528 529 return 1; 530} 531 532/* 533 * Receive a file list from the wire, filling in length "sz" (which may 534 * possibly be zero) and list "flp" on success. 535 * Return zero on failure, non-zero on success. 536 */ 537int 538flist_recv(struct sess *sess, int fd, struct flist **flp, size_t *sz) 539{ 540 struct flist *fl = NULL; 541 struct flist *ff; 542 const struct flist *fflast = NULL; 543 size_t flsz = 0, flmax = 0, lsz, gidsz = 0; 544 uint8_t flag; 545 char last[MAXPATHLEN]; 546 uint64_t lval; /* temporary values... */ 547 int32_t ival; 548 struct ident *gids = NULL; 549 550 last[0] = '\0'; 551 552 for (;;) { 553 if (!io_read_byte(sess, fd, &flag)) { 554 ERRX1(sess, "io_read_byte"); 555 goto out; 556 } else if (flag == 0) 557 break; 558 559 if (!flist_realloc(sess, &fl, &flsz, &flmax)) { 560 ERRX1(sess, "flist_realloc"); 561 goto out; 562 } 563 564 ff = &fl[flsz - 1]; 565 fflast = flsz > 1 ? &fl[flsz - 2] : NULL; 566 567 /* Filename first. */ 568 569 if (!flist_recv_name(sess, fd, ff, flag, last)) { 570 ERRX1(sess, "flist_recv_name"); 571 goto out; 572 } 573 574 /* Read the file size. */ 575 576 if (!io_read_ulong(sess, fd, &lval)) { 577 ERRX1(sess, "io_read_ulong"); 578 goto out; 579 } 580 ff->st.size = lval; 581 582 /* Read the modification time. */ 583 584 if (!(FLIST_TIME_SAME & flag)) { 585 if (!io_read_int(sess, fd, &ival)) { 586 ERRX1(sess, "io_read_int"); 587 goto out; 588 } 589 ff->st.mtime = ival; 590 } else if (fflast == NULL) { 591 ERRX(sess, "same time without last entry"); 592 goto out; 593 } else 594 ff->st.mtime = fflast->st.mtime; 595 596 /* Read the file mode. */ 597 598 if (!(FLIST_MODE_SAME & flag)) { 599 if (!io_read_int(sess, fd, &ival)) { 600 ERRX1(sess, "io_read_int"); 601 goto out; 602 } 603 ff->st.mode = ival; 604 } else if (fflast == NULL) { 605 ERRX(sess, "same mode without last entry"); 606 goto out; 607 } else 608 ff->st.mode = fflast->st.mode; 609 610 /* Conditional part: gid. */ 611 612 if (sess->opts->preserve_gids) { 613 if ( ! (FLIST_GID_SAME & flag)) { 614 if ( ! io_read_int(sess, fd, &ival)) { 615 ERRX1(sess, "io_read_int"); 616 goto out; 617 } 618 ff->st.gid = ival; 619 } else if (NULL == fflast) { 620 ERRX(sess, "same gid " 621 "without last entry"); 622 goto out; 623 } else 624 ff->st.gid = fflast->st.gid; 625 } 626 627 /* Conditional part: link. */ 628 629 if (S_ISLNK(ff->st.mode) && 630 sess->opts->preserve_links) { 631 if (!io_read_size(sess, fd, &lsz)) { 632 ERRX1(sess, "io_read_size"); 633 goto out; 634 } else if (lsz == 0) { 635 ERRX(sess, "empty link name"); 636 goto out; 637 } 638 ff->link = calloc(lsz + 1, 1); 639 if (ff->link == NULL) { 640 ERR(sess, "calloc"); 641 goto out; 642 } 643 if (!io_read_buf(sess, fd, ff->link, lsz)) { 644 ERRX1(sess, "io_read_buf"); 645 goto out; 646 } 647 } 648 649 LOG3(sess, "%s: received file metadata: " 650 "size %jd, mtime %jd, mode %o", 651 ff->path, (intmax_t)ff->st.size, 652 (intmax_t)ff->st.mtime, ff->st.mode); 653 654 if (S_ISREG(ff->st.mode)) 655 sess->total_size += ff->st.size; 656 } 657 658 /* 659 * Now conditionally read the group list. 660 * We then remap all group identifiers to the local ids. 661 */ 662 663 if (sess->opts->preserve_gids) { 664 if (!idents_recv(sess, fd, &gids, &gidsz)) { 665 ERRX1(sess, "idents_recv"); 666 goto out; 667 } 668 LOG2(sess, "received gid list: %zu", gidsz); 669 } 670 671 /* Remember to order the received list. */ 672 673 LOG2(sess, "received file metadata list: %zu", flsz); 674 qsort(fl, flsz, sizeof(struct flist), flist_cmp); 675 flist_topdirs(sess, fl, flsz); 676 *sz = flsz; 677 *flp = fl; 678 679 /* Lastly, remap and reassign group identifiers. */ 680 681 if (sess->opts->preserve_gids) { 682 idents_gid_remap(sess, gids, gidsz); 683 idents_gid_assign(sess, fl, flsz, gids, gidsz); 684 } 685 686 idents_free(gids, gidsz); 687 return 1; 688out: 689 flist_free(fl, flsz); 690 idents_free(gids, gidsz); 691 *sz = 0; 692 *flp = NULL; 693 return 0; 694} 695 696/* 697 * Generate a flist possibly-recursively given a file root, which may 698 * also be a regular file or symlink. 699 * On success, augments the generated list in "flp" of length "sz". 700 * Returns zero on failure, non-zero on success. 701 */ 702static int 703flist_gen_dirent(struct sess *sess, char *root, struct flist **fl, size_t *sz, 704 size_t *max) 705{ 706 char *cargv[2], *cp; 707 int rc = 0; 708 FTS *fts; 709 FTSENT *ent; 710 struct flist *f; 711 size_t flsz = 0, stripdir; 712 struct stat st; 713 714 cargv[0] = root; 715 cargv[1] = NULL; 716 717 /* 718 * If we're a file, then revert to the same actions we use for 719 * the non-recursive scan. 720 */ 721 722 if (lstat(root, &st) == -1) { 723 ERR(sess, "%s: lstat", root); 724 return 0; 725 } else if (S_ISREG(st.st_mode)) { 726 if (!flist_realloc(sess, fl, sz, max)) { 727 ERRX1(sess, "flist_realloc"); 728 return 0; 729 } 730 f = &(*fl)[(*sz) - 1]; 731 assert(f != NULL); 732 733 if (!flist_append(sess, f, &st, root)) { 734 ERRX1(sess, "flist_append"); 735 return 0; 736 } else if (unveil(root, "r") == -1) { 737 ERR(sess, "%s: unveil", root); 738 return 0; 739 } 740 return 1; 741 } else if (S_ISLNK(st.st_mode)) { 742 if (!sess->opts->preserve_links) { 743 WARNX(sess, "%s: skipping symlink", root); 744 return 1; 745 } else if (!flist_realloc(sess, fl, sz, max)) { 746 ERRX1(sess, "flist_realloc"); 747 return 0; 748 } 749 f = &(*fl)[(*sz) - 1]; 750 assert(f != NULL); 751 752 if (!flist_append(sess, f, &st, root)) { 753 ERRX1(sess, "flist_append"); 754 return 0; 755 } else if (unveil(root, "r") == -1) { 756 ERR(sess, "%s: unveil", root); 757 return 0; 758 } 759 return 1; 760 } else if (!S_ISDIR(st.st_mode)) { 761 WARNX(sess, "%s: skipping special", root); 762 return 1; 763 } 764 765 /* 766 * If we end with a slash, it means that we're not supposed to 767 * copy the directory part itself---only the contents. 768 * So set "stripdir" to be what we take out. 769 */ 770 771 stripdir = strlen(root); 772 assert(stripdir > 0); 773 if (root[stripdir - 1] != '/') 774 stripdir = 0; 775 776 /* 777 * If we're not stripping anything, then see if we need to strip 778 * out the leading material in the path up to and including the 779 * last directory component. 780 */ 781 782 if (stripdir == 0) 783 if ((cp = strrchr(root, '/')) != NULL) 784 stripdir = cp - root + 1; 785 786 /* 787 * If we're recursive, then we need to take down all of the 788 * files and directory components, so use fts(3). 789 * Copying the information file-by-file into the flstat. 790 * We'll make sense of it in flist_send. 791 */ 792 793 if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) { 794 ERR(sess, "fts_open"); 795 return 0; 796 } 797 798 errno = 0; 799 while ((ent = fts_read(fts)) != NULL) { 800 if (!flist_fts_check(sess, ent)) { 801 errno = 0; 802 continue; 803 } 804 805 /* We don't allow symlinks without -l. */ 806 807 assert(ent->fts_statp != NULL); 808 if (S_ISLNK(ent->fts_statp->st_mode) && 809 !sess->opts->preserve_links) { 810 WARNX(sess, "%s: skipping " 811 "symlink", ent->fts_path); 812 continue; 813 } 814 815 /* Allocate a new file entry. */ 816 817 if (!flist_realloc(sess, fl, sz, max)) { 818 ERRX1(sess, "flist_realloc"); 819 goto out; 820 } 821 flsz++; 822 f = &(*fl)[*sz - 1]; 823 824 /* Our path defaults to "." for the root. */ 825 826 if ('\0' == ent->fts_path[stripdir]) { 827 if (asprintf(&f->path, "%s.", ent->fts_path) < 0) { 828 ERR(sess, "asprintf"); 829 f->path = NULL; 830 goto out; 831 } 832 } else { 833 if ((f->path = strdup(ent->fts_path)) == NULL) { 834 ERR(sess, "strdup"); 835 goto out; 836 } 837 } 838 839 f->wpath = f->path + stripdir; 840 flist_copy_stat(f, ent->fts_statp); 841 842 /* Optionally copy link information. */ 843 844 if (S_ISLNK(ent->fts_statp->st_mode)) { 845 f->link = symlink_read(sess, f->path); 846 if (f->link == NULL) { 847 ERRX1(sess, "symlink_read"); 848 goto out; 849 } 850 } 851 852 /* Reset errno for next fts_read() call. */ 853 errno = 0; 854 } 855 if (errno) { 856 ERR(sess, "fts_read"); 857 goto out; 858 } else if (unveil(root, "r") == -1) { 859 ERR(sess, "%s: unveil", root); 860 goto out; 861 } 862 863 LOG3(sess, "generated %zu filenames: %s", flsz, root); 864 rc = 1; 865out: 866 fts_close(fts); 867 return rc; 868} 869 870/* 871 * Generate a flist recursively given the array of directories (or 872 * files, symlinks, doesn't matter) specified in argv (argc >0). 873 * On success, stores the generated list in "flp" with length "sz", 874 * which may be zero. 875 * Returns zero on failure, non-zero on success. 876 */ 877static int 878flist_gen_dirs(struct sess *sess, size_t argc, char **argv, struct flist **flp, 879 size_t *sz) 880{ 881 size_t i, max = 0; 882 883 for (i = 0; i < argc; i++) 884 if (!flist_gen_dirent(sess, argv[i], flp, sz, &max)) 885 break; 886 887 if (i == argc) { 888 LOG2(sess, "recursively generated %zu filenames", *sz); 889 return 1; 890 } 891 892 ERRX1(sess, "flist_gen_dirent"); 893 flist_free(*flp, max); 894 *flp = NULL; 895 *sz = 0; 896 return 0; 897} 898 899/* 900 * Generate list of files from the command-line argc (>0) and argv. 901 * On success, stores the generated list in "flp" with length "sz", 902 * which may be zero. 903 * Returns zero on failure, non-zero on success. 904 */ 905static int 906flist_gen_files(struct sess *sess, size_t argc, char **argv, 907 struct flist **flp, size_t *sz) 908{ 909 struct flist *fl = NULL, *f; 910 size_t i, flsz = 0; 911 struct stat st; 912 913 assert(argc); 914 915 if ((fl = calloc(argc, sizeof(struct flist))) == NULL) { 916 ERR(sess, "calloc"); 917 return 0; 918 } 919 920 for (i = 0; i < argc; i++) { 921 if ('\0' == argv[i][0]) 922 continue; 923 if (lstat(argv[i], &st) == -1) { 924 ERR(sess, "%s: lstat", argv[i]); 925 goto out; 926 } 927 928 /* 929 * File type checks. 930 * In non-recursive mode, we don't accept directories. 931 * We also skip symbolic links without -l. 932 * Beyond that, we only accept regular files. 933 */ 934 935 if (S_ISDIR(st.st_mode)) { 936 WARNX(sess, "%s: skipping directory", argv[i]); 937 continue; 938 } else if (S_ISLNK(st.st_mode)) { 939 if (!sess->opts->preserve_links) { 940 WARNX(sess, "%s: skipping " 941 "symlink", argv[i]); 942 continue; 943 } 944 } else if (!S_ISREG(st.st_mode)) { 945 WARNX(sess, "%s: skipping special", argv[i]); 946 continue; 947 } 948 949 950 f = &fl[flsz++]; 951 assert(f != NULL); 952 953 /* Add this file to our file-system worldview. */ 954 955 if (unveil(argv[i], "r") == -1) { 956 ERR(sess, "%s: unveil", argv[i]); 957 goto out; 958 } else if (!flist_append(sess, f, &st, argv[i])) { 959 ERRX1(sess, "flist_append"); 960 goto out; 961 } 962 } 963 964 LOG2(sess, "non-recursively generated %zu filenames", flsz); 965 *sz = flsz; 966 *flp = fl; 967 return 1; 968out: 969 flist_free(fl, argc); 970 *sz = 0; 971 *flp = NULL; 972 return 0; 973} 974 975/* 976 * Generate a sorted, de-duplicated list of file metadata. 977 * In non-recursive mode (the default), we use only the files we're 978 * given. 979 * Otherwise, directories are recursively examined. 980 * Returns zero on failure, non-zero on success. 981 * On success, "fl" will need to be freed with flist_free(). 982 */ 983int 984flist_gen(struct sess *sess, size_t argc, char **argv, struct flist **flp, 985 size_t *sz) 986{ 987 int rc; 988 989 assert(argc > 0); 990 rc = sess->opts->recursive ? 991 flist_gen_dirs(sess, argc, argv, flp, sz) : 992 flist_gen_files(sess, argc, argv, flp, sz); 993 994 /* After scanning, lock our file-system view. */ 995 996 if (unveil(NULL, NULL) == -1) { 997 ERR(sess, "unveil"); 998 return 0; 999 } else if (!rc) 1000 return 0; 1001 1002 qsort(*flp, *sz, sizeof(struct flist), flist_cmp); 1003 1004 if (flist_dedupe(sess, flp, sz)) { 1005 flist_topdirs(sess, *flp, *sz); 1006 return 1; 1007 } 1008 1009 ERRX1(sess, "flist_dedupe"); 1010 flist_free(*flp, *sz); 1011 *flp = NULL; 1012 *sz = 0; 1013 return 0; 1014} 1015 1016/* 1017 * Generate a list of files in root to delete that are within the 1018 * top-level directories stipulated by "wfl". 1019 * Only handles symbolic links, directories, and regular files. 1020 * Returns zero on failure (fl and flsz will be NULL and zero), non-zero 1021 * on success. 1022 * On success, "fl" will need to be freed with flist_free(). 1023 */ 1024int 1025flist_gen_dels(struct sess *sess, const char *root, struct flist **fl, 1026 size_t *sz, const struct flist *wfl, size_t wflsz) 1027{ 1028 char **cargv = NULL; 1029 int rc = 0, c; 1030 FTS *fts = NULL; 1031 FTSENT *ent; 1032 struct flist *f; 1033 size_t cargvs = 0, i, j, max = 0, stripdir; 1034 ENTRY hent; 1035 ENTRY *hentp; 1036 1037 *fl = NULL; 1038 *sz = 0; 1039 1040 /* Only run this code when we're recursive. */ 1041 1042 if (!sess->opts->recursive) 1043 return 1; 1044 1045 /* 1046 * Gather up all top-level directories for scanning. 1047 * This is stipulated by rsync's --delete behaviour, where we 1048 * only delete things in the top-level directories given on the 1049 * command line. 1050 */ 1051 1052 assert(wflsz > 0); 1053 for (i = 0; i < wflsz; i++) 1054 if (FLSTAT_TOP_DIR & wfl[i].st.flags) 1055 cargvs++; 1056 if (cargvs == 0) 1057 return 1; 1058 1059 if ((cargv = calloc(cargvs + 1, sizeof(char *))) == NULL) { 1060 ERR(sess, "calloc"); 1061 return 0; 1062 } 1063 1064 /* 1065 * If we're given just a "." as the first entry, that means 1066 * we're doing a relative copy with a trailing slash. 1067 * Special-case this just for the sake of simplicity. 1068 * Otherwise, look through all top-levels. 1069 */ 1070 1071 if (wflsz && strcmp(wfl[0].wpath, ".") == 0) { 1072 assert(cargvs == 1); 1073 assert(S_ISDIR(wfl[0].st.mode)); 1074 if (asprintf(&cargv[0], "%s/", root) < 0) { 1075 ERR(sess, "asprintf"); 1076 cargv[0] = NULL; 1077 goto out; 1078 } 1079 cargv[1] = NULL; 1080 } else { 1081 for (i = j = 0; i < wflsz; i++) { 1082 if (!(FLSTAT_TOP_DIR & wfl[i].st.flags)) 1083 continue; 1084 assert(S_ISDIR(wfl[i].st.mode)); 1085 assert(strcmp(wfl[i].wpath, ".")); 1086 c = asprintf(&cargv[j], "%s/%s", root, wfl[i].wpath); 1087 if (c < 0) { 1088 ERR(sess, "asprintf"); 1089 cargv[j] = NULL; 1090 goto out; 1091 } 1092 LOG4(sess, "%s: will scan " 1093 "for deletions", cargv[j]); 1094 j++; 1095 } 1096 assert(j == cargvs); 1097 cargv[j] = NULL; 1098 } 1099 1100 LOG2(sess, "delete from %zu directories", cargvs); 1101 1102 /* 1103 * Next, use the standard hcreate(3) hashtable interface to hash 1104 * all of the files that we want to synchronise. 1105 * This way, we'll be able to determine which files we want to 1106 * delete in O(n) time instead of O(n * search) time. 1107 * Plus, we can do the scan in-band and only allocate the files 1108 * we want to delete. 1109 */ 1110 1111 if (!hcreate(wflsz)) { 1112 ERR(sess, "hcreate"); 1113 goto out; 1114 } 1115 1116 for (i = 0; i < wflsz; i++) { 1117 memset(&hent, 0, sizeof(ENTRY)); 1118 if ((hent.key = strdup(wfl[i].wpath)) == NULL) { 1119 ERR(sess, "strdup"); 1120 goto out; 1121 } 1122 if ((hentp = hsearch(hent, ENTER)) == NULL) { 1123 ERR(sess, "hsearch"); 1124 goto out; 1125 } else if (hentp->key != hent.key) { 1126 ERRX(sess, "%s: duplicate", wfl[i].wpath); 1127 free(hent.key); 1128 goto out; 1129 } 1130 } 1131 1132 /* 1133 * Now we're going to try to descend into all of the top-level 1134 * directories stipulated by the file list. 1135 * If the directories don't exist, it's ok. 1136 */ 1137 1138 if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) { 1139 ERR(sess, "fts_open"); 1140 goto out; 1141 } 1142 1143 stripdir = strlen(root) + 1; 1144 errno = 0; 1145 while ((ent = fts_read(fts)) != NULL) { 1146 if (ent->fts_info == FTS_NS) 1147 continue; 1148 if (!flist_fts_check(sess, ent)) { 1149 errno = 0; 1150 continue; 1151 } else if (stripdir >= ent->fts_pathlen) 1152 continue; 1153 1154 /* Look up in hashtable. */ 1155 1156 memset(&hent, 0, sizeof(ENTRY)); 1157 hent.key = ent->fts_path + stripdir; 1158 if (hsearch(hent, FIND) != NULL) 1159 continue; 1160 1161 /* Not found: we'll delete it. */ 1162 1163 if (!flist_realloc(sess, fl, sz, &max)) { 1164 ERRX1(sess, "flist_realloc"); 1165 goto out; 1166 } 1167 f = &(*fl)[*sz - 1]; 1168 1169 if ((f->path = strdup(ent->fts_path)) == NULL) { 1170 ERR(sess, "strdup"); 1171 goto out; 1172 } 1173 f->wpath = f->path + stripdir; 1174 assert(ent->fts_statp != NULL); 1175 flist_copy_stat(f, ent->fts_statp); 1176 errno = 0; 1177 } 1178 1179 if (errno) { 1180 ERR(sess, "fts_read"); 1181 goto out; 1182 } 1183 1184 qsort(*fl, *sz, sizeof(struct flist), flist_cmp); 1185 rc = 1; 1186out: 1187 if (fts != NULL) 1188 fts_close(fts); 1189 for (i = 0; i < cargvs; i++) 1190 free(cargv[i]); 1191 free(cargv); 1192 hdestroy(); 1193 return rc; 1194} 1195 1196/* 1197 * Delete all files and directories in "fl". 1198 * If called with a zero-length "fl", does nothing. 1199 * If dry_run is specified, simply write what would be done. 1200 * Return zero on failure, non-zero on success. 1201 */ 1202int 1203flist_del(struct sess *sess, int root, const struct flist *fl, size_t flsz) 1204{ 1205 ssize_t i; 1206 int flag; 1207 1208 if (flsz == 0) 1209 return 1; 1210 1211 assert(sess->opts->del); 1212 assert(sess->opts->recursive); 1213 1214 for (i = flsz - 1; i >= 0; i--) { 1215 LOG1(sess, "%s: deleting", fl[i].wpath); 1216 if (sess->opts->dry_run) 1217 continue; 1218 assert(root != -1); 1219 flag = S_ISDIR(fl[i].st.mode) ? AT_REMOVEDIR : 0; 1220 if (unlinkat(root, fl[i].wpath, flag) == -1 && 1221 errno != ENOENT) { 1222 ERR(sess, "%s: unlinkat", fl[i].wpath); 1223 return 0; 1224 } 1225 } 1226 1227 return 1; 1228} 1229