flist.c revision 1.5
1/* $Id: flist.c,v 1.5 2019/02/11 21:41:22 deraadt Exp $ */ 2/* 3 * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17#include <sys/param.h> 18#include <sys/stat.h> 19 20#include <assert.h> 21#include <errno.h> 22#include <fcntl.h> 23#include <inttypes.h> 24#include <fts.h> 25#include <search.h> 26#include <stdio.h> 27#include <stdlib.h> 28#include <string.h> 29#include <unistd.h> 30 31#include "extern.h" 32 33/* 34 * We allocate our file list in chunk sizes so as not to do it one by 35 * one. 36 * Preferrably we get one or two allocation. 37 */ 38#define FLIST_CHUNK_SIZE (1024) 39 40/* 41 * These flags are part of the rsync protocol. 42 * They are sent as the first byte for a file transmission and encode 43 * information that affects subsequent transmissions. 44 */ 45#define FLIST_MODE_SAME 0x0002 /* mode is repeat */ 46#define FLIST_NAME_SAME 0x0020 /* name is repeat */ 47#define FLIST_NAME_LONG 0x0040 /* name >255 bytes */ 48#define FLIST_TIME_SAME 0x0080 /* time is repeat */ 49 50/* 51 * Requied way to sort a filename list. 52 */ 53static int 54flist_cmp(const void *p1, const void *p2) 55{ 56 const struct flist *f1 = p1, *f2 = p2; 57 58 return strcmp(f1->wpath, f2->wpath); 59} 60 61/* 62 * Deduplicate our file list (which may be zero-length). 63 * Returns zero on failure, non-zero on success. 64 */ 65static int 66flist_dedupe(struct sess *sess, struct flist **fl, size_t *sz) 67{ 68 size_t i, j; 69 struct flist *new; 70 struct flist *f, *fnext; 71 72 if (*sz == 0) 73 return 1; 74 75 /* Create a new buffer, "new", and copy. */ 76 77 new = calloc(*sz, sizeof(struct flist)); 78 if (new == NULL) { 79 ERR(sess, "calloc"); 80 return 0; 81 } 82 83 for (i = j = 0; i < *sz - 1; i++) { 84 f = &(*fl)[i]; 85 fnext = &(*fl)[i + 1]; 86 87 if (strcmp(f->wpath, fnext->wpath)) { 88 new[j++] = *f; 89 continue; 90 } 91 92 /* 93 * Our working (destination) paths are the same. 94 * If the actual file is the same (as given on the 95 * command-line), then we can just discard the first. 96 * Otherwise, we need to bail out: it means we have two 97 * different files with the relative path on the 98 * destination side. 99 */ 100 101 if (strcmp(f->path, fnext->path) == 0) { 102 new[j++] = *f; 103 i++; 104 WARNX(sess, "%s: duplicate path: %s", 105 f->wpath, f->path); 106 free(fnext->path); 107 free(fnext->link); 108 fnext->path = fnext->link = NULL; 109 continue; 110 } 111 112 ERRX(sess, "%s: duplicate working path for " 113 "possibly different file: %s, %s", 114 f->wpath, f->path, fnext->path); 115 free(new); 116 return 0; 117 } 118 119 /* Don't forget the last entry. */ 120 121 if (i == *sz - 1) 122 new[j++] = (*fl)[i]; 123 124 /* 125 * Reassign to the deduplicated array. 126 * If we started out with *sz > 0, which we check for at the 127 * beginning, then we'll always continue having *sz > 0. 128 */ 129 130 free(*fl); 131 *fl = new; 132 *sz = j; 133 assert(*sz); 134 return 1; 135} 136 137/* 138 * We're now going to find our top-level directories. 139 * This only applies to recursive mode. 140 * If we have the first element as the ".", then that's the "top 141 * directory" of our transfer. 142 * Otherwise, mark up all top-level directories in the set. 143 */ 144static void 145flist_topdirs(struct sess *sess, struct flist *fl, size_t flsz) 146{ 147 size_t i; 148 const char *cp; 149 150 if (!sess->opts->recursive) 151 return; 152 153 if (flsz && strcmp(fl[0].wpath, ".")) { 154 for (i = 0; i < flsz; i++) { 155 if (!S_ISDIR(fl[i].st.mode)) 156 continue; 157 cp = strchr(fl[i].wpath, '/'); 158 if (cp != NULL && cp[1] != '\0') 159 continue; 160 fl[i].st.flags |= FLSTAT_TOP_DIR; 161 LOG4(sess, "%s: top-level", fl[i].wpath); 162 } 163 } else if (flsz) { 164 fl[0].st.flags |= FLSTAT_TOP_DIR; 165 LOG4(sess, "%s: top-level", fl[0].wpath); 166 } 167} 168 169/* 170 * Filter through the fts() file information. 171 * We want directories (pre-order), regular files, and symlinks. 172 * Everything else is skipped and possibly warned about. 173 * Return zero to skip, non-zero to examine. 174 */ 175static int 176flist_fts_check(struct sess *sess, FTSENT *ent) 177{ 178 179 if (ent->fts_info == FTS_F || 180 ent->fts_info == FTS_D || 181 ent->fts_info == FTS_SL || 182 ent->fts_info == FTS_SLNONE) 183 return 1; 184 185 if (ent->fts_info == FTS_DC) { 186 WARNX(sess, "%s: directory cycle", ent->fts_path); 187 } else if (ent->fts_info == FTS_DNR) { 188 errno = ent->fts_errno; 189 WARN(sess, "%s: unreadable directory", ent->fts_path); 190 } else if (ent->fts_info == FTS_DOT) { 191 WARNX(sess, "%s: skipping dot-file", ent->fts_path); 192 } else if (ent->fts_info == FTS_ERR) { 193 errno = ent->fts_errno; 194 WARN(sess, "%s", ent->fts_path); 195 } else if (ent->fts_info == FTS_DEFAULT) { 196 WARNX(sess, "%s: skipping special", ent->fts_path); 197 } else if (ent->fts_info == FTS_NS) { 198 errno = ent->fts_errno; 199 WARN(sess, "%s: could not stat", ent->fts_path); 200 } 201 202 return 0; 203} 204 205/* 206 * Copy necessary elements in "st" into the fields of "f". 207 */ 208static void 209flist_copy_stat(struct flist *f, const struct stat *st) 210{ 211 f->st.mode = st->st_mode; 212 f->st.uid = st->st_uid; 213 f->st.gid = st->st_gid; 214 f->st.size = st->st_size; 215 f->st.mtime = st->st_mtime; 216} 217 218void 219flist_free(struct flist *f, size_t sz) 220{ 221 size_t i; 222 223 if (f == NULL) 224 return; 225 226 for (i = 0; i < sz; i++) { 227 free(f[i].path); 228 free(f[i].link); 229 } 230 free(f); 231} 232 233/* 234 * Serialise our file list (which may be zero-length) to the wire. 235 * Makes sure that the receiver isn't going to block on sending us 236 * return messages on the log channel. 237 * Return zero on failure, non-zero on success. 238 */ 239int 240flist_send(struct sess *sess, int fdin, int fdout, const struct flist *fl, 241 size_t flsz) 242{ 243 size_t i, fnlen; 244 uint8_t flag; 245 const struct flist *f; 246 const char *fn; 247 248 /* Double-check that we've no pending multiplexed data. */ 249 250 LOG2(sess, "sending file metadata list: %zu", flsz); 251 252 for (i = 0; i < flsz; i++) { 253 f = &fl[i]; 254 fn = f->wpath; 255 fnlen = strlen(f->wpath); 256 assert(fnlen > 0); 257 258 /* 259 * If applicable, unclog the read buffer. 260 * This happens when the receiver has a lot of log 261 * messages and all we're doing is sending our file list 262 * without checking for messages. 263 */ 264 265 if (sess->mplex_reads && 266 io_read_check(sess, fdin) && 267 !io_read_flush(sess, fdin)) { 268 ERRX1(sess, "io_read_flush"); 269 return 0; 270 } 271 272 /* 273 * For ease, make all of our filenames be "long" 274 * regardless their actual length. 275 * This also makes sure that we don't transmit a zero 276 * byte unintentionally. 277 */ 278 279 flag = FLIST_NAME_LONG; 280 281 LOG3(sess, "%s: sending file metadata: " 282 "size %jd, mtime %jd, mode %o", 283 fn, (intmax_t)f->st.size, 284 (intmax_t)f->st.mtime, f->st.mode); 285 286 /* Now write to the wire. */ 287 /* FIXME: buffer this. */ 288 289 if (!io_write_byte(sess, fdout, flag)) { 290 ERRX1(sess, "io_write_byte"); 291 return 0; 292 } else if (!io_write_int(sess, fdout, fnlen)) { 293 ERRX1(sess, "io_write_int"); 294 return 0; 295 } else if (!io_write_buf(sess, fdout, fn, fnlen)) { 296 ERRX1(sess, "io_write_buf"); 297 return 0; 298 } else if (!io_write_long(sess, fdout, f->st.size)) { 299 ERRX1(sess, "io_write_long"); 300 return 0; 301 } else if (!io_write_int(sess, fdout, f->st.mtime)) { 302 ERRX1(sess, "io_write_int"); 303 return 0; 304 } else if (!io_write_int(sess, fdout, f->st.mode)) { 305 ERRX1(sess, "io_write_int"); 306 return 0; 307 } 308 309 /* Optional link information. */ 310 311 if (S_ISLNK(f->st.mode) && 312 sess->opts->preserve_links) { 313 fn = f->link; 314 fnlen = strlen(f->link); 315 if (!io_write_int(sess, fdout, fnlen)) { 316 ERRX1(sess, "io_write_int"); 317 return 0; 318 } 319 if (!io_write_buf(sess, fdout, fn, fnlen)) { 320 ERRX1(sess, "io_write_int"); 321 return 0; 322 } 323 } 324 325 if (S_ISREG(f->st.mode)) 326 sess->total_size += f->st.size; 327 } 328 329 if (!io_write_byte(sess, fdout, 0)) { 330 ERRX1(sess, "io_write_byte"); 331 return 0; 332 } 333 334 return 1; 335} 336 337/* 338 * Read the filename of a file list. 339 * This is the most expensive part of the file list transfer, so a lot 340 * of attention has gone into transmitting as little as possible. 341 * Micro-optimisation, but whatever. 342 * Fills in "f" with the full path on success. 343 * Returns zero on failure, non-zero on success. 344 */ 345static int 346flist_recv_name(struct sess *sess, int fd, struct flist *f, uint8_t flags, 347 char last[MAXPATHLEN]) 348{ 349 uint8_t bval; 350 size_t partial = 0; 351 size_t pathlen = 0, len; 352 353 /* 354 * Read our filename. 355 * If we have FLIST_NAME_SAME, we inherit some of the last 356 * transmitted name. 357 * If we have FLIST_NAME_LONG, then the string length is greater 358 * than byte-size. 359 */ 360 361 if (FLIST_NAME_SAME & flags) { 362 if (!io_read_byte(sess, fd, &bval)) { 363 ERRX1(sess, "io_read_byte"); 364 return 0; 365 } 366 partial = bval; 367 } 368 369 /* Get the (possibly-remaining) filename length. */ 370 371 if (FLIST_NAME_LONG & flags) { 372 if (!io_read_size(sess, fd, &pathlen)) { 373 ERRX1(sess, "io_read_size"); 374 return 0; 375 } 376 } else { 377 if (!io_read_byte(sess, fd, &bval)) { 378 ERRX1(sess, "io_read_byte"); 379 return 0; 380 } 381 pathlen = bval; 382 } 383 384 /* Allocate our full filename length. */ 385 /* FIXME: maximum pathname length. */ 386 387 if ((len = pathlen + partial) == 0) { 388 ERRX(sess, "security violation: " 389 "zero-length pathname"); 390 return 0; 391 } 392 393 if ((f->path = malloc(len + 1)) == NULL) { 394 ERR(sess, "malloc"); 395 return 0; 396 } 397 f->path[len] = '\0'; 398 399 if (FLIST_NAME_SAME & flags) 400 memcpy(f->path, last, partial); 401 402 if (!io_read_buf(sess, fd, f->path + partial, pathlen)) { 403 ERRX1(sess, "io_read_buf"); 404 return 0; 405 } 406 407 if (f->path[0] == '/') { 408 ERRX(sess, "security violation: " 409 "absolute pathname: %s", f->path); 410 return 0; 411 } 412 413 if (strstr(f->path, "/../") != NULL || 414 (len > 2 && strcmp(f->path + len - 3, "/..") == 0) || 415 (len > 2 && strncmp(f->path, "../", 3) == 0) || 416 strcmp(f->path, "..") == 0) { 417 ERRX(sess, "%s: security violation: " 418 "backtracking pathname", f->path); 419 return 0; 420 } 421 422 /* Record our last path and construct our filename. */ 423 424 strlcpy(last, f->path, MAXPATHLEN); 425 f->wpath = f->path; 426 return 1; 427} 428 429/* 430 * Reallocate a file list in chunks of FLIST_CHUNK_SIZE; 431 * Returns zero on failure, non-zero on success. 432 */ 433static int 434flist_realloc(struct sess *sess, struct flist **fl, size_t *sz, size_t *max) 435{ 436 void *pp; 437 438 if (*sz + 1 <= *max) { 439 (*sz)++; 440 return 1; 441 } 442 443 pp = recallocarray(*fl, *max, 444 *max + FLIST_CHUNK_SIZE, sizeof(struct flist)); 445 if (pp == NULL) { 446 ERR(sess, "recallocarray"); 447 return 0; 448 } 449 *fl = pp; 450 *max += FLIST_CHUNK_SIZE; 451 (*sz)++; 452 return 1; 453} 454 455/* 456 * Copy a regular or symbolic link file "path" into "f". 457 * This handles the correct path creation and symbolic linking. 458 * Returns zero on failure, non-zero on success. 459 */ 460static int 461flist_append(struct sess *sess, struct flist *f, struct stat *st, 462 const char *path) 463{ 464 465 /* 466 * Copy the full path for local addressing and transmit 467 * only the filename part for the receiver. 468 */ 469 470 if ((f->path = strdup(path)) == NULL) { 471 ERR(sess, "strdup"); 472 return 0; 473 } 474 475 if ((f->wpath = strrchr(f->path, '/')) == NULL) 476 f->wpath = f->path; 477 else 478 f->wpath++; 479 480 /* 481 * On the receiving end, we'll strip out all bits on the 482 * mode except for the file permissions. 483 * No need to warn about it here. 484 */ 485 486 flist_copy_stat(f, st); 487 488 /* Optionally copy link information. */ 489 490 if (S_ISLNK(st->st_mode)) { 491 f->link = symlink_read(sess, f->path); 492 if (f->link == NULL) { 493 ERRX1(sess, "symlink_read"); 494 return 0; 495 } 496 } 497 498 return 1; 499} 500 501/* 502 * Receive a file list from the wire, filling in length "sz" (which may 503 * possibly be zero) and list "flp" on success. 504 * Return zero on failure, non-zero on success. 505 */ 506int 507flist_recv(struct sess *sess, int fd, struct flist **flp, size_t *sz) 508{ 509 struct flist *fl = NULL; 510 struct flist *ff; 511 const struct flist *fflast = NULL; 512 size_t flsz = 0, flmax = 0, lsz; 513 uint8_t flag; 514 char last[MAXPATHLEN]; 515 uint64_t lval; /* temporary values... */ 516 int32_t ival; 517 518 last[0] = '\0'; 519 520 for (;;) { 521 if (!io_read_byte(sess, fd, &flag)) { 522 ERRX1(sess, "io_read_byte"); 523 goto out; 524 } else if (flag == 0) 525 break; 526 527 if (!flist_realloc(sess, &fl, &flsz, &flmax)) { 528 ERRX1(sess, "flist_realloc"); 529 goto out; 530 } 531 532 ff = &fl[flsz - 1]; 533 fflast = flsz > 1 ? &fl[flsz - 2] : NULL; 534 535 /* Filename first. */ 536 537 if (!flist_recv_name(sess, fd, ff, flag, last)) { 538 ERRX1(sess, "flist_recv_name"); 539 goto out; 540 } 541 542 /* Read the file size. */ 543 544 if (!io_read_ulong(sess, fd, &lval)) { 545 ERRX1(sess, "io_read_ulong"); 546 goto out; 547 } 548 ff->st.size = lval; 549 550 /* Read the modification time. */ 551 552 if (!(FLIST_TIME_SAME & flag)) { 553 if (!io_read_int(sess, fd, &ival)) { 554 ERRX1(sess, "io_read_int"); 555 goto out; 556 } 557 ff->st.mtime = ival; 558 } else if (fflast == NULL) { 559 ERRX(sess, "same time without last entry"); 560 goto out; 561 } else 562 ff->st.mtime = fflast->st.mtime; 563 564 /* Read the file mode. */ 565 566 if (!(FLIST_MODE_SAME & flag)) { 567 if (!io_read_int(sess, fd, &ival)) { 568 ERRX1(sess, "io_read_int"); 569 goto out; 570 } 571 ff->st.mode = ival; 572 } else if (fflast == NULL) { 573 ERRX(sess, "same mode without last entry"); 574 goto out; 575 } else 576 ff->st.mode = fflast->st.mode; 577 578 /* Optionally read the link information. */ 579 580 if (S_ISLNK(ff->st.mode) && 581 sess->opts->preserve_links) { 582 if (!io_read_size(sess, fd, &lsz)) { 583 ERRX1(sess, "io_read_size"); 584 goto out; 585 } else if (lsz == 0) { 586 ERRX(sess, "empty link name"); 587 goto out; 588 } 589 ff->link = calloc(lsz + 1, 1); 590 if (ff->link == NULL) { 591 ERR(sess, "calloc"); 592 goto out; 593 } 594 if (!io_read_buf(sess, fd, ff->link, lsz)) { 595 ERRX1(sess, "io_read_buf"); 596 goto out; 597 } 598 } 599 600 LOG3(sess, "%s: received file metadata: " 601 "size %jd, mtime %jd, mode %o", 602 ff->path, (intmax_t)ff->st.size, 603 (intmax_t)ff->st.mtime, ff->st.mode); 604 605 if (S_ISREG(ff->st.mode)) 606 sess->total_size += ff->st.size; 607 } 608 609 /* Remember to order the received list. */ 610 611 LOG2(sess, "received file metadata list: %zu", flsz); 612 qsort(fl, flsz, sizeof(struct flist), flist_cmp); 613 flist_topdirs(sess, fl, flsz); 614 *sz = flsz; 615 *flp = fl; 616 return 1; 617out: 618 flist_free(fl, flsz); 619 *sz = 0; 620 *flp = NULL; 621 return 0; 622} 623 624/* 625 * Generate a flist possibly-recursively given a file root, which may 626 * also be a regular file or symlink. 627 * On success, augments the generated list in "flp" of length "sz". 628 * Returns zero on failure, non-zero on success. 629 */ 630static int 631flist_gen_dirent(struct sess *sess, char *root, struct flist **fl, size_t *sz, 632 size_t *max) 633{ 634 char *cargv[2], *cp; 635 int rc = 0; 636 FTS *fts; 637 FTSENT *ent; 638 struct flist *f; 639 size_t flsz = 0, stripdir; 640 struct stat st; 641 642 cargv[0] = root; 643 cargv[1] = NULL; 644 645 /* 646 * If we're a file, then revert to the same actions we use for 647 * the non-recursive scan. 648 */ 649 650 if (lstat(root, &st) == -1) { 651 ERR(sess, "%s: lstat", root); 652 return 0; 653 } else if (S_ISREG(st.st_mode)) { 654 if (!flist_realloc(sess, fl, sz, max)) { 655 ERRX1(sess, "flist_realloc"); 656 return 0; 657 } 658 f = &(*fl)[(*sz) - 1]; 659 assert(f != NULL); 660 661 if (!flist_append(sess, f, &st, root)) { 662 ERRX1(sess, "flist_append"); 663 return 0; 664 } else if (unveil(root, "r") == -1) { 665 ERR(sess, "%s: unveil", root); 666 return 0; 667 } 668 return 1; 669 } else if (S_ISLNK(st.st_mode)) { 670 if (!sess->opts->preserve_links) { 671 WARNX(sess, "%s: skipping symlink", root); 672 return 1; 673 } else if (!flist_realloc(sess, fl, sz, max)) { 674 ERRX1(sess, "flist_realloc"); 675 return 0; 676 } 677 f = &(*fl)[(*sz) - 1]; 678 assert(f != NULL); 679 680 if (!flist_append(sess, f, &st, root)) { 681 ERRX1(sess, "flist_append"); 682 return 0; 683 } else if (unveil(root, "r") == -1) { 684 ERR(sess, "%s: unveil", root); 685 return 0; 686 } 687 return 1; 688 } else if (!S_ISDIR(st.st_mode)) { 689 WARNX(sess, "%s: skipping special", root); 690 return 1; 691 } 692 693 /* 694 * If we end with a slash, it means that we're not supposed to 695 * copy the directory part itself---only the contents. 696 * So set "stripdir" to be what we take out. 697 */ 698 699 stripdir = strlen(root); 700 assert(stripdir > 0); 701 if (root[stripdir - 1] != '/') 702 stripdir = 0; 703 704 /* 705 * If we're not stripping anything, then see if we need to strip 706 * out the leading material in the path up to and including the 707 * last directory component. 708 */ 709 710 if (stripdir == 0) 711 if ((cp = strrchr(root, '/')) != NULL) 712 stripdir = cp - root + 1; 713 714 /* 715 * If we're recursive, then we need to take down all of the 716 * files and directory components, so use fts(3). 717 * Copying the information file-by-file into the flstat. 718 * We'll make sense of it in flist_send. 719 */ 720 721 if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) { 722 ERR(sess, "fts_open"); 723 return 0; 724 } 725 726 errno = 0; 727 while ((ent = fts_read(fts)) != NULL) { 728 if (!flist_fts_check(sess, ent)) { 729 errno = 0; 730 continue; 731 } 732 733 /* We don't allow symlinks without -l. */ 734 735 assert(ent->fts_statp != NULL); 736 if (S_ISLNK(ent->fts_statp->st_mode) && 737 !sess->opts->preserve_links) { 738 WARNX(sess, "%s: skipping " 739 "symlink", ent->fts_path); 740 continue; 741 } 742 743 /* Allocate a new file entry. */ 744 745 if (!flist_realloc(sess, fl, sz, max)) { 746 ERRX1(sess, "flist_realloc"); 747 goto out; 748 } 749 flsz++; 750 f = &(*fl)[*sz - 1]; 751 752 /* Our path defaults to "." for the root. */ 753 754 if ('\0' == ent->fts_path[stripdir]) { 755 if (asprintf(&f->path, "%s.", ent->fts_path) < 0) { 756 ERR(sess, "asprintf"); 757 f->path = NULL; 758 goto out; 759 } 760 } else { 761 if ((f->path = strdup(ent->fts_path)) == NULL) { 762 ERR(sess, "strdup"); 763 goto out; 764 } 765 } 766 767 f->wpath = f->path + stripdir; 768 flist_copy_stat(f, ent->fts_statp); 769 770 /* Optionally copy link information. */ 771 772 if (S_ISLNK(ent->fts_statp->st_mode)) { 773 f->link = symlink_read(sess, f->path); 774 if (f->link == NULL) { 775 ERRX1(sess, "symlink_read"); 776 goto out; 777 } 778 } 779 780 /* Reset errno for next fts_read() call. */ 781 errno = 0; 782 } 783 if (errno) { 784 ERR(sess, "fts_read"); 785 goto out; 786 } else if (unveil(root, "r") == -1) { 787 ERR(sess, "%s: unveil", root); 788 goto out; 789 } 790 791 LOG3(sess, "generated %zu filenames: %s", flsz, root); 792 rc = 1; 793out: 794 fts_close(fts); 795 return rc; 796} 797 798/* 799 * Generate a flist recursively given the array of directories (or 800 * files, symlinks, doesn't matter) specified in argv (argc >0). 801 * On success, stores the generated list in "flp" with length "sz", 802 * which may be zero. 803 * Returns zero on failure, non-zero on success. 804 */ 805static int 806flist_gen_dirs(struct sess *sess, size_t argc, char **argv, struct flist **flp, 807 size_t *sz) 808{ 809 size_t i, max = 0; 810 811 for (i = 0; i < argc; i++) 812 if (!flist_gen_dirent(sess, argv[i], flp, sz, &max)) 813 break; 814 815 if (i == argc) { 816 LOG2(sess, "recursively generated %zu filenames", *sz); 817 return 1; 818 } 819 820 ERRX1(sess, "flist_gen_dirent"); 821 flist_free(*flp, max); 822 *flp = NULL; 823 *sz = 0; 824 return 0; 825} 826 827/* 828 * Generate list of files from the command-line argc (>0) and argv. 829 * On success, stores the generated list in "flp" with length "sz", 830 * which may be zero. 831 * Returns zero on failure, non-zero on success. 832 */ 833static int 834flist_gen_files(struct sess *sess, size_t argc, char **argv, 835 struct flist **flp, size_t *sz) 836{ 837 struct flist *fl = NULL, *f; 838 size_t i, flsz = 0; 839 struct stat st; 840 841 assert(argc); 842 843 if ((fl = calloc(argc, sizeof(struct flist))) == NULL) { 844 ERR(sess, "calloc"); 845 return 0; 846 } 847 848 for (i = 0; i < argc; i++) { 849 if ('\0' == argv[i][0]) 850 continue; 851 if (lstat(argv[i], &st) == -1) { 852 ERR(sess, "%s: lstat", argv[i]); 853 goto out; 854 } 855 856 /* 857 * File type checks. 858 * In non-recursive mode, we don't accept directories. 859 * We also skip symbolic links without -l. 860 * Beyond that, we only accept regular files. 861 */ 862 863 if (S_ISDIR(st.st_mode)) { 864 WARNX(sess, "%s: skipping directory", argv[i]); 865 continue; 866 } else if (S_ISLNK(st.st_mode)) { 867 if (!sess->opts->preserve_links) { 868 WARNX(sess, "%s: skipping " 869 "symlink", argv[i]); 870 continue; 871 } 872 } else if (!S_ISREG(st.st_mode)) { 873 WARNX(sess, "%s: skipping special", argv[i]); 874 continue; 875 } 876 877 878 f = &fl[flsz++]; 879 assert(f != NULL); 880 881 /* Add this file to our file-system worldview. */ 882 883 if (unveil(argv[i], "r") == -1) { 884 ERR(sess, "%s: unveil", argv[i]); 885 goto out; 886 } else if (!flist_append(sess, f, &st, argv[i])) { 887 ERRX1(sess, "flist_append"); 888 goto out; 889 } 890 } 891 892 LOG2(sess, "non-recursively generated %zu filenames", flsz); 893 *sz = flsz; 894 *flp = fl; 895 return 1; 896out: 897 flist_free(fl, argc); 898 *sz = 0; 899 *flp = NULL; 900 return 0; 901} 902 903/* 904 * Generate a sorted, de-duplicated list of file metadata. 905 * In non-recursive mode (the default), we use only the files we're 906 * given. 907 * Otherwise, directories are recursively examined. 908 * Returns zero on failure, non-zero on success. 909 * On success, "fl" will need to be freed with flist_free(). 910 */ 911int 912flist_gen(struct sess *sess, size_t argc, char **argv, struct flist **flp, 913 size_t *sz) 914{ 915 int rc; 916 917 assert(argc > 0); 918 rc = sess->opts->recursive ? 919 flist_gen_dirs(sess, argc, argv, flp, sz) : 920 flist_gen_files(sess, argc, argv, flp, sz); 921 922 /* After scanning, lock our file-system view. */ 923 924 if (unveil(NULL, NULL) == -1) { 925 ERR(sess, "unveil"); 926 return 0; 927 } else if (!rc) 928 return 0; 929 930 qsort(*flp, *sz, sizeof(struct flist), flist_cmp); 931 932 if (flist_dedupe(sess, flp, sz)) { 933 flist_topdirs(sess, *flp, *sz); 934 return 1; 935 } 936 937 ERRX1(sess, "flist_dedupe"); 938 flist_free(*flp, *sz); 939 *flp = NULL; 940 *sz = 0; 941 return 0; 942} 943 944/* 945 * Generate a list of files in root to delete that are within the 946 * top-level directories stipulated by "wfl". 947 * Only handles symbolic links, directories, and regular files. 948 * Returns zero on failure (fl and flsz will be NULL and zero), non-zero 949 * on success. 950 * On success, "fl" will need to be freed with flist_free(). 951 */ 952int 953flist_gen_dels(struct sess *sess, const char *root, struct flist **fl, 954 size_t *sz, const struct flist *wfl, size_t wflsz) 955{ 956 char **cargv = NULL; 957 int rc = 0, c; 958 FTS *fts = NULL; 959 FTSENT *ent; 960 struct flist *f; 961 size_t cargvs = 0, i, j, max = 0, stripdir; 962 ENTRY hent; 963 ENTRY *hentp; 964 965 *fl = NULL; 966 *sz = 0; 967 968 /* Only run this code when we're recursive. */ 969 970 if (!sess->opts->recursive) 971 return 1; 972 973 /* 974 * Gather up all top-level directories for scanning. 975 * This is stipulated by rsync's --delete behaviour, where we 976 * only delete things in the top-level directories given on the 977 * command line. 978 */ 979 980 assert(wflsz > 0); 981 for (i = 0; i < wflsz; i++) 982 if (FLSTAT_TOP_DIR & wfl[i].st.flags) 983 cargvs++; 984 if (cargvs == 0) 985 return 1; 986 987 if ((cargv = calloc(cargvs + 1, sizeof(char *))) == NULL) { 988 ERR(sess, "calloc"); 989 return 0; 990 } 991 992 /* 993 * If we're given just a "." as the first entry, that means 994 * we're doing a relative copy with a trailing slash. 995 * Special-case this just for the sake of simplicity. 996 * Otherwise, look through all top-levels. 997 */ 998 999 if (wflsz && strcmp(wfl[0].wpath, ".") == 0) { 1000 assert(cargvs == 1); 1001 assert(S_ISDIR(wfl[0].st.mode)); 1002 if (asprintf(&cargv[0], "%s/", root) < 0) { 1003 ERR(sess, "asprintf"); 1004 cargv[0] = NULL; 1005 goto out; 1006 } 1007 cargv[1] = NULL; 1008 } else { 1009 for (i = j = 0; i < wflsz; i++) { 1010 if (!(FLSTAT_TOP_DIR & wfl[i].st.flags)) 1011 continue; 1012 assert(S_ISDIR(wfl[i].st.mode)); 1013 assert(strcmp(wfl[i].wpath, ".")); 1014 c = asprintf(&cargv[j], "%s/%s", root, wfl[i].wpath); 1015 if (c < 0) { 1016 ERR(sess, "asprintf"); 1017 cargv[j] = NULL; 1018 goto out; 1019 } 1020 LOG4(sess, "%s: will scan " 1021 "for deletions", cargv[j]); 1022 j++; 1023 } 1024 assert(j == cargvs); 1025 cargv[j] = NULL; 1026 } 1027 1028 LOG2(sess, "delete from %zu directories", cargvs); 1029 1030 /* 1031 * Next, use the standard hcreate(3) hashtable interface to hash 1032 * all of the files that we want to synchronise. 1033 * This way, we'll be able to determine which files we want to 1034 * delete in O(n) time instead of O(n * search) time. 1035 * Plus, we can do the scan in-band and only allocate the files 1036 * we want to delete. 1037 */ 1038 1039 if (!hcreate(wflsz)) { 1040 ERR(sess, "hcreate"); 1041 goto out; 1042 } 1043 1044 for (i = 0; i < wflsz; i++) { 1045 memset(&hent, 0, sizeof(ENTRY)); 1046 if ((hent.key = strdup(wfl[i].wpath)) == NULL) { 1047 ERR(sess, "strdup"); 1048 goto out; 1049 } 1050 if ((hentp = hsearch(hent, ENTER)) == NULL) { 1051 ERR(sess, "hsearch"); 1052 goto out; 1053 } else if (hentp->key != hent.key) { 1054 ERRX(sess, "%s: duplicate", wfl[i].wpath); 1055 free(hent.key); 1056 goto out; 1057 } 1058 } 1059 1060 /* 1061 * Now we're going to try to descend into all of the top-level 1062 * directories stipulated by the file list. 1063 * If the directories don't exist, it's ok. 1064 */ 1065 1066 if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) { 1067 ERR(sess, "fts_open"); 1068 goto out; 1069 } 1070 1071 stripdir = strlen(root) + 1; 1072 errno = 0; 1073 while ((ent = fts_read(fts)) != NULL) { 1074 if (ent->fts_info == FTS_NS) 1075 continue; 1076 if (!flist_fts_check(sess, ent)) { 1077 errno = 0; 1078 continue; 1079 } else if (stripdir >= ent->fts_pathlen) 1080 continue; 1081 1082 /* Look up in hashtable. */ 1083 1084 memset(&hent, 0, sizeof(ENTRY)); 1085 hent.key = ent->fts_path + stripdir; 1086 if (hsearch(hent, FIND) != NULL) 1087 continue; 1088 1089 /* Not found: we'll delete it. */ 1090 1091 if (!flist_realloc(sess, fl, sz, &max)) { 1092 ERRX1(sess, "flist_realloc"); 1093 goto out; 1094 } 1095 f = &(*fl)[*sz - 1]; 1096 1097 if ((f->path = strdup(ent->fts_path)) == NULL) { 1098 ERR(sess, "strdup"); 1099 goto out; 1100 } 1101 f->wpath = f->path + stripdir; 1102 assert(ent->fts_statp != NULL); 1103 flist_copy_stat(f, ent->fts_statp); 1104 errno = 0; 1105 } 1106 1107 if (errno) { 1108 ERR(sess, "fts_read"); 1109 goto out; 1110 } 1111 1112 qsort(*fl, *sz, sizeof(struct flist), flist_cmp); 1113 rc = 1; 1114out: 1115 if (fts != NULL) 1116 fts_close(fts); 1117 for (i = 0; i < cargvs; i++) 1118 free(cargv[i]); 1119 free(cargv); 1120 hdestroy(); 1121 return rc; 1122} 1123 1124/* 1125 * Delete all files and directories in "fl". 1126 * If called with a zero-length "fl", does nothing. 1127 * If dry_run is specified, simply write what would be done. 1128 * Return zero on failure, non-zero on success. 1129 */ 1130int 1131flist_del(struct sess *sess, int root, const struct flist *fl, size_t flsz) 1132{ 1133 ssize_t i; 1134 int flag; 1135 1136 if (flsz == 0) 1137 return 1; 1138 1139 assert(sess->opts->del); 1140 assert(sess->opts->recursive); 1141 1142 for (i = flsz - 1; i >= 0; i--) { 1143 LOG1(sess, "%s: deleting", fl[i].wpath); 1144 if (sess->opts->dry_run) 1145 continue; 1146 assert(root != -1); 1147 flag = S_ISDIR(fl[i].st.mode) ? AT_REMOVEDIR : 0; 1148 if (unlinkat(root, fl[i].wpath, flag) == -1 && 1149 errno != ENOENT) { 1150 ERR(sess, "%s: unlinkat", fl[i].wpath); 1151 return 0; 1152 } 1153 } 1154 1155 return 1; 1156} 1157