flist.c revision 1.18
1/* $Id: flist.c,v 1.18 2019/02/21 22:07:44 benno Exp $ */ 2/* 3 * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2019 Florian Obser <florian@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18#include <sys/param.h> 19#include <sys/stat.h> 20 21#include <assert.h> 22#include <errno.h> 23#include <fcntl.h> 24#include <fts.h> 25#include <inttypes.h> 26#include <search.h> 27#include <stdio.h> 28#include <stdlib.h> 29#include <string.h> 30#include <unistd.h> 31 32#include "extern.h" 33 34/* 35 * We allocate our file list in chunk sizes so as not to do it one by 36 * one. 37 * Preferrably we get one or two allocation. 38 */ 39#define FLIST_CHUNK_SIZE (1024) 40 41/* 42 * These flags are part of the rsync protocol. 43 * They are sent as the first byte for a file transmission and encode 44 * information that affects subsequent transmissions. 45 */ 46#define FLIST_MODE_SAME 0x0002 /* mode is repeat */ 47#define FLIST_RDEV_SAME 0x0004 /* rdev is repeat */ 48#define FLIST_UID_SAME 0x0008 /* uid is repeat */ 49#define FLIST_GID_SAME 0x0010 /* gid is repeat */ 50#define FLIST_NAME_SAME 0x0020 /* name is repeat */ 51#define FLIST_NAME_LONG 0x0040 /* name >255 bytes */ 52#define FLIST_TIME_SAME 0x0080 /* time is repeat */ 53 54/* 55 * Requied way to sort a filename list. 56 */ 57static int 58flist_cmp(const void *p1, const void *p2) 59{ 60 const struct flist *f1 = p1, *f2 = p2; 61 62 return strcmp(f1->wpath, f2->wpath); 63} 64 65/* 66 * Deduplicate our file list (which may be zero-length). 67 * Returns zero on failure, non-zero on success. 68 */ 69static int 70flist_dedupe(struct sess *sess, struct flist **fl, size_t *sz) 71{ 72 size_t i, j; 73 struct flist *new; 74 struct flist *f, *fnext; 75 76 if (*sz == 0) 77 return 1; 78 79 /* Create a new buffer, "new", and copy. */ 80 81 new = calloc(*sz, sizeof(struct flist)); 82 if (new == NULL) { 83 ERR(sess, "calloc"); 84 return 0; 85 } 86 87 for (i = j = 0; i < *sz - 1; i++) { 88 f = &(*fl)[i]; 89 fnext = &(*fl)[i + 1]; 90 91 if (strcmp(f->wpath, fnext->wpath)) { 92 new[j++] = *f; 93 continue; 94 } 95 96 /* 97 * Our working (destination) paths are the same. 98 * If the actual file is the same (as given on the 99 * command-line), then we can just discard the first. 100 * Otherwise, we need to bail out: it means we have two 101 * different files with the relative path on the 102 * destination side. 103 */ 104 105 if (strcmp(f->path, fnext->path) == 0) { 106 new[j++] = *f; 107 i++; 108 WARNX(sess, "%s: duplicate path: %s", 109 f->wpath, f->path); 110 free(fnext->path); 111 free(fnext->link); 112 fnext->path = fnext->link = NULL; 113 continue; 114 } 115 116 ERRX(sess, "%s: duplicate working path for " 117 "possibly different file: %s, %s", 118 f->wpath, f->path, fnext->path); 119 free(new); 120 return 0; 121 } 122 123 /* Don't forget the last entry. */ 124 125 if (i == *sz - 1) 126 new[j++] = (*fl)[i]; 127 128 /* 129 * Reassign to the deduplicated array. 130 * If we started out with *sz > 0, which we check for at the 131 * beginning, then we'll always continue having *sz > 0. 132 */ 133 134 free(*fl); 135 *fl = new; 136 *sz = j; 137 assert(*sz); 138 return 1; 139} 140 141/* 142 * We're now going to find our top-level directories. 143 * This only applies to recursive mode. 144 * If we have the first element as the ".", then that's the "top 145 * directory" of our transfer. 146 * Otherwise, mark up all top-level directories in the set. 147 */ 148static void 149flist_topdirs(struct sess *sess, struct flist *fl, size_t flsz) 150{ 151 size_t i; 152 const char *cp; 153 154 if (!sess->opts->recursive) 155 return; 156 157 if (flsz && strcmp(fl[0].wpath, ".")) { 158 for (i = 0; i < flsz; i++) { 159 if (!S_ISDIR(fl[i].st.mode)) 160 continue; 161 cp = strchr(fl[i].wpath, '/'); 162 if (cp != NULL && cp[1] != '\0') 163 continue; 164 fl[i].st.flags |= FLSTAT_TOP_DIR; 165 LOG4(sess, "%s: top-level", fl[i].wpath); 166 } 167 } else if (flsz) { 168 fl[0].st.flags |= FLSTAT_TOP_DIR; 169 LOG4(sess, "%s: top-level", fl[0].wpath); 170 } 171} 172 173/* 174 * Filter through the fts() file information. 175 * We want directories (pre-order), regular files, and symlinks. 176 * Everything else is skipped and possibly warned about. 177 * Return zero to skip, non-zero to examine. 178 */ 179static int 180flist_fts_check(struct sess *sess, FTSENT *ent) 181{ 182 183 if (ent->fts_info == FTS_F || 184 ent->fts_info == FTS_D || 185 ent->fts_info == FTS_SL || 186 ent->fts_info == FTS_SLNONE) 187 return 1; 188 189 if (ent->fts_info == FTS_DC) { 190 WARNX(sess, "%s: directory cycle", ent->fts_path); 191 } else if (ent->fts_info == FTS_DNR) { 192 errno = ent->fts_errno; 193 WARN(sess, "%s: unreadable directory", ent->fts_path); 194 } else if (ent->fts_info == FTS_DOT) { 195 WARNX(sess, "%s: skipping dot-file", ent->fts_path); 196 } else if (ent->fts_info == FTS_ERR) { 197 errno = ent->fts_errno; 198 WARN(sess, "%s", ent->fts_path); 199 } else if (ent->fts_info == FTS_DEFAULT) { 200 if ((sess->opts->devices && (S_ISBLK(ent->fts_statp->st_mode) || 201 S_ISCHR(ent->fts_statp->st_mode))) || 202 (sess->opts->specials && 203 (S_ISFIFO(ent->fts_statp->st_mode) || 204 S_ISSOCK(ent->fts_statp->st_mode)))) { 205 return 1; 206 } 207 WARNX(sess, "%s: skipping special", ent->fts_path); 208 } else if (ent->fts_info == FTS_NS) { 209 errno = ent->fts_errno; 210 WARN(sess, "%s: could not stat", ent->fts_path); 211 } 212 213 return 0; 214} 215 216/* 217 * Copy necessary elements in "st" into the fields of "f". 218 */ 219static void 220flist_copy_stat(struct flist *f, const struct stat *st) 221{ 222 f->st.mode = st->st_mode; 223 f->st.uid = st->st_uid; 224 f->st.gid = st->st_gid; 225 f->st.size = st->st_size; 226 f->st.mtime = st->st_mtime; 227 f->st.rdev = st->st_rdev; 228} 229 230void 231flist_free(struct flist *f, size_t sz) 232{ 233 size_t i; 234 235 if (f == NULL) 236 return; 237 238 for (i = 0; i < sz; i++) { 239 free(f[i].path); 240 free(f[i].link); 241 } 242 free(f); 243} 244 245/* 246 * Serialise our file list (which may be zero-length) to the wire. 247 * Makes sure that the receiver isn't going to block on sending us 248 * return messages on the log channel. 249 * Return zero on failure, non-zero on success. 250 */ 251int 252flist_send(struct sess *sess, int fdin, int fdout, const struct flist *fl, 253 size_t flsz) 254{ 255 size_t i, sz, gidsz = 0, uidsz = 0; 256 uint8_t flag; 257 const struct flist *f; 258 const char *fn; 259 struct ident *gids = NULL, *uids = NULL; 260 int rc = 0; 261 262 /* Double-check that we've no pending multiplexed data. */ 263 264 LOG2(sess, "sending file metadata list: %zu", flsz); 265 266 for (i = 0; i < flsz; i++) { 267 f = &fl[i]; 268 fn = f->wpath; 269 sz = strlen(f->wpath); 270 assert(sz > 0); 271 272 /* 273 * If applicable, unclog the read buffer. 274 * This happens when the receiver has a lot of log 275 * messages and all we're doing is sending our file list 276 * without checking for messages. 277 */ 278 279 if (sess->mplex_reads && 280 io_read_check(sess, fdin) && 281 !io_read_flush(sess, fdin)) { 282 ERRX1(sess, "io_read_flush"); 283 goto out; 284 } 285 286 /* 287 * For ease, make all of our filenames be "long" 288 * regardless their actual length. 289 * This also makes sure that we don't transmit a zero 290 * byte unintentionally. 291 */ 292 293 flag = FLIST_NAME_LONG; 294 295 LOG3(sess, "%s: sending file metadata: " 296 "size %jd, mtime %jd, mode %o", 297 fn, (intmax_t)f->st.size, 298 (intmax_t)f->st.mtime, f->st.mode); 299 300 /* Now write to the wire. */ 301 /* FIXME: buffer this. */ 302 303 if (!io_write_byte(sess, fdout, flag)) { 304 ERRX1(sess, "io_write_byte"); 305 goto out; 306 } else if (!io_write_int(sess, fdout, sz)) { 307 ERRX1(sess, "io_write_int"); 308 goto out; 309 } else if (!io_write_buf(sess, fdout, fn, sz)) { 310 ERRX1(sess, "io_write_buf"); 311 goto out; 312 } else if (!io_write_long(sess, fdout, f->st.size)) { 313 ERRX1(sess, "io_write_long"); 314 goto out; 315 } else if (!io_write_int(sess, fdout, f->st.mtime)) { 316 ERRX1(sess, "io_write_int"); 317 goto out; 318 } else if (!io_write_int(sess, fdout, f->st.mode)) { 319 ERRX1(sess, "io_write_int"); 320 goto out; 321 } 322 323 /* Conditional part: uid. */ 324 325 if (sess->opts->preserve_uids) { 326 if (!io_write_int(sess, fdout, f->st.uid)) { 327 ERRX1(sess, "io_write_int"); 328 goto out; 329 } 330 if (!idents_add(sess, 0, &uids, &uidsz, f->st.uid)) { 331 ERRX1(sess, "idents_add"); 332 goto out; 333 } 334 } 335 336 /* Conditional part: gid. */ 337 338 if (sess->opts->preserve_gids) { 339 if (!io_write_int(sess, fdout, f->st.gid)) { 340 ERRX1(sess, "io_write_int"); 341 goto out; 342 } 343 if (!idents_add(sess, 1, &gids, &gidsz, f->st.gid)) { 344 ERRX1(sess, "idents_add"); 345 goto out; 346 } 347 } 348 349 /* Conditional part: devices & special files. */ 350 351 if ((sess->opts->devices && (S_ISBLK(f->st.mode) || 352 S_ISCHR(f->st.mode))) || 353 (sess->opts->specials && (S_ISFIFO(f->st.mode) || 354 S_ISSOCK(f->st.mode)))) { 355 if (!io_write_int(sess, fdout, f->st.rdev)) { 356 ERRX1(sess, "io_write_int"); 357 goto out; 358 } 359 } 360 361 /* Conditional part: link. */ 362 363 if (S_ISLNK(f->st.mode) && 364 sess->opts->preserve_links) { 365 fn = f->link; 366 sz = strlen(f->link); 367 if (!io_write_int(sess, fdout, sz)) { 368 ERRX1(sess, "io_write_int"); 369 goto out; 370 } 371 if (!io_write_buf(sess, fdout, fn, sz)) { 372 ERRX1(sess, "io_write_int"); 373 goto out; 374 } 375 } 376 377 if (S_ISREG(f->st.mode)) 378 sess->total_size += f->st.size; 379 } 380 381 /* Signal end of file list. */ 382 383 if (!io_write_byte(sess, fdout, 0)) { 384 ERRX1(sess, "io_write_byte"); 385 goto out; 386 } 387 388 /* Conditionally write identifier lists. */ 389 390 if (sess->opts->preserve_uids && !sess->opts->numeric_ids) { 391 LOG2(sess, "sending uid list: %zu", uidsz); 392 if (!idents_send(sess, fdout, uids, uidsz)) { 393 ERRX1(sess, "idents_send"); 394 goto out; 395 } 396 } 397 398 if (sess->opts->preserve_gids && !sess->opts->numeric_ids) { 399 LOG2(sess, "sending gid list: %zu", gidsz); 400 if (!idents_send(sess, fdout, gids, gidsz)) { 401 ERRX1(sess, "idents_send"); 402 goto out; 403 } 404 } 405 406 rc = 1; 407out: 408 idents_free(gids, gidsz); 409 idents_free(uids, uidsz); 410 return rc; 411} 412 413/* 414 * Read the filename of a file list. 415 * This is the most expensive part of the file list transfer, so a lot 416 * of attention has gone into transmitting as little as possible. 417 * Micro-optimisation, but whatever. 418 * Fills in "f" with the full path on success. 419 * Returns zero on failure, non-zero on success. 420 */ 421static int 422flist_recv_name(struct sess *sess, int fd, struct flist *f, uint8_t flags, 423 char last[MAXPATHLEN]) 424{ 425 uint8_t bval; 426 size_t partial = 0; 427 size_t pathlen = 0, len; 428 429 /* 430 * Read our filename. 431 * If we have FLIST_NAME_SAME, we inherit some of the last 432 * transmitted name. 433 * If we have FLIST_NAME_LONG, then the string length is greater 434 * than byte-size. 435 */ 436 437 if (FLIST_NAME_SAME & flags) { 438 if (!io_read_byte(sess, fd, &bval)) { 439 ERRX1(sess, "io_read_byte"); 440 return 0; 441 } 442 partial = bval; 443 } 444 445 /* Get the (possibly-remaining) filename length. */ 446 447 if (FLIST_NAME_LONG & flags) { 448 if (!io_read_size(sess, fd, &pathlen)) { 449 ERRX1(sess, "io_read_size"); 450 return 0; 451 } 452 } else { 453 if (!io_read_byte(sess, fd, &bval)) { 454 ERRX1(sess, "io_read_byte"); 455 return 0; 456 } 457 pathlen = bval; 458 } 459 460 /* Allocate our full filename length. */ 461 /* FIXME: maximum pathname length. */ 462 463 if ((len = pathlen + partial) == 0) { 464 ERRX(sess, "security violation: " 465 "zero-length pathname"); 466 return 0; 467 } 468 469 if ((f->path = malloc(len + 1)) == NULL) { 470 ERR(sess, "malloc"); 471 return 0; 472 } 473 f->path[len] = '\0'; 474 475 if (FLIST_NAME_SAME & flags) 476 memcpy(f->path, last, partial); 477 478 if (!io_read_buf(sess, fd, f->path + partial, pathlen)) { 479 ERRX1(sess, "io_read_buf"); 480 return 0; 481 } 482 483 if (f->path[0] == '/') { 484 ERRX(sess, "security violation: " 485 "absolute pathname: %s", f->path); 486 return 0; 487 } 488 489 if (strstr(f->path, "/../") != NULL || 490 (len > 2 && strcmp(f->path + len - 3, "/..") == 0) || 491 (len > 2 && strncmp(f->path, "../", 3) == 0) || 492 strcmp(f->path, "..") == 0) { 493 ERRX(sess, "%s: security violation: " 494 "backtracking pathname", f->path); 495 return 0; 496 } 497 498 /* Record our last path and construct our filename. */ 499 500 strlcpy(last, f->path, MAXPATHLEN); 501 f->wpath = f->path; 502 return 1; 503} 504 505/* 506 * Reallocate a file list in chunks of FLIST_CHUNK_SIZE; 507 * Returns zero on failure, non-zero on success. 508 */ 509static int 510flist_realloc(struct sess *sess, struct flist **fl, size_t *sz, size_t *max) 511{ 512 void *pp; 513 514 if (*sz + 1 <= *max) { 515 (*sz)++; 516 return 1; 517 } 518 519 pp = recallocarray(*fl, *max, 520 *max + FLIST_CHUNK_SIZE, sizeof(struct flist)); 521 if (pp == NULL) { 522 ERR(sess, "recallocarray"); 523 return 0; 524 } 525 *fl = pp; 526 *max += FLIST_CHUNK_SIZE; 527 (*sz)++; 528 return 1; 529} 530 531/* 532 * Copy a regular or symbolic link file "path" into "f". 533 * This handles the correct path creation and symbolic linking. 534 * Returns zero on failure, non-zero on success. 535 */ 536static int 537flist_append(struct sess *sess, struct flist *f, struct stat *st, 538 const char *path) 539{ 540 541 /* 542 * Copy the full path for local addressing and transmit 543 * only the filename part for the receiver. 544 */ 545 546 if ((f->path = strdup(path)) == NULL) { 547 ERR(sess, "strdup"); 548 return 0; 549 } 550 551 if ((f->wpath = strrchr(f->path, '/')) == NULL) 552 f->wpath = f->path; 553 else 554 f->wpath++; 555 556 /* 557 * On the receiving end, we'll strip out all bits on the 558 * mode except for the file permissions. 559 * No need to warn about it here. 560 */ 561 562 flist_copy_stat(f, st); 563 564 /* Optionally copy link information. */ 565 566 if (S_ISLNK(st->st_mode)) { 567 f->link = symlink_read(sess, f->path); 568 if (f->link == NULL) { 569 ERRX1(sess, "symlink_read"); 570 return 0; 571 } 572 } 573 574 return 1; 575} 576 577/* 578 * Receive a file list from the wire, filling in length "sz" (which may 579 * possibly be zero) and list "flp" on success. 580 * Return zero on failure, non-zero on success. 581 */ 582int 583flist_recv(struct sess *sess, int fd, struct flist **flp, size_t *sz) 584{ 585 struct flist *fl = NULL; 586 struct flist *ff; 587 const struct flist *fflast = NULL; 588 size_t flsz = 0, flmax = 0, lsz, gidsz = 0, uidsz = 0; 589 uint8_t flag; 590 char last[MAXPATHLEN]; 591 uint64_t lval; /* temporary values... */ 592 int32_t ival; 593 struct ident *gids = NULL, *uids = NULL; 594 595 last[0] = '\0'; 596 597 for (;;) { 598 if (!io_read_byte(sess, fd, &flag)) { 599 ERRX1(sess, "io_read_byte"); 600 goto out; 601 } else if (flag == 0) 602 break; 603 604 if (!flist_realloc(sess, &fl, &flsz, &flmax)) { 605 ERRX1(sess, "flist_realloc"); 606 goto out; 607 } 608 609 ff = &fl[flsz - 1]; 610 fflast = flsz > 1 ? &fl[flsz - 2] : NULL; 611 612 /* Filename first. */ 613 614 if (!flist_recv_name(sess, fd, ff, flag, last)) { 615 ERRX1(sess, "flist_recv_name"); 616 goto out; 617 } 618 619 /* Read the file size. */ 620 621 if (!io_read_ulong(sess, fd, &lval)) { 622 ERRX1(sess, "io_read_ulong"); 623 goto out; 624 } 625 ff->st.size = lval; 626 627 /* Read the modification time. */ 628 629 if (!(FLIST_TIME_SAME & flag)) { 630 if (!io_read_int(sess, fd, &ival)) { 631 ERRX1(sess, "io_read_int"); 632 goto out; 633 } 634 ff->st.mtime = ival; 635 } else if (fflast == NULL) { 636 ERRX(sess, "same time without last entry"); 637 goto out; 638 } else 639 ff->st.mtime = fflast->st.mtime; 640 641 /* Read the file mode. */ 642 643 if (!(FLIST_MODE_SAME & flag)) { 644 if (!io_read_int(sess, fd, &ival)) { 645 ERRX1(sess, "io_read_int"); 646 goto out; 647 } 648 ff->st.mode = ival; 649 } else if (fflast == NULL) { 650 ERRX(sess, "same mode without last entry"); 651 goto out; 652 } else 653 ff->st.mode = fflast->st.mode; 654 655 /* Conditional part: uid. */ 656 657 if (sess->opts->preserve_uids) { 658 if (!(FLIST_UID_SAME & flag)) { 659 if (!io_read_int(sess, fd, &ival)) { 660 ERRX1(sess, "io_read_int"); 661 goto out; 662 } 663 ff->st.uid = ival; 664 } else if (fflast == NULL) { 665 ERRX(sess, "same uid " 666 "without last entry"); 667 goto out; 668 } else 669 ff->st.uid = fflast->st.uid; 670 } 671 672 /* Conditional part: gid. */ 673 674 if (sess->opts->preserve_gids) { 675 if (!(FLIST_GID_SAME & flag)) { 676 if (!io_read_int(sess, fd, &ival)) { 677 ERRX1(sess, "io_read_int"); 678 goto out; 679 } 680 ff->st.gid = ival; 681 } else if (fflast == NULL) { 682 ERRX(sess, "same gid " 683 "without last entry"); 684 goto out; 685 } else 686 ff->st.gid = fflast->st.gid; 687 } 688 689 /* Conditional part: devices & special files. */ 690 691 if ((sess->opts->devices && (S_ISBLK(ff->st.mode) || 692 S_ISCHR(ff->st.mode))) || 693 (sess->opts->specials && (S_ISFIFO(ff->st.mode) || 694 S_ISSOCK(ff->st.mode)))) { 695 if (!(FLIST_RDEV_SAME & flag)) { 696 if (!io_read_int(sess, fd, &ival)) { 697 ERRX1(sess, "io_read_int"); 698 goto out; 699 } 700 ff->st.rdev = ival; 701 } else if (fflast == NULL) { 702 ERRX(sess, "same device without last entry"); 703 goto out; 704 } else 705 ff->st.rdev = fflast->st.rdev; 706 } 707 708 /* Conditional part: link. */ 709 710 if (S_ISLNK(ff->st.mode) && 711 sess->opts->preserve_links) { 712 if (!io_read_size(sess, fd, &lsz)) { 713 ERRX1(sess, "io_read_size"); 714 goto out; 715 } else if (lsz == 0) { 716 ERRX(sess, "empty link name"); 717 goto out; 718 } 719 ff->link = calloc(lsz + 1, 1); 720 if (ff->link == NULL) { 721 ERR(sess, "calloc"); 722 goto out; 723 } 724 if (!io_read_buf(sess, fd, ff->link, lsz)) { 725 ERRX1(sess, "io_read_buf"); 726 goto out; 727 } 728 } 729 730 LOG3(sess, "%s: received file metadata: " 731 "size %jd, mtime %jd, mode %o, rdev (%d, %d)", 732 ff->path, (intmax_t)ff->st.size, 733 (intmax_t)ff->st.mtime, ff->st.mode, 734 major(ff->st.rdev), minor(ff->st.rdev)); 735 736 if (S_ISREG(ff->st.mode)) 737 sess->total_size += ff->st.size; 738 } 739 740 /* Conditionally read the user/group list. */ 741 742 if (sess->opts->preserve_uids && !sess->opts->numeric_ids) { 743 if (!idents_recv(sess, fd, &uids, &uidsz)) { 744 ERRX1(sess, "idents_recv"); 745 goto out; 746 } 747 LOG2(sess, "received uid list: %zu", uidsz); 748 } 749 750 if (sess->opts->preserve_gids && !sess->opts->numeric_ids) { 751 if (!idents_recv(sess, fd, &gids, &gidsz)) { 752 ERRX1(sess, "idents_recv"); 753 goto out; 754 } 755 LOG2(sess, "received gid list: %zu", gidsz); 756 } 757 758 /* Remember to order the received list. */ 759 760 LOG2(sess, "received file metadata list: %zu", flsz); 761 qsort(fl, flsz, sizeof(struct flist), flist_cmp); 762 flist_topdirs(sess, fl, flsz); 763 *sz = flsz; 764 *flp = fl; 765 766 /* Conditionally remap and reassign identifiers. */ 767 768 if (sess->opts->preserve_uids && !sess->opts->numeric_ids) { 769 idents_remap(sess, 0, uids, uidsz); 770 idents_assign_uid(sess, fl, flsz, uids, uidsz); 771 } 772 773 if (sess->opts->preserve_gids && !sess->opts->numeric_ids) { 774 idents_remap(sess, 1, gids, gidsz); 775 idents_assign_gid(sess, fl, flsz, gids, gidsz); 776 } 777 778 idents_free(gids, gidsz); 779 idents_free(uids, uidsz); 780 return 1; 781out: 782 flist_free(fl, flsz); 783 idents_free(gids, gidsz); 784 idents_free(uids, uidsz); 785 *sz = 0; 786 *flp = NULL; 787 return 0; 788} 789 790/* 791 * Generate a flist possibly-recursively given a file root, which may 792 * also be a regular file or symlink. 793 * On success, augments the generated list in "flp" of length "sz". 794 * Returns zero on failure, non-zero on success. 795 */ 796static int 797flist_gen_dirent(struct sess *sess, char *root, struct flist **fl, size_t *sz, 798 size_t *max) 799{ 800 char *cargv[2], *cp; 801 int rc = 0; 802 FTS *fts; 803 FTSENT *ent; 804 struct flist *f; 805 size_t flsz = 0, stripdir; 806 struct stat st; 807 808 cargv[0] = root; 809 cargv[1] = NULL; 810 811 /* 812 * If we're a file, then revert to the same actions we use for 813 * the non-recursive scan. 814 */ 815 816 if (lstat(root, &st) == -1) { 817 ERR(sess, "%s: lstat", root); 818 return 0; 819 } else if (S_ISREG(st.st_mode)) { 820 if (!flist_realloc(sess, fl, sz, max)) { 821 ERRX1(sess, "flist_realloc"); 822 return 0; 823 } 824 f = &(*fl)[(*sz) - 1]; 825 assert(f != NULL); 826 827 if (!flist_append(sess, f, &st, root)) { 828 ERRX1(sess, "flist_append"); 829 return 0; 830 } 831 if (unveil(root, "r") == -1) { 832 ERR(sess, "%s: unveil", root); 833 return 0; 834 } 835 return 1; 836 } else if (S_ISLNK(st.st_mode)) { 837 if (!sess->opts->preserve_links) { 838 WARNX(sess, "%s: skipping symlink", root); 839 return 1; 840 } else if (!flist_realloc(sess, fl, sz, max)) { 841 ERRX1(sess, "flist_realloc"); 842 return 0; 843 } 844 f = &(*fl)[(*sz) - 1]; 845 assert(f != NULL); 846 847 if (!flist_append(sess, f, &st, root)) { 848 ERRX1(sess, "flist_append"); 849 return 0; 850 } 851 if (unveil(root, "r") == -1) { 852 ERR(sess, "%s: unveil", root); 853 return 0; 854 } 855 return 1; 856 } else if (!S_ISDIR(st.st_mode)) { 857 WARNX(sess, "%s: skipping special", root); 858 return 1; 859 } 860 861 /* 862 * If we end with a slash, it means that we're not supposed to 863 * copy the directory part itself---only the contents. 864 * So set "stripdir" to be what we take out. 865 */ 866 867 stripdir = strlen(root); 868 assert(stripdir > 0); 869 if (root[stripdir - 1] != '/') 870 stripdir = 0; 871 872 /* 873 * If we're not stripping anything, then see if we need to strip 874 * out the leading material in the path up to and including the 875 * last directory component. 876 */ 877 878 if (stripdir == 0) 879 if ((cp = strrchr(root, '/')) != NULL) 880 stripdir = cp - root + 1; 881 882 /* 883 * If we're recursive, then we need to take down all of the 884 * files and directory components, so use fts(3). 885 * Copying the information file-by-file into the flstat. 886 * We'll make sense of it in flist_send. 887 */ 888 889 if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) { 890 ERR(sess, "fts_open"); 891 return 0; 892 } 893 894 errno = 0; 895 while ((ent = fts_read(fts)) != NULL) { 896 if (!flist_fts_check(sess, ent)) { 897 errno = 0; 898 continue; 899 } 900 901 /* We don't allow symlinks without -l. */ 902 903 assert(ent->fts_statp != NULL); 904 if (S_ISLNK(ent->fts_statp->st_mode) && 905 !sess->opts->preserve_links) { 906 WARNX(sess, "%s: skipping " 907 "symlink", ent->fts_path); 908 continue; 909 } 910 911 /* Allocate a new file entry. */ 912 913 if (!flist_realloc(sess, fl, sz, max)) { 914 ERRX1(sess, "flist_realloc"); 915 goto out; 916 } 917 flsz++; 918 f = &(*fl)[*sz - 1]; 919 920 /* Our path defaults to "." for the root. */ 921 922 if ('\0' == ent->fts_path[stripdir]) { 923 if (asprintf(&f->path, "%s.", ent->fts_path) < 0) { 924 ERR(sess, "asprintf"); 925 f->path = NULL; 926 goto out; 927 } 928 } else { 929 if ((f->path = strdup(ent->fts_path)) == NULL) { 930 ERR(sess, "strdup"); 931 goto out; 932 } 933 } 934 935 f->wpath = f->path + stripdir; 936 flist_copy_stat(f, ent->fts_statp); 937 938 /* Optionally copy link information. */ 939 940 if (S_ISLNK(ent->fts_statp->st_mode)) { 941 f->link = symlink_read(sess, f->path); 942 if (f->link == NULL) { 943 ERRX1(sess, "symlink_read"); 944 goto out; 945 } 946 } 947 948 /* Reset errno for next fts_read() call. */ 949 errno = 0; 950 } 951 if (errno) { 952 ERR(sess, "fts_read"); 953 goto out; 954 } 955 if (unveil(root, "r") == -1) { 956 ERR(sess, "%s: unveil", root); 957 goto out; 958 } 959 960 LOG3(sess, "generated %zu filenames: %s", flsz, root); 961 rc = 1; 962out: 963 fts_close(fts); 964 return rc; 965} 966 967/* 968 * Generate a flist recursively given the array of directories (or 969 * files, symlinks, doesn't matter) specified in argv (argc >0). 970 * On success, stores the generated list in "flp" with length "sz", 971 * which may be zero. 972 * Returns zero on failure, non-zero on success. 973 */ 974static int 975flist_gen_dirs(struct sess *sess, size_t argc, char **argv, struct flist **flp, 976 size_t *sz) 977{ 978 size_t i, max = 0; 979 980 for (i = 0; i < argc; i++) 981 if (!flist_gen_dirent(sess, argv[i], flp, sz, &max)) 982 break; 983 984 if (i == argc) { 985 LOG2(sess, "recursively generated %zu filenames", *sz); 986 return 1; 987 } 988 989 ERRX1(sess, "flist_gen_dirent"); 990 flist_free(*flp, max); 991 *flp = NULL; 992 *sz = 0; 993 return 0; 994} 995 996/* 997 * Generate list of files from the command-line argc (>0) and argv. 998 * On success, stores the generated list in "flp" with length "sz", 999 * which may be zero. 1000 * Returns zero on failure, non-zero on success. 1001 */ 1002static int 1003flist_gen_files(struct sess *sess, size_t argc, char **argv, 1004 struct flist **flp, size_t *sz) 1005{ 1006 struct flist *fl = NULL, *f; 1007 size_t i, flsz = 0; 1008 struct stat st; 1009 1010 assert(argc); 1011 1012 if ((fl = calloc(argc, sizeof(struct flist))) == NULL) { 1013 ERR(sess, "calloc"); 1014 return 0; 1015 } 1016 1017 for (i = 0; i < argc; i++) { 1018 if ('\0' == argv[i][0]) 1019 continue; 1020 if (lstat(argv[i], &st) == -1) { 1021 ERR(sess, "%s: lstat", argv[i]); 1022 goto out; 1023 } 1024 1025 /* 1026 * File type checks. 1027 * In non-recursive mode, we don't accept directories. 1028 * We also skip symbolic links without -l. 1029 * Beyond that, we only accept regular files. 1030 */ 1031 1032 if (S_ISDIR(st.st_mode)) { 1033 WARNX(sess, "%s: skipping directory", argv[i]); 1034 continue; 1035 } else if (S_ISLNK(st.st_mode)) { 1036 if (!sess->opts->preserve_links) { 1037 WARNX(sess, "%s: skipping " 1038 "symlink", argv[i]); 1039 continue; 1040 } 1041 } else if (!S_ISREG(st.st_mode)) { 1042 WARNX(sess, "%s: skipping special", argv[i]); 1043 continue; 1044 } 1045 1046 1047 f = &fl[flsz++]; 1048 assert(f != NULL); 1049 1050 /* Add this file to our file-system worldview. */ 1051 1052 if (unveil(argv[i], "r") == -1) { 1053 ERR(sess, "%s: unveil", argv[i]); 1054 goto out; 1055 } 1056 if (!flist_append(sess, f, &st, argv[i])) { 1057 ERRX1(sess, "flist_append"); 1058 goto out; 1059 } 1060 } 1061 1062 LOG2(sess, "non-recursively generated %zu filenames", flsz); 1063 *sz = flsz; 1064 *flp = fl; 1065 return 1; 1066out: 1067 flist_free(fl, argc); 1068 *sz = 0; 1069 *flp = NULL; 1070 return 0; 1071} 1072 1073/* 1074 * Generate a sorted, de-duplicated list of file metadata. 1075 * In non-recursive mode (the default), we use only the files we're 1076 * given. 1077 * Otherwise, directories are recursively examined. 1078 * Returns zero on failure, non-zero on success. 1079 * On success, "fl" will need to be freed with flist_free(). 1080 */ 1081int 1082flist_gen(struct sess *sess, size_t argc, char **argv, struct flist **flp, 1083 size_t *sz) 1084{ 1085 int rc; 1086 1087 assert(argc > 0); 1088 rc = sess->opts->recursive ? 1089 flist_gen_dirs(sess, argc, argv, flp, sz) : 1090 flist_gen_files(sess, argc, argv, flp, sz); 1091 1092 /* After scanning, lock our file-system view. */ 1093 1094 if (unveil(NULL, NULL) == -1) { 1095 ERR(sess, "unveil"); 1096 return 0; 1097 } 1098 if (!rc) 1099 return 0; 1100 1101 qsort(*flp, *sz, sizeof(struct flist), flist_cmp); 1102 1103 if (flist_dedupe(sess, flp, sz)) { 1104 flist_topdirs(sess, *flp, *sz); 1105 return 1; 1106 } 1107 1108 ERRX1(sess, "flist_dedupe"); 1109 flist_free(*flp, *sz); 1110 *flp = NULL; 1111 *sz = 0; 1112 return 0; 1113} 1114 1115/* 1116 * Generate a list of files in root to delete that are within the 1117 * top-level directories stipulated by "wfl". 1118 * Only handles symbolic links, directories, and regular files. 1119 * Returns zero on failure (fl and flsz will be NULL and zero), non-zero 1120 * on success. 1121 * On success, "fl" will need to be freed with flist_free(). 1122 */ 1123int 1124flist_gen_dels(struct sess *sess, const char *root, struct flist **fl, 1125 size_t *sz, const struct flist *wfl, size_t wflsz) 1126{ 1127 char **cargv = NULL; 1128 int rc = 0, c; 1129 FTS *fts = NULL; 1130 FTSENT *ent; 1131 struct flist *f; 1132 size_t cargvs = 0, i, j, max = 0, stripdir; 1133 ENTRY hent; 1134 ENTRY *hentp; 1135 1136 *fl = NULL; 1137 *sz = 0; 1138 1139 /* Only run this code when we're recursive. */ 1140 1141 if (!sess->opts->recursive) 1142 return 1; 1143 1144 /* 1145 * Gather up all top-level directories for scanning. 1146 * This is stipulated by rsync's --delete behaviour, where we 1147 * only delete things in the top-level directories given on the 1148 * command line. 1149 */ 1150 1151 assert(wflsz > 0); 1152 for (i = 0; i < wflsz; i++) 1153 if (FLSTAT_TOP_DIR & wfl[i].st.flags) 1154 cargvs++; 1155 if (cargvs == 0) 1156 return 1; 1157 1158 if ((cargv = calloc(cargvs + 1, sizeof(char *))) == NULL) { 1159 ERR(sess, "calloc"); 1160 return 0; 1161 } 1162 1163 /* 1164 * If we're given just a "." as the first entry, that means 1165 * we're doing a relative copy with a trailing slash. 1166 * Special-case this just for the sake of simplicity. 1167 * Otherwise, look through all top-levels. 1168 */ 1169 1170 if (wflsz && strcmp(wfl[0].wpath, ".") == 0) { 1171 assert(cargvs == 1); 1172 assert(S_ISDIR(wfl[0].st.mode)); 1173 if (asprintf(&cargv[0], "%s/", root) < 0) { 1174 ERR(sess, "asprintf"); 1175 cargv[0] = NULL; 1176 goto out; 1177 } 1178 cargv[1] = NULL; 1179 } else { 1180 for (i = j = 0; i < wflsz; i++) { 1181 if (!(FLSTAT_TOP_DIR & wfl[i].st.flags)) 1182 continue; 1183 assert(S_ISDIR(wfl[i].st.mode)); 1184 assert(strcmp(wfl[i].wpath, ".")); 1185 c = asprintf(&cargv[j], "%s/%s", root, wfl[i].wpath); 1186 if (c < 0) { 1187 ERR(sess, "asprintf"); 1188 cargv[j] = NULL; 1189 goto out; 1190 } 1191 LOG4(sess, "%s: will scan for deletions", cargv[j]); 1192 j++; 1193 } 1194 assert(j == cargvs); 1195 cargv[j] = NULL; 1196 } 1197 1198 LOG2(sess, "delete from %zu directories", cargvs); 1199 1200 /* 1201 * Next, use the standard hcreate(3) hashtable interface to hash 1202 * all of the files that we want to synchronise. 1203 * This way, we'll be able to determine which files we want to 1204 * delete in O(n) time instead of O(n * search) time. 1205 * Plus, we can do the scan in-band and only allocate the files 1206 * we want to delete. 1207 */ 1208 1209 if (!hcreate(wflsz)) { 1210 ERR(sess, "hcreate"); 1211 goto out; 1212 } 1213 1214 for (i = 0; i < wflsz; i++) { 1215 memset(&hent, 0, sizeof(ENTRY)); 1216 if ((hent.key = strdup(wfl[i].wpath)) == NULL) { 1217 ERR(sess, "strdup"); 1218 goto out; 1219 } 1220 if ((hentp = hsearch(hent, ENTER)) == NULL) { 1221 ERR(sess, "hsearch"); 1222 goto out; 1223 } else if (hentp->key != hent.key) { 1224 ERRX(sess, "%s: duplicate", wfl[i].wpath); 1225 free(hent.key); 1226 goto out; 1227 } 1228 } 1229 1230 /* 1231 * Now we're going to try to descend into all of the top-level 1232 * directories stipulated by the file list. 1233 * If the directories don't exist, it's ok. 1234 */ 1235 1236 if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) { 1237 ERR(sess, "fts_open"); 1238 goto out; 1239 } 1240 1241 stripdir = strlen(root) + 1; 1242 errno = 0; 1243 while ((ent = fts_read(fts)) != NULL) { 1244 if (ent->fts_info == FTS_NS) 1245 continue; 1246 if (!flist_fts_check(sess, ent)) { 1247 errno = 0; 1248 continue; 1249 } else if (stripdir >= ent->fts_pathlen) 1250 continue; 1251 1252 /* Look up in hashtable. */ 1253 1254 memset(&hent, 0, sizeof(ENTRY)); 1255 hent.key = ent->fts_path + stripdir; 1256 if (hsearch(hent, FIND) != NULL) 1257 continue; 1258 1259 /* Not found: we'll delete it. */ 1260 1261 if (!flist_realloc(sess, fl, sz, &max)) { 1262 ERRX1(sess, "flist_realloc"); 1263 goto out; 1264 } 1265 f = &(*fl)[*sz - 1]; 1266 1267 if ((f->path = strdup(ent->fts_path)) == NULL) { 1268 ERR(sess, "strdup"); 1269 goto out; 1270 } 1271 f->wpath = f->path + stripdir; 1272 assert(ent->fts_statp != NULL); 1273 flist_copy_stat(f, ent->fts_statp); 1274 errno = 0; 1275 } 1276 1277 if (errno) { 1278 ERR(sess, "fts_read"); 1279 goto out; 1280 } 1281 1282 qsort(*fl, *sz, sizeof(struct flist), flist_cmp); 1283 rc = 1; 1284out: 1285 if (fts != NULL) 1286 fts_close(fts); 1287 for (i = 0; i < cargvs; i++) 1288 free(cargv[i]); 1289 free(cargv); 1290 hdestroy(); 1291 return rc; 1292} 1293 1294/* 1295 * Delete all files and directories in "fl". 1296 * If called with a zero-length "fl", does nothing. 1297 * If dry_run is specified, simply write what would be done. 1298 * Return zero on failure, non-zero on success. 1299 */ 1300int 1301flist_del(struct sess *sess, int root, const struct flist *fl, size_t flsz) 1302{ 1303 ssize_t i; 1304 int flag; 1305 1306 if (flsz == 0) 1307 return 1; 1308 1309 assert(sess->opts->del); 1310 assert(sess->opts->recursive); 1311 1312 for (i = flsz - 1; i >= 0; i--) { 1313 LOG1(sess, "%s: deleting", fl[i].wpath); 1314 if (sess->opts->dry_run) 1315 continue; 1316 assert(root != -1); 1317 flag = S_ISDIR(fl[i].st.mode) ? AT_REMOVEDIR : 0; 1318 if (unlinkat(root, fl[i].wpath, flag) == -1 && 1319 errno != ENOENT) { 1320 ERR(sess, "%s: unlinkat", fl[i].wpath); 1321 return 0; 1322 } 1323 } 1324 1325 return 1; 1326} 1327