flist.c revision 1.20
1/* $Id: flist.c,v 1.20 2019/03/18 15:33:21 deraadt Exp $ */ 2/* 3 * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2019 Florian Obser <florian@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18#include <sys/param.h> 19#include <sys/stat.h> 20 21#include <assert.h> 22#include <errno.h> 23#include <fcntl.h> 24#include <fts.h> 25#include <inttypes.h> 26#include <search.h> 27#include <stdio.h> 28#include <stdlib.h> 29#include <string.h> 30#include <unistd.h> 31 32#include "extern.h" 33 34/* 35 * We allocate our file list in chunk sizes so as not to do it one by 36 * one. 37 * Preferrably we get one or two allocation. 38 */ 39#define FLIST_CHUNK_SIZE (1024) 40 41/* 42 * These flags are part of the rsync protocol. 43 * They are sent as the first byte for a file transmission and encode 44 * information that affects subsequent transmissions. 45 */ 46#define FLIST_TOP_LEVEL 0x0001 /* needed for remote --delete */ 47#define FLIST_MODE_SAME 0x0002 /* mode is repeat */ 48#define FLIST_RDEV_SAME 0x0004 /* rdev is repeat */ 49#define FLIST_UID_SAME 0x0008 /* uid is repeat */ 50#define FLIST_GID_SAME 0x0010 /* gid is repeat */ 51#define FLIST_NAME_SAME 0x0020 /* name is repeat */ 52#define FLIST_NAME_LONG 0x0040 /* name >255 bytes */ 53#define FLIST_TIME_SAME 0x0080 /* time is repeat */ 54 55/* 56 * Requied way to sort a filename list. 57 */ 58static int 59flist_cmp(const void *p1, const void *p2) 60{ 61 const struct flist *f1 = p1, *f2 = p2; 62 63 return strcmp(f1->wpath, f2->wpath); 64} 65 66/* 67 * Deduplicate our file list (which may be zero-length). 68 * Returns zero on failure, non-zero on success. 69 */ 70static int 71flist_dedupe(struct sess *sess, struct flist **fl, size_t *sz) 72{ 73 size_t i, j; 74 struct flist *new; 75 struct flist *f, *fnext; 76 77 if (*sz == 0) 78 return 1; 79 80 /* Create a new buffer, "new", and copy. */ 81 82 new = calloc(*sz, sizeof(struct flist)); 83 if (new == NULL) { 84 ERR(sess, "calloc"); 85 return 0; 86 } 87 88 for (i = j = 0; i < *sz - 1; i++) { 89 f = &(*fl)[i]; 90 fnext = &(*fl)[i + 1]; 91 92 if (strcmp(f->wpath, fnext->wpath)) { 93 new[j++] = *f; 94 continue; 95 } 96 97 /* 98 * Our working (destination) paths are the same. 99 * If the actual file is the same (as given on the 100 * command-line), then we can just discard the first. 101 * Otherwise, we need to bail out: it means we have two 102 * different files with the relative path on the 103 * destination side. 104 */ 105 106 if (strcmp(f->path, fnext->path) == 0) { 107 new[j++] = *f; 108 i++; 109 WARNX(sess, "%s: duplicate path: %s", 110 f->wpath, f->path); 111 free(fnext->path); 112 free(fnext->link); 113 fnext->path = fnext->link = NULL; 114 continue; 115 } 116 117 ERRX(sess, "%s: duplicate working path for " 118 "possibly different file: %s, %s", 119 f->wpath, f->path, fnext->path); 120 free(new); 121 return 0; 122 } 123 124 /* Don't forget the last entry. */ 125 126 if (i == *sz - 1) 127 new[j++] = (*fl)[i]; 128 129 /* 130 * Reassign to the deduplicated array. 131 * If we started out with *sz > 0, which we check for at the 132 * beginning, then we'll always continue having *sz > 0. 133 */ 134 135 free(*fl); 136 *fl = new; 137 *sz = j; 138 assert(*sz); 139 return 1; 140} 141 142/* 143 * We're now going to find our top-level directories. 144 * This only applies to recursive mode. 145 * If we have the first element as the ".", then that's the "top 146 * directory" of our transfer. 147 * Otherwise, mark up all top-level directories in the set. 148 * XXX: the FLIST_TOP_LEVEL flag should indicate what is and what isn't 149 * a top-level directory, but I'm not sure if GPL rsync(1) respects it 150 * the same way. 151 */ 152static void 153flist_topdirs(struct sess *sess, struct flist *fl, size_t flsz) 154{ 155 size_t i; 156 const char *cp; 157 158 if (!sess->opts->recursive) 159 return; 160 161 if (flsz && strcmp(fl[0].wpath, ".")) { 162 for (i = 0; i < flsz; i++) { 163 if (!S_ISDIR(fl[i].st.mode)) 164 continue; 165 cp = strchr(fl[i].wpath, '/'); 166 if (cp != NULL && cp[1] != '\0') 167 continue; 168 fl[i].st.flags |= FLSTAT_TOP_DIR; 169 LOG4(sess, "%s: top-level", fl[i].wpath); 170 } 171 } else if (flsz) { 172 fl[0].st.flags |= FLSTAT_TOP_DIR; 173 LOG4(sess, "%s: top-level", fl[0].wpath); 174 } 175} 176 177/* 178 * Filter through the fts() file information. 179 * We want directories (pre-order), regular files, and symlinks. 180 * Everything else is skipped and possibly warned about. 181 * Return zero to skip, non-zero to examine. 182 */ 183static int 184flist_fts_check(struct sess *sess, FTSENT *ent) 185{ 186 187 if (ent->fts_info == FTS_F || 188 ent->fts_info == FTS_D || 189 ent->fts_info == FTS_SL || 190 ent->fts_info == FTS_SLNONE) 191 return 1; 192 193 if (ent->fts_info == FTS_DC) { 194 WARNX(sess, "%s: directory cycle", ent->fts_path); 195 } else if (ent->fts_info == FTS_DNR) { 196 errno = ent->fts_errno; 197 WARN(sess, "%s: unreadable directory", ent->fts_path); 198 } else if (ent->fts_info == FTS_DOT) { 199 WARNX(sess, "%s: skipping dot-file", ent->fts_path); 200 } else if (ent->fts_info == FTS_ERR) { 201 errno = ent->fts_errno; 202 WARN(sess, "%s", ent->fts_path); 203 } else if (ent->fts_info == FTS_DEFAULT) { 204 if ((sess->opts->devices && (S_ISBLK(ent->fts_statp->st_mode) || 205 S_ISCHR(ent->fts_statp->st_mode))) || 206 (sess->opts->specials && 207 (S_ISFIFO(ent->fts_statp->st_mode) || 208 S_ISSOCK(ent->fts_statp->st_mode)))) { 209 return 1; 210 } 211 WARNX(sess, "%s: skipping special", ent->fts_path); 212 } else if (ent->fts_info == FTS_NS) { 213 errno = ent->fts_errno; 214 WARN(sess, "%s: could not stat", ent->fts_path); 215 } 216 217 return 0; 218} 219 220/* 221 * Copy necessary elements in "st" into the fields of "f". 222 */ 223static void 224flist_copy_stat(struct flist *f, const struct stat *st) 225{ 226 f->st.mode = st->st_mode; 227 f->st.uid = st->st_uid; 228 f->st.gid = st->st_gid; 229 f->st.size = st->st_size; 230 f->st.mtime = st->st_mtime; 231 f->st.rdev = st->st_rdev; 232} 233 234void 235flist_free(struct flist *f, size_t sz) 236{ 237 size_t i; 238 239 if (f == NULL) 240 return; 241 242 for (i = 0; i < sz; i++) { 243 free(f[i].path); 244 free(f[i].link); 245 } 246 free(f); 247} 248 249/* 250 * Serialise our file list (which may be zero-length) to the wire. 251 * Makes sure that the receiver isn't going to block on sending us 252 * return messages on the log channel. 253 * Return zero on failure, non-zero on success. 254 */ 255int 256flist_send(struct sess *sess, int fdin, int fdout, const struct flist *fl, 257 size_t flsz) 258{ 259 size_t i, sz, gidsz = 0, uidsz = 0; 260 uint8_t flag; 261 const struct flist *f; 262 const char *fn; 263 struct ident *gids = NULL, *uids = NULL; 264 int rc = 0; 265 266 /* Double-check that we've no pending multiplexed data. */ 267 268 LOG2(sess, "sending file metadata list: %zu", flsz); 269 270 for (i = 0; i < flsz; i++) { 271 f = &fl[i]; 272 fn = f->wpath; 273 sz = strlen(f->wpath); 274 assert(sz > 0); 275 276 /* 277 * If applicable, unclog the read buffer. 278 * This happens when the receiver has a lot of log 279 * messages and all we're doing is sending our file list 280 * without checking for messages. 281 */ 282 283 if (sess->mplex_reads && 284 io_read_check(sess, fdin) && 285 !io_read_flush(sess, fdin)) { 286 ERRX1(sess, "io_read_flush"); 287 goto out; 288 } 289 290 /* 291 * For ease, make all of our filenames be "long" 292 * regardless their actual length. 293 * This also makes sure that we don't transmit a zero 294 * byte unintentionally. 295 */ 296 297 flag = FLIST_NAME_LONG; 298 if ((FLSTAT_TOP_DIR & f->st.flags)) 299 flag |= FLIST_TOP_LEVEL; 300 301 LOG3(sess, "%s: sending file metadata: " 302 "size %jd, mtime %jd, mode %o", 303 fn, (intmax_t)f->st.size, 304 (intmax_t)f->st.mtime, f->st.mode); 305 306 /* Now write to the wire. */ 307 /* FIXME: buffer this. */ 308 309 if (!io_write_byte(sess, fdout, flag)) { 310 ERRX1(sess, "io_write_byte"); 311 goto out; 312 } else if (!io_write_int(sess, fdout, sz)) { 313 ERRX1(sess, "io_write_int"); 314 goto out; 315 } else if (!io_write_buf(sess, fdout, fn, sz)) { 316 ERRX1(sess, "io_write_buf"); 317 goto out; 318 } else if (!io_write_long(sess, fdout, f->st.size)) { 319 ERRX1(sess, "io_write_long"); 320 goto out; 321 } else if (!io_write_int(sess, fdout, f->st.mtime)) { 322 ERRX1(sess, "io_write_int"); 323 goto out; 324 } else if (!io_write_int(sess, fdout, f->st.mode)) { 325 ERRX1(sess, "io_write_int"); 326 goto out; 327 } 328 329 /* Conditional part: uid. */ 330 331 if (sess->opts->preserve_uids) { 332 if (!io_write_int(sess, fdout, f->st.uid)) { 333 ERRX1(sess, "io_write_int"); 334 goto out; 335 } 336 if (!idents_add(sess, 0, &uids, &uidsz, f->st.uid)) { 337 ERRX1(sess, "idents_add"); 338 goto out; 339 } 340 } 341 342 /* Conditional part: gid. */ 343 344 if (sess->opts->preserve_gids) { 345 if (!io_write_int(sess, fdout, f->st.gid)) { 346 ERRX1(sess, "io_write_int"); 347 goto out; 348 } 349 if (!idents_add(sess, 1, &gids, &gidsz, f->st.gid)) { 350 ERRX1(sess, "idents_add"); 351 goto out; 352 } 353 } 354 355 /* Conditional part: devices & special files. */ 356 357 if ((sess->opts->devices && (S_ISBLK(f->st.mode) || 358 S_ISCHR(f->st.mode))) || 359 (sess->opts->specials && (S_ISFIFO(f->st.mode) || 360 S_ISSOCK(f->st.mode)))) { 361 if (!io_write_int(sess, fdout, f->st.rdev)) { 362 ERRX1(sess, "io_write_int"); 363 goto out; 364 } 365 } 366 367 /* Conditional part: link. */ 368 369 if (S_ISLNK(f->st.mode) && 370 sess->opts->preserve_links) { 371 fn = f->link; 372 sz = strlen(f->link); 373 if (!io_write_int(sess, fdout, sz)) { 374 ERRX1(sess, "io_write_int"); 375 goto out; 376 } 377 if (!io_write_buf(sess, fdout, fn, sz)) { 378 ERRX1(sess, "io_write_int"); 379 goto out; 380 } 381 } 382 383 if (S_ISREG(f->st.mode)) 384 sess->total_size += f->st.size; 385 } 386 387 /* Signal end of file list. */ 388 389 if (!io_write_byte(sess, fdout, 0)) { 390 ERRX1(sess, "io_write_byte"); 391 goto out; 392 } 393 394 /* Conditionally write identifier lists. */ 395 396 if (sess->opts->preserve_uids && !sess->opts->numeric_ids) { 397 LOG2(sess, "sending uid list: %zu", uidsz); 398 if (!idents_send(sess, fdout, uids, uidsz)) { 399 ERRX1(sess, "idents_send"); 400 goto out; 401 } 402 } 403 404 if (sess->opts->preserve_gids && !sess->opts->numeric_ids) { 405 LOG2(sess, "sending gid list: %zu", gidsz); 406 if (!idents_send(sess, fdout, gids, gidsz)) { 407 ERRX1(sess, "idents_send"); 408 goto out; 409 } 410 } 411 412 rc = 1; 413out: 414 idents_free(gids, gidsz); 415 idents_free(uids, uidsz); 416 return rc; 417} 418 419/* 420 * Read the filename of a file list. 421 * This is the most expensive part of the file list transfer, so a lot 422 * of attention has gone into transmitting as little as possible. 423 * Micro-optimisation, but whatever. 424 * Fills in "f" with the full path on success. 425 * Returns zero on failure, non-zero on success. 426 */ 427static int 428flist_recv_name(struct sess *sess, int fd, struct flist *f, uint8_t flags, 429 char last[MAXPATHLEN]) 430{ 431 uint8_t bval; 432 size_t partial = 0; 433 size_t pathlen = 0, len; 434 435 /* 436 * Read our filename. 437 * If we have FLIST_NAME_SAME, we inherit some of the last 438 * transmitted name. 439 * If we have FLIST_NAME_LONG, then the string length is greater 440 * than byte-size. 441 */ 442 443 if (FLIST_NAME_SAME & flags) { 444 if (!io_read_byte(sess, fd, &bval)) { 445 ERRX1(sess, "io_read_byte"); 446 return 0; 447 } 448 partial = bval; 449 } 450 451 /* Get the (possibly-remaining) filename length. */ 452 453 if (FLIST_NAME_LONG & flags) { 454 if (!io_read_size(sess, fd, &pathlen)) { 455 ERRX1(sess, "io_read_size"); 456 return 0; 457 } 458 } else { 459 if (!io_read_byte(sess, fd, &bval)) { 460 ERRX1(sess, "io_read_byte"); 461 return 0; 462 } 463 pathlen = bval; 464 } 465 466 /* Allocate our full filename length. */ 467 /* FIXME: maximum pathname length. */ 468 469 if ((len = pathlen + partial) == 0) { 470 ERRX(sess, "security violation: " 471 "zero-length pathname"); 472 return 0; 473 } 474 475 if ((f->path = malloc(len + 1)) == NULL) { 476 ERR(sess, "malloc"); 477 return 0; 478 } 479 f->path[len] = '\0'; 480 481 if (FLIST_NAME_SAME & flags) 482 memcpy(f->path, last, partial); 483 484 if (!io_read_buf(sess, fd, f->path + partial, pathlen)) { 485 ERRX1(sess, "io_read_buf"); 486 return 0; 487 } 488 489 if (f->path[0] == '/') { 490 ERRX(sess, "security violation: " 491 "absolute pathname: %s", f->path); 492 return 0; 493 } 494 495 if (strstr(f->path, "/../") != NULL || 496 (len > 2 && strcmp(f->path + len - 3, "/..") == 0) || 497 (len > 2 && strncmp(f->path, "../", 3) == 0) || 498 strcmp(f->path, "..") == 0) { 499 ERRX(sess, "%s: security violation: " 500 "backtracking pathname", f->path); 501 return 0; 502 } 503 504 /* Record our last path and construct our filename. */ 505 506 strlcpy(last, f->path, MAXPATHLEN); 507 f->wpath = f->path; 508 return 1; 509} 510 511/* 512 * Reallocate a file list in chunks of FLIST_CHUNK_SIZE; 513 * Returns zero on failure, non-zero on success. 514 */ 515static int 516flist_realloc(struct sess *sess, struct flist **fl, size_t *sz, size_t *max) 517{ 518 void *pp; 519 520 if (*sz + 1 <= *max) { 521 (*sz)++; 522 return 1; 523 } 524 525 pp = recallocarray(*fl, *max, 526 *max + FLIST_CHUNK_SIZE, sizeof(struct flist)); 527 if (pp == NULL) { 528 ERR(sess, "recallocarray"); 529 return 0; 530 } 531 *fl = pp; 532 *max += FLIST_CHUNK_SIZE; 533 (*sz)++; 534 return 1; 535} 536 537/* 538 * Copy a regular or symbolic link file "path" into "f". 539 * This handles the correct path creation and symbolic linking. 540 * Returns zero on failure, non-zero on success. 541 */ 542static int 543flist_append(struct sess *sess, struct flist *f, struct stat *st, 544 const char *path) 545{ 546 547 /* 548 * Copy the full path for local addressing and transmit 549 * only the filename part for the receiver. 550 */ 551 552 if ((f->path = strdup(path)) == NULL) { 553 ERR(sess, "strdup"); 554 return 0; 555 } 556 557 if ((f->wpath = strrchr(f->path, '/')) == NULL) 558 f->wpath = f->path; 559 else 560 f->wpath++; 561 562 /* 563 * On the receiving end, we'll strip out all bits on the 564 * mode except for the file permissions. 565 * No need to warn about it here. 566 */ 567 568 flist_copy_stat(f, st); 569 570 /* Optionally copy link information. */ 571 572 if (S_ISLNK(st->st_mode)) { 573 f->link = symlink_read(sess, f->path); 574 if (f->link == NULL) { 575 ERRX1(sess, "symlink_read"); 576 return 0; 577 } 578 } 579 580 return 1; 581} 582 583/* 584 * Receive a file list from the wire, filling in length "sz" (which may 585 * possibly be zero) and list "flp" on success. 586 * Return zero on failure, non-zero on success. 587 */ 588int 589flist_recv(struct sess *sess, int fd, struct flist **flp, size_t *sz) 590{ 591 struct flist *fl = NULL; 592 struct flist *ff; 593 const struct flist *fflast = NULL; 594 size_t flsz = 0, flmax = 0, lsz, gidsz = 0, uidsz = 0; 595 uint8_t flag; 596 char last[MAXPATHLEN]; 597 uint64_t lval; /* temporary values... */ 598 int32_t ival; 599 struct ident *gids = NULL, *uids = NULL; 600 601 last[0] = '\0'; 602 603 for (;;) { 604 if (!io_read_byte(sess, fd, &flag)) { 605 ERRX1(sess, "io_read_byte"); 606 goto out; 607 } else if (flag == 0) 608 break; 609 610 if (!flist_realloc(sess, &fl, &flsz, &flmax)) { 611 ERRX1(sess, "flist_realloc"); 612 goto out; 613 } 614 615 ff = &fl[flsz - 1]; 616 fflast = flsz > 1 ? &fl[flsz - 2] : NULL; 617 618 /* Filename first. */ 619 620 if (!flist_recv_name(sess, fd, ff, flag, last)) { 621 ERRX1(sess, "flist_recv_name"); 622 goto out; 623 } 624 625 /* Read the file size. */ 626 627 if (!io_read_ulong(sess, fd, &lval)) { 628 ERRX1(sess, "io_read_ulong"); 629 goto out; 630 } 631 ff->st.size = lval; 632 633 /* Read the modification time. */ 634 635 if (!(FLIST_TIME_SAME & flag)) { 636 if (!io_read_int(sess, fd, &ival)) { 637 ERRX1(sess, "io_read_int"); 638 goto out; 639 } 640 ff->st.mtime = ival; 641 } else if (fflast == NULL) { 642 ERRX(sess, "same time without last entry"); 643 goto out; 644 } else 645 ff->st.mtime = fflast->st.mtime; 646 647 /* Read the file mode. */ 648 649 if (!(FLIST_MODE_SAME & flag)) { 650 if (!io_read_int(sess, fd, &ival)) { 651 ERRX1(sess, "io_read_int"); 652 goto out; 653 } 654 ff->st.mode = ival; 655 } else if (fflast == NULL) { 656 ERRX(sess, "same mode without last entry"); 657 goto out; 658 } else 659 ff->st.mode = fflast->st.mode; 660 661 /* Conditional part: uid. */ 662 663 if (sess->opts->preserve_uids) { 664 if (!(FLIST_UID_SAME & flag)) { 665 if (!io_read_int(sess, fd, &ival)) { 666 ERRX1(sess, "io_read_int"); 667 goto out; 668 } 669 ff->st.uid = ival; 670 } else if (fflast == NULL) { 671 ERRX(sess, "same uid " 672 "without last entry"); 673 goto out; 674 } else 675 ff->st.uid = fflast->st.uid; 676 } 677 678 /* Conditional part: gid. */ 679 680 if (sess->opts->preserve_gids) { 681 if (!(FLIST_GID_SAME & flag)) { 682 if (!io_read_int(sess, fd, &ival)) { 683 ERRX1(sess, "io_read_int"); 684 goto out; 685 } 686 ff->st.gid = ival; 687 } else if (fflast == NULL) { 688 ERRX(sess, "same gid " 689 "without last entry"); 690 goto out; 691 } else 692 ff->st.gid = fflast->st.gid; 693 } 694 695 /* Conditional part: devices & special files. */ 696 697 if ((sess->opts->devices && (S_ISBLK(ff->st.mode) || 698 S_ISCHR(ff->st.mode))) || 699 (sess->opts->specials && (S_ISFIFO(ff->st.mode) || 700 S_ISSOCK(ff->st.mode)))) { 701 if (!(FLIST_RDEV_SAME & flag)) { 702 if (!io_read_int(sess, fd, &ival)) { 703 ERRX1(sess, "io_read_int"); 704 goto out; 705 } 706 ff->st.rdev = ival; 707 } else if (fflast == NULL) { 708 ERRX(sess, "same device without last entry"); 709 goto out; 710 } else 711 ff->st.rdev = fflast->st.rdev; 712 } 713 714 /* Conditional part: link. */ 715 716 if (S_ISLNK(ff->st.mode) && 717 sess->opts->preserve_links) { 718 if (!io_read_size(sess, fd, &lsz)) { 719 ERRX1(sess, "io_read_size"); 720 goto out; 721 } else if (lsz == 0) { 722 ERRX(sess, "empty link name"); 723 goto out; 724 } 725 ff->link = calloc(lsz + 1, 1); 726 if (ff->link == NULL) { 727 ERR(sess, "calloc"); 728 goto out; 729 } 730 if (!io_read_buf(sess, fd, ff->link, lsz)) { 731 ERRX1(sess, "io_read_buf"); 732 goto out; 733 } 734 } 735 736 LOG3(sess, "%s: received file metadata: " 737 "size %jd, mtime %jd, mode %o, rdev (%d, %d)", 738 ff->path, (intmax_t)ff->st.size, 739 (intmax_t)ff->st.mtime, ff->st.mode, 740 major(ff->st.rdev), minor(ff->st.rdev)); 741 742 if (S_ISREG(ff->st.mode)) 743 sess->total_size += ff->st.size; 744 } 745 746 /* Conditionally read the user/group list. */ 747 748 if (sess->opts->preserve_uids && !sess->opts->numeric_ids) { 749 if (!idents_recv(sess, fd, &uids, &uidsz)) { 750 ERRX1(sess, "idents_recv"); 751 goto out; 752 } 753 LOG2(sess, "received uid list: %zu", uidsz); 754 } 755 756 if (sess->opts->preserve_gids && !sess->opts->numeric_ids) { 757 if (!idents_recv(sess, fd, &gids, &gidsz)) { 758 ERRX1(sess, "idents_recv"); 759 goto out; 760 } 761 LOG2(sess, "received gid list: %zu", gidsz); 762 } 763 764 /* Remember to order the received list. */ 765 766 LOG2(sess, "received file metadata list: %zu", flsz); 767 qsort(fl, flsz, sizeof(struct flist), flist_cmp); 768 flist_topdirs(sess, fl, flsz); 769 *sz = flsz; 770 *flp = fl; 771 772 /* Conditionally remap and reassign identifiers. */ 773 774 if (sess->opts->preserve_uids && !sess->opts->numeric_ids) { 775 idents_remap(sess, 0, uids, uidsz); 776 idents_assign_uid(sess, fl, flsz, uids, uidsz); 777 } 778 779 if (sess->opts->preserve_gids && !sess->opts->numeric_ids) { 780 idents_remap(sess, 1, gids, gidsz); 781 idents_assign_gid(sess, fl, flsz, gids, gidsz); 782 } 783 784 idents_free(gids, gidsz); 785 idents_free(uids, uidsz); 786 return 1; 787out: 788 flist_free(fl, flsz); 789 idents_free(gids, gidsz); 790 idents_free(uids, uidsz); 791 *sz = 0; 792 *flp = NULL; 793 return 0; 794} 795 796/* 797 * Generate a flist possibly-recursively given a file root, which may 798 * also be a regular file or symlink. 799 * On success, augments the generated list in "flp" of length "sz". 800 * Returns zero on failure, non-zero on success. 801 */ 802static int 803flist_gen_dirent(struct sess *sess, char *root, struct flist **fl, size_t *sz, 804 size_t *max) 805{ 806 char *cargv[2], *cp; 807 int rc = 0; 808 FTS *fts; 809 FTSENT *ent; 810 struct flist *f; 811 size_t flsz = 0, stripdir; 812 struct stat st; 813 814 cargv[0] = root; 815 cargv[1] = NULL; 816 817 /* 818 * If we're a file, then revert to the same actions we use for 819 * the non-recursive scan. 820 */ 821 822 if (lstat(root, &st) == -1) { 823 ERR(sess, "%s: lstat", root); 824 return 0; 825 } else if (S_ISREG(st.st_mode)) { 826 if (!flist_realloc(sess, fl, sz, max)) { 827 ERRX1(sess, "flist_realloc"); 828 return 0; 829 } 830 f = &(*fl)[(*sz) - 1]; 831 assert(f != NULL); 832 833 if (!flist_append(sess, f, &st, root)) { 834 ERRX1(sess, "flist_append"); 835 return 0; 836 } 837 if (unveil(root, "r") == -1) { 838 ERR(sess, "%s: unveil", root); 839 return 0; 840 } 841 return 1; 842 } else if (S_ISLNK(st.st_mode)) { 843 if (!sess->opts->preserve_links) { 844 WARNX(sess, "%s: skipping symlink", root); 845 return 1; 846 } else if (!flist_realloc(sess, fl, sz, max)) { 847 ERRX1(sess, "flist_realloc"); 848 return 0; 849 } 850 f = &(*fl)[(*sz) - 1]; 851 assert(f != NULL); 852 853 if (!flist_append(sess, f, &st, root)) { 854 ERRX1(sess, "flist_append"); 855 return 0; 856 } 857 if (unveil(root, "r") == -1) { 858 ERR(sess, "%s: unveil", root); 859 return 0; 860 } 861 return 1; 862 } else if (!S_ISDIR(st.st_mode)) { 863 WARNX(sess, "%s: skipping special", root); 864 return 1; 865 } 866 867 /* 868 * If we end with a slash, it means that we're not supposed to 869 * copy the directory part itself---only the contents. 870 * So set "stripdir" to be what we take out. 871 */ 872 873 stripdir = strlen(root); 874 assert(stripdir > 0); 875 if (root[stripdir - 1] != '/') 876 stripdir = 0; 877 878 /* 879 * If we're not stripping anything, then see if we need to strip 880 * out the leading material in the path up to and including the 881 * last directory component. 882 */ 883 884 if (stripdir == 0) 885 if ((cp = strrchr(root, '/')) != NULL) 886 stripdir = cp - root + 1; 887 888 /* 889 * If we're recursive, then we need to take down all of the 890 * files and directory components, so use fts(3). 891 * Copying the information file-by-file into the flstat. 892 * We'll make sense of it in flist_send. 893 */ 894 895 if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) { 896 ERR(sess, "fts_open"); 897 return 0; 898 } 899 900 errno = 0; 901 while ((ent = fts_read(fts)) != NULL) { 902 if (!flist_fts_check(sess, ent)) { 903 errno = 0; 904 continue; 905 } 906 907 /* We don't allow symlinks without -l. */ 908 909 assert(ent->fts_statp != NULL); 910 if (S_ISLNK(ent->fts_statp->st_mode) && 911 !sess->opts->preserve_links) { 912 WARNX(sess, "%s: skipping " 913 "symlink", ent->fts_path); 914 continue; 915 } 916 917 /* Allocate a new file entry. */ 918 919 if (!flist_realloc(sess, fl, sz, max)) { 920 ERRX1(sess, "flist_realloc"); 921 goto out; 922 } 923 flsz++; 924 f = &(*fl)[*sz - 1]; 925 926 /* Our path defaults to "." for the root. */ 927 928 if (ent->fts_path[stripdir] == '\0') { 929 if (asprintf(&f->path, "%s.", ent->fts_path) < 0) { 930 ERR(sess, "asprintf"); 931 f->path = NULL; 932 goto out; 933 } 934 } else { 935 if ((f->path = strdup(ent->fts_path)) == NULL) { 936 ERR(sess, "strdup"); 937 goto out; 938 } 939 } 940 941 f->wpath = f->path + stripdir; 942 flist_copy_stat(f, ent->fts_statp); 943 944 /* Optionally copy link information. */ 945 946 if (S_ISLNK(ent->fts_statp->st_mode)) { 947 f->link = symlink_read(sess, f->path); 948 if (f->link == NULL) { 949 ERRX1(sess, "symlink_read"); 950 goto out; 951 } 952 } 953 954 /* Reset errno for next fts_read() call. */ 955 errno = 0; 956 } 957 if (errno) { 958 ERR(sess, "fts_read"); 959 goto out; 960 } 961 if (unveil(root, "r") == -1) { 962 ERR(sess, "%s: unveil", root); 963 goto out; 964 } 965 966 LOG3(sess, "generated %zu filenames: %s", flsz, root); 967 rc = 1; 968out: 969 fts_close(fts); 970 return rc; 971} 972 973/* 974 * Generate a flist recursively given the array of directories (or 975 * files, symlinks, doesn't matter) specified in argv (argc >0). 976 * On success, stores the generated list in "flp" with length "sz", 977 * which may be zero. 978 * Returns zero on failure, non-zero on success. 979 */ 980static int 981flist_gen_dirs(struct sess *sess, size_t argc, char **argv, struct flist **flp, 982 size_t *sz) 983{ 984 size_t i, max = 0; 985 986 for (i = 0; i < argc; i++) 987 if (!flist_gen_dirent(sess, argv[i], flp, sz, &max)) 988 break; 989 990 if (i == argc) { 991 LOG2(sess, "recursively generated %zu filenames", *sz); 992 return 1; 993 } 994 995 ERRX1(sess, "flist_gen_dirent"); 996 flist_free(*flp, max); 997 *flp = NULL; 998 *sz = 0; 999 return 0; 1000} 1001 1002/* 1003 * Generate list of files from the command-line argc (>0) and argv. 1004 * On success, stores the generated list in "flp" with length "sz", 1005 * which may be zero. 1006 * Returns zero on failure, non-zero on success. 1007 */ 1008static int 1009flist_gen_files(struct sess *sess, size_t argc, char **argv, 1010 struct flist **flp, size_t *sz) 1011{ 1012 struct flist *fl = NULL, *f; 1013 size_t i, flsz = 0; 1014 struct stat st; 1015 1016 assert(argc); 1017 1018 if ((fl = calloc(argc, sizeof(struct flist))) == NULL) { 1019 ERR(sess, "calloc"); 1020 return 0; 1021 } 1022 1023 for (i = 0; i < argc; i++) { 1024 if (argv[i][0] == '\0') 1025 continue; 1026 if (lstat(argv[i], &st) == -1) { 1027 ERR(sess, "%s: lstat", argv[i]); 1028 goto out; 1029 } 1030 1031 /* 1032 * File type checks. 1033 * In non-recursive mode, we don't accept directories. 1034 * We also skip symbolic links without -l. 1035 * Beyond that, we only accept regular files. 1036 */ 1037 1038 if (S_ISDIR(st.st_mode)) { 1039 WARNX(sess, "%s: skipping directory", argv[i]); 1040 continue; 1041 } else if (S_ISLNK(st.st_mode)) { 1042 if (!sess->opts->preserve_links) { 1043 WARNX(sess, "%s: skipping " 1044 "symlink", argv[i]); 1045 continue; 1046 } 1047 } else if (!S_ISREG(st.st_mode)) { 1048 WARNX(sess, "%s: skipping special", argv[i]); 1049 continue; 1050 } 1051 1052 1053 f = &fl[flsz++]; 1054 assert(f != NULL); 1055 1056 /* Add this file to our file-system worldview. */ 1057 1058 if (unveil(argv[i], "r") == -1) { 1059 ERR(sess, "%s: unveil", argv[i]); 1060 goto out; 1061 } 1062 if (!flist_append(sess, f, &st, argv[i])) { 1063 ERRX1(sess, "flist_append"); 1064 goto out; 1065 } 1066 } 1067 1068 LOG2(sess, "non-recursively generated %zu filenames", flsz); 1069 *sz = flsz; 1070 *flp = fl; 1071 return 1; 1072out: 1073 flist_free(fl, argc); 1074 *sz = 0; 1075 *flp = NULL; 1076 return 0; 1077} 1078 1079/* 1080 * Generate a sorted, de-duplicated list of file metadata. 1081 * In non-recursive mode (the default), we use only the files we're 1082 * given. 1083 * Otherwise, directories are recursively examined. 1084 * Returns zero on failure, non-zero on success. 1085 * On success, "fl" will need to be freed with flist_free(). 1086 */ 1087int 1088flist_gen(struct sess *sess, size_t argc, char **argv, struct flist **flp, 1089 size_t *sz) 1090{ 1091 int rc; 1092 1093 assert(argc > 0); 1094 rc = sess->opts->recursive ? 1095 flist_gen_dirs(sess, argc, argv, flp, sz) : 1096 flist_gen_files(sess, argc, argv, flp, sz); 1097 1098 /* After scanning, lock our file-system view. */ 1099 1100 if (unveil(NULL, NULL) == -1) { 1101 ERR(sess, "unveil"); 1102 return 0; 1103 } 1104 if (!rc) 1105 return 0; 1106 1107 qsort(*flp, *sz, sizeof(struct flist), flist_cmp); 1108 1109 if (flist_dedupe(sess, flp, sz)) { 1110 flist_topdirs(sess, *flp, *sz); 1111 return 1; 1112 } 1113 1114 ERRX1(sess, "flist_dedupe"); 1115 flist_free(*flp, *sz); 1116 *flp = NULL; 1117 *sz = 0; 1118 return 0; 1119} 1120 1121/* 1122 * Generate a list of files in root to delete that are within the 1123 * top-level directories stipulated by "wfl". 1124 * Only handles symbolic links, directories, and regular files. 1125 * Returns zero on failure (fl and flsz will be NULL and zero), non-zero 1126 * on success. 1127 * On success, "fl" will need to be freed with flist_free(). 1128 */ 1129int 1130flist_gen_dels(struct sess *sess, const char *root, struct flist **fl, 1131 size_t *sz, const struct flist *wfl, size_t wflsz) 1132{ 1133 char **cargv = NULL; 1134 int rc = 0, c; 1135 FTS *fts = NULL; 1136 FTSENT *ent; 1137 struct flist *f; 1138 size_t cargvs = 0, i, j, max = 0, stripdir; 1139 ENTRY hent; 1140 ENTRY *hentp; 1141 1142 *fl = NULL; 1143 *sz = 0; 1144 1145 /* Only run this code when we're recursive. */ 1146 1147 if (!sess->opts->recursive) 1148 return 1; 1149 1150 /* 1151 * Gather up all top-level directories for scanning. 1152 * This is stipulated by rsync's --delete behaviour, where we 1153 * only delete things in the top-level directories given on the 1154 * command line. 1155 */ 1156 1157 assert(wflsz > 0); 1158 for (i = 0; i < wflsz; i++) 1159 if (FLSTAT_TOP_DIR & wfl[i].st.flags) 1160 cargvs++; 1161 if (cargvs == 0) 1162 return 1; 1163 1164 if ((cargv = calloc(cargvs + 1, sizeof(char *))) == NULL) { 1165 ERR(sess, "calloc"); 1166 return 0; 1167 } 1168 1169 /* 1170 * If we're given just a "." as the first entry, that means 1171 * we're doing a relative copy with a trailing slash. 1172 * Special-case this just for the sake of simplicity. 1173 * Otherwise, look through all top-levels. 1174 */ 1175 1176 if (wflsz && strcmp(wfl[0].wpath, ".") == 0) { 1177 assert(cargvs == 1); 1178 assert(S_ISDIR(wfl[0].st.mode)); 1179 if (asprintf(&cargv[0], "%s/", root) < 0) { 1180 ERR(sess, "asprintf"); 1181 cargv[0] = NULL; 1182 goto out; 1183 } 1184 cargv[1] = NULL; 1185 } else { 1186 for (i = j = 0; i < wflsz; i++) { 1187 if (!(FLSTAT_TOP_DIR & wfl[i].st.flags)) 1188 continue; 1189 assert(S_ISDIR(wfl[i].st.mode)); 1190 assert(strcmp(wfl[i].wpath, ".")); 1191 c = asprintf(&cargv[j], "%s/%s", root, wfl[i].wpath); 1192 if (c < 0) { 1193 ERR(sess, "asprintf"); 1194 cargv[j] = NULL; 1195 goto out; 1196 } 1197 LOG4(sess, "%s: will scan for deletions", cargv[j]); 1198 j++; 1199 } 1200 assert(j == cargvs); 1201 cargv[j] = NULL; 1202 } 1203 1204 LOG2(sess, "delete from %zu directories", cargvs); 1205 1206 /* 1207 * Next, use the standard hcreate(3) hashtable interface to hash 1208 * all of the files that we want to synchronise. 1209 * This way, we'll be able to determine which files we want to 1210 * delete in O(n) time instead of O(n * search) time. 1211 * Plus, we can do the scan in-band and only allocate the files 1212 * we want to delete. 1213 */ 1214 1215 if (!hcreate(wflsz)) { 1216 ERR(sess, "hcreate"); 1217 goto out; 1218 } 1219 1220 for (i = 0; i < wflsz; i++) { 1221 memset(&hent, 0, sizeof(ENTRY)); 1222 if ((hent.key = strdup(wfl[i].wpath)) == NULL) { 1223 ERR(sess, "strdup"); 1224 goto out; 1225 } 1226 if ((hentp = hsearch(hent, ENTER)) == NULL) { 1227 ERR(sess, "hsearch"); 1228 goto out; 1229 } else if (hentp->key != hent.key) { 1230 ERRX(sess, "%s: duplicate", wfl[i].wpath); 1231 free(hent.key); 1232 goto out; 1233 } 1234 } 1235 1236 /* 1237 * Now we're going to try to descend into all of the top-level 1238 * directories stipulated by the file list. 1239 * If the directories don't exist, it's ok. 1240 */ 1241 1242 if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) { 1243 ERR(sess, "fts_open"); 1244 goto out; 1245 } 1246 1247 stripdir = strlen(root) + 1; 1248 errno = 0; 1249 while ((ent = fts_read(fts)) != NULL) { 1250 if (ent->fts_info == FTS_NS) 1251 continue; 1252 if (!flist_fts_check(sess, ent)) { 1253 errno = 0; 1254 continue; 1255 } else if (stripdir >= ent->fts_pathlen) 1256 continue; 1257 1258 /* Look up in hashtable. */ 1259 1260 memset(&hent, 0, sizeof(ENTRY)); 1261 hent.key = ent->fts_path + stripdir; 1262 if (hsearch(hent, FIND) != NULL) 1263 continue; 1264 1265 /* Not found: we'll delete it. */ 1266 1267 if (!flist_realloc(sess, fl, sz, &max)) { 1268 ERRX1(sess, "flist_realloc"); 1269 goto out; 1270 } 1271 f = &(*fl)[*sz - 1]; 1272 1273 if ((f->path = strdup(ent->fts_path)) == NULL) { 1274 ERR(sess, "strdup"); 1275 goto out; 1276 } 1277 f->wpath = f->path + stripdir; 1278 assert(ent->fts_statp != NULL); 1279 flist_copy_stat(f, ent->fts_statp); 1280 errno = 0; 1281 } 1282 1283 if (errno) { 1284 ERR(sess, "fts_read"); 1285 goto out; 1286 } 1287 1288 qsort(*fl, *sz, sizeof(struct flist), flist_cmp); 1289 rc = 1; 1290out: 1291 if (fts != NULL) 1292 fts_close(fts); 1293 for (i = 0; i < cargvs; i++) 1294 free(cargv[i]); 1295 free(cargv); 1296 hdestroy(); 1297 return rc; 1298} 1299 1300/* 1301 * Delete all files and directories in "fl". 1302 * If called with a zero-length "fl", does nothing. 1303 * If dry_run is specified, simply write what would be done. 1304 * Return zero on failure, non-zero on success. 1305 */ 1306int 1307flist_del(struct sess *sess, int root, const struct flist *fl, size_t flsz) 1308{ 1309 ssize_t i; 1310 int flag; 1311 1312 if (flsz == 0) 1313 return 1; 1314 1315 assert(sess->opts->del); 1316 assert(sess->opts->recursive); 1317 1318 for (i = flsz - 1; i >= 0; i--) { 1319 LOG1(sess, "%s: deleting", fl[i].wpath); 1320 if (sess->opts->dry_run) 1321 continue; 1322 assert(root != -1); 1323 flag = S_ISDIR(fl[i].st.mode) ? AT_REMOVEDIR : 0; 1324 if (unlinkat(root, fl[i].wpath, flag) == -1 && 1325 errno != ENOENT) { 1326 ERR(sess, "%s: unlinkat", fl[i].wpath); 1327 return 0; 1328 } 1329 } 1330 1331 return 1; 1332} 1333