1/* 2 * Unix SMB/CIFS implementation. 3 * 4 * OneFS shadow copy implementation that utilizes the file system's native 5 * snapshot support. This file does all of the heavy lifting. 6 * 7 * Copyright (C) Dave Richards, 2007 8 * Copyright (C) Tim Prouty, 2009 9 * 10 * This program is free software; you can redistribute it and/or modify 11 * it under the terms of the GNU General Public License as published by 12 * the Free Software Foundation; either version 3 of the License, or 13 * (at your option) any later version. 14 * 15 * This program is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU General Public License for more details. 19 * 20 * You should have received a copy of the GNU General Public License 21 * along with this program; if not, see <http://www.gnu.org/licenses/>. 22 */ 23 24#include <ifs/ifs_syscalls.h> 25#include <sys/types.h> 26#include <sys/isi_enc.h> 27#include <sys/module.h> 28#include <sys/stat.h> 29#include <sys/syscall.h> 30#include <sys/time.h> 31#include <dirent.h> 32#include <errno.h> 33#include <fcntl.h> 34#include <limits.h> 35#include <search.h> 36#include <stdio.h> 37#include <stdlib.h> 38#include <string.h> 39#include <unistd.h> 40 41#include "onefs_shadow_copy.h" 42 43/* Copied from ../include/proto.h */ 44void become_root(void); 45void unbecome_root(void); 46 47#define SNAPSHOT_DIRECTORY ".snapshot" 48 49#define MAX_VERSIONS 64 50 51/** 52 * A snapshot object. 53 * 54 * During snapshot enumeration, snapshots are represented by snapshot objects 55 * and are stored in a snapshot set. The snapshot object represents one 56 * snapshot within the set. An important thing to note about the set is that 57 * the key of the snapshot object is the tv_sec component of the is_time 58 * member. What this means is that we only store one snapshot for each 59 * second. If multiple snapshots were created within the same second, we'll 60 * keep the earliest one and ignore the rest. Thus, not all snapshots are 61 * necessarily retained. 62 */ 63struct osc_snapshot { 64 char * is_name; 65 struct timespec is_time; 66 struct osc_snapshot * is_next; 67}; 68 69/** 70 * A snapshot context object. 71 * 72 * Snapshot contexts are used to pass information throughout the snapshot 73 * enumeration routines. As a result, snapshot contexts are stored on the 74 * stack and are both created and destroyed within a single API function. 75 */ 76struct osc_snapshot_ctx { 77 void * osc_set; 78 struct timespec osc_mtime; 79}; 80 81/** 82 * A directory context. 83 * 84 * Directory contexts are the underlying data structured used to enumerate 85 * snapshot versions. An opendir()-, readdir()- and closedir()-like interface 86 * is provided that utilizes directory contexts. At the API level, directory 87 * contexts are passed around as void pointers. Directory contexts are 88 * allocated on the heap and their lifetime is dictated by the calling 89 * routine. 90 */ 91struct osc_directory_ctx { 92 size_t idc_pos; 93 size_t idc_len; 94 size_t idc_size; 95 char ** idc_version; 96}; 97 98/** 99 * Return a file descriptor to the STF names directory. 100 * 101 * Opens the STF names directory and returns a file descriptor to it. 102 * Subsequent calls return the same value (avoiding the need to re-open the 103 * directory repeatedly). Caveat caller: don't close the file descriptor or 104 * you will be shot! 105 */ 106static int 107osc_get_names_directory_fd(void) 108{ 109 static int fd = -1; 110 111 if (fd == -1) { 112 become_root(); 113 fd = pctl2_lin_open(STF_NAMES_LIN, HEAD_SNAPID, O_RDONLY); 114 unbecome_root(); 115 } 116 117 return fd; 118} 119 120/** 121 * Compare two time values. 122 * 123 * Accepts two struct timespecs and compares the tv_sec components of these 124 * values. It returns -1 if the first value preceeds the second, 0 if they 125 * are equal and +1 if the first values succeeds the second. 126 */ 127static int 128osc_time_compare(const struct timespec *tsp1, const struct timespec *tsp2) 129{ 130 return (tsp1->tv_sec < tsp2->tv_sec) ? -1 : 131 (tsp1->tv_sec > tsp2->tv_sec) ? +1 : 0; 132} 133 134/** 135 * Compare two timespec values. 136 * 137 * Compares two timespec values. It returns -1 if the first value preceeds 138 * the second, 0 if they are equal and +1 if the first values succeeds the 139 * second. 140 */ 141static int 142osc_timespec_compare(const struct timespec *tsp1, const struct timespec *tsp2) 143{ 144 return (tsp1->tv_sec < tsp2->tv_sec) ? -1 : 145 (tsp1->tv_sec > tsp2->tv_sec) ? +1 : 146 (tsp1->tv_nsec < tsp2->tv_nsec) ? -1 : 147 (tsp1->tv_nsec > tsp2->tv_nsec) ? +1 : 0; 148} 149 150/** 151 * Determine whether a timespec value is zero. 152 * 153 * Return 1 if the struct timespec provided is zero and 0 otherwise. 154 */ 155static int 156osc_timespec_is_zero(const struct timespec *tsp) 157{ 158 return (tsp->tv_sec == 0) && 159 (tsp->tv_nsec == 0); 160} 161 162/** 163 * Create a snapshot object. 164 * 165 * Allocates and initializes a new snapshot object. In addition to allocating 166 * space for the snapshot object itself, space is allocated for the snapshot 167 * name. Both the name and time are then copied to the new object. 168 */ 169static struct osc_snapshot * 170osc_snapshot_create(const char *name, const struct timespec *tsp) 171{ 172 struct osc_snapshot *isp; 173 174 isp = malloc(sizeof *isp); 175 if (isp == NULL) 176 goto out; 177 178 isp->is_name = malloc(strlen(name) + 1); 179 if (isp->is_name == NULL) { 180 free(isp); 181 isp = NULL; 182 goto out; 183 } 184 185 strcpy(isp->is_name, name); 186 isp->is_time = *tsp; 187 isp->is_next = NULL; 188 189 out: 190 return isp; 191} 192 193/** 194 * Destroy a snapshot object. 195 * 196 * Frees both the name and the snapshot object itself. Appropriate NULL 197 * checking is performed because counting on free to do so is immoral. 198 */ 199static void 200osc_snapshot_destroy(struct osc_snapshot *isp) 201{ 202 if (isp != NULL) { 203 if (isp->is_name != NULL) 204 free(isp->is_name); 205 free(isp); 206 } 207} 208 209/** 210 * Destroy all snapshots in the snapshot list. 211 * 212 * Calls osc_snapshot_destroy() on each snapshot in the list. 213 */ 214static void 215osc_snapshot_destroy_list(struct osc_snapshot *isp) 216{ 217 struct osc_snapshot *tmp; 218 219 while (isp != NULL) { 220 tmp = isp; 221 isp = isp->is_next; 222 osc_snapshot_destroy(tmp); 223 } 224} 225 226/** 227 * Compare two snapshot objects. 228 * 229 * Compare two snapshot objects. It is really just a wrapper for 230 * osc_time_compare(), which compare the time value of the two snapshots. 231 * N.B. time value in this context refers only to the tv_sec component. 232 */ 233static int 234osc_snapshot_compare(const void *vp1, const void *vp2) 235{ 236 const struct osc_snapshot *isp1 = vp1; 237 const struct osc_snapshot *isp2 = vp2; 238 239 return -osc_time_compare(&isp1->is_time, &isp2->is_time); 240} 241 242/** 243 * Insert a snapshot into the snapshot set. 244 * 245 * Inserts a new snapshot into the snapshot set. The key for snapshots is 246 * their creation time (it's actually the seconds portion of the creation 247 * time). If a duplicate snapshot is found in the set, the new snapshot is 248 * added to a linked list of snapshots for that second. 249 */ 250static void 251osc_snapshot_insert(struct osc_snapshot_ctx *oscp, const char *name, 252 const struct timespec *tsp, int *errorp) 253{ 254 struct osc_snapshot *isp1; 255 struct osc_snapshot **ispp; 256 257 isp1 = osc_snapshot_create(name, tsp); 258 if (isp1 == NULL) { 259 *errorp = 1; 260 return; 261 } 262 263 ispp = tsearch(isp1, &oscp->osc_set, osc_snapshot_compare); 264 if (ispp != NULL) { 265 struct osc_snapshot *isp2 = *ispp; 266 267 /* If this is the only snapshot for this second, we're done. */ 268 if (isp2 == isp1) 269 return; 270 271 /* Collision: add the new snapshot to the list. */ 272 isp1->is_next = isp2->is_next; 273 isp2->is_next = isp1; 274 275 } else 276 *errorp = 1; 277 278} 279 280/** 281 * Process the next snapshot. 282 * 283 * Called for (almost) every entry in a .snapshot directory, ("." and ".." are 284 * ignored in osc_process_snapshot_directory()). All other entries are passed 285 * to osc_process_snapshot(), however. These entries can fall into one of two 286 * categories: snapshot names and snapshot aliases. We only care about 287 * snapshot names (as aliases are just redundant entries). Once it verifies 288 * that name represents a valid snapshot name, it calls fstat() to get the 289 * creation time of the snapshot and then calls osc_snapshot_insert() to add 290 * this entry to the snapshot set. 291 */ 292static void 293osc_process_snapshot(struct osc_snapshot_ctx *oscp, const char *name, 294 int *errorp) 295{ 296 int fd; 297 struct stf_stat stf_stat; 298 struct stat stbuf; 299 300 fd = osc_get_names_directory_fd(); 301 if (fd == -1) 302 goto out; 303 304 fd = enc_openat(fd, name, ENC_DEFAULT, O_RDONLY); 305 if (fd == -1) 306 goto out; 307 308 memset(&stf_stat, 0, sizeof stf_stat); 309 if (ifs_snap_stat(fd, &stf_stat) == -1) 310 goto out; 311 312 if (stf_stat.sf_type != SF_STF) 313 goto out; 314 315 if (fstat(fd, &stbuf) == -1) 316 goto out; 317 318 osc_snapshot_insert(oscp, name, &stbuf.st_birthtimespec, errorp); 319 320 out: 321 if (fd != -1) 322 close(fd); 323} 324 325/** 326 * Process a snapshot directory. 327 * 328 * Opens the snapshot directory and calls osc_process_snapshot() for each 329 * entry. (Well ok, "." and ".." are ignored.) The goal here is to add all 330 * snapshots in the directory to the snapshot set. 331 */ 332static void 333osc_process_snapshot_directory(struct osc_snapshot_ctx *oscp, int *errorp) 334{ 335 int fd; 336 struct stat stbuf; 337 DIR *dirp; 338 struct dirent *dp; 339 340 fd = osc_get_names_directory_fd(); 341 if (fd == -1) 342 goto out; 343 344 if (fstat(fd, &stbuf) == -1) 345 goto out; 346 347 dirp = opendir(SNAPSHOT_DIRECTORY); 348 if (dirp == NULL) 349 goto out; 350 351 for (;;) { 352 dp = readdir(dirp); 353 if (dp == NULL) 354 break; 355 356 if (dp->d_name[0] == '.' && (dp->d_name[1] == '\0' || 357 (dp->d_name[1] == '.' && dp->d_name[2] == '\0'))) 358 continue; 359 360 osc_process_snapshot(oscp, dp->d_name, errorp); 361 if (*errorp) 362 break; 363 } 364 365 closedir(dirp); 366 367 if (!*errorp) 368 oscp->osc_mtime = stbuf.st_mtimespec; 369 370 out: 371 return; 372} 373 374/** 375 * Initialize a snapshot context object. 376 * 377 * Clears all members of the context object. 378 */ 379static void 380osc_snapshot_ctx_init(struct osc_snapshot_ctx *oscp) 381{ 382 memset(oscp, 0, sizeof *oscp); 383} 384 385/** 386 * Desoy a snapshot context object. 387 * 388 * Frees all snapshots associated with the snapshot context and then calls 389 * osc_snapshot_ctx_init() to re-initialize the context object. 390 */ 391static void 392osc_snapshot_ctx_clean(struct osc_snapshot_ctx *oscp) 393{ 394 struct osc_snapshot *tmp; 395 396 while (oscp->osc_set != NULL) { 397 tmp = *(void **)oscp->osc_set; 398 tdelete(tmp, &oscp->osc_set, osc_snapshot_compare); 399 osc_snapshot_destroy_list(tmp); 400 } 401 402 osc_snapshot_ctx_init(oscp); 403} 404 405/** 406 * Return the "global" snapshot context. 407 * 408 * We maintain a single open snapshot context. Return a pointer to it. 409 */ 410static struct osc_snapshot_ctx * 411osc_get_snapshot_ctx(void) 412{ 413 static struct osc_snapshot_ctx osc = { 0, { 0, 0 } }; 414 415 return &osc; 416} 417 418/** 419 * Determine whether a snapshot context is still valid. 420 * 421 * "Valid" in this context means "reusable". We can re-use a previous 422 * snapshot context iff we successfully built a previous snapshot context 423 * and no snapshots have been created or deleted since we did so. 424 * A "names" directory exists within our snapshot 425 * implementation in which all snapshot names are entered. Each time a 426 * snapshot is created or deleted, an entry must be added or removed. 427 * When this happens the modification time on the "names" directory 428 * changes. Therefore, a snapshot context is valid iff the context 429 * pointer is non-NULL, the cached modification time is non-zero 430 * (zero means uninitialized), and the modification time of the "names" 431 * directory matches the cached value. 432 */ 433static int 434osc_snapshot_ctx_is_valid(struct osc_snapshot_ctx *oscp) 435{ 436 int fd; 437 struct stat stbuf; 438 439 if (oscp == NULL) 440 return 0; 441 442 if (osc_timespec_is_zero(&oscp->osc_mtime)) 443 return 0; 444 445 fd = osc_get_names_directory_fd(); 446 if (fd == -1) 447 return 0; 448 449 if (fstat(fd, &stbuf) == -1) 450 return 0; 451 452 if (osc_timespec_compare(&oscp->osc_mtime, &stbuf.st_mtimespec) != 0) 453 return 0; 454 455 return 1; 456} 457 458/** 459 * Create and initialize a directory context. 460 * 461 * Allocates a directory context from the heap and initializes it. 462 */ 463static struct osc_directory_ctx * 464osc_directory_ctx_create(void) 465{ 466 struct osc_directory_ctx *idcp; 467 468 idcp = malloc(sizeof *idcp); 469 if (idcp != NULL) 470 memset(idcp, 0, sizeof *idcp); 471 472 return idcp; 473} 474 475/** 476 * Destroy a directory context. 477 * 478 * Frees any versions associated with the directory context and then frees the 479 * context itself. 480 */ 481static void 482osc_directory_ctx_destroy(struct osc_directory_ctx *idcp) 483{ 484 int i; 485 486 if (idcp == NULL) 487 return; 488 489 for (i = 0; i < idcp->idc_len; i++) 490 free(idcp->idc_version[i]); 491 492 free(idcp); 493} 494 495/** 496 * Expand the size of a directory context's version list. 497 * 498 * If osc_directory_ctx_append_version() detects that the version list is too 499 * small to accomodate a new version string, it called 500 * osc_directory_ctx_expand_version_list() to expand the version list. 501 */ 502static void 503osc_directory_ctx_expand_version_list(struct osc_snapshot_ctx *oscp, 504 struct osc_directory_ctx *idcp, int *errorp) 505{ 506 size_t size; 507 char **cpp; 508 509 size = idcp->idc_size * 2 ?: 1; 510 511 cpp = realloc(idcp->idc_version, size * sizeof (char *)); 512 if (cpp == NULL) { 513 *errorp = 1; 514 return; 515 } 516 517 idcp->idc_size = size; 518 idcp->idc_version = cpp; 519} 520 521/** 522 * Append a new version to a directory context. 523 * 524 * Appends a snapshot version to the 525 * directory context's version list. 526 */ 527static void 528osc_directory_ctx_append_version(struct osc_snapshot_ctx *oscp, 529 struct osc_directory_ctx *idcp, const struct timespec *tsp, int *errorp) 530{ 531 char *cp; 532 struct tm *tmp; 533 char text[64]; 534 535 if (idcp->idc_len >= MAX_VERSIONS) 536 return; 537 538 if (idcp->idc_len >= idcp->idc_size) { 539 osc_directory_ctx_expand_version_list(oscp, idcp, errorp); 540 if (*errorp) 541 return; 542 } 543 544 tmp = gmtime(&tsp->tv_sec); 545 if (tmp == NULL) { 546 *errorp = 1; 547 return; 548 } 549 550 snprintf(text, sizeof text, 551 "@GMT-%04u.%02u.%02u-%02u.%02u.%02u", 552 tmp->tm_year + 1900, 553 tmp->tm_mon + 1, 554 tmp->tm_mday, 555 tmp->tm_hour, 556 tmp->tm_min, 557 tmp->tm_sec); 558 559 cp = malloc(strlen(text) + 1); 560 if (cp == NULL) { 561 *errorp = 1; 562 return; 563 } 564 565 strcpy(cp, text); 566 567 idcp->idc_version[idcp->idc_len++] = cp; 568} 569 570/** 571 * Make a directory context from a snapshot context. 572 * 573 * Once a snapshot context has been completely filled-in, 574 * osc_make_directory_ctx() is used to build a directory context from it. The 575 * idea here is to create version for each snapshot in the snapshot set. 576 */ 577static void 578osc_make_directory_ctx(struct osc_snapshot_ctx *oscp, 579 struct osc_directory_ctx *idcp, int *errorp) 580{ 581 static void 582 walk(const void *vp, VISIT v, int level) 583 { 584 const struct osc_snapshot *isp; 585 586 if ((v != postorder && v != leaf) || *errorp) 587 return; 588 589 isp = *(const struct osc_snapshot **)(u_long)vp; 590 591 osc_directory_ctx_append_version(oscp, idcp, &isp->is_time, 592 errorp); 593 } 594 595 twalk(oscp->osc_set, walk); 596} 597 598/** 599 * Open a version directory. 600 * 601 * Opens a version directory. What this really means is that 602 * osc_version_opendir() returns a handle to a directory context, which can be 603 * used to retrieve version strings. 604 */ 605void * 606osc_version_opendir(void) 607{ 608 int error = 0; 609 struct osc_directory_ctx *idcp; 610 struct osc_snapshot_ctx *oscp; 611 612 idcp = osc_directory_ctx_create(); 613 if (idcp == NULL) 614 goto error_out; 615 616 oscp = osc_get_snapshot_ctx(); 617 618 if (!osc_snapshot_ctx_is_valid(oscp)) { 619 osc_snapshot_ctx_clean(oscp); 620 osc_process_snapshot_directory(oscp, &error); 621 if (error) 622 goto error_out; 623 } 624 625 osc_make_directory_ctx(oscp, idcp, &error); 626 if (error) 627 goto error_out; 628 629 goto out; 630 631 error_out: 632 if (idcp != NULL) { 633 osc_directory_ctx_destroy(idcp); 634 idcp = NULL; 635 } 636 637 out: 638 return (void *)idcp; 639} 640 641/** 642 * Read the next version directory entry. 643 * 644 * Returns the name of the next version in the version directory, or NULL if 645 * we're at the end of the directory. What this really does is return the 646 * next version from the version list stored in the directory context. 647 */ 648char * 649osc_version_readdir(void *vp) 650{ 651 struct osc_directory_ctx *idcp = vp; 652 653 if (idcp == NULL) 654 return NULL; 655 656 if (idcp->idc_pos >= idcp->idc_len) 657 return NULL; 658 659 return idcp->idc_version[idcp->idc_pos++]; 660} 661 662/** 663 * Close the version directory. 664 * 665 * Destroys the underlying directory context. 666 */ 667void 668osc_version_closedir(void *vp) 669{ 670 struct osc_directory_ctx *idcp = vp; 671 672 if (idcp != NULL) 673 osc_directory_ctx_destroy(idcp); 674} 675 676/** 677 * Canonicalize a path. 678 * 679 * Converts paths of the form @GMT-.. to paths of the form ../.snapshot/.. 680 * It's not the prettiest routine I've ever written, but what the heck? 681 */ 682char * 683osc_canonicalize_path(const char *path, char *snap_component) 684{ 685 int error = 0; 686 struct osc_snapshot_ctx *oscp; 687 struct tm tm; 688 int n; 689 struct osc_snapshot is; 690 struct osc_snapshot **ispp; 691 struct osc_snapshot *isp; 692 char *cpath = NULL; 693 char *cpath2 = NULL; 694 const char *snap_component_orig = snap_component; 695 struct stat sb; 696 697 oscp = osc_get_snapshot_ctx(); 698 699 if (!osc_snapshot_ctx_is_valid(oscp)) { 700 osc_snapshot_ctx_clean(oscp); 701 osc_process_snapshot_directory(oscp, &error); 702 if (error) 703 goto out; 704 } 705 706 memset(&tm, 0, sizeof tm); 707 n = sscanf(snap_component, 708 "@GMT-%4u.%2u.%2u-%2u.%2u.%2u", 709 &tm.tm_year, 710 &tm.tm_mon, 711 &tm.tm_mday, 712 &tm.tm_hour, 713 &tm.tm_min, 714 &tm.tm_sec); 715 if (n != 6) 716 goto out; 717 718 tm.tm_year -= 1900; 719 tm.tm_mon -= 1; 720 721 is.is_name = NULL; 722 is.is_time.tv_sec = timegm(&tm); 723 is.is_time.tv_nsec = 0; 724 725 ispp = tfind(&is, &oscp->osc_set, osc_snapshot_compare); 726 if (ispp == NULL) 727 goto out; 728 isp = *ispp; 729 730 /* Determine the path after "@GMT-..." */ 731 while (*snap_component != '/' && *snap_component != '\0') 732 snap_component++; 733 734 while (*snap_component == '/') 735 snap_component++; 736 737 cpath = malloc(strlen(SNAPSHOT_DIRECTORY) + strlen(isp->is_name) + 738 strlen(path) + 3); 739 740 if (cpath == NULL) 741 goto out; 742 743 /* 744 * Use the first snapshot that has a successful stat for the requested 745 * path. 746 */ 747 while (true) { 748 749 sprintf(cpath, "%s/%s", SNAPSHOT_DIRECTORY, isp->is_name); 750 751 /* Append path before "@GMT-..." */ 752 if (snap_component_orig != path) { 753 strcat(cpath, "/"); 754 strncat(cpath, path, snap_component_orig - path); 755 } 756 757 /* Append path after "@GMT-..." */ 758 if (*snap_component != '\0') { 759 strcat(cpath, "/"); 760 strcat(cpath, snap_component); 761 } 762 763 /* If there is a valid snapshot for this file, we're done. */ 764 if (stat(cpath, &sb) == 0) 765 break; 766 767 /* Try the next snapshot. If this was the last one, give up. */ 768 isp = isp->is_next; 769 if (isp == NULL) 770 break; 771 772 /* If the realloc fails, give up. */ 773 cpath2 = realloc(cpath, strlen(SNAPSHOT_DIRECTORY) + 774 strlen(isp->is_name) + strlen(path) + 3); 775 if (cpath2 == NULL) 776 break; 777 cpath = cpath2; 778 } 779 780 out: 781 return cpath; 782} 783