1/* 2 * Copyright (C) 2003 Sistina Software 3 * 4 * This file is released under the LGPL. 5 */ 6 7#include <linux/init.h> 8#include <linux/slab.h> 9#include <linux/module.h> 10#include <linux/vmalloc.h> 11 12#include "dm-log.h" 13#include "dm-io.h" 14 15#define DM_MSG_PREFIX "mirror log" 16 17static LIST_HEAD(_log_types); 18static DEFINE_SPINLOCK(_lock); 19 20int dm_register_dirty_log_type(struct dirty_log_type *type) 21{ 22 spin_lock(&_lock); 23 type->use_count = 0; 24 list_add(&type->list, &_log_types); 25 spin_unlock(&_lock); 26 27 return 0; 28} 29 30int dm_unregister_dirty_log_type(struct dirty_log_type *type) 31{ 32 spin_lock(&_lock); 33 34 if (type->use_count) 35 DMWARN("Attempt to unregister a log type that is still in use"); 36 else 37 list_del(&type->list); 38 39 spin_unlock(&_lock); 40 41 return 0; 42} 43 44static struct dirty_log_type *get_type(const char *type_name) 45{ 46 struct dirty_log_type *type; 47 48 spin_lock(&_lock); 49 list_for_each_entry (type, &_log_types, list) 50 if (!strcmp(type_name, type->name)) { 51 if (!type->use_count && !try_module_get(type->module)){ 52 spin_unlock(&_lock); 53 return NULL; 54 } 55 type->use_count++; 56 spin_unlock(&_lock); 57 return type; 58 } 59 60 spin_unlock(&_lock); 61 return NULL; 62} 63 64static void put_type(struct dirty_log_type *type) 65{ 66 spin_lock(&_lock); 67 if (!--type->use_count) 68 module_put(type->module); 69 spin_unlock(&_lock); 70} 71 72struct dirty_log *dm_create_dirty_log(const char *type_name, struct dm_target *ti, 73 unsigned int argc, char **argv) 74{ 75 struct dirty_log_type *type; 76 struct dirty_log *log; 77 78 log = kmalloc(sizeof(*log), GFP_KERNEL); 79 if (!log) 80 return NULL; 81 82 type = get_type(type_name); 83 if (!type) { 84 kfree(log); 85 return NULL; 86 } 87 88 log->type = type; 89 if (type->ctr(log, ti, argc, argv)) { 90 kfree(log); 91 put_type(type); 92 return NULL; 93 } 94 95 return log; 96} 97 98void dm_destroy_dirty_log(struct dirty_log *log) 99{ 100 log->type->dtr(log); 101 put_type(log->type); 102 kfree(log); 103} 104 105/* 106 * Magic for persistent mirrors: "MiRr" 107 */ 108#define MIRROR_MAGIC 0x4D695272 109 110/* 111 * The on-disk version of the metadata. 112 */ 113#define MIRROR_DISK_VERSION 2 114#define LOG_OFFSET 2 115 116struct log_header { 117 uint32_t magic; 118 119 /* 120 * Simple, incrementing version. no backward 121 * compatibility. 122 */ 123 uint32_t version; 124 sector_t nr_regions; 125}; 126 127struct log_c { 128 struct dm_target *ti; 129 int touched; 130 uint32_t region_size; 131 unsigned int region_count; 132 region_t sync_count; 133 134 unsigned bitset_uint32_count; 135 uint32_t *clean_bits; 136 uint32_t *sync_bits; 137 uint32_t *recovering_bits; 138 139 int sync_search; 140 141 /* Resync flag */ 142 enum sync { 143 DEFAULTSYNC, /* Synchronize if necessary */ 144 NOSYNC, /* Devices known to be already in sync */ 145 FORCESYNC, /* Force a sync to happen */ 146 } sync; 147 148 struct dm_io_request io_req; 149 150 /* 151 * Disk log fields 152 */ 153 int log_dev_failed; 154 struct dm_dev *log_dev; 155 struct log_header header; 156 157 struct io_region header_location; 158 struct log_header *disk_header; 159}; 160 161/* 162 * The touched member needs to be updated every time we access 163 * one of the bitsets. 164 */ 165static inline int log_test_bit(uint32_t *bs, unsigned bit) 166{ 167 return ext2_test_bit(bit, (unsigned long *) bs) ? 1 : 0; 168} 169 170static inline void log_set_bit(struct log_c *l, 171 uint32_t *bs, unsigned bit) 172{ 173 ext2_set_bit(bit, (unsigned long *) bs); 174 l->touched = 1; 175} 176 177static inline void log_clear_bit(struct log_c *l, 178 uint32_t *bs, unsigned bit) 179{ 180 ext2_clear_bit(bit, (unsigned long *) bs); 181 l->touched = 1; 182} 183 184/*---------------------------------------------------------------- 185 * Header IO 186 *--------------------------------------------------------------*/ 187static void header_to_disk(struct log_header *core, struct log_header *disk) 188{ 189 disk->magic = cpu_to_le32(core->magic); 190 disk->version = cpu_to_le32(core->version); 191 disk->nr_regions = cpu_to_le64(core->nr_regions); 192} 193 194static void header_from_disk(struct log_header *core, struct log_header *disk) 195{ 196 core->magic = le32_to_cpu(disk->magic); 197 core->version = le32_to_cpu(disk->version); 198 core->nr_regions = le64_to_cpu(disk->nr_regions); 199} 200 201static int rw_header(struct log_c *lc, int rw) 202{ 203 lc->io_req.bi_rw = rw; 204 lc->io_req.mem.ptr.vma = lc->disk_header; 205 lc->io_req.notify.fn = NULL; 206 207 return dm_io(&lc->io_req, 1, &lc->header_location, NULL); 208} 209 210static int read_header(struct log_c *log) 211{ 212 int r; 213 214 r = rw_header(log, READ); 215 if (r) 216 return r; 217 218 header_from_disk(&log->header, log->disk_header); 219 220 /* New log required? */ 221 if (log->sync != DEFAULTSYNC || log->header.magic != MIRROR_MAGIC) { 222 log->header.magic = MIRROR_MAGIC; 223 log->header.version = MIRROR_DISK_VERSION; 224 log->header.nr_regions = 0; 225 } 226 227#ifdef __LITTLE_ENDIAN 228 if (log->header.version == 1) 229 log->header.version = 2; 230#endif 231 232 if (log->header.version != MIRROR_DISK_VERSION) { 233 DMWARN("incompatible disk log version"); 234 return -EINVAL; 235 } 236 237 return 0; 238} 239 240static inline int write_header(struct log_c *log) 241{ 242 header_to_disk(&log->header, log->disk_header); 243 return rw_header(log, WRITE); 244} 245 246/*---------------------------------------------------------------- 247 * core log constructor/destructor 248 * 249 * argv contains region_size followed optionally by [no]sync 250 *--------------------------------------------------------------*/ 251#define BYTE_SHIFT 3 252static int create_log_context(struct dirty_log *log, struct dm_target *ti, 253 unsigned int argc, char **argv, 254 struct dm_dev *dev) 255{ 256 enum sync sync = DEFAULTSYNC; 257 258 struct log_c *lc; 259 uint32_t region_size; 260 unsigned int region_count; 261 size_t bitset_size, buf_size; 262 int r; 263 264 if (argc < 1 || argc > 2) { 265 DMWARN("wrong number of arguments to mirror log"); 266 return -EINVAL; 267 } 268 269 if (argc > 1) { 270 if (!strcmp(argv[1], "sync")) 271 sync = FORCESYNC; 272 else if (!strcmp(argv[1], "nosync")) 273 sync = NOSYNC; 274 else { 275 DMWARN("unrecognised sync argument to mirror log: %s", 276 argv[1]); 277 return -EINVAL; 278 } 279 } 280 281 if (sscanf(argv[0], "%u", ®ion_size) != 1) { 282 DMWARN("invalid region size string"); 283 return -EINVAL; 284 } 285 286 region_count = dm_sector_div_up(ti->len, region_size); 287 288 lc = kmalloc(sizeof(*lc), GFP_KERNEL); 289 if (!lc) { 290 DMWARN("couldn't allocate core log"); 291 return -ENOMEM; 292 } 293 294 lc->ti = ti; 295 lc->touched = 0; 296 lc->region_size = region_size; 297 lc->region_count = region_count; 298 lc->sync = sync; 299 300 /* 301 * Work out how many "unsigned long"s we need to hold the bitset. 302 */ 303 bitset_size = dm_round_up(region_count, 304 sizeof(*lc->clean_bits) << BYTE_SHIFT); 305 bitset_size >>= BYTE_SHIFT; 306 307 lc->bitset_uint32_count = bitset_size / sizeof(*lc->clean_bits); 308 309 /* 310 * Disk log? 311 */ 312 if (!dev) { 313 lc->clean_bits = vmalloc(bitset_size); 314 if (!lc->clean_bits) { 315 DMWARN("couldn't allocate clean bitset"); 316 kfree(lc); 317 return -ENOMEM; 318 } 319 lc->disk_header = NULL; 320 } else { 321 lc->log_dev = dev; 322 lc->log_dev_failed = 0; 323 lc->header_location.bdev = lc->log_dev->bdev; 324 lc->header_location.sector = 0; 325 326 /* 327 * Buffer holds both header and bitset. 328 */ 329 buf_size = dm_round_up((LOG_OFFSET << SECTOR_SHIFT) + 330 bitset_size, ti->limits.hardsect_size); 331 lc->header_location.count = buf_size >> SECTOR_SHIFT; 332 lc->io_req.mem.type = DM_IO_VMA; 333 lc->io_req.client = dm_io_client_create(dm_div_up(buf_size, 334 PAGE_SIZE)); 335 if (IS_ERR(lc->io_req.client)) { 336 r = PTR_ERR(lc->io_req.client); 337 DMWARN("couldn't allocate disk io client"); 338 kfree(lc); 339 return -ENOMEM; 340 } 341 342 lc->disk_header = vmalloc(buf_size); 343 if (!lc->disk_header) { 344 DMWARN("couldn't allocate disk log buffer"); 345 kfree(lc); 346 return -ENOMEM; 347 } 348 349 lc->clean_bits = (void *)lc->disk_header + 350 (LOG_OFFSET << SECTOR_SHIFT); 351 } 352 353 memset(lc->clean_bits, -1, bitset_size); 354 355 lc->sync_bits = vmalloc(bitset_size); 356 if (!lc->sync_bits) { 357 DMWARN("couldn't allocate sync bitset"); 358 if (!dev) 359 vfree(lc->clean_bits); 360 vfree(lc->disk_header); 361 kfree(lc); 362 return -ENOMEM; 363 } 364 memset(lc->sync_bits, (sync == NOSYNC) ? -1 : 0, bitset_size); 365 lc->sync_count = (sync == NOSYNC) ? region_count : 0; 366 367 lc->recovering_bits = vmalloc(bitset_size); 368 if (!lc->recovering_bits) { 369 DMWARN("couldn't allocate sync bitset"); 370 vfree(lc->sync_bits); 371 if (!dev) 372 vfree(lc->clean_bits); 373 vfree(lc->disk_header); 374 kfree(lc); 375 return -ENOMEM; 376 } 377 memset(lc->recovering_bits, 0, bitset_size); 378 lc->sync_search = 0; 379 log->context = lc; 380 381 return 0; 382} 383 384static int core_ctr(struct dirty_log *log, struct dm_target *ti, 385 unsigned int argc, char **argv) 386{ 387 return create_log_context(log, ti, argc, argv, NULL); 388} 389 390static void destroy_log_context(struct log_c *lc) 391{ 392 vfree(lc->sync_bits); 393 vfree(lc->recovering_bits); 394 kfree(lc); 395} 396 397static void core_dtr(struct dirty_log *log) 398{ 399 struct log_c *lc = (struct log_c *) log->context; 400 401 vfree(lc->clean_bits); 402 destroy_log_context(lc); 403} 404 405/*---------------------------------------------------------------- 406 * disk log constructor/destructor 407 * 408 * argv contains log_device region_size followed optionally by [no]sync 409 *--------------------------------------------------------------*/ 410static int disk_ctr(struct dirty_log *log, struct dm_target *ti, 411 unsigned int argc, char **argv) 412{ 413 int r; 414 struct dm_dev *dev; 415 416 if (argc < 2 || argc > 3) { 417 DMWARN("wrong number of arguments to disk mirror log"); 418 return -EINVAL; 419 } 420 421 r = dm_get_device(ti, argv[0], 0, 0, 422 FMODE_READ | FMODE_WRITE, &dev); 423 if (r) 424 return r; 425 426 r = create_log_context(log, ti, argc - 1, argv + 1, dev); 427 if (r) { 428 dm_put_device(ti, dev); 429 return r; 430 } 431 432 return 0; 433} 434 435static void disk_dtr(struct dirty_log *log) 436{ 437 struct log_c *lc = (struct log_c *) log->context; 438 439 dm_put_device(lc->ti, lc->log_dev); 440 vfree(lc->disk_header); 441 dm_io_client_destroy(lc->io_req.client); 442 destroy_log_context(lc); 443} 444 445static int count_bits32(uint32_t *addr, unsigned size) 446{ 447 int count = 0, i; 448 449 for (i = 0; i < size; i++) { 450 count += hweight32(*(addr+i)); 451 } 452 return count; 453} 454 455static void fail_log_device(struct log_c *lc) 456{ 457 if (lc->log_dev_failed) 458 return; 459 460 lc->log_dev_failed = 1; 461 dm_table_event(lc->ti->table); 462} 463 464static int disk_resume(struct dirty_log *log) 465{ 466 int r; 467 unsigned i; 468 struct log_c *lc = (struct log_c *) log->context; 469 size_t size = lc->bitset_uint32_count * sizeof(uint32_t); 470 471 /* read the disk header */ 472 r = read_header(lc); 473 if (r) { 474 DMWARN("%s: Failed to read header on mirror log device", 475 lc->log_dev->name); 476 fail_log_device(lc); 477 /* 478 * If the log device cannot be read, we must assume 479 * all regions are out-of-sync. If we simply return 480 * here, the state will be uninitialized and could 481 * lead us to return 'in-sync' status for regions 482 * that are actually 'out-of-sync'. 483 */ 484 lc->header.nr_regions = 0; 485 } 486 487 /* set or clear any new bits -- device has grown */ 488 if (lc->sync == NOSYNC) 489 for (i = lc->header.nr_regions; i < lc->region_count; i++) 490 log_set_bit(lc, lc->clean_bits, i); 491 else 492 for (i = lc->header.nr_regions; i < lc->region_count; i++) 493 log_clear_bit(lc, lc->clean_bits, i); 494 495 /* clear any old bits -- device has shrunk */ 496 for (i = lc->region_count; i % (sizeof(*lc->clean_bits) << BYTE_SHIFT); i++) 497 log_clear_bit(lc, lc->clean_bits, i); 498 499 /* copy clean across to sync */ 500 memcpy(lc->sync_bits, lc->clean_bits, size); 501 lc->sync_count = count_bits32(lc->clean_bits, lc->bitset_uint32_count); 502 lc->sync_search = 0; 503 504 /* set the correct number of regions in the header */ 505 lc->header.nr_regions = lc->region_count; 506 507 /* write the new header */ 508 r = write_header(lc); 509 if (r) { 510 DMWARN("%s: Failed to write header on mirror log device", 511 lc->log_dev->name); 512 fail_log_device(lc); 513 } 514 515 return r; 516} 517 518static uint32_t core_get_region_size(struct dirty_log *log) 519{ 520 struct log_c *lc = (struct log_c *) log->context; 521 return lc->region_size; 522} 523 524static int core_resume(struct dirty_log *log) 525{ 526 struct log_c *lc = (struct log_c *) log->context; 527 lc->sync_search = 0; 528 return 0; 529} 530 531static int core_is_clean(struct dirty_log *log, region_t region) 532{ 533 struct log_c *lc = (struct log_c *) log->context; 534 return log_test_bit(lc->clean_bits, region); 535} 536 537static int core_in_sync(struct dirty_log *log, region_t region, int block) 538{ 539 struct log_c *lc = (struct log_c *) log->context; 540 return log_test_bit(lc->sync_bits, region); 541} 542 543static int core_flush(struct dirty_log *log) 544{ 545 /* no op */ 546 return 0; 547} 548 549static int disk_flush(struct dirty_log *log) 550{ 551 int r; 552 struct log_c *lc = (struct log_c *) log->context; 553 554 /* only write if the log has changed */ 555 if (!lc->touched) 556 return 0; 557 558 r = write_header(lc); 559 if (r) 560 fail_log_device(lc); 561 else 562 lc->touched = 0; 563 564 return r; 565} 566 567static void core_mark_region(struct dirty_log *log, region_t region) 568{ 569 struct log_c *lc = (struct log_c *) log->context; 570 log_clear_bit(lc, lc->clean_bits, region); 571} 572 573static void core_clear_region(struct dirty_log *log, region_t region) 574{ 575 struct log_c *lc = (struct log_c *) log->context; 576 log_set_bit(lc, lc->clean_bits, region); 577} 578 579static int core_get_resync_work(struct dirty_log *log, region_t *region) 580{ 581 struct log_c *lc = (struct log_c *) log->context; 582 583 if (lc->sync_search >= lc->region_count) 584 return 0; 585 586 do { 587 *region = ext2_find_next_zero_bit( 588 (unsigned long *) lc->sync_bits, 589 lc->region_count, 590 lc->sync_search); 591 lc->sync_search = *region + 1; 592 593 if (*region >= lc->region_count) 594 return 0; 595 596 } while (log_test_bit(lc->recovering_bits, *region)); 597 598 log_set_bit(lc, lc->recovering_bits, *region); 599 return 1; 600} 601 602static void core_set_region_sync(struct dirty_log *log, region_t region, 603 int in_sync) 604{ 605 struct log_c *lc = (struct log_c *) log->context; 606 607 log_clear_bit(lc, lc->recovering_bits, region); 608 if (in_sync) { 609 log_set_bit(lc, lc->sync_bits, region); 610 lc->sync_count++; 611 } else if (log_test_bit(lc->sync_bits, region)) { 612 lc->sync_count--; 613 log_clear_bit(lc, lc->sync_bits, region); 614 } 615} 616 617static region_t core_get_sync_count(struct dirty_log *log) 618{ 619 struct log_c *lc = (struct log_c *) log->context; 620 621 return lc->sync_count; 622} 623 624#define DMEMIT_SYNC \ 625 if (lc->sync != DEFAULTSYNC) \ 626 DMEMIT("%ssync ", lc->sync == NOSYNC ? "no" : "") 627 628static int core_status(struct dirty_log *log, status_type_t status, 629 char *result, unsigned int maxlen) 630{ 631 int sz = 0; 632 struct log_c *lc = log->context; 633 634 switch(status) { 635 case STATUSTYPE_INFO: 636 DMEMIT("1 %s", log->type->name); 637 break; 638 639 case STATUSTYPE_TABLE: 640 DMEMIT("%s %u %u ", log->type->name, 641 lc->sync == DEFAULTSYNC ? 1 : 2, lc->region_size); 642 DMEMIT_SYNC; 643 } 644 645 return sz; 646} 647 648static int disk_status(struct dirty_log *log, status_type_t status, 649 char *result, unsigned int maxlen) 650{ 651 int sz = 0; 652 struct log_c *lc = log->context; 653 654 switch(status) { 655 case STATUSTYPE_INFO: 656 DMEMIT("3 %s %s %c", log->type->name, lc->log_dev->name, 657 lc->log_dev_failed ? 'D' : 'A'); 658 break; 659 660 case STATUSTYPE_TABLE: 661 DMEMIT("%s %u %s %u ", log->type->name, 662 lc->sync == DEFAULTSYNC ? 2 : 3, lc->log_dev->name, 663 lc->region_size); 664 DMEMIT_SYNC; 665 } 666 667 return sz; 668} 669 670static struct dirty_log_type _core_type = { 671 .name = "core", 672 .module = THIS_MODULE, 673 .ctr = core_ctr, 674 .dtr = core_dtr, 675 .resume = core_resume, 676 .get_region_size = core_get_region_size, 677 .is_clean = core_is_clean, 678 .in_sync = core_in_sync, 679 .flush = core_flush, 680 .mark_region = core_mark_region, 681 .clear_region = core_clear_region, 682 .get_resync_work = core_get_resync_work, 683 .set_region_sync = core_set_region_sync, 684 .get_sync_count = core_get_sync_count, 685 .status = core_status, 686}; 687 688static struct dirty_log_type _disk_type = { 689 .name = "disk", 690 .module = THIS_MODULE, 691 .ctr = disk_ctr, 692 .dtr = disk_dtr, 693 .suspend = disk_flush, 694 .resume = disk_resume, 695 .get_region_size = core_get_region_size, 696 .is_clean = core_is_clean, 697 .in_sync = core_in_sync, 698 .flush = disk_flush, 699 .mark_region = core_mark_region, 700 .clear_region = core_clear_region, 701 .get_resync_work = core_get_resync_work, 702 .set_region_sync = core_set_region_sync, 703 .get_sync_count = core_get_sync_count, 704 .status = disk_status, 705}; 706 707int __init dm_dirty_log_init(void) 708{ 709 int r; 710 711 r = dm_register_dirty_log_type(&_core_type); 712 if (r) 713 DMWARN("couldn't register core log"); 714 715 r = dm_register_dirty_log_type(&_disk_type); 716 if (r) { 717 DMWARN("couldn't register disk type"); 718 dm_unregister_dirty_log_type(&_core_type); 719 } 720 721 return r; 722} 723 724void dm_dirty_log_exit(void) 725{ 726 dm_unregister_dirty_log_type(&_disk_type); 727 dm_unregister_dirty_log_type(&_core_type); 728} 729 730EXPORT_SYMBOL(dm_register_dirty_log_type); 731EXPORT_SYMBOL(dm_unregister_dirty_log_type); 732EXPORT_SYMBOL(dm_create_dirty_log); 733EXPORT_SYMBOL(dm_destroy_dirty_log); 734