1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * Copyright (C) 2011 Red Hat, Inc. 4 * 5 * This file is released under the GPL. 6 */ 7 8#include "dm-space-map.h" 9#include "dm-space-map-common.h" 10#include "dm-space-map-metadata.h" 11 12#include <linux/list.h> 13#include <linux/slab.h> 14#include <linux/device-mapper.h> 15#include <linux/kernel.h> 16 17#define DM_MSG_PREFIX "space map metadata" 18 19/*----------------------------------------------------------------*/ 20 21/* 22 * An edge triggered threshold. 23 */ 24struct threshold { 25 bool threshold_set; 26 bool value_set; 27 dm_block_t threshold; 28 dm_block_t current_value; 29 dm_sm_threshold_fn fn; 30 void *context; 31}; 32 33static void threshold_init(struct threshold *t) 34{ 35 t->threshold_set = false; 36 t->value_set = false; 37} 38 39static void set_threshold(struct threshold *t, dm_block_t value, 40 dm_sm_threshold_fn fn, void *context) 41{ 42 t->threshold_set = true; 43 t->threshold = value; 44 t->fn = fn; 45 t->context = context; 46} 47 48static bool below_threshold(struct threshold *t, dm_block_t value) 49{ 50 return t->threshold_set && value <= t->threshold; 51} 52 53static bool threshold_already_triggered(struct threshold *t) 54{ 55 return t->value_set && below_threshold(t, t->current_value); 56} 57 58static void check_threshold(struct threshold *t, dm_block_t value) 59{ 60 if (below_threshold(t, value) && 61 !threshold_already_triggered(t)) 62 t->fn(t->context); 63 64 t->value_set = true; 65 t->current_value = value; 66} 67 68/*----------------------------------------------------------------*/ 69 70/* 71 * Space map interface. 72 * 73 * The low level disk format is written using the standard btree and 74 * transaction manager. This means that performing disk operations may 75 * cause us to recurse into the space map in order to allocate new blocks. 76 * For this reason we have a pool of pre-allocated blocks large enough to 77 * service any metadata_ll_disk operation. 78 */ 79 80/* 81 * FIXME: we should calculate this based on the size of the device. 82 * Only the metadata space map needs this functionality. 83 */ 84#define MAX_RECURSIVE_ALLOCATIONS 1024 85 86enum block_op_type { 87 BOP_INC, 88 BOP_DEC 89}; 90 91struct block_op { 92 enum block_op_type type; 93 dm_block_t b; 94 dm_block_t e; 95}; 96 97struct bop_ring_buffer { 98 unsigned int begin; 99 unsigned int end; 100 struct block_op bops[MAX_RECURSIVE_ALLOCATIONS + 1]; 101}; 102 103static void brb_init(struct bop_ring_buffer *brb) 104{ 105 brb->begin = 0; 106 brb->end = 0; 107} 108 109static bool brb_empty(struct bop_ring_buffer *brb) 110{ 111 return brb->begin == brb->end; 112} 113 114static unsigned int brb_next(struct bop_ring_buffer *brb, unsigned int old) 115{ 116 unsigned int r = old + 1; 117 118 return r >= ARRAY_SIZE(brb->bops) ? 0 : r; 119} 120 121static int brb_push(struct bop_ring_buffer *brb, 122 enum block_op_type type, dm_block_t b, dm_block_t e) 123{ 124 struct block_op *bop; 125 unsigned int next = brb_next(brb, brb->end); 126 127 /* 128 * We don't allow the last bop to be filled, this way we can 129 * differentiate between full and empty. 130 */ 131 if (next == brb->begin) 132 return -ENOMEM; 133 134 bop = brb->bops + brb->end; 135 bop->type = type; 136 bop->b = b; 137 bop->e = e; 138 139 brb->end = next; 140 141 return 0; 142} 143 144static int brb_peek(struct bop_ring_buffer *brb, struct block_op *result) 145{ 146 struct block_op *bop; 147 148 if (brb_empty(brb)) 149 return -ENODATA; 150 151 bop = brb->bops + brb->begin; 152 memcpy(result, bop, sizeof(*result)); 153 return 0; 154} 155 156static int brb_pop(struct bop_ring_buffer *brb) 157{ 158 if (brb_empty(brb)) 159 return -ENODATA; 160 161 brb->begin = brb_next(brb, brb->begin); 162 163 return 0; 164} 165 166/*----------------------------------------------------------------*/ 167 168struct sm_metadata { 169 struct dm_space_map sm; 170 171 struct ll_disk ll; 172 struct ll_disk old_ll; 173 174 dm_block_t begin; 175 176 unsigned int recursion_count; 177 unsigned int allocated_this_transaction; 178 struct bop_ring_buffer uncommitted; 179 180 struct threshold threshold; 181}; 182 183static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b, dm_block_t e) 184{ 185 int r = brb_push(&smm->uncommitted, type, b, e); 186 187 if (r) { 188 DMERR("too many recursive allocations"); 189 return -ENOMEM; 190 } 191 192 return 0; 193} 194 195static int commit_bop(struct sm_metadata *smm, struct block_op *op) 196{ 197 int r = 0; 198 int32_t nr_allocations; 199 200 switch (op->type) { 201 case BOP_INC: 202 r = sm_ll_inc(&smm->ll, op->b, op->e, &nr_allocations); 203 break; 204 205 case BOP_DEC: 206 r = sm_ll_dec(&smm->ll, op->b, op->e, &nr_allocations); 207 break; 208 } 209 210 return r; 211} 212 213static void in(struct sm_metadata *smm) 214{ 215 smm->recursion_count++; 216} 217 218static int apply_bops(struct sm_metadata *smm) 219{ 220 int r = 0; 221 222 while (!brb_empty(&smm->uncommitted)) { 223 struct block_op bop; 224 225 r = brb_peek(&smm->uncommitted, &bop); 226 if (r) { 227 DMERR("bug in bop ring buffer"); 228 break; 229 } 230 231 r = commit_bop(smm, &bop); 232 if (r) 233 break; 234 235 brb_pop(&smm->uncommitted); 236 } 237 238 return r; 239} 240 241static int out(struct sm_metadata *smm) 242{ 243 int r = 0; 244 245 /* 246 * If we're not recursing then very bad things are happening. 247 */ 248 if (!smm->recursion_count) { 249 DMERR("lost track of recursion depth"); 250 return -ENOMEM; 251 } 252 253 if (smm->recursion_count == 1) 254 r = apply_bops(smm); 255 256 smm->recursion_count--; 257 258 return r; 259} 260 261/* 262 * When using the out() function above, we often want to combine an error 263 * code for the operation run in the recursive context with that from 264 * out(). 265 */ 266static int combine_errors(int r1, int r2) 267{ 268 return r1 ? r1 : r2; 269} 270 271static int recursing(struct sm_metadata *smm) 272{ 273 return smm->recursion_count; 274} 275 276static void sm_metadata_destroy(struct dm_space_map *sm) 277{ 278 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 279 280 kfree(smm); 281} 282 283static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count) 284{ 285 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 286 287 *count = smm->ll.nr_blocks; 288 289 return 0; 290} 291 292static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count) 293{ 294 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 295 296 *count = smm->old_ll.nr_blocks - smm->old_ll.nr_allocated - 297 smm->allocated_this_transaction; 298 299 return 0; 300} 301 302static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b, 303 uint32_t *result) 304{ 305 int r; 306 unsigned int i; 307 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 308 unsigned int adjustment = 0; 309 310 /* 311 * We may have some uncommitted adjustments to add. This list 312 * should always be really short. 313 */ 314 for (i = smm->uncommitted.begin; 315 i != smm->uncommitted.end; 316 i = brb_next(&smm->uncommitted, i)) { 317 struct block_op *op = smm->uncommitted.bops + i; 318 319 if (b < op->b || b >= op->e) 320 continue; 321 322 switch (op->type) { 323 case BOP_INC: 324 adjustment++; 325 break; 326 327 case BOP_DEC: 328 adjustment--; 329 break; 330 } 331 } 332 333 r = sm_ll_lookup(&smm->ll, b, result); 334 if (r) 335 return r; 336 337 *result += adjustment; 338 339 return 0; 340} 341 342static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm, 343 dm_block_t b, int *result) 344{ 345 int r, adjustment = 0; 346 unsigned int i; 347 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 348 uint32_t rc; 349 350 /* 351 * We may have some uncommitted adjustments to add. This list 352 * should always be really short. 353 */ 354 for (i = smm->uncommitted.begin; 355 i != smm->uncommitted.end; 356 i = brb_next(&smm->uncommitted, i)) { 357 358 struct block_op *op = smm->uncommitted.bops + i; 359 360 if (b < op->b || b >= op->e) 361 continue; 362 363 switch (op->type) { 364 case BOP_INC: 365 adjustment++; 366 break; 367 368 case BOP_DEC: 369 adjustment--; 370 break; 371 } 372 } 373 374 if (adjustment > 1) { 375 *result = 1; 376 return 0; 377 } 378 379 r = sm_ll_lookup_bitmap(&smm->ll, b, &rc); 380 if (r) 381 return r; 382 383 if (rc == 3) 384 /* 385 * We err on the side of caution, and always return true. 386 */ 387 *result = 1; 388 else 389 *result = rc + adjustment > 1; 390 391 return 0; 392} 393 394static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b, 395 uint32_t count) 396{ 397 int r, r2; 398 int32_t nr_allocations; 399 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 400 401 if (smm->recursion_count) { 402 DMERR("cannot recurse set_count()"); 403 return -EINVAL; 404 } 405 406 in(smm); 407 r = sm_ll_insert(&smm->ll, b, count, &nr_allocations); 408 r2 = out(smm); 409 410 return combine_errors(r, r2); 411} 412 413static int sm_metadata_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e) 414{ 415 int r, r2 = 0; 416 int32_t nr_allocations; 417 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 418 419 if (recursing(smm)) { 420 r = add_bop(smm, BOP_INC, b, e); 421 if (r) 422 return r; 423 } else { 424 in(smm); 425 r = sm_ll_inc(&smm->ll, b, e, &nr_allocations); 426 r2 = out(smm); 427 } 428 429 return combine_errors(r, r2); 430} 431 432static int sm_metadata_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e) 433{ 434 int r, r2 = 0; 435 int32_t nr_allocations; 436 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 437 438 if (recursing(smm)) 439 r = add_bop(smm, BOP_DEC, b, e); 440 else { 441 in(smm); 442 r = sm_ll_dec(&smm->ll, b, e, &nr_allocations); 443 r2 = out(smm); 444 } 445 446 return combine_errors(r, r2); 447} 448 449static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b) 450{ 451 int r, r2 = 0; 452 int32_t nr_allocations; 453 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 454 455 /* 456 * Any block we allocate has to be free in both the old and current ll. 457 */ 458 r = sm_ll_find_common_free_block(&smm->old_ll, &smm->ll, smm->begin, smm->ll.nr_blocks, b); 459 if (r == -ENOSPC) { 460 /* 461 * There's no free block between smm->begin and the end of the metadata device. 462 * We search before smm->begin in case something has been freed. 463 */ 464 r = sm_ll_find_common_free_block(&smm->old_ll, &smm->ll, 0, smm->begin, b); 465 } 466 467 if (r) 468 return r; 469 470 smm->begin = *b + 1; 471 472 if (recursing(smm)) 473 r = add_bop(smm, BOP_INC, *b, *b + 1); 474 else { 475 in(smm); 476 r = sm_ll_inc(&smm->ll, *b, *b + 1, &nr_allocations); 477 r2 = out(smm); 478 } 479 480 if (!r) 481 smm->allocated_this_transaction++; 482 483 return combine_errors(r, r2); 484} 485 486static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b) 487{ 488 dm_block_t count; 489 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 490 491 int r = sm_metadata_new_block_(sm, b); 492 493 if (r) { 494 DMERR_LIMIT("unable to allocate new metadata block"); 495 return r; 496 } 497 498 r = sm_metadata_get_nr_free(sm, &count); 499 if (r) { 500 DMERR_LIMIT("couldn't get free block count"); 501 return r; 502 } 503 504 check_threshold(&smm->threshold, count); 505 506 return r; 507} 508 509static int sm_metadata_commit(struct dm_space_map *sm) 510{ 511 int r; 512 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 513 514 r = sm_ll_commit(&smm->ll); 515 if (r) 516 return r; 517 518 memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll)); 519 smm->allocated_this_transaction = 0; 520 521 return 0; 522} 523 524static int sm_metadata_register_threshold_callback(struct dm_space_map *sm, 525 dm_block_t threshold, 526 dm_sm_threshold_fn fn, 527 void *context) 528{ 529 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 530 531 set_threshold(&smm->threshold, threshold, fn, context); 532 533 return 0; 534} 535 536static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result) 537{ 538 *result = sizeof(struct disk_sm_root); 539 540 return 0; 541} 542 543static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t max) 544{ 545 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 546 struct disk_sm_root root_le; 547 548 root_le.nr_blocks = cpu_to_le64(smm->ll.nr_blocks); 549 root_le.nr_allocated = cpu_to_le64(smm->ll.nr_allocated); 550 root_le.bitmap_root = cpu_to_le64(smm->ll.bitmap_root); 551 root_le.ref_count_root = cpu_to_le64(smm->ll.ref_count_root); 552 553 if (max < sizeof(root_le)) 554 return -ENOSPC; 555 556 memcpy(where_le, &root_le, sizeof(root_le)); 557 558 return 0; 559} 560 561static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks); 562 563static const struct dm_space_map ops = { 564 .destroy = sm_metadata_destroy, 565 .extend = sm_metadata_extend, 566 .get_nr_blocks = sm_metadata_get_nr_blocks, 567 .get_nr_free = sm_metadata_get_nr_free, 568 .get_count = sm_metadata_get_count, 569 .count_is_more_than_one = sm_metadata_count_is_more_than_one, 570 .set_count = sm_metadata_set_count, 571 .inc_blocks = sm_metadata_inc_blocks, 572 .dec_blocks = sm_metadata_dec_blocks, 573 .new_block = sm_metadata_new_block, 574 .commit = sm_metadata_commit, 575 .root_size = sm_metadata_root_size, 576 .copy_root = sm_metadata_copy_root, 577 .register_threshold_callback = sm_metadata_register_threshold_callback 578}; 579 580/*----------------------------------------------------------------*/ 581 582/* 583 * When a new space map is created that manages its own space. We use 584 * this tiny bootstrap allocator. 585 */ 586static void sm_bootstrap_destroy(struct dm_space_map *sm) 587{ 588} 589 590static int sm_bootstrap_extend(struct dm_space_map *sm, dm_block_t extra_blocks) 591{ 592 DMERR("bootstrap doesn't support extend"); 593 594 return -EINVAL; 595} 596 597static int sm_bootstrap_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count) 598{ 599 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 600 601 *count = smm->ll.nr_blocks; 602 603 return 0; 604} 605 606static int sm_bootstrap_get_nr_free(struct dm_space_map *sm, dm_block_t *count) 607{ 608 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 609 610 *count = smm->ll.nr_blocks - smm->begin; 611 612 return 0; 613} 614 615static int sm_bootstrap_get_count(struct dm_space_map *sm, dm_block_t b, 616 uint32_t *result) 617{ 618 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 619 620 *result = (b < smm->begin) ? 1 : 0; 621 622 return 0; 623} 624 625static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm, 626 dm_block_t b, int *result) 627{ 628 *result = 0; 629 630 return 0; 631} 632 633static int sm_bootstrap_set_count(struct dm_space_map *sm, dm_block_t b, 634 uint32_t count) 635{ 636 DMERR("bootstrap doesn't support set_count"); 637 638 return -EINVAL; 639} 640 641static int sm_bootstrap_new_block(struct dm_space_map *sm, dm_block_t *b) 642{ 643 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 644 645 /* 646 * We know the entire device is unused. 647 */ 648 if (smm->begin == smm->ll.nr_blocks) 649 return -ENOSPC; 650 651 *b = smm->begin++; 652 653 return 0; 654} 655 656static int sm_bootstrap_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e) 657{ 658 int r; 659 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 660 661 r = add_bop(smm, BOP_INC, b, e); 662 if (r) 663 return r; 664 665 return 0; 666} 667 668static int sm_bootstrap_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e) 669{ 670 int r; 671 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 672 673 r = add_bop(smm, BOP_DEC, b, e); 674 if (r) 675 return r; 676 677 return 0; 678} 679 680static int sm_bootstrap_commit(struct dm_space_map *sm) 681{ 682 return 0; 683} 684 685static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result) 686{ 687 DMERR("bootstrap doesn't support root_size"); 688 689 return -EINVAL; 690} 691 692static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where, 693 size_t max) 694{ 695 DMERR("bootstrap doesn't support copy_root"); 696 697 return -EINVAL; 698} 699 700static const struct dm_space_map bootstrap_ops = { 701 .destroy = sm_bootstrap_destroy, 702 .extend = sm_bootstrap_extend, 703 .get_nr_blocks = sm_bootstrap_get_nr_blocks, 704 .get_nr_free = sm_bootstrap_get_nr_free, 705 .get_count = sm_bootstrap_get_count, 706 .count_is_more_than_one = sm_bootstrap_count_is_more_than_one, 707 .set_count = sm_bootstrap_set_count, 708 .inc_blocks = sm_bootstrap_inc_blocks, 709 .dec_blocks = sm_bootstrap_dec_blocks, 710 .new_block = sm_bootstrap_new_block, 711 .commit = sm_bootstrap_commit, 712 .root_size = sm_bootstrap_root_size, 713 .copy_root = sm_bootstrap_copy_root, 714 .register_threshold_callback = NULL 715}; 716 717/*----------------------------------------------------------------*/ 718 719static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks) 720{ 721 int r; 722 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 723 dm_block_t old_len = smm->ll.nr_blocks; 724 725 /* 726 * Flick into a mode where all blocks get allocated in the new area. 727 */ 728 smm->begin = old_len; 729 memcpy(sm, &bootstrap_ops, sizeof(*sm)); 730 731 /* 732 * Extend. 733 */ 734 r = sm_ll_extend(&smm->ll, extra_blocks); 735 if (r) 736 goto out; 737 738 /* 739 * We repeatedly increment then commit until the commit doesn't 740 * allocate any new blocks. 741 */ 742 do { 743 r = add_bop(smm, BOP_INC, old_len, smm->begin); 744 if (r) 745 goto out; 746 747 old_len = smm->begin; 748 749 r = apply_bops(smm); 750 if (r) { 751 DMERR("%s: apply_bops failed", __func__); 752 goto out; 753 } 754 755 r = sm_ll_commit(&smm->ll); 756 if (r) 757 goto out; 758 759 } while (old_len != smm->begin); 760 761out: 762 /* 763 * Switch back to normal behaviour. 764 */ 765 memcpy(sm, &ops, sizeof(*sm)); 766 return r; 767} 768 769/*----------------------------------------------------------------*/ 770 771struct dm_space_map *dm_sm_metadata_init(void) 772{ 773 struct sm_metadata *smm; 774 775 smm = kmalloc(sizeof(*smm), GFP_KERNEL); 776 if (!smm) 777 return ERR_PTR(-ENOMEM); 778 779 memcpy(&smm->sm, &ops, sizeof(smm->sm)); 780 781 return &smm->sm; 782} 783 784int dm_sm_metadata_create(struct dm_space_map *sm, 785 struct dm_transaction_manager *tm, 786 dm_block_t nr_blocks, 787 dm_block_t superblock) 788{ 789 int r; 790 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 791 792 smm->begin = superblock + 1; 793 smm->recursion_count = 0; 794 smm->allocated_this_transaction = 0; 795 brb_init(&smm->uncommitted); 796 threshold_init(&smm->threshold); 797 798 memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm)); 799 800 r = sm_ll_new_metadata(&smm->ll, tm); 801 if (!r) { 802 if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS) 803 nr_blocks = DM_SM_METADATA_MAX_BLOCKS; 804 r = sm_ll_extend(&smm->ll, nr_blocks); 805 } 806 memcpy(&smm->sm, &ops, sizeof(smm->sm)); 807 if (r) 808 return r; 809 810 /* 811 * Now we need to update the newly created data structures with the 812 * allocated blocks that they were built from. 813 */ 814 r = add_bop(smm, BOP_INC, superblock, smm->begin); 815 if (r) 816 return r; 817 818 r = apply_bops(smm); 819 if (r) { 820 DMERR("%s: apply_bops failed", __func__); 821 return r; 822 } 823 824 return sm_metadata_commit(sm); 825} 826 827int dm_sm_metadata_open(struct dm_space_map *sm, 828 struct dm_transaction_manager *tm, 829 void *root_le, size_t len) 830{ 831 int r; 832 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 833 834 r = sm_ll_open_metadata(&smm->ll, tm, root_le, len); 835 if (r) 836 return r; 837 838 smm->begin = 0; 839 smm->recursion_count = 0; 840 smm->allocated_this_transaction = 0; 841 brb_init(&smm->uncommitted); 842 threshold_init(&smm->threshold); 843 844 memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll)); 845 return 0; 846} 847