1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * Copyright (C) 2011 Red Hat, Inc. 4 * 5 * This file is released under the GPL. 6 */ 7#include "dm-block-manager.h" 8#include "dm-persistent-data-internal.h" 9 10#include <linux/dm-bufio.h> 11#include <linux/crc32c.h> 12#include <linux/module.h> 13#include <linux/slab.h> 14#include <linux/rwsem.h> 15#include <linux/device-mapper.h> 16#include <linux/stacktrace.h> 17#include <linux/sched/task.h> 18 19#define DM_MSG_PREFIX "block manager" 20 21/*----------------------------------------------------------------*/ 22 23#ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING 24 25/* 26 * This is a read/write semaphore with a couple of differences. 27 * 28 * i) There is a restriction on the number of concurrent read locks that 29 * may be held at once. This is just an implementation detail. 30 * 31 * ii) Recursive locking attempts are detected and return EINVAL. A stack 32 * trace is also emitted for the previous lock acquisition. 33 * 34 * iii) Priority is given to write locks. 35 */ 36#define MAX_HOLDERS 4 37#define MAX_STACK 10 38 39struct stack_store { 40 unsigned int nr_entries; 41 unsigned long entries[MAX_STACK]; 42}; 43 44struct block_lock { 45 spinlock_t lock; 46 __s32 count; 47 struct list_head waiters; 48 struct task_struct *holders[MAX_HOLDERS]; 49 50#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING 51 struct stack_store traces[MAX_HOLDERS]; 52#endif 53}; 54 55struct waiter { 56 struct list_head list; 57 struct task_struct *task; 58 int wants_write; 59}; 60 61static unsigned int __find_holder(struct block_lock *lock, 62 struct task_struct *task) 63{ 64 unsigned int i; 65 66 for (i = 0; i < MAX_HOLDERS; i++) 67 if (lock->holders[i] == task) 68 break; 69 70 BUG_ON(i == MAX_HOLDERS); 71 return i; 72} 73 74/* call this *after* you increment lock->count */ 75static void __add_holder(struct block_lock *lock, struct task_struct *task) 76{ 77 unsigned int h = __find_holder(lock, NULL); 78#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING 79 struct stack_store *t; 80#endif 81 82 get_task_struct(task); 83 lock->holders[h] = task; 84 85#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING 86 t = lock->traces + h; 87 t->nr_entries = stack_trace_save(t->entries, MAX_STACK, 2); 88#endif 89} 90 91/* call this *before* you decrement lock->count */ 92static void __del_holder(struct block_lock *lock, struct task_struct *task) 93{ 94 unsigned int h = __find_holder(lock, task); 95 96 lock->holders[h] = NULL; 97 put_task_struct(task); 98} 99 100static int __check_holder(struct block_lock *lock) 101{ 102 unsigned int i; 103 104 for (i = 0; i < MAX_HOLDERS; i++) { 105 if (lock->holders[i] == current) { 106 DMERR("recursive lock detected in metadata"); 107#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING 108 DMERR("previously held here:"); 109 stack_trace_print(lock->traces[i].entries, 110 lock->traces[i].nr_entries, 4); 111 112 DMERR("subsequent acquisition attempted here:"); 113 dump_stack(); 114#endif 115 return -EINVAL; 116 } 117 } 118 119 return 0; 120} 121 122static void __wait(struct waiter *w) 123{ 124 for (;;) { 125 set_current_state(TASK_UNINTERRUPTIBLE); 126 127 if (!w->task) 128 break; 129 130 schedule(); 131 } 132 133 set_current_state(TASK_RUNNING); 134} 135 136static void __wake_waiter(struct waiter *w) 137{ 138 struct task_struct *task; 139 140 list_del(&w->list); 141 task = w->task; 142 smp_mb(); 143 w->task = NULL; 144 wake_up_process(task); 145} 146 147/* 148 * We either wake a few readers or a single writer. 149 */ 150static void __wake_many(struct block_lock *lock) 151{ 152 struct waiter *w, *tmp; 153 154 BUG_ON(lock->count < 0); 155 list_for_each_entry_safe(w, tmp, &lock->waiters, list) { 156 if (lock->count >= MAX_HOLDERS) 157 return; 158 159 if (w->wants_write) { 160 if (lock->count > 0) 161 return; /* still read locked */ 162 163 lock->count = -1; 164 __add_holder(lock, w->task); 165 __wake_waiter(w); 166 return; 167 } 168 169 lock->count++; 170 __add_holder(lock, w->task); 171 __wake_waiter(w); 172 } 173} 174 175static void bl_init(struct block_lock *lock) 176{ 177 int i; 178 179 spin_lock_init(&lock->lock); 180 lock->count = 0; 181 INIT_LIST_HEAD(&lock->waiters); 182 for (i = 0; i < MAX_HOLDERS; i++) 183 lock->holders[i] = NULL; 184} 185 186static int __available_for_read(struct block_lock *lock) 187{ 188 return lock->count >= 0 && 189 lock->count < MAX_HOLDERS && 190 list_empty(&lock->waiters); 191} 192 193static int bl_down_read(struct block_lock *lock) 194{ 195 int r; 196 struct waiter w; 197 198 spin_lock(&lock->lock); 199 r = __check_holder(lock); 200 if (r) { 201 spin_unlock(&lock->lock); 202 return r; 203 } 204 205 if (__available_for_read(lock)) { 206 lock->count++; 207 __add_holder(lock, current); 208 spin_unlock(&lock->lock); 209 return 0; 210 } 211 212 get_task_struct(current); 213 214 w.task = current; 215 w.wants_write = 0; 216 list_add_tail(&w.list, &lock->waiters); 217 spin_unlock(&lock->lock); 218 219 __wait(&w); 220 put_task_struct(current); 221 return 0; 222} 223 224static int bl_down_read_nonblock(struct block_lock *lock) 225{ 226 int r; 227 228 spin_lock(&lock->lock); 229 r = __check_holder(lock); 230 if (r) 231 goto out; 232 233 if (__available_for_read(lock)) { 234 lock->count++; 235 __add_holder(lock, current); 236 r = 0; 237 } else 238 r = -EWOULDBLOCK; 239 240out: 241 spin_unlock(&lock->lock); 242 return r; 243} 244 245static void bl_up_read(struct block_lock *lock) 246{ 247 spin_lock(&lock->lock); 248 BUG_ON(lock->count <= 0); 249 __del_holder(lock, current); 250 --lock->count; 251 if (!list_empty(&lock->waiters)) 252 __wake_many(lock); 253 spin_unlock(&lock->lock); 254} 255 256static int bl_down_write(struct block_lock *lock) 257{ 258 int r; 259 struct waiter w; 260 261 spin_lock(&lock->lock); 262 r = __check_holder(lock); 263 if (r) { 264 spin_unlock(&lock->lock); 265 return r; 266 } 267 268 if (lock->count == 0 && list_empty(&lock->waiters)) { 269 lock->count = -1; 270 __add_holder(lock, current); 271 spin_unlock(&lock->lock); 272 return 0; 273 } 274 275 get_task_struct(current); 276 w.task = current; 277 w.wants_write = 1; 278 279 /* 280 * Writers given priority. We know there's only one mutator in the 281 * system, so ignoring the ordering reversal. 282 */ 283 list_add(&w.list, &lock->waiters); 284 spin_unlock(&lock->lock); 285 286 __wait(&w); 287 put_task_struct(current); 288 289 return 0; 290} 291 292static void bl_up_write(struct block_lock *lock) 293{ 294 spin_lock(&lock->lock); 295 __del_holder(lock, current); 296 lock->count = 0; 297 if (!list_empty(&lock->waiters)) 298 __wake_many(lock); 299 spin_unlock(&lock->lock); 300} 301 302static void report_recursive_bug(dm_block_t b, int r) 303{ 304 if (r == -EINVAL) 305 DMERR("recursive acquisition of block %llu requested.", 306 (unsigned long long) b); 307} 308 309#else /* !CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */ 310 311#define bl_init(x) do { } while (0) 312#define bl_down_read(x) 0 313#define bl_down_read_nonblock(x) 0 314#define bl_up_read(x) do { } while (0) 315#define bl_down_write(x) 0 316#define bl_up_write(x) do { } while (0) 317#define report_recursive_bug(x, y) do { } while (0) 318 319#endif /* CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */ 320 321/*----------------------------------------------------------------*/ 322 323/* 324 * Block manager is currently implemented using dm-bufio. struct 325 * dm_block_manager and struct dm_block map directly onto a couple of 326 * structs in the bufio interface. I want to retain the freedom to move 327 * away from bufio in the future. So these structs are just cast within 328 * this .c file, rather than making it through to the public interface. 329 */ 330static struct dm_buffer *to_buffer(struct dm_block *b) 331{ 332 return (struct dm_buffer *) b; 333} 334 335dm_block_t dm_block_location(struct dm_block *b) 336{ 337 return dm_bufio_get_block_number(to_buffer(b)); 338} 339EXPORT_SYMBOL_GPL(dm_block_location); 340 341void *dm_block_data(struct dm_block *b) 342{ 343 return dm_bufio_get_block_data(to_buffer(b)); 344} 345EXPORT_SYMBOL_GPL(dm_block_data); 346 347struct buffer_aux { 348 struct dm_block_validator *validator; 349 int write_locked; 350 351#ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING 352 struct block_lock lock; 353#endif 354}; 355 356static void dm_block_manager_alloc_callback(struct dm_buffer *buf) 357{ 358 struct buffer_aux *aux = dm_bufio_get_aux_data(buf); 359 360 aux->validator = NULL; 361 bl_init(&aux->lock); 362} 363 364static void dm_block_manager_write_callback(struct dm_buffer *buf) 365{ 366 struct buffer_aux *aux = dm_bufio_get_aux_data(buf); 367 368 if (aux->validator) { 369 aux->validator->prepare_for_write(aux->validator, (struct dm_block *) buf, 370 dm_bufio_get_block_size(dm_bufio_get_client(buf))); 371 } 372} 373 374/* 375 * ------------------------------------------------------------- 376 * Public interface 377 *-------------------------------------------------------------- 378 */ 379struct dm_block_manager { 380 struct dm_bufio_client *bufio; 381 bool read_only:1; 382}; 383 384struct dm_block_manager *dm_block_manager_create(struct block_device *bdev, 385 unsigned int block_size, 386 unsigned int max_held_per_thread) 387{ 388 int r; 389 struct dm_block_manager *bm; 390 391 bm = kmalloc(sizeof(*bm), GFP_KERNEL); 392 if (!bm) { 393 r = -ENOMEM; 394 goto bad; 395 } 396 397 bm->bufio = dm_bufio_client_create(bdev, block_size, max_held_per_thread, 398 sizeof(struct buffer_aux), 399 dm_block_manager_alloc_callback, 400 dm_block_manager_write_callback, 401 0); 402 if (IS_ERR(bm->bufio)) { 403 r = PTR_ERR(bm->bufio); 404 kfree(bm); 405 goto bad; 406 } 407 408 bm->read_only = false; 409 410 return bm; 411 412bad: 413 return ERR_PTR(r); 414} 415EXPORT_SYMBOL_GPL(dm_block_manager_create); 416 417void dm_block_manager_destroy(struct dm_block_manager *bm) 418{ 419 dm_bufio_client_destroy(bm->bufio); 420 kfree(bm); 421} 422EXPORT_SYMBOL_GPL(dm_block_manager_destroy); 423 424void dm_block_manager_reset(struct dm_block_manager *bm) 425{ 426 dm_bufio_client_reset(bm->bufio); 427} 428EXPORT_SYMBOL_GPL(dm_block_manager_reset); 429 430unsigned int dm_bm_block_size(struct dm_block_manager *bm) 431{ 432 return dm_bufio_get_block_size(bm->bufio); 433} 434EXPORT_SYMBOL_GPL(dm_bm_block_size); 435 436dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm) 437{ 438 return dm_bufio_get_device_size(bm->bufio); 439} 440 441static int dm_bm_validate_buffer(struct dm_block_manager *bm, 442 struct dm_buffer *buf, 443 struct buffer_aux *aux, 444 struct dm_block_validator *v) 445{ 446 if (unlikely(!aux->validator)) { 447 int r; 448 449 if (!v) 450 return 0; 451 r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(bm->bufio)); 452 if (unlikely(r)) { 453 DMERR_LIMIT("%s validator check failed for block %llu", v->name, 454 (unsigned long long) dm_bufio_get_block_number(buf)); 455 return r; 456 } 457 aux->validator = v; 458 } else { 459 if (unlikely(aux->validator != v)) { 460 DMERR_LIMIT("validator mismatch (old=%s vs new=%s) for block %llu", 461 aux->validator->name, v ? v->name : "NULL", 462 (unsigned long long) dm_bufio_get_block_number(buf)); 463 return -EINVAL; 464 } 465 } 466 467 return 0; 468} 469int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b, 470 struct dm_block_validator *v, 471 struct dm_block **result) 472{ 473 struct buffer_aux *aux; 474 void *p; 475 int r; 476 477 p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); 478 if (IS_ERR(p)) 479 return PTR_ERR(p); 480 481 aux = dm_bufio_get_aux_data(to_buffer(*result)); 482 r = bl_down_read(&aux->lock); 483 if (unlikely(r)) { 484 dm_bufio_release(to_buffer(*result)); 485 report_recursive_bug(b, r); 486 return r; 487 } 488 489 aux->write_locked = 0; 490 491 r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v); 492 if (unlikely(r)) { 493 bl_up_read(&aux->lock); 494 dm_bufio_release(to_buffer(*result)); 495 return r; 496 } 497 498 return 0; 499} 500EXPORT_SYMBOL_GPL(dm_bm_read_lock); 501 502int dm_bm_write_lock(struct dm_block_manager *bm, 503 dm_block_t b, struct dm_block_validator *v, 504 struct dm_block **result) 505{ 506 struct buffer_aux *aux; 507 void *p; 508 int r; 509 510 if (dm_bm_is_read_only(bm)) 511 return -EPERM; 512 513 p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); 514 if (IS_ERR(p)) 515 return PTR_ERR(p); 516 517 aux = dm_bufio_get_aux_data(to_buffer(*result)); 518 r = bl_down_write(&aux->lock); 519 if (r) { 520 dm_bufio_release(to_buffer(*result)); 521 report_recursive_bug(b, r); 522 return r; 523 } 524 525 aux->write_locked = 1; 526 527 r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v); 528 if (unlikely(r)) { 529 bl_up_write(&aux->lock); 530 dm_bufio_release(to_buffer(*result)); 531 return r; 532 } 533 534 return 0; 535} 536EXPORT_SYMBOL_GPL(dm_bm_write_lock); 537 538int dm_bm_read_try_lock(struct dm_block_manager *bm, 539 dm_block_t b, struct dm_block_validator *v, 540 struct dm_block **result) 541{ 542 struct buffer_aux *aux; 543 void *p; 544 int r; 545 546 p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result); 547 if (IS_ERR(p)) 548 return PTR_ERR(p); 549 if (unlikely(!p)) 550 return -EWOULDBLOCK; 551 552 aux = dm_bufio_get_aux_data(to_buffer(*result)); 553 r = bl_down_read_nonblock(&aux->lock); 554 if (r < 0) { 555 dm_bufio_release(to_buffer(*result)); 556 report_recursive_bug(b, r); 557 return r; 558 } 559 aux->write_locked = 0; 560 561 r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v); 562 if (unlikely(r)) { 563 bl_up_read(&aux->lock); 564 dm_bufio_release(to_buffer(*result)); 565 return r; 566 } 567 568 return 0; 569} 570 571int dm_bm_write_lock_zero(struct dm_block_manager *bm, 572 dm_block_t b, struct dm_block_validator *v, 573 struct dm_block **result) 574{ 575 int r; 576 struct buffer_aux *aux; 577 void *p; 578 579 if (dm_bm_is_read_only(bm)) 580 return -EPERM; 581 582 p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result); 583 if (IS_ERR(p)) 584 return PTR_ERR(p); 585 586 memset(p, 0, dm_bm_block_size(bm)); 587 588 aux = dm_bufio_get_aux_data(to_buffer(*result)); 589 r = bl_down_write(&aux->lock); 590 if (r) { 591 dm_bufio_release(to_buffer(*result)); 592 return r; 593 } 594 595 aux->write_locked = 1; 596 aux->validator = v; 597 598 return 0; 599} 600EXPORT_SYMBOL_GPL(dm_bm_write_lock_zero); 601 602void dm_bm_unlock(struct dm_block *b) 603{ 604 struct buffer_aux *aux = dm_bufio_get_aux_data(to_buffer(b)); 605 606 if (aux->write_locked) { 607 dm_bufio_mark_buffer_dirty(to_buffer(b)); 608 bl_up_write(&aux->lock); 609 } else 610 bl_up_read(&aux->lock); 611 612 dm_bufio_release(to_buffer(b)); 613} 614EXPORT_SYMBOL_GPL(dm_bm_unlock); 615 616int dm_bm_flush(struct dm_block_manager *bm) 617{ 618 if (dm_bm_is_read_only(bm)) 619 return -EPERM; 620 621 return dm_bufio_write_dirty_buffers(bm->bufio); 622} 623EXPORT_SYMBOL_GPL(dm_bm_flush); 624 625void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b) 626{ 627 dm_bufio_prefetch(bm->bufio, b, 1); 628} 629 630bool dm_bm_is_read_only(struct dm_block_manager *bm) 631{ 632 return bm ? bm->read_only : true; 633} 634EXPORT_SYMBOL_GPL(dm_bm_is_read_only); 635 636void dm_bm_set_read_only(struct dm_block_manager *bm) 637{ 638 if (bm) 639 bm->read_only = true; 640} 641EXPORT_SYMBOL_GPL(dm_bm_set_read_only); 642 643void dm_bm_set_read_write(struct dm_block_manager *bm) 644{ 645 if (bm) 646 bm->read_only = false; 647} 648EXPORT_SYMBOL_GPL(dm_bm_set_read_write); 649 650u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor) 651{ 652 return crc32c(~(u32) 0, data, len) ^ init_xor; 653} 654EXPORT_SYMBOL_GPL(dm_bm_checksum); 655 656/*----------------------------------------------------------------*/ 657 658MODULE_LICENSE("GPL"); 659MODULE_AUTHOR("Joe Thornber <dm-devel@lists.linux.dev>"); 660MODULE_DESCRIPTION("Immutable metadata library for dm"); 661 662/*----------------------------------------------------------------*/ 663