activemap.c revision 220521
1145524Sdarrenr/*- 2145524Sdarrenr * Copyright (c) 2009-2010 The FreeBSD Foundation 3298107Sgjb * All rights reserved. 4145524Sdarrenr * 5145630Sdarrenr * This software was developed by Pawel Jakub Dawidek under sponsorship from 6145524Sdarrenr * the FreeBSD Foundation. 7255332Scy * 8255332Scy * Redistribution and use in source and binary forms, with or without 9255332Scy * modification, are permitted provided that the following conditions 10255332Scy * are met: 11255332Scy * 1. Redistributions of source code must retain the above copyright 12255332Scy * notice, this list of conditions and the following disclaimer. 13255332Scy * 2. Redistributions in binary form must reproduce the above copyright 14255332Scy * notice, this list of conditions and the following disclaimer in the 15255332Scy * documentation and/or other materials provided with the distribution. 16255332Scy * 17170268Sdarrenr * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 18255332Scy * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19255332Scy * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20255332Scy * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 21255332Scy * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22255332Scy * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23255332Scy * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24170268Sdarrenr * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25255332Scy * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26170268Sdarrenr * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27255332Scy * SUCH DAMAGE. 28255332Scy */ 29255332Scy 30255332Scy#include <sys/cdefs.h> 31255332Scy__FBSDID("$FreeBSD: head/sbin/hastd/activemap.c 220521 2011-04-10 15:11:19Z trociny $"); 32255332Scy 33255332Scy#include <sys/param.h> /* powerof2() */ 34170268Sdarrenr#include <sys/queue.h> 35255332Scy 36255332Scy#include <assert.h> 37255332Scy#include <bitstring.h> 38255332Scy#include <errno.h> 39255332Scy#include <stdint.h> 40255332Scy#include <stdio.h> 41255332Scy#include <stdlib.h> 42170268Sdarrenr#include <string.h> 43255332Scy 44170268Sdarrenr#include <activemap.h> 45170268Sdarrenr 46145524Sdarrenr#define ACTIVEMAP_MAGIC 0xac71e4 47291735Sbdrewerystruct activemap { 48291735Sbdrewery int am_magic; /* Magic value. */ 49145524Sdarrenr off_t am_mediasize; /* Media size in bytes. */ 50 uint32_t am_extentsize; /* Extent size in bytes, 51 must be power of 2. */ 52 uint8_t am_extentshift;/* 2 ^ extentbits == extentsize */ 53 int am_nextents; /* Number of extents. */ 54 size_t am_mapsize; /* Bitmap size in bytes. */ 55 uint16_t *am_memtab; /* An array that holds number of pending 56 writes per extent. */ 57 bitstr_t *am_diskmap; /* On-disk bitmap of dirty extents. */ 58 bitstr_t *am_memmap; /* In-memory bitmap of dirty extents. */ 59 size_t am_diskmapsize; /* Map size rounded up to sector size. */ 60 uint64_t am_ndirty; /* Number of dirty regions. */ 61 bitstr_t *am_syncmap; /* Bitmap of extents to sync. */ 62 off_t am_syncoff; /* Next synchronization offset. */ 63 TAILQ_HEAD(skeepdirty, keepdirty) am_keepdirty; /* List of extents that 64 we keep dirty to reduce bitmap 65 updates. */ 66 int am_nkeepdirty; /* Number of am_keepdirty elements. */ 67 int am_nkeepdirty_limit; /* Maximum number of am_keepdirty 68 elements. */ 69}; 70 71struct keepdirty { 72 int kd_extent; 73 TAILQ_ENTRY(keepdirty) kd_next; 74}; 75 76/* 77 * Helper function taken from sys/systm.h to calculate extentshift. 78 */ 79static uint32_t 80bitcount32(uint32_t x) 81{ 82 83 x = (x & 0x55555555) + ((x & 0xaaaaaaaa) >> 1); 84 x = (x & 0x33333333) + ((x & 0xcccccccc) >> 2); 85 x = (x + (x >> 4)) & 0x0f0f0f0f; 86 x = (x + (x >> 8)); 87 x = (x + (x >> 16)) & 0x000000ff; 88 return (x); 89} 90 91static __inline int 92off2ext(const struct activemap *amp, off_t offset) 93{ 94 int extent; 95 96 assert(offset >= 0 && offset < amp->am_mediasize); 97 extent = (offset >> amp->am_extentshift); 98 assert(extent >= 0 && extent < amp->am_nextents); 99 return (extent); 100} 101 102static __inline off_t 103ext2off(const struct activemap *amp, int extent) 104{ 105 off_t offset; 106 107 assert(extent >= 0 && extent < amp->am_nextents); 108 offset = ((off_t)extent << amp->am_extentshift); 109 assert(offset >= 0 && offset < amp->am_mediasize); 110 return (offset); 111} 112 113/* 114 * Function calculates number of requests needed to synchronize the given 115 * extent. 116 */ 117static __inline int 118ext2reqs(const struct activemap *amp, int ext) 119{ 120 off_t left; 121 122 if (ext < amp->am_nextents - 1) 123 return (((amp->am_extentsize - 1) / MAXPHYS) + 1); 124 125 assert(ext == amp->am_nextents - 1); 126 left = amp->am_mediasize % amp->am_extentsize; 127 if (left == 0) 128 left = amp->am_extentsize; 129 return (((left - 1) / MAXPHYS) + 1); 130} 131 132/* 133 * Initialize activemap structure and allocate memory for internal needs. 134 * Function returns 0 on success and -1 if any of the allocations failed. 135 */ 136int 137activemap_init(struct activemap **ampp, uint64_t mediasize, uint32_t extentsize, 138 uint32_t sectorsize, uint32_t keepdirty) 139{ 140 struct activemap *amp; 141 142 assert(ampp != NULL); 143 assert(mediasize > 0); 144 assert(extentsize > 0); 145 assert(powerof2(extentsize)); 146 assert(sectorsize > 0); 147 assert(powerof2(sectorsize)); 148 assert(keepdirty > 0); 149 150 amp = malloc(sizeof(*amp)); 151 if (amp == NULL) 152 return (-1); 153 154 amp->am_mediasize = mediasize; 155 amp->am_nkeepdirty_limit = keepdirty; 156 amp->am_extentsize = extentsize; 157 amp->am_extentshift = bitcount32(extentsize - 1); 158 amp->am_nextents = ((mediasize - 1) / extentsize) + 1; 159 amp->am_mapsize = sizeof(bitstr_t) * bitstr_size(amp->am_nextents); 160 amp->am_diskmapsize = roundup2(amp->am_mapsize, sectorsize); 161 amp->am_ndirty = 0; 162 amp->am_syncoff = -2; 163 TAILQ_INIT(&->am_keepdirty); 164 amp->am_nkeepdirty = 0; 165 166 amp->am_memtab = calloc(amp->am_nextents, sizeof(amp->am_memtab[0])); 167 amp->am_diskmap = calloc(1, amp->am_diskmapsize); 168 amp->am_memmap = bit_alloc(amp->am_nextents); 169 amp->am_syncmap = bit_alloc(amp->am_nextents); 170 171 /* 172 * Check to see if any of the allocations above failed. 173 */ 174 if (amp->am_memtab == NULL || amp->am_diskmap == NULL || 175 amp->am_memmap == NULL || amp->am_syncmap == NULL) { 176 if (amp->am_memtab != NULL) 177 free(amp->am_memtab); 178 if (amp->am_diskmap != NULL) 179 free(amp->am_diskmap); 180 if (amp->am_memmap != NULL) 181 free(amp->am_memmap); 182 if (amp->am_syncmap != NULL) 183 free(amp->am_syncmap); 184 amp->am_magic = 0; 185 free(amp); 186 errno = ENOMEM; 187 return (-1); 188 } 189 190 amp->am_magic = ACTIVEMAP_MAGIC; 191 *ampp = amp; 192 193 return (0); 194} 195 196static struct keepdirty * 197keepdirty_find(struct activemap *amp, int extent) 198{ 199 struct keepdirty *kd; 200 201 TAILQ_FOREACH(kd, &->am_keepdirty, kd_next) { 202 if (kd->kd_extent == extent) 203 break; 204 } 205 return (kd); 206} 207 208static void 209keepdirty_add(struct activemap *amp, int extent) 210{ 211 struct keepdirty *kd; 212 213 kd = keepdirty_find(amp, extent); 214 if (kd != NULL) { 215 /* 216 * Only move element at the begining. 217 */ 218 TAILQ_REMOVE(&->am_keepdirty, kd, kd_next); 219 TAILQ_INSERT_HEAD(&->am_keepdirty, kd, kd_next); 220 return; 221 } 222 /* 223 * Add new element, but first remove the most unused one if 224 * we have too many. 225 */ 226 if (amp->am_nkeepdirty >= amp->am_nkeepdirty_limit) { 227 kd = TAILQ_LAST(&->am_keepdirty, skeepdirty); 228 assert(kd != NULL); 229 TAILQ_REMOVE(&->am_keepdirty, kd, kd_next); 230 amp->am_nkeepdirty--; 231 assert(amp->am_nkeepdirty > 0); 232 } 233 if (kd == NULL) 234 kd = malloc(sizeof(*kd)); 235 /* We can ignore allocation failure. */ 236 if (kd != NULL) { 237 kd->kd_extent = extent; 238 amp->am_nkeepdirty++; 239 TAILQ_INSERT_HEAD(&->am_keepdirty, kd, kd_next); 240 } 241} 242 243static void 244keepdirty_fill(struct activemap *amp) 245{ 246 struct keepdirty *kd; 247 248 TAILQ_FOREACH(kd, &->am_keepdirty, kd_next) 249 bit_set(amp->am_diskmap, kd->kd_extent); 250} 251 252static void 253keepdirty_free(struct activemap *amp) 254{ 255 struct keepdirty *kd; 256 257 while ((kd = TAILQ_FIRST(&->am_keepdirty)) != NULL) { 258 TAILQ_REMOVE(&->am_keepdirty, kd, kd_next); 259 amp->am_nkeepdirty--; 260 free(kd); 261 } 262 assert(amp->am_nkeepdirty == 0); 263} 264 265/* 266 * Function frees resources allocated by activemap_init() function. 267 */ 268void 269activemap_free(struct activemap *amp) 270{ 271 272 assert(amp->am_magic == ACTIVEMAP_MAGIC); 273 274 amp->am_magic = 0; 275 276 keepdirty_free(amp); 277 free(amp->am_memtab); 278 free(amp->am_diskmap); 279 free(amp->am_memmap); 280 free(amp->am_syncmap); 281} 282 283/* 284 * Function should be called before we handle write requests. It updates 285 * internal structures and returns true if on-disk metadata should be updated. 286 */ 287bool 288activemap_write_start(struct activemap *amp, off_t offset, off_t length) 289{ 290 bool modified; 291 off_t end; 292 int ext; 293 294 assert(amp->am_magic == ACTIVEMAP_MAGIC); 295 assert(length > 0); 296 297 modified = false; 298 end = offset + length - 1; 299 300 for (ext = off2ext(amp, offset); ext <= off2ext(amp, end); ext++) { 301 /* 302 * If the number of pending writes is increased from 0, 303 * we have to mark the extent as dirty also in on-disk bitmap. 304 * By returning true we inform the caller that on-disk bitmap 305 * was modified and has to be flushed to disk. 306 */ 307 if (amp->am_memtab[ext]++ == 0) { 308 assert(!bit_test(amp->am_memmap, ext)); 309 bit_set(amp->am_memmap, ext); 310 amp->am_ndirty++; 311 modified = true; 312 } 313 keepdirty_add(amp, ext); 314 } 315 316 return (modified); 317} 318 319/* 320 * Function should be called after receiving write confirmation. It updates 321 * internal structures and returns true if on-disk metadata should be updated. 322 */ 323bool 324activemap_write_complete(struct activemap *amp, off_t offset, off_t length) 325{ 326 bool modified; 327 off_t end; 328 int ext; 329 330 assert(amp->am_magic == ACTIVEMAP_MAGIC); 331 assert(length > 0); 332 333 modified = false; 334 end = offset + length - 1; 335 336 for (ext = off2ext(amp, offset); ext <= off2ext(amp, end); ext++) { 337 /* 338 * If the number of pending writes goes down to 0, we have to 339 * mark the extent as clean also in on-disk bitmap. 340 * By returning true we inform the caller that on-disk bitmap 341 * was modified and has to be flushed to disk. 342 */ 343 assert(amp->am_memtab[ext] > 0); 344 assert(bit_test(amp->am_memmap, ext)); 345 if (--amp->am_memtab[ext] == 0) { 346 bit_clear(amp->am_memmap, ext); 347 amp->am_ndirty--; 348 modified = true; 349 } 350 } 351 352 return (modified); 353} 354 355/* 356 * Function should be called after finishing synchronization of one extent. 357 * It returns true if on-disk metadata should be updated. 358 */ 359bool 360activemap_extent_complete(struct activemap *amp, int extent) 361{ 362 bool modified; 363 int reqs; 364 365 assert(amp->am_magic == ACTIVEMAP_MAGIC); 366 assert(extent >= 0 && extent < amp->am_nextents); 367 368 modified = false; 369 370 reqs = ext2reqs(amp, extent); 371 assert(amp->am_memtab[extent] >= reqs); 372 amp->am_memtab[extent] -= reqs; 373 assert(bit_test(amp->am_memmap, extent)); 374 if (amp->am_memtab[extent] == 0) { 375 bit_clear(amp->am_memmap, extent); 376 amp->am_ndirty--; 377 modified = true; 378 } 379 380 return (modified); 381} 382 383/* 384 * Function returns number of dirty regions. 385 */ 386uint64_t 387activemap_ndirty(const struct activemap *amp) 388{ 389 390 assert(amp->am_magic == ACTIVEMAP_MAGIC); 391 392 return (amp->am_ndirty); 393} 394 395/* 396 * Function compare on-disk bitmap and in-memory bitmap and returns true if 397 * they differ and should be flushed to the disk. 398 */ 399bool 400activemap_differ(const struct activemap *amp) 401{ 402 403 assert(amp->am_magic == ACTIVEMAP_MAGIC); 404 405 return (memcmp(amp->am_diskmap, amp->am_memmap, 406 amp->am_mapsize) != 0); 407} 408 409/* 410 * Function returns number of bytes used by bitmap. 411 */ 412size_t 413activemap_size(const struct activemap *amp) 414{ 415 416 assert(amp->am_magic == ACTIVEMAP_MAGIC); 417 418 return (amp->am_mapsize); 419} 420 421/* 422 * Function returns number of bytes needed for storing on-disk bitmap. 423 * This is the same as activemap_size(), but rounded up to sector size. 424 */ 425size_t 426activemap_ondisk_size(const struct activemap *amp) 427{ 428 429 assert(amp->am_magic == ACTIVEMAP_MAGIC); 430 431 return (amp->am_diskmapsize); 432} 433 434/* 435 * Function copies the given buffer read from disk to the internal bitmap. 436 */ 437void 438activemap_copyin(struct activemap *amp, const unsigned char *buf, size_t size) 439{ 440 int ext; 441 442 assert(amp->am_magic == ACTIVEMAP_MAGIC); 443 assert(size >= amp->am_mapsize); 444 445 memcpy(amp->am_diskmap, buf, amp->am_mapsize); 446 memcpy(amp->am_memmap, buf, amp->am_mapsize); 447 memcpy(amp->am_syncmap, buf, amp->am_mapsize); 448 449 bit_ffs(amp->am_memmap, amp->am_nextents, &ext); 450 if (ext == -1) { 451 /* There are no dirty extents, so we can leave now. */ 452 return; 453 } 454 /* 455 * Set synchronization offset to the first dirty extent. 456 */ 457 activemap_sync_rewind(amp); 458 /* 459 * We have dirty extents and we want them to stay that way until 460 * we synchronize, so we set number of pending writes to number 461 * of requests needed to synchronize one extent. 462 */ 463 amp->am_ndirty = 0; 464 for (; ext < amp->am_nextents; ext++) { 465 if (bit_test(amp->am_memmap, ext)) { 466 amp->am_memtab[ext] = ext2reqs(amp, ext); 467 amp->am_ndirty++; 468 } 469 } 470} 471 472/* 473 * Function merges the given bitmap with existing one. 474 */ 475void 476activemap_merge(struct activemap *amp, const unsigned char *buf, size_t size) 477{ 478 bitstr_t *remmap = __DECONST(bitstr_t *, buf); 479 int ext; 480 481 assert(amp->am_magic == ACTIVEMAP_MAGIC); 482 assert(size >= amp->am_mapsize); 483 484 bit_ffs(remmap, amp->am_nextents, &ext); 485 if (ext == -1) { 486 /* There are no dirty extents, so we can leave now. */ 487 return; 488 } 489 /* 490 * We have dirty extents and we want them to stay that way until 491 * we synchronize, so we set number of pending writes to number 492 * of requests needed to synchronize one extent. 493 */ 494 for (; ext < amp->am_nextents; ext++) { 495 /* Local extent already dirty. */ 496 if (bit_test(amp->am_syncmap, ext)) 497 continue; 498 /* Remote extent isn't dirty. */ 499 if (!bit_test(remmap, ext)) 500 continue; 501 bit_set(amp->am_syncmap, ext); 502 bit_set(amp->am_memmap, ext); 503 bit_set(amp->am_diskmap, ext); 504 if (amp->am_memtab[ext] == 0) 505 amp->am_ndirty++; 506 amp->am_memtab[ext] = ext2reqs(amp, ext); 507 } 508 /* 509 * Set synchronization offset to the first dirty extent. 510 */ 511 activemap_sync_rewind(amp); 512} 513 514/* 515 * Function returns pointer to internal bitmap that should be written to disk. 516 */ 517const unsigned char * 518activemap_bitmap(struct activemap *amp, size_t *sizep) 519{ 520 521 assert(amp->am_magic == ACTIVEMAP_MAGIC); 522 523 if (sizep != NULL) 524 *sizep = amp->am_diskmapsize; 525 memcpy(amp->am_diskmap, amp->am_memmap, amp->am_mapsize); 526 keepdirty_fill(amp); 527 return ((const unsigned char *)amp->am_diskmap); 528} 529 530/* 531 * Function calculates size needed to store bitmap on disk. 532 */ 533size_t 534activemap_calc_ondisk_size(uint64_t mediasize, uint32_t extentsize, 535 uint32_t sectorsize) 536{ 537 uint64_t nextents, mapsize; 538 539 assert(mediasize > 0); 540 assert(extentsize > 0); 541 assert(powerof2(extentsize)); 542 assert(sectorsize > 0); 543 assert(powerof2(sectorsize)); 544 545 nextents = ((mediasize - 1) / extentsize) + 1; 546 mapsize = sizeof(bitstr_t) * bitstr_size(nextents); 547 return (roundup2(mapsize, sectorsize)); 548} 549 550/* 551 * Set synchronization offset to the first dirty extent. 552 */ 553void 554activemap_sync_rewind(struct activemap *amp) 555{ 556 int ext; 557 558 assert(amp->am_magic == ACTIVEMAP_MAGIC); 559 560 bit_ffs(amp->am_syncmap, amp->am_nextents, &ext); 561 if (ext == -1) { 562 /* There are no extents to synchronize. */ 563 amp->am_syncoff = -2; 564 return; 565 } 566 /* 567 * Mark that we want to start synchronization from the begining. 568 */ 569 amp->am_syncoff = -1; 570} 571 572/* 573 * Return next offset of where we should synchronize. 574 */ 575off_t 576activemap_sync_offset(struct activemap *amp, off_t *lengthp, int *syncextp) 577{ 578 off_t syncoff, left; 579 int ext; 580 581 assert(amp->am_magic == ACTIVEMAP_MAGIC); 582 assert(lengthp != NULL); 583 assert(syncextp != NULL); 584 585 *syncextp = -1; 586 587 if (amp->am_syncoff == -2) 588 return (-1); 589 590 if (amp->am_syncoff >= 0 && 591 (amp->am_syncoff + MAXPHYS >= amp->am_mediasize || 592 off2ext(amp, amp->am_syncoff) != 593 off2ext(amp, amp->am_syncoff + MAXPHYS))) { 594 /* 595 * We are about to change extent, so mark previous one as clean. 596 */ 597 ext = off2ext(amp, amp->am_syncoff); 598 bit_clear(amp->am_syncmap, ext); 599 *syncextp = ext; 600 amp->am_syncoff = -1; 601 } 602 603 if (amp->am_syncoff == -1) { 604 /* 605 * Let's find first extent to synchronize. 606 */ 607 bit_ffs(amp->am_syncmap, amp->am_nextents, &ext); 608 if (ext == -1) { 609 amp->am_syncoff = -2; 610 return (-1); 611 } 612 amp->am_syncoff = ext2off(amp, ext); 613 } else { 614 /* 615 * We don't change extent, so just increase offset. 616 */ 617 amp->am_syncoff += MAXPHYS; 618 if (amp->am_syncoff >= amp->am_mediasize) { 619 amp->am_syncoff = -2; 620 return (-1); 621 } 622 } 623 624 syncoff = amp->am_syncoff; 625 left = ext2off(amp, off2ext(amp, syncoff)) + 626 amp->am_extentsize - syncoff; 627 if (syncoff + left > amp->am_mediasize) 628 left = amp->am_mediasize - syncoff; 629 if (left > MAXPHYS) 630 left = MAXPHYS; 631 632 assert(left >= 0 && left <= MAXPHYS); 633 assert(syncoff >= 0 && syncoff < amp->am_mediasize); 634 assert(syncoff + left >= 0 && syncoff + left <= amp->am_mediasize); 635 636 *lengthp = left; 637 return (syncoff); 638} 639 640/* 641 * Mark extent(s) containing the given region for synchronization. 642 * Most likely one of the components is unavailable. 643 */ 644bool 645activemap_need_sync(struct activemap *amp, off_t offset, off_t length) 646{ 647 bool modified; 648 off_t end; 649 int ext; 650 651 assert(amp->am_magic == ACTIVEMAP_MAGIC); 652 653 modified = false; 654 end = offset + length - 1; 655 656 for (ext = off2ext(amp, offset); ext <= off2ext(amp, end); ext++) { 657 if (bit_test(amp->am_syncmap, ext)) { 658 /* Already marked for synchronization. */ 659 assert(bit_test(amp->am_memmap, ext)); 660 continue; 661 } 662 bit_set(amp->am_syncmap, ext); 663 if (!bit_test(amp->am_memmap, ext)) { 664 bit_set(amp->am_memmap, ext); 665 amp->am_ndirty++; 666 } 667 amp->am_memtab[ext] += ext2reqs(amp, ext); 668 modified = true; 669 } 670 671 return (modified); 672} 673 674void 675activemap_dump(const struct activemap *amp) 676{ 677 int bit; 678 679 printf("M: "); 680 for (bit = 0; bit < amp->am_nextents; bit++) 681 printf("%d", bit_test(amp->am_memmap, bit) ? 1 : 0); 682 printf("\n"); 683 printf("D: "); 684 for (bit = 0; bit < amp->am_nextents; bit++) 685 printf("%d", bit_test(amp->am_diskmap, bit) ? 1 : 0); 686 printf("\n"); 687 printf("S: "); 688 for (bit = 0; bit < amp->am_nextents; bit++) 689 printf("%d", bit_test(amp->am_syncmap, bit) ? 1 : 0); 690 printf("\n"); 691} 692