activemap.c revision 223654
1/*- 2 * Copyright (c) 2009-2010 The FreeBSD Foundation 3 * All rights reserved. 4 * 5 * This software was developed by Pawel Jakub Dawidek under sponsorship from 6 * the FreeBSD Foundation. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: head/sbin/hastd/activemap.c 223654 2011-06-28 20:57:54Z trociny $"); 32 33#include <sys/param.h> /* powerof2() */ 34#include <sys/queue.h> 35 36#include <assert.h> 37#include <bitstring.h> 38#include <errno.h> 39#include <stdint.h> 40#include <stdio.h> 41#include <stdlib.h> 42#include <string.h> 43 44#include <activemap.h> 45 46#define ACTIVEMAP_MAGIC 0xac71e4 47struct activemap { 48 int am_magic; /* Magic value. */ 49 off_t am_mediasize; /* Media size in bytes. */ 50 uint32_t am_extentsize; /* Extent size in bytes, 51 must be power of 2. */ 52 uint8_t am_extentshift;/* 2 ^ extentbits == extentsize */ 53 int am_nextents; /* Number of extents. */ 54 size_t am_mapsize; /* Bitmap size in bytes. */ 55 uint16_t *am_memtab; /* An array that holds number of pending 56 writes per extent. */ 57 bitstr_t *am_diskmap; /* On-disk bitmap of dirty extents. */ 58 bitstr_t *am_memmap; /* In-memory bitmap of dirty extents. */ 59 size_t am_diskmapsize; /* Map size rounded up to sector size. */ 60 uint64_t am_ndirty; /* Number of dirty regions. */ 61 bitstr_t *am_syncmap; /* Bitmap of extents to sync. */ 62 off_t am_syncoff; /* Next synchronization offset. */ 63 TAILQ_HEAD(skeepdirty, keepdirty) am_keepdirty; /* List of extents that 64 we keep dirty to reduce bitmap 65 updates. */ 66 int am_nkeepdirty; /* Number of am_keepdirty elements. */ 67 int am_nkeepdirty_limit; /* Maximum number of am_keepdirty 68 elements. */ 69}; 70 71struct keepdirty { 72 int kd_extent; 73 TAILQ_ENTRY(keepdirty) kd_next; 74}; 75 76/* 77 * Helper function taken from sys/systm.h to calculate extentshift. 78 */ 79static uint32_t 80bitcount32(uint32_t x) 81{ 82 83 x = (x & 0x55555555) + ((x & 0xaaaaaaaa) >> 1); 84 x = (x & 0x33333333) + ((x & 0xcccccccc) >> 2); 85 x = (x + (x >> 4)) & 0x0f0f0f0f; 86 x = (x + (x >> 8)); 87 x = (x + (x >> 16)) & 0x000000ff; 88 return (x); 89} 90 91static __inline int 92off2ext(const struct activemap *amp, off_t offset) 93{ 94 int extent; 95 96 assert(offset >= 0 && offset < amp->am_mediasize); 97 extent = (offset >> amp->am_extentshift); 98 assert(extent >= 0 && extent < amp->am_nextents); 99 return (extent); 100} 101 102static __inline off_t 103ext2off(const struct activemap *amp, int extent) 104{ 105 off_t offset; 106 107 assert(extent >= 0 && extent < amp->am_nextents); 108 offset = ((off_t)extent << amp->am_extentshift); 109 assert(offset >= 0 && offset < amp->am_mediasize); 110 return (offset); 111} 112 113/* 114 * Function calculates number of requests needed to synchronize the given 115 * extent. 116 */ 117static __inline int 118ext2reqs(const struct activemap *amp, int ext) 119{ 120 off_t left; 121 122 if (ext < amp->am_nextents - 1) 123 return (((amp->am_extentsize - 1) / MAXPHYS) + 1); 124 125 assert(ext == amp->am_nextents - 1); 126 left = amp->am_mediasize % amp->am_extentsize; 127 if (left == 0) 128 left = amp->am_extentsize; 129 return (((left - 1) / MAXPHYS) + 1); 130} 131 132/* 133 * Initialize activemap structure and allocate memory for internal needs. 134 * Function returns 0 on success and -1 if any of the allocations failed. 135 */ 136int 137activemap_init(struct activemap **ampp, uint64_t mediasize, uint32_t extentsize, 138 uint32_t sectorsize, uint32_t keepdirty) 139{ 140 struct activemap *amp; 141 142 assert(ampp != NULL); 143 assert(mediasize > 0); 144 assert(extentsize > 0); 145 assert(powerof2(extentsize)); 146 assert(sectorsize > 0); 147 assert(powerof2(sectorsize)); 148 assert(keepdirty > 0); 149 150 amp = malloc(sizeof(*amp)); 151 if (amp == NULL) 152 return (-1); 153 154 amp->am_mediasize = mediasize; 155 amp->am_nkeepdirty_limit = keepdirty; 156 amp->am_extentsize = extentsize; 157 amp->am_extentshift = bitcount32(extentsize - 1); 158 amp->am_nextents = ((mediasize - 1) / extentsize) + 1; 159 amp->am_mapsize = sizeof(bitstr_t) * bitstr_size(amp->am_nextents); 160 amp->am_diskmapsize = roundup2(amp->am_mapsize, sectorsize); 161 amp->am_ndirty = 0; 162 amp->am_syncoff = -2; 163 TAILQ_INIT(&->am_keepdirty); 164 amp->am_nkeepdirty = 0; 165 166 amp->am_memtab = calloc(amp->am_nextents, sizeof(amp->am_memtab[0])); 167 amp->am_diskmap = calloc(1, amp->am_diskmapsize); 168 amp->am_memmap = bit_alloc(amp->am_nextents); 169 amp->am_syncmap = bit_alloc(amp->am_nextents); 170 171 /* 172 * Check to see if any of the allocations above failed. 173 */ 174 if (amp->am_memtab == NULL || amp->am_diskmap == NULL || 175 amp->am_memmap == NULL || amp->am_syncmap == NULL) { 176 if (amp->am_memtab != NULL) 177 free(amp->am_memtab); 178 if (amp->am_diskmap != NULL) 179 free(amp->am_diskmap); 180 if (amp->am_memmap != NULL) 181 free(amp->am_memmap); 182 if (amp->am_syncmap != NULL) 183 free(amp->am_syncmap); 184 amp->am_magic = 0; 185 free(amp); 186 errno = ENOMEM; 187 return (-1); 188 } 189 190 amp->am_magic = ACTIVEMAP_MAGIC; 191 *ampp = amp; 192 193 return (0); 194} 195 196static struct keepdirty * 197keepdirty_find(struct activemap *amp, int extent) 198{ 199 struct keepdirty *kd; 200 201 TAILQ_FOREACH(kd, &->am_keepdirty, kd_next) { 202 if (kd->kd_extent == extent) 203 break; 204 } 205 return (kd); 206} 207 208static bool 209keepdirty_add(struct activemap *amp, int extent) 210{ 211 struct keepdirty *kd; 212 213 kd = keepdirty_find(amp, extent); 214 if (kd != NULL) { 215 /* 216 * Only move element at the begining. 217 */ 218 TAILQ_REMOVE(&->am_keepdirty, kd, kd_next); 219 TAILQ_INSERT_HEAD(&->am_keepdirty, kd, kd_next); 220 return (false); 221 } 222 /* 223 * Add new element, but first remove the most unused one if 224 * we have too many. 225 */ 226 if (amp->am_nkeepdirty >= amp->am_nkeepdirty_limit) { 227 kd = TAILQ_LAST(&->am_keepdirty, skeepdirty); 228 assert(kd != NULL); 229 TAILQ_REMOVE(&->am_keepdirty, kd, kd_next); 230 amp->am_nkeepdirty--; 231 assert(amp->am_nkeepdirty > 0); 232 } 233 if (kd == NULL) 234 kd = malloc(sizeof(*kd)); 235 /* We can ignore allocation failure. */ 236 if (kd != NULL) { 237 kd->kd_extent = extent; 238 amp->am_nkeepdirty++; 239 TAILQ_INSERT_HEAD(&->am_keepdirty, kd, kd_next); 240 } 241 242 return (true); 243} 244 245static void 246keepdirty_fill(struct activemap *amp) 247{ 248 struct keepdirty *kd; 249 250 TAILQ_FOREACH(kd, &->am_keepdirty, kd_next) 251 bit_set(amp->am_diskmap, kd->kd_extent); 252} 253 254static void 255keepdirty_free(struct activemap *amp) 256{ 257 struct keepdirty *kd; 258 259 while ((kd = TAILQ_FIRST(&->am_keepdirty)) != NULL) { 260 TAILQ_REMOVE(&->am_keepdirty, kd, kd_next); 261 amp->am_nkeepdirty--; 262 free(kd); 263 } 264 assert(amp->am_nkeepdirty == 0); 265} 266 267/* 268 * Function frees resources allocated by activemap_init() function. 269 */ 270void 271activemap_free(struct activemap *amp) 272{ 273 274 assert(amp->am_magic == ACTIVEMAP_MAGIC); 275 276 amp->am_magic = 0; 277 278 keepdirty_free(amp); 279 free(amp->am_memtab); 280 free(amp->am_diskmap); 281 free(amp->am_memmap); 282 free(amp->am_syncmap); 283} 284 285/* 286 * Function should be called before we handle write requests. It updates 287 * internal structures and returns true if on-disk metadata should be updated. 288 */ 289bool 290activemap_write_start(struct activemap *amp, off_t offset, off_t length) 291{ 292 bool modified; 293 off_t end; 294 int ext; 295 296 assert(amp->am_magic == ACTIVEMAP_MAGIC); 297 assert(length > 0); 298 299 modified = false; 300 end = offset + length - 1; 301 302 for (ext = off2ext(amp, offset); ext <= off2ext(amp, end); ext++) { 303 /* 304 * If the number of pending writes is increased from 0, 305 * we have to mark the extent as dirty also in on-disk bitmap. 306 * By returning true we inform the caller that on-disk bitmap 307 * was modified and has to be flushed to disk. 308 */ 309 if (amp->am_memtab[ext]++ == 0) { 310 assert(!bit_test(amp->am_memmap, ext)); 311 bit_set(amp->am_memmap, ext); 312 amp->am_ndirty++; 313 } 314 if (keepdirty_add(amp, ext)) 315 modified = true; 316 } 317 318 return (modified); 319} 320 321/* 322 * Function should be called after receiving write confirmation. It updates 323 * internal structures and returns true if on-disk metadata should be updated. 324 */ 325bool 326activemap_write_complete(struct activemap *amp, off_t offset, off_t length) 327{ 328 bool modified; 329 off_t end; 330 int ext; 331 332 assert(amp->am_magic == ACTIVEMAP_MAGIC); 333 assert(length > 0); 334 335 modified = false; 336 end = offset + length - 1; 337 338 for (ext = off2ext(amp, offset); ext <= off2ext(amp, end); ext++) { 339 /* 340 * If the number of pending writes goes down to 0, we have to 341 * mark the extent as clean also in on-disk bitmap. 342 * By returning true we inform the caller that on-disk bitmap 343 * was modified and has to be flushed to disk. 344 */ 345 assert(amp->am_memtab[ext] > 0); 346 assert(bit_test(amp->am_memmap, ext)); 347 if (--amp->am_memtab[ext] == 0) { 348 bit_clear(amp->am_memmap, ext); 349 amp->am_ndirty--; 350 if (keepdirty_find(amp, ext) == NULL) 351 modified = true; 352 } 353 } 354 355 return (modified); 356} 357 358/* 359 * Function should be called after finishing synchronization of one extent. 360 * It returns true if on-disk metadata should be updated. 361 */ 362bool 363activemap_extent_complete(struct activemap *amp, int extent) 364{ 365 bool modified; 366 int reqs; 367 368 assert(amp->am_magic == ACTIVEMAP_MAGIC); 369 assert(extent >= 0 && extent < amp->am_nextents); 370 371 modified = false; 372 373 reqs = ext2reqs(amp, extent); 374 assert(amp->am_memtab[extent] >= reqs); 375 amp->am_memtab[extent] -= reqs; 376 assert(bit_test(amp->am_memmap, extent)); 377 if (amp->am_memtab[extent] == 0) { 378 bit_clear(amp->am_memmap, extent); 379 amp->am_ndirty--; 380 modified = true; 381 } 382 383 return (modified); 384} 385 386/* 387 * Function returns number of dirty regions. 388 */ 389uint64_t 390activemap_ndirty(const struct activemap *amp) 391{ 392 393 assert(amp->am_magic == ACTIVEMAP_MAGIC); 394 395 return (amp->am_ndirty); 396} 397 398/* 399 * Function compare on-disk bitmap and in-memory bitmap and returns true if 400 * they differ and should be flushed to the disk. 401 */ 402bool 403activemap_differ(const struct activemap *amp) 404{ 405 406 assert(amp->am_magic == ACTIVEMAP_MAGIC); 407 408 return (memcmp(amp->am_diskmap, amp->am_memmap, 409 amp->am_mapsize) != 0); 410} 411 412/* 413 * Function returns number of bytes used by bitmap. 414 */ 415size_t 416activemap_size(const struct activemap *amp) 417{ 418 419 assert(amp->am_magic == ACTIVEMAP_MAGIC); 420 421 return (amp->am_mapsize); 422} 423 424/* 425 * Function returns number of bytes needed for storing on-disk bitmap. 426 * This is the same as activemap_size(), but rounded up to sector size. 427 */ 428size_t 429activemap_ondisk_size(const struct activemap *amp) 430{ 431 432 assert(amp->am_magic == ACTIVEMAP_MAGIC); 433 434 return (amp->am_diskmapsize); 435} 436 437/* 438 * Function copies the given buffer read from disk to the internal bitmap. 439 */ 440void 441activemap_copyin(struct activemap *amp, const unsigned char *buf, size_t size) 442{ 443 int ext; 444 445 assert(amp->am_magic == ACTIVEMAP_MAGIC); 446 assert(size >= amp->am_mapsize); 447 448 memcpy(amp->am_diskmap, buf, amp->am_mapsize); 449 memcpy(amp->am_memmap, buf, amp->am_mapsize); 450 memcpy(amp->am_syncmap, buf, amp->am_mapsize); 451 452 bit_ffs(amp->am_memmap, amp->am_nextents, &ext); 453 if (ext == -1) { 454 /* There are no dirty extents, so we can leave now. */ 455 return; 456 } 457 /* 458 * Set synchronization offset to the first dirty extent. 459 */ 460 activemap_sync_rewind(amp); 461 /* 462 * We have dirty extents and we want them to stay that way until 463 * we synchronize, so we set number of pending writes to number 464 * of requests needed to synchronize one extent. 465 */ 466 amp->am_ndirty = 0; 467 for (; ext < amp->am_nextents; ext++) { 468 if (bit_test(amp->am_memmap, ext)) { 469 amp->am_memtab[ext] = ext2reqs(amp, ext); 470 amp->am_ndirty++; 471 } 472 } 473} 474 475/* 476 * Function merges the given bitmap with existing one. 477 */ 478void 479activemap_merge(struct activemap *amp, const unsigned char *buf, size_t size) 480{ 481 bitstr_t *remmap = __DECONST(bitstr_t *, buf); 482 int ext; 483 484 assert(amp->am_magic == ACTIVEMAP_MAGIC); 485 assert(size >= amp->am_mapsize); 486 487 bit_ffs(remmap, amp->am_nextents, &ext); 488 if (ext == -1) { 489 /* There are no dirty extents, so we can leave now. */ 490 return; 491 } 492 /* 493 * We have dirty extents and we want them to stay that way until 494 * we synchronize, so we set number of pending writes to number 495 * of requests needed to synchronize one extent. 496 */ 497 for (; ext < amp->am_nextents; ext++) { 498 /* Local extent already dirty. */ 499 if (bit_test(amp->am_syncmap, ext)) 500 continue; 501 /* Remote extent isn't dirty. */ 502 if (!bit_test(remmap, ext)) 503 continue; 504 bit_set(amp->am_syncmap, ext); 505 bit_set(amp->am_memmap, ext); 506 bit_set(amp->am_diskmap, ext); 507 if (amp->am_memtab[ext] == 0) 508 amp->am_ndirty++; 509 amp->am_memtab[ext] = ext2reqs(amp, ext); 510 } 511 /* 512 * Set synchronization offset to the first dirty extent. 513 */ 514 activemap_sync_rewind(amp); 515} 516 517/* 518 * Function returns pointer to internal bitmap that should be written to disk. 519 */ 520const unsigned char * 521activemap_bitmap(struct activemap *amp, size_t *sizep) 522{ 523 524 assert(amp->am_magic == ACTIVEMAP_MAGIC); 525 526 if (sizep != NULL) 527 *sizep = amp->am_diskmapsize; 528 memcpy(amp->am_diskmap, amp->am_memmap, amp->am_mapsize); 529 keepdirty_fill(amp); 530 return ((const unsigned char *)amp->am_diskmap); 531} 532 533/* 534 * Function calculates size needed to store bitmap on disk. 535 */ 536size_t 537activemap_calc_ondisk_size(uint64_t mediasize, uint32_t extentsize, 538 uint32_t sectorsize) 539{ 540 uint64_t nextents, mapsize; 541 542 assert(mediasize > 0); 543 assert(extentsize > 0); 544 assert(powerof2(extentsize)); 545 assert(sectorsize > 0); 546 assert(powerof2(sectorsize)); 547 548 nextents = ((mediasize - 1) / extentsize) + 1; 549 mapsize = sizeof(bitstr_t) * bitstr_size(nextents); 550 return (roundup2(mapsize, sectorsize)); 551} 552 553/* 554 * Set synchronization offset to the first dirty extent. 555 */ 556void 557activemap_sync_rewind(struct activemap *amp) 558{ 559 int ext; 560 561 assert(amp->am_magic == ACTIVEMAP_MAGIC); 562 563 bit_ffs(amp->am_syncmap, amp->am_nextents, &ext); 564 if (ext == -1) { 565 /* There are no extents to synchronize. */ 566 amp->am_syncoff = -2; 567 return; 568 } 569 /* 570 * Mark that we want to start synchronization from the begining. 571 */ 572 amp->am_syncoff = -1; 573} 574 575/* 576 * Return next offset of where we should synchronize. 577 */ 578off_t 579activemap_sync_offset(struct activemap *amp, off_t *lengthp, int *syncextp) 580{ 581 off_t syncoff, left; 582 int ext; 583 584 assert(amp->am_magic == ACTIVEMAP_MAGIC); 585 assert(lengthp != NULL); 586 assert(syncextp != NULL); 587 588 *syncextp = -1; 589 590 if (amp->am_syncoff == -2) 591 return (-1); 592 593 if (amp->am_syncoff >= 0 && 594 (amp->am_syncoff + MAXPHYS >= amp->am_mediasize || 595 off2ext(amp, amp->am_syncoff) != 596 off2ext(amp, amp->am_syncoff + MAXPHYS))) { 597 /* 598 * We are about to change extent, so mark previous one as clean. 599 */ 600 ext = off2ext(amp, amp->am_syncoff); 601 bit_clear(amp->am_syncmap, ext); 602 *syncextp = ext; 603 amp->am_syncoff = -1; 604 } 605 606 if (amp->am_syncoff == -1) { 607 /* 608 * Let's find first extent to synchronize. 609 */ 610 bit_ffs(amp->am_syncmap, amp->am_nextents, &ext); 611 if (ext == -1) { 612 amp->am_syncoff = -2; 613 return (-1); 614 } 615 amp->am_syncoff = ext2off(amp, ext); 616 } else { 617 /* 618 * We don't change extent, so just increase offset. 619 */ 620 amp->am_syncoff += MAXPHYS; 621 if (amp->am_syncoff >= amp->am_mediasize) { 622 amp->am_syncoff = -2; 623 return (-1); 624 } 625 } 626 627 syncoff = amp->am_syncoff; 628 left = ext2off(amp, off2ext(amp, syncoff)) + 629 amp->am_extentsize - syncoff; 630 if (syncoff + left > amp->am_mediasize) 631 left = amp->am_mediasize - syncoff; 632 if (left > MAXPHYS) 633 left = MAXPHYS; 634 635 assert(left >= 0 && left <= MAXPHYS); 636 assert(syncoff >= 0 && syncoff < amp->am_mediasize); 637 assert(syncoff + left >= 0 && syncoff + left <= amp->am_mediasize); 638 639 *lengthp = left; 640 return (syncoff); 641} 642 643/* 644 * Mark extent(s) containing the given region for synchronization. 645 * Most likely one of the components is unavailable. 646 */ 647bool 648activemap_need_sync(struct activemap *amp, off_t offset, off_t length) 649{ 650 bool modified; 651 off_t end; 652 int ext; 653 654 assert(amp->am_magic == ACTIVEMAP_MAGIC); 655 656 modified = false; 657 end = offset + length - 1; 658 659 for (ext = off2ext(amp, offset); ext <= off2ext(amp, end); ext++) { 660 if (bit_test(amp->am_syncmap, ext)) { 661 /* Already marked for synchronization. */ 662 assert(bit_test(amp->am_memmap, ext)); 663 continue; 664 } 665 bit_set(amp->am_syncmap, ext); 666 if (!bit_test(amp->am_memmap, ext)) { 667 bit_set(amp->am_memmap, ext); 668 amp->am_ndirty++; 669 } 670 amp->am_memtab[ext] += ext2reqs(amp, ext); 671 modified = true; 672 } 673 674 return (modified); 675} 676 677void 678activemap_dump(const struct activemap *amp) 679{ 680 int bit; 681 682 printf("M: "); 683 for (bit = 0; bit < amp->am_nextents; bit++) 684 printf("%d", bit_test(amp->am_memmap, bit) ? 1 : 0); 685 printf("\n"); 686 printf("D: "); 687 for (bit = 0; bit < amp->am_nextents; bit++) 688 printf("%d", bit_test(amp->am_diskmap, bit) ? 1 : 0); 689 printf("\n"); 690 printf("S: "); 691 for (bit = 0; bit < amp->am_nextents; bit++) 692 printf("%d", bit_test(amp->am_syncmap, bit) ? 1 : 0); 693 printf("\n"); 694} 695