/* activemap.c revision 231017 */
1238106Sdes/*- 2294190Sdes * Copyright (c) 2009-2010 The FreeBSD Foundation 3238106Sdes * All rights reserved. 4238106Sdes * 5238106Sdes * This software was developed by Pawel Jakub Dawidek under sponsorship from 6238106Sdes * the FreeBSD Foundation. 7238106Sdes * 8238106Sdes * Redistribution and use in source and binary forms, with or without 9238106Sdes * modification, are permitted provided that the following conditions 10238106Sdes * are met: 11238106Sdes * 1. Redistributions of source code must retain the above copyright 12238106Sdes * notice, this list of conditions and the following disclaimer. 13238106Sdes * 2. Redistributions in binary form must reproduce the above copyright 14238106Sdes * notice, this list of conditions and the following disclaimer in the 15238106Sdes * documentation and/or other materials provided with the distribution. 16238106Sdes * 17238106Sdes * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 18238106Sdes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19238106Sdes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20238106Sdes * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 21238106Sdes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22238106Sdes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23238106Sdes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24269257Sdes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25269257Sdes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26269257Sdes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27269257Sdes * SUCH DAMAGE. 
28269257Sdes */ 29269257Sdes 30269257Sdes#include <sys/cdefs.h> 31269257Sdes__FBSDID("$FreeBSD: stable/9/sbin/hastd/activemap.c 231017 2012-02-05 15:51:19Z trociny $"); 32269257Sdes 33269257Sdes#include <sys/param.h> /* powerof2() */ 34238106Sdes#include <sys/queue.h> 35238106Sdes 36238106Sdes#include <bitstring.h> 37238106Sdes#include <errno.h> 38238106Sdes#include <stdint.h> 39238106Sdes#include <stdio.h> 40238106Sdes#include <stdlib.h> 41294190Sdes#include <string.h> 42238106Sdes 43238106Sdes#include <pjdlog.h> 44238106Sdes 45238106Sdes#include "activemap.h" 46238106Sdes 47238106Sdes#ifndef PJDLOG_ASSERT 48238106Sdes#include <assert.h> 49238106Sdes#define PJDLOG_ASSERT(...) assert(__VA_ARGS__) 50238106Sdes#endif 51238106Sdes 52238106Sdes#define ACTIVEMAP_MAGIC 0xac71e4 53238106Sdesstruct activemap { 54238106Sdes int am_magic; /* Magic value. */ 55238106Sdes off_t am_mediasize; /* Media size in bytes. */ 56238106Sdes uint32_t am_extentsize; /* Extent size in bytes, 57238106Sdes must be power of 2. */ 58238106Sdes uint8_t am_extentshift;/* 2 ^ extentbits == extentsize */ 59238106Sdes int am_nextents; /* Number of extents. */ 60238106Sdes size_t am_mapsize; /* Bitmap size in bytes. */ 61238106Sdes uint16_t *am_memtab; /* An array that holds number of pending 62238106Sdes writes per extent. */ 63238106Sdes bitstr_t *am_diskmap; /* On-disk bitmap of dirty extents. */ 64238106Sdes bitstr_t *am_memmap; /* In-memory bitmap of dirty extents. */ 65238106Sdes size_t am_diskmapsize; /* Map size rounded up to sector size. */ 66238106Sdes uint64_t am_ndirty; /* Number of dirty regions. */ 67238106Sdes bitstr_t *am_syncmap; /* Bitmap of extents to sync. */ 68238106Sdes off_t am_syncoff; /* Next synchronization offset. */ 69238106Sdes TAILQ_HEAD(skeepdirty, keepdirty) am_keepdirty; /* List of extents that 70238106Sdes we keep dirty to reduce bitmap 71238106Sdes updates. */ 72238106Sdes int am_nkeepdirty; /* Number of am_keepdirty elements. 
*/ 73238106Sdes int am_nkeepdirty_limit; /* Maximum number of am_keepdirty 74238106Sdes elements. */ 75238106Sdes}; 76238106Sdes 77238106Sdesstruct keepdirty { 78238106Sdes int kd_extent; 79238106Sdes TAILQ_ENTRY(keepdirty) kd_next; 80238106Sdes}; 81238106Sdes 82238106Sdes/* 83238106Sdes * Helper function taken from sys/systm.h to calculate extentshift. 84238106Sdes */ 85238106Sdesstatic uint32_t 86238106Sdesbitcount32(uint32_t x) 87238106Sdes{ 88238106Sdes 89238106Sdes x = (x & 0x55555555) + ((x & 0xaaaaaaaa) >> 1); 90238106Sdes x = (x & 0x33333333) + ((x & 0xcccccccc) >> 2); 91238106Sdes x = (x + (x >> 4)) & 0x0f0f0f0f; 92238106Sdes x = (x + (x >> 8)); 93238106Sdes x = (x + (x >> 16)) & 0x000000ff; 94238106Sdes return (x); 95238106Sdes} 96238106Sdes 97238106Sdesstatic __inline int 98238106Sdesoff2ext(const struct activemap *amp, off_t offset) 99238106Sdes{ 100238106Sdes int extent; 101238106Sdes 102238106Sdes PJDLOG_ASSERT(offset >= 0 && offset < amp->am_mediasize); 103238106Sdes extent = (offset >> amp->am_extentshift); 104238106Sdes PJDLOG_ASSERT(extent >= 0 && extent < amp->am_nextents); 105238106Sdes return (extent); 106238106Sdes} 107238106Sdes 108238106Sdesstatic __inline off_t 109238106Sdesext2off(const struct activemap *amp, int extent) 110238106Sdes{ 111238106Sdes off_t offset; 112238106Sdes 113238106Sdes PJDLOG_ASSERT(extent >= 0 && extent < amp->am_nextents); 114238106Sdes offset = ((off_t)extent << amp->am_extentshift); 115238106Sdes PJDLOG_ASSERT(offset >= 0 && offset < amp->am_mediasize); 116238106Sdes return (offset); 117238106Sdes} 118238106Sdes 119238106Sdes/* 120238106Sdes * Function calculates number of requests needed to synchronize the given 121238106Sdes * extent. 
122238106Sdes */ 123238106Sdesstatic __inline int 124238106Sdesext2reqs(const struct activemap *amp, int ext) 125238106Sdes{ 126238106Sdes off_t left; 127238106Sdes 128238106Sdes if (ext < amp->am_nextents - 1) 129238106Sdes return (((amp->am_extentsize - 1) / MAXPHYS) + 1); 130238106Sdes 131238106Sdes PJDLOG_ASSERT(ext == amp->am_nextents - 1); 132238106Sdes left = amp->am_mediasize % amp->am_extentsize; 133238106Sdes if (left == 0) 134238106Sdes left = amp->am_extentsize; 135238106Sdes return (((left - 1) / MAXPHYS) + 1); 136238106Sdes} 137238106Sdes 138238106Sdes/* 139238106Sdes * Initialize activemap structure and allocate memory for internal needs. 140238106Sdes * Function returns 0 on success and -1 if any of the allocations failed. 141238106Sdes */ 142238106Sdesint 143238106Sdesactivemap_init(struct activemap **ampp, uint64_t mediasize, uint32_t extentsize, 144238106Sdes uint32_t sectorsize, uint32_t keepdirty) 145238106Sdes{ 146238106Sdes struct activemap *amp; 147238106Sdes 148238106Sdes PJDLOG_ASSERT(ampp != NULL); 149238106Sdes PJDLOG_ASSERT(mediasize > 0); 150238106Sdes PJDLOG_ASSERT(extentsize > 0); 151238106Sdes PJDLOG_ASSERT(powerof2(extentsize)); 152238106Sdes PJDLOG_ASSERT(sectorsize > 0); 153238106Sdes PJDLOG_ASSERT(powerof2(sectorsize)); 154238106Sdes PJDLOG_ASSERT(keepdirty > 0); 155238106Sdes 156238106Sdes amp = malloc(sizeof(*amp)); 157238106Sdes if (amp == NULL) 158238106Sdes return (-1); 159238106Sdes 160238106Sdes amp->am_mediasize = mediasize; 161238106Sdes amp->am_nkeepdirty_limit = keepdirty; 162238106Sdes amp->am_extentsize = extentsize; 163238106Sdes amp->am_extentshift = bitcount32(extentsize - 1); 164238106Sdes amp->am_nextents = ((mediasize - 1) / extentsize) + 1; 165238106Sdes amp->am_mapsize = sizeof(bitstr_t) * bitstr_size(amp->am_nextents); 166238106Sdes amp->am_diskmapsize = roundup2(amp->am_mapsize, sectorsize); 167238106Sdes amp->am_ndirty = 0; 168238106Sdes amp->am_syncoff = -2; 169238106Sdes TAILQ_INIT(&->am_keepdirty); 
170238106Sdes amp->am_nkeepdirty = 0; 171238106Sdes 172238106Sdes amp->am_memtab = calloc(amp->am_nextents, sizeof(amp->am_memtab[0])); 173238106Sdes amp->am_diskmap = calloc(1, amp->am_diskmapsize); 174238106Sdes amp->am_memmap = bit_alloc(amp->am_nextents); 175238106Sdes amp->am_syncmap = bit_alloc(amp->am_nextents); 176238106Sdes 177238106Sdes /* 178238106Sdes * Check to see if any of the allocations above failed. 179238106Sdes */ 180238106Sdes if (amp->am_memtab == NULL || amp->am_diskmap == NULL || 181238106Sdes amp->am_memmap == NULL || amp->am_syncmap == NULL) { 182238106Sdes if (amp->am_memtab != NULL) 183238106Sdes free(amp->am_memtab); 184238106Sdes if (amp->am_diskmap != NULL) 185238106Sdes free(amp->am_diskmap); 186238106Sdes if (amp->am_memmap != NULL) 187238106Sdes free(amp->am_memmap); 188238106Sdes if (amp->am_syncmap != NULL) 189238106Sdes free(amp->am_syncmap); 190238106Sdes amp->am_magic = 0; 191238106Sdes free(amp); 192238106Sdes errno = ENOMEM; 193238106Sdes return (-1); 194238106Sdes } 195238106Sdes 196238106Sdes amp->am_magic = ACTIVEMAP_MAGIC; 197238106Sdes *ampp = amp; 198238106Sdes 199269257Sdes return (0); 200238106Sdes} 201238106Sdes 202238106Sdesstatic struct keepdirty * 203238106Sdeskeepdirty_find(struct activemap *amp, int extent) 204238106Sdes{ 205238106Sdes struct keepdirty *kd; 206238106Sdes 207238106Sdes TAILQ_FOREACH(kd, &->am_keepdirty, kd_next) { 208238106Sdes if (kd->kd_extent == extent) 209238106Sdes break; 210238106Sdes } 211238106Sdes return (kd); 212238106Sdes} 213238106Sdes 214238106Sdesstatic bool 215238106Sdeskeepdirty_add(struct activemap *amp, int extent) 216238106Sdes{ 217238106Sdes struct keepdirty *kd; 218238106Sdes 219238106Sdes kd = keepdirty_find(amp, extent); 220238106Sdes if (kd != NULL) { 221238106Sdes /* 222238106Sdes * Only move element at the beginning. 
223238106Sdes */ 224238106Sdes TAILQ_REMOVE(&->am_keepdirty, kd, kd_next); 225238106Sdes TAILQ_INSERT_HEAD(&->am_keepdirty, kd, kd_next); 226238106Sdes return (false); 227238106Sdes } 228238106Sdes /* 229238106Sdes * Add new element, but first remove the most unused one if 230238106Sdes * we have too many. 231238106Sdes */ 232238106Sdes if (amp->am_nkeepdirty >= amp->am_nkeepdirty_limit) { 233238106Sdes kd = TAILQ_LAST(&->am_keepdirty, skeepdirty); 234238106Sdes PJDLOG_ASSERT(kd != NULL); 235238106Sdes TAILQ_REMOVE(&->am_keepdirty, kd, kd_next); 236238106Sdes amp->am_nkeepdirty--; 237238106Sdes PJDLOG_ASSERT(amp->am_nkeepdirty > 0); 238238106Sdes } 239238106Sdes if (kd == NULL) 240238106Sdes kd = malloc(sizeof(*kd)); 241238106Sdes /* We can ignore allocation failure. */ 242238106Sdes if (kd != NULL) { 243238106Sdes kd->kd_extent = extent; 244238106Sdes amp->am_nkeepdirty++; 245238106Sdes TAILQ_INSERT_HEAD(&->am_keepdirty, kd, kd_next); 246238106Sdes } 247238106Sdes 248238106Sdes return (true); 249238106Sdes} 250238106Sdes 251238106Sdesstatic void 252238106Sdeskeepdirty_fill(struct activemap *amp) 253238106Sdes{ 254238106Sdes struct keepdirty *kd; 255238106Sdes 256238106Sdes TAILQ_FOREACH(kd, &->am_keepdirty, kd_next) 257238106Sdes bit_set(amp->am_diskmap, kd->kd_extent); 258238106Sdes} 259238106Sdes 260238106Sdesstatic void 261238106Sdeskeepdirty_free(struct activemap *amp) 262238106Sdes{ 263238106Sdes struct keepdirty *kd; 264238106Sdes 265238106Sdes while ((kd = TAILQ_FIRST(&->am_keepdirty)) != NULL) { 266238106Sdes TAILQ_REMOVE(&->am_keepdirty, kd, kd_next); 267238106Sdes amp->am_nkeepdirty--; 268238106Sdes free(kd); 269238106Sdes } 270238106Sdes PJDLOG_ASSERT(amp->am_nkeepdirty == 0); 271238106Sdes} 272238106Sdes 273238106Sdes/* 274238106Sdes * Function frees resources allocated by activemap_init() function. 
275238106Sdes */ 276238106Sdesvoid 277238106Sdesactivemap_free(struct activemap *amp) 278238106Sdes{ 279238106Sdes 280238106Sdes PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC); 281238106Sdes 282294190Sdes amp->am_magic = 0; 283238106Sdes 284238106Sdes keepdirty_free(amp); 285238106Sdes free(amp->am_memtab); 286238106Sdes free(amp->am_diskmap); 287238106Sdes free(amp->am_memmap); 288238106Sdes free(amp->am_syncmap); 289238106Sdes} 290238106Sdes 291238106Sdes/* 292238106Sdes * Function should be called before we handle write requests. It updates 293238106Sdes * internal structures and returns true if on-disk metadata should be updated. 294238106Sdes */ 295238106Sdesbool 296238106Sdesactivemap_write_start(struct activemap *amp, off_t offset, off_t length) 297238106Sdes{ 298238106Sdes bool modified; 299238106Sdes off_t end; 300238106Sdes int ext; 301238106Sdes 302238106Sdes PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC); 303238106Sdes PJDLOG_ASSERT(length > 0); 304238106Sdes 305238106Sdes modified = false; 306238106Sdes end = offset + length - 1; 307238106Sdes 308238106Sdes for (ext = off2ext(amp, offset); ext <= off2ext(amp, end); ext++) { 309238106Sdes /* 310238106Sdes * If the number of pending writes is increased from 0, 311238106Sdes * we have to mark the extent as dirty also in on-disk bitmap. 312238106Sdes * By returning true we inform the caller that on-disk bitmap 313238106Sdes * was modified and has to be flushed to disk. 314238106Sdes */ 315238106Sdes if (amp->am_memtab[ext]++ == 0) { 316238106Sdes PJDLOG_ASSERT(!bit_test(amp->am_memmap, ext)); 317238106Sdes bit_set(amp->am_memmap, ext); 318238106Sdes amp->am_ndirty++; 319238106Sdes } 320238106Sdes if (keepdirty_add(amp, ext)) 321238106Sdes modified = true; 322238106Sdes } 323238106Sdes 324238106Sdes return (modified); 325238106Sdes} 326238106Sdes 327238106Sdes/* 328238106Sdes * Function should be called after receiving write confirmation. 
It updates 329238106Sdes * internal structures and returns true if on-disk metadata should be updated. 330238106Sdes */ 331238106Sdesbool 332238106Sdesactivemap_write_complete(struct activemap *amp, off_t offset, off_t length) 333238106Sdes{ 334238106Sdes bool modified; 335238106Sdes off_t end; 336238106Sdes int ext; 337238106Sdes 338238106Sdes PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC); 339238106Sdes PJDLOG_ASSERT(length > 0); 340238106Sdes 341238106Sdes modified = false; 342238106Sdes end = offset + length - 1; 343294190Sdes 344294190Sdes for (ext = off2ext(amp, offset); ext <= off2ext(amp, end); ext++) { 345294190Sdes /* 346294190Sdes * If the number of pending writes goes down to 0, we have to 347294190Sdes * mark the extent as clean also in on-disk bitmap. 348294190Sdes * By returning true we inform the caller that on-disk bitmap 349294190Sdes * was modified and has to be flushed to disk. 350294190Sdes */ 351294190Sdes PJDLOG_ASSERT(amp->am_memtab[ext] > 0); 352294190Sdes PJDLOG_ASSERT(bit_test(amp->am_memmap, ext)); 353294190Sdes if (--amp->am_memtab[ext] == 0) { 354294190Sdes bit_clear(amp->am_memmap, ext); 355294190Sdes amp->am_ndirty--; 356294190Sdes if (keepdirty_find(amp, ext) == NULL) 357294190Sdes modified = true; 358294190Sdes } 359294190Sdes } 360294190Sdes 361294190Sdes return (modified); 362294190Sdes} 363294190Sdes 364294190Sdes/* 365238106Sdes * Function should be called after finishing synchronization of one extent. 366238106Sdes * It returns true if on-disk metadata should be updated. 
367238106Sdes */ 368238106Sdesbool 369238106Sdesactivemap_extent_complete(struct activemap *amp, int extent) 370238106Sdes{ 371238106Sdes bool modified; 372238106Sdes int reqs; 373238106Sdes 374238106Sdes PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC); 375238106Sdes PJDLOG_ASSERT(extent >= 0 && extent < amp->am_nextents); 376238106Sdes 377238106Sdes modified = false; 378238106Sdes 379238106Sdes reqs = ext2reqs(amp, extent); 380238106Sdes PJDLOG_ASSERT(amp->am_memtab[extent] >= reqs); 381238106Sdes amp->am_memtab[extent] -= reqs; 382238106Sdes PJDLOG_ASSERT(bit_test(amp->am_memmap, extent)); 383238106Sdes if (amp->am_memtab[extent] == 0) { 384238106Sdes bit_clear(amp->am_memmap, extent); 385238106Sdes amp->am_ndirty--; 386238106Sdes modified = true; 387238106Sdes } 388238106Sdes 389238106Sdes return (modified); 390238106Sdes} 391238106Sdes 392238106Sdes/* 393238106Sdes * Function returns number of dirty regions. 394238106Sdes */ 395238106Sdesuint64_t 396238106Sdesactivemap_ndirty(const struct activemap *amp) 397238106Sdes{ 398238106Sdes 399238106Sdes PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC); 400238106Sdes 401238106Sdes return (amp->am_ndirty); 402238106Sdes} 403238106Sdes 404238106Sdes/* 405238106Sdes * Function compare on-disk bitmap and in-memory bitmap and returns true if 406238106Sdes * they differ and should be flushed to the disk. 407238106Sdes */ 408238106Sdesbool 409238106Sdesactivemap_differ(const struct activemap *amp) 410238106Sdes{ 411238106Sdes 412238106Sdes PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC); 413238106Sdes 414238106Sdes return (memcmp(amp->am_diskmap, amp->am_memmap, 415238106Sdes amp->am_mapsize) != 0); 416238106Sdes} 417238106Sdes 418238106Sdes/* 419238106Sdes * Function returns number of bytes used by bitmap. 
420238106Sdes */ 421238106Sdessize_t 422238106Sdesactivemap_size(const struct activemap *amp) 423238106Sdes{ 424238106Sdes 425238106Sdes PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC); 426238106Sdes 427238106Sdes return (amp->am_mapsize); 428238106Sdes} 429238106Sdes 430238106Sdes/* 431238106Sdes * Function returns number of bytes needed for storing on-disk bitmap. 432238106Sdes * This is the same as activemap_size(), but rounded up to sector size. 433238106Sdes */ 434238106Sdessize_t 435238106Sdesactivemap_ondisk_size(const struct activemap *amp) 436238106Sdes{ 437238106Sdes 438238106Sdes PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC); 439238106Sdes 440238106Sdes return (amp->am_diskmapsize); 441238106Sdes} 442238106Sdes 443238106Sdes/* 444238106Sdes * Function copies the given buffer read from disk to the internal bitmap. 445238106Sdes */ 446238106Sdesvoid 447238106Sdesactivemap_copyin(struct activemap *amp, const unsigned char *buf, size_t size) 448238106Sdes{ 449238106Sdes int ext; 450238106Sdes 451238106Sdes PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC); 452238106Sdes PJDLOG_ASSERT(size >= amp->am_mapsize); 453238106Sdes 454238106Sdes memcpy(amp->am_diskmap, buf, amp->am_mapsize); 455238106Sdes memcpy(amp->am_memmap, buf, amp->am_mapsize); 456238106Sdes memcpy(amp->am_syncmap, buf, amp->am_mapsize); 457238106Sdes 458238106Sdes bit_ffs(amp->am_memmap, amp->am_nextents, &ext); 459238106Sdes if (ext == -1) { 460238106Sdes /* There are no dirty extents, so we can leave now. */ 461238106Sdes return; 462238106Sdes } 463238106Sdes /* 464238106Sdes * Set synchronization offset to the first dirty extent. 465238106Sdes */ 466238106Sdes activemap_sync_rewind(amp); 467238106Sdes /* 468238106Sdes * We have dirty extents and we want them to stay that way until 469238106Sdes * we synchronize, so we set number of pending writes to number 470238106Sdes * of requests needed to synchronize one extent. 
471238106Sdes */ 472238106Sdes amp->am_ndirty = 0; 473238106Sdes for (; ext < amp->am_nextents; ext++) { 474238106Sdes if (bit_test(amp->am_memmap, ext)) { 475238106Sdes amp->am_memtab[ext] = ext2reqs(amp, ext); 476238106Sdes amp->am_ndirty++; 477238106Sdes } 478238106Sdes } 479238106Sdes} 480238106Sdes 481238106Sdes/* 482238106Sdes * Function merges the given bitmap with existing one. 483238106Sdes */ 484238106Sdesvoid 485238106Sdesactivemap_merge(struct activemap *amp, const unsigned char *buf, size_t size) 486238106Sdes{ 487238106Sdes bitstr_t *remmap = __DECONST(bitstr_t *, buf); 488238106Sdes int ext; 489238106Sdes 490238106Sdes PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC); 491238106Sdes PJDLOG_ASSERT(size >= amp->am_mapsize); 492238106Sdes 493238106Sdes bit_ffs(remmap, amp->am_nextents, &ext); 494238106Sdes if (ext == -1) { 495238106Sdes /* There are no dirty extents, so we can leave now. */ 496238106Sdes return; 497238106Sdes } 498238106Sdes /* 499238106Sdes * We have dirty extents and we want them to stay that way until 500238106Sdes * we synchronize, so we set number of pending writes to number 501238106Sdes * of requests needed to synchronize one extent. 502238106Sdes */ 503238106Sdes for (; ext < amp->am_nextents; ext++) { 504238106Sdes /* Local extent already dirty. */ 505238106Sdes if (bit_test(amp->am_syncmap, ext)) 506238106Sdes continue; 507238106Sdes /* Remote extent isn't dirty. */ 508238106Sdes if (!bit_test(remmap, ext)) 509238106Sdes continue; 510238106Sdes bit_set(amp->am_syncmap, ext); 511238106Sdes bit_set(amp->am_memmap, ext); 512238106Sdes bit_set(amp->am_diskmap, ext); 513238106Sdes if (amp->am_memtab[ext] == 0) 514238106Sdes amp->am_ndirty++; 515238106Sdes amp->am_memtab[ext] = ext2reqs(amp, ext); 516238106Sdes } 517238106Sdes /* 518238106Sdes * Set synchronization offset to the first dirty extent. 
519238106Sdes */ 520238106Sdes activemap_sync_rewind(amp); 521238106Sdes} 522238106Sdes 523238106Sdes/* 524238106Sdes * Function returns pointer to internal bitmap that should be written to disk. 525238106Sdes */ 526238106Sdesconst unsigned char * 527238106Sdesactivemap_bitmap(struct activemap *amp, size_t *sizep) 528238106Sdes{ 529238106Sdes 530238106Sdes PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC); 531238106Sdes 532238106Sdes if (sizep != NULL) 533238106Sdes *sizep = amp->am_diskmapsize; 534238106Sdes memcpy(amp->am_diskmap, amp->am_memmap, amp->am_mapsize); 535238106Sdes keepdirty_fill(amp); 536238106Sdes return ((const unsigned char *)amp->am_diskmap); 537238106Sdes} 538238106Sdes 539238106Sdes/* 540238106Sdes * Function calculates size needed to store bitmap on disk. 541238106Sdes */ 542238106Sdessize_t 543238106Sdesactivemap_calc_ondisk_size(uint64_t mediasize, uint32_t extentsize, 544238106Sdes uint32_t sectorsize) 545238106Sdes{ 546238106Sdes uint64_t nextents, mapsize; 547238106Sdes 548238106Sdes PJDLOG_ASSERT(mediasize > 0); 549238106Sdes PJDLOG_ASSERT(extentsize > 0); 550238106Sdes PJDLOG_ASSERT(powerof2(extentsize)); 551238106Sdes PJDLOG_ASSERT(sectorsize > 0); 552238106Sdes PJDLOG_ASSERT(powerof2(sectorsize)); 553238106Sdes 554238106Sdes nextents = ((mediasize - 1) / extentsize) + 1; 555238106Sdes mapsize = sizeof(bitstr_t) * bitstr_size(nextents); 556238106Sdes return (roundup2(mapsize, sectorsize)); 557238106Sdes} 558238106Sdes 559238106Sdes/* 560238106Sdes * Set synchronization offset to the first dirty extent. 561238106Sdes */ 562238106Sdesvoid 563238106Sdesactivemap_sync_rewind(struct activemap *amp) 564238106Sdes{ 565238106Sdes int ext; 566238106Sdes 567238106Sdes PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC); 568238106Sdes 569238106Sdes bit_ffs(amp->am_syncmap, amp->am_nextents, &ext); 570238106Sdes if (ext == -1) { 571238106Sdes /* There are no extents to synchronize. 
*/ 572238106Sdes amp->am_syncoff = -2; 573238106Sdes return; 574238106Sdes } 575238106Sdes /* 576238106Sdes * Mark that we want to start synchronization from the beginning. 577238106Sdes */ 578238106Sdes amp->am_syncoff = -1; 579238106Sdes} 580238106Sdes 581238106Sdes/* 582238106Sdes * Return next offset of where we should synchronize. 583238106Sdes */ 584238106Sdesoff_t 585238106Sdesactivemap_sync_offset(struct activemap *amp, off_t *lengthp, int *syncextp) 586238106Sdes{ 587238106Sdes off_t syncoff, left; 588238106Sdes int ext; 589238106Sdes 590238106Sdes PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC); 591238106Sdes PJDLOG_ASSERT(lengthp != NULL); 592238106Sdes PJDLOG_ASSERT(syncextp != NULL); 593238106Sdes 594238106Sdes *syncextp = -1; 595238106Sdes 596238106Sdes if (amp->am_syncoff == -2) 597238106Sdes return (-1); 598238106Sdes 599238106Sdes if (amp->am_syncoff >= 0 && 600238106Sdes (amp->am_syncoff + MAXPHYS >= amp->am_mediasize || 601238106Sdes off2ext(amp, amp->am_syncoff) != 602238106Sdes off2ext(amp, amp->am_syncoff + MAXPHYS))) { 603238106Sdes /* 604238106Sdes * We are about to change extent, so mark previous one as clean. 605238106Sdes */ 606238106Sdes ext = off2ext(amp, amp->am_syncoff); 607238106Sdes bit_clear(amp->am_syncmap, ext); 608238106Sdes *syncextp = ext; 609238106Sdes amp->am_syncoff = -1; 610238106Sdes } 611238106Sdes 612238106Sdes if (amp->am_syncoff == -1) { 613238106Sdes /* 614238106Sdes * Let's find first extent to synchronize. 615238106Sdes */ 616238106Sdes bit_ffs(amp->am_syncmap, amp->am_nextents, &ext); 617238106Sdes if (ext == -1) { 618238106Sdes amp->am_syncoff = -2; 619238106Sdes return (-1); 620238106Sdes } 621238106Sdes amp->am_syncoff = ext2off(amp, ext); 622238106Sdes } else { 623238106Sdes /* 624238106Sdes * We don't change extent, so just increase offset. 
625 */ 626 amp->am_syncoff += MAXPHYS; 627 if (amp->am_syncoff >= amp->am_mediasize) { 628 amp->am_syncoff = -2; 629 return (-1); 630 } 631 } 632 633 syncoff = amp->am_syncoff; 634 left = ext2off(amp, off2ext(amp, syncoff)) + 635 amp->am_extentsize - syncoff; 636 if (syncoff + left > amp->am_mediasize) 637 left = amp->am_mediasize - syncoff; 638 if (left > MAXPHYS) 639 left = MAXPHYS; 640 641 PJDLOG_ASSERT(left >= 0 && left <= MAXPHYS); 642 PJDLOG_ASSERT(syncoff >= 0 && syncoff < amp->am_mediasize); 643 PJDLOG_ASSERT(syncoff + left >= 0 && 644 syncoff + left <= amp->am_mediasize); 645 646 *lengthp = left; 647 return (syncoff); 648} 649 650/* 651 * Mark extent(s) containing the given region for synchronization. 652 * Most likely one of the components is unavailable. 653 */ 654bool 655activemap_need_sync(struct activemap *amp, off_t offset, off_t length) 656{ 657 bool modified; 658 off_t end; 659 int ext; 660 661 PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC); 662 663 modified = false; 664 end = offset + length - 1; 665 666 for (ext = off2ext(amp, offset); ext <= off2ext(amp, end); ext++) { 667 if (bit_test(amp->am_syncmap, ext)) { 668 /* Already marked for synchronization. */ 669 PJDLOG_ASSERT(bit_test(amp->am_memmap, ext)); 670 continue; 671 } 672 bit_set(amp->am_syncmap, ext); 673 if (!bit_test(amp->am_memmap, ext)) { 674 bit_set(amp->am_memmap, ext); 675 amp->am_ndirty++; 676 } 677 amp->am_memtab[ext] += ext2reqs(amp, ext); 678 modified = true; 679 } 680 681 return (modified); 682} 683 684void 685activemap_dump(const struct activemap *amp) 686{ 687 int bit; 688 689 printf("M: "); 690 for (bit = 0; bit < amp->am_nextents; bit++) 691 printf("%d", bit_test(amp->am_memmap, bit) ? 1 : 0); 692 printf("\n"); 693 printf("D: "); 694 for (bit = 0; bit < amp->am_nextents; bit++) 695 printf("%d", bit_test(amp->am_diskmap, bit) ? 1 : 0); 696 printf("\n"); 697 printf("S: "); 698 for (bit = 0; bit < amp->am_nextents; bit++) 699 printf("%d", bit_test(amp->am_syncmap, bit) ? 
1 : 0); 700 printf("\n"); 701} 702