1204076Spjd/*- 2204076Spjd * Copyright (c) 2009-2010 The FreeBSD Foundation 3204076Spjd * All rights reserved. 4204076Spjd * 5204076Spjd * This software was developed by Pawel Jakub Dawidek under sponsorship from 6204076Spjd * the FreeBSD Foundation. 7204076Spjd * 8204076Spjd * Redistribution and use in source and binary forms, with or without 9204076Spjd * modification, are permitted provided that the following conditions 10204076Spjd * are met: 11204076Spjd * 1. Redistributions of source code must retain the above copyright 12204076Spjd * notice, this list of conditions and the following disclaimer. 13204076Spjd * 2. Redistributions in binary form must reproduce the above copyright 14204076Spjd * notice, this list of conditions and the following disclaimer in the 15204076Spjd * documentation and/or other materials provided with the distribution. 16204076Spjd * 17204076Spjd * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 18204076Spjd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19204076Spjd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20204076Spjd * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 21204076Spjd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22204076Spjd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23204076Spjd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24204076Spjd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25204076Spjd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26204076Spjd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27204076Spjd * SUCH DAMAGE. 28204076Spjd */ 29204076Spjd 30204076Spjd#include <sys/cdefs.h> 31204076Spjd__FBSDID("$FreeBSD$"); 32204076Spjd 33204076Spjd#include <sys/param.h> /* powerof2() */ 34204076Spjd#include <sys/queue.h> 35204076Spjd 36204076Spjd#include <bitstring.h> 37204076Spjd#include <errno.h> 38204076Spjd#include <stdint.h> 39204076Spjd#include <stdio.h> 40204076Spjd#include <stdlib.h> 41204076Spjd#include <string.h> 42204076Spjd 43229509Strociny#include <pjdlog.h> 44204076Spjd 45229509Strociny#include "activemap.h" 46229509Strociny 47229509Strociny#ifndef PJDLOG_ASSERT 48229509Strociny#include <assert.h> 49229509Strociny#define PJDLOG_ASSERT(...) assert(__VA_ARGS__) 50229509Strociny#endif 51229509Strociny 52204076Spjd#define ACTIVEMAP_MAGIC 0xac71e4 53204076Spjdstruct activemap { 54204076Spjd int am_magic; /* Magic value. */ 55219864Spjd off_t am_mediasize; /* Media size in bytes. */ 56204076Spjd uint32_t am_extentsize; /* Extent size in bytes, 57204076Spjd must be power of 2. */ 58204076Spjd uint8_t am_extentshift;/* 2 ^ extentbits == extentsize */ 59204076Spjd int am_nextents; /* Number of extents. */ 60204076Spjd size_t am_mapsize; /* Bitmap size in bytes. */ 61204076Spjd uint16_t *am_memtab; /* An array that holds number of pending 62204076Spjd writes per extent. */ 63204076Spjd bitstr_t *am_diskmap; /* On-disk bitmap of dirty extents. */ 64204076Spjd bitstr_t *am_memmap; /* In-memory bitmap of dirty extents. */ 65204076Spjd size_t am_diskmapsize; /* Map size rounded up to sector size. */ 66204076Spjd uint64_t am_ndirty; /* Number of dirty regions. */ 67204076Spjd bitstr_t *am_syncmap; /* Bitmap of extents to sync. */ 68204076Spjd off_t am_syncoff; /* Next synchronization offset. */ 69204076Spjd TAILQ_HEAD(skeepdirty, keepdirty) am_keepdirty; /* List of extents that 70204076Spjd we keep dirty to reduce bitmap 71204076Spjd updates. */ 72204076Spjd int am_nkeepdirty; /* Number of am_keepdirty elements. */ 73204076Spjd int am_nkeepdirty_limit; /* Maximum number of am_keepdirty 74204076Spjd elements. */ 75204076Spjd}; 76204076Spjd 77204076Spjdstruct keepdirty { 78204076Spjd int kd_extent; 79204076Spjd TAILQ_ENTRY(keepdirty) kd_next; 80204076Spjd}; 81204076Spjd 82204076Spjd/* 83204076Spjd * Helper function taken from sys/systm.h to calculate extentshift. 84204076Spjd */ 85204076Spjdstatic uint32_t 86204076Spjdbitcount32(uint32_t x) 87204076Spjd{ 88204076Spjd 89204076Spjd x = (x & 0x55555555) + ((x & 0xaaaaaaaa) >> 1); 90204076Spjd x = (x & 0x33333333) + ((x & 0xcccccccc) >> 2); 91204076Spjd x = (x + (x >> 4)) & 0x0f0f0f0f; 92204076Spjd x = (x + (x >> 8)); 93204076Spjd x = (x + (x >> 16)) & 0x000000ff; 94204076Spjd return (x); 95204076Spjd} 96204076Spjd 97204076Spjdstatic __inline int 98204076Spjdoff2ext(const struct activemap *amp, off_t offset) 99204076Spjd{ 100204076Spjd int extent; 101204076Spjd 102229509Strociny PJDLOG_ASSERT(offset >= 0 && offset < amp->am_mediasize); 103204076Spjd extent = (offset >> amp->am_extentshift); 104229509Strociny PJDLOG_ASSERT(extent >= 0 && extent < amp->am_nextents); 105204076Spjd return (extent); 106204076Spjd} 107204076Spjd 108204076Spjdstatic __inline off_t 109204076Spjdext2off(const struct activemap *amp, int extent) 110204076Spjd{ 111204076Spjd off_t offset; 112204076Spjd 113229509Strociny PJDLOG_ASSERT(extent >= 0 && extent < amp->am_nextents); 114204076Spjd offset = ((off_t)extent << amp->am_extentshift); 115229509Strociny PJDLOG_ASSERT(offset >= 0 && offset < amp->am_mediasize); 116204076Spjd return (offset); 117204076Spjd} 118204076Spjd 119204076Spjd/* 120204076Spjd * Function calculates number of requests needed to synchronize the given 121204076Spjd * extent. 122204076Spjd */ 123204076Spjdstatic __inline int 124204076Spjdext2reqs(const struct activemap *amp, int ext) 125204076Spjd{ 126204076Spjd off_t left; 127204076Spjd 128204076Spjd if (ext < amp->am_nextents - 1) 129204076Spjd return (((amp->am_extentsize - 1) / MAXPHYS) + 1); 130204076Spjd 131229509Strociny PJDLOG_ASSERT(ext == amp->am_nextents - 1); 132204076Spjd left = amp->am_mediasize % amp->am_extentsize; 133204076Spjd if (left == 0) 134204076Spjd left = amp->am_extentsize; 135204076Spjd return (((left - 1) / MAXPHYS) + 1); 136204076Spjd} 137204076Spjd 138204076Spjd/* 139204076Spjd * Initialize activemap structure and allocate memory for internal needs. 140204076Spjd * Function returns 0 on success and -1 if any of the allocations failed. 141204076Spjd */ 142204076Spjdint 143204076Spjdactivemap_init(struct activemap **ampp, uint64_t mediasize, uint32_t extentsize, 144204076Spjd uint32_t sectorsize, uint32_t keepdirty) 145204076Spjd{ 146204076Spjd struct activemap *amp; 147204076Spjd 148229509Strociny PJDLOG_ASSERT(ampp != NULL); 149229509Strociny PJDLOG_ASSERT(mediasize > 0); 150229509Strociny PJDLOG_ASSERT(extentsize > 0); 151229509Strociny PJDLOG_ASSERT(powerof2(extentsize)); 152229509Strociny PJDLOG_ASSERT(sectorsize > 0); 153229509Strociny PJDLOG_ASSERT(powerof2(sectorsize)); 154229509Strociny PJDLOG_ASSERT(keepdirty > 0); 155204076Spjd 156204076Spjd amp = malloc(sizeof(*amp)); 157204076Spjd if (amp == NULL) 158204076Spjd return (-1); 159204076Spjd 160204076Spjd amp->am_mediasize = mediasize; 161204076Spjd amp->am_nkeepdirty_limit = keepdirty; 162204076Spjd amp->am_extentsize = extentsize; 163204076Spjd amp->am_extentshift = bitcount32(extentsize - 1); 164204076Spjd amp->am_nextents = ((mediasize - 1) / extentsize) + 1; 165204076Spjd amp->am_mapsize = sizeof(bitstr_t) * bitstr_size(amp->am_nextents); 166204076Spjd amp->am_diskmapsize = roundup2(amp->am_mapsize, sectorsize); 167204076Spjd amp->am_ndirty = 0; 168204076Spjd amp->am_syncoff = -2; 169204076Spjd TAILQ_INIT(&->am_keepdirty); 170204076Spjd amp->am_nkeepdirty = 0; 171204076Spjd 172204076Spjd amp->am_memtab = calloc(amp->am_nextents, sizeof(amp->am_memtab[0])); 173204076Spjd amp->am_diskmap = calloc(1, amp->am_diskmapsize); 174204076Spjd amp->am_memmap = bit_alloc(amp->am_nextents); 175204076Spjd amp->am_syncmap = bit_alloc(amp->am_nextents); 176204076Spjd 177204076Spjd /* 178204076Spjd * Check to see if any of the allocations above failed. 179204076Spjd */ 180204076Spjd if (amp->am_memtab == NULL || amp->am_diskmap == NULL || 181204076Spjd amp->am_memmap == NULL || amp->am_syncmap == NULL) { 182204076Spjd if (amp->am_memtab != NULL) 183204076Spjd free(amp->am_memtab); 184204076Spjd if (amp->am_diskmap != NULL) 185204076Spjd free(amp->am_diskmap); 186204076Spjd if (amp->am_memmap != NULL) 187204076Spjd free(amp->am_memmap); 188204076Spjd if (amp->am_syncmap != NULL) 189204076Spjd free(amp->am_syncmap); 190204076Spjd amp->am_magic = 0; 191204076Spjd free(amp); 192204076Spjd errno = ENOMEM; 193204076Spjd return (-1); 194204076Spjd } 195204076Spjd 196204076Spjd amp->am_magic = ACTIVEMAP_MAGIC; 197204076Spjd *ampp = amp; 198204076Spjd 199204076Spjd return (0); 200204076Spjd} 201204076Spjd 202204076Spjdstatic struct keepdirty * 203204076Spjdkeepdirty_find(struct activemap *amp, int extent) 204204076Spjd{ 205204076Spjd struct keepdirty *kd; 206204076Spjd 207204076Spjd TAILQ_FOREACH(kd, &->am_keepdirty, kd_next) { 208204076Spjd if (kd->kd_extent == extent) 209204076Spjd break; 210204076Spjd } 211204076Spjd return (kd); 212204076Spjd} 213204076Spjd 214223654Strocinystatic bool 215204076Spjdkeepdirty_add(struct activemap *amp, int extent) 216204076Spjd{ 217204076Spjd struct keepdirty *kd; 218204076Spjd 219204076Spjd kd = keepdirty_find(amp, extent); 220204076Spjd if (kd != NULL) { 221204076Spjd /* 222231017Strociny * Only move element at the beginning. 223204076Spjd */ 224204076Spjd TAILQ_REMOVE(&->am_keepdirty, kd, kd_next); 225204076Spjd TAILQ_INSERT_HEAD(&->am_keepdirty, kd, kd_next); 226223654Strociny return (false); 227204076Spjd } 228204076Spjd /* 229204076Spjd * Add new element, but first remove the most unused one if 230204076Spjd * we have too many. 231204076Spjd */ 232204076Spjd if (amp->am_nkeepdirty >= amp->am_nkeepdirty_limit) { 233204076Spjd kd = TAILQ_LAST(&->am_keepdirty, skeepdirty); 234229509Strociny PJDLOG_ASSERT(kd != NULL); 235204076Spjd TAILQ_REMOVE(&->am_keepdirty, kd, kd_next); 236204076Spjd amp->am_nkeepdirty--; 237229509Strociny PJDLOG_ASSERT(amp->am_nkeepdirty > 0); 238204076Spjd } 239204076Spjd if (kd == NULL) 240204076Spjd kd = malloc(sizeof(*kd)); 241204076Spjd /* We can ignore allocation failure. */ 242204076Spjd if (kd != NULL) { 243204076Spjd kd->kd_extent = extent; 244204076Spjd amp->am_nkeepdirty++; 245204076Spjd TAILQ_INSERT_HEAD(&->am_keepdirty, kd, kd_next); 246204076Spjd } 247223654Strociny 248223654Strociny return (true); 249204076Spjd} 250204076Spjd 251204076Spjdstatic void 252204076Spjdkeepdirty_fill(struct activemap *amp) 253204076Spjd{ 254204076Spjd struct keepdirty *kd; 255204076Spjd 256204076Spjd TAILQ_FOREACH(kd, &->am_keepdirty, kd_next) 257204076Spjd bit_set(amp->am_diskmap, kd->kd_extent); 258204076Spjd} 259204076Spjd 260204076Spjdstatic void 261204076Spjdkeepdirty_free(struct activemap *amp) 262204076Spjd{ 263204076Spjd struct keepdirty *kd; 264204076Spjd 265204076Spjd while ((kd = TAILQ_FIRST(&->am_keepdirty)) != NULL) { 266204076Spjd TAILQ_REMOVE(&->am_keepdirty, kd, kd_next); 267204076Spjd amp->am_nkeepdirty--; 268204076Spjd free(kd); 269204076Spjd } 270229509Strociny PJDLOG_ASSERT(amp->am_nkeepdirty == 0); 271204076Spjd} 272204076Spjd 273204076Spjd/* 274204076Spjd * Function frees resources allocated by activemap_init() function. 275204076Spjd */ 276204076Spjdvoid 277204076Spjdactivemap_free(struct activemap *amp) 278204076Spjd{ 279204076Spjd 280229509Strociny PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC); 281204076Spjd 282204076Spjd amp->am_magic = 0; 283204076Spjd 284204076Spjd keepdirty_free(amp); 285204076Spjd free(amp->am_memtab); 286204076Spjd free(amp->am_diskmap); 287204076Spjd free(amp->am_memmap); 288204076Spjd free(amp->am_syncmap); 289204076Spjd} 290204076Spjd 291204076Spjd/* 292204076Spjd * Function should be called before we handle write requests. It updates 293204076Spjd * internal structures and returns true if on-disk metadata should be updated. 294204076Spjd */ 295204076Spjdbool 296204076Spjdactivemap_write_start(struct activemap *amp, off_t offset, off_t length) 297204076Spjd{ 298204076Spjd bool modified; 299204076Spjd off_t end; 300204076Spjd int ext; 301204076Spjd 302229509Strociny PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC); 303229509Strociny PJDLOG_ASSERT(length > 0); 304204076Spjd 305204076Spjd modified = false; 306204076Spjd end = offset + length - 1; 307204076Spjd 308204076Spjd for (ext = off2ext(amp, offset); ext <= off2ext(amp, end); ext++) { 309204076Spjd /* 310204076Spjd * If the number of pending writes is increased from 0, 311204076Spjd * we have to mark the extent as dirty also in on-disk bitmap. 312204076Spjd * By returning true we inform the caller that on-disk bitmap 313204076Spjd * was modified and has to be flushed to disk. 314204076Spjd */ 315204076Spjd if (amp->am_memtab[ext]++ == 0) { 316229509Strociny PJDLOG_ASSERT(!bit_test(amp->am_memmap, ext)); 317204076Spjd bit_set(amp->am_memmap, ext); 318204076Spjd amp->am_ndirty++; 319223654Strociny } 320223654Strociny if (keepdirty_add(amp, ext)) 321204076Spjd modified = true; 322204076Spjd } 323204076Spjd 324204076Spjd return (modified); 325204076Spjd} 326204076Spjd 327204076Spjd/* 328204076Spjd * Function should be called after receiving write confirmation. It updates 329204076Spjd * internal structures and returns true if on-disk metadata should be updated. 330204076Spjd */ 331204076Spjdbool 332204076Spjdactivemap_write_complete(struct activemap *amp, off_t offset, off_t length) 333204076Spjd{ 334204076Spjd bool modified; 335204076Spjd off_t end; 336204076Spjd int ext; 337204076Spjd 338229509Strociny PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC); 339229509Strociny PJDLOG_ASSERT(length > 0); 340204076Spjd 341204076Spjd modified = false; 342204076Spjd end = offset + length - 1; 343204076Spjd 344204076Spjd for (ext = off2ext(amp, offset); ext <= off2ext(amp, end); ext++) { 345204076Spjd /* 346204076Spjd * If the number of pending writes goes down to 0, we have to 347204076Spjd * mark the extent as clean also in on-disk bitmap. 348204076Spjd * By returning true we inform the caller that on-disk bitmap 349204076Spjd * was modified and has to be flushed to disk. 350204076Spjd */ 351229509Strociny PJDLOG_ASSERT(amp->am_memtab[ext] > 0); 352229509Strociny PJDLOG_ASSERT(bit_test(amp->am_memmap, ext)); 353204076Spjd if (--amp->am_memtab[ext] == 0) { 354204076Spjd bit_clear(amp->am_memmap, ext); 355204076Spjd amp->am_ndirty--; 356223654Strociny if (keepdirty_find(amp, ext) == NULL) 357223654Strociny modified = true; 358204076Spjd } 359204076Spjd } 360204076Spjd 361204076Spjd return (modified); 362204076Spjd} 363204076Spjd 364204076Spjd/* 365204076Spjd * Function should be called after finishing synchronization of one extent. 366204076Spjd * It returns true if on-disk metadata should be updated. 367204076Spjd */ 368204076Spjdbool 369204076Spjdactivemap_extent_complete(struct activemap *amp, int extent) 370204076Spjd{ 371204076Spjd bool modified; 372204076Spjd int reqs; 373204076Spjd 374229509Strociny PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC); 375229509Strociny PJDLOG_ASSERT(extent >= 0 && extent < amp->am_nextents); 376204076Spjd 377204076Spjd modified = false; 378204076Spjd 379204076Spjd reqs = ext2reqs(amp, extent); 380229509Strociny PJDLOG_ASSERT(amp->am_memtab[extent] >= reqs); 381204076Spjd amp->am_memtab[extent] -= reqs; 382229509Strociny PJDLOG_ASSERT(bit_test(amp->am_memmap, extent)); 383204076Spjd if (amp->am_memtab[extent] == 0) { 384204076Spjd bit_clear(amp->am_memmap, extent); 385204076Spjd amp->am_ndirty--; 386204076Spjd modified = true; 387204076Spjd } 388204076Spjd 389204076Spjd return (modified); 390204076Spjd} 391204076Spjd 392204076Spjd/* 393204076Spjd * Function returns number of dirty regions. 394204076Spjd */ 395204076Spjduint64_t 396204076Spjdactivemap_ndirty(const struct activemap *amp) 397204076Spjd{ 398204076Spjd 399229509Strociny PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC); 400204076Spjd 401204076Spjd return (amp->am_ndirty); 402204076Spjd} 403204076Spjd 404204076Spjd/* 405204076Spjd * Function compare on-disk bitmap and in-memory bitmap and returns true if 406204076Spjd * they differ and should be flushed to the disk. 407204076Spjd */ 408204076Spjdbool 409204076Spjdactivemap_differ(const struct activemap *amp) 410204076Spjd{ 411204076Spjd 412229509Strociny PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC); 413204076Spjd 414204076Spjd return (memcmp(amp->am_diskmap, amp->am_memmap, 415204076Spjd amp->am_mapsize) != 0); 416204076Spjd} 417204076Spjd 418204076Spjd/* 419204076Spjd * Function returns number of bytes used by bitmap. 420204076Spjd */ 421204076Spjdsize_t 422204076Spjdactivemap_size(const struct activemap *amp) 423204076Spjd{ 424204076Spjd 425229509Strociny PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC); 426204076Spjd 427204076Spjd return (amp->am_mapsize); 428204076Spjd} 429204076Spjd 430204076Spjd/* 431204076Spjd * Function returns number of bytes needed for storing on-disk bitmap. 432204076Spjd * This is the same as activemap_size(), but rounded up to sector size. 433204076Spjd */ 434204076Spjdsize_t 435204076Spjdactivemap_ondisk_size(const struct activemap *amp) 436204076Spjd{ 437204076Spjd 438229509Strociny PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC); 439204076Spjd 440204076Spjd return (amp->am_diskmapsize); 441204076Spjd} 442204076Spjd 443204076Spjd/* 444204076Spjd * Function copies the given buffer read from disk to the internal bitmap. 445204076Spjd */ 446204076Spjdvoid 447204076Spjdactivemap_copyin(struct activemap *amp, const unsigned char *buf, size_t size) 448204076Spjd{ 449204076Spjd int ext; 450204076Spjd 451229509Strociny PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC); 452229509Strociny PJDLOG_ASSERT(size >= amp->am_mapsize); 453204076Spjd 454204076Spjd memcpy(amp->am_diskmap, buf, amp->am_mapsize); 455204076Spjd memcpy(amp->am_memmap, buf, amp->am_mapsize); 456204076Spjd memcpy(amp->am_syncmap, buf, amp->am_mapsize); 457204076Spjd 458204076Spjd bit_ffs(amp->am_memmap, amp->am_nextents, &ext); 459204076Spjd if (ext == -1) { 460204076Spjd /* There are no dirty extents, so we can leave now. */ 461204076Spjd return; 462204076Spjd } 463204076Spjd /* 464204076Spjd * Set synchronization offset to the first dirty extent. 465204076Spjd */ 466204076Spjd activemap_sync_rewind(amp); 467204076Spjd /* 468204076Spjd * We have dirty extents and we want them to stay that way until 469204076Spjd * we synchronize, so we set number of pending writes to number 470204076Spjd * of requests needed to synchronize one extent. 471204076Spjd */ 472204076Spjd amp->am_ndirty = 0; 473204076Spjd for (; ext < amp->am_nextents; ext++) { 474204076Spjd if (bit_test(amp->am_memmap, ext)) { 475204076Spjd amp->am_memtab[ext] = ext2reqs(amp, ext); 476204076Spjd amp->am_ndirty++; 477204076Spjd } 478204076Spjd } 479204076Spjd} 480204076Spjd 481204076Spjd/* 482220521Strociny * Function merges the given bitmap with existing one. 483204076Spjd */ 484204076Spjdvoid 485204076Spjdactivemap_merge(struct activemap *amp, const unsigned char *buf, size_t size) 486204076Spjd{ 487204076Spjd bitstr_t *remmap = __DECONST(bitstr_t *, buf); 488204076Spjd int ext; 489204076Spjd 490229509Strociny PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC); 491229509Strociny PJDLOG_ASSERT(size >= amp->am_mapsize); 492204076Spjd 493204076Spjd bit_ffs(remmap, amp->am_nextents, &ext); 494204076Spjd if (ext == -1) { 495204076Spjd /* There are no dirty extents, so we can leave now. */ 496204076Spjd return; 497204076Spjd } 498204076Spjd /* 499204076Spjd * We have dirty extents and we want them to stay that way until 500204076Spjd * we synchronize, so we set number of pending writes to number 501204076Spjd * of requests needed to synchronize one extent. 502204076Spjd */ 503204076Spjd for (; ext < amp->am_nextents; ext++) { 504204076Spjd /* Local extent already dirty. */ 505204076Spjd if (bit_test(amp->am_syncmap, ext)) 506204076Spjd continue; 507204076Spjd /* Remote extent isn't dirty. */ 508204076Spjd if (!bit_test(remmap, ext)) 509204076Spjd continue; 510204076Spjd bit_set(amp->am_syncmap, ext); 511204076Spjd bit_set(amp->am_memmap, ext); 512204076Spjd bit_set(amp->am_diskmap, ext); 513204076Spjd if (amp->am_memtab[ext] == 0) 514204076Spjd amp->am_ndirty++; 515204076Spjd amp->am_memtab[ext] = ext2reqs(amp, ext); 516204076Spjd } 517204076Spjd /* 518204076Spjd * Set synchronization offset to the first dirty extent. 519204076Spjd */ 520204076Spjd activemap_sync_rewind(amp); 521204076Spjd} 522204076Spjd 523204076Spjd/* 524204076Spjd * Function returns pointer to internal bitmap that should be written to disk. 525204076Spjd */ 526204076Spjdconst unsigned char * 527204076Spjdactivemap_bitmap(struct activemap *amp, size_t *sizep) 528204076Spjd{ 529204076Spjd 530229509Strociny PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC); 531204076Spjd 532204076Spjd if (sizep != NULL) 533204076Spjd *sizep = amp->am_diskmapsize; 534204076Spjd memcpy(amp->am_diskmap, amp->am_memmap, amp->am_mapsize); 535204076Spjd keepdirty_fill(amp); 536204076Spjd return ((const unsigned char *)amp->am_diskmap); 537204076Spjd} 538204076Spjd 539204076Spjd/* 540204076Spjd * Function calculates size needed to store bitmap on disk. 541204076Spjd */ 542204076Spjdsize_t 543204076Spjdactivemap_calc_ondisk_size(uint64_t mediasize, uint32_t extentsize, 544204076Spjd uint32_t sectorsize) 545204076Spjd{ 546204076Spjd uint64_t nextents, mapsize; 547204076Spjd 548229509Strociny PJDLOG_ASSERT(mediasize > 0); 549229509Strociny PJDLOG_ASSERT(extentsize > 0); 550229509Strociny PJDLOG_ASSERT(powerof2(extentsize)); 551229509Strociny PJDLOG_ASSERT(sectorsize > 0); 552229509Strociny PJDLOG_ASSERT(powerof2(sectorsize)); 553204076Spjd 554204076Spjd nextents = ((mediasize - 1) / extentsize) + 1; 555204076Spjd mapsize = sizeof(bitstr_t) * bitstr_size(nextents); 556204076Spjd return (roundup2(mapsize, sectorsize)); 557204076Spjd} 558204076Spjd 559204076Spjd/* 560204076Spjd * Set synchronization offset to the first dirty extent. 561204076Spjd */ 562204076Spjdvoid 563204076Spjdactivemap_sync_rewind(struct activemap *amp) 564204076Spjd{ 565204076Spjd int ext; 566204076Spjd 567229509Strociny PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC); 568204076Spjd 569204076Spjd bit_ffs(amp->am_syncmap, amp->am_nextents, &ext); 570204076Spjd if (ext == -1) { 571204076Spjd /* There are no extents to synchronize. */ 572204076Spjd amp->am_syncoff = -2; 573204076Spjd return; 574204076Spjd } 575204076Spjd /* 576231017Strociny * Mark that we want to start synchronization from the beginning. 577204076Spjd */ 578204076Spjd amp->am_syncoff = -1; 579204076Spjd} 580204076Spjd 581204076Spjd/* 582204076Spjd * Return next offset of where we should synchronize. 583204076Spjd */ 584204076Spjdoff_t 585204076Spjdactivemap_sync_offset(struct activemap *amp, off_t *lengthp, int *syncextp) 586204076Spjd{ 587204076Spjd off_t syncoff, left; 588204076Spjd int ext; 589204076Spjd 590229509Strociny PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC); 591229509Strociny PJDLOG_ASSERT(lengthp != NULL); 592229509Strociny PJDLOG_ASSERT(syncextp != NULL); 593204076Spjd 594204076Spjd *syncextp = -1; 595204076Spjd 596204076Spjd if (amp->am_syncoff == -2) 597204076Spjd return (-1); 598204076Spjd 599204076Spjd if (amp->am_syncoff >= 0 && 600204076Spjd (amp->am_syncoff + MAXPHYS >= amp->am_mediasize || 601204076Spjd off2ext(amp, amp->am_syncoff) != 602204076Spjd off2ext(amp, amp->am_syncoff + MAXPHYS))) { 603204076Spjd /* 604204076Spjd * We are about to change extent, so mark previous one as clean. 605204076Spjd */ 606204076Spjd ext = off2ext(amp, amp->am_syncoff); 607204076Spjd bit_clear(amp->am_syncmap, ext); 608204076Spjd *syncextp = ext; 609204076Spjd amp->am_syncoff = -1; 610204076Spjd } 611204076Spjd 612204076Spjd if (amp->am_syncoff == -1) { 613204076Spjd /* 614204076Spjd * Let's find first extent to synchronize. 615204076Spjd */ 616204076Spjd bit_ffs(amp->am_syncmap, amp->am_nextents, &ext); 617204076Spjd if (ext == -1) { 618204076Spjd amp->am_syncoff = -2; 619204076Spjd return (-1); 620204076Spjd } 621204076Spjd amp->am_syncoff = ext2off(amp, ext); 622204076Spjd } else { 623204076Spjd /* 624204076Spjd * We don't change extent, so just increase offset. 625204076Spjd */ 626204076Spjd amp->am_syncoff += MAXPHYS; 627204076Spjd if (amp->am_syncoff >= amp->am_mediasize) { 628204076Spjd amp->am_syncoff = -2; 629204076Spjd return (-1); 630204076Spjd } 631204076Spjd } 632204076Spjd 633204076Spjd syncoff = amp->am_syncoff; 634204076Spjd left = ext2off(amp, off2ext(amp, syncoff)) + 635204076Spjd amp->am_extentsize - syncoff; 636204076Spjd if (syncoff + left > amp->am_mediasize) 637204076Spjd left = amp->am_mediasize - syncoff; 638204076Spjd if (left > MAXPHYS) 639204076Spjd left = MAXPHYS; 640204076Spjd 641229509Strociny PJDLOG_ASSERT(left >= 0 && left <= MAXPHYS); 642229509Strociny PJDLOG_ASSERT(syncoff >= 0 && syncoff < amp->am_mediasize); 643229509Strociny PJDLOG_ASSERT(syncoff + left >= 0 && 644229509Strociny syncoff + left <= amp->am_mediasize); 645204076Spjd 646204076Spjd *lengthp = left; 647204076Spjd return (syncoff); 648204076Spjd} 649204076Spjd 650204076Spjd/* 651204076Spjd * Mark extent(s) containing the given region for synchronization. 652204076Spjd * Most likely one of the components is unavailable. 653204076Spjd */ 654204076Spjdbool 655204076Spjdactivemap_need_sync(struct activemap *amp, off_t offset, off_t length) 656204076Spjd{ 657204076Spjd bool modified; 658204076Spjd off_t end; 659204076Spjd int ext; 660204076Spjd 661229509Strociny PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC); 662204076Spjd 663204076Spjd modified = false; 664204076Spjd end = offset + length - 1; 665204076Spjd 666204076Spjd for (ext = off2ext(amp, offset); ext <= off2ext(amp, end); ext++) { 667204076Spjd if (bit_test(amp->am_syncmap, ext)) { 668204076Spjd /* Already marked for synchronization. */ 669229509Strociny PJDLOG_ASSERT(bit_test(amp->am_memmap, ext)); 670204076Spjd continue; 671204076Spjd } 672204076Spjd bit_set(amp->am_syncmap, ext); 673204076Spjd if (!bit_test(amp->am_memmap, ext)) { 674204076Spjd bit_set(amp->am_memmap, ext); 675204076Spjd amp->am_ndirty++; 676204076Spjd } 677204076Spjd amp->am_memtab[ext] += ext2reqs(amp, ext); 678204076Spjd modified = true; 679204076Spjd } 680204076Spjd 681204076Spjd return (modified); 682204076Spjd} 683204076Spjd 684204076Spjdvoid 685204076Spjdactivemap_dump(const struct activemap *amp) 686204076Spjd{ 687204076Spjd int bit; 688204076Spjd 689204076Spjd printf("M: "); 690204076Spjd for (bit = 0; bit < amp->am_nextents; bit++) 691204076Spjd printf("%d", bit_test(amp->am_memmap, bit) ? 1 : 0); 692204076Spjd printf("\n"); 693204076Spjd printf("D: "); 694204076Spjd for (bit = 0; bit < amp->am_nextents; bit++) 695204076Spjd printf("%d", bit_test(amp->am_diskmap, bit) ? 1 : 0); 696204076Spjd printf("\n"); 697204076Spjd printf("S: "); 698204076Spjd for (bit = 0; bit < amp->am_nextents; bit++) 699204076Spjd printf("%d", bit_test(amp->am_syncmap, bit) ? 1 : 0); 700204076Spjd printf("\n"); 701204076Spjd} 702