activemap.c revision 231017
/*-
 * Copyright (c) 2009-2010 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Pawel Jakub Dawidek under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
29269257Sdes
30269257Sdes#include <sys/cdefs.h>
31269257Sdes__FBSDID("$FreeBSD: stable/9/sbin/hastd/activemap.c 231017 2012-02-05 15:51:19Z trociny $");
32269257Sdes
33269257Sdes#include <sys/param.h>	/* powerof2() */
34238106Sdes#include <sys/queue.h>
35238106Sdes
36238106Sdes#include <bitstring.h>
37238106Sdes#include <errno.h>
38238106Sdes#include <stdint.h>
39238106Sdes#include <stdio.h>
40238106Sdes#include <stdlib.h>
41294190Sdes#include <string.h>
42238106Sdes
43238106Sdes#include <pjdlog.h>
44238106Sdes
45238106Sdes#include "activemap.h"
46238106Sdes
47238106Sdes#ifndef	PJDLOG_ASSERT
48238106Sdes#include <assert.h>
49238106Sdes#define	PJDLOG_ASSERT(...)	assert(__VA_ARGS__)
50238106Sdes#endif
51238106Sdes
/* Value kept in am_magic for as long as the structure is valid. */
#define	ACTIVEMAP_MAGIC	0xac71e4

/*
 * Bookkeeping for one media: which extents are dirty, how many writes are
 * pending on each of them and which extents still wait for synchronization.
 */
struct activemap {
	/* Set to ACTIVEMAP_MAGIC by activemap_init(), zeroed on free. */
	int		 am_magic;
	/* Size of the media, in bytes. */
	off_t		 am_mediasize;
	/* Size of a single extent, in bytes; always a power of 2. */
	uint32_t	 am_extentsize;
	/* Shift such that (1 << am_extentshift) == am_extentsize. */
	uint8_t		 am_extentshift;
	/* Number of extents covering the media. */
	int		 am_nextents;
	/* Size of one bitmap, in bytes. */
	size_t		 am_mapsize;
	/* Per-extent counters of pending writes. */
	uint16_t	*am_memtab;
	/* Bitmap of dirty extents in its on-disk form. */
	bitstr_t	*am_diskmap;
	/* Bitmap of dirty extents as tracked in memory. */
	bitstr_t	*am_memmap;
	/* am_mapsize rounded up to a multiple of the sector size. */
	size_t		 am_diskmapsize;
	/* Number of extents currently counted as dirty. */
	uint64_t	 am_ndirty;
	/* Bitmap of extents that still have to be synchronized. */
	bitstr_t	*am_syncmap;
	/*
	 * Next synchronization offset; -1 requests a restart from the first
	 * extent set in am_syncmap, -2 means there is nothing to synchronize.
	 */
	off_t		 am_syncoff;
	/*
	 * Most recently written extents, newest first.  They are reported
	 * as dirty in the on-disk bitmap to reduce bitmap updates.
	 */
	TAILQ_HEAD(skeepdirty, keepdirty) am_keepdirty;
	/* Current number of elements on am_keepdirty. */
	int		 am_nkeepdirty;
	/* Upper bound for am_nkeepdirty. */
	int		 am_nkeepdirty_limit;
};
76238106Sdes
/*
 * Single element of the am_keepdirty list.
 */
struct keepdirty {
	int	kd_extent;		/* Number of the extent kept dirty. */
	TAILQ_ENTRY(keepdirty) kd_next;	/* Linkage on am_keepdirty. */
};
81238106Sdes
/*
 * Count the bits set in x.  Used to derive the extent shift from
 * (extentsize - 1).
 */
static uint32_t
bitcount32(uint32_t x)
{
	uint32_t nbits;

	/* Every iteration clears the lowest bit that is still set. */
	for (nbits = 0; x != 0; nbits++)
		x &= x - 1;
	return (nbits);
}
96238106Sdes
97238106Sdesstatic __inline int
98238106Sdesoff2ext(const struct activemap *amp, off_t offset)
99238106Sdes{
100238106Sdes	int extent;
101238106Sdes
102238106Sdes	PJDLOG_ASSERT(offset >= 0 && offset < amp->am_mediasize);
103238106Sdes	extent = (offset >> amp->am_extentshift);
104238106Sdes	PJDLOG_ASSERT(extent >= 0 && extent < amp->am_nextents);
105238106Sdes	return (extent);
106238106Sdes}
107238106Sdes
108238106Sdesstatic __inline off_t
109238106Sdesext2off(const struct activemap *amp, int extent)
110238106Sdes{
111238106Sdes	off_t offset;
112238106Sdes
113238106Sdes	PJDLOG_ASSERT(extent >= 0 && extent < amp->am_nextents);
114238106Sdes	offset = ((off_t)extent << amp->am_extentshift);
115238106Sdes	PJDLOG_ASSERT(offset >= 0 && offset < amp->am_mediasize);
116238106Sdes	return (offset);
117238106Sdes}
118238106Sdes
119238106Sdes/*
120238106Sdes * Function calculates number of requests needed to synchronize the given
121238106Sdes * extent.
122238106Sdes */
123238106Sdesstatic __inline int
124238106Sdesext2reqs(const struct activemap *amp, int ext)
125238106Sdes{
126238106Sdes	off_t left;
127238106Sdes
128238106Sdes	if (ext < amp->am_nextents - 1)
129238106Sdes		return (((amp->am_extentsize - 1) / MAXPHYS) + 1);
130238106Sdes
131238106Sdes	PJDLOG_ASSERT(ext == amp->am_nextents - 1);
132238106Sdes	left = amp->am_mediasize % amp->am_extentsize;
133238106Sdes	if (left == 0)
134238106Sdes		left = amp->am_extentsize;
135238106Sdes	return (((left - 1) / MAXPHYS) + 1);
136238106Sdes}
137238106Sdes
/*
 * Allocate and initialize a new activemap and store it in *ampp.
 *
 * mediasize is the size of the media in bytes; extentsize and sectorsize
 * have to be powers of 2; keepdirty is the maximum number of recently
 * written extents that are kept dirty in the on-disk bitmap.
 *
 * Returns 0 on success.  Returns -1 if any of the allocations failed, in
 * which case nothing is left allocated and *ampp is not modified.
 */
int
activemap_init(struct activemap **ampp, uint64_t mediasize, uint32_t extentsize,
    uint32_t sectorsize, uint32_t keepdirty)
{
	struct activemap *amp;

	PJDLOG_ASSERT(ampp != NULL);
	PJDLOG_ASSERT(mediasize > 0);
	PJDLOG_ASSERT(extentsize > 0);
	PJDLOG_ASSERT(powerof2(extentsize));
	PJDLOG_ASSERT(sectorsize > 0);
	PJDLOG_ASSERT(powerof2(sectorsize));
	PJDLOG_ASSERT(keepdirty > 0);

	amp = malloc(sizeof(*amp));
	if (amp == NULL)
		return (-1);

	/*
	 * Geometry.  extentsize is a power of 2, so the number of bits set
	 * in (extentsize - 1) is exactly the shift we are looking for.
	 */
	amp->am_mediasize = mediasize;
	amp->am_extentsize = extentsize;
	amp->am_extentshift = bitcount32(extentsize - 1);
	amp->am_nextents = ((mediasize - 1) / extentsize) + 1;
	amp->am_mapsize = sizeof(bitstr_t) * bitstr_size(amp->am_nextents);
	amp->am_diskmapsize = roundup2(amp->am_mapsize, sectorsize);

	/* Nothing is dirty and there is nothing to synchronize yet. */
	amp->am_ndirty = 0;
	amp->am_syncoff = -2;

	/* Empty list of extents we keep dirty. */
	TAILQ_INIT(&amp->am_keepdirty);
	amp->am_nkeepdirty = 0;
	amp->am_nkeepdirty_limit = keepdirty;

	/* All the tables below start out zeroed. */
	amp->am_memtab = calloc(amp->am_nextents, sizeof(amp->am_memtab[0]));
	amp->am_diskmap = calloc(1, amp->am_diskmapsize);
	amp->am_memmap = bit_alloc(amp->am_nextents);
	amp->am_syncmap = bit_alloc(amp->am_nextents);

	if (amp->am_memtab != NULL && amp->am_diskmap != NULL &&
	    amp->am_memmap != NULL && amp->am_syncmap != NULL) {
		amp->am_magic = ACTIVEMAP_MAGIC;
		*ampp = amp;
		return (0);
	}

	/*
	 * At least one allocation failed.  Release whatever we managed to
	 * allocate; free() accepts NULL, so no need to check each pointer.
	 */
	free(amp->am_memtab);
	free(amp->am_diskmap);
	free(amp->am_memmap);
	free(amp->am_syncmap);
	amp->am_magic = 0;
	free(amp);
	errno = ENOMEM;
	return (-1);
}
201238106Sdes
202238106Sdesstatic struct keepdirty *
203238106Sdeskeepdirty_find(struct activemap *amp, int extent)
204238106Sdes{
205238106Sdes	struct keepdirty *kd;
206238106Sdes
207238106Sdes	TAILQ_FOREACH(kd, &amp->am_keepdirty, kd_next) {
208238106Sdes		if (kd->kd_extent == extent)
209238106Sdes			break;
210238106Sdes	}
211238106Sdes	return (kd);
212238106Sdes}
213238106Sdes
214238106Sdesstatic bool
215238106Sdeskeepdirty_add(struct activemap *amp, int extent)
216238106Sdes{
217238106Sdes	struct keepdirty *kd;
218238106Sdes
219238106Sdes	kd = keepdirty_find(amp, extent);
220238106Sdes	if (kd != NULL) {
221238106Sdes		/*
222238106Sdes		 * Only move element at the beginning.
223238106Sdes		 */
224238106Sdes		TAILQ_REMOVE(&amp->am_keepdirty, kd, kd_next);
225238106Sdes		TAILQ_INSERT_HEAD(&amp->am_keepdirty, kd, kd_next);
226238106Sdes		return (false);
227238106Sdes	}
228238106Sdes	/*
229238106Sdes	 * Add new element, but first remove the most unused one if
230238106Sdes	 * we have too many.
231238106Sdes	 */
232238106Sdes	if (amp->am_nkeepdirty >= amp->am_nkeepdirty_limit) {
233238106Sdes		kd = TAILQ_LAST(&amp->am_keepdirty, skeepdirty);
234238106Sdes		PJDLOG_ASSERT(kd != NULL);
235238106Sdes		TAILQ_REMOVE(&amp->am_keepdirty, kd, kd_next);
236238106Sdes		amp->am_nkeepdirty--;
237238106Sdes		PJDLOG_ASSERT(amp->am_nkeepdirty > 0);
238238106Sdes	}
239238106Sdes	if (kd == NULL)
240238106Sdes		kd = malloc(sizeof(*kd));
241238106Sdes	/* We can ignore allocation failure. */
242238106Sdes	if (kd != NULL) {
243238106Sdes		kd->kd_extent = extent;
244238106Sdes		amp->am_nkeepdirty++;
245238106Sdes		TAILQ_INSERT_HEAD(&amp->am_keepdirty, kd, kd_next);
246238106Sdes	}
247238106Sdes
248238106Sdes	return (true);
249238106Sdes}
250238106Sdes
/*
 * Mark every extent found on the am_keepdirty list as dirty in the
 * on-disk bitmap.
 */
static void
keepdirty_fill(struct activemap *amp)
{
	struct keepdirty *kd;

	for (kd = TAILQ_FIRST(&amp->am_keepdirty); kd != NULL;
	    kd = TAILQ_NEXT(kd, kd_next)) {
		bit_set(amp->am_diskmap, kd->kd_extent);
	}
}
259238106Sdes
260238106Sdesstatic void
261238106Sdeskeepdirty_free(struct activemap *amp)
262238106Sdes{
263238106Sdes	struct keepdirty *kd;
264238106Sdes
265238106Sdes	while ((kd = TAILQ_FIRST(&amp->am_keepdirty)) != NULL) {
266238106Sdes		TAILQ_REMOVE(&amp->am_keepdirty, kd, kd_next);
267238106Sdes		amp->am_nkeepdirty--;
268238106Sdes		free(kd);
269238106Sdes	}
270238106Sdes	PJDLOG_ASSERT(amp->am_nkeepdirty == 0);
271238106Sdes}
272238106Sdes
273238106Sdes/*
274238106Sdes * Function frees resources allocated by activemap_init() function.
275238106Sdes */
276238106Sdesvoid
277238106Sdesactivemap_free(struct activemap *amp)
278238106Sdes{
279238106Sdes
280238106Sdes	PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC);
281238106Sdes
282294190Sdes	amp->am_magic = 0;
283238106Sdes
284238106Sdes	keepdirty_free(amp);
285238106Sdes	free(amp->am_memtab);
286238106Sdes	free(amp->am_diskmap);
287238106Sdes	free(amp->am_memmap);
288238106Sdes	free(amp->am_syncmap);
289238106Sdes}
290238106Sdes
/*
 * Function should be called before we handle write requests.  It accounts
 * one more pending write on every extent touched by the given range and
 * returns true if on-disk metadata should be updated.
 */
bool
activemap_write_start(struct activemap *amp, off_t offset, off_t length)
{
	bool modified;
	off_t end;
	int ext, last;

	PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC);
	PJDLOG_ASSERT(length > 0);

	end = offset + length - 1;
	ext = off2ext(amp, offset);
	last = off2ext(amp, end);

	modified = false;
	while (ext <= last) {
		/*
		 * First pending write on this extent: the extent becomes
		 * dirty in the in-memory bitmap.
		 */
		if (amp->am_memtab[ext] == 0) {
			PJDLOG_ASSERT(!bit_test(amp->am_memmap, ext));
			bit_set(amp->am_memmap, ext);
			amp->am_ndirty++;
		}
		amp->am_memtab[ext]++;
		/*
		 * An extent that was not on the keepdirty list before
		 * changes the on-disk bitmap, so the caller has to flush
		 * the bitmap to disk.
		 */
		modified = keepdirty_add(amp, ext) || modified;
		ext++;
	}

	return (modified);
}
326238106Sdes
/*
 * Function should be called after receiving write confirmation.  It drops
 * one pending write from every extent touched by the given range and
 * returns true if on-disk metadata should be updated.
 */
bool
activemap_write_complete(struct activemap *amp, off_t offset, off_t length)
{
	bool modified;
	off_t end;
	int ext, last;

	PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC);
	PJDLOG_ASSERT(length > 0);

	end = offset + length - 1;
	ext = off2ext(amp, offset);
	last = off2ext(amp, end);

	modified = false;
	for (; ext <= last; ext++) {
		PJDLOG_ASSERT(amp->am_memtab[ext] > 0);
		PJDLOG_ASSERT(bit_test(amp->am_memmap, ext));
		amp->am_memtab[ext]--;
		if (amp->am_memtab[ext] != 0) {
			/* There are still writes pending on this extent. */
			continue;
		}
		/*
		 * That was the last pending write, so the extent is clean
		 * in memory now.  The on-disk bitmap changes only if the
		 * extent is not among the ones we keep dirty.
		 */
		bit_clear(amp->am_memmap, ext);
		amp->am_ndirty--;
		if (keepdirty_find(amp, ext) == NULL)
			modified = true;
	}

	return (modified);
}
363294190Sdes
/*
 * Function should be called after finishing synchronization of one extent.
 * It subtracts the requests needed to synchronize the extent from its
 * pending writes and returns true if on-disk metadata should be updated.
 */
bool
activemap_extent_complete(struct activemap *amp, int extent)
{
	int reqs;

	PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC);
	PJDLOG_ASSERT(extent >= 0 && extent < amp->am_nextents);

	reqs = ext2reqs(amp, extent);
	PJDLOG_ASSERT(amp->am_memtab[extent] >= reqs);
	amp->am_memtab[extent] -= reqs;
	PJDLOG_ASSERT(bit_test(amp->am_memmap, extent));

	if (amp->am_memtab[extent] != 0) {
		/* Regular writes are still pending; extent stays dirty. */
		return (false);
	}

	bit_clear(amp->am_memmap, extent);
	amp->am_ndirty--;
	return (true);
}
391238106Sdes
392238106Sdes/*
393238106Sdes * Function returns number of dirty regions.
394238106Sdes */
395238106Sdesuint64_t
396238106Sdesactivemap_ndirty(const struct activemap *amp)
397238106Sdes{
398238106Sdes
399238106Sdes	PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC);
400238106Sdes
401238106Sdes	return (amp->am_ndirty);
402238106Sdes}
403238106Sdes
404238106Sdes/*
405238106Sdes * Function compare on-disk bitmap and in-memory bitmap and returns true if
406238106Sdes * they differ and should be flushed to the disk.
407238106Sdes */
408238106Sdesbool
409238106Sdesactivemap_differ(const struct activemap *amp)
410238106Sdes{
411238106Sdes
412238106Sdes	PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC);
413238106Sdes
414238106Sdes	return (memcmp(amp->am_diskmap, amp->am_memmap,
415238106Sdes	    amp->am_mapsize) != 0);
416238106Sdes}
417238106Sdes
418238106Sdes/*
419238106Sdes * Function returns number of bytes used by bitmap.
420238106Sdes */
421238106Sdessize_t
422238106Sdesactivemap_size(const struct activemap *amp)
423238106Sdes{
424238106Sdes
425238106Sdes	PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC);
426238106Sdes
427238106Sdes	return (amp->am_mapsize);
428238106Sdes}
429238106Sdes
430238106Sdes/*
431238106Sdes * Function returns number of bytes needed for storing on-disk bitmap.
432238106Sdes * This is the same as activemap_size(), but rounded up to sector size.
433238106Sdes */
434238106Sdessize_t
435238106Sdesactivemap_ondisk_size(const struct activemap *amp)
436238106Sdes{
437238106Sdes
438238106Sdes	PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC);
439238106Sdes
440238106Sdes	return (amp->am_diskmapsize);
441238106Sdes}
442238106Sdes
/*
 * Load the bitmap read from disk.  The buffer, which has to hold at least
 * activemap_size() bytes, replaces the on-disk, in-memory and
 * synchronization bitmaps.
 */
void
activemap_copyin(struct activemap *amp, const unsigned char *buf, size_t size)
{
	int ext;

	PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC);
	PJDLOG_ASSERT(size >= amp->am_mapsize);

	memcpy(amp->am_diskmap, buf, amp->am_mapsize);
	memcpy(amp->am_memmap, buf, amp->am_mapsize);
	memcpy(amp->am_syncmap, buf, amp->am_mapsize);

	/* Find the first dirty extent; ext is -1 if there is none. */
	bit_ffs(amp->am_memmap, amp->am_nextents, &ext);
	if (ext != -1) {
		/*
		 * Set synchronization offset to the first dirty extent.
		 */
		activemap_sync_rewind(amp);
		/*
		 * Dirty extents have to stay dirty until they are
		 * synchronized, so account as many pending writes on each
		 * of them as it takes requests to synchronize one extent.
		 */
		amp->am_ndirty = 0;
		while (ext < amp->am_nextents) {
			if (bit_test(amp->am_memmap, ext)) {
				amp->am_memtab[ext] = ext2reqs(amp, ext);
				amp->am_ndirty++;
			}
			ext++;
		}
	}
}
480238106Sdes
/*
 * Merge the given (remote) bitmap into the existing one.  The buffer has
 * to hold at least activemap_size() bytes.
 */
void
activemap_merge(struct activemap *amp, const unsigned char *buf, size_t size)
{
	bitstr_t *remmap = __DECONST(bitstr_t *, buf);
	int ext;

	PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC);
	PJDLOG_ASSERT(size >= amp->am_mapsize);

	/* Find the first extent dirty on the remote side, -1 if none. */
	bit_ffs(remmap, amp->am_nextents, &ext);
	if (ext == -1) {
		/* Nothing to merge. */
		return;
	}

	while (ext < amp->am_nextents) {
		/*
		 * Only extents dirty on the remote side and not yet marked
		 * for synchronization locally need any work.
		 */
		if (!bit_test(amp->am_syncmap, ext) && bit_test(remmap, ext)) {
			bit_set(amp->am_syncmap, ext);
			bit_set(amp->am_memmap, ext);
			bit_set(amp->am_diskmap, ext);
			if (amp->am_memtab[ext] == 0)
				amp->am_ndirty++;
			/*
			 * Keep the extent dirty until it is synchronized:
			 * set the number of pending writes to the number
			 * of requests needed to synchronize one extent.
			 */
			amp->am_memtab[ext] = ext2reqs(amp, ext);
		}
		ext++;
	}

	/*
	 * Set synchronization offset to the first dirty extent.
	 */
	activemap_sync_rewind(amp);
}
522238106Sdes
523238106Sdes/*
524238106Sdes * Function returns pointer to internal bitmap that should be written to disk.
525238106Sdes */
526238106Sdesconst unsigned char *
527238106Sdesactivemap_bitmap(struct activemap *amp, size_t *sizep)
528238106Sdes{
529238106Sdes
530238106Sdes	PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC);
531238106Sdes
532238106Sdes	if (sizep != NULL)
533238106Sdes		*sizep = amp->am_diskmapsize;
534238106Sdes	memcpy(amp->am_diskmap, amp->am_memmap, amp->am_mapsize);
535238106Sdes	keepdirty_fill(amp);
536238106Sdes	return ((const unsigned char *)amp->am_diskmap);
537238106Sdes}
538238106Sdes
/*
 * Calculate how many bytes are needed to store on disk the bitmap for
 * a media of the given size split into extents of the given size.  The
 * result is rounded up to the sector size.
 */
size_t
activemap_calc_ondisk_size(uint64_t mediasize, uint32_t extentsize,
    uint32_t sectorsize)
{
	uint64_t nextents, mapsize;
	size_t ondisksize;

	PJDLOG_ASSERT(mediasize > 0);
	PJDLOG_ASSERT(extentsize > 0);
	PJDLOG_ASSERT(powerof2(extentsize));
	PJDLOG_ASSERT(sectorsize > 0);
	PJDLOG_ASSERT(powerof2(sectorsize));

	/* Number of extents needed to cover the whole media. */
	nextents = ((mediasize - 1) / extentsize) + 1;
	/* One bit per extent. */
	mapsize = sizeof(bitstr_t) * bitstr_size(nextents);
	ondisksize = roundup2(mapsize, sectorsize);

	return (ondisksize);
}
558238106Sdes
/*
 * Restart synchronization: the next activemap_sync_offset() call will begin
 * at the first extent marked for synchronization, if there is any.
 */
void
activemap_sync_rewind(struct activemap *amp)
{
	int ext;

	PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC);

	bit_ffs(amp->am_syncmap, amp->am_nextents, &ext);
	/*
	 * -2 means there are no extents to synchronize, -1 means that we
	 * want to start synchronization from the beginning.
	 */
	amp->am_syncoff = (ext == -1) ? -2 : -1;
}
580238106Sdes
/*
 * Return the next offset at which we should synchronize, or -1 if there is
 * nothing (more) to synchronize.
 *
 * On success the number of bytes to synchronize, at most MAXPHYS, is stored
 * in *lengthp.  *syncextp is set to the number of the extent whose
 * synchronization requests have all been handed out by the previous calls,
 * or to -1 if no extent was finished by this call.
 */
off_t
activemap_sync_offset(struct activemap *amp, off_t *lengthp, int *syncextp)
{
	off_t syncoff, left;
	int ext;

	PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC);
	PJDLOG_ASSERT(lengthp != NULL);
	PJDLOG_ASSERT(syncextp != NULL);

	*syncextp = -1;

	/* Nothing to synchronize at all. */
	if (amp->am_syncoff == -2)
		return (-1);

	if (amp->am_syncoff >= 0) {
		if (amp->am_syncoff + MAXPHYS >= amp->am_mediasize ||
		    off2ext(amp, amp->am_syncoff) !=
		    off2ext(amp, amp->am_syncoff + MAXPHYS)) {
			/*
			 * The next request would fall behind the media or
			 * into another extent, so we are done with the
			 * current one: mark it clean, report it and go
			 * look for the next extent below.
			 */
			ext = off2ext(amp, amp->am_syncoff);
			bit_clear(amp->am_syncmap, ext);
			*syncextp = ext;
			amp->am_syncoff = -1;
		}
	}

	if (amp->am_syncoff != -1) {
		/*
		 * We stay within the same extent, so just advance the
		 * offset.
		 */
		amp->am_syncoff += MAXPHYS;
		if (amp->am_syncoff >= amp->am_mediasize) {
			amp->am_syncoff = -2;
			return (-1);
		}
	} else {
		/*
		 * Find the first extent that still has to be synchronized.
		 */
		bit_ffs(amp->am_syncmap, amp->am_nextents, &ext);
		if (ext == -1) {
			amp->am_syncoff = -2;
			return (-1);
		}
		amp->am_syncoff = ext2off(amp, ext);
	}

	syncoff = amp->am_syncoff;
	/*
	 * Bytes left till the end of the current extent, limited to the end
	 * of the media and to the maximum size of a single request.
	 */
	left = ext2off(amp, off2ext(amp, syncoff)) +
	    amp->am_extentsize - syncoff;
	if (syncoff + left > amp->am_mediasize)
		left = amp->am_mediasize - syncoff;
	if (left > MAXPHYS)
		left = MAXPHYS;

	PJDLOG_ASSERT(left >= 0 && left <= MAXPHYS);
	PJDLOG_ASSERT(syncoff >= 0 && syncoff < amp->am_mediasize);
	PJDLOG_ASSERT(syncoff + left >= 0 &&
	    syncoff + left <= amp->am_mediasize);

	*lengthp = left;
	return (syncoff);
}
649
/*
 * Mark extent(s) containing the given region for synchronization.
 * Most likely one of the components is unavailable.
 * Returns true if at least one extent was newly marked.
 */
bool
activemap_need_sync(struct activemap *amp, off_t offset, off_t length)
{
	bool modified;
	off_t end;
	int ext, last;

	PJDLOG_ASSERT(amp->am_magic == ACTIVEMAP_MAGIC);

	end = offset + length - 1;
	ext = off2ext(amp, offset);
	last = off2ext(amp, end);

	modified = false;
	for (; ext <= last; ext++) {
		if (!bit_test(amp->am_syncmap, ext)) {
			bit_set(amp->am_syncmap, ext);
			if (!bit_test(amp->am_memmap, ext)) {
				bit_set(amp->am_memmap, ext);
				amp->am_ndirty++;
			}
			/*
			 * Account the requests needed to synchronize the
			 * extent on top of the writes already pending.
			 */
			amp->am_memtab[ext] += ext2reqs(amp, ext);
			modified = true;
		} else {
			/*
			 * Already marked for synchronization, so it has to
			 * be dirty as well.
			 */
			PJDLOG_ASSERT(bit_test(amp->am_memmap, ext));
		}
	}

	return (modified);
}
683
/*
 * Print one bitmap to standard output: the given label followed by one
 * digit (0 or 1) per bit and a newline.
 */
static void
activemap_dump_map(const char *label, const bitstr_t *map, int nbits)
{
	int bit;

	printf("%s", label);
	for (bit = 0; bit < nbits; bit++)
		printf("%d", bit_test(map, bit) ? 1 : 0);
	printf("\n");
}

/*
 * Print the in-memory (M), on-disk (D) and synchronization (S) bitmaps to
 * standard output, one line each.
 */
void
activemap_dump(const struct activemap *amp)
{

	activemap_dump_map("M: ", amp->am_memmap, amp->am_nextents);
	activemap_dump_map("D: ", amp->am_diskmap, amp->am_nextents);
	activemap_dump_map("S: ", amp->am_syncmap, amp->am_nextents);
}
702