128263Spst/* $NetBSD: rf_paritymap.c,v 1.11 2023/09/25 21:59:38 oster Exp $ */
228263Spst
350472Speter/*-
428263Spst * Copyright (c) 2009 Jed Davis.
528263Spst * All rights reserved.
661981Sbrian *
761981Sbrian * Redistribution and use in source and binary forms, with or without
861981Sbrian * modification, are permitted provided that the following conditions
961981Sbrian * are met:
1061981Sbrian * 1. Redistributions of source code must retain the above copyright
1161981Sbrian *    notice, this list of conditions and the following disclaimer.
1228263Spst * 2. Redistributions in binary form must reproduce the above copyright
1361981Sbrian *    notice, this list of conditions and the following disclaimer in the
1465843Sbrian *    documentation and/or other materials provided with the distribution.
1561981Sbrian *
1661981Sbrian * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
1777592Sdougb * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
1861981Sbrian * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
1961981Sbrian * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20319221Sasomers * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
2161981Sbrian * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
2261981Sbrian * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
2361981Sbrian * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24319221Sasomers * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25319221Sasomers * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26319221Sasomers * POSSIBILITY OF SUCH DAMAGE.
27319221Sasomers */
28319221Sasomers
2965843Sbrian#include <sys/cdefs.h>
3065843Sbrian__KERNEL_RCSID(0, "$NetBSD: rf_paritymap.c,v 1.11 2023/09/25 21:59:38 oster Exp $");
3165843Sbrian
3265843Sbrian#include <sys/param.h>
3361981Sbrian#include <sys/callout.h>
3465843Sbrian#include <sys/kmem.h>
3565843Sbrian#include <sys/mutex.h>
3665843Sbrian#include <sys/rwlock.h>
3761981Sbrian#include <sys/systm.h>
3861981Sbrian#include <sys/types.h>
3965843Sbrian
4065843Sbrian#include <dev/raidframe/rf_paritymap.h>
41#include <dev/raidframe/rf_stripelocks.h>
42#include <dev/raidframe/rf_layout.h>
43#include <dev/raidframe/rf_raid.h>
44#include <dev/raidframe/rf_parityscan.h>
45#include <dev/raidframe/rf_kintf.h>
46
/*
 * Important parameters:
 *
 * REGION_MINSIZE is the smallest per-disk span, in bytes (25 MiB), that a
 * region may cover when a default region count is computed.
 * DFL_TICKMS and DFL_COOLDOWN are the fallback tick interval (in
 * milliseconds) and cooldown tick count used when no usable settings are
 * supplied.
 */
#define REGION_MINSIZE (25ULL << 20)
#define DFL_TICKMS      40000
#define DFL_COOLDOWN    8     /* 7-8 intervals of 40s = 5min +/- 20s */

/*
 * Internal-use flag bits, kept in pm->flags and accessed under
 * pm->lk_flags.
 *
 * TICKING: the cooldown callout has been scheduled and not yet retired.
 * TICKED:  the callout has fired; rf_paritymap_checkwork() has cooling
 *          work to do.
 */
#define TICKING 1
#define TICKED 2

/* Prototypes for the file-local helpers defined further down. */
static void rf_paritymap_write_locked(struct rf_paritymap *);
static void rf_paritymap_tick(void *);
static u_int rf_paritymap_nreg(RF_Raid_t *);
60
61/* Extract the current status of the parity map. */
62void
63rf_paritymap_status(struct rf_paritymap *pm, struct rf_pmstat *ps)
64{
65	memset(ps, 0, sizeof(*ps));
66	if (pm == NULL)
67		ps->enabled = 0;
68	else {
69		ps->enabled = 1;
70		ps->region_size = pm->region_size;
71		mutex_enter(&pm->lock);
72		memcpy(&ps->params, &pm->params, sizeof(ps->params));
73		memcpy(ps->dirty, pm->disk_now, sizeof(ps->dirty));
74		memcpy(&ps->ctrs, &pm->ctrs, sizeof(ps->ctrs));
75		mutex_exit(&pm->lock);
76	}
77}
78
79/*
80 * Test whether parity in a given sector is suspected of being inconsistent
81 * on disk (assuming that any pending I/O to it is allowed to complete).
82 * This may be of interest to future work on parity scrubbing.
83 */
84int
85rf_paritymap_test(struct rf_paritymap *pm, daddr_t sector)
86{
87	unsigned region = sector / pm->region_size;
88	int retval;
89
90	mutex_enter(&pm->lock);
91	retval = isset(pm->disk_boot->bits, region) ? 1 : 0;
92	mutex_exit(&pm->lock);
93	return retval;
94}
95
96/* To be called before a write to the RAID is submitted. */
97void
98rf_paritymap_begin(struct rf_paritymap *pm, daddr_t offset, daddr_t size)
99{
100	unsigned i, b, e;
101
102	b = offset / pm->region_size;
103	e = (offset + size - 1) / pm->region_size;
104
105	for (i = b; i <= e; i++)
106		rf_paritymap_begin_region(pm, i);
107}
108
109/* To be called after a write to the RAID completes. */
110void
111rf_paritymap_end(struct rf_paritymap *pm, daddr_t offset, daddr_t size)
112{
113	unsigned i, b, e;
114
115	b = offset / pm->region_size;
116	e = (offset + size - 1) / pm->region_size;
117
118	for (i = b; i <= e; i++)
119		rf_paritymap_end_region(pm, i);
120}
121
122void
123rf_paritymap_begin_region(struct rf_paritymap *pm, unsigned region)
124{
125	int needs_write;
126
127	KASSERT(region < RF_PARITYMAP_NREG);
128	pm->ctrs.nwrite++;
129
130	/* If it was being kept warm, deal with that. */
131	mutex_enter(&pm->lock);
132	if (pm->current->state[region] < 0)
133		pm->current->state[region] = 0;
134
135	/* This shouldn't happen unless RAIDOUTSTANDING is set too high. */
136	KASSERT(pm->current->state[region] < 127);
137	pm->current->state[region]++;
138
139	needs_write = isclr(pm->disk_now->bits, region);
140
141	if (needs_write) {
142		KASSERT(pm->current->state[region] == 1);
143		rf_paritymap_write_locked(pm);
144	}
145
146	mutex_exit(&pm->lock);
147}
148
149void
150rf_paritymap_end_region(struct rf_paritymap *pm, unsigned region)
151{
152	KASSERT(region < RF_PARITYMAP_NREG);
153
154	mutex_enter(&pm->lock);
155	KASSERT(pm->current->state[region] > 0);
156	--pm->current->state[region];
157
158	if (pm->current->state[region] <= 0) {
159		pm->current->state[region] = -pm->params.cooldown;
160		KASSERT(pm->current->state[region] <= 0);
161		mutex_enter(&pm->lk_flags);
162		if (!(pm->flags & TICKING)) {
163			pm->flags |= TICKING;
164			mutex_exit(&pm->lk_flags);
165			callout_schedule(&pm->ticker,
166			    mstohz(pm->params.tickms));
167		} else
168			mutex_exit(&pm->lk_flags);
169	}
170	mutex_exit(&pm->lock);
171}
172
173/*
174 * Updates the parity map to account for any changes in current activity
175 * and/or an ongoing parity scan, then writes it to disk with appropriate
176 * synchronization.
177 */
178void
179rf_paritymap_write(struct rf_paritymap *pm)
180{
181	mutex_enter(&pm->lock);
182	rf_paritymap_write_locked(pm);
183	mutex_exit(&pm->lock);
184}
185
186/* As above, but to be used when pm->lock is already held. */
187static void
188rf_paritymap_write_locked(struct rf_paritymap *pm)
189{
190	char w, w0;
191	int i, j, setting, clearing;
192
193	setting = clearing = 0;
194	for (i = 0; i < RF_PARITYMAP_NBYTE; i++) {
195		w0 = pm->disk_now->bits[i];
196		w = pm->disk_boot->bits[i];
197
198		for (j = 0; j < NBBY; j++)
199			if (pm->current->state[i * NBBY + j] != 0)
200				w |= 1 << j;
201
202		if (w & ~w0)
203			setting = 1;
204		if (w0 & ~w)
205			clearing = 1;
206
207		pm->disk_now->bits[i] = w;
208	}
209	pm->ctrs.ncachesync += setting + clearing;
210	pm->ctrs.nclearing += clearing;
211
212	/*
213	 * If bits are being set in the parity map, then a sync is
214	 * required afterwards, so that the regions are marked dirty
215	 * on disk before any writes to them take place.  If bits are
216	 * being cleared, then a sync is required before the write, so
217	 * that any writes to those regions are processed before the
218	 * region is marked clean.  (Synchronization is somewhat
219	 * overkill; a write ordering barrier would suffice, but we
220	 * currently have no way to express that directly.)
221	 */
222	if (clearing)
223		rf_sync_component_caches(pm->raid, 1);
224	rf_paritymap_kern_write(pm->raid, pm->disk_now);
225	if (setting)
226		rf_sync_component_caches(pm->raid, 1);
227}
228
229/* Mark all parity as being in need of rewrite. */
230void
231rf_paritymap_invalidate(struct rf_paritymap *pm)
232{
233	mutex_enter(&pm->lock);
234	memset(pm->disk_boot, (unsigned char)~0, sizeof(*pm->disk_boot));
235	mutex_exit(&pm->lock);
236}
237
238/* Mark all parity as being correct. */
239void
240rf_paritymap_forceclean(struct rf_paritymap *pm)
241{
242	mutex_enter(&pm->lock);
243	memset(pm->disk_boot, 0, sizeof(*pm->disk_boot));
244	mutex_exit(&pm->lock);
245}
246
247/*
248 * The cooldown callout routine just defers its work to a thread; it can't do
249 * the parity map write itself as it would block, and although mutex-induced
250 * blocking is permitted it seems wise to avoid tying up the softint.
251 */
252static void
253rf_paritymap_tick(void *arg)
254{
255	struct rf_paritymap *pm = arg;
256
257	mutex_enter(&pm->lk_flags);
258	pm->flags |= TICKED;
259	mutex_exit(&pm->lk_flags);
260
261	rf_lock_mutex2(pm->raid->iodone_lock);
262	rf_signal_cond2(pm->raid->iodone_cv); /* XXX */
263	rf_unlock_mutex2(pm->raid->iodone_lock);
264}
265
266/*
267 * This is where the parity cooling work (and rearming the callout if needed)
268 * is done; the raidio thread calls it when woken up, as by the above.
269 */
270void
271rf_paritymap_checkwork(struct rf_paritymap *pm)
272{
273	int i, zerop, progressp;
274
275	mutex_enter(&pm->lk_flags);
276	if (pm->flags & TICKED) {
277		zerop = progressp = 0;
278
279		pm->flags &= ~TICKED;
280		mutex_exit(&pm->lk_flags);
281
282		mutex_enter(&pm->lock);
283		for (i = 0; i < RF_PARITYMAP_NREG; i++) {
284			if (pm->current->state[i] < 0) {
285				progressp = 1;
286				pm->current->state[i]++;
287				if (pm->current->state[i] == 0)
288					zerop = 1;
289			}
290		}
291
292		if (progressp)
293			callout_schedule(&pm->ticker,
294			    mstohz(pm->params.tickms));
295		else {
296			mutex_enter(&pm->lk_flags);
297			pm->flags &= ~TICKING;
298			mutex_exit(&pm->lk_flags);
299		}
300
301		if (zerop)
302			rf_paritymap_write_locked(pm);
303		mutex_exit(&pm->lock);
304	} else
305		mutex_exit(&pm->lk_flags);
306}
307
308/*
309 * Set parity map parameters; used both to alter parameters on the fly and to
310 * establish their initial values.  Note that setting a parameter to 0 means
311 * to leave the previous setting unchanged, and that if this is done for the
312 * initial setting of "regions", then a default value will be computed based
313 * on the RAID component size.
314 */
315int
316rf_paritymap_set_params(struct rf_paritymap *pm,
317    const struct rf_pmparams *params, int todisk)
318{
319	int cooldown, tickms;
320	u_int regions;
321	RF_RowCol_t col;
322	RF_ComponentLabel_t *clabel;
323	RF_Raid_t *raidPtr;
324
325	cooldown = params->cooldown != 0
326	    ? params->cooldown : pm->params.cooldown;
327	tickms = params->tickms != 0
328	    ? params->tickms : pm->params.tickms;
329	regions = params->regions != 0
330	    ? params->regions : pm->params.regions;
331
332	if (cooldown < 1 || cooldown > 128) {
333		printf("raid%d: cooldown %d out of range\n", pm->raid->raidid,
334		    cooldown);
335		return (-1);
336	}
337	if (tickms < 10) {
338		printf("raid%d: tick time %dms out of range\n",
339		    pm->raid->raidid, tickms);
340		return (-1);
341	}
342	if (regions == 0) {
343		regions = rf_paritymap_nreg(pm->raid);
344	} else if (regions > RF_PARITYMAP_NREG) {
345		printf("raid%d: region count %u too large (more than %u)\n",
346		    pm->raid->raidid, regions, RF_PARITYMAP_NREG);
347		return (-1);
348	}
349
350	/* XXX any currently warm parity will be used with the new tickms! */
351	pm->params.cooldown = cooldown;
352	pm->params.tickms = tickms;
353	/* Apply the initial region count, but do not change it after that. */
354	if (pm->params.regions == 0)
355		pm->params.regions = regions;
356
357	/* So that the newly set parameters can be tested: */
358	pm->ctrs.nwrite = pm->ctrs.ncachesync = pm->ctrs.nclearing = 0;
359
360	if (todisk) {
361		raidPtr = pm->raid;
362		for (col = 0; col < raidPtr->numCol; col++) {
363			if (RF_DEAD_DISK(raidPtr->Disks[col].status))
364				continue;
365
366			clabel = raidget_component_label(raidPtr, col);
367			clabel->parity_map_ntick = cooldown;
368			clabel->parity_map_tickms = tickms;
369			clabel->parity_map_regions = regions;
370
371			/* Don't touch the disk if it's been spared */
372			if (clabel->status == rf_ds_spared)
373				continue;
374
375			raidflush_component_label(raidPtr, col);
376		}
377
378		/* handle the spares too... */
379		for (col = 0; col < raidPtr->numSpare; col++) {
380			if (raidPtr->Disks[raidPtr->numCol+col].status == rf_ds_used_spare) {
381				clabel = raidget_component_label(raidPtr, raidPtr->numCol+col);
382				clabel->parity_map_ntick = cooldown;
383				clabel->parity_map_tickms = tickms;
384				clabel->parity_map_regions = regions;
385				raidflush_component_label(raidPtr, raidPtr->numCol+col);
386			}
387		}
388	}
389	return 0;
390}
391
392/*
393 * The number of regions may not be as many as can fit into the map, because
394 * when regions are too small, the overhead of setting parity map bits
395 * becomes significant in comparison to the actual I/O, while the
396 * corresponding gains in parity verification time become negligible.  Thus,
397 * a minimum region size (defined above) is imposed.
398 *
399 * Note that, if the number of regions is less than the maximum, then some of
400 * the regions will be "fictional", corresponding to no actual disk; some
401 * parts of the code may process them as normal, but they can not ever be
402 * written to.
403 */
404static u_int
405rf_paritymap_nreg(RF_Raid_t *raid)
406{
407	daddr_t bytes_per_disk, nreg;
408
409	bytes_per_disk = raid->sectorsPerDisk << raid->logBytesPerSector;
410	nreg = bytes_per_disk / REGION_MINSIZE;
411	if (nreg > RF_PARITYMAP_NREG)
412		nreg = RF_PARITYMAP_NREG;
413	if (nreg < 1)
414		nreg = 1;
415
416	return (u_int)nreg;
417}
418
419/*
420 * Initialize a parity map given specific parameters.  This neither reads nor
421 * writes the parity map config in the component labels; for that, see below.
422 */
423int
424rf_paritymap_init(struct rf_paritymap *pm, RF_Raid_t *raid,
425    const struct rf_pmparams *params)
426{
427	daddr_t rstripes;
428	struct rf_pmparams safe;
429
430	pm->raid = raid;
431	pm->params.regions = 0;
432	if (0 != rf_paritymap_set_params(pm, params, 0)) {
433		/*
434		 * If the parameters are out-of-range, then bring the
435		 * parity map up with something reasonable, so that
436		 * the admin can at least go and fix it (or ignore it
437		 * entirely).
438		 */
439		safe.cooldown = DFL_COOLDOWN;
440		safe.tickms = DFL_TICKMS;
441		safe.regions = 0;
442
443		if (0 != rf_paritymap_set_params(pm, &safe, 0))
444			return (-1);
445	}
446
447	rstripes = howmany(raid->Layout.numStripe, pm->params.regions);
448	pm->region_size = rstripes * raid->Layout.dataSectorsPerStripe;
449
450	callout_init(&pm->ticker, CALLOUT_MPSAFE);
451	callout_setfunc(&pm->ticker, rf_paritymap_tick, pm);
452	pm->flags = 0;
453
454	pm->disk_boot = kmem_alloc(sizeof(struct rf_paritymap_ondisk),
455	    KM_SLEEP);
456	pm->disk_now = kmem_alloc(sizeof(struct rf_paritymap_ondisk),
457	    KM_SLEEP);
458	pm->current = kmem_zalloc(sizeof(struct rf_paritymap_current),
459	    KM_SLEEP);
460
461	rf_paritymap_kern_read(pm->raid, pm->disk_boot);
462	memcpy(pm->disk_now, pm->disk_boot, sizeof(*pm->disk_now));
463
464	mutex_init(&pm->lock, MUTEX_DEFAULT, IPL_NONE);
465	mutex_init(&pm->lk_flags, MUTEX_DEFAULT, IPL_SOFTCLOCK);
466
467	return 0;
468}
469
470/*
471 * Destroys a parity map; unless "force" is set, also cleans parity for any
472 * regions which were still in cooldown (but are not dirty on disk).
473 */
474void
475rf_paritymap_destroy(struct rf_paritymap *pm, int force)
476{
477	int i;
478
479	callout_halt(&pm->ticker, NULL); /* XXX stop? halt? */
480	callout_destroy(&pm->ticker);
481
482	if (!force) {
483		for (i = 0; i < RF_PARITYMAP_NREG; i++) {
484			/* XXX check for > 0 ? */
485			if (pm->current->state[i] < 0)
486				pm->current->state[i] = 0;
487		}
488
489		rf_paritymap_write_locked(pm);
490	}
491
492	mutex_destroy(&pm->lock);
493	mutex_destroy(&pm->lk_flags);
494
495	kmem_free(pm->disk_boot, sizeof(struct rf_paritymap_ondisk));
496	kmem_free(pm->disk_now, sizeof(struct rf_paritymap_ondisk));
497	kmem_free(pm->current, sizeof(struct rf_paritymap_current));
498}
499
500/*
501 * Rewrite parity, taking parity map into account; this is the equivalent of
502 * the old rf_RewriteParity, and is likewise to be called from a suitable
503 * thread and shouldn't have multiple copies running in parallel and so on.
504 *
505 * Note that the fictional regions are "cleaned" in one shot, so that very
506 * small RAIDs (useful for testing) will not experience potentially severe
507 * regressions in rewrite time.
508 */
509int
510rf_paritymap_rewrite(struct rf_paritymap *pm)
511{
512	int i, ret_val = 0;
513	daddr_t reg_b, reg_e;
514
515	/* Process only the actual regions. */
516	for (i = 0; i < pm->params.regions; i++) {
517		mutex_enter(&pm->lock);
518		if (isset(pm->disk_boot->bits, i)) {
519			mutex_exit(&pm->lock);
520
521			reg_b = i * pm->region_size;
522			reg_e = reg_b + pm->region_size;
523			if (reg_e > pm->raid->totalSectors)
524				reg_e = pm->raid->totalSectors;
525
526			if (rf_RewriteParityRange(pm->raid, reg_b,
527			    reg_e - reg_b)) {
528				ret_val = 1;
529				if (pm->raid->waitShutdown)
530					return ret_val;
531			} else {
532				mutex_enter(&pm->lock);
533				clrbit(pm->disk_boot->bits, i);
534				rf_paritymap_write_locked(pm);
535				mutex_exit(&pm->lock);
536			}
537		} else {
538			mutex_exit(&pm->lock);
539		}
540	}
541
542	/* Now, clear the fictional regions, if any. */
543	rf_paritymap_forceclean(pm);
544	rf_paritymap_write(pm);
545
546	return ret_val;
547}
548
549/*
550 * How to merge the on-disk parity maps when reading them in from the
551 * various components; returns whether they differ.  In the case that
552 * they do differ, sets *dst to the union of *dst and *src.
553 *
554 * In theory, it should be safe to take the intersection (or just pick
555 * a single component arbitrarily), but the paranoid approach costs
556 * little.
557 *
558 * Appropriate locking, if any, is the responsibility of the caller.
559 */
560int
561rf_paritymap_merge(struct rf_paritymap_ondisk *dst,
562    struct rf_paritymap_ondisk *src)
563{
564	int i, discrep = 0;
565
566	for (i = 0; i < RF_PARITYMAP_NBYTE; i++) {
567		if (dst->bits[i] != src->bits[i])
568			discrep = 1;
569		dst->bits[i] |= src->bits[i];
570	}
571
572	return discrep;
573}
574
575/*
576 * Detach a parity map from its RAID.  This is not meant to be applied except
577 * when unconfiguring the RAID after all I/O has been resolved, as otherwise
578 * an out-of-date parity map could be treated as current.
579 */
580void
581rf_paritymap_detach(RF_Raid_t *raidPtr)
582{
583	if (raidPtr->parity_map == NULL)
584		return;
585
586	rf_lock_mutex2(raidPtr->iodone_lock);
587	struct rf_paritymap *pm = raidPtr->parity_map;
588	raidPtr->parity_map = NULL;
589	rf_unlock_mutex2(raidPtr->iodone_lock);
590	/* XXXjld is that enough locking?  Or too much? */
591	rf_paritymap_destroy(pm, 0);
592	kmem_free(pm, sizeof(*pm));
593}
594
595/*
596 * Is this RAID set ineligible for parity-map use due to not actually
597 * having any parity?  (If so, rf_paritymap_attach is a no-op, but
598 * rf_paritymap_{get,set}_disable will still pointlessly act on the
599 * component labels.)
600 */
601int
602rf_paritymap_ineligible(RF_Raid_t *raidPtr)
603{
604	return raidPtr->Layout.map->faultsTolerated == 0;
605}
606
/*
 * Attach a parity map to a RAID set if appropriate.  Includes
 * configure-time processing of parity-map fields of component label.
 *
 * raidPtr: the RAID set being configured.
 * force:   nonzero for an initial configuration; the parity-map fields
 *          already present in the labels are then ignored and the map is
 *          reinitialized as fully dirty.
 *
 * Sets without parity are left untouched.  Otherwise the labels of the
 * live components are consulted to decide whether the map is to be used
 * (pm_use) and whether its on-disk contents must be discarded (pm_zap);
 * the map is created if wanted, and finally the labels of all live
 * components are rewritten to reflect the outcome.  If creating the map
 * fails, the set runs without one and the labels are left as they were.
 */
void
rf_paritymap_attach(RF_Raid_t *raidPtr, int force)
{
	RF_RowCol_t col;
	int pm_use, pm_zap;
	int g_tickms, g_ntick, g_regions;
	int good;
	RF_ComponentLabel_t *clabel;
	u_int flags, regions;
	struct rf_pmparams params;

	if (rf_paritymap_ineligible(raidPtr)) {
		/* There isn't any parity. */
		return;
	}

	/* Defaults, used unless a valid label says otherwise. */
	pm_use = 1;
	pm_zap = 0;
	g_tickms = DFL_TICKMS;
	g_ntick = DFL_COOLDOWN;
	g_regions = 0;

	/*
	 * Collect opinions on the set config.  If this is the initial
	 * config (raidctl -C), treat all labels as invalid, since
	 * there may be random data present.
	 */
	if (!force) {
		for (col = 0; col < raidPtr->numCol; col++) {
			if (RF_DEAD_DISK(raidPtr->Disks[col].status))
				continue;
			clabel = raidget_component_label(raidPtr, col);
			flags = clabel->parity_map_flags;
			/* Check for use by non-parity-map kernel. */
			if (clabel->parity_map_modcount
			    != clabel->mod_counter) {
				flags &= ~RF_PMLABEL_WASUSED;
			}

			if (flags & RF_PMLABEL_VALID) {
				/* The last valid label seen wins for these. */
				g_tickms = clabel->parity_map_tickms;
				g_ntick = clabel->parity_map_ntick;
				regions = clabel->parity_map_regions;
				if (g_regions == 0)
					g_regions = regions;
				else if (g_regions != regions) {
					/*
					 * Components disagree on the region
					 * count, so the stored maps cannot
					 * be trusted.
					 */
					pm_zap = 1; /* important! */
				}

				/* Any one component can veto use of the map. */
				if (flags & RF_PMLABEL_DISABLE) {
					pm_use = 0;
				}
				/* The map on disk was not kept up to date. */
				if (!(flags & RF_PMLABEL_WASUSED)) {
					pm_zap = 1;
				}
			} else {
				/* No valid parity-map info on this component. */
				pm_zap = 1;
			}
		}
	} else {
		pm_zap = 1;
	}

	/* Finally, create and attach the parity map. */
	if (pm_use) {
		params.cooldown = g_ntick;
		params.tickms = g_tickms;
		params.regions = g_regions;

		raidPtr->parity_map = kmem_alloc(sizeof(struct rf_paritymap),
		    KM_SLEEP);
		if (0 != rf_paritymap_init(raidPtr->parity_map, raidPtr,
			&params)) {
			/* It failed; do without. */
			kmem_free(raidPtr->parity_map,
			    sizeof(struct rf_paritymap));
			raidPtr->parity_map = NULL;
			return;
		}

		if (g_regions == 0)
			/* Pick up the autoconfigured region count. */
			g_regions = raidPtr->parity_map->params.regions;

		if (pm_zap) {
			/*
			 * The stored map is unusable: start from all-clean
			 * if the set's parity is known good (and this is
			 * not a forced config), else from all-dirty.
			 */
			good = raidPtr->parity_good && !force;

			if (good)
				rf_paritymap_forceclean(raidPtr->parity_map);
			else
				rf_paritymap_invalidate(raidPtr->parity_map);
			/* This needs to be on disk before WASUSED is set. */
			rf_paritymap_write(raidPtr->parity_map);
		}
	}

	/* Alter labels in-core to reflect the current view of things. */
	for (col = 0; col < raidPtr->numCol; col++) {
		if (RF_DEAD_DISK(raidPtr->Disks[col].status))
			continue;
		clabel = raidget_component_label(raidPtr, col);

		if (pm_use)
			flags = RF_PMLABEL_VALID | RF_PMLABEL_WASUSED;
		else
			flags = RF_PMLABEL_VALID | RF_PMLABEL_DISABLE;

		clabel->parity_map_flags = flags;
		clabel->parity_map_tickms = g_tickms;
		clabel->parity_map_ntick = g_ntick;
		clabel->parity_map_regions = g_regions;
		raidflush_component_label(raidPtr, col);
	}
	/* Note that we're just in 'attach' here, and there won't
	   be any spare disks at this point. */
}
727
728/*
729 * For initializing the parity-map fields of a component label, both on
730 * initial creation and on reconstruct.  */
731void
732rf_paritymap_init_label(struct rf_paritymap *pm, RF_ComponentLabel_t *clabel)
733{
734	if (pm != NULL) {
735		clabel->parity_map_flags =
736		    RF_PMLABEL_VALID | RF_PMLABEL_WASUSED;
737		clabel->parity_map_tickms = pm->params.tickms;
738		clabel->parity_map_ntick = pm->params.cooldown;
739		/*
740		 * XXXjld: If the number of regions is changed on disk, and
741		 * then a new component is labeled before the next configure,
742		 * then it will get the old value and they will conflict on
743		 * the next boot (and the default will be used instead).
744		 */
745		clabel->parity_map_regions = pm->params.regions;
746	} else {
747		/*
748		 * XXXjld: if the map is disabled, and all the components are
749		 * replaced without an intervening unconfigure/reconfigure,
750		 * then it will become enabled on the next unconfig/reconfig.
751		 */
752	}
753}
754
755
756/* Will the parity map be disabled next time? */
757int
758rf_paritymap_get_disable(RF_Raid_t *raidPtr)
759{
760	RF_ComponentLabel_t *clabel;
761	RF_RowCol_t col;
762	int dis;
763
764	dis = 0;
765	for (col = 0; col < raidPtr->numCol; col++) {
766		if (RF_DEAD_DISK(raidPtr->Disks[col].status))
767			continue;
768		clabel = raidget_component_label(raidPtr, col);
769		if (clabel->parity_map_flags & RF_PMLABEL_DISABLE)
770			dis = 1;
771	}
772        for (col = 0; col < raidPtr->numSpare; col++) {
773		if (raidPtr->Disks[raidPtr->numCol+col].status != rf_ds_used_spare)
774                        continue;
775                clabel = raidget_component_label(raidPtr, raidPtr->numCol+col);
776                if (clabel->parity_map_flags & RF_PMLABEL_DISABLE)
777                        dis = 1;
778        }
779
780	return dis;
781}
782
783/* Set whether the parity map will be disabled next time. */
784void
785rf_paritymap_set_disable(RF_Raid_t *raidPtr, int dis)
786{
787	RF_ComponentLabel_t *clabel;
788	RF_RowCol_t col;
789
790	for (col = 0; col < raidPtr->numCol; col++) {
791		if (RF_DEAD_DISK(raidPtr->Disks[col].status))
792			continue;
793		clabel = raidget_component_label(raidPtr, col);
794		if (dis)
795			clabel->parity_map_flags |= RF_PMLABEL_DISABLE;
796		else
797			clabel->parity_map_flags &= ~RF_PMLABEL_DISABLE;
798		raidflush_component_label(raidPtr, col);
799	}
800
801	/* update any used spares as well */
802	for (col = 0; col < raidPtr->numSpare; col++) {
803		if (raidPtr->Disks[raidPtr->numCol+col].status != rf_ds_used_spare)
804			continue;
805
806		clabel = raidget_component_label(raidPtr, raidPtr->numCol+col);
807		if (dis)
808			clabel->parity_map_flags |= RF_PMLABEL_DISABLE;
809		else
810			clabel->parity_map_flags &= ~RF_PMLABEL_DISABLE;
811		raidflush_component_label(raidPtr, raidPtr->numCol+col);
812	}
813}
814