1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2022 Marshall Kirk McKusick <mckusick@mckusick.com>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/param.h>
29#include <sys/bio.h>
30#include <sys/buf.h>
31#include <sys/ctype.h>
32#include <sys/kernel.h>
33#include <sys/lock.h>
34#include <sys/malloc.h>
35#include <sys/module.h>
36#include <sys/reboot.h>
37#include <sys/rwlock.h>
38#include <sys/sbuf.h>
39#include <sys/sysctl.h>
40
41#include <geom/geom.h>
42#include <geom/geom_dbg.h>
43#include <geom/union/g_union.h>
44
45SYSCTL_DECL(_kern_geom);
46static SYSCTL_NODE(_kern_geom, OID_AUTO, union, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
47    "GEOM_UNION stuff");
48static u_int g_union_debug = 0;
49SYSCTL_UINT(_kern_geom_union, OID_AUTO, debug, CTLFLAG_RW, &g_union_debug, 0,
50    "Debug level");
51
52static void g_union_config(struct gctl_req *req, struct g_class *mp,
53    const char *verb);
54static g_access_t g_union_access;
55static g_start_t g_union_start;
56static g_dumpconf_t g_union_dumpconf;
57static g_orphan_t g_union_orphan;
58static int g_union_destroy_geom(struct gctl_req *req, struct g_class *mp,
59    struct g_geom *gp);
60static g_provgone_t g_union_providergone;
61static g_resize_t g_union_resize;
62
63struct g_class g_union_class = {
64	.name = G_UNION_CLASS_NAME,
65	.version = G_VERSION,
66	.ctlreq = g_union_config,
67	.access = g_union_access,
68	.start = g_union_start,
69	.dumpconf = g_union_dumpconf,
70	.orphan = g_union_orphan,
71	.destroy_geom = g_union_destroy_geom,
72	.providergone = g_union_providergone,
73	.resize = g_union_resize,
74};
75
76static void g_union_ctl_create(struct gctl_req *req, struct g_class *mp, bool);
77static intmax_t g_union_fetcharg(struct gctl_req *req, const char *name);
78static bool g_union_verify_nprefix(const char *name);
79static void g_union_ctl_destroy(struct gctl_req *req, struct g_class *mp, bool);
80static struct g_geom *g_union_find_geom(struct g_class *mp, const char *name);
81static void g_union_ctl_reset(struct gctl_req *req, struct g_class *mp, bool);
82static void g_union_ctl_revert(struct gctl_req *req, struct g_class *mp, bool);
83static void g_union_revert(struct g_union_softc *sc);
84static void g_union_doio(struct g_union_wip *wip);
85static void g_union_ctl_commit(struct gctl_req *req, struct g_class *mp, bool);
86static void g_union_setmap(struct bio *bp, struct g_union_softc *sc);
87static bool g_union_getmap(struct bio *bp, struct g_union_softc *sc,
88	off_t *len2read);
89static void g_union_done(struct bio *bp);
90static void g_union_kerneldump(struct bio *bp, struct g_union_softc *sc);
91static int g_union_dumper(void *, void *, off_t, size_t);
92static int g_union_destroy(struct gctl_req *req, struct g_geom *gp, bool force);
93
94/*
95 * Operate on union-specific configuration commands.
96 */
97static void
98g_union_config(struct gctl_req *req, struct g_class *mp, const char *verb)
99{
100	uint32_t *version, *verbose;
101
102	g_topology_assert();
103
104	version = gctl_get_paraml(req, "version", sizeof(*version));
105	if (version == NULL) {
106		gctl_error(req, "No '%s' argument.", "version");
107		return;
108	}
109	if (*version != G_UNION_VERSION) {
110		gctl_error(req, "Userland and kernel parts are out of sync.");
111		return;
112	}
113	verbose = gctl_get_paraml(req, "verbose", sizeof(*verbose));
114	if (verbose == NULL) {
115		gctl_error(req, "No '%s' argument.", "verbose");
116		return;
117	}
118	if (strcmp(verb, "create") == 0) {
119		g_union_ctl_create(req, mp, *verbose);
120		return;
121	} else if (strcmp(verb, "destroy") == 0) {
122		g_union_ctl_destroy(req, mp, *verbose);
123		return;
124	} else if (strcmp(verb, "reset") == 0) {
125		g_union_ctl_reset(req, mp, *verbose);
126		return;
127	} else if (strcmp(verb, "revert") == 0) {
128		g_union_ctl_revert(req, mp, *verbose);
129		return;
130	} else if (strcmp(verb, "commit") == 0) {
131		g_union_ctl_commit(req, mp, *verbose);
132		return;
133	}
134
135	gctl_error(req, "Unknown verb.");
136}
137
138/*
139 * Create a union device.
140 */
141static void
142g_union_ctl_create(struct gctl_req *req, struct g_class *mp, bool verbose)
143{
144	struct g_provider *upperpp, *lowerpp, *newpp;
145	struct g_consumer *uppercp, *lowercp;
146	struct g_union_softc *sc;
147	struct g_geom_alias *gap;
148	struct g_geom *gp;
149	intmax_t offset, secsize, size, needed;
150	const char *gunionname;
151	int *nargs, error, i, n;
152	char name[64];
153
154	g_topology_assert();
155
156	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
157	if (nargs == NULL) {
158		gctl_error(req, "No '%s' argument.", "nargs");
159		return;
160	}
161	if (*nargs < 2) {
162		gctl_error(req, "Missing device(s).");
163		return;
164	}
165	if (*nargs > 2) {
166		gctl_error(req, "Extra device(s).");
167		return;
168	}
169
170	offset = g_union_fetcharg(req, "offset");
171	size = g_union_fetcharg(req, "size");
172	secsize = g_union_fetcharg(req, "secsize");
173	gunionname = gctl_get_asciiparam(req, "gunionname");
174
175	upperpp = gctl_get_provider(req, "arg0");
176	lowerpp = gctl_get_provider(req, "arg1");
177	if (upperpp == NULL || lowerpp == NULL)
178		/* error message provided by gctl_get_provider() */
179		return;
180	/* Create the union */
181	if (secsize == 0)
182		secsize = lowerpp->sectorsize;
183	else if ((secsize % lowerpp->sectorsize) != 0) {
184		gctl_error(req, "Sector size %jd is not a multiple of lower "
185		    "provider %s's %jd sector size.", (intmax_t)secsize,
186		    lowerpp->name, (intmax_t)lowerpp->sectorsize);
187		return;
188	}
189	if (secsize > maxphys) {
190		gctl_error(req, "Too big secsize %jd for lower provider %s.",
191		    (intmax_t)secsize, lowerpp->name);
192		return;
193	}
194	if (secsize % upperpp->sectorsize != 0) {
195		gctl_error(req, "Sector size %jd is not a multiple of upper "
196		    "provider %s's %jd sector size.", (intmax_t)secsize,
197		    upperpp->name, (intmax_t)upperpp->sectorsize);
198		return;
199	}
200	if ((offset % secsize) != 0) {
201		gctl_error(req, "Offset %jd is not a multiple of lower "
202		    "provider %s's %jd sector size.", (intmax_t)offset,
203		    lowerpp->name, (intmax_t)lowerpp->sectorsize);
204		return;
205	}
206	if (size == 0)
207		size = lowerpp->mediasize - offset;
208	else
209		size -= offset;
210	if ((size % secsize) != 0) {
211		gctl_error(req, "Size %jd is not a multiple of sector size "
212		    "%jd.", (intmax_t)size, (intmax_t)secsize);
213		return;
214	}
215	if (offset + size < lowerpp->mediasize) {
216		gctl_error(req, "Size %jd is too small for lower provider %s, "
217		    "needs %jd.", (intmax_t)(offset + size), lowerpp->name,
218		    lowerpp->mediasize);
219		return;
220	}
221	if (size > upperpp->mediasize) {
222		gctl_error(req, "Upper provider %s size (%jd) is too small, "
223		    "needs %jd.", upperpp->name, (intmax_t)upperpp->mediasize,
224		    (intmax_t)size);
225		return;
226	}
227	if (gunionname != NULL && !g_union_verify_nprefix(gunionname)) {
228		gctl_error(req, "Gunion name %s must be alphanumeric.",
229		    gunionname);
230		return;
231	}
232	if (gunionname != NULL) {
233		n = snprintf(name, sizeof(name), "%s%s", gunionname,
234		    G_UNION_SUFFIX);
235	} else {
236		n = snprintf(name, sizeof(name), "%s-%s%s", upperpp->name,
237		    lowerpp->name, G_UNION_SUFFIX);
238	}
239	if (n <= 0 || n >= sizeof(name)) {
240		gctl_error(req, "Invalid provider name.");
241		return;
242	}
243	LIST_FOREACH(gp, &mp->geom, geom) {
244		if (strcmp(gp->name, name) == 0) {
245			gctl_error(req, "Provider %s already exists.", name);
246			return;
247		}
248	}
249	gp = g_new_geomf(mp, "%s", name);
250	sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
251	rw_init(&sc->sc_rwlock, "gunion");
252	TAILQ_INIT(&sc->sc_wiplist);
253	sc->sc_offset = offset;
254	sc->sc_size = size;
255	sc->sc_sectorsize = secsize;
256	sc->sc_reads = 0;
257	sc->sc_writes = 0;
258	sc->sc_deletes = 0;
259	sc->sc_getattrs = 0;
260	sc->sc_flushes = 0;
261	sc->sc_speedups = 0;
262	sc->sc_cmd0s = 0;
263	sc->sc_cmd1s = 0;
264	sc->sc_cmd2s = 0;
265	sc->sc_readbytes = 0;
266	sc->sc_wrotebytes = 0;
267	sc->sc_writemap_memory = 0;
268	gp->softc = sc;
269
270	newpp = g_new_providerf(gp, "%s", gp->name);
271	newpp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
272	newpp->mediasize = size;
273	newpp->sectorsize = secsize;
274	LIST_FOREACH(gap, &upperpp->aliases, ga_next)
275		g_provider_add_alias(newpp, "%s%s", gap->ga_alias,
276		    G_UNION_SUFFIX);
277	LIST_FOREACH(gap, &lowerpp->aliases, ga_next)
278		g_provider_add_alias(newpp, "%s%s", gap->ga_alias,
279		    G_UNION_SUFFIX);
280	lowercp = g_new_consumer(gp);
281	lowercp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
282	if ((error = g_attach(lowercp, lowerpp)) != 0) {
283		gctl_error(req, "Error %d: cannot attach to provider %s.",
284		    error, lowerpp->name);
285		goto fail1;
286	}
287	/* request read and exclusive access for lower */
288	if ((error = g_access(lowercp, 1, 0, 1)) != 0) {
289		gctl_error(req, "Error %d: cannot obtain exclusive access to "
290		    "%s.\n\tMust be unmounted or mounted read-only.", error,
291		    lowerpp->name);
292		goto fail2;
293	}
294	uppercp = g_new_consumer(gp);
295	uppercp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
296	if ((error = g_attach(uppercp, upperpp)) != 0) {
297		gctl_error(req, "Error %d: cannot attach to provider %s.",
298		    error, upperpp->name);
299		goto fail3;
300	}
301	/* request read, write, and exclusive access for upper */
302	if ((error = g_access(uppercp, 1, 1, 1)) != 0) {
303		gctl_error(req, "Error %d: cannot obtain write access to %s.",
304		    error, upperpp->name);
305		goto fail4;
306	}
307	sc->sc_uppercp = uppercp;
308	sc->sc_lowercp = lowercp;
309
310	newpp->flags |= (upperpp->flags & G_PF_ACCEPT_UNMAPPED) &
311	    (lowerpp->flags & G_PF_ACCEPT_UNMAPPED);
312	g_error_provider(newpp, 0);
313	/*
314	 * Allocate the map that tracks the sectors that have been written
315	 * to the top layer. We use a 2-level hierarchy as that lets us
316	 * map up to 1 petabyte using allocations of less than 33 Mb
317	 * when using 4K byte sectors (or 268 Mb with 512 byte sectors).
318	 *
319	 * We totally populate the leaf nodes rather than allocating them
320	 * as they are first used because their usage occurs in the
321	 * g_union_start() routine that may be running in the g_down
322	 * thread which cannot sleep.
323	 */
324	sc->sc_map_size = roundup(size / secsize, BITS_PER_ENTRY);
325	needed = sc->sc_map_size / BITS_PER_ENTRY;
326	for (sc->sc_root_size = 1;
327	     sc->sc_root_size * sc->sc_root_size < needed;
328	     sc->sc_root_size++)
329		continue;
330	sc->sc_writemap_root = g_malloc(sc->sc_root_size * sizeof(uint64_t *),
331	    M_WAITOK | M_ZERO);
332	sc->sc_leaf_size = sc->sc_root_size;
333	sc->sc_bits_per_leaf = sc->sc_leaf_size * BITS_PER_ENTRY;
334	sc->sc_leafused = g_malloc(roundup(sc->sc_root_size, BITS_PER_ENTRY),
335	    M_WAITOK | M_ZERO);
336	for (i = 0; i < sc->sc_root_size; i++)
337		sc->sc_writemap_root[i] =
338		    g_malloc(sc->sc_leaf_size * sizeof(uint64_t),
339		    M_WAITOK | M_ZERO);
340	sc->sc_writemap_memory =
341	    (sc->sc_root_size + sc->sc_root_size * sc->sc_leaf_size) *
342	    sizeof(uint64_t) + roundup(sc->sc_root_size, BITS_PER_ENTRY);
343	if (verbose)
344		gctl_msg(req, 0, "Device %s created with memory map size %jd.",
345		    gp->name, (intmax_t)sc->sc_writemap_memory);
346	gctl_post_messages(req);
347	G_UNION_DEBUG(1, "Device %s created with memory map size %jd.",
348	    gp->name, (intmax_t)sc->sc_writemap_memory);
349	return;
350
351fail4:
352	g_detach(uppercp);
353fail3:
354	g_destroy_consumer(uppercp);
355	g_access(lowercp, -1, 0, -1);
356fail2:
357	g_detach(lowercp);
358fail1:
359	g_destroy_consumer(lowercp);
360	g_destroy_provider(newpp);
361	g_destroy_geom(gp);
362}
363
364/*
365 * Fetch named option and verify that it is positive.
366 */
367static intmax_t
368g_union_fetcharg(struct gctl_req *req, const char *name)
369{
370	intmax_t *val;
371
372	val = gctl_get_paraml_opt(req, name, sizeof(*val));
373	if (val == NULL)
374		return (0);
375	if (*val >= 0)
376		return (*val);
377	gctl_msg(req, EINVAL, "Invalid '%s' (%jd): negative value, "
378	    "using default.", name, *val);
379	return (0);
380}
381
/*
 * Verify that a name is alphanumeric.
 *
 * Returns true when every character of the NUL-terminated string name is
 * a letter or a digit (the empty string therefore passes), false as soon
 * as any other character is found.
 */
static bool
g_union_verify_nprefix(const char *name)
{
	const char *cp;

	/* Walk to the terminator once instead of calling strlen() per step. */
	for (cp = name; *cp != '\0'; cp++) {
		/* Classify as unsigned char so high-bit bytes stay in range. */
		if (isalpha((unsigned char)*cp) == 0 &&
		    isdigit((unsigned char)*cp) == 0)
			return (false);
	}
	return (true);
}
397
398/*
399 * Destroy a union device.
400 */
401static void
402g_union_ctl_destroy(struct gctl_req *req, struct g_class *mp, bool verbose)
403{
404	int *nargs, *force, error, i;
405	struct g_geom *gp;
406	const char *name;
407	char param[16];
408
409	g_topology_assert();
410
411	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
412	if (nargs == NULL) {
413		gctl_error(req, "No '%s' argument.", "nargs");
414		return;
415	}
416	if (*nargs <= 0) {
417		gctl_error(req, "Missing device(s).");
418		return;
419	}
420	force = gctl_get_paraml(req, "force", sizeof(*force));
421	if (force == NULL) {
422		gctl_error(req, "No 'force' argument.");
423		return;
424	}
425
426	for (i = 0; i < *nargs; i++) {
427		snprintf(param, sizeof(param), "arg%d", i);
428		name = gctl_get_asciiparam(req, param);
429		if (name == NULL) {
430			gctl_msg(req, EINVAL, "No '%s' argument.", param);
431			continue;
432		}
433		if (strncmp(name, _PATH_DEV, strlen(_PATH_DEV)) == 0)
434			name += strlen(_PATH_DEV);
435		gp = g_union_find_geom(mp, name);
436		if (gp == NULL) {
437			gctl_msg(req, EINVAL, "Device %s is invalid.", name);
438			continue;
439		}
440		error = g_union_destroy(verbose ? req : NULL, gp, *force);
441		if (error != 0)
442			gctl_msg(req, error, "Error %d: "
443			    "cannot destroy device %s.", error, gp->name);
444	}
445	gctl_post_messages(req);
446}
447
448/*
449 * Find a union geom.
450 */
451static struct g_geom *
452g_union_find_geom(struct g_class *mp, const char *name)
453{
454	struct g_geom *gp;
455
456	LIST_FOREACH(gp, &mp->geom, geom) {
457		if (strcmp(gp->name, name) == 0)
458			return (gp);
459	}
460	return (NULL);
461}
462
463/*
464 * Zero out all the statistics associated with a union device.
465 */
466static void
467g_union_ctl_reset(struct gctl_req *req, struct g_class *mp, bool verbose)
468{
469	struct g_union_softc *sc;
470	struct g_provider *pp;
471	struct g_geom *gp;
472	char param[16];
473	int i, *nargs;
474
475	g_topology_assert();
476
477	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
478	if (nargs == NULL) {
479		gctl_error(req, "No '%s' argument.", "nargs");
480		return;
481	}
482	if (*nargs <= 0) {
483		gctl_error(req, "Missing device(s).");
484		return;
485	}
486
487	for (i = 0; i < *nargs; i++) {
488		snprintf(param, sizeof(param), "arg%d", i);
489		pp = gctl_get_provider(req, param);
490		if (pp == NULL) {
491			gctl_msg(req, EINVAL, "No '%s' argument.", param);
492			continue;
493		}
494		gp = pp->geom;
495		if (gp->class != mp) {
496			gctl_msg(req, EINVAL, "Provider %s is invalid.",
497			    pp->name);
498			continue;
499		}
500		sc = gp->softc;
501		sc->sc_reads = 0;
502		sc->sc_writes = 0;
503		sc->sc_deletes = 0;
504		sc->sc_getattrs = 0;
505		sc->sc_flushes = 0;
506		sc->sc_speedups = 0;
507		sc->sc_cmd0s = 0;
508		sc->sc_cmd1s = 0;
509		sc->sc_cmd2s = 0;
510		sc->sc_readbytes = 0;
511		sc->sc_wrotebytes = 0;
512		if (verbose)
513			gctl_msg(req, 0, "Device %s has been reset.", pp->name);
514		G_UNION_DEBUG(1, "Device %s has been reset.", pp->name);
515	}
516	gctl_post_messages(req);
517}
518
519/*
520 * Revert all write requests made to the top layer of the union.
521 */
522static void
523g_union_ctl_revert(struct gctl_req *req, struct g_class *mp, bool verbose)
524{
525	struct g_union_softc *sc;
526	struct g_provider *pp;
527	struct g_geom *gp;
528	char param[16];
529	int i, *nargs;
530
531	g_topology_assert();
532
533	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
534	if (nargs == NULL) {
535		gctl_error(req, "No '%s' argument.", "nargs");
536		return;
537	}
538	if (*nargs <= 0) {
539		gctl_error(req, "Missing device(s).");
540		return;
541	}
542
543	for (i = 0; i < *nargs; i++) {
544		snprintf(param, sizeof(param), "arg%d", i);
545		pp = gctl_get_provider(req, param);
546		if (pp == NULL) {
547			gctl_msg(req, EINVAL, "No '%s' argument.", param);
548			continue;
549		}
550		gp = pp->geom;
551		if (gp->class != mp) {
552			gctl_msg(req, EINVAL, "Provider %s is invalid.",
553			    pp->name);
554			continue;
555		}
556		sc = gp->softc;
557		if (g_union_get_writelock(sc) != 0) {
558			gctl_msg(req, EINVAL, "Revert already in progress for "
559			    "provider %s.", pp->name);
560			continue;
561		}
562		/*
563		 * No mount or other use of union is allowed.
564		 */
565		if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0) {
566			gctl_msg(req, EPERM, "Unable to get exclusive access "
567			    "for reverting of %s;\n\t%s cannot be mounted or "
568			    "otherwise open during a revert.",
569			     pp->name, pp->name);
570			g_union_rel_writelock(sc);
571			continue;
572		}
573		g_union_revert(sc);
574		g_union_rel_writelock(sc);
575		if (verbose)
576			gctl_msg(req, 0, "Device %s has been reverted.",
577			    pp->name);
578		G_UNION_DEBUG(1, "Device %s has been reverted.", pp->name);
579	}
580	gctl_post_messages(req);
581}
582
583/*
584 * Revert union writes by zero'ing out the writemap.
585 */
586static void
587g_union_revert(struct g_union_softc *sc)
588{
589	int i;
590
591	G_WLOCK(sc);
592	for (i = 0; i < sc->sc_root_size; i++)
593		memset(sc->sc_writemap_root[i], 0,
594		    sc->sc_leaf_size * sizeof(uint64_t));
595	memset(sc->sc_leafused, 0, roundup(sc->sc_root_size, BITS_PER_ENTRY));
596	G_WUNLOCK(sc);
597}
598
599/*
600 * Commit all the writes made in the top layer to the lower layer.
601 */
602static void
603g_union_ctl_commit(struct gctl_req *req, struct g_class *mp, bool verbose)
604{
605	struct g_union_softc *sc;
606	struct g_provider *pp, *lowerpp;
607	struct g_consumer *lowercp;
608	struct g_geom *gp;
609	struct bio *bp;
610	char param[16];
611	off_t len2rd, len2wt, savelen;
612	int i, error, error1, *nargs, *force, *reboot;
613
614	g_topology_assert();
615
616	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
617	if (nargs == NULL) {
618		gctl_error(req, "No '%s' argument.", "nargs");
619		return;
620	}
621	if (*nargs <= 0) {
622		gctl_error(req, "Missing device(s).");
623		return;
624	}
625	force = gctl_get_paraml(req, "force", sizeof(*force));
626	if (force == NULL) {
627		gctl_error(req, "No 'force' argument.");
628		return;
629	}
630	reboot = gctl_get_paraml(req, "reboot", sizeof(*reboot));
631	if (reboot == NULL) {
632		gctl_error(req, "No 'reboot' argument.");
633		return;
634	}
635
636	/* Get a bio buffer to do our I/O */
637	bp = g_alloc_bio();
638	bp->bio_data = g_malloc(MAXBSIZE, M_WAITOK);
639	bp->bio_done = biodone;
640	for (i = 0; i < *nargs; i++) {
641		snprintf(param, sizeof(param), "arg%d", i);
642		pp = gctl_get_provider(req, param);
643		if (pp == NULL) {
644			gctl_msg(req, EINVAL, "No '%s' argument.", param);
645			continue;
646		}
647		gp = pp->geom;
648		if (gp->class != mp) {
649			gctl_msg(req, EINVAL, "Provider %s is invalid.",
650			    pp->name);
651			continue;
652		}
653		sc = gp->softc;
654		if (g_union_get_writelock(sc) != 0) {
655			gctl_msg(req, EINVAL, "Commit already in progress for "
656			    "provider %s.", pp->name);
657			continue;
658		}
659
660		/* upgrade to write access for lower */
661		lowercp = sc->sc_lowercp;
662		lowerpp = lowercp->provider;
663		/*
664		 * No mount or other use of union is allowed, unless the
665		 * -f flag is given which allows read-only mount or usage.
666		 */
667		if ((*force == false && pp->acr > 0) || pp->acw > 0 ||
668		     pp->ace > 0) {
669			gctl_msg(req, EPERM, "Unable to get exclusive access "
670			    "for writing of %s.\n\tNote that %s cannot be "
671			    "mounted or otherwise\n\topen during a commit "
672			    "unless the -f flag is used.", pp->name, pp->name);
673			g_union_rel_writelock(sc);
674			continue;
675		}
676		/*
677		 * No mount or other use of lower media is allowed, unless the
678		 * -f flag is given which allows read-only mount or usage.
679		 */
680		if ((*force == false && lowerpp->acr > lowercp->acr) ||
681		     lowerpp->acw > lowercp->acw ||
682		     lowerpp->ace > lowercp->ace) {
683			gctl_msg(req, EPERM, "provider %s is unable to get "
684			    "exclusive access to %s\n\tfor writing. Note that "
685			    "%s cannot be mounted or otherwise open\n\tduring "
686			    "a commit unless the -f flag is used.", pp->name,
687			    lowerpp->name, lowerpp->name);
688			g_union_rel_writelock(sc);
689			continue;
690		}
691		if ((error = g_access(lowercp, 0, 1, 0)) != 0) {
692			gctl_msg(req, error, "Error %d: provider %s is unable "
693			    "to access %s for writing.", error, pp->name,
694			    lowerpp->name);
695			g_union_rel_writelock(sc);
696			continue;
697		}
698		g_topology_unlock();
699		/* Loop over write map copying across written blocks */
700		bp->bio_offset = 0;
701		bp->bio_length = sc->sc_map_size * sc->sc_sectorsize;
702		G_RLOCK(sc);
703		error = 0;
704		while (bp->bio_length > 0) {
705			if (!g_union_getmap(bp, sc, &len2rd)) {
706				/* not written, so skip */
707				bp->bio_offset += len2rd;
708				bp->bio_length -= len2rd;
709				continue;
710			}
711			G_RUNLOCK(sc);
712			/* need to read then write len2rd sectors */
713			for ( ; len2rd > 0; len2rd -= len2wt) {
714				/* limit ourselves to MAXBSIZE size I/Os */
715				len2wt = len2rd;
716				if (len2wt > MAXBSIZE)
717					len2wt = MAXBSIZE;
718				savelen = bp->bio_length;
719				bp->bio_length = len2wt;
720				bp->bio_cmd = BIO_READ;
721				g_io_request(bp, sc->sc_uppercp);
722				if ((error = biowait(bp, "rdunion")) != 0) {
723					gctl_msg(req, error, "Commit read "
724					    "error %d in provider %s, commit "
725					    "aborted.", error, pp->name);
726					goto cleanup;
727				}
728				bp->bio_flags &= ~BIO_DONE;
729				bp->bio_cmd = BIO_WRITE;
730				g_io_request(bp, lowercp);
731				if ((error = biowait(bp, "wtunion")) != 0) {
732					gctl_msg(req, error, "Commit write "
733					    "error %d in provider %s, commit "
734					    "aborted.", error, pp->name);
735					goto cleanup;
736				}
737				bp->bio_flags &= ~BIO_DONE;
738				bp->bio_offset += len2wt;
739				bp->bio_length = savelen - len2wt;
740			}
741			G_RLOCK(sc);
742		}
743		G_RUNLOCK(sc);
744		/* clear the write map */
745		g_union_revert(sc);
746cleanup:
747		g_topology_lock();
748		/* return lower to previous access */
749		if ((error1 = g_access(lowercp, 0, -1, 0)) != 0) {
750			G_UNION_DEBUG(2, "Error %d: device %s could not reset "
751			    "access to %s (r=0 w=-1 e=0).", error1, pp->name,
752			    lowerpp->name);
753		}
754		g_union_rel_writelock(sc);
755		if (error == 0 && verbose)
756			gctl_msg(req, 0, "Device %s has been committed.",
757			    pp->name);
758		G_UNION_DEBUG(1, "Device %s has been committed.", pp->name);
759	}
760	gctl_post_messages(req);
761	g_free(bp->bio_data);
762	g_destroy_bio(bp);
763	if (*reboot)
764		kern_reboot(RB_AUTOBOOT);
765}
766
767/*
768 * Generally allow access unless a commit is in progress.
769 */
770static int
771g_union_access(struct g_provider *pp, int r, int w, int e)
772{
773	struct g_union_softc *sc;
774
775	sc = pp->geom->softc;
776	if (sc == NULL) {
777		if (r <= 0 && w <= 0 && e <= 0)
778			return (0);
779		return (ENXIO);
780	}
781	r += pp->acr;
782	w += pp->acw;
783	e += pp->ace;
784	if (g_union_get_writelock(sc) != 0) {
785		if ((pp->acr + pp->acw + pp->ace) > 0 && (r + w + e) == 0)
786			return (0);
787		return (EBUSY);
788	}
789	g_union_rel_writelock(sc);
790	return (0);
791}
792
793/*
794 * Initiate an I/O operation on the union device.
795 */
796static void
797g_union_start(struct bio *bp)
798{
799	struct g_union_softc *sc;
800	struct g_union_wip *wip;
801	struct bio *cbp;
802
803	sc = bp->bio_to->geom->softc;
804	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
805		wip = g_malloc(sizeof(*wip), M_NOWAIT);
806		if (wip == NULL) {
807			g_io_deliver(bp, ENOMEM);
808			return;
809		}
810		TAILQ_INIT(&wip->wip_waiting);
811		wip->wip_bp = bp;
812		wip->wip_sc = sc;
813		wip->wip_start = bp->bio_offset + sc->sc_offset;
814		wip->wip_end = wip->wip_start + bp->bio_length - 1;
815		wip->wip_numios = 1;
816		wip->wip_error = 0;
817		g_union_doio(wip);
818		return;
819	}
820
821	/*
822	 * All commands other than read and write are passed through to
823	 * the upper-level device since it is writable and thus able to
824	 * respond to delete, flush, and speedup requests.
825	 */
826	cbp = g_clone_bio(bp);
827	if (cbp == NULL) {
828		g_io_deliver(bp, ENOMEM);
829		return;
830	}
831	cbp->bio_offset = bp->bio_offset + sc->sc_offset;
832	cbp->bio_done = g_std_done;
833
834	switch (cbp->bio_cmd) {
835	case BIO_DELETE:
836		G_UNION_LOGREQ(cbp, "Delete request received.");
837		atomic_add_long(&sc->sc_deletes, 1);
838		break;
839	case BIO_GETATTR:
840		G_UNION_LOGREQ(cbp, "Getattr request received.");
841		atomic_add_long(&sc->sc_getattrs, 1);
842		if (strcmp(cbp->bio_attribute, "GEOM::kerneldump") != 0)
843			/* forward the GETATTR to the lower-level device */
844			break;
845		g_union_kerneldump(bp, sc);
846		return;
847	case BIO_FLUSH:
848		G_UNION_LOGREQ(cbp, "Flush request received.");
849		atomic_add_long(&sc->sc_flushes, 1);
850		break;
851	case BIO_SPEEDUP:
852		G_UNION_LOGREQ(cbp, "Speedup request received.");
853		atomic_add_long(&sc->sc_speedups, 1);
854		break;
855	case BIO_CMD0:
856		G_UNION_LOGREQ(cbp, "Cmd0 request received.");
857		atomic_add_long(&sc->sc_cmd0s, 1);
858		break;
859	case BIO_CMD1:
860		G_UNION_LOGREQ(cbp, "Cmd1 request received.");
861		atomic_add_long(&sc->sc_cmd1s, 1);
862		break;
863	case BIO_CMD2:
864		G_UNION_LOGREQ(cbp, "Cmd2 request received.");
865		atomic_add_long(&sc->sc_cmd2s, 1);
866		break;
867	default:
868		G_UNION_LOGREQ(cbp, "Unknown (%d) request received.",
869		    cbp->bio_cmd);
870		break;
871	}
872	g_io_request(cbp, sc->sc_uppercp);
873}
874
875/*
876 * Initiate a read or write operation on the union device.
877 */
878static void
879g_union_doio(struct g_union_wip *wip)
880{
881	struct g_union_softc *sc;
882	struct g_consumer *cp, *firstcp;
883	struct g_union_wip *activewip;
884	struct bio *cbp, *firstbp;
885	off_t rdlen, len2rd, offset;
886	int iocnt, needstoblock;
887	char *level;
888
889	/*
890	 * To maintain consistency, we cannot allow concurrent reads
891	 * or writes to the same block.
892	 *
893	 * A work-in-progress (wip) structure is allocated for each
894	 * read or write request. All active requests are kept on the
895	 * softc sc_wiplist. As each request arrives, it is checked to
896	 * see if it overlaps any of the active entries. If it does not
897	 * overlap, then it is added to the active list and initiated.
898	 * If it does overlap an active entry, it is added to the
899	 * wip_waiting list for the active entry that it overlaps.
900	 * When an active entry completes, it restarts all the requests
901	 * on its wip_waiting list.
902	 */
903	sc = wip->wip_sc;
904	G_WLOCK(sc);
905	TAILQ_FOREACH(activewip, &sc->sc_wiplist, wip_next) {
906		if (wip->wip_end < activewip->wip_start ||
907		    wip->wip_start > activewip->wip_end)
908			continue;
909		needstoblock = 1;
910		if (wip->wip_bp->bio_cmd == BIO_WRITE)
911			if (activewip->wip_bp->bio_cmd == BIO_WRITE)
912				sc->sc_writeblockwrite += 1;
913			else
914				sc->sc_readblockwrite += 1;
915		else
916			if (activewip->wip_bp->bio_cmd == BIO_WRITE)
917				sc->sc_writeblockread += 1;
918			else {
919				sc->sc_readcurrentread += 1;
920				needstoblock = 0;
921			}
922		/* Put request on a waiting list if necessary */
923		if (needstoblock) {
924			TAILQ_INSERT_TAIL(&activewip->wip_waiting, wip,
925			    wip_next);
926			G_WUNLOCK(sc);
927			return;
928		}
929	}
930	/* Put request on the active list */
931	TAILQ_INSERT_TAIL(&sc->sc_wiplist, wip, wip_next);
932
933	/*
934	 * Process I/O requests that have been cleared to go.
935	 */
936	cbp = g_clone_bio(wip->wip_bp);
937	if (cbp == NULL) {
938		TAILQ_REMOVE(&sc->sc_wiplist, wip, wip_next);
939		G_WUNLOCK(sc);
940		KASSERT(TAILQ_FIRST(&wip->wip_waiting) == NULL,
941		    ("g_union_doio: non-empty work-in-progress waiting queue"));
942		g_io_deliver(wip->wip_bp, ENOMEM);
943		g_free(wip);
944		return;
945	}
946	G_WUNLOCK(sc);
947	cbp->bio_caller1 = wip;
948	cbp->bio_done = g_union_done;
949	cbp->bio_offset = wip->wip_start;
950
951	/*
952	 * Writes are always done to the top level. The blocks that
953	 * are written are recorded in the bitmap when the I/O completes.
954	 */
955	if (cbp->bio_cmd == BIO_WRITE) {
956		G_UNION_LOGREQ(cbp, "Sending %jd byte write request to upper "
957		    "level.", cbp->bio_length);
958		atomic_add_long(&sc->sc_writes, 1);
959		atomic_add_long(&sc->sc_wrotebytes, cbp->bio_length);
960		g_io_request(cbp, sc->sc_uppercp);
961		return;
962	}
963	/*
964	 * The usual read case is that we either read the top layer
965	 * if the block has been previously written or the bottom layer
966	 * if it has not been written. However, it is possible that
967	 * only part of the block has been written, For example we may
968	 * have written a UFS/FFS file fragment comprising several
969	 * sectors out of an 8-sector block.  Here, if the entire
970	 * 8-sector block is read for example by a snapshot needing
971	 * to copy the full block, then we need to read the written
972	 * sectors from the upper level and the unwritten sectors from
973	 * the lower level. We do this by alternately reading from the
974	 * top and bottom layers until we complete the read. We
975	 * simplify for the common case to just do the I/O and return.
976	 */
977	atomic_add_long(&sc->sc_reads, 1);
978	atomic_add_long(&sc->sc_readbytes, cbp->bio_length);
979	rdlen = cbp->bio_length;
980	offset = 0;
981	for (iocnt = 0; ; iocnt++) {
982		if (g_union_getmap(cbp, sc, &len2rd)) {
983			/* read top */
984			cp = sc->sc_uppercp;
985			level = "upper";
986		} else {
987			/* read bottom */
988			cp = sc->sc_lowercp;
989			level = "lower";
990		}
991		/* Check if only a single read is required */
992		if (iocnt == 0 && rdlen == len2rd) {
993			G_UNION_LOGREQLVL((cp == sc->sc_uppercp) ?
994			    3 : 4, cbp, "Sending %jd byte read "
995			    "request to %s level.", len2rd, level);
996			g_io_request(cbp, cp);
997			return;
998		}
999		cbp->bio_length = len2rd;
1000		if ((cbp->bio_flags & BIO_UNMAPPED) != 0)
1001			cbp->bio_ma_offset += offset;
1002		else
1003			cbp->bio_data += offset;
1004		offset += len2rd;
1005		rdlen -= len2rd;
1006		G_UNION_LOGREQLVL(3, cbp, "Sending %jd byte read "
1007		    "request to %s level.", len2rd, level);
1008		/*
1009		 * To avoid prematurely notifying our consumer
1010		 * that their I/O has completed, we have to delay
1011		 * issuing our first I/O request until we have
1012		 * issued all the additional I/O requests.
1013		 */
1014		if (iocnt > 0) {
1015			atomic_add_long(&wip->wip_numios, 1);
1016			g_io_request(cbp, cp);
1017		} else {
1018			firstbp = cbp;
1019			firstcp = cp;
1020		}
1021		if (rdlen == 0)
1022			break;
1023		/* set up for next read */
1024		cbp = g_clone_bio(wip->wip_bp);
1025		if (cbp == NULL) {
1026			wip->wip_error = ENOMEM;
1027			atomic_add_long(&wip->wip_numios, -1);
1028			break;
1029		}
1030		cbp->bio_caller1 = wip;
1031		cbp->bio_done = g_union_done;
1032		cbp->bio_offset += offset;
1033		cbp->bio_length = rdlen;
1034		atomic_add_long(&sc->sc_reads, 1);
1035	}
1036	/* We have issued all our I/O, so start the first one */
1037	g_io_request(firstbp, firstcp);
1038	return;
1039}
1040
1041/*
1042 * Used when completing a union I/O operation.
1043 */
1044static void
1045g_union_done(struct bio *bp)
1046{
1047	struct g_union_wip *wip, *waitingwip;
1048	struct g_union_softc *sc;
1049
1050	wip = bp->bio_caller1;
1051	if (wip->wip_error != 0 && bp->bio_error == 0)
1052		bp->bio_error = wip->wip_error;
1053	wip->wip_error = 0;
1054	if (atomic_fetchadd_long(&wip->wip_numios, -1) == 1) {
1055		sc = wip->wip_sc;
1056		G_WLOCK(sc);
1057		if (bp->bio_cmd == BIO_WRITE)
1058			g_union_setmap(bp, sc);
1059		TAILQ_REMOVE(&sc->sc_wiplist, wip, wip_next);
1060		G_WUNLOCK(sc);
1061		while ((waitingwip = TAILQ_FIRST(&wip->wip_waiting)) != NULL) {
1062			TAILQ_REMOVE(&wip->wip_waiting, waitingwip, wip_next);
1063			g_union_doio(waitingwip);
1064		}
1065		g_free(wip);
1066	}
1067	g_std_done(bp);
1068}
1069
1070/*
1071 * Record blocks that have been written in the map.
1072 */
1073static void
1074g_union_setmap(struct bio *bp, struct g_union_softc *sc)
1075{
1076	size_t root_idx;
1077	uint64_t **leaf;
1078	uint64_t *wordp;
1079	off_t start, numsec;
1080
1081	G_WLOCKOWNED(sc);
1082	KASSERT(bp->bio_offset % sc->sc_sectorsize == 0,
1083	    ("g_union_setmap: offset not on sector boundry"));
1084	KASSERT(bp->bio_length % sc->sc_sectorsize == 0,
1085	    ("g_union_setmap: length not a multiple of sectors"));
1086	start = bp->bio_offset / sc->sc_sectorsize;
1087	numsec = bp->bio_length / sc->sc_sectorsize;
1088	KASSERT(start + numsec <= sc->sc_map_size,
1089	    ("g_union_setmap: block %jd is out of range", start + numsec));
1090	for ( ; numsec > 0; numsec--, start++) {
1091		root_idx = start / sc->sc_bits_per_leaf;
1092		leaf = &sc->sc_writemap_root[root_idx];
1093		wordp = &(*leaf)
1094		    [(start % sc->sc_bits_per_leaf) / BITS_PER_ENTRY];
1095		*wordp |= 1ULL << (start % BITS_PER_ENTRY);
1096		sc->sc_leafused[root_idx / BITS_PER_ENTRY] |=
1097		    1ULL << (root_idx % BITS_PER_ENTRY);
1098	}
1099}
1100
1101/*
1102 * Check map to determine whether blocks have been written.
1103 *
1104 * Return true if they have been written so should be read from the top
1105 * layer. Return false if they have not been written so should be read
1106 * from the bottom layer. Return in len2read the bytes to be read. See
1107 * the comment above the BIO_READ implementation in g_union_start() for
1108 * an explantion of why len2read may be shorter than the buffer length.
1109 */
1110static bool
1111g_union_getmap(struct bio *bp, struct g_union_softc *sc, off_t *len2read)
1112{
1113	off_t start, numsec, leafresid, bitloc;
1114	bool first, maptype, retval;
1115	uint64_t *leaf, word;
1116	size_t root_idx;
1117
1118	KASSERT(bp->bio_offset % sc->sc_sectorsize == 0,
1119	    ("g_union_getmap: offset not on sector boundry"));
1120	KASSERT(bp->bio_length % sc->sc_sectorsize == 0,
1121	    ("g_union_getmap: length not a multiple of sectors"));
1122	start = bp->bio_offset / sc->sc_sectorsize;
1123	numsec = bp->bio_length / sc->sc_sectorsize;
1124	G_UNION_DEBUG(4, "g_union_getmap: check %jd sectors starting at %jd\n",
1125	    numsec, start);
1126	KASSERT(start + numsec <= sc->sc_map_size,
1127	    ("g_union_getmap: block %jd is out of range", start + numsec));
1128		root_idx = start / sc->sc_bits_per_leaf;
1129	first = true;
1130	maptype = false;
1131	while (numsec > 0) {
1132		/* Check first if the leaf records any written sectors */
1133		root_idx = start / sc->sc_bits_per_leaf;
1134		leafresid = sc->sc_bits_per_leaf -
1135		    (start % sc->sc_bits_per_leaf);
1136		if (((sc->sc_leafused[root_idx / BITS_PER_ENTRY]) &
1137		    (1ULL << (root_idx % BITS_PER_ENTRY))) == 0) {
1138			if (first) {
1139				maptype = false;
1140				first = false;
1141			}
1142			if (maptype)
1143				break;
1144			numsec -= leafresid;
1145			start += leafresid;
1146			continue;
1147		}
1148		/* Check up to a word boundry, then check word by word */
1149		leaf = sc->sc_writemap_root[root_idx];
1150		word = leaf[(start % sc->sc_bits_per_leaf) / BITS_PER_ENTRY];
1151		bitloc = start % BITS_PER_ENTRY;
1152		if (bitloc == 0 && (word == 0 || word == ~0)) {
1153			if (first) {
1154				if (word == 0)
1155					maptype = false;
1156				else
1157					maptype = true;
1158				first = false;
1159			}
1160			if ((word == 0 && maptype) ||
1161			    (word == ~0 && !maptype))
1162				break;
1163			numsec -= BITS_PER_ENTRY;
1164			start += BITS_PER_ENTRY;
1165			continue;
1166		}
1167		for ( ; bitloc < BITS_PER_ENTRY; bitloc ++) {
1168			retval = (word & (1ULL << bitloc)) != 0;
1169			if (first) {
1170				maptype = retval;
1171				first = false;
1172			}
1173			if (maptype == retval) {
1174				numsec--;
1175				start++;
1176				continue;
1177			}
1178			goto out;
1179		}
1180	}
1181out:
1182	if (numsec < 0) {
1183		start += numsec;
1184		numsec = 0;
1185	}
1186	*len2read = bp->bio_length - (numsec * sc->sc_sectorsize);
1187	G_UNION_DEBUG(maptype ? 3 : 4,
1188	    "g_union_getmap: return maptype %swritten for %jd "
1189	    "sectors ending at %jd\n", maptype ? "" : "NOT ",
1190	    *len2read / sc->sc_sectorsize, start - 1);
1191	return (maptype);
1192}
1193
1194/*
1195 * Fill in details for a BIO_GETATTR request.
1196 */
1197static void
1198g_union_kerneldump(struct bio *bp, struct g_union_softc *sc)
1199{
1200	struct g_kerneldump *gkd;
1201	struct g_geom *gp;
1202	struct g_provider *pp;
1203
1204	gkd = (struct g_kerneldump *)bp->bio_data;
1205	gp = bp->bio_to->geom;
1206	g_trace(G_T_TOPOLOGY, "%s(%s, %jd, %jd)", __func__, gp->name,
1207	    (intmax_t)gkd->offset, (intmax_t)gkd->length);
1208
1209	pp = LIST_FIRST(&gp->provider);
1210
1211	gkd->di.dumper = g_union_dumper;
1212	gkd->di.priv = sc;
1213	gkd->di.blocksize = pp->sectorsize;
1214	gkd->di.maxiosize = DFLTPHYS;
1215	gkd->di.mediaoffset = sc->sc_offset + gkd->offset;
1216	if (gkd->offset > sc->sc_size) {
1217		g_io_deliver(bp, ENODEV);
1218		return;
1219	}
1220	if (gkd->offset + gkd->length > sc->sc_size)
1221		gkd->length = sc->sc_size - gkd->offset;
1222	gkd->di.mediasize = gkd->length;
1223	g_io_deliver(bp, 0);
1224}
1225
1226/*
1227 * Handler for g_union_kerneldump().
1228 */
1229static int
1230g_union_dumper(void *priv, void *virtual, off_t offset, size_t length)
1231{
1232
1233	return (0);
1234}
1235
1236/*
1237 * List union statistics.
1238 */
1239static void
1240g_union_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
1241    struct g_consumer *cp, struct g_provider *pp)
1242{
1243	struct g_union_softc *sc;
1244
1245	if (pp != NULL || cp != NULL || gp->softc == NULL)
1246		return;
1247	sc = gp->softc;
1248	sbuf_printf(sb, "%s<Reads>%ju</Reads>\n", indent,
1249	    (uintmax_t)sc->sc_reads);
1250	sbuf_printf(sb, "%s<Writes>%ju</Writes>\n", indent,
1251	    (uintmax_t)sc->sc_writes);
1252	sbuf_printf(sb, "%s<Deletes>%ju</Deletes>\n", indent,
1253	    (uintmax_t)sc->sc_deletes);
1254	sbuf_printf(sb, "%s<Getattrs>%ju</Getattrs>\n", indent,
1255	    (uintmax_t)sc->sc_getattrs);
1256	sbuf_printf(sb, "%s<Flushes>%ju</Flushes>\n", indent,
1257	    (uintmax_t)sc->sc_flushes);
1258	sbuf_printf(sb, "%s<Speedups>%ju</Speedups>\n", indent,
1259	    (uintmax_t)sc->sc_speedups);
1260	sbuf_printf(sb, "%s<Cmd0s>%ju</Cmd0s>\n", indent,
1261	    (uintmax_t)sc->sc_cmd0s);
1262	sbuf_printf(sb, "%s<Cmd1s>%ju</Cmd1s>\n", indent,
1263	    (uintmax_t)sc->sc_cmd1s);
1264	sbuf_printf(sb, "%s<Cmd2s>%ju</Cmd2s>\n", indent,
1265	    (uintmax_t)sc->sc_cmd2s);
1266	sbuf_printf(sb, "%s<ReadCurrentRead>%ju</ReadCurrentRead>\n", indent,
1267	    (uintmax_t)sc->sc_readcurrentread);
1268	sbuf_printf(sb, "%s<ReadBlockWrite>%ju</ReadBlockWrite>\n", indent,
1269	    (uintmax_t)sc->sc_readblockwrite);
1270	sbuf_printf(sb, "%s<WriteBlockRead>%ju</WriteBlockRead>\n", indent,
1271	    (uintmax_t)sc->sc_writeblockread);
1272	sbuf_printf(sb, "%s<WriteBlockWrite>%ju</WriteBlockWrite>\n", indent,
1273	    (uintmax_t)sc->sc_writeblockwrite);
1274	sbuf_printf(sb, "%s<ReadBytes>%ju</ReadBytes>\n", indent,
1275	    (uintmax_t)sc->sc_readbytes);
1276	sbuf_printf(sb, "%s<WroteBytes>%ju</WroteBytes>\n", indent,
1277	    (uintmax_t)sc->sc_wrotebytes);
1278	sbuf_printf(sb, "%s<Offset>%jd</Offset>\n", indent,
1279	    (intmax_t)sc->sc_offset);
1280}
1281
1282/*
1283 * Clean up an orphaned geom.
1284 */
1285static void
1286g_union_orphan(struct g_consumer *cp)
1287{
1288
1289	g_topology_assert();
1290	g_union_destroy(NULL, cp->geom, true);
1291}
1292
1293/*
1294 * Clean up a union geom.
1295 */
1296static int
1297g_union_destroy_geom(struct gctl_req *req, struct g_class *mp,
1298    struct g_geom *gp)
1299{
1300
1301	return (g_union_destroy(NULL, gp, false));
1302}
1303
1304/*
1305 * Clean up a union device.
1306 */
1307static int
1308g_union_destroy(struct gctl_req *req, struct g_geom *gp, bool force)
1309{
1310	struct g_union_softc *sc;
1311	struct g_provider *pp;
1312	int error;
1313
1314	g_topology_assert();
1315	sc = gp->softc;
1316	if (sc == NULL)
1317		return (ENXIO);
1318	pp = LIST_FIRST(&gp->provider);
1319	if ((sc->sc_flags & DOING_COMMIT) != 0 ||
1320	    (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0))) {
1321		if (force) {
1322			if (req != NULL)
1323				gctl_msg(req, 0, "Device %s is still in use, "
1324				    "so is being forcibly removed.", gp->name);
1325			G_UNION_DEBUG(1, "Device %s is still in use, so "
1326			    "is being forcibly removed.", gp->name);
1327		} else {
1328			if (req != NULL)
1329				gctl_msg(req, EBUSY, "Device %s is still open "
1330				    "(r=%d w=%d e=%d).", gp->name, pp->acr,
1331				    pp->acw, pp->ace);
1332			G_UNION_DEBUG(1, "Device %s is still open "
1333			    "(r=%d w=%d e=%d).", gp->name, pp->acr,
1334			    pp->acw, pp->ace);
1335			return (EBUSY);
1336		}
1337	} else {
1338		if (req != NULL)
1339			gctl_msg(req, 0, "Device %s removed.", gp->name);
1340		G_UNION_DEBUG(1, "Device %s removed.", gp->name);
1341	}
1342	/* Close consumers */
1343	if ((error = g_access(sc->sc_lowercp, -1, 0, -1)) != 0)
1344		G_UNION_DEBUG(2, "Error %d: device %s could not reset access "
1345		    "to %s.", error, gp->name, sc->sc_lowercp->provider->name);
1346	if ((error = g_access(sc->sc_uppercp, -1, -1, -1)) != 0)
1347		G_UNION_DEBUG(2, "Error %d: device %s could not reset access "
1348		    "to %s.", error, gp->name, sc->sc_uppercp->provider->name);
1349
1350	g_wither_geom(gp, ENXIO);
1351
1352	return (0);
1353}
1354
1355/*
1356 * Clean up a union provider.
1357 */
1358static void
1359g_union_providergone(struct g_provider *pp)
1360{
1361	struct g_geom *gp;
1362	struct g_union_softc *sc;
1363	size_t i;
1364
1365	gp = pp->geom;
1366	sc = gp->softc;
1367	gp->softc = NULL;
1368	for (i = 0; i < sc->sc_root_size; i++)
1369		g_free(sc->sc_writemap_root[i]);
1370	g_free(sc->sc_writemap_root);
1371	g_free(sc->sc_leafused);
1372	rw_destroy(&sc->sc_rwlock);
1373	g_free(sc);
1374}
1375
1376/*
1377 * Respond to a resized provider.
1378 */
1379static void
1380g_union_resize(struct g_consumer *cp)
1381{
1382	struct g_union_softc *sc;
1383	struct g_geom *gp;
1384
1385	g_topology_assert();
1386
1387	gp = cp->geom;
1388	sc = gp->softc;
1389
1390	/*
1391	 * If size has gotten bigger, ignore it and just keep using
1392	 * the space we already had. Otherwise we are done.
1393	 */
1394	if (sc->sc_size < cp->provider->mediasize - sc->sc_offset)
1395		return;
1396	g_union_destroy(NULL, gp, true);
1397}
1398
/*
 * Declare g_union_class as a GEOM class under the module name g_union
 * and give the geom_union module version 0.
 */
DECLARE_GEOM_CLASS(g_union_class, g_union);
MODULE_VERSION(geom_union, 0);
1401