/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2005-2019 Pawel Jakub Dawidek <pawel@dawidek.net>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cons.h>
#include <sys/kenv.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/module.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/bio.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/eventhandler.h>
#include <sys/kthread.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/uio.h>
#include <sys/vnode.h>

#include <machine/vmparam.h>

#include <vm/uma.h>
#include <vm/vm.h>
#include <vm/swap_pager.h>

#include <geom/geom.h>
#include <geom/geom_dbg.h>
#include <geom/eli/g_eli.h>
#include <geom/eli/pkcs5v2.h>

#include <crypto/intake.h>

FEATURE(geom_eli, "GEOM crypto module");

MALLOC_DEFINE(M_ELI, "eli_data", "GEOM_ELI Data");

SYSCTL_DECL(_kern_geom);
SYSCTL_NODE(_kern_geom, OID_AUTO, eli, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "GEOM_ELI stuff");
static int g_eli_version = G_ELI_VERSION;
SYSCTL_INT(_kern_geom_eli, OID_AUTO, version, CTLFLAG_RD, &g_eli_version, 0,
    "GELI version");
int g_eli_debug = 0;
SYSCTL_INT(_kern_geom_eli, OID_AUTO, debug, CTLFLAG_RWTUN, &g_eli_debug, 0,
    "Debug level");
static u_int g_eli_tries = 3;
SYSCTL_UINT(_kern_geom_eli, OID_AUTO, tries, CTLFLAG_RWTUN, &g_eli_tries, 0,
    "Number of tries for entering the passphrase");
static u_int g_eli_visible_passphrase = GETS_NOECHO;
SYSCTL_UINT(_kern_geom_eli, OID_AUTO, visible_passphrase, CTLFLAG_RWTUN,
    &g_eli_visible_passphrase, 0,
    "Visibility of passphrase prompt (0 = invisible, 1 = visible, 2 = asterisk)");
u_int g_eli_overwrites = G_ELI_OVERWRITES;
SYSCTL_UINT(_kern_geom_eli, OID_AUTO, overwrites, CTLFLAG_RWTUN, &g_eli_overwrites,
    0, "Number of times on-disk keys should be overwritten when destroying them");
static u_int g_eli_threads = 0;
SYSCTL_UINT(_kern_geom_eli, OID_AUTO, threads, CTLFLAG_RWTUN, &g_eli_threads, 0,
    "Number of threads doing crypto work");
u_int g_eli_batch = 0;
SYSCTL_UINT(_kern_geom_eli, OID_AUTO, batch, CTLFLAG_RWTUN, &g_eli_batch, 0,
    "Use crypto operations batching");
static u_int g_eli_minbufs = 16;
static int sysctl_g_eli_minbufs(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_kern_geom_eli, OID_AUTO, minbufs, CTLTYPE_UINT | CTLFLAG_RW |
    CTLFLAG_MPSAFE, NULL, 0, sysctl_g_eli_minbufs, "IU",
    "Number of GELI bufs reserved for swap transactions");
static bool g_eli_blocking_malloc = false;
SYSCTL_BOOL(_kern_geom_eli, OID_AUTO, blocking_malloc, CTLFLAG_RWTUN,
    &g_eli_blocking_malloc, 0, "Use blocking malloc calls for GELI buffers");
static bool g_eli_unmapped_io = true;
SYSCTL_BOOL(_kern_geom_eli, OID_AUTO, unmapped_io, CTLFLAG_RDTUN,
    &g_eli_unmapped_io, 0, "Enable support for unmapped I/O");
static int g_eli_alloc_sz;
SYSCTL_UINT(_kern_geom_eli, OID_AUTO, use_uma_bytes, CTLFLAG_RD,
    &g_eli_alloc_sz, 0, "Use uma(9) for allocations of this size or smaller.");
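
/*
 * Most of the knobs above are also loader(8) tunables (CTLFLAG_RWTUN /
 * CTLFLAG_RDTUN).  An illustrative loader.conf(5) snippet (values are
 * examples only):
 *
 *	kern.geom.eli.threads=2
 *	kern.geom.eli.batch=1
 *	kern.geom.eli.visible_passphrase=2
 */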

static struct sx g_eli_umalock;	/* Controls changes to UMA zone. */
SX_SYSINIT(g_eli_umalock, &g_eli_umalock, "GELI UMA");
static uma_zone_t g_eli_uma = NULL;
static volatile int g_eli_umaoutstanding;
static volatile int g_eli_devs;

/*
 * Control the number of reserved entries in the GELI zone.
 * If the GELI zone has already been allocated, update the zone. Otherwise,
 * simply update the variable for use the next time the zone is created.
 */
static int
sysctl_g_eli_minbufs(SYSCTL_HANDLER_ARGS)
{
	int error;
	u_int new;

	new = g_eli_minbufs;
	error = sysctl_handle_int(oidp, &new, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	sx_xlock(&g_eli_umalock);
	if (g_eli_uma != NULL) {
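		/*
		 * Update the zone's reservation to the new value; pre-
		 * allocate backing pages only when the reservation grows,
		 * as there is no way to hand pre-allocated pages back here.
		 */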
		if (new != g_eli_minbufs)
			uma_zone_reserve(g_eli_uma, new);
		if (new > g_eli_minbufs)
			uma_prealloc(g_eli_uma, new - g_eli_minbufs);
	}
	if (new != g_eli_minbufs)
		g_eli_minbufs = new;
	sx_xunlock(&g_eli_umalock);
	return (0);
}

/*
 * The passphrase is cached during boot, in order to be more user-friendly
 * when multiple providers use the same passphrase.
 */
static char cached_passphrase[256];
static u_int g_eli_boot_passcache = 1;
TUNABLE_INT("kern.geom.eli.boot_passcache", &g_eli_boot_passcache);
SYSCTL_UINT(_kern_geom_eli, OID_AUTO, boot_passcache, CTLFLAG_RD,
    &g_eli_boot_passcache, 0,
    "Passphrases are cached during boot process for possible reuse");
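
/*
 * Example: the boot loader can hand the kernel a passphrase through the
 * kernel environment, e.g. a kern.geom.eli.passphrase="..." entry in
 * loader.conf(5).  It is consumed and scrubbed from the environment below.
 */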
static void
fetch_loader_passphrase(void * dummy)
{
	char * env_passphrase;

	KASSERT(dynamic_kenv, ("need dynamic kenv"));

	if ((env_passphrase = kern_getenv("kern.geom.eli.passphrase")) != NULL) {
		/* Extract passphrase from the environment. */
		strlcpy(cached_passphrase, env_passphrase,
		    sizeof(cached_passphrase));
		freeenv(env_passphrase);

		/* Wipe the passphrase from the environment. */
		kern_unsetenv("kern.geom.eli.passphrase");
	}
}
SYSINIT(geli_fetch_loader_passphrase, SI_SUB_KMEM + 1, SI_ORDER_ANY,
    fetch_loader_passphrase, NULL);

static void
zero_boot_passcache(void)
{

	explicit_bzero(cached_passphrase, sizeof(cached_passphrase));
}

static void
zero_geli_intake_keys(void)
{
	struct keybuf *keybuf;
	int i;

	if ((keybuf = get_keybuf()) != NULL) {
		/* Scan the key buffer, clear all GELI keys. */
		for (i = 0; i < keybuf->kb_nents; i++) {
			if (keybuf->kb_ents[i].ke_type == KEYBUF_TYPE_GELI) {
				explicit_bzero(keybuf->kb_ents[i].ke_data,
				    sizeof(keybuf->kb_ents[i].ke_data));
				keybuf->kb_ents[i].ke_type = KEYBUF_TYPE_NONE;
			}
		}
	}
}

static void
zero_intake_passcache(void *dummy)
{
	zero_boot_passcache();
	zero_geli_intake_keys();
}
EVENTHANDLER_DEFINE(mountroot, zero_intake_passcache, NULL, 0);

static eventhandler_tag g_eli_pre_sync = NULL;

static int g_eli_read_metadata_offset(struct g_class *mp, struct g_provider *pp,
    off_t offset, struct g_eli_metadata *md);

static int g_eli_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp);
static void g_eli_init(struct g_class *mp);
static void g_eli_fini(struct g_class *mp);

static g_taste_t g_eli_taste;
static g_dumpconf_t g_eli_dumpconf;

struct g_class g_eli_class = {
	.name = G_ELI_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_eli_config,
	.taste = g_eli_taste,
	.destroy_geom = g_eli_destroy_geom,
	.init = g_eli_init,
	.fini = g_eli_fini
};

/*
 * Code paths:
 * BIO_READ:
 *	g_eli_start -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
 * BIO_WRITE:
 *	g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver
 */

/*
 * EAGAIN from crypto(9) means that we were probably rebalanced to another
 * crypto accelerator or a similar backend change happened.
 * The function updates the SID and reruns the operation.
 */
int
g_eli_crypto_rerun(struct cryptop *crp)
{
	struct g_eli_softc *sc;
	struct g_eli_worker *wr;
	struct bio *bp;
	int error;

	bp = (struct bio *)crp->crp_opaque;
	sc = bp->bio_to->geom->softc;
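	/*
	 * Find the worker this request was assigned to; the worker number
	 * is kept in the bio's pflags.
	 */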
	LIST_FOREACH(wr, &sc->sc_workers, w_next) {
		if (wr->w_number == G_ELI_WORKER(bp->bio_pflags))
			break;
	}
	KASSERT(wr != NULL, ("Invalid worker (%u).",
	    G_ELI_WORKER(bp->bio_pflags)));
	G_ELI_DEBUG(1, "Rerunning crypto %s request (sid: %p -> %p).",
	    bp->bio_cmd == BIO_READ ? "READ" : "WRITE", wr->w_sid,
	    crp->crp_session);
	wr->w_sid = crp->crp_session;
	crp->crp_etype = 0;
	error = crypto_dispatch(crp);
	if (error == 0)
		return (0);
	G_ELI_DEBUG(1, "%s: crypto_dispatch() returned %d.", __func__, error);
	crp->crp_etype = error;
	return (error);
}

static void
g_eli_getattr_done(struct bio *bp)
{
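	/*
	 * Append "/eli" to the underlying provider's physical path, so
	 * the encrypted layer shows up in GEOM::physpath lookups.
	 */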
	if (bp->bio_error == 0 &&
	    !strcmp(bp->bio_attribute, "GEOM::physpath")) {
		strlcat(bp->bio_data, "/eli", bp->bio_length);
	}
	g_std_done(bp);
}

/*
 * The function is called after reading encrypted data from the provider.
 *
 * g_eli_start -> g_eli_crypto_read -> g_io_request -> G_ELI_READ_DONE -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
 */
void
g_eli_read_done(struct bio *bp)
{
	struct g_eli_softc *sc;
	struct bio *pbp;

	G_ELI_LOGREQ(2, bp, "Request done.");
	pbp = bp->bio_parent;
	if (pbp->bio_error == 0 && bp->bio_error != 0)
		pbp->bio_error = bp->bio_error;
	g_destroy_bio(bp);
	/*
	 * Do we have all sectors already?
	 */
	pbp->bio_inbed++;
	if (pbp->bio_inbed < pbp->bio_children)
		return;
	sc = pbp->bio_to->geom->softc;
	if (pbp->bio_error != 0) {
		G_ELI_LOGREQ(0, pbp, "%s() failed (error=%d)", __func__,
		    pbp->bio_error);
		pbp->bio_completed = 0;
		g_eli_free_data(pbp);
		g_io_deliver(pbp, pbp->bio_error);
		if (sc != NULL)
			atomic_subtract_int(&sc->sc_inflight, 1);
		return;
	}
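	/*
	 * All encrypted sectors have arrived; queue the parent bio for a
	 * worker thread, which will decrypt it.
	 */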
	mtx_lock(&sc->sc_queue_mtx);
	bioq_insert_tail(&sc->sc_queue, pbp);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
}

/*
 * The function is called after we encrypt and write data.
 *
 * g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> G_ELI_WRITE_DONE -> g_io_deliver
 */
void
g_eli_write_done(struct bio *bp)
{
	struct g_eli_softc *sc;
	struct bio *pbp;

	G_ELI_LOGREQ(2, bp, "Request done.");
	pbp = bp->bio_parent;
	if (pbp->bio_error == 0 && bp->bio_error != 0)
		pbp->bio_error = bp->bio_error;
	g_destroy_bio(bp);
	/*
	 * Do we have all sectors already?
	 */
	pbp->bio_inbed++;
	if (pbp->bio_inbed < pbp->bio_children)
		return;
	sc = pbp->bio_to->geom->softc;
	g_eli_free_data(pbp);
	if (pbp->bio_error != 0) {
		G_ELI_LOGREQ(0, pbp, "%s() failed (error=%d)", __func__,
		    pbp->bio_error);
		pbp->bio_completed = 0;
	} else
		pbp->bio_completed = pbp->bio_length;

	/*
	 * Write is finished, send it up.
	 */
	g_io_deliver(pbp, pbp->bio_error);
	if (sc != NULL)
		atomic_subtract_int(&sc->sc_inflight, 1);
}

/*
 * This function should never be called, but GEOM requires an ->orphan()
 * method to be set for every geom, so we provide one that panics.
 */
static void
g_eli_orphan_spoil_assert(struct g_consumer *cp)
{

	panic("Function %s() called for %s.", __func__, cp->geom->name);
}

static void
g_eli_orphan(struct g_consumer *cp)
{
	struct g_eli_softc *sc;

	g_topology_assert();
	sc = cp->geom->softc;
	if (sc == NULL)
		return;
	g_eli_destroy(sc, TRUE);
}

static void
g_eli_resize(struct g_consumer *cp)
{
	struct g_eli_softc *sc;
	struct g_provider *epp, *pp;
	off_t oldsize;

	g_topology_assert();
	sc = cp->geom->softc;
	if (sc == NULL)
		return;

	if ((sc->sc_flags & G_ELI_FLAG_AUTORESIZE) == 0) {
		G_ELI_DEBUG(0, "Autoresize is turned off, old size: %jd.",
		    (intmax_t)sc->sc_provsize);
		return;
	}

	pp = cp->provider;

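	/*
	 * Onetime devices keep no metadata on the provider, so there is
	 * nothing to move; for everyone else, relocate the metadata from
	 * the old last sector to the new one.
	 */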
	if ((sc->sc_flags & G_ELI_FLAG_ONETIME) == 0) {
		struct g_eli_metadata md;
		u_char *sector;
		int error;

		sector = NULL;

		error = g_eli_read_metadata_offset(cp->geom->class, pp,
		    sc->sc_provsize - pp->sectorsize, &md);
		if (error != 0) {
			G_ELI_DEBUG(0, "Cannot read metadata from %s (error=%d).",
			    pp->name, error);
			goto iofail;
		}

		md.md_provsize = pp->mediasize;

		sector = malloc(pp->sectorsize, M_ELI, M_WAITOK | M_ZERO);
		eli_metadata_encode(&md, sector);
		error = g_write_data(cp, pp->mediasize - pp->sectorsize, sector,
		    pp->sectorsize);
		if (error != 0) {
			G_ELI_DEBUG(0, "Cannot store metadata on %s (error=%d).",
			    pp->name, error);
			goto iofail;
		}
		explicit_bzero(sector, pp->sectorsize);
		error = g_write_data(cp, sc->sc_provsize - pp->sectorsize,
		    sector, pp->sectorsize);
		if (error != 0) {
			G_ELI_DEBUG(0, "Cannot clear old metadata from %s (error=%d).",
			    pp->name, error);
			goto iofail;
		}
iofail:
		explicit_bzero(&md, sizeof(md));
		zfree(sector, M_ELI);
	}

	oldsize = sc->sc_mediasize;
	sc->sc_mediasize = eli_mediasize(sc, pp->mediasize, pp->sectorsize);
	g_eli_key_resize(sc);
	sc->sc_provsize = pp->mediasize;

	epp = LIST_FIRST(&sc->sc_geom->provider);
	g_resize_provider(epp, sc->sc_mediasize);
	G_ELI_DEBUG(0, "Device %s size changed from %jd to %jd.", epp->name,
	    (intmax_t)oldsize, (intmax_t)sc->sc_mediasize);
}

/*
 * BIO_READ:
 *	G_ELI_START -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
 * BIO_WRITE:
 *	G_ELI_START -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver
 */
static void
g_eli_start(struct bio *bp)
{
	struct g_eli_softc *sc;
	struct g_consumer *cp;
	struct bio *cbp;

	sc = bp->bio_to->geom->softc;
	KASSERT(sc != NULL,
	    ("Provider's error should be set (error=%d)(device=%s).",
	    bp->bio_to->error, bp->bio_to->name));
	G_ELI_LOGREQ(2, bp, "Request received.");

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_GETATTR:
	case BIO_FLUSH:
	case BIO_ZONE:
	case BIO_SPEEDUP:
		break;
	case BIO_DELETE:
		/*
		 * If the user hasn't set the NODELETE flag, we just pass
		 * it down the stack and let the layers beneath us do (or
		 * not) whatever they do with it.  If they have, we
		 * reject it.  A possible extension would be an
		 * additional flag to take it as a hint to shred the data
		 * with [multiple?] overwrites.
		 */
		if (!(sc->sc_flags & G_ELI_FLAG_NODELETE))
			break;
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		g_io_deliver(bp, ENOMEM);
		return;
	}
	bp->bio_driver1 = cbp;
	bp->bio_pflags = 0;
	G_ELI_SET_NEW_BIO(bp->bio_pflags);
	switch (bp->bio_cmd) {
	case BIO_READ:
		if (!(sc->sc_flags & G_ELI_FLAG_AUTH)) {
			g_eli_crypto_read(sc, bp, 0);
			break;
		}
		/* FALLTHROUGH */
	case BIO_WRITE:
		mtx_lock(&sc->sc_queue_mtx);
		bioq_insert_tail(&sc->sc_queue, bp);
		mtx_unlock(&sc->sc_queue_mtx);
		wakeup(sc);
		break;
	case BIO_GETATTR:
	case BIO_FLUSH:
	case BIO_DELETE:
	case BIO_SPEEDUP:
	case BIO_ZONE:
		if (bp->bio_cmd == BIO_GETATTR)
			cbp->bio_done = g_eli_getattr_done;
		else
			cbp->bio_done = g_std_done;
		cp = LIST_FIRST(&sc->sc_geom->consumer);
		cbp->bio_to = cp->provider;
		G_ELI_LOGREQ(2, cbp, "Sending request.");
		g_io_request(cbp, cp);
		break;
	}
}

static int
g_eli_newsession(struct g_eli_worker *wr)
{
	struct g_eli_softc *sc;
	struct crypto_session_params csp;
	uint32_t caps;
	int error, new_crypto;
	void *key;

	sc = wr->w_softc;

	memset(&csp, 0, sizeof(csp));
	csp.csp_mode = CSP_MODE_CIPHER;
	csp.csp_cipher_alg = sc->sc_ealgo;
	csp.csp_ivlen = g_eli_ivlen(sc->sc_ealgo);
	csp.csp_cipher_klen = sc->sc_ekeylen / 8;
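	/*
	 * AES-XTS uses two keys internally (a data key and a tweak key),
	 * so the session key it is handed is twice the nominal length.
	 */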
	if (sc->sc_ealgo == CRYPTO_AES_XTS)
		csp.csp_cipher_klen <<= 1;
	if ((sc->sc_flags & G_ELI_FLAG_FIRST_KEY) != 0) {
		key = g_eli_key_hold(sc, 0,
		    LIST_FIRST(&sc->sc_geom->consumer)->provider->sectorsize);
		csp.csp_cipher_key = key;
	} else {
		key = NULL;
		csp.csp_cipher_key = sc->sc_ekey;
	}
	if (sc->sc_flags & G_ELI_FLAG_AUTH) {
		csp.csp_mode = CSP_MODE_ETA;
		csp.csp_auth_alg = sc->sc_aalgo;
		csp.csp_auth_klen = G_ELI_AUTH_SECKEYLEN;
	}

	switch (sc->sc_crypto) {
	case G_ELI_CRYPTO_SW_ACCEL:
	case G_ELI_CRYPTO_SW:
		error = crypto_newsession(&wr->w_sid, &csp,
		    CRYPTOCAP_F_SOFTWARE);
		break;
	case G_ELI_CRYPTO_HW:
		error = crypto_newsession(&wr->w_sid, &csp,
		    CRYPTOCAP_F_HARDWARE);
		break;
	case G_ELI_CRYPTO_UNKNOWN:
		error = crypto_newsession(&wr->w_sid, &csp,
		    CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE);
		if (error == 0) {
			caps = crypto_ses2caps(wr->w_sid);
			if (caps & CRYPTOCAP_F_HARDWARE)
				new_crypto = G_ELI_CRYPTO_HW;
			else if (caps & CRYPTOCAP_F_ACCEL_SOFTWARE)
				new_crypto = G_ELI_CRYPTO_SW_ACCEL;
			else
				new_crypto = G_ELI_CRYPTO_SW;
			mtx_lock(&sc->sc_queue_mtx);
			if (sc->sc_crypto == G_ELI_CRYPTO_UNKNOWN)
				sc->sc_crypto = new_crypto;
			mtx_unlock(&sc->sc_queue_mtx);
		}
		break;
	default:
		panic("%s: invalid condition", __func__);
	}

	if ((sc->sc_flags & G_ELI_FLAG_FIRST_KEY) != 0) {
		if (error)
			g_eli_key_drop(sc, key);
		else
			wr->w_first_key = key;
	}

	return (error);
}

static void
g_eli_freesession(struct g_eli_worker *wr)
{
	struct g_eli_softc *sc;

	crypto_freesession(wr->w_sid);
	if (wr->w_first_key != NULL) {
		sc = wr->w_softc;
		g_eli_key_drop(sc, wr->w_first_key);
		wr->w_first_key = NULL;
	}
}

static void
g_eli_cancel(struct g_eli_softc *sc)
{
	struct bio *bp;

	mtx_assert(&sc->sc_queue_mtx, MA_OWNED);

	while ((bp = bioq_takefirst(&sc->sc_queue)) != NULL) {
		KASSERT(G_ELI_IS_NEW_BIO(bp->bio_pflags),
		    ("Not new bio when canceling (bp=%p).", bp));
		g_io_deliver(bp, ENXIO);
	}
}

static struct bio *
g_eli_takefirst(struct g_eli_softc *sc)
{
	struct bio *bp;

	mtx_assert(&sc->sc_queue_mtx, MA_OWNED);

	if (!(sc->sc_flags & G_ELI_FLAG_SUSPEND))
		return (bioq_takefirst(&sc->sc_queue));
	/*
	 * The device is suspended, so skip new I/O requests and only pick
	 * up bios that are already in progress.
	 */
	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
		if (!G_ELI_IS_NEW_BIO(bp->bio_pflags))
			break;
	}
	if (bp != NULL)
		bioq_remove(&sc->sc_queue, bp);
	return (bp);
}

/*
 * This is the main function of the kernel worker threads, which do the
 * cryptography in software when we have no hardware acceleration.
 * A dedicated thread is needed so that we don't slow down the g_up/g_down
 * GEOM threads with crypto work.
 */
static void
g_eli_worker(void *arg)
{
	struct g_eli_softc *sc;
	struct g_eli_worker *wr;
	struct bio *bp;
	int error __diagused;

	wr = arg;
	sc = wr->w_softc;
#ifdef EARLY_AP_STARTUP
	MPASS(!sc->sc_cpubind || smp_started);
#elif defined(SMP)
	/* Before sched_bind() to a CPU, wait for all CPUs to go on-line. */
	if (sc->sc_cpubind) {
		while (!smp_started)
			tsleep(wr, 0, "geli:smp", hz / 4);
	}
#endif
	thread_lock(curthread);
	sched_prio(curthread, PUSER);
	if (sc->sc_cpubind)
		sched_bind(curthread, wr->w_number % mp_ncpus);
	thread_unlock(curthread);

	G_ELI_DEBUG(1, "Thread %s started.", curthread->td_proc->p_comm);

	for (;;) {
		mtx_lock(&sc->sc_queue_mtx);
again:
		bp = g_eli_takefirst(sc);
		if (bp == NULL) {
			if (sc->sc_flags & G_ELI_FLAG_DESTROY) {
				g_eli_cancel(sc);
				LIST_REMOVE(wr, w_next);
				g_eli_freesession(wr);
				free(wr, M_ELI);
				G_ELI_DEBUG(1, "Thread %s exiting.",
				    curthread->td_proc->p_comm);
				wakeup(&sc->sc_workers);
				mtx_unlock(&sc->sc_queue_mtx);
				kproc_exit(0);
			}
			while (sc->sc_flags & G_ELI_FLAG_SUSPEND) {
				if (sc->sc_inflight > 0) {
					G_ELI_DEBUG(0, "inflight=%d",
					    sc->sc_inflight);
					/*
					 * We still have inflight BIOs, so
					 * sleep and retry.
					 */
					msleep(sc, &sc->sc_queue_mtx, PRIBIO,
					    "geli:inf", hz / 5);
					goto again;
				}
				/*
				 * Suspend requested, mark the worker as
				 * suspended and go to sleep.
				 */
				if (wr->w_active) {
					g_eli_freesession(wr);
					wr->w_active = FALSE;
				}
				wakeup(&sc->sc_workers);
				msleep(sc, &sc->sc_queue_mtx, PRIBIO,
				    "geli:suspend", 0);
				if (!wr->w_active &&
				    !(sc->sc_flags & G_ELI_FLAG_SUSPEND)) {
					error = g_eli_newsession(wr);
					KASSERT(error == 0,
					    ("g_eli_newsession() failed on resume (error=%d)",
					    error));
					wr->w_active = TRUE;
				}
				goto again;
			}
			msleep(sc, &sc->sc_queue_mtx, PDROP, "geli:w", 0);
			continue;
		}
		if (G_ELI_IS_NEW_BIO(bp->bio_pflags))
			atomic_add_int(&sc->sc_inflight, 1);
		mtx_unlock(&sc->sc_queue_mtx);
		if (G_ELI_IS_NEW_BIO(bp->bio_pflags)) {
			G_ELI_SETWORKER(bp->bio_pflags, 0);
			if (sc->sc_flags & G_ELI_FLAG_AUTH) {
				if (bp->bio_cmd == BIO_READ)
					g_eli_auth_read(sc, bp);
				else
					g_eli_auth_run(wr, bp);
			} else {
				if (bp->bio_cmd == BIO_READ)
					g_eli_crypto_read(sc, bp, 1);
				else
					g_eli_crypto_run(wr, bp);
			}
		} else {
			if (sc->sc_flags & G_ELI_FLAG_AUTH)
				g_eli_auth_run(wr, bp);
			else
				g_eli_crypto_run(wr, bp);
		}
	}
}

static int
g_eli_read_metadata_offset(struct g_class *mp, struct g_provider *pp,
    off_t offset, struct g_eli_metadata *md)
{
	struct g_geom *gp;
	struct g_consumer *cp;
	u_char *buf = NULL;
	int error;

	g_topology_assert();

	gp = g_new_geomf(mp, "eli:taste");
	gp->start = g_eli_start;
	gp->access = g_std_access;
	/*
	 * g_eli_read_metadata() is always called from the event thread.
	 * Our geom is created and destroyed in the same event, so no
	 * orphan or spoil event can occur in the meantime.
	 */
	gp->orphan = g_eli_orphan_spoil_assert;
	gp->spoiled = g_eli_orphan_spoil_assert;
	cp = g_new_consumer(gp);
	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
	error = g_attach(cp, pp);
	if (error != 0)
		goto end;
	error = g_access(cp, 1, 0, 0);
	if (error != 0)
		goto end;
	g_topology_unlock();
	buf = g_read_data(cp, offset, pp->sectorsize, &error);
	g_topology_lock();
	if (buf == NULL)
		goto end;
	error = eli_metadata_decode(buf, md);
	if (error != 0)
		goto end;
	/* Metadata was read and decoded successfully. */
end:
	g_free(buf);
	if (cp->provider != NULL) {
		if (cp->acr == 1)
			g_access(cp, -1, 0, 0);
		g_detach(cp);
	}
	g_destroy_consumer(cp);
	g_destroy_geom(gp);
	return (error);
}

int
g_eli_read_metadata(struct g_class *mp, struct g_provider *pp,
    struct g_eli_metadata *md)
{

	return (g_eli_read_metadata_offset(mp, pp,
	    pp->mediasize - pp->sectorsize, md));
}

/*
 * The function is called on the last close of a provider for which the
 * user requested detach-on-last-close behaviour.
 */
static void
g_eli_last_close(void *arg, int flags __unused)
{
	struct g_geom *gp;
	char gpname[64];
	int error __diagused;

	g_topology_assert();
	gp = arg;
	strlcpy(gpname, gp->name, sizeof(gpname));
	error = g_eli_destroy(gp->softc, TRUE);
	KASSERT(error == 0, ("Cannot detach %s on last close (error=%d).",
	    gpname, error));
	G_ELI_DEBUG(0, "Detached %s on last close.", gpname);
}

int
g_eli_access(struct g_provider *pp, int dr, int dw, int de)
{
	struct g_eli_softc *sc;
	struct g_geom *gp;

	gp = pp->geom;
	sc = gp->softc;

	if (dw > 0) {
		if (sc->sc_flags & G_ELI_FLAG_RO) {
			/* Deny write attempts. */
			return (EROFS);
		}
		/* Someone is opening us for write, we need to remember that. */
		sc->sc_flags |= G_ELI_FLAG_WOPEN;
		return (0);
	}
	/* Is this the last close? */
	if (pp->acr + dr > 0 || pp->acw + dw > 0 || pp->ace + de > 0)
		return (0);

	/*
	 * Automatically detach on last close if requested.
	 */
	if ((sc->sc_flags & G_ELI_FLAG_RW_DETACH) ||
	    (sc->sc_flags & G_ELI_FLAG_WOPEN)) {
		g_post_event(g_eli_last_close, gp, M_WAITOK, NULL);
	}
	return (0);
}

static int
g_eli_cpu_is_disabled(int cpu)
{
#ifdef SMP
	return (CPU_ISSET(cpu, &hlt_cpus_mask));
#else
	return (0);
#endif
}

static void
g_eli_init_uma(void)
{

	atomic_add_int(&g_eli_devs, 1);
	sx_xlock(&g_eli_umalock);
	if (g_eli_uma == NULL) {
		/*
		 * Calculate the maximum-sized swap buffer we are
		 * likely to see.
		 */
		g_eli_alloc_sz = roundup2((PAGE_SIZE + sizeof(int) +
		    G_ELI_AUTH_SECKEYLEN) * nsw_cluster_max +
		    sizeof(uintptr_t), PAGE_SIZE);

		g_eli_uma = uma_zcreate("GELI buffers", g_eli_alloc_sz,
		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);

		/* Reserve and pre-allocate pages, as appropriate. */
		uma_zone_reserve(g_eli_uma, g_eli_minbufs);
		uma_prealloc(g_eli_uma, g_eli_minbufs);
	}
	sx_xunlock(&g_eli_umalock);
}

/*
 * Try to destroy the UMA pool. This will do nothing if there are existing
 * GELI devices or existing UMA allocations.
 */
static void
g_eli_destroy_uma(void)
{
	uma_zone_t oldzone;

	sx_xlock(&g_eli_umalock);
	/* Ensure we really should be destroying this. */
	if (atomic_load_int(&g_eli_devs) == 0 &&
	    atomic_load_int(&g_eli_umaoutstanding) == 0) {
		oldzone = g_eli_uma;
		g_eli_uma = NULL;
	} else
		oldzone = NULL;
	sx_xunlock(&g_eli_umalock);

	if (oldzone != NULL)
		uma_zdestroy(oldzone);
}

static void
g_eli_fini_uma(void)
{

	/*
	 * If this is the last outstanding GELI device, try to
	 * destroy the UMA pool.
	 */
	if (atomic_fetchadd_int(&g_eli_devs, -1) == 1)
		g_eli_destroy_uma();
}

/*
 * Allocate a data buffer. If the size fits within our swap-sized buffers,
 * try to allocate a swap-sized buffer from the UMA pool. Otherwise, fall
 * back to using malloc.
 *
 * Swap-related requests are special: they can only use the UMA pool, they
 * use M_USE_RESERVE to let them dip farther into system resources, and
 * they always use M_NOWAIT to prevent swap operations from deadlocking.
 */
bool
g_eli_alloc_data(struct bio *bp, int sz)
{

	KASSERT(sz <= g_eli_alloc_sz || (bp->bio_flags & BIO_SWAP) == 0,
	    ("BIO_SWAP request for %d bytes exceeds the precalculated buffer"
	    " size (%d)", sz, g_eli_alloc_sz));
	if (sz <= g_eli_alloc_sz) {
		bp->bio_driver2 = uma_zalloc(g_eli_uma, M_NOWAIT |
		    ((bp->bio_flags & BIO_SWAP) != 0 ? M_USE_RESERVE : 0));
		if (bp->bio_driver2 != NULL) {
			bp->bio_pflags |= G_ELI_UMA_ALLOC;
			atomic_add_int(&g_eli_umaoutstanding, 1);
		}
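		/*
		 * Swap requests must never fall back to malloc(9), so for
		 * them we return the UMA result, success or failure, right
		 * away.
		 */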
		if (bp->bio_driver2 != NULL || (bp->bio_flags & BIO_SWAP) != 0)
			return (bp->bio_driver2 != NULL);
	}
	bp->bio_pflags &= ~(G_ELI_UMA_ALLOC);
	bp->bio_driver2 = malloc(sz, M_ELI, g_eli_blocking_malloc ? M_WAITOK :
	    M_NOWAIT);
	return (bp->bio_driver2 != NULL);
}

/*
 * Free a buffer from bp->bio_driver2 which was allocated with
 * g_eli_alloc_data(). This function makes sure that the memory is freed
 * to the correct place.
 *
 * Additionally, if this function frees the last outstanding UMA request
 * and there are no open GELI devices, this will destroy the UMA pool.
 */
void
g_eli_free_data(struct bio *bp)
{

	/*
	 * Mimic the free(9) behavior of allowing a NULL pointer to be
	 * freed.
	 */
	if (bp->bio_driver2 == NULL)
		return;

	if ((bp->bio_pflags & G_ELI_UMA_ALLOC) != 0) {
		uma_zfree(g_eli_uma, bp->bio_driver2);
		if (atomic_fetchadd_int(&g_eli_umaoutstanding, -1) == 1 &&
		    atomic_load_int(&g_eli_devs) == 0)
			g_eli_destroy_uma();
	} else
		free(bp->bio_driver2, M_ELI);
	bp->bio_driver2 = NULL;
}

struct g_geom *
g_eli_create(struct gctl_req *req, struct g_class *mp, struct g_provider *bpp,
    const struct g_eli_metadata *md, const u_char *mkey, int nkey)
{
	struct g_eli_softc *sc;
	struct g_eli_worker *wr;
	struct g_geom *gp;
	struct g_provider *pp;
	struct g_consumer *cp;
	struct g_geom_alias *gap;
	u_int i, threads;
	int dcw, error;

	G_ELI_DEBUG(1, "Creating device %s%s.", bpp->name, G_ELI_SUFFIX);
	KASSERT(eli_metadata_crypto_supported(md),
	    ("%s: unsupported crypto for %s", __func__, bpp->name));

	gp = g_new_geomf(mp, "%s%s", bpp->name, G_ELI_SUFFIX);
	sc = malloc(sizeof(*sc), M_ELI, M_WAITOK | M_ZERO);
	gp->start = g_eli_start;
	/*
	 * Spoiling can happen even though we have the provider open
	 * exclusively, e.g. through media change events.
	 */
	gp->spoiled = g_eli_orphan;
	gp->orphan = g_eli_orphan;
	gp->resize = g_eli_resize;
	gp->dumpconf = g_eli_dumpconf;
	/*
	 * If the detach-on-last-close feature is not enabled and we don't
	 * operate on a read-only provider, we can simply use g_std_access().
	 */
	if (md->md_flags & (G_ELI_FLAG_WO_DETACH | G_ELI_FLAG_RO))
		gp->access = g_eli_access;
	else
		gp->access = g_std_access;

	eli_metadata_softc(sc, md, bpp->sectorsize, bpp->mediasize);
	sc->sc_nkey = nkey;

	gp->softc = sc;
	sc->sc_geom = gp;

	bioq_init(&sc->sc_queue);
	mtx_init(&sc->sc_queue_mtx, "geli:queue", NULL, MTX_DEF);
	mtx_init(&sc->sc_ekeys_lock, "geli:ekeys", NULL, MTX_DEF);
	g_eli_init_uma();

	pp = NULL;
	cp = g_new_consumer(gp);
	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;

	error = g_attach(cp, bpp);
	if (error != 0) {
		if (req != NULL) {
			gctl_error(req, "Cannot attach to %s (error=%d).",
			    bpp->name, error);
		} else {
			G_ELI_DEBUG(1, "Cannot attach to %s (error=%d).",
			    bpp->name, error);
		}
		goto failed;
	}
	/*
	 * Keep the provider open all the time, so we can run critical
	 * tasks, like Master Key deletion, without wondering whether we
	 * can open the provider or not.
	 * We only skip opening the provider for writing when the user
	 * requested read-only access.
	 */
	dcw = (sc->sc_flags & G_ELI_FLAG_RO) ? 0 : 1;
	error = g_access(cp, 1, dcw, 1);
	if (error != 0) {
		if (req != NULL) {
			gctl_error(req, "Cannot access %s (error=%d).",
			    bpp->name, error);
		} else {
			G_ELI_DEBUG(1, "Cannot access %s (error=%d).",
			    bpp->name, error);
		}
		goto failed;
	}

	/*
	 * Remember the keys in our softc structure.
	 */
	g_eli_mkey_propagate(sc, mkey);

	LIST_INIT(&sc->sc_workers);

	threads = g_eli_threads;
	if (threads == 0)
		threads = mp_ncpus;
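	/*
	 * Bind workers to CPUs only when there is exactly one worker per
	 * CPU; otherwise leave their placement to the scheduler.
	 */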
	sc->sc_cpubind = (mp_ncpus > 1 && threads == mp_ncpus);
	for (i = 0; i < threads; i++) {
		if (g_eli_cpu_is_disabled(i)) {
			G_ELI_DEBUG(1, "%s: CPU %u disabled, skipping.",
			    bpp->name, i);
			continue;
		}
		wr = malloc(sizeof(*wr), M_ELI, M_WAITOK | M_ZERO);
		wr->w_softc = sc;
		wr->w_number = i;
		wr->w_active = TRUE;

		error = g_eli_newsession(wr);
		if (error != 0) {
			free(wr, M_ELI);
			if (req != NULL) {
				gctl_error(req, "Cannot set up crypto session "
				    "for %s (error=%d).", bpp->name, error);
			} else {
				G_ELI_DEBUG(1, "Cannot set up crypto session "
				    "for %s (error=%d).", bpp->name, error);
			}
			goto failed;
		}

		error = kproc_create(g_eli_worker, wr, &wr->w_proc, 0, 0,
		    "g_eli[%u] %s", i, bpp->name);
		if (error != 0) {
			g_eli_freesession(wr);
			free(wr, M_ELI);
			if (req != NULL) {
				gctl_error(req, "Cannot create kernel thread "
				    "for %s (error=%d).", bpp->name, error);
			} else {
				G_ELI_DEBUG(1, "Cannot create kernel thread "
				    "for %s (error=%d).", bpp->name, error);
			}
			goto failed;
		}
		LIST_INSERT_HEAD(&sc->sc_workers, wr, w_next);
	}

	/*
	 * Create decrypted provider.
	 */
	pp = g_new_providerf(gp, "%s%s", bpp->name, G_ELI_SUFFIX);
	pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
	if (g_eli_unmapped_io && CRYPTO_HAS_VMPAGE) {
		/*
		 * On DMAP architectures we can use unmapped I/O.  But don't
		 * use it with data integrity verification.  That code hasn't
		 * been written yet.
		 */
		if ((sc->sc_flags & G_ELI_FLAG_AUTH) == 0)
			pp->flags |= G_PF_ACCEPT_UNMAPPED;
	}
	pp->mediasize = sc->sc_mediasize;
	pp->sectorsize = sc->sc_sectorsize;
	LIST_FOREACH(gap, &bpp->aliases, ga_next)
		g_provider_add_alias(pp, "%s%s", gap->ga_alias, G_ELI_SUFFIX);

	g_error_provider(pp, 0);

	G_ELI_DEBUG(0, "Device %s created.", pp->name);
	G_ELI_DEBUG(0, "Encryption: %s %u", g_eli_algo2str(sc->sc_ealgo),
	    sc->sc_ekeylen);
	if (sc->sc_flags & G_ELI_FLAG_AUTH)
		G_ELI_DEBUG(0, " Integrity: %s", g_eli_algo2str(sc->sc_aalgo));
	G_ELI_DEBUG(0, "    Crypto: %s",
	    sc->sc_crypto == G_ELI_CRYPTO_SW_ACCEL ? "accelerated software" :
	    sc->sc_crypto == G_ELI_CRYPTO_SW ? "software" : "hardware");
	return (gp);

failed:
	mtx_lock(&sc->sc_queue_mtx);
	sc->sc_flags |= G_ELI_FLAG_DESTROY;
	wakeup(sc);
	/*
	 * Wait for the kernel threads to destroy themselves.
	 */
	while (!LIST_EMPTY(&sc->sc_workers)) {
		msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO,
		    "geli:destroy", 0);
	}
	mtx_destroy(&sc->sc_queue_mtx);
	if (cp->provider != NULL) {
		if (cp->acr == 1)
			g_access(cp, -1, -dcw, -1);
		g_detach(cp);
	}
	g_destroy_consumer(cp);
	g_destroy_geom(gp);
	g_eli_key_destroy(sc);
	g_eli_fini_uma();
	zfree(sc, M_ELI);
	return (NULL);
}

int
g_eli_destroy(struct g_eli_softc *sc, boolean_t force)
{
	struct g_geom *gp;
	struct g_provider *pp;

	g_topology_assert();

	if (sc == NULL)
		return (ENXIO);

	gp = sc->sc_geom;
	pp = LIST_FIRST(&gp->provider);
	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
		if (force) {
			G_ELI_DEBUG(1, "Device %s is still open, so it "
			    "cannot be definitely removed.", pp->name);
			sc->sc_flags |= G_ELI_FLAG_RW_DETACH;
			gp->access = g_eli_access;
			g_wither_provider(pp, ENXIO);
			return (EBUSY);
		} else {
			G_ELI_DEBUG(1,
			    "Device %s is still open (r%dw%de%d).", pp->name,
			    pp->acr, pp->acw, pp->ace);
			return (EBUSY);
		}
	}

	mtx_lock(&sc->sc_queue_mtx);
	sc->sc_flags |= G_ELI_FLAG_DESTROY;
	wakeup(sc);
	while (!LIST_EMPTY(&sc->sc_workers)) {
		msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO,
		    "geli:destroy", 0);
	}
	mtx_destroy(&sc->sc_queue_mtx);
	gp->softc = NULL;
	g_eli_key_destroy(sc);
	g_eli_fini_uma();
	zfree(sc, M_ELI);

	G_ELI_DEBUG(0, "Device %s destroyed.", gp->name);
	g_wither_geom_close(gp, ENXIO);

	return (0);
}

static int
g_eli_destroy_geom(struct gctl_req *req __unused,
    struct g_class *mp __unused, struct g_geom *gp)
{
	struct g_eli_softc *sc;

	sc = gp->softc;
	return (g_eli_destroy(sc, FALSE));
}

static int
g_eli_keyfiles_load(struct hmac_ctx *ctx, const char *provider)
{
	u_char *keyfile, *data;
	char *file, name[64];
	size_t size;
	int i;

	for (i = 0; ; i++) {
		snprintf(name, sizeof(name), "%s:geli_keyfile%d", provider, i);
		keyfile = preload_search_by_type(name);
		if (keyfile == NULL && i == 0) {
			/*
			 * If there is only one keyfile, allow a simpler name.
			 */
			snprintf(name, sizeof(name), "%s:geli_keyfile", provider);
			keyfile = preload_search_by_type(name);
		}
		if (keyfile == NULL)
			return (i);	/* Return number of loaded keyfiles. */
		data = preload_fetch_addr(keyfile);
		if (data == NULL) {
			G_ELI_DEBUG(0, "Cannot find key file data for %s.",
			    name);
			return (0);
		}
		size = preload_fetch_size(keyfile);
		if (size == 0) {
			G_ELI_DEBUG(0, "Cannot find key file size for %s.",
			    name);
			return (0);
		}
		file = preload_search_info(keyfile, MODINFO_NAME);
		if (file == NULL) {
			G_ELI_DEBUG(0, "Cannot find key file name for %s.",
			    name);
			return (0);
		}
		G_ELI_DEBUG(1, "Loaded keyfile %s for %s (type: %s).", file,
		    provider, name);
		g_eli_crypto_hmac_update(ctx, data, size);
	}
}

static void
g_eli_keyfiles_clear(const char *provider)
{
	u_char *keyfile, *data;
	char name[64];
	size_t size;
	int i;

	for (i = 0; ; i++) {
		snprintf(name, sizeof(name), "%s:geli_keyfile%d", provider, i);
		keyfile = preload_search_by_type(name);
		if (keyfile == NULL)
			return;
		data = preload_fetch_addr(keyfile);
		size = preload_fetch_size(keyfile);
		if (data != NULL && size != 0)
			explicit_bzero(data, size);
	}
}

/*
 * Tasting is only performed during boot.
 * We detect providers which should be attached before root is mounted.
 */
static struct g_geom *
g_eli_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
{
	struct g_eli_metadata md;
	struct g_geom *gp;
	struct hmac_ctx ctx;
	char passphrase[256];
	u_char key[G_ELI_USERKEYLEN], mkey[G_ELI_DATAIVKEYLEN];
	u_int i, nkey, nkeyfiles, tries, showpass;
	int error;
	struct keybuf *keybuf;

	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
	g_topology_assert();

	if (root_mounted() || g_eli_tries == 0)
		return (NULL);

	G_ELI_DEBUG(3, "Tasting %s.", pp->name);

	error = g_eli_read_metadata(mp, pp, &md);
	if (error != 0)
		return (NULL);
	gp = NULL;

	if (strcmp(md.md_magic, G_ELI_MAGIC) != 0)
		return (NULL);
	if (md.md_version > G_ELI_VERSION) {
		printf("geom_eli.ko module is too old to handle %s.\n",
		    pp->name);
		return (NULL);
	}
	if (md.md_provsize != pp->mediasize)
		return (NULL);
	/* Should we attach it on boot? */
	if (!(md.md_flags & G_ELI_FLAG_BOOT) &&
	    !(md.md_flags & G_ELI_FLAG_GELIBOOT))
		return (NULL);
	if (md.md_keys == 0x00) {
		G_ELI_DEBUG(0, "No valid keys on %s.", pp->name);
		return (NULL);
	}
	if (!eli_metadata_crypto_supported(&md)) {
		G_ELI_DEBUG(0, "%s uses invalid or unsupported algorithms\n",
		    pp->name);
		return (NULL);
	}
	if (md.md_iterations == -1) {
		/* If there is no passphrase, we try only once. */
		tries = 1;
	} else {
		/* Ask for the passphrase no more than g_eli_tries times. */
		tries = g_eli_tries;
	}

	if ((keybuf = get_keybuf()) != NULL) {
		/* Scan the key buffer, try all GELI keys. */
		for (i = 0; i < keybuf->kb_nents; i++) {
			if (keybuf->kb_ents[i].ke_type == KEYBUF_TYPE_GELI) {
				memcpy(key, keybuf->kb_ents[i].ke_data,
				    sizeof(key));

				if (g_eli_mkey_decrypt_any(&md, key,
				    mkey, &nkey) == 0) {
					explicit_bzero(key, sizeof(key));
					goto have_key;
				}
			}
		}
	}

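	/*
	 * When a passphrase is in use, iteration 0 tries the one cached
	 * from a previously attached provider; hence "i <= tries", which
	 * leaves up to "tries" real prompts after the cached attempt.
	 */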
	for (i = 0; i <= tries; i++) {
		g_eli_crypto_hmac_init(&ctx, NULL, 0);

		/*
		 * Load all key files.
		 */
		nkeyfiles = g_eli_keyfiles_load(&ctx, pp->name);

		if (nkeyfiles == 0 && md.md_iterations == -1) {
			/*
			 * No key files and no passphrase: something is
			 * definitely wrong here.
			 * geli(8) doesn't allow for such a situation, so
			 * assume that there really was no passphrase and
			 * that the key files are not properly defined in
			 * loader.conf.
			 */
			G_ELI_DEBUG(0,
			    "Found no key files in loader.conf for %s.",
			    pp->name);
			return (NULL);
		}

		/* Ask for the passphrase if defined. */
		if (md.md_iterations >= 0) {
			/* Try first with cached passphrase. */
			if (i == 0) {
				if (!g_eli_boot_passcache)
					continue;
				memcpy(passphrase, cached_passphrase,
				    sizeof(passphrase));
			} else {
				printf("Enter passphrase for %s: ", pp->name);
				showpass = g_eli_visible_passphrase;
				if ((md.md_flags & G_ELI_FLAG_GELIDISPLAYPASS) != 0)
					showpass = GETS_ECHOPASS;
				cngets(passphrase, sizeof(passphrase),
				    showpass);
				memcpy(cached_passphrase, passphrase,
				    sizeof(passphrase));
			}
		}

		/*
		 * Prepare Derived-Key from the user passphrase.
		 */
		if (md.md_iterations == 0) {
			g_eli_crypto_hmac_update(&ctx, md.md_salt,
			    sizeof(md.md_salt));
			g_eli_crypto_hmac_update(&ctx, passphrase,
			    strlen(passphrase));
			explicit_bzero(passphrase, sizeof(passphrase));
		} else if (md.md_iterations > 0) {
			u_char dkey[G_ELI_USERKEYLEN];

			pkcs5v2_genkey(dkey, sizeof(dkey), md.md_salt,
			    sizeof(md.md_salt), passphrase, md.md_iterations);
			explicit_bzero(passphrase, sizeof(passphrase));
			g_eli_crypto_hmac_update(&ctx, dkey, sizeof(dkey));
			explicit_bzero(dkey, sizeof(dkey));
		}

		g_eli_crypto_hmac_final(&ctx, key, 0);

		/*
		 * Decrypt Master-Key.
		 */
		error = g_eli_mkey_decrypt_any(&md, key, mkey, &nkey);
		explicit_bzero(key, sizeof(key));
		if (error == -1) {
			if (i == tries) {
				G_ELI_DEBUG(0,
				    "Wrong key for %s. No tries left.",
				    pp->name);
				g_eli_keyfiles_clear(pp->name);
				return (NULL);
			}
			if (i > 0) {
				G_ELI_DEBUG(0,
				    "Wrong key for %s. Tries left: %u.",
				    pp->name, tries - i);
			}
			/* Try again. */
			continue;
		} else if (error > 0) {
			G_ELI_DEBUG(0,
			    "Cannot decrypt Master Key for %s (error=%d).",
			    pp->name, error);
			g_eli_keyfiles_clear(pp->name);
			return (NULL);
		}
		g_eli_keyfiles_clear(pp->name);
		G_ELI_DEBUG(1, "Using Master Key %u for %s.", nkey, pp->name);
		break;
	}
have_key:

	/*
	 * We have the correct key, let's attach the provider.
	 */
	gp = g_eli_create(NULL, mp, pp, &md, mkey, nkey);
	explicit_bzero(mkey, sizeof(mkey));
	explicit_bzero(&md, sizeof(md));
	if (gp == NULL) {
		G_ELI_DEBUG(0, "Cannot create device %s%s.", pp->name,
		    G_ELI_SUFFIX);
		return (NULL);
	}
	return (gp);
}

static void
g_eli_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
    struct g_consumer *cp, struct g_provider *pp)
{
	struct g_eli_softc *sc;

	g_topology_assert();
	sc = gp->softc;
	if (sc == NULL)
		return;
	if (pp != NULL || cp != NULL)
		return;	/* Nothing here. */

	sbuf_printf(sb, "%s<KeysTotal>%ju</KeysTotal>\n", indent,
	    (uintmax_t)sc->sc_ekeys_total);
	sbuf_printf(sb, "%s<KeysAllocated>%ju</KeysAllocated>\n", indent,
	    (uintmax_t)sc->sc_ekeys_allocated);
	sbuf_printf(sb, "%s<Flags>", indent);
	if (sc->sc_flags == 0)
		sbuf_cat(sb, "NONE");
	else {
		int first = 1;

#define ADD_FLAG(flag, name)	do {					\
	if (sc->sc_flags & (flag)) {					\
		if (!first)						\
			sbuf_cat(sb, ", ");				\
		else							\
			first = 0;					\
		sbuf_cat(sb, name);					\
	}								\
} while (0)
		ADD_FLAG(G_ELI_FLAG_SUSPEND, "SUSPEND");
		ADD_FLAG(G_ELI_FLAG_SINGLE_KEY, "SINGLE-KEY");
		ADD_FLAG(G_ELI_FLAG_NATIVE_BYTE_ORDER, "NATIVE-BYTE-ORDER");
		ADD_FLAG(G_ELI_FLAG_ONETIME, "ONETIME");
		ADD_FLAG(G_ELI_FLAG_BOOT, "BOOT");
		ADD_FLAG(G_ELI_FLAG_WO_DETACH, "W-DETACH");
		ADD_FLAG(G_ELI_FLAG_RW_DETACH, "RW-DETACH");
		ADD_FLAG(G_ELI_FLAG_AUTH, "AUTH");
		ADD_FLAG(G_ELI_FLAG_WOPEN, "W-OPEN");
		ADD_FLAG(G_ELI_FLAG_DESTROY, "DESTROY");
		ADD_FLAG(G_ELI_FLAG_RO, "READ-ONLY");
		ADD_FLAG(G_ELI_FLAG_NODELETE, "NODELETE");
		ADD_FLAG(G_ELI_FLAG_GELIBOOT, "GELIBOOT");
		ADD_FLAG(G_ELI_FLAG_GELIDISPLAYPASS, "GELIDISPLAYPASS");
		ADD_FLAG(G_ELI_FLAG_AUTORESIZE, "AUTORESIZE");
#undef  ADD_FLAG
	}
	sbuf_cat(sb, "</Flags>\n");

	if (!(sc->sc_flags & G_ELI_FLAG_ONETIME)) {
		sbuf_printf(sb, "%s<UsedKey>%u</UsedKey>\n", indent,
		    sc->sc_nkey);
	}
	sbuf_printf(sb, "%s<Version>%u</Version>\n", indent, sc->sc_version);
	sbuf_printf(sb, "%s<Crypto>", indent);
	switch (sc->sc_crypto) {
	case G_ELI_CRYPTO_HW:
		sbuf_cat(sb, "hardware");
		break;
	case G_ELI_CRYPTO_SW:
		sbuf_cat(sb, "software");
		break;
	case G_ELI_CRYPTO_SW_ACCEL:
		sbuf_cat(sb, "accelerated software");
		break;
	default:
		sbuf_cat(sb, "UNKNOWN");
		break;
	}
	sbuf_cat(sb, "</Crypto>\n");
	if (sc->sc_flags & G_ELI_FLAG_AUTH) {
		sbuf_printf(sb,
		    "%s<AuthenticationAlgorithm>%s</AuthenticationAlgorithm>\n",
		    indent, g_eli_algo2str(sc->sc_aalgo));
	}
	sbuf_printf(sb, "%s<KeyLength>%u</KeyLength>\n", indent,
	    sc->sc_ekeylen);
	sbuf_printf(sb, "%s<EncryptionAlgorithm>%s</EncryptionAlgorithm>\n",
	    indent, g_eli_algo2str(sc->sc_ealgo));
	sbuf_printf(sb, "%s<State>%s</State>\n", indent,
	    (sc->sc_flags & G_ELI_FLAG_SUSPEND) ? "SUSPENDED" : "ACTIVE");
}

static void
g_eli_shutdown_pre_sync(void *arg, int howto)
{
	struct g_class *mp;
	struct g_geom *gp, *gp2;
	struct g_provider *pp;
	struct g_eli_softc *sc;

	mp = arg;
	g_topology_lock();
	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
		sc = gp->softc;
		if (sc == NULL)
			continue;
		pp = LIST_FIRST(&gp->provider);
		KASSERT(pp != NULL, ("No provider? gp=%p (%s)", gp, gp->name));
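		/*
		 * If the device is still open, or if we cannot sleep here
		 * (e.g. the scheduler is stopped after a panic), arrange
		 * for detach on last close instead of destroying the
		 * device now.
		 */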
		if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0 ||
		    SCHEDULER_STOPPED())
		{
			sc->sc_flags |= G_ELI_FLAG_RW_DETACH;
			gp->access = g_eli_access;
		} else {
			(void) g_eli_destroy(sc, TRUE);
		}
	}
	g_topology_unlock();
}

static void
g_eli_init(struct g_class *mp)
{

	g_eli_pre_sync = EVENTHANDLER_REGISTER(shutdown_pre_sync,
	    g_eli_shutdown_pre_sync, mp, SHUTDOWN_PRI_FIRST);
	if (g_eli_pre_sync == NULL)
		G_ELI_DEBUG(0, "Warning! Cannot register shutdown event.");
}

static void
g_eli_fini(struct g_class *mp)
{

	if (g_eli_pre_sync != NULL)
		EVENTHANDLER_DEREGISTER(shutdown_pre_sync, g_eli_pre_sync);
}

DECLARE_GEOM_CLASS(g_eli_class, g_eli);
MODULE_DEPEND(g_eli, crypto, 1, 1, 1);
MODULE_VERSION(geom_eli, 0);