1/*-
2 * Copyright (c) 2017 W. Dean Freeman
3 * Copyright (c) 2013-2015 Mark R V Murray
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer
11 *    in this position and unchanged.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 *
27 */
28
29/*
30 * This implementation of Fortuna is based on the descriptions found in
31 * ISBN 978-0-470-47424-2 "Cryptography Engineering" by Ferguson, Schneier
32 * and Kohno ("FS&K").
33 */
34
35#include <sys/param.h>
36#include <sys/limits.h>
37
38#ifdef _KERNEL
39#include <sys/fail.h>
40#include <sys/kernel.h>
41#include <sys/lock.h>
42#include <sys/malloc.h>
43#include <sys/mutex.h>
44#include <sys/random.h>
45#include <sys/sdt.h>
46#include <sys/sysctl.h>
47#include <sys/systm.h>
48
49#include <machine/cpu.h>
50#else /* !_KERNEL */
51#include <inttypes.h>
52#include <stdbool.h>
53#include <stdio.h>
54#include <stdlib.h>
55#include <string.h>
56#include <threads.h>
57
58#include "unit_test.h"
59#endif /* _KERNEL */
60
61#include <crypto/chacha20/chacha.h>
62#include <crypto/rijndael/rijndael-api-fst.h>
63#include <crypto/sha2/sha256.h>
64
65#include <dev/random/hash.h>
66#include <dev/random/randomdev.h>
67#ifdef _KERNEL
68#include <dev/random/random_harvestq.h>
69#endif
70#include <dev/random/uint128.h>
71#include <dev/random/fortuna.h>
72
/* Defined in FS&K */
#define	RANDOM_FORTUNA_NPOOLS 32		/* The number of accumulation pools */
#define	RANDOM_FORTUNA_DEFPOOLSIZE 64		/* The default pool size/length for a (re)seed */
#define	RANDOM_FORTUNA_MAX_READ (1 << 20)	/* Max bytes from AES before rekeying */
#define	RANDOM_FORTUNA_BLOCKS_PER_KEY (1 << 16)	/* Max blocks from AES before rekeying */
/* The byte limit and the block limit above must describe the same amount of output. */
CTASSERT(RANDOM_FORTUNA_BLOCKS_PER_KEY * RANDOM_BLOCKSIZE ==
    RANDOM_FORTUNA_MAX_READ);

/*
 * The allowable range of the minimum pool size (fs_minpoolsize), whose
 * default value is RANDOM_FORTUNA_DEFPOOLSIZE above.  Making the minimum
 * pool size too large will mean a long time between reseeds, and too small
 * may compromise initial security but get faster reseeds.
 */
#define	RANDOM_FORTUNA_MINPOOLSIZE 16
#define	RANDOM_FORTUNA_MAXPOOLSIZE INT_MAX
/* The default must lie inside the permitted [MIN, MAX] range. */
CTASSERT(RANDOM_FORTUNA_MINPOOLSIZE <= RANDOM_FORTUNA_DEFPOOLSIZE);
CTASSERT(RANDOM_FORTUNA_DEFPOOLSIZE <= RANDOM_FORTUNA_MAXPOOLSIZE);

/*
 * This algorithm (and code) presumes that a cipher block is exactly one
 * uint128_t counter wide, and that RANDOM_KEYSIZE is twice as large as
 * RANDOM_BLOCKSIZE.
 */
CTASSERT(RANDOM_BLOCKSIZE == sizeof(uint128_t));
CTASSERT(RANDOM_KEYSIZE == 2*RANDOM_BLOCKSIZE);
94
/*
 * Probes for dtrace(1).
 *
 * A single two-argument probe is defined.  Despite its "event_processor"
 * name, it is fired from random_fortuna_pre_read() when a reseed happens,
 * with the reseed count and the pool array as arguments.
 */
#ifdef _KERNEL
SDT_PROVIDER_DECLARE(random);
SDT_PROVIDER_DEFINE(random);
SDT_PROBE_DEFINE2(random, fortuna, event_processor, debug, "u_int", "struct fs_pool *");
#endif /* _KERNEL */
101
/*
 * This is the beastie that needs protecting. It contains all of the
 * state that we are excited about. Exactly one is instantiated.
 *
 * The bracketed names in the field comments (P_i, ReseedCnt, C, K) are the
 * corresponding variables in the FS&K description of Fortuna.
 */
static struct fortuna_state {
	struct fs_pool {		/* P_i */
		/*
		 * Saturating count of bytes hashed into this pool since it
		 * was last drained.  Only the first one (pool 0) is used by
		 * Fortuna, to decide when a reseed is allowed.
		 */
		u_int fsp_length;
		/* Running hash context accumulating this pool's events */
		struct randomdev_hash fsp_hash;
	} fs_pool[RANDOM_FORTUNA_NPOOLS];
	/* ReseedCnt: number of reseeds so far; selects which pools to drain */
	u_int fs_reseedcount;
	/* C: generator counter; zero means "never seeded" */
	uint128_t fs_counter;
	/* K: current generator key */
	union randomdev_key fs_key;
	/* Extras: pool 0 must hold at least this many bytes before a reseed */
	u_int fs_minpoolsize;
	/* Extras for the OS */
#ifdef _KERNEL
	/* For use when 'pacing' the reseeds: uptime of the most recent reseed */
	sbintime_t fs_lasttime;
#endif
	/*
	 * Reseed lock.  NOTE(review): presumably the mutex behind the
	 * RANDOM_RESEED_*() macros, which are defined outside this file --
	 * confirm against fortuna.h.
	 */
	mtx_t fs_mtx;
} fortuna_state;
123
124/*
125 * This knob enables or disables the "Concurrent Reads" Fortuna feature.
126 *
127 * The benefit of Concurrent Reads is improved concurrency in Fortuna.  That is
128 * reflected in two related aspects:
129 *
130 * 1. Concurrent full-rate devrandom readers can achieve similar throughput to
131 *    a single reader thread (at least up to a modest number of cores; the
132 *    non-concurrent design falls over at 2 readers).
133 *
134 * 2. The rand_harvestq process spends much less time spinning when one or more
135 *    readers is processing a large request.  Partially this is due to
136 *    rand_harvestq / ra_event_processor design, which only passes one event at
137 *    a time to the underlying algorithm.  Each time, Fortuna must take its
138 *    global state mutex, potentially blocking on a reader.  Our adaptive
139 *    mutexes assume that a lock holder currently on CPU will release the lock
140 *    quickly, and spin if the owning thread is currently running.
141 *
142 *    (There is no reason rand_harvestq necessarily has to use the same lock as
143 *    the generator, or that it must necessarily drop and retake locks
144 *    repeatedly, but that is the current status quo.)
145 *
 * The concern is that the reduced lock scope might result in a less safe
147 * random(4) design.  However, the reduced-lock scope design is still
148 * fundamentally Fortuna.  This is discussed below.
149 *
150 * Fortuna Read() only needs mutual exclusion between readers to correctly
151 * update the shared read-side state: C, the 128-bit counter; and K, the
152 * current cipher/PRF key.
153 *
154 * In the Fortuna design, the global counter C should provide an independent
155 * range of values per request.
156 *
157 * Under lock, we can save a copy of C on the stack, and increment the global C
158 * by the number of blocks a Read request will require.
159 *
160 * Still under lock, we can save a copy of the key K on the stack, and then
161 * perform the usual key erasure K' <- Keystream(C, K, ...).  This does require
162 * generating 256 bits (32 bytes) of cryptographic keystream output with the
163 * global lock held, but that's all; none of the API keystream generation must
164 * be performed under lock.
165 *
166 * At this point, we may unlock.
167 *
168 * Some example timelines below (to oversimplify, all requests are in units of
 * native blocks, and the keysize happens to be less than or equal to the native
170 * blocksize of the underlying cipher, and the same sequence of two requests
171 * arrive in the same order).  The possibly expensive consumer keystream
172 * generation portion is marked with '**'.
173 *
174 * Status Quo fortuna_read()           Reduced-scope locking
175 * -------------------------           ---------------------
176 * C=C_0, K=K_0                        C=C_0, K=K_0
177 * <Thr 1 requests N blocks>           <Thr 1 requests N blocks>
178 * 1:Lock()                            1:Lock()
179 * <Thr 2 requests M blocks>           <Thr 2 requests M blocks>
180 * 1:GenBytes()                        1:stack_C := C_0
181 * 1:  Keystream(C_0, K_0, N)          1:stack_K := K_0
182 * 1:    <N blocks generated>**        1:C' := C_0 + N
183 * 1:    C' := C_0 + N                 1:K' := Keystream(C', K_0, 1)
184 * 1:    <- Keystream                  1:  <1 block generated>
185 * 1:  K' := Keystream(C', K_0, 1)     1:  C'' := C' + 1
186 * 1:    <1 block generated>           1:  <- Keystream
187 * 1:    C'' := C' + 1                 1:Unlock()
188 * 1:    <- Keystream
189 * 1:  <- GenBytes()
190 * 1:Unlock()
191 *
192 * Just prior to unlock, shared state is identical:
193 * ------------------------------------------------
194 * C'' == C_0 + N + 1                  C'' == C_0 + N + 1
195 * K' == keystream generated from      K' == keystream generated from
196 *       C_0 + N, K_0.                       C_0 + N, K_0.
197 * K_0 has been erased.                K_0 has been erased.
198 *
199 * After both designs unlock, the 2nd reader is unblocked.
200 *
201 * 2:Lock()                            2:Lock()
202 * 2:GenBytes()                        2:stack_C' := C''
203 * 2:  Keystream(C'', K', M)           2:stack_K' := K'
204 * 2:    <M blocks generated>**        2:C''' := C'' + M
205 * 2:    C''' := C'' + M               2:K'' := Keystream(C''', K', 1)
206 * 2:    <- Keystream                  2:  <1 block generated>
207 * 2:  K'' := Keystream(C''', K', 1)   2:  C'''' := C''' + 1
208 * 2:    <1 block generated>           2:  <- Keystream
209 * 2:    C'''' := C''' + 1             2:Unlock()
210 * 2:    <- Keystream
211 * 2:  <- GenBytes()
212 * 2:Unlock()
213 *
214 * Just prior to unlock, global state is identical:
215 * ------------------------------------------------------
216 *
217 * C'''' == (C_0 + N + 1) + M + 1      C'''' == (C_0 + N + 1) + M + 1
218 * K'' == keystream generated from     K'' == keystream generated from
219 *        C_0 + N + 1 + M, K'.                C_0 + N + 1 + M, K'.
220 * K' has been erased.                 K' has been erased.
221 *
222 * Finally, in the new design, the two consumer threads can finish the
223 * remainder of the generation at any time (including simultaneously):
224 *
225 *                                     1:  GenBytes()
226 *                                     1:    Keystream(stack_C, stack_K, N)
227 *                                     1:      <N blocks generated>**
228 *                                     1:    <- Keystream
229 *                                     1:  <- GenBytes
230 *                                     1:ExplicitBzero(stack_C, stack_K)
231 *
232 *                                     2:  GenBytes()
233 *                                     2:    Keystream(stack_C', stack_K', M)
234 *                                     2:      <M blocks generated>**
235 *                                     2:    <- Keystream
236 *                                     2:  <- GenBytes
237 *                                     2:ExplicitBzero(stack_C', stack_K')
238 *
239 * The generated user keystream for both threads is identical between the two
240 * implementations:
241 *
242 * 1: Keystream(C_0, K_0, N)           1: Keystream(stack_C, stack_K, N)
243 * 2: Keystream(C'', K', M)            2: Keystream(stack_C', stack_K', M)
244 *
245 * (stack_C == C_0; stack_K == K_0; stack_C' == C''; stack_K' == K'.)
246 */
/*
 * Concurrent Reads are enabled by default.  In the kernel this flag is
 * exported as the read-only, boot-time tunable sysctl "concurrent_read"
 * under the "fortuna" node, and random_fortuna_read() tests it on every
 * read to choose between the concurrent and the locked code path.
 */
static bool fortuna_concurrent_read __read_frequently = true;
248
#ifdef _KERNEL
/* Context that collects the sysctl OIDs added in random_fortuna_init_alg(). */
static struct sysctl_ctx_list random_clist;
/*
 * NOTE(review): presumably expands to the range-checking sysctl handler
 * random_check_uint_fs_minpoolsize() that is registered for "minpoolsize"
 * in random_fortuna_init_alg(); the macro is defined elsewhere -- confirm.
 */
RANDOM_CHECK_UINT(fs_minpoolsize, RANDOM_FORTUNA_MINPOOLSIZE, RANDOM_FORTUNA_MAXPOOLSIZE);
#else
/*
 * Userland build only: a zero-filled buffer (static storage is
 * zero-initialized) that random_fortuna_reseed_internal() hashes as the
 * leading all-zero prefix of its double hash.
 */
static uint8_t zero_region[RANDOM_ZERO_BLOCKSIZE];
#endif
255
/*
 * Forward declarations for the entry points referenced by
 * random_alg_context below.  random_fortuna_pre_read() and
 * random_fortuna_read() are declared static here; their definitions omit the
 * keyword but still have internal linkage because of these earlier
 * declarations.
 */
static void random_fortuna_pre_read(void);
static void random_fortuna_read(uint8_t *, size_t);
static bool random_fortuna_seeded(void);
static bool random_fortuna_seeded_internal(void);
static void random_fortuna_process_event(struct harvest_event *);

/* Must be called with the reseed lock held (it asserts ownership). */
static void random_fortuna_reseed_internal(uint32_t *entropy_data, u_int blockcount);
263
/*
 * Method table describing Fortuna to the generic random device code.
 *
 * When RANDOM_LOADABLE is defined the table is file-local and is published
 * through p_random_alg_context by random_fortuna_init_alg(); otherwise it
 * has external linkage under the name random_alg_context.
 */
#ifdef RANDOM_LOADABLE
static
#endif
const struct random_algorithm random_alg_context = {
	.ra_ident = "Fortuna",
	.ra_pre_read = random_fortuna_pre_read,		/* maybe reseed before a read */
	.ra_read = random_fortuna_read,			/* generate output bytes */
	.ra_seeded = random_fortuna_seeded,		/* is output available yet? */
	.ra_event_processor = random_fortuna_process_event,	/* absorb one event */
	.ra_poolcount = RANDOM_FORTUNA_NPOOLS,
};
275
276/* ARGSUSED */
277static void
278random_fortuna_init_alg(void *unused __unused)
279{
280	int i;
281#ifdef _KERNEL
282	struct sysctl_oid *random_fortuna_o;
283#endif
284
285#ifdef RANDOM_LOADABLE
286	p_random_alg_context = &random_alg_context;
287#endif
288
289	RANDOM_RESEED_INIT_LOCK();
290	/*
291	 * Fortuna parameters. Do not adjust these unless you have
292	 * have a very good clue about what they do!
293	 */
294	fortuna_state.fs_minpoolsize = RANDOM_FORTUNA_DEFPOOLSIZE;
295#ifdef _KERNEL
296	fortuna_state.fs_lasttime = 0;
297	random_fortuna_o = SYSCTL_ADD_NODE(&random_clist,
298		SYSCTL_STATIC_CHILDREN(_kern_random),
299		OID_AUTO, "fortuna", CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
300		"Fortuna Parameters");
301	SYSCTL_ADD_PROC(&random_clist,
302	    SYSCTL_CHILDREN(random_fortuna_o), OID_AUTO, "minpoolsize",
303	    CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
304	    &fortuna_state.fs_minpoolsize, RANDOM_FORTUNA_DEFPOOLSIZE,
305	    random_check_uint_fs_minpoolsize, "IU",
306	    "Minimum pool size necessary to cause a reseed");
307	KASSERT(fortuna_state.fs_minpoolsize > 0, ("random: Fortuna threshold must be > 0 at startup"));
308
309	SYSCTL_ADD_BOOL(&random_clist, SYSCTL_CHILDREN(random_fortuna_o),
310	    OID_AUTO, "concurrent_read", CTLFLAG_RDTUN,
311	    &fortuna_concurrent_read, 0, "If non-zero, enable "
312	    "feature to improve concurrent Fortuna performance.");
313#endif
314
315	/*-
316	 * FS&K - InitializePRNG()
317	 *      - P_i = \epsilon
318	 *      - ReseedCNT = 0
319	 */
320	for (i = 0; i < RANDOM_FORTUNA_NPOOLS; i++) {
321		randomdev_hash_init(&fortuna_state.fs_pool[i].fsp_hash);
322		fortuna_state.fs_pool[i].fsp_length = 0;
323	}
324	fortuna_state.fs_reseedcount = 0;
325	/*-
326	 * FS&K - InitializeGenerator()
327	 *      - C = 0
328	 *      - K = 0
329	 */
330	fortuna_state.fs_counter = UINT128_ZERO;
331	explicit_bzero(&fortuna_state.fs_key, sizeof(fortuna_state.fs_key));
332}
333SYSINIT(random_alg, SI_SUB_RANDOM, SI_ORDER_SECOND, random_fortuna_init_alg,
334    NULL);
335
336/*-
337 * FS&K - AddRandomEvent()
338 * Process a single stochastic event off the harvest queue
339 */
340static void
341random_fortuna_process_event(struct harvest_event *event)
342{
343	u_int pl;
344
345	RANDOM_RESEED_LOCK();
346	/*-
347	 * FS&K - P_i = P_i|<harvested stuff>
348	 * Accumulate the event into the appropriate pool
349	 * where each event carries the destination information.
350	 *
351	 * The hash_init() and hash_finish() calls are done in
352	 * random_fortuna_pre_read().
353	 *
354	 * We must be locked against pool state modification which can happen
355	 * during accumulation/reseeding and reading/regating.
356	 */
357	pl = event->he_destination % RANDOM_FORTUNA_NPOOLS;
358	/*
359	 * If a VM generation ID changes (clone and play or VM rewind), we want
360	 * to incorporate that as soon as possible.  Override destingation pool
361	 * for immediate next use.
362	 */
363	if (event->he_source == RANDOM_PURE_VMGENID)
364		pl = 0;
365	/*
366	 * We ignore low entropy static/counter fields towards the end of the
367	 * he_event structure in order to increase measurable entropy when
368	 * conducting SP800-90B entropy analysis measurements of seed material
369	 * fed into PRNG.
370	 * -- wdf
371	 */
372	KASSERT(event->he_size <= sizeof(event->he_entropy),
373	    ("%s: event->he_size: %hhu > sizeof(event->he_entropy): %zu\n",
374	    __func__, event->he_size, sizeof(event->he_entropy)));
375	randomdev_hash_iterate(&fortuna_state.fs_pool[pl].fsp_hash,
376	    &event->he_somecounter, sizeof(event->he_somecounter));
377	randomdev_hash_iterate(&fortuna_state.fs_pool[pl].fsp_hash,
378	    event->he_entropy, event->he_size);
379
380	/*-
381	 * Don't wrap the length.  This is a "saturating" add.
382	 * XXX: FIX!!: We don't actually need lengths for anything but fs_pool[0],
383	 * but it's been useful debugging to see them all.
384	 */
385	fortuna_state.fs_pool[pl].fsp_length = MIN(RANDOM_FORTUNA_MAXPOOLSIZE,
386	    fortuna_state.fs_pool[pl].fsp_length +
387	    sizeof(event->he_somecounter) + event->he_size);
388	RANDOM_RESEED_UNLOCK();
389}
390
/*-
 * FS&K - Reseed()
 * This introduces new key material into the output generator.
 * Additionally it increments the output generator's counter
 * variable C. When C > 0, the output generator is seeded and
 * will deliver output.
 *
 * entropy_data points at 'blockcount' consecutive pool digests of
 * RANDOM_KEYSIZE bytes each; exactly RANDOM_KEYSIZE * blockcount bytes are
 * read from it.  The caller must hold the reseed lock.
 */
static void
random_fortuna_reseed_internal(uint32_t *entropy_data, u_int blockcount)
{
	struct randomdev_hash context;
	uint8_t hash[RANDOM_KEYSIZE];
	const void *keymaterial;
	size_t keysz;
	bool seeded;

	RANDOM_RESEED_ASSERT_LOCK_OWNED();

	/*
	 * The old key is only mixed in if we have been seeded before; on the
	 * very first reseed there is no meaningful K to include.
	 */
	seeded = random_fortuna_seeded_internal();
	if (seeded) {
		randomdev_getkey(&fortuna_state.fs_key, &keymaterial, &keysz);
		KASSERT(keysz == RANDOM_KEYSIZE, ("%s: key size %zu not %u",
			__func__, keysz, (unsigned)RANDOM_KEYSIZE));
	}

	/*-
	 * FS&K - K = Hd(K|s) where Hd(m) is H(H(0^512|m))
	 *      - C = C + 1
	 */
	/* Inner hash: zero prefix, then K (if seeded), then the pool digests. */
	randomdev_hash_init(&context);
	randomdev_hash_iterate(&context, zero_region, RANDOM_ZERO_BLOCKSIZE);
	if (seeded)
		randomdev_hash_iterate(&context, keymaterial, keysz);
	randomdev_hash_iterate(&context, entropy_data, RANDOM_KEYSIZE*blockcount);
	randomdev_hash_finish(&context, hash);
	/* Outer hash: hash the inner digest once more, in place. */
	randomdev_hash_init(&context);
	randomdev_hash_iterate(&context, hash, RANDOM_KEYSIZE);
	randomdev_hash_finish(&context, hash);
	/* Install the result as the new K and scrub the stack copy. */
	randomdev_encrypt_init(&fortuna_state.fs_key, hash);
	explicit_bzero(hash, sizeof(hash));
	/* Unblock the device if this is the first time we are reseeding. */
	if (uint128_is_zero(fortuna_state.fs_counter))
		randomdev_unblock();
	uint128_increment(&fortuna_state.fs_counter);
}
438
439/*-
440 * FS&K - RandomData() (Part 1)
441 * Used to return processed entropy from the PRNG. There is a pre_read
442 * required to be present (but it can be a stub) in order to allow
443 * specific actions at the begin of the read.
444 */
445void
446random_fortuna_pre_read(void)
447{
448#ifdef _KERNEL
449	sbintime_t now;
450#endif
451	struct randomdev_hash context;
452	uint32_t s[RANDOM_FORTUNA_NPOOLS*RANDOM_KEYSIZE_WORDS];
453	uint8_t temp[RANDOM_KEYSIZE];
454	u_int i;
455
456	KASSERT(fortuna_state.fs_minpoolsize > 0, ("random: Fortuna threshold must be > 0"));
457	RANDOM_RESEED_LOCK();
458#ifdef _KERNEL
459	/* FS&K - Use 'getsbinuptime()' to prevent reseed-spamming. */
460	now = getsbinuptime();
461#endif
462
463	if (fortuna_state.fs_pool[0].fsp_length < fortuna_state.fs_minpoolsize
464#ifdef _KERNEL
465	    /*
466	     * FS&K - Use 'getsbinuptime()' to prevent reseed-spamming, but do
467	     * not block initial seeding (fs_lasttime == 0).
468	     */
469	    || (__predict_true(fortuna_state.fs_lasttime != 0) &&
470		now - fortuna_state.fs_lasttime <= SBT_1S/10)
471#endif
472	) {
473		RANDOM_RESEED_UNLOCK();
474		return;
475	}
476
477#ifdef _KERNEL
478	/*
479	 * When set, pretend we do not have enough entropy to reseed yet.
480	 */
481	KFAIL_POINT_CODE(DEBUG_FP, random_fortuna_pre_read, {
482		if (RETURN_VALUE != 0) {
483			RANDOM_RESEED_UNLOCK();
484			return;
485		}
486	});
487#endif
488
489#ifdef _KERNEL
490	fortuna_state.fs_lasttime = now;
491#endif
492
493	/* FS&K - ReseedCNT = ReseedCNT + 1 */
494	fortuna_state.fs_reseedcount++;
495	/* s = \epsilon at start */
496	for (i = 0; i < RANDOM_FORTUNA_NPOOLS; i++) {
497		/* FS&K - if Divides(ReseedCnt, 2^i) ... */
498		if ((fortuna_state.fs_reseedcount % (1 << i)) == 0) {
499			/*-
500			    * FS&K - temp = (P_i)
501			    *      - P_i = \epsilon
502			    *      - s = s|H(temp)
503			    */
504			randomdev_hash_finish(&fortuna_state.fs_pool[i].fsp_hash, temp);
505			randomdev_hash_init(&fortuna_state.fs_pool[i].fsp_hash);
506			fortuna_state.fs_pool[i].fsp_length = 0;
507			randomdev_hash_init(&context);
508			randomdev_hash_iterate(&context, temp, RANDOM_KEYSIZE);
509			randomdev_hash_finish(&context, s + i*RANDOM_KEYSIZE_WORDS);
510		} else
511			break;
512	}
513#ifdef _KERNEL
514	SDT_PROBE2(random, fortuna, event_processor, debug, fortuna_state.fs_reseedcount, fortuna_state.fs_pool);
515#endif
516	/* FS&K */
517	random_fortuna_reseed_internal(s, i);
518	RANDOM_RESEED_UNLOCK();
519
520	/* Clean up and secure */
521	explicit_bzero(s, sizeof(s));
522	explicit_bzero(temp, sizeof(temp));
523}
524
525/*
526 * This is basically GenerateBlocks() from FS&K.
527 *
 * It differs in three ways:
529 *
530 * 1. Chacha20 is tolerant of non-block-multiple request sizes, so we do not
531 * need to handle any remainder bytes specially and can just pass the length
532 * directly to the PRF construction; and
533 *
534 * 2. Chacha20 is a 512-bit block size cipher (whereas AES has 128-bit block
535 * size, regardless of key size).  This means Chacha does not require re-keying
536 * every 1MiB.  This is implied by the math in FS&K 9.4 and mentioned
537 * explicitly in the conclusion, "If we had a block cipher with a 256-bit [or
538 * greater] block size, then the collisions would not have been an issue at
539 * all" (p. 144).
540 *
541 * 3. In conventional ("locked") mode, we produce a maximum of PAGE_SIZE output
542 * at a time before dropping the lock, to not bully the lock especially.  This
543 * has been the status quo since 2015 (r284959).
544 *
545 * The upstream caller random_fortuna_read is responsible for zeroing out
546 * sensitive buffers provided as parameters to this routine.
547 */
/*
 * Readable names for the boolean 'locked' argument of
 * random_fortuna_genbytes().
 */
enum {
	FORTUNA_UNLOCKED = false,	/* caller does not hold the reseed lock */
	FORTUNA_LOCKED = true		/* caller holds the reseed lock */
};
/*
 * Generate 'bytecount' bytes of keystream into 'buf'.
 *
 * buf       - output buffer.
 * bytecount - number of bytes wanted; need not be a block multiple.
 * newkey    - caller-provided scratch space of RANDOM_KEYSIZE bytes used
 *             when rekeying; the caller zeroes it afterwards.
 * p_counter - counter to consume; advanced by randomdev_keystream().
 * p_key     - key to use; replaced whenever this routine rekeys.
 * locked    - FORTUNA_LOCKED if the caller holds the reseed lock (and is
 *             passing the global C and K), FORTUNA_UNLOCKED otherwise.
 *
 * Lock contract: in locked mode the routine is entered with the reseed lock
 * held and RETURNS WITH IT RELEASED; it may also drop and retake the lock
 * between chunks.  In unlocked mode the lock is never touched.
 */
static void
random_fortuna_genbytes(uint8_t *buf, size_t bytecount,
    uint8_t newkey[static RANDOM_KEYSIZE], uint128_t *p_counter,
    union randomdev_key *p_key, bool locked)
{
	uint8_t remainder_buf[RANDOM_BLOCKSIZE];
	size_t chunk_size;

	if (locked)
		RANDOM_RESEED_ASSERT_LOCK_OWNED();
	else
		RANDOM_RESEED_ASSERT_LOCK_NOT_OWNED();

	/*
	 * Easy case: don't have to worry about bullying the global mutex,
	 * don't have to worry about rekeying Chacha; API is byte-oriented.
	 */
	if (!locked && random_chachamode) {
		randomdev_keystream(p_key, p_counter, buf, bytecount);
		return;
	}

	if (locked) {
		/*
		 * While holding the global lock, limit PRF generation to
		 * mitigate, but not eliminate, bullying symptoms.
		 */
		chunk_size = PAGE_SIZE;
	} else {
		/*
		 * 128-bit block ciphers like AES must be re-keyed at 1MB
		 * intervals to avoid unacceptable statistical differentiation
		 * from true random data (FS&K 9.4, p. 143-144).
		 */
		MPASS(!random_chachamode);
		chunk_size = RANDOM_FORTUNA_MAX_READ;
	}

	/*
	 * Never generate more than was asked for; in AES mode only whole
	 * blocks are produced by the loop.  This can leave chunk_size == 0
	 * (request shorter than one block), in which case the loop is skipped.
	 */
	chunk_size = MIN(bytecount, chunk_size);
	if (!random_chachamode)
		chunk_size = rounddown(chunk_size, RANDOM_BLOCKSIZE);

	while (bytecount >= chunk_size && chunk_size > 0) {
		randomdev_keystream(p_key, p_counter, buf, chunk_size);

		buf += chunk_size;
		bytecount -= chunk_size;

		/* We have to rekey if there is any data remaining to be
		 * generated, in two scenarios:
		 *
		 * locked: we need to rekey before we unlock and release the
		 * global state to another consumer; or
		 *
		 * unlocked: we need to rekey because we're in AES mode and are
		 * required to rekey at chunk_size==1MB.  But we do not need to
		 * rekey during the last trailing <1MB chunk.
		 */
		if (bytecount > 0) {
			if (locked || chunk_size == RANDOM_FORTUNA_MAX_READ) {
				randomdev_keystream(p_key, p_counter, newkey,
				    RANDOM_KEYSIZE);
				randomdev_encrypt_init(p_key, newkey);
			}

			/*
			 * If we're holding the global lock, yield it briefly
			 * now.
			 */
			if (locked) {
				RANDOM_RESEED_UNLOCK();
				RANDOM_RESEED_LOCK();
			}

			/*
			 * At the trailing end, scale down chunk_size from 1MB or
			 * PAGE_SIZE to all remaining full blocks (AES) or all
			 * remaining bytes (Chacha).  In AES mode, if less than
			 * one full block remains, leave the loop and let the
			 * partial-block code below handle it.
			 */
			if (bytecount < chunk_size) {
				if (random_chachamode)
					chunk_size = bytecount;
				else if (bytecount >= RANDOM_BLOCKSIZE)
					chunk_size = rounddown(bytecount,
					    RANDOM_BLOCKSIZE);
				else
					break;
			}
		}
	}

	/*
	 * Generate any partial AES block remaining into a temporary buffer and
	 * copy the desired substring out.
	 */
	if (bytecount > 0) {
		MPASS(!random_chachamode);

		randomdev_keystream(p_key, p_counter, remainder_buf,
		    sizeof(remainder_buf));
	}

	/*
	 * In locked mode, re-key global K before dropping the lock, which we
	 * don't need for memcpy/bzero below.
	 */
	if (locked) {
		randomdev_keystream(p_key, p_counter, newkey, RANDOM_KEYSIZE);
		randomdev_encrypt_init(p_key, newkey);
		RANDOM_RESEED_UNLOCK();
	}

	/* Hand out only the requested prefix of the last block, then scrub. */
	if (bytecount > 0) {
		memcpy(buf, remainder_buf, bytecount);
		explicit_bzero(remainder_buf, sizeof(remainder_buf));
	}
}
669
670
/*
 * Handle only "concurrency-enabled" Fortuna reads to simplify logic.
 *
 * Under the reseed lock this only snapshots C and K, advances the global C
 * past the range this request will consume, and rekeys the global K.  The
 * caller's output is then generated from the private snapshot with the lock
 * released.
 *
 * buf/bytecount describe the output buffer; newkey is RANDOM_KEYSIZE bytes
 * of caller-provided scratch space.
 *
 * Caller (random_fortuna_read) is responsible for zeroing out sensitive
 * buffers provided as parameters to this routine.
 */
static void
random_fortuna_read_concurrent(uint8_t *buf, size_t bytecount,
    uint8_t newkey[static RANDOM_KEYSIZE])
{
	union randomdev_key key_copy;
	uint128_t counter_copy;
	size_t blockcount;

	MPASS(fortuna_concurrent_read);

	/*
	 * Compute number of blocks required for the PRF request ('delta C').
	 * We will step the global counter 'C' by this number under lock, and
	 * then actually consume the counter values outside the lock.
	 *
	 * This ensures that contemporaneous but independent requests for
	 * randomness receive distinct 'C' values and thus independent PRF
	 * results.
	 */
	if (random_chachamode) {
		blockcount = howmany(bytecount, CHACHA_BLOCKLEN);
	} else {
		blockcount = howmany(bytecount, RANDOM_BLOCKSIZE);

		/*
		 * Need to account for the additional blocks generated by
		 * rekeying when updating the global fs_counter.
		 */
		blockcount += RANDOM_KEYS_PER_BLOCK *
		    (blockcount / RANDOM_FORTUNA_BLOCKS_PER_KEY);
	}

	RANDOM_RESEED_LOCK();
	KASSERT(!uint128_is_zero(fortuna_state.fs_counter), ("FS&K: C != 0"));

	/*
	 * Save the original counter and key values that will be used as the
	 * PRF for this particular consumer.
	 */
	memcpy(&counter_copy, &fortuna_state.fs_counter, sizeof(counter_copy));
	memcpy(&key_copy, &fortuna_state.fs_key, sizeof(key_copy));

	/*
	 * Step the counter as if we had generated 'blockcount' blocks for this
	 * consumer.  I.e., ensure that the next consumer gets an independent
	 * range of counter values once we drop the global lock.
	 */
	uint128_add64(&fortuna_state.fs_counter, blockcount);

	/*
	 * We still need to Rekey the global 'K' between independent calls;
	 * this is no different from conventional Fortuna.  Note that
	 * 'randomdev_keystream()' will step the fs_counter 'C' appropriately
	 * for the blocks needed for the 'newkey'.
	 *
	 * (This is part of PseudoRandomData() in FS&K, 9.4.4.)
	 */
	randomdev_keystream(&fortuna_state.fs_key, &fortuna_state.fs_counter,
	    newkey, RANDOM_KEYSIZE);
	randomdev_encrypt_init(&fortuna_state.fs_key, newkey);

	/*
	 * We have everything we need to generate a unique PRF for this
	 * consumer without touching global state.
	 */
	RANDOM_RESEED_UNLOCK();

	random_fortuna_genbytes(buf, bytecount, newkey, &counter_copy,
	    &key_copy, FORTUNA_UNLOCKED);
	RANDOM_RESEED_ASSERT_LOCK_NOT_OWNED();

	/* Scrub the private snapshot of C and K from the stack. */
	explicit_bzero(&counter_copy, sizeof(counter_copy));
	explicit_bzero(&key_copy, sizeof(key_copy));
}
751
752/*-
753 * FS&K - RandomData() (Part 2)
754 * Main read from Fortuna, continued. May be called multiple times after
755 * the random_fortuna_pre_read() above.
756 *
757 * The supplied buf MAY not be a multiple of RANDOM_BLOCKSIZE in size; it is
758 * the responsibility of the algorithm to accommodate partial block reads, if a
759 * block output mode is used.
760 */
761void
762random_fortuna_read(uint8_t *buf, size_t bytecount)
763{
764	uint8_t newkey[RANDOM_KEYSIZE];
765
766	if (fortuna_concurrent_read) {
767		random_fortuna_read_concurrent(buf, bytecount, newkey);
768		goto out;
769	}
770
771	RANDOM_RESEED_LOCK();
772	KASSERT(!uint128_is_zero(fortuna_state.fs_counter), ("FS&K: C != 0"));
773
774	random_fortuna_genbytes(buf, bytecount, newkey,
775	    &fortuna_state.fs_counter, &fortuna_state.fs_key, FORTUNA_LOCKED);
776	/* Returns unlocked */
777	RANDOM_RESEED_ASSERT_LOCK_NOT_OWNED();
778
779out:
780	explicit_bzero(newkey, sizeof(newkey));
781}
782
#ifdef _KERNEL
/*
 * Testing aid: while this is set, random_fortuna_seeded() reports "not
 * seeded" regardless of the real generator state.  Off by default.
 */
static bool block_seeded_status = false;
SYSCTL_BOOL(_kern_random, OID_AUTO, block_seeded_status, CTLFLAG_RWTUN,
    &block_seeded_status, 0,
    "If non-zero, pretend Fortuna is in an unseeded state.  By setting "
    "this as a tunable, boot can be tested as if the random device is "
    "unavailable.");
#endif
791
792static bool
793random_fortuna_seeded_internal(void)
794{
795	return (!uint128_is_zero(fortuna_state.fs_counter));
796}
797
/*
 * Seeded check exported through random_alg_context.
 *
 * Returns false unconditionally while the block_seeded_status test knob is
 * set (kernel only).  Otherwise returns true if the generator has been
 * seeded; if it has not, one reseed attempt is made first and the state is
 * checked again.
 */
static bool
random_fortuna_seeded(void)
{
#ifdef _KERNEL
	if (block_seeded_status)
		return (false);
#endif

	if (!__predict_true(random_fortuna_seeded_internal())) {
		/*
		 * Maybe we have enough entropy in the zeroth pool but just
		 * haven't kicked the initial seed step.  Do so now, then
		 * look again.
		 */
		random_fortuna_pre_read();
		return (random_fortuna_seeded_internal());
	}

	return (true);
}
818