1/* $NetBSD: octeon_rnm.c,v 1.16 2023/03/21 22:07:29 riastradh Exp $ */ 2 3/* 4 * Copyright (c) 2007 Internet Initiative Japan, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29/* 30 * Cavium Octeon Random Number Generator / Random Number Memory `RNM' 31 * 32 * The RNM unit consists of: 33 * 34 * 1. 128 ring oscillators 35 * 2. an LFSR/SHA-1 conditioner 36 * 3. 
a 512-byte FIFO
 *
 * When the unit is enabled, there are three modes of operation:
 *
 *	(a) deterministic: the ring oscillators are disabled and the
 *	    LFSR/SHA-1 conditioner operates on fixed inputs to give
 *	    reproducible results for testing,
 *
 *	(b) conditioned entropy: the ring oscillators are enabled and
 *	    samples from them are fed through the LFSR/SHA-1
 *	    conditioner before being put into the FIFO, and
 *
 *	(c) raw entropy: the ring oscillators are enabled, and a group
 *	    of eight of them selected at any one time is sampled and
 *	    fed into the FIFO.
 *
 * Details:
 *
 *	- The FIFO is refilled whenever we read out of it, either with
 *	  a load address or an IOBDMA operation.
 *
 *	- The conditioner takes 81 cycles to produce a 64-bit block of
 *	  output in the FIFO whether in deterministic or conditioned
 *	  entropy mode, each block consisting of the first 64 bits of a
 *	  SHA-1 hash.
 *
 *	- A group of eight ring oscillators takes 8 cycles to produce a
 *	  64-bit block of output in the FIFO in raw entropy mode, each
 *	  block consisting of eight consecutive samples from each RO in
 *	  parallel.
 *
 * The first sample of each RO always seems to be zero.  Further,
 * consecutive samples from a single ring oscillator are not
 * independent, so naive debiasing like a von Neumann extractor
 * falls flat on its face.  And parallel ring oscillators powered
 * by the same source may not be independent either, if they end
 * up locked.
 *
 * We read out one FIFO's worth of raw samples from groups of 8
 * ring oscillators at a time, of 128 total, by going through them
 * round robin.  We take 32 consecutive samples from each ring
 * oscillator in a group of 8 in parallel before we count one bit
 * of entropy.  One 4Kbit FIFO read therefore counts for 16 bits of
 * entropy, so to get 256 bits of entropy, we read 4Kbit of data
 * from each of the sixteen 8-RO groups.
80 * 81 * We could use the on-board LFSR/SHA-1 conditioner like the Linux 82 * driver written by Cavium does, but it's not clear how many RO 83 * samples go into the conditioner, and our entropy pool is a 84 * perfectly good conditioner itself, so it seems there is little 85 * advantage -- other than expedience -- to using the LFSR/SHA-1 86 * conditioner. All the manual says is that it samples 125 of the 87 * 128 ROs. But the Cavium SHA-1 CPU instruction is advertised to 88 * have a latency of 100 cycles, so it seems implausible that much 89 * more than one sample from each RO could be squeezed in there. 90 * 91 * The hardware exposes only 64 bits of each SHA-1 hash, and the 92 * Linux driver uses 32 bits of that -- which, if treated as full 93 * entropy, would mean an assessment of 3.9 bits of RO samples to 94 * get 1 bit of entropy, whereas we take 256 bits of RO samples to 95 * get one bit of entropy, so this seems reasonably conservative. 96 * 97 * Reference: Cavium Networks OCTEON Plus CN50XX Hardware Reference 98 * Manual, CN50XX-HM-0.99E PRELIMINARY, July 2008. 
99 */ 100 101#include <sys/cdefs.h> 102__KERNEL_RCSID(0, "$NetBSD: octeon_rnm.c,v 1.16 2023/03/21 22:07:29 riastradh Exp $"); 103 104#include <sys/param.h> 105#include <sys/device.h> 106#include <sys/kernel.h> 107#include <sys/rndsource.h> 108#include <sys/systm.h> 109 110#include <mips/locore.h> 111#include <mips/cavium/octeonreg.h> 112#include <mips/cavium/octeonvar.h> 113#include <mips/cavium/include/iobusvar.h> 114#include <mips/cavium/dev/octeon_rnmreg.h> 115#include <mips/cavium/dev/octeon_corereg.h> 116 117#include <sys/bus.h> 118 119//#define OCTRNM_DEBUG 120 121#define ENT_DELAY_CLOCK 8 /* cycles for each 64-bit RO sample batch */ 122#define LFSR_DELAY_CLOCK 81 /* cycles to fill LFSR buffer */ 123#define SHA1_DELAY_CLOCK 81 /* cycles to compute SHA-1 output */ 124#define NROGROUPS 16 125#define RNG_FIFO_WORDS (512/sizeof(uint64_t)) 126 127struct octrnm_softc { 128 uint64_t sc_sample[RNG_FIFO_WORDS]; 129 bus_space_tag_t sc_bust; 130 bus_space_handle_t sc_regh; 131 krndsource_t sc_rndsrc; /* /dev/random source */ 132 unsigned sc_rogroup; 133}; 134 135static int octrnm_match(device_t, struct cfdata *, void *); 136static void octrnm_attach(device_t, device_t, void *); 137static void octrnm_rng(size_t, void *); 138static void octrnm_reset(struct octrnm_softc *); 139static void octrnm_conditioned_deterministic(struct octrnm_softc *); 140static void octrnm_conditioned_entropy(struct octrnm_softc *); 141static void octrnm_raw_entropy(struct octrnm_softc *, unsigned); 142static uint64_t octrnm_load(struct octrnm_softc *); 143static void octrnm_iobdma(struct octrnm_softc *, uint64_t *, unsigned); 144static void octrnm_delay(uint32_t); 145 146CFATTACH_DECL_NEW(octrnm, sizeof(struct octrnm_softc), 147 octrnm_match, octrnm_attach, NULL, NULL); 148 149static int 150octrnm_match(device_t parent, struct cfdata *cf, void *aux) 151{ 152 struct iobus_attach_args *aa = aux; 153 154 if (strcmp(cf->cf_name, aa->aa_name) != 0) 155 return 0; 156 if (cf->cf_unit != aa->aa_unitno) 
157 return 0; 158 return 1; 159} 160 161static void 162octrnm_attach(device_t parent, device_t self, void *aux) 163{ 164 struct octrnm_softc *sc = device_private(self); 165 struct iobus_attach_args *aa = aux; 166 uint64_t bist_status, sample, expected = UINT64_C(0xd654ff35fadf866b); 167 168 aprint_normal("\n"); 169 170 /* Map the device registers, all two of them. */ 171 sc->sc_bust = aa->aa_bust; 172 if (bus_space_map(aa->aa_bust, aa->aa_unit->addr, RNM_SIZE, 173 0, &sc->sc_regh) != 0) { 174 aprint_error_dev(self, "unable to map device\n"); 175 return; 176 } 177 178 /* Verify that the built-in self-test succeeded. */ 179 bist_status = bus_space_read_8(sc->sc_bust, sc->sc_regh, 180 RNM_BIST_STATUS_OFFSET); 181 if (bist_status) { 182 aprint_error_dev(self, "RNG built in self test failed: %#lx\n", 183 bist_status); 184 return; 185 } 186 187 /* 188 * Reset the core, enable the RNG engine without entropy, wait 189 * 81 cycles for it to produce a single sample, and draw the 190 * deterministic sample to test. 191 * 192 * XXX Verify that the output matches the SHA-1 computation 193 * described by the data sheet, not just a known answer. 194 */ 195 octrnm_reset(sc); 196 octrnm_conditioned_deterministic(sc); 197 octrnm_delay(LFSR_DELAY_CLOCK + SHA1_DELAY_CLOCK); 198 sample = octrnm_load(sc); 199 if (sample != expected) 200 aprint_error_dev(self, "self-test: read %016"PRIx64"," 201 " expected %016"PRIx64, sample, expected); 202 203 /* 204 * Reset the core again to clear the FIFO, and enable the RNG 205 * engine with entropy exposed directly. Start from the first 206 * group of ring oscillators; as we gather samples we will 207 * rotate through the rest of them. 208 */ 209 octrnm_reset(sc); 210 sc->sc_rogroup = 0; 211 octrnm_raw_entropy(sc, sc->sc_rogroup); 212 octrnm_delay(ENT_DELAY_CLOCK*RNG_FIFO_WORDS); 213 214 /* Attach the rndsource. 
*/ 215 rndsource_setcb(&sc->sc_rndsrc, octrnm_rng, sc); 216 rnd_attach_source(&sc->sc_rndsrc, device_xname(self), RND_TYPE_RNG, 217 RND_FLAG_DEFAULT | RND_FLAG_HASCB); 218} 219 220static void 221octrnm_rng(size_t nbytes, void *vsc) 222{ 223 const unsigned BPB = 256; /* bits of data per bit of entropy */ 224 struct octrnm_softc *sc = vsc; 225 uint64_t *samplepos; 226 size_t needed = NBBY*nbytes; 227 unsigned i; 228 229 /* Sample the ring oscillators round-robin. */ 230 while (needed) { 231 /* 232 * Switch to the next RO group once we drain the FIFO. 233 * By the time rnd_add_data is done, we will have 234 * processed all 512 bytes of the FIFO. We assume it 235 * takes at least one cycle per byte (realistically, 236 * more like ~80cpb to draw from the FIFO and then 237 * process it with rnd_add_data), so there is no need 238 * for any other delays. 239 */ 240 sc->sc_rogroup++; 241 sc->sc_rogroup %= NROGROUPS; 242 octrnm_raw_entropy(sc, sc->sc_rogroup); 243 244 /* 245 * Gather quarter the FIFO at a time -- we are limited 246 * to 128 bytes because of limits on the CVMSEG buffer. 247 */ 248 CTASSERT(sizeof sc->sc_sample == 512); 249 CTASSERT(__arraycount(sc->sc_sample) == RNG_FIFO_WORDS); 250 for (samplepos = sc->sc_sample, i = 0; i < 4; i++) { 251 octrnm_iobdma(sc, samplepos, RNG_FIFO_WORDS / 4); 252 samplepos += RNG_FIFO_WORDS / 4; 253 } 254#ifdef OCTRNM_DEBUG 255 hexdump(printf, "rnm", sc->sc_sample, sizeof sc->sc_sample); 256#endif 257 rnd_add_data_sync(&sc->sc_rndsrc, sc->sc_sample, 258 sizeof sc->sc_sample, NBBY*sizeof(sc->sc_sample)/BPB); 259 needed -= MIN(needed, MAX(1, NBBY*sizeof(sc->sc_sample)/BPB)); 260 261 /* Now's a good time to yield. */ 262 preempt_point(); 263 } 264 265 /* Zero the sample. */ 266 explicit_memset(sc->sc_sample, 0, sizeof sc->sc_sample); 267} 268 269/* 270 * octrnm_reset(sc) 271 * 272 * Reset the RNM unit, disabling it and clearing the FIFO. 
273 */ 274static void 275octrnm_reset(struct octrnm_softc *sc) 276{ 277 278 bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET, 279 RNM_CTL_STATUS_RNG_RST|RNM_CTL_STATUS_RNM_RST); 280} 281 282/* 283 * octrnm_conditioned_deterministic(sc) 284 * 285 * Switch the RNM unit into the deterministic LFSR/SHA-1 mode with 286 * no entropy, for the next data loaded into the FIFO. 287 */ 288static void 289octrnm_conditioned_deterministic(struct octrnm_softc *sc) 290{ 291 292 bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET, 293 RNM_CTL_STATUS_RNG_EN); 294} 295 296/* 297 * octrnm_conditioned_entropy(sc) 298 * 299 * Switch the RNM unit to generate ring oscillator samples 300 * conditioned with an LFSR/SHA-1, for the next data loaded into 301 * the FIFO. 302 */ 303static void __unused 304octrnm_conditioned_entropy(struct octrnm_softc *sc) 305{ 306 307 bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET, 308 RNM_CTL_STATUS_RNG_EN|RNM_CTL_STATUS_ENT_EN); 309} 310 311/* 312 * octrnm_raw_entropy(sc, rogroup) 313 * 314 * Switch the RNM unit to generate raw ring oscillator samples 315 * from the specified group of eight ring oscillator. 316 */ 317static void 318octrnm_raw_entropy(struct octrnm_softc *sc, unsigned rogroup) 319{ 320 uint64_t ctl = 0; 321 322 ctl |= RNM_CTL_STATUS_RNG_EN; /* enable FIFO */ 323 ctl |= RNM_CTL_STATUS_ENT_EN; /* enable entropy source */ 324 ctl |= RNM_CTL_STATUS_EXP_ENT; /* expose entropy without LFSR/SHA-1 */ 325 ctl |= __SHIFTIN(rogroup, RNM_CTL_STATUS_ENT_SEL_MASK); 326 327 bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET, 328 ctl); 329} 330 331/* 332 * octrnm_load(sc) 333 * 334 * Load a single 64-bit word out of the FIFO. 
335 */ 336static uint64_t 337octrnm_load(struct octrnm_softc *sc) 338{ 339 uint64_t addr = OCTEON_ADDR_IO_DID(RNM_MAJOR_DID, RNM_SUB_DID); 340 341 return octeon_xkphys_read_8(addr); 342} 343 344/* 345 * octrnm_iobdma(sc, buf, nwords) 346 * 347 * Load nwords, at most 32, out of the FIFO into buf. 348 */ 349static void 350octrnm_iobdma(struct octrnm_softc *sc, uint64_t *buf, unsigned nwords) 351{ 352 /* ``scraddr'' part is index in 64-bit words, not address */ 353 size_t scraddr = OCTEON_CVMSEG_OFFSET(csm_rnm); 354 uint64_t iobdma = IOBDMA_CREATE(RNM_MAJOR_DID, RNM_SUB_DID, 355 scraddr / sizeof(uint64_t), nwords, 0); 356 357 KASSERT(nwords < 128); /* iobdma address restriction */ 358 KASSERT(nwords <= CVMSEG_LM_RNM_SIZE); /* size of CVMSEG LM buffer */ 359 360 octeon_iobdma_write_8(iobdma); 361 OCTEON_SYNCIOBDMA; 362 for (; nwords --> 0; scraddr += 8) 363 *buf++ = octeon_cvmseg_read_8(scraddr); 364} 365 366/* 367 * octrnm_delay(ncycles) 368 * 369 * Wait ncycles, at most UINT32_MAX/2 so we behave reasonably even 370 * if the cycle counter rolls over. 371 */ 372static void 373octrnm_delay(uint32_t ncycles) 374{ 375 uint32_t deadline = mips3_cp0_count_read() + ncycles; 376 377 KASSERT(ncycles <= UINT32_MAX/2); 378 379 while ((deadline - mips3_cp0_count_read()) < ncycles) 380 continue; 381} 382