safe.c revision 233024
1/*-
2 * Copyright (c) 2003 Sam Leffler, Errno Consulting
3 * Copyright (c) 2003 Global Technology Associates, Inc.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: stable/9/sys/dev/safe/safe.c 233024 2012-03-16 08:46:58Z scottl $");
30
31/*
32 * SafeNet SafeXcel-1141 hardware crypto accelerator
33 */
34#include "opt_safe.h"
35
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/proc.h>
39#include <sys/errno.h>
40#include <sys/malloc.h>
41#include <sys/kernel.h>
42#include <sys/mbuf.h>
43#include <sys/module.h>
44#include <sys/lock.h>
45#include <sys/mutex.h>
46#include <sys/sysctl.h>
47#include <sys/endian.h>
48
49#include <vm/vm.h>
50#include <vm/pmap.h>
51
52#include <machine/bus.h>
53#include <machine/resource.h>
54#include <sys/bus.h>
55#include <sys/rman.h>
56
57#include <crypto/sha1.h>
58#include <opencrypto/cryptodev.h>
59#include <opencrypto/cryptosoft.h>
60#include <sys/md5.h>
61#include <sys/random.h>
62#include <sys/kobj.h>
63
64#include "cryptodev_if.h"
65
66#include <dev/pci/pcivar.h>
67#include <dev/pci/pcireg.h>
68
69#ifdef SAFE_RNDTEST
70#include <dev/rndtest/rndtest.h>
71#endif
72#include <dev/safe/safereg.h>
73#include <dev/safe/safevar.h>
74
/*
 * Fallback used only when no earlier header has defined bswap32:
 * alias it to NTOHL.
 */
#if !defined(bswap32)
#define	bswap32	NTOHL
#endif
78
79/*
80 * Prototypes and count for the pci_device structure
81 */
82static	int safe_probe(device_t);
83static	int safe_attach(device_t);
84static	int safe_detach(device_t);
85static	int safe_suspend(device_t);
86static	int safe_resume(device_t);
87static	int safe_shutdown(device_t);
88
89static	int safe_newsession(device_t, u_int32_t *, struct cryptoini *);
90static	int safe_freesession(device_t, u_int64_t);
91static	int safe_process(device_t, struct cryptop *, int);
92
93static device_method_t safe_methods[] = {
94	/* Device interface */
95	DEVMETHOD(device_probe,		safe_probe),
96	DEVMETHOD(device_attach,	safe_attach),
97	DEVMETHOD(device_detach,	safe_detach),
98	DEVMETHOD(device_suspend,	safe_suspend),
99	DEVMETHOD(device_resume,	safe_resume),
100	DEVMETHOD(device_shutdown,	safe_shutdown),
101
102	/* crypto device methods */
103	DEVMETHOD(cryptodev_newsession,	safe_newsession),
104	DEVMETHOD(cryptodev_freesession,safe_freesession),
105	DEVMETHOD(cryptodev_process,	safe_process),
106
107	DEVMETHOD_END
108};
109static driver_t safe_driver = {
110	"safe",
111	safe_methods,
112	sizeof (struct safe_softc)
113};
114static devclass_t safe_devclass;
115
116DRIVER_MODULE(safe, pci, safe_driver, safe_devclass, 0, 0);
117MODULE_DEPEND(safe, crypto, 1, 1, 1);
118#ifdef SAFE_RNDTEST
119MODULE_DEPEND(safe, rndtest, 1, 1, 1);
120#endif
121
122static	void safe_intr(void *);
123static	void safe_callback(struct safe_softc *, struct safe_ringentry *);
124static	void safe_feed(struct safe_softc *, struct safe_ringentry *);
125static	void safe_mcopy(struct mbuf *, struct mbuf *, u_int);
126#ifndef SAFE_NO_RNG
127static	void safe_rng_init(struct safe_softc *);
128static	void safe_rng(void *);
129#endif /* SAFE_NO_RNG */
130static	int safe_dma_malloc(struct safe_softc *, bus_size_t,
131	        struct safe_dma_alloc *, int);
132#define	safe_dma_sync(_dma, _flags) \
133	bus_dmamap_sync((_dma)->dma_tag, (_dma)->dma_map, (_flags))
134static	void safe_dma_free(struct safe_softc *, struct safe_dma_alloc *);
135static	int safe_dmamap_aligned(const struct safe_operand *);
136static	int safe_dmamap_uniform(const struct safe_operand *);
137
138static	void safe_reset_board(struct safe_softc *);
139static	void safe_init_board(struct safe_softc *);
140static	void safe_init_pciregs(device_t dev);
141static	void safe_cleanchip(struct safe_softc *);
142static	void safe_totalreset(struct safe_softc *);
143
144static	int safe_free_entry(struct safe_softc *, struct safe_ringentry *);
145
146SYSCTL_NODE(_hw, OID_AUTO, safe, CTLFLAG_RD, 0, "SafeNet driver parameters");
147
#ifdef SAFE_DEBUG
static	void safe_dump_dmastatus(struct safe_softc *, const char *);
static	void safe_dump_ringstate(struct safe_softc *, const char *);
static	void safe_dump_intrstate(struct safe_softc *, const char *);
static	void safe_dump_request(struct safe_softc *, const char *,
		struct safe_ringentry *);

static	struct safe_softc *safec;		/* for use by hw.safe.dump */

static	int safe_debug = 0;
SYSCTL_INT(_hw_safe, OID_AUTO, debug, CTLFLAG_RW, &safe_debug,
	    0, "control debugging msgs");
/*
 * Print a debug message when hw.safe.debug is non-zero.  The argument
 * is a parenthesized printf argument list, e.g. DPRINTF(("x=%d\n", x)).
 * The expansion is wrapped in do/while(0) so that it is a single
 * statement: with a bare `if', an `else' following the macro would
 * attach to the macro's hidden `if' instead of the caller's.
 */
#define	DPRINTF(_x)	do { if (safe_debug) printf _x; } while (0)
#else
/* Debugging disabled: DPRINTF expands to nothing. */
#define	DPRINTF(_x)
#endif
164
/* Read the 32-bit chip register at offset `r'. */
#define	READ_REG(sc, r) \
	bus_space_read_4((sc)->sc_st, (sc)->sc_sh, (r))

/* Write the 32-bit value `val' to the chip register at offset `reg'. */
#define	WRITE_REG(sc, reg, val) \
	bus_space_write_4((sc)->sc_st, (sc)->sc_sh, (reg), (val))
170
171struct safe_stats safestats;
172SYSCTL_STRUCT(_hw_safe, OID_AUTO, stats, CTLFLAG_RD, &safestats,
173	    safe_stats, "driver statistics");
174#ifndef SAFE_NO_RNG
175static	int safe_rnginterval = 1;		/* poll once a second */
176SYSCTL_INT(_hw_safe, OID_AUTO, rnginterval, CTLFLAG_RW, &safe_rnginterval,
177	    0, "RNG polling interval (secs)");
178static	int safe_rngbufsize = 16;		/* 64 bytes each poll  */
179SYSCTL_INT(_hw_safe, OID_AUTO, rngbufsize, CTLFLAG_RW, &safe_rngbufsize,
180	    0, "RNG polling buffer size (32-bit words)");
181static	int safe_rngmaxalarm = 8;		/* max alarms before reset */
182SYSCTL_INT(_hw_safe, OID_AUTO, rngmaxalarm, CTLFLAG_RW, &safe_rngmaxalarm,
183	    0, "RNG max alarms before reset");
184#endif /* SAFE_NO_RNG */
185
186static int
187safe_probe(device_t dev)
188{
189	if (pci_get_vendor(dev) == PCI_VENDOR_SAFENET &&
190	    pci_get_device(dev) == PCI_PRODUCT_SAFEXCEL)
191		return (BUS_PROBE_DEFAULT);
192	return (ENXIO);
193}
194
195static const char*
196safe_partname(struct safe_softc *sc)
197{
198	/* XXX sprintf numbers when not decoded */
199	switch (pci_get_vendor(sc->sc_dev)) {
200	case PCI_VENDOR_SAFENET:
201		switch (pci_get_device(sc->sc_dev)) {
202		case PCI_PRODUCT_SAFEXCEL: return "SafeNet SafeXcel-1141";
203		}
204		return "SafeNet unknown-part";
205	}
206	return "Unknown-vendor unknown-part";
207}
208
209#ifndef SAFE_NO_RNG
210static void
211default_harvest(struct rndtest_state *rsp, void *buf, u_int count)
212{
213	random_harvest(buf, count, count*NBBY, 0, RANDOM_PURE);
214}
215#endif /* SAFE_NO_RNG */
216
217static int
218safe_attach(device_t dev)
219{
220	struct safe_softc *sc = device_get_softc(dev);
221	u_int32_t raddr;
222	u_int32_t cmd, i, devinfo;
223	int rid;
224
225	bzero(sc, sizeof (*sc));
226	sc->sc_dev = dev;
227
228	/* XXX handle power management */
229
230	cmd = pci_read_config(dev, PCIR_COMMAND, 4);
231	cmd |= PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN;
232	pci_write_config(dev, PCIR_COMMAND, cmd, 4);
233	cmd = pci_read_config(dev, PCIR_COMMAND, 4);
234
235	if (!(cmd & PCIM_CMD_MEMEN)) {
236		device_printf(dev, "failed to enable memory mapping\n");
237		goto bad;
238	}
239
240	if (!(cmd & PCIM_CMD_BUSMASTEREN)) {
241		device_printf(dev, "failed to enable bus mastering\n");
242		goto bad;
243	}
244
245	/*
246	 * Setup memory-mapping of PCI registers.
247	 */
248	rid = BS_BAR;
249	sc->sc_sr = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
250					   RF_ACTIVE);
251	if (sc->sc_sr == NULL) {
252		device_printf(dev, "cannot map register space\n");
253		goto bad;
254	}
255	sc->sc_st = rman_get_bustag(sc->sc_sr);
256	sc->sc_sh = rman_get_bushandle(sc->sc_sr);
257
258	/*
259	 * Arrange interrupt line.
260	 */
261	rid = 0;
262	sc->sc_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
263					    RF_SHAREABLE|RF_ACTIVE);
264	if (sc->sc_irq == NULL) {
265		device_printf(dev, "could not map interrupt\n");
266		goto bad1;
267	}
268	/*
269	 * NB: Network code assumes we are blocked with splimp()
270	 *     so make sure the IRQ is mapped appropriately.
271	 */
272	if (bus_setup_intr(dev, sc->sc_irq, INTR_TYPE_NET | INTR_MPSAFE,
273			   NULL, safe_intr, sc, &sc->sc_ih)) {
274		device_printf(dev, "could not establish interrupt\n");
275		goto bad2;
276	}
277
278	sc->sc_cid = crypto_get_driverid(dev, CRYPTOCAP_F_HARDWARE);
279	if (sc->sc_cid < 0) {
280		device_printf(dev, "could not get crypto driver id\n");
281		goto bad3;
282	}
283
284	sc->sc_chiprev = READ_REG(sc, SAFE_DEVINFO) &
285		(SAFE_DEVINFO_REV_MAJ | SAFE_DEVINFO_REV_MIN);
286
287	/*
288	 * Setup DMA descriptor area.
289	 */
290	if (bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
291			       1,			/* alignment */
292			       SAFE_DMA_BOUNDARY,	/* boundary */
293			       BUS_SPACE_MAXADDR_32BIT,	/* lowaddr */
294			       BUS_SPACE_MAXADDR,	/* highaddr */
295			       NULL, NULL,		/* filter, filterarg */
296			       SAFE_MAX_DMA,		/* maxsize */
297			       SAFE_MAX_PART,		/* nsegments */
298			       SAFE_MAX_SSIZE,		/* maxsegsize */
299			       BUS_DMA_ALLOCNOW,	/* flags */
300			       NULL, NULL,		/* locking */
301			       &sc->sc_srcdmat)) {
302		device_printf(dev, "cannot allocate DMA tag\n");
303		goto bad4;
304	}
305	if (bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
306			       1,			/* alignment */
307			       SAFE_MAX_DSIZE,		/* boundary */
308			       BUS_SPACE_MAXADDR_32BIT,	/* lowaddr */
309			       BUS_SPACE_MAXADDR,	/* highaddr */
310			       NULL, NULL,		/* filter, filterarg */
311			       SAFE_MAX_DMA,		/* maxsize */
312			       SAFE_MAX_PART,		/* nsegments */
313			       SAFE_MAX_DSIZE,		/* maxsegsize */
314			       BUS_DMA_ALLOCNOW,	/* flags */
315			       NULL, NULL,		/* locking */
316			       &sc->sc_dstdmat)) {
317		device_printf(dev, "cannot allocate DMA tag\n");
318		goto bad4;
319	}
320
321	/*
322	 * Allocate packet engine descriptors.
323	 */
324	if (safe_dma_malloc(sc,
325	    SAFE_MAX_NQUEUE * sizeof (struct safe_ringentry),
326	    &sc->sc_ringalloc, 0)) {
327		device_printf(dev, "cannot allocate PE descriptor ring\n");
328		bus_dma_tag_destroy(sc->sc_srcdmat);
329		goto bad4;
330	}
331	/*
332	 * Hookup the static portion of all our data structures.
333	 */
334	sc->sc_ring = (struct safe_ringentry *) sc->sc_ringalloc.dma_vaddr;
335	sc->sc_ringtop = sc->sc_ring + SAFE_MAX_NQUEUE;
336	sc->sc_front = sc->sc_ring;
337	sc->sc_back = sc->sc_ring;
338	raddr = sc->sc_ringalloc.dma_paddr;
339	bzero(sc->sc_ring, SAFE_MAX_NQUEUE * sizeof(struct safe_ringentry));
340	for (i = 0; i < SAFE_MAX_NQUEUE; i++) {
341		struct safe_ringentry *re = &sc->sc_ring[i];
342
343		re->re_desc.d_sa = raddr +
344			offsetof(struct safe_ringentry, re_sa);
345		re->re_sa.sa_staterec = raddr +
346			offsetof(struct safe_ringentry, re_sastate);
347
348		raddr += sizeof (struct safe_ringentry);
349	}
350	mtx_init(&sc->sc_ringmtx, device_get_nameunit(dev),
351		"packet engine ring", MTX_DEF);
352
353	/*
354	 * Allocate scatter and gather particle descriptors.
355	 */
356	if (safe_dma_malloc(sc, SAFE_TOTAL_SPART * sizeof (struct safe_pdesc),
357	    &sc->sc_spalloc, 0)) {
358		device_printf(dev, "cannot allocate source particle "
359			"descriptor ring\n");
360		mtx_destroy(&sc->sc_ringmtx);
361		safe_dma_free(sc, &sc->sc_ringalloc);
362		bus_dma_tag_destroy(sc->sc_srcdmat);
363		goto bad4;
364	}
365	sc->sc_spring = (struct safe_pdesc *) sc->sc_spalloc.dma_vaddr;
366	sc->sc_springtop = sc->sc_spring + SAFE_TOTAL_SPART;
367	sc->sc_spfree = sc->sc_spring;
368	bzero(sc->sc_spring, SAFE_TOTAL_SPART * sizeof(struct safe_pdesc));
369
370	if (safe_dma_malloc(sc, SAFE_TOTAL_DPART * sizeof (struct safe_pdesc),
371	    &sc->sc_dpalloc, 0)) {
372		device_printf(dev, "cannot allocate destination particle "
373			"descriptor ring\n");
374		mtx_destroy(&sc->sc_ringmtx);
375		safe_dma_free(sc, &sc->sc_spalloc);
376		safe_dma_free(sc, &sc->sc_ringalloc);
377		bus_dma_tag_destroy(sc->sc_dstdmat);
378		goto bad4;
379	}
380	sc->sc_dpring = (struct safe_pdesc *) sc->sc_dpalloc.dma_vaddr;
381	sc->sc_dpringtop = sc->sc_dpring + SAFE_TOTAL_DPART;
382	sc->sc_dpfree = sc->sc_dpring;
383	bzero(sc->sc_dpring, SAFE_TOTAL_DPART * sizeof(struct safe_pdesc));
384
385	device_printf(sc->sc_dev, "%s", safe_partname(sc));
386
387	devinfo = READ_REG(sc, SAFE_DEVINFO);
388	if (devinfo & SAFE_DEVINFO_RNG) {
389		sc->sc_flags |= SAFE_FLAGS_RNG;
390		printf(" rng");
391	}
392	if (devinfo & SAFE_DEVINFO_PKEY) {
393#if 0
394		printf(" key");
395		sc->sc_flags |= SAFE_FLAGS_KEY;
396		crypto_kregister(sc->sc_cid, CRK_MOD_EXP, 0);
397		crypto_kregister(sc->sc_cid, CRK_MOD_EXP_CRT, 0);
398#endif
399	}
400	if (devinfo & SAFE_DEVINFO_DES) {
401		printf(" des/3des");
402		crypto_register(sc->sc_cid, CRYPTO_3DES_CBC, 0, 0);
403		crypto_register(sc->sc_cid, CRYPTO_DES_CBC, 0, 0);
404	}
405	if (devinfo & SAFE_DEVINFO_AES) {
406		printf(" aes");
407		crypto_register(sc->sc_cid, CRYPTO_AES_CBC, 0, 0);
408	}
409	if (devinfo & SAFE_DEVINFO_MD5) {
410		printf(" md5");
411		crypto_register(sc->sc_cid, CRYPTO_MD5_HMAC, 0, 0);
412	}
413	if (devinfo & SAFE_DEVINFO_SHA1) {
414		printf(" sha1");
415		crypto_register(sc->sc_cid, CRYPTO_SHA1_HMAC, 0, 0);
416	}
417	printf(" null");
418	crypto_register(sc->sc_cid, CRYPTO_NULL_CBC, 0, 0);
419	crypto_register(sc->sc_cid, CRYPTO_NULL_HMAC, 0, 0);
420	/* XXX other supported algorithms */
421	printf("\n");
422
423	safe_reset_board(sc);		/* reset h/w */
424	safe_init_pciregs(dev);		/* init pci settings */
425	safe_init_board(sc);		/* init h/w */
426
427#ifndef SAFE_NO_RNG
428	if (sc->sc_flags & SAFE_FLAGS_RNG) {
429#ifdef SAFE_RNDTEST
430		sc->sc_rndtest = rndtest_attach(dev);
431		if (sc->sc_rndtest)
432			sc->sc_harvest = rndtest_harvest;
433		else
434			sc->sc_harvest = default_harvest;
435#else
436		sc->sc_harvest = default_harvest;
437#endif
438		safe_rng_init(sc);
439
440		callout_init(&sc->sc_rngto, CALLOUT_MPSAFE);
441		callout_reset(&sc->sc_rngto, hz*safe_rnginterval, safe_rng, sc);
442	}
443#endif /* SAFE_NO_RNG */
444#ifdef SAFE_DEBUG
445	safec = sc;			/* for use by hw.safe.dump */
446#endif
447	return (0);
448bad4:
449	crypto_unregister_all(sc->sc_cid);
450bad3:
451	bus_teardown_intr(dev, sc->sc_irq, sc->sc_ih);
452bad2:
453	bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_irq);
454bad1:
455	bus_release_resource(dev, SYS_RES_MEMORY, BS_BAR, sc->sc_sr);
456bad:
457	return (ENXIO);
458}
459
460/*
461 * Detach a device that successfully probed.
462 */
463static int
464safe_detach(device_t dev)
465{
466	struct safe_softc *sc = device_get_softc(dev);
467
468	/* XXX wait/abort active ops */
469
470	WRITE_REG(sc, SAFE_HI_MASK, 0);		/* disable interrupts */
471
472	callout_stop(&sc->sc_rngto);
473
474	crypto_unregister_all(sc->sc_cid);
475
476#ifdef SAFE_RNDTEST
477	if (sc->sc_rndtest)
478		rndtest_detach(sc->sc_rndtest);
479#endif
480
481	safe_cleanchip(sc);
482	safe_dma_free(sc, &sc->sc_dpalloc);
483	safe_dma_free(sc, &sc->sc_spalloc);
484	mtx_destroy(&sc->sc_ringmtx);
485	safe_dma_free(sc, &sc->sc_ringalloc);
486
487	bus_generic_detach(dev);
488	bus_teardown_intr(dev, sc->sc_irq, sc->sc_ih);
489	bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_irq);
490
491	bus_dma_tag_destroy(sc->sc_srcdmat);
492	bus_dma_tag_destroy(sc->sc_dstdmat);
493	bus_release_resource(dev, SYS_RES_MEMORY, BS_BAR, sc->sc_sr);
494
495	return (0);
496}
497
498/*
499 * Stop all chip i/o so that the kernel's probe routines don't
500 * get confused by errant DMAs when rebooting.
501 */
502static int
503safe_shutdown(device_t dev)
504{
505#ifdef notyet
506	safe_stop(device_get_softc(dev));
507#endif
508	return (0);
509}
510
511/*
512 * Device suspend routine.
513 */
514static int
515safe_suspend(device_t dev)
516{
517	struct safe_softc *sc = device_get_softc(dev);
518
519#ifdef notyet
520	/* XXX stop the device and save PCI settings */
521#endif
522	sc->sc_suspended = 1;
523
524	return (0);
525}
526
527static int
528safe_resume(device_t dev)
529{
530	struct safe_softc *sc = device_get_softc(dev);
531
532#ifdef notyet
533	/* XXX retore PCI settings and start the device */
534#endif
535	sc->sc_suspended = 0;
536	return (0);
537}
538
539/*
540 * SafeXcel Interrupt routine
541 */
542static void
543safe_intr(void *arg)
544{
545	struct safe_softc *sc = arg;
546	volatile u_int32_t stat;
547
548	stat = READ_REG(sc, SAFE_HM_STAT);
549	if (stat == 0)			/* shared irq, not for us */
550		return;
551
552	WRITE_REG(sc, SAFE_HI_CLR, stat);	/* IACK */
553
554	if ((stat & SAFE_INT_PE_DDONE)) {
555		/*
556		 * Descriptor(s) done; scan the ring and
557		 * process completed operations.
558		 */
559		mtx_lock(&sc->sc_ringmtx);
560		while (sc->sc_back != sc->sc_front) {
561			struct safe_ringentry *re = sc->sc_back;
562#ifdef SAFE_DEBUG
563			if (safe_debug) {
564				safe_dump_ringstate(sc, __func__);
565				safe_dump_request(sc, __func__, re);
566			}
567#endif
568			/*
569			 * safe_process marks ring entries that were allocated
570			 * but not used with a csr of zero.  This insures the
571			 * ring front pointer never needs to be set backwards
572			 * in the event that an entry is allocated but not used
573			 * because of a setup error.
574			 */
575			if (re->re_desc.d_csr != 0) {
576				if (!SAFE_PE_CSR_IS_DONE(re->re_desc.d_csr))
577					break;
578				if (!SAFE_PE_LEN_IS_DONE(re->re_desc.d_len))
579					break;
580				sc->sc_nqchip--;
581				safe_callback(sc, re);
582			}
583			if (++(sc->sc_back) == sc->sc_ringtop)
584				sc->sc_back = sc->sc_ring;
585		}
586		mtx_unlock(&sc->sc_ringmtx);
587	}
588
589	/*
590	 * Check to see if we got any DMA Error
591	 */
592	if (stat & SAFE_INT_PE_ERROR) {
593		DPRINTF(("dmaerr dmastat %08x\n",
594			READ_REG(sc, SAFE_PE_DMASTAT)));
595		safestats.st_dmaerr++;
596		safe_totalreset(sc);
597#if 0
598		safe_feed(sc);
599#endif
600	}
601
602	if (sc->sc_needwakeup) {		/* XXX check high watermark */
603		int wakeup = sc->sc_needwakeup & (CRYPTO_SYMQ|CRYPTO_ASYMQ);
604		DPRINTF(("%s: wakeup crypto %x\n", __func__,
605			sc->sc_needwakeup));
606		sc->sc_needwakeup &= ~wakeup;
607		crypto_unblock(sc->sc_cid, wakeup);
608	}
609}
610
611/*
612 * safe_feed() - post a request to chip
613 */
614static void
615safe_feed(struct safe_softc *sc, struct safe_ringentry *re)
616{
617	bus_dmamap_sync(sc->sc_srcdmat, re->re_src_map, BUS_DMASYNC_PREWRITE);
618	if (re->re_dst_map != NULL)
619		bus_dmamap_sync(sc->sc_dstdmat, re->re_dst_map,
620			BUS_DMASYNC_PREREAD);
621	/* XXX have no smaller granularity */
622	safe_dma_sync(&sc->sc_ringalloc,
623		BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
624	safe_dma_sync(&sc->sc_spalloc, BUS_DMASYNC_PREWRITE);
625	safe_dma_sync(&sc->sc_dpalloc, BUS_DMASYNC_PREWRITE);
626
627#ifdef SAFE_DEBUG
628	if (safe_debug) {
629		safe_dump_ringstate(sc, __func__);
630		safe_dump_request(sc, __func__, re);
631	}
632#endif
633	sc->sc_nqchip++;
634	if (sc->sc_nqchip > safestats.st_maxqchip)
635		safestats.st_maxqchip = sc->sc_nqchip;
636	/* poke h/w to check descriptor ring, any value can be written */
637	WRITE_REG(sc, SAFE_HI_RD_DESCR, 0);
638}
639
640#define	N(a)	(sizeof(a) / sizeof (a[0]))
641static void
642safe_setup_enckey(struct safe_session *ses, caddr_t key)
643{
644	int i;
645
646	bcopy(key, ses->ses_key, ses->ses_klen / 8);
647
648	/* PE is little-endian, insure proper byte order */
649	for (i = 0; i < N(ses->ses_key); i++)
650		ses->ses_key[i] = htole32(ses->ses_key[i]);
651}
652
653static void
654safe_setup_mackey(struct safe_session *ses, int algo, caddr_t key, int klen)
655{
656	MD5_CTX md5ctx;
657	SHA1_CTX sha1ctx;
658	int i;
659
660
661	for (i = 0; i < klen; i++)
662		key[i] ^= HMAC_IPAD_VAL;
663
664	if (algo == CRYPTO_MD5_HMAC) {
665		MD5Init(&md5ctx);
666		MD5Update(&md5ctx, key, klen);
667		MD5Update(&md5ctx, hmac_ipad_buffer, MD5_HMAC_BLOCK_LEN - klen);
668		bcopy(md5ctx.state, ses->ses_hminner, sizeof(md5ctx.state));
669	} else {
670		SHA1Init(&sha1ctx);
671		SHA1Update(&sha1ctx, key, klen);
672		SHA1Update(&sha1ctx, hmac_ipad_buffer,
673		    SHA1_HMAC_BLOCK_LEN - klen);
674		bcopy(sha1ctx.h.b32, ses->ses_hminner, sizeof(sha1ctx.h.b32));
675	}
676
677	for (i = 0; i < klen; i++)
678		key[i] ^= (HMAC_IPAD_VAL ^ HMAC_OPAD_VAL);
679
680	if (algo == CRYPTO_MD5_HMAC) {
681		MD5Init(&md5ctx);
682		MD5Update(&md5ctx, key, klen);
683		MD5Update(&md5ctx, hmac_opad_buffer, MD5_HMAC_BLOCK_LEN - klen);
684		bcopy(md5ctx.state, ses->ses_hmouter, sizeof(md5ctx.state));
685	} else {
686		SHA1Init(&sha1ctx);
687		SHA1Update(&sha1ctx, key, klen);
688		SHA1Update(&sha1ctx, hmac_opad_buffer,
689		    SHA1_HMAC_BLOCK_LEN - klen);
690		bcopy(sha1ctx.h.b32, ses->ses_hmouter, sizeof(sha1ctx.h.b32));
691	}
692
693	for (i = 0; i < klen; i++)
694		key[i] ^= HMAC_OPAD_VAL;
695
696	/* PE is little-endian, insure proper byte order */
697	for (i = 0; i < N(ses->ses_hminner); i++) {
698		ses->ses_hminner[i] = htole32(ses->ses_hminner[i]);
699		ses->ses_hmouter[i] = htole32(ses->ses_hmouter[i]);
700	}
701}
702#undef N
703
704/*
705 * Allocate a new 'session' and return an encoded session id.  'sidp'
706 * contains our registration id, and should contain an encoded session
707 * id on successful allocation.
708 */
709static int
710safe_newsession(device_t dev, u_int32_t *sidp, struct cryptoini *cri)
711{
712	struct safe_softc *sc = device_get_softc(dev);
713	struct cryptoini *c, *encini = NULL, *macini = NULL;
714	struct safe_session *ses = NULL;
715	int sesn;
716
717	if (sidp == NULL || cri == NULL || sc == NULL)
718		return (EINVAL);
719
720	for (c = cri; c != NULL; c = c->cri_next) {
721		if (c->cri_alg == CRYPTO_MD5_HMAC ||
722		    c->cri_alg == CRYPTO_SHA1_HMAC ||
723		    c->cri_alg == CRYPTO_NULL_HMAC) {
724			if (macini)
725				return (EINVAL);
726			macini = c;
727		} else if (c->cri_alg == CRYPTO_DES_CBC ||
728		    c->cri_alg == CRYPTO_3DES_CBC ||
729		    c->cri_alg == CRYPTO_AES_CBC ||
730		    c->cri_alg == CRYPTO_NULL_CBC) {
731			if (encini)
732				return (EINVAL);
733			encini = c;
734		} else
735			return (EINVAL);
736	}
737	if (encini == NULL && macini == NULL)
738		return (EINVAL);
739	if (encini) {			/* validate key length */
740		switch (encini->cri_alg) {
741		case CRYPTO_DES_CBC:
742			if (encini->cri_klen != 64)
743				return (EINVAL);
744			break;
745		case CRYPTO_3DES_CBC:
746			if (encini->cri_klen != 192)
747				return (EINVAL);
748			break;
749		case CRYPTO_AES_CBC:
750			if (encini->cri_klen != 128 &&
751			    encini->cri_klen != 192 &&
752			    encini->cri_klen != 256)
753				return (EINVAL);
754			break;
755		}
756	}
757
758	if (sc->sc_sessions == NULL) {
759		ses = sc->sc_sessions = (struct safe_session *)malloc(
760		    sizeof(struct safe_session), M_DEVBUF, M_NOWAIT);
761		if (ses == NULL)
762			return (ENOMEM);
763		sesn = 0;
764		sc->sc_nsessions = 1;
765	} else {
766		for (sesn = 0; sesn < sc->sc_nsessions; sesn++) {
767			if (sc->sc_sessions[sesn].ses_used == 0) {
768				ses = &sc->sc_sessions[sesn];
769				break;
770			}
771		}
772
773		if (ses == NULL) {
774			sesn = sc->sc_nsessions;
775			ses = (struct safe_session *)malloc((sesn + 1) *
776			    sizeof(struct safe_session), M_DEVBUF, M_NOWAIT);
777			if (ses == NULL)
778				return (ENOMEM);
779			bcopy(sc->sc_sessions, ses, sesn *
780			    sizeof(struct safe_session));
781			bzero(sc->sc_sessions, sesn *
782			    sizeof(struct safe_session));
783			free(sc->sc_sessions, M_DEVBUF);
784			sc->sc_sessions = ses;
785			ses = &sc->sc_sessions[sesn];
786			sc->sc_nsessions++;
787		}
788	}
789
790	bzero(ses, sizeof(struct safe_session));
791	ses->ses_used = 1;
792
793	if (encini) {
794		/* get an IV */
795		/* XXX may read fewer than requested */
796		read_random(ses->ses_iv, sizeof(ses->ses_iv));
797
798		ses->ses_klen = encini->cri_klen;
799		if (encini->cri_key != NULL)
800			safe_setup_enckey(ses, encini->cri_key);
801	}
802
803	if (macini) {
804		ses->ses_mlen = macini->cri_mlen;
805		if (ses->ses_mlen == 0) {
806			if (macini->cri_alg == CRYPTO_MD5_HMAC)
807				ses->ses_mlen = MD5_HASH_LEN;
808			else
809				ses->ses_mlen = SHA1_HASH_LEN;
810		}
811
812		if (macini->cri_key != NULL) {
813			safe_setup_mackey(ses, macini->cri_alg, macini->cri_key,
814			    macini->cri_klen / 8);
815		}
816	}
817
818	*sidp = SAFE_SID(device_get_unit(sc->sc_dev), sesn);
819	return (0);
820}
821
822/*
823 * Deallocate a session.
824 */
825static int
826safe_freesession(device_t dev, u_int64_t tid)
827{
828	struct safe_softc *sc = device_get_softc(dev);
829	int session, ret;
830	u_int32_t sid = ((u_int32_t) tid) & 0xffffffff;
831
832	if (sc == NULL)
833		return (EINVAL);
834
835	session = SAFE_SESSION(sid);
836	if (session < sc->sc_nsessions) {
837		bzero(&sc->sc_sessions[session], sizeof(sc->sc_sessions[session]));
838		ret = 0;
839	} else
840		ret = EINVAL;
841	return (ret);
842}
843
844static void
845safe_op_cb(void *arg, bus_dma_segment_t *seg, int nsegs, bus_size_t mapsize, int error)
846{
847	struct safe_operand *op = arg;
848
849	DPRINTF(("%s: mapsize %u nsegs %d error %d\n", __func__,
850		(u_int) mapsize, nsegs, error));
851	if (error != 0)
852		return;
853	op->mapsize = mapsize;
854	op->nsegs = nsegs;
855	bcopy(seg, op->segs, nsegs * sizeof (seg[0]));
856}
857
858static int
859safe_process(device_t dev, struct cryptop *crp, int hint)
860{
861	struct safe_softc *sc = device_get_softc(dev);
862	int err = 0, i, nicealign, uniform;
863	struct cryptodesc *crd1, *crd2, *maccrd, *enccrd;
864	int bypass, oplen, ivsize;
865	caddr_t iv;
866	int16_t coffset;
867	struct safe_session *ses;
868	struct safe_ringentry *re;
869	struct safe_sarec *sa;
870	struct safe_pdesc *pd;
871	u_int32_t cmd0, cmd1, staterec;
872
873	if (crp == NULL || crp->crp_callback == NULL || sc == NULL) {
874		safestats.st_invalid++;
875		return (EINVAL);
876	}
877	if (SAFE_SESSION(crp->crp_sid) >= sc->sc_nsessions) {
878		safestats.st_badsession++;
879		return (EINVAL);
880	}
881
882	mtx_lock(&sc->sc_ringmtx);
883	if (sc->sc_front == sc->sc_back && sc->sc_nqchip != 0) {
884		safestats.st_ringfull++;
885		sc->sc_needwakeup |= CRYPTO_SYMQ;
886		mtx_unlock(&sc->sc_ringmtx);
887		return (ERESTART);
888	}
889	re = sc->sc_front;
890
891	staterec = re->re_sa.sa_staterec;	/* save */
892	/* NB: zero everything but the PE descriptor */
893	bzero(&re->re_sa, sizeof(struct safe_ringentry) - sizeof(re->re_desc));
894	re->re_sa.sa_staterec = staterec;	/* restore */
895
896	re->re_crp = crp;
897	re->re_sesn = SAFE_SESSION(crp->crp_sid);
898
899	if (crp->crp_flags & CRYPTO_F_IMBUF) {
900		re->re_src_m = (struct mbuf *)crp->crp_buf;
901		re->re_dst_m = (struct mbuf *)crp->crp_buf;
902	} else if (crp->crp_flags & CRYPTO_F_IOV) {
903		re->re_src_io = (struct uio *)crp->crp_buf;
904		re->re_dst_io = (struct uio *)crp->crp_buf;
905	} else {
906		safestats.st_badflags++;
907		err = EINVAL;
908		goto errout;	/* XXX we don't handle contiguous blocks! */
909	}
910
911	sa = &re->re_sa;
912	ses = &sc->sc_sessions[re->re_sesn];
913
914	crd1 = crp->crp_desc;
915	if (crd1 == NULL) {
916		safestats.st_nodesc++;
917		err = EINVAL;
918		goto errout;
919	}
920	crd2 = crd1->crd_next;
921
922	cmd0 = SAFE_SA_CMD0_BASIC;		/* basic group operation */
923	cmd1 = 0;
924	if (crd2 == NULL) {
925		if (crd1->crd_alg == CRYPTO_MD5_HMAC ||
926		    crd1->crd_alg == CRYPTO_SHA1_HMAC ||
927		    crd1->crd_alg == CRYPTO_NULL_HMAC) {
928			maccrd = crd1;
929			enccrd = NULL;
930			cmd0 |= SAFE_SA_CMD0_OP_HASH;
931		} else if (crd1->crd_alg == CRYPTO_DES_CBC ||
932		    crd1->crd_alg == CRYPTO_3DES_CBC ||
933		    crd1->crd_alg == CRYPTO_AES_CBC ||
934		    crd1->crd_alg == CRYPTO_NULL_CBC) {
935			maccrd = NULL;
936			enccrd = crd1;
937			cmd0 |= SAFE_SA_CMD0_OP_CRYPT;
938		} else {
939			safestats.st_badalg++;
940			err = EINVAL;
941			goto errout;
942		}
943	} else {
944		if ((crd1->crd_alg == CRYPTO_MD5_HMAC ||
945		    crd1->crd_alg == CRYPTO_SHA1_HMAC ||
946		    crd1->crd_alg == CRYPTO_NULL_HMAC) &&
947		    (crd2->crd_alg == CRYPTO_DES_CBC ||
948			crd2->crd_alg == CRYPTO_3DES_CBC ||
949		        crd2->crd_alg == CRYPTO_AES_CBC ||
950		        crd2->crd_alg == CRYPTO_NULL_CBC) &&
951		    ((crd2->crd_flags & CRD_F_ENCRYPT) == 0)) {
952			maccrd = crd1;
953			enccrd = crd2;
954		} else if ((crd1->crd_alg == CRYPTO_DES_CBC ||
955		    crd1->crd_alg == CRYPTO_3DES_CBC ||
956		    crd1->crd_alg == CRYPTO_AES_CBC ||
957		    crd1->crd_alg == CRYPTO_NULL_CBC) &&
958		    (crd2->crd_alg == CRYPTO_MD5_HMAC ||
959			crd2->crd_alg == CRYPTO_SHA1_HMAC ||
960			crd2->crd_alg == CRYPTO_NULL_HMAC) &&
961		    (crd1->crd_flags & CRD_F_ENCRYPT)) {
962			enccrd = crd1;
963			maccrd = crd2;
964		} else {
965			safestats.st_badalg++;
966			err = EINVAL;
967			goto errout;
968		}
969		cmd0 |= SAFE_SA_CMD0_OP_BOTH;
970	}
971
972	if (enccrd) {
973		if (enccrd->crd_flags & CRD_F_KEY_EXPLICIT)
974			safe_setup_enckey(ses, enccrd->crd_key);
975
976		if (enccrd->crd_alg == CRYPTO_DES_CBC) {
977			cmd0 |= SAFE_SA_CMD0_DES;
978			cmd1 |= SAFE_SA_CMD1_CBC;
979			ivsize = 2*sizeof(u_int32_t);
980		} else if (enccrd->crd_alg == CRYPTO_3DES_CBC) {
981			cmd0 |= SAFE_SA_CMD0_3DES;
982			cmd1 |= SAFE_SA_CMD1_CBC;
983			ivsize = 2*sizeof(u_int32_t);
984		} else if (enccrd->crd_alg == CRYPTO_AES_CBC) {
985			cmd0 |= SAFE_SA_CMD0_AES;
986			cmd1 |= SAFE_SA_CMD1_CBC;
987			if (ses->ses_klen == 128)
988			     cmd1 |=  SAFE_SA_CMD1_AES128;
989			else if (ses->ses_klen == 192)
990			     cmd1 |=  SAFE_SA_CMD1_AES192;
991			else
992			     cmd1 |=  SAFE_SA_CMD1_AES256;
993			ivsize = 4*sizeof(u_int32_t);
994		} else {
995			cmd0 |= SAFE_SA_CMD0_CRYPT_NULL;
996			ivsize = 0;
997		}
998
999		/*
1000		 * Setup encrypt/decrypt state.  When using basic ops
1001		 * we can't use an inline IV because hash/crypt offset
1002		 * must be from the end of the IV to the start of the
1003		 * crypt data and this leaves out the preceding header
1004		 * from the hash calculation.  Instead we place the IV
1005		 * in the state record and set the hash/crypt offset to
1006		 * copy both the header+IV.
1007		 */
1008		if (enccrd->crd_flags & CRD_F_ENCRYPT) {
1009			cmd0 |= SAFE_SA_CMD0_OUTBOUND;
1010
1011			if (enccrd->crd_flags & CRD_F_IV_EXPLICIT)
1012				iv = enccrd->crd_iv;
1013			else
1014				iv = (caddr_t) ses->ses_iv;
1015			if ((enccrd->crd_flags & CRD_F_IV_PRESENT) == 0) {
1016				crypto_copyback(crp->crp_flags, crp->crp_buf,
1017				    enccrd->crd_inject, ivsize, iv);
1018			}
1019			bcopy(iv, re->re_sastate.sa_saved_iv, ivsize);
1020			cmd0 |= SAFE_SA_CMD0_IVLD_STATE | SAFE_SA_CMD0_SAVEIV;
1021			re->re_flags |= SAFE_QFLAGS_COPYOUTIV;
1022		} else {
1023			cmd0 |= SAFE_SA_CMD0_INBOUND;
1024
1025			if (enccrd->crd_flags & CRD_F_IV_EXPLICIT) {
1026				bcopy(enccrd->crd_iv,
1027					re->re_sastate.sa_saved_iv, ivsize);
1028			} else {
1029				crypto_copydata(crp->crp_flags, crp->crp_buf,
1030				    enccrd->crd_inject, ivsize,
1031				    (caddr_t)re->re_sastate.sa_saved_iv);
1032			}
1033			cmd0 |= SAFE_SA_CMD0_IVLD_STATE;
1034		}
1035		/*
1036		 * For basic encryption use the zero pad algorithm.
1037		 * This pads results to an 8-byte boundary and
1038		 * suppresses padding verification for inbound (i.e.
1039		 * decrypt) operations.
1040		 *
1041		 * NB: Not sure if the 8-byte pad boundary is a problem.
1042		 */
1043		cmd0 |= SAFE_SA_CMD0_PAD_ZERO;
1044
1045		/* XXX assert key bufs have the same size */
1046		bcopy(ses->ses_key, sa->sa_key, sizeof(sa->sa_key));
1047	}
1048
1049	if (maccrd) {
1050		if (maccrd->crd_flags & CRD_F_KEY_EXPLICIT) {
1051			safe_setup_mackey(ses, maccrd->crd_alg,
1052			    maccrd->crd_key, maccrd->crd_klen / 8);
1053		}
1054
1055		if (maccrd->crd_alg == CRYPTO_MD5_HMAC) {
1056			cmd0 |= SAFE_SA_CMD0_MD5;
1057			cmd1 |= SAFE_SA_CMD1_HMAC;	/* NB: enable HMAC */
1058		} else if (maccrd->crd_alg == CRYPTO_SHA1_HMAC) {
1059			cmd0 |= SAFE_SA_CMD0_SHA1;
1060			cmd1 |= SAFE_SA_CMD1_HMAC;	/* NB: enable HMAC */
1061		} else {
1062			cmd0 |= SAFE_SA_CMD0_HASH_NULL;
1063		}
1064		/*
1065		 * Digest data is loaded from the SA and the hash
1066		 * result is saved to the state block where we
1067		 * retrieve it for return to the caller.
1068		 */
1069		/* XXX assert digest bufs have the same size */
1070		bcopy(ses->ses_hminner, sa->sa_indigest,
1071			sizeof(sa->sa_indigest));
1072		bcopy(ses->ses_hmouter, sa->sa_outdigest,
1073			sizeof(sa->sa_outdigest));
1074
1075		cmd0 |= SAFE_SA_CMD0_HSLD_SA | SAFE_SA_CMD0_SAVEHASH;
1076		re->re_flags |= SAFE_QFLAGS_COPYOUTICV;
1077	}
1078
1079	if (enccrd && maccrd) {
1080		/*
1081		 * The offset from hash data to the start of
1082		 * crypt data is the difference in the skips.
1083		 */
1084		bypass = maccrd->crd_skip;
1085		coffset = enccrd->crd_skip - maccrd->crd_skip;
1086		if (coffset < 0) {
1087			DPRINTF(("%s: hash does not precede crypt; "
1088				"mac skip %u enc skip %u\n",
1089				__func__, maccrd->crd_skip, enccrd->crd_skip));
1090			safestats.st_skipmismatch++;
1091			err = EINVAL;
1092			goto errout;
1093		}
1094		oplen = enccrd->crd_skip + enccrd->crd_len;
1095		if (maccrd->crd_skip + maccrd->crd_len != oplen) {
1096			DPRINTF(("%s: hash amount %u != crypt amount %u\n",
1097				__func__, maccrd->crd_skip + maccrd->crd_len,
1098				oplen));
1099			safestats.st_lenmismatch++;
1100			err = EINVAL;
1101			goto errout;
1102		}
1103#ifdef SAFE_DEBUG
1104		if (safe_debug) {
1105			printf("mac: skip %d, len %d, inject %d\n",
1106			    maccrd->crd_skip, maccrd->crd_len,
1107			    maccrd->crd_inject);
1108			printf("enc: skip %d, len %d, inject %d\n",
1109			    enccrd->crd_skip, enccrd->crd_len,
1110			    enccrd->crd_inject);
1111			printf("bypass %d coffset %d oplen %d\n",
1112				bypass, coffset, oplen);
1113		}
1114#endif
1115		if (coffset & 3) {	/* offset must be 32-bit aligned */
1116			DPRINTF(("%s: coffset %u misaligned\n",
1117				__func__, coffset));
1118			safestats.st_coffmisaligned++;
1119			err = EINVAL;
1120			goto errout;
1121		}
1122		coffset >>= 2;
1123		if (coffset > 255) {	/* offset must be <256 dwords */
1124			DPRINTF(("%s: coffset %u too big\n",
1125				__func__, coffset));
1126			safestats.st_cofftoobig++;
1127			err = EINVAL;
1128			goto errout;
1129		}
1130		/*
1131		 * Tell the hardware to copy the header to the output.
1132		 * The header is defined as the data from the end of
1133		 * the bypass to the start of data to be encrypted.
1134		 * Typically this is the inline IV.  Note that you need
1135		 * to do this even if src+dst are the same; it appears
1136		 * that w/o this bit the crypted data is written
1137		 * immediately after the bypass data.
1138		 */
1139		cmd1 |= SAFE_SA_CMD1_HDRCOPY;
1140		/*
1141		 * Disable IP header mutable bit handling.  This is
1142		 * needed to get correct HMAC calculations.
1143		 */
1144		cmd1 |= SAFE_SA_CMD1_MUTABLE;
1145	} else {
1146		if (enccrd) {
1147			bypass = enccrd->crd_skip;
1148			oplen = bypass + enccrd->crd_len;
1149		} else {
1150			bypass = maccrd->crd_skip;
1151			oplen = bypass + maccrd->crd_len;
1152		}
1153		coffset = 0;
1154	}
1155	/* XXX verify multiple of 4 when using s/g */
1156	if (bypass > 96) {		/* bypass offset must be <= 96 bytes */
1157		DPRINTF(("%s: bypass %u too big\n", __func__, bypass));
1158		safestats.st_bypasstoobig++;
1159		err = EINVAL;
1160		goto errout;
1161	}
1162
1163	if (bus_dmamap_create(sc->sc_srcdmat, BUS_DMA_NOWAIT, &re->re_src_map)) {
1164		safestats.st_nomap++;
1165		err = ENOMEM;
1166		goto errout;
1167	}
1168	if (crp->crp_flags & CRYPTO_F_IMBUF) {
1169		if (bus_dmamap_load_mbuf(sc->sc_srcdmat, re->re_src_map,
1170		    re->re_src_m, safe_op_cb,
1171		    &re->re_src, BUS_DMA_NOWAIT) != 0) {
1172			bus_dmamap_destroy(sc->sc_srcdmat, re->re_src_map);
1173			re->re_src_map = NULL;
1174			safestats.st_noload++;
1175			err = ENOMEM;
1176			goto errout;
1177		}
1178	} else if (crp->crp_flags & CRYPTO_F_IOV) {
1179		if (bus_dmamap_load_uio(sc->sc_srcdmat, re->re_src_map,
1180		    re->re_src_io, safe_op_cb,
1181		    &re->re_src, BUS_DMA_NOWAIT) != 0) {
1182			bus_dmamap_destroy(sc->sc_srcdmat, re->re_src_map);
1183			re->re_src_map = NULL;
1184			safestats.st_noload++;
1185			err = ENOMEM;
1186			goto errout;
1187		}
1188	}
1189	nicealign = safe_dmamap_aligned(&re->re_src);
1190	uniform = safe_dmamap_uniform(&re->re_src);
1191
1192	DPRINTF(("src nicealign %u uniform %u nsegs %u\n",
1193		nicealign, uniform, re->re_src.nsegs));
1194	if (re->re_src.nsegs > 1) {
1195		re->re_desc.d_src = sc->sc_spalloc.dma_paddr +
1196			((caddr_t) sc->sc_spfree - (caddr_t) sc->sc_spring);
1197		for (i = 0; i < re->re_src_nsegs; i++) {
1198			/* NB: no need to check if there's space */
1199			pd = sc->sc_spfree;
1200			if (++(sc->sc_spfree) == sc->sc_springtop)
1201				sc->sc_spfree = sc->sc_spring;
1202
1203			KASSERT((pd->pd_flags&3) == 0 ||
1204				(pd->pd_flags&3) == SAFE_PD_DONE,
1205				("bogus source particle descriptor; flags %x",
1206				pd->pd_flags));
1207			pd->pd_addr = re->re_src_segs[i].ds_addr;
1208			pd->pd_size = re->re_src_segs[i].ds_len;
1209			pd->pd_flags = SAFE_PD_READY;
1210		}
1211		cmd0 |= SAFE_SA_CMD0_IGATHER;
1212	} else {
1213		/*
1214		 * No need for gather, reference the operand directly.
1215		 */
1216		re->re_desc.d_src = re->re_src_segs[0].ds_addr;
1217	}
1218
1219	if (enccrd == NULL && maccrd != NULL) {
1220		/*
1221		 * Hash op; no destination needed.
1222		 */
1223	} else {
1224		if (crp->crp_flags & CRYPTO_F_IOV) {
1225			if (!nicealign) {
1226				safestats.st_iovmisaligned++;
1227				err = EINVAL;
1228				goto errout;
1229			}
1230			if (uniform != 1) {
1231				/*
1232				 * Source is not suitable for direct use as
1233				 * the destination.  Create a new scatter/gather
1234				 * list based on the destination requirements
1235				 * and check if that's ok.
1236				 */
1237				if (bus_dmamap_create(sc->sc_dstdmat,
1238				    BUS_DMA_NOWAIT, &re->re_dst_map)) {
1239					safestats.st_nomap++;
1240					err = ENOMEM;
1241					goto errout;
1242				}
1243				if (bus_dmamap_load_uio(sc->sc_dstdmat,
1244				    re->re_dst_map, re->re_dst_io,
1245				    safe_op_cb, &re->re_dst,
1246				    BUS_DMA_NOWAIT) != 0) {
1247					bus_dmamap_destroy(sc->sc_dstdmat,
1248						re->re_dst_map);
1249					re->re_dst_map = NULL;
1250					safestats.st_noload++;
1251					err = ENOMEM;
1252					goto errout;
1253				}
1254				uniform = safe_dmamap_uniform(&re->re_dst);
1255				if (!uniform) {
1256					/*
1257					 * There's no way to handle the DMA
1258					 * requirements with this uio.  We
1259					 * could create a separate DMA area for
1260					 * the result and then copy it back,
1261					 * but for now we just bail and return
1262					 * an error.  Note that uio requests
1263					 * > SAFE_MAX_DSIZE are handled because
1264					 * the DMA map and segment list for the
1265					 * destination wil result in a
1266					 * destination particle list that does
1267					 * the necessary scatter DMA.
1268					 */
1269					safestats.st_iovnotuniform++;
1270					err = EINVAL;
1271					goto errout;
1272				}
1273			} else
1274				re->re_dst = re->re_src;
1275		} else if (crp->crp_flags & CRYPTO_F_IMBUF) {
1276			if (nicealign && uniform == 1) {
1277				/*
1278				 * Source layout is suitable for direct
1279				 * sharing of the DMA map and segment list.
1280				 */
1281				re->re_dst = re->re_src;
1282			} else if (nicealign && uniform == 2) {
1283				/*
1284				 * The source is properly aligned but requires a
1285				 * different particle list to handle DMA of the
1286				 * result.  Create a new map and do the load to
1287				 * create the segment list.  The particle
1288				 * descriptor setup code below will handle the
1289				 * rest.
1290				 */
1291				if (bus_dmamap_create(sc->sc_dstdmat,
1292				    BUS_DMA_NOWAIT, &re->re_dst_map)) {
1293					safestats.st_nomap++;
1294					err = ENOMEM;
1295					goto errout;
1296				}
1297				if (bus_dmamap_load_mbuf(sc->sc_dstdmat,
1298				    re->re_dst_map, re->re_dst_m,
1299				    safe_op_cb, &re->re_dst,
1300				    BUS_DMA_NOWAIT) != 0) {
1301					bus_dmamap_destroy(sc->sc_dstdmat,
1302						re->re_dst_map);
1303					re->re_dst_map = NULL;
1304					safestats.st_noload++;
1305					err = ENOMEM;
1306					goto errout;
1307				}
1308			} else {		/* !(aligned and/or uniform) */
1309				int totlen, len;
1310				struct mbuf *m, *top, **mp;
1311
1312				/*
1313				 * DMA constraints require that we allocate a
1314				 * new mbuf chain for the destination.  We
1315				 * allocate an entire new set of mbufs of
1316				 * optimal/required size and then tell the
1317				 * hardware to copy any bits that are not
1318				 * created as a byproduct of the operation.
1319				 */
1320				if (!nicealign)
1321					safestats.st_unaligned++;
1322				if (!uniform)
1323					safestats.st_notuniform++;
1324				totlen = re->re_src_mapsize;
1325				if (re->re_src_m->m_flags & M_PKTHDR) {
1326					len = MHLEN;
1327					MGETHDR(m, M_DONTWAIT, MT_DATA);
1328					if (m && !m_dup_pkthdr(m, re->re_src_m,
1329					    M_DONTWAIT)) {
1330						m_free(m);
1331						m = NULL;
1332					}
1333				} else {
1334					len = MLEN;
1335					MGET(m, M_DONTWAIT, MT_DATA);
1336				}
1337				if (m == NULL) {
1338					safestats.st_nombuf++;
1339					err = sc->sc_nqchip ? ERESTART : ENOMEM;
1340					goto errout;
1341				}
1342				if (totlen >= MINCLSIZE) {
1343					MCLGET(m, M_DONTWAIT);
1344					if ((m->m_flags & M_EXT) == 0) {
1345						m_free(m);
1346						safestats.st_nomcl++;
1347						err = sc->sc_nqchip ?
1348							ERESTART : ENOMEM;
1349						goto errout;
1350					}
1351					len = MCLBYTES;
1352				}
1353				m->m_len = len;
1354				top = NULL;
1355				mp = &top;
1356
1357				while (totlen > 0) {
1358					if (top) {
1359						MGET(m, M_DONTWAIT, MT_DATA);
1360						if (m == NULL) {
1361							m_freem(top);
1362							safestats.st_nombuf++;
1363							err = sc->sc_nqchip ?
1364							    ERESTART : ENOMEM;
1365							goto errout;
1366						}
1367						len = MLEN;
1368					}
1369					if (top && totlen >= MINCLSIZE) {
1370						MCLGET(m, M_DONTWAIT);
1371						if ((m->m_flags & M_EXT) == 0) {
1372							*mp = m;
1373							m_freem(top);
1374							safestats.st_nomcl++;
1375							err = sc->sc_nqchip ?
1376							    ERESTART : ENOMEM;
1377							goto errout;
1378						}
1379						len = MCLBYTES;
1380					}
1381					m->m_len = len = min(totlen, len);
1382					totlen -= len;
1383					*mp = m;
1384					mp = &m->m_next;
1385				}
1386				re->re_dst_m = top;
1387				if (bus_dmamap_create(sc->sc_dstdmat,
1388				    BUS_DMA_NOWAIT, &re->re_dst_map) != 0) {
1389					safestats.st_nomap++;
1390					err = ENOMEM;
1391					goto errout;
1392				}
1393				if (bus_dmamap_load_mbuf(sc->sc_dstdmat,
1394				    re->re_dst_map, re->re_dst_m,
1395				    safe_op_cb, &re->re_dst,
1396				    BUS_DMA_NOWAIT) != 0) {
1397					bus_dmamap_destroy(sc->sc_dstdmat,
1398					re->re_dst_map);
1399					re->re_dst_map = NULL;
1400					safestats.st_noload++;
1401					err = ENOMEM;
1402					goto errout;
1403				}
1404				if (re->re_src.mapsize > oplen) {
1405					/*
1406					 * There's data following what the
1407					 * hardware will copy for us.  If this
1408					 * isn't just the ICV (that's going to
1409					 * be written on completion), copy it
1410					 * to the new mbufs
1411					 */
1412					if (!(maccrd &&
1413					    (re->re_src.mapsize-oplen) == 12 &&
1414					    maccrd->crd_inject == oplen))
1415						safe_mcopy(re->re_src_m,
1416							   re->re_dst_m,
1417							   oplen);
1418					else
1419						safestats.st_noicvcopy++;
1420				}
1421			}
1422		} else {
1423			safestats.st_badflags++;
1424			err = EINVAL;
1425			goto errout;
1426		}
1427
1428		if (re->re_dst.nsegs > 1) {
1429			re->re_desc.d_dst = sc->sc_dpalloc.dma_paddr +
1430			    ((caddr_t) sc->sc_dpfree - (caddr_t) sc->sc_dpring);
1431			for (i = 0; i < re->re_dst_nsegs; i++) {
1432				pd = sc->sc_dpfree;
1433				KASSERT((pd->pd_flags&3) == 0 ||
1434					(pd->pd_flags&3) == SAFE_PD_DONE,
1435					("bogus dest particle descriptor; flags %x",
1436						pd->pd_flags));
1437				if (++(sc->sc_dpfree) == sc->sc_dpringtop)
1438					sc->sc_dpfree = sc->sc_dpring;
1439				pd->pd_addr = re->re_dst_segs[i].ds_addr;
1440				pd->pd_flags = SAFE_PD_READY;
1441			}
1442			cmd0 |= SAFE_SA_CMD0_OSCATTER;
1443		} else {
1444			/*
1445			 * No need for scatter, reference the operand directly.
1446			 */
1447			re->re_desc.d_dst = re->re_dst_segs[0].ds_addr;
1448		}
1449	}
1450
1451	/*
1452	 * All done with setup; fillin the SA command words
1453	 * and the packet engine descriptor.  The operation
1454	 * is now ready for submission to the hardware.
1455	 */
1456	sa->sa_cmd0 = cmd0 | SAFE_SA_CMD0_IPCI | SAFE_SA_CMD0_OPCI;
1457	sa->sa_cmd1 = cmd1
1458		    | (coffset << SAFE_SA_CMD1_OFFSET_S)
1459		    | SAFE_SA_CMD1_SAREV1	/* Rev 1 SA data structure */
1460		    | SAFE_SA_CMD1_SRPCI
1461		    ;
1462	/*
1463	 * NB: the order of writes is important here.  In case the
1464	 * chip is scanning the ring because of an outstanding request
1465	 * it might nab this one too.  In that case we need to make
1466	 * sure the setup is complete before we write the length
1467	 * field of the descriptor as it signals the descriptor is
1468	 * ready for processing.
1469	 */
1470	re->re_desc.d_csr = SAFE_PE_CSR_READY | SAFE_PE_CSR_SAPCI;
1471	if (maccrd)
1472		re->re_desc.d_csr |= SAFE_PE_CSR_LOADSA | SAFE_PE_CSR_HASHFINAL;
1473	re->re_desc.d_len = oplen
1474			  | SAFE_PE_LEN_READY
1475			  | (bypass << SAFE_PE_LEN_BYPASS_S)
1476			  ;
1477
1478	safestats.st_ipackets++;
1479	safestats.st_ibytes += oplen;
1480
1481	if (++(sc->sc_front) == sc->sc_ringtop)
1482		sc->sc_front = sc->sc_ring;
1483
1484	/* XXX honor batching */
1485	safe_feed(sc, re);
1486	mtx_unlock(&sc->sc_ringmtx);
1487	return (0);
1488
1489errout:
1490	if ((re->re_dst_m != NULL) && (re->re_src_m != re->re_dst_m))
1491		m_freem(re->re_dst_m);
1492
1493	if (re->re_dst_map != NULL && re->re_dst_map != re->re_src_map) {
1494		bus_dmamap_unload(sc->sc_dstdmat, re->re_dst_map);
1495		bus_dmamap_destroy(sc->sc_dstdmat, re->re_dst_map);
1496	}
1497	if (re->re_src_map != NULL) {
1498		bus_dmamap_unload(sc->sc_srcdmat, re->re_src_map);
1499		bus_dmamap_destroy(sc->sc_srcdmat, re->re_src_map);
1500	}
1501	mtx_unlock(&sc->sc_ringmtx);
1502	if (err != ERESTART) {
1503		crp->crp_etype = err;
1504		crypto_done(crp);
1505	} else {
1506		sc->sc_needwakeup |= CRYPTO_SYMQ;
1507	}
1508	return (err);
1509}
1510
1511static void
1512safe_callback(struct safe_softc *sc, struct safe_ringentry *re)
1513{
1514	struct cryptop *crp = (struct cryptop *)re->re_crp;
1515	struct cryptodesc *crd;
1516
1517	safestats.st_opackets++;
1518	safestats.st_obytes += re->re_dst.mapsize;
1519
1520	safe_dma_sync(&sc->sc_ringalloc,
1521		BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
1522	if (re->re_desc.d_csr & SAFE_PE_CSR_STATUS) {
1523		device_printf(sc->sc_dev, "csr 0x%x cmd0 0x%x cmd1 0x%x\n",
1524			re->re_desc.d_csr,
1525			re->re_sa.sa_cmd0, re->re_sa.sa_cmd1);
1526		safestats.st_peoperr++;
1527		crp->crp_etype = EIO;		/* something more meaningful? */
1528	}
1529	if (re->re_dst_map != NULL && re->re_dst_map != re->re_src_map) {
1530		bus_dmamap_sync(sc->sc_dstdmat, re->re_dst_map,
1531		    BUS_DMASYNC_POSTREAD);
1532		bus_dmamap_unload(sc->sc_dstdmat, re->re_dst_map);
1533		bus_dmamap_destroy(sc->sc_dstdmat, re->re_dst_map);
1534	}
1535	bus_dmamap_sync(sc->sc_srcdmat, re->re_src_map, BUS_DMASYNC_POSTWRITE);
1536	bus_dmamap_unload(sc->sc_srcdmat, re->re_src_map);
1537	bus_dmamap_destroy(sc->sc_srcdmat, re->re_src_map);
1538
1539	/*
1540	 * If result was written to a differet mbuf chain, swap
1541	 * it in as the return value and reclaim the original.
1542	 */
1543	if ((crp->crp_flags & CRYPTO_F_IMBUF) && re->re_src_m != re->re_dst_m) {
1544		m_freem(re->re_src_m);
1545		crp->crp_buf = (caddr_t)re->re_dst_m;
1546	}
1547
1548	if (re->re_flags & SAFE_QFLAGS_COPYOUTIV) {
1549		/* copy out IV for future use */
1550		for (crd = crp->crp_desc; crd; crd = crd->crd_next) {
1551			int ivsize;
1552
1553			if (crd->crd_alg == CRYPTO_DES_CBC ||
1554			    crd->crd_alg == CRYPTO_3DES_CBC) {
1555				ivsize = 2*sizeof(u_int32_t);
1556			} else if (crd->crd_alg == CRYPTO_AES_CBC) {
1557				ivsize = 4*sizeof(u_int32_t);
1558			} else
1559				continue;
1560			crypto_copydata(crp->crp_flags, crp->crp_buf,
1561			    crd->crd_skip + crd->crd_len - ivsize, ivsize,
1562			    (caddr_t)sc->sc_sessions[re->re_sesn].ses_iv);
1563			break;
1564		}
1565	}
1566
1567	if (re->re_flags & SAFE_QFLAGS_COPYOUTICV) {
1568		/* copy out ICV result */
1569		for (crd = crp->crp_desc; crd; crd = crd->crd_next) {
1570			if (!(crd->crd_alg == CRYPTO_MD5_HMAC ||
1571			    crd->crd_alg == CRYPTO_SHA1_HMAC ||
1572			    crd->crd_alg == CRYPTO_NULL_HMAC))
1573				continue;
1574			if (crd->crd_alg == CRYPTO_SHA1_HMAC) {
1575				/*
1576				 * SHA-1 ICV's are byte-swapped; fix 'em up
1577				 * before copy them to their destination.
1578				 */
1579				re->re_sastate.sa_saved_indigest[0] =
1580				    bswap32(re->re_sastate.sa_saved_indigest[0]);
1581				re->re_sastate.sa_saved_indigest[1] =
1582				    bswap32(re->re_sastate.sa_saved_indigest[1]);
1583				re->re_sastate.sa_saved_indigest[2] =
1584				    bswap32(re->re_sastate.sa_saved_indigest[2]);
1585			}
1586			crypto_copyback(crp->crp_flags, crp->crp_buf,
1587			    crd->crd_inject,
1588			    sc->sc_sessions[re->re_sesn].ses_mlen,
1589			    (caddr_t)re->re_sastate.sa_saved_indigest);
1590			break;
1591		}
1592	}
1593	crypto_done(crp);
1594}
1595
1596/*
1597 * Copy all data past offset from srcm to dstm.
1598 */
1599static void
1600safe_mcopy(struct mbuf *srcm, struct mbuf *dstm, u_int offset)
1601{
1602	u_int j, dlen, slen;
1603	caddr_t dptr, sptr;
1604
1605	/*
1606	 * Advance src and dst to offset.
1607	 */
1608	j = offset;
1609	while (j >= 0) {
1610		if (srcm->m_len > j)
1611			break;
1612		j -= srcm->m_len;
1613		srcm = srcm->m_next;
1614		if (srcm == NULL)
1615			return;
1616	}
1617	sptr = mtod(srcm, caddr_t) + j;
1618	slen = srcm->m_len - j;
1619
1620	j = offset;
1621	while (j >= 0) {
1622		if (dstm->m_len > j)
1623			break;
1624		j -= dstm->m_len;
1625		dstm = dstm->m_next;
1626		if (dstm == NULL)
1627			return;
1628	}
1629	dptr = mtod(dstm, caddr_t) + j;
1630	dlen = dstm->m_len - j;
1631
1632	/*
1633	 * Copy everything that remains.
1634	 */
1635	for (;;) {
1636		j = min(slen, dlen);
1637		bcopy(sptr, dptr, j);
1638		if (slen == j) {
1639			srcm = srcm->m_next;
1640			if (srcm == NULL)
1641				return;
1642			sptr = srcm->m_data;
1643			slen = srcm->m_len;
1644		} else
1645			sptr += j, slen -= j;
1646		if (dlen == j) {
1647			dstm = dstm->m_next;
1648			if (dstm == NULL)
1649				return;
1650			dptr = dstm->m_data;
1651			dlen = dstm->m_len;
1652		} else
1653			dptr += j, dlen -= j;
1654	}
1655}
1656
1657#ifndef SAFE_NO_RNG
1658#define	SAFE_RNG_MAXWAIT	1000
1659
1660static void
1661safe_rng_init(struct safe_softc *sc)
1662{
1663	u_int32_t w, v;
1664	int i;
1665
1666	WRITE_REG(sc, SAFE_RNG_CTRL, 0);
1667	/* use default value according to the manual */
1668	WRITE_REG(sc, SAFE_RNG_CNFG, 0x834);	/* magic from SafeNet */
1669	WRITE_REG(sc, SAFE_RNG_ALM_CNT, 0);
1670
1671	/*
1672	 * There is a bug in rev 1.0 of the 1140 that when the RNG
1673	 * is brought out of reset the ready status flag does not
1674	 * work until the RNG has finished its internal initialization.
1675	 *
1676	 * So in order to determine the device is through its
1677	 * initialization we must read the data register, using the
1678	 * status reg in the read in case it is initialized.  Then read
1679	 * the data register until it changes from the first read.
1680	 * Once it changes read the data register until it changes
1681	 * again.  At this time the RNG is considered initialized.
1682	 * This could take between 750ms - 1000ms in time.
1683	 */
1684	i = 0;
1685	w = READ_REG(sc, SAFE_RNG_OUT);
1686	do {
1687		v = READ_REG(sc, SAFE_RNG_OUT);
1688		if (v != w) {
1689			w = v;
1690			break;
1691		}
1692		DELAY(10);
1693	} while (++i < SAFE_RNG_MAXWAIT);
1694
1695	/* Wait Until data changes again */
1696	i = 0;
1697	do {
1698		v = READ_REG(sc, SAFE_RNG_OUT);
1699		if (v != w)
1700			break;
1701		DELAY(10);
1702	} while (++i < SAFE_RNG_MAXWAIT);
1703}
1704
1705static __inline void
1706safe_rng_disable_short_cycle(struct safe_softc *sc)
1707{
1708	WRITE_REG(sc, SAFE_RNG_CTRL,
1709		READ_REG(sc, SAFE_RNG_CTRL) &~ SAFE_RNG_CTRL_SHORTEN);
1710}
1711
1712static __inline void
1713safe_rng_enable_short_cycle(struct safe_softc *sc)
1714{
1715	WRITE_REG(sc, SAFE_RNG_CTRL,
1716		READ_REG(sc, SAFE_RNG_CTRL) | SAFE_RNG_CTRL_SHORTEN);
1717}
1718
1719static __inline u_int32_t
1720safe_rng_read(struct safe_softc *sc)
1721{
1722	int i;
1723
1724	i = 0;
1725	while (READ_REG(sc, SAFE_RNG_STAT) != 0 && ++i < SAFE_RNG_MAXWAIT)
1726		;
1727	return READ_REG(sc, SAFE_RNG_OUT);
1728}
1729
1730static void
1731safe_rng(void *arg)
1732{
1733	struct safe_softc *sc = arg;
1734	u_int32_t buf[SAFE_RNG_MAXBUFSIZ];	/* NB: maybe move to softc */
1735	u_int maxwords;
1736	int i;
1737
1738	safestats.st_rng++;
1739	/*
1740	 * Fetch the next block of data.
1741	 */
1742	maxwords = safe_rngbufsize;
1743	if (maxwords > SAFE_RNG_MAXBUFSIZ)
1744		maxwords = SAFE_RNG_MAXBUFSIZ;
1745retry:
1746	for (i = 0; i < maxwords; i++)
1747		buf[i] = safe_rng_read(sc);
1748	/*
1749	 * Check the comparator alarm count and reset the h/w if
1750	 * it exceeds our threshold.  This guards against the
1751	 * hardware oscillators resonating with external signals.
1752	 */
1753	if (READ_REG(sc, SAFE_RNG_ALM_CNT) > safe_rngmaxalarm) {
1754		u_int32_t freq_inc, w;
1755
1756		DPRINTF(("%s: alarm count %u exceeds threshold %u\n", __func__,
1757			READ_REG(sc, SAFE_RNG_ALM_CNT), safe_rngmaxalarm));
1758		safestats.st_rngalarm++;
1759		safe_rng_enable_short_cycle(sc);
1760		freq_inc = 18;
1761		for (i = 0; i < 64; i++) {
1762			w = READ_REG(sc, SAFE_RNG_CNFG);
1763			freq_inc = ((w + freq_inc) & 0x3fL);
1764			w = ((w & ~0x3fL) | freq_inc);
1765			WRITE_REG(sc, SAFE_RNG_CNFG, w);
1766
1767			WRITE_REG(sc, SAFE_RNG_ALM_CNT, 0);
1768
1769			(void) safe_rng_read(sc);
1770			DELAY(25);
1771
1772			if (READ_REG(sc, SAFE_RNG_ALM_CNT) == 0) {
1773				safe_rng_disable_short_cycle(sc);
1774				goto retry;
1775			}
1776			freq_inc = 1;
1777		}
1778		safe_rng_disable_short_cycle(sc);
1779	} else
1780		WRITE_REG(sc, SAFE_RNG_ALM_CNT, 0);
1781
1782	(*sc->sc_harvest)(sc->sc_rndtest, buf, maxwords*sizeof (u_int32_t));
1783	callout_reset(&sc->sc_rngto,
1784		hz * (safe_rnginterval ? safe_rnginterval : 1), safe_rng, sc);
1785}
1786#endif /* SAFE_NO_RNG */
1787
1788static void
1789safe_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
1790{
1791	bus_addr_t *paddr = (bus_addr_t*) arg;
1792	*paddr = segs->ds_addr;
1793}
1794
1795static int
1796safe_dma_malloc(
1797	struct safe_softc *sc,
1798	bus_size_t size,
1799	struct safe_dma_alloc *dma,
1800	int mapflags
1801)
1802{
1803	int r;
1804
1805	r = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev),	/* parent */
1806			       sizeof(u_int32_t), 0,	/* alignment, bounds */
1807			       BUS_SPACE_MAXADDR_32BIT,	/* lowaddr */
1808			       BUS_SPACE_MAXADDR,	/* highaddr */
1809			       NULL, NULL,		/* filter, filterarg */
1810			       size,			/* maxsize */
1811			       1,			/* nsegments */
1812			       size,			/* maxsegsize */
1813			       BUS_DMA_ALLOCNOW,	/* flags */
1814			       NULL, NULL,		/* locking */
1815			       &dma->dma_tag);
1816	if (r != 0) {
1817		device_printf(sc->sc_dev, "safe_dma_malloc: "
1818			"bus_dma_tag_create failed; error %u\n", r);
1819		goto fail_0;
1820	}
1821
1822	r = bus_dmamap_create(dma->dma_tag, BUS_DMA_NOWAIT, &dma->dma_map);
1823	if (r != 0) {
1824		device_printf(sc->sc_dev, "safe_dma_malloc: "
1825			"bus_dmamap_create failed; error %u\n", r);
1826		goto fail_1;
1827	}
1828
1829	r = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
1830			     BUS_DMA_NOWAIT, &dma->dma_map);
1831	if (r != 0) {
1832		device_printf(sc->sc_dev, "safe_dma_malloc: "
1833			"bus_dmammem_alloc failed; size %zu, error %u\n",
1834			size, r);
1835		goto fail_2;
1836	}
1837
1838	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
1839		            size,
1840			    safe_dmamap_cb,
1841			    &dma->dma_paddr,
1842			    mapflags | BUS_DMA_NOWAIT);
1843	if (r != 0) {
1844		device_printf(sc->sc_dev, "safe_dma_malloc: "
1845			"bus_dmamap_load failed; error %u\n", r);
1846		goto fail_3;
1847	}
1848
1849	dma->dma_size = size;
1850	return (0);
1851
1852fail_3:
1853	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
1854fail_2:
1855	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
1856fail_1:
1857	bus_dmamap_destroy(dma->dma_tag, dma->dma_map);
1858	bus_dma_tag_destroy(dma->dma_tag);
1859fail_0:
1860	dma->dma_map = NULL;
1861	dma->dma_tag = NULL;
1862	return (r);
1863}
1864
1865static void
1866safe_dma_free(struct safe_softc *sc, struct safe_dma_alloc *dma)
1867{
1868	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
1869	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
1870	bus_dmamap_destroy(dma->dma_tag, dma->dma_map);
1871	bus_dma_tag_destroy(dma->dma_tag);
1872}
1873
1874/*
1875 * Resets the board.  Values in the regesters are left as is
1876 * from the reset (i.e. initial values are assigned elsewhere).
1877 */
1878static void
1879safe_reset_board(struct safe_softc *sc)
1880{
1881	u_int32_t v;
1882	/*
1883	 * Reset the device.  The manual says no delay
1884	 * is needed between marking and clearing reset.
1885	 */
1886	v = READ_REG(sc, SAFE_PE_DMACFG) &~
1887		(SAFE_PE_DMACFG_PERESET | SAFE_PE_DMACFG_PDRRESET |
1888		 SAFE_PE_DMACFG_SGRESET);
1889	WRITE_REG(sc, SAFE_PE_DMACFG, v
1890				    | SAFE_PE_DMACFG_PERESET
1891				    | SAFE_PE_DMACFG_PDRRESET
1892				    | SAFE_PE_DMACFG_SGRESET);
1893	WRITE_REG(sc, SAFE_PE_DMACFG, v);
1894}
1895
1896/*
1897 * Initialize registers we need to touch only once.
1898 */
1899static void
1900safe_init_board(struct safe_softc *sc)
1901{
1902	u_int32_t v, dwords;
1903
1904	v = READ_REG(sc, SAFE_PE_DMACFG);
1905	v &=~ SAFE_PE_DMACFG_PEMODE;
1906	v |= SAFE_PE_DMACFG_FSENA		/* failsafe enable */
1907	  |  SAFE_PE_DMACFG_GPRPCI		/* gather ring on PCI */
1908	  |  SAFE_PE_DMACFG_SPRPCI		/* scatter ring on PCI */
1909	  |  SAFE_PE_DMACFG_ESDESC		/* endian-swap descriptors */
1910	  |  SAFE_PE_DMACFG_ESSA		/* endian-swap SA's */
1911	  |  SAFE_PE_DMACFG_ESPDESC		/* endian-swap part. desc's */
1912	  ;
1913	WRITE_REG(sc, SAFE_PE_DMACFG, v);
1914#if 0
1915	/* XXX select byte swap based on host byte order */
1916	WRITE_REG(sc, SAFE_ENDIAN, 0x1b);
1917#endif
1918	if (sc->sc_chiprev == SAFE_REV(1,0)) {
1919		/*
1920		 * Avoid large PCI DMA transfers.  Rev 1.0 has a bug where
1921		 * "target mode transfers" done while the chip is DMA'ing
1922		 * >1020 bytes cause the hardware to lockup.  To avoid this
1923		 * we reduce the max PCI transfer size and use small source
1924		 * particle descriptors (<= 256 bytes).
1925		 */
1926		WRITE_REG(sc, SAFE_DMA_CFG, 256);
1927		device_printf(sc->sc_dev,
1928			"Reduce max DMA size to %u words for rev %u.%u WAR\n",
1929			(READ_REG(sc, SAFE_DMA_CFG)>>2) & 0xff,
1930			SAFE_REV_MAJ(sc->sc_chiprev),
1931			SAFE_REV_MIN(sc->sc_chiprev));
1932	}
1933
1934	/* NB: operands+results are overlaid */
1935	WRITE_REG(sc, SAFE_PE_PDRBASE, sc->sc_ringalloc.dma_paddr);
1936	WRITE_REG(sc, SAFE_PE_RDRBASE, sc->sc_ringalloc.dma_paddr);
1937	/*
1938	 * Configure ring entry size and number of items in the ring.
1939	 */
1940	KASSERT((sizeof(struct safe_ringentry) % sizeof(u_int32_t)) == 0,
1941		("PE ring entry not 32-bit aligned!"));
1942	dwords = sizeof(struct safe_ringentry) / sizeof(u_int32_t);
1943	WRITE_REG(sc, SAFE_PE_RINGCFG,
1944		(dwords << SAFE_PE_RINGCFG_OFFSET_S) | SAFE_MAX_NQUEUE);
1945	WRITE_REG(sc, SAFE_PE_RINGPOLL, 0);	/* disable polling */
1946
1947	WRITE_REG(sc, SAFE_PE_GRNGBASE, sc->sc_spalloc.dma_paddr);
1948	WRITE_REG(sc, SAFE_PE_SRNGBASE, sc->sc_dpalloc.dma_paddr);
1949	WRITE_REG(sc, SAFE_PE_PARTSIZE,
1950		(SAFE_TOTAL_DPART<<16) | SAFE_TOTAL_SPART);
1951	/*
1952	 * NB: destination particles are fixed size.  We use
1953	 *     an mbuf cluster and require all results go to
1954	 *     clusters or smaller.
1955	 */
1956	WRITE_REG(sc, SAFE_PE_PARTCFG, SAFE_MAX_DSIZE);
1957
1958	/* it's now safe to enable PE mode, do it */
1959	WRITE_REG(sc, SAFE_PE_DMACFG, v | SAFE_PE_DMACFG_PEMODE);
1960
1961	/*
1962	 * Configure hardware to use level-triggered interrupts and
1963	 * to interrupt after each descriptor is processed.
1964	 */
1965	WRITE_REG(sc, SAFE_HI_CFG, SAFE_HI_CFG_LEVEL);
1966	WRITE_REG(sc, SAFE_HI_DESC_CNT, 1);
1967	WRITE_REG(sc, SAFE_HI_MASK, SAFE_INT_PE_DDONE | SAFE_INT_PE_ERROR);
1968}
1969
/*
 * Init PCI registers.
 *
 * Currently a no-op: the body is empty, so no PCI configuration
 * registers of dev are read or written here.
 */
static void
safe_init_pciregs(device_t dev)
{
}
1977
1978/*
1979 * Clean up after a chip crash.
1980 * It is assumed that the caller in splimp()
1981 */
1982static void
1983safe_cleanchip(struct safe_softc *sc)
1984{
1985
1986	if (sc->sc_nqchip != 0) {
1987		struct safe_ringentry *re = sc->sc_back;
1988
1989		while (re != sc->sc_front) {
1990			if (re->re_desc.d_csr != 0)
1991				safe_free_entry(sc, re);
1992			if (++re == sc->sc_ringtop)
1993				re = sc->sc_ring;
1994		}
1995		sc->sc_back = re;
1996		sc->sc_nqchip = 0;
1997	}
1998}
1999
2000/*
2001 * free a safe_q
2002 * It is assumed that the caller is within splimp().
2003 */
2004static int
2005safe_free_entry(struct safe_softc *sc, struct safe_ringentry *re)
2006{
2007	struct cryptop *crp;
2008
2009	/*
2010	 * Free header MCR
2011	 */
2012	if ((re->re_dst_m != NULL) && (re->re_src_m != re->re_dst_m))
2013		m_freem(re->re_dst_m);
2014
2015	crp = (struct cryptop *)re->re_crp;
2016
2017	re->re_desc.d_csr = 0;
2018
2019	crp->crp_etype = EFAULT;
2020	crypto_done(crp);
2021	return(0);
2022}
2023
/*
 * Routine to reset the chip and clean up.
 * It is assumed that the caller is in splimp().
 *
 * Runs three steps in order: reset the board, reprogram its
 * registers, then discard the requests still sitting in the ring.
 */
static void
safe_totalreset(struct safe_softc *sc)
{
	safe_reset_board(sc);
	safe_init_board(sc);
	safe_cleanchip(sc);
}
2035
2036/*
2037 * Is the operand suitable aligned for direct DMA.  Each
2038 * segment must be aligned on a 32-bit boundary and all
2039 * but the last segment must be a multiple of 4 bytes.
2040 */
2041static int
2042safe_dmamap_aligned(const struct safe_operand *op)
2043{
2044	int i;
2045
2046	for (i = 0; i < op->nsegs; i++) {
2047		if (op->segs[i].ds_addr & 3)
2048			return (0);
2049		if (i != (op->nsegs - 1) && (op->segs[i].ds_len & 3))
2050			return (0);
2051	}
2052	return (1);
2053}
2054
2055/*
2056 * Is the operand suitable for direct DMA as the destination
2057 * of an operation.  The hardware requires that each ``particle''
2058 * but the last in an operation result have the same size.  We
2059 * fix that size at SAFE_MAX_DSIZE bytes.  This routine returns
2060 * 0 if some segment is not a multiple of of this size, 1 if all
2061 * segments are exactly this size, or 2 if segments are at worst
2062 * a multple of this size.
2063 */
2064static int
2065safe_dmamap_uniform(const struct safe_operand *op)
2066{
2067	int result = 1;
2068
2069	if (op->nsegs > 0) {
2070		int i;
2071
2072		for (i = 0; i < op->nsegs-1; i++) {
2073			if (op->segs[i].ds_len % SAFE_MAX_DSIZE)
2074				return (0);
2075			if (op->segs[i].ds_len != SAFE_MAX_DSIZE)
2076				result = 2;
2077		}
2078	}
2079	return (result);
2080}
2081
2082#ifdef SAFE_DEBUG
2083static void
2084safe_dump_dmastatus(struct safe_softc *sc, const char *tag)
2085{
2086	printf("%s: ENDIAN 0x%x SRC 0x%x DST 0x%x STAT 0x%x\n"
2087		, tag
2088		, READ_REG(sc, SAFE_DMA_ENDIAN)
2089		, READ_REG(sc, SAFE_DMA_SRCADDR)
2090		, READ_REG(sc, SAFE_DMA_DSTADDR)
2091		, READ_REG(sc, SAFE_DMA_STAT)
2092	);
2093}
2094
2095static void
2096safe_dump_intrstate(struct safe_softc *sc, const char *tag)
2097{
2098	printf("%s: HI_CFG 0x%x HI_MASK 0x%x HI_DESC_CNT 0x%x HU_STAT 0x%x HM_STAT 0x%x\n"
2099		, tag
2100		, READ_REG(sc, SAFE_HI_CFG)
2101		, READ_REG(sc, SAFE_HI_MASK)
2102		, READ_REG(sc, SAFE_HI_DESC_CNT)
2103		, READ_REG(sc, SAFE_HU_STAT)
2104		, READ_REG(sc, SAFE_HM_STAT)
2105	);
2106}
2107
2108static void
2109safe_dump_ringstate(struct safe_softc *sc, const char *tag)
2110{
2111	u_int32_t estat = READ_REG(sc, SAFE_PE_ERNGSTAT);
2112
2113	/* NB: assume caller has lock on ring */
2114	printf("%s: ERNGSTAT %x (next %u) back %lu front %lu\n",
2115		tag,
2116		estat, (estat >> SAFE_PE_ERNGSTAT_NEXT_S),
2117		(unsigned long)(sc->sc_back - sc->sc_ring),
2118		(unsigned long)(sc->sc_front - sc->sc_ring));
2119}
2120
2121static void
2122safe_dump_request(struct safe_softc *sc, const char* tag, struct safe_ringentry *re)
2123{
2124	int ix, nsegs;
2125
2126	ix = re - sc->sc_ring;
2127	printf("%s: %p (%u): csr %x src %x dst %x sa %x len %x\n"
2128		, tag
2129		, re, ix
2130		, re->re_desc.d_csr
2131		, re->re_desc.d_src
2132		, re->re_desc.d_dst
2133		, re->re_desc.d_sa
2134		, re->re_desc.d_len
2135	);
2136	if (re->re_src.nsegs > 1) {
2137		ix = (re->re_desc.d_src - sc->sc_spalloc.dma_paddr) /
2138			sizeof(struct safe_pdesc);
2139		for (nsegs = re->re_src.nsegs; nsegs; nsegs--) {
2140			printf(" spd[%u] %p: %p size %u flags %x"
2141				, ix, &sc->sc_spring[ix]
2142				, (caddr_t)(uintptr_t) sc->sc_spring[ix].pd_addr
2143				, sc->sc_spring[ix].pd_size
2144				, sc->sc_spring[ix].pd_flags
2145			);
2146			if (sc->sc_spring[ix].pd_size == 0)
2147				printf(" (zero!)");
2148			printf("\n");
2149			if (++ix == SAFE_TOTAL_SPART)
2150				ix = 0;
2151		}
2152	}
2153	if (re->re_dst.nsegs > 1) {
2154		ix = (re->re_desc.d_dst - sc->sc_dpalloc.dma_paddr) /
2155			sizeof(struct safe_pdesc);
2156		for (nsegs = re->re_dst.nsegs; nsegs; nsegs--) {
2157			printf(" dpd[%u] %p: %p flags %x\n"
2158				, ix, &sc->sc_dpring[ix]
2159				, (caddr_t)(uintptr_t) sc->sc_dpring[ix].pd_addr
2160				, sc->sc_dpring[ix].pd_flags
2161			);
2162			if (++ix == SAFE_TOTAL_DPART)
2163				ix = 0;
2164		}
2165	}
2166	printf("sa: cmd0 %08x cmd1 %08x staterec %x\n",
2167		re->re_sa.sa_cmd0, re->re_sa.sa_cmd1, re->re_sa.sa_staterec);
2168	printf("sa: key %x %x %x %x %x %x %x %x\n"
2169		, re->re_sa.sa_key[0]
2170		, re->re_sa.sa_key[1]
2171		, re->re_sa.sa_key[2]
2172		, re->re_sa.sa_key[3]
2173		, re->re_sa.sa_key[4]
2174		, re->re_sa.sa_key[5]
2175		, re->re_sa.sa_key[6]
2176		, re->re_sa.sa_key[7]
2177	);
2178	printf("sa: indigest %x %x %x %x %x\n"
2179		, re->re_sa.sa_indigest[0]
2180		, re->re_sa.sa_indigest[1]
2181		, re->re_sa.sa_indigest[2]
2182		, re->re_sa.sa_indigest[3]
2183		, re->re_sa.sa_indigest[4]
2184	);
2185	printf("sa: outdigest %x %x %x %x %x\n"
2186		, re->re_sa.sa_outdigest[0]
2187		, re->re_sa.sa_outdigest[1]
2188		, re->re_sa.sa_outdigest[2]
2189		, re->re_sa.sa_outdigest[3]
2190		, re->re_sa.sa_outdigest[4]
2191	);
2192	printf("sr: iv %x %x %x %x\n"
2193		, re->re_sastate.sa_saved_iv[0]
2194		, re->re_sastate.sa_saved_iv[1]
2195		, re->re_sastate.sa_saved_iv[2]
2196		, re->re_sastate.sa_saved_iv[3]
2197	);
2198	printf("sr: hashbc %u indigest %x %x %x %x %x\n"
2199		, re->re_sastate.sa_saved_hashbc
2200		, re->re_sastate.sa_saved_indigest[0]
2201		, re->re_sastate.sa_saved_indigest[1]
2202		, re->re_sastate.sa_saved_indigest[2]
2203		, re->re_sastate.sa_saved_indigest[3]
2204		, re->re_sastate.sa_saved_indigest[4]
2205	);
2206}
2207
2208static void
2209safe_dump_ring(struct safe_softc *sc, const char *tag)
2210{
2211	mtx_lock(&sc->sc_ringmtx);
2212	printf("\nSafeNet Ring State:\n");
2213	safe_dump_intrstate(sc, tag);
2214	safe_dump_dmastatus(sc, tag);
2215	safe_dump_ringstate(sc, tag);
2216	if (sc->sc_nqchip) {
2217		struct safe_ringentry *re = sc->sc_back;
2218		do {
2219			safe_dump_request(sc, tag, re);
2220			if (++re == sc->sc_ringtop)
2221				re = sc->sc_ring;
2222		} while (re != sc->sc_front);
2223	}
2224	mtx_unlock(&sc->sc_ringmtx);
2225}
2226
2227static int
2228sysctl_hw_safe_dump(SYSCTL_HANDLER_ARGS)
2229{
2230	char dmode[64];
2231	int error;
2232
2233	strncpy(dmode, "", sizeof(dmode) - 1);
2234	dmode[sizeof(dmode) - 1] = '\0';
2235	error = sysctl_handle_string(oidp, &dmode[0], sizeof(dmode), req);
2236
2237	if (error == 0 && req->newptr != NULL) {
2238		struct safe_softc *sc = safec;
2239
2240		if (!sc)
2241			return EINVAL;
2242		if (strncmp(dmode, "dma", 3) == 0)
2243			safe_dump_dmastatus(sc, "safe0");
2244		else if (strncmp(dmode, "int", 3) == 0)
2245			safe_dump_intrstate(sc, "safe0");
2246		else if (strncmp(dmode, "ring", 4) == 0)
2247			safe_dump_ring(sc, "safe0");
2248		else
2249			return EINVAL;
2250	}
2251	return error;
2252}
2253SYSCTL_PROC(_hw_safe, OID_AUTO, dump, CTLTYPE_STRING | CTLFLAG_RW,
2254	0, 0, sysctl_hw_safe_dump, "A", "Dump driver state");
2255#endif /* SAFE_DEBUG */
2256