/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2003 Sam Leffler, Errno Consulting
 * Copyright (c) 2003 Global Technology Associates, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/*
 * SafeNet SafeXcel-1141 hardware crypto accelerator
 */
#include "opt_safe.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/endian.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <crypto/sha1.h>
#include <opencrypto/cryptodev.h>
#include <opencrypto/cryptosoft.h>
#include <sys/md5.h>
#include <sys/random.h>
#include <sys/kobj.h>

#include "cryptodev_if.h"

#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#ifdef SAFE_RNDTEST
#include <dev/rndtest/rndtest.h>
#endif
#include <dev/safe/safereg.h>
#include <dev/safe/safevar.h>

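/* XXX fallback when the platform headers do not provide bswap32 */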
#ifndef bswap32
#define	bswap32	NTOHL
#endif

/*
 * Prototypes for the device and crypto driver methods.
 */
static	int safe_probe(device_t);
static	int safe_attach(device_t);
static	int safe_detach(device_t);
static	int safe_suspend(device_t);
static	int safe_resume(device_t);
static	int safe_shutdown(device_t);

static	int safe_newsession(device_t, crypto_session_t, struct cryptoini *);
static	int safe_process(device_t, struct cryptop *, int);

static device_method_t safe_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		safe_probe),
	DEVMETHOD(device_attach,	safe_attach),
	DEVMETHOD(device_detach,	safe_detach),
	DEVMETHOD(device_suspend,	safe_suspend),
	DEVMETHOD(device_resume,	safe_resume),
	DEVMETHOD(device_shutdown,	safe_shutdown),

	/* crypto device methods */
	DEVMETHOD(cryptodev_newsession,	safe_newsession),
	DEVMETHOD(cryptodev_process,	safe_process),

	DEVMETHOD_END
};
static driver_t safe_driver = {
	"safe",
	safe_methods,
	sizeof (struct safe_softc)
};
static devclass_t safe_devclass;

DRIVER_MODULE(safe, pci, safe_driver, safe_devclass, 0, 0);
MODULE_DEPEND(safe, crypto, 1, 1, 1);
#ifdef SAFE_RNDTEST
MODULE_DEPEND(safe, rndtest, 1, 1, 1);
#endif

static	void safe_intr(void *);
static	void safe_callback(struct safe_softc *, struct safe_ringentry *);
static	void safe_feed(struct safe_softc *, struct safe_ringentry *);
static	void safe_mcopy(struct mbuf *, struct mbuf *, u_int);
#ifndef SAFE_NO_RNG
static	void safe_rng_init(struct safe_softc *);
static	void safe_rng(void *);
#endif /* SAFE_NO_RNG */
static	int safe_dma_malloc(struct safe_softc *, bus_size_t,
	        struct safe_dma_alloc *, int);
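/* sync the entire DMA region backing a safe_dma_alloc */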
#define	safe_dma_sync(_dma, _flags) \
	bus_dmamap_sync((_dma)->dma_tag, (_dma)->dma_map, (_flags))
static	void safe_dma_free(struct safe_softc *, struct safe_dma_alloc *);
static	int safe_dmamap_aligned(const struct safe_operand *);
static	int safe_dmamap_uniform(const struct safe_operand *);

static	void safe_reset_board(struct safe_softc *);
static	void safe_init_board(struct safe_softc *);
static	void safe_init_pciregs(device_t dev);
static	void safe_cleanchip(struct safe_softc *);
static	void safe_totalreset(struct safe_softc *);

static	int safe_free_entry(struct safe_softc *, struct safe_ringentry *);

static SYSCTL_NODE(_hw, OID_AUTO, safe, CTLFLAG_RD, 0,
    "SafeNet driver parameters");

#ifdef SAFE_DEBUG
static	void safe_dump_dmastatus(struct safe_softc *, const char *);
static	void safe_dump_ringstate(struct safe_softc *, const char *);
static	void safe_dump_intrstate(struct safe_softc *, const char *);
static	void safe_dump_request(struct safe_softc *, const char *,
		struct safe_ringentry *);

static	struct safe_softc *safec;		/* for use by hw.safe.dump */

static	int safe_debug = 0;
SYSCTL_INT(_hw_safe, OID_AUTO, debug, CTLFLAG_RW, &safe_debug,
	    0, "control debugging msgs");
#define	DPRINTF(_x)	if (safe_debug) printf _x
#else
#define	DPRINTF(_x)
#endif

#define	READ_REG(sc,r) \
	bus_space_read_4((sc)->sc_st, (sc)->sc_sh, (r))

#define WRITE_REG(sc,reg,val) \
	bus_space_write_4((sc)->sc_st, (sc)->sc_sh, reg, val)

struct safe_stats safestats;
SYSCTL_STRUCT(_hw_safe, OID_AUTO, stats, CTLFLAG_RD, &safestats,
	    safe_stats, "driver statistics");
#ifndef SAFE_NO_RNG
static	int safe_rnginterval = 1;		/* poll once a second */
SYSCTL_INT(_hw_safe, OID_AUTO, rnginterval, CTLFLAG_RW, &safe_rnginterval,
	    0, "RNG polling interval (secs)");
static	int safe_rngbufsize = 16;		/* 64 bytes each poll  */
SYSCTL_INT(_hw_safe, OID_AUTO, rngbufsize, CTLFLAG_RW, &safe_rngbufsize,
	    0, "RNG polling buffer size (32-bit words)");
static	int safe_rngmaxalarm = 8;		/* max alarms before reset */
SYSCTL_INT(_hw_safe, OID_AUTO, rngmaxalarm, CTLFLAG_RW, &safe_rngmaxalarm,
	    0, "RNG max alarms before reset");
#endif /* SAFE_NO_RNG */

static int
safe_probe(device_t dev)
{
	if (pci_get_vendor(dev) == PCI_VENDOR_SAFENET &&
	    pci_get_device(dev) == PCI_PRODUCT_SAFEXCEL)
		return (BUS_PROBE_DEFAULT);
	return (ENXIO);
}

static const char*
safe_partname(struct safe_softc *sc)
{
	/* XXX sprintf numbers when not decoded */
	switch (pci_get_vendor(sc->sc_dev)) {
	case PCI_VENDOR_SAFENET:
		switch (pci_get_device(sc->sc_dev)) {
		case PCI_PRODUCT_SAFEXCEL: return "SafeNet SafeXcel-1141";
		}
		return "SafeNet unknown-part";
	}
	return "Unknown-vendor unknown-part";
}

#ifndef SAFE_NO_RNG
static void
default_harvest(struct rndtest_state *rsp, void *buf, u_int count)
{
	/* MarkM: FIX!! Check that this does not swamp the harvester! */
	random_harvest_queue(buf, count, RANDOM_PURE_SAFE);
}
#endif /* SAFE_NO_RNG */

static int
safe_attach(device_t dev)
{
	struct safe_softc *sc = device_get_softc(dev);
	u_int32_t raddr;
	u_int32_t i, devinfo;
	int rid;

	bzero(sc, sizeof (*sc));
	sc->sc_dev = dev;

	/* XXX handle power management */

	pci_enable_busmaster(dev);

	/*
	 * Setup memory-mapping of PCI registers.
	 */
	rid = BS_BAR;
	sc->sc_sr = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
					   RF_ACTIVE);
	if (sc->sc_sr == NULL) {
		device_printf(dev, "cannot map register space\n");
		goto bad;
	}
	sc->sc_st = rman_get_bustag(sc->sc_sr);
	sc->sc_sh = rman_get_bushandle(sc->sc_sr);

	/*
	 * Arrange interrupt line.
	 */
	rid = 0;
	sc->sc_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
					    RF_SHAREABLE|RF_ACTIVE);
	if (sc->sc_irq == NULL) {
		device_printf(dev, "could not map interrupt\n");
		goto bad1;
	}
	/*
	 * NB: Network code assumes we are blocked with splimp()
	 *     so make sure the IRQ is mapped appropriately.
	 */
	if (bus_setup_intr(dev, sc->sc_irq, INTR_TYPE_NET | INTR_MPSAFE,
			   NULL, safe_intr, sc, &sc->sc_ih)) {
		device_printf(dev, "could not establish interrupt\n");
		goto bad2;
	}

	sc->sc_cid = crypto_get_driverid(dev, sizeof(struct safe_session),
	    CRYPTOCAP_F_HARDWARE);
	if (sc->sc_cid < 0) {
		device_printf(dev, "could not get crypto driver id\n");
		goto bad3;
	}

	sc->sc_chiprev = READ_REG(sc, SAFE_DEVINFO) &
		(SAFE_DEVINFO_REV_MAJ | SAFE_DEVINFO_REV_MIN);

	/*
	 * Setup DMA descriptor area.
	 */
	if (bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
			       1,			/* alignment */
			       SAFE_DMA_BOUNDARY,	/* boundary */
			       BUS_SPACE_MAXADDR_32BIT,	/* lowaddr */
			       BUS_SPACE_MAXADDR,	/* highaddr */
			       NULL, NULL,		/* filter, filterarg */
			       SAFE_MAX_DMA,		/* maxsize */
			       SAFE_MAX_PART,		/* nsegments */
			       SAFE_MAX_SSIZE,		/* maxsegsize */
			       BUS_DMA_ALLOCNOW,	/* flags */
			       NULL, NULL,		/* locking */
			       &sc->sc_srcdmat)) {
		device_printf(dev, "cannot allocate DMA tag\n");
		goto bad4;
	}
	if (bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
			       1,			/* alignment */
			       SAFE_MAX_DSIZE,		/* boundary */
			       BUS_SPACE_MAXADDR_32BIT,	/* lowaddr */
			       BUS_SPACE_MAXADDR,	/* highaddr */
			       NULL, NULL,		/* filter, filterarg */
			       SAFE_MAX_DMA,		/* maxsize */
			       SAFE_MAX_PART,		/* nsegments */
			       SAFE_MAX_DSIZE,		/* maxsegsize */
			       BUS_DMA_ALLOCNOW,	/* flags */
			       NULL, NULL,		/* locking */
			       &sc->sc_dstdmat)) {
		device_printf(dev, "cannot allocate DMA tag\n");
		goto bad4;
	}

	/*
	 * Allocate packet engine descriptors.
	 */
	if (safe_dma_malloc(sc,
	    SAFE_MAX_NQUEUE * sizeof (struct safe_ringentry),
	    &sc->sc_ringalloc, 0)) {
		device_printf(dev, "cannot allocate PE descriptor ring\n");
		bus_dma_tag_destroy(sc->sc_srcdmat);
		goto bad4;
	}
	/*
	 * Hookup the static portion of all our data structures.
	 */
	sc->sc_ring = (struct safe_ringentry *) sc->sc_ringalloc.dma_vaddr;
	sc->sc_ringtop = sc->sc_ring + SAFE_MAX_NQUEUE;
	sc->sc_front = sc->sc_ring;
	sc->sc_back = sc->sc_ring;
	raddr = sc->sc_ringalloc.dma_paddr;
	bzero(sc->sc_ring, SAFE_MAX_NQUEUE * sizeof(struct safe_ringentry));
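	/*
	 * Point each ring entry's descriptor at the physical address of
	 * its embedded SA record, and each SA at its state record, so
	 * the chip can locate both without per-request setup.
	 */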
	for (i = 0; i < SAFE_MAX_NQUEUE; i++) {
		struct safe_ringentry *re = &sc->sc_ring[i];

		re->re_desc.d_sa = raddr +
			offsetof(struct safe_ringentry, re_sa);
		re->re_sa.sa_staterec = raddr +
			offsetof(struct safe_ringentry, re_sastate);

		raddr += sizeof (struct safe_ringentry);
	}
	mtx_init(&sc->sc_ringmtx, device_get_nameunit(dev),
		"packet engine ring", MTX_DEF);

	/*
	 * Allocate scatter and gather particle descriptors.
	 */
	if (safe_dma_malloc(sc, SAFE_TOTAL_SPART * sizeof (struct safe_pdesc),
	    &sc->sc_spalloc, 0)) {
		device_printf(dev, "cannot allocate source particle "
			"descriptor ring\n");
		mtx_destroy(&sc->sc_ringmtx);
		safe_dma_free(sc, &sc->sc_ringalloc);
		bus_dma_tag_destroy(sc->sc_srcdmat);
		goto bad4;
	}
	sc->sc_spring = (struct safe_pdesc *) sc->sc_spalloc.dma_vaddr;
	sc->sc_springtop = sc->sc_spring + SAFE_TOTAL_SPART;
	sc->sc_spfree = sc->sc_spring;
	bzero(sc->sc_spring, SAFE_TOTAL_SPART * sizeof(struct safe_pdesc));

	if (safe_dma_malloc(sc, SAFE_TOTAL_DPART * sizeof (struct safe_pdesc),
	    &sc->sc_dpalloc, 0)) {
		device_printf(dev, "cannot allocate destination particle "
			"descriptor ring\n");
		mtx_destroy(&sc->sc_ringmtx);
		safe_dma_free(sc, &sc->sc_spalloc);
		safe_dma_free(sc, &sc->sc_ringalloc);
		bus_dma_tag_destroy(sc->sc_dstdmat);
		goto bad4;
	}
	sc->sc_dpring = (struct safe_pdesc *) sc->sc_dpalloc.dma_vaddr;
	sc->sc_dpringtop = sc->sc_dpring + SAFE_TOTAL_DPART;
	sc->sc_dpfree = sc->sc_dpring;
	bzero(sc->sc_dpring, SAFE_TOTAL_DPART * sizeof(struct safe_pdesc));

	device_printf(sc->sc_dev, "%s", safe_partname(sc));

	devinfo = READ_REG(sc, SAFE_DEVINFO);
	if (devinfo & SAFE_DEVINFO_RNG) {
		sc->sc_flags |= SAFE_FLAGS_RNG;
		printf(" rng");
	}
	if (devinfo & SAFE_DEVINFO_PKEY) {
#if 0
		printf(" key");
		sc->sc_flags |= SAFE_FLAGS_KEY;
		crypto_kregister(sc->sc_cid, CRK_MOD_EXP, 0);
		crypto_kregister(sc->sc_cid, CRK_MOD_EXP_CRT, 0);
#endif
	}
	if (devinfo & SAFE_DEVINFO_DES) {
		printf(" des/3des");
		crypto_register(sc->sc_cid, CRYPTO_3DES_CBC, 0, 0);
		crypto_register(sc->sc_cid, CRYPTO_DES_CBC, 0, 0);
	}
	if (devinfo & SAFE_DEVINFO_AES) {
		printf(" aes");
		crypto_register(sc->sc_cid, CRYPTO_AES_CBC, 0, 0);
	}
	if (devinfo & SAFE_DEVINFO_MD5) {
		printf(" md5");
		crypto_register(sc->sc_cid, CRYPTO_MD5_HMAC, 0, 0);
	}
	if (devinfo & SAFE_DEVINFO_SHA1) {
		printf(" sha1");
		crypto_register(sc->sc_cid, CRYPTO_SHA1_HMAC, 0, 0);
	}
	printf(" null");
	crypto_register(sc->sc_cid, CRYPTO_NULL_CBC, 0, 0);
	crypto_register(sc->sc_cid, CRYPTO_NULL_HMAC, 0, 0);
	/* XXX other supported algorithms */
	printf("\n");

	safe_reset_board(sc);		/* reset h/w */
	safe_init_pciregs(dev);		/* init pci settings */
	safe_init_board(sc);		/* init h/w */

#ifndef SAFE_NO_RNG
	if (sc->sc_flags & SAFE_FLAGS_RNG) {
#ifdef SAFE_RNDTEST
		sc->sc_rndtest = rndtest_attach(dev);
		if (sc->sc_rndtest)
			sc->sc_harvest = rndtest_harvest;
		else
			sc->sc_harvest = default_harvest;
#else
		sc->sc_harvest = default_harvest;
#endif
		safe_rng_init(sc);

		callout_init(&sc->sc_rngto, 1);
		callout_reset(&sc->sc_rngto, hz*safe_rnginterval, safe_rng, sc);
	}
#endif /* SAFE_NO_RNG */
#ifdef SAFE_DEBUG
	safec = sc;			/* for use by hw.safe.dump */
#endif
	return (0);
bad4:
	crypto_unregister_all(sc->sc_cid);
bad3:
	bus_teardown_intr(dev, sc->sc_irq, sc->sc_ih);
bad2:
	bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_irq);
bad1:
	bus_release_resource(dev, SYS_RES_MEMORY, BS_BAR, sc->sc_sr);
bad:
	return (ENXIO);
}

/*
 * Detach a device that successfully probed.
 */
static int
safe_detach(device_t dev)
{
	struct safe_softc *sc = device_get_softc(dev);

	/* XXX wait/abort active ops */

	WRITE_REG(sc, SAFE_HI_MASK, 0);		/* disable interrupts */

	callout_stop(&sc->sc_rngto);

	crypto_unregister_all(sc->sc_cid);

#ifdef SAFE_RNDTEST
	if (sc->sc_rndtest)
		rndtest_detach(sc->sc_rndtest);
#endif

	safe_cleanchip(sc);
	safe_dma_free(sc, &sc->sc_dpalloc);
	safe_dma_free(sc, &sc->sc_spalloc);
	mtx_destroy(&sc->sc_ringmtx);
	safe_dma_free(sc, &sc->sc_ringalloc);

	bus_generic_detach(dev);
	bus_teardown_intr(dev, sc->sc_irq, sc->sc_ih);
	bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_irq);

	bus_dma_tag_destroy(sc->sc_srcdmat);
	bus_dma_tag_destroy(sc->sc_dstdmat);
	bus_release_resource(dev, SYS_RES_MEMORY, BS_BAR, sc->sc_sr);

	return (0);
}

/*
 * Stop all chip i/o so that the kernel's probe routines don't
 * get confused by errant DMAs when rebooting.
 */
static int
safe_shutdown(device_t dev)
{
#ifdef notyet
	safe_stop(device_get_softc(dev));
#endif
	return (0);
}

/*
 * Device suspend routine.
 */
static int
safe_suspend(device_t dev)
{
	struct safe_softc *sc = device_get_softc(dev);

#ifdef notyet
	/* XXX stop the device and save PCI settings */
#endif
	sc->sc_suspended = 1;

	return (0);
}

static int
safe_resume(device_t dev)
{
	struct safe_softc *sc = device_get_softc(dev);

#ifdef notyet
	/* XXX restore PCI settings and start the device */
#endif
	sc->sc_suspended = 0;
	return (0);
}

/*
 * SafeXcel Interrupt routine
 */
static void
safe_intr(void *arg)
{
	struct safe_softc *sc = arg;
	volatile u_int32_t stat;

	stat = READ_REG(sc, SAFE_HM_STAT);
	if (stat == 0)			/* shared irq, not for us */
		return;

	WRITE_REG(sc, SAFE_HI_CLR, stat);	/* IACK */

	if ((stat & SAFE_INT_PE_DDONE)) {
		/*
		 * Descriptor(s) done; scan the ring and
		 * process completed operations.
		 */
		mtx_lock(&sc->sc_ringmtx);
		while (sc->sc_back != sc->sc_front) {
			struct safe_ringentry *re = sc->sc_back;
#ifdef SAFE_DEBUG
			if (safe_debug) {
				safe_dump_ringstate(sc, __func__);
				safe_dump_request(sc, __func__, re);
			}
#endif
			/*
			 * safe_process marks ring entries that were allocated
			 * but not used with a csr of zero.  This ensures the
			 * ring front pointer never needs to be set backwards
			 * in the event that an entry is allocated but not used
			 * because of a setup error.
			 */
			if (re->re_desc.d_csr != 0) {
				if (!SAFE_PE_CSR_IS_DONE(re->re_desc.d_csr))
					break;
				if (!SAFE_PE_LEN_IS_DONE(re->re_desc.d_len))
					break;
				sc->sc_nqchip--;
				safe_callback(sc, re);
			}
			if (++(sc->sc_back) == sc->sc_ringtop)
				sc->sc_back = sc->sc_ring;
		}
		mtx_unlock(&sc->sc_ringmtx);
	}

	/*
	 * Check to see if we got any DMA Error
	 */
	if (stat & SAFE_INT_PE_ERROR) {
		DPRINTF(("dmaerr dmastat %08x\n",
			READ_REG(sc, SAFE_PE_DMASTAT)));
		safestats.st_dmaerr++;
		safe_totalreset(sc);
#if 0
		safe_feed(sc);
#endif
	}

	if (sc->sc_needwakeup) {		/* XXX check high watermark */
		int wakeup = sc->sc_needwakeup & (CRYPTO_SYMQ|CRYPTO_ASYMQ);
		DPRINTF(("%s: wakeup crypto %x\n", __func__,
			sc->sc_needwakeup));
		sc->sc_needwakeup &= ~wakeup;
		crypto_unblock(sc->sc_cid, wakeup);
	}
}

/*
 * safe_feed() - post a request to chip
 */
static void
safe_feed(struct safe_softc *sc, struct safe_ringentry *re)
{
	bus_dmamap_sync(sc->sc_srcdmat, re->re_src_map, BUS_DMASYNC_PREWRITE);
	if (re->re_dst_map != NULL)
		bus_dmamap_sync(sc->sc_dstdmat, re->re_dst_map,
			BUS_DMASYNC_PREREAD);
	/* XXX have no smaller granularity */
	safe_dma_sync(&sc->sc_ringalloc,
		BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	safe_dma_sync(&sc->sc_spalloc, BUS_DMASYNC_PREWRITE);
	safe_dma_sync(&sc->sc_dpalloc, BUS_DMASYNC_PREWRITE);

#ifdef SAFE_DEBUG
	if (safe_debug) {
		safe_dump_ringstate(sc, __func__);
		safe_dump_request(sc, __func__, re);
	}
#endif
	sc->sc_nqchip++;
	if (sc->sc_nqchip > safestats.st_maxqchip)
		safestats.st_maxqchip = sc->sc_nqchip;
	/* poke h/w to check descriptor ring, any value can be written */
	WRITE_REG(sc, SAFE_HI_RD_DESCR, 0);
}

#define	N(a)	(sizeof(a) / sizeof (a[0]))
static void
safe_setup_enckey(struct safe_session *ses, caddr_t key)
{
	int i;

	bcopy(key, ses->ses_key, ses->ses_klen / 8);

	/* PE is little-endian, ensure proper byte order */
	for (i = 0; i < N(ses->ses_key); i++)
		ses->ses_key[i] = htole32(ses->ses_key[i]);
}

static void
safe_setup_mackey(struct safe_session *ses, int algo, caddr_t key, int klen)
{
	MD5_CTX md5ctx;
	SHA1_CTX sha1ctx;
	int i;


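	/*
	 * Precompute the HMAC inner and outer digest state: hash one
	 * block of key^ipad and of key^opad and save the resulting
	 * midstates.  The hardware continues each hash from these
	 * values, so the key need not be processed per packet.
	 */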
	for (i = 0; i < klen; i++)
		key[i] ^= HMAC_IPAD_VAL;

	if (algo == CRYPTO_MD5_HMAC) {
		MD5Init(&md5ctx);
		MD5Update(&md5ctx, key, klen);
		MD5Update(&md5ctx, hmac_ipad_buffer, MD5_BLOCK_LEN - klen);
		bcopy(md5ctx.state, ses->ses_hminner, sizeof(md5ctx.state));
	} else {
		SHA1Init(&sha1ctx);
		SHA1Update(&sha1ctx, key, klen);
		SHA1Update(&sha1ctx, hmac_ipad_buffer,
		    SHA1_BLOCK_LEN - klen);
		bcopy(sha1ctx.h.b32, ses->ses_hminner, sizeof(sha1ctx.h.b32));
	}

	for (i = 0; i < klen; i++)
		key[i] ^= (HMAC_IPAD_VAL ^ HMAC_OPAD_VAL);

	if (algo == CRYPTO_MD5_HMAC) {
		MD5Init(&md5ctx);
		MD5Update(&md5ctx, key, klen);
		MD5Update(&md5ctx, hmac_opad_buffer, MD5_BLOCK_LEN - klen);
		bcopy(md5ctx.state, ses->ses_hmouter, sizeof(md5ctx.state));
	} else {
		SHA1Init(&sha1ctx);
		SHA1Update(&sha1ctx, key, klen);
		SHA1Update(&sha1ctx, hmac_opad_buffer,
		    SHA1_BLOCK_LEN - klen);
		bcopy(sha1ctx.h.b32, ses->ses_hmouter, sizeof(sha1ctx.h.b32));
	}

	for (i = 0; i < klen; i++)
		key[i] ^= HMAC_OPAD_VAL;

	/* PE is little-endian, ensure proper byte order */
	for (i = 0; i < N(ses->ses_hminner); i++) {
		ses->ses_hminner[i] = htole32(ses->ses_hminner[i]);
		ses->ses_hmouter[i] = htole32(ses->ses_hmouter[i]);
	}
}
#undef N

/*
 * Allocate a new 'session': validate the requested algorithm chain
 * and initialize the driver-private session state.
 */
static int
safe_newsession(device_t dev, crypto_session_t cses, struct cryptoini *cri)
{
	struct safe_softc *sc = device_get_softc(dev);
	struct cryptoini *c, *encini = NULL, *macini = NULL;
	struct safe_session *ses = NULL;

	if (cri == NULL || sc == NULL)
		return (EINVAL);

	for (c = cri; c != NULL; c = c->cri_next) {
		if (c->cri_alg == CRYPTO_MD5_HMAC ||
		    c->cri_alg == CRYPTO_SHA1_HMAC ||
		    c->cri_alg == CRYPTO_NULL_HMAC) {
			if (macini)
				return (EINVAL);
			macini = c;
		} else if (c->cri_alg == CRYPTO_DES_CBC ||
		    c->cri_alg == CRYPTO_3DES_CBC ||
		    c->cri_alg == CRYPTO_AES_CBC ||
		    c->cri_alg == CRYPTO_NULL_CBC) {
			if (encini)
				return (EINVAL);
			encini = c;
		} else
			return (EINVAL);
	}
	if (encini == NULL && macini == NULL)
		return (EINVAL);
	if (encini) {			/* validate key length */
		switch (encini->cri_alg) {
		case CRYPTO_DES_CBC:
			if (encini->cri_klen != 64)
				return (EINVAL);
			break;
		case CRYPTO_3DES_CBC:
			if (encini->cri_klen != 192)
				return (EINVAL);
			break;
		case CRYPTO_AES_CBC:
			if (encini->cri_klen != 128 &&
			    encini->cri_klen != 192 &&
			    encini->cri_klen != 256)
				return (EINVAL);
			break;
		}
	}

	ses = crypto_get_driver_session(cses);
	if (encini) {
		/* get an IV */
		/* XXX may read fewer than requested */
		read_random(ses->ses_iv, sizeof(ses->ses_iv));

		ses->ses_klen = encini->cri_klen;
		if (encini->cri_key != NULL)
			safe_setup_enckey(ses, encini->cri_key);
	}

	if (macini) {
		ses->ses_mlen = macini->cri_mlen;
		if (ses->ses_mlen == 0) {
			if (macini->cri_alg == CRYPTO_MD5_HMAC)
				ses->ses_mlen = MD5_HASH_LEN;
			else
				ses->ses_mlen = SHA1_HASH_LEN;
		}

		if (macini->cri_key != NULL) {
			safe_setup_mackey(ses, macini->cri_alg, macini->cri_key,
			    macini->cri_klen / 8);
		}
	}

	return (0);
}

static void
safe_op_cb(void *arg, bus_dma_segment_t *seg, int nsegs, bus_size_t mapsize, int error)
{
	struct safe_operand *op = arg;

	DPRINTF(("%s: mapsize %u nsegs %d error %d\n", __func__,
		(u_int) mapsize, nsegs, error));
	if (error != 0)
		return;
	op->mapsize = mapsize;
	op->nsegs = nsegs;
	bcopy(seg, op->segs, nsegs * sizeof (seg[0]));
}

static int
safe_process(device_t dev, struct cryptop *crp, int hint)
{
	struct safe_softc *sc = device_get_softc(dev);
	int err = 0, i, nicealign, uniform;
	struct cryptodesc *crd1, *crd2, *maccrd, *enccrd;
	int bypass, oplen, ivsize;
	caddr_t iv;
	int16_t coffset;
	struct safe_session *ses;
	struct safe_ringentry *re;
	struct safe_sarec *sa;
	struct safe_pdesc *pd;
	u_int32_t cmd0, cmd1, staterec;

	if (crp == NULL || crp->crp_callback == NULL || sc == NULL) {
		safestats.st_invalid++;
		return (EINVAL);
	}

	mtx_lock(&sc->sc_ringmtx);
	if (sc->sc_front == sc->sc_back && sc->sc_nqchip != 0) {
		safestats.st_ringfull++;
		sc->sc_needwakeup |= CRYPTO_SYMQ;
		mtx_unlock(&sc->sc_ringmtx);
		return (ERESTART);
	}
	re = sc->sc_front;

	staterec = re->re_sa.sa_staterec;	/* save */
	/* NB: zero everything but the PE descriptor */
	bzero(&re->re_sa, sizeof(struct safe_ringentry) - sizeof(re->re_desc));
	re->re_sa.sa_staterec = staterec;	/* restore */

	re->re_crp = crp;

	if (crp->crp_flags & CRYPTO_F_IMBUF) {
		re->re_src_m = (struct mbuf *)crp->crp_buf;
		re->re_dst_m = (struct mbuf *)crp->crp_buf;
	} else if (crp->crp_flags & CRYPTO_F_IOV) {
		re->re_src_io = (struct uio *)crp->crp_buf;
		re->re_dst_io = (struct uio *)crp->crp_buf;
	} else {
		safestats.st_badflags++;
		err = EINVAL;
		goto errout;	/* XXX we don't handle contiguous blocks! */
	}

	sa = &re->re_sa;
	ses = crypto_get_driver_session(crp->crp_session);

	crd1 = crp->crp_desc;
	if (crd1 == NULL) {
		safestats.st_nodesc++;
		err = EINVAL;
		goto errout;
	}
	crd2 = crd1->crd_next;

	cmd0 = SAFE_SA_CMD0_BASIC;		/* basic group operation */
	cmd1 = 0;
	if (crd2 == NULL) {
		if (crd1->crd_alg == CRYPTO_MD5_HMAC ||
		    crd1->crd_alg == CRYPTO_SHA1_HMAC ||
		    crd1->crd_alg == CRYPTO_NULL_HMAC) {
			maccrd = crd1;
			enccrd = NULL;
			cmd0 |= SAFE_SA_CMD0_OP_HASH;
		} else if (crd1->crd_alg == CRYPTO_DES_CBC ||
		    crd1->crd_alg == CRYPTO_3DES_CBC ||
		    crd1->crd_alg == CRYPTO_AES_CBC ||
		    crd1->crd_alg == CRYPTO_NULL_CBC) {
			maccrd = NULL;
			enccrd = crd1;
			cmd0 |= SAFE_SA_CMD0_OP_CRYPT;
		} else {
			safestats.st_badalg++;
			err = EINVAL;
			goto errout;
		}
	} else {
		if ((crd1->crd_alg == CRYPTO_MD5_HMAC ||
		    crd1->crd_alg == CRYPTO_SHA1_HMAC ||
		    crd1->crd_alg == CRYPTO_NULL_HMAC) &&
		    (crd2->crd_alg == CRYPTO_DES_CBC ||
			crd2->crd_alg == CRYPTO_3DES_CBC ||
		        crd2->crd_alg == CRYPTO_AES_CBC ||
		        crd2->crd_alg == CRYPTO_NULL_CBC) &&
		    ((crd2->crd_flags & CRD_F_ENCRYPT) == 0)) {
			maccrd = crd1;
			enccrd = crd2;
		} else if ((crd1->crd_alg == CRYPTO_DES_CBC ||
		    crd1->crd_alg == CRYPTO_3DES_CBC ||
		    crd1->crd_alg == CRYPTO_AES_CBC ||
		    crd1->crd_alg == CRYPTO_NULL_CBC) &&
		    (crd2->crd_alg == CRYPTO_MD5_HMAC ||
			crd2->crd_alg == CRYPTO_SHA1_HMAC ||
			crd2->crd_alg == CRYPTO_NULL_HMAC) &&
		    (crd1->crd_flags & CRD_F_ENCRYPT)) {
			enccrd = crd1;
			maccrd = crd2;
		} else {
			safestats.st_badalg++;
			err = EINVAL;
			goto errout;
		}
		cmd0 |= SAFE_SA_CMD0_OP_BOTH;
	}

	if (enccrd) {
		if (enccrd->crd_flags & CRD_F_KEY_EXPLICIT)
			safe_setup_enckey(ses, enccrd->crd_key);

		if (enccrd->crd_alg == CRYPTO_DES_CBC) {
			cmd0 |= SAFE_SA_CMD0_DES;
			cmd1 |= SAFE_SA_CMD1_CBC;
			ivsize = 2*sizeof(u_int32_t);
		} else if (enccrd->crd_alg == CRYPTO_3DES_CBC) {
			cmd0 |= SAFE_SA_CMD0_3DES;
			cmd1 |= SAFE_SA_CMD1_CBC;
			ivsize = 2*sizeof(u_int32_t);
		} else if (enccrd->crd_alg == CRYPTO_AES_CBC) {
			cmd0 |= SAFE_SA_CMD0_AES;
			cmd1 |= SAFE_SA_CMD1_CBC;
			if (ses->ses_klen == 128)
			     cmd1 |= SAFE_SA_CMD1_AES128;
			else if (ses->ses_klen == 192)
			     cmd1 |= SAFE_SA_CMD1_AES192;
			else
			     cmd1 |= SAFE_SA_CMD1_AES256;
			ivsize = 4*sizeof(u_int32_t);
		} else {
			cmd0 |= SAFE_SA_CMD0_CRYPT_NULL;
			ivsize = 0;
		}

		/*
		 * Setup encrypt/decrypt state.  When using basic ops
		 * we can't use an inline IV because hash/crypt offset
		 * must be from the end of the IV to the start of the
		 * crypt data and this leaves out the preceding header
		 * from the hash calculation.  Instead we place the IV
		 * in the state record and set the hash/crypt offset to
		 * copy both the header+IV.
		 */
		if (enccrd->crd_flags & CRD_F_ENCRYPT) {
			cmd0 |= SAFE_SA_CMD0_OUTBOUND;

			if (enccrd->crd_flags & CRD_F_IV_EXPLICIT)
				iv = enccrd->crd_iv;
			else
				iv = (caddr_t) ses->ses_iv;
			if ((enccrd->crd_flags & CRD_F_IV_PRESENT) == 0) {
				crypto_copyback(crp->crp_flags, crp->crp_buf,
				    enccrd->crd_inject, ivsize, iv);
			}
			bcopy(iv, re->re_sastate.sa_saved_iv, ivsize);
			cmd0 |= SAFE_SA_CMD0_IVLD_STATE | SAFE_SA_CMD0_SAVEIV;
			re->re_flags |= SAFE_QFLAGS_COPYOUTIV;
		} else {
			cmd0 |= SAFE_SA_CMD0_INBOUND;

			if (enccrd->crd_flags & CRD_F_IV_EXPLICIT) {
				bcopy(enccrd->crd_iv,
					re->re_sastate.sa_saved_iv, ivsize);
			} else {
				crypto_copydata(crp->crp_flags, crp->crp_buf,
				    enccrd->crd_inject, ivsize,
				    (caddr_t)re->re_sastate.sa_saved_iv);
			}
			cmd0 |= SAFE_SA_CMD0_IVLD_STATE;
		}
		/*
		 * For basic encryption use the zero pad algorithm.
		 * This pads results to an 8-byte boundary and
		 * suppresses padding verification for inbound (i.e.
		 * decrypt) operations.
		 *
		 * NB: Not sure if the 8-byte pad boundary is a problem.
		 */
		cmd0 |= SAFE_SA_CMD0_PAD_ZERO;

		/* XXX assert key bufs have the same size */
		bcopy(ses->ses_key, sa->sa_key, sizeof(sa->sa_key));
	}

	if (maccrd) {
		if (maccrd->crd_flags & CRD_F_KEY_EXPLICIT) {
			safe_setup_mackey(ses, maccrd->crd_alg,
			    maccrd->crd_key, maccrd->crd_klen / 8);
		}

		if (maccrd->crd_alg == CRYPTO_MD5_HMAC) {
			cmd0 |= SAFE_SA_CMD0_MD5;
			cmd1 |= SAFE_SA_CMD1_HMAC;	/* NB: enable HMAC */
		} else if (maccrd->crd_alg == CRYPTO_SHA1_HMAC) {
			cmd0 |= SAFE_SA_CMD0_SHA1;
			cmd1 |= SAFE_SA_CMD1_HMAC;	/* NB: enable HMAC */
		} else {
			cmd0 |= SAFE_SA_CMD0_HASH_NULL;
		}
		/*
		 * Digest data is loaded from the SA and the hash
		 * result is saved to the state block where we
		 * retrieve it for return to the caller.
		 */
		/* XXX assert digest bufs have the same size */
		bcopy(ses->ses_hminner, sa->sa_indigest,
			sizeof(sa->sa_indigest));
		bcopy(ses->ses_hmouter, sa->sa_outdigest,
			sizeof(sa->sa_outdigest));

		cmd0 |= SAFE_SA_CMD0_HSLD_SA | SAFE_SA_CMD0_SAVEHASH;
		re->re_flags |= SAFE_QFLAGS_COPYOUTICV;
	}

	if (enccrd && maccrd) {
		/*
		 * The offset from hash data to the start of
		 * crypt data is the difference in the skips.
		 */
		bypass = maccrd->crd_skip;
		coffset = enccrd->crd_skip - maccrd->crd_skip;
		if (coffset < 0) {
			DPRINTF(("%s: hash does not precede crypt; "
				"mac skip %u enc skip %u\n",
				__func__, maccrd->crd_skip, enccrd->crd_skip));
			safestats.st_skipmismatch++;
			err = EINVAL;
			goto errout;
		}
		oplen = enccrd->crd_skip + enccrd->crd_len;
		if (maccrd->crd_skip + maccrd->crd_len != oplen) {
			DPRINTF(("%s: hash amount %u != crypt amount %u\n",
				__func__, maccrd->crd_skip + maccrd->crd_len,
				oplen));
			safestats.st_lenmismatch++;
			err = EINVAL;
			goto errout;
		}
#ifdef SAFE_DEBUG
		if (safe_debug) {
			printf("mac: skip %d, len %d, inject %d\n",
			    maccrd->crd_skip, maccrd->crd_len,
			    maccrd->crd_inject);
			printf("enc: skip %d, len %d, inject %d\n",
			    enccrd->crd_skip, enccrd->crd_len,
			    enccrd->crd_inject);
			printf("bypass %d coffset %d oplen %d\n",
				bypass, coffset, oplen);
		}
#endif
		if (coffset & 3) {	/* offset must be 32-bit aligned */
			DPRINTF(("%s: coffset %u misaligned\n",
				__func__, coffset));
			safestats.st_coffmisaligned++;
			err = EINVAL;
			goto errout;
		}
		coffset >>= 2;
		if (coffset > 255) {	/* offset must be <256 dwords */
			DPRINTF(("%s: coffset %u too big\n",
				__func__, coffset));
			safestats.st_cofftoobig++;
			err = EINVAL;
			goto errout;
		}
		/*
		 * Tell the hardware to copy the header to the output.
		 * The header is defined as the data from the end of
		 * the bypass to the start of data to be encrypted.
		 * Typically this is the inline IV.  Note that you need
		 * to do this even if src+dst are the same; it appears
		 * that w/o this bit the crypted data is written
		 * immediately after the bypass data.
		 */
		cmd1 |= SAFE_SA_CMD1_HDRCOPY;
		/*
		 * Disable IP header mutable bit handling.  This is
		 * needed to get correct HMAC calculations.
		 */
		cmd1 |= SAFE_SA_CMD1_MUTABLE;
	} else {
		if (enccrd) {
			bypass = enccrd->crd_skip;
			oplen = bypass + enccrd->crd_len;
		} else {
			bypass = maccrd->crd_skip;
			oplen = bypass + maccrd->crd_len;
		}
		coffset = 0;
	}
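	/*
	 * Hypothetical example: for an authenticate+encrypt request
	 * with maccrd->crd_skip = 8 and enccrd->crd_skip = 16, bypass
	 * is 8 bytes, coffset is (16 - 8) >> 2 = 2 dwords, and oplen
	 * runs to the end of the crypt data.
	 */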
	/* XXX verify multiple of 4 when using s/g */
	if (bypass > 96) {		/* bypass offset must be <= 96 bytes */
		DPRINTF(("%s: bypass %u too big\n", __func__, bypass));
		safestats.st_bypasstoobig++;
		err = EINVAL;
		goto errout;
	}

	if (bus_dmamap_create(sc->sc_srcdmat, BUS_DMA_NOWAIT, &re->re_src_map)) {
		safestats.st_nomap++;
		err = ENOMEM;
		goto errout;
	}
	if (crp->crp_flags & CRYPTO_F_IMBUF) {
		if (bus_dmamap_load_mbuf(sc->sc_srcdmat, re->re_src_map,
		    re->re_src_m, safe_op_cb,
		    &re->re_src, BUS_DMA_NOWAIT) != 0) {
			bus_dmamap_destroy(sc->sc_srcdmat, re->re_src_map);
			re->re_src_map = NULL;
			safestats.st_noload++;
			err = ENOMEM;
			goto errout;
		}
	} else if (crp->crp_flags & CRYPTO_F_IOV) {
		if (bus_dmamap_load_uio(sc->sc_srcdmat, re->re_src_map,
		    re->re_src_io, safe_op_cb,
		    &re->re_src, BUS_DMA_NOWAIT) != 0) {
			bus_dmamap_destroy(sc->sc_srcdmat, re->re_src_map);
			re->re_src_map = NULL;
			safestats.st_noload++;
			err = ENOMEM;
			goto errout;
		}
	}
	nicealign = safe_dmamap_aligned(&re->re_src);
	uniform = safe_dmamap_uniform(&re->re_src);

	DPRINTF(("src nicealign %u uniform %u nsegs %u\n",
		nicealign, uniform, re->re_src.nsegs));
	if (re->re_src.nsegs > 1) {
		re->re_desc.d_src = sc->sc_spalloc.dma_paddr +
			((caddr_t) sc->sc_spfree - (caddr_t) sc->sc_spring);
		for (i = 0; i < re->re_src_nsegs; i++) {
			/* NB: no need to check if there's space */
			pd = sc->sc_spfree;
			if (++(sc->sc_spfree) == sc->sc_springtop)
				sc->sc_spfree = sc->sc_spring;

			KASSERT((pd->pd_flags&3) == 0 ||
				(pd->pd_flags&3) == SAFE_PD_DONE,
				("bogus source particle descriptor; flags %x",
				pd->pd_flags));
			pd->pd_addr = re->re_src_segs[i].ds_addr;
			pd->pd_size = re->re_src_segs[i].ds_len;
			pd->pd_flags = SAFE_PD_READY;
		}
		cmd0 |= SAFE_SA_CMD0_IGATHER;
	} else {
		/*
		 * No need for gather, reference the operand directly.
		 */
		re->re_desc.d_src = re->re_src_segs[0].ds_addr;
	}

	if (enccrd == NULL && maccrd != NULL) {
		/*
		 * Hash op; no destination needed.
		 */
	} else {
		if (crp->crp_flags & CRYPTO_F_IOV) {
			if (!nicealign) {
				safestats.st_iovmisaligned++;
				err = EINVAL;
				goto errout;
			}
			if (uniform != 1) {
				/*
				 * Source is not suitable for direct use as
				 * the destination.  Create a new scatter/gather
				 * list based on the destination requirements
				 * and check if that's ok.
				 */
				if (bus_dmamap_create(sc->sc_dstdmat,
				    BUS_DMA_NOWAIT, &re->re_dst_map)) {
					safestats.st_nomap++;
					err = ENOMEM;
					goto errout;
				}
				if (bus_dmamap_load_uio(sc->sc_dstdmat,
				    re->re_dst_map, re->re_dst_io,
				    safe_op_cb, &re->re_dst,
				    BUS_DMA_NOWAIT) != 0) {
					bus_dmamap_destroy(sc->sc_dstdmat,
						re->re_dst_map);
					re->re_dst_map = NULL;
					safestats.st_noload++;
					err = ENOMEM;
					goto errout;
				}
				uniform = safe_dmamap_uniform(&re->re_dst);
				if (!uniform) {
					/*
					 * There's no way to handle the DMA
					 * requirements with this uio.  We
					 * could create a separate DMA area for
					 * the result and then copy it back,
					 * but for now we just bail and return
					 * an error.  Note that uio requests
					 * > SAFE_MAX_DSIZE are handled because
					 * the DMA map and segment list for the
					 * destination will result in a
					 * destination particle list that does
					 * the necessary scatter DMA.
					 */
					safestats.st_iovnotuniform++;
					err = EINVAL;
					goto errout;
				}
			} else
				re->re_dst = re->re_src;
		} else if (crp->crp_flags & CRYPTO_F_IMBUF) {
			if (nicealign && uniform == 1) {
				/*
				 * Source layout is suitable for direct
				 * sharing of the DMA map and segment list.
				 */
				re->re_dst = re->re_src;
			} else if (nicealign && uniform == 2) {
				/*
				 * The source is properly aligned but requires a
				 * different particle list to handle DMA of the
				 * result.  Create a new map and do the load to
				 * create the segment list.  The particle
				 * descriptor setup code below will handle the
				 * rest.
				 */
				if (bus_dmamap_create(sc->sc_dstdmat,
				    BUS_DMA_NOWAIT, &re->re_dst_map)) {
					safestats.st_nomap++;
					err = ENOMEM;
					goto errout;
				}
				if (bus_dmamap_load_mbuf(sc->sc_dstdmat,
				    re->re_dst_map, re->re_dst_m,
				    safe_op_cb, &re->re_dst,
				    BUS_DMA_NOWAIT) != 0) {
					bus_dmamap_destroy(sc->sc_dstdmat,
						re->re_dst_map);
					re->re_dst_map = NULL;
					safestats.st_noload++;
					err = ENOMEM;
					goto errout;
				}
			} else {		/* !(aligned and/or uniform) */
				int totlen, len;
				struct mbuf *m, *top, **mp;

				/*
				 * DMA constraints require that we allocate a
				 * new mbuf chain for the destination.  We
				 * allocate an entire new set of mbufs of
				 * optimal/required size and then tell the
				 * hardware to copy any bits that are not
				 * created as a byproduct of the operation.
				 */
				if (!nicealign)
					safestats.st_unaligned++;
				if (!uniform)
					safestats.st_notuniform++;
				totlen = re->re_src_mapsize;
				if (re->re_src_m->m_flags & M_PKTHDR) {
					len = MHLEN;
					MGETHDR(m, M_NOWAIT, MT_DATA);
					if (m && !m_dup_pkthdr(m, re->re_src_m,
					    M_NOWAIT)) {
						m_free(m);
						m = NULL;
					}
				} else {
					len = MLEN;
					MGET(m, M_NOWAIT, MT_DATA);
				}
				if (m == NULL) {
					safestats.st_nombuf++;
					err = sc->sc_nqchip ? ERESTART : ENOMEM;
					goto errout;
				}
				if (totlen >= MINCLSIZE) {
					if (!(MCLGET(m, M_NOWAIT))) {
						m_free(m);
						safestats.st_nomcl++;
						err = sc->sc_nqchip ?
							ERESTART : ENOMEM;
						goto errout;
					}
					len = MCLBYTES;
				}
				m->m_len = len;
				top = NULL;
				mp = &top;

				while (totlen > 0) {
					if (top) {
						MGET(m, M_NOWAIT, MT_DATA);
						if (m == NULL) {
							m_freem(top);
							safestats.st_nombuf++;
							err = sc->sc_nqchip ?
							    ERESTART : ENOMEM;
							goto errout;
						}
						len = MLEN;
					}
					if (top && totlen >= MINCLSIZE) {
						if (!(MCLGET(m, M_NOWAIT))) {
							*mp = m;
							m_freem(top);
							safestats.st_nomcl++;
							err = sc->sc_nqchip ?
							    ERESTART : ENOMEM;
							goto errout;
						}
						len = MCLBYTES;
					}
					m->m_len = len = min(totlen, len);
					totlen -= len;
					*mp = m;
					mp = &m->m_next;
				}
				re->re_dst_m = top;
				if (bus_dmamap_create(sc->sc_dstdmat,
				    BUS_DMA_NOWAIT, &re->re_dst_map) != 0) {
					safestats.st_nomap++;
					err = ENOMEM;
					goto errout;
				}
				if (bus_dmamap_load_mbuf(sc->sc_dstdmat,
				    re->re_dst_map, re->re_dst_m,
				    safe_op_cb, &re->re_dst,
				    BUS_DMA_NOWAIT) != 0) {
					bus_dmamap_destroy(sc->sc_dstdmat,
					re->re_dst_map);
					re->re_dst_map = NULL;
					safestats.st_noload++;
					err = ENOMEM;
					goto errout;
				}
				if (re->re_src.mapsize > oplen) {
					/*
					 * There's data following what the
					 * hardware will copy for us.  If this
					 * isn't just the ICV (that's going to
					 * be written on completion), copy it
					 * to the new mbufs
					 */
					if (!(maccrd &&
					    (re->re_src.mapsize-oplen) == 12 &&
					    maccrd->crd_inject == oplen))
						safe_mcopy(re->re_src_m,
							   re->re_dst_m,
							   oplen);
					else
						safestats.st_noicvcopy++;
				}
			}
		} else {
			safestats.st_badflags++;
			err = EINVAL;
			goto errout;
		}

		if (re->re_dst.nsegs > 1) {
			re->re_desc.d_dst = sc->sc_dpalloc.dma_paddr +
			    ((caddr_t) sc->sc_dpfree - (caddr_t) sc->sc_dpring);
			for (i = 0; i < re->re_dst_nsegs; i++) {
				pd = sc->sc_dpfree;
				KASSERT((pd->pd_flags&3) == 0 ||
					(pd->pd_flags&3) == SAFE_PD_DONE,
					("bogus dest particle descriptor; flags %x",
						pd->pd_flags));
				if (++(sc->sc_dpfree) == sc->sc_dpringtop)
					sc->sc_dpfree = sc->sc_dpring;
				pd->pd_addr = re->re_dst_segs[i].ds_addr;
				pd->pd_flags = SAFE_PD_READY;
			}
			cmd0 |= SAFE_SA_CMD0_OSCATTER;
		} else {
			/*
			 * No need for scatter, reference the operand directly.
			 */
			re->re_desc.d_dst = re->re_dst_segs[0].ds_addr;
		}
	}

	/*
	 * All done with setup; fill in the SA command words
	 * and the packet engine descriptor.  The operation
	 * is now ready for submission to the hardware.
	 */
	sa->sa_cmd0 = cmd0 | SAFE_SA_CMD0_IPCI | SAFE_SA_CMD0_OPCI;
	sa->sa_cmd1 = cmd1
		    | (coffset << SAFE_SA_CMD1_OFFSET_S)
		    | SAFE_SA_CMD1_SAREV1	/* Rev 1 SA data structure */
		    | SAFE_SA_CMD1_SRPCI
		    ;
	/*
	 * NB: the order of writes is important here.  In case the
	 * chip is scanning the ring because of an outstanding request
	 * it might nab this one too.  In that case we need to make
	 * sure the setup is complete before we write the length
	 * field of the descriptor as it signals the descriptor is
	 * ready for processing.
	 */
	re->re_desc.d_csr = SAFE_PE_CSR_READY | SAFE_PE_CSR_SAPCI;
	if (maccrd)
		re->re_desc.d_csr |= SAFE_PE_CSR_LOADSA | SAFE_PE_CSR_HASHFINAL;
	re->re_desc.d_len = oplen
			  | SAFE_PE_LEN_READY
			  | (bypass << SAFE_PE_LEN_BYPASS_S)
			  ;

	safestats.st_ipackets++;
	safestats.st_ibytes += oplen;

	if (++(sc->sc_front) == sc->sc_ringtop)
		sc->sc_front = sc->sc_ring;

	/* XXX honor batching */
	safe_feed(sc, re);
	mtx_unlock(&sc->sc_ringmtx);
	return (0);

errout:
	if ((re->re_dst_m != NULL) && (re->re_src_m != re->re_dst_m))
		m_freem(re->re_dst_m);

	if (re->re_dst_map != NULL && re->re_dst_map != re->re_src_map) {
		bus_dmamap_unload(sc->sc_dstdmat, re->re_dst_map);
		bus_dmamap_destroy(sc->sc_dstdmat, re->re_dst_map);
	}
	if (re->re_src_map != NULL) {
		bus_dmamap_unload(sc->sc_srcdmat, re->re_src_map);
		bus_dmamap_destroy(sc->sc_srcdmat, re->re_src_map);
	}
	mtx_unlock(&sc->sc_ringmtx);
	if (err != ERESTART) {
		crp->crp_etype = err;
		crypto_done(crp);
	} else {
		sc->sc_needwakeup |= CRYPTO_SYMQ;
	}
	return (err);
}

static void
safe_callback(struct safe_softc *sc, struct safe_ringentry *re)
{
	struct cryptop *crp = (struct cryptop *)re->re_crp;
	struct safe_session *ses;
	struct cryptodesc *crd;

	ses = crypto_get_driver_session(crp->crp_session);

	safestats.st_opackets++;
	safestats.st_obytes += re->re_dst.mapsize;

	safe_dma_sync(&sc->sc_ringalloc,
		BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
	if (re->re_desc.d_csr & SAFE_PE_CSR_STATUS) {
		device_printf(sc->sc_dev, "csr 0x%x cmd0 0x%x cmd1 0x%x\n",
			re->re_desc.d_csr,
			re->re_sa.sa_cmd0, re->re_sa.sa_cmd1);
		safestats.st_peoperr++;
		crp->crp_etype = EIO;		/* something more meaningful? */
	}
	if (re->re_dst_map != NULL && re->re_dst_map != re->re_src_map) {
		bus_dmamap_sync(sc->sc_dstdmat, re->re_dst_map,
		    BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(sc->sc_dstdmat, re->re_dst_map);
		bus_dmamap_destroy(sc->sc_dstdmat, re->re_dst_map);
	}
	bus_dmamap_sync(sc->sc_srcdmat, re->re_src_map, BUS_DMASYNC_POSTWRITE);
	bus_dmamap_unload(sc->sc_srcdmat, re->re_src_map);
	bus_dmamap_destroy(sc->sc_srcdmat, re->re_src_map);

	/*
	 * If result was written to a different mbuf chain, swap
	 * it in as the return value and reclaim the original.
	 */
	if ((crp->crp_flags & CRYPTO_F_IMBUF) && re->re_src_m != re->re_dst_m) {
		m_freem(re->re_src_m);
		crp->crp_buf = (caddr_t)re->re_dst_m;
	}

	if (re->re_flags & SAFE_QFLAGS_COPYOUTIV) {
		/* copy out IV for future use */
		for (crd = crp->crp_desc; crd; crd = crd->crd_next) {
			int ivsize;

			if (crd->crd_alg == CRYPTO_DES_CBC ||
			    crd->crd_alg == CRYPTO_3DES_CBC) {
				ivsize = 2*sizeof(u_int32_t);
			} else if (crd->crd_alg == CRYPTO_AES_CBC) {
				ivsize = 4*sizeof(u_int32_t);
			} else
				continue;
			crypto_copydata(crp->crp_flags, crp->crp_buf,
			    crd->crd_skip + crd->crd_len - ivsize, ivsize,
			    (caddr_t)ses->ses_iv);
			break;
		}
	}

	if (re->re_flags & SAFE_QFLAGS_COPYOUTICV) {
		/* copy out ICV result */
		for (crd = crp->crp_desc; crd; crd = crd->crd_next) {
			if (!(crd->crd_alg == CRYPTO_MD5_HMAC ||
			    crd->crd_alg == CRYPTO_SHA1_HMAC ||
			    crd->crd_alg == CRYPTO_NULL_HMAC))
				continue;
			if (crd->crd_alg == CRYPTO_SHA1_HMAC) {
				/*
				 * SHA-1 ICV's are byte-swapped; fix 'em up
				 * before copying them to their destination.
				 */
				re->re_sastate.sa_saved_indigest[0] =
				    bswap32(re->re_sastate.sa_saved_indigest[0]);
				re->re_sastate.sa_saved_indigest[1] =
				    bswap32(re->re_sastate.sa_saved_indigest[1]);
				re->re_sastate.sa_saved_indigest[2] =
				    bswap32(re->re_sastate.sa_saved_indigest[2]);
			}
			crypto_copyback(crp->crp_flags, crp->crp_buf,
			    crd->crd_inject, ses->ses_mlen,
			    (caddr_t)re->re_sastate.sa_saved_indigest);
			break;
		}
	}
	crypto_done(crp);
}

/*
 * Copy all data past offset from srcm to dstm.
 */
static void
safe_mcopy(struct mbuf *srcm, struct mbuf *dstm, u_int offset)
{
	u_int j, dlen, slen;
	caddr_t dptr, sptr;

	/*
	 * Advance src and dst to offset.
	 */
	j = offset;
	while (j >= srcm->m_len) {
		j -= srcm->m_len;
		srcm = srcm->m_next;
		if (srcm == NULL)
			return;
	}
	sptr = mtod(srcm, caddr_t) + j;
	slen = srcm->m_len - j;

	j = offset;
	while (j >= dstm->m_len) {
		j -= dstm->m_len;
		dstm = dstm->m_next;
		if (dstm == NULL)
			return;
	}
	dptr = mtod(dstm, caddr_t) + j;
	dlen = dstm->m_len - j;

	/*
	 * Copy everything that remains.
	 */
	for (;;) {
		j = min(slen, dlen);
		bcopy(sptr, dptr, j);
		if (slen == j) {
			srcm = srcm->m_next;
			if (srcm == NULL)
				return;
			sptr = srcm->m_data;
			slen = srcm->m_len;
		} else
			sptr += j, slen -= j;
		if (dlen == j) {
			dstm = dstm->m_next;
			if (dstm == NULL)
				return;
			dptr = dstm->m_data;
			dlen = dstm->m_len;
		} else
			dptr += j, dlen -= j;
	}
}

#ifndef SAFE_NO_RNG
#define	SAFE_RNG_MAXWAIT	1000

static void
safe_rng_init(struct safe_softc *sc)
{
	u_int32_t w, v;
	int i;

	WRITE_REG(sc, SAFE_RNG_CTRL, 0);
	/* use default value according to the manual */
	WRITE_REG(sc, SAFE_RNG_CNFG, 0x834);	/* magic from SafeNet */
	WRITE_REG(sc, SAFE_RNG_ALM_CNT, 0);

	/*
	 * There is a bug in rev 1.0 of the 1140: when the RNG is
	 * brought out of reset the ready status flag does not work
	 * until the RNG has finished its internal initialization.
	 *
	 * So to determine that the device is through its
	 * initialization we must read the data register (honoring
	 * the status register in case it is already initialized),
	 * then read the data register until the value changes from
	 * the first read.  Once it changes, read it until it changes
	 * again; at that point the RNG is considered initialized.
	 * This can take between 750 and 1000 ms.
	 */
	i = 0;
	w = READ_REG(sc, SAFE_RNG_OUT);
	do {
		v = READ_REG(sc, SAFE_RNG_OUT);
		if (v != w) {
			w = v;
			break;
		}
		DELAY(10);
	} while (++i < SAFE_RNG_MAXWAIT);

	/* Wait until the data changes again */
	i = 0;
	do {
		v = READ_REG(sc, SAFE_RNG_OUT);
		if (v != w)
			break;
		DELAY(10);
	} while (++i < SAFE_RNG_MAXWAIT);
}

static __inline void
safe_rng_disable_short_cycle(struct safe_softc *sc)
{
	WRITE_REG(sc, SAFE_RNG_CTRL,
		READ_REG(sc, SAFE_RNG_CTRL) &~ SAFE_RNG_CTRL_SHORTEN);
}

static __inline void
safe_rng_enable_short_cycle(struct safe_softc *sc)
{
	WRITE_REG(sc, SAFE_RNG_CTRL,
		READ_REG(sc, SAFE_RNG_CTRL) | SAFE_RNG_CTRL_SHORTEN);
}

static __inline u_int32_t
safe_rng_read(struct safe_softc *sc)
{
	int i;

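	/* spin until the RNG reports ready, bounded by SAFE_RNG_MAXWAIT */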
	i = 0;
	while (READ_REG(sc, SAFE_RNG_STAT) != 0 && ++i < SAFE_RNG_MAXWAIT)
		;
	return READ_REG(sc, SAFE_RNG_OUT);
}

static void
safe_rng(void *arg)
{
	struct safe_softc *sc = arg;
	u_int32_t buf[SAFE_RNG_MAXBUFSIZ];	/* NB: maybe move to softc */
	u_int maxwords;
	int i;

	safestats.st_rng++;
	/*
	 * Fetch the next block of data.
	 */
	maxwords = safe_rngbufsize;
	if (maxwords > SAFE_RNG_MAXBUFSIZ)
		maxwords = SAFE_RNG_MAXBUFSIZ;
retry:
	for (i = 0; i < maxwords; i++)
		buf[i] = safe_rng_read(sc);
	/*
	 * Check the comparator alarm count and reset the h/w if
	 * it exceeds our threshold.  This guards against the
	 * hardware oscillators resonating with external signals.
	 */
	if (READ_REG(sc, SAFE_RNG_ALM_CNT) > safe_rngmaxalarm) {
		u_int32_t freq_inc, w;

		DPRINTF(("%s: alarm count %u exceeds threshold %u\n", __func__,
			READ_REG(sc, SAFE_RNG_ALM_CNT), safe_rngmaxalarm));
		safestats.st_rngalarm++;
		safe_rng_enable_short_cycle(sc);
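		/*
		 * Step the low 6 bits of RNG_CNFG (apparently an
		 * oscillator tuning value) until a setting is found
		 * that quiets the alarms.
		 */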
		freq_inc = 18;
		for (i = 0; i < 64; i++) {
			w = READ_REG(sc, SAFE_RNG_CNFG);
			freq_inc = ((w + freq_inc) & 0x3fL);
			w = ((w & ~0x3fL) | freq_inc);
			WRITE_REG(sc, SAFE_RNG_CNFG, w);

			WRITE_REG(sc, SAFE_RNG_ALM_CNT, 0);

			(void) safe_rng_read(sc);
			DELAY(25);

			if (READ_REG(sc, SAFE_RNG_ALM_CNT) == 0) {
				safe_rng_disable_short_cycle(sc);
				goto retry;
			}
			freq_inc = 1;
		}
		safe_rng_disable_short_cycle(sc);
	} else
		WRITE_REG(sc, SAFE_RNG_ALM_CNT, 0);

	(*sc->sc_harvest)(sc->sc_rndtest, buf, maxwords*sizeof (u_int32_t));
	callout_reset(&sc->sc_rngto,
		hz * (safe_rnginterval ? safe_rnginterval : 1), safe_rng, sc);
}
#endif /* SAFE_NO_RNG */

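/*
 * Callback for bus_dmamap_load of a safe_dma_alloc; the tag is
 * created with nsegments = 1 so only the first segment's address
 * matters.  The error argument is unused: with BUS_DMA_NOWAIT the
 * load is never deferred and failures are returned to the caller.
 */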
static void
safe_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	bus_addr_t *paddr = (bus_addr_t*) arg;
	*paddr = segs->ds_addr;
}

static int
safe_dma_malloc(
	struct safe_softc *sc,
	bus_size_t size,
	struct safe_dma_alloc *dma,
	int mapflags
)
{
	int r;

	r = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev),	/* parent */
			       sizeof(u_int32_t), 0,	/* alignment, bounds */
			       BUS_SPACE_MAXADDR_32BIT,	/* lowaddr */
			       BUS_SPACE_MAXADDR,	/* highaddr */
			       NULL, NULL,		/* filter, filterarg */
			       size,			/* maxsize */
			       1,			/* nsegments */
			       size,			/* maxsegsize */
			       BUS_DMA_ALLOCNOW,	/* flags */
			       NULL, NULL,		/* locking */
			       &dma->dma_tag);
	if (r != 0) {
		device_printf(sc->sc_dev, "safe_dma_malloc: "
			"bus_dma_tag_create failed; error %u\n", r);
		goto fail_0;
	}

	r = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
			     BUS_DMA_NOWAIT, &dma->dma_map);
	if (r != 0) {
		device_printf(sc->sc_dev, "safe_dma_malloc: "
			"bus_dmamem_alloc failed; size %ju, error %u\n",
			(uintmax_t)size, r);
		goto fail_1;
	}

	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
		            size,
			    safe_dmamap_cb,
			    &dma->dma_paddr,
			    mapflags | BUS_DMA_NOWAIT);
	if (r != 0) {
		device_printf(sc->sc_dev, "safe_dma_malloc: "
			"bus_dmamap_load failed; error %u\n", r);
		goto fail_2;
	}

	dma->dma_size = size;
	return (0);

	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
fail_2:
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
	bus_dma_tag_destroy(dma->dma_tag);
fail_0:
	dma->dma_tag = NULL;
	return (r);
}

static void
safe_dma_free(struct safe_softc *sc, struct safe_dma_alloc *dma)
{
	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
	bus_dma_tag_destroy(dma->dma_tag);
}

/*
 * Resets the board.  Values in the registers are left as-is
 * from the reset (i.e. initial values are assigned elsewhere).
 */
1790static void
1791safe_reset_board(struct safe_softc *sc)
1792{
1793	u_int32_t v;
1794	/*
1795	 * Reset the device.  The manual says no delay
1796	 * is needed between marking and clearing reset.
1797	 */
1798	v = READ_REG(sc, SAFE_PE_DMACFG) &~
1799		(SAFE_PE_DMACFG_PERESET | SAFE_PE_DMACFG_PDRRESET |
1800		 SAFE_PE_DMACFG_SGRESET);
1801	WRITE_REG(sc, SAFE_PE_DMACFG, v
1802				    | SAFE_PE_DMACFG_PERESET
1803				    | SAFE_PE_DMACFG_PDRRESET
1804				    | SAFE_PE_DMACFG_SGRESET);
1805	WRITE_REG(sc, SAFE_PE_DMACFG, v);
1806}

/*
 * Initialize registers we need to touch only once.
 */
static void
safe_init_board(struct safe_softc *sc)
{
	u_int32_t v, dwords;

	v = READ_REG(sc, SAFE_PE_DMACFG);
	v &=~ SAFE_PE_DMACFG_PEMODE;
	v |= SAFE_PE_DMACFG_FSENA		/* failsafe enable */
	  |  SAFE_PE_DMACFG_GPRPCI		/* gather ring on PCI */
	  |  SAFE_PE_DMACFG_SPRPCI		/* scatter ring on PCI */
	  |  SAFE_PE_DMACFG_ESDESC		/* endian-swap descriptors */
	  |  SAFE_PE_DMACFG_ESSA		/* endian-swap SA's */
	  |  SAFE_PE_DMACFG_ESPDESC		/* endian-swap part. desc's */
	  ;
	WRITE_REG(sc, SAFE_PE_DMACFG, v);
#if 0
	/* XXX select byte swap based on host byte order */
	WRITE_REG(sc, SAFE_ENDIAN, 0x1b);
#endif
	if (sc->sc_chiprev == SAFE_REV(1,0)) {
		/*
		 * Avoid large PCI DMA transfers.  Rev 1.0 has a bug where
		 * "target mode transfers" done while the chip is DMA'ing
		 * >1020 bytes cause the hardware to lock up.  To avoid this
		 * we reduce the max PCI transfer size and use small source
		 * particle descriptors (<= 256 bytes).
		 */
		WRITE_REG(sc, SAFE_DMA_CFG, 256);
		device_printf(sc->sc_dev,
			"Reduce max DMA size to %u words for rev %u.%u WAR\n",
			(READ_REG(sc, SAFE_DMA_CFG)>>2) & 0xff,
			SAFE_REV_MAJ(sc->sc_chiprev),
			SAFE_REV_MIN(sc->sc_chiprev));
	}

	/* NB: operands+results are overlaid */
	WRITE_REG(sc, SAFE_PE_PDRBASE, sc->sc_ringalloc.dma_paddr);
	WRITE_REG(sc, SAFE_PE_RDRBASE, sc->sc_ringalloc.dma_paddr);
	/*
	 * Configure ring entry size and number of items in the ring.
	 */
	KASSERT((sizeof(struct safe_ringentry) % sizeof(u_int32_t)) == 0,
		("PE ring entry not 32-bit aligned!"));
	dwords = sizeof(struct safe_ringentry) / sizeof(u_int32_t);
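	/*
	 * For example (illustrative numbers only, not the actual
	 * sizes): a 64-byte ring entry is 16 dwords, so a 32-entry
	 * ring would be configured below as
	 * (16 << SAFE_PE_RINGCFG_OFFSET_S) | 32.
	 */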
	WRITE_REG(sc, SAFE_PE_RINGCFG,
		(dwords << SAFE_PE_RINGCFG_OFFSET_S) | SAFE_MAX_NQUEUE);
	WRITE_REG(sc, SAFE_PE_RINGPOLL, 0);	/* disable polling */

	WRITE_REG(sc, SAFE_PE_GRNGBASE, sc->sc_spalloc.dma_paddr);
	WRITE_REG(sc, SAFE_PE_SRNGBASE, sc->sc_dpalloc.dma_paddr);
	WRITE_REG(sc, SAFE_PE_PARTSIZE,
		(SAFE_TOTAL_DPART<<16) | SAFE_TOTAL_SPART);
	/*
	 * NB: destination particles are fixed size.  We use
	 *     an mbuf cluster and require all results go to
	 *     clusters or smaller.
	 */
	WRITE_REG(sc, SAFE_PE_PARTCFG, SAFE_MAX_DSIZE);

	/* it's now safe to enable PE mode, do it */
	WRITE_REG(sc, SAFE_PE_DMACFG, v | SAFE_PE_DMACFG_PEMODE);

	/*
	 * Configure hardware to use level-triggered interrupts and
	 * to interrupt after each descriptor is processed.
	 */
	WRITE_REG(sc, SAFE_HI_CFG, SAFE_HI_CFG_LEVEL);
	WRITE_REG(sc, SAFE_HI_DESC_CNT, 1);
	WRITE_REG(sc, SAFE_HI_MASK, SAFE_INT_PE_DDONE | SAFE_INT_PE_ERROR);
}

/*
 * Init PCI registers; currently a no-op for this part.
 */
static void
safe_init_pciregs(device_t dev)
{
}

/*
 * Clean up after a chip crash.
 * It is assumed that the caller is in splimp().
 */
static void
safe_cleanchip(struct safe_softc *sc)
{

	if (sc->sc_nqchip != 0) {
		struct safe_ringentry *re = sc->sc_back;

		while (re != sc->sc_front) {
			if (re->re_desc.d_csr != 0)
				safe_free_entry(sc, re);
			if (++re == sc->sc_ringtop)
				re = sc->sc_ring;
		}
		sc->sc_back = re;
		sc->sc_nqchip = 0;
	}
}

/*
 * free a safe_q
 * It is assumed that the caller is within splimp().
 */
static int
safe_free_entry(struct safe_softc *sc, struct safe_ringentry *re)
{
	struct cryptop *crp;

	/*
	 * Free the destination mbuf if it is distinct from the source.
	 */
	if ((re->re_dst_m != NULL) && (re->re_src_m != re->re_dst_m))
		m_freem(re->re_dst_m);

	crp = (struct cryptop *)re->re_crp;

	re->re_desc.d_csr = 0;

	crp->crp_etype = EFAULT;
	crypto_done(crp);
	return (0);
}

/*
 * Routine to reset the chip and clean up.
 * It is assumed that the caller is in splimp().
 */
static void
safe_totalreset(struct safe_softc *sc)
{
	safe_reset_board(sc);
	safe_init_board(sc);
	safe_cleanchip(sc);
}

/*
 * Is the operand suitably aligned for direct DMA?  Each
 * segment must be aligned on a 32-bit boundary and all
 * but the last segment must be a multiple of 4 bytes.
 */
static int
safe_dmamap_aligned(const struct safe_operand *op)
{
	int i;

	for (i = 0; i < op->nsegs; i++) {
		if (op->segs[i].ds_addr & 3)
			return (0);
		if (i != (op->nsegs - 1) && (op->segs[i].ds_len & 3))
			return (0);
	}
	return (1);
}
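
/*
 * Example (illustrative): segments of 64, 128, and 33 bytes, each
 * starting on a 4-byte boundary, pass the check above (only the last
 * segment may have an odd length); a segment at a non-4-byte-aligned
 * address, or a 6-byte segment anywhere but last, fails it.
 */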

/*
 * Is the operand suitable for direct DMA as the destination
 * of an operation.  The hardware requires that each ``particle''
 * but the last in an operation result have the same size.  We
 * fix that size at SAFE_MAX_DSIZE bytes.  This routine returns
 * 0 if some segment is not a multiple of this size, 1 if all
 * segments are exactly this size, or 2 if segments are at worst
 * a multiple of this size.
 */
static int
safe_dmamap_uniform(const struct safe_operand *op)
{
	int result = 1;

	if (op->nsegs > 0) {
		int i;

		for (i = 0; i < op->nsegs-1; i++) {
			if (op->segs[i].ds_len % SAFE_MAX_DSIZE)
				return (0);
			if (op->segs[i].ds_len != SAFE_MAX_DSIZE)
				result = 2;
		}
	}
	return (result);
}
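
/*
 * Example (hypothetical particle size of 2048): segments of
 * 2048/2048/100 return 1 (all but the last exact), 4096/2048/100
 * return 2 (multiples, but not all exact), and 2048/1000/100
 * return 0 (a non-final segment is not a multiple).
 */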

#ifdef SAFE_DEBUG
static void
safe_dump_dmastatus(struct safe_softc *sc, const char *tag)
{
	printf("%s: ENDIAN 0x%x SRC 0x%x DST 0x%x STAT 0x%x\n"
		, tag
		, READ_REG(sc, SAFE_DMA_ENDIAN)
		, READ_REG(sc, SAFE_DMA_SRCADDR)
		, READ_REG(sc, SAFE_DMA_DSTADDR)
		, READ_REG(sc, SAFE_DMA_STAT)
	);
}

static void
safe_dump_intrstate(struct safe_softc *sc, const char *tag)
{
	printf("%s: HI_CFG 0x%x HI_MASK 0x%x HI_DESC_CNT 0x%x HU_STAT 0x%x HM_STAT 0x%x\n"
		, tag
		, READ_REG(sc, SAFE_HI_CFG)
		, READ_REG(sc, SAFE_HI_MASK)
		, READ_REG(sc, SAFE_HI_DESC_CNT)
		, READ_REG(sc, SAFE_HU_STAT)
		, READ_REG(sc, SAFE_HM_STAT)
	);
}

static void
safe_dump_ringstate(struct safe_softc *sc, const char *tag)
{
	u_int32_t estat = READ_REG(sc, SAFE_PE_ERNGSTAT);

	/* NB: assume caller has lock on ring */
	printf("%s: ERNGSTAT %x (next %u) back %lu front %lu\n",
		tag,
		estat, (estat >> SAFE_PE_ERNGSTAT_NEXT_S),
		(unsigned long)(sc->sc_back - sc->sc_ring),
		(unsigned long)(sc->sc_front - sc->sc_ring));
}

static void
safe_dump_request(struct safe_softc *sc, const char *tag, struct safe_ringentry *re)
{
	int ix, nsegs;

	ix = re - sc->sc_ring;
	printf("%s: %p (%u): csr %x src %x dst %x sa %x len %x\n"
		, tag
		, re, ix
		, re->re_desc.d_csr
		, re->re_desc.d_src
		, re->re_desc.d_dst
		, re->re_desc.d_sa
		, re->re_desc.d_len
	);
	if (re->re_src.nsegs > 1) {
		ix = (re->re_desc.d_src - sc->sc_spalloc.dma_paddr) /
			sizeof(struct safe_pdesc);
		for (nsegs = re->re_src.nsegs; nsegs; nsegs--) {
			printf(" spd[%u] %p: %p size %u flags %x"
				, ix, &sc->sc_spring[ix]
				, (caddr_t)(uintptr_t) sc->sc_spring[ix].pd_addr
				, sc->sc_spring[ix].pd_size
				, sc->sc_spring[ix].pd_flags
			);
			if (sc->sc_spring[ix].pd_size == 0)
				printf(" (zero!)");
			printf("\n");
			if (++ix == SAFE_TOTAL_SPART)
				ix = 0;
		}
	}
	if (re->re_dst.nsegs > 1) {
		ix = (re->re_desc.d_dst - sc->sc_dpalloc.dma_paddr) /
			sizeof(struct safe_pdesc);
		for (nsegs = re->re_dst.nsegs; nsegs; nsegs--) {
			printf(" dpd[%u] %p: %p flags %x\n"
				, ix, &sc->sc_dpring[ix]
				, (caddr_t)(uintptr_t) sc->sc_dpring[ix].pd_addr
				, sc->sc_dpring[ix].pd_flags
			);
			if (++ix == SAFE_TOTAL_DPART)
				ix = 0;
		}
	}
	printf("sa: cmd0 %08x cmd1 %08x staterec %x\n",
		re->re_sa.sa_cmd0, re->re_sa.sa_cmd1, re->re_sa.sa_staterec);
	printf("sa: key %x %x %x %x %x %x %x %x\n"
		, re->re_sa.sa_key[0]
		, re->re_sa.sa_key[1]
		, re->re_sa.sa_key[2]
		, re->re_sa.sa_key[3]
		, re->re_sa.sa_key[4]
		, re->re_sa.sa_key[5]
		, re->re_sa.sa_key[6]
		, re->re_sa.sa_key[7]
	);
	printf("sa: indigest %x %x %x %x %x\n"
		, re->re_sa.sa_indigest[0]
		, re->re_sa.sa_indigest[1]
		, re->re_sa.sa_indigest[2]
		, re->re_sa.sa_indigest[3]
		, re->re_sa.sa_indigest[4]
	);
	printf("sa: outdigest %x %x %x %x %x\n"
		, re->re_sa.sa_outdigest[0]
		, re->re_sa.sa_outdigest[1]
		, re->re_sa.sa_outdigest[2]
		, re->re_sa.sa_outdigest[3]
		, re->re_sa.sa_outdigest[4]
	);
	printf("sr: iv %x %x %x %x\n"
		, re->re_sastate.sa_saved_iv[0]
		, re->re_sastate.sa_saved_iv[1]
		, re->re_sastate.sa_saved_iv[2]
		, re->re_sastate.sa_saved_iv[3]
	);
	printf("sr: hashbc %u indigest %x %x %x %x %x\n"
		, re->re_sastate.sa_saved_hashbc
		, re->re_sastate.sa_saved_indigest[0]
		, re->re_sastate.sa_saved_indigest[1]
		, re->re_sastate.sa_saved_indigest[2]
		, re->re_sastate.sa_saved_indigest[3]
		, re->re_sastate.sa_saved_indigest[4]
	);
}

static void
safe_dump_ring(struct safe_softc *sc, const char *tag)
{
	mtx_lock(&sc->sc_ringmtx);
	printf("\nSafeNet Ring State:\n");
	safe_dump_intrstate(sc, tag);
	safe_dump_dmastatus(sc, tag);
	safe_dump_ringstate(sc, tag);
	if (sc->sc_nqchip) {
		struct safe_ringentry *re = sc->sc_back;
		do {
			safe_dump_request(sc, tag, re);
			if (++re == sc->sc_ringtop)
				re = sc->sc_ring;
		} while (re != sc->sc_front);
	}
	mtx_unlock(&sc->sc_ringmtx);
}

static int
sysctl_hw_safe_dump(SYSCTL_HANDLER_ARGS)
{
	char dmode[64];
	int error;

	bzero(dmode, sizeof(dmode));
	error = sysctl_handle_string(oidp, &dmode[0], sizeof(dmode), req);

	if (error == 0 && req->newptr != NULL) {
		struct safe_softc *sc = safec;

		if (sc == NULL)
			return (EINVAL);
		if (strncmp(dmode, "dma", 3) == 0)
			safe_dump_dmastatus(sc, "safe0");
		else if (strncmp(dmode, "int", 3) == 0)
			safe_dump_intrstate(sc, "safe0");
		else if (strncmp(dmode, "ring", 4) == 0)
			safe_dump_ring(sc, "safe0");
		else
			return (EINVAL);
	}
	return (error);
}
SYSCTL_PROC(_hw_safe, OID_AUTO, dump, CTLTYPE_STRING | CTLFLAG_RW,
	0, 0, sysctl_hw_safe_dump, "A", "Dump driver state");
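
/*
 * Example (from userland, with SAFE_DEBUG compiled in):
 *
 *	sysctl hw.safe.dump=dma		# DMA engine state
 *	sysctl hw.safe.dump=int		# interrupt state
 *	sysctl hw.safe.dump=ring	# full ring contents
 */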
#endif /* SAFE_DEBUG */
