1/*	$NetBSD: ubsec.c,v 1.65 2024/02/23 22:03:45 andvar Exp $	*/
2/* $FreeBSD: src/sys/dev/ubsec/ubsec.c,v 1.6.2.6 2003/01/23 21:06:43 sam Exp $ */
3/*	$OpenBSD: ubsec.c,v 1.143 2009/03/27 13:31:30 reyk Exp$	*/
4
5/*
6 * Copyright (c) 2000 Jason L. Wright (jason@thought.net)
7 * Copyright (c) 2000 Theo de Raadt (deraadt@openbsd.org)
8 * Copyright (c) 2001 Patrik Lindergren (patrik@ipunplugged.com)
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
23 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
27 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 *
31 * Effort sponsored in part by the Defense Advanced Research Projects
32 * Agency (DARPA) and Air Force Research Laboratory, Air Force
33 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
34 *
35 */
36
37#include <sys/cdefs.h>
38__KERNEL_RCSID(0, "$NetBSD: ubsec.c,v 1.65 2024/02/23 22:03:45 andvar Exp $");
39
40#undef UBSEC_DEBUG
41
42/*
43 * uBsec 5[56]01, 58xx hardware crypto accelerator
44 */
45
46#include <sys/param.h>
47#include <sys/systm.h>
48#include <sys/proc.h>
49#include <sys/endian.h>
50#include <sys/errno.h>
51#include <sys/malloc.h>
52#include <sys/kernel.h>
53#include <sys/mbuf.h>
54#include <sys/device.h>
55#include <sys/module.h>
56#include <sys/queue.h>
57#include <sys/sysctl.h>
58
59#include <opencrypto/cryptodev.h>
60#include <opencrypto/xform.h>
61#include <sys/cprng.h>
62#include <sys/md5.h>
63#include <sys/rndsource.h>
64#include <sys/sha1.h>
65
66#include <dev/pci/pcireg.h>
67#include <dev/pci/pcivar.h>
68#include <dev/pci/pcidevs.h>
69
70#include <dev/pci/ubsecreg.h>
71#include <dev/pci/ubsecvar.h>
72
73#define UBSEC_NO_RNG	/* hangs on attach */
74#define letoh16 htole16
75#define letoh32 htole32
76
77/*
78 * Prototypes and count for the pci_device structure
79 */
80static	int  ubsec_probe(device_t, cfdata_t, void *);
81static	void ubsec_attach(device_t, device_t, void *);
82static	int  ubsec_detach(device_t, int);
83static	void ubsec_reset_board(struct ubsec_softc *);
84static	void ubsec_init_board(struct ubsec_softc *);
85static	void ubsec_init_pciregs(struct pci_attach_args *pa);
86static	void ubsec_cleanchip(struct ubsec_softc *);
87static	void ubsec_totalreset(struct ubsec_softc *);
88static	int  ubsec_free_q(struct ubsec_softc*, struct ubsec_q *);
89
90CFATTACH_DECL_NEW(ubsec, sizeof(struct ubsec_softc), ubsec_probe, ubsec_attach,
91	      ubsec_detach, NULL);
92extern struct cfdriver ubsec_cd;
93
94/* patchable */
95#ifdef	UBSEC_DEBUG
96extern int ubsec_debug;
97int ubsec_debug=1;
98#endif
99
100static	int	ubsec_intr(void *);
101static	int	ubsec_newsession(void*, u_int32_t *, struct cryptoini *);
102static	void	ubsec_freesession(void*, u_int64_t);
103static	int	ubsec_process(void*, struct cryptop *, int hint);
104static	void	ubsec_callback(struct ubsec_softc *, struct ubsec_q *);
105static	void	ubsec_feed(struct ubsec_softc *);
106static	void	ubsec_mcopy(struct mbuf *, struct mbuf *, int, int);
107static	void	ubsec_callback2(struct ubsec_softc *, struct ubsec_q2 *);
108static	void	ubsec_feed2(struct ubsec_softc *);
109static	void	ubsec_feed4(struct ubsec_softc *);
110#ifndef UBSEC_NO_RNG
111static  void	ubsec_rng(void *);
112static  void	ubsec_rng_locked(void *);
113static  void	ubsec_rng_get(size_t, void *);
114#endif /* UBSEC_NO_RNG */
115static	int 	ubsec_dma_malloc(struct ubsec_softc *, bus_size_t,
116				 struct ubsec_dma_alloc *, int);
117static	void	ubsec_dma_free(struct ubsec_softc *, struct ubsec_dma_alloc *);
118static	int	ubsec_dmamap_aligned(bus_dmamap_t);
119
120static	int	ubsec_kprocess(void*, struct cryptkop *, int);
121static	void	ubsec_kprocess_modexp_sw(struct ubsec_softc *,
122					 struct cryptkop *, int);
123static	void	ubsec_kprocess_modexp_hw(struct ubsec_softc *,
124					 struct cryptkop *, int);
125static	void	ubsec_kprocess_rsapriv(struct ubsec_softc *,
126				       struct cryptkop *, int);
127static	void	ubsec_kfree(struct ubsec_softc *, struct ubsec_q2 *);
128static	int	ubsec_ksigbits(struct crparam *);
129static	void	ubsec_kshift_r(u_int, u_int8_t *, u_int, u_int8_t *, u_int);
130static	void	ubsec_kshift_l(u_int, u_int8_t *, u_int, u_int8_t *, u_int);
131
132#ifdef UBSEC_DEBUG
133static void	ubsec_dump_pb(volatile struct ubsec_pktbuf *);
134static void	ubsec_dump_mcr(struct ubsec_mcr *);
135static	void	ubsec_dump_ctx2(volatile struct ubsec_ctx_keyop *);
136#endif
137
138#define	READ_REG(sc,r) \
139	bus_space_read_4((sc)->sc_st, (sc)->sc_sh, (r))
140
141#define WRITE_REG(sc,reg,val) \
142	bus_space_write_4((sc)->sc_st, (sc)->sc_sh, reg, val)
143
144#define	SWAP32(x) (x) = htole32(ntohl((x)))
145#ifndef HTOLE32
146 #define	HTOLE32(x) (x) = htole32(x)
147#endif
148
149struct ubsec_stats ubsecstats;
150
151/*
152 * ubsec_maxbatch controls the number of crypto ops to voluntarily
153 * collect into one submission to the hardware.  This batching happens
154 * when ops are dispatched from the crypto subsystem with a hint that
155 * more are to follow immediately.  These ops must also not be marked
156 * with a ``no delay'' flag.
157 */
158static	int ubsec_maxbatch = 1;
159
160/*
161 * ubsec_maxaggr controls the number of crypto ops to submit to the
162 * hardware as a unit.  This aggregation reduces the number of interrupts
163 * to the host at the expense of increased latency (for all but the last
164 * operation).  For network traffic setting this to one yields the highest
165 * performance but at the expense of more interrupt processing.
166 */
167static	int ubsec_maxaggr = 1;
168
169static const struct ubsec_product {
170	pci_vendor_id_t		ubsec_vendor;
171	pci_product_id_t	ubsec_product;
172	int			ubsec_flags;
173	int			ubsec_statmask;
174	int			ubsec_maxaggr;
175	const char		*ubsec_name;
176} ubsec_products[] = {
177	{ PCI_VENDOR_BLUESTEEL,	PCI_PRODUCT_BLUESTEEL_5501,
178	  0,
179	  BS_STAT_MCR1_DONE | BS_STAT_DMAERR,
180	  UBS_MIN_AGGR,
181	  "Bluesteel 5501"
182	},
183	{ PCI_VENDOR_BLUESTEEL,	PCI_PRODUCT_BLUESTEEL_5601,
184	  UBS_FLAGS_KEY | UBS_FLAGS_RNG,
185	  BS_STAT_MCR1_DONE | BS_STAT_DMAERR,
186	  UBS_MIN_AGGR,
187	  "Bluesteel 5601"
188	},
189
190	{ PCI_VENDOR_BROADCOM,	PCI_PRODUCT_BROADCOM_5801,
191	  0,
192	  BS_STAT_MCR1_DONE | BS_STAT_DMAERR,
193	  UBS_MIN_AGGR,
194	  "Broadcom BCM5801"
195	},
196
197	{ PCI_VENDOR_BROADCOM,	PCI_PRODUCT_BROADCOM_5802,
198	  UBS_FLAGS_KEY | UBS_FLAGS_RNG,
199	  BS_STAT_MCR1_DONE | BS_STAT_DMAERR,
200	  UBS_MIN_AGGR,
201	  "Broadcom BCM5802"
202	},
203
204	{ PCI_VENDOR_BROADCOM,	PCI_PRODUCT_BROADCOM_5805,
205	  UBS_FLAGS_KEY | UBS_FLAGS_RNG,
206	  BS_STAT_MCR1_DONE | BS_STAT_DMAERR,
207	  UBS_MIN_AGGR,
208	  "Broadcom BCM5805"
209	},
210
211	{ PCI_VENDOR_BROADCOM,	PCI_PRODUCT_BROADCOM_5820,
212	  UBS_FLAGS_KEY | UBS_FLAGS_RNG | UBS_FLAGS_LONGCTX |
213	      UBS_FLAGS_HWNORM | UBS_FLAGS_BIGKEY,
214	  BS_STAT_MCR1_DONE | BS_STAT_DMAERR,
215	  UBS_MIN_AGGR,
216	  "Broadcom BCM5820"
217	},
218
219	{ PCI_VENDOR_BROADCOM,	PCI_PRODUCT_BROADCOM_5821,
220	  UBS_FLAGS_KEY | UBS_FLAGS_RNG | UBS_FLAGS_LONGCTX |
221	      UBS_FLAGS_HWNORM | UBS_FLAGS_BIGKEY,
222	  BS_STAT_MCR1_DONE | BS_STAT_DMAERR |
223	      BS_STAT_MCR1_ALLEMPTY | BS_STAT_MCR2_ALLEMPTY,
224	  UBS_MIN_AGGR,
225	  "Broadcom BCM5821"
226	},
227	{ PCI_VENDOR_SUN,	PCI_PRODUCT_SUN_SCA1K,
228	  UBS_FLAGS_KEY | UBS_FLAGS_RNG | UBS_FLAGS_LONGCTX |
229	      UBS_FLAGS_HWNORM | UBS_FLAGS_BIGKEY,
230	  BS_STAT_MCR1_DONE | BS_STAT_DMAERR |
231	      BS_STAT_MCR1_ALLEMPTY | BS_STAT_MCR2_ALLEMPTY,
232	  UBS_MIN_AGGR,
233	  "Sun Crypto Accelerator 1000"
234	},
235	{ PCI_VENDOR_SUN,	PCI_PRODUCT_SUN_5821,
236	  UBS_FLAGS_KEY | UBS_FLAGS_RNG | UBS_FLAGS_LONGCTX |
237	      UBS_FLAGS_HWNORM | UBS_FLAGS_BIGKEY,
238	  BS_STAT_MCR1_DONE | BS_STAT_DMAERR |
239	      BS_STAT_MCR1_ALLEMPTY | BS_STAT_MCR2_ALLEMPTY,
240	  UBS_MIN_AGGR,
241	  "Broadcom BCM5821 (Sun)"
242	},
243
244	{ PCI_VENDOR_BROADCOM,	PCI_PRODUCT_BROADCOM_5822,
245	  UBS_FLAGS_KEY | UBS_FLAGS_RNG | UBS_FLAGS_LONGCTX |
246	      UBS_FLAGS_HWNORM | UBS_FLAGS_BIGKEY,
247	  BS_STAT_MCR1_DONE | BS_STAT_DMAERR |
248	      BS_STAT_MCR1_ALLEMPTY | BS_STAT_MCR2_ALLEMPTY,
249	  UBS_MIN_AGGR,
250	  "Broadcom BCM5822"
251	},
252
253	{ PCI_VENDOR_BROADCOM,	PCI_PRODUCT_BROADCOM_5823,
254	  UBS_FLAGS_KEY | UBS_FLAGS_RNG | UBS_FLAGS_LONGCTX |
255	      UBS_FLAGS_HWNORM | UBS_FLAGS_BIGKEY | UBS_FLAGS_AES,
256	  BS_STAT_MCR1_DONE | BS_STAT_DMAERR |
257	      BS_STAT_MCR1_ALLEMPTY | BS_STAT_MCR2_ALLEMPTY,
258	  UBS_MIN_AGGR,
259	  "Broadcom BCM5823"
260	},
261
262	{ PCI_VENDOR_BROADCOM,	PCI_PRODUCT_BROADCOM_5825,
263	  UBS_FLAGS_KEY | UBS_FLAGS_RNG | UBS_FLAGS_LONGCTX |
264	      UBS_FLAGS_HWNORM | UBS_FLAGS_BIGKEY | UBS_FLAGS_AES,
265	  BS_STAT_MCR1_DONE | BS_STAT_DMAERR |
266	      BS_STAT_MCR1_ALLEMPTY | BS_STAT_MCR2_ALLEMPTY,
267	  UBS_MIN_AGGR,
268	  "Broadcom BCM5825"
269	},
270
271	{ PCI_VENDOR_BROADCOM,	PCI_PRODUCT_BROADCOM_5860,
272	  UBS_FLAGS_MULTIMCR | UBS_FLAGS_HWNORM |
273	      UBS_FLAGS_LONGCTX |
274	      UBS_FLAGS_RNG | UBS_FLAGS_RNG4 |
275	      UBS_FLAGS_KEY | UBS_FLAGS_BIGKEY | UBS_FLAGS_AES,
276	  BS_STAT_MCR1_DONE | BS_STAT_DMAERR |
277	      BS_STAT_MCR1_ALLEMPTY | BS_STAT_MCR2_ALLEMPTY |
278	      BS_STAT_MCR3_ALLEMPTY | BS_STAT_MCR4_ALLEMPTY,
279	  UBS_MAX_AGGR,
280	  "Broadcom BCM5860"
281	},
282
283	{ PCI_VENDOR_BROADCOM,	PCI_PRODUCT_BROADCOM_5861,
284	  UBS_FLAGS_MULTIMCR | UBS_FLAGS_HWNORM |
285	      UBS_FLAGS_LONGCTX |
286	      UBS_FLAGS_RNG | UBS_FLAGS_RNG4 |
287	      UBS_FLAGS_KEY | UBS_FLAGS_BIGKEY | UBS_FLAGS_AES,
288	  BS_STAT_MCR1_DONE | BS_STAT_DMAERR |
289	      BS_STAT_MCR1_ALLEMPTY | BS_STAT_MCR2_ALLEMPTY |
290	      BS_STAT_MCR3_ALLEMPTY | BS_STAT_MCR4_ALLEMPTY,
291	  UBS_MAX_AGGR,
292	  "Broadcom BCM5861"
293	},
294
295	{ PCI_VENDOR_BROADCOM,	PCI_PRODUCT_BROADCOM_5862,
296	  UBS_FLAGS_MULTIMCR | UBS_FLAGS_HWNORM |
297	      UBS_FLAGS_LONGCTX |
298	      UBS_FLAGS_RNG | UBS_FLAGS_RNG4 |
299	      UBS_FLAGS_KEY | UBS_FLAGS_BIGKEY | UBS_FLAGS_AES,
300	  BS_STAT_MCR1_DONE | BS_STAT_DMAERR |
301	      BS_STAT_MCR1_ALLEMPTY | BS_STAT_MCR2_ALLEMPTY |
302	      BS_STAT_MCR3_ALLEMPTY | BS_STAT_MCR4_ALLEMPTY,
303	  UBS_MAX_AGGR,
304	  "Broadcom BCM5862"
305	},
306
307	{ 0,			0,
308	  0,
309	  0,
310	  0,
311	  NULL
312	}
313};
314
315static const struct ubsec_product *
316ubsec_lookup(const struct pci_attach_args *pa)
317{
318	const struct ubsec_product *up;
319
320	for (up = ubsec_products; up->ubsec_name != NULL; up++) {
321		if (PCI_VENDOR(pa->pa_id) == up->ubsec_vendor &&
322		    PCI_PRODUCT(pa->pa_id) == up->ubsec_product)
323			return (up);
324	}
325	return (NULL);
326}
327
328static int
329ubsec_probe(device_t parent, cfdata_t match, void *aux)
330{
331	struct pci_attach_args *pa = (struct pci_attach_args *)aux;
332
333	if (ubsec_lookup(pa) != NULL)
334		return (1);
335
336	return (0);
337}
338
339static void
340ubsec_attach(device_t parent, device_t self, void *aux)
341{
342	struct ubsec_softc *sc = device_private(self);
343	struct pci_attach_args *pa = aux;
344	const struct ubsec_product *up;
345	pci_chipset_tag_t pc = pa->pa_pc;
346	pci_intr_handle_t ih;
347	const char *intrstr = NULL;
348	pcireg_t memtype;
349	struct ubsec_dma *dmap;
350	u_int32_t cmd, i;
351	char intrbuf[PCI_INTRSTR_LEN];
352
353	sc->sc_dev = self;
354	sc->sc_pct = pc;
355
356	up = ubsec_lookup(pa);
357	if (up == NULL) {
358		printf("\n");
359		panic("ubsec_attach: impossible");
360	}
361
362	pci_aprint_devinfo_fancy(pa, "Crypto processor", up->ubsec_name, 1);
363
364	SIMPLEQ_INIT(&sc->sc_queue);
365	SIMPLEQ_INIT(&sc->sc_qchip);
366	SIMPLEQ_INIT(&sc->sc_queue2);
367	SIMPLEQ_INIT(&sc->sc_qchip2);
368	SIMPLEQ_INIT(&sc->sc_queue4);
369	SIMPLEQ_INIT(&sc->sc_qchip4);
370	SIMPLEQ_INIT(&sc->sc_q2free);
371
372	sc->sc_flags = up->ubsec_flags;
373	sc->sc_statmask = up->ubsec_statmask;
374	sc->sc_maxaggr = up->ubsec_maxaggr;
375
376	cmd = pci_conf_read(pc, pa->pa_tag, PCI_COMMAND_STATUS_REG);
377	cmd |= PCI_COMMAND_MASTER_ENABLE;
378	pci_conf_write(pc, pa->pa_tag, PCI_COMMAND_STATUS_REG, cmd);
379
380	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, BS_BAR);
381	if (pci_mapreg_map(pa, BS_BAR, memtype, 0,
382	    &sc->sc_st, &sc->sc_sh, NULL, &sc->sc_memsize)) {
383		aprint_error_dev(self, "can't find mem space");
384		return;
385	}
386
387	sc->sc_dmat = pa->pa_dmat;
388
389	if (pci_intr_map(pa, &ih)) {
390		aprint_error_dev(self, "couldn't map interrupt\n");
391		return;
392	}
393	intrstr = pci_intr_string(pc, ih, intrbuf, sizeof(intrbuf));
394	sc->sc_ih = pci_intr_establish_xname(pc, ih, IPL_NET, ubsec_intr, sc,
395	    device_xname(self));
396	if (sc->sc_ih == NULL) {
397		aprint_error_dev(self, "couldn't establish interrupt");
398		if (intrstr != NULL)
399			aprint_error(" at %s", intrstr);
400		aprint_error("\n");
401		return;
402	}
403	aprint_normal_dev(self, "interrupting at %s\n", intrstr);
404
405	sc->sc_cid = crypto_get_driverid(0);
406	if (sc->sc_cid < 0) {
407		aprint_error_dev(self, "couldn't get crypto driver id\n");
408		pci_intr_disestablish(pc, sc->sc_ih);
409		return;
410	}
411
412	mutex_init(&sc->sc_mtx, MUTEX_DEFAULT, IPL_VM);
413
414	SIMPLEQ_INIT(&sc->sc_freequeue);
415	dmap = sc->sc_dmaa;
416	for (i = 0; i < UBS_MAX_NQUEUE; i++, dmap++) {
417		struct ubsec_q *q;
418
419		q = malloc(sizeof(struct ubsec_q), M_DEVBUF, M_ZERO|M_WAITOK);
420
421		if (ubsec_dma_malloc(sc, sizeof(struct ubsec_dmachunk),
422		    &dmap->d_alloc, 0)) {
423			aprint_error_dev(self, "can't allocate dma buffers\n");
424			free(q, M_DEVBUF);
425			break;
426		}
427		dmap->d_dma = (struct ubsec_dmachunk *)dmap->d_alloc.dma_vaddr;
428
429		q->q_dma = dmap;
430		sc->sc_queuea[i] = q;
431
432		SIMPLEQ_INSERT_TAIL(&sc->sc_freequeue, q, q_next);
433	}
434
435	crypto_register(sc->sc_cid, CRYPTO_3DES_CBC, 0, 0,
436	    ubsec_newsession, ubsec_freesession, ubsec_process, sc);
437	crypto_register(sc->sc_cid, CRYPTO_DES_CBC, 0, 0,
438	    ubsec_newsession, ubsec_freesession, ubsec_process, sc);
439	crypto_register(sc->sc_cid, CRYPTO_MD5_HMAC_96, 0, 0,
440	    ubsec_newsession, ubsec_freesession, ubsec_process, sc);
441	crypto_register(sc->sc_cid, CRYPTO_SHA1_HMAC_96, 0, 0,
442	    ubsec_newsession, ubsec_freesession, ubsec_process, sc);
443	if (sc->sc_flags & UBS_FLAGS_AES) {
444		crypto_register(sc->sc_cid, CRYPTO_AES_CBC, 0, 0,
445		    ubsec_newsession, ubsec_freesession, ubsec_process, sc);
446	}
447
448	/*
449	 * Reset Broadcom chip
450	 */
451	ubsec_reset_board(sc);
452
453	/*
454	 * Init Broadcom specific PCI settings
455	 */
456	ubsec_init_pciregs(pa);
457
458	/*
459	 * Init Broadcom chip
460	 */
461	ubsec_init_board(sc);
462
463#ifndef UBSEC_NO_RNG
464	if (sc->sc_flags & UBS_FLAGS_RNG) {
465		if (sc->sc_flags & UBS_FLAGS_RNG4)
466			sc->sc_statmask |= BS_STAT_MCR4_DONE;
467		else
468			sc->sc_statmask |= BS_STAT_MCR2_DONE;
469
470		if (ubsec_dma_malloc(sc, sizeof(struct ubsec_mcr),
471		    &sc->sc_rng.rng_q.q_mcr, 0))
472			goto skip_rng;
473
474		if (ubsec_dma_malloc(sc, sizeof(struct ubsec_ctx_rngbypass),
475		    &sc->sc_rng.rng_q.q_ctx, 0)) {
476			ubsec_dma_free(sc, &sc->sc_rng.rng_q.q_mcr);
477			goto skip_rng;
478		}
479
480		if (ubsec_dma_malloc(sc, sizeof(u_int32_t) *
481		    UBSEC_RNG_BUFSIZ, &sc->sc_rng.rng_buf, 0)) {
482			ubsec_dma_free(sc, &sc->sc_rng.rng_q.q_ctx);
483			ubsec_dma_free(sc, &sc->sc_rng.rng_q.q_mcr);
484			goto skip_rng;
485		}
486		if (hz >= 100)
487			sc->sc_rnghz = hz / 100;
488		else
489			sc->sc_rnghz = 1;
490		callout_init(&sc->sc_rngto, 0);
491		callout_setfunc(&sc->sc_rngto, ubsec_rng, sc);
492		rndsource_setcb(&sc->sc_rnd_source, ubsec_rng_get, sc);
493		rnd_attach_source(&sc->sc_rnd_source, device_xname(sc->sc_dev),
494				  RND_TYPE_RNG,
495				  RND_FLAG_COLLECT_VALUE|RND_FLAG_HASCB);
496
497 skip_rng:
498		if (sc->sc_rnghz)
499			aprint_normal_dev(self,
500			    "random number generator enabled\n");
501		else
502			aprint_error_dev(self,
503			    "WARNING: random number generator disabled\n");
504	}
505#endif /* UBSEC_NO_RNG */
506
507	if (sc->sc_flags & UBS_FLAGS_KEY) {
508		sc->sc_statmask |= BS_STAT_MCR2_DONE;
509
510		crypto_kregister(sc->sc_cid, CRK_MOD_EXP, 0,
511				 ubsec_kprocess, sc);
512#if 0
513		crypto_kregister(sc->sc_cid, CRK_MOD_EXP_CRT, 0,
514				 ubsec_kprocess, sc);
515#endif
516	}
517}
518
519static int
520ubsec_detach(device_t self, int flags)
521{
522	struct ubsec_softc *sc = device_private(self);
523	struct ubsec_q *q, *qtmp;
524	volatile u_int32_t ctrl;
525
526	/* disable interrupts */
527	/* XXX wait/abort current ops? where is DMAERR enabled? */
528	ctrl = READ_REG(sc, BS_CTRL);
529
530	ctrl &= ~(BS_CTRL_MCR2INT | BS_CTRL_MCR1INT | BS_CTRL_DMAERR);
531	if (sc->sc_flags & UBS_FLAGS_MULTIMCR)
532		ctrl &= ~BS_CTRL_MCR4INT;
533
534	WRITE_REG(sc, BS_CTRL, ctrl);
535
536#ifndef UBSEC_NO_RNG
537	if (sc->sc_flags & UBS_FLAGS_RNG) {
538		callout_halt(&sc->sc_rngto, NULL);
539		ubsec_dma_free(sc, &sc->sc_rng.rng_buf);
540		ubsec_dma_free(sc, &sc->sc_rng.rng_q.q_ctx);
541		ubsec_dma_free(sc, &sc->sc_rng.rng_q.q_mcr);
542		rnd_detach_source(&sc->sc_rnd_source);
543	}
544#endif /* UBSEC_NO_RNG */
545
546	crypto_unregister_all(sc->sc_cid);
547
548	mutex_spin_enter(&sc->sc_mtx);
549
550	ubsec_totalreset(sc);  /* XXX leaves the chip running */
551
552	SIMPLEQ_FOREACH_SAFE(q, &sc->sc_freequeue, q_next, qtmp) {
553		ubsec_dma_free(sc, &q->q_dma->d_alloc);
554		if (q->q_src_map != NULL)
555			bus_dmamap_destroy(sc->sc_dmat, q->q_src_map);
556		if (q->q_cached_dst_map != NULL)
557			bus_dmamap_destroy(sc->sc_dmat, q->q_cached_dst_map);
558		free(q, M_DEVBUF);
559	}
560
561	mutex_spin_exit(&sc->sc_mtx);
562
563	if (sc->sc_ih != NULL) {
564		pci_intr_disestablish(sc->sc_pct, sc->sc_ih);
565		sc->sc_ih = NULL;
566	}
567
568	if (sc->sc_memsize != 0) {
569		bus_space_unmap(sc->sc_st, sc->sc_sh, sc->sc_memsize);
570		sc->sc_memsize = 0;
571	}
572
573	return 0;
574}
575
576MODULE(MODULE_CLASS_DRIVER, ubsec, "pci,opencrypto");
577
578#ifdef _MODULE
579#include "ioconf.c"
580#endif
581
582static int
583ubsec_modcmd(modcmd_t cmd, void *data)
584{
585	int error = 0;
586
587	switch (cmd) {
588	case MODULE_CMD_INIT:
589#ifdef _MODULE
590		error = config_init_component(cfdriver_ioconf_ubsec,
591		    cfattach_ioconf_ubsec, cfdata_ioconf_ubsec);
592#endif
593		return error;
594	case MODULE_CMD_FINI:
595#ifdef _MODULE
596		error = config_fini_component(cfdriver_ioconf_ubsec,
597		    cfattach_ioconf_ubsec, cfdata_ioconf_ubsec);
598#endif
599		return error;
600	default:
601		return ENOTTY;
602	}
603}
604
605SYSCTL_SETUP(ubsec_sysctl_init, "ubsec sysctl")
606{
607	const struct sysctlnode *node = NULL;
608
609	sysctl_createv(clog, 0, NULL, &node,
610		CTLFLAG_PERMANENT,
611		CTLTYPE_NODE, "ubsec",
612		SYSCTL_DESCR("ubsec options"),
613		NULL, 0, NULL, 0,
614		CTL_HW, CTL_CREATE, CTL_EOL);
615	sysctl_createv(clog, 0, &node, NULL,
616		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
617		CTLTYPE_INT, "maxbatch",
618		SYSCTL_DESCR("max ops to batch w/o interrupt"),
619		NULL, 0, &ubsec_maxbatch, 0,
620		CTL_CREATE, CTL_EOL);
621	sysctl_createv(clog, 0, &node, NULL,
622		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
623		CTLTYPE_INT, "maxaggr",
624		SYSCTL_DESCR("max ops to aggregate under one interrupt"),
625		NULL, 0, &ubsec_maxaggr, 0,
626		CTL_CREATE, CTL_EOL);
627
628	return;
629}
630
631/*
632 * UBSEC Interrupt routine
633 */
634static int
635ubsec_intr(void *arg)
636{
637	struct ubsec_softc *sc = arg;
638	volatile u_int32_t stat;
639	struct ubsec_q *q;
640	struct ubsec_dma *dmap;
641	int flags;
642	int npkts = 0, i;
643
644	mutex_spin_enter(&sc->sc_mtx);
645	stat = READ_REG(sc, BS_STAT);
646	stat &= sc->sc_statmask;
647	if (stat == 0) {
648		mutex_spin_exit(&sc->sc_mtx);
649		return (0);
650	}
651
652	WRITE_REG(sc, BS_STAT, stat);		/* IACK */
653
654	/*
655	 * Check to see if we have any packets waiting for us
656	 */
657	if ((stat & BS_STAT_MCR1_DONE)) {
658		while (!SIMPLEQ_EMPTY(&sc->sc_qchip)) {
659			q = SIMPLEQ_FIRST(&sc->sc_qchip);
660			dmap = q->q_dma;
661
662			if ((dmap->d_dma->d_mcr.mcr_flags
663			    & htole16(UBS_MCR_DONE)) == 0)
664				break;
665
666			q = SIMPLEQ_FIRST(&sc->sc_qchip);
667			SIMPLEQ_REMOVE_HEAD(&sc->sc_qchip, /*q,*/ q_next);
668
669			npkts = q->q_nstacked_mcrs;
670			sc->sc_nqchip -= 1+npkts;
671			/*
672			 * search for further sc_qchip ubsec_q's that share
673			 * the same MCR, and complete them too, they must be
674			 * at the top.
675			 */
676			for (i = 0; i < npkts; i++) {
677				if(q->q_stacked_mcr[i])
678					ubsec_callback(sc, q->q_stacked_mcr[i]);
679				else
680					break;
681			}
682			ubsec_callback(sc, q);
683		}
684
685		/*
686		 * Don't send any more packet to chip if there has been
687		 * a DMAERR.
688		 */
689		if (!(stat & BS_STAT_DMAERR))
690			ubsec_feed(sc);
691	}
692
693	/*
694	 * Check to see if we have any key setups/rng's waiting for us
695	 */
696	if ((sc->sc_flags & (UBS_FLAGS_KEY|UBS_FLAGS_RNG)) &&
697	    (stat & BS_STAT_MCR2_DONE)) {
698		struct ubsec_q2 *q2;
699		struct ubsec_mcr *mcr;
700
701		while (!SIMPLEQ_EMPTY(&sc->sc_qchip2)) {
702			q2 = SIMPLEQ_FIRST(&sc->sc_qchip2);
703
704			bus_dmamap_sync(sc->sc_dmat, q2->q_mcr.dma_map,
705			    0, q2->q_mcr.dma_map->dm_mapsize,
706			    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
707
708			mcr = (struct ubsec_mcr *)q2->q_mcr.dma_vaddr;
709
710			/* A bug in new devices requires to swap this field */
711			if (sc->sc_flags & UBS_FLAGS_MULTIMCR)
712				flags = htole16(mcr->mcr_flags);
713			else
714				flags = mcr->mcr_flags;
715			if ((flags & htole16(UBS_MCR_DONE)) == 0) {
716				bus_dmamap_sync(sc->sc_dmat,
717				    q2->q_mcr.dma_map, 0,
718				    q2->q_mcr.dma_map->dm_mapsize,
719				    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
720				break;
721			}
722			q2 = SIMPLEQ_FIRST(&sc->sc_qchip2);
723			SIMPLEQ_REMOVE_HEAD(&sc->sc_qchip2, /*q2,*/ q_next);
724			ubsec_callback2(sc, q2);
725			/*
726			 * Don't send any more packet to chip if there has been
727			 * a DMAERR.
728			 */
729			if (!(stat & BS_STAT_DMAERR))
730				ubsec_feed2(sc);
731		}
732	}
733	if ((sc->sc_flags & UBS_FLAGS_RNG4) && (stat & BS_STAT_MCR4_DONE)) {
734		struct ubsec_q2 *q2;
735		struct ubsec_mcr *mcr;
736
737		while (!SIMPLEQ_EMPTY(&sc->sc_qchip4)) {
738			q2 = SIMPLEQ_FIRST(&sc->sc_qchip4);
739
740			bus_dmamap_sync(sc->sc_dmat, q2->q_mcr.dma_map,
741			    0, q2->q_mcr.dma_map->dm_mapsize,
742			    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
743
744			mcr = (struct ubsec_mcr *)q2->q_mcr.dma_vaddr;
745
746			/* A bug in new devices requires to swap this field */
747			flags = htole16(mcr->mcr_flags);
748
749			if ((flags & htole16(UBS_MCR_DONE)) == 0) {
750				bus_dmamap_sync(sc->sc_dmat,
751				    q2->q_mcr.dma_map, 0,
752				    q2->q_mcr.dma_map->dm_mapsize,
753				    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
754				break;
755			}
756			SIMPLEQ_REMOVE_HEAD(&sc->sc_qchip4, q_next);
757			ubsec_callback2(sc, q2);
758			/*
759			 * Don't send any more packet to chip if there has been
760			 * a DMAERR.
761			 */
762			if (!(stat & BS_STAT_DMAERR))
763				ubsec_feed4(sc);
764		}
765	}
766
767	/*
768	 * Check to see if we got any DMA Error
769	 */
770	if (stat & BS_STAT_DMAERR) {
771#ifdef UBSEC_DEBUG
772		if (ubsec_debug) {
773			volatile u_int32_t a = READ_REG(sc, BS_ERR);
774
775			printf("%s: dmaerr %s@%08x\n", device_xname(sc->sc_dev),
776			    (a & BS_ERR_READ) ? "read" : "write",
777			       a & BS_ERR_ADDR);
778		}
779#endif /* UBSEC_DEBUG */
780		ubsecstats.hst_dmaerr++;
781		ubsec_totalreset(sc);
782		ubsec_feed(sc);
783	}
784
785	if (sc->sc_needwakeup) {		/* XXX check high watermark */
786		int wkeup = sc->sc_needwakeup & (CRYPTO_SYMQ|CRYPTO_ASYMQ);
787#ifdef UBSEC_DEBUG
788		if (ubsec_debug)
789			printf("%s: wakeup crypto (%x)\n",
790			    device_xname(sc->sc_dev), sc->sc_needwakeup);
791#endif /* UBSEC_DEBUG */
792		sc->sc_needwakeup &= ~wkeup;
793		crypto_unblock(sc->sc_cid, wkeup);
794	}
795	mutex_spin_exit(&sc->sc_mtx);
796	return (1);
797}
798
799/*
800 * ubsec_feed() - aggregate and post requests to chip
801 * OpenBSD comments:
802 *		  It is assumed that the caller set splnet()
803 */
804static void
805ubsec_feed(struct ubsec_softc *sc)
806{
807	struct ubsec_q *q, *q2;
808	int npkts, i;
809	void *v;
810	u_int32_t stat;
811#ifdef UBSEC_DEBUG
812	static int max;
813#endif /* UBSEC_DEBUG */
814
815	npkts = sc->sc_nqueue;
816	if (npkts > ubsecstats.hst_maxqueue)
817		ubsecstats.hst_maxqueue = npkts;
818	if (npkts < 2)
819		goto feed1;
820
821	/*
822	 * Decide how many ops to combine in a single MCR.  We cannot
823	 * aggregate more than UBS_MAX_AGGR because this is the number
824	 * of slots defined in the data structure.  Otherwise we clamp
825	 * based on the tunable parameter ubsec_maxaggr.  Note that
826	 * aggregation can happen in two ways: either by batching ops
827	 * from above or because the h/w backs up and throttles us.
828	 * Aggregating ops reduces the number of interrupts to the host
829	 * but also (potentially) increases the latency for processing
830	 * completed ops as we only get an interrupt when all aggregated
831	 * ops have completed.
832	 */
833	if (npkts > sc->sc_maxaggr)
834		npkts = sc->sc_maxaggr;
835	if (npkts > ubsec_maxaggr)
836		npkts = ubsec_maxaggr;
837	if (npkts > ubsecstats.hst_maxbatch)
838		ubsecstats.hst_maxbatch = npkts;
839	if (npkts < 2)
840		goto feed1;
841	ubsecstats.hst_totbatch += npkts-1;
842
843	if ((stat = READ_REG(sc, BS_STAT))
844	    & (BS_STAT_MCR1_FULL | BS_STAT_DMAERR)) {
845		if (stat & BS_STAT_DMAERR) {
846			ubsec_totalreset(sc);
847			ubsecstats.hst_dmaerr++;
848		} else {
849			ubsecstats.hst_mcr1full++;
850		}
851		return;
852	}
853
854#ifdef UBSEC_DEBUG
855	if (ubsec_debug)
856	    printf("merging %d records\n", npkts);
857	/* XXX temporary aggregation statistics reporting code */
858	if (max < npkts) {
859		max = npkts;
860		printf("%s: new max aggregate %d\n", device_xname(sc->sc_dev),
861		    max);
862	}
863#endif /* UBSEC_DEBUG */
864
865	q = SIMPLEQ_FIRST(&sc->sc_queue);
866	SIMPLEQ_REMOVE_HEAD(&sc->sc_queue, /*q,*/ q_next);
867	--sc->sc_nqueue;
868
869	bus_dmamap_sync(sc->sc_dmat, q->q_src_map,
870	    0, q->q_src_map->dm_mapsize, BUS_DMASYNC_PREWRITE);
871	if (q->q_dst_map != NULL)
872		bus_dmamap_sync(sc->sc_dmat, q->q_dst_map,
873		    0, q->q_dst_map->dm_mapsize, BUS_DMASYNC_PREREAD);
874
875	q->q_nstacked_mcrs = npkts - 1;		/* Number of packets stacked */
876
877	for (i = 0; i < q->q_nstacked_mcrs; i++) {
878		q2 = SIMPLEQ_FIRST(&sc->sc_queue);
879		bus_dmamap_sync(sc->sc_dmat, q2->q_src_map,
880		    0, q2->q_src_map->dm_mapsize, BUS_DMASYNC_PREWRITE);
881		if (q2->q_dst_map != NULL)
882			bus_dmamap_sync(sc->sc_dmat, q2->q_dst_map,
883			    0, q2->q_dst_map->dm_mapsize, BUS_DMASYNC_PREREAD);
884		q2= SIMPLEQ_FIRST(&sc->sc_queue);
885		SIMPLEQ_REMOVE_HEAD(&sc->sc_queue, /*q2,*/ q_next);
886		--sc->sc_nqueue;
887
888		v = ((void *)&q2->q_dma->d_dma->d_mcr);
889		v = (char*)v + (sizeof(struct ubsec_mcr) -
890				 sizeof(struct ubsec_mcr_add));
891		memcpy(&q->q_dma->d_dma->d_mcradd[i], v,
892		    sizeof(struct ubsec_mcr_add));
893		q->q_stacked_mcr[i] = q2;
894	}
895	q->q_dma->d_dma->d_mcr.mcr_pkts = htole16(npkts);
896	SIMPLEQ_INSERT_TAIL(&sc->sc_qchip, q, q_next);
897	sc->sc_nqchip += npkts;
898	if (sc->sc_nqchip > ubsecstats.hst_maxqchip)
899		ubsecstats.hst_maxqchip = sc->sc_nqchip;
900	bus_dmamap_sync(sc->sc_dmat, q->q_dma->d_alloc.dma_map,
901	    0, q->q_dma->d_alloc.dma_map->dm_mapsize,
902	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
903	WRITE_REG(sc, BS_MCR1, q->q_dma->d_alloc.dma_paddr +
904	    offsetof(struct ubsec_dmachunk, d_mcr));
905	return;
906
907feed1:
908	while (!SIMPLEQ_EMPTY(&sc->sc_queue)) {
909		if ((stat = READ_REG(sc, BS_STAT))
910		    & (BS_STAT_MCR1_FULL | BS_STAT_DMAERR)) {
911			if (stat & BS_STAT_DMAERR) {
912				ubsec_totalreset(sc);
913				ubsecstats.hst_dmaerr++;
914			} else {
915				ubsecstats.hst_mcr1full++;
916			}
917			break;
918		}
919
920		q = SIMPLEQ_FIRST(&sc->sc_queue);
921
922		bus_dmamap_sync(sc->sc_dmat, q->q_src_map,
923		    0, q->q_src_map->dm_mapsize, BUS_DMASYNC_PREWRITE);
924		if (q->q_dst_map != NULL)
925			bus_dmamap_sync(sc->sc_dmat, q->q_dst_map,
926			    0, q->q_dst_map->dm_mapsize, BUS_DMASYNC_PREREAD);
927		bus_dmamap_sync(sc->sc_dmat, q->q_dma->d_alloc.dma_map,
928		    0, q->q_dma->d_alloc.dma_map->dm_mapsize,
929		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
930
931		WRITE_REG(sc, BS_MCR1, q->q_dma->d_alloc.dma_paddr +
932		    offsetof(struct ubsec_dmachunk, d_mcr));
933#ifdef UBSEC_DEBUG
934		if (ubsec_debug)
935			printf("feed: q->chip %p %08x stat %08x\n",
936 		    	       q, (u_int32_t)q->q_dma->d_alloc.dma_paddr,
937			       stat);
938#endif /* UBSEC_DEBUG */
939		q = SIMPLEQ_FIRST(&sc->sc_queue);
940		SIMPLEQ_REMOVE_HEAD(&sc->sc_queue, /*q,*/ q_next);
941		--sc->sc_nqueue;
942		SIMPLEQ_INSERT_TAIL(&sc->sc_qchip, q, q_next);
943		sc->sc_nqchip++;
944	}
945	if (sc->sc_nqchip > ubsecstats.hst_maxqchip)
946		ubsecstats.hst_maxqchip = sc->sc_nqchip;
947}
948
949/*
950 * Allocate a new 'session' and return an encoded session id.  'sidp'
951 * contains our registration id, and should contain an encoded session
952 * id on successful allocation.
953 */
954static int
955ubsec_newsession(void *arg, u_int32_t *sidp, struct cryptoini *cri)
956{
957	struct cryptoini *c, *encini = NULL, *macini = NULL;
958	struct ubsec_softc *sc = arg;
959	struct ubsec_session *ses = NULL;
960	MD5_CTX md5ctx;
961	SHA1_CTX sha1ctx;
962	int i, sesn;
963
964	for (c = cri; c != NULL; c = c->cri_next) {
965		if (c->cri_alg == CRYPTO_MD5_HMAC_96 ||
966		    c->cri_alg == CRYPTO_SHA1_HMAC_96) {
967			if (macini)
968				return (EINVAL);
969			macini = c;
970		} else if (c->cri_alg == CRYPTO_DES_CBC ||
971		    c->cri_alg == CRYPTO_3DES_CBC ||
972		    c->cri_alg == CRYPTO_AES_CBC) {
973			if (encini)
974				return (EINVAL);
975			encini = c;
976		} else
977			return (EINVAL);
978	}
979	if (encini == NULL && macini == NULL)
980		return (EINVAL);
981
982	if (encini && encini->cri_alg == CRYPTO_AES_CBC) {
983		switch (encini->cri_klen) {
984		case 128:
985		case 192:
986		case 256:
987			break;
988		default:
989			return (EINVAL);
990		}
991	}
992
993	if (sc->sc_sessions == NULL) {
994		ses = sc->sc_sessions = (struct ubsec_session *)malloc(
995		    sizeof(struct ubsec_session), M_DEVBUF, M_NOWAIT);
996		if (ses == NULL)
997			return (ENOMEM);
998		sesn = 0;
999		sc->sc_nsessions = 1;
1000	} else {
1001		for (sesn = 0; sesn < sc->sc_nsessions; sesn++) {
1002			if (sc->sc_sessions[sesn].ses_used == 0) {
1003				ses = &sc->sc_sessions[sesn];
1004				break;
1005			}
1006		}
1007
1008		if (ses == NULL) {
1009			sesn = sc->sc_nsessions;
1010			ses = (struct ubsec_session *)malloc((sesn + 1) *
1011			    sizeof(struct ubsec_session), M_DEVBUF, M_NOWAIT);
1012			if (ses == NULL)
1013				return (ENOMEM);
1014			memcpy(ses, sc->sc_sessions, sesn *
1015			    sizeof(struct ubsec_session));
1016			memset(sc->sc_sessions, 0, sesn *
1017			    sizeof(struct ubsec_session));
1018			free(sc->sc_sessions, M_DEVBUF);
1019			sc->sc_sessions = ses;
1020			ses = &sc->sc_sessions[sesn];
1021			sc->sc_nsessions++;
1022		}
1023	}
1024
1025	memset(ses, 0, sizeof(struct ubsec_session));
1026	ses->ses_used = 1;
1027	if (encini) {
1028		/* Go ahead and compute key in ubsec's byte order */
1029		if (encini->cri_alg == CRYPTO_AES_CBC) {
1030			memcpy(ses->ses_key, encini->cri_key,
1031			    encini->cri_klen / 8);
1032		}
1033		if (encini->cri_alg == CRYPTO_DES_CBC) {
1034			memcpy(&ses->ses_key[0], encini->cri_key, 8);
1035			memcpy(&ses->ses_key[2], encini->cri_key, 8);
1036			memcpy(&ses->ses_key[4], encini->cri_key, 8);
1037		} else
1038			memcpy(ses->ses_key, encini->cri_key, 24);
1039
1040		SWAP32(ses->ses_key[0]);
1041		SWAP32(ses->ses_key[1]);
1042		SWAP32(ses->ses_key[2]);
1043		SWAP32(ses->ses_key[3]);
1044		SWAP32(ses->ses_key[4]);
1045		SWAP32(ses->ses_key[5]);
1046	}
1047
1048	if (macini) {
1049		for (i = 0; i < macini->cri_klen / 8; i++)
1050			macini->cri_key[i] ^= HMAC_IPAD_VAL;
1051
1052		if (macini->cri_alg == CRYPTO_MD5_HMAC_96) {
1053			MD5Init(&md5ctx);
1054			MD5Update(&md5ctx, macini->cri_key,
1055			    macini->cri_klen / 8);
1056			MD5Update(&md5ctx, hmac_ipad_buffer,
1057			    HMAC_BLOCK_LEN - (macini->cri_klen / 8));
1058			memcpy(ses->ses_hminner, md5ctx.state,
1059			    sizeof(md5ctx.state));
1060		} else {
1061			SHA1Init(&sha1ctx);
1062			SHA1Update(&sha1ctx, macini->cri_key,
1063			    macini->cri_klen / 8);
1064			SHA1Update(&sha1ctx, hmac_ipad_buffer,
1065			    HMAC_BLOCK_LEN - (macini->cri_klen / 8));
1066			memcpy(ses->ses_hminner, sha1ctx.state,
1067			    sizeof(sha1ctx.state));
1068		}
1069
1070		for (i = 0; i < macini->cri_klen / 8; i++)
1071			macini->cri_key[i] ^= (HMAC_IPAD_VAL ^ HMAC_OPAD_VAL);
1072
1073		if (macini->cri_alg == CRYPTO_MD5_HMAC_96) {
1074			MD5Init(&md5ctx);
1075			MD5Update(&md5ctx, macini->cri_key,
1076			    macini->cri_klen / 8);
1077			MD5Update(&md5ctx, hmac_opad_buffer,
1078			    HMAC_BLOCK_LEN - (macini->cri_klen / 8));
1079			memcpy(ses->ses_hmouter, md5ctx.state,
1080			    sizeof(md5ctx.state));
1081		} else {
1082			SHA1Init(&sha1ctx);
1083			SHA1Update(&sha1ctx, macini->cri_key,
1084			    macini->cri_klen / 8);
1085			SHA1Update(&sha1ctx, hmac_opad_buffer,
1086			    HMAC_BLOCK_LEN - (macini->cri_klen / 8));
1087			memcpy(ses->ses_hmouter, sha1ctx.state,
1088			    sizeof(sha1ctx.state));
1089		}
1090
1091		for (i = 0; i < macini->cri_klen / 8; i++)
1092			macini->cri_key[i] ^= HMAC_OPAD_VAL;
1093	}
1094
1095	*sidp = UBSEC_SID(device_unit(sc->sc_dev), sesn);
1096	return (0);
1097}
1098
1099/*
1100 * Deallocate a session.
1101 */
1102static void
1103ubsec_freesession(void *arg, u_int64_t tid)
1104{
1105	struct ubsec_softc *sc = arg;
1106	int session;
1107	u_int32_t sid = ((u_int32_t) tid) & 0xffffffff;
1108
1109	session = UBSEC_SESSION(sid);
1110	KASSERTMSG(session >= 0, "session=%d", session);
1111	KASSERTMSG(session < sc->sc_nsessions, "session=%d nsessions=%d",
1112	    session, sc->sc_nsessions);
1113
1114	memset(&sc->sc_sessions[session], 0, sizeof(sc->sc_sessions[session]));
1115}
1116
1117#ifdef __FreeBSD__ /* Ugly gratuitous changes to bus_dma */
1118static void
1119ubsec_op_cb(void *arg, bus_dma_segment_t *seg, int nsegs, bus_size_t mapsize,
1120    int error)
1121{
1122	struct ubsec_operand *op = arg;
1123
1124	KASSERT(nsegs <= UBS_MAX_SCATTER
1125		/*, ("Too many DMA segments returned when mapping operand")*/);
1126#ifdef UBSEC_DEBUG
1127	if (ubsec_debug)
1128		printf("ubsec_op_cb: mapsize %u nsegs %d\n",
1129			(u_int) mapsize, nsegs);
1130#endif
1131	op->mapsize = mapsize;
1132	op->nsegs = nsegs;
1133	memcpy(op->segs, seg, nsegs * sizeof (seg[0]));
1134}
1135#endif
1136
1137static int
1138ubsec_process(void *arg, struct cryptop *crp, int hint)
1139{
1140	struct ubsec_q *q = NULL;
1141	int err = 0, i, j, nicealign;
1142	struct ubsec_softc *sc = arg;
1143	struct cryptodesc *crd1, *crd2, *maccrd, *enccrd;
1144	int encoffset = 0, macoffset = 0, cpskip, cpoffset;
1145	int sskip, dskip, stheend, dtheend;
1146	int16_t coffset;
1147	struct ubsec_session *ses, key;
1148	struct ubsec_dma *dmap = NULL;
1149	u_int16_t flags = 0;
1150	int ivlen = 0, keylen = 0;
1151
1152	KASSERTMSG(UBSEC_SESSION(crp->crp_sid) < sc->sc_nsessions,
1153	    "invalid session id 0x%"PRIx64", nsessions=%d",
1154	    crp->crp_sid, sc->sc_nsessions);
1155
1156	mutex_spin_enter(&sc->sc_mtx);
1157	if (SIMPLEQ_EMPTY(&sc->sc_freequeue)) {
1158		ubsecstats.hst_queuefull++;
1159		mutex_spin_exit(&sc->sc_mtx);
1160		err = ERESTART;
1161		goto errout;
1162	}
1163	q = SIMPLEQ_FIRST(&sc->sc_freequeue);
1164	SIMPLEQ_REMOVE_HEAD(&sc->sc_freequeue, /*q,*/ q_next);
1165	mutex_spin_exit(&sc->sc_mtx);
1166
1167	dmap = q->q_dma; /* Save dma pointer */
1168	/* don't lose the cached dmamaps q_src_map and q_cached_dst_map */
1169	memset(q, 0, offsetof(struct ubsec_q, q_src_map));
1170	memset(&key, 0, sizeof(key));
1171
1172	q->q_sesn = UBSEC_SESSION(crp->crp_sid);
1173	q->q_dma = dmap;
1174	ses = &sc->sc_sessions[q->q_sesn];
1175
1176	if (crp->crp_flags & CRYPTO_F_IMBUF) {
1177		q->q_src_m = (struct mbuf *)crp->crp_buf;
1178		q->q_dst_m = (struct mbuf *)crp->crp_buf;
1179	} else if (crp->crp_flags & CRYPTO_F_IOV) {
1180		q->q_src_io = (struct uio *)crp->crp_buf;
1181		q->q_dst_io = (struct uio *)crp->crp_buf;
1182	} else {
1183		ubsecstats.hst_badflags++;
1184		err = EINVAL;
1185		goto errout;	/* XXX we don't handle contiguous blocks! */
1186	}
1187
1188	memset(&dmap->d_dma->d_mcr, 0, sizeof(struct ubsec_mcr));
1189
1190	dmap->d_dma->d_mcr.mcr_pkts = htole16(1);
1191	dmap->d_dma->d_mcr.mcr_flags = 0;
1192	q->q_crp = crp;
1193
1194	crd1 = crp->crp_desc;
1195	if (crd1 == NULL) {
1196		ubsecstats.hst_nodesc++;
1197		err = EINVAL;
1198		goto errout;
1199	}
1200	crd2 = crd1->crd_next;
1201
1202	if (crd2 == NULL) {
1203		if (crd1->crd_alg == CRYPTO_MD5_HMAC_96 ||
1204		    crd1->crd_alg == CRYPTO_SHA1_HMAC_96) {
1205			maccrd = crd1;
1206			enccrd = NULL;
1207		} else if (crd1->crd_alg == CRYPTO_DES_CBC ||
1208		    crd1->crd_alg == CRYPTO_3DES_CBC ||
1209		    crd1->crd_alg == CRYPTO_AES_CBC) {
1210			maccrd = NULL;
1211			enccrd = crd1;
1212		} else {
1213			ubsecstats.hst_badalg++;
1214			err = EINVAL;
1215			goto errout;
1216		}
1217	} else {
1218		if ((crd1->crd_alg == CRYPTO_MD5_HMAC_96 ||
1219		    crd1->crd_alg == CRYPTO_SHA1_HMAC_96) &&
1220		    (crd2->crd_alg == CRYPTO_DES_CBC ||
1221		    crd2->crd_alg == CRYPTO_3DES_CBC ||
1222		    crd2->crd_alg == CRYPTO_AES_CBC) &&
1223		    ((crd2->crd_flags & CRD_F_ENCRYPT) == 0)) {
1224			maccrd = crd1;
1225			enccrd = crd2;
1226		} else if ((crd1->crd_alg == CRYPTO_DES_CBC ||
1227		    crd1->crd_alg == CRYPTO_3DES_CBC ||
1228		    crd1->crd_alg == CRYPTO_AES_CBC) &&
1229		    (crd2->crd_alg == CRYPTO_MD5_HMAC_96 ||
1230		    crd2->crd_alg == CRYPTO_SHA1_HMAC_96) &&
1231		    (crd1->crd_flags & CRD_F_ENCRYPT)) {
1232			enccrd = crd1;
1233			maccrd = crd2;
1234		} else {
1235			/*
1236			 * We cannot order the ubsec as requested
1237			 */
1238			ubsecstats.hst_badalg++;
1239			err = EINVAL;
1240			goto errout;
1241		}
1242	}
1243
1244	if (enccrd) {
1245		if (enccrd->crd_alg == CRYPTO_AES_CBC) {
1246			if ((sc->sc_flags & UBS_FLAGS_AES) == 0) {
1247				/*
1248				 * We cannot order the ubsec as requested
1249				 */
1250				ubsecstats.hst_badalg++;
1251				err = EINVAL;
1252				goto errout;
1253			}
1254			flags |= htole16(UBS_PKTCTX_ENC_AES);
1255			switch (enccrd->crd_klen) {
1256			case 128:
1257			case 192:
1258			case 256:
1259				keylen = enccrd->crd_klen / 8;
1260				break;
1261			default:
1262				err = EINVAL;
1263				goto errout;
1264			}
1265			ivlen = 16;
1266		} else {
1267			flags |= htole16(UBS_PKTCTX_ENC_3DES);
1268			ivlen = 8;
1269			keylen = 24;
1270		}
1271
1272		encoffset = enccrd->crd_skip;
1273
1274		if (enccrd->crd_flags & CRD_F_ENCRYPT) {
1275			if (enccrd->crd_flags & CRD_F_IV_EXPLICIT)
1276				memcpy(key.ses_iv, enccrd->crd_iv, ivlen);
1277			else
1278				cprng_fast(key.ses_iv, ivlen);
1279
1280			if ((enccrd->crd_flags & CRD_F_IV_PRESENT) == 0) {
1281				if (crp->crp_flags & CRYPTO_F_IMBUF)
1282					m_copyback(q->q_src_m,
1283					    enccrd->crd_inject,
1284					    ivlen, (void *)key.ses_iv);
1285				else if (crp->crp_flags & CRYPTO_F_IOV)
1286					cuio_copyback(q->q_src_io,
1287					    enccrd->crd_inject,
1288					    ivlen, (void *)key.ses_iv);
1289			}
1290		} else {
1291			flags |= htole16(UBS_PKTCTX_INBOUND);
1292
1293			if (enccrd->crd_flags & CRD_F_IV_EXPLICIT)
1294				memcpy(key.ses_iv, enccrd->crd_iv, ivlen);
1295			else if (crp->crp_flags & CRYPTO_F_IMBUF)
1296				m_copydata(q->q_src_m, enccrd->crd_inject,
1297				    ivlen, (void *)key.ses_iv);
1298			else if (crp->crp_flags & CRYPTO_F_IOV)
1299				cuio_copydata(q->q_src_io,
1300				    enccrd->crd_inject, 8,
1301				    (void *)key.ses_iv);
1302		}
1303
1304		for (i = 0; i < (keylen / 4); i++)
1305			key.ses_key[i] = ses->ses_key[i];
1306		for (i = 0; i < (ivlen / 4); i++)
1307			SWAP32(key.ses_iv[i]);
1308	}
1309
1310	if (maccrd) {
1311		macoffset = maccrd->crd_skip;
1312
1313		if (maccrd->crd_alg == CRYPTO_MD5_HMAC_96)
1314			flags |= htole16(UBS_PKTCTX_AUTH_MD5);
1315		else
1316			flags |= htole16(UBS_PKTCTX_AUTH_SHA1);
1317
1318		for (i = 0; i < 5; i++) {
1319			key.ses_hminner[i] = ses->ses_hminner[i];
1320			key.ses_hmouter[i] = ses->ses_hmouter[i];
1321
1322			HTOLE32(key.ses_hminner[i]);
1323			HTOLE32(key.ses_hmouter[i]);
1324		}
1325	}
1326
1327	if (enccrd && maccrd) {
1328		/*
1329		 * ubsec cannot handle packets where the end of encryption
1330		 * and authentication are not the same, or where the
1331		 * encrypted part begins before the authenticated part.
1332		 */
1333		if ((encoffset + enccrd->crd_len) !=
1334		    (macoffset + maccrd->crd_len)) {
1335			ubsecstats.hst_lenmismatch++;
1336			err = EINVAL;
1337			goto errout;
1338		}
1339		if (enccrd->crd_skip < maccrd->crd_skip) {
1340			ubsecstats.hst_skipmismatch++;
1341			err = EINVAL;
1342			goto errout;
1343		}
1344		sskip = maccrd->crd_skip;
1345		cpskip = dskip = enccrd->crd_skip;
1346		stheend = maccrd->crd_len;
1347		dtheend = enccrd->crd_len;
1348		coffset = enccrd->crd_skip - maccrd->crd_skip;
1349		cpoffset = cpskip + dtheend;
1350#ifdef UBSEC_DEBUG
1351		if (ubsec_debug) {
1352			printf("mac: skip %d, len %d, inject %d\n",
1353			    maccrd->crd_skip, maccrd->crd_len,
1354			    maccrd->crd_inject);
1355			printf("enc: skip %d, len %d, inject %d\n",
1356			    enccrd->crd_skip, enccrd->crd_len,
1357			    enccrd->crd_inject);
1358			printf("src: skip %d, len %d\n", sskip, stheend);
1359			printf("dst: skip %d, len %d\n", dskip, dtheend);
1360			printf("ubs: coffset %d, pktlen %d, cpskip %d, cpoffset %d\n",
1361			       coffset, stheend, cpskip, cpoffset);
1362		}
1363#endif
1364	} else {
1365		cpskip = dskip = sskip = macoffset + encoffset;
1366		dtheend = stheend = (enccrd)?enccrd->crd_len:maccrd->crd_len;
1367		cpoffset = cpskip + dtheend;
1368		coffset = 0;
1369	}
1370
1371	if (q->q_src_map == NULL) {
1372		/* XXX FIXME: jonathan asks, what the heck's that 0xfff0?  */
1373		if (bus_dmamap_create(sc->sc_dmat, 0xfff0, UBS_MAX_SCATTER,
1374			0xfff0, 0, BUS_DMA_NOWAIT, &q->q_src_map) != 0) {
1375			err = ENOMEM;
1376			goto errout;
1377		}
1378	}
1379	if (crp->crp_flags & CRYPTO_F_IMBUF) {
1380		if (bus_dmamap_load_mbuf(sc->sc_dmat, q->q_src_map,
1381		    q->q_src_m, BUS_DMA_NOWAIT) != 0) {
1382			ubsecstats.hst_noload++;
1383			err = ENOMEM;
1384			goto errout;
1385		}
1386	} else if (crp->crp_flags & CRYPTO_F_IOV) {
1387		if (bus_dmamap_load_uio(sc->sc_dmat, q->q_src_map,
1388		    q->q_src_io, BUS_DMA_NOWAIT) != 0) {
1389			ubsecstats.hst_noload++;
1390			err = ENOMEM;
1391			goto errout;
1392		}
1393	}
1394	nicealign = ubsec_dmamap_aligned(q->q_src_map);
1395
1396	dmap->d_dma->d_mcr.mcr_pktlen = htole16(stheend);
1397
1398#ifdef UBSEC_DEBUG
1399	if (ubsec_debug)
1400		printf("src skip: %d nicealign: %u\n", sskip, nicealign);
1401#endif
1402	for (i = j = 0; i < q->q_src_map->dm_nsegs; i++) {
1403		struct ubsec_pktbuf *pb;
1404		bus_size_t packl = q->q_src_map->dm_segs[i].ds_len;
1405		bus_addr_t packp = q->q_src_map->dm_segs[i].ds_addr;
1406
1407		if (sskip >= packl) {
1408			sskip -= packl;
1409			continue;
1410		}
1411
1412		packl -= sskip;
1413		packp += sskip;
1414		sskip = 0;
1415
1416		if (packl > 0xfffc) {
1417			err = EIO;
1418			goto errout;
1419		}
1420
1421		if (j == 0)
1422			pb = &dmap->d_dma->d_mcr.mcr_ipktbuf;
1423		else
1424			pb = &dmap->d_dma->d_sbuf[j - 1];
1425
1426		pb->pb_addr = htole32(packp);
1427
1428		if (stheend) {
1429			if (packl > stheend) {
1430				pb->pb_len = htole32(stheend);
1431				stheend = 0;
1432			} else {
1433				pb->pb_len = htole32(packl);
1434				stheend -= packl;
1435			}
1436		} else
1437			pb->pb_len = htole32(packl);
1438
1439		if ((i + 1) == q->q_src_map->dm_nsegs)
1440			pb->pb_next = 0;
1441		else
1442			pb->pb_next = htole32(dmap->d_alloc.dma_paddr +
1443			    offsetof(struct ubsec_dmachunk, d_sbuf[j]));
1444		j++;
1445	}
1446
1447	if (enccrd == NULL && maccrd != NULL) {
1448		dmap->d_dma->d_mcr.mcr_opktbuf.pb_addr = 0;
1449		dmap->d_dma->d_mcr.mcr_opktbuf.pb_len = 0;
1450		dmap->d_dma->d_mcr.mcr_opktbuf.pb_next = htole32(dmap->d_alloc.dma_paddr +
1451		    offsetof(struct ubsec_dmachunk, d_macbuf[0]));
1452#ifdef UBSEC_DEBUG
1453		if (ubsec_debug)
1454			printf("opkt: %x %x %x\n",
1455	 		    dmap->d_dma->d_mcr.mcr_opktbuf.pb_addr,
1456	 		    dmap->d_dma->d_mcr.mcr_opktbuf.pb_len,
1457	 		    dmap->d_dma->d_mcr.mcr_opktbuf.pb_next);
1458
1459#endif
1460	} else {
1461		if (crp->crp_flags & CRYPTO_F_IOV) {
1462			if (!nicealign) {
1463				ubsecstats.hst_iovmisaligned++;
1464				err = EINVAL;
1465				goto errout;
1466			}
1467			if (q->q_dst_map == NULL) {
1468				if (q->q_cached_dst_map == NULL) {
1469					/*
1470					 * XXX: ``what the heck's that''
1471					 * 0xfff0?
1472					 */
1473					if (bus_dmamap_create(sc->sc_dmat,
1474					    0xfff0, UBS_MAX_SCATTER, 0xfff0, 0,
1475					    BUS_DMA_NOWAIT,
1476					    &q->q_cached_dst_map) != 0) {
1477						ubsecstats.hst_nomap++;
1478						err = ENOMEM;
1479						goto errout;
1480					}
1481				}
1482				q->q_dst_map = q->q_cached_dst_map;
1483			}
1484			if (bus_dmamap_load_uio(sc->sc_dmat, q->q_dst_map,
1485			    q->q_dst_io, BUS_DMA_NOWAIT) != 0) {
1486				ubsecstats.hst_noload++;
1487				err = ENOMEM;
1488				goto errout;
1489			}
1490		} else if (crp->crp_flags & CRYPTO_F_IMBUF) {
1491			if (nicealign) {
1492				q->q_dst_m = q->q_src_m;
1493				q->q_dst_map = q->q_src_map;
1494			} else {
1495				int totlen, len;
1496				struct mbuf *m, *top, **mp;
1497
1498				ubsecstats.hst_unaligned++;
1499				totlen = q->q_src_map->dm_mapsize;
1500				if (q->q_src_m->m_flags & M_PKTHDR) {
1501					len = MHLEN;
1502					MGETHDR(m, M_DONTWAIT, MT_DATA);
1503					/*XXX FIXME: m_dup_pkthdr */
1504					if (m && 1 /*!m_dup_pkthdr(m, q->q_src_m, M_DONTWAIT)*/) {
1505						m_free(m);
1506						m = NULL;
1507					}
1508				} else {
1509					len = MLEN;
1510					MGET(m, M_DONTWAIT, MT_DATA);
1511				}
1512				if (m == NULL) {
1513					ubsecstats.hst_nombuf++;
1514					err = sc->sc_nqueue ? ERESTART : ENOMEM;
1515					goto errout;
1516				}
1517				if (len == MHLEN)
1518				  /*XXX was M_DUP_PKTHDR*/
1519				  m_copy_pkthdr(m, q->q_src_m);
1520				if (totlen >= MINCLSIZE) {
1521					MCLGET(m, M_DONTWAIT);
1522					if ((m->m_flags & M_EXT) == 0) {
1523						m_free(m);
1524						ubsecstats.hst_nomcl++;
1525						err = sc->sc_nqueue
1526						    ? ERESTART : ENOMEM;
1527						goto errout;
1528					}
1529					len = MCLBYTES;
1530				}
1531				m->m_len = len;
1532				top = NULL;
1533				mp = &top;
1534
1535				while (totlen > 0) {
1536					if (top) {
1537						MGET(m, M_DONTWAIT, MT_DATA);
1538						if (m == NULL) {
1539							m_freem(top);
1540							ubsecstats.hst_nombuf++;
1541							err = sc->sc_nqueue ? ERESTART : ENOMEM;
1542							goto errout;
1543						}
1544						len = MLEN;
1545					}
1546					if (top && totlen >= MINCLSIZE) {
1547						MCLGET(m, M_DONTWAIT);
1548						if ((m->m_flags & M_EXT) == 0) {
1549							*mp = m;
1550							m_freem(top);
1551							ubsecstats.hst_nomcl++;
1552							err = sc->sc_nqueue ? ERESTART : ENOMEM;
1553							goto errout;
1554						}
1555						len = MCLBYTES;
1556					}
1557					m->m_len = len = uimin(totlen, len);
1558					totlen -= len;
1559					*mp = m;
1560					mp = &m->m_next;
1561				}
1562				q->q_dst_m = top;
1563				ubsec_mcopy(q->q_src_m, q->q_dst_m,
1564				    cpskip, cpoffset);
1565				if (q->q_dst_map == NULL) {
1566					if (q->q_cached_dst_map == NULL) {
1567						/* XXX again, what the heck is that 0xfff0? */
1568						if (bus_dmamap_create(sc->sc_dmat, 0xfff0,
1569						    UBS_MAX_SCATTER, 0xfff0, 0, BUS_DMA_NOWAIT,
1570						    &q->q_cached_dst_map) != 0) {
1571							ubsecstats.hst_nomap++;
1572							err = ENOMEM;
1573							goto errout;
1574						}
1575					}
1576					q->q_dst_map = q->q_cached_dst_map;
1577				}
1578				if (bus_dmamap_load_mbuf(sc->sc_dmat,
1579				    q->q_dst_map, q->q_dst_m,
1580				    BUS_DMA_NOWAIT) != 0) {
1581					ubsecstats.hst_noload++;
1582					err = ENOMEM;
1583					goto errout;
1584				}
1585			}
1586		} else {
1587			ubsecstats.hst_badflags++;
1588			err = EINVAL;
1589			goto errout;
1590		}
1591
1592#ifdef UBSEC_DEBUG
1593		if (ubsec_debug)
1594			printf("dst skip: %d\n", dskip);
1595#endif
1596		for (i = j = 0; i < q->q_dst_map->dm_nsegs; i++) {
1597			struct ubsec_pktbuf *pb;
1598			bus_size_t packl = q->q_dst_map->dm_segs[i].ds_len;
1599			bus_addr_t packp = q->q_dst_map->dm_segs[i].ds_addr;
1600
1601			if (dskip >= packl) {
1602				dskip -= packl;
1603				continue;
1604			}
1605
1606			packl -= dskip;
1607			packp += dskip;
1608			dskip = 0;
1609
1610			if (packl > 0xfffc) {
1611				err = EIO;
1612				goto errout;
1613			}
1614
1615			if (j == 0)
1616				pb = &dmap->d_dma->d_mcr.mcr_opktbuf;
1617			else
1618				pb = &dmap->d_dma->d_dbuf[j - 1];
1619
1620			pb->pb_addr = htole32(packp);
1621
1622			if (dtheend) {
1623				if (packl > dtheend) {
1624					pb->pb_len = htole32(dtheend);
1625					dtheend = 0;
1626				} else {
1627					pb->pb_len = htole32(packl);
1628					dtheend -= packl;
1629				}
1630			} else
1631				pb->pb_len = htole32(packl);
1632
1633			if ((i + 1) == q->q_dst_map->dm_nsegs) {
1634				if (maccrd)
1635					pb->pb_next = htole32(dmap->d_alloc.dma_paddr +
1636					    offsetof(struct ubsec_dmachunk, d_macbuf[0]));
1637				else
1638					pb->pb_next = 0;
1639			} else
1640				pb->pb_next = htole32(dmap->d_alloc.dma_paddr +
1641				    offsetof(struct ubsec_dmachunk, d_dbuf[j]));
1642			j++;
1643		}
1644	}
1645
1646	dmap->d_dma->d_mcr.mcr_cmdctxp = htole32(dmap->d_alloc.dma_paddr +
1647	    offsetof(struct ubsec_dmachunk, d_ctx));
1648
1649	if (enccrd && enccrd->crd_alg == CRYPTO_AES_CBC) {
1650		struct ubsec_pktctx_aes128	*aes128;
1651		struct ubsec_pktctx_aes192	*aes192;
1652		struct ubsec_pktctx_aes256	*aes256;
1653		struct ubsec_pktctx_hdr		*ph;
1654		u_int8_t			*ctx;
1655
1656		ctx = (u_int8_t *)(dmap->d_alloc.dma_vaddr) +
1657		    offsetof(struct ubsec_dmachunk, d_ctx);
1658
1659		ph = (struct ubsec_pktctx_hdr *)ctx;
1660		ph->ph_type = htole16(UBS_PKTCTX_TYPE_IPSEC_AES);
1661		ph->ph_flags = flags;
1662		ph->ph_offset = htole16(coffset >> 2);
1663
1664		switch (enccrd->crd_klen) {
1665		case 128:
1666			aes128 = (struct ubsec_pktctx_aes128 *)ctx;
1667 			ph->ph_len = htole16(sizeof(*aes128));
1668			ph->ph_flags |= htole16(UBS_PKTCTX_KEYSIZE_128);
1669			for (i = 0; i < 4; i++)
1670				aes128->pc_aeskey[i] = key.ses_key[i];
1671			for (i = 0; i < 5; i++)
1672				aes128->pc_hminner[i] = key.ses_hminner[i];
1673			for (i = 0; i < 5; i++)
1674				aes128->pc_hmouter[i] = key.ses_hmouter[i];
1675			for (i = 0; i < 4; i++)
1676				aes128->pc_iv[i] = key.ses_iv[i];
1677			break;
1678		case 192:
1679			aes192 = (struct ubsec_pktctx_aes192 *)ctx;
1680			ph->ph_len = htole16(sizeof(*aes192));
1681			ph->ph_flags |= htole16(UBS_PKTCTX_KEYSIZE_192);
1682			for (i = 0; i < 6; i++)
1683				aes192->pc_aeskey[i] = key.ses_key[i];
1684			for (i = 0; i < 5; i++)
1685				aes192->pc_hminner[i] = key.ses_hminner[i];
1686			for (i = 0; i < 5; i++)
1687				aes192->pc_hmouter[i] = key.ses_hmouter[i];
1688			for (i = 0; i < 4; i++)
1689				aes192->pc_iv[i] = key.ses_iv[i];
1690			break;
1691		case 256:
1692			aes256 = (struct ubsec_pktctx_aes256 *)ctx;
1693			ph->ph_len = htole16(sizeof(*aes256));
1694			ph->ph_flags |= htole16(UBS_PKTCTX_KEYSIZE_256);
1695			for (i = 0; i < 8; i++)
1696				aes256->pc_aeskey[i] = key.ses_key[i];
1697			for (i = 0; i < 5; i++)
1698				aes256->pc_hminner[i] = key.ses_hminner[i];
1699			for (i = 0; i < 5; i++)
1700				aes256->pc_hmouter[i] = key.ses_hmouter[i];
1701			for (i = 0; i < 4; i++)
1702				aes256->pc_iv[i] = key.ses_iv[i];
1703			break;
1704		}
1705	} else if (sc->sc_flags & UBS_FLAGS_LONGCTX) {
1706		struct ubsec_pktctx_3des	*ctx;
1707		struct ubsec_pktctx_hdr		*ph;
1708
1709		ctx = (struct ubsec_pktctx_3des *)
1710		    ((u_int8_t *)(dmap->d_alloc.dma_vaddr) +
1711		    offsetof(struct ubsec_dmachunk, d_ctx));
1712
1713		ph = (struct ubsec_pktctx_hdr *)ctx;
1714		ph->ph_len = htole16(sizeof(*ctx));
1715		ph->ph_type = htole16(UBS_PKTCTX_TYPE_IPSEC_3DES);
1716		ph->ph_flags = flags;
1717		ph->ph_offset = htole16(coffset >> 2);
1718
1719		for (i = 0; i < 6; i++)
1720			ctx->pc_deskey[i] = key.ses_key[i];
1721		for (i = 0; i < 5; i++)
1722			ctx->pc_hminner[i] = key.ses_hminner[i];
1723		for (i = 0; i < 5; i++)
1724			ctx->pc_hmouter[i] = key.ses_hmouter[i];
1725		for (i = 0; i < 2; i++)
1726			ctx->pc_iv[i] = key.ses_iv[i];
1727	} else {
1728		struct ubsec_pktctx *ctx = (struct ubsec_pktctx *)
1729		    ((u_int8_t *)dmap->d_alloc.dma_vaddr +
1730		    offsetof(struct ubsec_dmachunk, d_ctx));
1731
1732		ctx->pc_flags = flags;
1733		ctx->pc_offset = htole16(coffset >> 2);
1734		for (i = 0; i < 6; i++)
1735			ctx->pc_deskey[i] = key.ses_key[i];
1736		for (i = 0; i < 5; i++)
1737			ctx->pc_hminner[i] = key.ses_hminner[i];
1738		for (i = 0; i < 5; i++)
1739			ctx->pc_hmouter[i] = key.ses_hmouter[i];
1740		for (i = 0; i < 2; i++)
1741			ctx->pc_iv[i] = key.ses_iv[i];
1742	}
1743
1744	mutex_spin_enter(&sc->sc_mtx);
1745	SIMPLEQ_INSERT_TAIL(&sc->sc_queue, q, q_next);
1746	sc->sc_nqueue++;
1747	ubsecstats.hst_ipackets++;
1748	ubsecstats.hst_ibytes += dmap->d_alloc.dma_map->dm_mapsize;
1749	if ((hint & CRYPTO_HINT_MORE) == 0 || sc->sc_nqueue >= ubsec_maxbatch)
1750		ubsec_feed(sc);
1751	mutex_spin_exit(&sc->sc_mtx);
1752	return 0;
1753
1754errout:
1755	if (q != NULL) {
1756		if (q->q_dst_map != NULL && q->q_dst_map != q->q_src_map) {
1757			bus_dmamap_unload(sc->sc_dmat, q->q_dst_map);
1758		}
1759		if (q->q_src_map != NULL) {
1760			bus_dmamap_unload(sc->sc_dmat, q->q_src_map);
1761		}
1762
1763		if ((q->q_dst_m != NULL) && (q->q_src_m != q->q_dst_m))
1764			m_freem(q->q_dst_m);
1765
1766		mutex_spin_enter(&sc->sc_mtx);
1767		SIMPLEQ_INSERT_TAIL(&sc->sc_freequeue, q, q_next);
1768		mutex_spin_exit(&sc->sc_mtx);
1769	}
1770	if (err == ERESTART) {
1771		mutex_spin_enter(&sc->sc_mtx);
1772		sc->sc_needwakeup |= CRYPTO_SYMQ;
1773		mutex_spin_exit(&sc->sc_mtx);
1774		return ERESTART;
1775	}
1776	crp->crp_etype = err;
1777	crypto_done(crp);
1778	return 0;
1779}
1780
1781static void
1782ubsec_callback(struct ubsec_softc *sc, struct ubsec_q *q)
1783{
1784	struct cryptop *crp = (struct cryptop *)q->q_crp;
1785	struct cryptodesc *crd;
1786	struct ubsec_dma *dmap = q->q_dma;
1787
1788	ubsecstats.hst_opackets++;
1789	ubsecstats.hst_obytes += dmap->d_alloc.dma_size;
1790
1791	bus_dmamap_sync(sc->sc_dmat, dmap->d_alloc.dma_map, 0,
1792	    dmap->d_alloc.dma_map->dm_mapsize,
1793	    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
1794	if (q->q_dst_map != NULL && q->q_dst_map != q->q_src_map) {
1795		bus_dmamap_sync(sc->sc_dmat, q->q_dst_map,
1796		    0, q->q_dst_map->dm_mapsize, BUS_DMASYNC_POSTREAD);
1797		bus_dmamap_unload(sc->sc_dmat, q->q_dst_map);
1798	}
1799	bus_dmamap_sync(sc->sc_dmat, q->q_src_map,
1800	    0, q->q_src_map->dm_mapsize, BUS_DMASYNC_POSTWRITE);
1801	bus_dmamap_unload(sc->sc_dmat, q->q_src_map);
1802
1803	if ((crp->crp_flags & CRYPTO_F_IMBUF) && (q->q_src_m != q->q_dst_m)) {
1804		m_freem(q->q_src_m);
1805		crp->crp_buf = (void *)q->q_dst_m;
1806	}
1807
1808	for (crd = crp->crp_desc; crd; crd = crd->crd_next) {
1809		if (crd->crd_alg != CRYPTO_MD5_HMAC_96 &&
1810		    crd->crd_alg != CRYPTO_SHA1_HMAC_96)
1811			continue;
1812		if (crp->crp_flags & CRYPTO_F_IMBUF)
1813			m_copyback((struct mbuf *)crp->crp_buf,
1814			    crd->crd_inject, 12,
1815			    (void *)dmap->d_dma->d_macbuf);
1816		else if (crp->crp_flags & CRYPTO_F_IOV && crp->crp_mac)
1817			bcopy((void *)dmap->d_dma->d_macbuf,
1818			    crp->crp_mac, 12);
1819		break;
1820	}
1821	SIMPLEQ_INSERT_TAIL(&sc->sc_freequeue, q, q_next);
1822	crypto_done(crp);
1823}
1824
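/*
 * Copy the regions outside [hoffset, toffset) from the source mbuf chain
 * to the destination chain, preserving the headers and trailers around
 * the crypto payload.
 */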
1825static void
1826ubsec_mcopy(struct mbuf *srcm, struct mbuf *dstm, int hoffset, int toffset)
1827{
1828	int i, j, dlen, slen;
1829	char *dptr, *sptr;
1830
1831	j = 0;
1832	sptr = srcm->m_data;
1833	slen = srcm->m_len;
1834	dptr = dstm->m_data;
1835	dlen = dstm->m_len;
1836
1837	while (1) {
1838		for (i = 0; i < uimin(slen, dlen); i++) {
1839			if (j < hoffset || j >= toffset)
1840				*dptr++ = *sptr++;
1841			slen--;
1842			dlen--;
1843			j++;
1844		}
1845		if (slen == 0) {
1846			srcm = srcm->m_next;
1847			if (srcm == NULL)
1848				return;
1849			sptr = srcm->m_data;
1850			slen = srcm->m_len;
1851		}
1852		if (dlen == 0) {
1853			dstm = dstm->m_next;
1854			if (dstm == NULL)
1855				return;
1856			dptr = dstm->m_data;
1857			dlen = dstm->m_len;
1858		}
1859	}
1860}
1861
1862/*
1863 * Feed queued key operations to the MCR2 ring; caller must hold sc_mtx.
1864 */
1865static void
1866ubsec_feed2(struct ubsec_softc *sc)
1867{
1868	struct ubsec_q2 *q;
1869
1870	while (!SIMPLEQ_EMPTY(&sc->sc_queue2)) {
1871		if (READ_REG(sc, BS_STAT) & BS_STAT_MCR2_FULL)
1872			break;
1873		q = SIMPLEQ_FIRST(&sc->sc_queue2);
1874
1875		bus_dmamap_sync(sc->sc_dmat, q->q_mcr.dma_map, 0,
1876		    q->q_mcr.dma_map->dm_mapsize,
1877		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1878		bus_dmamap_sync(sc->sc_dmat, q->q_ctx.dma_map, 0,
1879		    q->q_ctx.dma_map->dm_mapsize,
1880		    BUS_DMASYNC_PREWRITE);
1881
1882		WRITE_REG(sc, BS_MCR2, q->q_mcr.dma_paddr);
1883		q = SIMPLEQ_FIRST(&sc->sc_queue2);
1884		SIMPLEQ_REMOVE_HEAD(&sc->sc_queue2, q_next);
1885		--sc->sc_nqueue2;
1886		SIMPLEQ_INSERT_TAIL(&sc->sc_qchip2, q, q_next);
1887	}
1888}
1889
1890/*
1891 * feed the RNG (used instead of ubsec_feed2() on 5827+ devices)
1892 */
1893void
1894ubsec_feed4(struct ubsec_softc *sc)
1895{
1896	struct ubsec_q2 *q;
1897
1898	while (!SIMPLEQ_EMPTY(&sc->sc_queue4)) {
1899		if (READ_REG(sc, BS_STAT) & BS_STAT_MCR4_FULL)
1900			break;
1901		q = SIMPLEQ_FIRST(&sc->sc_queue4);
1902
1903		bus_dmamap_sync(sc->sc_dmat, q->q_mcr.dma_map, 0,
1904		    q->q_mcr.dma_map->dm_mapsize,
1905		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1906		bus_dmamap_sync(sc->sc_dmat, q->q_ctx.dma_map, 0,
1907		    q->q_ctx.dma_map->dm_mapsize,
1908		    BUS_DMASYNC_PREWRITE);
1909
1910		WRITE_REG(sc, BS_MCR4, q->q_mcr.dma_paddr);
1911		SIMPLEQ_REMOVE_HEAD(&sc->sc_queue4, q_next);
1912		--sc->sc_nqueue4;
1913		SIMPLEQ_INSERT_TAIL(&sc->sc_qchip4, q, q_next);
1914	}
1915}
1916
1917/*
1918 * Callback for completed MCR2/MCR4 operations (RNG and key operations).
1919 */
1920static void
1921ubsec_callback2(struct ubsec_softc *sc, struct ubsec_q2 *q)
1922{
1923	struct cryptkop *krp;
1924	struct ubsec_ctx_keyop *ctx;
1925
1926	ctx = (struct ubsec_ctx_keyop *)q->q_ctx.dma_vaddr;
1927	bus_dmamap_sync(sc->sc_dmat, q->q_ctx.dma_map, 0,
1928	    q->q_ctx.dma_map->dm_mapsize, BUS_DMASYNC_POSTWRITE);
1929
1930	switch (q->q_type) {
1931#ifndef UBSEC_NO_RNG
1932	case UBS_CTXOP_RNGSHA1:
1933	case UBS_CTXOP_RNGBYPASS: {
1934		struct ubsec_q2_rng *rng = (struct ubsec_q2_rng *)q;
1935		u_int32_t *p;
1936		int i;
1937
1938		bus_dmamap_sync(sc->sc_dmat, rng->rng_buf.dma_map, 0,
1939		    rng->rng_buf.dma_map->dm_mapsize, BUS_DMASYNC_POSTREAD);
1940		p = (u_int32_t *)rng->rng_buf.dma_vaddr;
1941		i = UBSEC_RNG_BUFSIZ * sizeof(u_int32_t);
1942		rnd_add_data_intr(&sc->sc_rnd_source, (char *)p, i, i * NBBY);
1943		sc->sc_rng_need -= i;
1944		rng->rng_used = 0;
1945		if (sc->sc_rng_need > 0) {
1946			callout_schedule(&sc->sc_rngto, sc->sc_rnghz);
1947		}
1948		break;
1949	}
1950#endif
1951	case UBS_CTXOP_MODEXP: {
1952		struct ubsec_q2_modexp *me = (struct ubsec_q2_modexp *)q;
1953		u_int rlen, clen;
1954
1955		krp = me->me_krp;
1956		rlen = (me->me_modbits + 7) / 8;
1957		clen = (krp->krp_param[krp->krp_iparams].crp_nbits + 7) / 8;
1958
1959		bus_dmamap_sync(sc->sc_dmat, me->me_M.dma_map,
1960		    0, me->me_M.dma_map->dm_mapsize, BUS_DMASYNC_POSTWRITE);
1961		bus_dmamap_sync(sc->sc_dmat, me->me_E.dma_map,
1962		    0, me->me_E.dma_map->dm_mapsize, BUS_DMASYNC_POSTWRITE);
1963		bus_dmamap_sync(sc->sc_dmat, me->me_C.dma_map,
1964		    0, me->me_C.dma_map->dm_mapsize, BUS_DMASYNC_POSTREAD);
1965		bus_dmamap_sync(sc->sc_dmat, me->me_epb.dma_map,
1966		    0, me->me_epb.dma_map->dm_mapsize, BUS_DMASYNC_POSTWRITE);
1967
1968		if (clen < rlen)
1969			krp->krp_status = E2BIG;
1970		else {
1971			if (sc->sc_flags & UBS_FLAGS_HWNORM) {
1972				memset(krp->krp_param[krp->krp_iparams].crp_p, 0,
1973				    (krp->krp_param[krp->krp_iparams].crp_nbits
1974					+ 7) / 8);
1975				bcopy(me->me_C.dma_vaddr,
1976				    krp->krp_param[krp->krp_iparams].crp_p,
1977				    (me->me_modbits + 7) / 8);
1978			} else
1979				ubsec_kshift_l(me->me_shiftbits,
1980				    me->me_C.dma_vaddr, me->me_normbits,
1981				    krp->krp_param[krp->krp_iparams].crp_p,
1982				    krp->krp_param[krp->krp_iparams].crp_nbits);
1983		}
1984
1985		crypto_kdone(krp);
1986
1987		/* bzero all potentially sensitive data */
1988		memset(me->me_E.dma_vaddr, 0, me->me_E.dma_size);
1989		memset(me->me_M.dma_vaddr, 0, me->me_M.dma_size);
1990		memset(me->me_C.dma_vaddr, 0, me->me_C.dma_size);
1991		memset(me->me_q.q_ctx.dma_vaddr, 0, me->me_q.q_ctx.dma_size);
1992
1993		/* Can't free here, so put us on the free list. */
1994		SIMPLEQ_INSERT_TAIL(&sc->sc_q2free, &me->me_q, q_next);
1995		break;
1996	}
1997	case UBS_CTXOP_RSAPRIV: {
1998		struct ubsec_q2_rsapriv *rp = (struct ubsec_q2_rsapriv *)q;
1999		u_int len;
2000
2001		krp = rp->rpr_krp;
2002		bus_dmamap_sync(sc->sc_dmat, rp->rpr_msgin.dma_map, 0,
2003		    rp->rpr_msgin.dma_map->dm_mapsize, BUS_DMASYNC_POSTWRITE);
2004		bus_dmamap_sync(sc->sc_dmat, rp->rpr_msgout.dma_map, 0,
2005		    rp->rpr_msgout.dma_map->dm_mapsize, BUS_DMASYNC_POSTREAD);
2006
2007		len = (krp->krp_param[UBS_RSAPRIV_PAR_MSGOUT].crp_nbits + 7)
2008		    / 8;
2009		bcopy(rp->rpr_msgout.dma_vaddr,
2010		    krp->krp_param[UBS_RSAPRIV_PAR_MSGOUT].crp_p, len);
2011
2012		crypto_kdone(krp);
2013
2014		memset(rp->rpr_msgin.dma_vaddr, 0, rp->rpr_msgin.dma_size);
2015		memset(rp->rpr_msgout.dma_vaddr, 0, rp->rpr_msgout.dma_size);
2016		memset(rp->rpr_q.q_ctx.dma_vaddr, 0, rp->rpr_q.q_ctx.dma_size);
2017
2018		/* Can't free here, so put us on the free list. */
2019		SIMPLEQ_INSERT_TAIL(&sc->sc_q2free, &rp->rpr_q, q_next);
2020		break;
2021	}
2022	default:
2023		printf("%s: unknown ctx op: %x\n", device_xname(sc->sc_dev),
2024		    letoh16(ctx->ctx_op));
2025		break;
2026	}
2027}
2028
2029#ifndef UBSEC_NO_RNG
2030
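/*
 * Entropy request callback: record how many bytes the entropy pool wants
 * and kick the RNG.
 */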
2031static void
2032ubsec_rng_get(size_t bytes, void *vsc)
2033{
2034	struct ubsec_softc *sc = vsc;
2035
2036	mutex_spin_enter(&sc->sc_mtx);
2037	sc->sc_rng_need = bytes;
2038	ubsec_rng_locked(sc);
2039	mutex_spin_exit(&sc->sc_mtx);
2041}
2042
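/*
 * Take sc_mtx and try to queue another RNG request; scheduled via the
 * sc_rngto callout while entropy is still needed.
 */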
2043static void
2044ubsec_rng(void *vsc)
2045{
2046	struct ubsec_softc *sc = vsc;
2047	mutex_spin_enter(&sc->sc_mtx);
2048	ubsec_rng_locked(sc);
2049	mutex_spin_exit(&sc->sc_mtx);
2050}
2051
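/*
 * Build an RNG command and queue it on MCR2 (or MCR4 on newer parts).
 * Must be called with sc_mtx held; at most one RNG request is kept
 * outstanding at a time.
 */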
2052static void
2053ubsec_rng_locked(void *vsc)
2054{
2055	struct ubsec_softc *sc = vsc;
2056	struct ubsec_q2_rng *rng = &sc->sc_rng;
2057	struct ubsec_mcr *mcr;
2058	struct ubsec_ctx_rngbypass *ctx;
2059	int *nqueue;
2060
2061	/* The caller is responsible for acquiring and releasing sc_mtx. */
2062	KASSERT(mutex_owned(&sc->sc_mtx));
2063
2064	if (rng->rng_used) {
2065		return;
2066	}
2067
2068	if (sc->sc_rng_need < 1) {
2069		callout_stop(&sc->sc_rngto);
2070		return;
2071	}
2072
2073	if (sc->sc_flags & UBS_FLAGS_RNG4)
2074		nqueue = &sc->sc_nqueue4;
2075	else
2076		nqueue = &sc->sc_nqueue2;
2077
2078	(*nqueue)++;
2079	if (*nqueue >= UBS_MAX_NQUEUE)
2080		goto out;
2081
2082	mcr = (struct ubsec_mcr *)rng->rng_q.q_mcr.dma_vaddr;
2083	ctx = (struct ubsec_ctx_rngbypass *)rng->rng_q.q_ctx.dma_vaddr;
2084
2085	mcr->mcr_pkts = htole16(1);
2086	mcr->mcr_flags = 0;
2087	mcr->mcr_cmdctxp = htole32(rng->rng_q.q_ctx.dma_paddr);
2088	mcr->mcr_ipktbuf.pb_addr = mcr->mcr_ipktbuf.pb_next = 0;
2089	mcr->mcr_ipktbuf.pb_len = 0;
2090	mcr->mcr_reserved = mcr->mcr_pktlen = 0;
2091	mcr->mcr_opktbuf.pb_addr = htole32(rng->rng_buf.dma_paddr);
2092	mcr->mcr_opktbuf.pb_len = htole32((sizeof(u_int32_t) * UBSEC_RNG_BUFSIZ) &
2093	    UBS_PKTBUF_LEN);
2094	mcr->mcr_opktbuf.pb_next = 0;
2095
2096	ctx->rbp_len = htole16(sizeof(struct ubsec_ctx_rngbypass));
2097	ctx->rbp_op = htole16(UBS_CTXOP_RNGSHA1);
2098	rng->rng_q.q_type = UBS_CTXOP_RNGSHA1;
2099
2100	bus_dmamap_sync(sc->sc_dmat, rng->rng_buf.dma_map, 0,
2101	    rng->rng_buf.dma_map->dm_mapsize, BUS_DMASYNC_PREREAD);
2102
2103	if (sc->sc_flags & UBS_FLAGS_RNG4) {
2104		SIMPLEQ_INSERT_TAIL(&sc->sc_queue4, &rng->rng_q, q_next);
2105		ubsec_feed4(sc);
2106	} else {
2107		SIMPLEQ_INSERT_TAIL(&sc->sc_queue2, &rng->rng_q, q_next);
2108		ubsec_feed2(sc);
2109	}
2110	rng->rng_used = 1;
2111	ubsecstats.hst_rng++;
2112
2113	return;
2114
2115out:
2116	/*
2117	 * Something weird happened; generate our own callback.
2118	 */
2119	(*nqueue)--;
2120	callout_schedule(&sc->sc_rngto, sc->sc_rnghz);
2121}
2122#endif /* !UBSEC_NO_RNG */
2123
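/*
 * Allocate 'size' bytes of DMA-safe memory, map it into kernel virtual
 * space and load it into a single-segment DMA map, recording the results
 * in 'dma'.  Partially constructed state is torn down on failure.
 */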
2124static int
2125ubsec_dma_malloc(struct ubsec_softc *sc, bus_size_t size,
2126		 struct ubsec_dma_alloc *dma, int mapflags)
2127{
2128	int r;
2129
2130	if ((r = bus_dmamem_alloc(sc->sc_dmat, size, PAGE_SIZE, 0,
2131	    &dma->dma_seg, 1, &dma->dma_nseg, BUS_DMA_NOWAIT)) != 0)
2132		goto fail_0;
2133
2134	if ((r = bus_dmamem_map(sc->sc_dmat, &dma->dma_seg, dma->dma_nseg,
2135	    size, &dma->dma_vaddr, mapflags | BUS_DMA_NOWAIT)) != 0)
2136		goto fail_1;
2137
2138	if ((r = bus_dmamap_create(sc->sc_dmat, size, 1, size, 0,
2139	    BUS_DMA_NOWAIT, &dma->dma_map)) != 0)
2140		goto fail_2;
2141
2142	if ((r = bus_dmamap_load(sc->sc_dmat, dma->dma_map, dma->dma_vaddr,
2143	    size, NULL, BUS_DMA_NOWAIT)) != 0)
2144		goto fail_3;
2145
2146	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2147	dma->dma_size = size;
2148	return (0);
2149
2150fail_3:
2151	bus_dmamap_destroy(sc->sc_dmat, dma->dma_map);
2152fail_2:
2153	bus_dmamem_unmap(sc->sc_dmat, dma->dma_vaddr, size);
2154fail_1:
2155	bus_dmamem_free(sc->sc_dmat, &dma->dma_seg, dma->dma_nseg);
2156fail_0:
2157	dma->dma_map = NULL;
2158	return (r);
2159}
2160
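/*
 * Release the memory and DMA resources obtained from ubsec_dma_malloc().
 */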
2161static void
2162ubsec_dma_free(struct ubsec_softc *sc, struct ubsec_dma_alloc *dma)
2163{
2164	bus_dmamap_unload(sc->sc_dmat, dma->dma_map);
2165	bus_dmamem_unmap(sc->sc_dmat, dma->dma_vaddr, dma->dma_size);
2166	bus_dmamem_free(sc->sc_dmat, &dma->dma_seg, dma->dma_nseg);
2167	bus_dmamap_destroy(sc->sc_dmat, dma->dma_map);
2168}
2169
2170/*
2171 * Resets the board.  Values in the registers are left as is
2172 * from the reset (i.e. initial values are assigned elsewhere).
2173 */
2174static void
2175ubsec_reset_board(struct ubsec_softc *sc)
2176{
2177	volatile u_int32_t ctrl;
2178
2179	ctrl = READ_REG(sc, BS_CTRL);
2180	ctrl |= BS_CTRL_RESET;
2181	WRITE_REG(sc, BS_CTRL, ctrl);
2182
2183	/*
2184	 * Wait approx. 30 PCI clocks = 900 ns = 0.9 us.
2185	 */
2186	DELAY(10);
2187
2188	/* Enable RNG and interrupts on newer devices */
2189	if (sc->sc_flags & UBS_FLAGS_MULTIMCR) {
2190#ifndef UBSEC_NO_RNG
2191		WRITE_REG(sc, BS_CFG, BS_CFG_RNG);
2192#endif
2193		WRITE_REG(sc, BS_INT, BS_INT_DMAINT);
2194	}
2195}
2196
2197/*
2198 * Init Broadcom registers
2199 */
2200static void
2201ubsec_init_board(struct ubsec_softc *sc)
2202{
2203	u_int32_t ctrl;
2204
2205	ctrl = READ_REG(sc, BS_CTRL);
2206	ctrl &= ~(BS_CTRL_BE32 | BS_CTRL_BE64);
2207	ctrl |= BS_CTRL_LITTLE_ENDIAN | BS_CTRL_MCR1INT;
2208
2209	/*
2210	 * XXX: Sam Leffler's code has (UBS_FLAGS_KEY|UBS_FLAGS_RNG).
2211	 * Anyone got hw docs?
2212	 */
2213	if (sc->sc_flags & UBS_FLAGS_KEY)
2214		ctrl |= BS_CTRL_MCR2INT;
2215	else
2216		ctrl &= ~BS_CTRL_MCR2INT;
2217
2218	if (sc->sc_flags & UBS_FLAGS_HWNORM)
2219		ctrl &= ~BS_CTRL_SWNORM;
2220
2221	if (sc->sc_flags & UBS_FLAGS_MULTIMCR) {
2222		ctrl |= BS_CTRL_BSIZE240;
2223		ctrl &= ~BS_CTRL_MCR3INT; /* MCR3 is reserved for SSL */
2224
2225		if (sc->sc_flags & UBS_FLAGS_RNG4)
2226			ctrl |= BS_CTRL_MCR4INT;
2227		else
2228			ctrl &= ~BS_CTRL_MCR4INT;
2229	}
2230
2231	WRITE_REG(sc, BS_CTRL, ctrl);
2232}
2233
2234/*
2235 * Init Broadcom PCI registers
2236 */
2237static void
2238ubsec_init_pciregs(struct pci_attach_args *pa)
2239{
2240	pci_chipset_tag_t pc = pa->pa_pc;
2241	u_int32_t misc;
2242
2243	/*
2244	 * Set the cache line size to 1; this forces the BCM58xx chip
2245	 * to do burst reads/writes only, since cache-line read/writes
2246	 * are too slow.
2247	 */
2248	misc = pci_conf_read(pc, pa->pa_tag, PCI_BHLC_REG);
2249	misc = (misc & ~(PCI_CACHELINE_MASK << PCI_CACHELINE_SHIFT))
2250	    | ((UBS_DEF_CACHELINE & 0xff) << PCI_CACHELINE_SHIFT);
2251	pci_conf_write(pc, pa->pa_tag, PCI_BHLC_REG, misc);
2252}
2253
2254/*
2255 * Clean up after a chip crash.
2256 * It is assumed that the caller is in splnet().
2257 */
2258static void
2259ubsec_cleanchip(struct ubsec_softc *sc)
2260{
2261	struct ubsec_q *q;
2262
2263	while (!SIMPLEQ_EMPTY(&sc->sc_qchip)) {
2264		q = SIMPLEQ_FIRST(&sc->sc_qchip);
2265		SIMPLEQ_REMOVE_HEAD(&sc->sc_qchip, q_next);
2266		ubsec_free_q(sc, q);
2267	}
2268	sc->sc_nqchip = 0;
2269}
2270
2271/*
2272 * free a ubsec_q
2273 * It is assumed that the caller is within splnet()
2274 */
2275static int
2276ubsec_free_q(struct ubsec_softc *sc, struct ubsec_q *q)
2277{
2278	struct ubsec_q *q2;
2279	struct cryptop *crp;
2280	int npkts;
2281	int i;
2282
2283	npkts = q->q_nstacked_mcrs;
2284
2285	for (i = 0; i < npkts; i++) {
2286		if (q->q_stacked_mcr[i]) {
2287			q2 = q->q_stacked_mcr[i];
2288
2289			if ((q2->q_dst_m != NULL)
2290			    && (q2->q_src_m != q2->q_dst_m))
2291				m_freem(q2->q_dst_m);
2292
2293			crp = (struct cryptop *)q2->q_crp;
2294
2295			SIMPLEQ_INSERT_TAIL(&sc->sc_freequeue, q2, q_next);
2296
2297			crp->crp_etype = EFAULT;
2298			crypto_done(crp);
2299		} else {
2300			break;
2301		}
2302	}
2303
2304	/*
2305	 * Free header MCR
2306	 */
2307	if ((q->q_dst_m != NULL) && (q->q_src_m != q->q_dst_m))
2308		m_freem(q->q_dst_m);
2309
2310	crp = (struct cryptop *)q->q_crp;
2311
2312	SIMPLEQ_INSERT_TAIL(&sc->sc_freequeue, q, q_next);
2313
2314	crp->crp_etype = EFAULT;
2315	crypto_done(crp);
2316	return (0);
2317}
2318
2319/*
2320 * Routine to reset the chip and clean up.
2321 * It is assumed that the caller is in splnet()
2322 */
2323static void
2324ubsec_totalreset(struct ubsec_softc *sc)
2325{
2326	ubsec_reset_board(sc);
2327	ubsec_init_board(sc);
2328	ubsec_cleanchip(sc);
2329}
2330
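/*
 * Check that every DMA segment starts on a 4-byte boundary and that all
 * but the last segment have lengths that are a multiple of 4, as the
 * chip requires.  Returns 1 if the map is usable as-is, 0 otherwise.
 */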
2331static int
2332ubsec_dmamap_aligned(bus_dmamap_t map)
2333{
2334	int i;
2335
2336	for (i = 0; i < map->dm_nsegs; i++) {
2337		if (map->dm_segs[i].ds_addr & 3)
2338			return (0);
2339		if ((i != (map->dm_nsegs - 1)) &&
2340		    (map->dm_segs[i].ds_len & 3))
2341			return (0);
2342	}
2343	return (1);
2344}
2345
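/*
 * Free the DMA allocations backing a completed key operation.
 */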
2346static void
2347ubsec_kfree(struct ubsec_softc *sc, struct ubsec_q2 *q)
2348{
2349	switch (q->q_type) {
2350	case UBS_CTXOP_MODEXP: {
2351		struct ubsec_q2_modexp *me = (struct ubsec_q2_modexp *)q;
2352
2353		ubsec_dma_free(sc, &me->me_q.q_mcr);
2354		ubsec_dma_free(sc, &me->me_q.q_ctx);
2355		ubsec_dma_free(sc, &me->me_M);
2356		ubsec_dma_free(sc, &me->me_E);
2357		ubsec_dma_free(sc, &me->me_C);
2358		ubsec_dma_free(sc, &me->me_epb);
2359		free(me, M_DEVBUF);
2360		break;
2361	}
2362	case UBS_CTXOP_RSAPRIV: {
2363		struct ubsec_q2_rsapriv *rp = (struct ubsec_q2_rsapriv *)q;
2364
2365		ubsec_dma_free(sc, &rp->rpr_q.q_mcr);
2366		ubsec_dma_free(sc, &rp->rpr_q.q_ctx);
2367		ubsec_dma_free(sc, &rp->rpr_msgin);
2368		ubsec_dma_free(sc, &rp->rpr_msgout);
2369		free(rp, M_DEVBUF);
2370		break;
2371	}
2372	default:
2373		printf("%s: invalid kfree 0x%x\n", device_xname(sc->sc_dev),
2374		    q->q_type);
2375		break;
2376	}
2377}
2378
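/*
 * opencrypto entry point for asymmetric (key) operations: reap any
 * completed entries on the q2free list, then dispatch modular
 * exponentiation or RSA CRT requests to the hardware.
 */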
2379static int
2380ubsec_kprocess(void *arg, struct cryptkop *krp, int hint)
2381{
2382	struct ubsec_softc *sc = arg;
2383
2384	while (!SIMPLEQ_EMPTY(&sc->sc_q2free)) {
2385		struct ubsec_q2 *q;
2386
2387		q = SIMPLEQ_FIRST(&sc->sc_q2free);
2388		SIMPLEQ_REMOVE_HEAD(&sc->sc_q2free, q_next);
2389		ubsec_kfree(sc, q);
2390	}
2391
2392	switch (krp->krp_op) {
2393	case CRK_MOD_EXP:
2394		if (sc->sc_flags & UBS_FLAGS_HWNORM)
2395			ubsec_kprocess_modexp_hw(sc, krp, hint);
2396		else
2397			ubsec_kprocess_modexp_sw(sc, krp, hint);
2398		break;
2399	case CRK_MOD_EXP_CRT:
2400		ubsec_kprocess_rsapriv(sc, krp, hint);
2401		break;
2402	default:
2403		printf("%s: kprocess: invalid op 0x%x\n",
2404		    device_xname(sc->sc_dev), krp->krp_op);
2405		krp->krp_status = EOPNOTSUPP;
2406		crypto_kdone(krp);
2407	}
2408	return 0;
2409}
2410
2411/*
2412 * Start computation of cr[C] = (cr[M] ^ cr[E]) mod cr[N] (sw normalization)
2413 */
2414static void
2415ubsec_kprocess_modexp_sw(struct ubsec_softc *sc, struct cryptkop *krp,
2416			 int hint)
2417{
2418	struct ubsec_q2_modexp *me;
2419	struct ubsec_mcr *mcr;
2420	struct ubsec_ctx_modexp *ctx;
2421	struct ubsec_pktbuf *epb;
2422	int err = 0;
2423	u_int nbits, normbits, mbits, shiftbits, ebits;
2424
2425	me = (struct ubsec_q2_modexp *)malloc(sizeof *me, M_DEVBUF, M_NOWAIT);
2426	if (me == NULL) {
2427		err = ENOMEM;
2428		goto errout;
2429	}
2430	memset(me, 0, sizeof *me);
2431	me->me_krp = krp;
2432	me->me_q.q_type = UBS_CTXOP_MODEXP;
2433
2434	nbits = ubsec_ksigbits(&krp->krp_param[UBS_MODEXP_PAR_N]);
2435	if (nbits <= 512)
2436		normbits = 512;
2437	else if (nbits <= 768)
2438		normbits = 768;
2439	else if (nbits <= 1024)
2440		normbits = 1024;
2441	else if (sc->sc_flags & UBS_FLAGS_BIGKEY && nbits <= 1536)
2442		normbits = 1536;
2443	else if (sc->sc_flags & UBS_FLAGS_BIGKEY && nbits <= 2048)
2444		normbits = 2048;
2445	else {
2446		err = E2BIG;
2447		goto errout;
2448	}
2449
2450	shiftbits = normbits - nbits;
2451
2452	me->me_modbits = nbits;
2453	me->me_shiftbits = shiftbits;
2454	me->me_normbits = normbits;
2455
2456	/* Sanity check: result bits must be >= true modulus bits. */
2457	if (krp->krp_param[krp->krp_iparams].crp_nbits < nbits) {
2458		err = ERANGE;
2459		goto errout;
2460	}
2461
2462	if (ubsec_dma_malloc(sc, sizeof(struct ubsec_mcr),
2463	    &me->me_q.q_mcr, 0)) {
2464		err = ENOMEM;
2465		goto errout;
2466	}
2467	mcr = (struct ubsec_mcr *)me->me_q.q_mcr.dma_vaddr;
2468
2469	if (ubsec_dma_malloc(sc, sizeof(struct ubsec_ctx_modexp),
2470	    &me->me_q.q_ctx, 0)) {
2471		err = ENOMEM;
2472		goto errout;
2473	}
2474
2475	mbits = ubsec_ksigbits(&krp->krp_param[UBS_MODEXP_PAR_M]);
2476	if (mbits > nbits) {
2477		err = E2BIG;
2478		goto errout;
2479	}
2480	if (ubsec_dma_malloc(sc, normbits / 8, &me->me_M, 0)) {
2481		err = ENOMEM;
2482		goto errout;
2483	}
2484	ubsec_kshift_r(shiftbits,
2485	    krp->krp_param[UBS_MODEXP_PAR_M].crp_p, mbits,
2486	    me->me_M.dma_vaddr, normbits);
2487
2488	if (ubsec_dma_malloc(sc, normbits / 8, &me->me_C, 0)) {
2489		err = ENOMEM;
2490		goto errout;
2491	}
2492	memset(me->me_C.dma_vaddr, 0, me->me_C.dma_size);
2493
2494	ebits = ubsec_ksigbits(&krp->krp_param[UBS_MODEXP_PAR_E]);
2495	if (ebits > nbits) {
2496		err = E2BIG;
2497		goto errout;
2498	}
2499	if (ubsec_dma_malloc(sc, normbits / 8, &me->me_E, 0)) {
2500		err = ENOMEM;
2501		goto errout;
2502	}
2503	ubsec_kshift_r(shiftbits,
2504	    krp->krp_param[UBS_MODEXP_PAR_E].crp_p, ebits,
2505	    me->me_E.dma_vaddr, normbits);
2506
2507	if (ubsec_dma_malloc(sc, sizeof(struct ubsec_pktbuf),
2508	    &me->me_epb, 0)) {
2509		err = ENOMEM;
2510		goto errout;
2511	}
2512	epb = (struct ubsec_pktbuf *)me->me_epb.dma_vaddr;
2513	epb->pb_addr = htole32(me->me_E.dma_paddr);
2514	epb->pb_next = 0;
2515	epb->pb_len = htole32(normbits / 8);
2516
2517#ifdef UBSEC_DEBUG
2518	if (ubsec_debug) {
2519		printf("Epb ");
2520		ubsec_dump_pb(epb);
2521	}
2522#endif
2523
2524	mcr->mcr_pkts = htole16(1);
2525	mcr->mcr_flags = 0;
2526	mcr->mcr_cmdctxp = htole32(me->me_q.q_ctx.dma_paddr);
2527	mcr->mcr_reserved = 0;
2528	mcr->mcr_pktlen = 0;
2529
2530	mcr->mcr_ipktbuf.pb_addr = htole32(me->me_M.dma_paddr);
2531	mcr->mcr_ipktbuf.pb_len = htole32(normbits / 8);
2532	mcr->mcr_ipktbuf.pb_next = htole32(me->me_epb.dma_paddr);
2533
2534	mcr->mcr_opktbuf.pb_addr = htole32(me->me_C.dma_paddr);
2535	mcr->mcr_opktbuf.pb_next = 0;
2536	mcr->mcr_opktbuf.pb_len = htole32(normbits / 8);
2537
2538#ifdef DIAGNOSTIC
2539	/* Misaligned output buffer will hang the chip. */
2540	if ((letoh32(mcr->mcr_opktbuf.pb_addr) & 3) != 0)
2541		panic("%s: modexp invalid addr 0x%x", device_xname(sc->sc_dev),
2542		    letoh32(mcr->mcr_opktbuf.pb_addr));
2543	if ((letoh32(mcr->mcr_opktbuf.pb_len) & 3) != 0)
2544		panic("%s: modexp invalid len 0x%x",  device_xname(sc->sc_dev),
2545		    letoh32(mcr->mcr_opktbuf.pb_len));
2546#endif
2547
2548	ctx = (struct ubsec_ctx_modexp *)me->me_q.q_ctx.dma_vaddr;
2549	memset(ctx, 0, sizeof(*ctx));
2550	ubsec_kshift_r(shiftbits,
2551	    krp->krp_param[UBS_MODEXP_PAR_N].crp_p, nbits,
2552	    ctx->me_N, normbits);
2553	ctx->me_len = htole16((normbits / 8) + (4 * sizeof(u_int16_t)));
2554	ctx->me_op = htole16(UBS_CTXOP_MODEXP);
2555	ctx->me_E_len = htole16(nbits);
2556	ctx->me_N_len = htole16(nbits);
2557
2558#ifdef UBSEC_DEBUG
2559	if (ubsec_debug) {
2560		ubsec_dump_mcr(mcr);
2561		ubsec_dump_ctx2((struct ubsec_ctx_keyop *)ctx);
2562	}
2563#endif
2564
2565	/*
2566	 * ubsec_feed2 will sync mcr and ctx, we just need to sync
2567	 * everything else.
2568	 */
2569	bus_dmamap_sync(sc->sc_dmat, me->me_M.dma_map,
2570	    0, me->me_M.dma_map->dm_mapsize, BUS_DMASYNC_PREWRITE);
2571	bus_dmamap_sync(sc->sc_dmat, me->me_E.dma_map,
2572	    0, me->me_E.dma_map->dm_mapsize, BUS_DMASYNC_PREWRITE);
2573	bus_dmamap_sync(sc->sc_dmat, me->me_C.dma_map,
2574	    0, me->me_C.dma_map->dm_mapsize, BUS_DMASYNC_PREREAD);
2575	bus_dmamap_sync(sc->sc_dmat, me->me_epb.dma_map,
2576	    0, me->me_epb.dma_map->dm_mapsize, BUS_DMASYNC_PREWRITE);
2577
2578	/* Enqueue and we're done... */
2579	mutex_spin_enter(&sc->sc_mtx);
2580	SIMPLEQ_INSERT_TAIL(&sc->sc_queue2, &me->me_q, q_next);
2581	ubsec_feed2(sc);
2582	ubsecstats.hst_modexp++;
2583	mutex_spin_exit(&sc->sc_mtx);
2584
2585	return;
2586
2587errout:
2588	if (me != NULL) {
2589		if (me->me_q.q_mcr.dma_map != NULL)
2590			ubsec_dma_free(sc, &me->me_q.q_mcr);
2591		if (me->me_q.q_ctx.dma_map != NULL) {
2592			memset(me->me_q.q_ctx.dma_vaddr, 0,
2593			    me->me_q.q_ctx.dma_size);
2594			ubsec_dma_free(sc, &me->me_q.q_ctx);
2595		}
2596		if (me->me_M.dma_map != NULL) {
2597			memset(me->me_M.dma_vaddr, 0, me->me_M.dma_size);
2598			ubsec_dma_free(sc, &me->me_M);
2599		}
2600		if (me->me_E.dma_map != NULL) {
2601			memset(me->me_E.dma_vaddr, 0, me->me_E.dma_size);
2602			ubsec_dma_free(sc, &me->me_E);
2603		}
2604		if (me->me_C.dma_map != NULL) {
2605			memset(me->me_C.dma_vaddr, 0, me->me_C.dma_size);
2606			ubsec_dma_free(sc, &me->me_C);
2607		}
2608		if (me->me_epb.dma_map != NULL)
2609			ubsec_dma_free(sc, &me->me_epb);
2610		free(me, M_DEVBUF);
2611	}
2612	krp->krp_status = err;
2613	crypto_kdone(krp);
2614}
2615
2616/*
2617 * Start computation of cr[C] = (cr[M] ^ cr[E]) mod cr[N] (hw normalization)
2618 */
2619static void
2620ubsec_kprocess_modexp_hw(struct ubsec_softc *sc, struct cryptkop *krp,
2621			 int hint)
2622{
2623	struct ubsec_q2_modexp *me;
2624	struct ubsec_mcr *mcr;
2625	struct ubsec_ctx_modexp *ctx;
2626	struct ubsec_pktbuf *epb;
2627	int err = 0;
2628	u_int nbits, normbits, mbits, shiftbits, ebits;
2629
2630	me = (struct ubsec_q2_modexp *)malloc(sizeof *me, M_DEVBUF, M_NOWAIT);
2631	if (me == NULL) {
2632		err = ENOMEM;
2633		goto errout;
2634	}
2635	memset(me, 0, sizeof *me);
2636	me->me_krp = krp;
2637	me->me_q.q_type = UBS_CTXOP_MODEXP;
2638
2639	nbits = ubsec_ksigbits(&krp->krp_param[UBS_MODEXP_PAR_N]);
2640	if (nbits <= 512)
2641		normbits = 512;
2642	else if (nbits <= 768)
2643		normbits = 768;
2644	else if (nbits <= 1024)
2645		normbits = 1024;
2646	else if (sc->sc_flags & UBS_FLAGS_BIGKEY && nbits <= 1536)
2647		normbits = 1536;
2648	else if (sc->sc_flags & UBS_FLAGS_BIGKEY && nbits <= 2048)
2649		normbits = 2048;
2650	else {
2651		err = E2BIG;
2652		goto errout;
2653	}
2654
2655	shiftbits = normbits - nbits;
2656
2657	/* XXX ??? */
2658	me->me_modbits = nbits;
2659	me->me_shiftbits = shiftbits;
2660	me->me_normbits = normbits;
2661
2662	/* Sanity check: result bits must be >= true modulus bits. */
2663	if (krp->krp_param[krp->krp_iparams].crp_nbits < nbits) {
2664		err = ERANGE;
2665		goto errout;
2666	}
2667
2668	if (ubsec_dma_malloc(sc, sizeof(struct ubsec_mcr),
2669	    &me->me_q.q_mcr, 0)) {
2670		err = ENOMEM;
2671		goto errout;
2672	}
2673	mcr = (struct ubsec_mcr *)me->me_q.q_mcr.dma_vaddr;
2674
2675	if (ubsec_dma_malloc(sc, sizeof(struct ubsec_ctx_modexp),
2676	    &me->me_q.q_ctx, 0)) {
2677		err = ENOMEM;
2678		goto errout;
2679	}
2680
2681	mbits = ubsec_ksigbits(&krp->krp_param[UBS_MODEXP_PAR_M]);
2682	if (mbits > nbits) {
2683		err = E2BIG;
2684		goto errout;
2685	}
2686	if (ubsec_dma_malloc(sc, normbits / 8, &me->me_M, 0)) {
2687		err = ENOMEM;
2688		goto errout;
2689	}
2690	memset(me->me_M.dma_vaddr, 0, normbits / 8);
2691	bcopy(krp->krp_param[UBS_MODEXP_PAR_M].crp_p,
2692	    me->me_M.dma_vaddr, (mbits + 7) / 8);
2693
2694	if (ubsec_dma_malloc(sc, normbits / 8, &me->me_C, 0)) {
2695		err = ENOMEM;
2696		goto errout;
2697	}
2698	memset(me->me_C.dma_vaddr, 0, me->me_C.dma_size);
2699
2700	ebits = ubsec_ksigbits(&krp->krp_param[UBS_MODEXP_PAR_E]);
2701	if (ebits > nbits) {
2702		err = E2BIG;
2703		goto errout;
2704	}
2705	if (ubsec_dma_malloc(sc, normbits / 8, &me->me_E, 0)) {
2706		err = ENOMEM;
2707		goto errout;
2708	}
2709	memset(me->me_E.dma_vaddr, 0, normbits / 8);
2710	bcopy(krp->krp_param[UBS_MODEXP_PAR_E].crp_p,
2711	    me->me_E.dma_vaddr, (ebits + 7) / 8);
2712
2713	if (ubsec_dma_malloc(sc, sizeof(struct ubsec_pktbuf),
2714	    &me->me_epb, 0)) {
2715		err = ENOMEM;
2716		goto errout;
2717	}
2718	epb = (struct ubsec_pktbuf *)me->me_epb.dma_vaddr;
2719	epb->pb_addr = htole32(me->me_E.dma_paddr);
2720	epb->pb_next = 0;
2721	epb->pb_len = htole32((ebits + 7) / 8);
2722
2723#ifdef UBSEC_DEBUG
2724	if (ubsec_debug) {
2725		printf("Epb ");
2726		ubsec_dump_pb(epb);
2727	}
2728#endif
2729
2730	mcr->mcr_pkts = htole16(1);
2731	mcr->mcr_flags = 0;
2732	mcr->mcr_cmdctxp = htole32(me->me_q.q_ctx.dma_paddr);
2733	mcr->mcr_reserved = 0;
2734	mcr->mcr_pktlen = 0;
2735
2736	mcr->mcr_ipktbuf.pb_addr = htole32(me->me_M.dma_paddr);
2737	mcr->mcr_ipktbuf.pb_len = htole32(normbits / 8);
2738	mcr->mcr_ipktbuf.pb_next = htole32(me->me_epb.dma_paddr);
2739
2740	mcr->mcr_opktbuf.pb_addr = htole32(me->me_C.dma_paddr);
2741	mcr->mcr_opktbuf.pb_next = 0;
2742	mcr->mcr_opktbuf.pb_len = htole32(normbits / 8);
2743
2744#ifdef DIAGNOSTIC
2745	/* Misaligned output buffer will hang the chip. */
2746	if ((letoh32(mcr->mcr_opktbuf.pb_addr) & 3) != 0)
2747		panic("%s: modexp invalid addr 0x%x", device_xname(sc->sc_dev),
2748		    letoh32(mcr->mcr_opktbuf.pb_addr));
2749	if ((letoh32(mcr->mcr_opktbuf.pb_len) & 3) != 0)
2750		panic("%s: modexp invalid len 0x%x", device_xname(sc->sc_dev),
2751		    letoh32(mcr->mcr_opktbuf.pb_len));
2752#endif
2753
2754	ctx = (struct ubsec_ctx_modexp *)me->me_q.q_ctx.dma_vaddr;
2755	memset(ctx, 0, sizeof(*ctx));
2756	memcpy(ctx->me_N, krp->krp_param[UBS_MODEXP_PAR_N].crp_p,
2757	    (nbits + 7) / 8);
2758	ctx->me_len = htole16((normbits / 8) + (4 * sizeof(u_int16_t)));
2759	ctx->me_op = htole16(UBS_CTXOP_MODEXP);
2760	ctx->me_E_len = htole16(ebits);
2761	ctx->me_N_len = htole16(nbits);
2762
2763#ifdef UBSEC_DEBUG
2764	if (ubsec_debug) {
2765		ubsec_dump_mcr(mcr);
2766		ubsec_dump_ctx2((struct ubsec_ctx_keyop *)ctx);
2767	}
2768#endif
2769
2770	/*
2771	 * ubsec_feed2 will sync mcr and ctx, we just need to sync
2772	 * everything else.
2773	 */
2774	bus_dmamap_sync(sc->sc_dmat, me->me_M.dma_map,
2775	    0, me->me_M.dma_map->dm_mapsize, BUS_DMASYNC_PREWRITE);
2776	bus_dmamap_sync(sc->sc_dmat, me->me_E.dma_map,
2777	    0, me->me_E.dma_map->dm_mapsize, BUS_DMASYNC_PREWRITE);
2778	bus_dmamap_sync(sc->sc_dmat, me->me_C.dma_map,
2779	    0, me->me_C.dma_map->dm_mapsize, BUS_DMASYNC_PREREAD);
2780	bus_dmamap_sync(sc->sc_dmat, me->me_epb.dma_map,
2781	    0, me->me_epb.dma_map->dm_mapsize, BUS_DMASYNC_PREWRITE);
2782
2783	/* Enqueue and we're done... */
2784	mutex_spin_enter(&sc->sc_mtx);
2785	SIMPLEQ_INSERT_TAIL(&sc->sc_queue2, &me->me_q, q_next);
2786	ubsec_feed2(sc);
2787	mutex_spin_exit(&sc->sc_mtx);
2788
2789	return;
2790
2791errout:
2792	if (me != NULL) {
2793		if (me->me_q.q_mcr.dma_map != NULL)
2794			ubsec_dma_free(sc, &me->me_q.q_mcr);
2795		if (me->me_q.q_ctx.dma_map != NULL) {
2796			memset(me->me_q.q_ctx.dma_vaddr, 0,
2797			    me->me_q.q_ctx.dma_size);
2798			ubsec_dma_free(sc, &me->me_q.q_ctx);
2799		}
2800		if (me->me_M.dma_map != NULL) {
2801			memset(me->me_M.dma_vaddr, 0, me->me_M.dma_size);
2802			ubsec_dma_free(sc, &me->me_M);
2803		}
2804		if (me->me_E.dma_map != NULL) {
2805			memset(me->me_E.dma_vaddr, 0, me->me_E.dma_size);
2806			ubsec_dma_free(sc, &me->me_E);
2807		}
2808		if (me->me_C.dma_map != NULL) {
2809			memset(me->me_C.dma_vaddr, 0, me->me_C.dma_size);
2810			ubsec_dma_free(sc, &me->me_C);
2811		}
2812		if (me->me_epb.dma_map != NULL)
2813			ubsec_dma_free(sc, &me->me_epb);
2814		free(me, M_DEVBUF);
2815	}
2816	krp->krp_status = err;
2817	crypto_kdone(krp);
2818}
2819
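/*
 * Start an RSA private key operation using the CRT parameters
 * (p, q, dp, dq, pinv) supplied in the key operation request.
 */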
2820static void
2821ubsec_kprocess_rsapriv(struct ubsec_softc *sc, struct cryptkop *krp,
2822		       int hint)
2823{
2824	struct ubsec_q2_rsapriv *rp = NULL;
2825	struct ubsec_mcr *mcr;
2826	struct ubsec_ctx_rsapriv *ctx;
2827	int err = 0;
2828	u_int padlen, msglen;
2829
2830	msglen = ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_P]);
2831	padlen = ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_Q]);
2832	if (msglen > padlen)
2833		padlen = msglen;
2834
2835	if (padlen <= 256)
2836		padlen = 256;
2837	else if (padlen <= 384)
2838		padlen = 384;
2839	else if (padlen <= 512)
2840		padlen = 512;
2841	else if (sc->sc_flags & UBS_FLAGS_BIGKEY && padlen <= 768)
2842		padlen = 768;
2843	else if (sc->sc_flags & UBS_FLAGS_BIGKEY && padlen <= 1024)
2844		padlen = 1024;
2845	else {
2846		err = E2BIG;
2847		goto errout;
2848	}
2849
2850	if (ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_DP]) > padlen) {
2851		err = E2BIG;
2852		goto errout;
2853	}
2854
2855	if (ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_DQ]) > padlen) {
2856		err = E2BIG;
2857		goto errout;
2858	}
2859
2860	if (ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_PINV]) > padlen) {
2861		err = E2BIG;
2862		goto errout;
2863	}
2864
2865	rp = malloc(sizeof *rp, M_DEVBUF, M_NOWAIT|M_ZERO);
2866	if (rp == NULL) {
2867		err = ENOMEM;
2868		goto errout;
2869	}
2870	rp->rpr_krp = krp;
2871	rp->rpr_q.q_type = UBS_CTXOP_RSAPRIV;
2872
2873	if (ubsec_dma_malloc(sc, sizeof(struct ubsec_mcr),
2874	    &rp->rpr_q.q_mcr, 0)) {
2875		err = ENOMEM;
2876		goto errout;
2877	}
2878	mcr = (struct ubsec_mcr *)rp->rpr_q.q_mcr.dma_vaddr;
2879
2880	if (ubsec_dma_malloc(sc, sizeof(struct ubsec_ctx_rsapriv),
2881	    &rp->rpr_q.q_ctx, 0)) {
2882		err = ENOMEM;
2883		goto errout;
2884	}
2885	ctx = (struct ubsec_ctx_rsapriv *)rp->rpr_q.q_ctx.dma_vaddr;
2886	memset(ctx, 0, sizeof *ctx);
2887
2888	/* Copy in p */
2889	bcopy(krp->krp_param[UBS_RSAPRIV_PAR_P].crp_p,
2890	    &ctx->rpr_buf[0 * (padlen / 8)],
2891	    (krp->krp_param[UBS_RSAPRIV_PAR_P].crp_nbits + 7) / 8);
2892
2893	/* Copy in q */
2894	bcopy(krp->krp_param[UBS_RSAPRIV_PAR_Q].crp_p,
2895	    &ctx->rpr_buf[1 * (padlen / 8)],
2896	    (krp->krp_param[UBS_RSAPRIV_PAR_Q].crp_nbits + 7) / 8);
2897
2898	/* Copy in dp */
2899	bcopy(krp->krp_param[UBS_RSAPRIV_PAR_DP].crp_p,
2900	    &ctx->rpr_buf[2 * (padlen / 8)],
2901	    (krp->krp_param[UBS_RSAPRIV_PAR_DP].crp_nbits + 7) / 8);
2902
2903	/* Copy in dq */
2904	bcopy(krp->krp_param[UBS_RSAPRIV_PAR_DQ].crp_p,
2905	    &ctx->rpr_buf[3 * (padlen / 8)],
2906	    (krp->krp_param[UBS_RSAPRIV_PAR_DQ].crp_nbits + 7) / 8);
2907
2908	/* Copy in pinv */
2909	bcopy(krp->krp_param[UBS_RSAPRIV_PAR_PINV].crp_p,
2910	    &ctx->rpr_buf[4 * (padlen / 8)],
2911	    (krp->krp_param[UBS_RSAPRIV_PAR_PINV].crp_nbits + 7) / 8);
2912
2913	msglen = padlen * 2;
2914
2915	/* Copy in input message (aligned buffer/length). */
2916	if (ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_MSGIN]) > msglen) {
2917		/* Is this likely? */
2918		err = E2BIG;
2919		goto errout;
2920	}
2921	if (ubsec_dma_malloc(sc, (msglen + 7) / 8, &rp->rpr_msgin, 0)) {
2922		err = ENOMEM;
2923		goto errout;
2924	}
2925	memset(rp->rpr_msgin.dma_vaddr, 0, (msglen + 7) / 8);
2926	bcopy(krp->krp_param[UBS_RSAPRIV_PAR_MSGIN].crp_p,
2927	    rp->rpr_msgin.dma_vaddr,
2928	    (krp->krp_param[UBS_RSAPRIV_PAR_MSGIN].crp_nbits + 7) / 8);
2929
2930	/* Prepare space for output message (aligned buffer/length). */
2931	if (ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_MSGOUT]) < msglen) {
2932		/* Is this likely? */
2933		err = E2BIG;
2934		goto errout;
2935	}
2936	if (ubsec_dma_malloc(sc, (msglen + 7) / 8, &rp->rpr_msgout, 0)) {
2937		err = ENOMEM;
2938		goto errout;
2939	}
2940	memset(rp->rpr_msgout.dma_vaddr, 0, (msglen + 7) / 8);
2941
2942	mcr->mcr_pkts = htole16(1);
2943	mcr->mcr_flags = 0;
2944	mcr->mcr_cmdctxp = htole32(rp->rpr_q.q_ctx.dma_paddr);
2945	mcr->mcr_ipktbuf.pb_addr = htole32(rp->rpr_msgin.dma_paddr);
2946	mcr->mcr_ipktbuf.pb_next = 0;
2947	mcr->mcr_ipktbuf.pb_len = htole32(rp->rpr_msgin.dma_size);
2948	mcr->mcr_reserved = 0;
2949	mcr->mcr_pktlen = htole16(msglen);
2950	mcr->mcr_opktbuf.pb_addr = htole32(rp->rpr_msgout.dma_paddr);
2951	mcr->mcr_opktbuf.pb_next = 0;
2952	mcr->mcr_opktbuf.pb_len = htole32(rp->rpr_msgout.dma_size);
2953
2954#ifdef DIAGNOSTIC
2955	if (rp->rpr_msgin.dma_paddr & 3 || rp->rpr_msgin.dma_size & 3) {
2956		panic("%s: rsapriv: invalid msgin 0x%lx(0x%lx)",
2957		    device_xname(sc->sc_dev), (u_long) rp->rpr_msgin.dma_paddr,
2958		    (u_long) rp->rpr_msgin.dma_size);
2959	}
2960	if (rp->rpr_msgout.dma_paddr & 3 || rp->rpr_msgout.dma_size & 3) {
2961		panic("%s: rsapriv: invalid msgout 0x%lx(0x%lx)",
2962		    device_xname(sc->sc_dev), (u_long) rp->rpr_msgout.dma_paddr,
2963		    (u_long) rp->rpr_msgout.dma_size);
2964	}
2965#endif
2966
2967	ctx->rpr_len = (sizeof(u_int16_t) * 4) + (5 * (padlen / 8));
2968	ctx->rpr_op = htole16(UBS_CTXOP_RSAPRIV);
2969	ctx->rpr_q_len = htole16(padlen);
2970	ctx->rpr_p_len = htole16(padlen);
2971
2972	/*
2973	 * ubsec_feed2 will sync mcr and ctx, we just need to sync
2974	 * everything else.
2975	 */
2976	bus_dmamap_sync(sc->sc_dmat, rp->rpr_msgin.dma_map,
2977	    0, rp->rpr_msgin.dma_map->dm_mapsize, BUS_DMASYNC_PREWRITE);
2978	bus_dmamap_sync(sc->sc_dmat, rp->rpr_msgout.dma_map,
2979	    0, rp->rpr_msgout.dma_map->dm_mapsize, BUS_DMASYNC_PREREAD);
2980
2981	/* Enqueue and we're done... */
2982	mutex_spin_enter(&sc->sc_mtx);
2983	SIMPLEQ_INSERT_TAIL(&sc->sc_queue2, &rp->rpr_q, q_next);
2984	ubsec_feed2(sc);
2985	ubsecstats.hst_modexpcrt++;
2986	mutex_spin_exit(&sc->sc_mtx);
2987	return;
2988
2989errout:
2990	if (rp != NULL) {
2991		if (rp->rpr_q.q_mcr.dma_map != NULL)
2992			ubsec_dma_free(sc, &rp->rpr_q.q_mcr);
2993		if (rp->rpr_msgin.dma_map != NULL) {
2994			memset(rp->rpr_msgin.dma_vaddr, 0,
2995			    rp->rpr_msgin.dma_size);
2996			ubsec_dma_free(sc, &rp->rpr_msgin);
2997		}
2998		if (rp->rpr_msgout.dma_map != NULL) {
2999			memset(rp->rpr_msgout.dma_vaddr, 0,
3000			    rp->rpr_msgout.dma_size);
3001			ubsec_dma_free(sc, &rp->rpr_msgout);
3002		}
3003		free(rp, M_DEVBUF);
3004	}
3005	krp->krp_status = err;
3006	crypto_kdone(krp);
3007}
3008
3009#ifdef UBSEC_DEBUG
3010static void
3011ubsec_dump_pb(volatile struct ubsec_pktbuf *pb)
3012{
3013	printf("addr 0x%x (0x%x) next 0x%x\n",
3014	    pb->pb_addr, pb->pb_len, pb->pb_next);
3015}
3016
3017static void
3018ubsec_dump_ctx2(volatile struct ubsec_ctx_keyop *c)
3019{
3020	printf("CTX (0x%x):\n", c->ctx_len);
3021	switch (letoh16(c->ctx_op)) {
3022	case UBS_CTXOP_RNGBYPASS:
3023	case UBS_CTXOP_RNGSHA1:
3024		break;
3025	case UBS_CTXOP_MODEXP:
3026	{
3027		struct ubsec_ctx_modexp *cx = (void *)c;
3028		int i, len;
3029
3030		printf(" Elen %u, Nlen %u\n",
3031		    letoh16(cx->me_E_len), letoh16(cx->me_N_len));
3032		len = (cx->me_N_len + 7)/8;
3033		for (i = 0; i < len; i++)
3034			printf("%s%02x", (i == 0) ? " N: " : ":", cx->me_N[i]);
3035		printf("\n");
3036		break;
3037	}
3038	default:
3039		printf("unknown context: %x\n", letoh16(c->ctx_op));
3040	}
3041	printf("END CTX\n");
3042}
3043
3044static void
3045ubsec_dump_mcr(struct ubsec_mcr *mcr)
3046{
3047	volatile struct ubsec_mcr_add *ma;
3048	int i;
3049
3050	printf("MCR:\n");
3051	printf(" pkts: %u, flags 0x%x\n",
3052	    letoh16(mcr->mcr_pkts), letoh16(mcr->mcr_flags));
3053	ma = (volatile struct ubsec_mcr_add *)&mcr->mcr_cmdctxp;
3054	for (i = 0; i < letoh16(mcr->mcr_pkts); i++) {
3055		printf(" %d: ctx 0x%x len 0x%x rsvd 0x%x\n", i,
3056		    letoh32(ma->mcr_cmdctxp), letoh16(ma->mcr_pktlen),
3057		    letoh16(ma->mcr_reserved));
3058		printf(" %d: ipkt ", i);
3059		ubsec_dump_pb(&ma->mcr_ipktbuf);
3060		printf(" %d: opkt ", i);
3061		ubsec_dump_pb(&ma->mcr_opktbuf);
3062		ma++;
3063	}
3064	printf("END MCR\n");
3065}
3066#endif /* UBSEC_DEBUG */
3067
3068/*
3069 * Return the number of significant bits of a big number.
3070 */
3071static int
3072ubsec_ksigbits(struct crparam *cr)
3073{
3074	u_int plen = (cr->crp_nbits + 7) / 8;
3075	int i, sig = plen * 8;
3076	u_int8_t c, *p = cr->crp_p;
3077
3078	for (i = plen - 1; i >= 0; i--) {
3079		c = p[i];
3080		if (c != 0) {
3081			while ((c & 0x80) == 0) {
3082				sig--;
3083				c <<= 1;
3084			}
3085			break;
3086		}
3087		sig -= 8;
3088	}
3089	return (sig);
3090}
3091
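/*
 * Widen a little-endian big number from 'srcbits' to 'dstbits' and shift
 * it up (towards the most significant end) by 'shiftbits' bits; used to
 * normalize operands when software normalization is in effect.
 */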
3092static void
3093ubsec_kshift_r(u_int shiftbits, u_int8_t *src, u_int srcbits,
3094    u_int8_t *dst, u_int dstbits)
3095{
3096	u_int slen, dlen;
3097	int i, si, di, n;
3098
3099	slen = (srcbits + 7) / 8;
3100	dlen = (dstbits + 7) / 8;
3101
3102	for (i = 0; i < slen; i++)
3103		dst[i] = src[i];
3104	for (i = 0; i < dlen - slen; i++)
3105		dst[slen + i] = 0;
3106
3107	n = shiftbits / 8;
3108	if (n != 0) {
3109		si = dlen - n - 1;
3110		di = dlen - 1;
3111		while (si >= 0)
3112			dst[di--] = dst[si--];
3113		while (di >= 0)
3114			dst[di--] = 0;
3115	}
3116
3117	n = shiftbits % 8;
3118	if (n != 0) {
3119		for (i = dlen - 1; i > 0; i--)
3120			dst[i] = (dst[i] << n) |
3121			    (dst[i - 1] >> (8 - n));
3122		dst[0] = dst[0] << n;
3123	}
3124}
3125
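/*
 * Shift a little-endian big number down (towards the least significant
 * end) by 'shiftbits' bits while copying it from 'src' to 'dst'; used to
 * strip the normalization shift from results returned by the chip.
 */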
3126static void
3127ubsec_kshift_l(u_int shiftbits, u_int8_t *src, u_int srcbits,
3128    u_int8_t *dst, u_int dstbits)
3129{
3130	int slen, dlen, i, n;
3131
3132	slen = (srcbits + 7) / 8;
3133	dlen = (dstbits + 7) / 8;
3134
3135	n = shiftbits / 8;
3136	for (i = 0; i < slen; i++)
3137		dst[i] = src[i + n];
3138	for (i = 0; i < dlen - slen; i++)
3139		dst[slen + i] = 0;
3140
3141	n = shiftbits % 8;
3142	if (n != 0) {
3143		for (i = 0; i < (dlen - 1); i++)
3144			dst[i] = (dst[i] >> n) | (dst[i + 1] << (8 - n));
3145		dst[dlen - 1] = dst[dlen - 1] >> n;
3146	}
3147}
3148