1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2017 Chelsio Communications, Inc.
5 * Copyright (c) 2017 Conrad Meyer <cem@FreeBSD.org>
6 * All rights reserved.
7 * Largely borrowed from ccr(4), Written by: John Baldwin <jhb@FreeBSD.org>
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD$");
33
34#include "opt_ddb.h"
35
36#include <sys/types.h>
37#include <sys/bus.h>
38#include <sys/lock.h>
39#include <sys/kernel.h>
40#include <sys/malloc.h>
41#include <sys/mutex.h>
42#include <sys/module.h>
43#include <sys/rman.h>
44#include <sys/sglist.h>
45#include <sys/sysctl.h>
46
47#ifdef DDB
48#include <ddb/ddb.h>
49#endif
50
51#include <dev/pci/pcireg.h>
52#include <dev/pci/pcivar.h>
53
54#include <machine/bus.h>
55#include <machine/resource.h>
56#include <machine/vmparam.h>
57
58#include <opencrypto/cryptodev.h>
59#include <opencrypto/xform.h>
60
61#include <vm/vm.h>
62#include <vm/pmap.h>
63
64#include "cryptodev_if.h"
65
66#include "ccp.h"
67#include "ccp_hardware.h"
68#include "ccp_lsb.h"
69
70CTASSERT(sizeof(struct ccp_desc) == 32);
71
72static struct ccp_xts_unitsize_map_entry {
73	enum ccp_xts_unitsize cxu_id;
74	unsigned cxu_size;
75} ccp_xts_unitsize_map[] = {
76	{ CCP_XTS_AES_UNIT_SIZE_16, 16 },
77	{ CCP_XTS_AES_UNIT_SIZE_512, 512 },
78	{ CCP_XTS_AES_UNIT_SIZE_1024, 1024 },
79	{ CCP_XTS_AES_UNIT_SIZE_2048, 2048 },
80	{ CCP_XTS_AES_UNIT_SIZE_4096, 4096 },
81};
82
83SYSCTL_NODE(_hw, OID_AUTO, ccp, CTLFLAG_RD, 0, "ccp node");
84
85unsigned g_ccp_ring_order = 11;
86SYSCTL_UINT(_hw_ccp, OID_AUTO, ring_order, CTLFLAG_RDTUN, &g_ccp_ring_order,
87    0, "Set CCP ring order.  (1 << this) == ring size.  Min: 6, Max: 16");
88
89/*
90 * Zero buffer, sufficient for padding LSB entries, that does not span a page
91 * boundary
92 */
93static const char g_zeroes[32] __aligned(32);
94
95static inline uint32_t
96ccp_read_4(struct ccp_softc *sc, uint32_t offset)
97{
98	return (bus_space_read_4(sc->pci_bus_tag, sc->pci_bus_handle, offset));
99}
100
101static inline void
102ccp_write_4(struct ccp_softc *sc, uint32_t offset, uint32_t value)
103{
104	bus_space_write_4(sc->pci_bus_tag, sc->pci_bus_handle, offset, value);
105}
106
107static inline uint32_t
108ccp_read_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset)
109{
110	/*
111	 * Each queue gets its own 4kB register space.  Queue 0 is at 0x1000.
112	 */
113	return (ccp_read_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset));
114}
115
116static inline void
117ccp_write_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset,
118    uint32_t value)
119{
120	ccp_write_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset, value);
121}
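/*
 * Worked example (illustration only, taking the 4kB stride noted above as
 * CMD_Q_STATUS_INCR == 0x1000): a read or write of CMD_Q_TAIL_LO_BASE on
 * queue 2 lands at MMIO offset 0x1000 * (1 + 2) + CMD_Q_TAIL_LO_BASE, i.e.
 * somewhere in the 0x3000-0x3fff window reserved for that queue.
 */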
122
123void
124ccp_queue_write_tail(struct ccp_queue *qp)
125{
126	ccp_write_queue_4(qp->cq_softc, qp->cq_qindex, CMD_Q_TAIL_LO_BASE,
127	    ((uint32_t)qp->desc_ring_bus_addr) + (Q_DESC_SIZE * qp->cq_tail));
128}
129
130/*
131 * Given a queue and a reserved LSB entry index, compute the LSB *entry id* of
132 * that entry for the queue's private LSB region.
133 */
134static inline uint8_t
135ccp_queue_lsb_entry(struct ccp_queue *qp, unsigned lsb_entry)
136{
	return (qp->private_lsb * LSB_REGION_LENGTH + lsb_entry);
138}
139
140/*
141 * Given a queue and a reserved LSB entry index, compute the LSB *address* of
142 * that entry for the queue's private LSB region.
143 */
144static inline uint32_t
145ccp_queue_lsb_address(struct ccp_queue *qp, unsigned lsb_entry)
146{
147	return (ccp_queue_lsb_entry(qp, lsb_entry) * LSB_ENTRY_SIZE);
148}
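/*
 * Worked example (illustration only, using the 16-entry regions and 32-byte
 * entries described in the LSB overview below): a queue whose private LSB
 * region is 3 maps reserved entry 2 to LSB entry id 3 * 16 + 2 = 50, and
 * therefore to LSB byte address 50 * 32 = 1600.
 */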
149
150/*
151 * Some terminology:
152 *
153 * LSB - Local Storage Block
154 * =========================
155 *
156 * 8 segments/regions, each containing 16 entries.
157 *
158 * Each entry contains 256 bits (32 bytes).
159 *
160 * Segments are virtually addressed in commands, but accesses cannot cross
161 * segment boundaries.  Virtual map uses an identity mapping by default
162 * (virtual segment N corresponds to physical segment N).
163 *
164 * Access to a physical region can be restricted to any subset of all five
165 * queues.
166 *
167 * "Pass-through" mode
168 * ===================
169 *
170 * Pass-through is a generic DMA engine, much like ioat(4).  Some nice
171 * features:
172 *
173 * - Supports byte-swapping for endian conversion (32- or 256-bit words)
174 * - AND, OR, XOR with fixed 256-bit mask
175 * - CRC32 of data (may be used in tandem with bswap, but not bit operations)
176 * - Read/write of LSB
177 * - Memset
178 *
179 * If bit manipulation mode is enabled, input must be a multiple of 256 bits
180 * (32 bytes).
181 *
182 * If byte-swapping is enabled, input must be a multiple of the word size.
183 *
184 * Zlib mode -- only usable from one queue at a time, single job at a time.
185 * ========================================================================
186 *
187 * Only usable from private host, aka PSP?  Not host processor?
188 *
189 * RNG.
190 * ====
191 *
192 * Raw bits are conditioned with AES and fed through CTR_DRBG.  Output goes in
193 * a ring buffer readable by software.
194 *
195 * NIST SP 800-90B Repetition Count and Adaptive Proportion health checks are
196 * implemented on the raw input stream and may be enabled to verify min-entropy
197 * of 0.5 bits per bit.
198 */
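/*
 * Example of the pass-through constraints above (illustration only): a
 * 48-byte copy is acceptable with bitwise and byteswap modes disabled, but
 * with the fixed-mask AND/OR/XOR enabled it would need to be padded to 64
 * bytes (the next multiple of 32), and with 256-bit byteswap enabled it
 * would likewise have to be a 32-byte multiple.
 */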
199
200static void
201ccp_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
202{
203	bus_addr_t *baddr;
204
205	KASSERT(error == 0, ("%s: error:%d", __func__, error));
206	baddr = arg;
207	*baddr = segs->ds_addr;
208}
209
210static int
211ccp_hw_attach_queue(device_t dev, uint64_t lsbmask, unsigned queue)
212{
213	struct ccp_softc *sc;
214	struct ccp_queue *qp;
215	void *desc;
216	size_t ringsz, num_descriptors;
217	int error;
218
219	desc = NULL;
220	sc = device_get_softc(dev);
221	qp = &sc->queues[queue];
222
223	/*
224	 * Don't bother allocating a ring for queues the host isn't allowed to
225	 * drive.
226	 */
227	if ((sc->valid_queues & (1 << queue)) == 0)
228		return (0);
229
230	ccp_queue_decode_lsb_regions(sc, lsbmask, queue);
231
232	/* Ignore queues that do not have any LSB access. */
233	if (qp->lsb_mask == 0) {
234		device_printf(dev, "Ignoring queue %u with no LSB access\n",
235		    queue);
236		sc->valid_queues &= ~(1 << queue);
237		return (0);
238	}
239
240	num_descriptors = 1 << sc->ring_size_order;
241	ringsz = sizeof(struct ccp_desc) * num_descriptors;
242
243	/*
244	 * "Queue_Size" is order - 1.
245	 *
246	 * Queue must be aligned to 5+Queue_Size+1 == 5 + order bits.
247	 */
248	error = bus_dma_tag_create(bus_get_dma_tag(dev),
249	    1 << (5 + sc->ring_size_order),
250#if defined(__i386__) && !defined(PAE)
251	    0, BUS_SPACE_MAXADDR,
252#else
253	    (bus_addr_t)1 << 32, BUS_SPACE_MAXADDR_48BIT,
254#endif
255	    BUS_SPACE_MAXADDR, NULL, NULL, ringsz, 1,
256	    ringsz, 0, NULL, NULL, &qp->ring_desc_tag);
257	if (error != 0)
258		goto out;
259
260	error = bus_dmamem_alloc(qp->ring_desc_tag, &desc,
261	    BUS_DMA_ZERO | BUS_DMA_WAITOK, &qp->ring_desc_map);
262	if (error != 0)
263		goto out;
264
265	error = bus_dmamap_load(qp->ring_desc_tag, qp->ring_desc_map, desc,
266	    ringsz, ccp_dmamap_cb, &qp->desc_ring_bus_addr, BUS_DMA_WAITOK);
267	if (error != 0)
268		goto out;
269
270	qp->desc_ring = desc;
271	qp->completions_ring = malloc(num_descriptors *
272	    sizeof(*qp->completions_ring), M_CCP, M_ZERO | M_WAITOK);
273
274	/* Zero control register; among other things, clears the RUN flag. */
275	qp->qcontrol = 0;
276	ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol);
277	ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE, 0);
278
279	/* Clear any leftover interrupt status flags */
280	ccp_write_queue_4(sc, queue, CMD_Q_INTERRUPT_STATUS_BASE,
281	    ALL_INTERRUPTS);
282
283	qp->qcontrol |= (sc->ring_size_order - 1) << CMD_Q_SIZE_SHIFT;
284
285	ccp_write_queue_4(sc, queue, CMD_Q_TAIL_LO_BASE,
286	    (uint32_t)qp->desc_ring_bus_addr);
287	ccp_write_queue_4(sc, queue, CMD_Q_HEAD_LO_BASE,
288	    (uint32_t)qp->desc_ring_bus_addr);
289
290	/*
291	 * Enable completion interrupts, as well as error or administrative
292	 * halt interrupts.  We don't use administrative halts, but they
293	 * shouldn't trip unless we do, so it ought to be harmless.
294	 */
295	ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE,
296	    INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED);
297
298	qp->qcontrol |= (qp->desc_ring_bus_addr >> 32) << CMD_Q_PTR_HI_SHIFT;
299	qp->qcontrol |= CMD_Q_RUN;
300	ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol);
301
302out:
303	if (error != 0) {
304		if (qp->desc_ring != NULL)
305			bus_dmamap_unload(qp->ring_desc_tag,
306			    qp->ring_desc_map);
307		if (desc != NULL)
308			bus_dmamem_free(qp->ring_desc_tag, desc,
309			    qp->ring_desc_map);
310		if (qp->ring_desc_tag != NULL)
311			bus_dma_tag_destroy(qp->ring_desc_tag);
312	}
313	return (error);
314}
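/*
 * Ring sizing example (illustration only): with the default
 * hw.ccp.ring_order of 11, num_descriptors is 1 << 11 = 2048, the ring
 * occupies 2048 * sizeof(struct ccp_desc) = 64kB, and the DMA tag demands
 * 1 << (5 + 11) = 64kB alignment, satisfying the "5 + order bits" rule
 * above.
 */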
315
316static void
317ccp_hw_detach_queue(device_t dev, unsigned queue)
318{
319	struct ccp_softc *sc;
320	struct ccp_queue *qp;
321
322	sc = device_get_softc(dev);
323	qp = &sc->queues[queue];
324
325	/*
326	 * Don't bother allocating a ring for queues the host isn't allowed to
327	 * drive.
328	 */
329	if ((sc->valid_queues & (1 << queue)) == 0)
330		return;
331
332	free(qp->completions_ring, M_CCP);
333	bus_dmamap_unload(qp->ring_desc_tag, qp->ring_desc_map);
334	bus_dmamem_free(qp->ring_desc_tag, qp->desc_ring, qp->ring_desc_map);
335	bus_dma_tag_destroy(qp->ring_desc_tag);
336}
337
338static int
339ccp_map_pci_bar(device_t dev)
340{
341	struct ccp_softc *sc;
342
343	sc = device_get_softc(dev);
344
345	sc->pci_resource_id = PCIR_BAR(2);
346	sc->pci_resource = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
347	    &sc->pci_resource_id, RF_ACTIVE);
348	if (sc->pci_resource == NULL) {
349		device_printf(dev, "unable to allocate pci resource\n");
350		return (ENODEV);
351	}
352
353	sc->pci_resource_id_msix = PCIR_BAR(5);
354	sc->pci_resource_msix = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
355	    &sc->pci_resource_id_msix, RF_ACTIVE);
356	if (sc->pci_resource_msix == NULL) {
357		device_printf(dev, "unable to allocate pci resource msix\n");
358		bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id,
359		    sc->pci_resource);
360		return (ENODEV);
361	}
362
363	sc->pci_bus_tag = rman_get_bustag(sc->pci_resource);
364	sc->pci_bus_handle = rman_get_bushandle(sc->pci_resource);
365	return (0);
366}
367
368static void
369ccp_unmap_pci_bar(device_t dev)
370{
371	struct ccp_softc *sc;
372
373	sc = device_get_softc(dev);
374
375	bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id_msix,
376	    sc->pci_resource_msix);
377	bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id,
378	    sc->pci_resource);
379}
380
381const static struct ccp_error_code {
382	uint8_t		ce_code;
383	const char	*ce_name;
384	int		ce_errno;
385	const char	*ce_desc;
386} ccp_error_codes[] = {
387	{ 0x01, "ILLEGAL_ENGINE", EIO, "Requested engine was invalid" },
388	{ 0x03, "ILLEGAL_FUNCTION_TYPE", EIO,
389	    "A non-supported function type was specified" },
390	{ 0x04, "ILLEGAL_FUNCTION_MODE", EIO,
391	    "A non-supported function mode was specified" },
392	{ 0x05, "ILLEGAL_FUNCTION_ENCRYPT", EIO,
393	    "A CMAC type was specified when ENCRYPT was not specified" },
394	{ 0x06, "ILLEGAL_FUNCTION_SIZE", EIO,
395	    "A non-supported function size was specified.\n"
396	    "AES-CFB: Size was not 127 or 7;\n"
397	    "3DES-CFB: Size was not 7;\n"
398	    "RSA: See supported size table (7.4.2);\n"
399	    "ECC: Size was greater than 576 bits." },
400	{ 0x07, "Zlib_MISSING_INIT_EOM", EIO,
401	    "Zlib command does not have INIT and EOM set" },
402	{ 0x08, "ILLEGAL_FUNCTION_RSVD", EIO,
403	    "Reserved bits in a function specification were not 0" },
404	{ 0x09, "ILLEGAL_BUFFER_LENGTH", EIO,
405	    "The buffer length specified was not correct for the selected engine"
406	},
407	{ 0x0A, "VLSB_FAULT", EIO, "Illegal VLSB segment mapping:\n"
408	    "Undefined VLSB segment mapping or\n"
409	    "mapping to unsupported LSB segment id" },
410	{ 0x0B, "ILLEGAL_MEM_ADDR", EFAULT,
411	    "The specified source/destination buffer access was illegal:\n"
412	    "Data buffer located in a LSB location disallowed by the LSB protection masks; or\n"
413	    "Data buffer not completely contained within a single segment; or\n"
414	    "Pointer with Fixed=1 is not 32-bit aligned; or\n"
415	    "Pointer with Fixed=1 attempted to reference non-AXI1 (local) memory."
416	},
417	{ 0x0C, "ILLEGAL_MEM_SEL", EIO,
418	    "A src_mem, dst_mem, or key_mem field was illegal:\n"
419	    "A field was set to a reserved value; or\n"
420	    "A public command attempted to reference AXI1 (local) or GART memory; or\n"
	    "A Zlib command attempted to use the LSB." },
422	{ 0x0D, "ILLEGAL_CONTEXT_ADDR", EIO,
423	    "The specified context location was illegal:\n"
424	    "Context located in a LSB location disallowed by the LSB protection masks; or\n"
425	    "Context not completely contained within a single segment." },
426	{ 0x0E, "ILLEGAL_KEY_ADDR", EIO,
427	    "The specified key location was illegal:\n"
428	    "Key located in a LSB location disallowed by the LSB protection masks; or\n"
429	    "Key not completely contained within a single segment." },
430	{ 0x12, "CMD_TIMEOUT", EIO, "A command timeout violation occurred" },
431	/* XXX Could fill out these descriptions too */
432	{ 0x13, "IDMA0_AXI_SLVERR", EIO, "" },
433	{ 0x14, "IDMA0_AXI_DECERR", EIO, "" },
434	{ 0x16, "IDMA1_AXI_SLVERR", EIO, "" },
435	{ 0x17, "IDMA1_AXI_DECERR", EIO, "" },
436	{ 0x19, "ZLIBVHB_AXI_SLVERR", EIO, "" },
437	{ 0x1A, "ZLIBVHB_AXI_DECERR", EIO, "" },
438	{ 0x1C, "ZLIB_UNEXPECTED_EOM", EIO, "" },
439	{ 0x1D, "ZLIB_EXTRA_DATA", EIO, "" },
440	{ 0x1E, "ZLIB_BTYPE", EIO, "" },
441	{ 0x20, "ZLIB_UNDEFINED_DISTANCE_SYMBOL", EIO, "" },
442	{ 0x21, "ZLIB_CODE_LENGTH_SYMBOL", EIO, "" },
443	{ 0x22, "ZLIB_VHB_ILLEGAL_FETCH", EIO, "" },
444	{ 0x23, "ZLIB_UNCOMPRESSED_LEN", EIO, "" },
445	{ 0x24, "ZLIB_LIMIT_REACHED", EIO, "" },
446	{ 0x25, "ZLIB_CHECKSUM_MISMATCH", EIO, "" },
447	{ 0x26, "ODMA0_AXI_SLVERR", EIO, "" },
448	{ 0x27, "ODMA0_AXI_DECERR", EIO, "" },
449	{ 0x29, "ODMA1_AXI_SLVERR", EIO, "" },
450	{ 0x2A, "ODMA1_AXI_DECERR", EIO, "" },
451	{ 0x2B, "LSB_PARITY_ERR", EIO,
452	    "A read from the LSB encountered a parity error" },
453};
454
455static void
456ccp_intr_handle_error(struct ccp_queue *qp, const struct ccp_desc *desc)
457{
458	struct ccp_completion_ctx *cctx;
459	const struct ccp_error_code *ec;
460	struct ccp_softc *sc;
461	uint32_t status, error, esource, faultblock;
462	unsigned q, idx;
463	int errno;
464
465	sc = qp->cq_softc;
466	q = qp->cq_qindex;
467
468	status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE);
469
470	error = status & STATUS_ERROR_MASK;
471
472	/* Decode error status */
473	ec = NULL;
474	for (idx = 0; idx < nitems(ccp_error_codes); idx++)
475		if (ccp_error_codes[idx].ce_code == error) {
476			ec = &ccp_error_codes[idx];
477			break;
478		}
479
480	esource = (status >> STATUS_ERRORSOURCE_SHIFT) &
481	    STATUS_ERRORSOURCE_MASK;
482	faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) &
483	    STATUS_VLSB_FAULTBLOCK_MASK;
	device_printf(sc->dev,
	    "Error: %s (%u) Source: %u Faulting LSB block: %u\n",
485	    (ec != NULL) ? ec->ce_name : "(reserved)", error, esource,
486	    faultblock);
487	if (ec != NULL)
488		device_printf(sc->dev, "Error description: %s\n", ec->ce_desc);
489
490	/* TODO Could format the desc nicely here */
491	idx = desc - qp->desc_ring;
492	DPRINTF(sc->dev, "Bad descriptor index: %u contents: %32D\n", idx,
493	    (const void *)desc, " ");
494
495	/*
	 * TODO Per § 14.4 "Error Handling," DMA_Status,
	 * DMA_Read/Write_Status, Zlib Decompress status may be interesting.
498	 */
499
500	while (true) {
501		/* Keep unused descriptors zero for next use. */
502		memset(&qp->desc_ring[idx], 0, sizeof(qp->desc_ring[idx]));
503
504		cctx = &qp->completions_ring[idx];
505
506		/*
		 * Restart procedure described in § 14.2.5.  Could be used by
		 * HoC if we used that.
509		 *
510		 * Advance HEAD_LO past bad descriptor + any remaining in
511		 * transaction manually, then restart queue.
512		 */
513		idx = (idx + 1) % (1 << sc->ring_size_order);
514
515		/* Callback function signals end of transaction */
516		if (cctx->callback_fn != NULL) {
517			if (ec == NULL)
518				errno = EIO;
519			else
520				errno = ec->ce_errno;
521			/* TODO More specific error code */
			cctx->callback_fn(qp, cctx->session,
			    cctx->callback_arg, errno);
523			cctx->callback_fn = NULL;
524			break;
525		}
526	}
527
528	qp->cq_head = idx;
529	qp->cq_waiting = false;
530	wakeup(&qp->cq_tail);
531	DPRINTF(sc->dev, "%s: wrote sw head:%u\n", __func__, qp->cq_head);
532	ccp_write_queue_4(sc, q, CMD_Q_HEAD_LO_BASE,
533	    (uint32_t)qp->desc_ring_bus_addr + (idx * Q_DESC_SIZE));
534	ccp_write_queue_4(sc, q, CMD_Q_CONTROL_BASE, qp->qcontrol);
535	DPRINTF(sc->dev, "%s: Restarted queue\n", __func__);
536}
537
538static void
539ccp_intr_run_completions(struct ccp_queue *qp, uint32_t ints)
540{
541	struct ccp_completion_ctx *cctx;
542	struct ccp_softc *sc;
543	const struct ccp_desc *desc;
544	uint32_t headlo, idx;
545	unsigned q, completed;
546
547	sc = qp->cq_softc;
548	q = qp->cq_qindex;
549
550	mtx_lock(&qp->cq_lock);
551
552	/*
553	 * Hardware HEAD_LO points to the first incomplete descriptor.  Process
554	 * any submitted and completed descriptors, up to but not including
555	 * HEAD_LO.
556	 */
557	headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE);
558	idx = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE;
559
560	DPRINTF(sc->dev, "%s: hw head:%u sw head:%u\n", __func__, idx,
561	    qp->cq_head);
562	completed = 0;
563	while (qp->cq_head != idx) {
564		DPRINTF(sc->dev, "%s: completing:%u\n", __func__, qp->cq_head);
565
566		cctx = &qp->completions_ring[qp->cq_head];
567		if (cctx->callback_fn != NULL) {
568			cctx->callback_fn(qp, cctx->session,
569			    cctx->callback_arg, 0);
570			cctx->callback_fn = NULL;
571		}
572
573		/* Keep unused descriptors zero for next use. */
574		memset(&qp->desc_ring[qp->cq_head], 0,
575		    sizeof(qp->desc_ring[qp->cq_head]));
576
577		qp->cq_head = (qp->cq_head + 1) % (1 << sc->ring_size_order);
578		completed++;
579	}
580	if (completed > 0) {
581		qp->cq_waiting = false;
582		wakeup(&qp->cq_tail);
583	}
584
585	DPRINTF(sc->dev, "%s: wrote sw head:%u\n", __func__, qp->cq_head);
586
587	/*
588	 * Desc points to the first incomplete descriptor, at the time we read
589	 * HEAD_LO.  If there was an error flagged in interrupt status, the HW
590	 * will not proceed past the erroneous descriptor by itself.
591	 */
592	desc = &qp->desc_ring[idx];
593	if ((ints & INT_ERROR) != 0)
594		ccp_intr_handle_error(qp, desc);
595
596	mtx_unlock(&qp->cq_lock);
597}
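/*
 * Index arithmetic example (illustration only, assuming Q_DESC_SIZE matches
 * sizeof(struct ccp_desc), i.e. 32 bytes): if the ring's bus address is
 * 0x10000000 and hardware reports HEAD_LO == 0x10000040, then idx is
 * 0x40 / 32 = 2, so descriptors 0 and 1 are complete and descriptor 2 is
 * the first incomplete (or faulted) one.
 */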
598
599static void
600ccp_intr_handler(void *arg)
601{
602	struct ccp_softc *sc = arg;
603	size_t i;
604	uint32_t ints;
605
606	DPRINTF(sc->dev, "%s: interrupt\n", __func__);
607
608	/*
609	 * We get one global interrupt per PCI device, shared over all of
610	 * its queues.  Scan each valid queue on interrupt for flags indicating
611	 * activity.
612	 */
613	for (i = 0; i < nitems(sc->queues); i++) {
614		if ((sc->valid_queues & (1 << i)) == 0)
615			continue;
616
617		ints = ccp_read_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE);
618		if (ints == 0)
619			continue;
620
621#if 0
622		DPRINTF(sc->dev, "%s: %x interrupts on queue %zu\n", __func__,
623		    (unsigned)ints, i);
624#endif
625		/* Write back 1s to clear interrupt status bits. */
626		ccp_write_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE, ints);
627
628		/*
629		 * If there was an error, we still need to run completions on
630		 * any descriptors prior to the error.  The completions handler
631		 * invoked below will also handle the error descriptor.
632		 */
633		if ((ints & (INT_COMPLETION | INT_ERROR)) != 0)
634			ccp_intr_run_completions(&sc->queues[i], ints);
635
636		if ((ints & INT_QUEUE_STOPPED) != 0)
637			device_printf(sc->dev, "%s: queue %zu stopped\n",
638			    __func__, i);
639	}
640
641	/* Re-enable interrupts after processing */
642	for (i = 0; i < nitems(sc->queues); i++) {
643		if ((sc->valid_queues & (1 << i)) == 0)
644			continue;
645		ccp_write_queue_4(sc, i, CMD_Q_INT_ENABLE_BASE,
646		    INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED);
647	}
648}
649
650static int
651ccp_intr_filter(void *arg)
652{
653	struct ccp_softc *sc = arg;
654	size_t i;
655
656	/* TODO: Split individual queues into separate taskqueues? */
657	for (i = 0; i < nitems(sc->queues); i++) {
658		if ((sc->valid_queues & (1 << i)) == 0)
659			continue;
660
661		/* Mask interrupt until task completes */
662		ccp_write_queue_4(sc, i, CMD_Q_INT_ENABLE_BASE, 0);
663	}
664
665	return (FILTER_SCHEDULE_THREAD);
666}
667
668static int
669ccp_setup_interrupts(struct ccp_softc *sc)
670{
671	uint32_t nvec;
672	int rid, error, n, ridcopy;
673
674	n = pci_msix_count(sc->dev);
675	if (n < 1) {
676		device_printf(sc->dev, "%s: msix_count: %d\n", __func__, n);
677		return (ENXIO);
678	}
679
680	nvec = n;
681	error = pci_alloc_msix(sc->dev, &nvec);
682	if (error != 0) {
683		device_printf(sc->dev, "%s: alloc_msix error: %d\n", __func__,
684		    error);
685		return (error);
686	}
687	if (nvec < 1) {
688		device_printf(sc->dev, "%s: alloc_msix: 0 vectors\n",
689		    __func__);
690		return (ENXIO);
691	}
692	if (nvec > nitems(sc->intr_res)) {
693		device_printf(sc->dev, "%s: too many vectors: %u\n", __func__,
694		    nvec);
695		nvec = nitems(sc->intr_res);
696	}
697
698	for (rid = 1; rid < 1 + nvec; rid++) {
699		ridcopy = rid;
700		sc->intr_res[rid - 1] = bus_alloc_resource_any(sc->dev,
701		    SYS_RES_IRQ, &ridcopy, RF_ACTIVE);
702		if (sc->intr_res[rid - 1] == NULL) {
			device_printf(sc->dev,
			    "%s: Failed to alloc IRQ resource\n", __func__);
705			return (ENXIO);
706		}
707
708		sc->intr_tag[rid - 1] = NULL;
709		error = bus_setup_intr(sc->dev, sc->intr_res[rid - 1],
710		    INTR_MPSAFE | INTR_TYPE_MISC, ccp_intr_filter,
711		    ccp_intr_handler, sc, &sc->intr_tag[rid - 1]);
712		if (error != 0)
713			device_printf(sc->dev, "%s: setup_intr: %d\n",
714			    __func__, error);
715	}
716	sc->intr_count = nvec;
717
718	return (error);
719}
720
721static void
722ccp_release_interrupts(struct ccp_softc *sc)
723{
724	unsigned i;
725
726	for (i = 0; i < sc->intr_count; i++) {
727		if (sc->intr_tag[i] != NULL)
728			bus_teardown_intr(sc->dev, sc->intr_res[i],
729			    sc->intr_tag[i]);
730		if (sc->intr_res[i] != NULL)
731			bus_release_resource(sc->dev, SYS_RES_IRQ,
732			    rman_get_rid(sc->intr_res[i]), sc->intr_res[i]);
733	}
734
735	pci_release_msi(sc->dev);
736}
737
738int
739ccp_hw_attach(device_t dev)
740{
741	struct ccp_softc *sc;
742	uint64_t lsbmask;
743	uint32_t version, lsbmasklo, lsbmaskhi;
744	unsigned queue_idx, j;
745	int error;
746	bool bars_mapped, interrupts_setup;
747
748	queue_idx = 0;
749	bars_mapped = interrupts_setup = false;
750	sc = device_get_softc(dev);
751
752	error = ccp_map_pci_bar(dev);
753	if (error != 0) {
754		device_printf(dev, "%s: couldn't map BAR(s)\n", __func__);
755		goto out;
756	}
757	bars_mapped = true;
758
759	error = pci_enable_busmaster(dev);
760	if (error != 0) {
761		device_printf(dev, "%s: couldn't enable busmaster\n",
762		    __func__);
763		goto out;
764	}
765
766	sc->ring_size_order = g_ccp_ring_order;
767	if (sc->ring_size_order < 6 || sc->ring_size_order > 16) {
768		device_printf(dev, "bogus hw.ccp.ring_order\n");
769		error = EINVAL;
770		goto out;
771	}
772	sc->valid_queues = ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET);
773
774	version = ccp_read_4(sc, VERSION_REG);
775	if ((version & VERSION_NUM_MASK) < 5) {
776		device_printf(dev,
777		    "driver supports version 5 and later hardware\n");
778		error = ENXIO;
779		goto out;
780	}
781
782	error = ccp_setup_interrupts(sc);
783	if (error != 0)
784		goto out;
785	interrupts_setup = true;
786
787	sc->hw_version = version & VERSION_NUM_MASK;
788	sc->num_queues = (version >> VERSION_NUMVQM_SHIFT) &
789	    VERSION_NUMVQM_MASK;
790	sc->num_lsb_entries = (version >> VERSION_LSBSIZE_SHIFT) &
791	    VERSION_LSBSIZE_MASK;
792	sc->hw_features = version & VERSION_CAP_MASK;
793
794	/*
795	 * Copy private LSB mask to public registers to enable access to LSB
796	 * from all queues allowed by BIOS.
797	 */
798	lsbmasklo = ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET);
799	lsbmaskhi = ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET);
800	ccp_write_4(sc, LSB_PUBLIC_MASK_LO_OFFSET, lsbmasklo);
801	ccp_write_4(sc, LSB_PUBLIC_MASK_HI_OFFSET, lsbmaskhi);
802
803	lsbmask = ((uint64_t)lsbmaskhi << 30) | lsbmasklo;
804
805	for (; queue_idx < nitems(sc->queues); queue_idx++) {
806		error = ccp_hw_attach_queue(dev, lsbmask, queue_idx);
807		if (error != 0) {
808			device_printf(dev, "%s: couldn't attach queue %u\n",
809			    __func__, queue_idx);
810			goto out;
811		}
812	}
813	ccp_assign_lsb_regions(sc, lsbmask);
814
815out:
816	if (error != 0) {
817		if (interrupts_setup)
818			ccp_release_interrupts(sc);
819		for (j = 0; j < queue_idx; j++)
820			ccp_hw_detach_queue(dev, j);
821		if (sc->ring_size_order != 0)
822			pci_disable_busmaster(dev);
823		if (bars_mapped)
824			ccp_unmap_pci_bar(dev);
825	}
826	return (error);
827}
828
829void
830ccp_hw_detach(device_t dev)
831{
832	struct ccp_softc *sc;
833	unsigned i;
834
835	sc = device_get_softc(dev);
836
837	for (i = 0; i < nitems(sc->queues); i++)
838		ccp_hw_detach_queue(dev, i);
839
840	ccp_release_interrupts(sc);
841	pci_disable_busmaster(dev);
842	ccp_unmap_pci_bar(dev);
843}
844
845static int __must_check
846ccp_passthrough(struct ccp_queue *qp, bus_addr_t dst,
847    enum ccp_memtype dst_type, bus_addr_t src, enum ccp_memtype src_type,
848    bus_size_t len, enum ccp_passthru_byteswap swapmode,
849    enum ccp_passthru_bitwise bitmode, bool interrupt,
850    const struct ccp_completion_ctx *cctx)
851{
852	struct ccp_desc *desc;
853
854	if (ccp_queue_get_ring_space(qp) == 0)
855		return (EAGAIN);
856
857	desc = &qp->desc_ring[qp->cq_tail];
858
859	memset(desc, 0, sizeof(*desc));
860	desc->engine = CCP_ENGINE_PASSTHRU;
861
862	desc->pt.ioc = interrupt;
863	desc->pt.byteswap = swapmode;
864	desc->pt.bitwise = bitmode;
865	desc->length = len;
866
867	desc->src_lo = (uint32_t)src;
868	desc->src_hi = src >> 32;
869	desc->src_mem = src_type;
870
871	desc->dst_lo = (uint32_t)dst;
872	desc->dst_hi = dst >> 32;
873	desc->dst_mem = dst_type;
874
875	if (bitmode != CCP_PASSTHRU_BITWISE_NOOP)
876		desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_KEY);
877
878	if (cctx != NULL)
879		memcpy(&qp->completions_ring[qp->cq_tail], cctx, sizeof(*cctx));
880
881	qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order);
882	return (0);
883}
884
885static int __must_check
886ccp_passthrough_sgl(struct ccp_queue *qp, bus_addr_t lsb_addr, bool tolsb,
887    struct sglist *sgl, bus_size_t len, bool interrupt,
888    const struct ccp_completion_ctx *cctx)
889{
890	struct sglist_seg *seg;
891	size_t i, remain, nb;
892	int error;
893
894	remain = len;
895	for (i = 0; i < sgl->sg_nseg && remain != 0; i++) {
896		seg = &sgl->sg_segs[i];
897		/* crd_len is int, so 32-bit min() is ok. */
898		nb = min(remain, seg->ss_len);
899
900		if (tolsb)
901			error = ccp_passthrough(qp, lsb_addr, CCP_MEMTYPE_SB,
902			    seg->ss_paddr, CCP_MEMTYPE_SYSTEM, nb,
903			    CCP_PASSTHRU_BYTESWAP_NOOP,
904			    CCP_PASSTHRU_BITWISE_NOOP,
905			    (nb == remain) && interrupt, cctx);
906		else
907			error = ccp_passthrough(qp, seg->ss_paddr,
908			    CCP_MEMTYPE_SYSTEM, lsb_addr, CCP_MEMTYPE_SB, nb,
909			    CCP_PASSTHRU_BYTESWAP_NOOP,
910			    CCP_PASSTHRU_BITWISE_NOOP,
911			    (nb == remain) && interrupt, cctx);
912		if (error != 0)
913			return (error);
914
915		remain -= nb;
916	}
917	return (0);
918}
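
/*
 * Usage sketch for the helper above (illustration only, kept disabled; the
 * ccp_example_read_lsb name is hypothetical): copy one 32-byte LSB entry out
 * to a caller-provided sglist and request a completion interrupt on the
 * final descriptor.
 */
#if 0
static int __must_check
ccp_example_read_lsb(struct ccp_queue *qp, struct sglist *sgl,
    const struct ccp_completion_ctx *cctx)
{

	return (ccp_passthrough_sgl(qp,
	    ccp_queue_lsb_address(qp, LSB_ENTRY_KEY), false, sgl,
	    LSB_ENTRY_SIZE, true, cctx));
}
#endif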
919
920/*
921 * Note that these vectors are in reverse of the usual order.
922 */
923const struct SHA_vectors {
924	uint32_t SHA1[8];
925	uint32_t SHA224[8];
926	uint32_t SHA256[8];
927	uint64_t SHA384[8];
928	uint64_t SHA512[8];
929} SHA_H __aligned(PAGE_SIZE) = {
930	.SHA1 = {
931		0xc3d2e1f0ul,
932		0x10325476ul,
933		0x98badcfeul,
934		0xefcdab89ul,
935		0x67452301ul,
936		0,
937		0,
938		0,
939	},
940	.SHA224 = {
941		0xbefa4fa4ul,
942		0x64f98fa7ul,
943		0x68581511ul,
944		0xffc00b31ul,
945		0xf70e5939ul,
946		0x3070dd17ul,
947		0x367cd507ul,
948		0xc1059ed8ul,
949	},
950	.SHA256 = {
951		0x5be0cd19ul,
952		0x1f83d9abul,
953		0x9b05688cul,
954		0x510e527ful,
955		0xa54ff53aul,
956		0x3c6ef372ul,
957		0xbb67ae85ul,
958		0x6a09e667ul,
959	},
960	.SHA384 = {
961		0x47b5481dbefa4fa4ull,
962		0xdb0c2e0d64f98fa7ull,
963		0x8eb44a8768581511ull,
964		0x67332667ffc00b31ull,
965		0x152fecd8f70e5939ull,
966		0x9159015a3070dd17ull,
967		0x629a292a367cd507ull,
968		0xcbbb9d5dc1059ed8ull,
969	},
970	.SHA512 = {
971		0x5be0cd19137e2179ull,
972		0x1f83d9abfb41bd6bull,
973		0x9b05688c2b3e6c1full,
974		0x510e527fade682d1ull,
975		0xa54ff53a5f1d36f1ull,
976		0x3c6ef372fe94f82bull,
977		0xbb67ae8584caa73bull,
978		0x6a09e667f3bcc908ull,
979	},
980};
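/*
 * Reversal example: FIPS 180-4 gives the SHA-256 initial hash value as
 * H0..H7 = 6a09e667, bb67ae85, ..., 5be0cd19; the .SHA256 array above holds
 * those same eight words last-to-first, which is the layout loaded verbatim
 * into the LSB before the first SHA descriptor runs.
 */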
981/*
982 * Ensure vectors do not cross a page boundary.
983 *
984 * Disabled due to a new Clang error:  "expression is not an integral constant
985 * expression."  GCC (cross toolchain) seems to handle this assertion with
986 * _Static_assert just fine.
987 */
988#if 0
989CTASSERT(PAGE_SIZE - ((uintptr_t)&SHA_H % PAGE_SIZE) >= sizeof(SHA_H));
990#endif
991
992const struct SHA_Defn {
993	enum sha_version version;
994	const void *H_vectors;
995	size_t H_size;
996	struct auth_hash *axf;
997	enum ccp_sha_type engine_type;
998} SHA_definitions[] = {
999	{
1000		.version = SHA1,
1001		.H_vectors = SHA_H.SHA1,
1002		.H_size = sizeof(SHA_H.SHA1),
1003		.axf = &auth_hash_hmac_sha1,
1004		.engine_type = CCP_SHA_TYPE_1,
1005	},
1006#if 0
1007	{
1008		.version = SHA2_224,
1009		.H_vectors = SHA_H.SHA224,
1010		.H_size = sizeof(SHA_H.SHA224),
1011		.axf = &auth_hash_hmac_sha2_224,
1012		.engine_type = CCP_SHA_TYPE_224,
1013	},
1014#endif
1015	{
1016		.version = SHA2_256,
1017		.H_vectors = SHA_H.SHA256,
1018		.H_size = sizeof(SHA_H.SHA256),
1019		.axf = &auth_hash_hmac_sha2_256,
1020		.engine_type = CCP_SHA_TYPE_256,
1021	},
1022	{
1023		.version = SHA2_384,
1024		.H_vectors = SHA_H.SHA384,
1025		.H_size = sizeof(SHA_H.SHA384),
1026		.axf = &auth_hash_hmac_sha2_384,
1027		.engine_type = CCP_SHA_TYPE_384,
1028	},
1029	{
1030		.version = SHA2_512,
1031		.H_vectors = SHA_H.SHA512,
1032		.H_size = sizeof(SHA_H.SHA512),
1033		.axf = &auth_hash_hmac_sha2_512,
1034		.engine_type = CCP_SHA_TYPE_512,
1035	},
1036};
1037
1038static int __must_check
1039ccp_sha_single_desc(struct ccp_queue *qp, const struct SHA_Defn *defn,
1040    vm_paddr_t addr, size_t len, bool start, bool end, uint64_t msgbits)
1041{
1042	struct ccp_desc *desc;
1043
1044	if (ccp_queue_get_ring_space(qp) == 0)
1045		return (EAGAIN);
1046
1047	desc = &qp->desc_ring[qp->cq_tail];
1048
1049	memset(desc, 0, sizeof(*desc));
1050	desc->engine = CCP_ENGINE_SHA;
1051	desc->som = start;
1052	desc->eom = end;
1053
1054	desc->sha.type = defn->engine_type;
1055	desc->length = len;
1056
1057	if (end) {
1058		desc->sha_len_lo = (uint32_t)msgbits;
1059		desc->sha_len_hi = msgbits >> 32;
1060	}
1061
1062	desc->src_lo = (uint32_t)addr;
1063	desc->src_hi = addr >> 32;
1064	desc->src_mem = CCP_MEMTYPE_SYSTEM;
1065
1066	desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_SHA);
1067
1068	qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order);
1069	return (0);
1070}
1071
1072static int __must_check
1073ccp_sha(struct ccp_queue *qp, enum sha_version version, struct sglist *sgl_src,
1074    struct sglist *sgl_dst, const struct ccp_completion_ctx *cctx)
1075{
1076	const struct SHA_Defn *defn;
1077	struct sglist_seg *seg;
1078	size_t i, msgsize, remaining, nb;
1079	uint32_t lsbaddr;
1080	int error;
1081
1082	for (i = 0; i < nitems(SHA_definitions); i++)
1083		if (SHA_definitions[i].version == version)
1084			break;
1085	if (i == nitems(SHA_definitions))
1086		return (EINVAL);
1087	defn = &SHA_definitions[i];
1088
1089	/* XXX validate input ??? */
1090
1091	/* Load initial SHA state into LSB */
1092	/* XXX ensure H_vectors don't span page boundaries */
1093	error = ccp_passthrough(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_SHA),
1094	    CCP_MEMTYPE_SB, pmap_kextract((vm_offset_t)defn->H_vectors),
1095	    CCP_MEMTYPE_SYSTEM, roundup2(defn->H_size, LSB_ENTRY_SIZE),
1096	    CCP_PASSTHRU_BYTESWAP_NOOP, CCP_PASSTHRU_BITWISE_NOOP, false,
1097	    NULL);
1098	if (error != 0)
1099		return (error);
1100
1101	/* Execute series of SHA updates on correctly sized buffers */
1102	msgsize = 0;
1103	for (i = 0; i < sgl_src->sg_nseg; i++) {
1104		seg = &sgl_src->sg_segs[i];
1105		msgsize += seg->ss_len;
1106		error = ccp_sha_single_desc(qp, defn, seg->ss_paddr,
1107		    seg->ss_len, i == 0, i == sgl_src->sg_nseg - 1,
1108		    msgsize << 3);
1109		if (error != 0)
1110			return (error);
1111	}
1112
1113	/* Copy result out to sgl_dst */
1114	remaining = roundup2(defn->H_size, LSB_ENTRY_SIZE);
1115	lsbaddr = ccp_queue_lsb_address(qp, LSB_ENTRY_SHA);
1116	for (i = 0; i < sgl_dst->sg_nseg; i++) {
1117		seg = &sgl_dst->sg_segs[i];
1118		/* crd_len is int, so 32-bit min() is ok. */
1119		nb = min(remaining, seg->ss_len);
1120
1121		error = ccp_passthrough(qp, seg->ss_paddr, CCP_MEMTYPE_SYSTEM,
1122		    lsbaddr, CCP_MEMTYPE_SB, nb, CCP_PASSTHRU_BYTESWAP_NOOP,
1123		    CCP_PASSTHRU_BITWISE_NOOP,
1124		    (cctx != NULL) ? (nb == remaining) : false,
1125		    (nb == remaining) ? cctx : NULL);
1126		if (error != 0)
1127			return (error);
1128
1129		remaining -= nb;
1130		lsbaddr += nb;
1131		if (remaining == 0)
1132			break;
1133	}
1134
1135	return (0);
1136}
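/*
 * Message length example (illustration only): hashing an sgl_src made up of
 * segments of 64, 64, and 20 bytes issues three SHA descriptors; only the
 * last has EOM set, and it carries the total message length in bits,
 * (64 + 64 + 20) << 3 = 1184.
 */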
1137
1138static void
1139byteswap256(uint64_t *buffer)
1140{
1141	uint64_t t;
1142
1143	t = bswap64(buffer[3]);
1144	buffer[3] = bswap64(buffer[0]);
1145	buffer[0] = t;
1146
1147	t = bswap64(buffer[2]);
1148	buffer[2] = bswap64(buffer[1]);
1149	buffer[1] = t;
1150}
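/*
 * byteswap256() treats its argument as one 256-bit value: swapping the outer
 * and inner 64-bit lanes while bswap64()ing each reverses all 32 bytes, so
 * byte 0 of the input ends up as byte 31 of the output and vice versa.
 */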
1151
1152/*
 * Translate CCP internal LSB hash format into a standard hash output.
1154 *
1155 * Manipulates input buffer with byteswap256 operation.
1156 */
1157static void
1158ccp_sha_copy_result(char *output, char *buffer, enum sha_version version)
1159{
1160	const struct SHA_Defn *defn;
1161	size_t i;
1162
1163	for (i = 0; i < nitems(SHA_definitions); i++)
1164		if (SHA_definitions[i].version == version)
1165			break;
1166	if (i == nitems(SHA_definitions))
1167		panic("bogus sha version auth_mode %u\n", (unsigned)version);
1168
1169	defn = &SHA_definitions[i];
1170
1171	/* Swap 256bit manually -- DMA engine can, but with limitations */
1172	byteswap256((void *)buffer);
1173	if (defn->axf->hashsize > LSB_ENTRY_SIZE)
1174		byteswap256((void *)(buffer + LSB_ENTRY_SIZE));
1175
1176	switch (defn->version) {
1177	case SHA1:
1178		memcpy(output, buffer + 12, defn->axf->hashsize);
1179		break;
1180#if 0
1181	case SHA2_224:
1182		memcpy(output, buffer + XXX, defn->axf->hashsize);
1183		break;
1184#endif
1185	case SHA2_256:
1186		memcpy(output, buffer, defn->axf->hashsize);
1187		break;
1188	case SHA2_384:
1189		memcpy(output,
1190		    buffer + LSB_ENTRY_SIZE * 3 - defn->axf->hashsize,
1191		    defn->axf->hashsize - LSB_ENTRY_SIZE);
1192		memcpy(output + defn->axf->hashsize - LSB_ENTRY_SIZE, buffer,
1193		    LSB_ENTRY_SIZE);
1194		break;
1195	case SHA2_512:
1196		memcpy(output, buffer + LSB_ENTRY_SIZE, LSB_ENTRY_SIZE);
1197		memcpy(output + LSB_ENTRY_SIZE, buffer, LSB_ENTRY_SIZE);
1198		break;
1199	}
1200}
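/*
 * Offset example for the SHA2_384 case above (illustration only, assuming
 * 32-byte LSB entries): hashsize is 48, so the first memcpy takes the
 * 48 - 32 = 16 bytes starting at buffer + 96 - 48 = buffer + 48, and the
 * second supplies the remaining 32 bytes from the start of the buffer.
 */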
1201
1202static void
1203ccp_do_hmac_done(struct ccp_queue *qp, struct ccp_session *s,
1204    struct cryptop *crp, struct cryptodesc *crd, int error)
1205{
1206	char ihash[SHA2_512_HASH_LEN /* max hash len */];
1207	union authctx auth_ctx;
1208	struct auth_hash *axf;
1209
1210	axf = s->hmac.auth_hash;
1211
1212	s->pending--;
1213
1214	if (error != 0) {
1215		crp->crp_etype = error;
1216		goto out;
1217	}
1218
1219	/* Do remaining outer hash over small inner hash in software */
1220	axf->Init(&auth_ctx);
1221	axf->Update(&auth_ctx, s->hmac.opad, axf->blocksize);
1222	ccp_sha_copy_result(ihash, s->hmac.ipad, s->hmac.auth_mode);
1223#if 0
1224	INSECURE_DEBUG(dev, "%s sha intermediate=%64D\n", __func__,
1225	    (u_char *)ihash, " ");
1226#endif
1227	axf->Update(&auth_ctx, ihash, axf->hashsize);
1228	axf->Final(s->hmac.ipad, &auth_ctx);
1229
1230	crypto_copyback(crp->crp_flags, crp->crp_buf, crd->crd_inject,
1231	    s->hmac.hash_len, s->hmac.ipad);
1232
1233	/* Avoid leaking key material */
1234	explicit_bzero(&auth_ctx, sizeof(auth_ctx));
1235	explicit_bzero(s->hmac.ipad, sizeof(s->hmac.ipad));
1236	explicit_bzero(s->hmac.opad, sizeof(s->hmac.opad));
1237
1238out:
1239	crypto_done(crp);
1240}
1241
1242static void
1243ccp_hmac_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1244    int error)
1245{
1246	struct cryptodesc *crd;
1247	struct cryptop *crp;
1248
1249	crp = vcrp;
1250	crd = crp->crp_desc;
1251	ccp_do_hmac_done(qp, s, crp, crd, error);
1252}
1253
1254static int __must_check
1255ccp_do_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp,
1256    struct cryptodesc *crd, const struct ccp_completion_ctx *cctx)
1257{
1258	device_t dev;
1259	struct auth_hash *axf;
1260	int error;
1261
1262	dev = qp->cq_softc->dev;
1263	axf = s->hmac.auth_hash;
1264
1265	/*
1266	 * Populate the SGL describing inside hash contents.  We want to hash
1267	 * the ipad (key XOR fixed bit pattern) concatenated with the user
1268	 * data.
1269	 */
1270	sglist_reset(qp->cq_sg_ulptx);
1271	error = sglist_append(qp->cq_sg_ulptx, s->hmac.ipad, axf->blocksize);
1272	if (error != 0)
1273		return (error);
1274	error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1275	    crd->crd_skip, crd->crd_len);
1276	if (error != 0) {
1277		DPRINTF(dev, "%s: sglist too short\n", __func__);
1278		return (error);
1279	}
1280	/* Populate SGL for output -- just reuse hmac.ipad buffer. */
1281	sglist_reset(qp->cq_sg_dst);
1282	error = sglist_append(qp->cq_sg_dst, s->hmac.ipad,
1283	    roundup2(axf->hashsize, LSB_ENTRY_SIZE));
1284	if (error != 0)
1285		return (error);
1286
1287	error = ccp_sha(qp, s->hmac.auth_mode, qp->cq_sg_ulptx, qp->cq_sg_dst,
1288	    cctx);
1289	if (error != 0) {
1290		DPRINTF(dev, "%s: ccp_sha error\n", __func__);
1291		return (error);
1292	}
1293	return (0);
1294}
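/*
 * How the two halves fit together (sketch of the standard construction):
 * HMAC(K, m) = H((K ^ opad) || H((K ^ ipad) || m)).  ccp_do_hmac() queues
 * the long inner hash H((K ^ ipad) || m) on the engine, with the result
 * landing back in hmac.ipad, and ccp_do_hmac_done() finishes the short
 * outer hash over opad and that intermediate digest in software.
 */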
1295
1296int __must_check
1297ccp_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
1298{
1299	struct ccp_completion_ctx ctx;
1300	struct cryptodesc *crd;
1301
1302	crd = crp->crp_desc;
1303
1304	ctx.callback_fn = ccp_hmac_done;
1305	ctx.callback_arg = crp;
1306	ctx.session = s;
1307
1308	return (ccp_do_hmac(qp, s, crp, crd, &ctx));
1309}
1310
1311static void
1312ccp_byteswap(char *data, size_t len)
1313{
1314	size_t i;
1315	char t;
1316
1317	len--;
1318	for (i = 0; i < len; i++, len--) {
1319		t = data[i];
1320		data[i] = data[len];
1321		data[len] = t;
1322	}
1323}
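/*
 * ccp_byteswap() reverses a buffer in place; e.g. { 0x01, 0x02, 0x03, 0x04 }
 * becomes { 0x04, 0x03, 0x02, 0x01 }.  It is used below to hand key and IV
 * material to the engine in the byte order it expects.
 */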
1324
1325static void
1326ccp_blkcipher_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1327    int error)
1328{
1329	struct cryptop *crp;
1330
1331	explicit_bzero(&s->blkcipher, sizeof(s->blkcipher));
1332
1333	crp = vcrp;
1334
1335	s->pending--;
1336
1337	if (error != 0)
1338		crp->crp_etype = error;
1339
1340	DPRINTF(qp->cq_softc->dev, "%s: qp=%p crp=%p\n", __func__, qp, crp);
1341	crypto_done(crp);
1342}
1343
1344static void
1345ccp_collect_iv(struct ccp_session *s, struct cryptop *crp,
1346    struct cryptodesc *crd)
1347{
1348
1349	if (crd->crd_flags & CRD_F_ENCRYPT) {
1350		if (crd->crd_flags & CRD_F_IV_EXPLICIT)
1351			memcpy(s->blkcipher.iv, crd->crd_iv,
1352			    s->blkcipher.iv_len);
1353		else
1354			arc4rand(s->blkcipher.iv, s->blkcipher.iv_len, 0);
1355		if ((crd->crd_flags & CRD_F_IV_PRESENT) == 0)
1356			crypto_copyback(crp->crp_flags, crp->crp_buf,
1357			    crd->crd_inject, s->blkcipher.iv_len,
1358			    s->blkcipher.iv);
1359	} else {
1360		if (crd->crd_flags & CRD_F_IV_EXPLICIT)
1361			memcpy(s->blkcipher.iv, crd->crd_iv,
1362			    s->blkcipher.iv_len);
1363		else
1364			crypto_copydata(crp->crp_flags, crp->crp_buf,
1365			    crd->crd_inject, s->blkcipher.iv_len,
1366			    s->blkcipher.iv);
1367	}
1368
1369	/*
1370	 * If the input IV is 12 bytes, append an explicit counter of 1.
1371	 */
1372	if (crd->crd_alg == CRYPTO_AES_NIST_GCM_16 &&
1373	    s->blkcipher.iv_len == 12) {
1374		*(uint32_t *)&s->blkcipher.iv[12] = htobe32(1);
1375		s->blkcipher.iv_len = AES_BLOCK_LEN;
1376	}
1377
	if (crd->crd_alg == CRYPTO_AES_XTS &&
	    s->blkcipher.iv_len != AES_BLOCK_LEN) {
1379		DPRINTF(NULL, "got ivlen != 16: %u\n", s->blkcipher.iv_len);
1380		if (s->blkcipher.iv_len < AES_BLOCK_LEN)
1381			memset(&s->blkcipher.iv[s->blkcipher.iv_len], 0,
1382			    AES_BLOCK_LEN - s->blkcipher.iv_len);
1383		s->blkcipher.iv_len = AES_BLOCK_LEN;
1384	}
1385
1386	/* Reverse order of IV material for HW */
1387	INSECURE_DEBUG(NULL, "%s: IV: %16D len: %u\n", __func__,
1388	    s->blkcipher.iv, " ", s->blkcipher.iv_len);
1389
1390	/*
1391	 * For unknown reasons, XTS mode expects the IV in the reverse byte
1392	 * order to every other AES mode.
1393	 */
1394	if (crd->crd_alg != CRYPTO_AES_XTS)
1395		ccp_byteswap(s->blkcipher.iv, s->blkcipher.iv_len);
1396}
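/*
 * GCM IV example (illustration only): with the usual 12-byte AES-GCM nonce,
 * the block above extends the IV to the 16-byte counter block
 * nonce || 00 00 00 01 (the "J0" value of NIST SP 800-38D) before the
 * whole thing is byte-reversed for the hardware.
 */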
1397
1398static int __must_check
1399ccp_do_pst_to_lsb(struct ccp_queue *qp, uint32_t lsbaddr, const void *src,
1400    size_t len)
1401{
1402	int error;
1403
1404	sglist_reset(qp->cq_sg_ulptx);
1405	error = sglist_append(qp->cq_sg_ulptx, __DECONST(void *, src), len);
1406	if (error != 0)
1407		return (error);
1408
1409	error = ccp_passthrough_sgl(qp, lsbaddr, true, qp->cq_sg_ulptx, len,
1410	    false, NULL);
1411	return (error);
1412}
1413
1414static int __must_check
1415ccp_do_xts(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp,
1416    struct cryptodesc *crd, enum ccp_cipher_dir dir,
1417    const struct ccp_completion_ctx *cctx)
1418{
1419	struct ccp_desc *desc;
1420	device_t dev;
1421	unsigned i;
1422	enum ccp_xts_unitsize usize;
1423
1424	/* IV and Key data are already loaded */
1425
1426	dev = qp->cq_softc->dev;
1427
1428	for (i = 0; i < nitems(ccp_xts_unitsize_map); i++)
1429		if (ccp_xts_unitsize_map[i].cxu_size == crd->crd_len) {
1430			usize = ccp_xts_unitsize_map[i].cxu_id;
1431			break;
1432		}
1433	if (i >= nitems(ccp_xts_unitsize_map))
1434		return (EINVAL);
1435
1436	for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1437		struct sglist_seg *seg;
1438
1439		seg = &qp->cq_sg_ulptx->sg_segs[i];
1440
1441		desc = &qp->desc_ring[qp->cq_tail];
1442		desc->engine = CCP_ENGINE_XTS_AES;
1443		desc->som = (i == 0);
1444		desc->eom = (i == qp->cq_sg_ulptx->sg_nseg - 1);
1445		desc->ioc = (desc->eom && cctx != NULL);
1446		DPRINTF(dev, "%s: XTS %u: som:%d eom:%d ioc:%d dir:%d\n",
1447		    __func__, qp->cq_tail, (int)desc->som, (int)desc->eom,
1448		    (int)desc->ioc, (int)dir);
1449
1450		if (desc->ioc)
1451			memcpy(&qp->completions_ring[qp->cq_tail], cctx,
1452			    sizeof(*cctx));
1453
1454		desc->aes_xts.encrypt = dir;
1455		desc->aes_xts.type = s->blkcipher.cipher_type;
1456		desc->aes_xts.size = usize;
1457
1458		DPRINTF(dev, "XXX %s: XTS %u: type:%u size:%u\n", __func__,
1459		    qp->cq_tail, (unsigned)desc->aes_xts.type,
1460		    (unsigned)desc->aes_xts.size);
1461
1462		desc->length = seg->ss_len;
1463		desc->src_lo = (uint32_t)seg->ss_paddr;
1464		desc->src_hi = (seg->ss_paddr >> 32);
1465		desc->src_mem = CCP_MEMTYPE_SYSTEM;
1466
1467		/* Crypt in-place */
1468		desc->dst_lo = desc->src_lo;
1469		desc->dst_hi = desc->src_hi;
1470		desc->dst_mem = desc->src_mem;
1471
1472		desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1473		desc->key_hi = 0;
1474		desc->key_mem = CCP_MEMTYPE_SB;
1475
1476		desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1477
1478		qp->cq_tail = (qp->cq_tail + 1) %
1479		    (1 << qp->cq_softc->ring_size_order);
1480	}
1481	return (0);
1482}
1483
1484static int __must_check
1485ccp_do_blkcipher(struct ccp_queue *qp, struct ccp_session *s,
1486    struct cryptop *crp, struct cryptodesc *crd,
1487    const struct ccp_completion_ctx *cctx)
1488{
1489	struct ccp_desc *desc;
1490	char *keydata;
1491	device_t dev;
1492	enum ccp_cipher_dir dir;
1493	int error;
1494	size_t keydata_len;
1495	unsigned i, j;
1496
1497	dev = qp->cq_softc->dev;
1498
1499	if (s->blkcipher.key_len == 0 || crd->crd_len == 0) {
1500		DPRINTF(dev, "%s: empty\n", __func__);
1501		return (EINVAL);
1502	}
1503	if ((crd->crd_len % AES_BLOCK_LEN) != 0) {
1504		DPRINTF(dev, "%s: len modulo: %d\n", __func__, crd->crd_len);
1505		return (EINVAL);
1506	}
1507
1508	/*
1509	 * Individual segments must be multiples of AES block size for the HW
1510	 * to process it.  Non-compliant inputs aren't bogus, just not doable
1511	 * on this hardware.
1512	 */
1513	for (i = 0; i < qp->cq_sg_crp->sg_nseg; i++)
1514		if ((qp->cq_sg_crp->sg_segs[i].ss_len % AES_BLOCK_LEN) != 0) {
1515			DPRINTF(dev, "%s: seg modulo: %zu\n", __func__,
1516			    qp->cq_sg_crp->sg_segs[i].ss_len);
1517			return (EINVAL);
1518		}
1519
1520	/* Gather IV/nonce data */
1521	ccp_collect_iv(s, crp, crd);
1522
1523	if ((crd->crd_flags & CRD_F_ENCRYPT) != 0)
1524		dir = CCP_CIPHER_DIR_ENCRYPT;
1525	else
1526		dir = CCP_CIPHER_DIR_DECRYPT;
1527
1528	/* Set up passthrough op(s) to copy IV into LSB */
1529	error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV),
1530	    s->blkcipher.iv, s->blkcipher.iv_len);
1531	if (error != 0)
1532		return (error);
1533
1534	/*
1535	 * Initialize keydata and keydata_len for GCC.  The default case of the
1536	 * following switch is impossible to reach, but GCC doesn't know that.
1537	 */
1538	keydata_len = 0;
1539	keydata = NULL;
1540
1541	switch (crd->crd_alg) {
1542	case CRYPTO_AES_XTS:
1543		for (j = 0; j < nitems(ccp_xts_unitsize_map); j++)
1544			if (ccp_xts_unitsize_map[j].cxu_size == crd->crd_len)
1545				break;
1546		/* Input buffer must be a supported UnitSize */
1547		if (j >= nitems(ccp_xts_unitsize_map)) {
1548			device_printf(dev, "%s: rejected block size: %u\n",
1549			    __func__, crd->crd_len);
1550			return (EOPNOTSUPP);
1551		}
1552		/* FALLTHROUGH */
1553	case CRYPTO_AES_CBC:
1554	case CRYPTO_AES_ICM:
1555		keydata = s->blkcipher.enckey;
1556		keydata_len = s->blkcipher.key_len;
1557		break;
1558	}
1559
1560	INSECURE_DEBUG(dev, "%s: KEY(%zu): %16D\n", __func__, keydata_len,
1561	    keydata, " ");
1562	if (crd->crd_alg == CRYPTO_AES_XTS)
		INSECURE_DEBUG(dev, "%s: KEY(XTS): %64D\n", __func__,
		    keydata, " ");
1564
1565	/* Reverse order of key material for HW */
1566	ccp_byteswap(keydata, keydata_len);
1567
1568	/* Store key material into LSB to avoid page boundaries */
1569	if (crd->crd_alg == CRYPTO_AES_XTS) {
1570		/*
1571		 * XTS mode uses 2 256-bit vectors for the primary key and the
1572		 * tweak key.  For 128-bit keys, the vectors are zero-padded.
1573		 *
1574		 * After byteswapping the combined OCF-provided K1:K2 vector
1575		 * above, we need to reverse the order again so the hardware
1576		 * gets the swapped keys in the order K1':K2'.
1577		 */
1578		error = ccp_do_pst_to_lsb(qp,
1579		    ccp_queue_lsb_address(qp, LSB_ENTRY_KEY + 1), keydata,
1580		    keydata_len / 2);
1581		if (error != 0)
1582			return (error);
1583		error = ccp_do_pst_to_lsb(qp,
1584		    ccp_queue_lsb_address(qp, LSB_ENTRY_KEY),
1585		    keydata + (keydata_len / 2), keydata_len / 2);
1586
1587		/* Zero-pad 128 bit keys */
1588		if (keydata_len == 32) {
1589			if (error != 0)
1590				return (error);
1591			error = ccp_do_pst_to_lsb(qp,
1592			    ccp_queue_lsb_address(qp, LSB_ENTRY_KEY) +
1593			    keydata_len / 2, g_zeroes, keydata_len / 2);
1594			if (error != 0)
1595				return (error);
1596			error = ccp_do_pst_to_lsb(qp,
1597			    ccp_queue_lsb_address(qp, LSB_ENTRY_KEY + 1) +
1598			    keydata_len / 2, g_zeroes, keydata_len / 2);
1599		}
1600	} else
1601		error = ccp_do_pst_to_lsb(qp,
1602		    ccp_queue_lsb_address(qp, LSB_ENTRY_KEY), keydata,
1603		    keydata_len);
1604	if (error != 0)
1605		return (error);
1606
1607	/*
1608	 * Point SGLs at the subset of cryptop buffer contents representing the
1609	 * data.
1610	 */
1611	sglist_reset(qp->cq_sg_ulptx);
1612	error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1613	    crd->crd_skip, crd->crd_len);
1614	if (error != 0)
1615		return (error);
1616
1617	INSECURE_DEBUG(dev, "%s: Contents: %16D\n", __func__,
1618	    (void *)PHYS_TO_DMAP(qp->cq_sg_ulptx->sg_segs[0].ss_paddr), " ");
1619
1620	DPRINTF(dev, "%s: starting AES ops @ %u\n", __func__, qp->cq_tail);
1621
1622	if (ccp_queue_get_ring_space(qp) < qp->cq_sg_ulptx->sg_nseg)
1623		return (EAGAIN);
1624
1625	if (crd->crd_alg == CRYPTO_AES_XTS)
1626		return (ccp_do_xts(qp, s, crp, crd, dir, cctx));
1627
1628	for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1629		struct sglist_seg *seg;
1630
1631		seg = &qp->cq_sg_ulptx->sg_segs[i];
1632
1633		desc = &qp->desc_ring[qp->cq_tail];
1634		desc->engine = CCP_ENGINE_AES;
1635		desc->som = (i == 0);
1636		desc->eom = (i == qp->cq_sg_ulptx->sg_nseg - 1);
1637		desc->ioc = (desc->eom && cctx != NULL);
1638		DPRINTF(dev, "%s: AES %u: som:%d eom:%d ioc:%d dir:%d\n",
1639		    __func__, qp->cq_tail, (int)desc->som, (int)desc->eom,
1640		    (int)desc->ioc, (int)dir);
1641
1642		if (desc->ioc)
1643			memcpy(&qp->completions_ring[qp->cq_tail], cctx,
1644			    sizeof(*cctx));
1645
1646		desc->aes.encrypt = dir;
1647		desc->aes.mode = s->blkcipher.cipher_mode;
1648		desc->aes.type = s->blkcipher.cipher_type;
1649		if (crd->crd_alg == CRYPTO_AES_ICM)
			/*
			 * Size of the CTR value in bits, minus one.  ICM mode
			 * uses all 128 bits as the counter.
			 */
1654			desc->aes.size = 127;
1655
1656		DPRINTF(dev, "%s: AES %u: mode:%u type:%u size:%u\n", __func__,
1657		    qp->cq_tail, (unsigned)desc->aes.mode,
1658		    (unsigned)desc->aes.type, (unsigned)desc->aes.size);
1659
1660		desc->length = seg->ss_len;
1661		desc->src_lo = (uint32_t)seg->ss_paddr;
1662		desc->src_hi = (seg->ss_paddr >> 32);
1663		desc->src_mem = CCP_MEMTYPE_SYSTEM;
1664
1665		/* Crypt in-place */
1666		desc->dst_lo = desc->src_lo;
1667		desc->dst_hi = desc->src_hi;
1668		desc->dst_mem = desc->src_mem;
1669
1670		desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1671		desc->key_hi = 0;
1672		desc->key_mem = CCP_MEMTYPE_SB;
1673
1674		desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1675
1676		qp->cq_tail = (qp->cq_tail + 1) %
1677		    (1 << qp->cq_softc->ring_size_order);
1678	}
1679	return (0);
1680}
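/*
 * XTS key layout example (illustration only): OCF hands the driver K1 || K2
 * (64 bytes for AES-256-XTS, 32 bytes for AES-128-XTS).  After
 * ccp_byteswap() the buffer holds reverse(K2) || reverse(K1), so the first
 * half is loaded into LSB_ENTRY_KEY + 1 and the second half into
 * LSB_ENTRY_KEY; for 128-bit keys each 16-byte half is then zero-padded out
 * to its full 32-byte LSB entry.
 */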
1681
1682int __must_check
1683ccp_blkcipher(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
1684{
1685	struct ccp_completion_ctx ctx;
1686	struct cryptodesc *crd;
1687
1688	crd = crp->crp_desc;
1689
1690	ctx.callback_fn = ccp_blkcipher_done;
1691	ctx.session = s;
1692	ctx.callback_arg = crp;
1693
1694	return (ccp_do_blkcipher(qp, s, crp, crd, &ctx));
1695}
1696
1697static void
1698ccp_authenc_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1699    int error)
1700{
1701	struct cryptodesc *crda;
1702	struct cryptop *crp;
1703
1704	explicit_bzero(&s->blkcipher, sizeof(s->blkcipher));
1705
1706	crp = vcrp;
1707	if (s->cipher_first)
1708		crda = crp->crp_desc->crd_next;
1709	else
1710		crda = crp->crp_desc;
1711
1712	ccp_do_hmac_done(qp, s, crp, crda, error);
1713}
1714
1715int __must_check
1716ccp_authenc(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp,
1717    struct cryptodesc *crda, struct cryptodesc *crde)
1718{
1719	struct ccp_completion_ctx ctx;
1720	int error;
1721
1722	ctx.callback_fn = ccp_authenc_done;
1723	ctx.session = s;
1724	ctx.callback_arg = crp;
1725
1726	/* Perform first operation */
1727	if (s->cipher_first)
1728		error = ccp_do_blkcipher(qp, s, crp, crde, NULL);
1729	else
1730		error = ccp_do_hmac(qp, s, crp, crda, NULL);
1731	if (error != 0)
1732		return (error);
1733
1734	/* Perform second operation */
1735	if (s->cipher_first)
1736		error = ccp_do_hmac(qp, s, crp, crda, &ctx);
1737	else
1738		error = ccp_do_blkcipher(qp, s, crp, crde, &ctx);
1739	return (error);
1740}
1741
1742static int __must_check
1743ccp_do_ghash_aad(struct ccp_queue *qp, struct ccp_session *s)
1744{
1745	struct ccp_desc *desc;
1746	struct sglist_seg *seg;
1747	unsigned i;
1748
1749	if (ccp_queue_get_ring_space(qp) < qp->cq_sg_ulptx->sg_nseg)
1750		return (EAGAIN);
1751
1752	for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1753		seg = &qp->cq_sg_ulptx->sg_segs[i];
1754
1755		desc = &qp->desc_ring[qp->cq_tail];
1756
1757		desc->engine = CCP_ENGINE_AES;
1758		desc->aes.mode = CCP_AES_MODE_GHASH;
1759		desc->aes.type = s->blkcipher.cipher_type;
1760		desc->aes.encrypt = CCP_AES_MODE_GHASH_AAD;
1761
1762		desc->som = (i == 0);
1763		desc->length = seg->ss_len;
1764
1765		desc->src_lo = (uint32_t)seg->ss_paddr;
1766		desc->src_hi = (seg->ss_paddr >> 32);
1767		desc->src_mem = CCP_MEMTYPE_SYSTEM;
1768
1769		desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1770
1771		desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1772		desc->key_mem = CCP_MEMTYPE_SB;
1773
1774		qp->cq_tail = (qp->cq_tail + 1) %
1775		    (1 << qp->cq_softc->ring_size_order);
1776	}
1777	return (0);
1778}
1779
1780static int __must_check
1781ccp_do_gctr(struct ccp_queue *qp, struct ccp_session *s,
1782    enum ccp_cipher_dir dir, struct sglist_seg *seg, bool som, bool eom)
1783{
1784	struct ccp_desc *desc;
1785
1786	if (ccp_queue_get_ring_space(qp) == 0)
1787		return (EAGAIN);
1788
1789	desc = &qp->desc_ring[qp->cq_tail];
1790
1791	desc->engine = CCP_ENGINE_AES;
1792	desc->aes.mode = CCP_AES_MODE_GCTR;
1793	desc->aes.type = s->blkcipher.cipher_type;
1794	desc->aes.encrypt = dir;
1795	desc->aes.size = 8 * (seg->ss_len % GMAC_BLOCK_LEN) - 1;
1796
1797	desc->som = som;
1798	desc->eom = eom;
1799
1800	/* Trailing bytes will be masked off by aes.size above. */
1801	desc->length = roundup2(seg->ss_len, GMAC_BLOCK_LEN);
1802
1803	desc->dst_lo = desc->src_lo = (uint32_t)seg->ss_paddr;
1804	desc->dst_hi = desc->src_hi = seg->ss_paddr >> 32;
1805	desc->dst_mem = desc->src_mem = CCP_MEMTYPE_SYSTEM;
1806
1807	desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1808
1809	desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1810	desc->key_mem = CCP_MEMTYPE_SB;
1811
1812	qp->cq_tail = (qp->cq_tail + 1) %
1813	    (1 << qp->cq_softc->ring_size_order);
1814	return (0);
1815}
1816
1817static int __must_check
1818ccp_do_ghash_final(struct ccp_queue *qp, struct ccp_session *s)
1819{
1820	struct ccp_desc *desc;
1821
1822	if (ccp_queue_get_ring_space(qp) == 0)
1823		return (EAGAIN);
1824
1825	desc = &qp->desc_ring[qp->cq_tail];
1826
1827	desc->engine = CCP_ENGINE_AES;
1828	desc->aes.mode = CCP_AES_MODE_GHASH;
1829	desc->aes.type = s->blkcipher.cipher_type;
1830	desc->aes.encrypt = CCP_AES_MODE_GHASH_FINAL;
1831
1832	desc->length = GMAC_BLOCK_LEN;
1833
1834	desc->src_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH_IN);
1835	desc->src_mem = CCP_MEMTYPE_SB;
1836
1837	desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1838
1839	desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1840	desc->key_mem = CCP_MEMTYPE_SB;
1841
1842	desc->dst_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH);
1843	desc->dst_mem = CCP_MEMTYPE_SB;
1844
1845	qp->cq_tail = (qp->cq_tail + 1) %
1846	    (1 << qp->cq_softc->ring_size_order);
1847	return (0);
1848}
1849
1850static void
1851ccp_gcm_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1852    int error)
1853{
1854	char tag[GMAC_DIGEST_LEN];
1855	struct cryptodesc *crde, *crda;
1856	struct cryptop *crp;
1857
1858	crp = vcrp;
1859	if (s->cipher_first) {
1860		crde = crp->crp_desc;
1861		crda = crp->crp_desc->crd_next;
1862	} else {
1863		crde = crp->crp_desc->crd_next;
1864		crda = crp->crp_desc;
1865	}
1866
1867	s->pending--;
1868
1869	if (error != 0) {
1870		crp->crp_etype = error;
1871		goto out;
1872	}
1873
1874	/* Encrypt is done.  Decrypt needs to verify tag. */
1875	if ((crde->crd_flags & CRD_F_ENCRYPT) != 0)
1876		goto out;
1877
1878	/* Copy in message tag. */
1879	crypto_copydata(crp->crp_flags, crp->crp_buf, crda->crd_inject,
1880	    sizeof(tag), tag);
1881
1882	/* Verify tag against computed GMAC */
1883	if (timingsafe_bcmp(tag, s->gmac.final_block, s->gmac.hash_len) != 0)
1884		crp->crp_etype = EBADMSG;
1885
1886out:
1887	explicit_bzero(&s->blkcipher, sizeof(s->blkcipher));
1888	explicit_bzero(&s->gmac, sizeof(s->gmac));
1889	crypto_done(crp);
1890}
1891
1892int __must_check
1893ccp_gcm(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp,
1894    struct cryptodesc *crda, struct cryptodesc *crde)
1895{
1896	struct ccp_completion_ctx ctx;
1897	enum ccp_cipher_dir dir;
1898	device_t dev;
1899	unsigned i;
1900	int error;
1901
1902	if (s->blkcipher.key_len == 0)
1903		return (EINVAL);
1904
1905	/*
1906	 * AAD is only permitted before the cipher/plain text, not
1907	 * after.
1908	 */
1909	if (crda->crd_len + crda->crd_skip > crde->crd_len + crde->crd_skip)
1910		return (EINVAL);
1911
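	/*
	 * The request is issued as one chain of descriptors: passthrough
	 * loads of the IV/GHASH context, key, and lengths block into the
	 * LSB; a GHASH pass over the AAD (if any); a GCTR pass per payload
	 * segment; a reload of the initial IV; the final GHASH over the
	 * lengths block; and a passthrough that copies the tag back out.
	 */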
1912	dev = qp->cq_softc->dev;
1913
1914	if ((crde->crd_flags & CRD_F_ENCRYPT) != 0)
1915		dir = CCP_CIPHER_DIR_ENCRYPT;
1916	else
1917		dir = CCP_CIPHER_DIR_DECRYPT;
1918
1919	/* Zero initial GHASH portion of context */
1920	memset(s->blkcipher.iv, 0, sizeof(s->blkcipher.iv));
1921
1922	/* Gather IV data */
1923	ccp_collect_iv(s, crp, crde);
1924
1925	/* Reverse order of key material for HW */
1926	ccp_byteswap(s->blkcipher.enckey, s->blkcipher.key_len);
1927
1928	/* Prepare input buffer of concatenated lengths for final GHASH */
1929	be64enc(s->gmac.final_block, (uint64_t)crda->crd_len * 8);
1930	be64enc(&s->gmac.final_block[8], (uint64_t)crde->crd_len * 8);
1931
1932	/* Send IV + initial zero GHASH, key data, and lengths buffer to LSB */
1933	error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV),
1934	    s->blkcipher.iv, 32);
1935	if (error != 0)
1936		return (error);
1937	error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_KEY),
1938	    s->blkcipher.enckey, s->blkcipher.key_len);
1939	if (error != 0)
1940		return (error);
1941	error = ccp_do_pst_to_lsb(qp,
1942	    ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH_IN), s->gmac.final_block,
1943	    GMAC_BLOCK_LEN);
1944	if (error != 0)
1945		return (error);
1946
1947	/* First step - compute GHASH over AAD */
1948	if (crda->crd_len != 0) {
1949		sglist_reset(qp->cq_sg_ulptx);
1950		error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1951		    crda->crd_skip, crda->crd_len);
1952		if (error != 0)
1953			return (error);
1954
1955		/* AAD segments must be a multiple of the GMAC block size. */
1956		for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++)
1957			if ((qp->cq_sg_ulptx->sg_segs[i].ss_len %
1958			    GMAC_BLOCK_LEN) != 0) {
1959				DPRINTF(dev, "%s: AAD seg modulo: %zu\n",
1960				    __func__,
1961				    qp->cq_sg_ulptx->sg_segs[i].ss_len);
1962				return (EINVAL);
1963			}
1964
1965		error = ccp_do_ghash_aad(qp, s);
1966		if (error != 0)
1967			return (error);
1968	}
1969
1970	/* Feed data piece by piece into GCTR */
1971	sglist_reset(qp->cq_sg_ulptx);
1972	error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1973	    crde->crd_skip, crde->crd_len);
1974	if (error != 0)
1975		return (error);
1976
1977	/*
1978	 * All segments except the last must be even multiples of the AES block
1979	 * size for the HW to process them.  Non-compliant inputs aren't bogus,
1980	 * just not doable on this hardware.
1981	 *
1982	 * XXX: Well, the hardware will produce a valid tag for shorter final
1983	 * segment inputs, but it will still write out a block-sized plaintext
1984	 * or ciphertext chunk.  For a typical CRP this tramples trailing data,
1985	 * including the provided message tag.  So, reject such inputs for now.
1986	 */
1987	for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++)
1988		if ((qp->cq_sg_ulptx->sg_segs[i].ss_len % AES_BLOCK_LEN) != 0) {
1989			DPRINTF(dev, "%s: seg modulo: %zu\n", __func__,
1990			    qp->cq_sg_ulptx->sg_segs[i].ss_len);
1991			return (EINVAL);
1992		}
1993
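	/*
	 * SOM is set only on the first payload segment and only if no AAD
	 * descriptors were queued above (those already carried SOM); EOM
	 * marks the last payload segment.
	 */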
1994	for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1995		struct sglist_seg *seg;
1996
1997		seg = &qp->cq_sg_ulptx->sg_segs[i];
1998		error = ccp_do_gctr(qp, s, dir, seg,
1999		    (i == 0 && crda->crd_len == 0),
2000		    i == (qp->cq_sg_ulptx->sg_nseg - 1));
2001		if (error != 0)
2002			return (error);
2003	}
2004
2005	/* Send just initial IV (not GHASH!) to LSB again */
2006	error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV),
2007	    s->blkcipher.iv, s->blkcipher.iv_len);
2008	if (error != 0)
2009		return (error);
2010
2011	ctx.callback_fn = ccp_gcm_done;
2012	ctx.session = s;
2013	ctx.callback_arg = crp;
2014
2015	/* Compute final hash and copy result back */
2016	error = ccp_do_ghash_final(qp, s);
2017	if (error != 0)
2018		return (error);
2019
2020	/* When encrypting, copy computed tag out to caller buffer. */
2021	sglist_reset(qp->cq_sg_ulptx);
2022	if (dir == CCP_CIPHER_DIR_ENCRYPT)
2023		error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
2024		    crda->crd_inject, s->gmac.hash_len);
2025	else
2026		/*
2027		 * For decrypting, copy the computed tag out to our session
2028		 * buffer to verify in our callback.
2029		 */
2030		error = sglist_append(qp->cq_sg_ulptx, s->gmac.final_block,
2031		    s->gmac.hash_len);
2032	if (error != 0)
2033		return (error);
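	/*
	 * Copy the computed tag out of LSB_ENTRY_GHASH through the sglist
	 * built above.  The completion context passed here is what
	 * eventually invokes ccp_gcm_done() once the chain has completed.
	 */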
2034	error = ccp_passthrough_sgl(qp,
2035	    ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH), false, qp->cq_sg_ulptx,
2036	    s->gmac.hash_len, true, &ctx);
2037	return (error);
2038}
2039
2040#define MAX_TRNG_RETRIES	10
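/*
 * Harvest entropy from the hardware TRNG, 32 bits at a time.  A read of
 * zero is treated as "no entropy available yet" and retried up to
 * MAX_TRNG_RETRIES times; if the register never becomes nonzero, report
 * that nothing was harvested.
 */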
2041u_int
2042random_ccp_read(void *v, u_int c)
2043{
2044	uint32_t *buf;
2045	u_int i, j;
2046
2047	KASSERT(c % sizeof(*buf) == 0, ("%u not multiple of uint32_t", c));
2048
2049	buf = v;
2050	for (i = c; i > 0; i -= sizeof(*buf)) {
2051		for (j = 0; j < MAX_TRNG_RETRIES; j++) {
2052			*buf = ccp_read_4(g_ccp_softc, TRNG_OUT_OFFSET);
2053			if (*buf != 0)
2054				break;
2055		}
2056		if (j == MAX_TRNG_RETRIES)
2057			return (0);
2058		buf++;
2059	}
2060	return (c);
2062}
2063
2064#ifdef DDB
2065void
2066db_ccp_show_hw(struct ccp_softc *sc)
2067{
2068
2069	db_printf("  queue mask: 0x%x\n",
2070	    ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET));
2071	db_printf("  queue prio: 0x%x\n",
2072	    ccp_read_4(sc, CMD_QUEUE_PRIO_OFFSET));
2073	db_printf("  reqid: 0x%x\n", ccp_read_4(sc, CMD_REQID_CONFIG_OFFSET));
2074	db_printf("  trng output: 0x%x\n", ccp_read_4(sc, TRNG_OUT_OFFSET));
2075	db_printf("  cmd timeout: 0x%x\n",
2076	    ccp_read_4(sc, CMD_CMD_TIMEOUT_OFFSET));
2077	db_printf("  lsb public mask lo: 0x%x\n",
2078	    ccp_read_4(sc, LSB_PUBLIC_MASK_LO_OFFSET));
2079	db_printf("  lsb public mask hi: 0x%x\n",
2080	    ccp_read_4(sc, LSB_PUBLIC_MASK_HI_OFFSET));
2081	db_printf("  lsb private mask lo: 0x%x\n",
2082	    ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET));
2083	db_printf("  lsb private mask hi: 0x%x\n",
2084	    ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET));
2085	db_printf("  version: 0x%x\n", ccp_read_4(sc, VERSION_REG));
2086}
2087
2088void
2089db_ccp_show_queue_hw(struct ccp_queue *qp)
2090{
2091	const struct ccp_error_code *ec;
2092	struct ccp_softc *sc;
2093	uint32_t status, error, esource, faultblock, headlo, qcontrol;
2094	unsigned q, i;
2095
2096	sc = qp->cq_softc;
2097	q = qp->cq_qindex;
2098
2099	qcontrol = ccp_read_queue_4(sc, q, CMD_Q_CONTROL_BASE);
2100	db_printf("  qcontrol: 0x%x%s%s\n", qcontrol,
2101	    (qcontrol & CMD_Q_RUN) ? " RUN" : "",
2102	    (qcontrol & CMD_Q_HALTED) ? " HALTED" : "");
2103	db_printf("  tail_lo: 0x%x\n",
2104	    ccp_read_queue_4(sc, q, CMD_Q_TAIL_LO_BASE));
2105	headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE);
2106	db_printf("  head_lo: 0x%x\n", headlo);
2107	db_printf("  int enable: 0x%x\n",
2108	    ccp_read_queue_4(sc, q, CMD_Q_INT_ENABLE_BASE));
2109	db_printf("  interrupt status: 0x%x\n",
2110	    ccp_read_queue_4(sc, q, CMD_Q_INTERRUPT_STATUS_BASE));
2111	status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE);
2112	db_printf("  status: 0x%x\n", status);
2113	db_printf("  int stats: 0x%x\n",
2114	    ccp_read_queue_4(sc, q, CMD_Q_INT_STATUS_BASE));
2115
2116	error = status & STATUS_ERROR_MASK;
2117	if (error == 0)
2118		return;
2119
2120	esource = (status >> STATUS_ERRORSOURCE_SHIFT) &
2121	    STATUS_ERRORSOURCE_MASK;
2122	faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) &
2123	    STATUS_VLSB_FAULTBLOCK_MASK;
2124
2125	ec = NULL;
2126	for (i = 0; i < nitems(ccp_error_codes); i++)
2127		if (ccp_error_codes[i].ce_code == error)
2128			break;
2129	if (i < nitems(ccp_error_codes))
2130		ec = &ccp_error_codes[i];
2131
2132	db_printf("  Error: %s (%u) Source: %u Faulting LSB block: %u\n",
2133	    (ec != NULL) ? ec->ce_name : "(reserved)", error, esource,
2134	    faultblock);
2135	if (ec != NULL)
2136		db_printf("  Error description: %s\n", ec->ce_desc);
2137
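	/*
	 * headlo holds the low 32 bits of the bus address of the descriptor
	 * at the queue head; convert it back into a ring index so the
	 * faulting descriptor can be dumped.
	 */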
2138	i = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE;
2139	db_printf("  Bad descriptor idx: %u contents:\n  %32D\n", i,
2140	    (void *)&qp->desc_ring[i], " ");
2141}
2142#endif
2143