1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2017 Chelsio Communications, Inc.
5 * Copyright (c) 2017 Conrad Meyer <cem@FreeBSD.org>
6 * All rights reserved.
7 * Largely borrowed from ccr(4), Written by: John Baldwin <jhb@FreeBSD.org>
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD$");
33
34#include "opt_ddb.h"
35
36#include <sys/param.h>
37#include <sys/bus.h>
38#include <sys/lock.h>
39#include <sys/kernel.h>
40#include <sys/malloc.h>
41#include <sys/mutex.h>
42#include <sys/module.h>
43#include <sys/rman.h>
44#include <sys/sglist.h>
45#include <sys/sysctl.h>
46
47#ifdef DDB
48#include <ddb/ddb.h>
49#endif
50
51#include <dev/pci/pcireg.h>
52#include <dev/pci/pcivar.h>
53
54#include <machine/bus.h>
55#include <machine/resource.h>
56#include <machine/vmparam.h>
57
58#include <opencrypto/cryptodev.h>
59#include <opencrypto/xform.h>
60
61#include <vm/vm.h>
62#include <vm/pmap.h>
63
64#include "cryptodev_if.h"
65
66#include "ccp.h"
67#include "ccp_hardware.h"
68#include "ccp_lsb.h"
69
70CTASSERT(sizeof(struct ccp_desc) == 32);
71
72static struct ccp_xts_unitsize_map_entry {
73	enum ccp_xts_unitsize cxu_id;
74	unsigned cxu_size;
75} ccp_xts_unitsize_map[] = {
76	{ CCP_XTS_AES_UNIT_SIZE_16, 16 },
77	{ CCP_XTS_AES_UNIT_SIZE_512, 512 },
78	{ CCP_XTS_AES_UNIT_SIZE_1024, 1024 },
79	{ CCP_XTS_AES_UNIT_SIZE_2048, 2048 },
80	{ CCP_XTS_AES_UNIT_SIZE_4096, 4096 },
81};
82
83SYSCTL_NODE(_hw, OID_AUTO, ccp, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
84    "ccp node");
85
86unsigned g_ccp_ring_order = 11;
87SYSCTL_UINT(_hw_ccp, OID_AUTO, ring_order, CTLFLAG_RDTUN, &g_ccp_ring_order,
88    0, "Set CCP ring order.  (1 << this) == ring size.  Min: 6, Max: 16");
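/*
 * For example, the default order of 11 gives a 2048-descriptor ring
 * (2048 * 32-byte descriptors == 64 kB).
 */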
89
/*
 * Zero buffer, sufficient for padding LSB entries; its size and alignment
 * ensure it does not span a page boundary.
 */
94static const char g_zeroes[32] __aligned(32);
95
96static inline uint32_t
97ccp_read_4(struct ccp_softc *sc, uint32_t offset)
98{
99	return (bus_space_read_4(sc->pci_bus_tag, sc->pci_bus_handle, offset));
100}
101
102static inline void
103ccp_write_4(struct ccp_softc *sc, uint32_t offset, uint32_t value)
104{
105	bus_space_write_4(sc->pci_bus_tag, sc->pci_bus_handle, offset, value);
106}
107
108static inline uint32_t
109ccp_read_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset)
110{
111	/*
112	 * Each queue gets its own 4kB register space.  Queue 0 is at 0x1000.
113	 */
114	return (ccp_read_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset));
115}
116
117static inline void
118ccp_write_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset,
119    uint32_t value)
120{
121	ccp_write_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset, value);
122}
123
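/*
 * Update the hardware tail pointer to the bus address of the queue's current
 * software tail descriptor.
 */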
124void
125ccp_queue_write_tail(struct ccp_queue *qp)
126{
127	ccp_write_queue_4(qp->cq_softc, qp->cq_qindex, CMD_Q_TAIL_LO_BASE,
128	    ((uint32_t)qp->desc_ring_bus_addr) + (Q_DESC_SIZE * qp->cq_tail));
129}
130
131/*
132 * Given a queue and a reserved LSB entry index, compute the LSB *entry id* of
133 * that entry for the queue's private LSB region.
134 */
135static inline uint8_t
136ccp_queue_lsb_entry(struct ccp_queue *qp, unsigned lsb_entry)
137{
138	return ((qp->private_lsb * LSB_REGION_LENGTH + lsb_entry));
139}
140
141/*
142 * Given a queue and a reserved LSB entry index, compute the LSB *address* of
143 * that entry for the queue's private LSB region.
144 */
145static inline uint32_t
146ccp_queue_lsb_address(struct ccp_queue *qp, unsigned lsb_entry)
147{
148	return (ccp_queue_lsb_entry(qp, lsb_entry) * LSB_ENTRY_SIZE);
149}
150
151/*
152 * Some terminology:
153 *
154 * LSB - Local Storage Block
155 * =========================
156 *
157 * 8 segments/regions, each containing 16 entries.
158 *
159 * Each entry contains 256 bits (32 bytes).
160 *
161 * Segments are virtually addressed in commands, but accesses cannot cross
162 * segment boundaries.  Virtual map uses an identity mapping by default
163 * (virtual segment N corresponds to physical segment N).
164 *
165 * Access to a physical region can be restricted to any subset of all five
166 * queues.
167 *
168 * "Pass-through" mode
169 * ===================
170 *
171 * Pass-through is a generic DMA engine, much like ioat(4).  Some nice
172 * features:
173 *
174 * - Supports byte-swapping for endian conversion (32- or 256-bit words)
175 * - AND, OR, XOR with fixed 256-bit mask
176 * - CRC32 of data (may be used in tandem with bswap, but not bit operations)
177 * - Read/write of LSB
178 * - Memset
179 *
180 * If bit manipulation mode is enabled, input must be a multiple of 256 bits
181 * (32 bytes).
182 *
183 * If byte-swapping is enabled, input must be a multiple of the word size.
184 *
185 * Zlib mode -- only usable from one queue at a time, single job at a time.
186 * ========================================================================
187 *
188 * Only usable from private host, aka PSP?  Not host processor?
189 *
190 * RNG.
191 * ====
192 *
193 * Raw bits are conditioned with AES and fed through CTR_DRBG.  Output goes in
194 * a ring buffer readable by software.
195 *
196 * NIST SP 800-90B Repetition Count and Adaptive Proportion health checks are
197 * implemented on the raw input stream and may be enabled to verify min-entropy
198 * of 0.5 bits per bit.
199 */
200
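/*
 * bus_dmamap_load(9) callback: record the bus address of the single segment
 * backing the descriptor ring.
 */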
201static void
202ccp_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
203{
204	bus_addr_t *baddr;
205
206	KASSERT(error == 0, ("%s: error:%d", __func__, error));
207	baddr = arg;
208	*baddr = segs->ds_addr;
209}
210
211static int
212ccp_hw_attach_queue(device_t dev, uint64_t lsbmask, unsigned queue)
213{
214	struct ccp_softc *sc;
215	struct ccp_queue *qp;
216	void *desc;
217	size_t ringsz, num_descriptors;
218	int error;
219
220	desc = NULL;
221	sc = device_get_softc(dev);
222	qp = &sc->queues[queue];
223
224	/*
225	 * Don't bother allocating a ring for queues the host isn't allowed to
226	 * drive.
227	 */
228	if ((sc->valid_queues & (1 << queue)) == 0)
229		return (0);
230
231	ccp_queue_decode_lsb_regions(sc, lsbmask, queue);
232
233	/* Ignore queues that do not have any LSB access. */
234	if (qp->lsb_mask == 0) {
235		device_printf(dev, "Ignoring queue %u with no LSB access\n",
236		    queue);
237		sc->valid_queues &= ~(1 << queue);
238		return (0);
239	}
240
241	num_descriptors = 1 << sc->ring_size_order;
242	ringsz = sizeof(struct ccp_desc) * num_descriptors;
243
244	/*
245	 * "Queue_Size" is order - 1.
246	 *
247	 * Queue must be aligned to 5+Queue_Size+1 == 5 + order bits.
248	 */
249	error = bus_dma_tag_create(bus_get_dma_tag(dev),
250	    1 << (5 + sc->ring_size_order),
251#if defined(__i386__) && !defined(PAE)
252	    0, BUS_SPACE_MAXADDR,
253#else
254	    (bus_addr_t)1 << 32, BUS_SPACE_MAXADDR_48BIT,
255#endif
256	    BUS_SPACE_MAXADDR, NULL, NULL, ringsz, 1,
257	    ringsz, 0, NULL, NULL, &qp->ring_desc_tag);
258	if (error != 0)
259		goto out;
260
261	error = bus_dmamem_alloc(qp->ring_desc_tag, &desc,
262	    BUS_DMA_ZERO | BUS_DMA_WAITOK, &qp->ring_desc_map);
263	if (error != 0)
264		goto out;
265
266	error = bus_dmamap_load(qp->ring_desc_tag, qp->ring_desc_map, desc,
267	    ringsz, ccp_dmamap_cb, &qp->desc_ring_bus_addr, BUS_DMA_WAITOK);
268	if (error != 0)
269		goto out;
270
271	qp->desc_ring = desc;
272	qp->completions_ring = malloc(num_descriptors *
273	    sizeof(*qp->completions_ring), M_CCP, M_ZERO | M_WAITOK);
274
275	/* Zero control register; among other things, clears the RUN flag. */
276	qp->qcontrol = 0;
277	ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol);
278	ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE, 0);
279
280	/* Clear any leftover interrupt status flags */
281	ccp_write_queue_4(sc, queue, CMD_Q_INTERRUPT_STATUS_BASE,
282	    ALL_INTERRUPTS);
283
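	/* Queue_Size field encodes log2(ring size) - 1, i.e. order - 1. */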
284	qp->qcontrol |= (sc->ring_size_order - 1) << CMD_Q_SIZE_SHIFT;
285
286	ccp_write_queue_4(sc, queue, CMD_Q_TAIL_LO_BASE,
287	    (uint32_t)qp->desc_ring_bus_addr);
288	ccp_write_queue_4(sc, queue, CMD_Q_HEAD_LO_BASE,
289	    (uint32_t)qp->desc_ring_bus_addr);
290
291	/*
292	 * Enable completion interrupts, as well as error or administrative
293	 * halt interrupts.  We don't use administrative halts, but they
294	 * shouldn't trip unless we do, so it ought to be harmless.
295	 */
296	ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE,
297	    INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED);
298
299	qp->qcontrol |= (qp->desc_ring_bus_addr >> 32) << CMD_Q_PTR_HI_SHIFT;
300	qp->qcontrol |= CMD_Q_RUN;
301	ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol);
302
303out:
304	if (error != 0) {
305		if (qp->desc_ring != NULL)
306			bus_dmamap_unload(qp->ring_desc_tag,
307			    qp->ring_desc_map);
308		if (desc != NULL)
309			bus_dmamem_free(qp->ring_desc_tag, desc,
310			    qp->ring_desc_map);
311		if (qp->ring_desc_tag != NULL)
312			bus_dma_tag_destroy(qp->ring_desc_tag);
313	}
314	return (error);
315}
316
317static void
318ccp_hw_detach_queue(device_t dev, unsigned queue)
319{
320	struct ccp_softc *sc;
321	struct ccp_queue *qp;
322
323	sc = device_get_softc(dev);
324	qp = &sc->queues[queue];
325
	/*
	 * No ring was allocated for queues the host isn't allowed to drive,
	 * so there is nothing to tear down here.
	 */
330	if ((sc->valid_queues & (1 << queue)) == 0)
331		return;
332
333	free(qp->completions_ring, M_CCP);
334	bus_dmamap_unload(qp->ring_desc_tag, qp->ring_desc_map);
335	bus_dmamem_free(qp->ring_desc_tag, qp->desc_ring, qp->ring_desc_map);
336	bus_dma_tag_destroy(qp->ring_desc_tag);
337}
338
339static int
340ccp_map_pci_bar(device_t dev)
341{
342	struct ccp_softc *sc;
343
344	sc = device_get_softc(dev);
345
346	sc->pci_resource_id = PCIR_BAR(2);
347	sc->pci_resource = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
348	    &sc->pci_resource_id, RF_ACTIVE);
349	if (sc->pci_resource == NULL) {
350		device_printf(dev, "unable to allocate pci resource\n");
351		return (ENODEV);
352	}
353
354	sc->pci_resource_id_msix = PCIR_BAR(5);
355	sc->pci_resource_msix = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
356	    &sc->pci_resource_id_msix, RF_ACTIVE);
357	if (sc->pci_resource_msix == NULL) {
358		device_printf(dev, "unable to allocate pci resource msix\n");
359		bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id,
360		    sc->pci_resource);
361		return (ENODEV);
362	}
363
364	sc->pci_bus_tag = rman_get_bustag(sc->pci_resource);
365	sc->pci_bus_handle = rman_get_bushandle(sc->pci_resource);
366	return (0);
367}
368
369static void
370ccp_unmap_pci_bar(device_t dev)
371{
372	struct ccp_softc *sc;
373
374	sc = device_get_softc(dev);
375
376	bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id_msix,
377	    sc->pci_resource_msix);
378	bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id,
379	    sc->pci_resource);
380}
381
382const static struct ccp_error_code {
383	uint8_t		ce_code;
384	const char	*ce_name;
385	int		ce_errno;
386	const char	*ce_desc;
387} ccp_error_codes[] = {
388	{ 0x01, "ILLEGAL_ENGINE", EIO, "Requested engine was invalid" },
389	{ 0x03, "ILLEGAL_FUNCTION_TYPE", EIO,
390	    "A non-supported function type was specified" },
391	{ 0x04, "ILLEGAL_FUNCTION_MODE", EIO,
392	    "A non-supported function mode was specified" },
393	{ 0x05, "ILLEGAL_FUNCTION_ENCRYPT", EIO,
394	    "A CMAC type was specified when ENCRYPT was not specified" },
395	{ 0x06, "ILLEGAL_FUNCTION_SIZE", EIO,
396	    "A non-supported function size was specified.\n"
397	    "AES-CFB: Size was not 127 or 7;\n"
398	    "3DES-CFB: Size was not 7;\n"
399	    "RSA: See supported size table (7.4.2);\n"
400	    "ECC: Size was greater than 576 bits." },
401	{ 0x07, "Zlib_MISSING_INIT_EOM", EIO,
402	    "Zlib command does not have INIT and EOM set" },
403	{ 0x08, "ILLEGAL_FUNCTION_RSVD", EIO,
404	    "Reserved bits in a function specification were not 0" },
405	{ 0x09, "ILLEGAL_BUFFER_LENGTH", EIO,
406	    "The buffer length specified was not correct for the selected engine"
407	},
408	{ 0x0A, "VLSB_FAULT", EIO, "Illegal VLSB segment mapping:\n"
409	    "Undefined VLSB segment mapping or\n"
410	    "mapping to unsupported LSB segment id" },
411	{ 0x0B, "ILLEGAL_MEM_ADDR", EFAULT,
412	    "The specified source/destination buffer access was illegal:\n"
413	    "Data buffer located in a LSB location disallowed by the LSB protection masks; or\n"
414	    "Data buffer not completely contained within a single segment; or\n"
415	    "Pointer with Fixed=1 is not 32-bit aligned; or\n"
416	    "Pointer with Fixed=1 attempted to reference non-AXI1 (local) memory."
417	},
418	{ 0x0C, "ILLEGAL_MEM_SEL", EIO,
419	    "A src_mem, dst_mem, or key_mem field was illegal:\n"
420	    "A field was set to a reserved value; or\n"
421	    "A public command attempted to reference AXI1 (local) or GART memory; or\n"
	    "A Zlib command attempted to use the LSB." },
423	{ 0x0D, "ILLEGAL_CONTEXT_ADDR", EIO,
424	    "The specified context location was illegal:\n"
425	    "Context located in a LSB location disallowed by the LSB protection masks; or\n"
426	    "Context not completely contained within a single segment." },
427	{ 0x0E, "ILLEGAL_KEY_ADDR", EIO,
428	    "The specified key location was illegal:\n"
429	    "Key located in a LSB location disallowed by the LSB protection masks; or\n"
430	    "Key not completely contained within a single segment." },
431	{ 0x12, "CMD_TIMEOUT", EIO, "A command timeout violation occurred" },
432	/* XXX Could fill out these descriptions too */
433	{ 0x13, "IDMA0_AXI_SLVERR", EIO, "" },
434	{ 0x14, "IDMA0_AXI_DECERR", EIO, "" },
435	{ 0x16, "IDMA1_AXI_SLVERR", EIO, "" },
436	{ 0x17, "IDMA1_AXI_DECERR", EIO, "" },
437	{ 0x19, "ZLIBVHB_AXI_SLVERR", EIO, "" },
438	{ 0x1A, "ZLIBVHB_AXI_DECERR", EIO, "" },
439	{ 0x1C, "ZLIB_UNEXPECTED_EOM", EIO, "" },
440	{ 0x1D, "ZLIB_EXTRA_DATA", EIO, "" },
441	{ 0x1E, "ZLIB_BTYPE", EIO, "" },
442	{ 0x20, "ZLIB_UNDEFINED_DISTANCE_SYMBOL", EIO, "" },
443	{ 0x21, "ZLIB_CODE_LENGTH_SYMBOL", EIO, "" },
444	{ 0x22, "ZLIB_VHB_ILLEGAL_FETCH", EIO, "" },
445	{ 0x23, "ZLIB_UNCOMPRESSED_LEN", EIO, "" },
446	{ 0x24, "ZLIB_LIMIT_REACHED", EIO, "" },
447	{ 0x25, "ZLIB_CHECKSUM_MISMATCH", EIO, "" },
448	{ 0x26, "ODMA0_AXI_SLVERR", EIO, "" },
449	{ 0x27, "ODMA0_AXI_DECERR", EIO, "" },
450	{ 0x29, "ODMA1_AXI_SLVERR", EIO, "" },
451	{ 0x2A, "ODMA1_AXI_DECERR", EIO, "" },
452	{ 0x2B, "LSB_PARITY_ERR", EIO,
453	    "A read from the LSB encountered a parity error" },
454};
455
456static void
457ccp_intr_handle_error(struct ccp_queue *qp, const struct ccp_desc *desc)
458{
459	struct ccp_completion_ctx *cctx;
460	const struct ccp_error_code *ec;
461	struct ccp_softc *sc;
462	uint32_t status, error, esource, faultblock;
463	unsigned q, idx;
464	int errno;
465
466	sc = qp->cq_softc;
467	q = qp->cq_qindex;
468
469	status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE);
470
471	error = status & STATUS_ERROR_MASK;
472
473	/* Decode error status */
474	ec = NULL;
475	for (idx = 0; idx < nitems(ccp_error_codes); idx++)
476		if (ccp_error_codes[idx].ce_code == error) {
477			ec = &ccp_error_codes[idx];
478			break;
479		}
480
481	esource = (status >> STATUS_ERRORSOURCE_SHIFT) &
482	    STATUS_ERRORSOURCE_MASK;
483	faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) &
484	    STATUS_VLSB_FAULTBLOCK_MASK;
485	device_printf(sc->dev, "Error: %s (%u) Source: %u Faulting LSB block: %u\n",
486	    (ec != NULL) ? ec->ce_name : "(reserved)", error, esource,
487	    faultblock);
488	if (ec != NULL)
489		device_printf(sc->dev, "Error description: %s\n", ec->ce_desc);
490
491	/* TODO Could format the desc nicely here */
492	idx = desc - qp->desc_ring;
493	DPRINTF(sc->dev, "Bad descriptor index: %u contents: %32D\n", idx,
494	    (const void *)desc, " ");
495
	/*
	 * TODO Per § 14.4 "Error Handling," DMA_Status, DMA_Read/Write_Status,
	 * Zlib Decompress status may be interesting.
	 */
500
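	/*
	 * Walk forward from the faulting descriptor until the completion
	 * context that terminates its transaction is found, then report the
	 * error to that transaction's callback.
	 */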
501	while (true) {
502		/* Keep unused descriptors zero for next use. */
503		memset(&qp->desc_ring[idx], 0, sizeof(qp->desc_ring[idx]));
504
505		cctx = &qp->completions_ring[idx];
506
		/*
		 * Restart procedure described in § 14.2.5.  Could be used by
		 * HoC if we used that.
		 *
		 * Advance HEAD_LO past bad descriptor + any remaining in
		 * transaction manually, then restart queue.
		 */
514		idx = (idx + 1) % (1 << sc->ring_size_order);
515
516		/* Callback function signals end of transaction */
517		if (cctx->callback_fn != NULL) {
518			if (ec == NULL)
519				errno = EIO;
520			else
521				errno = ec->ce_errno;
522			/* TODO More specific error code */
523			cctx->callback_fn(qp, cctx->session, cctx->callback_arg, errno);
524			cctx->callback_fn = NULL;
525			break;
526		}
527	}
528
529	qp->cq_head = idx;
530	qp->cq_waiting = false;
531	wakeup(&qp->cq_tail);
532	DPRINTF(sc->dev, "%s: wrote sw head:%u\n", __func__, qp->cq_head);
533	ccp_write_queue_4(sc, q, CMD_Q_HEAD_LO_BASE,
534	    (uint32_t)qp->desc_ring_bus_addr + (idx * Q_DESC_SIZE));
535	ccp_write_queue_4(sc, q, CMD_Q_CONTROL_BASE, qp->qcontrol);
536	DPRINTF(sc->dev, "%s: Restarted queue\n", __func__);
537}
538
539static void
540ccp_intr_run_completions(struct ccp_queue *qp, uint32_t ints)
541{
542	struct ccp_completion_ctx *cctx;
543	struct ccp_softc *sc;
544	const struct ccp_desc *desc;
545	uint32_t headlo, idx;
546	unsigned q, completed;
547
548	sc = qp->cq_softc;
549	q = qp->cq_qindex;
550
551	mtx_lock(&qp->cq_lock);
552
553	/*
554	 * Hardware HEAD_LO points to the first incomplete descriptor.  Process
555	 * any submitted and completed descriptors, up to but not including
556	 * HEAD_LO.
557	 */
558	headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE);
559	idx = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE;
560
561	DPRINTF(sc->dev, "%s: hw head:%u sw head:%u\n", __func__, idx,
562	    qp->cq_head);
563	completed = 0;
564	while (qp->cq_head != idx) {
565		DPRINTF(sc->dev, "%s: completing:%u\n", __func__, qp->cq_head);
566
567		cctx = &qp->completions_ring[qp->cq_head];
568		if (cctx->callback_fn != NULL) {
569			cctx->callback_fn(qp, cctx->session,
570			    cctx->callback_arg, 0);
571			cctx->callback_fn = NULL;
572		}
573
574		/* Keep unused descriptors zero for next use. */
575		memset(&qp->desc_ring[qp->cq_head], 0,
576		    sizeof(qp->desc_ring[qp->cq_head]));
577
578		qp->cq_head = (qp->cq_head + 1) % (1 << sc->ring_size_order);
579		completed++;
580	}
581	if (completed > 0) {
582		qp->cq_waiting = false;
583		wakeup(&qp->cq_tail);
584	}
585
586	DPRINTF(sc->dev, "%s: wrote sw head:%u\n", __func__, qp->cq_head);
587
588	/*
589	 * Desc points to the first incomplete descriptor, at the time we read
590	 * HEAD_LO.  If there was an error flagged in interrupt status, the HW
591	 * will not proceed past the erroneous descriptor by itself.
592	 */
593	desc = &qp->desc_ring[idx];
594	if ((ints & INT_ERROR) != 0)
595		ccp_intr_handle_error(qp, desc);
596
597	mtx_unlock(&qp->cq_lock);
598}
599
600static void
601ccp_intr_handler(void *arg)
602{
603	struct ccp_softc *sc = arg;
604	size_t i;
605	uint32_t ints;
606
607	DPRINTF(sc->dev, "%s: interrupt\n", __func__);
608
609	/*
610	 * We get one global interrupt per PCI device, shared over all of
611	 * its queues.  Scan each valid queue on interrupt for flags indicating
612	 * activity.
613	 */
614	for (i = 0; i < nitems(sc->queues); i++) {
615		if ((sc->valid_queues & (1 << i)) == 0)
616			continue;
617
618		ints = ccp_read_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE);
619		if (ints == 0)
620			continue;
621
622#if 0
623		DPRINTF(sc->dev, "%s: %x interrupts on queue %zu\n", __func__,
624		    (unsigned)ints, i);
625#endif
626		/* Write back 1s to clear interrupt status bits. */
627		ccp_write_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE, ints);
628
629		/*
630		 * If there was an error, we still need to run completions on
631		 * any descriptors prior to the error.  The completions handler
632		 * invoked below will also handle the error descriptor.
633		 */
634		if ((ints & (INT_COMPLETION | INT_ERROR)) != 0)
635			ccp_intr_run_completions(&sc->queues[i], ints);
636
637		if ((ints & INT_QUEUE_STOPPED) != 0)
638			device_printf(sc->dev, "%s: queue %zu stopped\n",
639			    __func__, i);
640	}
641
642	/* Re-enable interrupts after processing */
643	for (i = 0; i < nitems(sc->queues); i++) {
644		if ((sc->valid_queues & (1 << i)) == 0)
645			continue;
646		ccp_write_queue_4(sc, i, CMD_Q_INT_ENABLE_BASE,
647		    INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED);
648	}
649}
650
651static int
652ccp_intr_filter(void *arg)
653{
654	struct ccp_softc *sc = arg;
655	size_t i;
656
657	/* TODO: Split individual queues into separate taskqueues? */
658	for (i = 0; i < nitems(sc->queues); i++) {
659		if ((sc->valid_queues & (1 << i)) == 0)
660			continue;
661
662		/* Mask interrupt until task completes */
663		ccp_write_queue_4(sc, i, CMD_Q_INT_ENABLE_BASE, 0);
664	}
665
666	return (FILTER_SCHEDULE_THREAD);
667}
668
669static int
670ccp_setup_interrupts(struct ccp_softc *sc)
671{
672	uint32_t nvec;
673	int rid, error, n, ridcopy;
674
675	n = pci_msix_count(sc->dev);
676	if (n < 1) {
677		device_printf(sc->dev, "%s: msix_count: %d\n", __func__, n);
678		return (ENXIO);
679	}
680
681	nvec = n;
682	error = pci_alloc_msix(sc->dev, &nvec);
683	if (error != 0) {
684		device_printf(sc->dev, "%s: alloc_msix error: %d\n", __func__,
685		    error);
686		return (error);
687	}
688	if (nvec < 1) {
689		device_printf(sc->dev, "%s: alloc_msix: 0 vectors\n",
690		    __func__);
691		return (ENXIO);
692	}
693	if (nvec > nitems(sc->intr_res)) {
694		device_printf(sc->dev, "%s: too many vectors: %u\n", __func__,
695		    nvec);
696		nvec = nitems(sc->intr_res);
697	}
698
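	/*
	 * MSI-X IRQ resource IDs start at 1; store them 0-based in
	 * intr_res[] / intr_tag[].
	 */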
699	for (rid = 1; rid < 1 + nvec; rid++) {
700		ridcopy = rid;
701		sc->intr_res[rid - 1] = bus_alloc_resource_any(sc->dev,
702		    SYS_RES_IRQ, &ridcopy, RF_ACTIVE);
703		if (sc->intr_res[rid - 1] == NULL) {
704			device_printf(sc->dev, "%s: Failed to alloc IRQ resource\n",
705			    __func__);
706			return (ENXIO);
707		}
708
709		sc->intr_tag[rid - 1] = NULL;
710		error = bus_setup_intr(sc->dev, sc->intr_res[rid - 1],
711		    INTR_MPSAFE | INTR_TYPE_MISC, ccp_intr_filter,
712		    ccp_intr_handler, sc, &sc->intr_tag[rid - 1]);
713		if (error != 0)
714			device_printf(sc->dev, "%s: setup_intr: %d\n",
715			    __func__, error);
716	}
717	sc->intr_count = nvec;
718
719	return (error);
720}
721
722static void
723ccp_release_interrupts(struct ccp_softc *sc)
724{
725	unsigned i;
726
727	for (i = 0; i < sc->intr_count; i++) {
728		if (sc->intr_tag[i] != NULL)
729			bus_teardown_intr(sc->dev, sc->intr_res[i],
730			    sc->intr_tag[i]);
731		if (sc->intr_res[i] != NULL)
732			bus_release_resource(sc->dev, SYS_RES_IRQ,
733			    rman_get_rid(sc->intr_res[i]), sc->intr_res[i]);
734	}
735
736	pci_release_msi(sc->dev);
737}
738
739int
740ccp_hw_attach(device_t dev)
741{
742	struct ccp_softc *sc;
743	uint64_t lsbmask;
744	uint32_t version, lsbmasklo, lsbmaskhi;
745	unsigned queue_idx, j;
746	int error;
747	bool bars_mapped, interrupts_setup;
748
749	queue_idx = 0;
750	bars_mapped = interrupts_setup = false;
751	sc = device_get_softc(dev);
752
753	error = ccp_map_pci_bar(dev);
754	if (error != 0) {
755		device_printf(dev, "%s: couldn't map BAR(s)\n", __func__);
756		goto out;
757	}
758	bars_mapped = true;
759
760	error = pci_enable_busmaster(dev);
761	if (error != 0) {
762		device_printf(dev, "%s: couldn't enable busmaster\n",
763		    __func__);
764		goto out;
765	}
766
767	sc->ring_size_order = g_ccp_ring_order;
768	if (sc->ring_size_order < 6 || sc->ring_size_order > 16) {
769		device_printf(dev, "bogus hw.ccp.ring_order\n");
770		error = EINVAL;
771		goto out;
772	}
773	sc->valid_queues = ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET);
774
775	version = ccp_read_4(sc, VERSION_REG);
776	if ((version & VERSION_NUM_MASK) < 5) {
777		device_printf(dev,
778		    "driver supports version 5 and later hardware\n");
779		error = ENXIO;
780		goto out;
781	}
782
783	error = ccp_setup_interrupts(sc);
784	if (error != 0)
785		goto out;
786	interrupts_setup = true;
787
788	sc->hw_version = version & VERSION_NUM_MASK;
789	sc->num_queues = (version >> VERSION_NUMVQM_SHIFT) &
790	    VERSION_NUMVQM_MASK;
791	sc->num_lsb_entries = (version >> VERSION_LSBSIZE_SHIFT) &
792	    VERSION_LSBSIZE_MASK;
793	sc->hw_features = version & VERSION_CAP_MASK;
794
795	/*
796	 * Copy private LSB mask to public registers to enable access to LSB
797	 * from all queues allowed by BIOS.
798	 */
799	lsbmasklo = ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET);
800	lsbmaskhi = ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET);
801	ccp_write_4(sc, LSB_PUBLIC_MASK_LO_OFFSET, lsbmasklo);
802	ccp_write_4(sc, LSB_PUBLIC_MASK_HI_OFFSET, lsbmaskhi);
803
804	lsbmask = ((uint64_t)lsbmaskhi << 30) | lsbmasklo;
805
806	for (; queue_idx < nitems(sc->queues); queue_idx++) {
807		error = ccp_hw_attach_queue(dev, lsbmask, queue_idx);
808		if (error != 0) {
809			device_printf(dev, "%s: couldn't attach queue %u\n",
810			    __func__, queue_idx);
811			goto out;
812		}
813	}
814	ccp_assign_lsb_regions(sc, lsbmask);
815
816out:
817	if (error != 0) {
818		if (interrupts_setup)
819			ccp_release_interrupts(sc);
820		for (j = 0; j < queue_idx; j++)
821			ccp_hw_detach_queue(dev, j);
822		if (sc->ring_size_order != 0)
823			pci_disable_busmaster(dev);
824		if (bars_mapped)
825			ccp_unmap_pci_bar(dev);
826	}
827	return (error);
828}
829
830void
831ccp_hw_detach(device_t dev)
832{
833	struct ccp_softc *sc;
834	unsigned i;
835
836	sc = device_get_softc(dev);
837
838	for (i = 0; i < nitems(sc->queues); i++)
839		ccp_hw_detach_queue(dev, i);
840
841	ccp_release_interrupts(sc);
842	pci_disable_busmaster(dev);
843	ccp_unmap_pci_bar(dev);
844}
845
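/*
 * Queue a single passthrough (DMA) descriptor copying len bytes from src to
 * dst.  Optionally raises an interrupt on completion and records a completion
 * context for the interrupt handler.
 */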
846static int __must_check
847ccp_passthrough(struct ccp_queue *qp, bus_addr_t dst,
848    enum ccp_memtype dst_type, bus_addr_t src, enum ccp_memtype src_type,
849    bus_size_t len, enum ccp_passthru_byteswap swapmode,
850    enum ccp_passthru_bitwise bitmode, bool interrupt,
851    const struct ccp_completion_ctx *cctx)
852{
853	struct ccp_desc *desc;
854
855	if (ccp_queue_get_ring_space(qp) == 0)
856		return (EAGAIN);
857
858	desc = &qp->desc_ring[qp->cq_tail];
859
860	memset(desc, 0, sizeof(*desc));
861	desc->engine = CCP_ENGINE_PASSTHRU;
862
863	desc->pt.ioc = interrupt;
864	desc->pt.byteswap = swapmode;
865	desc->pt.bitwise = bitmode;
866	desc->length = len;
867
868	desc->src_lo = (uint32_t)src;
869	desc->src_hi = src >> 32;
870	desc->src_mem = src_type;
871
872	desc->dst_lo = (uint32_t)dst;
873	desc->dst_hi = dst >> 32;
874	desc->dst_mem = dst_type;
875
876	if (bitmode != CCP_PASSTHRU_BITWISE_NOOP)
877		desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_KEY);
878
879	if (cctx != NULL)
880		memcpy(&qp->completions_ring[qp->cq_tail], cctx, sizeof(*cctx));
881
882	qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order);
883	return (0);
884}
885
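/*
 * Copy len bytes between a scatter/gather list and a fixed LSB address using
 * one passthrough descriptor per segment.  Only the final descriptor may
 * raise an interrupt.
 */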
886static int __must_check
887ccp_passthrough_sgl(struct ccp_queue *qp, bus_addr_t lsb_addr, bool tolsb,
888    struct sglist *sgl, bus_size_t len, bool interrupt,
889    const struct ccp_completion_ctx *cctx)
890{
891	struct sglist_seg *seg;
892	size_t i, remain, nb;
893	int error;
894
895	remain = len;
896	for (i = 0; i < sgl->sg_nseg && remain != 0; i++) {
897		seg = &sgl->sg_segs[i];
898		/* crp lengths are int, so 32-bit min() is ok. */
899		nb = min(remain, seg->ss_len);
900
901		if (tolsb)
902			error = ccp_passthrough(qp, lsb_addr, CCP_MEMTYPE_SB,
903			    seg->ss_paddr, CCP_MEMTYPE_SYSTEM, nb,
904			    CCP_PASSTHRU_BYTESWAP_NOOP,
905			    CCP_PASSTHRU_BITWISE_NOOP,
906			    (nb == remain) && interrupt, cctx);
907		else
908			error = ccp_passthrough(qp, seg->ss_paddr,
909			    CCP_MEMTYPE_SYSTEM, lsb_addr, CCP_MEMTYPE_SB, nb,
910			    CCP_PASSTHRU_BYTESWAP_NOOP,
911			    CCP_PASSTHRU_BITWISE_NOOP,
912			    (nb == remain) && interrupt, cctx);
913		if (error != 0)
914			return (error);
915
916		remain -= nb;
917	}
918	return (0);
919}
920
/*
 * Note that these vectors are stored in the reverse of the usual order.
 */
924const struct SHA_vectors {
925	uint32_t SHA1[8];
926	uint32_t SHA224[8];
927	uint32_t SHA256[8];
928	uint64_t SHA384[8];
929	uint64_t SHA512[8];
930} SHA_H __aligned(PAGE_SIZE) = {
931	.SHA1 = {
932		0xc3d2e1f0ul,
933		0x10325476ul,
934		0x98badcfeul,
935		0xefcdab89ul,
936		0x67452301ul,
937		0,
938		0,
939		0,
940	},
941	.SHA224 = {
942		0xbefa4fa4ul,
943		0x64f98fa7ul,
944		0x68581511ul,
945		0xffc00b31ul,
946		0xf70e5939ul,
947		0x3070dd17ul,
948		0x367cd507ul,
949		0xc1059ed8ul,
950	},
951	.SHA256 = {
952		0x5be0cd19ul,
953		0x1f83d9abul,
954		0x9b05688cul,
955		0x510e527ful,
956		0xa54ff53aul,
957		0x3c6ef372ul,
958		0xbb67ae85ul,
959		0x6a09e667ul,
960	},
961	.SHA384 = {
962		0x47b5481dbefa4fa4ull,
963		0xdb0c2e0d64f98fa7ull,
964		0x8eb44a8768581511ull,
965		0x67332667ffc00b31ull,
966		0x152fecd8f70e5939ull,
967		0x9159015a3070dd17ull,
968		0x629a292a367cd507ull,
969		0xcbbb9d5dc1059ed8ull,
970	},
971	.SHA512 = {
972		0x5be0cd19137e2179ull,
973		0x1f83d9abfb41bd6bull,
974		0x9b05688c2b3e6c1full,
975		0x510e527fade682d1ull,
976		0xa54ff53a5f1d36f1ull,
977		0x3c6ef372fe94f82bull,
978		0xbb67ae8584caa73bull,
979		0x6a09e667f3bcc908ull,
980	},
981};
982/*
983 * Ensure vectors do not cross a page boundary.
984 *
985 * Disabled due to a new Clang error:  "expression is not an integral constant
986 * expression."  GCC (cross toolchain) seems to handle this assertion with
987 * _Static_assert just fine.
988 */
989#if 0
990CTASSERT(PAGE_SIZE - ((uintptr_t)&SHA_H % PAGE_SIZE) >= sizeof(SHA_H));
991#endif
992
993const struct SHA_Defn {
994	enum sha_version version;
995	const void *H_vectors;
996	size_t H_size;
997	struct auth_hash *axf;
998	enum ccp_sha_type engine_type;
999} SHA_definitions[] = {
1000	{
1001		.version = SHA1,
1002		.H_vectors = SHA_H.SHA1,
1003		.H_size = sizeof(SHA_H.SHA1),
1004		.axf = &auth_hash_hmac_sha1,
1005		.engine_type = CCP_SHA_TYPE_1,
1006	},
1007#if 0
1008	{
1009		.version = SHA2_224,
1010		.H_vectors = SHA_H.SHA224,
1011		.H_size = sizeof(SHA_H.SHA224),
1012		.axf = &auth_hash_hmac_sha2_224,
1013		.engine_type = CCP_SHA_TYPE_224,
1014	},
1015#endif
1016	{
1017		.version = SHA2_256,
1018		.H_vectors = SHA_H.SHA256,
1019		.H_size = sizeof(SHA_H.SHA256),
1020		.axf = &auth_hash_hmac_sha2_256,
1021		.engine_type = CCP_SHA_TYPE_256,
1022	},
1023	{
1024		.version = SHA2_384,
1025		.H_vectors = SHA_H.SHA384,
1026		.H_size = sizeof(SHA_H.SHA384),
1027		.axf = &auth_hash_hmac_sha2_384,
1028		.engine_type = CCP_SHA_TYPE_384,
1029	},
1030	{
1031		.version = SHA2_512,
1032		.H_vectors = SHA_H.SHA512,
1033		.H_size = sizeof(SHA_H.SHA512),
1034		.axf = &auth_hash_hmac_sha2_512,
1035		.engine_type = CCP_SHA_TYPE_512,
1036	},
1037};
1038
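/*
 * Queue one SHA descriptor over a physically contiguous buffer.  SOM/EOM mark
 * the first and last pieces of the message; the total message length in bits
 * accompanies the final descriptor.
 */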
1039static int __must_check
1040ccp_sha_single_desc(struct ccp_queue *qp, const struct SHA_Defn *defn,
1041    vm_paddr_t addr, size_t len, bool start, bool end, uint64_t msgbits)
1042{
1043	struct ccp_desc *desc;
1044
1045	if (ccp_queue_get_ring_space(qp) == 0)
1046		return (EAGAIN);
1047
1048	desc = &qp->desc_ring[qp->cq_tail];
1049
1050	memset(desc, 0, sizeof(*desc));
1051	desc->engine = CCP_ENGINE_SHA;
1052	desc->som = start;
1053	desc->eom = end;
1054
1055	desc->sha.type = defn->engine_type;
1056	desc->length = len;
1057
1058	if (end) {
1059		desc->sha_len_lo = (uint32_t)msgbits;
1060		desc->sha_len_hi = msgbits >> 32;
1061	}
1062
1063	desc->src_lo = (uint32_t)addr;
1064	desc->src_hi = addr >> 32;
1065	desc->src_mem = CCP_MEMTYPE_SYSTEM;
1066
1067	desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_SHA);
1068
1069	qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order);
1070	return (0);
1071}
1072
1073static int __must_check
1074ccp_sha(struct ccp_queue *qp, enum sha_version version, struct sglist *sgl_src,
1075    struct sglist *sgl_dst, const struct ccp_completion_ctx *cctx)
1076{
1077	const struct SHA_Defn *defn;
1078	struct sglist_seg *seg;
1079	size_t i, msgsize, remaining, nb;
1080	uint32_t lsbaddr;
1081	int error;
1082
1083	for (i = 0; i < nitems(SHA_definitions); i++)
1084		if (SHA_definitions[i].version == version)
1085			break;
1086	if (i == nitems(SHA_definitions))
1087		return (EINVAL);
1088	defn = &SHA_definitions[i];
1089
1090	/* XXX validate input ??? */
1091
1092	/* Load initial SHA state into LSB */
1093	/* XXX ensure H_vectors don't span page boundaries */
1094	error = ccp_passthrough(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_SHA),
1095	    CCP_MEMTYPE_SB, pmap_kextract((vm_offset_t)defn->H_vectors),
1096	    CCP_MEMTYPE_SYSTEM, roundup2(defn->H_size, LSB_ENTRY_SIZE),
1097	    CCP_PASSTHRU_BYTESWAP_NOOP, CCP_PASSTHRU_BITWISE_NOOP, false,
1098	    NULL);
1099	if (error != 0)
1100		return (error);
1101
1102	/* Execute series of SHA updates on correctly sized buffers */
1103	msgsize = 0;
1104	for (i = 0; i < sgl_src->sg_nseg; i++) {
1105		seg = &sgl_src->sg_segs[i];
1106		msgsize += seg->ss_len;
1107		error = ccp_sha_single_desc(qp, defn, seg->ss_paddr,
1108		    seg->ss_len, i == 0, i == sgl_src->sg_nseg - 1,
1109		    msgsize << 3);
1110		if (error != 0)
1111			return (error);
1112	}
1113
1114	/* Copy result out to sgl_dst */
1115	remaining = roundup2(defn->H_size, LSB_ENTRY_SIZE);
1116	lsbaddr = ccp_queue_lsb_address(qp, LSB_ENTRY_SHA);
1117	for (i = 0; i < sgl_dst->sg_nseg; i++) {
1118		seg = &sgl_dst->sg_segs[i];
1119		/* crp lengths are int, so 32-bit min() is ok. */
1120		nb = min(remaining, seg->ss_len);
1121
1122		error = ccp_passthrough(qp, seg->ss_paddr, CCP_MEMTYPE_SYSTEM,
1123		    lsbaddr, CCP_MEMTYPE_SB, nb, CCP_PASSTHRU_BYTESWAP_NOOP,
1124		    CCP_PASSTHRU_BITWISE_NOOP,
1125		    (cctx != NULL) ? (nb == remaining) : false,
1126		    (nb == remaining) ? cctx : NULL);
1127		if (error != 0)
1128			return (error);
1129
1130		remaining -= nb;
1131		lsbaddr += nb;
1132		if (remaining == 0)
1133			break;
1134	}
1135
1136	return (0);
1137}
1138
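/*
 * Byte-reverse a 256-bit value in place: swap the four 64-bit words
 * end-for-end and byteswap each of them.
 */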
1139static void
1140byteswap256(uint64_t *buffer)
1141{
1142	uint64_t t;
1143
1144	t = bswap64(buffer[3]);
1145	buffer[3] = bswap64(buffer[0]);
1146	buffer[0] = t;
1147
1148	t = bswap64(buffer[2]);
1149	buffer[2] = bswap64(buffer[1]);
1150	buffer[1] = t;
1151}
1152
/*
 * Translate the CCP's internal LSB hash format into a standard hash output.
 *
 * Modifies the input buffer in place via byteswap256().
 */
1158static void
1159ccp_sha_copy_result(char *output, char *buffer, enum sha_version version)
1160{
1161	const struct SHA_Defn *defn;
1162	size_t i;
1163
1164	for (i = 0; i < nitems(SHA_definitions); i++)
1165		if (SHA_definitions[i].version == version)
1166			break;
1167	if (i == nitems(SHA_definitions))
1168		panic("bogus sha version auth_mode %u\n", (unsigned)version);
1169
1170	defn = &SHA_definitions[i];
1171
	/* Swap 256 bits manually -- DMA engine can, but with limitations */
1173	byteswap256((void *)buffer);
1174	if (defn->axf->hashsize > LSB_ENTRY_SIZE)
1175		byteswap256((void *)(buffer + LSB_ENTRY_SIZE));
1176
1177	switch (defn->version) {
1178	case SHA1:
1179		memcpy(output, buffer + 12, defn->axf->hashsize);
1180		break;
1181#if 0
1182	case SHA2_224:
1183		memcpy(output, buffer + XXX, defn->axf->hashsize);
1184		break;
1185#endif
1186	case SHA2_256:
1187		memcpy(output, buffer, defn->axf->hashsize);
1188		break;
1189	case SHA2_384:
1190		memcpy(output,
1191		    buffer + LSB_ENTRY_SIZE * 3 - defn->axf->hashsize,
1192		    defn->axf->hashsize - LSB_ENTRY_SIZE);
1193		memcpy(output + defn->axf->hashsize - LSB_ENTRY_SIZE, buffer,
1194		    LSB_ENTRY_SIZE);
1195		break;
1196	case SHA2_512:
1197		memcpy(output, buffer + LSB_ENTRY_SIZE, LSB_ENTRY_SIZE);
1198		memcpy(output + LSB_ENTRY_SIZE, buffer, LSB_ENTRY_SIZE);
1199		break;
1200	}
1201}
1202
1203static void
1204ccp_do_hmac_done(struct ccp_queue *qp, struct ccp_session *s,
1205    struct cryptop *crp, int error)
1206{
1207	char ihash[SHA2_512_HASH_LEN /* max hash len */];
1208	union authctx auth_ctx;
1209	struct auth_hash *axf;
1210
1211	axf = s->hmac.auth_hash;
1212
1213	s->pending--;
1214
1215	if (error != 0) {
1216		crp->crp_etype = error;
1217		goto out;
1218	}
1219
1220	/* Do remaining outer hash over small inner hash in software */
1221	axf->Init(&auth_ctx);
1222	axf->Update(&auth_ctx, s->hmac.opad, axf->blocksize);
1223	ccp_sha_copy_result(ihash, s->hmac.res, s->hmac.auth_mode);
1224#if 0
1225	INSECURE_DEBUG(dev, "%s sha intermediate=%64D\n", __func__,
1226	    (u_char *)ihash, " ");
1227#endif
1228	axf->Update(&auth_ctx, ihash, axf->hashsize);
1229	axf->Final(s->hmac.res, &auth_ctx);
1230
1231	if (crp->crp_op & CRYPTO_OP_VERIFY_DIGEST) {
1232		crypto_copydata(crp, crp->crp_digest_start, s->hmac.hash_len,
1233		    ihash);
1234		if (timingsafe_bcmp(s->hmac.res, ihash, s->hmac.hash_len) != 0)
1235			crp->crp_etype = EBADMSG;
1236	} else
1237		crypto_copyback(crp, crp->crp_digest_start, s->hmac.hash_len,
1238		    s->hmac.res);
1239
1240	/* Avoid leaking key material */
1241	explicit_bzero(&auth_ctx, sizeof(auth_ctx));
1242	explicit_bzero(s->hmac.res, sizeof(s->hmac.res));
1243
1244out:
1245	crypto_done(crp);
1246}
1247
1248static void
1249ccp_hmac_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1250    int error)
1251{
1252	struct cryptop *crp;
1253
1254	crp = vcrp;
1255	ccp_do_hmac_done(qp, s, crp, error);
1256}
1257
1258static int __must_check
1259ccp_do_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp,
1260    const struct ccp_completion_ctx *cctx)
1261{
1262	device_t dev;
1263	struct auth_hash *axf;
1264	int error;
1265
1266	dev = qp->cq_softc->dev;
1267	axf = s->hmac.auth_hash;
1268
1269	/*
1270	 * Populate the SGL describing inside hash contents.  We want to hash
1271	 * the ipad (key XOR fixed bit pattern) concatenated with the user
1272	 * data.
1273	 */
1274	sglist_reset(qp->cq_sg_ulptx);
1275	error = sglist_append(qp->cq_sg_ulptx, s->hmac.ipad, axf->blocksize);
1276	if (error != 0)
1277		return (error);
1278	if (crp->crp_aad_length != 0) {
1279		error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1280		    crp->crp_aad_start, crp->crp_aad_length);
1281		if (error != 0)
1282			return (error);
1283	}
1284	error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1285	    crp->crp_payload_start, crp->crp_payload_length);
1286	if (error != 0) {
1287		DPRINTF(dev, "%s: sglist too short\n", __func__);
1288		return (error);
1289	}
1290	/* Populate SGL for output -- use hmac.res buffer. */
1291	sglist_reset(qp->cq_sg_dst);
1292	error = sglist_append(qp->cq_sg_dst, s->hmac.res,
1293	    roundup2(axf->hashsize, LSB_ENTRY_SIZE));
1294	if (error != 0)
1295		return (error);
1296
1297	error = ccp_sha(qp, s->hmac.auth_mode, qp->cq_sg_ulptx, qp->cq_sg_dst,
1298	    cctx);
1299	if (error != 0) {
1300		DPRINTF(dev, "%s: ccp_sha error\n", __func__);
1301		return (error);
1302	}
1303	return (0);
1304}
1305
1306int __must_check
1307ccp_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
1308{
1309	struct ccp_completion_ctx ctx;
1310
1311	ctx.callback_fn = ccp_hmac_done;
1312	ctx.callback_arg = crp;
1313	ctx.session = s;
1314
1315	return (ccp_do_hmac(qp, s, crp, &ctx));
1316}
1317
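/*
 * Reverse a buffer of bytes in place, e.g. key or IV material destined for
 * the hardware.
 */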
1318static void
1319ccp_byteswap(char *data, size_t len)
1320{
1321	size_t i;
1322	char t;
1323
1324	len--;
1325	for (i = 0; i < len; i++, len--) {
1326		t = data[i];
1327		data[i] = data[len];
1328		data[len] = t;
1329	}
1330}
1331
1332static void
1333ccp_blkcipher_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1334    int error)
1335{
1336	struct cryptop *crp;
1337
1338	explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv));
1339
1340	crp = vcrp;
1341
1342	s->pending--;
1343
1344	if (error != 0)
1345		crp->crp_etype = error;
1346
1347	DPRINTF(qp->cq_softc->dev, "%s: qp=%p crp=%p\n", __func__, qp, crp);
1348	crypto_done(crp);
1349}
1350
1351static void
1352ccp_collect_iv(struct cryptop *crp, const struct crypto_session_params *csp,
1353    char *iv)
1354{
1355
1356	crypto_read_iv(crp, iv);
1357
1358	/*
1359	 * If the input IV is 12 bytes, append an explicit counter of 1.
1360	 */
1361	if (csp->csp_cipher_alg == CRYPTO_AES_NIST_GCM_16 &&
1362	    csp->csp_ivlen == 12)
1363		*(uint32_t *)&iv[12] = htobe32(1);
1364
1365	if (csp->csp_cipher_alg == CRYPTO_AES_XTS &&
1366	    csp->csp_ivlen < AES_BLOCK_LEN)
1367		memset(&iv[csp->csp_ivlen], 0, AES_BLOCK_LEN - csp->csp_ivlen);
1368
1369	/* Reverse order of IV material for HW */
1370	INSECURE_DEBUG(NULL, "%s: IV: %16D len: %u\n", __func__, iv, " ",
1371	    csp->csp_ivlen);
1372
	/*
	 * For unknown reasons, XTS mode expects the IV in the reverse byte
	 * order from every other AES mode.
	 */
1377	if (csp->csp_cipher_alg != CRYPTO_AES_XTS)
1378		ccp_byteswap(iv, AES_BLOCK_LEN);
1379}
1380
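/*
 * Copy a small kernel buffer (key, IV, or similar) into the queue's LSB at
 * lsbaddr using passthrough descriptors.
 */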
1381static int __must_check
1382ccp_do_pst_to_lsb(struct ccp_queue *qp, uint32_t lsbaddr, const void *src,
1383    size_t len)
1384{
1385	int error;
1386
1387	sglist_reset(qp->cq_sg_ulptx);
1388	error = sglist_append(qp->cq_sg_ulptx, __DECONST(void *, src), len);
1389	if (error != 0)
1390		return (error);
1391
1392	error = ccp_passthrough_sgl(qp, lsbaddr, true, qp->cq_sg_ulptx, len,
1393	    false, NULL);
1394	return (error);
1395}
1396
1397static int __must_check
1398ccp_do_xts(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp,
1399    enum ccp_cipher_dir dir, const struct ccp_completion_ctx *cctx)
1400{
1401	struct ccp_desc *desc;
1402	device_t dev;
1403	unsigned i;
1404	enum ccp_xts_unitsize usize;
1405
1406	/* IV and Key data are already loaded */
1407
1408	dev = qp->cq_softc->dev;
1409
1410	for (i = 0; i < nitems(ccp_xts_unitsize_map); i++)
1411		if (ccp_xts_unitsize_map[i].cxu_size ==
1412		    crp->crp_payload_length) {
1413			usize = ccp_xts_unitsize_map[i].cxu_id;
1414			break;
1415		}
1416	if (i >= nitems(ccp_xts_unitsize_map))
1417		return (EINVAL);
1418
1419	for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1420		struct sglist_seg *seg;
1421
1422		seg = &qp->cq_sg_ulptx->sg_segs[i];
1423
1424		desc = &qp->desc_ring[qp->cq_tail];
1425		desc->engine = CCP_ENGINE_XTS_AES;
1426		desc->som = (i == 0);
1427		desc->eom = (i == qp->cq_sg_ulptx->sg_nseg - 1);
1428		desc->ioc = (desc->eom && cctx != NULL);
1429		DPRINTF(dev, "%s: XTS %u: som:%d eom:%d ioc:%d dir:%d\n",
1430		    __func__, qp->cq_tail, (int)desc->som, (int)desc->eom,
1431		    (int)desc->ioc, (int)dir);
1432
1433		if (desc->ioc)
1434			memcpy(&qp->completions_ring[qp->cq_tail], cctx,
1435			    sizeof(*cctx));
1436
1437		desc->aes_xts.encrypt = dir;
1438		desc->aes_xts.type = s->blkcipher.cipher_type;
1439		desc->aes_xts.size = usize;
1440
1441		DPRINTF(dev, "XXX %s: XTS %u: type:%u size:%u\n", __func__,
1442		    qp->cq_tail, (unsigned)desc->aes_xts.type,
1443		    (unsigned)desc->aes_xts.size);
1444
1445		desc->length = seg->ss_len;
1446		desc->src_lo = (uint32_t)seg->ss_paddr;
1447		desc->src_hi = (seg->ss_paddr >> 32);
1448		desc->src_mem = CCP_MEMTYPE_SYSTEM;
1449
1450		/* Crypt in-place */
1451		desc->dst_lo = desc->src_lo;
1452		desc->dst_hi = desc->src_hi;
1453		desc->dst_mem = desc->src_mem;
1454
1455		desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1456		desc->key_hi = 0;
1457		desc->key_mem = CCP_MEMTYPE_SB;
1458
1459		desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1460
1461		qp->cq_tail = (qp->cq_tail + 1) %
1462		    (1 << qp->cq_softc->ring_size_order);
1463	}
1464	return (0);
1465}
1466
1467static int __must_check
1468ccp_do_blkcipher(struct ccp_queue *qp, struct ccp_session *s,
1469    struct cryptop *crp, const struct ccp_completion_ctx *cctx)
1470{
1471	const struct crypto_session_params *csp;
1472	struct ccp_desc *desc;
1473	char *keydata;
1474	device_t dev;
1475	enum ccp_cipher_dir dir;
1476	int error, iv_len;
1477	size_t keydata_len;
1478	unsigned i, j;
1479
1480	dev = qp->cq_softc->dev;
1481
1482	if (s->blkcipher.key_len == 0 || crp->crp_payload_length == 0) {
1483		DPRINTF(dev, "%s: empty\n", __func__);
1484		return (EINVAL);
1485	}
1486	if ((crp->crp_payload_length % AES_BLOCK_LEN) != 0) {
1487		DPRINTF(dev, "%s: len modulo: %d\n", __func__,
1488		    crp->crp_payload_length);
1489		return (EINVAL);
1490	}
1491
1492	/*
1493	 * Individual segments must be multiples of AES block size for the HW
1494	 * to process it.  Non-compliant inputs aren't bogus, just not doable
1495	 * on this hardware.
1496	 */
1497	for (i = 0; i < qp->cq_sg_crp->sg_nseg; i++)
1498		if ((qp->cq_sg_crp->sg_segs[i].ss_len % AES_BLOCK_LEN) != 0) {
1499			DPRINTF(dev, "%s: seg modulo: %zu\n", __func__,
1500			    qp->cq_sg_crp->sg_segs[i].ss_len);
1501			return (EINVAL);
1502		}
1503
1504	/* Gather IV/nonce data */
1505	csp = crypto_get_params(crp->crp_session);
1506	ccp_collect_iv(crp, csp, s->blkcipher.iv);
1507	iv_len = csp->csp_ivlen;
1508	if (csp->csp_cipher_alg == CRYPTO_AES_XTS)
1509		iv_len = AES_BLOCK_LEN;
1510
1511	if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1512		dir = CCP_CIPHER_DIR_ENCRYPT;
1513	else
1514		dir = CCP_CIPHER_DIR_DECRYPT;
1515
1516	/* Set up passthrough op(s) to copy IV into LSB */
1517	error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV),
1518	    s->blkcipher.iv, iv_len);
1519	if (error != 0)
1520		return (error);
1521
1522	/*
1523	 * Initialize keydata and keydata_len for GCC.  The default case of the
1524	 * following switch is impossible to reach, but GCC doesn't know that.
1525	 */
1526	keydata_len = 0;
1527	keydata = NULL;
1528
1529	switch (csp->csp_cipher_alg) {
1530	case CRYPTO_AES_XTS:
1531		for (j = 0; j < nitems(ccp_xts_unitsize_map); j++)
1532			if (ccp_xts_unitsize_map[j].cxu_size ==
1533			    crp->crp_payload_length)
1534				break;
1535		/* Input buffer must be a supported UnitSize */
1536		if (j >= nitems(ccp_xts_unitsize_map)) {
1537			device_printf(dev, "%s: rejected block size: %u\n",
1538			    __func__, crp->crp_payload_length);
1539			return (EOPNOTSUPP);
1540		}
1541		/* FALLTHROUGH */
1542	case CRYPTO_AES_CBC:
1543	case CRYPTO_AES_ICM:
1544		keydata = s->blkcipher.enckey;
1545		keydata_len = s->blkcipher.key_len;
1546		break;
1547	}
1548
1549	INSECURE_DEBUG(dev, "%s: KEY(%zu): %16D\n", __func__, keydata_len,
1550	    keydata, " ");
1551	if (csp->csp_cipher_alg == CRYPTO_AES_XTS)
1552		INSECURE_DEBUG(dev, "%s: KEY(XTS): %64D\n", __func__, keydata, " ");
1553
1554	/* Reverse order of key material for HW */
1555	ccp_byteswap(keydata, keydata_len);
1556
1557	/* Store key material into LSB to avoid page boundaries */
1558	if (csp->csp_cipher_alg == CRYPTO_AES_XTS) {
1559		/*
1560		 * XTS mode uses 2 256-bit vectors for the primary key and the
1561		 * tweak key.  For 128-bit keys, the vectors are zero-padded.
1562		 *
1563		 * After byteswapping the combined OCF-provided K1:K2 vector
1564		 * above, we need to reverse the order again so the hardware
1565		 * gets the swapped keys in the order K1':K2'.
1566		 */
1567		error = ccp_do_pst_to_lsb(qp,
1568		    ccp_queue_lsb_address(qp, LSB_ENTRY_KEY + 1), keydata,
1569		    keydata_len / 2);
1570		if (error != 0)
1571			return (error);
1572		error = ccp_do_pst_to_lsb(qp,
1573		    ccp_queue_lsb_address(qp, LSB_ENTRY_KEY),
1574		    keydata + (keydata_len / 2), keydata_len / 2);
1575
1576		/* Zero-pad 128 bit keys */
1577		if (keydata_len == 32) {
1578			if (error != 0)
1579				return (error);
1580			error = ccp_do_pst_to_lsb(qp,
1581			    ccp_queue_lsb_address(qp, LSB_ENTRY_KEY) +
1582			    keydata_len / 2, g_zeroes, keydata_len / 2);
1583			if (error != 0)
1584				return (error);
1585			error = ccp_do_pst_to_lsb(qp,
1586			    ccp_queue_lsb_address(qp, LSB_ENTRY_KEY + 1) +
1587			    keydata_len / 2, g_zeroes, keydata_len / 2);
1588		}
1589	} else
1590		error = ccp_do_pst_to_lsb(qp,
1591		    ccp_queue_lsb_address(qp, LSB_ENTRY_KEY), keydata,
1592		    keydata_len);
1593	if (error != 0)
1594		return (error);
1595
1596	/*
1597	 * Point SGLs at the subset of cryptop buffer contents representing the
1598	 * data.
1599	 */
1600	sglist_reset(qp->cq_sg_ulptx);
1601	error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1602	    crp->crp_payload_start, crp->crp_payload_length);
1603	if (error != 0)
1604		return (error);
1605
1606	INSECURE_DEBUG(dev, "%s: Contents: %16D\n", __func__,
1607	    (void *)PHYS_TO_DMAP(qp->cq_sg_ulptx->sg_segs[0].ss_paddr), " ");
1608
1609	DPRINTF(dev, "%s: starting AES ops @ %u\n", __func__, qp->cq_tail);
1610
1611	if (ccp_queue_get_ring_space(qp) < qp->cq_sg_ulptx->sg_nseg)
1612		return (EAGAIN);
1613
1614	if (csp->csp_cipher_alg == CRYPTO_AES_XTS)
1615		return (ccp_do_xts(qp, s, crp, dir, cctx));
1616
1617	for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1618		struct sglist_seg *seg;
1619
1620		seg = &qp->cq_sg_ulptx->sg_segs[i];
1621
1622		desc = &qp->desc_ring[qp->cq_tail];
1623		desc->engine = CCP_ENGINE_AES;
1624		desc->som = (i == 0);
1625		desc->eom = (i == qp->cq_sg_ulptx->sg_nseg - 1);
1626		desc->ioc = (desc->eom && cctx != NULL);
1627		DPRINTF(dev, "%s: AES %u: som:%d eom:%d ioc:%d dir:%d\n",
1628		    __func__, qp->cq_tail, (int)desc->som, (int)desc->eom,
1629		    (int)desc->ioc, (int)dir);
1630
1631		if (desc->ioc)
1632			memcpy(&qp->completions_ring[qp->cq_tail], cctx,
1633			    sizeof(*cctx));
1634
1635		desc->aes.encrypt = dir;
1636		desc->aes.mode = s->blkcipher.cipher_mode;
1637		desc->aes.type = s->blkcipher.cipher_type;
1638		if (csp->csp_cipher_alg == CRYPTO_AES_ICM)
			/*
			 * Size of the CTR value in bits, minus 1.  ICM mode
			 * uses all 128 bits as the counter.
			 */
1643			desc->aes.size = 127;
1644
1645		DPRINTF(dev, "%s: AES %u: mode:%u type:%u size:%u\n", __func__,
1646		    qp->cq_tail, (unsigned)desc->aes.mode,
1647		    (unsigned)desc->aes.type, (unsigned)desc->aes.size);
1648
1649		desc->length = seg->ss_len;
1650		desc->src_lo = (uint32_t)seg->ss_paddr;
1651		desc->src_hi = (seg->ss_paddr >> 32);
1652		desc->src_mem = CCP_MEMTYPE_SYSTEM;
1653
1654		/* Crypt in-place */
1655		desc->dst_lo = desc->src_lo;
1656		desc->dst_hi = desc->src_hi;
1657		desc->dst_mem = desc->src_mem;
1658
1659		desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1660		desc->key_hi = 0;
1661		desc->key_mem = CCP_MEMTYPE_SB;
1662
1663		desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1664
1665		qp->cq_tail = (qp->cq_tail + 1) %
1666		    (1 << qp->cq_softc->ring_size_order);
1667	}
1668	return (0);
1669}
1670
1671int __must_check
1672ccp_blkcipher(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
1673{
1674	struct ccp_completion_ctx ctx;
1675
1676	ctx.callback_fn = ccp_blkcipher_done;
1677	ctx.session = s;
1678	ctx.callback_arg = crp;
1679
1680	return (ccp_do_blkcipher(qp, s, crp, &ctx));
1681}
1682
1683static void
1684ccp_authenc_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1685    int error)
1686{
1687	struct cryptop *crp;
1688
1689	explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv));
1690
1691	crp = vcrp;
1692
1693	ccp_do_hmac_done(qp, s, crp, error);
1694}
1695
1696int __must_check
1697ccp_authenc(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
1698{
1699	struct ccp_completion_ctx ctx;
1700	int error;
1701
1702	ctx.callback_fn = ccp_authenc_done;
1703	ctx.session = s;
1704	ctx.callback_arg = crp;
1705
1706	/* Perform first operation */
1707	if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1708		error = ccp_do_blkcipher(qp, s, crp, NULL);
1709	else
1710		error = ccp_do_hmac(qp, s, crp, NULL);
1711	if (error != 0)
1712		return (error);
1713
1714	/* Perform second operation */
1715	if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1716		error = ccp_do_hmac(qp, s, crp, &ctx);
1717	else
1718		error = ccp_do_blkcipher(qp, s, crp, &ctx);
1719	return (error);
1720}
1721
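/*
 * Queue GHASH descriptors covering the (block-multiple) AAD segments.  The
 * running GHASH state lives in the LSB IV entry.
 */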
1722static int __must_check
1723ccp_do_ghash_aad(struct ccp_queue *qp, struct ccp_session *s)
1724{
1725	struct ccp_desc *desc;
1726	struct sglist_seg *seg;
1727	unsigned i;
1728
1729	if (ccp_queue_get_ring_space(qp) < qp->cq_sg_ulptx->sg_nseg)
1730		return (EAGAIN);
1731
1732	for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1733		seg = &qp->cq_sg_ulptx->sg_segs[i];
1734
1735		desc = &qp->desc_ring[qp->cq_tail];
1736
1737		desc->engine = CCP_ENGINE_AES;
1738		desc->aes.mode = CCP_AES_MODE_GHASH;
1739		desc->aes.type = s->blkcipher.cipher_type;
1740		desc->aes.encrypt = CCP_AES_MODE_GHASH_AAD;
1741
1742		desc->som = (i == 0);
1743		desc->length = seg->ss_len;
1744
1745		desc->src_lo = (uint32_t)seg->ss_paddr;
1746		desc->src_hi = (seg->ss_paddr >> 32);
1747		desc->src_mem = CCP_MEMTYPE_SYSTEM;
1748
1749		desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1750
1751		desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1752		desc->key_mem = CCP_MEMTYPE_SB;
1753
1754		qp->cq_tail = (qp->cq_tail + 1) %
1755		    (1 << qp->cq_softc->ring_size_order);
1756	}
1757	return (0);
1758}
1759
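/*
 * Queue one GCTR descriptor for a single SGL segment; aes.size masks off any
 * trailing bytes of a short final segment.
 */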
1760static int __must_check
1761ccp_do_gctr(struct ccp_queue *qp, struct ccp_session *s,
1762    enum ccp_cipher_dir dir, struct sglist_seg *seg, bool som, bool eom)
1763{
1764	struct ccp_desc *desc;
1765
1766	if (ccp_queue_get_ring_space(qp) == 0)
1767		return (EAGAIN);
1768
1769	desc = &qp->desc_ring[qp->cq_tail];
1770
1771	desc->engine = CCP_ENGINE_AES;
1772	desc->aes.mode = CCP_AES_MODE_GCTR;
1773	desc->aes.type = s->blkcipher.cipher_type;
1774	desc->aes.encrypt = dir;
1775	desc->aes.size = 8 * (seg->ss_len % GMAC_BLOCK_LEN) - 1;
1776
1777	desc->som = som;
1778	desc->eom = eom;
1779
1780	/* Trailing bytes will be masked off by aes.size above. */
1781	desc->length = roundup2(seg->ss_len, GMAC_BLOCK_LEN);
1782
1783	desc->dst_lo = desc->src_lo = (uint32_t)seg->ss_paddr;
1784	desc->dst_hi = desc->src_hi = seg->ss_paddr >> 32;
1785	desc->dst_mem = desc->src_mem = CCP_MEMTYPE_SYSTEM;
1786
1787	desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1788
1789	desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1790	desc->key_mem = CCP_MEMTYPE_SB;
1791
1792	qp->cq_tail = (qp->cq_tail + 1) %
1793	    (1 << qp->cq_softc->ring_size_order);
1794	return (0);
1795}
1796
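/*
 * Queue the final GHASH over the encoded AAD/payload bit lengths
 * (LSB_ENTRY_GHASH_IN), leaving the tag in LSB_ENTRY_GHASH.
 */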
1797static int __must_check
1798ccp_do_ghash_final(struct ccp_queue *qp, struct ccp_session *s)
1799{
1800	struct ccp_desc *desc;
1801
1802	if (ccp_queue_get_ring_space(qp) == 0)
1803		return (EAGAIN);
1804
1805	desc = &qp->desc_ring[qp->cq_tail];
1806
1807	desc->engine = CCP_ENGINE_AES;
1808	desc->aes.mode = CCP_AES_MODE_GHASH;
1809	desc->aes.type = s->blkcipher.cipher_type;
1810	desc->aes.encrypt = CCP_AES_MODE_GHASH_FINAL;
1811
1812	desc->length = GMAC_BLOCK_LEN;
1813
1814	desc->src_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH_IN);
1815	desc->src_mem = CCP_MEMTYPE_SB;
1816
1817	desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1818
1819	desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1820	desc->key_mem = CCP_MEMTYPE_SB;
1821
1822	desc->dst_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH);
1823	desc->dst_mem = CCP_MEMTYPE_SB;
1824
1825	qp->cq_tail = (qp->cq_tail + 1) %
1826	    (1 << qp->cq_softc->ring_size_order);
1827	return (0);
1828}
1829
static void
ccp_gcm_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
    int error)
{
	char tag[GMAC_DIGEST_LEN];
	struct cryptop *crp;

	crp = vcrp;

	s->pending--;

	if (error != 0) {
		crp->crp_etype = error;
		goto out;
	}

	/* Encrypt is done.  Decrypt needs to verify tag. */
	if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
		goto out;

	/* Copy in message tag. */
	crypto_copydata(crp, crp->crp_digest_start, s->gmac.hash_len, tag);

	/* Verify tag against computed GMAC */
	if (timingsafe_bcmp(tag, s->gmac.final_block, s->gmac.hash_len) != 0)
		crp->crp_etype = EBADMSG;

out:
	explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv));
	explicit_bzero(&s->gmac.final_block, sizeof(s->gmac.final_block));
	crypto_done(crp);
}

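/*
 * Queue a full AES-GCM operation: stage the IV/initial GHASH, key, and
 * encoded lengths in the LSB, GHASH the AAD (if any), run GCTR over the
 * payload, compute the final GHASH, and copy the resulting tag out.  The
 * request completes in ccp_gcm_done().
 */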
int __must_check
ccp_gcm(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
{
	const struct crypto_session_params *csp;
	struct ccp_completion_ctx ctx;
	enum ccp_cipher_dir dir;
	device_t dev;
	unsigned i;
	int error;

	if (s->blkcipher.key_len == 0)
		return (EINVAL);

	dev = qp->cq_softc->dev;

	if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
		dir = CCP_CIPHER_DIR_ENCRYPT;
	else
		dir = CCP_CIPHER_DIR_DECRYPT;

	/* Zero initial GHASH portion of context */
	memset(s->blkcipher.iv, 0, sizeof(s->blkcipher.iv));

	/* Gather IV data */
	csp = crypto_get_params(crp->crp_session);
	ccp_collect_iv(crp, csp, s->blkcipher.iv);

	/* Reverse order of key material for HW */
	ccp_byteswap(s->blkcipher.enckey, s->blkcipher.key_len);

	/* Prepare input buffer of concatenated lengths for final GHASH */
	be64enc(s->gmac.final_block, (uint64_t)crp->crp_aad_length * 8);
	be64enc(&s->gmac.final_block[8], (uint64_t)crp->crp_payload_length * 8);

	/* Send IV + initial zero GHASH, key data, and lengths buffer to LSB */
	error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV),
	    s->blkcipher.iv, 32);
	if (error != 0)
		return (error);
	error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_KEY),
	    s->blkcipher.enckey, s->blkcipher.key_len);
	if (error != 0)
		return (error);
	error = ccp_do_pst_to_lsb(qp,
	    ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH_IN), s->gmac.final_block,
	    GMAC_BLOCK_LEN);
	if (error != 0)
		return (error);

	/* First step - compute GHASH over AAD */
	if (crp->crp_aad_length != 0) {
		sglist_reset(qp->cq_sg_ulptx);
		error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
		    crp->crp_aad_start, crp->crp_aad_length);
		if (error != 0)
			return (error);

		/* Each AAD segment must be a multiple of GMAC_BLOCK_LEN. */
		for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++)
			if ((qp->cq_sg_ulptx->sg_segs[i].ss_len %
			    GMAC_BLOCK_LEN) != 0) {
				DPRINTF(dev, "%s: AD seg modulo: %zu\n",
				    __func__,
				    qp->cq_sg_ulptx->sg_segs[i].ss_len);
				return (EINVAL);
			}

		error = ccp_do_ghash_aad(qp, s);
		if (error != 0)
			return (error);
	}

	/* Feed data piece by piece into GCTR */
	sglist_reset(qp->cq_sg_ulptx);
	error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
	    crp->crp_payload_start, crp->crp_payload_length);
	if (error != 0)
		return (error);

	/*
	 * All segments except the last must be even multiples of the AES
	 * block size for the HW to process them.  Non-compliant inputs aren't
	 * bogus, just not doable on this hardware.
	 *
	 * XXX: Well, the hardware will produce a valid tag for shorter final
	 * segment inputs, but it will still write out a block-sized plaintext
	 * or ciphertext chunk.  For a typical CRP this tramples trailing data,
	 * including the provided message tag.  So, reject such inputs for now.
	 */
	for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++)
		if ((qp->cq_sg_ulptx->sg_segs[i].ss_len % AES_BLOCK_LEN) != 0) {
			DPRINTF(dev, "%s: seg modulo: %zu\n", __func__,
			    qp->cq_sg_ulptx->sg_segs[i].ss_len);
			return (EINVAL);
		}

	for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
		struct sglist_seg *seg;

		seg = &qp->cq_sg_ulptx->sg_segs[i];
		error = ccp_do_gctr(qp, s, dir, seg,
		    (i == 0 && crp->crp_aad_length == 0),
		    i == (qp->cq_sg_ulptx->sg_nseg - 1));
		if (error != 0)
			return (error);
	}

	/* Send just initial IV (not GHASH!) to LSB again */
	error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV),
	    s->blkcipher.iv, AES_BLOCK_LEN);
	if (error != 0)
		return (error);

	ctx.callback_fn = ccp_gcm_done;
	ctx.session = s;
	ctx.callback_arg = crp;

	/* Compute final hash and copy result back */
	error = ccp_do_ghash_final(qp, s);
	if (error != 0)
		return (error);

	/* When encrypting, copy computed tag out to caller buffer. */
	sglist_reset(qp->cq_sg_ulptx);
	if (dir == CCP_CIPHER_DIR_ENCRYPT)
		error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
		    crp->crp_digest_start, s->gmac.hash_len);
	else
		/*
		 * For decrypting, copy the computed tag out to our session
		 * buffer to verify in our callback.
		 */
		error = sglist_append(qp->cq_sg_ulptx, s->gmac.final_block,
		    s->gmac.hash_len);
	if (error != 0)
		return (error);
	error = ccp_passthrough_sgl(qp,
	    ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH), false, qp->cq_sg_ulptx,
	    s->gmac.hash_len, true, &ctx);
	return (error);
}

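/*
 * Supply entropy to random(4) from the CCP's TRNG output register.  A read
 * of zero means no entropy was available; retry a bounded number of times
 * before giving up on the request.
 */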
#define MAX_TRNG_RETRIES	10
u_int
random_ccp_read(void *v, u_int c)
{
	uint32_t *buf;
	u_int i, j;

	KASSERT(c % sizeof(*buf) == 0, ("%u not multiple of uint32_t", c));

	buf = v;
	for (i = c; i > 0; i -= sizeof(*buf)) {
		for (j = 0; j < MAX_TRNG_RETRIES; j++) {
			*buf = ccp_read_4(g_ccp_softc, TRNG_OUT_OFFSET);
			if (*buf != 0)
				break;
		}
		if (j == MAX_TRNG_RETRIES)
			return (0);
		buf++;
	}
	return (c);
}

#ifdef DDB
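/* Dump the device-global CCP registers from DDB. */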
void
db_ccp_show_hw(struct ccp_softc *sc)
{

	db_printf("  queue mask: 0x%x\n",
	    ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET));
	db_printf("  queue prio: 0x%x\n",
	    ccp_read_4(sc, CMD_QUEUE_PRIO_OFFSET));
	db_printf("  reqid: 0x%x\n", ccp_read_4(sc, CMD_REQID_CONFIG_OFFSET));
	db_printf("  trng output: 0x%x\n", ccp_read_4(sc, TRNG_OUT_OFFSET));
	db_printf("  cmd timeout: 0x%x\n",
	    ccp_read_4(sc, CMD_CMD_TIMEOUT_OFFSET));
	db_printf("  lsb public mask lo: 0x%x\n",
	    ccp_read_4(sc, LSB_PUBLIC_MASK_LO_OFFSET));
	db_printf("  lsb public mask hi: 0x%x\n",
	    ccp_read_4(sc, LSB_PUBLIC_MASK_HI_OFFSET));
	db_printf("  lsb private mask lo: 0x%x\n",
	    ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET));
	db_printf("  lsb private mask hi: 0x%x\n",
	    ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET));
	db_printf("  version: 0x%x\n", ccp_read_4(sc, VERSION_REG));
}

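/*
 * Dump per-queue control and status registers from DDB, decoding any error
 * reported in the status word and the descriptor it points at.
 */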
void
db_ccp_show_queue_hw(struct ccp_queue *qp)
{
	const struct ccp_error_code *ec;
	struct ccp_softc *sc;
	uint32_t status, error, esource, faultblock, headlo, qcontrol;
	unsigned q, i;

	sc = qp->cq_softc;
	q = qp->cq_qindex;

	qcontrol = ccp_read_queue_4(sc, q, CMD_Q_CONTROL_BASE);
	db_printf("  qcontrol: 0x%x%s%s\n", qcontrol,
	    (qcontrol & CMD_Q_RUN) ? " RUN" : "",
	    (qcontrol & CMD_Q_HALTED) ? " HALTED" : "");
	db_printf("  tail_lo: 0x%x\n",
	    ccp_read_queue_4(sc, q, CMD_Q_TAIL_LO_BASE));
	headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE);
	db_printf("  head_lo: 0x%x\n", headlo);
	db_printf("  int enable: 0x%x\n",
	    ccp_read_queue_4(sc, q, CMD_Q_INT_ENABLE_BASE));
	db_printf("  interrupt status: 0x%x\n",
	    ccp_read_queue_4(sc, q, CMD_Q_INTERRUPT_STATUS_BASE));
	status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE);
	db_printf("  status: 0x%x\n", status);
	db_printf("  int stats: 0x%x\n",
	    ccp_read_queue_4(sc, q, CMD_Q_INT_STATUS_BASE));

	error = status & STATUS_ERROR_MASK;
	if (error == 0)
		return;

	esource = (status >> STATUS_ERRORSOURCE_SHIFT) &
	    STATUS_ERRORSOURCE_MASK;
	faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) &
	    STATUS_VLSB_FAULTBLOCK_MASK;

	ec = NULL;
	for (i = 0; i < nitems(ccp_error_codes); i++)
		if (ccp_error_codes[i].ce_code == error)
			break;
	if (i < nitems(ccp_error_codes))
		ec = &ccp_error_codes[i];

	db_printf("  Error: %s (%u) Source: %u Faulting LSB block: %u\n",
	    (ec != NULL) ? ec->ce_name : "(reserved)", error, esource,
	    faultblock);
	if (ec != NULL)
		db_printf("  Error description: %s\n", ec->ce_desc);

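	/*
	 * Convert the head pointer back into a ring index so the faulting
	 * descriptor can be dumped.
	 */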
	i = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE;
	db_printf("  Bad descriptor idx: %u contents:\n  %32D\n", i,
	    (void *)&qp->desc_ring[i], " ");
}
#endif
