/* SPDX-License-Identifier: GPL-2.0-only
 * Copyright (C) 2020 Marvell.
 */

#ifndef __OTX2_CPT_REQMGR_H
#define __OTX2_CPT_REQMGR_H

#include "otx2_cpt_common.h"

/* Completion code size and initial value */
#define OTX2_CPT_COMPLETION_CODE_SIZE 8
#define OTX2_CPT_COMPLETION_CODE_INIT OTX2_CPT_COMP_E_NOTDONE
/*
 * The maximum total number of SG buffers is 100; they are divided
 * equally between input and output.
 */
#define OTX2_CPT_MAX_SG_IN_CNT  50
#define OTX2_CPT_MAX_SG_OUT_CNT 50

/* DMA mode direct or SG */
#define OTX2_CPT_DMA_MODE_DIRECT 0
#define OTX2_CPT_DMA_MODE_SG     1

/* Context source CPTR or DPTR */
#define OTX2_CPT_FROM_CPTR 0
#define OTX2_CPT_FROM_DPTR 1

#define OTX2_CPT_MAX_REQ_SIZE 65535

#define SG_COMPS_MAX    4
#define SGV2_COMPS_MAX  3

#define SG_COMP_3    3
#define SG_COMP_2    2
#define SG_COMP_1    1

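/*
 * Microcode opcode, viewed either as a raw 16-bit word or as a
 * major/minor opcode pair.
 */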
union otx2_cpt_opcode {
	u16 flags;
	struct {
		u8 major;
		u8 minor;
	} s;
};

struct otx2_cptvf_request {
	u32 param1;
	u32 param2;
	u16 dlen;
	union otx2_cpt_opcode opcode;
	dma_addr_t cptr_dma;
	void *cptr;
};

/*
 * CPT_INST_S software command definitions
 * Words EI (0-3)
 */
union otx2_cpt_iq_cmd_word0 {
	u64 u;
	struct {
		__be16 opcode;
		__be16 param1;
		__be16 param2;
		__be16 dlen;
	} s;
};

union otx2_cpt_iq_cmd_word3 {
	u64 u;
	struct {
		u64 cptr:61;
		u64 grp:3;
	} s;
};

struct otx2_cpt_iq_command {
	union otx2_cpt_iq_cmd_word0 cmd;
	u64 dptr;
	u64 rptr;
	union otx2_cpt_iq_cmd_word3 cptr;
};

struct otx2_cpt_pending_entry {
	void *completion_addr;	/* Completion address */
	void *info;
	/* Kernel async request callback */
	void (*callback)(int status, void *arg1, void *arg2);
	struct crypto_async_request *areq; /* Async request callback arg */
	u8 resume_sender;	/* Notify sender to resume sending requests */
	u8 busy;		/* Entry status (free/busy) */
};

struct otx2_cpt_pending_queue {
	struct otx2_cpt_pending_entry *head; /* Head of the queue */
	u32 front;		/* Process work from here */
	u32 rear;		/* Append new work here */
	u32 pending_count;	/* Pending requests count */
	u32 qlen;		/* Queue length */
	spinlock_t lock;	/* Queue lock */
};

struct otx2_cpt_buf_ptr {
	u8 *vptr;
	dma_addr_t dma_addr;
	u16 size;
};

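/*
 * Per-request control word. In the little-endian layout below, bit 0
 * steers the request to an SE core, bits 1-2 select the DMA mode
 * (OTX2_CPT_DMA_MODE_*), bits 3-5 select the engine group and bits
 * 6-31 are reserved.
 */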
union otx2_cpt_ctrl_info {
	u32 flags;
	struct {
#if defined(__BIG_ENDIAN_BITFIELD)
		u32 reserved_6_31:26;
		u32 grp:3;	/* Group bits */
		u32 dma_mode:2;	/* DMA mode */
		u32 se_req:1;	/* To SE core */
#else
		u32 se_req:1;	/* To SE core */
		u32 dma_mode:2;	/* DMA mode */
		u32 grp:3;	/* Group bits */
		u32 reserved_6_31:26;
#endif
	} s;
};

struct otx2_cpt_req_info {
	/* Kernel async request callback */
	void (*callback)(int status, void *arg1, void *arg2);
	struct crypto_async_request *areq; /* Async request callback arg */
	struct otx2_cptvf_request req;/* Request information (core specific) */
	union otx2_cpt_ctrl_info ctrl;/* User control information */
	struct otx2_cpt_buf_ptr in[OTX2_CPT_MAX_SG_IN_CNT];
	struct otx2_cpt_buf_ptr out[OTX2_CPT_MAX_SG_OUT_CNT];
	u8 *iv_out;     /* IV to send back */
	u16 rlen;	/* Output length */
	u8 in_cnt;	/* Number of input buffers */
	u8 out_cnt;	/* Number of output buffers */
	u8 req_type;	/* Type of request */
	u8 is_enc;	/* Is this an encryption request */
	u8 is_trunc_hmac;/* Is truncated HMAC used */
};

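/*
 * Per-instruction bookkeeping: DMA addresses of the gather list
 * (dptr_baddr), scatter list (rptr_baddr) and completion word
 * (comp_baddr), plus the enqueue time used for timeout handling.
 */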
struct otx2_cpt_inst_info {
	struct otx2_cpt_pending_entry *pentry;
	struct otx2_cpt_req_info *req;
	struct pci_dev *pdev;
	void *completion_addr;
	u8 *out_buffer;
	u8 *in_buffer;
	dma_addr_t dptr_baddr;
	dma_addr_t rptr_baddr;
	dma_addr_t comp_baddr;
	unsigned long time_in;
	u32 dlen;
	u32 dma_len;
	u64 gthr_sz;
	u64 sctr_sz;
	u8 extra_time;
};

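/* Hardware SG list component: up to four big-endian length/pointer pairs */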
struct otx2_cpt_sglist_component {
	__be16 len0;
	__be16 len1;
	__be16 len2;
	__be16 len3;
	__be64 ptr0;
	__be64 ptr1;
	__be64 ptr2;
	__be64 ptr3;
};

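/*
 * CN10KB SG list (v2) component: up to three native-endian
 * length/pointer pairs plus a valid segment count.
 */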
struct cn10kb_cpt_sglist_component {
	u16 len0;
	u16 len1;
	u16 len2;
	u16 valid_segs;
	u64 ptr0;
	u64 ptr1;
	u64 ptr2;
};

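/*
 * Undo all DMA mappings taken for a request (the combined SG/response
 * buffer plus each input and output buffer) and free the info
 * structure itself.
 */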
static inline void otx2_cpt_info_destroy(struct pci_dev *pdev,
					 struct otx2_cpt_inst_info *info)
{
	struct otx2_cpt_req_info *req;
	int i;

	if (info->dptr_baddr)
		dma_unmap_single(&pdev->dev, info->dptr_baddr,
				 info->dma_len, DMA_BIDIRECTIONAL);

	if (info->req) {
		req = info->req;
		for (i = 0; i < req->out_cnt; i++) {
			if (req->out[i].dma_addr)
				dma_unmap_single(&pdev->dev,
						 req->out[i].dma_addr,
						 req->out[i].size,
						 DMA_BIDIRECTIONAL);
		}

		for (i = 0; i < req->in_cnt; i++) {
			if (req->in[i].dma_addr)
				dma_unmap_single(&pdev->dev,
						 req->in[i].dma_addr,
						 req->in[i].size,
						 DMA_BIDIRECTIONAL);
		}
	}
	kfree(info);
}

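/*
 * DMA-map every buffer in @list and pack the mappings into SG
 * components at @buffer, four buffers per component. Full components
 * are written in the loop; the 1-3 leftover buffers are handled by the
 * fallthrough switch. On a mapping failure all previous mappings are
 * unwound before returning.
 */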
static inline int setup_sgio_components(struct pci_dev *pdev,
					struct otx2_cpt_buf_ptr *list,
					int buf_count, u8 *buffer)
{
	struct otx2_cpt_sglist_component *sg_ptr;
	int components;
	int i, j;

	if (unlikely(!list)) {
		dev_err(&pdev->dev, "Input list pointer is NULL\n");
		return -EINVAL;
	}

	for (i = 0; i < buf_count; i++) {
		if (unlikely(!list[i].vptr))
			continue;
		list[i].dma_addr = dma_map_single(&pdev->dev, list[i].vptr,
						  list[i].size,
						  DMA_BIDIRECTIONAL);
		if (unlikely(dma_mapping_error(&pdev->dev, list[i].dma_addr))) {
			dev_err(&pdev->dev, "DMA mapping failed\n");
			goto sg_cleanup;
		}
	}
	components = buf_count / SG_COMPS_MAX;
	sg_ptr = (struct otx2_cpt_sglist_component *)buffer;
	for (i = 0; i < components; i++) {
		sg_ptr->len0 = cpu_to_be16(list[i * SG_COMPS_MAX + 0].size);
		sg_ptr->len1 = cpu_to_be16(list[i * SG_COMPS_MAX + 1].size);
		sg_ptr->len2 = cpu_to_be16(list[i * SG_COMPS_MAX + 2].size);
		sg_ptr->len3 = cpu_to_be16(list[i * SG_COMPS_MAX + 3].size);
		sg_ptr->ptr0 = cpu_to_be64(list[i * SG_COMPS_MAX + 0].dma_addr);
		sg_ptr->ptr1 = cpu_to_be64(list[i * SG_COMPS_MAX + 1].dma_addr);
		sg_ptr->ptr2 = cpu_to_be64(list[i * SG_COMPS_MAX + 2].dma_addr);
		sg_ptr->ptr3 = cpu_to_be64(list[i * SG_COMPS_MAX + 3].dma_addr);
		sg_ptr++;
	}
	components = buf_count % SG_COMPS_MAX;

	switch (components) {
	case SG_COMP_3:
		sg_ptr->len2 = cpu_to_be16(list[i * SG_COMPS_MAX + 2].size);
		sg_ptr->ptr2 = cpu_to_be64(list[i * SG_COMPS_MAX + 2].dma_addr);
		fallthrough;
	case SG_COMP_2:
		sg_ptr->len1 = cpu_to_be16(list[i * SG_COMPS_MAX + 1].size);
		sg_ptr->ptr1 = cpu_to_be64(list[i * SG_COMPS_MAX + 1].dma_addr);
		fallthrough;
	case SG_COMP_1:
		sg_ptr->len0 = cpu_to_be16(list[i * SG_COMPS_MAX + 0].size);
		sg_ptr->ptr0 = cpu_to_be64(list[i * SG_COMPS_MAX + 0].dma_addr);
		break;
	default:
		break;
	}
	return 0;

sg_cleanup:
	for (j = 0; j < i; j++) {
		if (list[j].dma_addr) {
			dma_unmap_single(&pdev->dev, list[j].dma_addr,
					 list[j].size, DMA_BIDIRECTIONAL);
		}

		list[j].dma_addr = 0;
	}
	return -EIO;
}

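/*
 * SG v2 (CN10KB) counterpart of setup_sgio_components(): three buffers
 * per component, native-endian fields and an explicit valid_segs count
 * in every component.
 */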
static inline int sgv2io_components_setup(struct pci_dev *pdev,
					  struct otx2_cpt_buf_ptr *list,
					  int buf_count, u8 *buffer)
{
	struct cn10kb_cpt_sglist_component *sg_ptr;
	int components;
	int i, j;

	if (unlikely(!list)) {
		dev_err(&pdev->dev, "Input list pointer is NULL\n");
		return -EFAULT;
	}

	for (i = 0; i < buf_count; i++) {
		if (unlikely(!list[i].vptr))
			continue;
		list[i].dma_addr = dma_map_single(&pdev->dev, list[i].vptr,
						  list[i].size,
						  DMA_BIDIRECTIONAL);
		if (unlikely(dma_mapping_error(&pdev->dev, list[i].dma_addr))) {
			dev_err(&pdev->dev, "DMA mapping failed\n");
			goto sg_cleanup;
		}
	}
	components = buf_count / SGV2_COMPS_MAX;
	sg_ptr = (struct cn10kb_cpt_sglist_component *)buffer;
	for (i = 0; i < components; i++) {
		sg_ptr->len0 = list[i * SGV2_COMPS_MAX + 0].size;
		sg_ptr->len1 = list[i * SGV2_COMPS_MAX + 1].size;
		sg_ptr->len2 = list[i * SGV2_COMPS_MAX + 2].size;
		sg_ptr->ptr0 = list[i * SGV2_COMPS_MAX + 0].dma_addr;
		sg_ptr->ptr1 = list[i * SGV2_COMPS_MAX + 1].dma_addr;
		sg_ptr->ptr2 = list[i * SGV2_COMPS_MAX + 2].dma_addr;
		sg_ptr->valid_segs = SGV2_COMPS_MAX;
		sg_ptr++;
	}
	components = buf_count % SGV2_COMPS_MAX;

	sg_ptr->valid_segs = components;
	switch (components) {
	case SG_COMP_2:
		sg_ptr->len1 = list[i * SGV2_COMPS_MAX + 1].size;
		sg_ptr->ptr1 = list[i * SGV2_COMPS_MAX + 1].dma_addr;
		fallthrough;
	case SG_COMP_1:
		sg_ptr->len0 = list[i * SGV2_COMPS_MAX + 0].size;
		sg_ptr->ptr0 = list[i * SGV2_COMPS_MAX + 0].dma_addr;
		break;
	default:
		break;
	}
	return 0;

sg_cleanup:
	for (j = 0; j < i; j++) {
		if (list[j].dma_addr) {
			dma_unmap_single(&pdev->dev, list[j].dma_addr,
					 list[j].size, DMA_BIDIRECTIONAL);
		}

		list[j].dma_addr = 0;
	}
	return -EIO;
}

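/*
 * Build the per-request info structure for SG v2 (CN10KB) hardware.
 * A single allocation is laid out as
 *
 *	[info][gather components][scatter components][union otx2_cpt_res_s]
 *
 * and everything past the info header is DMA-mapped in one shot:
 * dptr_baddr points at the gather list, rptr_baddr at the scatter list
 * and comp_baddr at the response word.
 */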
static inline struct otx2_cpt_inst_info *
cn10k_sgv2_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
		       gfp_t gfp)
{
	u32 dlen = 0, g_len, sg_len, info_len;
	int align = OTX2_CPT_DMA_MINALIGN;
	struct otx2_cpt_inst_info *info;
	u16 g_sz_bytes, s_sz_bytes;
	u32 total_mem_len;
	int i;

	/* Same bounds check as in otx2_sg_info_create() below */
	if (unlikely(req->in_cnt > OTX2_CPT_MAX_SG_IN_CNT ||
		     req->out_cnt > OTX2_CPT_MAX_SG_OUT_CNT)) {
		dev_err(&pdev->dev, "Error: too many SG components\n");
		return NULL;
	}

	g_sz_bytes = ((req->in_cnt + 2) / 3) *
		      sizeof(struct cn10kb_cpt_sglist_component);
	s_sz_bytes = ((req->out_cnt + 2) / 3) *
		      sizeof(struct cn10kb_cpt_sglist_component);

	g_len = ALIGN(g_sz_bytes, align);
	sg_len = ALIGN(g_len + s_sz_bytes, align);
	info_len = ALIGN(sizeof(*info), align);
	total_mem_len = sg_len + info_len + sizeof(union otx2_cpt_res_s);

	info = kzalloc(total_mem_len, gfp);
	if (unlikely(!info))
		return NULL;

	for (i = 0; i < req->in_cnt; i++)
		dlen += req->in[i].size;

	info->dlen = dlen;
	info->in_buffer = (u8 *)info + info_len;
	info->gthr_sz = req->in_cnt;
	info->sctr_sz = req->out_cnt;

	/* Setup gather (input) components */
	if (sgv2io_components_setup(pdev, req->in, req->in_cnt,
				    info->in_buffer)) {
		dev_err(&pdev->dev, "Failed to setup gather list\n");
		goto destroy_info;
	}

	/* Setup scatter (output) components */
	if (sgv2io_components_setup(pdev, req->out, req->out_cnt,
				    &info->in_buffer[g_len])) {
		dev_err(&pdev->dev, "Failed to setup scatter list\n");
		goto destroy_info;
	}

	info->dma_len = total_mem_len - info_len;
	info->dptr_baddr = dma_map_single(&pdev->dev, info->in_buffer,
					  info->dma_len, DMA_BIDIRECTIONAL);
	if (unlikely(dma_mapping_error(&pdev->dev, info->dptr_baddr))) {
		dev_err(&pdev->dev, "DMA mapping failed for CPT req\n");
		goto destroy_info;
	}
	info->rptr_baddr = info->dptr_baddr + g_len;
	/*
	 * Get buffer for union otx2_cpt_res_s response
	 * structure and its physical address
	 */
	info->completion_addr = info->in_buffer + sg_len;
	info->comp_baddr = info->dptr_baddr + sg_len;

	return info;

destroy_info:
	otx2_cpt_info_destroy(pdev, info);
	return NULL;
}

/* SG list header size in bytes */
#define SG_LIST_HDR_SIZE	8
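/*
 * Build the per-request info structure for the original SG format.
 * The DPTR buffer starts with an 8-byte big-endian header carrying the
 * scatter and gather component counts, so the allocation is laid out as
 *
 *	[info][header][gather components][scatter components][union otx2_cpt_res_s]
 */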
static inline struct otx2_cpt_inst_info *
otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
		    gfp_t gfp)
{
	int align = OTX2_CPT_DMA_MINALIGN;
	struct otx2_cpt_inst_info *info;
	u32 dlen, align_dlen, info_len;
	u16 g_sz_bytes, s_sz_bytes;
	u32 total_mem_len;

	if (unlikely(req->in_cnt > OTX2_CPT_MAX_SG_IN_CNT ||
		     req->out_cnt > OTX2_CPT_MAX_SG_OUT_CNT)) {
		dev_err(&pdev->dev, "Error: too many SG components\n");
		return NULL;
	}

	g_sz_bytes = ((req->in_cnt + 3) / 4) *
		      sizeof(struct otx2_cpt_sglist_component);
	s_sz_bytes = ((req->out_cnt + 3) / 4) *
		      sizeof(struct otx2_cpt_sglist_component);

	dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE;
	align_dlen = ALIGN(dlen, align);
	info_len = ALIGN(sizeof(*info), align);
	total_mem_len = align_dlen + info_len + sizeof(union otx2_cpt_res_s);

	info = kzalloc(total_mem_len, gfp);
	if (unlikely(!info))
		return NULL;

	info->dlen = dlen;
	info->in_buffer = (u8 *)info + info_len;

	/* SG list header: scatter and gather component counts, big-endian */
	((u16 *)info->in_buffer)[0] = req->out_cnt;
	((u16 *)info->in_buffer)[1] = req->in_cnt;
	((u16 *)info->in_buffer)[2] = 0;
	((u16 *)info->in_buffer)[3] = 0;
	cpu_to_be64s((u64 *)info->in_buffer);

	/* Setup gather (input) components */
	if (setup_sgio_components(pdev, req->in, req->in_cnt,
				  &info->in_buffer[SG_LIST_HDR_SIZE])) {
		dev_err(&pdev->dev, "Failed to setup gather list\n");
		goto destroy_info;
	}

	/* Setup scatter (output) components */
	if (setup_sgio_components(pdev, req->out, req->out_cnt,
				  &info->in_buffer[SG_LIST_HDR_SIZE +
						   g_sz_bytes])) {
		dev_err(&pdev->dev, "Failed to setup scatter list\n");
		goto destroy_info;
	}

	info->dma_len = total_mem_len - info_len;
	info->dptr_baddr = dma_map_single(&pdev->dev, info->in_buffer,
					  info->dma_len, DMA_BIDIRECTIONAL);
	if (unlikely(dma_mapping_error(&pdev->dev, info->dptr_baddr))) {
		dev_err(&pdev->dev, "DMA mapping failed for CPT req\n");
		goto destroy_info;
	}
	/*
	 * Get buffer for union otx2_cpt_res_s response
	 * structure and its physical address
	 */
	info->completion_addr = info->in_buffer + align_dlen;
	info->comp_baddr = info->dptr_baddr + align_dlen;

	return info;

destroy_info:
	otx2_cpt_info_destroy(pdev, info);
	return NULL;
}

struct otx2_cptlf_wqe;
int otx2_cpt_do_request(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
			int cpu_num);
void otx2_cpt_post_process(struct otx2_cptlf_wqe *wqe);
int otx2_cpt_get_kcrypto_eng_grp_num(struct pci_dev *pdev);
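
/*
 * Typical usage (a rough sketch, not the authoritative flow; see the
 * request manager implementation for the real thing): fill an
 * otx2_cpt_req_info with the opcode, params, dlen and the in[]/out[]
 * buffers, then submit it on a per-CPU queue:
 *
 *	ret = otx2_cpt_do_request(pdev, req, smp_processor_id());
 *
 * Completion is reported asynchronously: otx2_cpt_post_process() runs
 * in the CPT LF completion context (struct otx2_cptlf_wqe) and invokes
 * req->callback(status, arg1, arg2).
 */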

#endif /* __OTX2_CPT_REQMGR_H */