// SPDX-License-Identifier: GPL-2.0
/* Marvell OcteonTX CPT driver
 *
 * Copyright (C) 2019 Marvell International Ltd.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include "otx_cptvf.h"
#include "otx_cptvf_algs.h"

/* Completion code size and initial value */
#define COMPLETION_CODE_SIZE	8
#define COMPLETION_CODE_INIT	0

/* SG list header size in bytes */
#define SG_LIST_HDR_SIZE	8

/* Default timeout in microseconds when waiting for a free pending entry */
#define CPT_PENTRY_TIMEOUT	1000
#define CPT_PENTRY_STEP		50

/* Default thresholds for stopping and resuming sender requests */
#define CPT_IQ_STOP_MARGIN	128
#define CPT_IQ_RESUME_MARGIN	512

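/* Alignment, in bytes, used for the DMA-able buffers passed to CPT */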
#define CPT_DMA_ALIGN		128

void otx_cpt_dump_sg_list(struct pci_dev *pdev, struct otx_cpt_req_info *req)
{
	int i;

	pr_debug("Gather list size %d\n", req->incnt);
	for (i = 0; i < req->incnt; i++) {
		pr_debug("Buffer %d size %d, vptr 0x%p, dmaptr 0x%p\n", i,
			 req->in[i].size, req->in[i].vptr,
			 (void *) req->in[i].dma_addr);
		pr_debug("Buffer hexdump (%d bytes)\n",
			 req->in[i].size);
		print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1,
				     req->in[i].vptr, req->in[i].size, false);
	}

	pr_debug("Scatter list size %d\n", req->outcnt);
	for (i = 0; i < req->outcnt; i++) {
		pr_debug("Buffer %d size %d, vptr 0x%p, dmaptr 0x%p\n", i,
			 req->out[i].size, req->out[i].vptr,
			 (void *) req->out[i].dma_addr);
		pr_debug("Buffer hexdump (%d bytes)\n", req->out[i].size);
		print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1,
				     req->out[i].vptr, req->out[i].size, false);
	}
}

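/*
 * Get a free entry from the pending queue. Must be called with the
 * pending queue lock held; returns NULL if the queue is full.
 */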
static inline struct otx_cpt_pending_entry *get_free_pending_entry(
						struct otx_cpt_pending_queue *q,
						int qlen)
{
	struct otx_cpt_pending_entry *ent;

	ent = &q->head[q->rear];
	if (unlikely(ent->busy))
		return NULL;

	q->rear++;
	if (unlikely(q->rear == qlen))
		q->rear = 0;

	return ent;
}

static inline u32 modulo_inc(u32 index, u32 length, u32 inc)
{
	if (WARN_ON(inc > length))
		inc = length;

	index += inc;
	if (unlikely(index >= length))
		index -= length;

	return index;
}

static inline void free_pentry(struct otx_cpt_pending_entry *pentry)
{
	pentry->completion_addr = NULL;
	pentry->info = NULL;
	pentry->callback = NULL;
	pentry->areq = NULL;
	pentry->resume_sender = false;
	pentry->busy = false;
}

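/*
 * DMA-map every buffer in @list and pack the sizes and addresses into
 * hardware SG components, four buffers per component, in the big-endian
 * layout expected by the CPT engine.
 */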
static inline int setup_sgio_components(struct pci_dev *pdev,
					struct otx_cpt_buf_ptr *list,
					int buf_count, u8 *buffer)
{
	struct otx_cpt_sglist_component *sg_ptr;
	int ret = 0, i, j;
	int components;

	if (unlikely(!list)) {
		dev_err(&pdev->dev, "Input list pointer is NULL\n");
		return -EFAULT;
	}

	for (i = 0; i < buf_count; i++) {
		if (likely(list[i].vptr)) {
			list[i].dma_addr = dma_map_single(&pdev->dev,
							  list[i].vptr,
							  list[i].size,
							  DMA_BIDIRECTIONAL);
			if (unlikely(dma_mapping_error(&pdev->dev,
						       list[i].dma_addr))) {
				dev_err(&pdev->dev, "DMA mapping failed\n");
				ret = -EIO;
				goto sg_cleanup;
			}
		}
	}

	components = buf_count / 4;
	sg_ptr = (struct otx_cpt_sglist_component *)buffer;
	for (i = 0; i < components; i++) {
		sg_ptr->u.s.len0 = cpu_to_be16(list[i * 4 + 0].size);
		sg_ptr->u.s.len1 = cpu_to_be16(list[i * 4 + 1].size);
		sg_ptr->u.s.len2 = cpu_to_be16(list[i * 4 + 2].size);
		sg_ptr->u.s.len3 = cpu_to_be16(list[i * 4 + 3].size);
		sg_ptr->ptr0 = cpu_to_be64(list[i * 4 + 0].dma_addr);
		sg_ptr->ptr1 = cpu_to_be64(list[i * 4 + 1].dma_addr);
		sg_ptr->ptr2 = cpu_to_be64(list[i * 4 + 2].dma_addr);
		sg_ptr->ptr3 = cpu_to_be64(list[i * 4 + 3].dma_addr);
		sg_ptr++;
	}
	components = buf_count % 4;

	switch (components) {
	case 3:
		sg_ptr->u.s.len2 = cpu_to_be16(list[i * 4 + 2].size);
		sg_ptr->ptr2 = cpu_to_be64(list[i * 4 + 2].dma_addr);
		fallthrough;
	case 2:
		sg_ptr->u.s.len1 = cpu_to_be16(list[i * 4 + 1].size);
		sg_ptr->ptr1 = cpu_to_be64(list[i * 4 + 1].dma_addr);
		fallthrough;
	case 1:
		sg_ptr->u.s.len0 = cpu_to_be16(list[i * 4 + 0].size);
		sg_ptr->ptr0 = cpu_to_be64(list[i * 4 + 0].dma_addr);
		break;
	default:
		break;
	}
	return ret;

sg_cleanup:
	/* Unmap the buffers mapped so far */
	for (j = 0; j < i; j++) {
		if (list[j].dma_addr) {
			dma_unmap_single(&pdev->dev, list[j].dma_addr,
					 list[j].size, DMA_BIDIRECTIONAL);
		}

		list[j].dma_addr = 0;
	}
	return ret;
}

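/*
 * Allocate and set up the single DMA-able memory block used for a
 * request. Layout: the info structure itself, then the SG list header
 * plus gather and scatter components (DPTR), then the result structure
 * and the 8-byte completion code (RPTR). Everything after the info
 * structure is DMA-mapped in one go.
 */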
static inline int setup_sgio_list(struct pci_dev *pdev,
				  struct otx_cpt_info_buffer **pinfo,
				  struct otx_cpt_req_info *req, gfp_t gfp)
{
	u32 dlen, align_dlen, info_len, rlen;
	struct otx_cpt_info_buffer *info;
	u16 g_sz_bytes, s_sz_bytes;
	int align = CPT_DMA_ALIGN;
	u32 total_mem_len;

	if (unlikely(req->incnt > OTX_CPT_MAX_SG_IN_CNT ||
		     req->outcnt > OTX_CPT_MAX_SG_OUT_CNT)) {
		dev_err(&pdev->dev, "Error too many sg components\n");
		return -EINVAL;
	}

	g_sz_bytes = ((req->incnt + 3) / 4) *
		      sizeof(struct otx_cpt_sglist_component);
	s_sz_bytes = ((req->outcnt + 3) / 4) *
		      sizeof(struct otx_cpt_sglist_component);

	dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE;
	align_dlen = ALIGN(dlen, align);
	info_len = ALIGN(sizeof(*info), align);
	rlen = ALIGN(sizeof(union otx_cpt_res_s), align);
	total_mem_len = align_dlen + info_len + rlen + COMPLETION_CODE_SIZE;

	info = kzalloc(total_mem_len, gfp);
	if (unlikely(!info)) {
		dev_err(&pdev->dev, "Memory allocation failed\n");
		return -ENOMEM;
	}
	*pinfo = info;
	info->dlen = dlen;
	info->in_buffer = (u8 *)info + info_len;

	/* SG list header: scatter and gather buffer counts */
	((__be16 *)info->in_buffer)[0] = cpu_to_be16(req->outcnt);
	((__be16 *)info->in_buffer)[1] = cpu_to_be16(req->incnt);
	((u16 *)info->in_buffer)[2] = 0;
	((u16 *)info->in_buffer)[3] = 0;

	/* Setup gather (input) components */
	if (setup_sgio_components(pdev, req->in, req->incnt,
				  &info->in_buffer[SG_LIST_HDR_SIZE])) {
		dev_err(&pdev->dev, "Failed to setup gather list\n");
		return -EFAULT;
	}

	/* Setup scatter (output) components */
	if (setup_sgio_components(pdev, req->out, req->outcnt,
				  &info->in_buffer[SG_LIST_HDR_SIZE +
						   g_sz_bytes])) {
		dev_err(&pdev->dev, "Failed to setup scatter list\n");
		return -EFAULT;
	}

	info->dma_len = total_mem_len - info_len;
	info->dptr_baddr = dma_map_single(&pdev->dev, (void *)info->in_buffer,
					  info->dma_len, DMA_BIDIRECTIONAL);
	if (unlikely(dma_mapping_error(&pdev->dev, info->dptr_baddr))) {
		dev_err(&pdev->dev, "DMA mapping failed for cpt req\n");
		return -EIO;
	}
	/*
	 * Get buffer for union otx_cpt_res_s response
	 * structure and its physical address
	 */
	info->completion_addr = (u64 *)(info->in_buffer + align_dlen);
	info->comp_baddr = info->dptr_baddr + align_dlen;

	/* Create and initialize RPTR */
	info->out_buffer = (u8 *)info->completion_addr + rlen;
	info->rptr_baddr = info->comp_baddr + rlen;

	*((u64 *) info->out_buffer) = ~((u64) COMPLETION_CODE_INIT);

	return 0;
}
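/*
 * Release everything attached to a request: a minimal sketch of the
 * cleanup helper called from process_request() below, assuming all DMA
 * mappings were recorded in info and req by setup_sgio_list() above.
 */
static void do_request_cleanup(struct pci_dev *pdev,
			       struct otx_cpt_info_buffer *info)
{
	struct otx_cpt_req_info *req;
	int i;

	if (unlikely(!info))
		return;

	/* Unmap the DPTR buffer holding the SG lists and completion data */
	if (info->dptr_baddr)
		dma_unmap_single(&pdev->dev, info->dptr_baddr,
				 info->dma_len, DMA_BIDIRECTIONAL);

	if (info->req) {
		req = info->req;
		/* Unmap all gather and scatter buffers */
		for (i = 0; i < req->outcnt; i++) {
			if (req->out[i].dma_addr)
				dma_unmap_single(&pdev->dev,
						 req->out[i].dma_addr,
						 req->out[i].size,
						 DMA_BIDIRECTIONAL);
		}
		for (i = 0; i < req->incnt; i++) {
			if (req->in[i].dma_addr)
				dma_unmap_single(&pdev->dev,
						 req->in[i].dma_addr,
						 req->in[i].size,
						 DMA_BIDIRECTIONAL);
		}
	}
	kfree(info);
}
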
static void cpt_fill_inst(union otx_cpt_inst_s *inst,
			  struct otx_cpt_info_buffer *info,
			  struct otx_cpt_iq_cmd *cmd)
{
	inst->u[0] = 0x0;
	inst->s.doneint = true;
	inst->s.res_addr = (u64)info->comp_baddr;
	inst->u[2] = 0x0;
	inst->s.wq_ptr = 0;
	inst->s.ei0 = cmd->cmd.u64;
	inst->s.ei1 = cmd->dptr;
	inst->s.ei2 = cmd->rptr;
	inst->s.ei3 = cmd->cptr.u64;
}

/*
 * On the OcteonTX platform the doorbell count is used to inform CPT how
 * many instructions have been enqueued. The valid values are:
 * 0 - one CPT instruction is enqueued but CPT is not informed
 * 1 - one CPT instruction is enqueued and CPT is informed
 */
static void cpt_send_cmd(union otx_cpt_inst_s *cptinst, struct otx_cptvf *cptvf)
{
	struct otx_cpt_cmd_qinfo *qinfo = &cptvf->cqinfo;
	struct otx_cpt_cmd_queue *queue;
	struct otx_cpt_cmd_chunk *curr;
	u8 *ent;

	queue = &qinfo->queue[0];
	/*
	 * cpt_send_cmd is currently called only from critical section
	 * therefore no locking is required for accessing instruction queue
	 */
	ent = &queue->qhead->head[queue->idx * OTX_CPT_INST_SIZE];
	memcpy(ent, (void *) cptinst, OTX_CPT_INST_SIZE);

	/* Move to the next chunk when the current one is full */
	if (++queue->idx >= queue->qhead->size / OTX_CPT_INST_SIZE) {
		curr = queue->qhead;

		if (list_is_last(&curr->nextchunk, &queue->chead))
			queue->qhead = queue->base;
		else
			queue->qhead = list_next_entry(queue->qhead, nextchunk);
		queue->idx = 0;
	}
	/* make sure all memory stores are done before ringing doorbell */
	smp_wmb();
	otx_cptvf_write_vq_doorbell(cptvf, 1);
}

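/*
 * Build the SG lists and command for a request, reserve a pending queue
 * entry and submit the instruction to the hardware queue. Returns
 * -EINPROGRESS on successful submission, -EBUSY if the sender should
 * additionally stop and wait, or a negative error code on failure.
 */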
static int process_request(struct pci_dev *pdev, struct otx_cpt_req_info *req,
			   struct otx_cpt_pending_queue *pqueue,
			   struct otx_cptvf *cptvf)
{
	struct otx_cptvf_request *cpt_req = &req->req;
	struct otx_cpt_pending_entry *pentry = NULL;
	union otx_cpt_ctrl_info *ctrl = &req->ctrl;
	struct otx_cpt_info_buffer *info = NULL;
	union otx_cpt_res_s *result = NULL;
	struct otx_cpt_iq_cmd iq_cmd;
	union otx_cpt_inst_s cptinst;
	int retry, ret = 0;
	u8 resume_sender;
	gfp_t gfp;

	gfp = (req->areq->flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL :
							      GFP_ATOMIC;
	ret = setup_sgio_list(pdev, &info, req, gfp);
	if (unlikely(ret)) {
		dev_err(&pdev->dev, "Setting up SG list failed\n");
		goto request_cleanup;
	}
	cpt_req->dlen = info->dlen;

	result = (union otx_cpt_res_s *) info->completion_addr;
	result->s.compcode = COMPLETION_CODE_INIT;

	spin_lock_bh(&pqueue->lock);
	pentry = get_free_pending_entry(pqueue, pqueue->qlen);
	retry = CPT_PENTRY_TIMEOUT / CPT_PENTRY_STEP;
	while (unlikely(!pentry) && retry--) {
		spin_unlock_bh(&pqueue->lock);
		udelay(CPT_PENTRY_STEP);
		spin_lock_bh(&pqueue->lock);
		pentry = get_free_pending_entry(pqueue, pqueue->qlen);
	}

	if (unlikely(!pentry)) {
		ret = -ENOSPC;
		spin_unlock_bh(&pqueue->lock);
		goto request_cleanup;
	}

	/*
	 * Check if we are close to filling the entire pending queue;
	 * if so, tell the sender to stop/sleep by returning -EBUSY.
	 * We do this only for contexts which can sleep (GFP_KERNEL).
	 */
	if (gfp == GFP_KERNEL &&
	    pqueue->pending_count > (pqueue->qlen - CPT_IQ_STOP_MARGIN)) {
		pentry->resume_sender = true;
	} else {
		pentry->resume_sender = false;
	}
	resume_sender = pentry->resume_sender;
	pqueue->pending_count++;

	pentry->completion_addr = info->completion_addr;
	pentry->info = info;
	pentry->callback = req->callback;
	pentry->areq = req->areq;
	pentry->busy = true;
	info->pentry = pentry;
	info->time_in = jiffies;
	info->req = req;

	/* Fill in the command */
	iq_cmd.cmd.u64 = 0;
	iq_cmd.cmd.s.opcode = cpu_to_be16(cpt_req->opcode.flags);
	iq_cmd.cmd.s.param1 = cpu_to_be16(cpt_req->param1);
	iq_cmd.cmd.s.param2 = cpu_to_be16(cpt_req->param2);
	iq_cmd.cmd.s.dlen   = cpu_to_be16(cpt_req->dlen);

	iq_cmd.dptr = info->dptr_baddr;
	iq_cmd.rptr = info->rptr_baddr;
	iq_cmd.cptr.u64 = 0;
	iq_cmd.cptr.s.grp = ctrl->s.grp;

	/* Fill in the CPT_INST_S type command for HW interpretation */
	cpt_fill_inst(&cptinst, info, &iq_cmd);

	/* Print debug info if enabled */
	otx_cpt_dump_sg_list(pdev, req);
	pr_debug("Cpt_inst_s hexdump (%d bytes)\n", OTX_CPT_INST_SIZE);
	print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, &cptinst,
			     OTX_CPT_INST_SIZE, false);
	pr_debug("Dptr hexdump (%d bytes)\n", cpt_req->dlen);
	print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, info->in_buffer,
			     cpt_req->dlen, false);

	/* Send CPT command */
	cpt_send_cmd(&cptinst, cptvf);

	/*
	 * We allocate and prepare the pending queue entry in a critical
	 * section together with submitting the CPT instruction to the
	 * instruction queue, to make sure the order of requests is the
	 * same in both the pending and instruction queues.
	 */
	spin_unlock_bh(&pqueue->lock);

	ret = resume_sender ? -EBUSY : -EINPROGRESS;
	return ret;

request_cleanup:
	do_request_cleanup(pdev, info);
	return ret;
}

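/*
 * Entry point for request submission. Checks that the device is ready
 * and that the request type (SE vs AE) matches the VF engine type
 * before queuing the request for processing.
 */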
int otx_cpt_do_request(struct pci_dev *pdev, struct otx_cpt_req_info *req,
		       int cpu_num)
{
	struct otx_cptvf *cptvf = pci_get_drvdata(pdev);

	if (!otx_cpt_device_ready(cptvf)) {
		dev_err(&pdev->dev, "CPT Device is not ready\n");
		return -ENODEV;
	}

	if ((cptvf->vftype == OTX_CPT_SE_TYPES) && (!req->ctrl.s.se_req)) {
		dev_err(&pdev->dev, "CPTVF-%d of SE TYPE got AE request\n",
			cptvf->vfid);
		return -EINVAL;
	} else if ((cptvf->vftype == OTX_CPT_AE_TYPES) &&
		   (req->ctrl.s.se_req)) {
		dev_err(&pdev->dev, "CPTVF-%d of AE TYPE got SE request\n",
			cptvf->vfid);
		return -EINVAL;
	}

	return process_request(pdev, req, &cptvf->pqinfo.queue[0], cptvf);
}

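/*
 * Interpret the completion code written back by hardware and microcode.
 * Returns 1 if the request is still pending, otherwise 0 with the final
 * result stored in *res_code (0 on success).
 */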
static int cpt_process_ccode(struct pci_dev *pdev,
			     union otx_cpt_res_s *cpt_status,
			     struct otx_cpt_info_buffer *cpt_info,
			     struct otx_cpt_req_info *req, u32 *res_code)
{
	u8 ccode = cpt_status->s.compcode;
	union otx_cpt_error_code ecode;

	ecode.u = be64_to_cpup((__be64 *)cpt_info->out_buffer);
	switch (ccode) {
	case CPT_COMP_E_FAULT:
		dev_err(&pdev->dev,
			"Request failed with DMA fault\n");
		otx_cpt_dump_sg_list(pdev, req);
		break;

	case CPT_COMP_E_SWERR:
		dev_err(&pdev->dev,
			"Request failed with software error code %d\n",
			ecode.s.ccode);
		otx_cpt_dump_sg_list(pdev, req);
		break;

	case CPT_COMP_E_HWERR:
		dev_err(&pdev->dev,
			"Request failed with hardware error\n");
		otx_cpt_dump_sg_list(pdev, req);
		break;

	case COMPLETION_CODE_INIT:
		/* Check for timeout */
		if (time_after_eq(jiffies, cpt_info->time_in +
				  OTX_CPT_COMMAND_TIMEOUT * HZ)) {
			dev_warn(&pdev->dev, "Request timed out 0x%p\n", req);
		} else if (cpt_info->extra_time < OTX_CPT_TIME_IN_RESET_COUNT) {
			cpt_info->time_in = jiffies;
			cpt_info->extra_time++;
		}
		return 1;

	case CPT_COMP_E_GOOD:
		/* Check microcode completion code */
		if (ecode.s.ccode) {
			/*
			 * If the requested HMAC is truncated and the
			 * microcode returns an S/G write length error,
			 * report success: the microcode writes as many
			 * bytes of the calculated HMAC as fit in the
			 * output buffer and reports an S/G write length
			 * error when that buffer is smaller than the
			 * full HMAC.
			 */
			if (req->is_trunc_hmac &&
			    ecode.s.ccode == ERR_SCATTER_GATHER_WRITE_LENGTH) {
				*res_code = 0;
				break;
			}

			dev_err(&pdev->dev,
				"Request failed with software error code 0x%x\n",
				ecode.s.ccode);
			otx_cpt_dump_sg_list(pdev, req);
			break;
		}

		/* Request has been processed successfully */
		*res_code = 0;
		break;

	default:
		dev_err(&pdev->dev, "Request returned invalid status\n");
		break;
	}

	return 0;
}

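/*
 * Walk the pending queue from the front, completing finished requests
 * in submission order and invoking their callbacks. A sender that was
 * throttled with -EBUSY is resumed once its entry comes within
 * CPT_IQ_RESUME_MARGIN slots of the queue front.
 */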
static inline void process_pending_queue(struct pci_dev *pdev,
					 struct otx_cpt_pending_queue *pqueue)
{
	void (*callback)(int status, void *arg1, void *arg2);
	struct otx_cpt_pending_entry *resume_pentry = NULL;
	struct otx_cpt_pending_entry *pentry = NULL;
	struct otx_cpt_info_buffer *cpt_info = NULL;
	union otx_cpt_res_s *cpt_status = NULL;
	struct otx_cpt_req_info *req = NULL;
	struct crypto_async_request *areq;
	u32 res_code, resume_index;

	while (1) {
		spin_lock_bh(&pqueue->lock);
		pentry = &pqueue->head[pqueue->front];

		if (WARN_ON(!pentry)) {
			spin_unlock_bh(&pqueue->lock);
			break;
		}

		res_code = -EINVAL;
		if (unlikely(!pentry->busy)) {
			spin_unlock_bh(&pqueue->lock);
			break;
		}

		if (unlikely(!pentry->callback)) {
			dev_err(&pdev->dev, "Callback NULL\n");
			goto process_pentry;
		}

		cpt_info = pentry->info;
		if (unlikely(!cpt_info)) {
			dev_err(&pdev->dev, "Pending entry post arg NULL\n");
			goto process_pentry;
		}

		req = cpt_info->req;
		if (unlikely(!req)) {
			dev_err(&pdev->dev, "Request NULL\n");
			goto process_pentry;
		}

		cpt_status = (union otx_cpt_res_s *) pentry->completion_addr;
		if (unlikely(!cpt_status)) {
			dev_err(&pdev->dev, "Completion address NULL\n");
			goto process_pentry;
		}

		/* A non-zero return means the request is still pending */
		if (cpt_process_ccode(pdev, cpt_status, cpt_info, req,
				      &res_code)) {
			spin_unlock_bh(&pqueue->lock);
			return;
		}
		cpt_info->pdev = pdev;

process_pentry:
		/*
		 * Check if we should inform the sending side to resume.
		 * We do it CPT_IQ_RESUME_MARGIN entries in advance, before
		 * the pending queue becomes empty.
		 */
		resume_index = modulo_inc(pqueue->front, pqueue->qlen,
					  CPT_IQ_RESUME_MARGIN);
		resume_pentry = &pqueue->head[resume_index];
		if (resume_pentry &&
		    resume_pentry->resume_sender) {
			resume_pentry->resume_sender = false;
			callback = resume_pentry->callback;
			areq = resume_pentry->areq;

			if (callback) {
				spin_unlock_bh(&pqueue->lock);

				/*
				 * -EINPROGRESS is an indication for the
				 * sending side that it can resume sending
				 * requests.
				 */
				callback(-EINPROGRESS, areq, cpt_info);
				spin_lock_bh(&pqueue->lock);
			}
		}

		callback = pentry->callback;
		areq = pentry->areq;
		free_pentry(pentry);

		pqueue->pending_count--;
		pqueue->front = modulo_inc(pqueue->front, pqueue->qlen, 1);
		spin_unlock_bh(&pqueue->lock);

		/*
		 * Call the callback after the current pending entry has
		 * been processed; skip it if the callback pointer is NULL.
		 */
		if (callback)
			callback(res_code, areq, cpt_info);
	}
}

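/*
 * Process all completed requests on the pending queue of the VF
 * associated with the given work entry.
 */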
void otx_cpt_post_process(struct otx_cptvf_wqe *wqe)
{
	process_pending_queue(wqe->cptvf->pdev, &wqe->cptvf->pqinfo.queue[0]);
}