// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)

#include <linux/dma-mapping.h>
#include <linux/interrupt.h>
#include <linux/log2.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/pci.h>
#include <linux/slab.h>

#include "fun_dev.h"
#include "fun_queue.h"

/* Allocate memory for a queue. This includes the memory for the HW descriptor
 * ring, an optional 64b HW write-back area, and an optional SW state ring.
 * Returns the virtual and DMA addresses of the HW ring, the VA of the SW ring,
 * and the VA of the write-back area.
 */
void *fun_alloc_ring_mem(struct device *dma_dev, size_t depth,
			 size_t hw_desc_sz, size_t sw_desc_sz, bool wb,
			 int numa_node, dma_addr_t *dma_addr, void **sw_va,
			 volatile __be64 **wb_va)
{
	int dev_node = dev_to_node(dma_dev);
	size_t dma_sz;
	void *va;

	if (numa_node == NUMA_NO_NODE)
		numa_node = dev_node;

	/* Place optional write-back area at end of descriptor ring. */
	dma_sz = hw_desc_sz * depth;
	if (wb)
		dma_sz += sizeof(u64);

	set_dev_node(dma_dev, numa_node);
	va = dma_alloc_coherent(dma_dev, dma_sz, dma_addr, GFP_KERNEL);
	set_dev_node(dma_dev, dev_node);
	if (!va)
		return NULL;

	if (sw_desc_sz) {
		*sw_va = kvzalloc_node(sw_desc_sz * depth, GFP_KERNEL,
				       numa_node);
		if (!*sw_va) {
			dma_free_coherent(dma_dev, dma_sz, va, *dma_addr);
			return NULL;
		}
	}

	if (wb)
		*wb_va = va + dma_sz - sizeof(u64);
	return va;
}
EXPORT_SYMBOL_GPL(fun_alloc_ring_mem);

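/* Free the memory returned by fun_alloc_ring_mem(): the coherent HW ring
 * (including its optional write-back slot) and the optional SW state ring.
 */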
void fun_free_ring_mem(struct device *dma_dev, size_t depth, size_t hw_desc_sz,
		       bool wb, void *hw_va, dma_addr_t dma_addr, void *sw_va)
{
	if (hw_va) {
		size_t sz = depth * hw_desc_sz;

		if (wb)
			sz += sizeof(u64);
		dma_free_coherent(dma_dev, sz, hw_va, dma_addr);
	}
	kvfree(sw_va);
}
EXPORT_SYMBOL_GPL(fun_free_ring_mem);

/* Prepare and issue an admin command to create an SQ on the device with the
 * provided parameters. If the queue ID is auto-allocated by the device it is
 * returned in *sqidp.
 */
int fun_sq_create(struct fun_dev *fdev, u16 flags, u32 sqid, u32 cqid,
		  u8 sqe_size_log2, u32 sq_depth, dma_addr_t dma_addr,
		  u8 coal_nentries, u8 coal_usec, u32 irq_num,
		  u32 scan_start_id, u32 scan_end_id,
		  u32 rq_buf_size_log2, u32 *sqidp, u32 __iomem **dbp)
{
	union {
		struct fun_admin_epsq_req req;
		struct fun_admin_generic_create_rsp rsp;
	} cmd;
	dma_addr_t wb_addr;
	u32 hw_qid;
	int rc;

	if (sq_depth > fdev->q_depth)
		return -EINVAL;
	if (flags & FUN_ADMIN_EPSQ_CREATE_FLAG_RQ)
		sqe_size_log2 = ilog2(sizeof(struct fun_eprq_rqbuf));

	wb_addr = dma_addr + (sq_depth << sqe_size_log2);

	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_EPSQ,
						    sizeof(cmd.req));
	cmd.req.u.create =
		FUN_ADMIN_EPSQ_CREATE_REQ_INIT(FUN_ADMIN_SUBOP_CREATE, flags,
					       sqid, cqid, sqe_size_log2,
					       sq_depth - 1, dma_addr, 0,
					       coal_nentries, coal_usec,
					       irq_num, scan_start_id,
					       scan_end_id, 0,
					       rq_buf_size_log2,
					       ilog2(sizeof(u64)), wb_addr);

	rc = fun_submit_admin_sync_cmd(fdev, &cmd.req.common,
				       &cmd.rsp, sizeof(cmd.rsp), 0);
	if (rc)
		return rc;

	hw_qid = be32_to_cpu(cmd.rsp.id);
	*dbp = fun_sq_db_addr(fdev, hw_qid);
	if (flags & FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR)
		*sqidp = hw_qid;
	return rc;
}
EXPORT_SYMBOL_GPL(fun_sq_create);

/* Prepare and issue an admin command to create a CQ on the device with the
 * provided parameters. If the queue ID is auto-allocated by the device it is
 * returned in *cqidp.
 */
int fun_cq_create(struct fun_dev *fdev, u16 flags, u32 cqid, u32 rqid,
		  u8 cqe_size_log2, u32 cq_depth, dma_addr_t dma_addr,
		  u16 headroom, u16 tailroom, u8 coal_nentries, u8 coal_usec,
		  u32 irq_num, u32 scan_start_id, u32 scan_end_id, u32 *cqidp,
		  u32 __iomem **dbp)
{
	union {
		struct fun_admin_epcq_req req;
		struct fun_admin_generic_create_rsp rsp;
	} cmd;
	u32 hw_qid;
	int rc;

	if (cq_depth > fdev->q_depth)
		return -EINVAL;

	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_EPCQ,
						    sizeof(cmd.req));
	cmd.req.u.create =
		FUN_ADMIN_EPCQ_CREATE_REQ_INIT(FUN_ADMIN_SUBOP_CREATE, flags,
					       cqid, rqid, cqe_size_log2,
					       cq_depth - 1, dma_addr, tailroom,
					       headroom / 2, 0, coal_nentries,
					       coal_usec, irq_num,
					       scan_start_id, scan_end_id, 0);

	rc = fun_submit_admin_sync_cmd(fdev, &cmd.req.common,
				       &cmd.rsp, sizeof(cmd.rsp), 0);
	if (rc)
		return rc;

	hw_qid = be32_to_cpu(cmd.rsp.id);
	*dbp = fun_cq_db_addr(fdev, hw_qid);
	if (flags & FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR)
		*cqidp = hw_qid;
	return rc;
}
EXPORT_SYMBOL_GPL(fun_cq_create);

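/* Return true if the SQ is configured for head write-back, i.e. the device
 * reports the current SQ head by writing it to host memory.
 */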
static bool fun_sq_is_head_wb(const struct fun_queue *funq)
{
	return funq->sq_flags & FUN_ADMIN_EPSQ_CREATE_FLAG_HEAD_WB_ADDRESS;
}

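/* Unmap and release all pages currently posted to a funq's RQ. */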
static void fun_clean_rq(struct fun_queue *funq)
{
	struct fun_dev *fdev = funq->fdev;
	struct fun_rq_info *rqinfo;
	unsigned int i;

	for (i = 0; i < funq->rq_depth; i++) {
		rqinfo = &funq->rq_info[i];
		if (rqinfo->page) {
			dma_unmap_page(fdev->dev, rqinfo->dma, PAGE_SIZE,
				       DMA_FROM_DEVICE);
			put_page(rqinfo->page);
			rqinfo->page = NULL;
		}
	}
}

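/* Allocate and DMA-map a page for each RQ descriptor and write the matching
 * buffer descriptor into the RQ ring. On failure, the pages mapped so far are
 * released later by fun_clean_rq().
 */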
static int fun_fill_rq(struct fun_queue *funq)
{
	struct device *dev = funq->fdev->dev;
	int i, node = dev_to_node(dev);
	struct fun_rq_info *rqinfo;

	for (i = 0; i < funq->rq_depth; i++) {
		rqinfo = &funq->rq_info[i];
		rqinfo->page = alloc_pages_node(node, GFP_KERNEL, 0);
		if (unlikely(!rqinfo->page))
			return -ENOMEM;

		rqinfo->dma = dma_map_page(dev, rqinfo->page, 0,
					   PAGE_SIZE, DMA_FROM_DEVICE);
		if (unlikely(dma_mapping_error(dev, rqinfo->dma))) {
			put_page(rqinfo->page);
			rqinfo->page = NULL;
			return -ENOMEM;
		}

		funq->rqes[i] = FUN_EPRQ_RQBUF_INIT(rqinfo->dma);
	}

	funq->rq_tail = funq->rq_depth - 1;
	return 0;
}

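/* Track the device's position within the RQ. A buffer offset that does not
 * advance past the previous one means the device has moved on to the next RQ
 * buffer: the finished buffer is synced back to the device, counted for later
 * re-posting via the RQ doorbell, and the SW buffer index is advanced.
 */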
static void fun_rq_update_pos(struct fun_queue *funq, int buf_offset)
{
	if (buf_offset <= funq->rq_buf_offset) {
		struct fun_rq_info *rqinfo = &funq->rq_info[funq->rq_buf_idx];
		struct device *dev = funq->fdev->dev;

		dma_sync_single_for_device(dev, rqinfo->dma, PAGE_SIZE,
					   DMA_FROM_DEVICE);
		funq->num_rqe_to_fill++;
		if (++funq->rq_buf_idx == funq->rq_depth)
			funq->rq_buf_idx = 0;
	}
	funq->rq_buf_offset = buf_offset;
}

/* Given a command response with data scattered across >= 1 RQ buffers return
 * a pointer to a contiguous buffer containing all the data. If the data is in
 * one RQ buffer the start address within that buffer is returned, otherwise a
 * new buffer is allocated and the data is gathered into it.
 */
static void *fun_data_from_rq(struct fun_queue *funq,
			      const struct fun_rsp_common *rsp, bool *need_free)
{
	u32 bufoff, total_len, remaining, fragsize, dataoff;
	struct device *dma_dev = funq->fdev->dev;
	const struct fun_dataop_rqbuf *databuf;
	const struct fun_dataop_hdr *dataop;
	const struct fun_rq_info *rqinfo;
	void *data;

	dataop = (void *)rsp + rsp->suboff8 * 8;
	total_len = be32_to_cpu(dataop->total_len);

	if (likely(dataop->nsgl == 1)) {
		databuf = (struct fun_dataop_rqbuf *)dataop->imm;
		bufoff = be32_to_cpu(databuf->bufoff);
		fun_rq_update_pos(funq, bufoff);
		rqinfo = &funq->rq_info[funq->rq_buf_idx];
		dma_sync_single_for_cpu(dma_dev, rqinfo->dma + bufoff,
					total_len, DMA_FROM_DEVICE);
		*need_free = false;
		return page_address(rqinfo->page) + bufoff;
	}

	/* For scattered completions gather the fragments into one buffer. */

	data = kmalloc(total_len, GFP_ATOMIC);
	/* NULL is OK here. In case of failure we still need to consume the data
	 * for proper buffer accounting but indicate an error in the response.
	 */
	if (likely(data))
		*need_free = true;

	dataoff = 0;
	for (remaining = total_len; remaining; remaining -= fragsize) {
		fun_rq_update_pos(funq, 0);
		fragsize = min_t(unsigned int, PAGE_SIZE, remaining);
		if (data) {
			rqinfo = &funq->rq_info[funq->rq_buf_idx];
			dma_sync_single_for_cpu(dma_dev, rqinfo->dma, fragsize,
						DMA_FROM_DEVICE);
			memcpy(data + dataoff, page_address(rqinfo->page),
			       fragsize);
			dataoff += fragsize;
		}
	}
	return data;
}

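/* Process up to @max new CQEs from a funq's CQ, invoking the queue's
 * completion callback for each one. Completion data that the device placed in
 * RQ buffers is gathered first. Returns the number of CQEs consumed. Unlike
 * fun_process_cq() this does not re-post RQ buffers or write doorbells.
 */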
unsigned int __fun_process_cq(struct fun_queue *funq, unsigned int max)
{
	const struct fun_cqe_info *info;
	struct fun_rsp_common *rsp;
	unsigned int new_cqes;
	u16 sf_p, flags;
	bool need_free;
	void *cqe;

	if (!max)
		max = funq->cq_depth - 1;

	for (new_cqes = 0; new_cqes < max; new_cqes++) {
		cqe = funq->cqes + (funq->cq_head << funq->cqe_size_log2);
		info = funq_cqe_info(funq, cqe);
		sf_p = be16_to_cpu(info->sf_p);

		if ((sf_p & 1) != funq->cq_phase)
			break;

		/* ensure the phase tag is read before other CQE fields */
		dma_rmb();

		if (++funq->cq_head == funq->cq_depth) {
			funq->cq_head = 0;
			funq->cq_phase = !funq->cq_phase;
		}

		rsp = cqe;
		flags = be16_to_cpu(rsp->flags);

		need_free = false;
		if (unlikely(flags & FUN_REQ_COMMON_FLAG_CQE_IN_RQBUF)) {
			rsp = fun_data_from_rq(funq, rsp, &need_free);
			if (!rsp) {
				rsp = cqe;
				rsp->len8 = 1;
				if (rsp->ret == 0)
					rsp->ret = ENOMEM;
			}
		}

		if (funq->cq_cb)
			funq->cq_cb(funq, funq->cb_data, rsp, info);
		if (need_free)
			kfree(rsp);
	}

	dev_dbg(funq->fdev->dev, "CQ %u, new CQEs %u/%u, head %u, phase %u\n",
		funq->cqid, new_cqes, max, funq->cq_head, funq->cq_phase);
	return new_cqes;
}

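/* Process new CQEs as above, then re-post any RQ buffers that were consumed
 * and write the CQ doorbell to acknowledge the completions and re-arm the
 * interrupt.
 */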
unsigned int fun_process_cq(struct fun_queue *funq, unsigned int max)
{
	unsigned int processed;
	u32 db;

	processed = __fun_process_cq(funq, max);

	if (funq->num_rqe_to_fill) {
		funq->rq_tail = (funq->rq_tail + funq->num_rqe_to_fill) %
				funq->rq_depth;
		funq->num_rqe_to_fill = 0;
		writel(funq->rq_tail, funq->rq_db);
	}

	db = funq->cq_head | FUN_DB_IRQ_ARM_F;
	writel(db, funq->cq_db);
	return processed;
}

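/* Allocate host memory for a funq's SQ ring, including the head write-back
 * slot if the SQ uses one.
 */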
static int fun_alloc_sqes(struct fun_queue *funq)
{
	funq->sq_cmds = fun_alloc_ring_mem(funq->fdev->dev, funq->sq_depth,
					   1 << funq->sqe_size_log2, 0,
					   fun_sq_is_head_wb(funq),
					   NUMA_NO_NODE, &funq->sq_dma_addr,
					   NULL, &funq->sq_head);
	return funq->sq_cmds ? 0 : -ENOMEM;
}

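/* Allocate host memory for a funq's CQ ring. */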
static int fun_alloc_cqes(struct fun_queue *funq)
{
	funq->cqes = fun_alloc_ring_mem(funq->fdev->dev, funq->cq_depth,
					1 << funq->cqe_size_log2, 0, false,
					NUMA_NO_NODE, &funq->cq_dma_addr, NULL,
					NULL);
	return funq->cqes ? 0 : -ENOMEM;
}

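/* Allocate host memory for a funq's RQ ring and its SW page-tracking state. */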
static int fun_alloc_rqes(struct fun_queue *funq)
{
	funq->rqes = fun_alloc_ring_mem(funq->fdev->dev, funq->rq_depth,
					sizeof(*funq->rqes),
					sizeof(*funq->rq_info), false,
					NUMA_NO_NODE, &funq->rq_dma_addr,
					(void **)&funq->rq_info, NULL);
	return funq->rqes ? 0 : -ENOMEM;
}

/* Free a queue's structures. */
void fun_free_queue(struct fun_queue *funq)
{
	struct device *dev = funq->fdev->dev;

	fun_free_ring_mem(dev, funq->cq_depth, 1 << funq->cqe_size_log2, false,
			  funq->cqes, funq->cq_dma_addr, NULL);
	fun_free_ring_mem(dev, funq->sq_depth, 1 << funq->sqe_size_log2,
			  fun_sq_is_head_wb(funq), funq->sq_cmds,
			  funq->sq_dma_addr, NULL);

	if (funq->rqes) {
		fun_clean_rq(funq);
		fun_free_ring_mem(dev, funq->rq_depth, sizeof(*funq->rqes),
				  false, funq->rqes, funq->rq_dma_addr,
				  funq->rq_info);
	}

	kfree(funq);
}

/* Allocate and initialize a funq's structures. */
struct fun_queue *fun_alloc_queue(struct fun_dev *fdev, int qid,
				  const struct fun_queue_alloc_req *req)
{
	struct fun_queue *funq = kzalloc(sizeof(*funq), GFP_KERNEL);

	if (!funq)
		return NULL;

	funq->fdev = fdev;
	spin_lock_init(&funq->sq_lock);

	funq->qid = qid;

	/* Initial CQ/SQ/RQ ids */
	if (req->rq_depth) {
		funq->cqid = 2 * qid;
		if (funq->qid) {
			/* I/O Q: use rqid = cqid, sqid = +1 */
			funq->rqid = funq->cqid;
			funq->sqid = funq->rqid + 1;
		} else {
			/* Admin Q: sqid is always 0, use ID 1 for RQ */
			funq->sqid = 0;
			funq->rqid = 1;
		}
	} else {
		funq->cqid = qid;
		funq->sqid = qid;
	}

	funq->cq_flags = req->cq_flags;
	funq->sq_flags = req->sq_flags;

	funq->cqe_size_log2 = req->cqe_size_log2;
	funq->sqe_size_log2 = req->sqe_size_log2;

	funq->cq_depth = req->cq_depth;
	funq->sq_depth = req->sq_depth;

	funq->cq_intcoal_nentries = req->cq_intcoal_nentries;
	funq->cq_intcoal_usec = req->cq_intcoal_usec;

	funq->sq_intcoal_nentries = req->sq_intcoal_nentries;
	funq->sq_intcoal_usec = req->sq_intcoal_usec;

	if (fun_alloc_cqes(funq))
		goto free_funq;

	funq->cq_phase = 1;

	if (fun_alloc_sqes(funq))
		goto free_funq;

	if (req->rq_depth) {
		funq->rq_flags = req->rq_flags | FUN_ADMIN_EPSQ_CREATE_FLAG_RQ;
		funq->rq_depth = req->rq_depth;
		funq->rq_buf_offset = -1;

		if (fun_alloc_rqes(funq) || fun_fill_rq(funq))
			goto free_funq;
	}

	funq->cq_vector = -1;
	funq->cqe_info_offset = (1 << funq->cqe_size_log2) - sizeof(struct fun_cqe_info);

	/* SQ/CQ 0 are implicitly created, assign their doorbells now.
	 * Other queues are assigned doorbells at their explicit creation.
	 */
	if (funq->sqid == 0)
		funq->sq_db = fun_sq_db_addr(fdev, 0);
	if (funq->cqid == 0)
		funq->cq_db = fun_cq_db_addr(fdev, 0);

	return funq;

free_funq:
	fun_free_queue(funq);
	return NULL;
}

/* Create a funq's CQ on the device. */
static int fun_create_cq(struct fun_queue *funq)
{
	struct fun_dev *fdev = funq->fdev;
	unsigned int rqid;
	int rc;

	rqid = funq->cq_flags & FUN_ADMIN_EPCQ_CREATE_FLAG_RQ ?
		funq->rqid : FUN_HCI_ID_INVALID;
	rc = fun_cq_create(fdev, funq->cq_flags, funq->cqid, rqid,
			   funq->cqe_size_log2, funq->cq_depth,
			   funq->cq_dma_addr, 0, 0, funq->cq_intcoal_nentries,
			   funq->cq_intcoal_usec, funq->cq_vector, 0, 0,
			   &funq->cqid, &funq->cq_db);
	if (!rc)
		dev_dbg(fdev->dev, "created CQ %u\n", funq->cqid);

	return rc;
}

/* Create a funq's SQ on the device. */
static int fun_create_sq(struct fun_queue *funq)
{
	struct fun_dev *fdev = funq->fdev;
	int rc;

	rc = fun_sq_create(fdev, funq->sq_flags, funq->sqid, funq->cqid,
			   funq->sqe_size_log2, funq->sq_depth,
			   funq->sq_dma_addr, funq->sq_intcoal_nentries,
			   funq->sq_intcoal_usec, funq->cq_vector, 0, 0,
			   0, &funq->sqid, &funq->sq_db);
	if (!rc)
		dev_dbg(fdev->dev, "created SQ %u\n", funq->sqid);

	return rc;
}

/* Create a funq's RQ on the device. */
int fun_create_rq(struct fun_queue *funq)
{
	struct fun_dev *fdev = funq->fdev;
	int rc;

	rc = fun_sq_create(fdev, funq->rq_flags, funq->rqid, funq->cqid, 0,
			   funq->rq_depth, funq->rq_dma_addr, 0, 0,
			   funq->cq_vector, 0, 0, PAGE_SHIFT, &funq->rqid,
			   &funq->rq_db);
	if (!rc)
		dev_dbg(fdev->dev, "created RQ %u\n", funq->rqid);

	return rc;
}

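/* Return the Linux IRQ number backing a funq's CQ interrupt vector. */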
static unsigned int funq_irq(struct fun_queue *funq)
{
	return pci_irq_vector(to_pci_dev(funq->fdev->dev), funq->cq_vector);
}

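/* Request the IRQ for a funq's CQ vector and record the handler so that it
 * can later be released with fun_free_irq(). Fails if the queue has no CQ
 * vector assigned.
 */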
int fun_request_irq(struct fun_queue *funq, const char *devname,
		    irq_handler_t handler, void *data)
{
	int rc;

	if (funq->cq_vector < 0)
		return -EINVAL;

	funq->irq_handler = handler;
	funq->irq_data = data;

	snprintf(funq->irqname, sizeof(funq->irqname),
		 funq->qid ? "%s-q[%d]" : "%s-adminq", devname, funq->qid);

	rc = request_irq(funq_irq(funq), handler, 0, funq->irqname, data);
	if (rc)
		funq->irq_handler = NULL;

	return rc;
}

/* Create all component queues of a funq on the device. */
int fun_create_queue(struct fun_queue *funq)
{
	int rc;

	rc = fun_create_cq(funq);
	if (rc)
		return rc;

	if (funq->rq_depth) {
		rc = fun_create_rq(funq);
		if (rc)
			goto release_cq;
	}

	rc = fun_create_sq(funq);
	if (rc)
		goto release_rq;

	return 0;

release_rq:
	fun_destroy_sq(funq->fdev, funq->rqid);
release_cq:
	fun_destroy_cq(funq->fdev, funq->cqid);
	return rc;
}

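/* Release a funq's IRQ if one was previously requested. */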
void fun_free_irq(struct fun_queue *funq)
{
	if (funq->irq_handler) {
		unsigned int vector = funq_irq(funq);

		free_irq(vector, funq->irq_data);
		funq->irq_handler = NULL;
		funq->irq_data = NULL;
	}
}
