1// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
2
3#include <linux/bpf_trace.h>
4#include <linux/dma-mapping.h>
5#include <linux/etherdevice.h>
6#include <linux/filter.h>
7#include <linux/irq.h>
8#include <linux/pci.h>
9#include <linux/skbuff.h>
10#include "funeth_txrx.h"
11#include "funeth.h"
12#include "fun_queue.h"
13
14#define CREATE_TRACE_POINTS
15#include "funeth_trace.h"
16
/* Given the device's max supported MTU and pages of at least 4KB a packet can
 * be scattered into at most 4 buffers.
 */
#define RX_MAX_FRAGS 4

/* Per packet headroom in non-XDP mode. Present only for 1-frag packets. */
#define FUN_RX_HEADROOM (NET_SKB_PAD + NET_IP_ALIGN)

/* We try to reuse pages for our buffers. To avoid frequent page ref writes we
 * take EXTRA_PAGE_REFS references at once and then hand them out one per packet
 * occupying the buffer. Once fewer than MIN_PAGE_REFS of the references we
 * hold remain, another batch of EXTRA_PAGE_REFS is taken.
 */
#define EXTRA_PAGE_REFS 1000000
#define MIN_PAGE_REFS 1000

/* Bit flags accumulated in an Rx queue's xdp_flush field while its CQEs are
 * processed. They record which XDP flush operations are owed once the
 * processing pass ends.
 */
enum {
	/* at least one packet was redirected; xdp_do_flush() is needed */
	FUN_XDP_FLUSH_REDIR = 1,
	/* at least one packet was queued for XDP Tx; its doorbell is needed */
	FUN_XDP_FLUSH_TX = 2,
};
36
37/* See if a page is running low on refs we are holding and if so take more. */
38static void refresh_refs(struct funeth_rxbuf *buf)
39{
40	if (unlikely(buf->pg_refs < MIN_PAGE_REFS)) {
41		buf->pg_refs += EXTRA_PAGE_REFS;
42		page_ref_add(buf->page, EXTRA_PAGE_REFS);
43	}
44}
45
46/* Offer a buffer to the Rx buffer cache. The cache will hold the buffer if its
47 * page is worth retaining and there's room for it. Otherwise the page is
48 * unmapped and our references released.
49 */
50static void cache_offer(struct funeth_rxq *q, const struct funeth_rxbuf *buf)
51{
52	struct funeth_rx_cache *c = &q->cache;
53
54	if (c->prod_cnt - c->cons_cnt <= c->mask && buf->node == numa_mem_id()) {
55		c->bufs[c->prod_cnt & c->mask] = *buf;
56		c->prod_cnt++;
57	} else {
58		dma_unmap_page_attrs(q->dma_dev, buf->dma_addr, PAGE_SIZE,
59				     DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
60		__page_frag_cache_drain(buf->page, buf->pg_refs);
61	}
62}
63
64/* Get a page from the Rx buffer cache. We only consider the next available
65 * page and return it if we own all its references.
66 */
67static bool cache_get(struct funeth_rxq *q, struct funeth_rxbuf *rb)
68{
69	struct funeth_rx_cache *c = &q->cache;
70	struct funeth_rxbuf *buf;
71
72	if (c->prod_cnt == c->cons_cnt)
73		return false;             /* empty cache */
74
75	buf = &c->bufs[c->cons_cnt & c->mask];
76	if (page_ref_count(buf->page) == buf->pg_refs) {
77		dma_sync_single_for_device(q->dma_dev, buf->dma_addr,
78					   PAGE_SIZE, DMA_FROM_DEVICE);
79		*rb = *buf;
80		buf->page = NULL;
81		refresh_refs(rb);
82		c->cons_cnt++;
83		return true;
84	}
85
86	/* Page can't be reused. If the cache is full drop this page. */
87	if (c->prod_cnt - c->cons_cnt > c->mask) {
88		dma_unmap_page_attrs(q->dma_dev, buf->dma_addr, PAGE_SIZE,
89				     DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
90		__page_frag_cache_drain(buf->page, buf->pg_refs);
91		buf->page = NULL;
92		c->cons_cnt++;
93	}
94	return false;
95}
96
97/* Allocate and DMA-map a page for receive. */
98static int funeth_alloc_page(struct funeth_rxq *q, struct funeth_rxbuf *rb,
99			     int node, gfp_t gfp)
100{
101	struct page *p;
102
103	if (cache_get(q, rb))
104		return 0;
105
106	p = __alloc_pages_node(node, gfp | __GFP_NOWARN, 0);
107	if (unlikely(!p))
108		return -ENOMEM;
109
110	rb->dma_addr = dma_map_page(q->dma_dev, p, 0, PAGE_SIZE,
111				    DMA_FROM_DEVICE);
112	if (unlikely(dma_mapping_error(q->dma_dev, rb->dma_addr))) {
113		FUN_QSTAT_INC(q, rx_map_err);
114		__free_page(p);
115		return -ENOMEM;
116	}
117
118	FUN_QSTAT_INC(q, rx_page_alloc);
119
120	rb->page = p;
121	rb->pg_refs = 1;
122	refresh_refs(rb);
123	rb->node = page_is_pfmemalloc(p) ? -1 : page_to_nid(p);
124	return 0;
125}
126
127static void funeth_free_page(struct funeth_rxq *q, struct funeth_rxbuf *rb)
128{
129	if (rb->page) {
130		dma_unmap_page(q->dma_dev, rb->dma_addr, PAGE_SIZE,
131			       DMA_FROM_DEVICE);
132		__page_frag_cache_drain(rb->page, rb->pg_refs);
133		rb->page = NULL;
134	}
135}
136
/* Run the XDP program assigned to an Rx queue.
 * Return %NULL if the buffer is consumed, or the virtual address of the packet
 * to turn into an skb.
 *
 * @q: Rx queue the packet arrived on
 * @frags: the packet's single fragment; its offset and size are adjusted on
 *	   XDP_PASS to describe the packet data as left by the program
 * @buf_va: virtual address of the start of the buffer, headroom included
 * @ref_ok: non-zero if references to the packet's page may be handed out.
 *	    When it is 0 XDP_TX and XDP_REDIRECT are not carried out and the
 *	    packet address is returned to the caller instead.
 * @xdp_q: Tx queue used for XDP_TX
 *
 * When the packet is dropped or an error occurs the page reference taken for
 * the packet is returned to the queue's current buffer.
 */
static void *fun_run_xdp(struct funeth_rxq *q, skb_frag_t *frags, void *buf_va,
			 int ref_ok, struct funeth_txq *xdp_q)
{
	struct bpf_prog *xdp_prog;
	struct xdp_frame *xdpf;
	struct xdp_buff xdp;
	u32 act;

	/* VA includes the headroom, frag size includes headroom + tailroom */
	xdp_init_buff(&xdp, ALIGN(skb_frag_size(frags), FUN_EPRQ_PKT_ALIGN),
		      &q->xdp_rxq);
	xdp_prepare_buff(&xdp, buf_va, FUN_XDP_HEADROOM, skb_frag_size(frags) -
			 (FUN_RX_TAILROOM + FUN_XDP_HEADROOM), false);

	xdp_prog = READ_ONCE(q->xdp_prog);
	act = bpf_prog_run_xdp(xdp_prog, &xdp);

	switch (act) {
	case XDP_PASS:
		/* remove headroom, which may not be FUN_XDP_HEADROOM now */
		skb_frag_size_set(frags, xdp.data_end - xdp.data);
		skb_frag_off_add(frags, xdp.data - xdp.data_hard_start);
		goto pass;
	case XDP_TX:
		/* can't give the page away, let the caller deal with it */
		if (unlikely(!ref_ok))
			goto pass;

		xdpf = xdp_convert_buff_to_frame(&xdp);
		if (!xdpf || !fun_xdp_tx(xdp_q, xdpf))
			goto xdp_error;
		FUN_QSTAT_INC(q, xdp_tx);
		/* the Tx doorbell is written once per processing pass */
		q->xdp_flush |= FUN_XDP_FLUSH_TX;
		break;
	case XDP_REDIRECT:
		/* can't give the page away, let the caller deal with it */
		if (unlikely(!ref_ok))
			goto pass;
		if (unlikely(xdp_do_redirect(q->netdev, &xdp, xdp_prog)))
			goto xdp_error;
		FUN_QSTAT_INC(q, xdp_redir);
		q->xdp_flush |= FUN_XDP_FLUSH_REDIR;
		break;
	default:
		bpf_warn_invalid_xdp_action(q->netdev, xdp_prog, act);
		fallthrough;
	case XDP_ABORTED:
		trace_xdp_exception(q->netdev, xdp_prog, act);
		/* failed XDP_TX/XDP_REDIRECT attempts also end up here */
xdp_error:
		q->cur_buf->pg_refs++; /* return frags' page reference */
		FUN_QSTAT_INC(q, xdp_err);
		break;
	case XDP_DROP:
		q->cur_buf->pg_refs++; /* return frags' page reference */
		FUN_QSTAT_INC(q, xdp_drops);
		break;
	}
	/* the packet has been consumed one way or another */
	return NULL;

pass:
	return xdp.data;
}
201
202/* A CQE contains a fixed completion structure along with optional metadata and
203 * even packet data. Given the start address of a CQE return the start of the
204 * contained fixed structure, which lies at the end.
205 */
206static const void *cqe_to_info(const void *cqe)
207{
208	return cqe + FUNETH_CQE_INFO_OFFSET;
209}
210
211/* The inverse of cqe_to_info(). */
212static const void *info_to_cqe(const void *cqe_info)
213{
214	return cqe_info - FUNETH_CQE_INFO_OFFSET;
215}
216
217/* Return the type of hash provided by the device based on the L3 and L4
218 * protocols it parsed for the packet.
219 */
220static enum pkt_hash_types cqe_to_pkt_hash_type(u16 pkt_parse)
221{
222	static const enum pkt_hash_types htype_map[] = {
223		PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L3,
224		PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L4,
225		PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L3,
226		PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L3
227	};
228	u16 key;
229
230	/* Build the key from the TCP/UDP and IP/IPv6 bits */
231	key = ((pkt_parse >> FUN_ETH_RX_CV_OL4_PROT_S) & 6) |
232	      ((pkt_parse >> (FUN_ETH_RX_CV_OL3_PROT_S + 1)) & 1);
233
234	return htype_map[key];
235}
236
237/* Each received packet can be scattered across several Rx buffers or can
238 * share a buffer with previously received packets depending on the buffer
239 * and packet sizes and the room available in the most recently used buffer.
240 *
241 * The rules are:
242 * - If the buffer at the head of an RQ has not been used it gets (part of) the
243 *   next incoming packet.
244 * - Otherwise, if the packet fully fits in the buffer's remaining space the
245 *   packet is written there.
246 * - Otherwise, the packet goes into the next Rx buffer.
247 *
248 * This function returns the Rx buffer for a packet or fragment thereof of the
249 * given length. If it isn't @buf it either recycles or frees that buffer
250 * before advancing the queue to the next buffer.
251 *
252 * If called repeatedly with the remaining length of a packet it will walk
253 * through all the buffers containing the packet.
254 */
255static struct funeth_rxbuf *
256get_buf(struct funeth_rxq *q, struct funeth_rxbuf *buf, unsigned int len)
257{
258	if (q->buf_offset + len <= PAGE_SIZE || !q->buf_offset)
259		return buf;            /* @buf holds (part of) the packet */
260
261	/* The packet occupies part of the next buffer. Move there after
262	 * replenishing the current buffer slot either with the spare page or
263	 * by reusing the slot's existing page. Note that if a spare page isn't
264	 * available and the current packet occupies @buf it is a multi-frag
265	 * packet that will be dropped leaving @buf available for reuse.
266	 */
267	if ((page_ref_count(buf->page) == buf->pg_refs &&
268	     buf->node == numa_mem_id()) || !q->spare_buf.page) {
269		dma_sync_single_for_device(q->dma_dev, buf->dma_addr,
270					   PAGE_SIZE, DMA_FROM_DEVICE);
271		refresh_refs(buf);
272	} else {
273		cache_offer(q, buf);
274		*buf = q->spare_buf;
275		q->spare_buf.page = NULL;
276		q->rqes[q->rq_cons & q->rq_mask] =
277			FUN_EPRQ_RQBUF_INIT(buf->dma_addr);
278	}
279	q->buf_offset = 0;
280	q->rq_cons++;
281	return &q->bufs[q->rq_cons & q->rq_mask];
282}
283
/* Gather the page fragments making up the first Rx packet on @q. Its total
 * length @tot_len includes optional head- and tail-rooms.
 *
 * One entry of @frags is filled in per buffer the packet occupies, and on
 * return the queue's current buffer and buffer offset point past the packet.
 *
 * Return 0 if the device retains ownership of at least some of the pages.
 * In this case the caller may only copy the packet.
 *
 * A non-zero return value gives the caller permission to use references to the
 * pages, e.g., attach them to skbs. Additionally, if the value is <0 at least
 * one of the pages is PF_MEMALLOC.
 *
 * Regardless of outcome the caller is granted a reference to each of the pages.
 */
static int fun_gather_pkt(struct funeth_rxq *q, unsigned int tot_len,
			  skb_frag_t *frags)
{
	struct funeth_rxbuf *buf = q->cur_buf;
	unsigned int frag_len;
	int ref_ok = 1;

	for (;;) {
		/* locate the buffer holding the next piece of the packet */
		buf = get_buf(q, buf, tot_len);

		/* We always keep the RQ full of buffers so before we can give
		 * one of our pages to the stack we require that we can obtain
		 * a replacement page. If we can't the packet will either be
		 * copied or dropped so we can retain ownership of the page and
		 * reuse it.
		 */
		if (!q->spare_buf.page &&
		    funeth_alloc_page(q, &q->spare_buf, numa_mem_id(),
				      GFP_ATOMIC | __GFP_MEMALLOC))
			ref_ok = 0;

		/* a fragment extends at most to the end of its page */
		frag_len = min_t(unsigned int, tot_len,
				 PAGE_SIZE - q->buf_offset);
		dma_sync_single_for_cpu(q->dma_dev,
					buf->dma_addr + q->buf_offset,
					frag_len, DMA_FROM_DEVICE);
		/* hand one of the references we hold to the caller */
		buf->pg_refs--;
		/* A buffer's node is -1 for pfmemalloc pages, so ORing it in
		 * turns ref_ok negative if any page is pfmemalloc.
		 */
		if (ref_ok)
			ref_ok |= buf->node;

		skb_frag_fill_page_desc(frags++, buf->page, q->buf_offset,
					frag_len);

		tot_len -= frag_len;
		if (!tot_len)
			break;

		/* mark this buffer as full so get_buf() moves to the next */
		q->buf_offset = PAGE_SIZE;
	}
	/* the next packet starts at the next aligned offset */
	q->buf_offset = ALIGN(q->buf_offset + frag_len, FUN_EPRQ_PKT_ALIGN);
	q->cur_buf = buf;
	return ref_ok;
}
339
340static bool rx_hwtstamp_enabled(const struct net_device *dev)
341{
342	const struct funeth_priv *d = netdev_priv(dev);
343
344	return d->hwtstamp_cfg.rx_filter == HWTSTAMP_FILTER_ALL;
345}
346
347/* Advance the CQ pointers and phase tag to the next CQE. */
348static void advance_cq(struct funeth_rxq *q)
349{
350	if (unlikely(q->cq_head == q->cq_mask)) {
351		q->cq_head = 0;
352		q->phase ^= 1;
353		q->next_cqe_info = cqe_to_info(q->cqes);
354	} else {
355		q->cq_head++;
356		q->next_cqe_info += FUNETH_CQE_SIZE;
357	}
358	prefetch(q->next_cqe_info);
359}
360
/* Process the packet represented by the head CQE of @q. Gather the packet's
 * fragments, run it through the optional XDP program, and if needed construct
 * an skb and pass it to the stack.
 *
 * @q: Rx queue whose head CQE is to be processed
 * @xdp_q: Tx queue for XDP_TX, or %NULL in which case XDP is not run
 *
 * The packet is counted in the queue's Rx packet and byte statistics and the
 * CQ is advanced regardless of what happens to the packet afterwards. Packets
 * that can't be passed up due to lack of memory are dropped and their page
 * references released.
 */
static void fun_handle_cqe_pkt(struct funeth_rxq *q, struct funeth_txq *xdp_q)
{
	const struct fun_eth_cqe *rxreq = info_to_cqe(q->next_cqe_info);
	unsigned int i, tot_len, pkt_len = be32_to_cpu(rxreq->pkt_len);
	struct net_device *ndev = q->netdev;
	skb_frag_t frags[RX_MAX_FRAGS];
	struct skb_shared_info *si;
	unsigned int headroom;
	gro_result_t gro_res;
	struct sk_buff *skb;
	int ref_ok;
	void *va;
	u16 cv;

	u64_stats_update_begin(&q->syncp);
	q->stats.rx_pkts++;
	q->stats.rx_bytes += pkt_len;
	u64_stats_update_end(&q->syncp);

	/* rxreq keeps pointing at the CQE being processed */
	advance_cq(q);

	/* account for head- and tail-room, present only for 1-buffer packets */
	tot_len = pkt_len;
	headroom = be16_to_cpu(rxreq->headroom);
	if (likely(headroom))
		tot_len += FUN_RX_TAILROOM + headroom;

	ref_ok = fun_gather_pkt(q, tot_len, frags);
	va = skb_frag_address(frags);
	/* XDP is run only on packets that have the XDP headroom */
	if (xdp_q && headroom == FUN_XDP_HEADROOM) {
		va = fun_run_xdp(q, frags, va, ref_ok, xdp_q);
		if (!va)
			return; /* XDP consumed the packet */
		headroom = 0;   /* XDP_PASS trims it */
	}
	/* we may not hand the pages to the stack, drop the packet */
	if (unlikely(!ref_ok))
		goto no_mem;

	if (likely(headroom)) {
		/* Single-buffer packet with headroom, build the skb around
		 * the buffer.
		 */
		/* headroom is either FUN_RX_HEADROOM or FUN_XDP_HEADROOM */
		prefetch(va + headroom);
		skb = napi_build_skb(va, ALIGN(tot_len, FUN_EPRQ_PKT_ALIGN));
		if (unlikely(!skb))
			goto no_mem;

		skb_reserve(skb, headroom);
		__skb_put(skb, pkt_len);
		skb->protocol = eth_type_trans(skb, ndev);
	} else {
		/* No headroom, attach the buffers as page frags to the NAPI
		 * frags skb.
		 */
		prefetch(va);
		skb = napi_get_frags(q->napi);
		if (unlikely(!skb))
			goto no_mem;

		/* negative ref_ok means a pfmemalloc page is involved */
		if (ref_ok < 0)
			skb->pfmemalloc = 1;

		si = skb_shinfo(skb);
		si->nr_frags = rxreq->nsgl;
		for (i = 0; i < si->nr_frags; i++)
			si->frags[i] = frags[i];

		skb->len = pkt_len;
		skb->data_len = pkt_len;
		skb->truesize += round_up(pkt_len, FUN_EPRQ_PKT_ALIGN);
	}

	/* fill in the skb metadata the CQE provides */
	skb_record_rx_queue(skb, q->qidx);
	cv = be16_to_cpu(rxreq->pkt_cv);
	if (likely((q->netdev->features & NETIF_F_RXHASH) && rxreq->hash))
		skb_set_hash(skb, be32_to_cpu(rxreq->hash),
			     cqe_to_pkt_hash_type(cv));
	if (likely((q->netdev->features & NETIF_F_RXCSUM) && rxreq->csum)) {
		FUN_QSTAT_INC(q, rx_cso);
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		skb->csum_level = be16_to_cpu(rxreq->csum) - 1;
	}
	if (unlikely(rx_hwtstamp_enabled(q->netdev)))
		skb_hwtstamps(skb)->hwtstamp = be64_to_cpu(rxreq->timestamp);

	trace_funeth_rx(q, rxreq->nsgl, pkt_len, skb->hash, cv);

	/* skbs set up with page frags above have a non-zero data_len */
	gro_res = skb->data_len ? napi_gro_frags(q->napi) :
				  napi_gro_receive(q->napi, skb);
	if (gro_res == GRO_MERGED || gro_res == GRO_MERGED_FREE)
		FUN_QSTAT_INC(q, gro_merged);
	else if (gro_res == GRO_HELD)
		FUN_QSTAT_INC(q, gro_pkts);
	return;

no_mem:
	FUN_QSTAT_INC(q, rx_mem_drops);

	/* Release the references we've been granted for the frag pages.
	 * We return the ref of the last frag and free the rest.
	 */
	q->cur_buf->pg_refs++;
	for (i = 0; i < rxreq->nsgl - 1; i++)
		__free_page(skb_frag_page(frags + i));
}
465
466/* Return 0 if the phase tag of the CQE at the CQ's head matches expectations
467 * indicating the CQE is new.
468 */
469static u16 cqe_phase_mismatch(const struct fun_cqe_info *ci, u16 phase)
470{
471	u16 sf_p = be16_to_cpu(ci->sf_p);
472
473	return (sf_p & 1) ^ phase;
474}
475
476/* Walk through a CQ identifying and processing fresh CQEs up to the given
477 * budget. Return the remaining budget.
478 */
479static int fun_process_cqes(struct funeth_rxq *q, int budget)
480{
481	struct funeth_priv *fp = netdev_priv(q->netdev);
482	struct funeth_txq **xdpqs, *xdp_q = NULL;
483
484	xdpqs = rcu_dereference_bh(fp->xdpqs);
485	if (xdpqs)
486		xdp_q = xdpqs[smp_processor_id()];
487
488	while (budget && !cqe_phase_mismatch(q->next_cqe_info, q->phase)) {
489		/* access other descriptor fields after the phase check */
490		dma_rmb();
491
492		fun_handle_cqe_pkt(q, xdp_q);
493		budget--;
494	}
495
496	if (unlikely(q->xdp_flush)) {
497		if (q->xdp_flush & FUN_XDP_FLUSH_TX)
498			fun_txq_wr_db(xdp_q);
499		if (q->xdp_flush & FUN_XDP_FLUSH_REDIR)
500			xdp_do_flush();
501		q->xdp_flush = 0;
502	}
503
504	return budget;
505}
506
507/* NAPI handler for Rx queues. Calls the CQE processing loop and writes RQ/CQ
508 * doorbells as needed.
509 */
510int fun_rxq_napi_poll(struct napi_struct *napi, int budget)
511{
512	struct fun_irq *irq = container_of(napi, struct fun_irq, napi);
513	struct funeth_rxq *q = irq->rxq;
514	int work_done = budget - fun_process_cqes(q, budget);
515	u32 cq_db_val = q->cq_head;
516
517	if (unlikely(work_done >= budget))
518		FUN_QSTAT_INC(q, rx_budget);
519	else if (napi_complete_done(napi, work_done))
520		cq_db_val |= q->irq_db_val;
521
522	/* check whether to post new Rx buffers */
523	if (q->rq_cons - q->rq_cons_db >= q->rq_db_thres) {
524		u64_stats_update_begin(&q->syncp);
525		q->stats.rx_bufs += q->rq_cons - q->rq_cons_db;
526		u64_stats_update_end(&q->syncp);
527		q->rq_cons_db = q->rq_cons;
528		writel((q->rq_cons - 1) & q->rq_mask, q->rq_db);
529	}
530
531	writel(cq_db_val, q->cq_db);
532	return work_done;
533}
534
535/* Free the Rx buffers of an Rx queue. */
536static void fun_rxq_free_bufs(struct funeth_rxq *q)
537{
538	struct funeth_rxbuf *b = q->bufs;
539	unsigned int i;
540
541	for (i = 0; i <= q->rq_mask; i++, b++)
542		funeth_free_page(q, b);
543
544	funeth_free_page(q, &q->spare_buf);
545	q->cur_buf = NULL;
546}
547
548/* Initially provision an Rx queue with Rx buffers. */
549static int fun_rxq_alloc_bufs(struct funeth_rxq *q, int node)
550{
551	struct funeth_rxbuf *b = q->bufs;
552	unsigned int i;
553
554	for (i = 0; i <= q->rq_mask; i++, b++) {
555		if (funeth_alloc_page(q, b, node, GFP_KERNEL)) {
556			fun_rxq_free_bufs(q);
557			return -ENOMEM;
558		}
559		q->rqes[i] = FUN_EPRQ_RQBUF_INIT(b->dma_addr);
560	}
561	q->cur_buf = q->bufs;
562	return 0;
563}
564
565/* Initialize a used-buffer cache of the given depth. */
566static int fun_rxq_init_cache(struct funeth_rx_cache *c, unsigned int depth,
567			      int node)
568{
569	c->mask = depth - 1;
570	c->bufs = kvzalloc_node(depth * sizeof(*c->bufs), GFP_KERNEL, node);
571	return c->bufs ? 0 : -ENOMEM;
572}
573
574/* Deallocate an Rx queue's used-buffer cache and its contents. */
575static void fun_rxq_free_cache(struct funeth_rxq *q)
576{
577	struct funeth_rxbuf *b = q->cache.bufs;
578	unsigned int i;
579
580	for (i = 0; i <= q->cache.mask; i++, b++)
581		funeth_free_page(q, b);
582
583	kvfree(q->cache.bufs);
584	q->cache.bufs = NULL;
585}
586
587int fun_rxq_set_bpf(struct funeth_rxq *q, struct bpf_prog *prog)
588{
589	struct funeth_priv *fp = netdev_priv(q->netdev);
590	struct fun_admin_epcq_req cmd;
591	u16 headroom;
592	int err;
593
594	headroom = prog ? FUN_XDP_HEADROOM : FUN_RX_HEADROOM;
595	if (headroom != q->headroom) {
596		cmd.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_EPCQ,
597							sizeof(cmd));
598		cmd.u.modify =
599			FUN_ADMIN_EPCQ_MODIFY_REQ_INIT(FUN_ADMIN_SUBOP_MODIFY,
600						       0, q->hw_cqid, headroom);
601		err = fun_submit_admin_sync_cmd(fp->fdev, &cmd.common, NULL, 0,
602						0);
603		if (err)
604			return err;
605		q->headroom = headroom;
606	}
607
608	WRITE_ONCE(q->xdp_prog, prog);
609	return 0;
610}
611
612/* Create an Rx queue, allocating the host memory it needs. */
613static struct funeth_rxq *fun_rxq_create_sw(struct net_device *dev,
614					    unsigned int qidx,
615					    unsigned int ncqe,
616					    unsigned int nrqe,
617					    struct fun_irq *irq)
618{
619	struct funeth_priv *fp = netdev_priv(dev);
620	struct funeth_rxq *q;
621	int err = -ENOMEM;
622	int numa_node;
623
624	numa_node = fun_irq_node(irq);
625	q = kzalloc_node(sizeof(*q), GFP_KERNEL, numa_node);
626	if (!q)
627		goto err;
628
629	q->qidx = qidx;
630	q->netdev = dev;
631	q->cq_mask = ncqe - 1;
632	q->rq_mask = nrqe - 1;
633	q->numa_node = numa_node;
634	q->rq_db_thres = nrqe / 4;
635	u64_stats_init(&q->syncp);
636	q->dma_dev = &fp->pdev->dev;
637
638	q->rqes = fun_alloc_ring_mem(q->dma_dev, nrqe, sizeof(*q->rqes),
639				     sizeof(*q->bufs), false, numa_node,
640				     &q->rq_dma_addr, (void **)&q->bufs, NULL);
641	if (!q->rqes)
642		goto free_q;
643
644	q->cqes = fun_alloc_ring_mem(q->dma_dev, ncqe, FUNETH_CQE_SIZE, 0,
645				     false, numa_node, &q->cq_dma_addr, NULL,
646				     NULL);
647	if (!q->cqes)
648		goto free_rqes;
649
650	err = fun_rxq_init_cache(&q->cache, nrqe, numa_node);
651	if (err)
652		goto free_cqes;
653
654	err = fun_rxq_alloc_bufs(q, numa_node);
655	if (err)
656		goto free_cache;
657
658	q->stats.rx_bufs = q->rq_mask;
659	q->init_state = FUN_QSTATE_INIT_SW;
660	return q;
661
662free_cache:
663	fun_rxq_free_cache(q);
664free_cqes:
665	dma_free_coherent(q->dma_dev, ncqe * FUNETH_CQE_SIZE, q->cqes,
666			  q->cq_dma_addr);
667free_rqes:
668	fun_free_ring_mem(q->dma_dev, nrqe, sizeof(*q->rqes), false, q->rqes,
669			  q->rq_dma_addr, q->bufs);
670free_q:
671	kfree(q);
672err:
673	netdev_err(dev, "Unable to allocate memory for Rx queue %u\n", qidx);
674	return ERR_PTR(err);
675}
676
677static void fun_rxq_free_sw(struct funeth_rxq *q)
678{
679	struct funeth_priv *fp = netdev_priv(q->netdev);
680
681	fun_rxq_free_cache(q);
682	fun_rxq_free_bufs(q);
683	fun_free_ring_mem(q->dma_dev, q->rq_mask + 1, sizeof(*q->rqes), false,
684			  q->rqes, q->rq_dma_addr, q->bufs);
685	dma_free_coherent(q->dma_dev, (q->cq_mask + 1) * FUNETH_CQE_SIZE,
686			  q->cqes, q->cq_dma_addr);
687
688	/* Before freeing the queue transfer key counters to the device. */
689	fp->rx_packets += q->stats.rx_pkts;
690	fp->rx_bytes   += q->stats.rx_bytes;
691	fp->rx_dropped += q->stats.rx_map_err + q->stats.rx_mem_drops;
692
693	kfree(q);
694}
695
696/* Create an Rx queue's resources on the device. */
697int fun_rxq_create_dev(struct funeth_rxq *q, struct fun_irq *irq)
698{
699	struct funeth_priv *fp = netdev_priv(q->netdev);
700	unsigned int ncqe = q->cq_mask + 1;
701	unsigned int nrqe = q->rq_mask + 1;
702	int err;
703
704	err = xdp_rxq_info_reg(&q->xdp_rxq, q->netdev, q->qidx,
705			       irq->napi.napi_id);
706	if (err)
707		goto out;
708
709	err = xdp_rxq_info_reg_mem_model(&q->xdp_rxq, MEM_TYPE_PAGE_SHARED,
710					 NULL);
711	if (err)
712		goto xdp_unreg;
713
714	q->phase = 1;
715	q->irq_cnt = 0;
716	q->cq_head = 0;
717	q->rq_cons = 0;
718	q->rq_cons_db = 0;
719	q->buf_offset = 0;
720	q->napi = &irq->napi;
721	q->irq_db_val = fp->cq_irq_db;
722	q->next_cqe_info = cqe_to_info(q->cqes);
723
724	q->xdp_prog = fp->xdp_prog;
725	q->headroom = fp->xdp_prog ? FUN_XDP_HEADROOM : FUN_RX_HEADROOM;
726
727	err = fun_sq_create(fp->fdev, FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR |
728			    FUN_ADMIN_EPSQ_CREATE_FLAG_RQ, 0,
729			    FUN_HCI_ID_INVALID, 0, nrqe, q->rq_dma_addr, 0, 0,
730			    0, 0, fp->fdev->kern_end_qid, PAGE_SHIFT,
731			    &q->hw_sqid, &q->rq_db);
732	if (err)
733		goto xdp_unreg;
734
735	err = fun_cq_create(fp->fdev, FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR |
736			    FUN_ADMIN_EPCQ_CREATE_FLAG_RQ, 0,
737			    q->hw_sqid, ilog2(FUNETH_CQE_SIZE), ncqe,
738			    q->cq_dma_addr, q->headroom, FUN_RX_TAILROOM, 0, 0,
739			    irq->irq_idx, 0, fp->fdev->kern_end_qid,
740			    &q->hw_cqid, &q->cq_db);
741	if (err)
742		goto free_rq;
743
744	irq->rxq = q;
745	writel(q->rq_mask, q->rq_db);
746	q->init_state = FUN_QSTATE_INIT_FULL;
747
748	netif_info(fp, ifup, q->netdev,
749		   "Rx queue %u, depth %u/%u, HW qid %u/%u, IRQ idx %u, node %d, headroom %u\n",
750		   q->qidx, ncqe, nrqe, q->hw_cqid, q->hw_sqid, irq->irq_idx,
751		   q->numa_node, q->headroom);
752	return 0;
753
754free_rq:
755	fun_destroy_sq(fp->fdev, q->hw_sqid);
756xdp_unreg:
757	xdp_rxq_info_unreg(&q->xdp_rxq);
758out:
759	netdev_err(q->netdev,
760		   "Failed to create Rx queue %u on device, error %d\n",
761		   q->qidx, err);
762	return err;
763}
764
765static void fun_rxq_free_dev(struct funeth_rxq *q)
766{
767	struct funeth_priv *fp = netdev_priv(q->netdev);
768	struct fun_irq *irq;
769
770	if (q->init_state < FUN_QSTATE_INIT_FULL)
771		return;
772
773	irq = container_of(q->napi, struct fun_irq, napi);
774	netif_info(fp, ifdown, q->netdev,
775		   "Freeing Rx queue %u (id %u/%u), IRQ %u\n",
776		   q->qidx, q->hw_cqid, q->hw_sqid, irq->irq_idx);
777
778	irq->rxq = NULL;
779	xdp_rxq_info_unreg(&q->xdp_rxq);
780	fun_destroy_sq(fp->fdev, q->hw_sqid);
781	fun_destroy_cq(fp->fdev, q->hw_cqid);
782	q->init_state = FUN_QSTATE_INIT_SW;
783}
784
785/* Create or advance an Rx queue, allocating all the host and device resources
786 * needed to reach the target state.
787 */
788int funeth_rxq_create(struct net_device *dev, unsigned int qidx,
789		      unsigned int ncqe, unsigned int nrqe, struct fun_irq *irq,
790		      int state, struct funeth_rxq **qp)
791{
792	struct funeth_rxq *q = *qp;
793	int err;
794
795	if (!q) {
796		q = fun_rxq_create_sw(dev, qidx, ncqe, nrqe, irq);
797		if (IS_ERR(q))
798			return PTR_ERR(q);
799	}
800
801	if (q->init_state >= state)
802		goto out;
803
804	err = fun_rxq_create_dev(q, irq);
805	if (err) {
806		if (!*qp)
807			fun_rxq_free_sw(q);
808		return err;
809	}
810
811out:
812	*qp = q;
813	return 0;
814}
815
816/* Free Rx queue resources until it reaches the target state. */
817struct funeth_rxq *funeth_rxq_free(struct funeth_rxq *q, int state)
818{
819	if (state < FUN_QSTATE_INIT_FULL)
820		fun_rxq_free_dev(q);
821
822	if (state == FUN_QSTATE_DESTROYED) {
823		fun_rxq_free_sw(q);
824		q = NULL;
825	}
826
827	return q;
828}
829