// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Linux driver for VMware's vmxnet3 ethernet NIC.
 * Copyright (C) 2008-2023, VMware, Inc. All Rights Reserved.
 * Maintained by: pv-drivers@vmware.com
 *
 */

#include "vmxnet3_int.h"
#include "vmxnet3_xdp.h"

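/* Publish the new XDP program; the RX path reads it with
 * rcu_dereference() on every packet.
 */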
static void
vmxnet3_xdp_exchange_program(struct vmxnet3_adapter *adapter,
			     struct bpf_prog *prog)
{
	rcu_assign_pointer(adapter->xdp_bpf_prog, prog);
}

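/* Select a TX queue for XDP transmission based on the current CPU,
 * scaling the CPU id into the available queue range when there are
 * more CPUs than TX queues.
 */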
static inline struct vmxnet3_tx_queue *
vmxnet3_xdp_get_tq(struct vmxnet3_adapter *adapter)
{
	struct vmxnet3_tx_queue *tq;
	int tq_number;
	int cpu;

	tq_number = adapter->num_tx_queues;
	cpu = smp_processor_id();
	if (likely(cpu < tq_number))
		tq = &adapter->tx_queue[cpu];
	else
		tq = &adapter->tx_queue[reciprocal_scale(cpu, tq_number)];

	return tq;
}

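/* Attach or detach an XDP program. The MTU must fit in a single RX
 * buffer and LRO is turned off, since XDP does not handle aggregated
 * frames. When the attach state changes on a running device, the
 * device is quiesced, the RX rings are re-sized and re-created, and
 * the device is then re-activated.
 */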
static int
vmxnet3_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf,
		struct netlink_ext_ack *extack)
{
	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
	struct bpf_prog *new_bpf_prog = bpf->prog;
	struct bpf_prog *old_bpf_prog;
	bool need_update;
	bool running;
	int err;

	if (new_bpf_prog && netdev->mtu > VMXNET3_XDP_MAX_MTU) {
		NL_SET_ERR_MSG_FMT_MOD(extack, "MTU %u too large for XDP",
				       netdev->mtu);
		return -EOPNOTSUPP;
	}

	if (adapter->netdev->features & NETIF_F_LRO) {
		NL_SET_ERR_MSG_MOD(extack, "LRO is not supported with XDP");
		adapter->netdev->features &= ~NETIF_F_LRO;
	}

	old_bpf_prog = rcu_dereference(adapter->xdp_bpf_prog);
	if (!new_bpf_prog && !old_bpf_prog)
		return 0;

	running = netif_running(netdev);
	need_update = !!old_bpf_prog != !!new_bpf_prog;

	if (running && need_update)
		vmxnet3_quiesce_dev(adapter);

	vmxnet3_xdp_exchange_program(adapter, new_bpf_prog);
	if (old_bpf_prog)
		bpf_prog_put(old_bpf_prog);

	if (!running || !need_update)
		return 0;

	if (new_bpf_prog)
		xdp_features_set_redirect_target(netdev, false);
	else
		xdp_features_clear_redirect_target(netdev);

	vmxnet3_reset_dev(adapter);
	vmxnet3_rq_destroy_all(adapter);
	vmxnet3_adjust_rx_ring_size(adapter);
	err = vmxnet3_rq_create_all(adapter);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack,
				   "failed to re-create rx queues for XDP.");
		return -EOPNOTSUPP;
	}
	err = vmxnet3_activate_dev(adapter);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack,
				   "failed to activate device for XDP.");
		return -EOPNOTSUPP;
	}
	clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);

	return 0;
}

/* This is the main XDP callback used by the kernel to attach or detach
 * an eBPF program.
 */
int
vmxnet3_xdp(struct net_device *netdev, struct netdev_bpf *bpf)
{
	switch (bpf->command) {
	case XDP_SETUP_PROG:
		return vmxnet3_xdp_set(netdev, bpf, bpf->extack);
	default:
		return -EINVAL;
	}

	return 0;
}

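/* Fill one TX descriptor with a single XDP frame. For ndo_xdp_xmit
 * (dma_map == true) the frame data is DMA-mapped here; for XDP_TX the
 * buffer already sits in the page pool and only needs a DMA sync. The
 * descriptor's generation bit is flipped last, after a dma_wmb(), so
 * the device never sees a partially written descriptor.
 */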
static int
vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
		       struct xdp_frame *xdpf,
		       struct vmxnet3_tx_queue *tq, bool dma_map)
{
	struct vmxnet3_tx_buf_info *tbi = NULL;
	union Vmxnet3_GenericDesc *gdesc;
	struct vmxnet3_tx_ctx ctx;
	int tx_num_deferred;
	struct page *page;
	u32 buf_size;
	u32 dw2;

	dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
	dw2 |= xdpf->len;
	ctx.sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill;
	gdesc = ctx.sop_txd;

	buf_size = xdpf->len;
	tbi = tq->buf_info + tq->tx_ring.next2fill;

	if (vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) == 0) {
		tq->stats.tx_ring_full++;
		return -ENOSPC;
	}

	tbi->map_type = VMXNET3_MAP_XDP;
	if (dma_map) { /* ndo_xdp_xmit */
		tbi->dma_addr = dma_map_single(&adapter->pdev->dev,
					       xdpf->data, buf_size,
					       DMA_TO_DEVICE);
		if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr))
			return -EFAULT;
		tbi->map_type |= VMXNET3_MAP_SINGLE;
	} else { /* XDP buffer from page pool */
		page = virt_to_page(xdpf->data);
		tbi->dma_addr = page_pool_get_dma_addr(page) +
				VMXNET3_XDP_HEADROOM;
		dma_sync_single_for_device(&adapter->pdev->dev,
					   tbi->dma_addr, buf_size,
					   DMA_TO_DEVICE);
	}
	tbi->xdpf = xdpf;
	tbi->len = buf_size;

	gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
	WARN_ON_ONCE(gdesc->txd.gen == tq->tx_ring.gen);

	gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
	gdesc->dword[2] = cpu_to_le32(dw2);

	/* Setup the EOP desc */
	gdesc->dword[3] = cpu_to_le32(VMXNET3_TXD_CQ | VMXNET3_TXD_EOP);

	gdesc->txd.om = 0;
	gdesc->txd.msscof = 0;
	gdesc->txd.hlen = 0;
	gdesc->txd.ti = 0;

	tx_num_deferred = le32_to_cpu(tq->shared->txNumDeferred);
	le32_add_cpu(&tq->shared->txNumDeferred, 1);
	tx_num_deferred++;

	vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);

	/* set the last buf_info for the pkt */
	tbi->sop_idx = ctx.sop_txd - tq->tx_ring.base;

	dma_wmb();
	gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
						  VMXNET3_TXD_GEN);

	/* No need to handle the case when tx_num_deferred doesn't reach the
	 * threshold: the backend driver on the hypervisor side polls and
	 * resets tq->shared->txNumDeferred to 0.
	 */
	if (tx_num_deferred >= le32_to_cpu(tq->shared->txThreshold)) {
		tq->shared->txNumDeferred = 0;
		VMXNET3_WRITE_BAR0_REG(adapter,
				       VMXNET3_REG_TXPROD + tq->qid * 8,
				       tq->tx_ring.next2fill);
	}

	return 0;
}

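/* Transmit an XDP_TX frame on the queue chosen for this CPU, taking
 * the netdev TX queue lock around the descriptor fill.
 */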
static int
vmxnet3_xdp_xmit_back(struct vmxnet3_adapter *adapter,
		      struct xdp_frame *xdpf)
{
	struct vmxnet3_tx_queue *tq;
	struct netdev_queue *nq;
	int err;

	tq = vmxnet3_xdp_get_tq(adapter);
	if (tq->stopped)
		return -ENETDOWN;

	nq = netdev_get_tx_queue(adapter->netdev, tq->qid);

	__netif_tx_lock(nq, smp_processor_id());
	err = vmxnet3_xdp_xmit_frame(adapter, xdpf, tq, false);
	__netif_tx_unlock(nq);

	return err;
}

/* ndo_xdp_xmit */
int
vmxnet3_xdp_xmit(struct net_device *dev,
		 int n, struct xdp_frame **frames, u32 flags)
{
	struct vmxnet3_adapter *adapter = netdev_priv(dev);
	struct vmxnet3_tx_queue *tq;
	int i;

	if (unlikely(test_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state)))
		return -ENETDOWN;
	if (unlikely(test_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state)))
		return -EINVAL;

	tq = vmxnet3_xdp_get_tq(adapter);
	if (tq->stopped)
		return -ENETDOWN;

	for (i = 0; i < n; i++) {
		if (vmxnet3_xdp_xmit_frame(adapter, frames[i], tq, true)) {
			tq->stats.xdp_xmit_err++;
			break;
		}
	}
	tq->stats.xdp_xmit += i;

	return i;
}

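/* Run the attached XDP program on one received buffer and carry out
 * its verdict: pass the buffer up the stack, redirect it, transmit it
 * back (XDP_TX), or drop it. Pages of dropped, aborted, or failed
 * frames are recycled back into the page pool.
 */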
static int
vmxnet3_run_xdp(struct vmxnet3_rx_queue *rq, struct xdp_buff *xdp,
		struct bpf_prog *prog)
{
	struct xdp_frame *xdpf;
	struct page *page;
	int err;
	u32 act;

	rq->stats.xdp_packets++;
	act = bpf_prog_run_xdp(prog, xdp);
	page = virt_to_page(xdp->data_hard_start);

	switch (act) {
	case XDP_PASS:
		return act;
	case XDP_REDIRECT:
		err = xdp_do_redirect(rq->adapter->netdev, xdp, prog);
		if (!err) {
			rq->stats.xdp_redirects++;
		} else {
			rq->stats.xdp_drops++;
			page_pool_recycle_direct(rq->page_pool, page);
		}
		return act;
	case XDP_TX:
		xdpf = xdp_convert_buff_to_frame(xdp);
		if (unlikely(!xdpf ||
			     vmxnet3_xdp_xmit_back(rq->adapter, xdpf))) {
			rq->stats.xdp_drops++;
			page_pool_recycle_direct(rq->page_pool, page);
		} else {
			rq->stats.xdp_tx++;
		}
		return act;
	default:
		bpf_warn_invalid_xdp_action(rq->adapter->netdev, prog, act);
		fallthrough;
	case XDP_ABORTED:
		trace_xdp_exception(rq->adapter->netdev, prog, act);
		rq->stats.xdp_aborted++;
		break;
	case XDP_DROP:
		rq->stats.xdp_drops++;
		break;
	}

	page_pool_recycle_direct(rq->page_pool, page);

	return act;
}

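/* Build an skb around a page-pool page after XDP_PASS, honoring any
 * headroom and length adjustments the program made to the buffer.
 */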
static struct sk_buff *
vmxnet3_build_skb(struct vmxnet3_rx_queue *rq, struct page *page,
		  const struct xdp_buff *xdp)
{
	struct sk_buff *skb;

	skb = build_skb(page_address(page), PAGE_SIZE);
	if (unlikely(!skb)) {
		page_pool_recycle_direct(rq->page_pool, page);
		rq->stats.rx_buf_alloc_failure++;
		return NULL;
	}

	/* bpf prog might change len and data position. */
	skb_reserve(skb, xdp->data - xdp->data_hard_start);
	skb_put(skb, xdp->data_end - xdp->data);
	skb_mark_for_recycle(skb);

	return skb;
}

/* Handle packets from DataRing. */
int
vmxnet3_process_xdp_small(struct vmxnet3_adapter *adapter,
			  struct vmxnet3_rx_queue *rq,
			  void *data, int len,
			  struct sk_buff **skb_xdp_pass)
{
	struct bpf_prog *xdp_prog;
	struct xdp_buff xdp;
	struct page *page;
	int act;

	page = page_pool_alloc_pages(rq->page_pool, GFP_ATOMIC);
	if (unlikely(!page)) {
		rq->stats.rx_buf_alloc_failure++;
		return XDP_DROP;
	}

	xdp_init_buff(&xdp, PAGE_SIZE, &rq->xdp_rxq);
	xdp_prepare_buff(&xdp, page_address(page), rq->page_pool->p.offset,
			 len, false);
	xdp_buff_clear_frags_flag(&xdp);

	/* Must copy the data because it lives in the dataring. */
	memcpy(xdp.data, data, len);

	xdp_prog = rcu_dereference(rq->adapter->xdp_bpf_prog);
	if (!xdp_prog) {
		act = XDP_PASS;
		goto out_skb;
	}
	act = vmxnet3_run_xdp(rq, &xdp, xdp_prog);
	if (act != XDP_PASS)
		return act;

out_skb:
	*skb_xdp_pass = vmxnet3_build_skb(rq, page, &xdp);
	if (!*skb_xdp_pass)
		return XDP_DROP;

	/* No need to refill. */
	return likely(*skb_xdp_pass) ? act : XDP_DROP;
}

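/* Handle packets from the regular RX ring: run XDP directly on the
 * page-pool buffer and then refill the RX descriptor with a fresh
 * page-pool buffer regardless of the verdict.
 */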
int
vmxnet3_process_xdp(struct vmxnet3_adapter *adapter,
		    struct vmxnet3_rx_queue *rq,
		    struct Vmxnet3_RxCompDesc *rcd,
		    struct vmxnet3_rx_buf_info *rbi,
		    struct Vmxnet3_RxDesc *rxd,
		    struct sk_buff **skb_xdp_pass)
{
	struct bpf_prog *xdp_prog;
	dma_addr_t new_dma_addr;
	struct xdp_buff xdp;
	struct page *page;
	void *new_data;
	int act;

	page = rbi->page;
	dma_sync_single_for_cpu(&adapter->pdev->dev,
				page_pool_get_dma_addr(page) +
				rq->page_pool->p.offset, rbi->len,
				page_pool_get_dma_dir(rq->page_pool));

	xdp_init_buff(&xdp, PAGE_SIZE, &rq->xdp_rxq);
	xdp_prepare_buff(&xdp, page_address(page), rq->page_pool->p.offset,
			 rbi->len, false);
	xdp_buff_clear_frags_flag(&xdp);

	xdp_prog = rcu_dereference(rq->adapter->xdp_bpf_prog);
	if (!xdp_prog) {
		act = XDP_PASS;
		goto out_skb;
	}
	act = vmxnet3_run_xdp(rq, &xdp, xdp_prog);

	if (act == XDP_PASS) {
out_skb:
		*skb_xdp_pass = vmxnet3_build_skb(rq, page, &xdp);
		if (!*skb_xdp_pass)
			act = XDP_DROP;
	}

	new_data = vmxnet3_pp_get_buff(rq->page_pool, &new_dma_addr,
				       GFP_ATOMIC);
	if (!new_data) {
		rq->stats.rx_buf_alloc_failure++;
		return XDP_DROP;
	}
	rbi->page = virt_to_page(new_data);
	rbi->dma_addr = new_dma_addr;
	rxd->addr = cpu_to_le64(rbi->dma_addr);
	rxd->len = rbi->len;

	return act;
}