// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <net/ip6_checksum.h>
#include <net/ipv6.h>
#include <net/tcp.h>

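/* Returns the number of references on the buffer's page beyond the bias held
 * by the driver, i.e. references still held by SKBs handed to the stack.
 */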
static int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs)
{
	return page_count(bs->page_info.page) - bs->page_info.pagecnt_bias;
}

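/* Releases the driver's bias references on the page and, when free_page is
 * set (RDA mode), unmaps and frees the page itself.
 */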
static void gve_free_page_dqo(struct gve_priv *priv,
			      struct gve_rx_buf_state_dqo *bs,
			      bool free_page)
{
	page_ref_sub(bs->page_info.page, bs->page_info.pagecnt_bias - 1);
	if (free_page)
		gve_free_page(&priv->pdev->dev, bs->page_info.page, bs->addr,
			      DMA_FROM_DEVICE);
	bs->page_info.page = NULL;
}

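/* Pops a buffer state off the ring's free list, or returns NULL if the list
 * is empty. An allocated buf_state points its next index at itself.
 */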
static struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_state_dqo *buf_state;
	s16 buffer_id;

	buffer_id = rx->dqo.free_buf_states;
	if (unlikely(buffer_id == -1))
		return NULL;

	buf_state = &rx->dqo.buf_states[buffer_id];

	/* Remove buf_state from free list */
	rx->dqo.free_buf_states = buf_state->next;

	/* Point buf_state to itself to mark it as allocated */
	buf_state->next = buffer_id;

	return buf_state;
}

static bool gve_buf_state_is_allocated(struct gve_rx_ring *rx,
				       struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	return buf_state->next == buffer_id;
}

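/* Returns a buffer state to the head of the ring's free list. */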
static void gve_free_buf_state(struct gve_rx_ring *rx,
			       struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	buf_state->next = rx->dqo.free_buf_states;
	rx->dqo.free_buf_states = buffer_id;
}

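/* Removes and returns the buffer state at the head of @list, or NULL if the
 * list is empty.
 */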
static struct gve_rx_buf_state_dqo *
gve_dequeue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list)
{
	struct gve_rx_buf_state_dqo *buf_state;
	s16 buffer_id;

	buffer_id = list->head;
	if (unlikely(buffer_id == -1))
		return NULL;

	buf_state = &rx->dqo.buf_states[buffer_id];

	/* Remove buf_state from list */
	list->head = buf_state->next;
	if (buf_state->next == -1)
		list->tail = -1;

	/* Point buf_state to itself to mark it as allocated */
	buf_state->next = buffer_id;

	return buf_state;
}

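/* Appends a buffer state to the tail of @list. */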
static void gve_enqueue_buf_state(struct gve_rx_ring *rx,
				  struct gve_index_list *list,
				  struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	buf_state->next = -1;

	if (list->head == -1) {
		list->head = buffer_id;
		list->tail = buffer_id;
	} else {
		int tail = list->tail;

		rx->dqo.buf_states[tail].next = buffer_id;
		list->tail = buffer_id;
	}
}

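/* Returns a buffer state whose page can be posted again: first from the
 * recycled list, then from the used list once no SKB references remain.
 * Returns NULL if nothing can be reused right now.
 */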
static struct gve_rx_buf_state_dqo *
gve_get_recycled_buf_state(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_state_dqo *buf_state;
	int i;

	/* Recycled buf states are immediately usable. */
	buf_state = gve_dequeue_buf_state(rx, &rx->dqo.recycled_buf_states);
	if (likely(buf_state))
		return buf_state;

	if (unlikely(rx->dqo.used_buf_states.head == -1))
		return NULL;

	/* Used buf states become usable again only when their ref count drops
	 * to 0, meaning no SKBs refer to them.
	 *
	 * Search a limited number of entries before giving up.
	 */
	for (i = 0; i < 5; i++) {
		buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
		if (gve_buf_ref_cnt(buf_state) == 0) {
			rx->dqo.used_buf_states_cnt--;
			return buf_state;
		}

		gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
	}

	/* For QPL, we cannot allocate any new buffers and must
	 * wait for the existing ones to be available.
	 */
	if (rx->dqo.qpl)
		return NULL;

	/* If there are no free buf states, discard an entry from
	 * `used_buf_states` so its slot can be reused.
	 */
	if (unlikely(rx->dqo.free_buf_states == -1)) {
		buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
		if (gve_buf_ref_cnt(buf_state) == 0)
			return buf_state;

		gve_free_page_dqo(rx->gve, buf_state, true);
		gve_free_buf_state(rx, buf_state);
	}

	return NULL;
}

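/* Attaches a page to @buf_state: a freshly DMA-mapped page for RDA, or the
 * next page from the ring's QPL otherwise, and seeds the page count bias.
 */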
static int gve_alloc_page_dqo(struct gve_rx_ring *rx,
			      struct gve_rx_buf_state_dqo *buf_state)
{
	struct gve_priv *priv = rx->gve;
	u32 idx;

	if (!rx->dqo.qpl) {
		int err;

		err = gve_alloc_page(priv, &priv->pdev->dev,
				     &buf_state->page_info.page,
				     &buf_state->addr,
				     DMA_FROM_DEVICE, GFP_ATOMIC);
		if (err)
			return err;
	} else {
		idx = rx->dqo.next_qpl_page_idx;
		if (idx >= priv->rx_pages_per_qpl) {
			net_err_ratelimited("%s: Out of QPL pages\n",
					    priv->dev->name);
			return -ENOMEM;
		}
		buf_state->page_info.page = rx->dqo.qpl->pages[idx];
		buf_state->addr = rx->dqo.qpl->page_buses[idx];
		rx->dqo.next_qpl_page_idx++;
	}
	buf_state->page_info.page_offset = 0;
	buf_state->page_info.page_address =
		page_address(buf_state->page_info.page);
	buf_state->last_single_ref_offset = 0;

	/* The page already has 1 ref. */
	page_ref_add(buf_state->page_info.page, INT_MAX - 1);
	buf_state->page_info.pagecnt_bias = INT_MAX;

	return 0;
}

static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	struct device *hdev = &priv->pdev->dev;
	int buf_count = rx->dqo.bufq.mask + 1;

	if (rx->dqo.hdr_bufs.data) {
		dma_free_coherent(hdev, priv->header_buf_size * buf_count,
				  rx->dqo.hdr_bufs.data, rx->dqo.hdr_bufs.addr);
		rx->dqo.hdr_bufs.data = NULL;
	}
}

void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx)
{
	int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);

	if (!gve_rx_was_added_to_block(priv, idx))
		return;

	gve_remove_napi(priv, ntfy_idx);
	gve_rx_remove_from_block(priv, idx);
}

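/* Frees all resources owned by an RX ring: buffer pages, the QPL assignment,
 * descriptor rings, queue resources and header buffers.
 */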
static void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
				 struct gve_rx_alloc_rings_cfg *cfg)
{
	struct device *hdev = &priv->pdev->dev;
	size_t completion_queue_slots;
	size_t buffer_queue_slots;
	int idx = rx->q_num;
	size_t size;
	int i;

	completion_queue_slots = rx->dqo.complq.mask + 1;
	buffer_queue_slots = rx->dqo.bufq.mask + 1;

	if (rx->q_resources) {
		dma_free_coherent(hdev, sizeof(*rx->q_resources),
				  rx->q_resources, rx->q_resources_bus);
		rx->q_resources = NULL;
	}

	for (i = 0; i < rx->dqo.num_buf_states; i++) {
		struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];
		/* Only free page for RDA. QPL pages are freed in gve_main. */
		if (bs->page_info.page)
			gve_free_page_dqo(priv, bs, !rx->dqo.qpl);
	}
	if (rx->dqo.qpl) {
		gve_unassign_qpl(cfg->qpl_cfg, rx->dqo.qpl->id);
		rx->dqo.qpl = NULL;
	}

	if (rx->dqo.bufq.desc_ring) {
		size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.bufq.desc_ring,
				  rx->dqo.bufq.bus);
		rx->dqo.bufq.desc_ring = NULL;
	}

	if (rx->dqo.complq.desc_ring) {
		size = sizeof(rx->dqo.complq.desc_ring[0]) *
			completion_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.complq.desc_ring,
				  rx->dqo.complq.bus);
		rx->dqo.complq.desc_ring = NULL;
	}

	kvfree(rx->dqo.buf_states);
	rx->dqo.buf_states = NULL;

	gve_rx_free_hdr_bufs(priv, rx);

	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

static int gve_rx_alloc_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	struct device *hdev = &priv->pdev->dev;
	int buf_count = rx->dqo.bufq.mask + 1;

	rx->dqo.hdr_bufs.data = dma_alloc_coherent(hdev, priv->header_buf_size * buf_count,
						   &rx->dqo.hdr_bufs.addr, GFP_KERNEL);
	if (!rx->dqo.hdr_bufs.data)
		return -ENOMEM;

	return 0;
}

void gve_rx_start_ring_dqo(struct gve_priv *priv, int idx)
{
	int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);

	gve_rx_add_to_block(priv, idx);
	gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo);
}

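/* Allocates a single DQO RX ring: buffer states, optional header-split
 * buffers, the completion and buffer descriptor rings, an optional QPL
 * assignment, and the queue resources.
 */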
static int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
				 struct gve_rx_alloc_rings_cfg *cfg,
				 struct gve_rx_ring *rx,
				 int idx)
{
	struct device *hdev = &priv->pdev->dev;
	size_t size;
	int i;

	const u32 buffer_queue_slots = cfg->raw_addressing ?
		priv->options_dqo_rda.rx_buff_ring_entries : cfg->ring_size;
	const u32 completion_queue_slots = cfg->ring_size;

	netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n");

	memset(rx, 0, sizeof(*rx));
	rx->gve = priv;
	rx->q_num = idx;
	rx->dqo.bufq.mask = buffer_queue_slots - 1;
	rx->dqo.complq.num_free_slots = completion_queue_slots;
	rx->dqo.complq.mask = completion_queue_slots - 1;
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;

	rx->dqo.num_buf_states = cfg->raw_addressing ?
		min_t(s16, S16_MAX, buffer_queue_slots * 4) :
		priv->rx_pages_per_qpl;
	rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states,
				      sizeof(rx->dqo.buf_states[0]),
				      GFP_KERNEL);
	if (!rx->dqo.buf_states)
		return -ENOMEM;

	/* Allocate header buffers for header-split */
	if (cfg->enable_header_split)
		if (gve_rx_alloc_hdr_bufs(priv, rx))
			goto err;

	/* Set up linked list of buffer IDs */
	for (i = 0; i < rx->dqo.num_buf_states - 1; i++)
		rx->dqo.buf_states[i].next = i + 1;

	rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1;
	rx->dqo.recycled_buf_states.head = -1;
	rx->dqo.recycled_buf_states.tail = -1;
	rx->dqo.used_buf_states.head = -1;
	rx->dqo.used_buf_states.tail = -1;

	/* Allocate RX completion queue */
	size = sizeof(rx->dqo.complq.desc_ring[0]) *
		completion_queue_slots;
	rx->dqo.complq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.complq.bus, GFP_KERNEL);
	if (!rx->dqo.complq.desc_ring)
		goto err;

	/* Allocate RX buffer queue */
	size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
	rx->dqo.bufq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.bufq.bus, GFP_KERNEL);
	if (!rx->dqo.bufq.desc_ring)
		goto err;

	if (!cfg->raw_addressing) {
		rx->dqo.qpl = gve_assign_rx_qpl(cfg, rx->q_num);
		if (!rx->dqo.qpl)
			goto err;
		rx->dqo.next_qpl_page_idx = 0;
	}

	rx->q_resources = dma_alloc_coherent(hdev, sizeof(*rx->q_resources),
					     &rx->q_resources_bus, GFP_KERNEL);
	if (!rx->q_resources)
		goto err;

	return 0;

err:
	gve_rx_free_ring_dqo(priv, rx, cfg);
	return -ENOMEM;
}

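/* Notifies the device of newly posted buffers by writing the buffer queue
 * tail to the ring's doorbell.
 */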
void gve_rx_write_doorbell_dqo(const struct gve_priv *priv, int queue_idx)
{
	const struct gve_rx_ring *rx = &priv->rx[queue_idx];
	u64 index = be32_to_cpu(rx->q_resources->db_index);

	iowrite32(rx->dqo.bufq.tail, &priv->db_bar2[index]);
}

int gve_rx_alloc_rings_dqo(struct gve_priv *priv,
			   struct gve_rx_alloc_rings_cfg *cfg)
{
	struct gve_rx_ring *rx;
	int err;
	int i;

	if (!cfg->raw_addressing && !cfg->qpls) {
		netif_err(priv, drv, priv->dev,
			  "Cannot alloc QPL ring before allocing QPLs\n");
		return -EINVAL;
	}

	rx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_rx_ring),
		      GFP_KERNEL);
	if (!rx)
		return -ENOMEM;

	for (i = 0; i < cfg->qcfg->num_queues; i++) {
		err = gve_rx_alloc_ring_dqo(priv, cfg, &rx[i], i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc rx ring=%d: err=%d\n",
				  i, err);
			goto err;
		}
	}

	cfg->rx = rx;
	return 0;

err:
	for (i--; i >= 0; i--)
		gve_rx_free_ring_dqo(priv, &rx[i], cfg);
	kvfree(rx);
	return err;
}

void gve_rx_free_rings_dqo(struct gve_priv *priv,
			   struct gve_rx_alloc_rings_cfg *cfg)
{
	struct gve_rx_ring *rx = cfg->rx;
	int i;

	if (!rx)
		return;

	for (i = 0; i < cfg->qcfg->num_queues; i++)
		gve_rx_free_ring_dqo(priv, &rx[i], cfg);

	kvfree(rx);
	cfg->rx = NULL;
}

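/* Posts as many buffers as available buffer and completion queue slots allow,
 * reusing recycled buffer states where possible and ringing the doorbell
 * every GVE_RX_BUF_THRESH_DQO posted buffers.
 */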
void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
{
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;
	struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;
	struct gve_priv *priv = rx->gve;
	u32 num_avail_slots;
	u32 num_full_slots;
	u32 num_posted = 0;

	num_full_slots = (bufq->tail - bufq->head) & bufq->mask;
	num_avail_slots = bufq->mask - num_full_slots;

	num_avail_slots = min_t(u32, num_avail_slots, complq->num_free_slots);
	while (num_posted < num_avail_slots) {
		struct gve_rx_desc_dqo *desc = &bufq->desc_ring[bufq->tail];
		struct gve_rx_buf_state_dqo *buf_state;

		buf_state = gve_get_recycled_buf_state(rx);
		if (unlikely(!buf_state)) {
			buf_state = gve_alloc_buf_state(rx);
			if (unlikely(!buf_state))
				break;

			if (unlikely(gve_alloc_page_dqo(rx, buf_state))) {
				u64_stats_update_begin(&rx->statss);
				rx->rx_buf_alloc_fail++;
				u64_stats_update_end(&rx->statss);
				gve_free_buf_state(rx, buf_state);
				break;
			}
		}

		desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states);
		desc->buf_addr = cpu_to_le64(buf_state->addr +
					     buf_state->page_info.page_offset);
		if (rx->dqo.hdr_bufs.data)
			desc->header_buf_addr =
				cpu_to_le64(rx->dqo.hdr_bufs.addr +
					    priv->header_buf_size * bufq->tail);

		bufq->tail = (bufq->tail + 1) & bufq->mask;
		complq->num_free_slots--;
		num_posted++;

		if ((bufq->tail & (GVE_RX_BUF_THRESH_DQO - 1)) == 0)
			gve_rx_write_doorbell_dqo(priv, rx->q_num);
	}

	rx->fill_cnt += num_posted;
}

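/* Advances the buffer's page offset so the next chunk of the page can be
 * posted, or moves the buffer to the used list when the page cannot safely
 * be reused yet.
 */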
static void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state)
{
	const u16 data_buffer_size = priv->data_buffer_size_dqo;
	int pagecount;

	/* Can't reuse if we only fit one buffer per page */
	if (data_buffer_size * 2 > PAGE_SIZE)
		goto mark_used;

	pagecount = gve_buf_ref_cnt(buf_state);

	/* Record the offset when we have a single remaining reference.
	 *
	 * When this happens, we know all of the other offsets of the page are
	 * usable.
	 */
	if (pagecount == 1) {
		buf_state->last_single_ref_offset =
			buf_state->page_info.page_offset;
	}

	/* Use the next buffer sized chunk in the page. */
	buf_state->page_info.page_offset += data_buffer_size;
	buf_state->page_info.page_offset &= (PAGE_SIZE - 1);

	/* If we wrap around to the same offset without ever dropping to 1
	 * reference, then we don't know if this offset was ever freed.
	 */
	if (buf_state->page_info.page_offset ==
	    buf_state->last_single_ref_offset) {
		goto mark_used;
	}

	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return;

mark_used:
	gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
	rx->dqo.used_buf_states_cnt++;
}

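/* Sets skb->ip_summed based on the checksum results reported in the
 * completion descriptor for the parsed packet type.
 */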
static void gve_rx_skb_csum(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *desc,
			    struct gve_ptype ptype)
{
	skb->ip_summed = CHECKSUM_NONE;

	/* HW did not identify and process L3 and L4 headers. */
	if (unlikely(!desc->l3_l4_processed))
		return;

	if (ptype.l3_type == GVE_L3_TYPE_IPV4) {
		if (unlikely(desc->csum_ip_err || desc->csum_external_ip_err))
			return;
	} else if (ptype.l3_type == GVE_L3_TYPE_IPV6) {
		/* Checksum should be skipped if this flag is set. */
		if (unlikely(desc->ipv6_ex_add))
			return;
	}

	if (unlikely(desc->csum_l4_err))
		return;

	switch (ptype.l4_type) {
	case GVE_L4_TYPE_TCP:
	case GVE_L4_TYPE_UDP:
	case GVE_L4_TYPE_ICMP:
	case GVE_L4_TYPE_SCTP:
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		break;
	default:
		break;
	}
}

static void gve_rx_skb_hash(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *compl_desc,
			    struct gve_ptype ptype)
{
	enum pkt_hash_types hash_type = PKT_HASH_TYPE_L2;

	if (ptype.l4_type != GVE_L4_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L4;
	else if (ptype.l3_type != GVE_L3_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L3;

	skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type);
}

static void gve_rx_free_skb(struct gve_rx_ring *rx)
{
	if (!rx->ctx.skb_head)
		return;

	dev_kfree_skb_any(rx->ctx.skb_head);
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;
}

static bool gve_rx_should_trigger_copy_ondemand(struct gve_rx_ring *rx)
{
	if (!rx->dqo.qpl)
		return false;
	if (rx->dqo.used_buf_states_cnt <
		     (rx->dqo.num_buf_states -
		     GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD))
		return false;
	return true;
}

static int gve_rx_copy_ondemand(struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state,
				u16 buf_len)
{
	struct page *page = alloc_page(GFP_ATOMIC);
	int num_frags;

	if (!page)
		return -ENOMEM;

	memcpy(page_address(page),
	       buf_state->page_info.page_address +
	       buf_state->page_info.page_offset,
	       buf_len);
	num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;
	skb_add_rx_frag(rx->ctx.skb_tail, num_frags, page,
			0, buf_len, PAGE_SIZE);

	u64_stats_update_begin(&rx->statss);
	rx->rx_frag_alloc_cnt++;
	u64_stats_update_end(&rx->statss);
	/* Return unused buffer. */
	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return 0;
}

/* Chains multiple skbs for a single rx packet.
 * Returns 0 if the buffer is appended, -1 otherwise.
 */
static int gve_rx_append_frags(struct napi_struct *napi,
			       struct gve_rx_buf_state_dqo *buf_state,
			       u16 buf_len, struct gve_rx_ring *rx,
			       struct gve_priv *priv)
{
	int num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;

	if (unlikely(num_frags == MAX_SKB_FRAGS)) {
		struct sk_buff *skb;

		skb = napi_alloc_skb(napi, 0);
		if (!skb)
			return -1;

		if (rx->ctx.skb_tail == rx->ctx.skb_head)
			skb_shinfo(rx->ctx.skb_head)->frag_list = skb;
		else
			rx->ctx.skb_tail->next = skb;
		rx->ctx.skb_tail = skb;
		num_frags = 0;
	}
	if (rx->ctx.skb_tail != rx->ctx.skb_head) {
		rx->ctx.skb_head->len += buf_len;
		rx->ctx.skb_head->data_len += buf_len;
		rx->ctx.skb_head->truesize += priv->data_buffer_size_dqo;
	}

	/* Trigger ondemand page allocation if we are running low on buffers */
	if (gve_rx_should_trigger_copy_ondemand(rx))
		return gve_rx_copy_ondemand(rx, buf_state, buf_len);

	skb_add_rx_frag(rx->ctx.skb_tail, num_frags,
			buf_state->page_info.page,
			buf_state->page_info.page_offset,
			buf_len, priv->data_buffer_size_dqo);
	gve_dec_pagecnt_bias(&buf_state->page_info);

	/* Advances buffer page-offset if page is partially used.
	 * Marks buffer as used if page is full.
	 */
	gve_try_recycle_buf(priv, rx, buf_state);
	return 0;
}

/* Returns 0 if descriptor is completed successfully.
 * Returns -EINVAL if descriptor is invalid.
 * Returns -ENOMEM if data cannot be copied to skb.
 */
static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
		      const struct gve_rx_compl_desc_dqo *compl_desc,
		      u32 desc_idx, int queue_idx)
{
	const u16 buffer_id = le16_to_cpu(compl_desc->buf_id);
	const bool hbo = compl_desc->header_buffer_overflow;
	const bool eop = compl_desc->end_of_packet != 0;
	const bool hsplit = compl_desc->split_header;
	struct gve_rx_buf_state_dqo *buf_state;
	struct gve_priv *priv = rx->gve;
	u16 buf_len;
	u16 hdr_len;

	if (unlikely(buffer_id >= rx->dqo.num_buf_states)) {
		net_err_ratelimited("%s: Invalid RX buffer_id=%u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}
	buf_state = &rx->dqo.buf_states[buffer_id];
	if (unlikely(!gve_buf_state_is_allocated(rx, buf_state))) {
		net_err_ratelimited("%s: RX buffer_id is not allocated: %u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}

	if (unlikely(compl_desc->rx_error)) {
		gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
				      buf_state);
		return -EINVAL;
	}

	buf_len = compl_desc->packet_len;
	hdr_len = compl_desc->header_len;

	/* The page might not have been used for a while and was likely last
	 * written by a different thread.
	 */
	prefetch(buf_state->page_info.page);

	/* Copy the header into the skb in the case of header split */
	if (hsplit) {
		int unsplit = 0;

		if (hdr_len && !hbo) {
			rx->ctx.skb_head = gve_rx_copy_data(priv->dev, napi,
							    rx->dqo.hdr_bufs.data +
							    desc_idx * priv->header_buf_size,
							    hdr_len);
			if (unlikely(!rx->ctx.skb_head))
				goto error;
			rx->ctx.skb_tail = rx->ctx.skb_head;
		} else {
			unsplit = 1;
		}
		u64_stats_update_begin(&rx->statss);
		rx->rx_hsplit_pkt++;
		rx->rx_hsplit_unsplit_pkt += unsplit;
		rx->rx_hsplit_bytes += hdr_len;
		u64_stats_update_end(&rx->statss);
	}

	/* Sync the portion of dma buffer for CPU to read. */
	dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr,
				      buf_state->page_info.page_offset,
				      buf_len, DMA_FROM_DEVICE);

	/* Append to current skb if one exists. */
	if (rx->ctx.skb_head) {
		if (unlikely(gve_rx_append_frags(napi, buf_state, buf_len, rx,
						 priv)) != 0) {
			goto error;
		}
		return 0;
	}

	if (eop && buf_len <= priv->rx_copybreak) {
		rx->ctx.skb_head = gve_rx_copy(priv->dev, napi,
					       &buf_state->page_info, buf_len);
		if (unlikely(!rx->ctx.skb_head))
			goto error;
		rx->ctx.skb_tail = rx->ctx.skb_head;

		u64_stats_update_begin(&rx->statss);
		rx->rx_copied_pkt++;
		rx->rx_copybreak_pkt++;
		u64_stats_update_end(&rx->statss);

		gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
				      buf_state);
		return 0;
	}

	rx->ctx.skb_head = napi_get_frags(napi);
	if (unlikely(!rx->ctx.skb_head))
		goto error;
	rx->ctx.skb_tail = rx->ctx.skb_head;

	if (gve_rx_should_trigger_copy_ondemand(rx)) {
		if (gve_rx_copy_ondemand(rx, buf_state, buf_len) < 0)
			goto error;
		return 0;
	}

	skb_add_rx_frag(rx->ctx.skb_head, 0, buf_state->page_info.page,
			buf_state->page_info.page_offset, buf_len,
			priv->data_buffer_size_dqo);
	gve_dec_pagecnt_bias(&buf_state->page_info);

	gve_try_recycle_buf(priv, rx, buf_state);
	return 0;

error:
	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return -ENOMEM;
}

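/* Fills in GSO type and size for an RSC (coalesced) packet. Only TCP over
 * IPv4/IPv6 is supported.
 */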
static int gve_rx_complete_rsc(struct sk_buff *skb,
			       const struct gve_rx_compl_desc_dqo *desc,
			       struct gve_ptype ptype)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);

	/* Only TCP is supported right now. */
	if (ptype.l4_type != GVE_L4_TYPE_TCP)
		return -EINVAL;

	switch (ptype.l3_type) {
	case GVE_L3_TYPE_IPV4:
		shinfo->gso_type = SKB_GSO_TCPV4;
		break;
	case GVE_L3_TYPE_IPV6:
		shinfo->gso_type = SKB_GSO_TCPV6;
		break;
	default:
		return -EINVAL;
	}

	shinfo->gso_size = le16_to_cpu(desc->rsc_seg_len);
	return 0;
}

/* Returns 0 if skb is completed successfully, -1 otherwise. */
static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi,
			       const struct gve_rx_compl_desc_dqo *desc,
			       netdev_features_t feat)
{
	struct gve_ptype ptype =
		rx->gve->ptype_lut_dqo->ptypes[desc->packet_type];
	int err;

	skb_record_rx_queue(rx->ctx.skb_head, rx->q_num);

	if (feat & NETIF_F_RXHASH)
		gve_rx_skb_hash(rx->ctx.skb_head, desc, ptype);

	if (feat & NETIF_F_RXCSUM)
		gve_rx_skb_csum(rx->ctx.skb_head, desc, ptype);

	/* RSC packets must set gso_size otherwise the TCP stack will complain
	 * that packets are larger than MTU.
	 */
	if (desc->rsc) {
		err = gve_rx_complete_rsc(rx->ctx.skb_head, desc, ptype);
		if (err < 0)
			return err;
	}

	if (skb_headlen(rx->ctx.skb_head) == 0)
		napi_gro_frags(napi);
	else
		napi_gro_receive(napi, rx->ctx.skb_head);

	return 0;
}

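/* NAPI poll: processes up to @budget completed packets, hands finished SKBs
 * to the stack, refills the buffer queue and updates ring statistics.
 */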
int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
{
	struct napi_struct *napi = &block->napi;
	netdev_features_t feat = napi->dev->features;

	struct gve_rx_ring *rx = block->rx;
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;

	u32 work_done = 0;
	u64 bytes = 0;
	int err;

	while (work_done < budget) {
		struct gve_rx_compl_desc_dqo *compl_desc =
			&complq->desc_ring[complq->head];
		u32 pkt_bytes;

		/* No more new packets */
		if (compl_desc->generation == complq->cur_gen_bit)
			break;

		/* Prefetch the next two descriptors. */
		prefetch(&complq->desc_ring[(complq->head + 1) & complq->mask]);
		prefetch(&complq->desc_ring[(complq->head + 2) & complq->mask]);

		/* Do not read data until we own the descriptor */
		dma_rmb();

		err = gve_rx_dqo(napi, rx, compl_desc, complq->head, rx->q_num);
		if (err < 0) {
			gve_rx_free_skb(rx);
			u64_stats_update_begin(&rx->statss);
			if (err == -ENOMEM)
				rx->rx_skb_alloc_fail++;
			else if (err == -EINVAL)
				rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
		}

		complq->head = (complq->head + 1) & complq->mask;
		complq->num_free_slots++;

		/* When the ring wraps, the generation bit is flipped. */
		complq->cur_gen_bit ^= (complq->head == 0);

		/* Receiving a completion means we have space to post another
		 * buffer on the buffer queue.
		 */
		{
			struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;

			bufq->head = (bufq->head + 1) & bufq->mask;
		}

		/* Free running counter of completed descriptors */
		rx->cnt++;

		if (!rx->ctx.skb_head)
			continue;

		if (!compl_desc->end_of_packet)
			continue;

		work_done++;
		pkt_bytes = rx->ctx.skb_head->len;
		/* The ethernet header (first ETH_HLEN bytes) is snipped off
		 * by eth_type_trans.
		 */
		if (skb_headlen(rx->ctx.skb_head))
			pkt_bytes += ETH_HLEN;

		/* gve_rx_complete_skb() will consume skb if successful */
		if (gve_rx_complete_skb(rx, napi, compl_desc, feat) != 0) {
			gve_rx_free_skb(rx);
			u64_stats_update_begin(&rx->statss);
			rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
			continue;
		}

		bytes += pkt_bytes;
		rx->ctx.skb_head = NULL;
		rx->ctx.skb_tail = NULL;
	}

	gve_rx_post_buffers_dqo(rx);

	u64_stats_update_begin(&rx->statss);
	rx->rpackets += work_done;
	rx->rbytes += bytes;
	u64_stats_update_end(&rx->statss);

	return work_done;
}