1// SPDX-License-Identifier: (GPL-2.0 OR MIT)
2/* Google virtual Ethernet (gve) driver
3 *
4 * Copyright (C) 2015-2021 Google, Inc.
5 */
6
7#include <linux/bpf.h>
8#include <linux/cpumask.h>
9#include <linux/etherdevice.h>
10#include <linux/filter.h>
11#include <linux/interrupt.h>
12#include <linux/module.h>
13#include <linux/pci.h>
14#include <linux/sched.h>
15#include <linux/timer.h>
16#include <linux/workqueue.h>
17#include <linux/utsname.h>
18#include <linux/version.h>
19#include <net/sch_generic.h>
20#include <net/xdp_sock_drv.h>
21#include "gve.h"
22#include "gve_dqo.h"
23#include "gve_adminq.h"
24#include "gve_register.h"
25#include "gve_utils.h"
26
27#define GVE_DEFAULT_RX_COPYBREAK	(256)
28
29#define DEFAULT_MSG_LEVEL	(NETIF_MSG_DRV | NETIF_MSG_LINK)
30#define GVE_VERSION		"1.0.0"
31#define GVE_VERSION_PREFIX	"GVE-"
32
33// Minimum amount of time between queue kicks in msec (10 seconds)
34#define MIN_TX_TIMEOUT_GAP (1000 * 10)
35
36char gve_driver_name[] = "gve";
37const char gve_version_str[] = GVE_VERSION;
38static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
39
40static int gve_verify_driver_compatibility(struct gve_priv *priv)
41{
42	int err;
43	struct gve_driver_info *driver_info;
44	dma_addr_t driver_info_bus;
45
46	driver_info = dma_alloc_coherent(&priv->pdev->dev,
47					 sizeof(struct gve_driver_info),
48					 &driver_info_bus, GFP_KERNEL);
49	if (!driver_info)
50		return -ENOMEM;
51
52	*driver_info = (struct gve_driver_info) {
53		.os_type = 1, /* Linux */
54		.os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR),
55		.os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL),
56		.os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL),
57		.driver_capability_flags = {
58			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1),
59			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2),
60			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3),
61			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4),
62		},
63	};
64	strscpy(driver_info->os_version_str1, utsname()->release,
65		sizeof(driver_info->os_version_str1));
66	strscpy(driver_info->os_version_str2, utsname()->version,
67		sizeof(driver_info->os_version_str2));
68
69	err = gve_adminq_verify_driver_compatibility(priv,
70						     sizeof(struct gve_driver_info),
71						     driver_info_bus);
72
73	/* It's ok if the device doesn't support this */
74	if (err == -EOPNOTSUPP)
75		err = 0;
76
77	dma_free_coherent(&priv->pdev->dev,
78			  sizeof(struct gve_driver_info),
79			  driver_info, driver_info_bus);
80	return err;
81}
82
83static netdev_features_t gve_features_check(struct sk_buff *skb,
84					    struct net_device *dev,
85					    netdev_features_t features)
86{
87	struct gve_priv *priv = netdev_priv(dev);
88
89	if (!gve_is_gqi(priv))
90		return gve_features_check_dqo(skb, dev, features);
91
92	return features;
93}
94
95static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
96{
97	struct gve_priv *priv = netdev_priv(dev);
98
99	if (gve_is_gqi(priv))
100		return gve_tx(skb, dev);
101	else
102		return gve_tx_dqo(skb, dev);
103}
104
105static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
106{
107	struct gve_priv *priv = netdev_priv(dev);
108	unsigned int start;
109	u64 packets, bytes;
110	int num_tx_queues;
111	int ring;
112
113	num_tx_queues = gve_num_tx_queues(priv);
114	if (priv->rx) {
115		for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
116			do {
117				start =
118				  u64_stats_fetch_begin(&priv->rx[ring].statss);
119				packets = priv->rx[ring].rpackets;
120				bytes = priv->rx[ring].rbytes;
121			} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
122						       start));
123			s->rx_packets += packets;
124			s->rx_bytes += bytes;
125		}
126	}
127	if (priv->tx) {
128		for (ring = 0; ring < num_tx_queues; ring++) {
129			do {
130				start =
131				  u64_stats_fetch_begin(&priv->tx[ring].statss);
132				packets = priv->tx[ring].pkt_done;
133				bytes = priv->tx[ring].bytes_done;
134			} while (u64_stats_fetch_retry(&priv->tx[ring].statss,
135						       start));
136			s->tx_packets += packets;
137			s->tx_bytes += bytes;
138		}
139	}
140}
141
142static int gve_alloc_counter_array(struct gve_priv *priv)
143{
144	priv->counter_array =
145		dma_alloc_coherent(&priv->pdev->dev,
146				   priv->num_event_counters *
147				   sizeof(*priv->counter_array),
148				   &priv->counter_array_bus, GFP_KERNEL);
149	if (!priv->counter_array)
150		return -ENOMEM;
151
152	return 0;
153}
154
155static void gve_free_counter_array(struct gve_priv *priv)
156{
157	if (!priv->counter_array)
158		return;
159
160	dma_free_coherent(&priv->pdev->dev,
161			  priv->num_event_counters *
162			  sizeof(*priv->counter_array),
163			  priv->counter_array, priv->counter_array_bus);
164	priv->counter_array = NULL;
165}
166
167/* NIC requests to report stats */
168static void gve_stats_report_task(struct work_struct *work)
169{
170	struct gve_priv *priv = container_of(work, struct gve_priv,
171					     stats_report_task);
172	if (gve_get_do_report_stats(priv)) {
173		gve_handle_report_stats(priv);
174		gve_clear_do_report_stats(priv);
175	}
176}
177
178static void gve_stats_report_schedule(struct gve_priv *priv)
179{
180	if (!gve_get_probe_in_progress(priv) &&
181	    !gve_get_reset_in_progress(priv)) {
182		gve_set_do_report_stats(priv);
183		queue_work(priv->gve_wq, &priv->stats_report_task);
184	}
185}
186
187static void gve_stats_report_timer(struct timer_list *t)
188{
189	struct gve_priv *priv = from_timer(priv, t, stats_report_timer);
190
191	mod_timer(&priv->stats_report_timer,
192		  round_jiffies(jiffies +
193		  msecs_to_jiffies(priv->stats_report_timer_period)));
194	gve_stats_report_schedule(priv);
195}
196
197static int gve_alloc_stats_report(struct gve_priv *priv)
198{
199	int tx_stats_num, rx_stats_num;
200
201	tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
202		       gve_num_tx_queues(priv);
203	rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
204		       priv->rx_cfg.num_queues;
205	priv->stats_report_len = struct_size(priv->stats_report, stats,
206					     size_add(tx_stats_num, rx_stats_num));
207	priv->stats_report =
208		dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
209				   &priv->stats_report_bus, GFP_KERNEL);
210	if (!priv->stats_report)
211		return -ENOMEM;
212	/* Set up timer for the report-stats task */
213	timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
214	priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
215	return 0;
216}
217
218static void gve_free_stats_report(struct gve_priv *priv)
219{
220	if (!priv->stats_report)
221		return;
222
223	del_timer_sync(&priv->stats_report_timer);
224	dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
225			  priv->stats_report, priv->stats_report_bus);
226	priv->stats_report = NULL;
227}
228
229static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
230{
231	struct gve_priv *priv = arg;
232
233	queue_work(priv->gve_wq, &priv->service_task);
234	return IRQ_HANDLED;
235}
236
237static irqreturn_t gve_intr(int irq, void *arg)
238{
239	struct gve_notify_block *block = arg;
240	struct gve_priv *priv = block->priv;
241
242	iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
243	napi_schedule_irqoff(&block->napi);
244	return IRQ_HANDLED;
245}
246
247static irqreturn_t gve_intr_dqo(int irq, void *arg)
248{
249	struct gve_notify_block *block = arg;
250
251	/* Interrupts are automatically masked */
252	napi_schedule_irqoff(&block->napi);
253	return IRQ_HANDLED;
254}
255
256int gve_napi_poll(struct napi_struct *napi, int budget)
257{
258	struct gve_notify_block *block;
259	__be32 __iomem *irq_doorbell;
260	bool reschedule = false;
261	struct gve_priv *priv;
262	int work_done = 0;
263
264	block = container_of(napi, struct gve_notify_block, napi);
265	priv = block->priv;
266
267	if (block->tx) {
268		if (block->tx->q_num < priv->tx_cfg.num_queues)
269			reschedule |= gve_tx_poll(block, budget);
270		else if (budget)
271			reschedule |= gve_xdp_poll(block, budget);
272	}
273
274	if (!budget)
275		return 0;
276
277	if (block->rx) {
278		work_done = gve_rx_poll(block, budget);
279		reschedule |= work_done == budget;
280	}
281
282	if (reschedule)
283		return budget;
284
285       /* Complete processing - don't unmask irq if busy polling is enabled */
286	if (likely(napi_complete_done(napi, work_done))) {
287		irq_doorbell = gve_irq_doorbell(priv, block);
288		iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);
289
290		/* Ensure IRQ ACK is visible before we check pending work.
291		 * If queue had issued updates, it would be truly visible.
292		 */
293		mb();
294
295		if (block->tx)
296			reschedule |= gve_tx_clean_pending(priv, block->tx);
297		if (block->rx)
298			reschedule |= gve_rx_work_pending(block->rx);
299
300		if (reschedule && napi_schedule(napi))
301			iowrite32be(GVE_IRQ_MASK, irq_doorbell);
302	}
303	return work_done;
304}
305
306int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
307{
308	struct gve_notify_block *block =
309		container_of(napi, struct gve_notify_block, napi);
310	struct gve_priv *priv = block->priv;
311	bool reschedule = false;
312	int work_done = 0;
313
314	if (block->tx)
315		reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
316
317	if (!budget)
318		return 0;
319
320	if (block->rx) {
321		work_done = gve_rx_poll_dqo(block, budget);
322		reschedule |= work_done == budget;
323	}
324
325	if (reschedule)
326		return budget;
327
328	if (likely(napi_complete_done(napi, work_done))) {
329		/* Enable interrupts again.
330		 *
331		 * We don't need to repoll afterwards because HW supports the
332		 * PCI MSI-X PBA feature.
333		 *
334		 * Another interrupt would be triggered if a new event came in
335		 * since the last one.
336		 */
337		gve_write_irq_doorbell_dqo(priv, block,
338					   GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
339	}
340
341	return work_done;
342}
343
344static int gve_alloc_notify_blocks(struct gve_priv *priv)
345{
346	int num_vecs_requested = priv->num_ntfy_blks + 1;
347	unsigned int active_cpus;
348	int vecs_enabled;
349	int i, j;
350	int err;
351
352	priv->msix_vectors = kvcalloc(num_vecs_requested,
353				      sizeof(*priv->msix_vectors), GFP_KERNEL);
354	if (!priv->msix_vectors)
355		return -ENOMEM;
356	for (i = 0; i < num_vecs_requested; i++)
357		priv->msix_vectors[i].entry = i;
358	vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
359					     GVE_MIN_MSIX, num_vecs_requested);
360	if (vecs_enabled < 0) {
361		dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
362			GVE_MIN_MSIX, vecs_enabled);
363		err = vecs_enabled;
364		goto abort_with_msix_vectors;
365	}
366	if (vecs_enabled != num_vecs_requested) {
367		int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
368		int vecs_per_type = new_num_ntfy_blks / 2;
369		int vecs_left = new_num_ntfy_blks % 2;
370
371		priv->num_ntfy_blks = new_num_ntfy_blks;
372		priv->mgmt_msix_idx = priv->num_ntfy_blks;
373		priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
374						vecs_per_type);
375		priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
376						vecs_per_type + vecs_left);
377		dev_err(&priv->pdev->dev,
378			"Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
379			vecs_enabled, priv->tx_cfg.max_queues,
380			priv->rx_cfg.max_queues);
381		if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
382			priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
383		if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
384			priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
385	}
386	/* Half the notification blocks go to TX and half to RX */
387	active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());
388
389	/* Setup Management Vector  - the last vector */
390	snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s",
391		 pci_name(priv->pdev));
392	err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
393			  gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
394	if (err) {
395		dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
396		goto abort_with_msix_enabled;
397	}
398	priv->irq_db_indices =
399		dma_alloc_coherent(&priv->pdev->dev,
400				   priv->num_ntfy_blks *
401				   sizeof(*priv->irq_db_indices),
402				   &priv->irq_db_indices_bus, GFP_KERNEL);
403	if (!priv->irq_db_indices) {
404		err = -ENOMEM;
405		goto abort_with_mgmt_vector;
406	}
407
408	priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks *
409				     sizeof(*priv->ntfy_blocks), GFP_KERNEL);
410	if (!priv->ntfy_blocks) {
411		err = -ENOMEM;
412		goto abort_with_irq_db_indices;
413	}
414
415	/* Setup the other blocks - the first n-1 vectors */
416	for (i = 0; i < priv->num_ntfy_blks; i++) {
417		struct gve_notify_block *block = &priv->ntfy_blocks[i];
418		int msix_idx = i;
419
420		snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s",
421			 i, pci_name(priv->pdev));
422		block->priv = priv;
423		err = request_irq(priv->msix_vectors[msix_idx].vector,
424				  gve_is_gqi(priv) ? gve_intr : gve_intr_dqo,
425				  0, block->name, block);
426		if (err) {
427			dev_err(&priv->pdev->dev,
428				"Failed to receive msix vector %d\n", i);
429			goto abort_with_some_ntfy_blocks;
430		}
431		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
432				      get_cpu_mask(i % active_cpus));
433		block->irq_db_index = &priv->irq_db_indices[i].index;
434	}
435	return 0;
436abort_with_some_ntfy_blocks:
437	for (j = 0; j < i; j++) {
438		struct gve_notify_block *block = &priv->ntfy_blocks[j];
439		int msix_idx = j;
440
441		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
442				      NULL);
443		free_irq(priv->msix_vectors[msix_idx].vector, block);
444	}
445	kvfree(priv->ntfy_blocks);
446	priv->ntfy_blocks = NULL;
447abort_with_irq_db_indices:
448	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
449			  sizeof(*priv->irq_db_indices),
450			  priv->irq_db_indices, priv->irq_db_indices_bus);
451	priv->irq_db_indices = NULL;
452abort_with_mgmt_vector:
453	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
454abort_with_msix_enabled:
455	pci_disable_msix(priv->pdev);
456abort_with_msix_vectors:
457	kvfree(priv->msix_vectors);
458	priv->msix_vectors = NULL;
459	return err;
460}
461
462static void gve_free_notify_blocks(struct gve_priv *priv)
463{
464	int i;
465
466	if (!priv->msix_vectors)
467		return;
468
469	/* Free the irqs */
470	for (i = 0; i < priv->num_ntfy_blks; i++) {
471		struct gve_notify_block *block = &priv->ntfy_blocks[i];
472		int msix_idx = i;
473
474		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
475				      NULL);
476		free_irq(priv->msix_vectors[msix_idx].vector, block);
477	}
478	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
479	kvfree(priv->ntfy_blocks);
480	priv->ntfy_blocks = NULL;
481	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
482			  sizeof(*priv->irq_db_indices),
483			  priv->irq_db_indices, priv->irq_db_indices_bus);
484	priv->irq_db_indices = NULL;
485	pci_disable_msix(priv->pdev);
486	kvfree(priv->msix_vectors);
487	priv->msix_vectors = NULL;
488}
489
490static int gve_setup_device_resources(struct gve_priv *priv)
491{
492	int err;
493
494	err = gve_alloc_counter_array(priv);
495	if (err)
496		return err;
497	err = gve_alloc_notify_blocks(priv);
498	if (err)
499		goto abort_with_counter;
500	err = gve_alloc_stats_report(priv);
501	if (err)
502		goto abort_with_ntfy_blocks;
503	err = gve_adminq_configure_device_resources(priv,
504						    priv->counter_array_bus,
505						    priv->num_event_counters,
506						    priv->irq_db_indices_bus,
507						    priv->num_ntfy_blks);
508	if (unlikely(err)) {
509		dev_err(&priv->pdev->dev,
510			"could not setup device_resources: err=%d\n", err);
511		err = -ENXIO;
512		goto abort_with_stats_report;
513	}
514
515	if (!gve_is_gqi(priv)) {
516		priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
517					       GFP_KERNEL);
518		if (!priv->ptype_lut_dqo) {
519			err = -ENOMEM;
520			goto abort_with_stats_report;
521		}
522		err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
523		if (err) {
524			dev_err(&priv->pdev->dev,
525				"Failed to get ptype map: err=%d\n", err);
526			goto abort_with_ptype_lut;
527		}
528	}
529
530	err = gve_adminq_report_stats(priv, priv->stats_report_len,
531				      priv->stats_report_bus,
532				      GVE_STATS_REPORT_TIMER_PERIOD);
533	if (err)
534		dev_err(&priv->pdev->dev,
535			"Failed to report stats: err=%d\n", err);
536	gve_set_device_resources_ok(priv);
537	return 0;
538
539abort_with_ptype_lut:
540	kvfree(priv->ptype_lut_dqo);
541	priv->ptype_lut_dqo = NULL;
542abort_with_stats_report:
543	gve_free_stats_report(priv);
544abort_with_ntfy_blocks:
545	gve_free_notify_blocks(priv);
546abort_with_counter:
547	gve_free_counter_array(priv);
548
549	return err;
550}
551
552static void gve_trigger_reset(struct gve_priv *priv);
553
554static void gve_teardown_device_resources(struct gve_priv *priv)
555{
556	int err;
557
558	/* Tell device its resources are being freed */
559	if (gve_get_device_resources_ok(priv)) {
560		/* detach the stats report */
561		err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
562		if (err) {
563			dev_err(&priv->pdev->dev,
564				"Failed to detach stats report: err=%d\n", err);
565			gve_trigger_reset(priv);
566		}
567		err = gve_adminq_deconfigure_device_resources(priv);
568		if (err) {
569			dev_err(&priv->pdev->dev,
570				"Could not deconfigure device resources: err=%d\n",
571				err);
572			gve_trigger_reset(priv);
573		}
574	}
575
576	kvfree(priv->ptype_lut_dqo);
577	priv->ptype_lut_dqo = NULL;
578
579	gve_free_counter_array(priv);
580	gve_free_notify_blocks(priv);
581	gve_free_stats_report(priv);
582	gve_clear_device_resources_ok(priv);
583}
584
585static int gve_unregister_qpl(struct gve_priv *priv, u32 i)
586{
587	int err;
588
589	err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
590	if (err) {
591		netif_err(priv, drv, priv->dev,
592			  "Failed to unregister queue page list %d\n",
593			  priv->qpls[i].id);
594		return err;
595	}
596
597	priv->num_registered_pages -= priv->qpls[i].num_entries;
598	return 0;
599}
600
601static int gve_register_qpl(struct gve_priv *priv, u32 i)
602{
603	int num_rx_qpls;
604	int pages;
605	int err;
606
607	/* Rx QPLs succeed Tx QPLs in the priv->qpls array. */
608	num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));
609	if (i >= gve_rx_start_qpl_id(&priv->tx_cfg) + num_rx_qpls) {
610		netif_err(priv, drv, priv->dev,
611			  "Cannot register nonexisting QPL at index %d\n", i);
612		return -EINVAL;
613	}
614
615	pages = priv->qpls[i].num_entries;
616
617	if (pages + priv->num_registered_pages > priv->max_registered_pages) {
618		netif_err(priv, drv, priv->dev,
619			  "Reached max number of registered pages %llu > %llu\n",
620			  pages + priv->num_registered_pages,
621			  priv->max_registered_pages);
622		return -EINVAL;
623	}
624
625	err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
626	if (err) {
627		netif_err(priv, drv, priv->dev,
628			  "failed to register queue page list %d\n",
629			  priv->qpls[i].id);
630		/* This failure will trigger a reset - no need to clean
631		 * up
632		 */
633		return err;
634	}
635
636	priv->num_registered_pages += pages;
637	return 0;
638}
639
640static int gve_register_xdp_qpls(struct gve_priv *priv)
641{
642	int start_id;
643	int err;
644	int i;
645
646	start_id = gve_xdp_tx_start_queue_id(priv);
647	for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
648		err = gve_register_qpl(priv, i);
649		/* This failure will trigger a reset - no need to clean up */
650		if (err)
651			return err;
652	}
653	return 0;
654}
655
656static int gve_register_qpls(struct gve_priv *priv)
657{
658	int num_tx_qpls, num_rx_qpls;
659	int start_id;
660	int err;
661	int i;
662
663	num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv),
664				      gve_is_qpl(priv));
665	num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));
666
667	for (i = 0; i < num_tx_qpls; i++) {
668		err = gve_register_qpl(priv, i);
669		if (err)
670			return err;
671	}
672
673	/* there might be a gap between the tx and rx qpl ids */
674	start_id = gve_rx_start_qpl_id(&priv->tx_cfg);
675	for (i = 0; i < num_rx_qpls; i++) {
676		err = gve_register_qpl(priv, start_id + i);
677		if (err)
678			return err;
679	}
680
681	return 0;
682}
683
684static int gve_unregister_xdp_qpls(struct gve_priv *priv)
685{
686	int start_id;
687	int err;
688	int i;
689
690	start_id = gve_xdp_tx_start_queue_id(priv);
691	for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
692		err = gve_unregister_qpl(priv, i);
693		/* This failure will trigger a reset - no need to clean */
694		if (err)
695			return err;
696	}
697	return 0;
698}
699
700static int gve_unregister_qpls(struct gve_priv *priv)
701{
702	int num_tx_qpls, num_rx_qpls;
703	int start_id;
704	int err;
705	int i;
706
707	num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv),
708				      gve_is_qpl(priv));
709	num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));
710
711	for (i = 0; i < num_tx_qpls; i++) {
712		err = gve_unregister_qpl(priv, i);
713		/* This failure will trigger a reset - no need to clean */
714		if (err)
715			return err;
716	}
717
718	start_id = gve_rx_start_qpl_id(&priv->tx_cfg);
719	for (i = 0; i < num_rx_qpls; i++) {
720		err = gve_unregister_qpl(priv, start_id + i);
721		/* This failure will trigger a reset - no need to clean */
722		if (err)
723			return err;
724	}
725	return 0;
726}
727
728static int gve_create_xdp_rings(struct gve_priv *priv)
729{
730	int err;
731
732	err = gve_adminq_create_tx_queues(priv,
733					  gve_xdp_tx_start_queue_id(priv),
734					  priv->num_xdp_queues);
735	if (err) {
736		netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n",
737			  priv->num_xdp_queues);
738		/* This failure will trigger a reset - no need to clean
739		 * up
740		 */
741		return err;
742	}
743	netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n",
744		  priv->num_xdp_queues);
745
746	return 0;
747}
748
749static int gve_create_rings(struct gve_priv *priv)
750{
751	int num_tx_queues = gve_num_tx_queues(priv);
752	int err;
753	int i;
754
755	err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues);
756	if (err) {
757		netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
758			  num_tx_queues);
759		/* This failure will trigger a reset - no need to clean
760		 * up
761		 */
762		return err;
763	}
764	netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
765		  num_tx_queues);
766
767	err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
768	if (err) {
769		netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
770			  priv->rx_cfg.num_queues);
771		/* This failure will trigger a reset - no need to clean
772		 * up
773		 */
774		return err;
775	}
776	netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
777		  priv->rx_cfg.num_queues);
778
779	if (gve_is_gqi(priv)) {
780		/* Rx data ring has been prefilled with packet buffers at queue
781		 * allocation time.
782		 *
783		 * Write the doorbell to provide descriptor slots and packet
784		 * buffers to the NIC.
785		 */
786		for (i = 0; i < priv->rx_cfg.num_queues; i++)
787			gve_rx_write_doorbell(priv, &priv->rx[i]);
788	} else {
789		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
790			/* Post buffers and ring doorbell. */
791			gve_rx_post_buffers_dqo(&priv->rx[i]);
792		}
793	}
794
795	return 0;
796}
797
798static void init_xdp_sync_stats(struct gve_priv *priv)
799{
800	int start_id = gve_xdp_tx_start_queue_id(priv);
801	int i;
802
803	/* Init stats */
804	for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
805		int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
806
807		u64_stats_init(&priv->tx[i].statss);
808		priv->tx[i].ntfy_id = ntfy_idx;
809	}
810}
811
812static void gve_init_sync_stats(struct gve_priv *priv)
813{
814	int i;
815
816	for (i = 0; i < priv->tx_cfg.num_queues; i++)
817		u64_stats_init(&priv->tx[i].statss);
818
819	/* Init stats for XDP TX queues */
820	init_xdp_sync_stats(priv);
821
822	for (i = 0; i < priv->rx_cfg.num_queues; i++)
823		u64_stats_init(&priv->rx[i].statss);
824}
825
826static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv,
827				      struct gve_tx_alloc_rings_cfg *cfg)
828{
829	cfg->qcfg = &priv->tx_cfg;
830	cfg->raw_addressing = !gve_is_qpl(priv);
831	cfg->qpls = priv->qpls;
832	cfg->qpl_cfg = &priv->qpl_cfg;
833	cfg->ring_size = priv->tx_desc_cnt;
834	cfg->start_idx = 0;
835	cfg->num_rings = gve_num_tx_queues(priv);
836	cfg->tx = priv->tx;
837}
838
839static void gve_tx_stop_rings(struct gve_priv *priv, int start_id, int num_rings)
840{
841	int i;
842
843	if (!priv->tx)
844		return;
845
846	for (i = start_id; i < start_id + num_rings; i++) {
847		if (gve_is_gqi(priv))
848			gve_tx_stop_ring_gqi(priv, i);
849		else
850			gve_tx_stop_ring_dqo(priv, i);
851	}
852}
853
854static void gve_tx_start_rings(struct gve_priv *priv, int start_id,
855			       int num_rings)
856{
857	int i;
858
859	for (i = start_id; i < start_id + num_rings; i++) {
860		if (gve_is_gqi(priv))
861			gve_tx_start_ring_gqi(priv, i);
862		else
863			gve_tx_start_ring_dqo(priv, i);
864	}
865}
866
867static int gve_alloc_xdp_rings(struct gve_priv *priv)
868{
869	struct gve_tx_alloc_rings_cfg cfg = {0};
870	int err = 0;
871
872	if (!priv->num_xdp_queues)
873		return 0;
874
875	gve_tx_get_curr_alloc_cfg(priv, &cfg);
876	cfg.start_idx = gve_xdp_tx_start_queue_id(priv);
877	cfg.num_rings = priv->num_xdp_queues;
878
879	err = gve_tx_alloc_rings_gqi(priv, &cfg);
880	if (err)
881		return err;
882
883	gve_tx_start_rings(priv, cfg.start_idx, cfg.num_rings);
884	init_xdp_sync_stats(priv);
885
886	return 0;
887}
888
889static int gve_alloc_rings(struct gve_priv *priv,
890			   struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
891			   struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
892{
893	int err;
894
895	if (gve_is_gqi(priv))
896		err = gve_tx_alloc_rings_gqi(priv, tx_alloc_cfg);
897	else
898		err = gve_tx_alloc_rings_dqo(priv, tx_alloc_cfg);
899	if (err)
900		return err;
901
902	if (gve_is_gqi(priv))
903		err = gve_rx_alloc_rings_gqi(priv, rx_alloc_cfg);
904	else
905		err = gve_rx_alloc_rings_dqo(priv, rx_alloc_cfg);
906	if (err)
907		goto free_tx;
908
909	return 0;
910
911free_tx:
912	if (gve_is_gqi(priv))
913		gve_tx_free_rings_gqi(priv, tx_alloc_cfg);
914	else
915		gve_tx_free_rings_dqo(priv, tx_alloc_cfg);
916	return err;
917}
918
919static int gve_destroy_xdp_rings(struct gve_priv *priv)
920{
921	int start_id;
922	int err;
923
924	start_id = gve_xdp_tx_start_queue_id(priv);
925	err = gve_adminq_destroy_tx_queues(priv,
926					   start_id,
927					   priv->num_xdp_queues);
928	if (err) {
929		netif_err(priv, drv, priv->dev,
930			  "failed to destroy XDP queues\n");
931		/* This failure will trigger a reset - no need to clean up */
932		return err;
933	}
934	netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n");
935
936	return 0;
937}
938
939static int gve_destroy_rings(struct gve_priv *priv)
940{
941	int num_tx_queues = gve_num_tx_queues(priv);
942	int err;
943
944	err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues);
945	if (err) {
946		netif_err(priv, drv, priv->dev,
947			  "failed to destroy tx queues\n");
948		/* This failure will trigger a reset - no need to clean up */
949		return err;
950	}
951	netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
952	err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
953	if (err) {
954		netif_err(priv, drv, priv->dev,
955			  "failed to destroy rx queues\n");
956		/* This failure will trigger a reset - no need to clean up */
957		return err;
958	}
959	netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
960	return 0;
961}
962
963static void gve_free_xdp_rings(struct gve_priv *priv)
964{
965	struct gve_tx_alloc_rings_cfg cfg = {0};
966
967	gve_tx_get_curr_alloc_cfg(priv, &cfg);
968	cfg.start_idx = gve_xdp_tx_start_queue_id(priv);
969	cfg.num_rings = priv->num_xdp_queues;
970
971	if (priv->tx) {
972		gve_tx_stop_rings(priv, cfg.start_idx, cfg.num_rings);
973		gve_tx_free_rings_gqi(priv, &cfg);
974	}
975}
976
977static void gve_free_rings(struct gve_priv *priv,
978			   struct gve_tx_alloc_rings_cfg *tx_cfg,
979			   struct gve_rx_alloc_rings_cfg *rx_cfg)
980{
981	if (gve_is_gqi(priv)) {
982		gve_tx_free_rings_gqi(priv, tx_cfg);
983		gve_rx_free_rings_gqi(priv, rx_cfg);
984	} else {
985		gve_tx_free_rings_dqo(priv, tx_cfg);
986		gve_rx_free_rings_dqo(priv, rx_cfg);
987	}
988}
989
990int gve_alloc_page(struct gve_priv *priv, struct device *dev,
991		   struct page **page, dma_addr_t *dma,
992		   enum dma_data_direction dir, gfp_t gfp_flags)
993{
994	*page = alloc_page(gfp_flags);
995	if (!*page) {
996		priv->page_alloc_fail++;
997		return -ENOMEM;
998	}
999	*dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
1000	if (dma_mapping_error(dev, *dma)) {
1001		priv->dma_mapping_error++;
1002		put_page(*page);
1003		return -ENOMEM;
1004	}
1005	return 0;
1006}
1007
1008static int gve_alloc_queue_page_list(struct gve_priv *priv,
1009				     struct gve_queue_page_list *qpl,
1010				     u32 id, int pages)
1011{
1012	int err;
1013	int i;
1014
1015	qpl->id = id;
1016	qpl->num_entries = 0;
1017	qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL);
1018	/* caller handles clean up */
1019	if (!qpl->pages)
1020		return -ENOMEM;
1021	qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL);
1022	/* caller handles clean up */
1023	if (!qpl->page_buses)
1024		return -ENOMEM;
1025
1026	for (i = 0; i < pages; i++) {
1027		err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
1028				     &qpl->page_buses[i],
1029				     gve_qpl_dma_dir(priv, id), GFP_KERNEL);
1030		/* caller handles clean up */
1031		if (err)
1032			return -ENOMEM;
1033		qpl->num_entries++;
1034	}
1035
1036	return 0;
1037}
1038
1039void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
1040		   enum dma_data_direction dir)
1041{
1042	if (!dma_mapping_error(dev, dma))
1043		dma_unmap_page(dev, dma, PAGE_SIZE, dir);
1044	if (page)
1045		put_page(page);
1046}
1047
1048static void gve_free_queue_page_list(struct gve_priv *priv,
1049				     struct gve_queue_page_list *qpl,
1050				     int id)
1051{
1052	int i;
1053
1054	if (!qpl->pages)
1055		return;
1056	if (!qpl->page_buses)
1057		goto free_pages;
1058
1059	for (i = 0; i < qpl->num_entries; i++)
1060		gve_free_page(&priv->pdev->dev, qpl->pages[i],
1061			      qpl->page_buses[i], gve_qpl_dma_dir(priv, id));
1062
1063	kvfree(qpl->page_buses);
1064	qpl->page_buses = NULL;
1065free_pages:
1066	kvfree(qpl->pages);
1067	qpl->pages = NULL;
1068}
1069
1070static void gve_free_n_qpls(struct gve_priv *priv,
1071			    struct gve_queue_page_list *qpls,
1072			    int start_id,
1073			    int num_qpls)
1074{
1075	int i;
1076
1077	for (i = start_id; i < start_id + num_qpls; i++)
1078		gve_free_queue_page_list(priv, &qpls[i], i);
1079}
1080
1081static int gve_alloc_n_qpls(struct gve_priv *priv,
1082			    struct gve_queue_page_list *qpls,
1083			    int page_count,
1084			    int start_id,
1085			    int num_qpls)
1086{
1087	int err;
1088	int i;
1089
1090	for (i = start_id; i < start_id + num_qpls; i++) {
1091		err = gve_alloc_queue_page_list(priv, &qpls[i], i, page_count);
1092		if (err)
1093			goto free_qpls;
1094	}
1095
1096	return 0;
1097
1098free_qpls:
1099	/* Must include the failing QPL too for gve_alloc_queue_page_list fails
1100	 * without cleaning up.
1101	 */
1102	gve_free_n_qpls(priv, qpls, start_id, i - start_id + 1);
1103	return err;
1104}
1105
1106static int gve_alloc_qpls(struct gve_priv *priv,
1107			  struct gve_qpls_alloc_cfg *cfg)
1108{
1109	int max_queues = cfg->tx_cfg->max_queues + cfg->rx_cfg->max_queues;
1110	int rx_start_id, tx_num_qpls, rx_num_qpls;
1111	struct gve_queue_page_list *qpls;
1112	int page_count;
1113	int err;
1114
1115	if (cfg->raw_addressing)
1116		return 0;
1117
1118	qpls = kvcalloc(max_queues, sizeof(*qpls), GFP_KERNEL);
1119	if (!qpls)
1120		return -ENOMEM;
1121
1122	cfg->qpl_cfg->qpl_map_size = BITS_TO_LONGS(max_queues) *
1123		sizeof(unsigned long) * BITS_PER_BYTE;
1124	cfg->qpl_cfg->qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues),
1125					    sizeof(unsigned long), GFP_KERNEL);
1126	if (!cfg->qpl_cfg->qpl_id_map) {
1127		err = -ENOMEM;
1128		goto free_qpl_array;
1129	}
1130
1131	/* Allocate TX QPLs */
1132	page_count = priv->tx_pages_per_qpl;
1133	tx_num_qpls = gve_num_tx_qpls(cfg->tx_cfg, cfg->num_xdp_queues,
1134				      gve_is_qpl(priv));
1135	err = gve_alloc_n_qpls(priv, qpls, page_count, 0, tx_num_qpls);
1136	if (err)
1137		goto free_qpl_map;
1138
1139	/* Allocate RX QPLs */
1140	rx_start_id = gve_rx_start_qpl_id(cfg->tx_cfg);
1141	/* For GQI_QPL number of pages allocated have 1:1 relationship with
1142	 * number of descriptors. For DQO, number of pages required are
1143	 * more than descriptors (because of out of order completions).
1144	 */
1145	page_count = cfg->is_gqi ? priv->rx_data_slot_cnt : priv->rx_pages_per_qpl;
1146	rx_num_qpls = gve_num_rx_qpls(cfg->rx_cfg, gve_is_qpl(priv));
1147	err = gve_alloc_n_qpls(priv, qpls, page_count, rx_start_id, rx_num_qpls);
1148	if (err)
1149		goto free_tx_qpls;
1150
1151	cfg->qpls = qpls;
1152	return 0;
1153
1154free_tx_qpls:
1155	gve_free_n_qpls(priv, qpls, 0, tx_num_qpls);
1156free_qpl_map:
1157	kvfree(cfg->qpl_cfg->qpl_id_map);
1158	cfg->qpl_cfg->qpl_id_map = NULL;
1159free_qpl_array:
1160	kvfree(qpls);
1161	return err;
1162}
1163
1164static void gve_free_qpls(struct gve_priv *priv,
1165			  struct gve_qpls_alloc_cfg *cfg)
1166{
1167	int max_queues = cfg->tx_cfg->max_queues + cfg->rx_cfg->max_queues;
1168	struct gve_queue_page_list *qpls = cfg->qpls;
1169	int i;
1170
1171	if (!qpls)
1172		return;
1173
1174	kvfree(cfg->qpl_cfg->qpl_id_map);
1175	cfg->qpl_cfg->qpl_id_map = NULL;
1176
1177	for (i = 0; i < max_queues; i++)
1178		gve_free_queue_page_list(priv, &qpls[i], i);
1179
1180	kvfree(qpls);
1181	cfg->qpls = NULL;
1182}
1183
1184/* Use this to schedule a reset when the device is capable of continuing
1185 * to handle other requests in its current state. If it is not, do a reset
1186 * in thread instead.
1187 */
1188void gve_schedule_reset(struct gve_priv *priv)
1189{
1190	gve_set_do_reset(priv);
1191	queue_work(priv->gve_wq, &priv->service_task);
1192}
1193
1194static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
1195static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
1196static void gve_turndown(struct gve_priv *priv);
1197static void gve_turnup(struct gve_priv *priv);
1198
1199static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
1200{
1201	struct napi_struct *napi;
1202	struct gve_rx_ring *rx;
1203	int err = 0;
1204	int i, j;
1205	u32 tx_qid;
1206
1207	if (!priv->num_xdp_queues)
1208		return 0;
1209
1210	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1211		rx = &priv->rx[i];
1212		napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
1213
1214		err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i,
1215				       napi->napi_id);
1216		if (err)
1217			goto err;
1218		err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
1219						 MEM_TYPE_PAGE_SHARED, NULL);
1220		if (err)
1221			goto err;
1222		rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
1223		if (rx->xsk_pool) {
1224			err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i,
1225					       napi->napi_id);
1226			if (err)
1227				goto err;
1228			err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
1229							 MEM_TYPE_XSK_BUFF_POOL, NULL);
1230			if (err)
1231				goto err;
1232			xsk_pool_set_rxq_info(rx->xsk_pool,
1233					      &rx->xsk_rxq);
1234		}
1235	}
1236
1237	for (i = 0; i < priv->num_xdp_queues; i++) {
1238		tx_qid = gve_xdp_tx_queue_id(priv, i);
1239		priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
1240	}
1241	return 0;
1242
1243err:
1244	for (j = i; j >= 0; j--) {
1245		rx = &priv->rx[j];
1246		if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
1247			xdp_rxq_info_unreg(&rx->xdp_rxq);
1248		if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
1249			xdp_rxq_info_unreg(&rx->xsk_rxq);
1250	}
1251	return err;
1252}
1253
1254static void gve_unreg_xdp_info(struct gve_priv *priv)
1255{
1256	int i, tx_qid;
1257
1258	if (!priv->num_xdp_queues)
1259		return;
1260
1261	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1262		struct gve_rx_ring *rx = &priv->rx[i];
1263
1264		xdp_rxq_info_unreg(&rx->xdp_rxq);
1265		if (rx->xsk_pool) {
1266			xdp_rxq_info_unreg(&rx->xsk_rxq);
1267			rx->xsk_pool = NULL;
1268		}
1269	}
1270
1271	for (i = 0; i < priv->num_xdp_queues; i++) {
1272		tx_qid = gve_xdp_tx_queue_id(priv, i);
1273		priv->tx[tx_qid].xsk_pool = NULL;
1274	}
1275}
1276
1277static void gve_drain_page_cache(struct gve_priv *priv)
1278{
1279	int i;
1280
1281	for (i = 0; i < priv->rx_cfg.num_queues; i++)
1282		page_frag_cache_drain(&priv->rx[i].page_cache);
1283}
1284
1285static void gve_qpls_get_curr_alloc_cfg(struct gve_priv *priv,
1286					struct gve_qpls_alloc_cfg *cfg)
1287{
1288	  cfg->raw_addressing = !gve_is_qpl(priv);
1289	  cfg->is_gqi = gve_is_gqi(priv);
1290	  cfg->num_xdp_queues = priv->num_xdp_queues;
1291	  cfg->qpl_cfg = &priv->qpl_cfg;
1292	  cfg->tx_cfg = &priv->tx_cfg;
1293	  cfg->rx_cfg = &priv->rx_cfg;
1294	  cfg->qpls = priv->qpls;
1295}
1296
1297static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv,
1298				      struct gve_rx_alloc_rings_cfg *cfg)
1299{
1300	cfg->qcfg = &priv->rx_cfg;
1301	cfg->qcfg_tx = &priv->tx_cfg;
1302	cfg->raw_addressing = !gve_is_qpl(priv);
1303	cfg->enable_header_split = priv->header_split_enabled;
1304	cfg->qpls = priv->qpls;
1305	cfg->qpl_cfg = &priv->qpl_cfg;
1306	cfg->ring_size = priv->rx_desc_cnt;
1307	cfg->packet_buffer_size = gve_is_gqi(priv) ?
1308				  GVE_DEFAULT_RX_BUFFER_SIZE :
1309				  priv->data_buffer_size_dqo;
1310	cfg->rx = priv->rx;
1311}
1312
1313static void gve_get_curr_alloc_cfgs(struct gve_priv *priv,
1314				    struct gve_qpls_alloc_cfg *qpls_alloc_cfg,
1315				    struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
1316				    struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
1317{
1318	gve_qpls_get_curr_alloc_cfg(priv, qpls_alloc_cfg);
1319	gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg);
1320	gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg);
1321}
1322
1323static void gve_rx_start_rings(struct gve_priv *priv, int num_rings)
1324{
1325	int i;
1326
1327	for (i = 0; i < num_rings; i++) {
1328		if (gve_is_gqi(priv))
1329			gve_rx_start_ring_gqi(priv, i);
1330		else
1331			gve_rx_start_ring_dqo(priv, i);
1332	}
1333}
1334
1335static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings)
1336{
1337	int i;
1338
1339	if (!priv->rx)
1340		return;
1341
1342	for (i = 0; i < num_rings; i++) {
1343		if (gve_is_gqi(priv))
1344			gve_rx_stop_ring_gqi(priv, i);
1345		else
1346			gve_rx_stop_ring_dqo(priv, i);
1347	}
1348}
1349
1350static void gve_queues_mem_free(struct gve_priv *priv,
1351				struct gve_qpls_alloc_cfg *qpls_alloc_cfg,
1352				struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
1353				struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
1354{
1355	gve_free_rings(priv, tx_alloc_cfg, rx_alloc_cfg);
1356	gve_free_qpls(priv, qpls_alloc_cfg);
1357}
1358
1359static int gve_queues_mem_alloc(struct gve_priv *priv,
1360				struct gve_qpls_alloc_cfg *qpls_alloc_cfg,
1361				struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
1362				struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
1363{
1364	int err;
1365
1366	err = gve_alloc_qpls(priv, qpls_alloc_cfg);
1367	if (err) {
1368		netif_err(priv, drv, priv->dev, "Failed to alloc QPLs\n");
1369		return err;
1370	}
1371	tx_alloc_cfg->qpls = qpls_alloc_cfg->qpls;
1372	rx_alloc_cfg->qpls = qpls_alloc_cfg->qpls;
1373	err = gve_alloc_rings(priv, tx_alloc_cfg, rx_alloc_cfg);
1374	if (err) {
1375		netif_err(priv, drv, priv->dev, "Failed to alloc rings\n");
1376		goto free_qpls;
1377	}
1378
1379	return 0;
1380
1381free_qpls:
1382	gve_free_qpls(priv, qpls_alloc_cfg);
1383	return err;
1384}
1385
1386static void gve_queues_mem_remove(struct gve_priv *priv)
1387{
1388	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
1389	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
1390	struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0};
1391
1392	gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg,
1393				&tx_alloc_cfg, &rx_alloc_cfg);
1394	gve_queues_mem_free(priv, &qpls_alloc_cfg,
1395			    &tx_alloc_cfg, &rx_alloc_cfg);
1396	priv->qpls = NULL;
1397	priv->tx = NULL;
1398	priv->rx = NULL;
1399}
1400
1401/* The passed-in queue memory is stored into priv and the queues are made live.
1402 * No memory is allocated. Passed-in memory is freed on errors.
1403 */
1404static int gve_queues_start(struct gve_priv *priv,
1405			    struct gve_qpls_alloc_cfg *qpls_alloc_cfg,
1406			    struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
1407			    struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
1408{
1409	struct net_device *dev = priv->dev;
1410	int err;
1411
1412	/* Record new resources into priv */
1413	priv->qpls = qpls_alloc_cfg->qpls;
1414	priv->tx = tx_alloc_cfg->tx;
1415	priv->rx = rx_alloc_cfg->rx;
1416
1417	/* Record new configs into priv */
1418	priv->qpl_cfg = *qpls_alloc_cfg->qpl_cfg;
1419	priv->tx_cfg = *tx_alloc_cfg->qcfg;
1420	priv->rx_cfg = *rx_alloc_cfg->qcfg;
1421	priv->tx_desc_cnt = tx_alloc_cfg->ring_size;
1422	priv->rx_desc_cnt = rx_alloc_cfg->ring_size;
1423
1424	if (priv->xdp_prog)
1425		priv->num_xdp_queues = priv->rx_cfg.num_queues;
1426	else
1427		priv->num_xdp_queues = 0;
1428
1429	gve_tx_start_rings(priv, 0, tx_alloc_cfg->num_rings);
1430	gve_rx_start_rings(priv, rx_alloc_cfg->qcfg->num_queues);
1431	gve_init_sync_stats(priv);
1432
1433	err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
1434	if (err)
1435		goto stop_and_free_rings;
1436	err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
1437	if (err)
1438		goto stop_and_free_rings;
1439
1440	err = gve_reg_xdp_info(priv, dev);
1441	if (err)
1442		goto stop_and_free_rings;
1443
1444	err = gve_register_qpls(priv);
1445	if (err)
1446		goto reset;
1447
1448	priv->header_split_enabled = rx_alloc_cfg->enable_header_split;
1449	priv->data_buffer_size_dqo = rx_alloc_cfg->packet_buffer_size;
1450
1451	err = gve_create_rings(priv);
1452	if (err)
1453		goto reset;
1454
1455	gve_set_device_rings_ok(priv);
1456
1457	if (gve_get_report_stats(priv))
1458		mod_timer(&priv->stats_report_timer,
1459			  round_jiffies(jiffies +
1460				msecs_to_jiffies(priv->stats_report_timer_period)));
1461
1462	gve_turnup(priv);
1463	queue_work(priv->gve_wq, &priv->service_task);
1464	priv->interface_up_cnt++;
1465	return 0;
1466
1467reset:
1468	if (gve_get_reset_in_progress(priv))
1469		goto stop_and_free_rings;
1470	gve_reset_and_teardown(priv, true);
1471	/* if this fails there is nothing we can do so just ignore the return */
1472	gve_reset_recovery(priv, false);
1473	/* return the original error */
1474	return err;
1475stop_and_free_rings:
1476	gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv));
1477	gve_rx_stop_rings(priv, priv->rx_cfg.num_queues);
1478	gve_queues_mem_remove(priv);
1479	return err;
1480}
1481
1482static int gve_open(struct net_device *dev)
1483{
1484	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
1485	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
1486	struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0};
1487	struct gve_priv *priv = netdev_priv(dev);
1488	int err;
1489
1490	gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg,
1491				&tx_alloc_cfg, &rx_alloc_cfg);
1492
1493	err = gve_queues_mem_alloc(priv, &qpls_alloc_cfg,
1494				   &tx_alloc_cfg, &rx_alloc_cfg);
1495	if (err)
1496		return err;
1497
1498	/* No need to free on error: ownership of resources is lost after
1499	 * calling gve_queues_start.
1500	 */
1501	err = gve_queues_start(priv, &qpls_alloc_cfg,
1502			       &tx_alloc_cfg, &rx_alloc_cfg);
1503	if (err)
1504		return err;
1505
1506	return 0;
1507}
1508
1509static int gve_queues_stop(struct gve_priv *priv)
1510{
1511	int err;
1512
1513	netif_carrier_off(priv->dev);
1514	if (gve_get_device_rings_ok(priv)) {
1515		gve_turndown(priv);
1516		gve_drain_page_cache(priv);
1517		err = gve_destroy_rings(priv);
1518		if (err)
1519			goto err;
1520		err = gve_unregister_qpls(priv);
1521		if (err)
1522			goto err;
1523		gve_clear_device_rings_ok(priv);
1524	}
1525	del_timer_sync(&priv->stats_report_timer);
1526
1527	gve_unreg_xdp_info(priv);
1528
1529	gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv));
1530	gve_rx_stop_rings(priv, priv->rx_cfg.num_queues);
1531
1532	priv->interface_down_cnt++;
1533	return 0;
1534
1535err:
1536	/* This must have been called from a reset due to the rtnl lock
1537	 * so just return at this point.
1538	 */
1539	if (gve_get_reset_in_progress(priv))
1540		return err;
1541	/* Otherwise reset before returning */
1542	gve_reset_and_teardown(priv, true);
1543	return gve_reset_recovery(priv, false);
1544}
1545
1546static int gve_close(struct net_device *dev)
1547{
1548	struct gve_priv *priv = netdev_priv(dev);
1549	int err;
1550
1551	err = gve_queues_stop(priv);
1552	if (err)
1553		return err;
1554
1555	gve_queues_mem_remove(priv);
1556	return 0;
1557}
1558
1559static int gve_remove_xdp_queues(struct gve_priv *priv)
1560{
1561	int qpl_start_id;
1562	int err;
1563
1564	qpl_start_id = gve_xdp_tx_start_queue_id(priv);
1565
1566	err = gve_destroy_xdp_rings(priv);
1567	if (err)
1568		return err;
1569
1570	err = gve_unregister_xdp_qpls(priv);
1571	if (err)
1572		return err;
1573
1574	gve_unreg_xdp_info(priv);
1575	gve_free_xdp_rings(priv);
1576
1577	gve_free_n_qpls(priv, priv->qpls, qpl_start_id, gve_num_xdp_qpls(priv));
1578	priv->num_xdp_queues = 0;
1579	return 0;
1580}
1581
1582static int gve_add_xdp_queues(struct gve_priv *priv)
1583{
1584	int start_id;
1585	int err;
1586
1587	priv->num_xdp_queues = priv->rx_cfg.num_queues;
1588
1589	start_id = gve_xdp_tx_start_queue_id(priv);
1590	err = gve_alloc_n_qpls(priv, priv->qpls, priv->tx_pages_per_qpl,
1591			       start_id, gve_num_xdp_qpls(priv));
1592	if (err)
1593		goto err;
1594
1595	err = gve_alloc_xdp_rings(priv);
1596	if (err)
1597		goto free_xdp_qpls;
1598
1599	err = gve_reg_xdp_info(priv, priv->dev);
1600	if (err)
1601		goto free_xdp_rings;
1602
1603	err = gve_register_xdp_qpls(priv);
1604	if (err)
1605		goto free_xdp_rings;
1606
1607	err = gve_create_xdp_rings(priv);
1608	if (err)
1609		goto free_xdp_rings;
1610
1611	return 0;
1612
1613free_xdp_rings:
1614	gve_free_xdp_rings(priv);
1615free_xdp_qpls:
1616	gve_free_n_qpls(priv, priv->qpls, start_id, gve_num_xdp_qpls(priv));
1617err:
1618	priv->num_xdp_queues = 0;
1619	return err;
1620}
1621
1622static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
1623{
1624	if (!gve_get_napi_enabled(priv))
1625		return;
1626
1627	if (link_status == netif_carrier_ok(priv->dev))
1628		return;
1629
1630	if (link_status) {
1631		netdev_info(priv->dev, "Device link is up.\n");
1632		netif_carrier_on(priv->dev);
1633	} else {
1634		netdev_info(priv->dev, "Device link is down.\n");
1635		netif_carrier_off(priv->dev);
1636	}
1637}
1638
1639static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
1640		       struct netlink_ext_ack *extack)
1641{
1642	struct bpf_prog *old_prog;
1643	int err = 0;
1644	u32 status;
1645
1646	old_prog = READ_ONCE(priv->xdp_prog);
1647	if (!netif_carrier_ok(priv->dev)) {
1648		WRITE_ONCE(priv->xdp_prog, prog);
1649		if (old_prog)
1650			bpf_prog_put(old_prog);
1651		return 0;
1652	}
1653
1654	gve_turndown(priv);
1655	if (!old_prog && prog) {
1656		// Allocate XDP TX queues if an XDP program is
1657		// being installed
1658		err = gve_add_xdp_queues(priv);
1659		if (err)
1660			goto out;
1661	} else if (old_prog && !prog) {
1662		// Remove XDP TX queues if an XDP program is
1663		// being uninstalled
1664		err = gve_remove_xdp_queues(priv);
1665		if (err)
1666			goto out;
1667	}
1668	WRITE_ONCE(priv->xdp_prog, prog);
1669	if (old_prog)
1670		bpf_prog_put(old_prog);
1671
1672out:
1673	gve_turnup(priv);
1674	status = ioread32be(&priv->reg_bar0->device_status);
1675	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
1676	return err;
1677}
1678
1679static int gve_xsk_pool_enable(struct net_device *dev,
1680			       struct xsk_buff_pool *pool,
1681			       u16 qid)
1682{
1683	struct gve_priv *priv = netdev_priv(dev);
1684	struct napi_struct *napi;
1685	struct gve_rx_ring *rx;
1686	int tx_qid;
1687	int err;
1688
1689	if (qid >= priv->rx_cfg.num_queues) {
1690		dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid);
1691		return -EINVAL;
1692	}
1693	if (xsk_pool_get_rx_frame_size(pool) <
1694	     priv->dev->max_mtu + sizeof(struct ethhdr)) {
1695		dev_err(&priv->pdev->dev, "xsk pool frame_len too small");
1696		return -EINVAL;
1697	}
1698
1699	err = xsk_pool_dma_map(pool, &priv->pdev->dev,
1700			       DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1701	if (err)
1702		return err;
1703
1704	/* If XDP prog is not installed, return */
1705	if (!priv->xdp_prog)
1706		return 0;
1707
1708	rx = &priv->rx[qid];
1709	napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
1710	err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id);
1711	if (err)
1712		goto err;
1713
1714	err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
1715					 MEM_TYPE_XSK_BUFF_POOL, NULL);
1716	if (err)
1717		goto err;
1718
1719	xsk_pool_set_rxq_info(pool, &rx->xsk_rxq);
1720	rx->xsk_pool = pool;
1721
1722	tx_qid = gve_xdp_tx_queue_id(priv, qid);
1723	priv->tx[tx_qid].xsk_pool = pool;
1724
1725	return 0;
1726err:
1727	if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
1728		xdp_rxq_info_unreg(&rx->xsk_rxq);
1729
1730	xsk_pool_dma_unmap(pool,
1731			   DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1732	return err;
1733}
1734
1735static int gve_xsk_pool_disable(struct net_device *dev,
1736				u16 qid)
1737{
1738	struct gve_priv *priv = netdev_priv(dev);
1739	struct napi_struct *napi_rx;
1740	struct napi_struct *napi_tx;
1741	struct xsk_buff_pool *pool;
1742	int tx_qid;
1743
1744	pool = xsk_get_pool_from_qid(dev, qid);
1745	if (!pool)
1746		return -EINVAL;
1747	if (qid >= priv->rx_cfg.num_queues)
1748		return -EINVAL;
1749
1750	/* If XDP prog is not installed, unmap DMA and return */
1751	if (!priv->xdp_prog)
1752		goto done;
1753
1754	tx_qid = gve_xdp_tx_queue_id(priv, qid);
1755	if (!netif_running(dev)) {
1756		priv->rx[qid].xsk_pool = NULL;
1757		xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
1758		priv->tx[tx_qid].xsk_pool = NULL;
1759		goto done;
1760	}
1761
1762	napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
1763	napi_disable(napi_rx); /* make sure current rx poll is done */
1764
1765	napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
1766	napi_disable(napi_tx); /* make sure current tx poll is done */
1767
1768	priv->rx[qid].xsk_pool = NULL;
1769	xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
1770	priv->tx[tx_qid].xsk_pool = NULL;
1771	smp_mb(); /* Make sure it is visible to the workers on datapath */
1772
1773	napi_enable(napi_rx);
1774	if (gve_rx_work_pending(&priv->rx[qid]))
1775		napi_schedule(napi_rx);
1776
1777	napi_enable(napi_tx);
1778	if (gve_tx_clean_pending(priv, &priv->tx[tx_qid]))
1779		napi_schedule(napi_tx);
1780
1781done:
1782	xsk_pool_dma_unmap(pool,
1783			   DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1784	return 0;
1785}
1786
1787static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
1788{
1789	struct gve_priv *priv = netdev_priv(dev);
1790	int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id);
1791
1792	if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog)
1793		return -EINVAL;
1794
1795	if (flags & XDP_WAKEUP_TX) {
1796		struct gve_tx_ring *tx = &priv->tx[tx_queue_id];
1797		struct napi_struct *napi =
1798			&priv->ntfy_blocks[tx->ntfy_id].napi;
1799
1800		if (!napi_if_scheduled_mark_missed(napi)) {
1801			/* Call local_bh_enable to trigger SoftIRQ processing */
1802			local_bh_disable();
1803			napi_schedule(napi);
1804			local_bh_enable();
1805		}
1806
1807		tx->xdp_xsk_wakeup++;
1808	}
1809
1810	return 0;
1811}
1812
1813static int verify_xdp_configuration(struct net_device *dev)
1814{
1815	struct gve_priv *priv = netdev_priv(dev);
1816
1817	if (dev->features & NETIF_F_LRO) {
1818		netdev_warn(dev, "XDP is not supported when LRO is on.\n");
1819		return -EOPNOTSUPP;
1820	}
1821
1822	if (priv->queue_format != GVE_GQI_QPL_FORMAT) {
1823		netdev_warn(dev, "XDP is not supported in mode %d.\n",
1824			    priv->queue_format);
1825		return -EOPNOTSUPP;
1826	}
1827
1828	if (dev->mtu > GVE_DEFAULT_RX_BUFFER_SIZE - sizeof(struct ethhdr) - GVE_RX_PAD) {
1829		netdev_warn(dev, "XDP is not supported for mtu %d.\n",
1830			    dev->mtu);
1831		return -EOPNOTSUPP;
1832	}
1833
1834	if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues ||
1835	    (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) {
1836		netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d",
1837			    priv->rx_cfg.num_queues,
1838			    priv->tx_cfg.num_queues,
1839			    priv->tx_cfg.max_queues);
1840		return -EINVAL;
1841	}
1842	return 0;
1843}
1844
1845static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp)
1846{
1847	struct gve_priv *priv = netdev_priv(dev);
1848	int err;
1849
1850	err = verify_xdp_configuration(dev);
1851	if (err)
1852		return err;
1853	switch (xdp->command) {
1854	case XDP_SETUP_PROG:
1855		return gve_set_xdp(priv, xdp->prog, xdp->extack);
1856	case XDP_SETUP_XSK_POOL:
1857		if (xdp->xsk.pool)
1858			return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id);
1859		else
1860			return gve_xsk_pool_disable(dev, xdp->xsk.queue_id);
1861	default:
1862		return -EINVAL;
1863	}
1864}
1865
1866static int gve_adjust_config(struct gve_priv *priv,
1867			     struct gve_qpls_alloc_cfg *qpls_alloc_cfg,
1868			     struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
1869			     struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
1870{
1871	int err;
1872
1873	/* Allocate resources for the new confiugration */
1874	err = gve_queues_mem_alloc(priv, qpls_alloc_cfg,
1875				   tx_alloc_cfg, rx_alloc_cfg);
1876	if (err) {
1877		netif_err(priv, drv, priv->dev,
1878			  "Adjust config failed to alloc new queues");
1879		return err;
1880	}
1881
1882	/* Teardown the device and free existing resources */
1883	err = gve_close(priv->dev);
1884	if (err) {
1885		netif_err(priv, drv, priv->dev,
1886			  "Adjust config failed to close old queues");
1887		gve_queues_mem_free(priv, qpls_alloc_cfg,
1888				    tx_alloc_cfg, rx_alloc_cfg);
1889		return err;
1890	}
1891
1892	/* Bring the device back up again with the new resources. */
1893	err = gve_queues_start(priv, qpls_alloc_cfg,
1894			       tx_alloc_cfg, rx_alloc_cfg);
1895	if (err) {
1896		netif_err(priv, drv, priv->dev,
1897			  "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n");
1898		/* No need to free on error: ownership of resources is lost after
1899		 * calling gve_queues_start.
1900		 */
1901		gve_turndown(priv);
1902		return err;
1903	}
1904
1905	return 0;
1906}
1907
1908int gve_adjust_queues(struct gve_priv *priv,
1909		      struct gve_queue_config new_rx_config,
1910		      struct gve_queue_config new_tx_config)
1911{
1912	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
1913	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
1914	struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0};
1915	struct gve_qpl_config new_qpl_cfg;
1916	int err;
1917
1918	gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg,
1919				&tx_alloc_cfg, &rx_alloc_cfg);
1920
1921	/* qpl_cfg is not read-only, it contains a map that gets updated as
1922	 * rings are allocated, which is why we cannot use the yet unreleased
1923	 * one in priv.
1924	 */
1925	qpls_alloc_cfg.qpl_cfg = &new_qpl_cfg;
1926	tx_alloc_cfg.qpl_cfg = &new_qpl_cfg;
1927	rx_alloc_cfg.qpl_cfg = &new_qpl_cfg;
1928
1929	/* Relay the new config from ethtool */
1930	qpls_alloc_cfg.tx_cfg = &new_tx_config;
1931	tx_alloc_cfg.qcfg = &new_tx_config;
1932	rx_alloc_cfg.qcfg_tx = &new_tx_config;
1933	qpls_alloc_cfg.rx_cfg = &new_rx_config;
1934	rx_alloc_cfg.qcfg = &new_rx_config;
1935	tx_alloc_cfg.num_rings = new_tx_config.num_queues;
1936
1937	if (netif_carrier_ok(priv->dev)) {
1938		err = gve_adjust_config(priv, &qpls_alloc_cfg,
1939					&tx_alloc_cfg, &rx_alloc_cfg);
1940		return err;
1941	}
1942	/* Set the config for the next up. */
1943	priv->tx_cfg = new_tx_config;
1944	priv->rx_cfg = new_rx_config;
1945
1946	return 0;
1947}
1948
1949static void gve_turndown(struct gve_priv *priv)
1950{
1951	int idx;
1952
1953	if (netif_carrier_ok(priv->dev))
1954		netif_carrier_off(priv->dev);
1955
1956	if (!gve_get_napi_enabled(priv))
1957		return;
1958
1959	/* Disable napi to prevent more work from coming in */
1960	for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1961		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1962		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1963
1964		napi_disable(&block->napi);
1965	}
1966	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1967		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1968		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1969
1970		napi_disable(&block->napi);
1971	}
1972
1973	/* Stop tx queues */
1974	netif_tx_disable(priv->dev);
1975
1976	gve_clear_napi_enabled(priv);
1977	gve_clear_report_stats(priv);
1978}
1979
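/* Undo gve_turndown: restart the TX queues, then re-enable napi and
 * unmask interrupts (GQI) or reprogram interrupt coalescing (DQO) on
 * every notification block.
 */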
1980static void gve_turnup(struct gve_priv *priv)
1981{
1982	int idx;
1983
1984	/* Start the tx queues */
1985	netif_tx_start_all_queues(priv->dev);
1986
1987	/* Enable napi and unmask interrupts for all queues */
1988	for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1989		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1990		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1991
1992		napi_enable(&block->napi);
1993		if (gve_is_gqi(priv)) {
1994			iowrite32be(0, gve_irq_doorbell(priv, block));
1995		} else {
1996			gve_set_itr_coalesce_usecs_dqo(priv, block,
1997						       priv->tx_coalesce_usecs);
1998		}
1999	}
2000	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
2001		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
2002		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
2003
2004		napi_enable(&block->napi);
2005		if (gve_is_gqi(priv)) {
2006			iowrite32be(0, gve_irq_doorbell(priv, block));
2007		} else {
2008			gve_set_itr_coalesce_usecs_dqo(priv, block,
2009						       priv->rx_coalesce_usecs);
2010		}
2011	}
2012
2013	gve_set_napi_enabled(priv);
2014}
2015
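/* TX watchdog handler. If the NIC reports completions the driver has
 * not yet processed, kick the stalled queue by rescheduling its napi;
 * otherwise, or if the queue was kicked within the last
 * MIN_TX_TIMEOUT_GAP msecs, schedule a device reset.
 */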
2016static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
2017{
2018	struct gve_notify_block *block;
2019	struct gve_tx_ring *tx = NULL;
2020	struct gve_priv *priv;
2021	u32 last_nic_done;
2022	u32 current_time;
2023	u32 ntfy_idx;
2024
	netdev_info(dev, "Timeout on tx queue %d\n", txqueue);
2026	priv = netdev_priv(dev);
	if (txqueue >= priv->tx_cfg.num_queues)
2028		goto reset;
2029
2030	ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue);
2031	if (ntfy_idx >= priv->num_ntfy_blks)
2032		goto reset;
2033
2034	block = &priv->ntfy_blocks[ntfy_idx];
2035	tx = block->tx;
2036
2037	current_time = jiffies_to_msecs(jiffies);
2038	if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
2039		goto reset;
2040
2041	/* Check to see if there are missed completions, which will allow us to
2042	 * kick the queue.
2043	 */
2044	last_nic_done = gve_tx_load_event_counter(priv, tx);
2045	if (last_nic_done - tx->done) {
		netdev_info(dev, "Kicking queue %d\n", txqueue);
2047		iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
2048		napi_schedule(&block->napi);
2049		tx->last_kick_msec = current_time;
2050		goto out;
2051	} // Else reset.
2052
2053reset:
2054	gve_schedule_reset(priv);
2055
2056out:
2057	if (tx)
2058		tx->queue_timeout++;
2059	priv->tx_timeo_cnt++;
2060}
2061
2062u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit)
2063{
2064	if (enable_hsplit && priv->max_rx_buffer_size >= GVE_MAX_RX_BUFFER_SIZE)
2065		return GVE_MAX_RX_BUFFER_SIZE;
2066	else
2067		return GVE_DEFAULT_RX_BUFFER_SIZE;
2068}
2069
/* Header split is not yet supported on non-DQO_RDA queue formats, even
 * if the device advertises it.
 */
2071bool gve_header_split_supported(const struct gve_priv *priv)
2072{
2073	return priv->header_buf_size && priv->queue_format == GVE_DQO_RDA_FORMAT;
2074}
2075
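/* Apply the ethtool tcp-data-split (header split) setting, e.g. set
 * with "ethtool -G <ifname> tcp-data-split on" on a recent ethtool.
 * Changing it requires new RX buffers, so a running device is
 * restarted via gve_adjust_config.
 */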
2076int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split)
2077{
2078	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
2079	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
2080	struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0};
2081	bool enable_hdr_split;
2082	int err = 0;
2083
2084	if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN)
2085		return 0;
2086
2087	if (!gve_header_split_supported(priv)) {
2088		dev_err(&priv->pdev->dev, "Header-split not supported\n");
2089		return -EOPNOTSUPP;
2090	}
2091
2092	if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED)
2093		enable_hdr_split = true;
2094	else
2095		enable_hdr_split = false;
2096
2097	if (enable_hdr_split == priv->header_split_enabled)
2098		return 0;
2099
2100	gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg,
2101				&tx_alloc_cfg, &rx_alloc_cfg);
2102
2103	rx_alloc_cfg.enable_header_split = enable_hdr_split;
2104	rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split);
2105
2106	if (netif_running(priv->dev))
2107		err = gve_adjust_config(priv, &qpls_alloc_cfg,
2108					&tx_alloc_cfg, &rx_alloc_cfg);
2109	return err;
2110}
2111
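/* ndo_set_features handler. Only NETIF_F_LRO changes need the queues
 * to be reallocated; if that restart fails the feature flip is
 * reverted and the error returned.
 */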
2112static int gve_set_features(struct net_device *netdev,
2113			    netdev_features_t features)
2114{
2115	const netdev_features_t orig_features = netdev->features;
2116	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
2117	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
2118	struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0};
2119	struct gve_priv *priv = netdev_priv(netdev);
2120	struct gve_qpl_config new_qpl_cfg;
2121	int err;
2122
2123	gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg,
2124				&tx_alloc_cfg, &rx_alloc_cfg);
	/* qpl_cfg is not read-only, it contains a map that gets updated as
	 * rings are allocated, which is why we cannot use the one in priv
	 * that has not been released yet.
	 */
2129	qpls_alloc_cfg.qpl_cfg = &new_qpl_cfg;
2130	tx_alloc_cfg.qpl_cfg = &new_qpl_cfg;
2131	rx_alloc_cfg.qpl_cfg = &new_qpl_cfg;
2132
2133	if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
2134		netdev->features ^= NETIF_F_LRO;
2135		if (netif_carrier_ok(netdev)) {
2136			err = gve_adjust_config(priv, &qpls_alloc_cfg,
2137						&tx_alloc_cfg, &rx_alloc_cfg);
2138			if (err) {
2139				/* Revert the change on error. */
2140				netdev->features = orig_features;
2141				return err;
2142			}
2143		}
2144	}
2145
2146	return 0;
2147}
2148
2149static const struct net_device_ops gve_netdev_ops = {
2150	.ndo_start_xmit		=	gve_start_xmit,
2151	.ndo_features_check	=	gve_features_check,
2152	.ndo_open		=	gve_open,
2153	.ndo_stop		=	gve_close,
2154	.ndo_get_stats64	=	gve_get_stats,
2155	.ndo_tx_timeout         =       gve_tx_timeout,
2156	.ndo_set_features	=	gve_set_features,
2157	.ndo_bpf		=	gve_xdp,
2158	.ndo_xdp_xmit		=	gve_xdp_xmit,
2159	.ndo_xsk_wakeup		=	gve_xsk_wakeup,
2160};
2161
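/* Turn device status register bits into pending work (reset requests
 * and stats report requests) for the service task to act on.
 */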
2162static void gve_handle_status(struct gve_priv *priv, u32 status)
2163{
2164	if (GVE_DEVICE_STATUS_RESET_MASK & status) {
2165		dev_info(&priv->pdev->dev, "Device requested reset.\n");
2166		gve_set_do_reset(priv);
2167	}
2168	if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
2169		priv->stats_report_trigger_cnt++;
2170		gve_set_do_report_stats(priv);
2171	}
2172}
2173
2174static void gve_handle_reset(struct gve_priv *priv)
2175{
2176	/* A service task will be scheduled at the end of probe to catch any
2177	 * resets that need to happen, and we don't want to reset until
2178	 * probe is done.
2179	 */
2180	if (gve_get_probe_in_progress(priv))
2181		return;
2182
2183	if (gve_get_do_reset(priv)) {
2184		rtnl_lock();
2185		gve_reset(priv, false);
2186		rtnl_unlock();
2187	}
2188}
2189
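/* Fill the stats report shared with the device with per-queue TX and
 * RX counters and bump its written_count.
 */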
2190void gve_handle_report_stats(struct gve_priv *priv)
2191{
2192	struct stats *stats = priv->stats_report->stats;
2193	int idx, stats_idx = 0;
2194	unsigned int start = 0;
2195	u64 tx_bytes;
2196
2197	if (!gve_get_report_stats(priv))
2198		return;
2199
2200	be64_add_cpu(&priv->stats_report->written_count, 1);
2201	/* tx stats */
2202	if (priv->tx) {
2203		for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
2204			u32 last_completion = 0;
2205			u32 tx_frames = 0;
2206
2207			/* DQO doesn't currently support these metrics. */
2208			if (gve_is_gqi(priv)) {
2209				last_completion = priv->tx[idx].done;
2210				tx_frames = priv->tx[idx].req;
2211			}
2212
2213			do {
2214				start = u64_stats_fetch_begin(&priv->tx[idx].statss);
2215				tx_bytes = priv->tx[idx].bytes_done;
2216			} while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
2217			stats[stats_idx++] = (struct stats) {
2218				.stat_name = cpu_to_be32(TX_WAKE_CNT),
2219				.value = cpu_to_be64(priv->tx[idx].wake_queue),
2220				.queue_id = cpu_to_be32(idx),
2221			};
2222			stats[stats_idx++] = (struct stats) {
2223				.stat_name = cpu_to_be32(TX_STOP_CNT),
2224				.value = cpu_to_be64(priv->tx[idx].stop_queue),
2225				.queue_id = cpu_to_be32(idx),
2226			};
2227			stats[stats_idx++] = (struct stats) {
2228				.stat_name = cpu_to_be32(TX_FRAMES_SENT),
2229				.value = cpu_to_be64(tx_frames),
2230				.queue_id = cpu_to_be32(idx),
2231			};
2232			stats[stats_idx++] = (struct stats) {
2233				.stat_name = cpu_to_be32(TX_BYTES_SENT),
2234				.value = cpu_to_be64(tx_bytes),
2235				.queue_id = cpu_to_be32(idx),
2236			};
2237			stats[stats_idx++] = (struct stats) {
2238				.stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
2239				.value = cpu_to_be64(last_completion),
2240				.queue_id = cpu_to_be32(idx),
2241			};
2242			stats[stats_idx++] = (struct stats) {
2243				.stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
2244				.value = cpu_to_be64(priv->tx[idx].queue_timeout),
2245				.queue_id = cpu_to_be32(idx),
2246			};
2247		}
2248	}
2249	/* rx stats */
2250	if (priv->rx) {
2251		for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
2252			stats[stats_idx++] = (struct stats) {
2253				.stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
2254				.value = cpu_to_be64(priv->rx[idx].desc.seqno),
2255				.queue_id = cpu_to_be32(idx),
2256			};
2257			stats[stats_idx++] = (struct stats) {
2258				.stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
				.value = cpu_to_be64(priv->rx[idx].fill_cnt),
2260				.queue_id = cpu_to_be32(idx),
2261			};
2262		}
2263	}
2264}
2265
2266/* Handle NIC status register changes, reset requests and report stats */
2267static void gve_service_task(struct work_struct *work)
2268{
2269	struct gve_priv *priv = container_of(work, struct gve_priv,
2270					     service_task);
2271	u32 status = ioread32be(&priv->reg_bar0->device_status);
2272
2273	gve_handle_status(priv, status);
2274
2275	gve_handle_reset(priv);
2276	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
2277}
2278
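/* Advertise XDP support: only the GQI-QPL queue format currently
 * supports basic XDP, redirect, ndo_xmit and AF_XDP zero-copy.
 */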
2279static void gve_set_netdev_xdp_features(struct gve_priv *priv)
2280{
2281	if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
2282		priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC;
2283		priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT;
2284		priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
2285		priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
2286	} else {
2287		priv->dev->xdp_features = 0;
2288	}
2289}
2290
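/* One-time device initialization: set up the admin queue, verify
 * driver compatibility, query the device description, size the MSI-X
 * and queue configuration, then set up device resources. On the reset
 * path (skip_describe_device) the previously negotiated configuration
 * is reused.
 */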
2291static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
2292{
2293	int num_ntfy;
2294	int err;
2295
2296	/* Set up the adminq */
2297	err = gve_adminq_alloc(&priv->pdev->dev, priv);
2298	if (err) {
2299		dev_err(&priv->pdev->dev,
2300			"Failed to alloc admin queue: err=%d\n", err);
2301		return err;
2302	}
2303
2304	err = gve_verify_driver_compatibility(priv);
2305	if (err) {
2306		dev_err(&priv->pdev->dev,
2307			"Could not verify driver compatibility: err=%d\n", err);
2308		goto err;
2309	}
2310
2311	priv->num_registered_pages = 0;
2312
2313	if (skip_describe_device)
2314		goto setup_device;
2315
2316	priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
2317	/* Get the initial information we need from the device */
2318	err = gve_adminq_describe_device(priv);
2319	if (err) {
2320		dev_err(&priv->pdev->dev,
2321			"Could not get device information: err=%d\n", err);
2322		goto err;
2323	}
2324	priv->dev->mtu = priv->dev->max_mtu;
2325	num_ntfy = pci_msix_vec_count(priv->pdev);
2326	if (num_ntfy <= 0) {
2327		dev_err(&priv->pdev->dev,
2328			"could not count MSI-x vectors: err=%d\n", num_ntfy);
2329		err = num_ntfy;
2330		goto err;
2331	} else if (num_ntfy < GVE_MIN_MSIX) {
2332		dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
2333			GVE_MIN_MSIX, num_ntfy);
2334		err = -EINVAL;
2335		goto err;
2336	}
2337
	/* Big TCP is only supported on DQ */
2339	if (!gve_is_gqi(priv))
2340		netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);
2341
2342	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
2343	/* gvnic has one Notification Block per MSI-x vector, except for the
2344	 * management vector
2345	 */
2346	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
2347	priv->mgmt_msix_idx = priv->num_ntfy_blks;
2348
2349	priv->tx_cfg.max_queues =
2350		min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
2351	priv->rx_cfg.max_queues =
2352		min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);
2353
2354	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
2355	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
2356	if (priv->default_num_queues > 0) {
2357		priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
2358						priv->tx_cfg.num_queues);
2359		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
2360						priv->rx_cfg.num_queues);
2361	}
2362
2363	dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
2364		 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
2365	dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
2366		 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);
2367
2368	if (!gve_is_gqi(priv)) {
2369		priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO;
2370		priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
2371	}
2372
2373setup_device:
2374	gve_set_netdev_xdp_features(priv);
2375	err = gve_setup_device_resources(priv);
2376	if (!err)
2377		return 0;
2378err:
2379	gve_adminq_free(&priv->pdev->dev, priv);
2380	return err;
2381}
2382
2383static void gve_teardown_priv_resources(struct gve_priv *priv)
2384{
2385	gve_teardown_device_resources(priv);
2386	gve_adminq_free(&priv->pdev->dev, priv);
2387}
2388
2389static void gve_trigger_reset(struct gve_priv *priv)
2390{
2391	/* Reset the device by releasing the AQ */
2392	gve_adminq_release(priv);
2393}
2394
2395static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
2396{
2397	gve_trigger_reset(priv);
2398	/* With the reset having already happened, close cannot fail */
2399	if (was_up)
2400		gve_close(priv->dev);
2401	gve_teardown_priv_resources(priv);
2402}
2403
2404static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
2405{
2406	int err;
2407
2408	err = gve_init_priv(priv, true);
2409	if (err)
2410		goto err;
2411	if (was_up) {
2412		err = gve_open(priv->dev);
2413		if (err)
2414			goto err;
2415	}
2416	return 0;
2417err:
2418	dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
2419	gve_turndown(priv);
2420	return err;
2421}
2422
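/* Reset the device. With attempt_teardown, first try a normal close
 * and resource teardown; otherwise turn the device down and reset via
 * the admin queue immediately. Afterwards re-init priv and reopen the
 * interface if it was up before the reset.
 */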
2423int gve_reset(struct gve_priv *priv, bool attempt_teardown)
2424{
2425	bool was_up = netif_carrier_ok(priv->dev);
2426	int err;
2427
2428	dev_info(&priv->pdev->dev, "Performing reset\n");
2429	gve_clear_do_reset(priv);
2430	gve_set_reset_in_progress(priv);
	/* If we aren't attempting to tear down normally, just turn down and
	 * reset right away.
	 */
2434	if (!attempt_teardown) {
2435		gve_turndown(priv);
2436		gve_reset_and_teardown(priv, was_up);
2437	} else {
2438		/* Otherwise attempt to close normally */
2439		if (was_up) {
2440			err = gve_close(priv->dev);
2441			/* If that fails reset as we did above */
2442			if (err)
2443				gve_reset_and_teardown(priv, was_up);
2444		}
2445		/* Clean up any remaining resources */
2446		gve_teardown_priv_resources(priv);
2447	}
2448
2449	/* Set it all back up */
2450	err = gve_reset_recovery(priv, was_up);
2451	gve_clear_reset_in_progress(priv);
2452	priv->reset_cnt++;
2453	priv->interface_up_cnt = 0;
2454	priv->interface_down_cnt = 0;
2455	priv->stats_report_trigger_cnt = 0;
2456	return err;
2457}
2458
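/* Report the driver version to the device by writing it byte by byte,
 * prefix first, into the driver version register.
 */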
2459static void gve_write_version(u8 __iomem *driver_version_register)
2460{
2461	const char *c = gve_version_prefix;
2462
2463	while (*c) {
2464		writeb(*c, driver_version_register);
2465		c++;
2466	}
2467
2468	c = gve_version_str;
2469	while (*c) {
2470		writeb(*c, driver_version_register);
2471		c++;
2472	}
2473	writeb('\n', driver_version_register);
2474}
2475
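/* PCI probe: map the register and doorbell BARs, size and allocate the
 * netdev from the device's advertised queue limits, initialize priv
 * and the service workqueue, then register the netdev.
 */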
2476static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
2477{
2478	int max_tx_queues, max_rx_queues;
2479	struct net_device *dev;
2480	__be32 __iomem *db_bar;
2481	struct gve_registers __iomem *reg_bar;
2482	struct gve_priv *priv;
2483	int err;
2484
2485	err = pci_enable_device(pdev);
2486	if (err)
2487		return err;
2488
2489	err = pci_request_regions(pdev, gve_driver_name);
2490	if (err)
2491		goto abort_with_enabled;
2492
2493	pci_set_master(pdev);
2494
2495	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
2496	if (err) {
2497		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
2498		goto abort_with_pci_region;
2499	}
2500
2501	reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
2502	if (!reg_bar) {
2503		dev_err(&pdev->dev, "Failed to map pci bar!\n");
2504		err = -ENOMEM;
2505		goto abort_with_pci_region;
2506	}
2507
2508	db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
2509	if (!db_bar) {
2510		dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
2511		err = -ENOMEM;
2512		goto abort_with_reg_bar;
2513	}
2514
2515	gve_write_version(&reg_bar->driver_version);
2516	/* Get max queues to alloc etherdev */
2517	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
2518	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
2519	/* Alloc and setup the netdev and priv */
2520	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
2521	if (!dev) {
2522		dev_err(&pdev->dev, "could not allocate netdev\n");
2523		err = -ENOMEM;
2524		goto abort_with_db_bar;
2525	}
2526	SET_NETDEV_DEV(dev, &pdev->dev);
2527	pci_set_drvdata(pdev, dev);
2528	dev->ethtool_ops = &gve_ethtool_ops;
2529	dev->netdev_ops = &gve_netdev_ops;
2530
2531	/* Set default and supported features.
2532	 *
2533	 * Features might be set in other locations as well (such as
2534	 * `gve_adminq_describe_device`).
2535	 */
2536	dev->hw_features = NETIF_F_HIGHDMA;
2537	dev->hw_features |= NETIF_F_SG;
2538	dev->hw_features |= NETIF_F_HW_CSUM;
2539	dev->hw_features |= NETIF_F_TSO;
2540	dev->hw_features |= NETIF_F_TSO6;
2541	dev->hw_features |= NETIF_F_TSO_ECN;
2542	dev->hw_features |= NETIF_F_RXCSUM;
2543	dev->hw_features |= NETIF_F_RXHASH;
2544	dev->features = dev->hw_features;
2545	dev->watchdog_timeo = 5 * HZ;
2546	dev->min_mtu = ETH_MIN_MTU;
2547	netif_carrier_off(dev);
2548
2549	priv = netdev_priv(dev);
2550	priv->dev = dev;
2551	priv->pdev = pdev;
2552	priv->msg_enable = DEFAULT_MSG_LEVEL;
2553	priv->reg_bar0 = reg_bar;
2554	priv->db_bar2 = db_bar;
2555	priv->service_task_flags = 0x0;
2556	priv->state_flags = 0x0;
2557	priv->ethtool_flags = 0x0;
2558	priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE;
2559	priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;
2560
2561	gve_set_probe_in_progress(priv);
2562	priv->gve_wq = alloc_ordered_workqueue("gve", 0);
2563	if (!priv->gve_wq) {
		dev_err(&pdev->dev, "Could not allocate workqueue\n");
2565		err = -ENOMEM;
2566		goto abort_with_netdev;
2567	}
2568	INIT_WORK(&priv->service_task, gve_service_task);
2569	INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
2570	priv->tx_cfg.max_queues = max_tx_queues;
2571	priv->rx_cfg.max_queues = max_rx_queues;
2572
2573	err = gve_init_priv(priv, false);
2574	if (err)
2575		goto abort_with_wq;
2576
2577	err = register_netdev(dev);
2578	if (err)
2579		goto abort_with_gve_init;
2580
2581	dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
2582	dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format);
2583	gve_clear_probe_in_progress(priv);
2584	queue_work(priv->gve_wq, &priv->service_task);
2585	return 0;
2586
2587abort_with_gve_init:
2588	gve_teardown_priv_resources(priv);
2589
2590abort_with_wq:
2591	destroy_workqueue(priv->gve_wq);
2592
2593abort_with_netdev:
2594	free_netdev(dev);
2595
2596abort_with_db_bar:
2597	pci_iounmap(pdev, db_bar);
2598
2599abort_with_reg_bar:
2600	pci_iounmap(pdev, reg_bar);
2601
2602abort_with_pci_region:
2603	pci_release_regions(pdev);
2604
2605abort_with_enabled:
2606	pci_disable_device(pdev);
2607	return err;
2608}
2609
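/* PCI remove: unregister the netdev and release all resources in the
 * reverse order of probe.
 */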
2610static void gve_remove(struct pci_dev *pdev)
2611{
2612	struct net_device *netdev = pci_get_drvdata(pdev);
2613	struct gve_priv *priv = netdev_priv(netdev);
2614	__be32 __iomem *db_bar = priv->db_bar2;
2615	void __iomem *reg_bar = priv->reg_bar0;
2616
2617	unregister_netdev(netdev);
2618	gve_teardown_priv_resources(priv);
2619	destroy_workqueue(priv->gve_wq);
2620	free_netdev(netdev);
2621	pci_iounmap(pdev, db_bar);
2622	pci_iounmap(pdev, reg_bar);
2623	pci_release_regions(pdev);
2624	pci_disable_device(pdev);
2625}
2626
2627static void gve_shutdown(struct pci_dev *pdev)
2628{
2629	struct net_device *netdev = pci_get_drvdata(pdev);
2630	struct gve_priv *priv = netdev_priv(netdev);
2631	bool was_up = netif_carrier_ok(priv->dev);
2632
2633	rtnl_lock();
2634	if (was_up && gve_close(priv->dev)) {
		/* If the dev was up, attempt to close; if close fails, reset */
2636		gve_reset_and_teardown(priv, was_up);
2637	} else {
2638		/* If the dev wasn't up or close worked, finish tearing down */
2639		gve_teardown_priv_resources(priv);
2640	}
2641	rtnl_unlock();
2642}
2643
2644#ifdef CONFIG_PM
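/* Legacy PM hooks: suspend closes the device (if it was up) and tears
 * down its resources; resume re-runs reset recovery to bring it back.
 */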
2645static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
2646{
2647	struct net_device *netdev = pci_get_drvdata(pdev);
2648	struct gve_priv *priv = netdev_priv(netdev);
2649	bool was_up = netif_carrier_ok(priv->dev);
2650
2651	priv->suspend_cnt++;
2652	rtnl_lock();
2653	if (was_up && gve_close(priv->dev)) {
		/* If the dev was up, attempt to close; if close fails, reset */
2655		gve_reset_and_teardown(priv, was_up);
2656	} else {
2657		/* If the dev wasn't up or close worked, finish tearing down */
2658		gve_teardown_priv_resources(priv);
2659	}
2660	priv->up_before_suspend = was_up;
2661	rtnl_unlock();
2662	return 0;
2663}
2664
2665static int gve_resume(struct pci_dev *pdev)
2666{
2667	struct net_device *netdev = pci_get_drvdata(pdev);
2668	struct gve_priv *priv = netdev_priv(netdev);
2669	int err;
2670
2671	priv->resume_cnt++;
2672	rtnl_lock();
2673	err = gve_reset_recovery(priv, priv->up_before_suspend);
2674	rtnl_unlock();
2675	return err;
2676}
2677#endif /* CONFIG_PM */
2678
2679static const struct pci_device_id gve_id_table[] = {
2680	{ PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
2681	{ }
2682};
2683
2684static struct pci_driver gve_driver = {
2685	.name		= gve_driver_name,
2686	.id_table	= gve_id_table,
2687	.probe		= gve_probe,
2688	.remove		= gve_remove,
2689	.shutdown	= gve_shutdown,
2690#ifdef CONFIG_PM
2691	.suspend        = gve_suspend,
2692	.resume         = gve_resume,
2693#endif
2694};
2695
2696module_pci_driver(gve_driver);
2697
2698MODULE_DEVICE_TABLE(pci, gve_id_table);
2699MODULE_AUTHOR("Google, Inc.");
2700MODULE_DESCRIPTION("Google Virtual NIC Driver");
2701MODULE_LICENSE("Dual MIT/GPL");
2702MODULE_VERSION(GVE_VERSION);
2703