1/*-
2 * Copyright (c) 2013 Tsubai Masanari
3 * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
4 * Copyright (c) 2018 Patrick Kelsey
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $
19 */
20
21/* Driver for VMware vmxnet3 virtual ethernet devices. */
22
23#include <sys/cdefs.h>
24__FBSDID("$FreeBSD$");
25
26#include "opt_rss.h"
27
28#include <sys/param.h>
29#include <sys/systm.h>
30#include <sys/kernel.h>
31#include <sys/endian.h>
32#include <sys/sockio.h>
33#include <sys/mbuf.h>
34#include <sys/malloc.h>
35#include <sys/module.h>
36#include <sys/socket.h>
37#include <sys/sysctl.h>
38#include <sys/smp.h>
39#include <vm/vm.h>
40#include <vm/pmap.h>
41
42#include <net/ethernet.h>
43#include <net/if.h>
44#include <net/if_var.h>
45#include <net/if_arp.h>
46#include <net/if_dl.h>
47#include <net/if_types.h>
48#include <net/if_media.h>
49#include <net/if_vlan_var.h>
50#include <net/iflib.h>
51#ifdef RSS
52#include <net/rss_config.h>
53#endif
54
55#include <netinet/in_systm.h>
56#include <netinet/in.h>
57#include <netinet/ip.h>
58#include <netinet/ip6.h>
59#include <netinet6/ip6_var.h>
60#include <netinet/udp.h>
61#include <netinet/tcp.h>
62
63#include <machine/bus.h>
64#include <machine/resource.h>
65#include <sys/bus.h>
66#include <sys/rman.h>
67
68#include <dev/pci/pcireg.h>
69#include <dev/pci/pcivar.h>
70
71#include "ifdi_if.h"
72
73#include "if_vmxreg.h"
74#include "if_vmxvar.h"
75
76#include "opt_inet.h"
77#include "opt_inet6.h"
78
79
80#define VMXNET3_VMWARE_VENDOR_ID	0x15AD
81#define VMXNET3_VMWARE_DEVICE_ID	0x07B0
82
83static pci_vendor_info_t vmxnet3_vendor_info_array[] =
84{
85	PVID(VMXNET3_VMWARE_VENDOR_ID, VMXNET3_VMWARE_DEVICE_ID, "VMware VMXNET3 Ethernet Adapter"),
86	/* required last entry */
87	PVID_END
88};
89
90static void	*vmxnet3_register(device_t);
91static int	vmxnet3_attach_pre(if_ctx_t);
92static int	vmxnet3_msix_intr_assign(if_ctx_t, int);
93static void	vmxnet3_free_irqs(struct vmxnet3_softc *);
94static int	vmxnet3_attach_post(if_ctx_t);
95static int	vmxnet3_detach(if_ctx_t);
96static int	vmxnet3_shutdown(if_ctx_t);
97static int	vmxnet3_suspend(if_ctx_t);
98static int	vmxnet3_resume(if_ctx_t);
99
100static int	vmxnet3_alloc_resources(struct vmxnet3_softc *);
101static void	vmxnet3_free_resources(struct vmxnet3_softc *);
102static int	vmxnet3_check_version(struct vmxnet3_softc *);
103static void	vmxnet3_set_interrupt_idx(struct vmxnet3_softc *);
104
105static int	vmxnet3_queues_shared_alloc(struct vmxnet3_softc *);
106static void	vmxnet3_init_txq(struct vmxnet3_softc *, int);
107static int	vmxnet3_tx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
108static void	vmxnet3_init_rxq(struct vmxnet3_softc *, int, int);
109static int	vmxnet3_rx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
110static void	vmxnet3_queues_free(if_ctx_t);
111
112static int	vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
113static void	vmxnet3_free_shared_data(struct vmxnet3_softc *);
114static int	vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
115static void	vmxnet3_free_mcast_table(struct vmxnet3_softc *);
116static void	vmxnet3_init_shared_data(struct vmxnet3_softc *);
117static void	vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
118static void	vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
119static int	vmxnet3_alloc_data(struct vmxnet3_softc *);
120static void	vmxnet3_free_data(struct vmxnet3_softc *);
121
122static void	vmxnet3_evintr(struct vmxnet3_softc *);
123static int	vmxnet3_isc_txd_encap(void *, if_pkt_info_t);
124static void	vmxnet3_isc_txd_flush(void *, uint16_t, qidx_t);
125static int	vmxnet3_isc_txd_credits_update(void *, uint16_t, bool);
126static int	vmxnet3_isc_rxd_available(void *, uint16_t, qidx_t, qidx_t);
127static int	vmxnet3_isc_rxd_pkt_get(void *, if_rxd_info_t);
128static void	vmxnet3_isc_rxd_refill(void *, if_rxd_update_t);
129static void	vmxnet3_isc_rxd_flush(void *, uint16_t, uint8_t, qidx_t);
130static int	vmxnet3_legacy_intr(void *);
131static int	vmxnet3_rxq_intr(void *);
132static int	vmxnet3_event_intr(void *);
133
134static void	vmxnet3_stop(if_ctx_t);
135
136static void	vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
137static void	vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
138static void	vmxnet3_reinit_queues(struct vmxnet3_softc *);
139static int	vmxnet3_enable_device(struct vmxnet3_softc *);
140static void	vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
141static void	vmxnet3_init(if_ctx_t);
142static void	vmxnet3_multi_set(if_ctx_t);
143static int	vmxnet3_mtu_set(if_ctx_t, uint32_t);
144static void	vmxnet3_media_status(if_ctx_t, struct ifmediareq *);
145static int	vmxnet3_media_change(if_ctx_t);
146static int	vmxnet3_promisc_set(if_ctx_t, int);
147static uint64_t	vmxnet3_get_counter(if_ctx_t, ift_counter);
148static void	vmxnet3_update_admin_status(if_ctx_t);
149static void	vmxnet3_txq_timer(if_ctx_t, uint16_t);
150
151static void	vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
152		    uint16_t);
153static void	vmxnet3_vlan_register(if_ctx_t, uint16_t);
154static void	vmxnet3_vlan_unregister(if_ctx_t, uint16_t);
155static void	vmxnet3_set_rxfilter(struct vmxnet3_softc *, int);
156
157static void	vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
158static int	vmxnet3_link_is_up(struct vmxnet3_softc *);
159static void	vmxnet3_link_status(struct vmxnet3_softc *);
160static void	vmxnet3_set_lladdr(struct vmxnet3_softc *);
161static void	vmxnet3_get_lladdr(struct vmxnet3_softc *);
162
163static void	vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *,
164		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
165static void	vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *,
166		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
167static void	vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *,
168		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
169static void	vmxnet3_setup_sysctl(struct vmxnet3_softc *);
170
171static void	vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t,
172		    uint32_t);
173static uint32_t	vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t);
174static void	vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t,
175		    uint32_t);
176static void	vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t);
177static uint32_t	vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t);
178
179static int	vmxnet3_tx_queue_intr_enable(if_ctx_t, uint16_t);
180static int	vmxnet3_rx_queue_intr_enable(if_ctx_t, uint16_t);
181static void	vmxnet3_link_intr_enable(if_ctx_t);
182static void	vmxnet3_enable_intr(struct vmxnet3_softc *, int);
183static void	vmxnet3_disable_intr(struct vmxnet3_softc *, int);
184static void	vmxnet3_intr_enable_all(if_ctx_t);
185static void	vmxnet3_intr_disable_all(if_ctx_t);
186
187typedef enum {
188	VMXNET3_BARRIER_RD,
189	VMXNET3_BARRIER_WR,
190	VMXNET3_BARRIER_RDWR,
191} vmxnet3_barrier_t;
192
193static void	vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);
194
195
196static device_method_t vmxnet3_methods[] = {
197	/* Device interface */
198	DEVMETHOD(device_register, vmxnet3_register),
199	DEVMETHOD(device_probe, iflib_device_probe),
200	DEVMETHOD(device_attach, iflib_device_attach),
201	DEVMETHOD(device_detach, iflib_device_detach),
202	DEVMETHOD(device_shutdown, iflib_device_shutdown),
203	DEVMETHOD(device_suspend, iflib_device_suspend),
204	DEVMETHOD(device_resume, iflib_device_resume),
205	DEVMETHOD_END
206};
207
208static driver_t vmxnet3_driver = {
209	"vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc)
210};
211
212static devclass_t vmxnet3_devclass;
213DRIVER_MODULE(vmx, pci, vmxnet3_driver, vmxnet3_devclass, 0, 0);
214IFLIB_PNP_INFO(pci, vmx, vmxnet3_vendor_info_array);
215MODULE_VERSION(vmx, 2);
216
217MODULE_DEPEND(vmx, pci, 1, 1, 1);
218MODULE_DEPEND(vmx, ether, 1, 1, 1);
219MODULE_DEPEND(vmx, iflib, 1, 1, 1);
220
221static device_method_t vmxnet3_iflib_methods[] = {
222	DEVMETHOD(ifdi_tx_queues_alloc, vmxnet3_tx_queues_alloc),
223	DEVMETHOD(ifdi_rx_queues_alloc, vmxnet3_rx_queues_alloc),
224	DEVMETHOD(ifdi_queues_free, vmxnet3_queues_free),
225
226	DEVMETHOD(ifdi_attach_pre, vmxnet3_attach_pre),
227	DEVMETHOD(ifdi_attach_post, vmxnet3_attach_post),
228	DEVMETHOD(ifdi_detach, vmxnet3_detach),
229
230	DEVMETHOD(ifdi_init, vmxnet3_init),
231	DEVMETHOD(ifdi_stop, vmxnet3_stop),
232	DEVMETHOD(ifdi_multi_set, vmxnet3_multi_set),
233	DEVMETHOD(ifdi_mtu_set, vmxnet3_mtu_set),
234	DEVMETHOD(ifdi_media_status, vmxnet3_media_status),
235	DEVMETHOD(ifdi_media_change, vmxnet3_media_change),
236	DEVMETHOD(ifdi_promisc_set, vmxnet3_promisc_set),
237	DEVMETHOD(ifdi_get_counter, vmxnet3_get_counter),
238	DEVMETHOD(ifdi_update_admin_status, vmxnet3_update_admin_status),
239	DEVMETHOD(ifdi_timer, vmxnet3_txq_timer),
240
241	DEVMETHOD(ifdi_tx_queue_intr_enable, vmxnet3_tx_queue_intr_enable),
242	DEVMETHOD(ifdi_rx_queue_intr_enable, vmxnet3_rx_queue_intr_enable),
243	DEVMETHOD(ifdi_link_intr_enable, vmxnet3_link_intr_enable),
244	DEVMETHOD(ifdi_intr_enable, vmxnet3_intr_enable_all),
245	DEVMETHOD(ifdi_intr_disable, vmxnet3_intr_disable_all),
246	DEVMETHOD(ifdi_msix_intr_assign, vmxnet3_msix_intr_assign),
247
248	DEVMETHOD(ifdi_vlan_register, vmxnet3_vlan_register),
249	DEVMETHOD(ifdi_vlan_unregister, vmxnet3_vlan_unregister),
250
251	DEVMETHOD(ifdi_shutdown, vmxnet3_shutdown),
252	DEVMETHOD(ifdi_suspend, vmxnet3_suspend),
253	DEVMETHOD(ifdi_resume, vmxnet3_resume),
254
255	DEVMETHOD_END
256};
257
258static driver_t vmxnet3_iflib_driver = {
259	"vmx", vmxnet3_iflib_methods, sizeof(struct vmxnet3_softc)
260};
261
262struct if_txrx vmxnet3_txrx = {
263	.ift_txd_encap = vmxnet3_isc_txd_encap,
264	.ift_txd_flush = vmxnet3_isc_txd_flush,
265	.ift_txd_credits_update = vmxnet3_isc_txd_credits_update,
266	.ift_rxd_available = vmxnet3_isc_rxd_available,
267	.ift_rxd_pkt_get = vmxnet3_isc_rxd_pkt_get,
268	.ift_rxd_refill = vmxnet3_isc_rxd_refill,
269	.ift_rxd_flush = vmxnet3_isc_rxd_flush,
270	.ift_legacy_intr = vmxnet3_legacy_intr
271};
272
273static struct if_shared_ctx vmxnet3_sctx_init = {
274	.isc_magic = IFLIB_MAGIC,
275	.isc_q_align = 512,
276
277	.isc_tx_maxsize = VMXNET3_TX_MAXSIZE,
278	.isc_tx_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
279	.isc_tso_maxsize = VMXNET3_TSO_MAXSIZE + sizeof(struct ether_vlan_header),
280	.isc_tso_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
281
282	/*
283	 * These values are used to configure the busdma tag used for
284	 * receive descriptors.  Each receive descriptor only points to one
285	 * buffer.
286	 */
287	.isc_rx_maxsize = VMXNET3_RX_MAXSEGSIZE, /* One buf per descriptor */
288	.isc_rx_nsegments = 1,  /* One mapping per descriptor */
289	.isc_rx_maxsegsize = VMXNET3_RX_MAXSEGSIZE,
290
291	.isc_admin_intrcnt = 1,
292	.isc_vendor_info = vmxnet3_vendor_info_array,
293	.isc_driver_version = "2",
294	.isc_driver = &vmxnet3_iflib_driver,
295	.isc_flags = IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ | IFLIB_SINGLE_IRQ_RX_ONLY,
296
297	/*
298	 * Number of receive queues per receive queue set, with associated
299	 * descriptor settings for each.
300	 */
301	.isc_nrxqs = 3,
302	.isc_nfl = 2, /* one free list for each receive command queue */
303	.isc_nrxd_min = {VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC},
304	.isc_nrxd_max = {VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC},
305	.isc_nrxd_default = {VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC},
306
307	/*
308	 * Number of transmit queues per transmit queue set, with associated
309	 * descriptor settings for each.
310	 */
311	.isc_ntxqs = 2,
312	.isc_ntxd_min = {VMXNET3_MIN_TX_NDESC, VMXNET3_MIN_TX_NDESC},
313	.isc_ntxd_max = {VMXNET3_MAX_TX_NDESC, VMXNET3_MAX_TX_NDESC},
314	.isc_ntxd_default = {VMXNET3_DEF_TX_NDESC, VMXNET3_DEF_TX_NDESC},
315};
316
317static void *
318vmxnet3_register(device_t dev)
319{
320	return (&vmxnet3_sctx_init);
321}
322
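/*
 * Round a value down to the nearest power of two, e.g. 6 becomes 4 and
 * 8 stays 8.  Used to constrain the queue set counts chosen below.
 */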
323static int
324trunc_powerof2(int val)
325{
326
327	return (1U << (fls(val) - 1));
328}
329
330static int
331vmxnet3_attach_pre(if_ctx_t ctx)
332{
333	device_t dev;
334	if_softc_ctx_t scctx;
335	struct vmxnet3_softc *sc;
336	uint32_t intr_config;
337	int error;
338
339	dev = iflib_get_dev(ctx);
340	sc = iflib_get_softc(ctx);
341	sc->vmx_dev = dev;
342	sc->vmx_ctx = ctx;
343	sc->vmx_sctx = iflib_get_sctx(ctx);
344	sc->vmx_scctx = iflib_get_softc_ctx(ctx);
345	sc->vmx_ifp = iflib_get_ifp(ctx);
346	sc->vmx_media = iflib_get_media(ctx);
347	scctx = sc->vmx_scctx;
348
349	scctx->isc_tx_nsegments = VMXNET3_TX_MAXSEGS;
350	scctx->isc_tx_tso_segments_max = VMXNET3_TX_MAXSEGS;
351	/* isc_tx_tso_size_max doesn't include possible vlan header */
352	scctx->isc_tx_tso_size_max = VMXNET3_TSO_MAXSIZE;
353	scctx->isc_tx_tso_segsize_max = VMXNET3_TX_MAXSEGSIZE;
354	scctx->isc_txrx = &vmxnet3_txrx;
355
356	/* If 0, the iflib tunable was not set, so set to the default */
357	if (scctx->isc_nrxqsets == 0)
358		scctx->isc_nrxqsets = VMXNET3_DEF_RX_QUEUES;
359	scctx->isc_nrxqsets = trunc_powerof2(scctx->isc_nrxqsets);
360	scctx->isc_nrxqsets_max = min(VMXNET3_MAX_RX_QUEUES, mp_ncpus);
361	scctx->isc_nrxqsets_max = trunc_powerof2(scctx->isc_nrxqsets_max);
362
363	/* If 0, the iflib tunable was not set, so set to the default */
364	if (scctx->isc_ntxqsets == 0)
365		scctx->isc_ntxqsets = VMXNET3_DEF_TX_QUEUES;
366	scctx->isc_ntxqsets = trunc_powerof2(scctx->isc_ntxqsets);
367	scctx->isc_ntxqsets_max = min(VMXNET3_MAX_TX_QUEUES, mp_ncpus);
368	scctx->isc_ntxqsets_max = trunc_powerof2(scctx->isc_ntxqsets_max);
369
370	/*
371	 * Enforce that the transmit completion queue descriptor count is
372	 * the same as the transmit command queue descriptor count.
373	 */
374	scctx->isc_ntxd[0] = scctx->isc_ntxd[1];
375	scctx->isc_txqsizes[0] =
376	    sizeof(struct vmxnet3_txcompdesc) * scctx->isc_ntxd[0];
377	scctx->isc_txqsizes[1] =
378	    sizeof(struct vmxnet3_txdesc) * scctx->isc_ntxd[1];
379
380	/*
381	 * Enforce that the receive completion queue descriptor count is the
382	 * sum of the receive command queue descriptor counts, and that the
383	 * second receive command queue descriptor count is the same as the
384	 * first one.
385	 */
386	scctx->isc_nrxd[2] = scctx->isc_nrxd[1];
387	scctx->isc_nrxd[0] = scctx->isc_nrxd[1] + scctx->isc_nrxd[2];
388	scctx->isc_rxqsizes[0] =
389	    sizeof(struct vmxnet3_rxcompdesc) * scctx->isc_nrxd[0];
390	scctx->isc_rxqsizes[1] =
391	    sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[1];
392	scctx->isc_rxqsizes[2] =
393	    sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[2];
394
395	/*
396	 * Initialize the max frame size and descriptor queue buffer
397	 * sizes.
398	 */
399	vmxnet3_mtu_set(ctx, if_getmtu(sc->vmx_ifp));
400
401	scctx->isc_rss_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
402
403	/* Map PCI BARs */
404	error = vmxnet3_alloc_resources(sc);
405	if (error)
406		goto fail;
407
408	/* Check device versions */
409	error = vmxnet3_check_version(sc);
410	if (error)
411		goto fail;
412
413	/*
414	 * The interrupt mode can be set in the hypervisor configuration via
415	 * the parameter ethernet<N>.intrMode.
416	 */
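	/*
	 * The low two bits of the GET_INTRCFG result select the interrupt
	 * type (auto, MSI-X, MSI, or legacy); bits 2-3 select the interrupt
	 * mask mode, decoded below.
	 */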
417	intr_config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
418	sc->vmx_intr_mask_mode = (intr_config >> 2) & 0x03;
419
	/*
	 * Set up the softc context to request the interrupt mode now
	 * indicated by intr_config.  iflib will follow the usual fallback
	 * path MSIX -> MSI -> LEGACY, starting from the requested mode.
	 */
426	switch (intr_config & 0x03) {
427	case VMXNET3_IT_AUTO:
428	case VMXNET3_IT_MSIX:
429		scctx->isc_msix_bar = pci_msix_table_bar(dev);
430		break;
431	case VMXNET3_IT_MSI:
432		scctx->isc_msix_bar = -1;
433		scctx->isc_disable_msix = 1;
434		break;
435	case VMXNET3_IT_LEGACY:
436		scctx->isc_msix_bar = 0;
437		break;
438	}
439
440	scctx->isc_tx_csum_flags = VMXNET3_CSUM_ALL_OFFLOAD;
441	scctx->isc_capabilities = scctx->isc_capenable =
442	    IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 |
443	    IFCAP_TSO4 | IFCAP_TSO6 |
444	    IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 |
445	    IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
446	    IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO |
447	    IFCAP_JUMBO_MTU;
448
449	/* These capabilities are not enabled by default. */
450	scctx->isc_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER;
451
452	vmxnet3_get_lladdr(sc);
453	iflib_set_mac(ctx, sc->vmx_lladdr);
454
455	return (0);
456fail:
457	/*
458	 * We must completely clean up anything allocated above as iflib
459	 * will not invoke any other driver entry points as a result of this
460	 * failure.
461	 */
462	vmxnet3_free_resources(sc);
463
464	return (error);
465}
466
467static int
468vmxnet3_msix_intr_assign(if_ctx_t ctx, int msix)
469{
470	struct vmxnet3_softc *sc;
471	if_softc_ctx_t scctx;
472	struct vmxnet3_rxqueue *rxq;
473	int error;
474	int i;
475	char irq_name[16];
476
477	sc = iflib_get_softc(ctx);
478	scctx = sc->vmx_scctx;
479
480	for (i = 0; i < scctx->isc_nrxqsets; i++) {
481		snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
482
483		rxq = &sc->vmx_rxq[i];
484		error = iflib_irq_alloc_generic(ctx, &rxq->vxrxq_irq, i + 1,
485		    IFLIB_INTR_RX, vmxnet3_rxq_intr, rxq, i, irq_name);
486		if (error) {
487			device_printf(iflib_get_dev(ctx),
488			    "Failed to register rxq %d interrupt handler\n", i);
489			return (error);
490		}
491	}
492
493	for (i = 0; i < scctx->isc_ntxqsets; i++) {
494		snprintf(irq_name, sizeof(irq_name), "txq%d", i);
495
496		/*
497		 * Don't provide the corresponding rxq irq for reference -
498		 * we want the transmit task to be attached to a task queue
499		 * that is different from the one used by the corresponding
500		 * rxq irq.  That is because the TX doorbell writes are very
501		 * expensive as virtualized MMIO operations, so we want to
502		 * be able to defer them to another core when possible so
503		 * that they don't steal receive processing cycles during
504		 * stack turnarounds like TCP ACK generation.  The other
505		 * piece to this approach is enabling the iflib abdicate
506		 * option (currently via an interface-specific
507		 * tunable/sysctl).
508		 */
509		iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_TX, NULL, i,
510		    irq_name);
511	}
512
513	error = iflib_irq_alloc_generic(ctx, &sc->vmx_event_intr_irq,
514	    scctx->isc_nrxqsets + 1, IFLIB_INTR_ADMIN, vmxnet3_event_intr, sc, 0,
515	    "event");
516	if (error) {
517		device_printf(iflib_get_dev(ctx),
518		    "Failed to register event interrupt handler\n");
519		return (error);
520	}
521
522	return (0);
523}
524
525static void
526vmxnet3_free_irqs(struct vmxnet3_softc *sc)
527{
528	if_softc_ctx_t scctx;
529	struct vmxnet3_rxqueue *rxq;
530	int i;
531
532	scctx = sc->vmx_scctx;
533
534	for (i = 0; i < scctx->isc_nrxqsets; i++) {
535		rxq = &sc->vmx_rxq[i];
536		iflib_irq_free(sc->vmx_ctx, &rxq->vxrxq_irq);
537	}
538
539	iflib_irq_free(sc->vmx_ctx, &sc->vmx_event_intr_irq);
540}
541
542static int
543vmxnet3_attach_post(if_ctx_t ctx)
544{
545	device_t dev;
546	if_softc_ctx_t scctx;
547	struct vmxnet3_softc *sc;
548	int error;
549
550	dev = iflib_get_dev(ctx);
551	scctx = iflib_get_softc_ctx(ctx);
552	sc = iflib_get_softc(ctx);
553
554	if (scctx->isc_nrxqsets > 1)
555		sc->vmx_flags |= VMXNET3_FLAG_RSS;
556
557	error = vmxnet3_alloc_data(sc);
558	if (error)
559		goto fail;
560
561	vmxnet3_set_interrupt_idx(sc);
562	vmxnet3_setup_sysctl(sc);
563
564	ifmedia_add(sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
565	ifmedia_set(sc->vmx_media, IFM_ETHER | IFM_AUTO);
566
567fail:
568	return (error);
569}
570
571static int
572vmxnet3_detach(if_ctx_t ctx)
573{
574	struct vmxnet3_softc *sc;
575
576	sc = iflib_get_softc(ctx);
577
578	vmxnet3_free_irqs(sc);
579	vmxnet3_free_data(sc);
580	vmxnet3_free_resources(sc);
581
582	return (0);
583}
584
585static int
586vmxnet3_shutdown(if_ctx_t ctx)
587{
588
589	return (0);
590}
591
592static int
593vmxnet3_suspend(if_ctx_t ctx)
594{
595
596	return (0);
597}
598
599static int
600vmxnet3_resume(if_ctx_t ctx)
601{
602
603	return (0);
604}
605
606static int
607vmxnet3_alloc_resources(struct vmxnet3_softc *sc)
608{
609	device_t dev;
610	int rid;
611
612	dev = sc->vmx_dev;
613
614	rid = PCIR_BAR(0);
615	sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
616	    RF_ACTIVE);
617	if (sc->vmx_res0 == NULL) {
618		device_printf(dev,
619		    "could not map BAR0 memory\n");
620		return (ENXIO);
621	}
622
623	sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0);
624	sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0);
625
626	rid = PCIR_BAR(1);
627	sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
628	    RF_ACTIVE);
629	if (sc->vmx_res1 == NULL) {
630		device_printf(dev,
631		    "could not map BAR1 memory\n");
632		return (ENXIO);
633	}
634
635	sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1);
636	sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1);
637
638	return (0);
639}
640
641static void
642vmxnet3_free_resources(struct vmxnet3_softc *sc)
643{
644	device_t dev;
645
646	dev = sc->vmx_dev;
647
648	if (sc->vmx_res0 != NULL) {
649		bus_release_resource(dev, SYS_RES_MEMORY,
650		    rman_get_rid(sc->vmx_res0), sc->vmx_res0);
651		sc->vmx_res0 = NULL;
652	}
653
654	if (sc->vmx_res1 != NULL) {
655		bus_release_resource(dev, SYS_RES_MEMORY,
656		    rman_get_rid(sc->vmx_res1), sc->vmx_res1);
657		sc->vmx_res1 = NULL;
658	}
659}
660
661static int
662vmxnet3_check_version(struct vmxnet3_softc *sc)
663{
664	device_t dev;
665	uint32_t version;
666
667	dev = sc->vmx_dev;
668
669	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
670	if ((version & 0x01) == 0) {
671		device_printf(dev, "unsupported hardware version %#x\n",
672		    version);
673		return (ENOTSUP);
674	}
675	vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
676
677	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
678	if ((version & 0x01) == 0) {
679		device_printf(dev, "unsupported UPT version %#x\n", version);
680		return (ENOTSUP);
681	}
682	vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
683
684	return (0);
685}
686
687static void
688vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
689{
690	if_softc_ctx_t scctx;
691	struct vmxnet3_txqueue *txq;
692	struct vmxnet3_txq_shared *txs;
693	struct vmxnet3_rxqueue *rxq;
694	struct vmxnet3_rxq_shared *rxs;
695	int intr_idx;
696	int i;
697
698	scctx = sc->vmx_scctx;
699
700	/*
701	 * There is always one interrupt per receive queue, assigned
702	 * starting with the first interrupt.  When there is only one
703	 * interrupt available, the event interrupt shares the receive queue
704	 * interrupt, otherwise it uses the interrupt following the last
705	 * receive queue interrupt.  Transmit queues are not assigned
706	 * interrupts, so they are given indexes beyond the indexes that
707	 * correspond to the real interrupts.
708	 */
709
710	/* The event interrupt is always the last vector. */
711	sc->vmx_event_intr_idx = scctx->isc_vectors - 1;
712
713	intr_idx = 0;
714	for (i = 0; i < scctx->isc_nrxqsets; i++, intr_idx++) {
715		rxq = &sc->vmx_rxq[i];
716		rxs = rxq->vxrxq_rs;
717		rxq->vxrxq_intr_idx = intr_idx;
718		rxs->intr_idx = rxq->vxrxq_intr_idx;
719	}
720
	/*
	 * Assign the tx queues' interrupt indexes above those actually in
	 * use.  These interrupts will never be enabled.
	 */
725	intr_idx = scctx->isc_vectors;
726	for (i = 0; i < scctx->isc_ntxqsets; i++, intr_idx++) {
727		txq = &sc->vmx_txq[i];
728		txs = txq->vxtxq_ts;
729		txq->vxtxq_intr_idx = intr_idx;
730		txs->intr_idx = txq->vxtxq_intr_idx;
731	}
732}
733
734static int
735vmxnet3_queues_shared_alloc(struct vmxnet3_softc *sc)
736{
737	if_softc_ctx_t scctx;
738	int size;
739	int error;
740
741	scctx = sc->vmx_scctx;
742
743	/*
744	 * The txq and rxq shared data areas must be allocated contiguously
745	 * as vmxnet3_driver_shared contains only a single address member
746	 * for the shared queue data area.
747	 */
748	size = scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared) +
749	    scctx->isc_nrxqsets * sizeof(struct vmxnet3_rxq_shared);
750	error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128, &sc->vmx_qs_dma, 0);
751	if (error) {
752		device_printf(sc->vmx_dev, "cannot alloc queue shared memory\n");
753		return (error);
754	}
755
756	return (0);
757}
758
759static void
760vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
761{
762	struct vmxnet3_txqueue *txq;
763	struct vmxnet3_comp_ring *txc;
764	struct vmxnet3_txring *txr;
765	if_softc_ctx_t scctx;
766
767	txq = &sc->vmx_txq[q];
768	txc = &txq->vxtxq_comp_ring;
769	txr = &txq->vxtxq_cmd_ring;
770	scctx = sc->vmx_scctx;
771
772	snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
773	    device_get_nameunit(sc->vmx_dev), q);
774
775	txq->vxtxq_sc = sc;
776	txq->vxtxq_id = q;
777	txc->vxcr_ndesc = scctx->isc_ntxd[0];
778	txr->vxtxr_ndesc = scctx->isc_ntxd[1];
779}
780
781static int
782vmxnet3_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
783    int ntxqs, int ntxqsets)
784{
785	struct vmxnet3_softc *sc;
786	int q;
787	int error;
788	caddr_t kva;
789
790	sc = iflib_get_softc(ctx);
791
792	/* Allocate the array of transmit queues */
793	sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) *
794	    ntxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
795	if (sc->vmx_txq == NULL)
796		return (ENOMEM);
797
798	/* Initialize driver state for each transmit queue */
799	for (q = 0; q < ntxqsets; q++)
800		vmxnet3_init_txq(sc, q);
801
802	/*
803	 * Allocate queue state that is shared with the device.  This check
804	 * and call is performed in both vmxnet3_tx_queues_alloc() and
805	 * vmxnet3_rx_queues_alloc() so that we don't have to care which
806	 * order iflib invokes those routines in.
807	 */
808	if (sc->vmx_qs_dma.idi_size == 0) {
809		error = vmxnet3_queues_shared_alloc(sc);
810		if (error)
811			return (error);
812	}
813
814	kva = sc->vmx_qs_dma.idi_vaddr;
815	for (q = 0; q < ntxqsets; q++) {
816		sc->vmx_txq[q].vxtxq_ts = (struct vmxnet3_txq_shared *) kva;
817		kva += sizeof(struct vmxnet3_txq_shared);
818	}
819
820	/* Record descriptor ring vaddrs and paddrs */
821	for (q = 0; q < ntxqsets; q++) {
822		struct vmxnet3_txqueue *txq;
823		struct vmxnet3_txring *txr;
824		struct vmxnet3_comp_ring *txc;
825
826		txq = &sc->vmx_txq[q];
827		txc = &txq->vxtxq_comp_ring;
828		txr = &txq->vxtxq_cmd_ring;
829
830		/* Completion ring */
831		txc->vxcr_u.txcd =
832		    (struct vmxnet3_txcompdesc *) vaddrs[q * ntxqs + 0];
833		txc->vxcr_paddr = paddrs[q * ntxqs + 0];
834
835		/* Command ring */
836		txr->vxtxr_txd =
837		    (struct vmxnet3_txdesc *) vaddrs[q * ntxqs + 1];
838		txr->vxtxr_paddr = paddrs[q * ntxqs + 1];
839	}
840
841	return (0);
842}
843
844static void
845vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q, int nrxqs)
846{
847	struct vmxnet3_rxqueue *rxq;
848	struct vmxnet3_comp_ring *rxc;
849	struct vmxnet3_rxring *rxr;
850	if_softc_ctx_t scctx;
851	int i;
852
853	rxq = &sc->vmx_rxq[q];
854	rxc = &rxq->vxrxq_comp_ring;
855	scctx = sc->vmx_scctx;
856
857	snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
858	    device_get_nameunit(sc->vmx_dev), q);
859
860	rxq->vxrxq_sc = sc;
861	rxq->vxrxq_id = q;
862
863	/*
864	 * First rxq is the completion queue, so there are nrxqs - 1 command
865	 * rings starting at iflib queue id 1.
866	 */
867	rxc->vxcr_ndesc = scctx->isc_nrxd[0];
868	for (i = 0; i < nrxqs - 1; i++) {
869		rxr = &rxq->vxrxq_cmd_ring[i];
870		rxr->vxrxr_ndesc = scctx->isc_nrxd[i + 1];
871	}
872}
873
874static int
875vmxnet3_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
876    int nrxqs, int nrxqsets)
877{
878	struct vmxnet3_softc *sc;
879	if_softc_ctx_t scctx;
880	int q;
881	int i;
882	int error;
883	caddr_t kva;
884
885	sc = iflib_get_softc(ctx);
886	scctx = sc->vmx_scctx;
887
888	/* Allocate the array of receive queues */
889	sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) *
890	    nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
891	if (sc->vmx_rxq == NULL)
892		return (ENOMEM);
893
894	/* Initialize driver state for each receive queue */
895	for (q = 0; q < nrxqsets; q++)
896		vmxnet3_init_rxq(sc, q, nrxqs);
897
898	/*
899	 * Allocate queue state that is shared with the device.  This check
900	 * and call is performed in both vmxnet3_tx_queues_alloc() and
901	 * vmxnet3_rx_queues_alloc() so that we don't have to care which
902	 * order iflib invokes those routines in.
903	 */
904	if (sc->vmx_qs_dma.idi_size == 0) {
905		error = vmxnet3_queues_shared_alloc(sc);
906		if (error)
907			return (error);
908	}
909
910	kva = sc->vmx_qs_dma.idi_vaddr +
911	    scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared);
912	for (q = 0; q < nrxqsets; q++) {
913		sc->vmx_rxq[q].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva;
914		kva += sizeof(struct vmxnet3_rxq_shared);
915	}
916
917	/* Record descriptor ring vaddrs and paddrs */
918	for (q = 0; q < nrxqsets; q++) {
919		struct vmxnet3_rxqueue *rxq;
920		struct vmxnet3_rxring *rxr;
921		struct vmxnet3_comp_ring *rxc;
922
923		rxq = &sc->vmx_rxq[q];
924		rxc = &rxq->vxrxq_comp_ring;
925
926		/* Completion ring */
927		rxc->vxcr_u.rxcd =
928		    (struct vmxnet3_rxcompdesc *) vaddrs[q * nrxqs + 0];
929		rxc->vxcr_paddr = paddrs[q * nrxqs + 0];
930
931		/* Command ring(s) */
932		for (i = 0; i < nrxqs - 1; i++) {
933			rxr = &rxq->vxrxq_cmd_ring[i];
934
935			rxr->vxrxr_rxd =
936			    (struct vmxnet3_rxdesc *) vaddrs[q * nrxqs + 1 + i];
937			rxr->vxrxr_paddr = paddrs[q * nrxqs + 1 + i];
938		}
939	}
940
941	return (0);
942}
943
944static void
945vmxnet3_queues_free(if_ctx_t ctx)
946{
947	struct vmxnet3_softc *sc;
948
949	sc = iflib_get_softc(ctx);
950
951	/* Free queue state area that is shared with the device */
952	if (sc->vmx_qs_dma.idi_size != 0) {
953		iflib_dma_free(&sc->vmx_qs_dma);
954		sc->vmx_qs_dma.idi_size = 0;
955	}
956
957	/* Free array of receive queues */
958	if (sc->vmx_rxq != NULL) {
959		free(sc->vmx_rxq, M_DEVBUF);
960		sc->vmx_rxq = NULL;
961	}
962
963	/* Free array of transmit queues */
964	if (sc->vmx_txq != NULL) {
965		free(sc->vmx_txq, M_DEVBUF);
966		sc->vmx_txq = NULL;
967	}
968}
969
970static int
971vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
972{
973	device_t dev;
974	size_t size;
975	int error;
976
977	dev = sc->vmx_dev;
978
979	/* Top level state structure shared with the device */
980	size = sizeof(struct vmxnet3_driver_shared);
981	error = iflib_dma_alloc_align(sc->vmx_ctx, size, 1, &sc->vmx_ds_dma, 0);
982	if (error) {
983		device_printf(dev, "cannot alloc shared memory\n");
984		return (error);
985	}
986	sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.idi_vaddr;
987
988	/* RSS table state shared with the device */
989	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
990		size = sizeof(struct vmxnet3_rss_shared);
991		error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128,
992		    &sc->vmx_rss_dma, 0);
993		if (error) {
994			device_printf(dev, "cannot alloc rss shared memory\n");
995			return (error);
996		}
997		sc->vmx_rss =
998		    (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.idi_vaddr;
999	}
1000
1001	return (0);
1002}
1003
1004static void
1005vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
1006{
1007
1008	/* Free RSS table state shared with the device */
1009	if (sc->vmx_rss != NULL) {
1010		iflib_dma_free(&sc->vmx_rss_dma);
1011		sc->vmx_rss = NULL;
1012	}
1013
1014	/* Free top level state structure shared with the device */
1015	if (sc->vmx_ds != NULL) {
1016		iflib_dma_free(&sc->vmx_ds_dma);
1017		sc->vmx_ds = NULL;
1018	}
1019}
1020
1021static int
1022vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1023{
1024	int error;
1025
1026	/* Multicast table state shared with the device */
1027	error = iflib_dma_alloc_align(sc->vmx_ctx,
1028	    VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN, 32, &sc->vmx_mcast_dma, 0);
1029	if (error)
1030		device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1031	else
1032		sc->vmx_mcast = sc->vmx_mcast_dma.idi_vaddr;
1033
1034	return (error);
1035}
1036
1037static void
1038vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1039{
1040
1041	/* Free multicast table state shared with the device */
1042	if (sc->vmx_mcast != NULL) {
1043		iflib_dma_free(&sc->vmx_mcast_dma);
1044		sc->vmx_mcast = NULL;
1045	}
1046}
1047
1048static void
1049vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1050{
1051	struct vmxnet3_driver_shared *ds;
1052	if_shared_ctx_t sctx;
1053	if_softc_ctx_t scctx;
1054	struct vmxnet3_txqueue *txq;
1055	struct vmxnet3_txq_shared *txs;
1056	struct vmxnet3_rxqueue *rxq;
1057	struct vmxnet3_rxq_shared *rxs;
1058	int i;
1059
1060	ds = sc->vmx_ds;
1061	sctx = sc->vmx_sctx;
1062	scctx = sc->vmx_scctx;
1063
	/*
	 * Initialize fields of the shared data that remain the same across
	 * reinits.  Note the shared data is zeroed when allocated.
	 */
1068
1069	ds->magic = VMXNET3_REV1_MAGIC;
1070
1071	/* DriverInfo */
1072	ds->version = VMXNET3_DRIVER_VERSION;
1073	ds->guest = VMXNET3_GOS_FREEBSD |
1074#ifdef __LP64__
1075	    VMXNET3_GOS_64BIT;
1076#else
1077	    VMXNET3_GOS_32BIT;
1078#endif
1079	ds->vmxnet3_revision = 1;
1080	ds->upt_version = 1;
1081
1082	/* Misc. conf */
1083	ds->driver_data = vtophys(sc);
1084	ds->driver_data_len = sizeof(struct vmxnet3_softc);
1085	ds->queue_shared = sc->vmx_qs_dma.idi_paddr;
1086	ds->queue_shared_len = sc->vmx_qs_dma.idi_size;
1087	ds->nrxsg_max = IFLIB_MAX_RX_SEGS;
1088
1089	/* RSS conf */
1090	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1091		ds->rss.version = 1;
1092		ds->rss.paddr = sc->vmx_rss_dma.idi_paddr;
1093		ds->rss.len = sc->vmx_rss_dma.idi_size;
1094	}
1095
1096	/* Interrupt control. */
1097	ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1098	/*
1099	 * Total number of interrupt indexes we are using in the shared
1100	 * config data, even though we don't actually allocate interrupt
1101	 * resources for the tx queues.  Some versions of the device will
1102	 * fail to initialize successfully if interrupt indexes are used in
1103	 * the shared config that exceed the number of interrupts configured
1104	 * here.
1105	 */
1106	ds->nintr = (scctx->isc_vectors == 1) ?
1107	    2 : (scctx->isc_nrxqsets + scctx->isc_ntxqsets + 1);
1108	ds->evintr = sc->vmx_event_intr_idx;
1109	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1110
1111	for (i = 0; i < ds->nintr; i++)
1112		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1113
1114	/* Receive filter. */
1115	ds->mcast_table = sc->vmx_mcast_dma.idi_paddr;
1116	ds->mcast_tablelen = sc->vmx_mcast_dma.idi_size;
1117
1118	/* Tx queues */
1119	for (i = 0; i < scctx->isc_ntxqsets; i++) {
1120		txq = &sc->vmx_txq[i];
1121		txs = txq->vxtxq_ts;
1122
1123		txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_paddr;
1124		txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1125		txs->comp_ring = txq->vxtxq_comp_ring.vxcr_paddr;
1126		txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1127		txs->driver_data = vtophys(txq);
1128		txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1129	}
1130
1131	/* Rx queues */
1132	for (i = 0; i < scctx->isc_nrxqsets; i++) {
1133		rxq = &sc->vmx_rxq[i];
1134		rxs = rxq->vxrxq_rs;
1135
1136		rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_paddr;
1137		rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1138		rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_paddr;
1139		rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1140		rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_paddr;
1141		rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1142		rxs->driver_data = vtophys(rxq);
1143		rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1144	}
1145}
1146
1147static void
1148vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1149{
1150	/*
1151	 * Use the same key as the Linux driver until FreeBSD can do
1152	 * RSS (presumably Toeplitz) in software.
1153	 */
1154	static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
1155	    0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
1156	    0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
1157	    0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
1158	    0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
1159	    0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
1160	};
1161
1162	struct vmxnet3_driver_shared *ds;
1163	if_softc_ctx_t scctx;
1164	struct vmxnet3_rss_shared *rss;
1165#ifdef RSS
1166	uint8_t rss_algo;
1167#endif
1168	int i;
1169
1170	ds = sc->vmx_ds;
1171	scctx = sc->vmx_scctx;
1172	rss = sc->vmx_rss;
1173
1174	rss->hash_type =
1175	    UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1176	    UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1177	rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1178	rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1179	rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1180#ifdef RSS
1181	/*
	 * If the software RSS is configured to anything other than
1183	 * Toeplitz, then just do Toeplitz in "hardware" for the sake of
1184	 * the packet distribution, but report the hash as opaque to
1185	 * disengage from the software RSS.
1186	 */
1187	rss_algo = rss_gethashalgo();
1188	if (rss_algo == RSS_HASH_TOEPLITZ) {
1189		rss_getkey(rss->hash_key);
1190		for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++) {
1191			rss->ind_table[i] = rss_get_indirection_to_bucket(i) %
1192			    scctx->isc_nrxqsets;
1193		}
1194		sc->vmx_flags |= VMXNET3_FLAG_SOFT_RSS;
1195	} else
1196#endif
1197	{
1198		memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
1199		for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1200			rss->ind_table[i] = i % scctx->isc_nrxqsets;
1201		sc->vmx_flags &= ~VMXNET3_FLAG_SOFT_RSS;
1202	}
1203}
1204
1205static void
1206vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1207{
1208	struct ifnet *ifp;
1209	struct vmxnet3_driver_shared *ds;
1210	if_softc_ctx_t scctx;
1211
1212	ifp = sc->vmx_ifp;
1213	ds = sc->vmx_ds;
1214	scctx = sc->vmx_scctx;
1215
1216	ds->mtu = ifp->if_mtu;
1217	ds->ntxqueue = scctx->isc_ntxqsets;
1218	ds->nrxqueue = scctx->isc_nrxqsets;
1219
1220	ds->upt_features = 0;
1221	if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
1222		ds->upt_features |= UPT1_F_CSUM;
1223	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1224		ds->upt_features |= UPT1_F_VLAN;
1225	if (ifp->if_capenable & IFCAP_LRO)
1226		ds->upt_features |= UPT1_F_LRO;
1227
1228	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1229		ds->upt_features |= UPT1_F_RSS;
1230		vmxnet3_reinit_rss_shared_data(sc);
1231	}
1232
1233	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.idi_paddr);
1234	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1235	    (uint64_t) sc->vmx_ds_dma.idi_paddr >> 32);
1236}
1237
1238static int
1239vmxnet3_alloc_data(struct vmxnet3_softc *sc)
1240{
1241	int error;
1242
1243	error = vmxnet3_alloc_shared_data(sc);
1244	if (error)
1245		return (error);
1246
1247	error = vmxnet3_alloc_mcast_table(sc);
1248	if (error)
1249		return (error);
1250
1251	vmxnet3_init_shared_data(sc);
1252
1253	return (0);
1254}
1255
1256static void
1257vmxnet3_free_data(struct vmxnet3_softc *sc)
1258{
1259
1260	vmxnet3_free_mcast_table(sc);
1261	vmxnet3_free_shared_data(sc);
1262}
1263
1264static void
1265vmxnet3_evintr(struct vmxnet3_softc *sc)
1266{
1267	device_t dev;
1268	struct vmxnet3_txq_shared *ts;
1269	struct vmxnet3_rxq_shared *rs;
1270	uint32_t event;
1271
1272	dev = sc->vmx_dev;
1273
1274	/* Clear events. */
1275	event = sc->vmx_ds->event;
1276	vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
1277
1278	if (event & VMXNET3_EVENT_LINK)
1279		vmxnet3_link_status(sc);
1280
1281	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
1282		vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
1283		ts = sc->vmx_txq[0].vxtxq_ts;
1284		if (ts->stopped != 0)
1285			device_printf(dev, "Tx queue error %#x\n", ts->error);
1286		rs = sc->vmx_rxq[0].vxrxq_rs;
1287		if (rs->stopped != 0)
1288			device_printf(dev, "Rx queue error %#x\n", rs->error);
1289
		/* XXX - rely on iflib watchdog to reset us? */
1291		device_printf(dev, "Rx/Tx queue error event ... "
1292		    "waiting for iflib watchdog reset\n");
1293	}
1294
1295	if (event & VMXNET3_EVENT_DIC)
1296		device_printf(dev, "device implementation change event\n");
1297	if (event & VMXNET3_EVENT_DEBUG)
1298		device_printf(dev, "debug event\n");
1299}
1300
1301static int
1302vmxnet3_isc_txd_encap(void *vsc, if_pkt_info_t pi)
1303{
1304	struct vmxnet3_softc *sc;
1305	struct vmxnet3_txqueue *txq;
1306	struct vmxnet3_txring *txr;
1307	struct vmxnet3_txdesc *txd, *sop;
1308	bus_dma_segment_t *segs;
1309	int nsegs;
1310	int pidx;
1311	int hdrlen;
1312	int i;
1313	int gen;
1314
1315	sc = vsc;
1316	txq = &sc->vmx_txq[pi->ipi_qsidx];
1317	txr = &txq->vxtxq_cmd_ring;
1318	segs = pi->ipi_segs;
1319	nsegs = pi->ipi_nsegs;
1320	pidx = pi->ipi_pidx;
1321
1322	KASSERT(nsegs <= VMXNET3_TX_MAXSEGS,
1323	    ("%s: packet with too many segments %d", __func__, nsegs));
1324
1325	sop = &txr->vxtxr_txd[pidx];
	gen = txr->vxtxr_gen ^ 1;	/* Owned by cpu (not yet by device) */
1327
1328	for (i = 0; i < nsegs; i++) {
1329		txd = &txr->vxtxr_txd[pidx];
1330
1331		txd->addr = segs[i].ds_addr;
1332		txd->len = segs[i].ds_len;
1333		txd->gen = gen;
1334		txd->dtype = 0;
1335		txd->offload_mode = VMXNET3_OM_NONE;
1336		txd->offload_pos = 0;
1337		txd->hlen = 0;
1338		txd->eop = 0;
1339		txd->compreq = 0;
1340		txd->vtag_mode = 0;
1341		txd->vtag = 0;
1342
1343		if (++pidx == txr->vxtxr_ndesc) {
1344			pidx = 0;
1345			txr->vxtxr_gen ^= 1;
1346		}
1347		gen = txr->vxtxr_gen;
1348	}
1349	txd->eop = 1;
1350	txd->compreq = !!(pi->ipi_flags & IPI_TX_INTR);
1351	pi->ipi_new_pidx = pidx;
1352
1353	/*
1354	 * VLAN
1355	 */
1356	if (pi->ipi_mflags & M_VLANTAG) {
1357		sop->vtag_mode = 1;
1358		sop->vtag = pi->ipi_vtag;
1359	}
1360
1361	/*
1362	 * TSO and checksum offloads
1363	 */
1364	hdrlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
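	/*
	 * For TSO, hlen covers the Ethernet, IP, and TCP headers and
	 * offload_pos carries the segment size; for plain checksum offload,
	 * offload_pos is the offset of the L4 checksum field.
	 */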
1365	if (pi->ipi_csum_flags & CSUM_TSO) {
1366		sop->offload_mode = VMXNET3_OM_TSO;
1367		sop->hlen = hdrlen + pi->ipi_tcp_hlen;
1368		sop->offload_pos = pi->ipi_tso_segsz;
1369	} else if (pi->ipi_csum_flags & (VMXNET3_CSUM_OFFLOAD |
1370	    VMXNET3_CSUM_OFFLOAD_IPV6)) {
1371		sop->offload_mode = VMXNET3_OM_CSUM;
1372		sop->hlen = hdrlen;
1373		sop->offload_pos = hdrlen +
1374		    ((pi->ipi_ipproto == IPPROTO_TCP) ?
1375			offsetof(struct tcphdr, th_sum) :
1376			offsetof(struct udphdr, uh_sum));
1377	}
1378
1379	/* Finally, change the ownership. */
1380	vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
1381	sop->gen ^= 1;
1382
1383	return (0);
1384}
1385
1386static void
1387vmxnet3_isc_txd_flush(void *vsc, uint16_t txqid, qidx_t pidx)
1388{
1389	struct vmxnet3_softc *sc;
1390	struct vmxnet3_txqueue *txq;
1391
1392	sc = vsc;
1393	txq = &sc->vmx_txq[txqid];
1394
1395	/*
1396	 * pidx is what we last set ipi_new_pidx to in
1397	 * vmxnet3_isc_txd_encap()
1398	 */
1399
1400	/*
1401	 * Avoid expensive register updates if the flush request is
1402	 * redundant.
1403	 */
1404	if (txq->vxtxq_last_flush == pidx)
1405		return;
1406	txq->vxtxq_last_flush = pidx;
1407	vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id), pidx);
1408}
1409
1410static int
1411vmxnet3_isc_txd_credits_update(void *vsc, uint16_t txqid, bool clear)
1412{
1413	struct vmxnet3_softc *sc;
1414	struct vmxnet3_txqueue *txq;
1415	struct vmxnet3_comp_ring *txc;
1416	struct vmxnet3_txcompdesc *txcd;
1417	struct vmxnet3_txring *txr;
1418	int processed;
1419
1420	sc = vsc;
1421	txq = &sc->vmx_txq[txqid];
1422	txc = &txq->vxtxq_comp_ring;
1423	txr = &txq->vxtxq_cmd_ring;
1424
1425	/*
1426	 * If clear is true, we need to report the number of TX command ring
1427	 * descriptors that have been processed by the device.  If clear is
1428	 * false, we just need to report whether or not at least one TX
1429	 * command ring descriptor has been processed by the device.
1430	 */
1431	processed = 0;
1432	for (;;) {
1433		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
1434		if (txcd->gen != txc->vxcr_gen)
1435			break;
1436		else if (!clear)
1437			return (1);
1438		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1439
1440		if (++txc->vxcr_next == txc->vxcr_ndesc) {
1441			txc->vxcr_next = 0;
1442			txc->vxcr_gen ^= 1;
1443		}
1444
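		/*
		 * Count the command ring descriptors consumed by this
		 * completion, handling the case where eop_idx has wrapped
		 * past the end of the ring.
		 */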
1445		if (txcd->eop_idx < txr->vxtxr_next)
1446			processed += txr->vxtxr_ndesc -
1447			    (txr->vxtxr_next - txcd->eop_idx) + 1;
1448		else
1449			processed += txcd->eop_idx - txr->vxtxr_next + 1;
1450		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
1451	}
1452
1453	return (processed);
1454}
1455
1456static int
1457vmxnet3_isc_rxd_available(void *vsc, uint16_t rxqid, qidx_t idx, qidx_t budget)
1458{
1459	struct vmxnet3_softc *sc;
1460	struct vmxnet3_rxqueue *rxq;
1461	struct vmxnet3_comp_ring *rxc;
1462	struct vmxnet3_rxcompdesc *rxcd;
1463	int avail;
1464	int completed_gen;
1465#ifdef INVARIANTS
1466	int expect_sop = 1;
1467#endif
1468	sc = vsc;
1469	rxq = &sc->vmx_rxq[rxqid];
1470	rxc = &rxq->vxrxq_comp_ring;
1471
1472	avail = 0;
1473	completed_gen = rxc->vxcr_gen;
1474	for (;;) {
1475		rxcd = &rxc->vxcr_u.rxcd[idx];
1476		if (rxcd->gen != completed_gen)
1477			break;
1478		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1479
1480#ifdef INVARIANTS
1481		if (expect_sop)
1482			KASSERT(rxcd->sop, ("%s: expected sop", __func__));
1483		else
1484			KASSERT(!rxcd->sop, ("%s: unexpected sop", __func__));
1485		expect_sop = rxcd->eop;
1486#endif
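		/*
		 * Only count completed packets with a non-zero length;
		 * zero-length completions are skipped over in
		 * vmxnet3_isc_rxd_pkt_get().
		 */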
1487		if (rxcd->eop && (rxcd->len != 0))
1488			avail++;
1489		if (avail > budget)
1490			break;
1491		if (++idx == rxc->vxcr_ndesc) {
1492			idx = 0;
1493			completed_gen ^= 1;
1494		}
1495	}
1496
1497	return (avail);
1498}
1499
1500static int
1501vmxnet3_isc_rxd_pkt_get(void *vsc, if_rxd_info_t ri)
1502{
1503	struct vmxnet3_softc *sc;
1504	if_softc_ctx_t scctx;
1505	struct vmxnet3_rxqueue *rxq;
1506	struct vmxnet3_comp_ring *rxc;
1507	struct vmxnet3_rxcompdesc *rxcd;
1508	struct vmxnet3_rxring *rxr;
1509	struct vmxnet3_rxdesc *rxd;
1510	if_rxd_frag_t frag;
1511	int cqidx;
1512	uint16_t total_len;
1513	uint8_t nfrags;
1514	uint8_t i;
1515	uint8_t flid;
1516
1517	sc = vsc;
1518	scctx = sc->vmx_scctx;
1519	rxq = &sc->vmx_rxq[ri->iri_qsidx];
1520	rxc = &rxq->vxrxq_comp_ring;
1521
1522	/*
1523	 * Get a single packet starting at the given index in the completion
1524	 * queue.  That we have been called indicates that
1525	 * vmxnet3_isc_rxd_available() has already verified that either
1526	 * there is a complete packet available starting at the given index,
1527	 * or there are one or more zero length packets starting at the
1528	 * given index followed by a complete packet, so no verification of
1529	 * ownership of the descriptors (and no associated read barrier) is
1530	 * required here.
1531	 */
1532	cqidx = ri->iri_cidx;
1533	rxcd = &rxc->vxcr_u.rxcd[cqidx];
1534	while (rxcd->len == 0) {
1535		KASSERT(rxcd->sop && rxcd->eop,
1536		    ("%s: zero-length packet without both sop and eop set",
1537			__func__));
1538		rxc->vxcr_zero_length++;
1539		if (++cqidx == rxc->vxcr_ndesc) {
1540			cqidx = 0;
1541			rxc->vxcr_gen ^= 1;
1542		}
1543		rxcd = &rxc->vxcr_u.rxcd[cqidx];
1544	}
1545	KASSERT(rxcd->sop, ("%s: expected sop", __func__));
1546
1547	/*
1548	 * RSS and flow ID.
1549	 * Types other than M_HASHTYPE_NONE and M_HASHTYPE_OPAQUE_HASH should
1550	 * be used only if the software RSS is enabled and it uses the same
1551	 * algorithm and the hash key as the "hardware".  If the software RSS
1552	 * is not enabled, then it's simply pointless to use those types.
1553	 * If it's enabled but with different parameters, then hash values will
1554	 * not match.
1555	 */
1556	ri->iri_flowid = rxcd->rss_hash;
1557#ifdef RSS
1558	if ((sc->vmx_flags & VMXNET3_FLAG_SOFT_RSS) != 0) {
1559		switch (rxcd->rss_type) {
1560		case VMXNET3_RCD_RSS_TYPE_NONE:
1561			ri->iri_flowid = ri->iri_qsidx;
1562			ri->iri_rsstype = M_HASHTYPE_NONE;
1563			break;
1564		case VMXNET3_RCD_RSS_TYPE_IPV4:
1565			ri->iri_rsstype = M_HASHTYPE_RSS_IPV4;
1566			break;
1567		case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
1568			ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV4;
1569			break;
1570		case VMXNET3_RCD_RSS_TYPE_IPV6:
1571			ri->iri_rsstype = M_HASHTYPE_RSS_IPV6;
1572			break;
1573		case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
1574			ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV6;
1575			break;
1576		default:
1577			ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
1578			break;
1579		}
1580	} else
1581#endif
1582	{
1583		switch (rxcd->rss_type) {
1584		case VMXNET3_RCD_RSS_TYPE_NONE:
1585			ri->iri_flowid = ri->iri_qsidx;
1586			ri->iri_rsstype = M_HASHTYPE_NONE;
1587			break;
1588		default:
1589			ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
1590			break;
1591		}
1592	}
1593
1594	/*
1595	 * The queue numbering scheme used for rxcd->qid is as follows:
1596	 *  - All of the command ring 0s are numbered [0, nrxqsets - 1]
1597	 *  - All of the command ring 1s are numbered [nrxqsets, 2*nrxqsets - 1]
1598	 *
1599	 * Thus, rxcd->qid less than nrxqsets indicates command ring (and
1600	 * flid) 0, and rxcd->qid greater than or equal to nrxqsets
1601	 * indicates command ring (and flid) 1.
1602	 */
1603	nfrags = 0;
1604	total_len = 0;
1605	do {
1606		rxcd = &rxc->vxcr_u.rxcd[cqidx];
1607		KASSERT(rxcd->gen == rxc->vxcr_gen,
1608		    ("%s: generation mismatch", __func__));
1609		flid = (rxcd->qid >= scctx->isc_nrxqsets) ? 1 : 0;
1610		rxr = &rxq->vxrxq_cmd_ring[flid];
1611		rxd = &rxr->vxrxr_rxd[rxcd->rxd_idx];
1612
1613		frag = &ri->iri_frags[nfrags];
1614		frag->irf_flid = flid;
1615		frag->irf_idx = rxcd->rxd_idx;
1616		frag->irf_len = rxcd->len;
1617		total_len += rxcd->len;
1618		nfrags++;
1619		if (++cqidx == rxc->vxcr_ndesc) {
1620			cqidx = 0;
1621			rxc->vxcr_gen ^= 1;
1622		}
1623	} while (!rxcd->eop);
1624
1625	ri->iri_cidx = cqidx;
1626	ri->iri_nfrags = nfrags;
1627	ri->iri_len = total_len;
1628
1629	/*
1630	 * If there's an error, the last descriptor in the packet will
1631	 * have the error indicator set.  In this case, set all
1632	 * fragment lengths to zero.  This will cause iflib to discard
1633	 * the packet, but process all associated descriptors through
1634	 * the refill mechanism.
1635	 */
1636	if (__predict_false(rxcd->error)) {
1637		rxc->vxcr_pkt_errors++;
1638		for (i = 0; i < nfrags; i++) {
1639			frag = &ri->iri_frags[i];
1640			frag->irf_len = 0;
1641		}
1642	} else {
1643		/* Checksum offload information is in the last descriptor. */
1644		if (!rxcd->no_csum) {
1645			uint32_t csum_flags = 0;
1646
1647			if (rxcd->ipv4) {
1648				csum_flags |= CSUM_IP_CHECKED;
1649				if (rxcd->ipcsum_ok)
1650					csum_flags |= CSUM_IP_VALID;
1651			}
1652			if (!rxcd->fragment && (rxcd->tcp || rxcd->udp)) {
1653				csum_flags |= CSUM_L4_CALC;
1654				if (rxcd->csum_ok) {
1655					csum_flags |= CSUM_L4_VALID;
1656					ri->iri_csum_data = 0xffff;
1657				}
1658			}
1659			ri->iri_csum_flags = csum_flags;
1660		}
1661
1662		/* VLAN information is in the last descriptor. */
1663		if (rxcd->vlan) {
1664			ri->iri_flags |= M_VLANTAG;
1665			ri->iri_vtag = rxcd->vtag;
1666		}
1667	}
1668
1669	return (0);
1670}
1671
1672static void
1673vmxnet3_isc_rxd_refill(void *vsc, if_rxd_update_t iru)
1674{
1675	struct vmxnet3_softc *sc;
1676	struct vmxnet3_rxqueue *rxq;
1677	struct vmxnet3_rxring *rxr;
1678	struct vmxnet3_rxdesc *rxd;
1679	uint64_t *paddrs;
1680	int count;
1681	int len;
1682	int idx;
1683	int i;
1684	uint8_t flid;
1685	uint8_t btype;
1686
1687	count = iru->iru_count;
1688	len = iru->iru_buf_size;
1689	flid = iru->iru_flidx;
1690	paddrs = iru->iru_paddrs;
1691
1692	sc = vsc;
1693	rxq = &sc->vmx_rxq[iru->iru_qsidx];
1694	rxr = &rxq->vxrxq_cmd_ring[flid];
1695	rxd = rxr->vxrxr_rxd;
1696
1697	/*
1698	 * Command ring 0 is filled with BTYPE_HEAD descriptors, and
1699	 * command ring 1 is filled with BTYPE_BODY descriptors.
1700	 */
1701	btype = (flid == 0) ? VMXNET3_BTYPE_HEAD : VMXNET3_BTYPE_BODY;
1702	/*
1703	 * The refill entries from iflib will advance monotonically,
1704	 * but the refilled descriptors may not be contiguous due to
1705	 * earlier skipping of descriptors by the device.  The refill
1706	 * entries from iflib need an entire state update, while the
1707	 * descriptors previously skipped by the device only need to
1708	 * have their generation numbers updated.
1709	 */
1710	idx = rxr->vxrxr_refill_start;
1711	i = 0;
1712	do {
1713		if (idx == iru->iru_idxs[i]) {
1714			rxd[idx].addr = paddrs[i];
1715			rxd[idx].len = len;
1716			rxd[idx].btype = btype;
1717			i++;
1718		} else
1719			rxr->vxrxr_desc_skips++;
1720		rxd[idx].gen = rxr->vxrxr_gen;
1721
1722		if (++idx == rxr->vxrxr_ndesc) {
1723			idx = 0;
1724			rxr->vxrxr_gen ^= 1;
1725		}
1726	} while (i != count);
1727	rxr->vxrxr_refill_start = idx;
1728}
1729
1730static void
1731vmxnet3_isc_rxd_flush(void *vsc, uint16_t rxqid, uint8_t flid, qidx_t pidx)
1732{
1733	struct vmxnet3_softc *sc;
1734	struct vmxnet3_rxqueue *rxq;
1735	struct vmxnet3_rxring *rxr;
1736	bus_size_t r;
1737
1738	sc = vsc;
1739	rxq = &sc->vmx_rxq[rxqid];
1740	rxr = &rxq->vxrxq_cmd_ring[flid];
1741
1742	if (flid == 0)
1743		r = VMXNET3_BAR0_RXH1(rxqid);
1744	else
1745		r = VMXNET3_BAR0_RXH2(rxqid);
1746
1747	vmxnet3_write_bar0(sc, r, pidx);
1748}
1749
1750static int
1751vmxnet3_legacy_intr(void *xsc)
1752{
1753	struct vmxnet3_softc *sc;
1754	if_softc_ctx_t scctx;
1755	if_ctx_t ctx;
1756
1757	sc = xsc;
1758	scctx = sc->vmx_scctx;
1759	ctx = sc->vmx_ctx;
1760
1761	/*
1762	 * When there is only a single interrupt configured, this routine
1763	 * runs in fast interrupt context, following which the rxq 0 task
1764	 * will be enqueued.
1765	 */
1766	if (scctx->isc_intr == IFLIB_INTR_LEGACY) {
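		/* Nothing to schedule if the interrupt register reads zero. */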
1767		if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
1768			return (FILTER_HANDLED);
1769	}
1770	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1771		vmxnet3_intr_disable_all(ctx);
1772
1773	if (sc->vmx_ds->event != 0)
1774		iflib_admin_intr_deferred(ctx);
1775
1776	/*
1777	 * XXX - When there is both rxq and event activity, do we care
1778	 * whether the rxq 0 task or the admin task re-enables the interrupt
1779	 * first?
1780	 */
1781	return (FILTER_SCHEDULE_THREAD);
1782}
1783
1784static int
1785vmxnet3_rxq_intr(void *vrxq)
1786{
1787	struct vmxnet3_softc *sc;
1788	struct vmxnet3_rxqueue *rxq;
1789
1790	rxq = vrxq;
1791	sc = rxq->vxrxq_sc;
1792
1793	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1794		vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx);
1795
1796	return (FILTER_SCHEDULE_THREAD);
1797}
1798
1799static int
1800vmxnet3_event_intr(void *vsc)
1801{
1802	struct vmxnet3_softc *sc;
1803
1804	sc = vsc;
1805
1806	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1807		vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
1808
	/*
	 * The work will be done via vmxnet3_update_admin_status(), and the
	 * interrupt will be re-enabled in vmxnet3_link_intr_enable().
	 */
1815	return (FILTER_SCHEDULE_THREAD);
1816}
1817
1818static void
1819vmxnet3_stop(if_ctx_t ctx)
1820{
1821	struct vmxnet3_softc *sc;
1822
1823	sc = iflib_get_softc(ctx);
1824
1825	sc->vmx_link_active = 0;
1826	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
1827	vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
1828}
1829
1830static void
1831vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
1832{
1833	struct vmxnet3_txring *txr;
1834	struct vmxnet3_comp_ring *txc;
1835
1836	txq->vxtxq_last_flush = -1;
1837
1838	txr = &txq->vxtxq_cmd_ring;
1839	txr->vxtxr_next = 0;
1840	txr->vxtxr_gen = VMXNET3_INIT_GEN;
1841	/*
1842	 * iflib has zeroed out the descriptor array during the prior attach
1843	 * or stop
1844	 */
1845
1846	txc = &txq->vxtxq_comp_ring;
1847	txc->vxcr_next = 0;
1848	txc->vxcr_gen = VMXNET3_INIT_GEN;
1849	/*
1850	 * iflib has zeroed out the descriptor array during the prior attach
1851	 * or stop
1852	 */
1853}
1854
1855static void
1856vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
1857{
1858	struct vmxnet3_rxring *rxr;
1859	struct vmxnet3_comp_ring *rxc;
1860	int i;
1861
1862	/*
1863	 * The descriptors will be populated with buffers during a
1864	 * subsequent invocation of vmxnet3_isc_rxd_refill()
1865	 */
1866	for (i = 0; i < sc->vmx_sctx->isc_nrxqs - 1; i++) {
1867		rxr = &rxq->vxrxq_cmd_ring[i];
1868		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
1869		rxr->vxrxr_desc_skips = 0;
1870		rxr->vxrxr_refill_start = 0;
1871		/*
1872		 * iflib has zeroed out the descriptor array during the
1873		 * prior attach or stop
1874		 */
1875	}
1876
1877	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
1878		rxr = &rxq->vxrxq_cmd_ring[i];
1879		rxr->vxrxr_gen = 0;
1880		rxr->vxrxr_desc_skips = 0;
1881		rxr->vxrxr_refill_start = 0;
1882		bzero(rxr->vxrxr_rxd,
1883		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
1884	}
1885
1886	rxc = &rxq->vxrxq_comp_ring;
1887	rxc->vxcr_next = 0;
1888	rxc->vxcr_gen = VMXNET3_INIT_GEN;
1889	rxc->vxcr_zero_length = 0;
1890	rxc->vxcr_pkt_errors = 0;
1891	/*
1892	 * iflib has zeroed out the descriptor array during the prior attach
1893	 * or stop
1894	 */
1895}
1896
1897static void
1898vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
1899{
1900	if_softc_ctx_t scctx;
1901	int q;
1902
1903	scctx = sc->vmx_scctx;
1904
1905	for (q = 0; q < scctx->isc_ntxqsets; q++)
1906		vmxnet3_txinit(sc, &sc->vmx_txq[q]);
1907
1908	for (q = 0; q < scctx->isc_nrxqsets; q++)
1909		vmxnet3_rxinit(sc, &sc->vmx_rxq[q]);
1910}
1911
1912static int
1913vmxnet3_enable_device(struct vmxnet3_softc *sc)
1914{
1915	if_softc_ctx_t scctx;
1916	int q;
1917
1918	scctx = sc->vmx_scctx;
1919
1920	if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
1921		device_printf(sc->vmx_dev, "device enable command failed!\n");
1922		return (1);
1923	}
1924
1925	/* Reset the Rx queue heads. */
1926	for (q = 0; q < scctx->isc_nrxqsets; q++) {
1927		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
1928		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
1929	}
1930
1931	return (0);
1932}
1933
1934static void
1935vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
1936{
1937	struct ifnet *ifp;
1938
1939	ifp = sc->vmx_ifp;
1940
1941	vmxnet3_set_rxfilter(sc, if_getflags(ifp));
1942
1943	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1944		bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter,
1945		    sizeof(sc->vmx_ds->vlan_filter));
1946	else
1947		bzero(sc->vmx_ds->vlan_filter,
1948		    sizeof(sc->vmx_ds->vlan_filter));
1949	vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
1950}
1951
1952static void
1953vmxnet3_init(if_ctx_t ctx)
1954{
1955	struct vmxnet3_softc *sc;
1956
1957	sc = iflib_get_softc(ctx);
1958
1959	/* Use the current MAC address. */
1960	bcopy(IF_LLADDR(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
1961	vmxnet3_set_lladdr(sc);
1962
1963	vmxnet3_reinit_shared_data(sc);
1964	vmxnet3_reinit_queues(sc);
1965
1966	vmxnet3_enable_device(sc);
1967
1968	vmxnet3_reinit_rxfilters(sc);
1969	vmxnet3_link_status(sc);
1970}
1971
1972static void
1973vmxnet3_multi_set(if_ctx_t ctx)
1974{
1975
1976	vmxnet3_set_rxfilter(iflib_get_softc(ctx),
1977	    if_getflags(iflib_get_ifp(ctx)));
1978}
1979
1980static int
1981vmxnet3_mtu_set(if_ctx_t ctx, uint32_t mtu)
1982{
1983	struct vmxnet3_softc *sc;
1984	if_softc_ctx_t scctx;
1985
1986	sc = iflib_get_softc(ctx);
1987	scctx = sc->vmx_scctx;
1988
1989	if (mtu > VMXNET3_TX_MAXSIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN +
1990		ETHER_CRC_LEN))
1991		return (EINVAL);
1992
1993	/*
1994	 * Update the max frame size so that the rx mbuf size is
1995	 * chosen based on the new mtu during the interface init that
1996	 * will occur after this routine returns.
1997	 */
1998	scctx->isc_max_frame_size = mtu +
1999		ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN;
2000	/* RX completion queue - n/a */
2001	scctx->isc_rxd_buf_size[0] = 0;
2002	/*
2003	 * For header-type descriptors (used for first segment of
2004	 * packet), let iflib determine the buffer size based on the
2005	 * max frame size.
2006	 */
2007	scctx->isc_rxd_buf_size[1] = 0;
2008	/*
2009	 * For body-type descriptors (used for jumbo frames and LRO),
2010	 * always use page-sized buffers.
2011	 */
2012	scctx->isc_rxd_buf_size[2] = MJUMPAGESIZE;
2013
2014	return (0);
2015}
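
/*
 * Worked example of the sizing above (Ethernet overhead is
 * ETHER_HDR_LEN (14) + ETHER_VLAN_ENCAP_LEN (4) + ETHER_CRC_LEN (4)
 * = 22 bytes):
 *
 *	mtu 1500 -> isc_max_frame_size 1522
 *	mtu 9000 -> isc_max_frame_size 9022
 *
 * MTUs whose resulting frame would exceed VMXNET3_TX_MAXSIZE are
 * rejected with EINVAL before any state is touched.
 */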
2016
2017static void
2018vmxnet3_media_status(if_ctx_t ctx, struct ifmediareq *ifmr)
2019{
2020	struct vmxnet3_softc *sc;
2021
2022	sc = iflib_get_softc(ctx);
2023
2024	ifmr->ifm_status = IFM_AVALID;
2025	ifmr->ifm_active = IFM_ETHER;
2026
2027	if (vmxnet3_link_is_up(sc) != 0) {
2028		ifmr->ifm_status |= IFM_ACTIVE;
2029		ifmr->ifm_active |= IFM_AUTO;
2030	} else
2031		ifmr->ifm_active |= IFM_NONE;
2032}
2033
2034static int
2035vmxnet3_media_change(if_ctx_t ctx)
2036{
2037
2038	/* Ignore. */
2039	return (0);
2040}
2041
2042static int
2043vmxnet3_promisc_set(if_ctx_t ctx, int flags)
2044{
2045
2046	vmxnet3_set_rxfilter(iflib_get_softc(ctx), flags);
2047
2048	return (0);
2049}
2050
2051static uint64_t
2052vmxnet3_get_counter(if_ctx_t ctx, ift_counter cnt)
2053{
2054	if_t ifp = iflib_get_ifp(ctx);
2055
2056	if (cnt < IFCOUNTERS)
2057		return (if_get_counter_default(ifp, cnt));
2058
2059	return (0);
2060}
2061
2062static void
2063vmxnet3_update_admin_status(if_ctx_t ctx)
2064{
2065	struct vmxnet3_softc *sc;
2066
2067	sc = iflib_get_softc(ctx);
2068	if (sc->vmx_ds->event != 0)
2069		vmxnet3_evintr(sc);
2070
2071	vmxnet3_refresh_host_stats(sc);
2072}
2073
2074static void
2075vmxnet3_txq_timer(if_ctx_t ctx, uint16_t qid)
2076{
2077	/* Host stats refresh is global, so just trigger it on txq 0 */
2078	if (qid == 0)
2079		vmxnet3_refresh_host_stats(iflib_get_softc(ctx));
2080}
2081
2082static void
2083vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
2084{
2085	int idx, bit;
2086
2087	if (tag == 0 || tag > 4095)
2088		return;
2089
2090	idx = (tag >> 5) & 0x7F;
2091	bit = tag & 0x1F;
2092
2093	/* Update our private VLAN bitvector. */
2094	if (add)
2095		sc->vmx_vlan_filter[idx] |= (1 << bit);
2096	else
2097		sc->vmx_vlan_filter[idx] &= ~(1 << bit);
2098}
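
/*
 * Worked example of the bitvector math above: the VLAN filter is a
 * 4096-bit vector stored as 128 32-bit words, so for tag 1000:
 *
 *	idx = (1000 >> 5) & 0x7F  -> 31	(word index)
 *	bit = 1000 & 0x1F         -> 8	(bit within that word)
 *
 * Only the driver's shadow copy is updated here; it is pushed to the
 * device by vmxnet3_reinit_rxfilters() via VMXNET3_CMD_VLAN_FILTER.
 */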
2099
2100static void
2101vmxnet3_vlan_register(if_ctx_t ctx, uint16_t tag)
2102{
2103
2104	vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 1, tag);
2105}
2106
2107static void
2108vmxnet3_vlan_unregister(if_ctx_t ctx, uint16_t tag)
2109{
2110
2111	vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 0, tag);
2112}
2113
2114static void
2115vmxnet3_set_rxfilter(struct vmxnet3_softc *sc, int flags)
2116{
2117	struct ifnet *ifp;
2118	struct vmxnet3_driver_shared *ds;
2119	struct ifmultiaddr *ifma;
2120	u_int mode;
2121
2122	ifp = sc->vmx_ifp;
2123	ds = sc->vmx_ds;
2124
2125	mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
2126	if (flags & IFF_PROMISC)
2127		mode |= VMXNET3_RXMODE_PROMISC;
2128	if (flags & IFF_ALLMULTI)
2129		mode |= VMXNET3_RXMODE_ALLMULTI;
2130	else {
2131		int cnt = 0, overflow = 0;
2132
2133		if_maddr_rlock(ifp);
2134		CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2135			if (ifma->ifma_addr->sa_family != AF_LINK)
2136				continue;
2137			else if (cnt == VMXNET3_MULTICAST_MAX) {
2138				overflow = 1;
2139				break;
2140			}
2141
2142			bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2143			    &sc->vmx_mcast[cnt*ETHER_ADDR_LEN], ETHER_ADDR_LEN);
2144			cnt++;
2145		}
2146		if_maddr_runlock(ifp);
2147
2148		if (overflow != 0) {
2149			cnt = 0;
2150			mode |= VMXNET3_RXMODE_ALLMULTI;
2151		} else if (cnt > 0)
2152			mode |= VMXNET3_RXMODE_MCAST;
2153		ds->mcast_tablelen = cnt * ETHER_ADDR_LEN;
2154	}
2155
2156	ds->rxmode = mode;
2157
2158	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
2159	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
2160}
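
/*
 * Summary of the receive-filter programming above: unicast and
 * broadcast frames are always accepted; IFF_PROMISC and IFF_ALLMULTI
 * add their respective modes; otherwise up to VMXNET3_MULTICAST_MAX
 * link-layer addresses are copied into vmx_mcast, with overflow
 * falling back to ALLMULTI.  SET_FILTER pushes the multicast table and
 * SET_RXMODE pushes the mode bits to the device.
 */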
2161
2162static void
2163vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
2164{
2165
2166	vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
2167}
2168
2169static int
2170vmxnet3_link_is_up(struct vmxnet3_softc *sc)
2171{
2172	uint32_t status;
2173
2174	status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
2175	return ((status & 0x1) != 0);
2176}
2177
2178static void
2179vmxnet3_link_status(struct vmxnet3_softc *sc)
2180{
2181	if_ctx_t ctx;
2182	uint64_t speed;
2183	int link;
2184
2185	ctx = sc->vmx_ctx;
2186	link = vmxnet3_link_is_up(sc);
2187	speed = IF_Gbps(10);
2188
2189	if (link != 0 && sc->vmx_link_active == 0) {
2190		sc->vmx_link_active = 1;
2191		iflib_link_state_change(ctx, LINK_STATE_UP, speed);
2192	} else if (link == 0 && sc->vmx_link_active != 0) {
2193		sc->vmx_link_active = 0;
2194		iflib_link_state_change(ctx, LINK_STATE_DOWN, speed);
2195	}
2196}
2197
2198static void
2199vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
2200{
2201	uint32_t ml, mh;
2202
2203	ml  = sc->vmx_lladdr[0];
2204	ml |= sc->vmx_lladdr[1] << 8;
2205	ml |= sc->vmx_lladdr[2] << 16;
2206	ml |= sc->vmx_lladdr[3] << 24;
2207	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
2208
2209	mh  = sc->vmx_lladdr[4];
2210	mh |= sc->vmx_lladdr[5] << 8;
2211	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
2212}
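
/*
 * Illustrative example of the MAC register packing above (bytes are
 * packed little-endian, four in MACL and two in MACH):
 *
 *	lladdr 00:0c:29:aa:bb:cc
 *	MACL = 0xaa290c00
 *	MACH = 0x0000ccbb
 *
 * vmxnet3_get_lladdr() below performs the inverse unpacking.
 */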
2213
2214static void
2215vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
2216{
2217	uint32_t ml, mh;
2218
2219	ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
2220	mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
2221
2222	sc->vmx_lladdr[0] = ml;
2223	sc->vmx_lladdr[1] = ml >> 8;
2224	sc->vmx_lladdr[2] = ml >> 16;
2225	sc->vmx_lladdr[3] = ml >> 24;
2226	sc->vmx_lladdr[4] = mh;
2227	sc->vmx_lladdr[5] = mh >> 8;
2228}
2229
2230static void
2231vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
2232    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2233{
2234	struct sysctl_oid *node, *txsnode;
2235	struct sysctl_oid_list *list, *txslist;
2236	struct UPT1_TxStats *txstats;
2237	char namebuf[16];
2238
2239	txstats = &txq->vxtxq_ts->stats;
2240
2241	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id);
2242	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
2243	    NULL, "Transmit Queue");
2244	txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);
2245
2246	/*
2247	 * Add statistics reported by the host. These are updated by the
2248	 * iflib txq timer on txq 0.
2249	 */
2250	txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
2251	    NULL, "Host Statistics");
2252	txslist = SYSCTL_CHILDREN(txsnode);
2253	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
2254	    &txstats->TSO_packets, "TSO packets");
2255	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
2256	    &txstats->TSO_bytes, "TSO bytes");
2257	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
2258	    &txstats->ucast_packets, "Unicast packets");
2259	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
2260	    &txstats->ucast_bytes, "Unicast bytes");
2261	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
2262	    &txstats->mcast_packets, "Multicast packets");
2263	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
2264	    &txstats->mcast_bytes, "Multicast bytes");
2265	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
2266	    &txstats->error, "Errors");
2267	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
2268	    &txstats->discard, "Discards");
2269}
2270
2271static void
2272vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
2273    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2274{
2275	struct sysctl_oid *node, *rxsnode;
2276	struct sysctl_oid_list *list, *rxslist;
2277	struct UPT1_RxStats *rxstats;
2278	char namebuf[16];
2279
2280	rxstats = &rxq->vxrxq_rs->stats;
2281
2282	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
2283	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
2284	    NULL, "Receive Queue");
2285	rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);
2286
2287	/*
2288	 * Add statistics reported by the host. These are updated by the
2289	 * iflib txq timer on txq 0.
2290	 */
2291	rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
2292	    NULL, "Host Statistics");
2293	rxslist = SYSCTL_CHILDREN(rxsnode);
2294	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
2295	    &rxstats->LRO_packets, "LRO packets");
2296	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
2297	    &rxstats->LRO_bytes, "LRO bytes");
2298	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
2299	    &rxstats->ucast_packets, "Unicast packets");
2300	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
2301	    &rxstats->ucast_bytes, "Unicast bytes");
2302	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
2303	    &rxstats->mcast_packets, "Multicast packets");
2304	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
2305	    &rxstats->mcast_bytes, "Multicast bytes");
2306	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
2307	    &rxstats->bcast_packets, "Broadcast packets");
2308	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
2309	    &rxstats->bcast_bytes, "Broadcast bytes");
2310	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
2311	    &rxstats->nobuffer, "No buffer");
2312	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
2313	    &rxstats->error, "Errors");
2314}
2315
2316static void
2317vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
2318    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2319{
2320	if_softc_ctx_t scctx;
2321	struct sysctl_oid *node;
2322	struct sysctl_oid_list *list;
2323	int i;
2324
2325	scctx = sc->vmx_scctx;
2326
2327	for (i = 0; i < scctx->isc_ntxqsets; i++) {
2328		struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];
2329
2330		node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
2331		    "debug", CTLFLAG_RD, NULL, "");
2332		list = SYSCTL_CHILDREN(node);
2333
2334		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
2335		    &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
2336		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
2337		    &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
2338		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
2339		    &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
2340		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
2341		    &txq->vxtxq_comp_ring.vxcr_next, 0, "");
2342		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
2343		    &txq->vxtxq_comp_ring.vxcr_ndesc, 0, "");
2344		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
2345		    &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
2346	}
2347
2348	for (i = 0; i < scctx->isc_nrxqsets; i++) {
2349		struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];
2350
2351		node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
2352		    "debug", CTLFLAG_RD, NULL, "");
2353		list = SYSCTL_CHILDREN(node);
2354
2355		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
2356		    &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
2357		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
2358		    &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
2359		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd0_desc_skips", CTLFLAG_RD,
2360		    &rxq->vxrxq_cmd_ring[0].vxrxr_desc_skips, 0, "");
2361		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
2362		    &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
2363		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
2364		    &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
2365		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd1_desc_skips", CTLFLAG_RD,
2366		    &rxq->vxrxq_cmd_ring[1].vxrxr_desc_skips, 0, "");
2367		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
2368		    &rxq->vxrxq_comp_ring.vxcr_ndesc, 0, "");
2369		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
2370		    &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
2371		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_zero_length", CTLFLAG_RD,
2372		    &rxq->vxrxq_comp_ring.vxcr_zero_length, 0, "");
2373		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_pkt_errors", CTLFLAG_RD,
2374		    &rxq->vxrxq_comp_ring.vxcr_pkt_errors, 0, "");
2375	}
2376}
2377
2378static void
2379vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
2380    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2381{
2382	if_softc_ctx_t scctx;
2383	int i;
2384
2385	scctx = sc->vmx_scctx;
2386
2387	for (i = 0; i < scctx->isc_ntxqsets; i++)
2388		vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
2389	for (i = 0; i < scctx->isc_nrxqsets; i++)
2390		vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);
2391
2392	vmxnet3_setup_debug_sysctl(sc, ctx, child);
2393}
2394
2395static void
2396vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
2397{
2398	device_t dev;
2399	struct sysctl_ctx_list *ctx;
2400	struct sysctl_oid *tree;
2401	struct sysctl_oid_list *child;
2402
2403	dev = sc->vmx_dev;
2404	ctx = device_get_sysctl_ctx(dev);
2405	tree = device_get_sysctl_tree(dev);
2406	child = SYSCTL_CHILDREN(tree);
2407
2408	vmxnet3_setup_queue_sysctl(sc, ctx, child);
2409}
2410
2411static void
2412vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
2413{
2414
2415	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
2416}
2417
2418static uint32_t
2419vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
2420{
2421
2422	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
2423}
2424
2425static void
2426vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
2427{
2428
2429	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
2430}
2431
2432static void
2433vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
2434{
2435
2436	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
2437}
2438
2439static uint32_t
2440vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
2441{
2442
2443	vmxnet3_write_cmd(sc, cmd);
2444	bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
2445	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
2446	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
2447}
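
/*
 * Note on the command protocol used above: a command is issued by
 * writing its code to the BAR1 CMD register; for commands that return
 * a value (e.g. VMXNET3_CMD_GET_LINK or VMXNET3_CMD_GET_MACL), the
 * result is read back from the same register, with a bus-space barrier
 * in between to keep the write and the subsequent read ordered.
 */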
2448
2449static void
2450vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
2451{
2452
2453	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
2454}
2455
2456static void
2457vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
2458{
2459
2460	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
2461}
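
/*
 * Note on the mask registers used above: each interrupt vector has an
 * IMASK register; writing 1 masks the vector and writing 0 unmasks it.
 * With VMXNET3_IMM_ACTIVE, the filter routines earlier in this file
 * mask their vector on entry, and the corresponding *_intr_enable
 * callbacks below unmask it once the deferred work has run.
 */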
2462
2463static int
2464vmxnet3_tx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
2465{
2466	/* Not using interrupts for TX */
2467	return (0);
2468}
2469
2470static int
2471vmxnet3_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
2472{
2473	struct vmxnet3_softc *sc;
2474
2475	sc = iflib_get_softc(ctx);
2476	vmxnet3_enable_intr(sc, sc->vmx_rxq[qid].vxrxq_intr_idx);
2477	return (0);
2478}
2479
2480static void
2481vmxnet3_link_intr_enable(if_ctx_t ctx)
2482{
2483	struct vmxnet3_softc *sc;
2484
2485	sc = iflib_get_softc(ctx);
2486	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2487}
2488
2489static void
2490vmxnet3_intr_enable_all(if_ctx_t ctx)
2491{
2492	struct vmxnet3_softc *sc;
2493	if_softc_ctx_t scctx;
2494	int i;
2495
2496	sc = iflib_get_softc(ctx);
2497	scctx = sc->vmx_scctx;
2498	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
2499	for (i = 0; i < scctx->isc_vectors; i++)
2500		vmxnet3_enable_intr(sc, i);
2501}
2502
2503static void
2504vmxnet3_intr_disable_all(if_ctx_t ctx)
2505{
2506	struct vmxnet3_softc *sc;
2507	int i;
2508
2509	sc = iflib_get_softc(ctx);
2510	/*
2511	 * iflib may invoke this routine before vmxnet3_attach_post() has
2512	 * run, which is before the top level shared data area is
2513	 * initialized and the device made aware of it.
2514	 */
2515	if (sc->vmx_ds != NULL)
2516		sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
2517	for (i = 0; i < VMXNET3_MAX_INTRS; i++)
2518		vmxnet3_disable_intr(sc, i);
2519}
2520
2521/*
2522 * Since this is a purely paravirtualized device, we do not have
2523 * to worry about DMA coherency. But at times, we must make sure
2524 * both the compiler and CPU do not reorder memory operations.
2525 */
2526static inline void
2527vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
2528{
2529
2530	switch (type) {
2531	case VMXNET3_BARRIER_RD:
2532		rmb();
2533		break;
2534	case VMXNET3_BARRIER_WR:
2535		wmb();
2536		break;
2537	case VMXNET3_BARRIER_RDWR:
2538		mb();
2539		break;
2540	default:
2541		panic("%s: bad barrier type %d", __func__, type);
2542	}
2543}
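
/*
 * Illustrative use of the barriers above (a sketch, not a statement
 * about every call site): VMXNET3_BARRIER_WR is the natural fence
 * between filling in a descriptor's fields and flipping its generation
 * bit, so the device can never observe a current generation value
 * paired with stale descriptor contents.
 */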
2544