1/*-
2 * Copyright (c) 2013 Tsubai Masanari
3 * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
4 * Copyright (c) 2018 Patrick Kelsey
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $
19 */
20
21/* Driver for VMware vmxnet3 virtual ethernet devices. */
22
23#include <sys/cdefs.h>
24#include "opt_rss.h"
25
26#include <sys/param.h>
27#include <sys/systm.h>
28#include <sys/kernel.h>
29#include <sys/endian.h>
30#include <sys/sockio.h>
31#include <sys/mbuf.h>
32#include <sys/malloc.h>
33#include <sys/module.h>
34#include <sys/socket.h>
35#include <sys/sysctl.h>
36#include <sys/smp.h>
37#include <vm/vm.h>
38#include <vm/pmap.h>
39
40#include <net/ethernet.h>
41#include <net/if.h>
42#include <net/if_var.h>
43#include <net/if_arp.h>
44#include <net/if_dl.h>
45#include <net/if_types.h>
46#include <net/if_media.h>
47#include <net/if_vlan_var.h>
48#include <net/iflib.h>
49#ifdef RSS
50#include <net/rss_config.h>
51#endif
52
53#include <netinet/in_systm.h>
54#include <netinet/in.h>
55#include <netinet/ip.h>
56#include <netinet/ip6.h>
57#include <netinet6/ip6_var.h>
58#include <netinet/udp.h>
59#include <netinet/tcp.h>
60
61#include <machine/bus.h>
62#include <machine/resource.h>
63#include <sys/bus.h>
64#include <sys/rman.h>
65
66#include <dev/pci/pcireg.h>
67#include <dev/pci/pcivar.h>
68
69#include "ifdi_if.h"
70
71#include "if_vmxreg.h"
72#include "if_vmxvar.h"
73
74#include "opt_inet.h"
75#include "opt_inet6.h"
76
77#define VMXNET3_VMWARE_VENDOR_ID	0x15AD
78#define VMXNET3_VMWARE_DEVICE_ID	0x07B0
79
80static const pci_vendor_info_t vmxnet3_vendor_info_array[] =
81{
82	PVID(VMXNET3_VMWARE_VENDOR_ID, VMXNET3_VMWARE_DEVICE_ID, "VMware VMXNET3 Ethernet Adapter"),
83	/* required last entry */
84	PVID_END
85};
86
87static void	*vmxnet3_register(device_t);
88static int	vmxnet3_attach_pre(if_ctx_t);
89static int	vmxnet3_msix_intr_assign(if_ctx_t, int);
90static void	vmxnet3_free_irqs(struct vmxnet3_softc *);
91static int	vmxnet3_attach_post(if_ctx_t);
92static int	vmxnet3_detach(if_ctx_t);
93static int	vmxnet3_shutdown(if_ctx_t);
94static int	vmxnet3_suspend(if_ctx_t);
95static int	vmxnet3_resume(if_ctx_t);
96
97static int	vmxnet3_alloc_resources(struct vmxnet3_softc *);
98static void	vmxnet3_free_resources(struct vmxnet3_softc *);
99static int	vmxnet3_check_version(struct vmxnet3_softc *);
100static void	vmxnet3_set_interrupt_idx(struct vmxnet3_softc *);
101
102static int	vmxnet3_queues_shared_alloc(struct vmxnet3_softc *);
103static void	vmxnet3_init_txq(struct vmxnet3_softc *, int);
104static int	vmxnet3_tx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
105static void	vmxnet3_init_rxq(struct vmxnet3_softc *, int, int);
106static int	vmxnet3_rx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
107static void	vmxnet3_queues_free(if_ctx_t);
108
109static int	vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
110static void	vmxnet3_free_shared_data(struct vmxnet3_softc *);
111static int	vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
112static void	vmxnet3_free_mcast_table(struct vmxnet3_softc *);
113static void	vmxnet3_init_shared_data(struct vmxnet3_softc *);
114static void	vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
115static void	vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
116static int	vmxnet3_alloc_data(struct vmxnet3_softc *);
117static void	vmxnet3_free_data(struct vmxnet3_softc *);
118
119static void	vmxnet3_evintr(struct vmxnet3_softc *);
120static int	vmxnet3_isc_txd_encap(void *, if_pkt_info_t);
121static void	vmxnet3_isc_txd_flush(void *, uint16_t, qidx_t);
122static int	vmxnet3_isc_txd_credits_update(void *, uint16_t, bool);
123static int	vmxnet3_isc_rxd_available(void *, uint16_t, qidx_t, qidx_t);
124static int	vmxnet3_isc_rxd_pkt_get(void *, if_rxd_info_t);
125static void	vmxnet3_isc_rxd_refill(void *, if_rxd_update_t);
126static void	vmxnet3_isc_rxd_flush(void *, uint16_t, uint8_t, qidx_t);
127static int	vmxnet3_legacy_intr(void *);
128static int	vmxnet3_rxq_intr(void *);
129static int	vmxnet3_event_intr(void *);
130
131static void	vmxnet3_stop(if_ctx_t);
132
133static void	vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
134static void	vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
135static void	vmxnet3_reinit_queues(struct vmxnet3_softc *);
136static int	vmxnet3_enable_device(struct vmxnet3_softc *);
137static void	vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
138static void	vmxnet3_init(if_ctx_t);
139static void	vmxnet3_multi_set(if_ctx_t);
140static int	vmxnet3_mtu_set(if_ctx_t, uint32_t);
141static void	vmxnet3_media_status(if_ctx_t, struct ifmediareq *);
142static int	vmxnet3_media_change(if_ctx_t);
143static int	vmxnet3_promisc_set(if_ctx_t, int);
144static uint64_t	vmxnet3_get_counter(if_ctx_t, ift_counter);
145static void	vmxnet3_update_admin_status(if_ctx_t);
146static void	vmxnet3_txq_timer(if_ctx_t, uint16_t);
147
148static void	vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
149		    uint16_t);
150static void	vmxnet3_vlan_register(if_ctx_t, uint16_t);
151static void	vmxnet3_vlan_unregister(if_ctx_t, uint16_t);
152static void	vmxnet3_set_rxfilter(struct vmxnet3_softc *, int);
153
154static void	vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
155static int	vmxnet3_link_is_up(struct vmxnet3_softc *);
156static void	vmxnet3_link_status(struct vmxnet3_softc *);
157static void	vmxnet3_set_lladdr(struct vmxnet3_softc *);
158static void	vmxnet3_get_lladdr(struct vmxnet3_softc *);
159
160static void	vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *,
161		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
162static void	vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *,
163		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
164static void	vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *,
165		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
166static void	vmxnet3_setup_sysctl(struct vmxnet3_softc *);
167
168static void	vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t,
169		    uint32_t);
170static uint32_t	vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t);
171static void	vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t,
172		    uint32_t);
173static void	vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t);
174static uint32_t	vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t);
175
176static int	vmxnet3_tx_queue_intr_enable(if_ctx_t, uint16_t);
177static int	vmxnet3_rx_queue_intr_enable(if_ctx_t, uint16_t);
178static void	vmxnet3_link_intr_enable(if_ctx_t);
179static void	vmxnet3_enable_intr(struct vmxnet3_softc *, int);
180static void	vmxnet3_disable_intr(struct vmxnet3_softc *, int);
181static void	vmxnet3_intr_enable_all(if_ctx_t);
182static void	vmxnet3_intr_disable_all(if_ctx_t);
183static bool	vmxnet3_if_needs_restart(if_ctx_t, enum iflib_restart_event);
184
185typedef enum {
186	VMXNET3_BARRIER_RD,
187	VMXNET3_BARRIER_WR,
188	VMXNET3_BARRIER_RDWR,
189} vmxnet3_barrier_t;
190
191static void	vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);
192
193static device_method_t vmxnet3_methods[] = {
194	/* Device interface */
195	DEVMETHOD(device_register, vmxnet3_register),
196	DEVMETHOD(device_probe, iflib_device_probe),
197	DEVMETHOD(device_attach, iflib_device_attach),
198	DEVMETHOD(device_detach, iflib_device_detach),
199	DEVMETHOD(device_shutdown, iflib_device_shutdown),
200	DEVMETHOD(device_suspend, iflib_device_suspend),
201	DEVMETHOD(device_resume, iflib_device_resume),
202	DEVMETHOD_END
203};
204
205static driver_t vmxnet3_driver = {
206	"vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc)
207};
208
209DRIVER_MODULE(vmx, pci, vmxnet3_driver, 0, 0);
210IFLIB_PNP_INFO(pci, vmx, vmxnet3_vendor_info_array);
211MODULE_VERSION(vmx, 2);
212
213MODULE_DEPEND(vmx, pci, 1, 1, 1);
214MODULE_DEPEND(vmx, ether, 1, 1, 1);
215MODULE_DEPEND(vmx, iflib, 1, 1, 1);
216
217static device_method_t vmxnet3_iflib_methods[] = {
218	DEVMETHOD(ifdi_tx_queues_alloc, vmxnet3_tx_queues_alloc),
219	DEVMETHOD(ifdi_rx_queues_alloc, vmxnet3_rx_queues_alloc),
220	DEVMETHOD(ifdi_queues_free, vmxnet3_queues_free),
221
222	DEVMETHOD(ifdi_attach_pre, vmxnet3_attach_pre),
223	DEVMETHOD(ifdi_attach_post, vmxnet3_attach_post),
224	DEVMETHOD(ifdi_detach, vmxnet3_detach),
225
226	DEVMETHOD(ifdi_init, vmxnet3_init),
227	DEVMETHOD(ifdi_stop, vmxnet3_stop),
228	DEVMETHOD(ifdi_multi_set, vmxnet3_multi_set),
229	DEVMETHOD(ifdi_mtu_set, vmxnet3_mtu_set),
230	DEVMETHOD(ifdi_media_status, vmxnet3_media_status),
231	DEVMETHOD(ifdi_media_change, vmxnet3_media_change),
232	DEVMETHOD(ifdi_promisc_set, vmxnet3_promisc_set),
233	DEVMETHOD(ifdi_get_counter, vmxnet3_get_counter),
234	DEVMETHOD(ifdi_update_admin_status, vmxnet3_update_admin_status),
235	DEVMETHOD(ifdi_timer, vmxnet3_txq_timer),
236
237	DEVMETHOD(ifdi_tx_queue_intr_enable, vmxnet3_tx_queue_intr_enable),
238	DEVMETHOD(ifdi_rx_queue_intr_enable, vmxnet3_rx_queue_intr_enable),
239	DEVMETHOD(ifdi_link_intr_enable, vmxnet3_link_intr_enable),
240	DEVMETHOD(ifdi_intr_enable, vmxnet3_intr_enable_all),
241	DEVMETHOD(ifdi_intr_disable, vmxnet3_intr_disable_all),
242	DEVMETHOD(ifdi_msix_intr_assign, vmxnet3_msix_intr_assign),
243
244	DEVMETHOD(ifdi_vlan_register, vmxnet3_vlan_register),
245	DEVMETHOD(ifdi_vlan_unregister, vmxnet3_vlan_unregister),
246
247	DEVMETHOD(ifdi_shutdown, vmxnet3_shutdown),
248	DEVMETHOD(ifdi_suspend, vmxnet3_suspend),
249	DEVMETHOD(ifdi_resume, vmxnet3_resume),
250
251	DEVMETHOD(ifdi_needs_restart, vmxnet3_if_needs_restart),
252
253	DEVMETHOD_END
254};
255
256static driver_t vmxnet3_iflib_driver = {
257	"vmx", vmxnet3_iflib_methods, sizeof(struct vmxnet3_softc)
258};
259
260struct if_txrx vmxnet3_txrx = {
261	.ift_txd_encap = vmxnet3_isc_txd_encap,
262	.ift_txd_flush = vmxnet3_isc_txd_flush,
263	.ift_txd_credits_update = vmxnet3_isc_txd_credits_update,
264	.ift_rxd_available = vmxnet3_isc_rxd_available,
265	.ift_rxd_pkt_get = vmxnet3_isc_rxd_pkt_get,
266	.ift_rxd_refill = vmxnet3_isc_rxd_refill,
267	.ift_rxd_flush = vmxnet3_isc_rxd_flush,
268	.ift_legacy_intr = vmxnet3_legacy_intr
269};
270
271static struct if_shared_ctx vmxnet3_sctx_init = {
272	.isc_magic = IFLIB_MAGIC,
273	.isc_q_align = 512,
274
275	.isc_tx_maxsize = VMXNET3_TX_MAXSIZE,
276	.isc_tx_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
277	.isc_tso_maxsize = VMXNET3_TSO_MAXSIZE + sizeof(struct ether_vlan_header),
278	.isc_tso_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
279
280	/*
281	 * These values are used to configure the busdma tag used for
282	 * receive descriptors.  Each receive descriptor only points to one
283	 * buffer.
284	 */
285	.isc_rx_maxsize = VMXNET3_RX_MAXSEGSIZE, /* One buf per descriptor */
286	.isc_rx_nsegments = 1,  /* One mapping per descriptor */
287	.isc_rx_maxsegsize = VMXNET3_RX_MAXSEGSIZE,
288
289	.isc_admin_intrcnt = 1,
290	.isc_vendor_info = vmxnet3_vendor_info_array,
291	.isc_driver_version = "2",
292	.isc_driver = &vmxnet3_iflib_driver,
293	.isc_flags = IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ | IFLIB_SINGLE_IRQ_RX_ONLY,
294
295	/*
296	 * Number of receive queues per receive queue set, with associated
297	 * descriptor settings for each.
298	 */
299	.isc_nrxqs = 3,
300	.isc_nfl = 2, /* one free list for each receive command queue */
301	.isc_nrxd_min = {VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC},
302	.isc_nrxd_max = {VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC},
303	.isc_nrxd_default = {VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC},
304
305	/*
306	 * Number of transmit queues per transmit queue set, with associated
307	 * descriptor settings for each.
308	 */
309	.isc_ntxqs = 2,
310	.isc_ntxd_min = {VMXNET3_MIN_TX_NDESC, VMXNET3_MIN_TX_NDESC},
311	.isc_ntxd_max = {VMXNET3_MAX_TX_NDESC, VMXNET3_MAX_TX_NDESC},
312	.isc_ntxd_default = {VMXNET3_DEF_TX_NDESC, VMXNET3_DEF_TX_NDESC},
313};
314
315static void *
316vmxnet3_register(device_t dev)
317{
318	return (&vmxnet3_sctx_init);
319}
320
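/*
 * Round val down to the nearest power of two (e.g. 5 becomes 4), using
 * fls(9) to locate the highest set bit.
 */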
321static int
322trunc_powerof2(int val)
323{
324
325	return (1U << (fls(val) - 1));
326}
327
328static int
329vmxnet3_attach_pre(if_ctx_t ctx)
330{
331	device_t dev;
332	if_softc_ctx_t scctx;
333	struct vmxnet3_softc *sc;
334	uint32_t intr_config;
335	int error;
336
337	dev = iflib_get_dev(ctx);
338	sc = iflib_get_softc(ctx);
339	sc->vmx_dev = dev;
340	sc->vmx_ctx = ctx;
341	sc->vmx_sctx = iflib_get_sctx(ctx);
342	sc->vmx_scctx = iflib_get_softc_ctx(ctx);
343	sc->vmx_ifp = iflib_get_ifp(ctx);
344	sc->vmx_media = iflib_get_media(ctx);
345	scctx = sc->vmx_scctx;
346
347	scctx->isc_tx_nsegments = VMXNET3_TX_MAXSEGS;
348	scctx->isc_tx_tso_segments_max = VMXNET3_TX_MAXSEGS;
349	/* isc_tx_tso_size_max doesn't include possible vlan header */
350	scctx->isc_tx_tso_size_max = VMXNET3_TSO_MAXSIZE;
351	scctx->isc_tx_tso_segsize_max = VMXNET3_TX_MAXSEGSIZE;
352	scctx->isc_txrx = &vmxnet3_txrx;
353
354	/* If 0, the iflib tunable was not set, so set to the default */
355	if (scctx->isc_nrxqsets == 0)
356		scctx->isc_nrxqsets = VMXNET3_DEF_RX_QUEUES;
357	scctx->isc_nrxqsets = trunc_powerof2(scctx->isc_nrxqsets);
358	scctx->isc_nrxqsets_max = min(VMXNET3_MAX_RX_QUEUES, mp_ncpus);
359	scctx->isc_nrxqsets_max = trunc_powerof2(scctx->isc_nrxqsets_max);
360
361	/* If 0, the iflib tunable was not set, so set to the default */
362	if (scctx->isc_ntxqsets == 0)
363		scctx->isc_ntxqsets = VMXNET3_DEF_TX_QUEUES;
364	scctx->isc_ntxqsets = trunc_powerof2(scctx->isc_ntxqsets);
365	scctx->isc_ntxqsets_max = min(VMXNET3_MAX_TX_QUEUES, mp_ncpus);
366	scctx->isc_ntxqsets_max = trunc_powerof2(scctx->isc_ntxqsets_max);
367
368	/*
369	 * Enforce that the transmit completion queue descriptor count is
370	 * the same as the transmit command queue descriptor count.
371	 */
372	scctx->isc_ntxd[0] = scctx->isc_ntxd[1];
373	scctx->isc_txqsizes[0] =
374	    sizeof(struct vmxnet3_txcompdesc) * scctx->isc_ntxd[0];
375	scctx->isc_txqsizes[1] =
376	    sizeof(struct vmxnet3_txdesc) * scctx->isc_ntxd[1];
377
378	/*
379	 * Enforce that the receive completion queue descriptor count is the
380	 * sum of the receive command queue descriptor counts, and that the
381	 * second receive command queue descriptor count is the same as the
382	 * first one.
383	 */
384	scctx->isc_nrxd[2] = scctx->isc_nrxd[1];
385	scctx->isc_nrxd[0] = scctx->isc_nrxd[1] + scctx->isc_nrxd[2];
386	scctx->isc_rxqsizes[0] =
387	    sizeof(struct vmxnet3_rxcompdesc) * scctx->isc_nrxd[0];
388	scctx->isc_rxqsizes[1] =
389	    sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[1];
390	scctx->isc_rxqsizes[2] =
391	    sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[2];
392
393	/*
394	 * Initialize the max frame size and descriptor queue buffer
395	 * sizes.
396	 */
397	vmxnet3_mtu_set(ctx, if_getmtu(sc->vmx_ifp));
398
399	scctx->isc_rss_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
400
401	/* Map PCI BARs */
402	error = vmxnet3_alloc_resources(sc);
403	if (error)
404		goto fail;
405
406	/* Check device versions */
407	error = vmxnet3_check_version(sc);
408	if (error)
409		goto fail;
410
411	/*
412	 * The interrupt mode can be set in the hypervisor configuration via
413	 * the parameter ethernet<N>.intrMode.
414	 */
415	intr_config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
416	sc->vmx_intr_mask_mode = (intr_config >> 2) & 0x03;
417
	/*
	 * Configure the softc context so that iflib attempts the interrupt
	 * mode indicated by intr_config.  iflib will then follow its usual
	 * fallback path of MSI-X -> MSI -> LEGACY, starting from that mode.
	 */
424	switch (intr_config & 0x03) {
425	case VMXNET3_IT_AUTO:
426	case VMXNET3_IT_MSIX:
427		scctx->isc_msix_bar = pci_msix_table_bar(dev);
428		break;
429	case VMXNET3_IT_MSI:
430		scctx->isc_msix_bar = -1;
431		scctx->isc_disable_msix = 1;
432		break;
433	case VMXNET3_IT_LEGACY:
434		scctx->isc_msix_bar = 0;
435		break;
436	}
437
438	scctx->isc_tx_csum_flags = VMXNET3_CSUM_ALL_OFFLOAD;
439	scctx->isc_capabilities = scctx->isc_capenable =
440	    IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 |
441	    IFCAP_TSO4 | IFCAP_TSO6 |
442	    IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 |
443	    IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
444	    IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO |
445	    IFCAP_JUMBO_MTU;
446
447	/* These capabilities are not enabled by default. */
448	scctx->isc_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER;
449
450	vmxnet3_get_lladdr(sc);
451	iflib_set_mac(ctx, sc->vmx_lladdr);
452
453	return (0);
454fail:
455	/*
456	 * We must completely clean up anything allocated above as iflib
457	 * will not invoke any other driver entry points as a result of this
458	 * failure.
459	 */
460	vmxnet3_free_resources(sc);
461
462	return (error);
463}
464
465static int
466vmxnet3_msix_intr_assign(if_ctx_t ctx, int msix)
467{
468	struct vmxnet3_softc *sc;
469	if_softc_ctx_t scctx;
470	struct vmxnet3_rxqueue *rxq;
471	int error;
472	int i;
473	char irq_name[16];
474
475	sc = iflib_get_softc(ctx);
476	scctx = sc->vmx_scctx;
477
478	for (i = 0; i < scctx->isc_nrxqsets; i++) {
479		snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
480
481		rxq = &sc->vmx_rxq[i];
482		error = iflib_irq_alloc_generic(ctx, &rxq->vxrxq_irq, i + 1,
483		    IFLIB_INTR_RXTX, vmxnet3_rxq_intr, rxq, i, irq_name);
484		if (error) {
485			device_printf(iflib_get_dev(ctx),
486			    "Failed to register rxq %d interrupt handler\n", i);
487			return (error);
488		}
489	}
490
491	for (i = 0; i < scctx->isc_ntxqsets; i++) {
492		snprintf(irq_name, sizeof(irq_name), "txq%d", i);
493
494		/*
495		 * Don't provide the corresponding rxq irq for reference -
496		 * we want the transmit task to be attached to a task queue
497		 * that is different from the one used by the corresponding
498		 * rxq irq.  That is because the TX doorbell writes are very
499		 * expensive as virtualized MMIO operations, so we want to
500		 * be able to defer them to another core when possible so
501		 * that they don't steal receive processing cycles during
502		 * stack turnarounds like TCP ACK generation.  The other
503		 * piece to this approach is enabling the iflib abdicate
504		 * option (currently via an interface-specific
505		 * tunable/sysctl).
506		 */
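		/*
		 * For example, that is typically something like
		 * "sysctl dev.vmx.0.iflib.tx_abdicate=1", though the exact
		 * tunable name may vary with the iflib version in use.
		 */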
507		iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_TX, NULL, i,
508		    irq_name);
509	}
510
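	/*
	 * The event (admin) interrupt uses the vector following the last
	 * receive queue vector.
	 */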
511	error = iflib_irq_alloc_generic(ctx, &sc->vmx_event_intr_irq,
512	    scctx->isc_nrxqsets + 1, IFLIB_INTR_ADMIN, vmxnet3_event_intr, sc, 0,
513	    "event");
514	if (error) {
515		device_printf(iflib_get_dev(ctx),
516		    "Failed to register event interrupt handler\n");
517		return (error);
518	}
519
520	return (0);
521}
522
523static void
524vmxnet3_free_irqs(struct vmxnet3_softc *sc)
525{
526	if_softc_ctx_t scctx;
527	struct vmxnet3_rxqueue *rxq;
528	int i;
529
530	scctx = sc->vmx_scctx;
531
532	for (i = 0; i < scctx->isc_nrxqsets; i++) {
533		rxq = &sc->vmx_rxq[i];
534		iflib_irq_free(sc->vmx_ctx, &rxq->vxrxq_irq);
535	}
536
537	iflib_irq_free(sc->vmx_ctx, &sc->vmx_event_intr_irq);
538}
539
540static int
541vmxnet3_attach_post(if_ctx_t ctx)
542{
543	if_softc_ctx_t scctx;
544	struct vmxnet3_softc *sc;
545	int error;
546
547	scctx = iflib_get_softc_ctx(ctx);
548	sc = iflib_get_softc(ctx);
549
550	if (scctx->isc_nrxqsets > 1)
551		sc->vmx_flags |= VMXNET3_FLAG_RSS;
552
553	error = vmxnet3_alloc_data(sc);
554	if (error)
555		goto fail;
556
557	vmxnet3_set_interrupt_idx(sc);
558	vmxnet3_setup_sysctl(sc);
559
560	ifmedia_add(sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
561	ifmedia_set(sc->vmx_media, IFM_ETHER | IFM_AUTO);
562
563fail:
564	return (error);
565}
566
567static int
568vmxnet3_detach(if_ctx_t ctx)
569{
570	struct vmxnet3_softc *sc;
571
572	sc = iflib_get_softc(ctx);
573
574	vmxnet3_free_irqs(sc);
575	vmxnet3_free_data(sc);
576	vmxnet3_free_resources(sc);
577
578	return (0);
579}
580
581static int
582vmxnet3_shutdown(if_ctx_t ctx)
583{
584
585	return (0);
586}
587
588static int
589vmxnet3_suspend(if_ctx_t ctx)
590{
591
592	return (0);
593}
594
595static int
596vmxnet3_resume(if_ctx_t ctx)
597{
598
599	return (0);
600}
601
602static int
603vmxnet3_alloc_resources(struct vmxnet3_softc *sc)
604{
605	device_t dev;
606	int rid;
607
608	dev = sc->vmx_dev;
609
610	rid = PCIR_BAR(0);
611	sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
612	    RF_ACTIVE);
613	if (sc->vmx_res0 == NULL) {
614		device_printf(dev,
615		    "could not map BAR0 memory\n");
616		return (ENXIO);
617	}
618
619	sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0);
620	sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0);
621
622	rid = PCIR_BAR(1);
623	sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
624	    RF_ACTIVE);
625	if (sc->vmx_res1 == NULL) {
626		device_printf(dev,
627		    "could not map BAR1 memory\n");
628		return (ENXIO);
629	}
630
631	sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1);
632	sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1);
633
634	return (0);
635}
636
637static void
638vmxnet3_free_resources(struct vmxnet3_softc *sc)
639{
640	device_t dev;
641
642	dev = sc->vmx_dev;
643
644	if (sc->vmx_res0 != NULL) {
645		bus_release_resource(dev, SYS_RES_MEMORY,
646		    rman_get_rid(sc->vmx_res0), sc->vmx_res0);
647		sc->vmx_res0 = NULL;
648	}
649
650	if (sc->vmx_res1 != NULL) {
651		bus_release_resource(dev, SYS_RES_MEMORY,
652		    rman_get_rid(sc->vmx_res1), sc->vmx_res1);
653		sc->vmx_res1 = NULL;
654	}
655}
656
657static int
658vmxnet3_check_version(struct vmxnet3_softc *sc)
659{
660	device_t dev;
661	uint32_t version;
662
663	dev = sc->vmx_dev;
664
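	/*
	 * Reading VRRS (and UVRS below) returns a bit mask of the device
	 * (and UPT) revisions the hypervisor supports; writing the register
	 * back selects the revision this driver will use (revision 1).
	 */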
665	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
666	if ((version & 0x01) == 0) {
667		device_printf(dev, "unsupported hardware version %#x\n",
668		    version);
669		return (ENOTSUP);
670	}
671	vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
672
673	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
674	if ((version & 0x01) == 0) {
675		device_printf(dev, "unsupported UPT version %#x\n", version);
676		return (ENOTSUP);
677	}
678	vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
679
680	return (0);
681}
682
683static void
684vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
685{
686	if_softc_ctx_t scctx;
687	struct vmxnet3_txqueue *txq;
688	struct vmxnet3_txq_shared *txs;
689	struct vmxnet3_rxqueue *rxq;
690	struct vmxnet3_rxq_shared *rxs;
691	int intr_idx;
692	int i;
693
694	scctx = sc->vmx_scctx;
695
696	/*
697	 * There is always one interrupt per receive queue, assigned
698	 * starting with the first interrupt.  When there is only one
699	 * interrupt available, the event interrupt shares the receive queue
700	 * interrupt, otherwise it uses the interrupt following the last
701	 * receive queue interrupt.  Transmit queues are not assigned
702	 * interrupts, so they are given indexes beyond the indexes that
703	 * correspond to the real interrupts.
704	 */
705
706	/* The event interrupt is always the last vector. */
707	sc->vmx_event_intr_idx = scctx->isc_vectors - 1;
708
709	intr_idx = 0;
710	for (i = 0; i < scctx->isc_nrxqsets; i++, intr_idx++) {
711		rxq = &sc->vmx_rxq[i];
712		rxs = rxq->vxrxq_rs;
713		rxq->vxrxq_intr_idx = intr_idx;
714		rxs->intr_idx = rxq->vxrxq_intr_idx;
715	}
716
	/*
	 * Assign the tx queue interrupt indexes beyond those actually in
	 * use.  These interrupts will never be enabled.
	 */
721	intr_idx = scctx->isc_vectors;
722	for (i = 0; i < scctx->isc_ntxqsets; i++, intr_idx++) {
723		txq = &sc->vmx_txq[i];
724		txs = txq->vxtxq_ts;
725		txq->vxtxq_intr_idx = intr_idx;
726		txs->intr_idx = txq->vxtxq_intr_idx;
727	}
728}
729
730static int
731vmxnet3_queues_shared_alloc(struct vmxnet3_softc *sc)
732{
733	if_softc_ctx_t scctx;
734	int size;
735	int error;
736
737	scctx = sc->vmx_scctx;
738
739	/*
740	 * The txq and rxq shared data areas must be allocated contiguously
741	 * as vmxnet3_driver_shared contains only a single address member
742	 * for the shared queue data area.
743	 */
744	size = scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared) +
745	    scctx->isc_nrxqsets * sizeof(struct vmxnet3_rxq_shared);
746	error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128, &sc->vmx_qs_dma, 0);
747	if (error) {
748		device_printf(sc->vmx_dev, "cannot alloc queue shared memory\n");
749		return (error);
750	}
751
752	return (0);
753}
754
755static void
756vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
757{
758	struct vmxnet3_txqueue *txq;
759	struct vmxnet3_comp_ring *txc;
760	struct vmxnet3_txring *txr;
761	if_softc_ctx_t scctx;
762
763	txq = &sc->vmx_txq[q];
764	txc = &txq->vxtxq_comp_ring;
765	txr = &txq->vxtxq_cmd_ring;
766	scctx = sc->vmx_scctx;
767
768	snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
769	    device_get_nameunit(sc->vmx_dev), q);
770
771	txq->vxtxq_sc = sc;
772	txq->vxtxq_id = q;
773	txc->vxcr_ndesc = scctx->isc_ntxd[0];
774	txr->vxtxr_ndesc = scctx->isc_ntxd[1];
775}
776
777static int
778vmxnet3_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
779    int ntxqs, int ntxqsets)
780{
781	struct vmxnet3_softc *sc;
782	int q;
783	int error;
784	caddr_t kva;
785
786	sc = iflib_get_softc(ctx);
787
788	/* Allocate the array of transmit queues */
789	sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) *
790	    ntxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
791	if (sc->vmx_txq == NULL)
792		return (ENOMEM);
793
794	/* Initialize driver state for each transmit queue */
795	for (q = 0; q < ntxqsets; q++)
796		vmxnet3_init_txq(sc, q);
797
798	/*
799	 * Allocate queue state that is shared with the device.  This check
800	 * and call is performed in both vmxnet3_tx_queues_alloc() and
801	 * vmxnet3_rx_queues_alloc() so that we don't have to care which
802	 * order iflib invokes those routines in.
803	 */
804	if (sc->vmx_qs_dma.idi_size == 0) {
805		error = vmxnet3_queues_shared_alloc(sc);
806		if (error)
807			return (error);
808	}
809
810	kva = sc->vmx_qs_dma.idi_vaddr;
811	for (q = 0; q < ntxqsets; q++) {
812		sc->vmx_txq[q].vxtxq_ts = (struct vmxnet3_txq_shared *) kva;
813		kva += sizeof(struct vmxnet3_txq_shared);
814	}
815
816	/* Record descriptor ring vaddrs and paddrs */
817	for (q = 0; q < ntxqsets; q++) {
818		struct vmxnet3_txqueue *txq;
819		struct vmxnet3_txring *txr;
820		struct vmxnet3_comp_ring *txc;
821
822		txq = &sc->vmx_txq[q];
823		txc = &txq->vxtxq_comp_ring;
824		txr = &txq->vxtxq_cmd_ring;
825
826		/* Completion ring */
827		txc->vxcr_u.txcd =
828		    (struct vmxnet3_txcompdesc *) vaddrs[q * ntxqs + 0];
829		txc->vxcr_paddr = paddrs[q * ntxqs + 0];
830
831		/* Command ring */
832		txr->vxtxr_txd =
833		    (struct vmxnet3_txdesc *) vaddrs[q * ntxqs + 1];
834		txr->vxtxr_paddr = paddrs[q * ntxqs + 1];
835	}
836
837	return (0);
838}
839
840static void
841vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q, int nrxqs)
842{
843	struct vmxnet3_rxqueue *rxq;
844	struct vmxnet3_comp_ring *rxc;
845	struct vmxnet3_rxring *rxr;
846	if_softc_ctx_t scctx;
847	int i;
848
849	rxq = &sc->vmx_rxq[q];
850	rxc = &rxq->vxrxq_comp_ring;
851	scctx = sc->vmx_scctx;
852
853	snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
854	    device_get_nameunit(sc->vmx_dev), q);
855
856	rxq->vxrxq_sc = sc;
857	rxq->vxrxq_id = q;
858
859	/*
860	 * First rxq is the completion queue, so there are nrxqs - 1 command
861	 * rings starting at iflib queue id 1.
862	 */
863	rxc->vxcr_ndesc = scctx->isc_nrxd[0];
864	for (i = 0; i < nrxqs - 1; i++) {
865		rxr = &rxq->vxrxq_cmd_ring[i];
866		rxr->vxrxr_ndesc = scctx->isc_nrxd[i + 1];
867	}
868}
869
870static int
871vmxnet3_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
872    int nrxqs, int nrxqsets)
873{
874	struct vmxnet3_softc *sc;
875	if_softc_ctx_t scctx;
876	int q;
877	int i;
878	int error;
879	caddr_t kva;
880
881	sc = iflib_get_softc(ctx);
882	scctx = sc->vmx_scctx;
883
884	/* Allocate the array of receive queues */
885	sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) *
886	    nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
887	if (sc->vmx_rxq == NULL)
888		return (ENOMEM);
889
890	/* Initialize driver state for each receive queue */
891	for (q = 0; q < nrxqsets; q++)
892		vmxnet3_init_rxq(sc, q, nrxqs);
893
894	/*
895	 * Allocate queue state that is shared with the device.  This check
896	 * and call is performed in both vmxnet3_tx_queues_alloc() and
897	 * vmxnet3_rx_queues_alloc() so that we don't have to care which
898	 * order iflib invokes those routines in.
899	 */
900	if (sc->vmx_qs_dma.idi_size == 0) {
901		error = vmxnet3_queues_shared_alloc(sc);
902		if (error)
903			return (error);
904	}
905
906	kva = sc->vmx_qs_dma.idi_vaddr +
907	    scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared);
908	for (q = 0; q < nrxqsets; q++) {
909		sc->vmx_rxq[q].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva;
910		kva += sizeof(struct vmxnet3_rxq_shared);
911	}
912
913	/* Record descriptor ring vaddrs and paddrs */
914	for (q = 0; q < nrxqsets; q++) {
915		struct vmxnet3_rxqueue *rxq;
916		struct vmxnet3_rxring *rxr;
917		struct vmxnet3_comp_ring *rxc;
918
919		rxq = &sc->vmx_rxq[q];
920		rxc = &rxq->vxrxq_comp_ring;
921
922		/* Completion ring */
923		rxc->vxcr_u.rxcd =
924		    (struct vmxnet3_rxcompdesc *) vaddrs[q * nrxqs + 0];
925		rxc->vxcr_paddr = paddrs[q * nrxqs + 0];
926
927		/* Command ring(s) */
928		for (i = 0; i < nrxqs - 1; i++) {
929			rxr = &rxq->vxrxq_cmd_ring[i];
930
931			rxr->vxrxr_rxd =
932			    (struct vmxnet3_rxdesc *) vaddrs[q * nrxqs + 1 + i];
933			rxr->vxrxr_paddr = paddrs[q * nrxqs + 1 + i];
934		}
935	}
936
937	return (0);
938}
939
940static void
941vmxnet3_queues_free(if_ctx_t ctx)
942{
943	struct vmxnet3_softc *sc;
944
945	sc = iflib_get_softc(ctx);
946
947	/* Free queue state area that is shared with the device */
948	if (sc->vmx_qs_dma.idi_size != 0) {
949		iflib_dma_free(&sc->vmx_qs_dma);
950		sc->vmx_qs_dma.idi_size = 0;
951	}
952
953	/* Free array of receive queues */
954	if (sc->vmx_rxq != NULL) {
955		free(sc->vmx_rxq, M_DEVBUF);
956		sc->vmx_rxq = NULL;
957	}
958
959	/* Free array of transmit queues */
960	if (sc->vmx_txq != NULL) {
961		free(sc->vmx_txq, M_DEVBUF);
962		sc->vmx_txq = NULL;
963	}
964}
965
966static int
967vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
968{
969	device_t dev;
970	size_t size;
971	int error;
972
973	dev = sc->vmx_dev;
974
975	/* Top level state structure shared with the device */
976	size = sizeof(struct vmxnet3_driver_shared);
977	error = iflib_dma_alloc_align(sc->vmx_ctx, size, 1, &sc->vmx_ds_dma, 0);
978	if (error) {
979		device_printf(dev, "cannot alloc shared memory\n");
980		return (error);
981	}
982	sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.idi_vaddr;
983
984	/* RSS table state shared with the device */
985	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
986		size = sizeof(struct vmxnet3_rss_shared);
987		error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128,
988		    &sc->vmx_rss_dma, 0);
989		if (error) {
990			device_printf(dev, "cannot alloc rss shared memory\n");
991			return (error);
992		}
993		sc->vmx_rss =
994		    (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.idi_vaddr;
995	}
996
997	return (0);
998}
999
1000static void
1001vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
1002{
1003
1004	/* Free RSS table state shared with the device */
1005	if (sc->vmx_rss != NULL) {
1006		iflib_dma_free(&sc->vmx_rss_dma);
1007		sc->vmx_rss = NULL;
1008	}
1009
1010	/* Free top level state structure shared with the device */
1011	if (sc->vmx_ds != NULL) {
1012		iflib_dma_free(&sc->vmx_ds_dma);
1013		sc->vmx_ds = NULL;
1014	}
1015}
1016
1017static int
1018vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1019{
1020	int error;
1021
1022	/* Multicast table state shared with the device */
1023	error = iflib_dma_alloc_align(sc->vmx_ctx,
1024	    VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN, 32, &sc->vmx_mcast_dma, 0);
1025	if (error)
1026		device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1027	else
1028		sc->vmx_mcast = sc->vmx_mcast_dma.idi_vaddr;
1029
1030	return (error);
1031}
1032
1033static void
1034vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1035{
1036
1037	/* Free multicast table state shared with the device */
1038	if (sc->vmx_mcast != NULL) {
1039		iflib_dma_free(&sc->vmx_mcast_dma);
1040		sc->vmx_mcast = NULL;
1041	}
1042}
1043
1044static void
1045vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1046{
1047	struct vmxnet3_driver_shared *ds;
1048	if_softc_ctx_t scctx;
1049	struct vmxnet3_txqueue *txq;
1050	struct vmxnet3_txq_shared *txs;
1051	struct vmxnet3_rxqueue *rxq;
1052	struct vmxnet3_rxq_shared *rxs;
1053	int i;
1054
1055	ds = sc->vmx_ds;
1056	scctx = sc->vmx_scctx;
1057
	/*
	 * Initialize the fields of the shared data that remain the same
	 * across reinits.  Note the shared data is zeroed when allocated.
	 */
1062
1063	ds->magic = VMXNET3_REV1_MAGIC;
1064
1065	/* DriverInfo */
1066	ds->version = VMXNET3_DRIVER_VERSION;
1067	ds->guest = VMXNET3_GOS_FREEBSD |
1068#ifdef __LP64__
1069	    VMXNET3_GOS_64BIT;
1070#else
1071	    VMXNET3_GOS_32BIT;
1072#endif
1073	ds->vmxnet3_revision = 1;
1074	ds->upt_version = 1;
1075
1076	/* Misc. conf */
1077	ds->driver_data = vtophys(sc);
1078	ds->driver_data_len = sizeof(struct vmxnet3_softc);
1079	ds->queue_shared = sc->vmx_qs_dma.idi_paddr;
1080	ds->queue_shared_len = sc->vmx_qs_dma.idi_size;
1081	ds->nrxsg_max = IFLIB_MAX_RX_SEGS;
1082
1083	/* RSS conf */
1084	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1085		ds->rss.version = 1;
1086		ds->rss.paddr = sc->vmx_rss_dma.idi_paddr;
1087		ds->rss.len = sc->vmx_rss_dma.idi_size;
1088	}
1089
1090	/* Interrupt control. */
1091	ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1092	/*
1093	 * Total number of interrupt indexes we are using in the shared
1094	 * config data, even though we don't actually allocate interrupt
1095	 * resources for the tx queues.  Some versions of the device will
1096	 * fail to initialize successfully if interrupt indexes are used in
1097	 * the shared config that exceed the number of interrupts configured
1098	 * here.
1099	 */
1100	ds->nintr = (scctx->isc_vectors == 1) ?
1101	    2 : (scctx->isc_nrxqsets + scctx->isc_ntxqsets + 1);
1102	ds->evintr = sc->vmx_event_intr_idx;
1103	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1104
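	/* Default all vectors to adaptive interrupt moderation. */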
1105	for (i = 0; i < ds->nintr; i++)
1106		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1107
1108	/* Receive filter. */
1109	ds->mcast_table = sc->vmx_mcast_dma.idi_paddr;
1110	ds->mcast_tablelen = sc->vmx_mcast_dma.idi_size;
1111
1112	/* Tx queues */
1113	for (i = 0; i < scctx->isc_ntxqsets; i++) {
1114		txq = &sc->vmx_txq[i];
1115		txs = txq->vxtxq_ts;
1116
1117		txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_paddr;
1118		txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1119		txs->comp_ring = txq->vxtxq_comp_ring.vxcr_paddr;
1120		txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1121		txs->driver_data = vtophys(txq);
1122		txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1123	}
1124
1125	/* Rx queues */
1126	for (i = 0; i < scctx->isc_nrxqsets; i++) {
1127		rxq = &sc->vmx_rxq[i];
1128		rxs = rxq->vxrxq_rs;
1129
1130		rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_paddr;
1131		rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1132		rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_paddr;
1133		rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1134		rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_paddr;
1135		rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1136		rxs->driver_data = vtophys(rxq);
1137		rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1138	}
1139}
1140
1141static void
1142vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1143{
1144	/*
1145	 * Use the same key as the Linux driver until FreeBSD can do
1146	 * RSS (presumably Toeplitz) in software.
1147	 */
1148	static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
1149	    0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
1150	    0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
1151	    0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
1152	    0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
1153	    0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
1154	};
1155
1156	if_softc_ctx_t scctx;
1157	struct vmxnet3_rss_shared *rss;
1158#ifdef RSS
1159	uint8_t rss_algo;
1160#endif
1161	int i;
1162
1163	scctx = sc->vmx_scctx;
1164	rss = sc->vmx_rss;
1165
1166	rss->hash_type =
1167	    UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1168	    UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1169	rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1170	rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1171	rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1172#ifdef RSS
1173	/*
	 * If the software RSS is configured to anything other than
1175	 * Toeplitz, then just do Toeplitz in "hardware" for the sake of
1176	 * the packet distribution, but report the hash as opaque to
1177	 * disengage from the software RSS.
1178	 */
1179	rss_algo = rss_gethashalgo();
1180	if (rss_algo == RSS_HASH_TOEPLITZ) {
1181		rss_getkey(rss->hash_key);
1182		for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++) {
1183			rss->ind_table[i] = rss_get_indirection_to_bucket(i) %
1184			    scctx->isc_nrxqsets;
1185		}
1186		sc->vmx_flags |= VMXNET3_FLAG_SOFT_RSS;
1187	} else
1188#endif
1189	{
1190		memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
1191		for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1192			rss->ind_table[i] = i % scctx->isc_nrxqsets;
1193		sc->vmx_flags &= ~VMXNET3_FLAG_SOFT_RSS;
1194	}
1195}
1196
1197static void
1198vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1199{
1200	if_t ifp;
1201	struct vmxnet3_driver_shared *ds;
1202	if_softc_ctx_t scctx;
1203
1204	ifp = sc->vmx_ifp;
1205	ds = sc->vmx_ds;
1206	scctx = sc->vmx_scctx;
1207
1208	ds->mtu = if_getmtu(ifp);
1209	ds->ntxqueue = scctx->isc_ntxqsets;
1210	ds->nrxqueue = scctx->isc_nrxqsets;
1211
1212	ds->upt_features = 0;
1213	if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
1214		ds->upt_features |= UPT1_F_CSUM;
1215	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING)
1216		ds->upt_features |= UPT1_F_VLAN;
1217	if (if_getcapenable(ifp) & IFCAP_LRO)
1218		ds->upt_features |= UPT1_F_LRO;
1219
1220	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1221		ds->upt_features |= UPT1_F_RSS;
1222		vmxnet3_reinit_rss_shared_data(sc);
1223	}
1224
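	/*
	 * Hand the physical address of the shared data area to the device,
	 * low 32 bits first, then the high 32 bits.
	 */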
1225	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.idi_paddr);
1226	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1227	    (uint64_t) sc->vmx_ds_dma.idi_paddr >> 32);
1228}
1229
1230static int
1231vmxnet3_alloc_data(struct vmxnet3_softc *sc)
1232{
1233	int error;
1234
1235	error = vmxnet3_alloc_shared_data(sc);
1236	if (error)
1237		return (error);
1238
1239	error = vmxnet3_alloc_mcast_table(sc);
1240	if (error)
1241		return (error);
1242
1243	vmxnet3_init_shared_data(sc);
1244
1245	return (0);
1246}
1247
1248static void
1249vmxnet3_free_data(struct vmxnet3_softc *sc)
1250{
1251
1252	vmxnet3_free_mcast_table(sc);
1253	vmxnet3_free_shared_data(sc);
1254}
1255
1256static void
1257vmxnet3_evintr(struct vmxnet3_softc *sc)
1258{
1259	device_t dev;
1260	struct vmxnet3_txq_shared *ts;
1261	struct vmxnet3_rxq_shared *rs;
1262	uint32_t event;
1263
1264	dev = sc->vmx_dev;
1265
1266	/* Clear events. */
1267	event = sc->vmx_ds->event;
1268	vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
1269
1270	if (event & VMXNET3_EVENT_LINK)
1271		vmxnet3_link_status(sc);
1272
1273	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
1274		vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
1275		ts = sc->vmx_txq[0].vxtxq_ts;
1276		if (ts->stopped != 0)
1277			device_printf(dev, "Tx queue error %#x\n", ts->error);
1278		rs = sc->vmx_rxq[0].vxrxq_rs;
1279		if (rs->stopped != 0)
1280			device_printf(dev, "Rx queue error %#x\n", rs->error);
1281
		/* XXX - rely on the iflib watchdog to reset us? */
1283		device_printf(dev, "Rx/Tx queue error event ... "
1284		    "waiting for iflib watchdog reset\n");
1285	}
1286
1287	if (event & VMXNET3_EVENT_DIC)
1288		device_printf(dev, "device implementation change event\n");
1289	if (event & VMXNET3_EVENT_DEBUG)
1290		device_printf(dev, "debug event\n");
1291}
1292
1293static int
1294vmxnet3_isc_txd_encap(void *vsc, if_pkt_info_t pi)
1295{
1296	struct vmxnet3_softc *sc;
1297	struct vmxnet3_txqueue *txq;
1298	struct vmxnet3_txring *txr;
1299	struct vmxnet3_txdesc *txd, *sop;
1300	bus_dma_segment_t *segs;
1301	int nsegs;
1302	int pidx;
1303	int hdrlen;
1304	int i;
1305	int gen;
1306
1307	sc = vsc;
1308	txq = &sc->vmx_txq[pi->ipi_qsidx];
1309	txr = &txq->vxtxq_cmd_ring;
1310	segs = pi->ipi_segs;
1311	nsegs = pi->ipi_nsegs;
1312	pidx = pi->ipi_pidx;
1313
1314	KASSERT(nsegs <= VMXNET3_TX_MAXSEGS,
1315	    ("%s: packet with too many segments %d", __func__, nsegs));
1316
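	/*
	 * The SOP descriptor is initially written with an inverted
	 * generation bit so the device ignores the packet until ownership
	 * is transferred by flipping sop->gen at the end; the remaining
	 * descriptors carry the current generation.
	 */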
1317	sop = &txr->vxtxr_txd[pidx];
	gen = txr->vxtxr_gen ^ 1;	/* Not yet owned by the device */
1319
1320	for (i = 0; i < nsegs; i++) {
1321		txd = &txr->vxtxr_txd[pidx];
1322
1323		txd->addr = segs[i].ds_addr;
1324		txd->len = segs[i].ds_len;
1325		txd->gen = gen;
1326		txd->dtype = 0;
1327		txd->offload_mode = VMXNET3_OM_NONE;
1328		txd->offload_pos = 0;
1329		txd->hlen = 0;
1330		txd->eop = 0;
1331		txd->compreq = 0;
1332		txd->vtag_mode = 0;
1333		txd->vtag = 0;
1334
1335		if (++pidx == txr->vxtxr_ndesc) {
1336			pidx = 0;
1337			txr->vxtxr_gen ^= 1;
1338		}
1339		gen = txr->vxtxr_gen;
1340	}
1341	txd->eop = 1;
1342	txd->compreq = !!(pi->ipi_flags & IPI_TX_INTR);
1343	pi->ipi_new_pidx = pidx;
1344
1345	/*
1346	 * VLAN
1347	 */
1348	if (pi->ipi_mflags & M_VLANTAG) {
1349		sop->vtag_mode = 1;
1350		sop->vtag = pi->ipi_vtag;
1351	}
1352
1353	/*
1354	 * TSO and checksum offloads
1355	 */
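	/*
	 * For TSO, hlen covers the headers through the TCP header and
	 * offload_pos carries the MSS.  For checksum offload, hlen is the
	 * L2 + L3 header length and offload_pos is the offset of the L4
	 * checksum field.
	 */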
1356	hdrlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
1357	if (pi->ipi_csum_flags & CSUM_TSO) {
1358		sop->offload_mode = VMXNET3_OM_TSO;
1359		sop->hlen = hdrlen + pi->ipi_tcp_hlen;
1360		sop->offload_pos = pi->ipi_tso_segsz;
1361	} else if (pi->ipi_csum_flags & (VMXNET3_CSUM_OFFLOAD |
1362	    VMXNET3_CSUM_OFFLOAD_IPV6)) {
1363		sop->offload_mode = VMXNET3_OM_CSUM;
1364		sop->hlen = hdrlen;
1365		sop->offload_pos = hdrlen +
1366		    ((pi->ipi_ipproto == IPPROTO_TCP) ?
1367			offsetof(struct tcphdr, th_sum) :
1368			offsetof(struct udphdr, uh_sum));
1369	}
1370
1371	/* Finally, change the ownership. */
1372	vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
1373	sop->gen ^= 1;
1374
1375	return (0);
1376}
1377
1378static void
1379vmxnet3_isc_txd_flush(void *vsc, uint16_t txqid, qidx_t pidx)
1380{
1381	struct vmxnet3_softc *sc;
1382	struct vmxnet3_txqueue *txq;
1383
1384	sc = vsc;
1385	txq = &sc->vmx_txq[txqid];
1386
1387	/*
1388	 * pidx is what we last set ipi_new_pidx to in
1389	 * vmxnet3_isc_txd_encap()
1390	 */
1391
1392	/*
1393	 * Avoid expensive register updates if the flush request is
1394	 * redundant.
1395	 */
1396	if (txq->vxtxq_last_flush == pidx)
1397		return;
1398	txq->vxtxq_last_flush = pidx;
1399	vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id), pidx);
1400}
1401
1402static int
1403vmxnet3_isc_txd_credits_update(void *vsc, uint16_t txqid, bool clear)
1404{
1405	struct vmxnet3_softc *sc;
1406	struct vmxnet3_txqueue *txq;
1407	struct vmxnet3_comp_ring *txc;
1408	struct vmxnet3_txcompdesc *txcd;
1409	struct vmxnet3_txring *txr;
1410	int processed;
1411
1412	sc = vsc;
1413	txq = &sc->vmx_txq[txqid];
1414	txc = &txq->vxtxq_comp_ring;
1415	txr = &txq->vxtxq_cmd_ring;
1416
1417	/*
1418	 * If clear is true, we need to report the number of TX command ring
1419	 * descriptors that have been processed by the device.  If clear is
1420	 * false, we just need to report whether or not at least one TX
1421	 * command ring descriptor has been processed by the device.
1422	 */
1423	processed = 0;
1424	for (;;) {
1425		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
1426		if (txcd->gen != txc->vxcr_gen)
1427			break;
1428		else if (!clear)
1429			return (1);
1430		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1431
1432		MPASS(txc->vxcr_next < txc->vxcr_ndesc);
1433		if (++txc->vxcr_next >= txc->vxcr_ndesc) {
1434			txc->vxcr_next = 0;
1435			txc->vxcr_gen ^= 1;
1436		}
1437
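		/*
		 * Count the command ring descriptors covered by this
		 * completion, accounting for wraparound of the ring.
		 */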
1438		if (txcd->eop_idx < txr->vxtxr_next)
1439			processed += txr->vxtxr_ndesc -
1440			    (txr->vxtxr_next - txcd->eop_idx) + 1;
1441		else
1442			processed += txcd->eop_idx - txr->vxtxr_next + 1;
1443		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
1444	}
1445
1446	return (processed);
1447}
1448
1449static int
1450vmxnet3_isc_rxd_available(void *vsc, uint16_t rxqid, qidx_t idx, qidx_t budget)
1451{
1452	struct vmxnet3_softc *sc;
1453	struct vmxnet3_rxqueue *rxq;
1454	struct vmxnet3_comp_ring *rxc;
1455	struct vmxnet3_rxcompdesc *rxcd;
1456	int avail;
1457	int completed_gen;
1458#ifdef INVARIANTS
1459	int expect_sop = 1;
1460#endif
1461	sc = vsc;
1462	rxq = &sc->vmx_rxq[rxqid];
1463	rxc = &rxq->vxrxq_comp_ring;
1464
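	/*
	 * Walk the completion ring from idx, counting completed packets
	 * (EOP descriptors with a non-zero length) until the budget is
	 * exceeded or a descriptor still owned by the device is found.
	 */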
1465	avail = 0;
1466	completed_gen = rxc->vxcr_gen;
1467	for (;;) {
1468		rxcd = &rxc->vxcr_u.rxcd[idx];
1469		if (rxcd->gen != completed_gen)
1470			break;
1471		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1472
1473#ifdef INVARIANTS
1474		if (expect_sop)
1475			KASSERT(rxcd->sop, ("%s: expected sop", __func__));
1476		else
1477			KASSERT(!rxcd->sop, ("%s: unexpected sop", __func__));
1478		expect_sop = rxcd->eop;
1479#endif
1480		if (rxcd->eop && (rxcd->len != 0))
1481			avail++;
1482		if (avail > budget)
1483			break;
1484		if (++idx == rxc->vxcr_ndesc) {
1485			idx = 0;
1486			completed_gen ^= 1;
1487		}
1488	}
1489
1490	return (avail);
1491}
1492
1493static int
1494vmxnet3_isc_rxd_pkt_get(void *vsc, if_rxd_info_t ri)
1495{
1496	struct vmxnet3_softc *sc;
1497	if_softc_ctx_t scctx;
1498	struct vmxnet3_rxqueue *rxq;
1499	struct vmxnet3_comp_ring *rxc;
1500	struct vmxnet3_rxcompdesc *rxcd;
1501	if_rxd_frag_t frag;
1502	int cqidx;
1503	uint16_t total_len;
1504	uint8_t nfrags;
1505	uint8_t i;
1506	uint8_t flid;
1507
1508	sc = vsc;
1509	scctx = sc->vmx_scctx;
1510	rxq = &sc->vmx_rxq[ri->iri_qsidx];
1511	rxc = &rxq->vxrxq_comp_ring;
1512
1513	/*
1514	 * Get a single packet starting at the given index in the completion
1515	 * queue.  That we have been called indicates that
1516	 * vmxnet3_isc_rxd_available() has already verified that either
1517	 * there is a complete packet available starting at the given index,
1518	 * or there are one or more zero length packets starting at the
1519	 * given index followed by a complete packet, so no verification of
1520	 * ownership of the descriptors (and no associated read barrier) is
1521	 * required here.
1522	 */
1523	cqidx = ri->iri_cidx;
1524	rxcd = &rxc->vxcr_u.rxcd[cqidx];
1525	while (rxcd->len == 0) {
1526		KASSERT(rxcd->sop && rxcd->eop,
1527		    ("%s: zero-length packet without both sop and eop set",
1528			__func__));
1529		rxc->vxcr_zero_length++;
1530		if (++cqidx == rxc->vxcr_ndesc) {
1531			cqidx = 0;
1532			rxc->vxcr_gen ^= 1;
1533		}
1534		rxcd = &rxc->vxcr_u.rxcd[cqidx];
1535	}
1536	KASSERT(rxcd->sop, ("%s: expected sop", __func__));
1537
1538	/*
1539	 * RSS and flow ID.
1540	 * Types other than M_HASHTYPE_NONE and M_HASHTYPE_OPAQUE_HASH should
1541	 * be used only if the software RSS is enabled and it uses the same
1542	 * algorithm and the hash key as the "hardware".  If the software RSS
1543	 * is not enabled, then it's simply pointless to use those types.
1544	 * If it's enabled but with different parameters, then hash values will
1545	 * not match.
1546	 */
1547	ri->iri_flowid = rxcd->rss_hash;
1548#ifdef RSS
1549	if ((sc->vmx_flags & VMXNET3_FLAG_SOFT_RSS) != 0) {
1550		switch (rxcd->rss_type) {
1551		case VMXNET3_RCD_RSS_TYPE_NONE:
1552			ri->iri_flowid = ri->iri_qsidx;
1553			ri->iri_rsstype = M_HASHTYPE_NONE;
1554			break;
1555		case VMXNET3_RCD_RSS_TYPE_IPV4:
1556			ri->iri_rsstype = M_HASHTYPE_RSS_IPV4;
1557			break;
1558		case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
1559			ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV4;
1560			break;
1561		case VMXNET3_RCD_RSS_TYPE_IPV6:
1562			ri->iri_rsstype = M_HASHTYPE_RSS_IPV6;
1563			break;
1564		case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
1565			ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV6;
1566			break;
1567		default:
1568			ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
1569			break;
1570		}
1571	} else
1572#endif
1573	{
1574		switch (rxcd->rss_type) {
1575		case VMXNET3_RCD_RSS_TYPE_NONE:
1576			ri->iri_flowid = ri->iri_qsidx;
1577			ri->iri_rsstype = M_HASHTYPE_NONE;
1578			break;
1579		default:
1580			ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
1581			break;
1582		}
1583	}
1584
1585	/*
1586	 * The queue numbering scheme used for rxcd->qid is as follows:
1587	 *  - All of the command ring 0s are numbered [0, nrxqsets - 1]
1588	 *  - All of the command ring 1s are numbered [nrxqsets, 2*nrxqsets - 1]
1589	 *
1590	 * Thus, rxcd->qid less than nrxqsets indicates command ring (and
1591	 * flid) 0, and rxcd->qid greater than or equal to nrxqsets
1592	 * indicates command ring (and flid) 1.
1593	 */
1594	nfrags = 0;
1595	total_len = 0;
1596	do {
1597		rxcd = &rxc->vxcr_u.rxcd[cqidx];
1598		KASSERT(rxcd->gen == rxc->vxcr_gen,
1599		    ("%s: generation mismatch", __func__));
1600		KASSERT(nfrags < IFLIB_MAX_RX_SEGS,
1601		    ("%s: too many fragments", __func__));
1602		if (__predict_true(rxcd->len != 0)) {
1603			frag = &ri->iri_frags[nfrags];
1604			flid = (rxcd->qid >= scctx->isc_nrxqsets) ? 1 : 0;
1605			frag->irf_flid = flid;
1606			frag->irf_idx = rxcd->rxd_idx;
1607			frag->irf_len = rxcd->len;
1608			total_len += rxcd->len;
1609			nfrags++;
1610		} else {
1611			rxc->vcxr_zero_length_frag++;
1612		}
1613		if (++cqidx == rxc->vxcr_ndesc) {
1614			cqidx = 0;
1615			rxc->vxcr_gen ^= 1;
1616		}
1617	} while (!rxcd->eop);
1618
1619	ri->iri_cidx = cqidx;
1620	ri->iri_nfrags = nfrags;
1621	ri->iri_len = total_len;
1622
1623	/*
1624	 * If there's an error, the last descriptor in the packet will
1625	 * have the error indicator set.  In this case, set all
1626	 * fragment lengths to zero.  This will cause iflib to discard
1627	 * the packet, but process all associated descriptors through
1628	 * the refill mechanism.
1629	 */
1630	if (__predict_false(rxcd->error)) {
1631		rxc->vxcr_pkt_errors++;
1632		for (i = 0; i < nfrags; i++) {
1633			frag = &ri->iri_frags[i];
1634			frag->irf_len = 0;
1635		}
1636	} else {
1637		/* Checksum offload information is in the last descriptor. */
1638		if (!rxcd->no_csum) {
1639			uint32_t csum_flags = 0;
1640
1641			if (rxcd->ipv4) {
1642				csum_flags |= CSUM_IP_CHECKED;
1643				if (rxcd->ipcsum_ok)
1644					csum_flags |= CSUM_IP_VALID;
1645			}
1646			if (!rxcd->fragment && (rxcd->tcp || rxcd->udp)) {
1647				csum_flags |= CSUM_L4_CALC;
1648				if (rxcd->csum_ok) {
1649					csum_flags |= CSUM_L4_VALID;
1650					ri->iri_csum_data = 0xffff;
1651				}
1652			}
1653			ri->iri_csum_flags = csum_flags;
1654		}
1655
1656		/* VLAN information is in the last descriptor. */
1657		if (rxcd->vlan) {
1658			ri->iri_flags |= M_VLANTAG;
1659			ri->iri_vtag = rxcd->vtag;
1660		}
1661	}
1662
1663	return (0);
1664}
1665
1666static void
1667vmxnet3_isc_rxd_refill(void *vsc, if_rxd_update_t iru)
1668{
1669	struct vmxnet3_softc *sc;
1670	struct vmxnet3_rxqueue *rxq;
1671	struct vmxnet3_rxring *rxr;
1672	struct vmxnet3_rxdesc *rxd;
1673	uint64_t *paddrs;
1674	int count;
1675	int len;
1676	int idx;
1677	int i;
1678	uint8_t flid;
1679	uint8_t btype;
1680
1681	count = iru->iru_count;
1682	len = iru->iru_buf_size;
1683	flid = iru->iru_flidx;
1684	paddrs = iru->iru_paddrs;
1685
1686	sc = vsc;
1687	rxq = &sc->vmx_rxq[iru->iru_qsidx];
1688	rxr = &rxq->vxrxq_cmd_ring[flid];
1689	rxd = rxr->vxrxr_rxd;
1690
1691	/*
1692	 * Command ring 0 is filled with BTYPE_HEAD descriptors, and
1693	 * command ring 1 is filled with BTYPE_BODY descriptors.
1694	 */
1695	btype = (flid == 0) ? VMXNET3_BTYPE_HEAD : VMXNET3_BTYPE_BODY;
1696	/*
1697	 * The refill entries from iflib will advance monotonically,
1698	 * but the refilled descriptors may not be contiguous due to
1699	 * earlier skipping of descriptors by the device.  The refill
1700	 * entries from iflib need an entire state update, while the
1701	 * descriptors previously skipped by the device only need to
1702	 * have their generation numbers updated.
1703	 */
1704	idx = rxr->vxrxr_refill_start;
1705	i = 0;
1706	do {
1707		if (idx == iru->iru_idxs[i]) {
1708			rxd[idx].addr = paddrs[i];
1709			rxd[idx].len = len;
1710			rxd[idx].btype = btype;
1711			i++;
1712		} else
1713			rxr->vxrxr_desc_skips++;
1714		rxd[idx].gen = rxr->vxrxr_gen;
1715
1716		if (++idx == rxr->vxrxr_ndesc) {
1717			idx = 0;
1718			rxr->vxrxr_gen ^= 1;
1719		}
1720	} while (i != count);
1721	rxr->vxrxr_refill_start = idx;
1722}
1723
1724static void
1725vmxnet3_isc_rxd_flush(void *vsc, uint16_t rxqid, uint8_t flid, qidx_t pidx)
1726{
1727	struct vmxnet3_softc *sc;
1728	bus_size_t r;
1729
1730	sc = vsc;
1731
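	/*
	 * Writing pidx to the ring's head register notifies the device of
	 * the newly refilled descriptors.  Command ring 0 is flushed via
	 * RXH1 and command ring 1 via RXH2.
	 */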
1732	if (flid == 0)
1733		r = VMXNET3_BAR0_RXH1(rxqid);
1734	else
1735		r = VMXNET3_BAR0_RXH2(rxqid);
1736
1737	vmxnet3_write_bar0(sc, r, pidx);
1738}
1739
1740static int
1741vmxnet3_legacy_intr(void *xsc)
1742{
1743	struct vmxnet3_softc *sc;
1744	if_softc_ctx_t scctx;
1745	if_ctx_t ctx;
1746
1747	sc = xsc;
1748	scctx = sc->vmx_scctx;
1749	ctx = sc->vmx_ctx;
1750
1751	/*
1752	 * When there is only a single interrupt configured, this routine
1753	 * runs in fast interrupt context, following which the rxq 0 task
1754	 * will be enqueued.
1755	 */
1756	if (scctx->isc_intr == IFLIB_INTR_LEGACY) {
1757		if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
1758			return (FILTER_HANDLED);
1759	}
1760	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1761		vmxnet3_intr_disable_all(ctx);
1762
1763	if (sc->vmx_ds->event != 0)
1764		iflib_admin_intr_deferred(ctx);
1765
1766	/*
1767	 * XXX - When there is both rxq and event activity, do we care
1768	 * whether the rxq 0 task or the admin task re-enables the interrupt
1769	 * first?
1770	 */
1771	return (FILTER_SCHEDULE_THREAD);
1772}
1773
1774static int
1775vmxnet3_rxq_intr(void *vrxq)
1776{
1777	struct vmxnet3_softc *sc;
1778	struct vmxnet3_rxqueue *rxq;
1779
1780	rxq = vrxq;
1781	sc = rxq->vxrxq_sc;
1782
1783	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1784		vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx);
1785
1786	return (FILTER_SCHEDULE_THREAD);
1787}
1788
1789static int
1790vmxnet3_event_intr(void *vsc)
1791{
1792	struct vmxnet3_softc *sc;
1793
1794	sc = vsc;
1795
1796	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1797		vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
1798
1799	/*
1800	 * The work will be done via vmxnet3_update_admin_status(), and the
1801	 * interrupt will be re-enabled in vmxnet3_link_intr_enable().
1804	 */
1805	return (FILTER_SCHEDULE_THREAD);
1806}
1807
1808static void
1809vmxnet3_stop(if_ctx_t ctx)
1810{
1811	struct vmxnet3_softc *sc;
1812
1813	sc = iflib_get_softc(ctx);
1814
1815	sc->vmx_link_active = 0;
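	/*
	 * Quiesce the device and reset it so that a subsequent init starts
	 * from a clean state.
	 */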
1816	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
1817	vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
1818}
1819
1820static void
1821vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
1822{
1823	struct vmxnet3_txring *txr;
1824	struct vmxnet3_comp_ring *txc;
1825
1826	txq->vxtxq_last_flush = -1;
1827
1828	txr = &txq->vxtxq_cmd_ring;
1829	txr->vxtxr_next = 0;
1830	txr->vxtxr_gen = VMXNET3_INIT_GEN;
1831	/*
1832	 * iflib has zeroed out the descriptor array during the prior attach
1833	 * or stop
1834	 */
1835
1836	txc = &txq->vxtxq_comp_ring;
1837	txc->vxcr_next = 0;
1838	txc->vxcr_gen = VMXNET3_INIT_GEN;
1839	/*
1840	 * iflib has zeroed out the descriptor array during the prior attach
1841	 * or stop
1842	 */
1843}
1844
1845static void
1846vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
1847{
1848	struct vmxnet3_rxring *rxr;
1849	struct vmxnet3_comp_ring *rxc;
1850	int i;
1851
1852	/*
1853	 * The descriptors will be populated with buffers during a
1854	 * subsequent invocation of vmxnet3_isc_rxd_refill()
1855	 */
1856	for (i = 0; i < sc->vmx_sctx->isc_nrxqs - 1; i++) {
1857		rxr = &rxq->vxrxq_cmd_ring[i];
1858		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
1859		rxr->vxrxr_desc_skips = 0;
1860		rxr->vxrxr_refill_start = 0;
1861		/*
1862		 * iflib has zeroed out the descriptor array during the
1863		 * prior attach or stop
1864		 */
1865	}
1866
1867	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
1868		rxr = &rxq->vxrxq_cmd_ring[i];
1869		rxr->vxrxr_gen = 0;
1870		rxr->vxrxr_desc_skips = 0;
1871		rxr->vxrxr_refill_start = 0;
1872		bzero(rxr->vxrxr_rxd,
1873		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
1874	}
1875
1876	rxc = &rxq->vxrxq_comp_ring;
1877	rxc->vxcr_next = 0;
1878	rxc->vxcr_gen = VMXNET3_INIT_GEN;
1879	rxc->vxcr_zero_length = 0;
1880	rxc->vcxr_zero_length_frag = 0;
1881	rxc->vxcr_pkt_errors = 0;
1882	/*
1883	 * iflib has zeroed out the descriptor array during the prior attach
1884	 * or stop
1885	 */
1886}
1887
1888static void
1889vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
1890{
1891	if_softc_ctx_t scctx;
1892	int q;
1893
1894	scctx = sc->vmx_scctx;
1895
1896	for (q = 0; q < scctx->isc_ntxqsets; q++)
1897		vmxnet3_txinit(sc, &sc->vmx_txq[q]);
1898
1899	for (q = 0; q < scctx->isc_nrxqsets; q++)
1900		vmxnet3_rxinit(sc, &sc->vmx_rxq[q]);
1901}
1902
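/*
 * Issue the ENABLE command to activate the device, then reset both rx
 * ring head registers for every receive queue.
 */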
1903static int
1904vmxnet3_enable_device(struct vmxnet3_softc *sc)
1905{
1906	if_softc_ctx_t scctx;
1907	int q;
1908
1909	scctx = sc->vmx_scctx;
1910
1911	if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
1912		device_printf(sc->vmx_dev, "device enable command failed!\n");
1913		return (1);
1914	}
1915
1916	/* Reset the Rx queue heads. */
1917	for (q = 0; q < scctx->isc_nrxqsets; q++) {
1918		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
1919		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
1920	}
1921
1922	return (0);
1923}
1924
1925static void
1926vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
1927{
1928	if_t ifp;
1929
1930	ifp = sc->vmx_ifp;
1931
1932	vmxnet3_set_rxfilter(sc, if_getflags(ifp));
1933
1934	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
1935		bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter,
1936		    sizeof(sc->vmx_ds->vlan_filter));
1937	else
1938		bzero(sc->vmx_ds->vlan_filter,
1939		    sizeof(sc->vmx_ds->vlan_filter));
1940	vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
1941}
1942
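/*
 * Bring the interface up: program the MAC address, republish the
 * shared data and queue state to the device, enable it, and restore
 * the rx filters before reporting link state.
 */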
1943static void
1944vmxnet3_init(if_ctx_t ctx)
1945{
1946	struct vmxnet3_softc *sc;
1947
1948	sc = iflib_get_softc(ctx);
1949
1950	/* Use the current MAC address. */
1951	bcopy(if_getlladdr(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
1952	vmxnet3_set_lladdr(sc);
1953
1954	vmxnet3_reinit_shared_data(sc);
1955	vmxnet3_reinit_queues(sc);
1956
1957	vmxnet3_enable_device(sc);
1958
1959	vmxnet3_reinit_rxfilters(sc);
1960	vmxnet3_link_status(sc);
1961}
1962
1963static void
1964vmxnet3_multi_set(if_ctx_t ctx)
1965{
1966
1967	vmxnet3_set_rxfilter(iflib_get_softc(ctx),
1968	    if_getflags(iflib_get_ifp(ctx)));
1969}
1970
1971static int
1972vmxnet3_mtu_set(if_ctx_t ctx, uint32_t mtu)
1973{
1974	struct vmxnet3_softc *sc;
1975	if_softc_ctx_t scctx;
1976
1977	sc = iflib_get_softc(ctx);
1978	scctx = sc->vmx_scctx;
1979
1980	if (mtu > VMXNET3_TX_MAXSIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN +
1981		ETHER_CRC_LEN))
1982		return (EINVAL);
1983
1984	/*
1985	 * Update the max frame size so that the rx mbuf size is
1986	 * chosen based on the new mtu during the interface init that
1987	 * will occur after this routine returns.
1988	 */
1989	scctx->isc_max_frame_size = mtu +
1990		ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN;
1991	/* The rx completion ring does not use buffers, so no size applies. */
1992	scctx->isc_rxd_buf_size[0] = 0;
1993	/*
1994	 * For header-type descriptors (used for first segment of
1995	 * packet), let iflib determine the buffer size based on the
1996	 * max frame size.
1997	 */
1998	scctx->isc_rxd_buf_size[1] = 0;
1999	/*
2000	 * For body-type descriptors (used for jumbo frames and LRO),
2001	 * always use page-sized buffers.
2002	 */
2003	scctx->isc_rxd_buf_size[2] = MJUMPAGESIZE;
2004
2005	return (0);
2006}
2007
2008static void
2009vmxnet3_media_status(if_ctx_t ctx, struct ifmediareq *ifmr)
2010{
2011	struct vmxnet3_softc *sc;
2012
2013	sc = iflib_get_softc(ctx);
2014
2015	ifmr->ifm_status = IFM_AVALID;
2016	ifmr->ifm_active = IFM_ETHER;
2017
2018	if (vmxnet3_link_is_up(sc) != 0) {
2019		ifmr->ifm_status |= IFM_ACTIVE;
2020		ifmr->ifm_active |= IFM_AUTO;
2021	} else
2022		ifmr->ifm_active |= IFM_NONE;
2023}
2024
2025static int
2026vmxnet3_media_change(if_ctx_t ctx)
2027{
2028
2029	/* Ignore. */
2030	return (0);
2031}
2032
2033static int
2034vmxnet3_promisc_set(if_ctx_t ctx, int flags)
2035{
2036
2037	vmxnet3_set_rxfilter(iflib_get_softc(ctx), flags);
2038
2039	return (0);
2040}
2041
2042static uint64_t
2043vmxnet3_get_counter(if_ctx_t ctx, ift_counter cnt)
2044{
2045	if_t ifp = iflib_get_ifp(ctx);
2046
2047	if (cnt < IFCOUNTERS)
2048		return (if_get_counter_default(ifp, cnt));
2049
2050	return (0);
2051}
2052
2053static void
2054vmxnet3_update_admin_status(if_ctx_t ctx)
2055{
2056	struct vmxnet3_softc *sc;
2057
2058	sc = iflib_get_softc(ctx);
2059	if (sc->vmx_ds->event != 0)
2060		vmxnet3_evintr(sc);
2061
2062	vmxnet3_refresh_host_stats(sc);
2063}
2064
2065static void
2066vmxnet3_txq_timer(if_ctx_t ctx, uint16_t qid)
2067{
2068	/* Host stats refresh is global, so just trigger it on txq 0 */
2069	if (qid == 0)
2070		vmxnet3_refresh_host_stats(iflib_get_softc(ctx));
2071}
2072
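/*
 * The VLAN filter is a 4096-bit bitmap stored as an array of 32-bit
 * words: tag >> 5 selects the word and tag & 0x1f selects the bit
 * within it.  For example, VLAN tag 100 maps to word 3, bit 4.
 */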
2073static void
2074vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
2075{
2076	int idx, bit;
2077
2078	if (tag == 0 || tag > 4095)
2079		return;
2080
2081	idx = (tag >> 5) & 0x7F;
2082	bit = tag & 0x1F;
2083
2084	/* Update our private VLAN bitvector. */
2085	if (add)
2086		sc->vmx_vlan_filter[idx] |= (1 << bit);
2087	else
2088		sc->vmx_vlan_filter[idx] &= ~(1 << bit);
2089}
2090
2091static void
2092vmxnet3_vlan_register(if_ctx_t ctx, uint16_t tag)
2093{
2094
2095	vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 1, tag);
2096}
2097
2098static void
2099vmxnet3_vlan_unregister(if_ctx_t ctx, uint16_t tag)
2100{
2101
2102	vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 0, tag);
2103}
2104
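/*
 * if_foreach_llmaddr() callback: copy each multicast address into the
 * device's table while space remains.  Every address is still counted
 * so the caller can detect overflow and fall back to all-multicast.
 */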
2105static u_int
2106vmxnet3_hash_maddr(void *arg, struct sockaddr_dl *sdl, u_int count)
2107{
2108	struct vmxnet3_softc *sc = arg;
2109
2110	if (count < VMXNET3_MULTICAST_MAX)
2111		bcopy(LLADDR(sdl), &sc->vmx_mcast[count * ETHER_ADDR_LEN],
2112		    ETHER_ADDR_LEN);
2113
2114	return (1);
2115}
2116
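/*
 * Program the receive filter.  Unicast and broadcast frames are always
 * accepted; promiscuous and all-multicast modes track the interface
 * flags, and the multicast table is rebuilt from the current address
 * list, falling back to all-multicast if it overflows.
 */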
2117static void
2118vmxnet3_set_rxfilter(struct vmxnet3_softc *sc, int flags)
2119{
2120	if_t ifp;
2121	struct vmxnet3_driver_shared *ds;
2122	u_int mode;
2123
2124	ifp = sc->vmx_ifp;
2125	ds = sc->vmx_ds;
2126
2127	mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
2128	if (flags & IFF_PROMISC)
2129		mode |= VMXNET3_RXMODE_PROMISC;
2130	if (flags & IFF_ALLMULTI)
2131		mode |= VMXNET3_RXMODE_ALLMULTI;
2132	else {
2133		int cnt;
2134
2135		cnt = if_foreach_llmaddr(ifp, vmxnet3_hash_maddr, sc);
2136		if (cnt >= VMXNET3_MULTICAST_MAX) {
2137			cnt = 0;
2138			mode |= VMXNET3_RXMODE_ALLMULTI;
2139		} else if (cnt > 0)
2140			mode |= VMXNET3_RXMODE_MCAST;
2141		ds->mcast_tablelen = cnt * ETHER_ADDR_LEN;
2142	}
2143
2144	ds->rxmode = mode;
2145
2146	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
2147	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
2148}
2149
2150static void
2151vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
2152{
2153
2154	vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
2155}
2156
2157static int
2158vmxnet3_link_is_up(struct vmxnet3_softc *sc)
2159{
2160	uint32_t status;
2161
2162	status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
2163	return (!!(status & 0x1));
2164}
2165
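/*
 * Report link state changes to iflib, advertising a fixed 10 Gbps link
 * speed.
 */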
2166static void
2167vmxnet3_link_status(struct vmxnet3_softc *sc)
2168{
2169	if_ctx_t ctx;
2170	uint64_t speed;
2171	int link;
2172
2173	ctx = sc->vmx_ctx;
2174	link = vmxnet3_link_is_up(sc);
2175	speed = IF_Gbps(10);
2176
2177	if (link != 0 && sc->vmx_link_active == 0) {
2178		sc->vmx_link_active = 1;
2179		iflib_link_state_change(ctx, LINK_STATE_UP, speed);
2180	} else if (link == 0 && sc->vmx_link_active != 0) {
2181		sc->vmx_link_active = 0;
2182		iflib_link_state_change(ctx, LINK_STATE_DOWN, speed);
2183	}
2184}
2185
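/*
 * The MAC address is exchanged with the device through two registers:
 * MACL holds bytes 0-3 and MACH holds bytes 4-5, least significant
 * octet first.
 */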
2186static void
2187vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
2188{
2189	uint32_t ml, mh;
2190
2191	ml  = sc->vmx_lladdr[0];
2192	ml |= sc->vmx_lladdr[1] << 8;
2193	ml |= sc->vmx_lladdr[2] << 16;
2194	ml |= sc->vmx_lladdr[3] << 24;
2195	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
2196
2197	mh  = sc->vmx_lladdr[4];
2198	mh |= sc->vmx_lladdr[5] << 8;
2199	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
2200}
2201
2202static void
2203vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
2204{
2205	uint32_t ml, mh;
2206
2207	ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
2208	mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
2209
2210	sc->vmx_lladdr[0] = ml;
2211	sc->vmx_lladdr[1] = ml >> 8;
2212	sc->vmx_lladdr[2] = ml >> 16;
2213	sc->vmx_lladdr[3] = ml >> 24;
2214	sc->vmx_lladdr[4] = mh;
2215	sc->vmx_lladdr[5] = mh >> 8;
2216}
2217
2218static void
2219vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
2220    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2221{
2222	struct sysctl_oid *node, *txsnode;
2223	struct sysctl_oid_list *list, *txslist;
2224	struct UPT1_TxStats *txstats;
2225	char namebuf[16];
2226
2227	txstats = &txq->vxtxq_ts->stats;
2228
2229	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id);
2230	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
2231	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Transmit Queue");
2232	txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);
2233
2234	/*
2235	 * Add statistics reported by the host. These are updated by the
2236	 * iflib txq timer on txq 0.
2237	 */
2238	txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats",
2239	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Host Statistics");
2240	txslist = SYSCTL_CHILDREN(txsnode);
2241	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
2242	    &txstats->TSO_packets, "TSO packets");
2243	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
2244	    &txstats->TSO_bytes, "TSO bytes");
2245	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
2246	    &txstats->ucast_packets, "Unicast packets");
2247	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
2248	    &txstats->ucast_bytes, "Unicast bytes");
2249	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
2250	    &txstats->mcast_packets, "Multicast packets");
2251	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
2252	    &txstats->mcast_bytes, "Multicast bytes");
2253	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
2254	    &txstats->error, "Errors");
2255	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
2256	    &txstats->discard, "Discards");
2257}
2258
2259static void
2260vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
2261    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2262{
2263	struct sysctl_oid *node, *rxsnode;
2264	struct sysctl_oid_list *list, *rxslist;
2265	struct UPT1_RxStats *rxstats;
2266	char namebuf[16];
2267
2268	rxstats = &rxq->vxrxq_rs->stats;
2269
2270	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
2271	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
2272	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Receive Queue");
2273	rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);
2274
2275	/*
2276	 * Add statistics reported by the host. These are updated by the
2277	 * iflib txq timer on txq 0.
2278	 */
2279	rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats",
2280	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Host Statistics");
2281	rxslist = SYSCTL_CHILDREN(rxsnode);
2282	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
2283	    &rxstats->LRO_packets, "LRO packets");
2284	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
2285	    &rxstats->LRO_bytes, "LRO bytes");
2286	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
2287	    &rxstats->ucast_packets, "Unicast packets");
2288	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
2289	    &rxstats->ucast_bytes, "Unicast bytes");
2290	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
2291	    &rxstats->mcast_packets, "Multicast packets");
2292	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
2293	    &rxstats->mcast_bytes, "Multicast bytes");
2294	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
2295	    &rxstats->bcast_packets, "Broadcast packets");
2296	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
2297	    &rxstats->bcast_bytes, "Broadcast bytes");
2298	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
2299	    &rxstats->nobuffer, "No buffer");
2300	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
2301	    &rxstats->error, "Errors");
2302}
2303
2304static void
2305vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
2306    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2307{
2308	if_softc_ctx_t scctx;
2309	struct sysctl_oid *node;
2310	struct sysctl_oid_list *list;
2311	int i;
2312
2313	scctx = sc->vmx_scctx;
2314
2315	for (i = 0; i < scctx->isc_ntxqsets; i++) {
2316		struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];
2317
2318		node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
2319		    "debug", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
2320		list = SYSCTL_CHILDREN(node);
2321
2322		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
2323		    &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
2324		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
2325		    &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
2326		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
2327		    &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
2328		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
2329		    &txq->vxtxq_comp_ring.vxcr_next, 0, "");
2330		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
2331		    &txq->vxtxq_comp_ring.vxcr_ndesc, 0, "");
2332		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
2333		    &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
2334	}
2335
2336	for (i = 0; i < scctx->isc_nrxqsets; i++) {
2337		struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];
2338
2339		node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
2340		    "debug", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
2341		list = SYSCTL_CHILDREN(node);
2342
2343		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
2344		    &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
2345		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
2346		    &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
2347		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd0_desc_skips", CTLFLAG_RD,
2348		    &rxq->vxrxq_cmd_ring[0].vxrxr_desc_skips, 0, "");
2349		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
2350		    &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
2351		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
2352		    &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
2353		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd1_desc_skips", CTLFLAG_RD,
2354		    &rxq->vxrxq_cmd_ring[1].vxrxr_desc_skips, 0, "");
2355		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
2356		    &rxq->vxrxq_comp_ring.vxcr_ndesc, 0, "");
2357		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
2358		    &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
2359		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_zero_length", CTLFLAG_RD,
2360		    &rxq->vxrxq_comp_ring.vxcr_zero_length, 0, "");
2361		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_zero_length_frag",
2362		    CTLFLAG_RD, &rxq->vxrxq_comp_ring.vcxr_zero_length_frag,
2363		    0, "");
2364		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_pkt_errors", CTLFLAG_RD,
2365		    &rxq->vxrxq_comp_ring.vxcr_pkt_errors, 0, "");
2366	}
2367}
2368
2369static void
2370vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
2371    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2372{
2373	if_softc_ctx_t scctx;
2374	int i;
2375
2376	scctx = sc->vmx_scctx;
2377
2378	for (i = 0; i < scctx->isc_ntxqsets; i++)
2379		vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
2380	for (i = 0; i < scctx->isc_nrxqsets; i++)
2381		vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);
2382
2383	vmxnet3_setup_debug_sysctl(sc, ctx, child);
2384}
2385
2386static void
2387vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
2388{
2389	device_t dev;
2390	struct sysctl_ctx_list *ctx;
2391	struct sysctl_oid *tree;
2392	struct sysctl_oid_list *child;
2393
2394	dev = sc->vmx_dev;
2395	ctx = device_get_sysctl_ctx(dev);
2396	tree = device_get_sysctl_tree(dev);
2397	child = SYSCTL_CHILDREN(tree);
2398
2399	vmxnet3_setup_queue_sysctl(sc, ctx, child);
2400}
2401
2402static void
2403vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
2404{
2405
2406	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
2407}
2408
2409static uint32_t
2410vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
2411{
2412
2413	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
2414}
2415
2416static void
2417vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
2418{
2419
2420	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
2421}
2422
2423static void
2424vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
2425{
2426
2427	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
2428}
2429
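/*
 * Issue a command and return its result: the command code is written
 * to the BAR1 CMD register and, after a bus barrier, the result is
 * read back from the same register.
 */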
2430static uint32_t
2431vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
2432{
2433
2434	vmxnet3_write_cmd(sc, cmd);
2435	bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
2436	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
2437	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
2438}
2439
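/*
 * Each interrupt vector has a mask register in BAR0; writing 0 unmasks
 * the vector and writing 1 masks it.
 */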
2440static void
2441vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
2442{
2443
2444	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
2445}
2446
2447static void
2448vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
2449{
2450
2451	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
2452}
2453
2454static int
2455vmxnet3_tx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
2456{
2457	/* Not using interrupts for TX */
2458	return (0);
2459}
2460
2461static int
2462vmxnet3_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
2463{
2464	struct vmxnet3_softc *sc;
2465
2466	sc = iflib_get_softc(ctx);
2467	vmxnet3_enable_intr(sc, sc->vmx_rxq[qid].vxrxq_intr_idx);
2468	return (0);
2469}
2470
2471static void
2472vmxnet3_link_intr_enable(if_ctx_t ctx)
2473{
2474	struct vmxnet3_softc *sc;
2475
2476	sc = iflib_get_softc(ctx);
2477	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2478}
2479
2480static void
2481vmxnet3_intr_enable_all(if_ctx_t ctx)
2482{
2483	struct vmxnet3_softc *sc;
2484	if_softc_ctx_t scctx;
2485	int i;
2486
2487	sc = iflib_get_softc(ctx);
2488	scctx = sc->vmx_scctx;
2489	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
2490	for (i = 0; i < scctx->isc_vectors; i++)
2491		vmxnet3_enable_intr(sc, i);
2492}
2493
2494static void
2495vmxnet3_intr_disable_all(if_ctx_t ctx)
2496{
2497	struct vmxnet3_softc *sc;
2498	int i;
2499
2500	sc = iflib_get_softc(ctx);
2501	/*
2502	 * iflib may invoke this routine before vmxnet3_attach_post() has
2503	 * run, which is before the top level shared data area is
2504	 * initialized and the device made aware of it.
2505	 */
2506	if (sc->vmx_ds != NULL)
2507		sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
2508	for (i = 0; i < VMXNET3_MAX_INTRS; i++)
2509		vmxnet3_disable_intr(sc, i);
2510}
2511
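/*
 * Tell iflib which events require a full interface restart; only VLAN
 * configuration changes do.
 */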
2512static bool
2513vmxnet3_if_needs_restart(if_ctx_t ctx __unused, enum iflib_restart_event event)
2514{
2515	switch (event) {
2516	case IFLIB_RESTART_VLAN_CONFIG:
2517		return (true);
2518	default:
2519		return (false);
2520	}
2521}
2522
2523/*
2524 * Since this is a purely paravirtualized device, we do not have
2525 * to worry about DMA coherency. But at times, we must make sure
2526 * both the compiler and CPU do not reorder memory operations.
2527 */
2528static inline void
2529vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
2530{
2531
2532	switch (type) {
2533	case VMXNET3_BARRIER_RD:
2534		rmb();
2535		break;
2536	case VMXNET3_BARRIER_WR:
2537		wmb();
2538		break;
2539	case VMXNET3_BARRIER_RDWR:
2540		mb();
2541		break;
2542	default:
2543		panic("%s: bad barrier type %d", __func__, type);
2544	}
2545}
2546