if_vmx.c revision 320099
1/*-
2 * Copyright (c) 2013 Tsubai Masanari
3 * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 *
17 * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $
18 */
19
20/* Driver for VMware vmxnet3 virtual ethernet devices. */
21
22#include <sys/cdefs.h>
23__FBSDID("$FreeBSD: stable/11/sys/dev/vmware/vmxnet3/if_vmx.c 320099 2017-06-19 14:45:20Z avg $");
24
25#include <sys/param.h>
26#include <sys/systm.h>
27#include <sys/eventhandler.h>
28#include <sys/kernel.h>
29#include <sys/endian.h>
30#include <sys/sockio.h>
31#include <sys/mbuf.h>
32#include <sys/malloc.h>
33#include <sys/module.h>
34#include <sys/socket.h>
35#include <sys/sysctl.h>
36#include <sys/smp.h>
37#include <sys/taskqueue.h>
38#include <vm/vm.h>
39#include <vm/pmap.h>
40
41#include <net/ethernet.h>
42#include <net/if.h>
43#include <net/if_var.h>
44#include <net/if_arp.h>
45#include <net/if_dl.h>
46#include <net/if_types.h>
47#include <net/if_media.h>
48#include <net/if_vlan_var.h>
49
50#include <net/bpf.h>
51
52#include <netinet/in_systm.h>
53#include <netinet/in.h>
54#include <netinet/ip.h>
55#include <netinet/ip6.h>
56#include <netinet6/ip6_var.h>
57#include <netinet/udp.h>
58#include <netinet/tcp.h>
59
60#include <machine/in_cksum.h>
61
62#include <machine/bus.h>
63#include <machine/resource.h>
64#include <sys/bus.h>
65#include <sys/rman.h>
66
67#include <dev/pci/pcireg.h>
68#include <dev/pci/pcivar.h>
69
70#include "if_vmxreg.h"
71#include "if_vmxvar.h"
72
73#include "opt_inet.h"
74#include "opt_inet6.h"
75
76#ifdef VMXNET3_FAILPOINTS
77#include <sys/fail.h>
78static SYSCTL_NODE(DEBUG_FP, OID_AUTO, vmxnet3, CTLFLAG_RW, 0,
79    "vmxnet3 fail points");
80#define VMXNET3_FP	_debug_fail_point_vmxnet3
81#endif
82
83static int	vmxnet3_probe(device_t);
84static int	vmxnet3_attach(device_t);
85static int	vmxnet3_detach(device_t);
86static int	vmxnet3_shutdown(device_t);
87
88static int	vmxnet3_alloc_resources(struct vmxnet3_softc *);
89static void	vmxnet3_free_resources(struct vmxnet3_softc *);
90static int	vmxnet3_check_version(struct vmxnet3_softc *);
91static void	vmxnet3_initial_config(struct vmxnet3_softc *);
92static void	vmxnet3_check_multiqueue(struct vmxnet3_softc *);
93
94static int	vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *);
95static int	vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *);
96static int	vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *);
97static int	vmxnet3_alloc_interrupt(struct vmxnet3_softc *, int, int,
98		    struct vmxnet3_interrupt *);
99static int	vmxnet3_alloc_intr_resources(struct vmxnet3_softc *);
100static int	vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *);
101static int	vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *);
102static int	vmxnet3_setup_interrupts(struct vmxnet3_softc *);
103static int	vmxnet3_alloc_interrupts(struct vmxnet3_softc *);
104
105static void	vmxnet3_free_interrupt(struct vmxnet3_softc *,
106		    struct vmxnet3_interrupt *);
107static void	vmxnet3_free_interrupts(struct vmxnet3_softc *);
108
109#ifndef VMXNET3_LEGACY_TX
110static int	vmxnet3_alloc_taskqueue(struct vmxnet3_softc *);
111static void	vmxnet3_start_taskqueue(struct vmxnet3_softc *);
112static void	vmxnet3_drain_taskqueue(struct vmxnet3_softc *);
113static void	vmxnet3_free_taskqueue(struct vmxnet3_softc *);
114#endif
115
116static int	vmxnet3_init_rxq(struct vmxnet3_softc *, int);
117static int	vmxnet3_init_txq(struct vmxnet3_softc *, int);
118static int	vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *);
119static void	vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *);
120static void	vmxnet3_destroy_txq(struct vmxnet3_txqueue *);
121static void	vmxnet3_free_rxtx_queues(struct vmxnet3_softc *);
122
123static int	vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
124static void	vmxnet3_free_shared_data(struct vmxnet3_softc *);
125static int	vmxnet3_alloc_txq_data(struct vmxnet3_softc *);
126static void	vmxnet3_free_txq_data(struct vmxnet3_softc *);
127static int	vmxnet3_alloc_rxq_data(struct vmxnet3_softc *);
128static void	vmxnet3_free_rxq_data(struct vmxnet3_softc *);
129static int	vmxnet3_alloc_queue_data(struct vmxnet3_softc *);
130static void	vmxnet3_free_queue_data(struct vmxnet3_softc *);
131static int	vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
132static void	vmxnet3_init_shared_data(struct vmxnet3_softc *);
133static void	vmxnet3_init_hwassist(struct vmxnet3_softc *);
134static void	vmxnet3_reinit_interface(struct vmxnet3_softc *);
135static void	vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
136static void	vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
137static int	vmxnet3_alloc_data(struct vmxnet3_softc *);
138static void	vmxnet3_free_data(struct vmxnet3_softc *);
139static int	vmxnet3_setup_interface(struct vmxnet3_softc *);
140
141static void	vmxnet3_evintr(struct vmxnet3_softc *);
142static void	vmxnet3_txq_eof(struct vmxnet3_txqueue *);
143static void	vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *, struct mbuf *);
144static int	vmxnet3_newbuf(struct vmxnet3_softc *, struct vmxnet3_rxring *);
145static void	vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *,
146		    struct vmxnet3_rxring *, int);
147static void	vmxnet3_rxq_eof(struct vmxnet3_rxqueue *);
148static void	vmxnet3_legacy_intr(void *);
149static void	vmxnet3_txq_intr(void *);
150static void	vmxnet3_rxq_intr(void *);
151static void	vmxnet3_event_intr(void *);
152
153static void	vmxnet3_txstop(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
154static void	vmxnet3_rxstop(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
155static void	vmxnet3_stop(struct vmxnet3_softc *);
156
157static void	vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
158static int	vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
159static int	vmxnet3_reinit_queues(struct vmxnet3_softc *);
160static int	vmxnet3_enable_device(struct vmxnet3_softc *);
161static void	vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
162static int	vmxnet3_reinit(struct vmxnet3_softc *);
163static void	vmxnet3_init_locked(struct vmxnet3_softc *);
164static void	vmxnet3_init(void *);
165
166static int	vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *,struct mbuf *,
167		    int *, int *, int *);
168static int	vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *, struct mbuf **,
169		    bus_dmamap_t, bus_dma_segment_t [], int *);
170static void	vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *, bus_dmamap_t);
171static int	vmxnet3_txq_encap(struct vmxnet3_txqueue *, struct mbuf **);
172#ifdef VMXNET3_LEGACY_TX
173static void	vmxnet3_start_locked(struct ifnet *);
174static void	vmxnet3_start(struct ifnet *);
175#else
176static int	vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *,
177		    struct mbuf *);
178static int	vmxnet3_txq_mq_start(struct ifnet *, struct mbuf *);
179static void	vmxnet3_txq_tq_deferred(void *, int);
180#endif
181static void	vmxnet3_txq_start(struct vmxnet3_txqueue *);
182static void	vmxnet3_tx_start_all(struct vmxnet3_softc *);
183
184static void	vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
185		    uint16_t);
186static void	vmxnet3_register_vlan(void *, struct ifnet *, uint16_t);
187static void	vmxnet3_unregister_vlan(void *, struct ifnet *, uint16_t);
188static void	vmxnet3_set_rxfilter(struct vmxnet3_softc *);
189static int	vmxnet3_change_mtu(struct vmxnet3_softc *, int);
190static int	vmxnet3_ioctl(struct ifnet *, u_long, caddr_t);
191static uint64_t	vmxnet3_get_counter(struct ifnet *, ift_counter);
192
193#ifndef VMXNET3_LEGACY_TX
194static void	vmxnet3_qflush(struct ifnet *);
195#endif
196
197static int	vmxnet3_watchdog(struct vmxnet3_txqueue *);
198static void	vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
199static void	vmxnet3_tick(void *);
200static void	vmxnet3_link_status(struct vmxnet3_softc *);
201static void	vmxnet3_media_status(struct ifnet *, struct ifmediareq *);
202static int	vmxnet3_media_change(struct ifnet *);
203static void	vmxnet3_set_lladdr(struct vmxnet3_softc *);
204static void	vmxnet3_get_lladdr(struct vmxnet3_softc *);
205
206static void	vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *,
207		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
208static void	vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *,
209		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
210static void	vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *,
211		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
212static void	vmxnet3_setup_sysctl(struct vmxnet3_softc *);
213
214static void	vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t,
215		    uint32_t);
216static uint32_t	vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t);
217static void	vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t,
218		    uint32_t);
219static void	vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t);
220static uint32_t	vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t);
221
222static void	vmxnet3_enable_intr(struct vmxnet3_softc *, int);
223static void	vmxnet3_disable_intr(struct vmxnet3_softc *, int);
224static void	vmxnet3_enable_all_intrs(struct vmxnet3_softc *);
225static void	vmxnet3_disable_all_intrs(struct vmxnet3_softc *);
226
227static int	vmxnet3_dma_malloc(struct vmxnet3_softc *, bus_size_t,
228		    bus_size_t, struct vmxnet3_dma_alloc *);
229static void	vmxnet3_dma_free(struct vmxnet3_softc *,
230		    struct vmxnet3_dma_alloc *);
231static int	vmxnet3_tunable_int(struct vmxnet3_softc *,
232		    const char *, int);
233
234typedef enum {
235	VMXNET3_BARRIER_RD,
236	VMXNET3_BARRIER_WR,
237	VMXNET3_BARRIER_RDWR,
238} vmxnet3_barrier_t;
239
240static void	vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);
241
242/* Tunables. */
243static int vmxnet3_mq_disable = 0;
244TUNABLE_INT("hw.vmx.mq_disable", &vmxnet3_mq_disable);
245static int vmxnet3_default_txnqueue = VMXNET3_DEF_TX_QUEUES;
246TUNABLE_INT("hw.vmx.txnqueue", &vmxnet3_default_txnqueue);
247static int vmxnet3_default_rxnqueue = VMXNET3_DEF_RX_QUEUES;
248TUNABLE_INT("hw.vmx.rxnqueue", &vmxnet3_default_rxnqueue);
249static int vmxnet3_default_txndesc = VMXNET3_DEF_TX_NDESC;
250TUNABLE_INT("hw.vmx.txndesc", &vmxnet3_default_txndesc);
251static int vmxnet3_default_rxndesc = VMXNET3_DEF_RX_NDESC;
252TUNABLE_INT("hw.vmx.rxndesc", &vmxnet3_default_rxndesc);
253
254static device_method_t vmxnet3_methods[] = {
255	/* Device interface. */
256	DEVMETHOD(device_probe,		vmxnet3_probe),
257	DEVMETHOD(device_attach,	vmxnet3_attach),
258	DEVMETHOD(device_detach,	vmxnet3_detach),
259	DEVMETHOD(device_shutdown,	vmxnet3_shutdown),
260
261	DEVMETHOD_END
262};
263
264static driver_t vmxnet3_driver = {
265	"vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc)
266};
267
268static devclass_t vmxnet3_devclass;
269DRIVER_MODULE(vmx, pci, vmxnet3_driver, vmxnet3_devclass, 0, 0);
270
271MODULE_DEPEND(vmx, pci, 1, 1, 1);
272MODULE_DEPEND(vmx, ether, 1, 1, 1);
273
274#define VMXNET3_VMWARE_VENDOR_ID	0x15AD
275#define VMXNET3_VMWARE_DEVICE_ID	0x07B0
276
277static int
278vmxnet3_probe(device_t dev)
279{
280
281	if (pci_get_vendor(dev) == VMXNET3_VMWARE_VENDOR_ID &&
282	    pci_get_device(dev) == VMXNET3_VMWARE_DEVICE_ID) {
283		device_set_desc(dev, "VMware VMXNET3 Ethernet Adapter");
284		return (BUS_PROBE_DEFAULT);
285	}
286
287	return (ENXIO);
288}
289
290static int
291vmxnet3_attach(device_t dev)
292{
293	struct vmxnet3_softc *sc;
294	int error;
295
296	sc = device_get_softc(dev);
297	sc->vmx_dev = dev;
298
299	pci_enable_busmaster(dev);
300
301	VMXNET3_CORE_LOCK_INIT(sc, device_get_nameunit(dev));
302	callout_init_mtx(&sc->vmx_tick, &sc->vmx_mtx, 0);
303
304	vmxnet3_initial_config(sc);
305
306	error = vmxnet3_alloc_resources(sc);
307	if (error)
308		goto fail;
309
310	error = vmxnet3_check_version(sc);
311	if (error)
312		goto fail;
313
314	error = vmxnet3_alloc_rxtx_queues(sc);
315	if (error)
316		goto fail;
317
318#ifndef VMXNET3_LEGACY_TX
319	error = vmxnet3_alloc_taskqueue(sc);
320	if (error)
321		goto fail;
322#endif
323
324	error = vmxnet3_alloc_interrupts(sc);
325	if (error)
326		goto fail;
327
328	vmxnet3_check_multiqueue(sc);
329
330	error = vmxnet3_alloc_data(sc);
331	if (error)
332		goto fail;
333
334	error = vmxnet3_setup_interface(sc);
335	if (error)
336		goto fail;
337
338	error = vmxnet3_setup_interrupts(sc);
339	if (error) {
340		ether_ifdetach(sc->vmx_ifp);
341		device_printf(dev, "could not set up interrupt\n");
342		goto fail;
343	}
344
345	vmxnet3_setup_sysctl(sc);
346#ifndef VMXNET3_LEGACY_TX
347	vmxnet3_start_taskqueue(sc);
348#endif
349
350fail:
351	if (error)
352		vmxnet3_detach(dev);
353
354	return (error);
355}
356
357static int
358vmxnet3_detach(device_t dev)
359{
360	struct vmxnet3_softc *sc;
361	struct ifnet *ifp;
362
363	sc = device_get_softc(dev);
364	ifp = sc->vmx_ifp;
365
366	if (device_is_attached(dev)) {
367		VMXNET3_CORE_LOCK(sc);
368		vmxnet3_stop(sc);
369		VMXNET3_CORE_UNLOCK(sc);
370
371		callout_drain(&sc->vmx_tick);
372#ifndef VMXNET3_LEGACY_TX
373		vmxnet3_drain_taskqueue(sc);
374#endif
375
376		ether_ifdetach(ifp);
377	}
378
379	if (sc->vmx_vlan_attach != NULL) {
380		EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_attach);
381		sc->vmx_vlan_attach = NULL;
382	}
383	if (sc->vmx_vlan_detach != NULL) {
384		EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_detach);
385		sc->vmx_vlan_detach = NULL;
386	}
387
388#ifndef VMXNET3_LEGACY_TX
389	vmxnet3_free_taskqueue(sc);
390#endif
391	vmxnet3_free_interrupts(sc);
392
393	if (ifp != NULL) {
394		if_free(ifp);
395		sc->vmx_ifp = NULL;
396	}
397
398	ifmedia_removeall(&sc->vmx_media);
399
400	vmxnet3_free_data(sc);
401	vmxnet3_free_resources(sc);
402	vmxnet3_free_rxtx_queues(sc);
403
404	VMXNET3_CORE_LOCK_DESTROY(sc);
405
406	return (0);
407}
408
409static int
410vmxnet3_shutdown(device_t dev)
411{
412
413	return (0);
414}
415
416static int
417vmxnet3_alloc_resources(struct vmxnet3_softc *sc)
418{
419	device_t dev;
420	int rid;
421
422	dev = sc->vmx_dev;
423
424	rid = PCIR_BAR(0);
425	sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
426	    RF_ACTIVE);
427	if (sc->vmx_res0 == NULL) {
428		device_printf(dev,
429		    "could not map BAR0 memory\n");
430		return (ENXIO);
431	}
432
433	sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0);
434	sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0);
435
436	rid = PCIR_BAR(1);
437	sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
438	    RF_ACTIVE);
439	if (sc->vmx_res1 == NULL) {
440		device_printf(dev,
441		    "could not map BAR1 memory\n");
442		return (ENXIO);
443	}
444
445	sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1);
446	sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1);
447
448	if (pci_find_cap(dev, PCIY_MSIX, NULL) == 0) {
449		rid = PCIR_BAR(2);
450		sc->vmx_msix_res = bus_alloc_resource_any(dev,
451		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
452	}
453
454	if (sc->vmx_msix_res == NULL)
455		sc->vmx_flags |= VMXNET3_FLAG_NO_MSIX;
456
457	return (0);
458}
459
460static void
461vmxnet3_free_resources(struct vmxnet3_softc *sc)
462{
463	device_t dev;
464	int rid;
465
466	dev = sc->vmx_dev;
467
468	if (sc->vmx_res0 != NULL) {
469		rid = PCIR_BAR(0);
470		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res0);
471		sc->vmx_res0 = NULL;
472	}
473
474	if (sc->vmx_res1 != NULL) {
475		rid = PCIR_BAR(1);
476		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res1);
477		sc->vmx_res1 = NULL;
478	}
479
480	if (sc->vmx_msix_res != NULL) {
481		rid = PCIR_BAR(2);
482		bus_release_resource(dev, SYS_RES_MEMORY, rid,
483		    sc->vmx_msix_res);
484		sc->vmx_msix_res = NULL;
485	}
486}
487
488static int
489vmxnet3_check_version(struct vmxnet3_softc *sc)
490{
491	device_t dev;
492	uint32_t version;
493
494	dev = sc->vmx_dev;
495
496	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
497	if ((version & 0x01) == 0) {
498		device_printf(dev, "unsupported hardware version %#x\n",
499		    version);
500		return (ENOTSUP);
501	}
502	vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
503
504	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
505	if ((version & 0x01) == 0) {
506		device_printf(dev, "unsupported UPT version %#x\n", version);
507		return (ENOTSUP);
508	}
509	vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
510
511	return (0);
512}
513
514static int
515trunc_powerof2(int val)
516{
517
518	return (1U << (fls(val) - 1));
519}
520
521static void
522vmxnet3_initial_config(struct vmxnet3_softc *sc)
523{
524	int nqueue, ndesc;
525
526	nqueue = vmxnet3_tunable_int(sc, "txnqueue", vmxnet3_default_txnqueue);
527	if (nqueue > VMXNET3_MAX_TX_QUEUES || nqueue < 1)
528		nqueue = VMXNET3_DEF_TX_QUEUES;
529	if (nqueue > mp_ncpus)
530		nqueue = mp_ncpus;
531	sc->vmx_max_ntxqueues = trunc_powerof2(nqueue);
532
533	nqueue = vmxnet3_tunable_int(sc, "rxnqueue", vmxnet3_default_rxnqueue);
534	if (nqueue > VMXNET3_MAX_RX_QUEUES || nqueue < 1)
535		nqueue = VMXNET3_DEF_RX_QUEUES;
536	if (nqueue > mp_ncpus)
537		nqueue = mp_ncpus;
538	sc->vmx_max_nrxqueues = trunc_powerof2(nqueue);
539
540	if (vmxnet3_tunable_int(sc, "mq_disable", vmxnet3_mq_disable)) {
541		sc->vmx_max_nrxqueues = 1;
542		sc->vmx_max_ntxqueues = 1;
543	}
544
545	ndesc = vmxnet3_tunable_int(sc, "txd", vmxnet3_default_txndesc);
546	if (ndesc > VMXNET3_MAX_TX_NDESC || ndesc < VMXNET3_MIN_TX_NDESC)
547		ndesc = VMXNET3_DEF_TX_NDESC;
548	if (ndesc & VMXNET3_MASK_TX_NDESC)
549		ndesc &= ~VMXNET3_MASK_TX_NDESC;
550	sc->vmx_ntxdescs = ndesc;
551
552	ndesc = vmxnet3_tunable_int(sc, "rxd", vmxnet3_default_rxndesc);
553	if (ndesc > VMXNET3_MAX_RX_NDESC || ndesc < VMXNET3_MIN_RX_NDESC)
554		ndesc = VMXNET3_DEF_RX_NDESC;
555	if (ndesc & VMXNET3_MASK_RX_NDESC)
556		ndesc &= ~VMXNET3_MASK_RX_NDESC;
557	sc->vmx_nrxdescs = ndesc;
558	sc->vmx_max_rxsegs = VMXNET3_MAX_RX_SEGS;
559}
560
561static void
562vmxnet3_check_multiqueue(struct vmxnet3_softc *sc)
563{
564
565	if (sc->vmx_intr_type != VMXNET3_IT_MSIX)
566		goto out;
567
568	/* BMV: Just use the maximum configured for now. */
569	sc->vmx_nrxqueues = sc->vmx_max_nrxqueues;
570	sc->vmx_ntxqueues = sc->vmx_max_ntxqueues;
571
572	if (sc->vmx_nrxqueues > 1)
573		sc->vmx_flags |= VMXNET3_FLAG_RSS;
574
575	return;
576
577out:
578	sc->vmx_ntxqueues = 1;
579	sc->vmx_nrxqueues = 1;
580}
581
582static int
583vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *sc)
584{
585	device_t dev;
586	int nmsix, cnt, required;
587
588	dev = sc->vmx_dev;
589
590	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX)
591		return (1);
592
593	/* Allocate an additional vector for the events interrupt. */
594	required = sc->vmx_max_nrxqueues + sc->vmx_max_ntxqueues + 1;
595
596	nmsix = pci_msix_count(dev);
597	if (nmsix < required)
598		return (1);
599
600	cnt = required;
601	if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) {
602		sc->vmx_nintrs = required;
603		return (0);
604	} else
605		pci_release_msi(dev);
606
607	/* BMV TODO Fallback to sharing MSIX vectors if possible. */
608
609	return (1);
610}
611
612static int
613vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *sc)
614{
615	device_t dev;
616	int nmsi, cnt, required;
617
618	dev = sc->vmx_dev;
619	required = 1;
620
621	nmsi = pci_msi_count(dev);
622	if (nmsi < required)
623		return (1);
624
625	cnt = required;
626	if (pci_alloc_msi(dev, &cnt) == 0 && cnt >= required) {
627		sc->vmx_nintrs = 1;
628		return (0);
629	} else
630		pci_release_msi(dev);
631
632	return (1);
633}
634
635static int
636vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *sc)
637{
638
639	sc->vmx_nintrs = 1;
640	return (0);
641}
642
643static int
644vmxnet3_alloc_interrupt(struct vmxnet3_softc *sc, int rid, int flags,
645    struct vmxnet3_interrupt *intr)
646{
647	struct resource *irq;
648
649	irq = bus_alloc_resource_any(sc->vmx_dev, SYS_RES_IRQ, &rid, flags);
650	if (irq == NULL)
651		return (ENXIO);
652
653	intr->vmxi_irq = irq;
654	intr->vmxi_rid = rid;
655
656	return (0);
657}
658
659static int
660vmxnet3_alloc_intr_resources(struct vmxnet3_softc *sc)
661{
662	int i, rid, flags, error;
663
664	rid = 0;
665	flags = RF_ACTIVE;
666
667	if (sc->vmx_intr_type == VMXNET3_IT_LEGACY)
668		flags |= RF_SHAREABLE;
669	else
670		rid = 1;
671
672	for (i = 0; i < sc->vmx_nintrs; i++, rid++) {
673		error = vmxnet3_alloc_interrupt(sc, rid, flags,
674		    &sc->vmx_intrs[i]);
675		if (error)
676			return (error);
677	}
678
679	return (0);
680}
681
682static int
683vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *sc)
684{
685	device_t dev;
686	struct vmxnet3_txqueue *txq;
687	struct vmxnet3_rxqueue *rxq;
688	struct vmxnet3_interrupt *intr;
689	enum intr_type type;
690	int i, error;
691
692	dev = sc->vmx_dev;
693	intr = &sc->vmx_intrs[0];
694	type = INTR_TYPE_NET | INTR_MPSAFE;
695
696	for (i = 0; i < sc->vmx_ntxqueues; i++, intr++) {
697		txq = &sc->vmx_txq[i];
698		error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
699		     vmxnet3_txq_intr, txq, &intr->vmxi_handler);
700		if (error)
701			return (error);
702		bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler,
703		    "tq%d", i);
704		txq->vxtxq_intr_idx = intr->vmxi_rid - 1;
705	}
706
707	for (i = 0; i < sc->vmx_nrxqueues; i++, intr++) {
708		rxq = &sc->vmx_rxq[i];
709		error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
710		    vmxnet3_rxq_intr, rxq, &intr->vmxi_handler);
711		if (error)
712			return (error);
713		bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler,
714		    "rq%d", i);
715		rxq->vxrxq_intr_idx = intr->vmxi_rid - 1;
716	}
717
718	error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
719	    vmxnet3_event_intr, sc, &intr->vmxi_handler);
720	if (error)
721		return (error);
722	bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler, "event");
723	sc->vmx_event_intr_idx = intr->vmxi_rid - 1;
724
725	return (0);
726}
727
728static int
729vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *sc)
730{
731	struct vmxnet3_interrupt *intr;
732	int i, error;
733
734	intr = &sc->vmx_intrs[0];
735	error = bus_setup_intr(sc->vmx_dev, intr->vmxi_irq,
736	    INTR_TYPE_NET | INTR_MPSAFE, NULL, vmxnet3_legacy_intr, sc,
737	    &intr->vmxi_handler);
738
739	for (i = 0; i < sc->vmx_ntxqueues; i++)
740		sc->vmx_txq[i].vxtxq_intr_idx = 0;
741	for (i = 0; i < sc->vmx_nrxqueues; i++)
742		sc->vmx_rxq[i].vxrxq_intr_idx = 0;
743	sc->vmx_event_intr_idx = 0;
744
745	return (error);
746}
747
748static void
749vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
750{
751	struct vmxnet3_txqueue *txq;
752	struct vmxnet3_txq_shared *txs;
753	struct vmxnet3_rxqueue *rxq;
754	struct vmxnet3_rxq_shared *rxs;
755	int i;
756
757	sc->vmx_ds->evintr = sc->vmx_event_intr_idx;
758
759	for (i = 0; i < sc->vmx_ntxqueues; i++) {
760		txq = &sc->vmx_txq[i];
761		txs = txq->vxtxq_ts;
762		txs->intr_idx = txq->vxtxq_intr_idx;
763	}
764
765	for (i = 0; i < sc->vmx_nrxqueues; i++) {
766		rxq = &sc->vmx_rxq[i];
767		rxs = rxq->vxrxq_rs;
768		rxs->intr_idx = rxq->vxrxq_intr_idx;
769	}
770}
771
772static int
773vmxnet3_setup_interrupts(struct vmxnet3_softc *sc)
774{
775	int error;
776
777	error = vmxnet3_alloc_intr_resources(sc);
778	if (error)
779		return (error);
780
781	switch (sc->vmx_intr_type) {
782	case VMXNET3_IT_MSIX:
783		error = vmxnet3_setup_msix_interrupts(sc);
784		break;
785	case VMXNET3_IT_MSI:
786	case VMXNET3_IT_LEGACY:
787		error = vmxnet3_setup_legacy_interrupt(sc);
788		break;
789	default:
790		panic("%s: invalid interrupt type %d", __func__,
791		    sc->vmx_intr_type);
792	}
793
794	if (error == 0)
795		vmxnet3_set_interrupt_idx(sc);
796
797	return (error);
798}
799
800static int
801vmxnet3_alloc_interrupts(struct vmxnet3_softc *sc)
802{
803	device_t dev;
804	uint32_t config;
805	int error;
806
807	dev = sc->vmx_dev;
808	config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
809
810	sc->vmx_intr_type = config & 0x03;
811	sc->vmx_intr_mask_mode = (config >> 2) & 0x03;
812
813	switch (sc->vmx_intr_type) {
814	case VMXNET3_IT_AUTO:
815		sc->vmx_intr_type = VMXNET3_IT_MSIX;
816		/* FALLTHROUGH */
817	case VMXNET3_IT_MSIX:
818		error = vmxnet3_alloc_msix_interrupts(sc);
819		if (error == 0)
820			break;
821		sc->vmx_intr_type = VMXNET3_IT_MSI;
822		/* FALLTHROUGH */
823	case VMXNET3_IT_MSI:
824		error = vmxnet3_alloc_msi_interrupts(sc);
825		if (error == 0)
826			break;
827		sc->vmx_intr_type = VMXNET3_IT_LEGACY;
828		/* FALLTHROUGH */
829	case VMXNET3_IT_LEGACY:
830		error = vmxnet3_alloc_legacy_interrupts(sc);
831		if (error == 0)
832			break;
833		/* FALLTHROUGH */
834	default:
835		sc->vmx_intr_type = -1;
836		device_printf(dev, "cannot allocate any interrupt resources\n");
837		return (ENXIO);
838	}
839
840	return (error);
841}
842
843static void
844vmxnet3_free_interrupt(struct vmxnet3_softc *sc,
845    struct vmxnet3_interrupt *intr)
846{
847	device_t dev;
848
849	dev = sc->vmx_dev;
850
851	if (intr->vmxi_handler != NULL) {
852		bus_teardown_intr(dev, intr->vmxi_irq, intr->vmxi_handler);
853		intr->vmxi_handler = NULL;
854	}
855
856	if (intr->vmxi_irq != NULL) {
857		bus_release_resource(dev, SYS_RES_IRQ, intr->vmxi_rid,
858		    intr->vmxi_irq);
859		intr->vmxi_irq = NULL;
860		intr->vmxi_rid = -1;
861	}
862}
863
864static void
865vmxnet3_free_interrupts(struct vmxnet3_softc *sc)
866{
867	int i;
868
869	for (i = 0; i < sc->vmx_nintrs; i++)
870		vmxnet3_free_interrupt(sc, &sc->vmx_intrs[i]);
871
872	if (sc->vmx_intr_type == VMXNET3_IT_MSI ||
873	    sc->vmx_intr_type == VMXNET3_IT_MSIX)
874		pci_release_msi(sc->vmx_dev);
875}
876
877#ifndef VMXNET3_LEGACY_TX
878static int
879vmxnet3_alloc_taskqueue(struct vmxnet3_softc *sc)
880{
881	device_t dev;
882
883	dev = sc->vmx_dev;
884
885	sc->vmx_tq = taskqueue_create(device_get_nameunit(dev), M_NOWAIT,
886	    taskqueue_thread_enqueue, &sc->vmx_tq);
887	if (sc->vmx_tq == NULL)
888		return (ENOMEM);
889
890	return (0);
891}
892
893static void
894vmxnet3_start_taskqueue(struct vmxnet3_softc *sc)
895{
896	device_t dev;
897	int nthreads, error;
898
899	dev = sc->vmx_dev;
900
901	/*
902	 * The taskqueue is typically not frequently used, so a dedicated
903	 * thread for each queue is unnecessary.
904	 */
905	nthreads = MAX(1, sc->vmx_ntxqueues / 2);
906
907	/*
908	 * Most drivers just ignore the return value - it only fails
909	 * with ENOMEM so an error is not likely. It is hard for us
910	 * to recover from an error here.
911	 */
912	error = taskqueue_start_threads(&sc->vmx_tq, nthreads, PI_NET,
913	    "%s taskq", device_get_nameunit(dev));
914	if (error)
915		device_printf(dev, "failed to start taskqueue: %d", error);
916}
917
918static void
919vmxnet3_drain_taskqueue(struct vmxnet3_softc *sc)
920{
921	struct vmxnet3_txqueue *txq;
922	int i;
923
924	if (sc->vmx_tq != NULL) {
925		for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
926			txq = &sc->vmx_txq[i];
927			taskqueue_drain(sc->vmx_tq, &txq->vxtxq_defrtask);
928		}
929	}
930}
931
932static void
933vmxnet3_free_taskqueue(struct vmxnet3_softc *sc)
934{
935	if (sc->vmx_tq != NULL) {
936		taskqueue_free(sc->vmx_tq);
937		sc->vmx_tq = NULL;
938	}
939}
940#endif
941
942static int
943vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q)
944{
945	struct vmxnet3_rxqueue *rxq;
946	struct vmxnet3_rxring *rxr;
947	int i;
948
949	rxq = &sc->vmx_rxq[q];
950
951	snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
952	    device_get_nameunit(sc->vmx_dev), q);
953	mtx_init(&rxq->vxrxq_mtx, rxq->vxrxq_name, NULL, MTX_DEF);
954
955	rxq->vxrxq_sc = sc;
956	rxq->vxrxq_id = q;
957
958	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
959		rxr = &rxq->vxrxq_cmd_ring[i];
960		rxr->vxrxr_rid = i;
961		rxr->vxrxr_ndesc = sc->vmx_nrxdescs;
962		rxr->vxrxr_rxbuf = malloc(rxr->vxrxr_ndesc *
963		    sizeof(struct vmxnet3_rxbuf), M_DEVBUF, M_NOWAIT | M_ZERO);
964		if (rxr->vxrxr_rxbuf == NULL)
965			return (ENOMEM);
966
967		rxq->vxrxq_comp_ring.vxcr_ndesc += sc->vmx_nrxdescs;
968	}
969
970	return (0);
971}
972
973static int
974vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
975{
976	struct vmxnet3_txqueue *txq;
977	struct vmxnet3_txring *txr;
978
979	txq = &sc->vmx_txq[q];
980	txr = &txq->vxtxq_cmd_ring;
981
982	snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
983	    device_get_nameunit(sc->vmx_dev), q);
984	mtx_init(&txq->vxtxq_mtx, txq->vxtxq_name, NULL, MTX_DEF);
985
986	txq->vxtxq_sc = sc;
987	txq->vxtxq_id = q;
988
989	txr->vxtxr_ndesc = sc->vmx_ntxdescs;
990	txr->vxtxr_txbuf = malloc(txr->vxtxr_ndesc *
991	    sizeof(struct vmxnet3_txbuf), M_DEVBUF, M_NOWAIT | M_ZERO);
992	if (txr->vxtxr_txbuf == NULL)
993		return (ENOMEM);
994
995	txq->vxtxq_comp_ring.vxcr_ndesc = sc->vmx_ntxdescs;
996
997#ifndef VMXNET3_LEGACY_TX
998	TASK_INIT(&txq->vxtxq_defrtask, 0, vmxnet3_txq_tq_deferred, txq);
999
1000	txq->vxtxq_br = buf_ring_alloc(VMXNET3_DEF_BUFRING_SIZE, M_DEVBUF,
1001	    M_NOWAIT, &txq->vxtxq_mtx);
1002	if (txq->vxtxq_br == NULL)
1003		return (ENOMEM);
1004#endif
1005
1006	return (0);
1007}
1008
1009static int
1010vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *sc)
1011{
1012	int i, error;
1013
1014	/*
1015	 * Only attempt to create multiple queues if MSIX is available. MSIX is
1016	 * disabled by default because its apparently broken for devices passed
1017	 * through by at least ESXi 5.1. The hw.pci.honor_msi_blacklist tunable
1018	 * must be set to zero for MSIX. This check prevents us from allocating
1019	 * queue structures that we will not use.
1020	 */
1021	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX) {
1022		sc->vmx_max_nrxqueues = 1;
1023		sc->vmx_max_ntxqueues = 1;
1024	}
1025
1026	sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) *
1027	    sc->vmx_max_nrxqueues, M_DEVBUF, M_NOWAIT | M_ZERO);
1028	sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) *
1029	    sc->vmx_max_ntxqueues, M_DEVBUF, M_NOWAIT | M_ZERO);
1030	if (sc->vmx_rxq == NULL || sc->vmx_txq == NULL)
1031		return (ENOMEM);
1032
1033	for (i = 0; i < sc->vmx_max_nrxqueues; i++) {
1034		error = vmxnet3_init_rxq(sc, i);
1035		if (error)
1036			return (error);
1037	}
1038
1039	for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
1040		error = vmxnet3_init_txq(sc, i);
1041		if (error)
1042			return (error);
1043	}
1044
1045	return (0);
1046}
1047
1048static void
1049vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *rxq)
1050{
1051	struct vmxnet3_rxring *rxr;
1052	int i;
1053
1054	rxq->vxrxq_sc = NULL;
1055	rxq->vxrxq_id = -1;
1056
1057	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1058		rxr = &rxq->vxrxq_cmd_ring[i];
1059
1060		if (rxr->vxrxr_rxbuf != NULL) {
1061			free(rxr->vxrxr_rxbuf, M_DEVBUF);
1062			rxr->vxrxr_rxbuf = NULL;
1063		}
1064	}
1065
1066	if (mtx_initialized(&rxq->vxrxq_mtx) != 0)
1067		mtx_destroy(&rxq->vxrxq_mtx);
1068}
1069
1070static void
1071vmxnet3_destroy_txq(struct vmxnet3_txqueue *txq)
1072{
1073	struct vmxnet3_txring *txr;
1074
1075	txr = &txq->vxtxq_cmd_ring;
1076
1077	txq->vxtxq_sc = NULL;
1078	txq->vxtxq_id = -1;
1079
1080#ifndef VMXNET3_LEGACY_TX
1081	if (txq->vxtxq_br != NULL) {
1082		buf_ring_free(txq->vxtxq_br, M_DEVBUF);
1083		txq->vxtxq_br = NULL;
1084	}
1085#endif
1086
1087	if (txr->vxtxr_txbuf != NULL) {
1088		free(txr->vxtxr_txbuf, M_DEVBUF);
1089		txr->vxtxr_txbuf = NULL;
1090	}
1091
1092	if (mtx_initialized(&txq->vxtxq_mtx) != 0)
1093		mtx_destroy(&txq->vxtxq_mtx);
1094}
1095
1096static void
1097vmxnet3_free_rxtx_queues(struct vmxnet3_softc *sc)
1098{
1099	int i;
1100
1101	if (sc->vmx_rxq != NULL) {
1102		for (i = 0; i < sc->vmx_max_nrxqueues; i++)
1103			vmxnet3_destroy_rxq(&sc->vmx_rxq[i]);
1104		free(sc->vmx_rxq, M_DEVBUF);
1105		sc->vmx_rxq = NULL;
1106	}
1107
1108	if (sc->vmx_txq != NULL) {
1109		for (i = 0; i < sc->vmx_max_ntxqueues; i++)
1110			vmxnet3_destroy_txq(&sc->vmx_txq[i]);
1111		free(sc->vmx_txq, M_DEVBUF);
1112		sc->vmx_txq = NULL;
1113	}
1114}
1115
1116static int
1117vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
1118{
1119	device_t dev;
1120	uint8_t *kva;
1121	size_t size;
1122	int i, error;
1123
1124	dev = sc->vmx_dev;
1125
1126	size = sizeof(struct vmxnet3_driver_shared);
1127	error = vmxnet3_dma_malloc(sc, size, 1, &sc->vmx_ds_dma);
1128	if (error) {
1129		device_printf(dev, "cannot alloc shared memory\n");
1130		return (error);
1131	}
1132	sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.dma_vaddr;
1133
1134	size = sc->vmx_ntxqueues * sizeof(struct vmxnet3_txq_shared) +
1135	    sc->vmx_nrxqueues * sizeof(struct vmxnet3_rxq_shared);
1136	error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_qs_dma);
1137	if (error) {
1138		device_printf(dev, "cannot alloc queue shared memory\n");
1139		return (error);
1140	}
1141	sc->vmx_qs = (void *) sc->vmx_qs_dma.dma_vaddr;
1142	kva = sc->vmx_qs;
1143
1144	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1145		sc->vmx_txq[i].vxtxq_ts = (struct vmxnet3_txq_shared *) kva;
1146		kva += sizeof(struct vmxnet3_txq_shared);
1147	}
1148	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1149		sc->vmx_rxq[i].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva;
1150		kva += sizeof(struct vmxnet3_rxq_shared);
1151	}
1152
1153	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1154		size = sizeof(struct vmxnet3_rss_shared);
1155		error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_rss_dma);
1156		if (error) {
1157			device_printf(dev, "cannot alloc rss shared memory\n");
1158			return (error);
1159		}
1160		sc->vmx_rss =
1161		    (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.dma_vaddr;
1162	}
1163
1164	return (0);
1165}
1166
1167static void
1168vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
1169{
1170
1171	if (sc->vmx_rss != NULL) {
1172		vmxnet3_dma_free(sc, &sc->vmx_rss_dma);
1173		sc->vmx_rss = NULL;
1174	}
1175
1176	if (sc->vmx_qs != NULL) {
1177		vmxnet3_dma_free(sc, &sc->vmx_qs_dma);
1178		sc->vmx_qs = NULL;
1179	}
1180
1181	if (sc->vmx_ds != NULL) {
1182		vmxnet3_dma_free(sc, &sc->vmx_ds_dma);
1183		sc->vmx_ds = NULL;
1184	}
1185}
1186
1187static int
1188vmxnet3_alloc_txq_data(struct vmxnet3_softc *sc)
1189{
1190	device_t dev;
1191	struct vmxnet3_txqueue *txq;
1192	struct vmxnet3_txring *txr;
1193	struct vmxnet3_comp_ring *txc;
1194	size_t descsz, compsz;
1195	int i, q, error;
1196
1197	dev = sc->vmx_dev;
1198
1199	for (q = 0; q < sc->vmx_ntxqueues; q++) {
1200		txq = &sc->vmx_txq[q];
1201		txr = &txq->vxtxq_cmd_ring;
1202		txc = &txq->vxtxq_comp_ring;
1203
1204		descsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc);
1205		compsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txcompdesc);
1206
1207		error = bus_dma_tag_create(bus_get_dma_tag(dev),
1208		    1, 0,			/* alignment, boundary */
1209		    BUS_SPACE_MAXADDR,		/* lowaddr */
1210		    BUS_SPACE_MAXADDR,		/* highaddr */
1211		    NULL, NULL,			/* filter, filterarg */
1212		    VMXNET3_TX_MAXSIZE,		/* maxsize */
1213		    VMXNET3_TX_MAXSEGS,		/* nsegments */
1214		    VMXNET3_TX_MAXSEGSIZE,	/* maxsegsize */
1215		    0,				/* flags */
1216		    NULL, NULL,			/* lockfunc, lockarg */
1217		    &txr->vxtxr_txtag);
1218		if (error) {
1219			device_printf(dev,
1220			    "unable to create Tx buffer tag for queue %d\n", q);
1221			return (error);
1222		}
1223
1224		error = vmxnet3_dma_malloc(sc, descsz, 512, &txr->vxtxr_dma);
1225		if (error) {
1226			device_printf(dev, "cannot alloc Tx descriptors for "
1227			    "queue %d error %d\n", q, error);
1228			return (error);
1229		}
1230		txr->vxtxr_txd =
1231		    (struct vmxnet3_txdesc *) txr->vxtxr_dma.dma_vaddr;
1232
1233		error = vmxnet3_dma_malloc(sc, compsz, 512, &txc->vxcr_dma);
1234		if (error) {
1235			device_printf(dev, "cannot alloc Tx comp descriptors "
1236			   "for queue %d error %d\n", q, error);
1237			return (error);
1238		}
1239		txc->vxcr_u.txcd =
1240		    (struct vmxnet3_txcompdesc *) txc->vxcr_dma.dma_vaddr;
1241
1242		for (i = 0; i < txr->vxtxr_ndesc; i++) {
1243			error = bus_dmamap_create(txr->vxtxr_txtag, 0,
1244			    &txr->vxtxr_txbuf[i].vtxb_dmamap);
1245			if (error) {
1246				device_printf(dev, "unable to create Tx buf "
1247				    "dmamap for queue %d idx %d\n", q, i);
1248				return (error);
1249			}
1250		}
1251	}
1252
1253	return (0);
1254}
1255
1256static void
1257vmxnet3_free_txq_data(struct vmxnet3_softc *sc)
1258{
1259	device_t dev;
1260	struct vmxnet3_txqueue *txq;
1261	struct vmxnet3_txring *txr;
1262	struct vmxnet3_comp_ring *txc;
1263	struct vmxnet3_txbuf *txb;
1264	int i, q;
1265
1266	dev = sc->vmx_dev;
1267
1268	for (q = 0; q < sc->vmx_ntxqueues; q++) {
1269		txq = &sc->vmx_txq[q];
1270		txr = &txq->vxtxq_cmd_ring;
1271		txc = &txq->vxtxq_comp_ring;
1272
1273		for (i = 0; i < txr->vxtxr_ndesc; i++) {
1274			txb = &txr->vxtxr_txbuf[i];
1275			if (txb->vtxb_dmamap != NULL) {
1276				bus_dmamap_destroy(txr->vxtxr_txtag,
1277				    txb->vtxb_dmamap);
1278				txb->vtxb_dmamap = NULL;
1279			}
1280		}
1281
1282		if (txc->vxcr_u.txcd != NULL) {
1283			vmxnet3_dma_free(sc, &txc->vxcr_dma);
1284			txc->vxcr_u.txcd = NULL;
1285		}
1286
1287		if (txr->vxtxr_txd != NULL) {
1288			vmxnet3_dma_free(sc, &txr->vxtxr_dma);
1289			txr->vxtxr_txd = NULL;
1290		}
1291
1292		if (txr->vxtxr_txtag != NULL) {
1293			bus_dma_tag_destroy(txr->vxtxr_txtag);
1294			txr->vxtxr_txtag = NULL;
1295		}
1296	}
1297}
1298
1299static int
1300vmxnet3_alloc_rxq_data(struct vmxnet3_softc *sc)
1301{
1302	device_t dev;
1303	struct vmxnet3_rxqueue *rxq;
1304	struct vmxnet3_rxring *rxr;
1305	struct vmxnet3_comp_ring *rxc;
1306	int descsz, compsz;
1307	int i, j, q, error;
1308
1309	dev = sc->vmx_dev;
1310
1311	for (q = 0; q < sc->vmx_nrxqueues; q++) {
1312		rxq = &sc->vmx_rxq[q];
1313		rxc = &rxq->vxrxq_comp_ring;
1314		compsz = 0;
1315
1316		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1317			rxr = &rxq->vxrxq_cmd_ring[i];
1318
1319			descsz = rxr->vxrxr_ndesc *
1320			    sizeof(struct vmxnet3_rxdesc);
1321			compsz += rxr->vxrxr_ndesc *
1322			    sizeof(struct vmxnet3_rxcompdesc);
1323
1324			error = bus_dma_tag_create(bus_get_dma_tag(dev),
1325			    1, 0,		/* alignment, boundary */
1326			    BUS_SPACE_MAXADDR,	/* lowaddr */
1327			    BUS_SPACE_MAXADDR,	/* highaddr */
1328			    NULL, NULL,		/* filter, filterarg */
1329			    MJUMPAGESIZE,	/* maxsize */
1330			    1,			/* nsegments */
1331			    MJUMPAGESIZE,	/* maxsegsize */
1332			    0,			/* flags */
1333			    NULL, NULL,		/* lockfunc, lockarg */
1334			    &rxr->vxrxr_rxtag);
1335			if (error) {
1336				device_printf(dev,
1337				    "unable to create Rx buffer tag for "
1338				    "queue %d\n", q);
1339				return (error);
1340			}
1341
1342			error = vmxnet3_dma_malloc(sc, descsz, 512,
1343			    &rxr->vxrxr_dma);
1344			if (error) {
1345				device_printf(dev, "cannot allocate Rx "
1346				    "descriptors for queue %d/%d error %d\n",
1347				    i, q, error);
1348				return (error);
1349			}
1350			rxr->vxrxr_rxd =
1351			    (struct vmxnet3_rxdesc *) rxr->vxrxr_dma.dma_vaddr;
1352		}
1353
1354		error = vmxnet3_dma_malloc(sc, compsz, 512, &rxc->vxcr_dma);
1355		if (error) {
1356			device_printf(dev, "cannot alloc Rx comp descriptors "
1357			    "for queue %d error %d\n", q, error);
1358			return (error);
1359		}
1360		rxc->vxcr_u.rxcd =
1361		    (struct vmxnet3_rxcompdesc *) rxc->vxcr_dma.dma_vaddr;
1362
1363		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1364			rxr = &rxq->vxrxq_cmd_ring[i];
1365
1366			error = bus_dmamap_create(rxr->vxrxr_rxtag, 0,
1367			    &rxr->vxrxr_spare_dmap);
1368			if (error) {
1369				device_printf(dev, "unable to create spare "
1370				    "dmamap for queue %d/%d error %d\n",
1371				    q, i, error);
1372				return (error);
1373			}
1374
1375			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1376				error = bus_dmamap_create(rxr->vxrxr_rxtag, 0,
1377				    &rxr->vxrxr_rxbuf[j].vrxb_dmamap);
1378				if (error) {
1379					device_printf(dev, "unable to create "
1380					    "dmamap for queue %d/%d slot %d "
1381					    "error %d\n",
1382					    q, i, j, error);
1383					return (error);
1384				}
1385			}
1386		}
1387	}
1388
1389	return (0);
1390}
1391
1392static void
1393vmxnet3_free_rxq_data(struct vmxnet3_softc *sc)
1394{
1395	device_t dev;
1396	struct vmxnet3_rxqueue *rxq;
1397	struct vmxnet3_rxring *rxr;
1398	struct vmxnet3_comp_ring *rxc;
1399	struct vmxnet3_rxbuf *rxb;
1400	int i, j, q;
1401
1402	dev = sc->vmx_dev;
1403
1404	for (q = 0; q < sc->vmx_nrxqueues; q++) {
1405		rxq = &sc->vmx_rxq[q];
1406		rxc = &rxq->vxrxq_comp_ring;
1407
1408		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1409			rxr = &rxq->vxrxq_cmd_ring[i];
1410
1411			if (rxr->vxrxr_spare_dmap != NULL) {
1412				bus_dmamap_destroy(rxr->vxrxr_rxtag,
1413				    rxr->vxrxr_spare_dmap);
1414				rxr->vxrxr_spare_dmap = NULL;
1415			}
1416
1417			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1418				rxb = &rxr->vxrxr_rxbuf[j];
1419				if (rxb->vrxb_dmamap != NULL) {
1420					bus_dmamap_destroy(rxr->vxrxr_rxtag,
1421					    rxb->vrxb_dmamap);
1422					rxb->vrxb_dmamap = NULL;
1423				}
1424			}
1425		}
1426
1427		if (rxc->vxcr_u.rxcd != NULL) {
1428			vmxnet3_dma_free(sc, &rxc->vxcr_dma);
1429			rxc->vxcr_u.rxcd = NULL;
1430		}
1431
1432		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1433			rxr = &rxq->vxrxq_cmd_ring[i];
1434
1435			if (rxr->vxrxr_rxd != NULL) {
1436				vmxnet3_dma_free(sc, &rxr->vxrxr_dma);
1437				rxr->vxrxr_rxd = NULL;
1438			}
1439
1440			if (rxr->vxrxr_rxtag != NULL) {
1441				bus_dma_tag_destroy(rxr->vxrxr_rxtag);
1442				rxr->vxrxr_rxtag = NULL;
1443			}
1444		}
1445	}
1446}
1447
1448static int
1449vmxnet3_alloc_queue_data(struct vmxnet3_softc *sc)
1450{
1451	int error;
1452
1453	error = vmxnet3_alloc_txq_data(sc);
1454	if (error)
1455		return (error);
1456
1457	error = vmxnet3_alloc_rxq_data(sc);
1458	if (error)
1459		return (error);
1460
1461	return (0);
1462}
1463
1464static void
1465vmxnet3_free_queue_data(struct vmxnet3_softc *sc)
1466{
1467
1468	if (sc->vmx_rxq != NULL)
1469		vmxnet3_free_rxq_data(sc);
1470
1471	if (sc->vmx_txq != NULL)
1472		vmxnet3_free_txq_data(sc);
1473}
1474
1475static int
1476vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1477{
1478	int error;
1479
1480	error = vmxnet3_dma_malloc(sc, VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN,
1481	    32, &sc->vmx_mcast_dma);
1482	if (error)
1483		device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1484	else
1485		sc->vmx_mcast = sc->vmx_mcast_dma.dma_vaddr;
1486
1487	return (error);
1488}
1489
1490static void
1491vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1492{
1493
1494	if (sc->vmx_mcast != NULL) {
1495		vmxnet3_dma_free(sc, &sc->vmx_mcast_dma);
1496		sc->vmx_mcast = NULL;
1497	}
1498}
1499
1500static void
1501vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1502{
1503	struct vmxnet3_driver_shared *ds;
1504	struct vmxnet3_txqueue *txq;
1505	struct vmxnet3_txq_shared *txs;
1506	struct vmxnet3_rxqueue *rxq;
1507	struct vmxnet3_rxq_shared *rxs;
1508	int i;
1509
1510	ds = sc->vmx_ds;
1511
1512	/*
1513	 * Initialize fields of the shared data that remains the same across
1514	 * reinits. Note the shared data is zero'd when allocated.
1515	 */
1516
1517	ds->magic = VMXNET3_REV1_MAGIC;
1518
1519	/* DriverInfo */
1520	ds->version = VMXNET3_DRIVER_VERSION;
1521	ds->guest = VMXNET3_GOS_FREEBSD |
1522#ifdef __LP64__
1523	    VMXNET3_GOS_64BIT;
1524#else
1525	    VMXNET3_GOS_32BIT;
1526#endif
1527	ds->vmxnet3_revision = 1;
1528	ds->upt_version = 1;
1529
1530	/* Misc. conf */
1531	ds->driver_data = vtophys(sc);
1532	ds->driver_data_len = sizeof(struct vmxnet3_softc);
1533	ds->queue_shared = sc->vmx_qs_dma.dma_paddr;
1534	ds->queue_shared_len = sc->vmx_qs_dma.dma_size;
1535	ds->nrxsg_max = sc->vmx_max_rxsegs;
1536
1537	/* RSS conf */
1538	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1539		ds->rss.version = 1;
1540		ds->rss.paddr = sc->vmx_rss_dma.dma_paddr;
1541		ds->rss.len = sc->vmx_rss_dma.dma_size;
1542	}
1543
1544	/* Interrupt control. */
1545	ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1546	ds->nintr = sc->vmx_nintrs;
1547	ds->evintr = sc->vmx_event_intr_idx;
1548	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1549
1550	for (i = 0; i < sc->vmx_nintrs; i++)
1551		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1552
1553	/* Receive filter. */
1554	ds->mcast_table = sc->vmx_mcast_dma.dma_paddr;
1555	ds->mcast_tablelen = sc->vmx_mcast_dma.dma_size;
1556
1557	/* Tx queues */
1558	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1559		txq = &sc->vmx_txq[i];
1560		txs = txq->vxtxq_ts;
1561
1562		txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_dma.dma_paddr;
1563		txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1564		txs->comp_ring = txq->vxtxq_comp_ring.vxcr_dma.dma_paddr;
1565		txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1566		txs->driver_data = vtophys(txq);
1567		txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1568	}
1569
1570	/* Rx queues */
1571	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1572		rxq = &sc->vmx_rxq[i];
1573		rxs = rxq->vxrxq_rs;
1574
1575		rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_dma.dma_paddr;
1576		rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1577		rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_dma.dma_paddr;
1578		rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1579		rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_dma.dma_paddr;
1580		rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1581		rxs->driver_data = vtophys(rxq);
1582		rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1583	}
1584}
1585
1586static void
1587vmxnet3_init_hwassist(struct vmxnet3_softc *sc)
1588{
1589	struct ifnet *ifp = sc->vmx_ifp;
1590	uint64_t hwassist;
1591
1592	hwassist = 0;
1593	if (ifp->if_capenable & IFCAP_TXCSUM)
1594		hwassist |= VMXNET3_CSUM_OFFLOAD;
1595	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
1596		hwassist |= VMXNET3_CSUM_OFFLOAD_IPV6;
1597	if (ifp->if_capenable & IFCAP_TSO4)
1598		hwassist |= CSUM_IP_TSO;
1599	if (ifp->if_capenable & IFCAP_TSO6)
1600		hwassist |= CSUM_IP6_TSO;
1601	ifp->if_hwassist = hwassist;
1602}
1603
1604static void
1605vmxnet3_reinit_interface(struct vmxnet3_softc *sc)
1606{
1607	struct ifnet *ifp;
1608
1609	ifp = sc->vmx_ifp;
1610
1611	/* Use the current MAC address. */
1612	bcopy(IF_LLADDR(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
1613	vmxnet3_set_lladdr(sc);
1614
1615	vmxnet3_init_hwassist(sc);
1616}
1617
1618static void
1619vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1620{
1621	/*
1622	 * Use the same key as the Linux driver until FreeBSD can do
1623	 * RSS (presumably Toeplitz) in software.
1624	 */
1625	static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
1626	    0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
1627	    0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
1628	    0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
1629	    0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
1630	    0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
1631	};
1632
1633	struct vmxnet3_driver_shared *ds;
1634	struct vmxnet3_rss_shared *rss;
1635	int i;
1636
1637	ds = sc->vmx_ds;
1638	rss = sc->vmx_rss;
1639
1640	rss->hash_type =
1641	    UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1642	    UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1643	rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1644	rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1645	rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1646	memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
1647
1648	for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1649		rss->ind_table[i] = i % sc->vmx_nrxqueues;
1650}
1651
1652static void
1653vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1654{
1655	struct ifnet *ifp;
1656	struct vmxnet3_driver_shared *ds;
1657
1658	ifp = sc->vmx_ifp;
1659	ds = sc->vmx_ds;
1660
1661	ds->mtu = ifp->if_mtu;
1662	ds->ntxqueue = sc->vmx_ntxqueues;
1663	ds->nrxqueue = sc->vmx_nrxqueues;
1664
1665	ds->upt_features = 0;
1666	if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
1667		ds->upt_features |= UPT1_F_CSUM;
1668	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1669		ds->upt_features |= UPT1_F_VLAN;
1670	if (ifp->if_capenable & IFCAP_LRO)
1671		ds->upt_features |= UPT1_F_LRO;
1672
1673	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1674		ds->upt_features |= UPT1_F_RSS;
1675		vmxnet3_reinit_rss_shared_data(sc);
1676	}
1677
1678	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.dma_paddr);
1679	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1680	    (uint64_t) sc->vmx_ds_dma.dma_paddr >> 32);
1681}
1682
1683static int
1684vmxnet3_alloc_data(struct vmxnet3_softc *sc)
1685{
1686	int error;
1687
1688	error = vmxnet3_alloc_shared_data(sc);
1689	if (error)
1690		return (error);
1691
1692	error = vmxnet3_alloc_queue_data(sc);
1693	if (error)
1694		return (error);
1695
1696	error = vmxnet3_alloc_mcast_table(sc);
1697	if (error)
1698		return (error);
1699
1700	vmxnet3_init_shared_data(sc);
1701
1702	return (0);
1703}
1704
1705static void
1706vmxnet3_free_data(struct vmxnet3_softc *sc)
1707{
1708
1709	vmxnet3_free_mcast_table(sc);
1710	vmxnet3_free_queue_data(sc);
1711	vmxnet3_free_shared_data(sc);
1712}
1713
1714static int
1715vmxnet3_setup_interface(struct vmxnet3_softc *sc)
1716{
1717	device_t dev;
1718	struct ifnet *ifp;
1719
1720	dev = sc->vmx_dev;
1721
1722	ifp = sc->vmx_ifp = if_alloc(IFT_ETHER);
1723	if (ifp == NULL) {
1724		device_printf(dev, "cannot allocate ifnet structure\n");
1725		return (ENOSPC);
1726	}
1727
1728	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1729#if __FreeBSD_version < 1000025
1730	ifp->if_baudrate = 1000000000;
1731#elif __FreeBSD_version < 1100011
1732	if_initbaudrate(ifp, IF_Gbps(10));
1733#else
1734	ifp->if_baudrate = IF_Gbps(10);
1735#endif
1736	ifp->if_softc = sc;
1737	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1738	ifp->if_init = vmxnet3_init;
1739	ifp->if_ioctl = vmxnet3_ioctl;
1740	ifp->if_get_counter = vmxnet3_get_counter;
1741	ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
1742	ifp->if_hw_tsomaxsegcount = VMXNET3_TX_MAXSEGS;
1743	ifp->if_hw_tsomaxsegsize = VMXNET3_TX_MAXSEGSIZE;
1744
1745#ifdef VMXNET3_LEGACY_TX
1746	ifp->if_start = vmxnet3_start;
1747	ifp->if_snd.ifq_drv_maxlen = sc->vmx_ntxdescs - 1;
1748	IFQ_SET_MAXLEN(&ifp->if_snd, sc->vmx_ntxdescs - 1);
1749	IFQ_SET_READY(&ifp->if_snd);
1750#else
1751	ifp->if_transmit = vmxnet3_txq_mq_start;
1752	ifp->if_qflush = vmxnet3_qflush;
1753#endif
1754
1755	vmxnet3_get_lladdr(sc);
1756	ether_ifattach(ifp, sc->vmx_lladdr);
1757
1758	ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_TXCSUM;
1759	ifp->if_capabilities |= IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6;
1760	ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6;
1761	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
1762	    IFCAP_VLAN_HWCSUM;
1763	ifp->if_capenable = ifp->if_capabilities;
1764
1765	/* These capabilities are not enabled by default. */
1766	ifp->if_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER;
1767
1768	sc->vmx_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
1769	    vmxnet3_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
1770	sc->vmx_vlan_detach = EVENTHANDLER_REGISTER(vlan_config,
1771	    vmxnet3_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
1772
1773	ifmedia_init(&sc->vmx_media, 0, vmxnet3_media_change,
1774	    vmxnet3_media_status);
1775	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
1776	ifmedia_set(&sc->vmx_media, IFM_ETHER | IFM_AUTO);
1777
1778	return (0);
1779}
1780
1781static void
1782vmxnet3_evintr(struct vmxnet3_softc *sc)
1783{
1784	device_t dev;
1785	struct ifnet *ifp;
1786	struct vmxnet3_txq_shared *ts;
1787	struct vmxnet3_rxq_shared *rs;
1788	uint32_t event;
1789	int reset;
1790
1791	dev = sc->vmx_dev;
1792	ifp = sc->vmx_ifp;
1793	reset = 0;
1794
1795	VMXNET3_CORE_LOCK(sc);
1796
1797	/* Clear events. */
1798	event = sc->vmx_ds->event;
1799	vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
1800
1801	if (event & VMXNET3_EVENT_LINK) {
1802		vmxnet3_link_status(sc);
1803		if (sc->vmx_link_active != 0)
1804			vmxnet3_tx_start_all(sc);
1805	}
1806
1807	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
1808		reset = 1;
1809		vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
1810		ts = sc->vmx_txq[0].vxtxq_ts;
1811		if (ts->stopped != 0)
1812			device_printf(dev, "Tx queue error %#x\n", ts->error);
1813		rs = sc->vmx_rxq[0].vxrxq_rs;
1814		if (rs->stopped != 0)
1815			device_printf(dev, "Rx queue error %#x\n", rs->error);
1816		device_printf(dev, "Rx/Tx queue error event ... resetting\n");
1817	}
1818
1819	if (event & VMXNET3_EVENT_DIC)
1820		device_printf(dev, "device implementation change event\n");
1821	if (event & VMXNET3_EVENT_DEBUG)
1822		device_printf(dev, "debug event\n");
1823
1824	if (reset != 0) {
1825		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1826		vmxnet3_init_locked(sc);
1827	}
1828
1829	VMXNET3_CORE_UNLOCK(sc);
1830}
1831
1832static void
1833vmxnet3_txq_eof(struct vmxnet3_txqueue *txq)
1834{
1835	struct vmxnet3_softc *sc;
1836	struct ifnet *ifp;
1837	struct vmxnet3_txring *txr;
1838	struct vmxnet3_comp_ring *txc;
1839	struct vmxnet3_txcompdesc *txcd;
1840	struct vmxnet3_txbuf *txb;
1841	struct mbuf *m;
1842	u_int sop;
1843
1844	sc = txq->vxtxq_sc;
1845	ifp = sc->vmx_ifp;
1846	txr = &txq->vxtxq_cmd_ring;
1847	txc = &txq->vxtxq_comp_ring;
1848
1849	VMXNET3_TXQ_LOCK_ASSERT(txq);
1850
1851	for (;;) {
1852		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
1853		if (txcd->gen != txc->vxcr_gen)
1854			break;
1855		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1856
1857		if (++txc->vxcr_next == txc->vxcr_ndesc) {
1858			txc->vxcr_next = 0;
1859			txc->vxcr_gen ^= 1;
1860		}
1861
1862		sop = txr->vxtxr_next;
1863		txb = &txr->vxtxr_txbuf[sop];
1864
1865		if ((m = txb->vtxb_m) != NULL) {
1866			bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
1867			    BUS_DMASYNC_POSTWRITE);
1868			bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap);
1869
1870			txq->vxtxq_stats.vmtxs_opackets++;
1871			txq->vxtxq_stats.vmtxs_obytes += m->m_pkthdr.len;
1872			if (m->m_flags & M_MCAST)
1873				txq->vxtxq_stats.vmtxs_omcasts++;
1874
1875			m_freem(m);
1876			txb->vtxb_m = NULL;
1877		}
1878
1879		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
1880	}
1881
1882	if (txr->vxtxr_head == txr->vxtxr_next)
1883		txq->vxtxq_watchdog = 0;
1884}
1885
1886static int
1887vmxnet3_newbuf(struct vmxnet3_softc *sc, struct vmxnet3_rxring *rxr)
1888{
1889	struct ifnet *ifp;
1890	struct mbuf *m;
1891	struct vmxnet3_rxdesc *rxd;
1892	struct vmxnet3_rxbuf *rxb;
1893	bus_dma_tag_t tag;
1894	bus_dmamap_t dmap;
1895	bus_dma_segment_t segs[1];
1896	int idx, clsize, btype, flags, nsegs, error;
1897
1898	ifp = sc->vmx_ifp;
1899	tag = rxr->vxrxr_rxtag;
1900	dmap = rxr->vxrxr_spare_dmap;
1901	idx = rxr->vxrxr_fill;
1902	rxd = &rxr->vxrxr_rxd[idx];
1903	rxb = &rxr->vxrxr_rxbuf[idx];
1904
1905#ifdef VMXNET3_FAILPOINTS
1906	KFAIL_POINT_CODE(VMXNET3_FP, newbuf, return ENOBUFS);
1907	if (rxr->vxrxr_rid != 0)
1908		KFAIL_POINT_CODE(VMXNET3_FP, newbuf_body_only, return ENOBUFS);
1909#endif
1910
1911	if (rxr->vxrxr_rid == 0 && (idx % sc->vmx_rx_max_chain) == 0) {
1912		flags = M_PKTHDR;
1913		clsize = MCLBYTES;
1914		btype = VMXNET3_BTYPE_HEAD;
1915	} else {
1916#if __FreeBSD_version < 902001
1917		/*
1918		 * These mbufs will never be used for the start of a frame.
1919		 * Roughly prior to branching releng/9.2, the load_mbuf_sg()
1920		 * required the mbuf to always be a packet header. Avoid
1921		 * unnecessary mbuf initialization in newer versions where
1922		 * that is not the case.
1923		 */
1924		flags = M_PKTHDR;
1925#else
1926		flags = 0;
1927#endif
1928		clsize = MJUMPAGESIZE;
1929		btype = VMXNET3_BTYPE_BODY;
1930	}
1931
1932	m = m_getjcl(M_NOWAIT, MT_DATA, flags, clsize);
1933	if (m == NULL) {
1934		sc->vmx_stats.vmst_mgetcl_failed++;
1935		return (ENOBUFS);
1936	}
1937
1938	if (btype == VMXNET3_BTYPE_HEAD) {
1939		m->m_len = m->m_pkthdr.len = clsize;
1940		m_adj(m, ETHER_ALIGN);
1941	} else
1942		m->m_len = clsize;
1943
1944	error = bus_dmamap_load_mbuf_sg(tag, dmap, m, &segs[0], &nsegs,
1945	    BUS_DMA_NOWAIT);
1946	if (error) {
1947		m_freem(m);
1948		sc->vmx_stats.vmst_mbuf_load_failed++;
1949		return (error);
1950	}
1951	KASSERT(nsegs == 1,
1952	    ("%s: mbuf %p with too many segments %d", __func__, m, nsegs));
1953#if __FreeBSD_version < 902001
1954	if (btype == VMXNET3_BTYPE_BODY)
1955		m->m_flags &= ~M_PKTHDR;
1956#endif
1957
1958	if (rxb->vrxb_m != NULL) {
1959		bus_dmamap_sync(tag, rxb->vrxb_dmamap, BUS_DMASYNC_POSTREAD);
1960		bus_dmamap_unload(tag, rxb->vrxb_dmamap);
1961	}
1962
1963	rxr->vxrxr_spare_dmap = rxb->vrxb_dmamap;
1964	rxb->vrxb_dmamap = dmap;
1965	rxb->vrxb_m = m;
1966
1967	rxd->addr = segs[0].ds_addr;
1968	rxd->len = segs[0].ds_len;
1969	rxd->btype = btype;
1970	rxd->gen = rxr->vxrxr_gen;
1971
1972	vmxnet3_rxr_increment_fill(rxr);
1973	return (0);
1974}
1975
1976static void
1977vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *rxq,
1978    struct vmxnet3_rxring *rxr, int idx)
1979{
1980	struct vmxnet3_rxdesc *rxd;
1981
1982	rxd = &rxr->vxrxr_rxd[idx];
1983	rxd->gen = rxr->vxrxr_gen;
1984	vmxnet3_rxr_increment_fill(rxr);
1985}
1986
1987static void
1988vmxnet3_rxq_discard_chain(struct vmxnet3_rxqueue *rxq)
1989{
1990	struct vmxnet3_softc *sc;
1991	struct vmxnet3_rxring *rxr;
1992	struct vmxnet3_comp_ring *rxc;
1993	struct vmxnet3_rxcompdesc *rxcd;
1994	int idx, eof;
1995
1996	sc = rxq->vxrxq_sc;
1997	rxc = &rxq->vxrxq_comp_ring;
1998
1999	do {
2000		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
2001		if (rxcd->gen != rxc->vxcr_gen)
2002			break;		/* Not expected. */
2003		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
2004
2005		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
2006			rxc->vxcr_next = 0;
2007			rxc->vxcr_gen ^= 1;
2008		}
2009
2010		idx = rxcd->rxd_idx;
2011		eof = rxcd->eop;
2012		if (rxcd->qid < sc->vmx_nrxqueues)
2013			rxr = &rxq->vxrxq_cmd_ring[0];
2014		else
2015			rxr = &rxq->vxrxq_cmd_ring[1];
2016		vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2017	} while (!eof);
2018}
2019
2020static void
2021vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2022{
2023
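	/*
	 * Translate the completion descriptor's checksum bits into mbuf
	 * csum_flags.  A validated L4 checksum is only claimed for
	 * unfragmented TCP/UDP, in which case csum_data is set to 0xFFFF.
	 */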
2024	if (rxcd->ipv4) {
2025		m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
2026		if (rxcd->ipcsum_ok)
2027			m->m_pkthdr.csum_flags |= CSUM_IP_VALID;
2028	}
2029
2030	if (!rxcd->fragment) {
2031		if (rxcd->csum_ok && (rxcd->tcp || rxcd->udp)) {
2032			m->m_pkthdr.csum_flags |= CSUM_DATA_VALID |
2033			    CSUM_PSEUDO_HDR;
2034			m->m_pkthdr.csum_data = 0xFFFF;
2035		}
2036	}
2037}
2038
2039static void
2040vmxnet3_rxq_input(struct vmxnet3_rxqueue *rxq,
2041    struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2042{
2043	struct vmxnet3_softc *sc;
2044	struct ifnet *ifp;
2045
2046	sc = rxq->vxrxq_sc;
2047	ifp = sc->vmx_ifp;
2048
2049	if (rxcd->error) {
2050		rxq->vxrxq_stats.vmrxs_ierrors++;
2051		m_freem(m);
2052		return;
2053	}
2054
2055#ifdef notyet
2056	switch (rxcd->rss_type) {
2057	case VMXNET3_RCD_RSS_TYPE_IPV4:
2058		m->m_pkthdr.flowid = rxcd->rss_hash;
2059		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV4);
2060		break;
2061	case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
2062		m->m_pkthdr.flowid = rxcd->rss_hash;
2063		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV4);
2064		break;
2065	case VMXNET3_RCD_RSS_TYPE_IPV6:
2066		m->m_pkthdr.flowid = rxcd->rss_hash;
2067		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV6);
2068		break;
2069	case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
2070		m->m_pkthdr.flowid = rxcd->rss_hash;
2071		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV6);
2072		break;
2073	default: /* VMXNET3_RCD_RSS_TYPE_NONE */
2074		m->m_pkthdr.flowid = rxq->vxrxq_id;
2075		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2076		break;
2077	}
2078#else
2079	m->m_pkthdr.flowid = rxq->vxrxq_id;
2080	M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2081#endif
2082
2083	if (!rxcd->no_csum)
2084		vmxnet3_rx_csum(rxcd, m);
2085	if (rxcd->vlan) {
2086		m->m_flags |= M_VLANTAG;
2087		m->m_pkthdr.ether_vtag = rxcd->vtag;
2088	}
2089
2090	rxq->vxrxq_stats.vmrxs_ipackets++;
2091	rxq->vxrxq_stats.vmrxs_ibytes += m->m_pkthdr.len;
2092
2093	VMXNET3_RXQ_UNLOCK(rxq);
2094	(*ifp->if_input)(ifp, m);
2095	VMXNET3_RXQ_LOCK(rxq);
2096}
2097
2098static void
2099vmxnet3_rxq_eof(struct vmxnet3_rxqueue *rxq)
2100{
2101	struct vmxnet3_softc *sc;
2102	struct ifnet *ifp;
2103	struct vmxnet3_rxring *rxr;
2104	struct vmxnet3_comp_ring *rxc;
2105	struct vmxnet3_rxdesc *rxd;
2106	struct vmxnet3_rxcompdesc *rxcd;
2107	struct mbuf *m, *m_head, *m_tail;
2108	int idx, length;
2109
2110	sc = rxq->vxrxq_sc;
2111	ifp = sc->vmx_ifp;
2112	rxc = &rxq->vxrxq_comp_ring;
2113
2114	VMXNET3_RXQ_LOCK_ASSERT(rxq);
2115
2116	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2117		return;
2118
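	/*
	 * vxrxq_mhead/vxrxq_mtail hold a partially assembled frame saved
	 * when the previous pass ran out of completed descriptors
	 * mid-chain; pick up where that pass left off.
	 */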
2119	m_head = rxq->vxrxq_mhead;
2120	rxq->vxrxq_mhead = NULL;
2121	m_tail = rxq->vxrxq_mtail;
2122	rxq->vxrxq_mtail = NULL;
2123	MPASS(m_head == NULL || m_tail != NULL);
2124
2125	for (;;) {
2126		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
2127		if (rxcd->gen != rxc->vxcr_gen) {
2128			rxq->vxrxq_mhead = m_head;
2129			rxq->vxrxq_mtail = m_tail;
2130			break;
2131		}
2132		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
2133
2134		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
2135			rxc->vxcr_next = 0;
2136			rxc->vxcr_gen ^= 1;
2137		}
2138
2139		idx = rxcd->rxd_idx;
2140		length = rxcd->len;
2141		if (rxcd->qid < sc->vmx_nrxqueues)
2142			rxr = &rxq->vxrxq_cmd_ring[0];
2143		else
2144			rxr = &rxq->vxrxq_cmd_ring[1];
2145		rxd = &rxr->vxrxr_rxd[idx];
2146
2147		m = rxr->vxrxr_rxbuf[idx].vrxb_m;
2148		KASSERT(m != NULL, ("%s: queue %d idx %d without mbuf",
2149		    __func__, rxcd->qid, idx));
2150
2151		/*
2152		 * The host may skip descriptors. We detect this when the
2153		 * completed descriptor's index does not match the ring's fill
2154		 * index; catch up with the host now.
2155		 */
2156		if (__predict_false(rxr->vxrxr_fill != idx)) {
2157			while (rxr->vxrxr_fill != idx) {
2158				rxr->vxrxr_rxd[rxr->vxrxr_fill].gen =
2159				    rxr->vxrxr_gen;
2160				vmxnet3_rxr_increment_fill(rxr);
2161			}
2162		}
2163
2164		if (rxcd->sop) {
2165			KASSERT(rxd->btype == VMXNET3_BTYPE_HEAD,
2166			    ("%s: start of frame w/o head buffer", __func__));
2167			KASSERT(rxr == &rxq->vxrxq_cmd_ring[0],
2168			    ("%s: start of frame not in ring 0", __func__));
2169			KASSERT((idx % sc->vmx_rx_max_chain) == 0,
2170			    ("%s: start of frame at unexpected index %d (%d)",
2171			     __func__, idx, sc->vmx_rx_max_chain));
2172			KASSERT(m_head == NULL,
2173			    ("%s: duplicate start of frame?", __func__));
2174
2175			if (length == 0) {
2176				/* Just ignore this descriptor. */
2177				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2178				goto nextp;
2179			}
2180
2181			if (vmxnet3_newbuf(sc, rxr) != 0) {
2182				rxq->vxrxq_stats.vmrxs_iqdrops++;
2183				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2184				if (!rxcd->eop)
2185					vmxnet3_rxq_discard_chain(rxq);
2186				goto nextp;
2187			}
2188
2189			m->m_pkthdr.rcvif = ifp;
2190			m->m_pkthdr.len = m->m_len = length;
2191			m->m_pkthdr.csum_flags = 0;
2192			m_head = m_tail = m;
2193
2194		} else {
2195			KASSERT(rxd->btype == VMXNET3_BTYPE_BODY,
2196			    ("%s: non-start of frame w/o body buffer", __func__));
2197
2198			if (m_head == NULL && m_tail == NULL) {
2199				/*
2200				 * This is a continuation of a packet that we
2201				 * started to drop, but could not drop entirely
2202				 * because this segment was still owned by the
2203				 * host.  So, drop the remainder now.
2204				 */
2205				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2206				if (!rxcd->eop)
2207					vmxnet3_rxq_discard_chain(rxq);
2208				goto nextp;
2209			}
2210
2211			KASSERT(m_head != NULL,
2212			    ("%s: frame not started?", __func__));
2213
2214			if (vmxnet3_newbuf(sc, rxr) != 0) {
2215				rxq->vxrxq_stats.vmrxs_iqdrops++;
2216				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2217				if (!rxcd->eop)
2218					vmxnet3_rxq_discard_chain(rxq);
2219				m_freem(m_head);
2220				m_head = m_tail = NULL;
2221				goto nextp;
2222			}
2223
2224			m->m_len = length;
2225			m_head->m_pkthdr.len += length;
2226			m_tail->m_next = m;
2227			m_tail = m;
2228		}
2229
2230		if (rxcd->eop) {
2231			vmxnet3_rxq_input(rxq, rxcd, m_head);
2232			m_head = m_tail = NULL;
2233
2234			/* Must recheck after dropping the Rx lock. */
2235			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2236				break;
2237		}
2238
2239nextp:
2240		if (__predict_false(rxq->vxrxq_rs->update_rxhead)) {
2241			int qid = rxcd->qid;
2242			bus_size_t r;
2243
2244			idx = (idx + 1) % rxr->vxrxr_ndesc;
2245			if (qid >= sc->vmx_nrxqueues) {
2246				qid -= sc->vmx_nrxqueues;
2247				r = VMXNET3_BAR0_RXH2(qid);
2248			} else
2249				r = VMXNET3_BAR0_RXH1(qid);
2250			vmxnet3_write_bar0(sc, r, idx);
2251		}
2252	}
2253}
2254
2255static void
2256vmxnet3_legacy_intr(void *xsc)
2257{
2258	struct vmxnet3_softc *sc;
2259	struct vmxnet3_rxqueue *rxq;
2260	struct vmxnet3_txqueue *txq;
2261
2262	sc = xsc;
2263	rxq = &sc->vmx_rxq[0];
2264	txq = &sc->vmx_txq[0];
2265
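	/*
	 * On a (potentially shared) INTx line, read the interrupt status
	 * register first to confirm this device actually asserted the
	 * interrupt; the check is skipped for MSI.
	 */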
2266	if (sc->vmx_intr_type == VMXNET3_IT_LEGACY) {
2267		if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
2268			return;
2269	}
2270	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2271		vmxnet3_disable_all_intrs(sc);
2272
2273	if (sc->vmx_ds->event != 0)
2274		vmxnet3_evintr(sc);
2275
2276	VMXNET3_RXQ_LOCK(rxq);
2277	vmxnet3_rxq_eof(rxq);
2278	VMXNET3_RXQ_UNLOCK(rxq);
2279
2280	VMXNET3_TXQ_LOCK(txq);
2281	vmxnet3_txq_eof(txq);
2282	vmxnet3_txq_start(txq);
2283	VMXNET3_TXQ_UNLOCK(txq);
2284
2285	vmxnet3_enable_all_intrs(sc);
2286}
2287
2288static void
2289vmxnet3_txq_intr(void *xtxq)
2290{
2291	struct vmxnet3_softc *sc;
2292	struct vmxnet3_txqueue *txq;
2293
2294	txq = xtxq;
2295	sc = txq->vxtxq_sc;
2296
2297	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2298		vmxnet3_disable_intr(sc, txq->vxtxq_intr_idx);
2299
2300	VMXNET3_TXQ_LOCK(txq);
2301	vmxnet3_txq_eof(txq);
2302	vmxnet3_txq_start(txq);
2303	VMXNET3_TXQ_UNLOCK(txq);
2304
2305	vmxnet3_enable_intr(sc, txq->vxtxq_intr_idx);
2306}
2307
2308static void
2309vmxnet3_rxq_intr(void *xrxq)
2310{
2311	struct vmxnet3_softc *sc;
2312	struct vmxnet3_rxqueue *rxq;
2313
2314	rxq = xrxq;
2315	sc = rxq->vxrxq_sc;
2316
2317	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2318		vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx);
2319
2320	VMXNET3_RXQ_LOCK(rxq);
2321	vmxnet3_rxq_eof(rxq);
2322	VMXNET3_RXQ_UNLOCK(rxq);
2323
2324	vmxnet3_enable_intr(sc, rxq->vxrxq_intr_idx);
2325}
2326
2327static void
2328vmxnet3_event_intr(void *xsc)
2329{
2330	struct vmxnet3_softc *sc;
2331
2332	sc = xsc;
2333
2334	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2335		vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
2336
2337	if (sc->vmx_ds->event != 0)
2338		vmxnet3_evintr(sc);
2339
2340	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2341}
2342
2343static void
2344vmxnet3_txstop(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2345{
2346	struct vmxnet3_txring *txr;
2347	struct vmxnet3_txbuf *txb;
2348	int i;
2349
2350	txr = &txq->vxtxq_cmd_ring;
2351
2352	for (i = 0; i < txr->vxtxr_ndesc; i++) {
2353		txb = &txr->vxtxr_txbuf[i];
2354
2355		if (txb->vtxb_m == NULL)
2356			continue;
2357
2358		bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
2359		    BUS_DMASYNC_POSTWRITE);
2360		bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap);
2361		m_freem(txb->vtxb_m);
2362		txb->vtxb_m = NULL;
2363	}
2364}
2365
2366static void
2367vmxnet3_rxstop(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2368{
2369	struct vmxnet3_rxring *rxr;
2370	struct vmxnet3_rxbuf *rxb;
2371	int i, j;
2372
2373	if (rxq->vxrxq_mhead != NULL) {
2374		m_freem(rxq->vxrxq_mhead);
2375		rxq->vxrxq_mhead = NULL;
2376		rxq->vxrxq_mtail = NULL;
2377	}
2378
2379	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
2380		rxr = &rxq->vxrxq_cmd_ring[i];
2381
2382		for (j = 0; j < rxr->vxrxr_ndesc; j++) {
2383			rxb = &rxr->vxrxr_rxbuf[j];
2384
2385			if (rxb->vrxb_m == NULL)
2386				continue;
2387
2388			bus_dmamap_sync(rxr->vxrxr_rxtag, rxb->vrxb_dmamap,
2389			    BUS_DMASYNC_POSTREAD);
2390			bus_dmamap_unload(rxr->vxrxr_rxtag, rxb->vrxb_dmamap);
2391			m_freem(rxb->vrxb_m);
2392			rxb->vrxb_m = NULL;
2393		}
2394	}
2395}
2396
2397static void
2398vmxnet3_stop_rendezvous(struct vmxnet3_softc *sc)
2399{
2400	struct vmxnet3_rxqueue *rxq;
2401	struct vmxnet3_txqueue *txq;
2402	int i;
2403
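	/*
	 * Taking and immediately dropping each queue lock acts as a
	 * rendezvous: any interrupt handler still running on a queue
	 * completes before the caller proceeds to tear the queue down.
	 */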
2404	for (i = 0; i < sc->vmx_nrxqueues; i++) {
2405		rxq = &sc->vmx_rxq[i];
2406		VMXNET3_RXQ_LOCK(rxq);
2407		VMXNET3_RXQ_UNLOCK(rxq);
2408	}
2409
2410	for (i = 0; i < sc->vmx_ntxqueues; i++) {
2411		txq = &sc->vmx_txq[i];
2412		VMXNET3_TXQ_LOCK(txq);
2413		VMXNET3_TXQ_UNLOCK(txq);
2414	}
2415}
2416
2417static void
2418vmxnet3_stop(struct vmxnet3_softc *sc)
2419{
2420	struct ifnet *ifp;
2421	int q;
2422
2423	ifp = sc->vmx_ifp;
2424	VMXNET3_CORE_LOCK_ASSERT(sc);
2425
2426	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2427	sc->vmx_link_active = 0;
2428	callout_stop(&sc->vmx_tick);
2429
2430	/* Disable interrupts. */
2431	vmxnet3_disable_all_intrs(sc);
2432	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
2433
2434	vmxnet3_stop_rendezvous(sc);
2435
2436	for (q = 0; q < sc->vmx_ntxqueues; q++)
2437		vmxnet3_txstop(sc, &sc->vmx_txq[q]);
2438	for (q = 0; q < sc->vmx_nrxqueues; q++)
2439		vmxnet3_rxstop(sc, &sc->vmx_rxq[q]);
2440
2441	vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
2442}
2443
2444static void
2445vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2446{
2447	struct vmxnet3_txring *txr;
2448	struct vmxnet3_comp_ring *txc;
2449
2450	txr = &txq->vxtxq_cmd_ring;
2451	txr->vxtxr_head = 0;
2452	txr->vxtxr_next = 0;
2453	txr->vxtxr_gen = VMXNET3_INIT_GEN;
2454	bzero(txr->vxtxr_txd,
2455	    txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc));
2456
2457	txc = &txq->vxtxq_comp_ring;
2458	txc->vxcr_next = 0;
2459	txc->vxcr_gen = VMXNET3_INIT_GEN;
2460	bzero(txc->vxcr_u.txcd,
2461	    txc->vxcr_ndesc * sizeof(struct vmxnet3_txcompdesc));
2462}
2463
2464static int
2465vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2466{
2467	struct ifnet *ifp;
2468	struct vmxnet3_rxring *rxr;
2469	struct vmxnet3_comp_ring *rxc;
2470	int i, populate, idx, frame_size, error;
2471
2472	ifp = sc->vmx_ifp;
2473	frame_size = ETHER_ALIGN + sizeof(struct ether_vlan_header) +
2474	    ifp->if_mtu;
2475
2476	/*
2477	 * If the MTU causes us to exceed what a regular-sized cluster can
2478	 * handle, we allocate a second MJUMPAGESIZE cluster after it in
2479	 * ring 0. If in use, ring 1 always contains MJUMPAGESIZE clusters.
2480	 *
2481	 * Keep rx_max_chain a divisor of the maximum Rx ring size to make
2482	 * our life easier. We do not support changing the ring size after
2483	 * attach.
2484	 */
2485	if (frame_size <= MCLBYTES)
2486		sc->vmx_rx_max_chain = 1;
2487	else
2488		sc->vmx_rx_max_chain = 2;
2489
2490	/*
2491	 * Only populate ring 1 if the configuration will take advantage
2492	 * of it. That is either when LRO is enabled or the frame size
2493	 * exceeds what ring 0 can contain.
2494	 */
2495	if ((ifp->if_capenable & IFCAP_LRO) == 0 &&
2496	    frame_size <= MCLBYTES + MJUMPAGESIZE)
2497		populate = 1;
2498	else
2499		populate = VMXNET3_RXRINGS_PERQ;
2500
2501	for (i = 0; i < populate; i++) {
2502		rxr = &rxq->vxrxq_cmd_ring[i];
2503		rxr->vxrxr_fill = 0;
2504		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
2505		bzero(rxr->vxrxr_rxd,
2506		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2507
2508		for (idx = 0; idx < rxr->vxrxr_ndesc; idx++) {
2509			error = vmxnet3_newbuf(sc, rxr);
2510			if (error)
2511				return (error);
2512		}
2513	}
2514
2515	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
2516		rxr = &rxq->vxrxq_cmd_ring[i];
2517		rxr->vxrxr_fill = 0;
2518		rxr->vxrxr_gen = 0;
2519		bzero(rxr->vxrxr_rxd,
2520		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2521	}
2522
2523	rxc = &rxq->vxrxq_comp_ring;
2524	rxc->vxcr_next = 0;
2525	rxc->vxcr_gen = VMXNET3_INIT_GEN;
2526	bzero(rxc->vxcr_u.rxcd,
2527	    rxc->vxcr_ndesc * sizeof(struct vmxnet3_rxcompdesc));
2528
2529	return (0);
2530}
2531
2532static int
2533vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
2534{
2535	device_t dev;
2536	int q, error;
2537
2538	dev = sc->vmx_dev;
2539
2540	for (q = 0; q < sc->vmx_ntxqueues; q++)
2541		vmxnet3_txinit(sc, &sc->vmx_txq[q]);
2542
2543	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2544		error = vmxnet3_rxinit(sc, &sc->vmx_rxq[q]);
2545		if (error) {
2546			device_printf(dev, "cannot populate Rx queue %d\n", q);
2547			return (error);
2548		}
2549	}
2550
2551	return (0);
2552}
2553
2554static int
2555vmxnet3_enable_device(struct vmxnet3_softc *sc)
2556{
2557	int q;
2558
2559	if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
2560		device_printf(sc->vmx_dev, "device enable command failed!\n");
2561		return (1);
2562	}
2563
2564	/* Reset the Rx queue heads. */
2565	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2566		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
2567		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
2568	}
2569
2570	return (0);
2571}
2572
2573static void
2574vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
2575{
2576	struct ifnet *ifp;
2577
2578	ifp = sc->vmx_ifp;
2579
2580	vmxnet3_set_rxfilter(sc);
2581
2582	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2583		bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter,
2584		    sizeof(sc->vmx_ds->vlan_filter));
2585	else
2586		bzero(sc->vmx_ds->vlan_filter,
2587		    sizeof(sc->vmx_ds->vlan_filter));
2588	vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
2589}
2590
2591static int
2592vmxnet3_reinit(struct vmxnet3_softc *sc)
2593{
2594
2595	vmxnet3_reinit_interface(sc);
2596	vmxnet3_reinit_shared_data(sc);
2597
2598	if (vmxnet3_reinit_queues(sc) != 0)
2599		return (ENXIO);
2600
2601	if (vmxnet3_enable_device(sc) != 0)
2602		return (ENXIO);
2603
2604	vmxnet3_reinit_rxfilters(sc);
2605
2606	return (0);
2607}
2608
2609static void
2610vmxnet3_init_locked(struct vmxnet3_softc *sc)
2611{
2612	struct ifnet *ifp;
2613
2614	ifp = sc->vmx_ifp;
2615
2616	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2617		return;
2618
2619	vmxnet3_stop(sc);
2620
2621	if (vmxnet3_reinit(sc) != 0) {
2622		vmxnet3_stop(sc);
2623		return;
2624	}
2625
2626	ifp->if_drv_flags |= IFF_DRV_RUNNING;
2627	vmxnet3_link_status(sc);
2628
2629	vmxnet3_enable_all_intrs(sc);
2630	callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
2631}
2632
2633static void
2634vmxnet3_init(void *xsc)
2635{
2636	struct vmxnet3_softc *sc;
2637
2638	sc = xsc;
2639
2640	VMXNET3_CORE_LOCK(sc);
2641	vmxnet3_init_locked(sc);
2642	VMXNET3_CORE_UNLOCK(sc);
2643}
2644
2645/*
2646 * BMV: Much of this can go away once we finally have offsets in
2647 * the mbuf packet header. Bug andre@.
2648 */
2649static int
2650vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *txq, struct mbuf *m,
2651    int *etype, int *proto, int *start)
2652{
2653	struct ether_vlan_header *evh;
2654	int offset;
2655#if defined(INET)
2656	struct ip *ip = NULL;
2657	struct ip iphdr;
2658#endif
2659#if defined(INET6)
2660	struct ip6_hdr *ip6 = NULL;
2661	struct ip6_hdr ip6hdr;
2662#endif
2663
2664	evh = mtod(m, struct ether_vlan_header *);
2665	if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2666		/* BMV: We should handle nested VLAN tags too. */
2667		*etype = ntohs(evh->evl_proto);
2668		offset = sizeof(struct ether_vlan_header);
2669	} else {
2670		*etype = ntohs(evh->evl_encap_proto);
2671		offset = sizeof(struct ether_header);
2672	}
2673
2674	switch (*etype) {
2675#if defined(INET)
2676	case ETHERTYPE_IP:
2677		if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
2678			m_copydata(m, offset, sizeof(struct ip),
2679			    (caddr_t) &iphdr);
2680			ip = &iphdr;
2681		} else
2682			ip = mtodo(m, offset);
2683		*proto = ip->ip_p;
2684		*start = offset + (ip->ip_hl << 2);
2685		break;
2686#endif
2687#if defined(INET6)
2688	case ETHERTYPE_IPV6:
2689		if (__predict_false(m->m_len <
2690		    offset + sizeof(struct ip6_hdr))) {
2691			m_copydata(m, offset, sizeof(struct ip6_hdr),
2692			    (caddr_t) &ip6hdr);
2693			ip6 = &ip6hdr;
2694		} else
2695			ip6 = mtodo(m, offset);
2696		*proto = -1;
2697		*start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
2698		/* Assert the network stack sent us a valid packet. */
2699		KASSERT(*start > offset,
2700		    ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
2701		    *start, offset, *proto));
2702		break;
2703#endif
2704	default:
2705		return (EINVAL);
2706	}
2707
2708	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
2709		struct tcphdr *tcp, tcphdr;
2710		uint16_t sum;
2711
2712		if (__predict_false(*proto != IPPROTO_TCP)) {
2713			/* Likely failed to correctly parse the mbuf. */
2714			return (EINVAL);
2715		}
2716
2717		txq->vxtxq_stats.vmtxs_tso++;
2718
2719		switch (*etype) {
2720#if defined(INET)
2721		case ETHERTYPE_IP:
2722			sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2723			    htons(IPPROTO_TCP));
2724			break;
2725#endif
2726#if defined(INET6)
2727		case ETHERTYPE_IPV6:
2728			sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
2729			break;
2730#endif
2731		default:
2732			sum = 0;
2733			break;
2734		}
2735
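		/*
		 * Seed the TCP checksum field with the pseudo-header
		 * checksum computed above: written directly when the header
		 * is contiguous, otherwise copied back into the chain.
		 */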
2736		if (m->m_len < *start + sizeof(struct tcphdr)) {
2737			m_copyback(m, *start + offsetof(struct tcphdr, th_sum),
2738			    sizeof(uint16_t), (caddr_t) &sum);
2739			m_copydata(m, *start, sizeof(struct tcphdr),
2740			    (caddr_t) &tcphdr);
2741			tcp = &tcphdr;
2742		} else {
2743			tcp = mtodo(m, *start);
2744			tcp->th_sum = sum;
2745		}
2746
2747		/*
2748		 * For TSO, the size of the protocol header is also
2749		 * included in the descriptor header size.
2750		 */
2751		*start += (tcp->th_off << 2);
2752	} else
2753		txq->vxtxq_stats.vmtxs_csum++;
2754
2755	return (0);
2756}
2757
2758static int
2759vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *txq, struct mbuf **m0,
2760    bus_dmamap_t dmap, bus_dma_segment_t segs[], int *nsegs)
2761{
2762	struct vmxnet3_txring *txr;
2763	struct mbuf *m;
2764	bus_dma_tag_t tag;
2765	int error;
2766
2767	txr = &txq->vxtxq_cmd_ring;
2768	m = *m0;
2769	tag = txr->vxtxr_txtag;
2770
2771	error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0);
2772	if (error == 0 || error != EFBIG)
2773		return (error);
2774
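	/*
	 * Only EFBIG (too many DMA segments) is worth retrying: collapse
	 * the chain with m_defrag() and attempt the load once more.
	 */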
2775	m = m_defrag(m, M_NOWAIT);
2776	if (m != NULL) {
2777		*m0 = m;
2778		error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0);
2779	} else
2780		error = ENOBUFS;
2781
2782	if (error) {
2783		m_freem(*m0);
2784		*m0 = NULL;
2785		txq->vxtxq_sc->vmx_stats.vmst_defrag_failed++;
2786	} else
2787		txq->vxtxq_sc->vmx_stats.vmst_defragged++;
2788
2789	return (error);
2790}
2791
2792static void
2793vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *txq, bus_dmamap_t dmap)
2794{
2795	struct vmxnet3_txring *txr;
2796
2797	txr = &txq->vxtxq_cmd_ring;
2798	bus_dmamap_unload(txr->vxtxr_txtag, dmap);
2799}
2800
2801static int
2802vmxnet3_txq_encap(struct vmxnet3_txqueue *txq, struct mbuf **m0)
2803{
2804	struct vmxnet3_softc *sc;
2805	struct vmxnet3_txring *txr;
2806	struct vmxnet3_txdesc *txd, *sop;
2807	struct mbuf *m;
2808	bus_dmamap_t dmap;
2809	bus_dma_segment_t segs[VMXNET3_TX_MAXSEGS];
2810	int i, gen, nsegs, etype, proto, start, error;
2811
2812	sc = txq->vxtxq_sc;
2813	start = 0;
2814	txd = NULL;
2815	txr = &txq->vxtxq_cmd_ring;
2816	dmap = txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_dmamap;
2817
2818	error = vmxnet3_txq_load_mbuf(txq, m0, dmap, segs, &nsegs);
2819	if (error)
2820		return (error);
2821
2822	m = *m0;
2823	M_ASSERTPKTHDR(m);
2824	KASSERT(nsegs <= VMXNET3_TX_MAXSEGS,
2825	    ("%s: mbuf %p with too many segments %d", __func__, m, nsegs));
2826
2827	if (VMXNET3_TXRING_AVAIL(txr) < nsegs) {
2828		txq->vxtxq_stats.vmtxs_full++;
2829		vmxnet3_txq_unload_mbuf(txq, dmap);
2830		return (ENOSPC);
2831	} else if (m->m_pkthdr.csum_flags & VMXNET3_CSUM_ALL_OFFLOAD) {
2832		error = vmxnet3_txq_offload_ctx(txq, m, &etype, &proto, &start);
2833		if (error) {
2834			txq->vxtxq_stats.vmtxs_offload_failed++;
2835			vmxnet3_txq_unload_mbuf(txq, dmap);
2836			m_freem(m);
2837			*m0 = NULL;
2838			return (error);
2839		}
2840	}
2841
2842	txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_m = m;
2843	sop = &txr->vxtxr_txd[txr->vxtxr_head];
2844	gen = txr->vxtxr_gen ^ 1;	/* Still owned by the CPU for now. */
2845
2846	for (i = 0; i < nsegs; i++) {
2847		txd = &txr->vxtxr_txd[txr->vxtxr_head];
2848
2849		txd->addr = segs[i].ds_addr;
2850		txd->len = segs[i].ds_len;
2851		txd->gen = gen;
2852		txd->dtype = 0;
2853		txd->offload_mode = VMXNET3_OM_NONE;
2854		txd->offload_pos = 0;
2855		txd->hlen = 0;
2856		txd->eop = 0;
2857		txd->compreq = 0;
2858		txd->vtag_mode = 0;
2859		txd->vtag = 0;
2860
2861		if (++txr->vxtxr_head == txr->vxtxr_ndesc) {
2862			txr->vxtxr_head = 0;
2863			txr->vxtxr_gen ^= 1;
2864		}
2865		gen = txr->vxtxr_gen;
2866	}
2867	txd->eop = 1;
2868	txd->compreq = 1;
2869
2870	if (m->m_flags & M_VLANTAG) {
2871		sop->vtag_mode = 1;
2872		sop->vtag = m->m_pkthdr.ether_vtag;
2873	}
2874
2875	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
2876		sop->offload_mode = VMXNET3_OM_TSO;
2877		sop->hlen = start;
2878		sop->offload_pos = m->m_pkthdr.tso_segsz;
2879	} else if (m->m_pkthdr.csum_flags & (VMXNET3_CSUM_OFFLOAD |
2880	    VMXNET3_CSUM_OFFLOAD_IPV6)) {
2881		sop->offload_mode = VMXNET3_OM_CSUM;
2882		sop->hlen = start;
2883		sop->offload_pos = start + m->m_pkthdr.csum_data;
2884	}
2885
2886	/* Finally, change the ownership. */
2887	vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
2888	sop->gen ^= 1;
2889
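	/*
	 * Batch Tx doorbell writes: the head register is only updated once
	 * enough descriptors have been posted to cross intr_threshold.
	 */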
2890	txq->vxtxq_ts->npending += nsegs;
2891	if (txq->vxtxq_ts->npending >= txq->vxtxq_ts->intr_threshold) {
2892		txq->vxtxq_ts->npending = 0;
2893		vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id),
2894		    txr->vxtxr_head);
2895	}
2896
2897	return (0);
2898}
2899
2900#ifdef VMXNET3_LEGACY_TX
2901
2902static void
2903vmxnet3_start_locked(struct ifnet *ifp)
2904{
2905	struct vmxnet3_softc *sc;
2906	struct vmxnet3_txqueue *txq;
2907	struct vmxnet3_txring *txr;
2908	struct mbuf *m_head;
2909	int tx, avail;
2910
2911	sc = ifp->if_softc;
2912	txq = &sc->vmx_txq[0];
2913	txr = &txq->vxtxq_cmd_ring;
2914	tx = 0;
2915
2916	VMXNET3_TXQ_LOCK_ASSERT(txq);
2917
2918	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2919	    sc->vmx_link_active == 0)
2920		return;
2921
2922	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
2923		if ((avail = VMXNET3_TXRING_AVAIL(txr)) < 2)
2924			break;
2925
2926		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
2927		if (m_head == NULL)
2928			break;
2929
2930		/* Assume the worst case if this mbuf is the head of a chain. */
2931		if (m_head->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) {
2932			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2933			break;
2934		}
2935
2936		if (vmxnet3_txq_encap(txq, &m_head) != 0) {
2937			if (m_head != NULL)
2938				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2939			break;
2940		}
2941
2942		tx++;
2943		ETHER_BPF_MTAP(ifp, m_head);
2944	}
2945
2946	if (tx > 0)
2947		txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
2948}
2949
2950static void
2951vmxnet3_start(struct ifnet *ifp)
2952{
2953	struct vmxnet3_softc *sc;
2954	struct vmxnet3_txqueue *txq;
2955
2956	sc = ifp->if_softc;
2957	txq = &sc->vmx_txq[0];
2958
2959	VMXNET3_TXQ_LOCK(txq);
2960	vmxnet3_start_locked(ifp);
2961	VMXNET3_TXQ_UNLOCK(txq);
2962}
2963
2964#else /* !VMXNET3_LEGACY_TX */
2965
2966static int
2967vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *txq, struct mbuf *m)
2968{
2969	struct vmxnet3_softc *sc;
2970	struct vmxnet3_txring *txr;
2971	struct buf_ring *br;
2972	struct ifnet *ifp;
2973	int tx, avail, error;
2974
2975	sc = txq->vxtxq_sc;
2976	br = txq->vxtxq_br;
2977	ifp = sc->vmx_ifp;
2978	txr = &txq->vxtxq_cmd_ring;
2979	tx = 0;
2980	error = 0;
2981
2982	VMXNET3_TXQ_LOCK_ASSERT(txq);
2983
2984	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2985	    sc->vmx_link_active == 0) {
2986		if (m != NULL)
2987			error = drbr_enqueue(ifp, br, m);
2988		return (error);
2989	}
2990
2991	if (m != NULL) {
2992		error = drbr_enqueue(ifp, br, m);
2993		if (error)
2994			return (error);
2995	}
2996
2997	while ((avail = VMXNET3_TXRING_AVAIL(txr)) >= 2) {
2998		m = drbr_peek(ifp, br);
2999		if (m == NULL)
3000			break;
3001
3002		/* Assume the worst case if this mbuf is the head of a chain. */
3003		if (m->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) {
3004			drbr_putback(ifp, br, m);
3005			break;
3006		}
3007
3008		if (vmxnet3_txq_encap(txq, &m) != 0) {
3009			if (m != NULL)
3010				drbr_putback(ifp, br, m);
3011			else
3012				drbr_advance(ifp, br);
3013			break;
3014		}
3015		drbr_advance(ifp, br);
3016
3017		tx++;
3018		ETHER_BPF_MTAP(ifp, m);
3019	}
3020
3021	if (tx > 0)
3022		txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
3023
3024	return (0);
3025}
3026
3027static int
3028vmxnet3_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
3029{
3030	struct vmxnet3_softc *sc;
3031	struct vmxnet3_txqueue *txq;
3032	int i, ntxq, error;
3033
3034	sc = ifp->if_softc;
3035	ntxq = sc->vmx_ntxqueues;
3036
3037	/* Use the flowid to pick a queue when an RSS hash is present. */
3038	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
3039		i = m->m_pkthdr.flowid % ntxq;
3040	else
3041		i = curcpu % ntxq;
3042
3043	txq = &sc->vmx_txq[i];
3044
3045	if (VMXNET3_TXQ_TRYLOCK(txq) != 0) {
3046		error = vmxnet3_txq_mq_start_locked(txq, m);
3047		VMXNET3_TXQ_UNLOCK(txq);
3048	} else {
3049		error = drbr_enqueue(ifp, txq->vxtxq_br, m);
3050		taskqueue_enqueue(sc->vmx_tq, &txq->vxtxq_defrtask);
3051	}
3052
3053	return (error);
3054}
3055
3056static void
3057vmxnet3_txq_tq_deferred(void *xtxq, int pending)
3058{
3059	struct vmxnet3_softc *sc;
3060	struct vmxnet3_txqueue *txq;
3061
3062	txq = xtxq;
3063	sc = txq->vxtxq_sc;
3064
3065	VMXNET3_TXQ_LOCK(txq);
3066	if (!drbr_empty(sc->vmx_ifp, txq->vxtxq_br))
3067		vmxnet3_txq_mq_start_locked(txq, NULL);
3068	VMXNET3_TXQ_UNLOCK(txq);
3069}
3070
3071#endif /* VMXNET3_LEGACY_TX */
3072
3073static void
3074vmxnet3_txq_start(struct vmxnet3_txqueue *txq)
3075{
3076	struct vmxnet3_softc *sc;
3077	struct ifnet *ifp;
3078
3079	sc = txq->vxtxq_sc;
3080	ifp = sc->vmx_ifp;
3081
3082#ifdef VMXNET3_LEGACY_TX
3083	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
3084		vmxnet3_start_locked(ifp);
3085#else
3086	if (!drbr_empty(ifp, txq->vxtxq_br))
3087		vmxnet3_txq_mq_start_locked(txq, NULL);
3088#endif
3089}
3090
3091static void
3092vmxnet3_tx_start_all(struct vmxnet3_softc *sc)
3093{
3094	struct vmxnet3_txqueue *txq;
3095	int i;
3096
3097	VMXNET3_CORE_LOCK_ASSERT(sc);
3098
3099	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3100		txq = &sc->vmx_txq[i];
3101
3102		VMXNET3_TXQ_LOCK(txq);
3103		vmxnet3_txq_start(txq);
3104		VMXNET3_TXQ_UNLOCK(txq);
3105	}
3106}
3107
3108static void
3109vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
3110{
3111	struct ifnet *ifp;
3112	int idx, bit;
3113
3114	ifp = sc->vmx_ifp;
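	/*
	 * The VLAN filter is a 4096-bit vector stored as 128 32-bit words;
	 * idx selects the word and bit the position within it.
	 */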
3115	idx = (tag >> 5) & 0x7F;
3116	bit = tag & 0x1F;
3117
3118	if (tag == 0 || tag > 4095)
3119		return;
3120
3121	VMXNET3_CORE_LOCK(sc);
3122
3123	/* Update our private VLAN bitvector. */
3124	if (add)
3125		sc->vmx_vlan_filter[idx] |= (1 << bit);
3126	else
3127		sc->vmx_vlan_filter[idx] &= ~(1 << bit);
3128
3129	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
3130		if (add)
3131			sc->vmx_ds->vlan_filter[idx] |= (1 << bit);
3132		else
3133			sc->vmx_ds->vlan_filter[idx] &= ~(1 << bit);
3134		vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
3135	}
3136
3137	VMXNET3_CORE_UNLOCK(sc);
3138}
3139
3140static void
3141vmxnet3_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3142{
3143
3144	if (ifp->if_softc == arg)
3145		vmxnet3_update_vlan_filter(arg, 1, tag);
3146}
3147
3148static void
3149vmxnet3_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3150{
3151
3152	if (ifp->if_softc == arg)
3153		vmxnet3_update_vlan_filter(arg, 0, tag);
3154}
3155
3156static void
3157vmxnet3_set_rxfilter(struct vmxnet3_softc *sc)
3158{
3159	struct ifnet *ifp;
3160	struct vmxnet3_driver_shared *ds;
3161	struct ifmultiaddr *ifma;
3162	u_int mode;
3163
3164	ifp = sc->vmx_ifp;
3165	ds = sc->vmx_ds;
3166
3167	mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
3168	if (ifp->if_flags & IFF_PROMISC)
3169		mode |= VMXNET3_RXMODE_PROMISC;
3170	if (ifp->if_flags & IFF_ALLMULTI)
3171		mode |= VMXNET3_RXMODE_ALLMULTI;
3172	else {
3173		int cnt = 0, overflow = 0;
3174
3175		if_maddr_rlock(ifp);
3176		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3177			if (ifma->ifma_addr->sa_family != AF_LINK)
3178				continue;
3179			else if (cnt == VMXNET3_MULTICAST_MAX) {
3180				overflow = 1;
3181				break;
3182			}
3183
3184			bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
3185			    &sc->vmx_mcast[cnt * ETHER_ADDR_LEN], ETHER_ADDR_LEN);
3186			cnt++;
3187		}
3188		if_maddr_runlock(ifp);
3189
3190		if (overflow != 0) {
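		/*
		 * If the multicast list exceeded VMXNET3_MULTICAST_MAX
		 * entries, fall back to receiving all multicast traffic
		 * rather than programming a truncated table.
		 */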
3191			cnt = 0;
3192			mode |= VMXNET3_RXMODE_ALLMULTI;
3193		} else if (cnt > 0)
3194			mode |= VMXNET3_RXMODE_MCAST;
3195		ds->mcast_tablelen = cnt * ETHER_ADDR_LEN;
3196	}
3197
3198	ds->rxmode = mode;
3199
3200	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
3201	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
3202}
3203
3204static int
3205vmxnet3_change_mtu(struct vmxnet3_softc *sc, int mtu)
3206{
3207	struct ifnet *ifp;
3208
3209	ifp = sc->vmx_ifp;
3210
3211	if (mtu < VMXNET3_MIN_MTU || mtu > VMXNET3_MAX_MTU)
3212		return (EINVAL);
3213
3214	ifp->if_mtu = mtu;
3215
3216	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
3217		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3218		vmxnet3_init_locked(sc);
3219	}
3220
3221	return (0);
3222}
3223
3224static int
3225vmxnet3_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
3226{
3227	struct vmxnet3_softc *sc;
3228	struct ifreq *ifr;
3229	int reinit, mask, error;
3230
3231	sc = ifp->if_softc;
3232	ifr = (struct ifreq *) data;
3233	error = 0;
3234
3235	switch (cmd) {
3236	case SIOCSIFMTU:
3237		if (ifp->if_mtu != ifr->ifr_mtu) {
3238			VMXNET3_CORE_LOCK(sc);
3239			error = vmxnet3_change_mtu(sc, ifr->ifr_mtu);
3240			VMXNET3_CORE_UNLOCK(sc);
3241		}
3242		break;
3243
3244	case SIOCSIFFLAGS:
3245		VMXNET3_CORE_LOCK(sc);
3246		if (ifp->if_flags & IFF_UP) {
3247			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3248				if ((ifp->if_flags ^ sc->vmx_if_flags) &
3249				    (IFF_PROMISC | IFF_ALLMULTI)) {
3250					vmxnet3_set_rxfilter(sc);
3251				}
3252			} else
3253				vmxnet3_init_locked(sc);
3254		} else {
3255			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3256				vmxnet3_stop(sc);
3257		}
3258		sc->vmx_if_flags = ifp->if_flags;
3259		VMXNET3_CORE_UNLOCK(sc);
3260		break;
3261
3262	case SIOCADDMULTI:
3263	case SIOCDELMULTI:
3264		VMXNET3_CORE_LOCK(sc);
3265		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3266			vmxnet3_set_rxfilter(sc);
3267		VMXNET3_CORE_UNLOCK(sc);
3268		break;
3269
3270	case SIOCSIFMEDIA:
3271	case SIOCGIFMEDIA:
3272		error = ifmedia_ioctl(ifp, ifr, &sc->vmx_media, cmd);
3273		break;
3274
3275	case SIOCSIFCAP:
3276		VMXNET3_CORE_LOCK(sc);
3277		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3278
3279		if (mask & IFCAP_TXCSUM)
3280			ifp->if_capenable ^= IFCAP_TXCSUM;
3281		if (mask & IFCAP_TXCSUM_IPV6)
3282			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
3283		if (mask & IFCAP_TSO4)
3284			ifp->if_capenable ^= IFCAP_TSO4;
3285		if (mask & IFCAP_TSO6)
3286			ifp->if_capenable ^= IFCAP_TSO6;
3287
3288		if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO |
3289		    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWFILTER)) {
3290			/* Changing these features requires us to reinit. */
3291			reinit = 1;
3292
3293			if (mask & IFCAP_RXCSUM)
3294				ifp->if_capenable ^= IFCAP_RXCSUM;
3295			if (mask & IFCAP_RXCSUM_IPV6)
3296				ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
3297			if (mask & IFCAP_LRO)
3298				ifp->if_capenable ^= IFCAP_LRO;
3299			if (mask & IFCAP_VLAN_HWTAGGING)
3300				ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
3301			if (mask & IFCAP_VLAN_HWFILTER)
3302				ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
3303		} else
3304			reinit = 0;
3305
3306		if (mask & IFCAP_VLAN_HWTSO)
3307			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
3308
3309		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3310			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3311			vmxnet3_init_locked(sc);
3312		} else {
3313			vmxnet3_init_hwassist(sc);
3314		}
3315
3316		VMXNET3_CORE_UNLOCK(sc);
3317		VLAN_CAPABILITIES(ifp);
3318		break;
3319
3320	default:
3321		error = ether_ioctl(ifp, cmd, data);
3322		break;
3323	}
3324
3325	VMXNET3_CORE_LOCK_ASSERT_NOTOWNED(sc);
3326
3327	return (error);
3328}
3329
3330#ifndef VMXNET3_LEGACY_TX
3331static void
3332vmxnet3_qflush(struct ifnet *ifp)
3333{
3334	struct vmxnet3_softc *sc;
3335	struct vmxnet3_txqueue *txq;
3336	struct mbuf *m;
3337	int i;
3338
3339	sc = ifp->if_softc;
3340
3341	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3342		txq = &sc->vmx_txq[i];
3343
3344		VMXNET3_TXQ_LOCK(txq);
3345		while ((m = buf_ring_dequeue_sc(txq->vxtxq_br)) != NULL)
3346			m_freem(m);
3347		VMXNET3_TXQ_UNLOCK(txq);
3348	}
3349
3350	if_qflush(ifp);
3351}
3352#endif
3353
3354static int
3355vmxnet3_watchdog(struct vmxnet3_txqueue *txq)
3356{
3357	struct vmxnet3_softc *sc;
3358
3359	sc = txq->vxtxq_sc;
3360
3361	VMXNET3_TXQ_LOCK(txq);
3362	if (txq->vxtxq_watchdog == 0 || --txq->vxtxq_watchdog) {
3363		VMXNET3_TXQ_UNLOCK(txq);
3364		return (0);
3365	}
3366	VMXNET3_TXQ_UNLOCK(txq);
3367
3368	if_printf(sc->vmx_ifp, "watchdog timeout on queue %d\n",
3369	    txq->vxtxq_id);
3370	return (1);
3371}
3372
3373static void
3374vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
3375{
3376
3377	vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
3378}
3379
3380static uint64_t
3381vmxnet3_get_counter(struct ifnet *ifp, ift_counter cnt)
3382{
3383	struct vmxnet3_softc *sc;
3384	uint64_t rv;
3385
3386	sc = if_getsoftc(ifp);
3387	rv = 0;
3388
3389	/*
3390	 * With the exception of if_ierrors, these ifnet statistics are
3391	 * only updated in the driver, so just set them to our accumulated
3392	 * values. if_ierrors is updated in ether_input() for malformed
3393	 * frames that we should have already discarded.
3394	 */
3395	switch (cnt) {
3396	case IFCOUNTER_IPACKETS:
3397		for (int i = 0; i < sc->vmx_nrxqueues; i++)
3398			rv += sc->vmx_rxq[i].vxrxq_stats.vmrxs_ipackets;
3399		return (rv);
3400	case IFCOUNTER_IQDROPS:
3401		for (int i = 0; i < sc->vmx_nrxqueues; i++)
3402			rv += sc->vmx_rxq[i].vxrxq_stats.vmrxs_iqdrops;
3403		return (rv);
3404	case IFCOUNTER_IERRORS:
3405		for (int i = 0; i < sc->vmx_nrxqueues; i++)
3406			rv += sc->vmx_rxq[i].vxrxq_stats.vmrxs_ierrors;
3407		return (rv);
3408	case IFCOUNTER_OPACKETS:
3409		for (int i = 0; i < sc->vmx_ntxqueues; i++)
3410			rv += sc->vmx_txq[i].vxtxq_stats.vmtxs_opackets;
3411		return (rv);
3412#ifndef VMXNET3_LEGACY_TX
3413	case IFCOUNTER_OBYTES:
3414		for (int i = 0; i < sc->vmx_ntxqueues; i++)
3415			rv += sc->vmx_txq[i].vxtxq_stats.vmtxs_obytes;
3416		return (rv);
3417	case IFCOUNTER_OMCASTS:
3418		for (int i = 0; i < sc->vmx_ntxqueues; i++)
3419			rv += sc->vmx_txq[i].vxtxq_stats.vmtxs_omcasts;
3420		return (rv);
3421#endif
3422	default:
3423		return (if_get_counter_default(ifp, cnt));
3424	}
3425}
3426
3427static void
3428vmxnet3_tick(void *xsc)
3429{
3430	struct vmxnet3_softc *sc;
3431	struct ifnet *ifp;
3432	int i, timedout;
3433
3434	sc = xsc;
3435	ifp = sc->vmx_ifp;
3436	timedout = 0;
3437
3438	VMXNET3_CORE_LOCK_ASSERT(sc);
3439
3440	vmxnet3_refresh_host_stats(sc);
3441
3442	for (i = 0; i < sc->vmx_ntxqueues; i++)
3443		timedout |= vmxnet3_watchdog(&sc->vmx_txq[i]);
3444
3445	if (timedout != 0) {
3446		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3447		vmxnet3_init_locked(sc);
3448	} else
3449		callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
3450}
3451
3452static int
3453vmxnet3_link_is_up(struct vmxnet3_softc *sc)
3454{
3455	uint32_t status;
3456
3457	/* Also update the link speed while here. */
3458	status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
3459	sc->vmx_link_speed = status >> 16;
3460	return (!!(status & 0x1));
3461}
3462
3463static void
3464vmxnet3_link_status(struct vmxnet3_softc *sc)
3465{
3466	struct ifnet *ifp;
3467	int link;
3468
3469	ifp = sc->vmx_ifp;
3470	link = vmxnet3_link_is_up(sc);
3471
3472	if (link != 0 && sc->vmx_link_active == 0) {
3473		sc->vmx_link_active = 1;
3474		if_link_state_change(ifp, LINK_STATE_UP);
3475	} else if (link == 0 && sc->vmx_link_active != 0) {
3476		sc->vmx_link_active = 0;
3477		if_link_state_change(ifp, LINK_STATE_DOWN);
3478	}
3479}
3480
3481static void
3482vmxnet3_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3483{
3484	struct vmxnet3_softc *sc;
3485
3486	sc = ifp->if_softc;
3487
3488	ifmr->ifm_active = IFM_ETHER | IFM_AUTO;
3489	ifmr->ifm_status = IFM_AVALID;
3490
3491	VMXNET3_CORE_LOCK(sc);
3492	if (vmxnet3_link_is_up(sc) != 0)
3493		ifmr->ifm_status |= IFM_ACTIVE;
3494	else
3495		ifmr->ifm_status |= IFM_NONE;
3496	VMXNET3_CORE_UNLOCK(sc);
3497}
3498
3499static int
3500vmxnet3_media_change(struct ifnet *ifp)
3501{
3502
3503	/* Ignore. */
3504	return (0);
3505}
3506
3507static void
3508vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
3509{
3510	uint32_t ml, mh;
3511
3512	ml  = sc->vmx_lladdr[0];
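	/*
	 * The MAC address spans two registers: MACL carries bytes 0-3 and
	 * MACH carries bytes 4-5, packed low byte first.
	 */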
3513	ml |= sc->vmx_lladdr[1] << 8;
3514	ml |= sc->vmx_lladdr[2] << 16;
3515	ml |= sc->vmx_lladdr[3] << 24;
3516	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
3517
3518	mh  = sc->vmx_lladdr[4];
3519	mh |= sc->vmx_lladdr[5] << 8;
3520	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
3521}
3522
3523static void
3524vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
3525{
3526	uint32_t ml, mh;
3527
3528	ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
3529	mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
3530
3531	sc->vmx_lladdr[0] = ml;
3532	sc->vmx_lladdr[1] = ml >> 8;
3533	sc->vmx_lladdr[2] = ml >> 16;
3534	sc->vmx_lladdr[3] = ml >> 24;
3535	sc->vmx_lladdr[4] = mh;
3536	sc->vmx_lladdr[5] = mh >> 8;
3537}
3538
3539static void
3540vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
3541    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3542{
3543	struct sysctl_oid *node, *txsnode;
3544	struct sysctl_oid_list *list, *txslist;
3545	struct vmxnet3_txq_stats *stats;
3546	struct UPT1_TxStats *txstats;
3547	char namebuf[16];
3548
3549	stats = &txq->vxtxq_stats;
3550	txstats = &txq->vxtxq_ts->stats;
3551
3552	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id);
3553	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
3554	    NULL, "Transmit Queue");
3555	txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);
3556
3557	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
3558	    &stats->vmtxs_opackets, "Transmit packets");
3559	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
3560	    &stats->vmtxs_obytes, "Transmit bytes");
3561	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
3562	    &stats->vmtxs_omcasts, "Transmit multicasts");
3563	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3564	    &stats->vmtxs_csum, "Transmit checksum offloaded");
3565	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
3566	    &stats->vmtxs_tso, "Transmit TCP segmentation offloaded");
3567	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ringfull", CTLFLAG_RD,
3568	    &stats->vmtxs_full, "Transmit ring full");
3569	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "offload_failed", CTLFLAG_RD,
3570	    &stats->vmtxs_offload_failed, "Transmit checksum offload failed");
3571
3572	/*
3573	 * Add statistics reported by the host. These are updated once
3574	 * per second.
3575	 */
3576	txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
3577	    NULL, "Host Statistics");
3578	txslist = SYSCTL_CHILDREN(txsnode);
3579	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
3580	    &txstats->TSO_packets, "TSO packets");
3581	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
3582	    &txstats->TSO_bytes, "TSO bytes");
3583	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
3584	    &txstats->ucast_packets, "Unicast packets");
3585	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
3586	    &txstats->ucast_bytes, "Unicast bytes");
3587	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
3588	    &txstats->mcast_packets, "Multicast packets");
3589	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
3590	    &txstats->mcast_bytes, "Multicast bytes");
3591	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
3592	    &txstats->error, "Errors");
3593	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
3594	    &txstats->discard, "Discards");
3595}
3596
3597static void
3598vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
3599    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3600{
3601	struct sysctl_oid *node, *rxsnode;
3602	struct sysctl_oid_list *list, *rxslist;
3603	struct vmxnet3_rxq_stats *stats;
3604	struct UPT1_RxStats *rxstats;
3605	char namebuf[16];
3606
3607	stats = &rxq->vxrxq_stats;
3608	rxstats = &rxq->vxrxq_rs->stats;
3609
3610	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
3611	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
3612	    NULL, "Receive Queue");
3613	rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);
3614
3615	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
3616	    &stats->vmrxs_ipackets, "Receive packets");
3617	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
3618	    &stats->vmrxs_ibytes, "Receive bytes");
3619	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
3620	    &stats->vmrxs_iqdrops, "Receive drops");
3621	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
3622	    &stats->vmrxs_ierrors, "Receive errors");
3623
3624	/*
3625	 * Add statistics reported by the host. These are updated once
3626	 * per second.
3627	 */
3628	rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
3629	    NULL, "Host Statistics");
3630	rxslist = SYSCTL_CHILDREN(rxsnode);
3631	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
3632	    &rxstats->LRO_packets, "LRO packets");
3633	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
3634	    &rxstats->LRO_bytes, "LRO bytes");
3635	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
3636	    &rxstats->ucast_packets, "Unicast packets");
3637	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
3638	    &rxstats->ucast_bytes, "Unicast bytes");
3639	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
3640	    &rxstats->mcast_packets, "Multicast packets");
3641	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
3642	    &rxstats->mcast_bytes, "Multicast bytes");
3643	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
3644	    &rxstats->bcast_packets, "Broadcast packets");
3645	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
3646	    &rxstats->bcast_bytes, "Broadcast bytes");
3647	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
3648	    &rxstats->nobuffer, "No buffer");
3649	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
3650	    &rxstats->error, "Errors");
3651}
3652
3653static void
3654vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
3655    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3656{
3657	struct sysctl_oid *node;
3658	struct sysctl_oid_list *list;
3659	int i;
3660
3661	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3662		struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];
3663
3664		node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
3665		    "debug", CTLFLAG_RD, NULL, "");
3666		list = SYSCTL_CHILDREN(node);
3667
3668		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_head", CTLFLAG_RD,
3669		    &txq->vxtxq_cmd_ring.vxtxr_head, 0, "");
3670		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
3671		    &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
3672		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
3673		    &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
3674		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
3675		    &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
3676		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
3677		    &txq->vxtxq_comp_ring.vxcr_next, 0, "");
3678		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
3679		    &txq->vxtxq_comp_ring.vxcr_ndesc, 0, "");
3680		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
3681		    &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
3682	}
3683
3684	for (i = 0; i < sc->vmx_nrxqueues; i++) {
3685		struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];
3686
3687		node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
3688		    "debug", CTLFLAG_RD, NULL, "");
3689		list = SYSCTL_CHILDREN(node);
3690
3691		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_fill", CTLFLAG_RD,
3692		    &rxq->vxrxq_cmd_ring[0].vxrxr_fill, 0, "");
3693		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
3694		    &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
3695		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
3696		    &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
3697		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_fill", CTLFLAG_RD,
3698		    &rxq->vxrxq_cmd_ring[1].vxrxr_fill, 0, "");
3699		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
3700		    &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
3701		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
3702		    &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
3703		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
3704		    &rxq->vxrxq_comp_ring.vxcr_next, 0, "");
3705		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
3706		    &rxq->vxrxq_comp_ring.vxcr_ndesc, 0, "");
3707		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
3708		    &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
3709	}
3710}
3711
3712static void
3713vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
3714    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3715{
3716	int i;
3717
3718	for (i = 0; i < sc->vmx_ntxqueues; i++)
3719		vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
3720	for (i = 0; i < sc->vmx_nrxqueues; i++)
3721		vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);
3722
3723	vmxnet3_setup_debug_sysctl(sc, ctx, child);
3724}
3725
3726static void
3727vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
3728{
3729	device_t dev;
3730	struct vmxnet3_statistics *stats;
3731	struct sysctl_ctx_list *ctx;
3732	struct sysctl_oid *tree;
3733	struct sysctl_oid_list *child;
3734
3735	dev = sc->vmx_dev;
3736	ctx = device_get_sysctl_ctx(dev);
3737	tree = device_get_sysctl_tree(dev);
3738	child = SYSCTL_CHILDREN(tree);
3739
3740	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_ntxqueues", CTLFLAG_RD,
3741	    &sc->vmx_max_ntxqueues, 0, "Maximum number of Tx queues");
3742	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_nrxqueues", CTLFLAG_RD,
3743	    &sc->vmx_max_nrxqueues, 0, "Maximum number of Rx queues");
3744	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "ntxqueues", CTLFLAG_RD,
3745	    &sc->vmx_ntxqueues, 0, "Number of Tx queues");
3746	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "nrxqueues", CTLFLAG_RD,
3747	    &sc->vmx_nrxqueues, 0, "Number of Rx queues");
3748
3749	stats = &sc->vmx_stats;
3750	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defragged", CTLFLAG_RD,
3751	    &stats->vmst_defragged, 0, "Tx mbuf chains defragged");
3752	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defrag_failed", CTLFLAG_RD,
3753	    &stats->vmst_defrag_failed, 0,
3754	    "Tx mbuf dropped because defrag failed");
3755	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mgetcl_failed", CTLFLAG_RD,
3756	    &stats->vmst_mgetcl_failed, 0, "mbuf cluster allocation failed");
3757	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mbuf_load_failed", CTLFLAG_RD,
3758	    &stats->vmst_mbuf_load_failed, 0, "mbuf load segments failed");
3759
3760	vmxnet3_setup_queue_sysctl(sc, ctx, child);
3761}
3762
3763static void
3764vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
3765{
3766
3767	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
3768}
3769
3770static uint32_t
3771vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
3772{
3773
3774	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
3775}
3776
3777static void
3778vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
3779{
3780
3781	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
3782}
3783
3784static void
3785vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
3786{
3787
3788	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
3789}
3790
3791static uint32_t
3792vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
3793{
3794
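	/*
	 * Commands are issued by writing the command code to BAR1_CMD and
	 * reading the result back from the same register; the barrier
	 * keeps the two accesses ordered.
	 */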
3795	vmxnet3_write_cmd(sc, cmd);
3796	bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
3797	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
3798	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
3799}
3800
3801static void
3802vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
3803{
3804
3805	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
3806}
3807
3808static void
3809vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
3810{
3811
3812	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
3813}
3814
3815static void
3816vmxnet3_enable_all_intrs(struct vmxnet3_softc *sc)
3817{
3818	int i;
3819
3820	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
3821	for (i = 0; i < sc->vmx_nintrs; i++)
3822		vmxnet3_enable_intr(sc, i);
3823}
3824
3825static void
3826vmxnet3_disable_all_intrs(struct vmxnet3_softc *sc)
3827{
3828	int i;
3829
3830	sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
3831	for (i = 0; i < sc->vmx_nintrs; i++)
3832		vmxnet3_disable_intr(sc, i);
3833}
3834
3835static void
3836vmxnet3_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3837{
3838	bus_addr_t *baddr = arg;
3839
3840	if (error == 0)
3841		*baddr = segs->ds_addr;
3842}
3843
3844static int
3845vmxnet3_dma_malloc(struct vmxnet3_softc *sc, bus_size_t size, bus_size_t align,
3846    struct vmxnet3_dma_alloc *dma)
3847{
3848	device_t dev;
3849	int error;
3850
3851	dev = sc->vmx_dev;
3852	bzero(dma, sizeof(struct vmxnet3_dma_alloc));
3853
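	/*
	 * Each shared memory region is allocated as a single physically
	 * contiguous DMA segment (nsegments = 1) with the requested
	 * alignment and is zeroed at allocation time (BUS_DMA_ZERO).
	 */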
3854	error = bus_dma_tag_create(bus_get_dma_tag(dev),
3855	    align, 0,		/* alignment, bounds */
3856	    BUS_SPACE_MAXADDR,	/* lowaddr */
3857	    BUS_SPACE_MAXADDR,	/* highaddr */
3858	    NULL, NULL,		/* filter, filterarg */
3859	    size,		/* maxsize */
3860	    1,			/* nsegments */
3861	    size,		/* maxsegsize */
3862	    BUS_DMA_ALLOCNOW,	/* flags */
3863	    NULL,		/* lockfunc */
3864	    NULL,		/* lockfuncarg */
3865	    &dma->dma_tag);
3866	if (error) {
3867		device_printf(dev, "bus_dma_tag_create failed: %d\n", error);
3868		goto fail;
3869	}
3870
3871	error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
3872	    BUS_DMA_ZERO | BUS_DMA_NOWAIT, &dma->dma_map);
3873	if (error) {
3874		device_printf(dev, "bus_dmamem_alloc failed: %d\n", error);
3875		goto fail;
3876	}
3877
3878	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3879	    size, vmxnet3_dmamap_cb, &dma->dma_paddr, BUS_DMA_NOWAIT);
3880	if (error) {
3881		device_printf(dev, "bus_dmamap_load failed: %d\n", error);
3882		goto fail;
3883	}
3884
3885	dma->dma_size = size;
3886
3887fail:
3888	if (error)
3889		vmxnet3_dma_free(sc, dma);
3890
3891	return (error);
3892}
3893
3894static void
3895vmxnet3_dma_free(struct vmxnet3_softc *sc, struct vmxnet3_dma_alloc *dma)
3896{
3897
3898	if (dma->dma_tag != NULL) {
3899		if (dma->dma_paddr != 0) {
3900			bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3901			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3902			bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3903		}
3904
3905		if (dma->dma_vaddr != NULL) {
3906			bus_dmamem_free(dma->dma_tag, dma->dma_vaddr,
3907			    dma->dma_map);
3908		}
3909
3910		bus_dma_tag_destroy(dma->dma_tag);
3911	}
3912	bzero(dma, sizeof(struct vmxnet3_dma_alloc));
3913}
3914
3915static int
3916vmxnet3_tunable_int(struct vmxnet3_softc *sc, const char *knob, int def)
3917{
3918	char path[64];
3919
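	/*
	 * Look up a per-device loader tunable of the form
	 * "hw.vmx.<unit>.<knob>"; the caller's default is returned
	 * unchanged when the tunable is not set.
	 */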
3920	snprintf(path, sizeof(path),
3921	    "hw.vmx.%d.%s", device_get_unit(sc->vmx_dev), knob);
3922	TUNABLE_INT_FETCH(path, &def);
3923
3924	return (def);
3925}
3926
3927/*
3928 * Since this is a purely paravirtualized device, we do not have
3929 * to worry about DMA coherency. But at times, we must make sure
3930 * both the compiler and CPU do not reorder memory operations.
3931 */
3932static inline void
3933vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
3934{
3935
3936	switch (type) {
3937	case VMXNET3_BARRIER_RD:
3938		rmb();
3939		break;
3940	case VMXNET3_BARRIER_WR:
3941		wmb();
3942		break;
3943	case VMXNET3_BARRIER_RDWR:
3944		mb();
3945		break;
3946	default:
3947		panic("%s: bad barrier type %d", __func__, type);
3948	}
3949}
3950