/*-
 * Copyright (c) 2013 Tsubai Masanari
 * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $
 */

/* Driver for VMware vmxnet3 virtual ethernet devices. */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/10.2/sys/dev/vmware/vmxnet3/if_vmx.c 281955 2015-04-24 23:26:44Z hiren $");
24
25#include <sys/param.h>
26#include <sys/systm.h>
27#include <sys/kernel.h>
28#include <sys/endian.h>
29#include <sys/sockio.h>
30#include <sys/mbuf.h>
31#include <sys/malloc.h>
32#include <sys/module.h>
33#include <sys/socket.h>
34#include <sys/sysctl.h>
35#include <sys/smp.h>
36#include <sys/taskqueue.h>
37#include <vm/vm.h>
38#include <vm/pmap.h>
39
40#include <net/ethernet.h>
41#include <net/if.h>
42#include <net/if_arp.h>
43#include <net/if_dl.h>
44#include <net/if_types.h>
45#include <net/if_media.h>
46#include <net/if_vlan_var.h>
47
48#include <net/bpf.h>
49
50#include <netinet/in_systm.h>
51#include <netinet/in.h>
52#include <netinet/ip.h>
53#include <netinet/ip6.h>
54#include <netinet6/ip6_var.h>
55#include <netinet/udp.h>
56#include <netinet/tcp.h>
57
58#include <machine/in_cksum.h>
59
60#include <machine/bus.h>
61#include <machine/resource.h>
62#include <sys/bus.h>
63#include <sys/rman.h>
64
65#include <dev/pci/pcireg.h>
66#include <dev/pci/pcivar.h>
67
68#include "if_vmxreg.h"
69#include "if_vmxvar.h"
70
71#include "opt_inet.h"
72#include "opt_inet6.h"
73
74#ifdef VMXNET3_FAILPOINTS
75#include <sys/fail.h>
76static SYSCTL_NODE(DEBUG_FP, OID_AUTO, vmxnet3, CTLFLAG_RW, 0,
77    "vmxnet3 fail points");
78#define VMXNET3_FP	_debug_fail_point_vmxnet3
79#endif
80
81static int	vmxnet3_probe(device_t);
82static int	vmxnet3_attach(device_t);
83static int	vmxnet3_detach(device_t);
84static int	vmxnet3_shutdown(device_t);
85
86static int	vmxnet3_alloc_resources(struct vmxnet3_softc *);
87static void	vmxnet3_free_resources(struct vmxnet3_softc *);
88static int	vmxnet3_check_version(struct vmxnet3_softc *);
89static void	vmxnet3_initial_config(struct vmxnet3_softc *);
90static void	vmxnet3_check_multiqueue(struct vmxnet3_softc *);
91
92static int	vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *);
93static int	vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *);
94static int	vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *);
95static int	vmxnet3_alloc_interrupt(struct vmxnet3_softc *, int, int,
96		    struct vmxnet3_interrupt *);
97static int	vmxnet3_alloc_intr_resources(struct vmxnet3_softc *);
98static int	vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *);
99static int	vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *);
100static int	vmxnet3_setup_interrupts(struct vmxnet3_softc *);
101static int	vmxnet3_alloc_interrupts(struct vmxnet3_softc *);
102
103static void	vmxnet3_free_interrupt(struct vmxnet3_softc *,
104		    struct vmxnet3_interrupt *);
105static void	vmxnet3_free_interrupts(struct vmxnet3_softc *);
106
107#ifndef VMXNET3_LEGACY_TX
108static int	vmxnet3_alloc_taskqueue(struct vmxnet3_softc *);
109static void	vmxnet3_start_taskqueue(struct vmxnet3_softc *);
110static void	vmxnet3_drain_taskqueue(struct vmxnet3_softc *);
111static void	vmxnet3_free_taskqueue(struct vmxnet3_softc *);
112#endif
113
114static int	vmxnet3_init_rxq(struct vmxnet3_softc *, int);
115static int	vmxnet3_init_txq(struct vmxnet3_softc *, int);
116static int	vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *);
117static void	vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *);
118static void	vmxnet3_destroy_txq(struct vmxnet3_txqueue *);
119static void	vmxnet3_free_rxtx_queues(struct vmxnet3_softc *);
120
121static int	vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
122static void	vmxnet3_free_shared_data(struct vmxnet3_softc *);
123static int	vmxnet3_alloc_txq_data(struct vmxnet3_softc *);
124static void	vmxnet3_free_txq_data(struct vmxnet3_softc *);
125static int	vmxnet3_alloc_rxq_data(struct vmxnet3_softc *);
126static void	vmxnet3_free_rxq_data(struct vmxnet3_softc *);
127static int	vmxnet3_alloc_queue_data(struct vmxnet3_softc *);
128static void	vmxnet3_free_queue_data(struct vmxnet3_softc *);
129static int	vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
130static void	vmxnet3_init_shared_data(struct vmxnet3_softc *);
131static void	vmxnet3_reinit_interface(struct vmxnet3_softc *);
132static void	vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
133static void	vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
134static int	vmxnet3_alloc_data(struct vmxnet3_softc *);
135static void	vmxnet3_free_data(struct vmxnet3_softc *);
136static int	vmxnet3_setup_interface(struct vmxnet3_softc *);
137
138static void	vmxnet3_evintr(struct vmxnet3_softc *);
139static void	vmxnet3_txq_eof(struct vmxnet3_txqueue *);
140static void	vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *, struct mbuf *);
141static int	vmxnet3_newbuf(struct vmxnet3_softc *, struct vmxnet3_rxring *);
142static void	vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *,
143		    struct vmxnet3_rxring *, int);
144static void	vmxnet3_rxq_eof(struct vmxnet3_rxqueue *);
145static void	vmxnet3_legacy_intr(void *);
146static void	vmxnet3_txq_intr(void *);
147static void	vmxnet3_rxq_intr(void *);
148static void	vmxnet3_event_intr(void *);
149
150static void	vmxnet3_txstop(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
151static void	vmxnet3_rxstop(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
152static void	vmxnet3_stop(struct vmxnet3_softc *);
153
154static void	vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
155static int	vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
156static int	vmxnet3_reinit_queues(struct vmxnet3_softc *);
157static int	vmxnet3_enable_device(struct vmxnet3_softc *);
158static void	vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
159static int	vmxnet3_reinit(struct vmxnet3_softc *);
160static void	vmxnet3_init_locked(struct vmxnet3_softc *);
161static void	vmxnet3_init(void *);
162
static int	vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *, struct mbuf *,
		    int *, int *, int *);
165static int	vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *, struct mbuf **,
166		    bus_dmamap_t, bus_dma_segment_t [], int *);
167static void	vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *, bus_dmamap_t);
168static int	vmxnet3_txq_encap(struct vmxnet3_txqueue *, struct mbuf **);
169#ifdef VMXNET3_LEGACY_TX
170static void	vmxnet3_start_locked(struct ifnet *);
171static void	vmxnet3_start(struct ifnet *);
172#else
173static int	vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *,
174		    struct mbuf *);
175static int	vmxnet3_txq_mq_start(struct ifnet *, struct mbuf *);
176static void	vmxnet3_txq_tq_deferred(void *, int);
177#endif
178static void	vmxnet3_txq_start(struct vmxnet3_txqueue *);
179static void	vmxnet3_tx_start_all(struct vmxnet3_softc *);
180
181static void	vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
182		    uint16_t);
183static void	vmxnet3_register_vlan(void *, struct ifnet *, uint16_t);
184static void	vmxnet3_unregister_vlan(void *, struct ifnet *, uint16_t);
185static void	vmxnet3_set_rxfilter(struct vmxnet3_softc *);
186static int	vmxnet3_change_mtu(struct vmxnet3_softc *, int);
187static int	vmxnet3_ioctl(struct ifnet *, u_long, caddr_t);
188
189#ifndef VMXNET3_LEGACY_TX
190static void	vmxnet3_qflush(struct ifnet *);
191#endif
192
193static int	vmxnet3_watchdog(struct vmxnet3_txqueue *);
194static void	vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
195static void	vmxnet3_txq_accum_stats(struct vmxnet3_txqueue *,
196		    struct vmxnet3_txq_stats *);
197static void	vmxnet3_rxq_accum_stats(struct vmxnet3_rxqueue *,
198		    struct vmxnet3_rxq_stats *);
199static void	vmxnet3_tick(void *);
200static void	vmxnet3_link_status(struct vmxnet3_softc *);
201static void	vmxnet3_media_status(struct ifnet *, struct ifmediareq *);
202static int	vmxnet3_media_change(struct ifnet *);
203static void	vmxnet3_set_lladdr(struct vmxnet3_softc *);
204static void	vmxnet3_get_lladdr(struct vmxnet3_softc *);
205
206static void	vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *,
207		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
208static void	vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *,
209		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
210static void	vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *,
211		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
212static void	vmxnet3_setup_sysctl(struct vmxnet3_softc *);
213
214static void	vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t,
215		    uint32_t);
216static uint32_t	vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t);
217static void	vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t,
218		    uint32_t);
219static void	vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t);
220static uint32_t	vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t);
221
222static void	vmxnet3_enable_intr(struct vmxnet3_softc *, int);
223static void	vmxnet3_disable_intr(struct vmxnet3_softc *, int);
224static void	vmxnet3_enable_all_intrs(struct vmxnet3_softc *);
225static void	vmxnet3_disable_all_intrs(struct vmxnet3_softc *);
226
227static int	vmxnet3_dma_malloc(struct vmxnet3_softc *, bus_size_t,
228		    bus_size_t, struct vmxnet3_dma_alloc *);
229static void	vmxnet3_dma_free(struct vmxnet3_softc *,
230		    struct vmxnet3_dma_alloc *);
231static int	vmxnet3_tunable_int(struct vmxnet3_softc *,
232		    const char *, int);
233
234typedef enum {
235	VMXNET3_BARRIER_RD,
236	VMXNET3_BARRIER_WR,
237	VMXNET3_BARRIER_RDWR,
238} vmxnet3_barrier_t;
239
240static void	vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);
241
242/* Tunables. */
243static int vmxnet3_mq_disable = 0;
244TUNABLE_INT("hw.vmx.mq_disable", &vmxnet3_mq_disable);
245static int vmxnet3_default_txnqueue = VMXNET3_DEF_TX_QUEUES;
246TUNABLE_INT("hw.vmx.txnqueue", &vmxnet3_default_txnqueue);
247static int vmxnet3_default_rxnqueue = VMXNET3_DEF_RX_QUEUES;
248TUNABLE_INT("hw.vmx.rxnqueue", &vmxnet3_default_rxnqueue);
249static int vmxnet3_default_txndesc = VMXNET3_DEF_TX_NDESC;
250TUNABLE_INT("hw.vmx.txndesc", &vmxnet3_default_txndesc);
251static int vmxnet3_default_rxndesc = VMXNET3_DEF_RX_NDESC;
252TUNABLE_INT("hw.vmx.rxndesc", &vmxnet3_default_rxndesc);
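
/*
 * These tunables may be set from loader.conf(5); the values below are
 * purely illustrative:
 *
 *	hw.vmx.mq_disable=1
 *	hw.vmx.txnqueue=4
 *	hw.vmx.rxnqueue=4
 *	hw.vmx.txndesc=512
 *	hw.vmx.rxndesc=256
 */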
253
254static device_method_t vmxnet3_methods[] = {
255	/* Device interface. */
256	DEVMETHOD(device_probe,		vmxnet3_probe),
257	DEVMETHOD(device_attach,	vmxnet3_attach),
258	DEVMETHOD(device_detach,	vmxnet3_detach),
259	DEVMETHOD(device_shutdown,	vmxnet3_shutdown),
260
261	DEVMETHOD_END
262};
263
264static driver_t vmxnet3_driver = {
265	"vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc)
266};
267
268static devclass_t vmxnet3_devclass;
269DRIVER_MODULE(vmx, pci, vmxnet3_driver, vmxnet3_devclass, 0, 0);
270
271MODULE_DEPEND(vmx, pci, 1, 1, 1);
272MODULE_DEPEND(vmx, ether, 1, 1, 1);
273
274#define VMXNET3_VMWARE_VENDOR_ID	0x15AD
275#define VMXNET3_VMWARE_DEVICE_ID	0x07B0
276
277static int
278vmxnet3_probe(device_t dev)
279{
280
281	if (pci_get_vendor(dev) == VMXNET3_VMWARE_VENDOR_ID &&
282	    pci_get_device(dev) == VMXNET3_VMWARE_DEVICE_ID) {
283		device_set_desc(dev, "VMware VMXNET3 Ethernet Adapter");
284		return (BUS_PROBE_DEFAULT);
285	}
286
287	return (ENXIO);
288}
289
290static int
291vmxnet3_attach(device_t dev)
292{
293	struct vmxnet3_softc *sc;
294	int error;
295
296	sc = device_get_softc(dev);
297	sc->vmx_dev = dev;
298
299	pci_enable_busmaster(dev);
300
301	VMXNET3_CORE_LOCK_INIT(sc, device_get_nameunit(dev));
302	callout_init_mtx(&sc->vmx_tick, &sc->vmx_mtx, 0);
303
304	vmxnet3_initial_config(sc);
305
306	error = vmxnet3_alloc_resources(sc);
307	if (error)
308		goto fail;
309
310	error = vmxnet3_check_version(sc);
311	if (error)
312		goto fail;
313
314	error = vmxnet3_alloc_rxtx_queues(sc);
315	if (error)
316		goto fail;
317
318#ifndef VMXNET3_LEGACY_TX
319	error = vmxnet3_alloc_taskqueue(sc);
320	if (error)
321		goto fail;
322#endif
323
324	error = vmxnet3_alloc_interrupts(sc);
325	if (error)
326		goto fail;
327
328	vmxnet3_check_multiqueue(sc);
329
330	error = vmxnet3_alloc_data(sc);
331	if (error)
332		goto fail;
333
334	error = vmxnet3_setup_interface(sc);
335	if (error)
336		goto fail;
337
338	error = vmxnet3_setup_interrupts(sc);
339	if (error) {
340		ether_ifdetach(sc->vmx_ifp);
341		device_printf(dev, "could not set up interrupt\n");
342		goto fail;
343	}
344
345	vmxnet3_setup_sysctl(sc);
346#ifndef VMXNET3_LEGACY_TX
347	vmxnet3_start_taskqueue(sc);
348#endif
349
350fail:
351	if (error)
352		vmxnet3_detach(dev);
353
354	return (error);
355}
356
357static int
358vmxnet3_detach(device_t dev)
359{
360	struct vmxnet3_softc *sc;
361	struct ifnet *ifp;
362
363	sc = device_get_softc(dev);
364	ifp = sc->vmx_ifp;
365
366	if (device_is_attached(dev)) {
367		VMXNET3_CORE_LOCK(sc);
368		vmxnet3_stop(sc);
369		VMXNET3_CORE_UNLOCK(sc);
370
371		callout_drain(&sc->vmx_tick);
372#ifndef VMXNET3_LEGACY_TX
373		vmxnet3_drain_taskqueue(sc);
374#endif
375
376		ether_ifdetach(ifp);
377	}
378
379	if (sc->vmx_vlan_attach != NULL) {
380		EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_attach);
381		sc->vmx_vlan_attach = NULL;
382	}
383	if (sc->vmx_vlan_detach != NULL) {
384		EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_detach);
385		sc->vmx_vlan_detach = NULL;
386	}
387
388#ifndef VMXNET3_LEGACY_TX
389	vmxnet3_free_taskqueue(sc);
390#endif
391	vmxnet3_free_interrupts(sc);
392
393	if (ifp != NULL) {
394		if_free(ifp);
395		sc->vmx_ifp = NULL;
396	}
397
398	ifmedia_removeall(&sc->vmx_media);
399
400	vmxnet3_free_data(sc);
401	vmxnet3_free_resources(sc);
402	vmxnet3_free_rxtx_queues(sc);
403
404	VMXNET3_CORE_LOCK_DESTROY(sc);
405
406	return (0);
407}
408
409static int
410vmxnet3_shutdown(device_t dev)
411{
412
413	return (0);
414}
415
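/*
 * Map the device's two register BARs and, when an MSI-X capability is
 * present, the BAR containing the MSI-X table. Failing to map the MSI-X
 * BAR is not fatal; the driver simply falls back to MSI or INTx.
 */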
416static int
417vmxnet3_alloc_resources(struct vmxnet3_softc *sc)
418{
419	device_t dev;
420	int rid;
421
422	dev = sc->vmx_dev;
423
424	rid = PCIR_BAR(0);
425	sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
426	    RF_ACTIVE);
427	if (sc->vmx_res0 == NULL) {
428		device_printf(dev,
429		    "could not map BAR0 memory\n");
430		return (ENXIO);
431	}
432
433	sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0);
434	sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0);
435
436	rid = PCIR_BAR(1);
437	sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
438	    RF_ACTIVE);
439	if (sc->vmx_res1 == NULL) {
440		device_printf(dev,
441		    "could not map BAR1 memory\n");
442		return (ENXIO);
443	}
444
445	sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1);
446	sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1);
447
448	if (pci_find_cap(dev, PCIY_MSIX, NULL) == 0) {
449		rid = PCIR_BAR(2);
450		sc->vmx_msix_res = bus_alloc_resource_any(dev,
451		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
452	}
453
454	if (sc->vmx_msix_res == NULL)
455		sc->vmx_flags |= VMXNET3_FLAG_NO_MSIX;
456
457	return (0);
458}
459
460static void
461vmxnet3_free_resources(struct vmxnet3_softc *sc)
462{
463	device_t dev;
464	int rid;
465
466	dev = sc->vmx_dev;
467
468	if (sc->vmx_res0 != NULL) {
469		rid = PCIR_BAR(0);
470		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res0);
471		sc->vmx_res0 = NULL;
472	}
473
474	if (sc->vmx_res1 != NULL) {
475		rid = PCIR_BAR(1);
476		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res1);
477		sc->vmx_res1 = NULL;
478	}
479
480	if (sc->vmx_msix_res != NULL) {
481		rid = PCIR_BAR(2);
482		bus_release_resource(dev, SYS_RES_MEMORY, rid,
483		    sc->vmx_msix_res);
484		sc->vmx_msix_res = NULL;
485	}
486}
487
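/*
 * Negotiate revision 1 of both the device and UPT interfaces: read the
 * supported-revision registers and write back the revision we will use.
 */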
488static int
489vmxnet3_check_version(struct vmxnet3_softc *sc)
490{
491	device_t dev;
492	uint32_t version;
493
494	dev = sc->vmx_dev;
495
496	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
497	if ((version & 0x01) == 0) {
498		device_printf(dev, "unsupported hardware version %#x\n",
499		    version);
500		return (ENOTSUP);
501	}
502	vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
503
504	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
505	if ((version & 0x01) == 0) {
506		device_printf(dev, "unsupported UPT version %#x\n", version);
507		return (ENOTSUP);
508	}
509	vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
510
511	return (0);
512}
513
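/*
 * Derive the initial Tx/Rx queue and descriptor counts from the tunables,
 * clamping queue counts to the device limits and the number of CPUs and
 * rounding descriptor counts down to the required multiple.
 */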
514static void
515vmxnet3_initial_config(struct vmxnet3_softc *sc)
516{
517	int nqueue, ndesc;
518
519	nqueue = vmxnet3_tunable_int(sc, "txnqueue", vmxnet3_default_txnqueue);
520	if (nqueue > VMXNET3_MAX_TX_QUEUES || nqueue < 1)
521		nqueue = VMXNET3_DEF_TX_QUEUES;
522	if (nqueue > mp_ncpus)
523		nqueue = mp_ncpus;
524	sc->vmx_max_ntxqueues = nqueue;
525
526	nqueue = vmxnet3_tunable_int(sc, "rxnqueue", vmxnet3_default_rxnqueue);
527	if (nqueue > VMXNET3_MAX_RX_QUEUES || nqueue < 1)
528		nqueue = VMXNET3_DEF_RX_QUEUES;
529	if (nqueue > mp_ncpus)
530		nqueue = mp_ncpus;
531	sc->vmx_max_nrxqueues = nqueue;
532
533	if (vmxnet3_tunable_int(sc, "mq_disable", vmxnet3_mq_disable)) {
534		sc->vmx_max_nrxqueues = 1;
535		sc->vmx_max_ntxqueues = 1;
536	}
537
538	ndesc = vmxnet3_tunable_int(sc, "txd", vmxnet3_default_txndesc);
539	if (ndesc > VMXNET3_MAX_TX_NDESC || ndesc < VMXNET3_MIN_TX_NDESC)
540		ndesc = VMXNET3_DEF_TX_NDESC;
541	if (ndesc & VMXNET3_MASK_TX_NDESC)
542		ndesc &= ~VMXNET3_MASK_TX_NDESC;
543	sc->vmx_ntxdescs = ndesc;
544
545	ndesc = vmxnet3_tunable_int(sc, "rxd", vmxnet3_default_rxndesc);
546	if (ndesc > VMXNET3_MAX_RX_NDESC || ndesc < VMXNET3_MIN_RX_NDESC)
547		ndesc = VMXNET3_DEF_RX_NDESC;
548	if (ndesc & VMXNET3_MASK_RX_NDESC)
549		ndesc &= ~VMXNET3_MASK_RX_NDESC;
550	sc->vmx_nrxdescs = ndesc;
551	sc->vmx_max_rxsegs = VMXNET3_MAX_RX_SEGS;
552}
553
554static void
555vmxnet3_check_multiqueue(struct vmxnet3_softc *sc)
556{
557
558	if (sc->vmx_intr_type != VMXNET3_IT_MSIX)
559		goto out;
560
561	/* BMV: Just use the maximum configured for now. */
562	sc->vmx_nrxqueues = sc->vmx_max_nrxqueues;
563	sc->vmx_ntxqueues = sc->vmx_max_ntxqueues;
564
565	if (sc->vmx_nrxqueues > 1)
566		sc->vmx_flags |= VMXNET3_FLAG_RSS;
567
568	return;
569
570out:
571	sc->vmx_ntxqueues = 1;
572	sc->vmx_nrxqueues = 1;
573}
574
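/*
 * Try to allocate one MSI-X vector per Tx and Rx queue plus one more for
 * the event interrupt. A nonzero return lets the caller fall back to MSI.
 */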
575static int
576vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *sc)
577{
578	device_t dev;
579	int nmsix, cnt, required;
580
581	dev = sc->vmx_dev;
582
583	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX)
584		return (1);
585
586	/* Allocate an additional vector for the events interrupt. */
587	required = sc->vmx_max_nrxqueues + sc->vmx_max_ntxqueues + 1;
588
589	nmsix = pci_msix_count(dev);
590	if (nmsix < required)
591		return (1);
592
593	cnt = required;
594	if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) {
595		sc->vmx_nintrs = required;
596		return (0);
597	} else
598		pci_release_msi(dev);
599
600	/* BMV TODO Fallback to sharing MSIX vectors if possible. */
601
602	return (1);
603}
604
605static int
606vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *sc)
607{
608	device_t dev;
609	int nmsi, cnt, required;
610
611	dev = sc->vmx_dev;
612	required = 1;
613
614	nmsi = pci_msi_count(dev);
615	if (nmsi < required)
616		return (1);
617
618	cnt = required;
619	if (pci_alloc_msi(dev, &cnt) == 0 && cnt >= required) {
620		sc->vmx_nintrs = 1;
621		return (0);
622	} else
623		pci_release_msi(dev);
624
625	return (1);
626}
627
628static int
629vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *sc)
630{
631
632	sc->vmx_nintrs = 1;
633	return (0);
634}
635
636static int
637vmxnet3_alloc_interrupt(struct vmxnet3_softc *sc, int rid, int flags,
638    struct vmxnet3_interrupt *intr)
639{
640	struct resource *irq;
641
642	irq = bus_alloc_resource_any(sc->vmx_dev, SYS_RES_IRQ, &rid, flags);
643	if (irq == NULL)
644		return (ENXIO);
645
646	intr->vmxi_irq = irq;
647	intr->vmxi_rid = rid;
648
649	return (0);
650}
651
652static int
653vmxnet3_alloc_intr_resources(struct vmxnet3_softc *sc)
654{
655	int i, rid, flags, error;
656
657	rid = 0;
658	flags = RF_ACTIVE;
659
660	if (sc->vmx_intr_type == VMXNET3_IT_LEGACY)
661		flags |= RF_SHAREABLE;
662	else
663		rid = 1;
664
665	for (i = 0; i < sc->vmx_nintrs; i++, rid++) {
666		error = vmxnet3_alloc_interrupt(sc, rid, flags,
667		    &sc->vmx_intrs[i]);
668		if (error)
669			return (error);
670	}
671
672	return (0);
673}
674
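/*
 * With MSI-X, vectors are assigned in order: one per Tx queue, then one
 * per Rx queue, then a final vector for device events. The interrupt
 * index reported to the device is the resource ID minus one.
 */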
675static int
676vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *sc)
677{
678	device_t dev;
679	struct vmxnet3_txqueue *txq;
680	struct vmxnet3_rxqueue *rxq;
681	struct vmxnet3_interrupt *intr;
682	enum intr_type type;
683	int i, error;
684
685	dev = sc->vmx_dev;
686	intr = &sc->vmx_intrs[0];
687	type = INTR_TYPE_NET | INTR_MPSAFE;
688
689	for (i = 0; i < sc->vmx_ntxqueues; i++, intr++) {
690		txq = &sc->vmx_txq[i];
691		error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
692		     vmxnet3_txq_intr, txq, &intr->vmxi_handler);
693		if (error)
694			return (error);
695		bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler,
696		    "tq%d", i);
697		txq->vxtxq_intr_idx = intr->vmxi_rid - 1;
698	}
699
700	for (i = 0; i < sc->vmx_nrxqueues; i++, intr++) {
701		rxq = &sc->vmx_rxq[i];
702		error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
703		    vmxnet3_rxq_intr, rxq, &intr->vmxi_handler);
704		if (error)
705			return (error);
706		bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler,
707		    "rq%d", i);
708		rxq->vxrxq_intr_idx = intr->vmxi_rid - 1;
709	}
710
711	error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
712	    vmxnet3_event_intr, sc, &intr->vmxi_handler);
713	if (error)
714		return (error);
715	bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler, "event");
716	sc->vmx_event_intr_idx = intr->vmxi_rid - 1;
717
718	return (0);
719}
720
721static int
722vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *sc)
723{
724	struct vmxnet3_interrupt *intr;
725	int i, error;
726
727	intr = &sc->vmx_intrs[0];
728	error = bus_setup_intr(sc->vmx_dev, intr->vmxi_irq,
729	    INTR_TYPE_NET | INTR_MPSAFE, NULL, vmxnet3_legacy_intr, sc,
730	    &intr->vmxi_handler);
731
732	for (i = 0; i < sc->vmx_ntxqueues; i++)
733		sc->vmx_txq[i].vxtxq_intr_idx = 0;
734	for (i = 0; i < sc->vmx_nrxqueues; i++)
735		sc->vmx_rxq[i].vxrxq_intr_idx = 0;
736	sc->vmx_event_intr_idx = 0;
737
738	return (error);
739}
740
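/*
 * Publish the interrupt index chosen for each queue, and for the event
 * interrupt, in the shared memory area read by the device.
 */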
741static void
742vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
743{
744	struct vmxnet3_txqueue *txq;
745	struct vmxnet3_txq_shared *txs;
746	struct vmxnet3_rxqueue *rxq;
747	struct vmxnet3_rxq_shared *rxs;
748	int i;
749
750	sc->vmx_ds->evintr = sc->vmx_event_intr_idx;
751
752	for (i = 0; i < sc->vmx_ntxqueues; i++) {
753		txq = &sc->vmx_txq[i];
754		txs = txq->vxtxq_ts;
755		txs->intr_idx = txq->vxtxq_intr_idx;
756	}
757
758	for (i = 0; i < sc->vmx_nrxqueues; i++) {
759		rxq = &sc->vmx_rxq[i];
760		rxs = rxq->vxrxq_rs;
761		rxs->intr_idx = rxq->vxrxq_intr_idx;
762	}
763}
764
765static int
766vmxnet3_setup_interrupts(struct vmxnet3_softc *sc)
767{
768	int error;
769
770	error = vmxnet3_alloc_intr_resources(sc);
771	if (error)
772		return (error);
773
774	switch (sc->vmx_intr_type) {
775	case VMXNET3_IT_MSIX:
776		error = vmxnet3_setup_msix_interrupts(sc);
777		break;
778	case VMXNET3_IT_MSI:
779	case VMXNET3_IT_LEGACY:
780		error = vmxnet3_setup_legacy_interrupt(sc);
781		break;
782	default:
783		panic("%s: invalid interrupt type %d", __func__,
784		    sc->vmx_intr_type);
785	}
786
787	if (error == 0)
788		vmxnet3_set_interrupt_idx(sc);
789
790	return (error);
791}
792
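/*
 * Query the device's preferred interrupt type and fall through the
 * MSI-X -> MSI -> legacy INTx cases until an allocation succeeds.
 */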
793static int
794vmxnet3_alloc_interrupts(struct vmxnet3_softc *sc)
795{
796	device_t dev;
797	uint32_t config;
798	int error;
799
800	dev = sc->vmx_dev;
801	config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
802
803	sc->vmx_intr_type = config & 0x03;
804	sc->vmx_intr_mask_mode = (config >> 2) & 0x03;
805
806	switch (sc->vmx_intr_type) {
807	case VMXNET3_IT_AUTO:
808		sc->vmx_intr_type = VMXNET3_IT_MSIX;
809		/* FALLTHROUGH */
810	case VMXNET3_IT_MSIX:
811		error = vmxnet3_alloc_msix_interrupts(sc);
812		if (error == 0)
813			break;
814		sc->vmx_intr_type = VMXNET3_IT_MSI;
815		/* FALLTHROUGH */
816	case VMXNET3_IT_MSI:
817		error = vmxnet3_alloc_msi_interrupts(sc);
818		if (error == 0)
819			break;
820		sc->vmx_intr_type = VMXNET3_IT_LEGACY;
821		/* FALLTHROUGH */
822	case VMXNET3_IT_LEGACY:
823		error = vmxnet3_alloc_legacy_interrupts(sc);
824		if (error == 0)
825			break;
826		/* FALLTHROUGH */
827	default:
828		sc->vmx_intr_type = -1;
829		device_printf(dev, "cannot allocate any interrupt resources\n");
830		return (ENXIO);
831	}
832
833	return (error);
834}
835
836static void
837vmxnet3_free_interrupt(struct vmxnet3_softc *sc,
838    struct vmxnet3_interrupt *intr)
839{
840	device_t dev;
841
842	dev = sc->vmx_dev;
843
844	if (intr->vmxi_handler != NULL) {
845		bus_teardown_intr(dev, intr->vmxi_irq, intr->vmxi_handler);
846		intr->vmxi_handler = NULL;
847	}
848
849	if (intr->vmxi_irq != NULL) {
850		bus_release_resource(dev, SYS_RES_IRQ, intr->vmxi_rid,
851		    intr->vmxi_irq);
852		intr->vmxi_irq = NULL;
853		intr->vmxi_rid = -1;
854	}
855}
856
857static void
858vmxnet3_free_interrupts(struct vmxnet3_softc *sc)
859{
860	int i;
861
862	for (i = 0; i < sc->vmx_nintrs; i++)
863		vmxnet3_free_interrupt(sc, &sc->vmx_intrs[i]);
864
865	if (sc->vmx_intr_type == VMXNET3_IT_MSI ||
866	    sc->vmx_intr_type == VMXNET3_IT_MSIX)
867		pci_release_msi(sc->vmx_dev);
868}
869
870#ifndef VMXNET3_LEGACY_TX
871static int
872vmxnet3_alloc_taskqueue(struct vmxnet3_softc *sc)
873{
874	device_t dev;
875
876	dev = sc->vmx_dev;
877
878	sc->vmx_tq = taskqueue_create(device_get_nameunit(dev), M_NOWAIT,
879	    taskqueue_thread_enqueue, &sc->vmx_tq);
880	if (sc->vmx_tq == NULL)
881		return (ENOMEM);
882
883	return (0);
884}
885
886static void
887vmxnet3_start_taskqueue(struct vmxnet3_softc *sc)
888{
889	device_t dev;
890	int nthreads, error;
891
892	dev = sc->vmx_dev;
893
	/*
	 * The taskqueue is typically not used frequently, so a dedicated
	 * thread for each queue is unnecessary.
	 */
898	nthreads = MAX(1, sc->vmx_ntxqueues / 2);
899
900	/*
901	 * Most drivers just ignore the return value - it only fails
902	 * with ENOMEM so an error is not likely. It is hard for us
903	 * to recover from an error here.
904	 */
905	error = taskqueue_start_threads(&sc->vmx_tq, nthreads, PI_NET,
906	    "%s taskq", device_get_nameunit(dev));
	if (error)
		device_printf(dev, "failed to start taskqueue: %d\n", error);
909}
910
911static void
912vmxnet3_drain_taskqueue(struct vmxnet3_softc *sc)
913{
914	struct vmxnet3_txqueue *txq;
915	int i;
916
917	if (sc->vmx_tq != NULL) {
918		for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
919			txq = &sc->vmx_txq[i];
920			taskqueue_drain(sc->vmx_tq, &txq->vxtxq_defrtask);
921		}
922	}
923}
924
925static void
926vmxnet3_free_taskqueue(struct vmxnet3_softc *sc)
927{
928	if (sc->vmx_tq != NULL) {
929		taskqueue_free(sc->vmx_tq);
930		sc->vmx_tq = NULL;
931	}
932}
933#endif
934
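/*
 * Allocate the software state for one Rx queue: its mutex and the rxbuf
 * array for each of its command rings. The completion ring is sized to
 * cover both command rings.
 */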
935static int
936vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q)
937{
938	struct vmxnet3_rxqueue *rxq;
939	struct vmxnet3_rxring *rxr;
940	int i;
941
942	rxq = &sc->vmx_rxq[q];
943
944	snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
945	    device_get_nameunit(sc->vmx_dev), q);
946	mtx_init(&rxq->vxrxq_mtx, rxq->vxrxq_name, NULL, MTX_DEF);
947
948	rxq->vxrxq_sc = sc;
949	rxq->vxrxq_id = q;
950
951	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
952		rxr = &rxq->vxrxq_cmd_ring[i];
953		rxr->vxrxr_rid = i;
954		rxr->vxrxr_ndesc = sc->vmx_nrxdescs;
955		rxr->vxrxr_rxbuf = malloc(rxr->vxrxr_ndesc *
956		    sizeof(struct vmxnet3_rxbuf), M_DEVBUF, M_NOWAIT | M_ZERO);
957		if (rxr->vxrxr_rxbuf == NULL)
958			return (ENOMEM);
959
960		rxq->vxrxq_comp_ring.vxcr_ndesc += sc->vmx_nrxdescs;
961	}
962
963	return (0);
964}
965
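/*
 * Allocate the software state for one Tx queue: its mutex and txbuf array
 * and, for multiqueue transmit, the deferred task and buf_ring.
 */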
966static int
967vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
968{
969	struct vmxnet3_txqueue *txq;
970	struct vmxnet3_txring *txr;
971
972	txq = &sc->vmx_txq[q];
973	txr = &txq->vxtxq_cmd_ring;
974
975	snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
976	    device_get_nameunit(sc->vmx_dev), q);
977	mtx_init(&txq->vxtxq_mtx, txq->vxtxq_name, NULL, MTX_DEF);
978
979	txq->vxtxq_sc = sc;
980	txq->vxtxq_id = q;
981
982	txr->vxtxr_ndesc = sc->vmx_ntxdescs;
983	txr->vxtxr_txbuf = malloc(txr->vxtxr_ndesc *
984	    sizeof(struct vmxnet3_txbuf), M_DEVBUF, M_NOWAIT | M_ZERO);
985	if (txr->vxtxr_txbuf == NULL)
986		return (ENOMEM);
987
988	txq->vxtxq_comp_ring.vxcr_ndesc = sc->vmx_ntxdescs;
989
990#ifndef VMXNET3_LEGACY_TX
991	TASK_INIT(&txq->vxtxq_defrtask, 0, vmxnet3_txq_tq_deferred, txq);
992
993	txq->vxtxq_br = buf_ring_alloc(VMXNET3_DEF_BUFRING_SIZE, M_DEVBUF,
994	    M_NOWAIT, &txq->vxtxq_mtx);
995	if (txq->vxtxq_br == NULL)
996		return (ENOMEM);
997#endif
998
999	return (0);
1000}
1001
1002static int
1003vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *sc)
1004{
1005	int i, error;
1006
	/*
	 * Only attempt to create multiple queues if MSIX is available. MSIX is
	 * disabled by default because it is apparently broken for devices
	 * passed through by at least ESXi 5.1. The hw.pci.honor_msi_blacklist
	 * tunable must be set to zero to enable MSIX in that case. This check
	 * prevents us from allocating queue structures that we will not use.
	 */
1014	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX) {
1015		sc->vmx_max_nrxqueues = 1;
1016		sc->vmx_max_ntxqueues = 1;
1017	}
1018
1019	sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) *
1020	    sc->vmx_max_nrxqueues, M_DEVBUF, M_NOWAIT | M_ZERO);
1021	sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) *
1022	    sc->vmx_max_ntxqueues, M_DEVBUF, M_NOWAIT | M_ZERO);
1023	if (sc->vmx_rxq == NULL || sc->vmx_txq == NULL)
1024		return (ENOMEM);
1025
1026	for (i = 0; i < sc->vmx_max_nrxqueues; i++) {
1027		error = vmxnet3_init_rxq(sc, i);
1028		if (error)
1029			return (error);
1030	}
1031
1032	for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
1033		error = vmxnet3_init_txq(sc, i);
1034		if (error)
1035			return (error);
1036	}
1037
1038	return (0);
1039}
1040
1041static void
1042vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *rxq)
1043{
1044	struct vmxnet3_rxring *rxr;
1045	int i;
1046
1047	rxq->vxrxq_sc = NULL;
1048	rxq->vxrxq_id = -1;
1049
1050	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1051		rxr = &rxq->vxrxq_cmd_ring[i];
1052
1053		if (rxr->vxrxr_rxbuf != NULL) {
1054			free(rxr->vxrxr_rxbuf, M_DEVBUF);
1055			rxr->vxrxr_rxbuf = NULL;
1056		}
1057	}
1058
1059	if (mtx_initialized(&rxq->vxrxq_mtx) != 0)
1060		mtx_destroy(&rxq->vxrxq_mtx);
1061}
1062
1063static void
1064vmxnet3_destroy_txq(struct vmxnet3_txqueue *txq)
1065{
1066	struct vmxnet3_txring *txr;
1067
1068	txr = &txq->vxtxq_cmd_ring;
1069
1070	txq->vxtxq_sc = NULL;
1071	txq->vxtxq_id = -1;
1072
1073#ifndef VMXNET3_LEGACY_TX
1074	if (txq->vxtxq_br != NULL) {
1075		buf_ring_free(txq->vxtxq_br, M_DEVBUF);
1076		txq->vxtxq_br = NULL;
1077	}
1078#endif
1079
1080	if (txr->vxtxr_txbuf != NULL) {
1081		free(txr->vxtxr_txbuf, M_DEVBUF);
1082		txr->vxtxr_txbuf = NULL;
1083	}
1084
1085	if (mtx_initialized(&txq->vxtxq_mtx) != 0)
1086		mtx_destroy(&txq->vxtxq_mtx);
1087}
1088
1089static void
1090vmxnet3_free_rxtx_queues(struct vmxnet3_softc *sc)
1091{
1092	int i;
1093
1094	if (sc->vmx_rxq != NULL) {
1095		for (i = 0; i < sc->vmx_max_nrxqueues; i++)
1096			vmxnet3_destroy_rxq(&sc->vmx_rxq[i]);
1097		free(sc->vmx_rxq, M_DEVBUF);
1098		sc->vmx_rxq = NULL;
1099	}
1100
1101	if (sc->vmx_txq != NULL) {
1102		for (i = 0; i < sc->vmx_max_ntxqueues; i++)
1103			vmxnet3_destroy_txq(&sc->vmx_txq[i]);
1104		free(sc->vmx_txq, M_DEVBUF);
1105		sc->vmx_txq = NULL;
1106	}
1107}
1108
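/*
 * Allocate the DMA memory shared with the device: the driver_shared area,
 * the per-queue Tx/Rx shared structures, and, when RSS is enabled, the
 * RSS configuration block.
 */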
1109static int
1110vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
1111{
1112	device_t dev;
1113	uint8_t *kva;
1114	size_t size;
1115	int i, error;
1116
1117	dev = sc->vmx_dev;
1118
1119	size = sizeof(struct vmxnet3_driver_shared);
1120	error = vmxnet3_dma_malloc(sc, size, 1, &sc->vmx_ds_dma);
1121	if (error) {
1122		device_printf(dev, "cannot alloc shared memory\n");
1123		return (error);
1124	}
1125	sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.dma_vaddr;
1126
1127	size = sc->vmx_ntxqueues * sizeof(struct vmxnet3_txq_shared) +
1128	    sc->vmx_nrxqueues * sizeof(struct vmxnet3_rxq_shared);
1129	error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_qs_dma);
1130	if (error) {
1131		device_printf(dev, "cannot alloc queue shared memory\n");
1132		return (error);
1133	}
1134	sc->vmx_qs = (void *) sc->vmx_qs_dma.dma_vaddr;
1135	kva = sc->vmx_qs;
1136
1137	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1138		sc->vmx_txq[i].vxtxq_ts = (struct vmxnet3_txq_shared *) kva;
1139		kva += sizeof(struct vmxnet3_txq_shared);
1140	}
1141	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1142		sc->vmx_rxq[i].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva;
1143		kva += sizeof(struct vmxnet3_rxq_shared);
1144	}
1145
1146	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1147		size = sizeof(struct vmxnet3_rss_shared);
1148		error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_rss_dma);
1149		if (error) {
1150			device_printf(dev, "cannot alloc rss shared memory\n");
1151			return (error);
1152		}
1153		sc->vmx_rss =
1154		    (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.dma_vaddr;
1155	}
1156
1157	return (0);
1158}
1159
1160static void
1161vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
1162{
1163
1164	if (sc->vmx_rss != NULL) {
1165		vmxnet3_dma_free(sc, &sc->vmx_rss_dma);
1166		sc->vmx_rss = NULL;
1167	}
1168
1169	if (sc->vmx_qs != NULL) {
1170		vmxnet3_dma_free(sc, &sc->vmx_qs_dma);
1171		sc->vmx_qs = NULL;
1172	}
1173
1174	if (sc->vmx_ds != NULL) {
1175		vmxnet3_dma_free(sc, &sc->vmx_ds_dma);
1176		sc->vmx_ds = NULL;
1177	}
1178}
1179
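/*
 * For each Tx queue, create the DMA tag used to map outgoing mbuf chains,
 * allocate the command and completion descriptor rings, and create a
 * dmamap for every command ring entry.
 */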
1180static int
1181vmxnet3_alloc_txq_data(struct vmxnet3_softc *sc)
1182{
1183	device_t dev;
1184	struct vmxnet3_txqueue *txq;
1185	struct vmxnet3_txring *txr;
1186	struct vmxnet3_comp_ring *txc;
1187	size_t descsz, compsz;
1188	int i, q, error;
1189
1190	dev = sc->vmx_dev;
1191
1192	for (q = 0; q < sc->vmx_ntxqueues; q++) {
1193		txq = &sc->vmx_txq[q];
1194		txr = &txq->vxtxq_cmd_ring;
1195		txc = &txq->vxtxq_comp_ring;
1196
1197		descsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc);
1198		compsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txcompdesc);
1199
1200		error = bus_dma_tag_create(bus_get_dma_tag(dev),
1201		    1, 0,			/* alignment, boundary */
1202		    BUS_SPACE_MAXADDR,		/* lowaddr */
1203		    BUS_SPACE_MAXADDR,		/* highaddr */
1204		    NULL, NULL,			/* filter, filterarg */
1205		    VMXNET3_TX_MAXSIZE,		/* maxsize */
1206		    VMXNET3_TX_MAXSEGS,		/* nsegments */
1207		    VMXNET3_TX_MAXSEGSIZE,	/* maxsegsize */
1208		    0,				/* flags */
1209		    NULL, NULL,			/* lockfunc, lockarg */
1210		    &txr->vxtxr_txtag);
1211		if (error) {
1212			device_printf(dev,
1213			    "unable to create Tx buffer tag for queue %d\n", q);
1214			return (error);
1215		}
1216
1217		error = vmxnet3_dma_malloc(sc, descsz, 512, &txr->vxtxr_dma);
1218		if (error) {
1219			device_printf(dev, "cannot alloc Tx descriptors for "
1220			    "queue %d error %d\n", q, error);
1221			return (error);
1222		}
1223		txr->vxtxr_txd =
1224		    (struct vmxnet3_txdesc *) txr->vxtxr_dma.dma_vaddr;
1225
1226		error = vmxnet3_dma_malloc(sc, compsz, 512, &txc->vxcr_dma);
1227		if (error) {
1228			device_printf(dev, "cannot alloc Tx comp descriptors "
1229			   "for queue %d error %d\n", q, error);
1230			return (error);
1231		}
1232		txc->vxcr_u.txcd =
1233		    (struct vmxnet3_txcompdesc *) txc->vxcr_dma.dma_vaddr;
1234
1235		for (i = 0; i < txr->vxtxr_ndesc; i++) {
1236			error = bus_dmamap_create(txr->vxtxr_txtag, 0,
1237			    &txr->vxtxr_txbuf[i].vtxb_dmamap);
1238			if (error) {
1239				device_printf(dev, "unable to create Tx buf "
1240				    "dmamap for queue %d idx %d\n", q, i);
1241				return (error);
1242			}
1243		}
1244	}
1245
1246	return (0);
1247}
1248
1249static void
1250vmxnet3_free_txq_data(struct vmxnet3_softc *sc)
1251{
1252	device_t dev;
1253	struct vmxnet3_txqueue *txq;
1254	struct vmxnet3_txring *txr;
1255	struct vmxnet3_comp_ring *txc;
1256	struct vmxnet3_txbuf *txb;
1257	int i, q;
1258
1259	dev = sc->vmx_dev;
1260
1261	for (q = 0; q < sc->vmx_ntxqueues; q++) {
1262		txq = &sc->vmx_txq[q];
1263		txr = &txq->vxtxq_cmd_ring;
1264		txc = &txq->vxtxq_comp_ring;
1265
1266		for (i = 0; i < txr->vxtxr_ndesc; i++) {
1267			txb = &txr->vxtxr_txbuf[i];
1268			if (txb->vtxb_dmamap != NULL) {
1269				bus_dmamap_destroy(txr->vxtxr_txtag,
1270				    txb->vtxb_dmamap);
1271				txb->vtxb_dmamap = NULL;
1272			}
1273		}
1274
1275		if (txc->vxcr_u.txcd != NULL) {
1276			vmxnet3_dma_free(sc, &txc->vxcr_dma);
1277			txc->vxcr_u.txcd = NULL;
1278		}
1279
1280		if (txr->vxtxr_txd != NULL) {
1281			vmxnet3_dma_free(sc, &txr->vxtxr_dma);
1282			txr->vxtxr_txd = NULL;
1283		}
1284
1285		if (txr->vxtxr_txtag != NULL) {
1286			bus_dma_tag_destroy(txr->vxtxr_txtag);
1287			txr->vxtxr_txtag = NULL;
1288		}
1289	}
1290}
1291
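/*
 * For each Rx queue, create a DMA tag and descriptor ring for both command
 * rings, one completion ring large enough to cover them, a spare dmamap per
 * ring for buffer replacement, and a dmamap for every ring entry.
 */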
1292static int
1293vmxnet3_alloc_rxq_data(struct vmxnet3_softc *sc)
1294{
1295	device_t dev;
1296	struct vmxnet3_rxqueue *rxq;
1297	struct vmxnet3_rxring *rxr;
1298	struct vmxnet3_comp_ring *rxc;
1299	int descsz, compsz;
1300	int i, j, q, error;
1301
1302	dev = sc->vmx_dev;
1303
1304	for (q = 0; q < sc->vmx_nrxqueues; q++) {
1305		rxq = &sc->vmx_rxq[q];
1306		rxc = &rxq->vxrxq_comp_ring;
1307		compsz = 0;
1308
1309		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1310			rxr = &rxq->vxrxq_cmd_ring[i];
1311
1312			descsz = rxr->vxrxr_ndesc *
1313			    sizeof(struct vmxnet3_rxdesc);
1314			compsz += rxr->vxrxr_ndesc *
1315			    sizeof(struct vmxnet3_rxcompdesc);
1316
1317			error = bus_dma_tag_create(bus_get_dma_tag(dev),
1318			    1, 0,		/* alignment, boundary */
1319			    BUS_SPACE_MAXADDR,	/* lowaddr */
1320			    BUS_SPACE_MAXADDR,	/* highaddr */
1321			    NULL, NULL,		/* filter, filterarg */
1322			    MJUMPAGESIZE,	/* maxsize */
1323			    1,			/* nsegments */
1324			    MJUMPAGESIZE,	/* maxsegsize */
1325			    0,			/* flags */
1326			    NULL, NULL,		/* lockfunc, lockarg */
1327			    &rxr->vxrxr_rxtag);
1328			if (error) {
1329				device_printf(dev,
1330				    "unable to create Rx buffer tag for "
1331				    "queue %d\n", q);
1332				return (error);
1333			}
1334
1335			error = vmxnet3_dma_malloc(sc, descsz, 512,
1336			    &rxr->vxrxr_dma);
1337			if (error) {
				device_printf(dev, "cannot allocate Rx "
				    "descriptors for queue %d/%d error %d\n",
				    q, i, error);
1341				return (error);
1342			}
1343			rxr->vxrxr_rxd =
1344			    (struct vmxnet3_rxdesc *) rxr->vxrxr_dma.dma_vaddr;
1345		}
1346
1347		error = vmxnet3_dma_malloc(sc, compsz, 512, &rxc->vxcr_dma);
1348		if (error) {
1349			device_printf(dev, "cannot alloc Rx comp descriptors "
1350			    "for queue %d error %d\n", q, error);
1351			return (error);
1352		}
1353		rxc->vxcr_u.rxcd =
1354		    (struct vmxnet3_rxcompdesc *) rxc->vxcr_dma.dma_vaddr;
1355
1356		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1357			rxr = &rxq->vxrxq_cmd_ring[i];
1358
1359			error = bus_dmamap_create(rxr->vxrxr_rxtag, 0,
1360			    &rxr->vxrxr_spare_dmap);
1361			if (error) {
1362				device_printf(dev, "unable to create spare "
1363				    "dmamap for queue %d/%d error %d\n",
1364				    q, i, error);
1365				return (error);
1366			}
1367
1368			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1369				error = bus_dmamap_create(rxr->vxrxr_rxtag, 0,
1370				    &rxr->vxrxr_rxbuf[j].vrxb_dmamap);
1371				if (error) {
1372					device_printf(dev, "unable to create "
1373					    "dmamap for queue %d/%d slot %d "
1374					    "error %d\n",
1375					    q, i, j, error);
1376					return (error);
1377				}
1378			}
1379		}
1380	}
1381
1382	return (0);
1383}
1384
1385static void
1386vmxnet3_free_rxq_data(struct vmxnet3_softc *sc)
1387{
1388	device_t dev;
1389	struct vmxnet3_rxqueue *rxq;
1390	struct vmxnet3_rxring *rxr;
1391	struct vmxnet3_comp_ring *rxc;
1392	struct vmxnet3_rxbuf *rxb;
1393	int i, j, q;
1394
1395	dev = sc->vmx_dev;
1396
1397	for (q = 0; q < sc->vmx_nrxqueues; q++) {
1398		rxq = &sc->vmx_rxq[q];
1399		rxc = &rxq->vxrxq_comp_ring;
1400
1401		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1402			rxr = &rxq->vxrxq_cmd_ring[i];
1403
1404			if (rxr->vxrxr_spare_dmap != NULL) {
1405				bus_dmamap_destroy(rxr->vxrxr_rxtag,
1406				    rxr->vxrxr_spare_dmap);
1407				rxr->vxrxr_spare_dmap = NULL;
1408			}
1409
1410			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1411				rxb = &rxr->vxrxr_rxbuf[j];
1412				if (rxb->vrxb_dmamap != NULL) {
1413					bus_dmamap_destroy(rxr->vxrxr_rxtag,
1414					    rxb->vrxb_dmamap);
1415					rxb->vrxb_dmamap = NULL;
1416				}
1417			}
1418		}
1419
1420		if (rxc->vxcr_u.rxcd != NULL) {
1421			vmxnet3_dma_free(sc, &rxc->vxcr_dma);
1422			rxc->vxcr_u.rxcd = NULL;
1423		}
1424
1425		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1426			rxr = &rxq->vxrxq_cmd_ring[i];
1427
1428			if (rxr->vxrxr_rxd != NULL) {
1429				vmxnet3_dma_free(sc, &rxr->vxrxr_dma);
1430				rxr->vxrxr_rxd = NULL;
1431			}
1432
1433			if (rxr->vxrxr_rxtag != NULL) {
1434				bus_dma_tag_destroy(rxr->vxrxr_rxtag);
1435				rxr->vxrxr_rxtag = NULL;
1436			}
1437		}
1438	}
1439}
1440
1441static int
1442vmxnet3_alloc_queue_data(struct vmxnet3_softc *sc)
1443{
1444	int error;
1445
1446	error = vmxnet3_alloc_txq_data(sc);
1447	if (error)
1448		return (error);
1449
1450	error = vmxnet3_alloc_rxq_data(sc);
1451	if (error)
1452		return (error);
1453
1454	return (0);
1455}
1456
1457static void
1458vmxnet3_free_queue_data(struct vmxnet3_softc *sc)
1459{
1460
1461	if (sc->vmx_rxq != NULL)
1462		vmxnet3_free_rxq_data(sc);
1463
1464	if (sc->vmx_txq != NULL)
1465		vmxnet3_free_txq_data(sc);
1466}
1467
1468static int
1469vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1470{
1471	int error;
1472
1473	error = vmxnet3_dma_malloc(sc, VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN,
1474	    32, &sc->vmx_mcast_dma);
1475	if (error)
1476		device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1477	else
1478		sc->vmx_mcast = sc->vmx_mcast_dma.dma_vaddr;
1479
1480	return (error);
1481}
1482
1483static void
1484vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1485{
1486
1487	if (sc->vmx_mcast != NULL) {
1488		vmxnet3_dma_free(sc, &sc->vmx_mcast_dma);
1489		sc->vmx_mcast = NULL;
1490	}
1491}
1492
1493static void
1494vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1495{
1496	struct vmxnet3_driver_shared *ds;
1497	struct vmxnet3_txqueue *txq;
1498	struct vmxnet3_txq_shared *txs;
1499	struct vmxnet3_rxqueue *rxq;
1500	struct vmxnet3_rxq_shared *rxs;
1501	int i;
1502
1503	ds = sc->vmx_ds;
1504
	/*
	 * Initialize fields of the shared data that remain the same across
	 * reinits. Note the shared data is zeroed when allocated.
	 */
1509
1510	ds->magic = VMXNET3_REV1_MAGIC;
1511
1512	/* DriverInfo */
1513	ds->version = VMXNET3_DRIVER_VERSION;
1514	ds->guest = VMXNET3_GOS_FREEBSD |
1515#ifdef __LP64__
1516	    VMXNET3_GOS_64BIT;
1517#else
1518	    VMXNET3_GOS_32BIT;
1519#endif
1520	ds->vmxnet3_revision = 1;
1521	ds->upt_version = 1;
1522
1523	/* Misc. conf */
1524	ds->driver_data = vtophys(sc);
1525	ds->driver_data_len = sizeof(struct vmxnet3_softc);
1526	ds->queue_shared = sc->vmx_qs_dma.dma_paddr;
1527	ds->queue_shared_len = sc->vmx_qs_dma.dma_size;
1528	ds->nrxsg_max = sc->vmx_max_rxsegs;
1529
1530	/* RSS conf */
1531	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1532		ds->rss.version = 1;
1533		ds->rss.paddr = sc->vmx_rss_dma.dma_paddr;
1534		ds->rss.len = sc->vmx_rss_dma.dma_size;
1535	}
1536
1537	/* Interrupt control. */
1538	ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1539	ds->nintr = sc->vmx_nintrs;
1540	ds->evintr = sc->vmx_event_intr_idx;
1541	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1542
1543	for (i = 0; i < sc->vmx_nintrs; i++)
1544		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1545
1546	/* Receive filter. */
1547	ds->mcast_table = sc->vmx_mcast_dma.dma_paddr;
1548	ds->mcast_tablelen = sc->vmx_mcast_dma.dma_size;
1549
1550	/* Tx queues */
1551	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1552		txq = &sc->vmx_txq[i];
1553		txs = txq->vxtxq_ts;
1554
1555		txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_dma.dma_paddr;
1556		txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1557		txs->comp_ring = txq->vxtxq_comp_ring.vxcr_dma.dma_paddr;
1558		txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1559		txs->driver_data = vtophys(txq);
1560		txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1561	}
1562
1563	/* Rx queues */
1564	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1565		rxq = &sc->vmx_rxq[i];
1566		rxs = rxq->vxrxq_rs;
1567
1568		rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_dma.dma_paddr;
1569		rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1570		rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_dma.dma_paddr;
1571		rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1572		rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_dma.dma_paddr;
1573		rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1574		rxs->driver_data = vtophys(rxq);
1575		rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1576	}
1577}
1578
1579static void
1580vmxnet3_reinit_interface(struct vmxnet3_softc *sc)
1581{
1582	struct ifnet *ifp;
1583
1584	ifp = sc->vmx_ifp;
1585
1586	/* Use the current MAC address. */
1587	bcopy(IF_LLADDR(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
1588	vmxnet3_set_lladdr(sc);
1589
1590	ifp->if_hwassist = 0;
1591	if (ifp->if_capenable & IFCAP_TXCSUM)
1592		ifp->if_hwassist |= VMXNET3_CSUM_OFFLOAD;
1593	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
1594		ifp->if_hwassist |= VMXNET3_CSUM_OFFLOAD_IPV6;
1595	if (ifp->if_capenable & IFCAP_TSO4)
1596		ifp->if_hwassist |= CSUM_IP_TSO;
1597	if (ifp->if_capenable & IFCAP_TSO6)
1598		ifp->if_hwassist |= CSUM_IP6_TSO;
1599}
1600
1601static void
1602vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1603{
1604	/*
1605	 * Use the same key as the Linux driver until FreeBSD can do
1606	 * RSS (presumably Toeplitz) in software.
1607	 */
1608	static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
1609	    0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
1610	    0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
1611	    0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
1612	    0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
1613	    0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
1614	};
1615
1616	struct vmxnet3_driver_shared *ds;
1617	struct vmxnet3_rss_shared *rss;
1618	int i;
1619
1620	ds = sc->vmx_ds;
1621	rss = sc->vmx_rss;
1622
1623	rss->hash_type =
1624	    UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1625	    UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1626	rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1627	rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1628	rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1629	memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
1630
1631	for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1632		rss->ind_table[i] = i % sc->vmx_nrxqueues;
1633}
1634
1635static void
1636vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1637{
1638	struct ifnet *ifp;
1639	struct vmxnet3_driver_shared *ds;
1640
1641	ifp = sc->vmx_ifp;
1642	ds = sc->vmx_ds;
1643
1644	ds->mtu = ifp->if_mtu;
1645	ds->ntxqueue = sc->vmx_ntxqueues;
1646	ds->nrxqueue = sc->vmx_nrxqueues;
1647
1648	ds->upt_features = 0;
1649	if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
1650		ds->upt_features |= UPT1_F_CSUM;
1651	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1652		ds->upt_features |= UPT1_F_VLAN;
1653	if (ifp->if_capenable & IFCAP_LRO)
1654		ds->upt_features |= UPT1_F_LRO;
1655
1656	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1657		ds->upt_features |= UPT1_F_RSS;
1658		vmxnet3_reinit_rss_shared_data(sc);
1659	}
1660
1661	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.dma_paddr);
1662	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1663	    (uint64_t) sc->vmx_ds_dma.dma_paddr >> 32);
1664}
1665
1666static int
1667vmxnet3_alloc_data(struct vmxnet3_softc *sc)
1668{
1669	int error;
1670
1671	error = vmxnet3_alloc_shared_data(sc);
1672	if (error)
1673		return (error);
1674
1675	error = vmxnet3_alloc_queue_data(sc);
1676	if (error)
1677		return (error);
1678
1679	error = vmxnet3_alloc_mcast_table(sc);
1680	if (error)
1681		return (error);
1682
1683	vmxnet3_init_shared_data(sc);
1684
1685	return (0);
1686}
1687
1688static void
1689vmxnet3_free_data(struct vmxnet3_softc *sc)
1690{
1691
1692	vmxnet3_free_mcast_table(sc);
1693	vmxnet3_free_queue_data(sc);
1694	vmxnet3_free_shared_data(sc);
1695}
1696
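/*
 * Create and attach the ifnet: install the transmit entry points (legacy
 * if_start or multiqueue if_transmit), advertise checksum/TSO/VLAN
 * capabilities, register the VLAN config event handlers, and add a single
 * autoselect ifmedia entry.
 */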
1697static int
1698vmxnet3_setup_interface(struct vmxnet3_softc *sc)
1699{
1700	device_t dev;
1701	struct ifnet *ifp;
1702
1703	dev = sc->vmx_dev;
1704
1705	ifp = sc->vmx_ifp = if_alloc(IFT_ETHER);
1706	if (ifp == NULL) {
1707		device_printf(dev, "cannot allocate ifnet structure\n");
1708		return (ENOSPC);
1709	}
1710
1711	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1712#if __FreeBSD_version < 1000025
1713	ifp->if_baudrate = 1000000000;
1714#elif __FreeBSD_version < 1100011
1715	if_initbaudrate(ifp, IF_Gbps(10));
1716#else
1717	ifp->if_baudrate = IF_Gbps(10);
1718#endif
1719	ifp->if_softc = sc;
1720	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1721	ifp->if_init = vmxnet3_init;
1722	ifp->if_ioctl = vmxnet3_ioctl;
1723	ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
1724	ifp->if_hw_tsomaxsegcount = VMXNET3_TX_MAXSEGS;
1725	ifp->if_hw_tsomaxsegsize = VMXNET3_TX_MAXSEGSIZE;
1726
1727#ifdef VMXNET3_LEGACY_TX
1728	ifp->if_start = vmxnet3_start;
1729	ifp->if_snd.ifq_drv_maxlen = sc->vmx_ntxdescs - 1;
1730	IFQ_SET_MAXLEN(&ifp->if_snd, sc->vmx_ntxdescs - 1);
1731	IFQ_SET_READY(&ifp->if_snd);
1732#else
1733	ifp->if_transmit = vmxnet3_txq_mq_start;
1734	ifp->if_qflush = vmxnet3_qflush;
1735#endif
1736
1737	vmxnet3_get_lladdr(sc);
1738	ether_ifattach(ifp, sc->vmx_lladdr);
1739
1740	ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_TXCSUM;
1741	ifp->if_capabilities |= IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6;
1742	ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6;
1743	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
1744	    IFCAP_VLAN_HWCSUM;
1745	ifp->if_capenable = ifp->if_capabilities;
1746
1747	/* These capabilities are not enabled by default. */
1748	ifp->if_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER;
1749
1750	sc->vmx_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
1751	    vmxnet3_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
1752	sc->vmx_vlan_detach = EVENTHANDLER_REGISTER(vlan_config,
1753	    vmxnet3_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
1754
1755	ifmedia_init(&sc->vmx_media, 0, vmxnet3_media_change,
1756	    vmxnet3_media_status);
1757	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
1758	ifmedia_set(&sc->vmx_media, IFM_ETHER | IFM_AUTO);
1759
1760	return (0);
1761}
1762
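/*
 * Service a device event interrupt: acknowledge the pending events, update
 * the link state, report Tx/Rx queue errors, and reinitialize the device
 * if a queue error was signaled.
 */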
1763static void
1764vmxnet3_evintr(struct vmxnet3_softc *sc)
1765{
1766	device_t dev;
1767	struct ifnet *ifp;
1768	struct vmxnet3_txq_shared *ts;
1769	struct vmxnet3_rxq_shared *rs;
1770	uint32_t event;
1771	int reset;
1772
1773	dev = sc->vmx_dev;
1774	ifp = sc->vmx_ifp;
1775	reset = 0;
1776
1777	VMXNET3_CORE_LOCK(sc);
1778
1779	/* Clear events. */
1780	event = sc->vmx_ds->event;
1781	vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
1782
1783	if (event & VMXNET3_EVENT_LINK) {
1784		vmxnet3_link_status(sc);
1785		if (sc->vmx_link_active != 0)
1786			vmxnet3_tx_start_all(sc);
1787	}
1788
1789	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
1790		reset = 1;
1791		vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
1792		ts = sc->vmx_txq[0].vxtxq_ts;
1793		if (ts->stopped != 0)
1794			device_printf(dev, "Tx queue error %#x\n", ts->error);
1795		rs = sc->vmx_rxq[0].vxrxq_rs;
1796		if (rs->stopped != 0)
1797			device_printf(dev, "Rx queue error %#x\n", rs->error);
1798		device_printf(dev, "Rx/Tx queue error event ... resetting\n");
1799	}
1800
1801	if (event & VMXNET3_EVENT_DIC)
1802		device_printf(dev, "device implementation change event\n");
1803	if (event & VMXNET3_EVENT_DEBUG)
1804		device_printf(dev, "debug event\n");
1805
1806	if (reset != 0) {
1807		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1808		vmxnet3_init_locked(sc);
1809	}
1810
1811	VMXNET3_CORE_UNLOCK(sc);
1812}
1813
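/*
 * Process the Tx completion ring: unload and free each completed mbuf
 * chain, update the transmit statistics, and advance the command ring's
 * next-to-clean index. The watchdog is cleared once the ring is empty.
 */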
1814static void
1815vmxnet3_txq_eof(struct vmxnet3_txqueue *txq)
1816{
1817	struct vmxnet3_softc *sc;
1818	struct ifnet *ifp;
1819	struct vmxnet3_txring *txr;
1820	struct vmxnet3_comp_ring *txc;
1821	struct vmxnet3_txcompdesc *txcd;
1822	struct vmxnet3_txbuf *txb;
1823	struct mbuf *m;
1824	u_int sop;
1825
1826	sc = txq->vxtxq_sc;
1827	ifp = sc->vmx_ifp;
1828	txr = &txq->vxtxq_cmd_ring;
1829	txc = &txq->vxtxq_comp_ring;
1830
1831	VMXNET3_TXQ_LOCK_ASSERT(txq);
1832
1833	for (;;) {
1834		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
1835		if (txcd->gen != txc->vxcr_gen)
1836			break;
1837		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1838
1839		if (++txc->vxcr_next == txc->vxcr_ndesc) {
1840			txc->vxcr_next = 0;
1841			txc->vxcr_gen ^= 1;
1842		}
1843
1844		sop = txr->vxtxr_next;
1845		txb = &txr->vxtxr_txbuf[sop];
1846
1847		if ((m = txb->vtxb_m) != NULL) {
1848			bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
1849			    BUS_DMASYNC_POSTWRITE);
1850			bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap);
1851
1852			txq->vxtxq_stats.vmtxs_opackets++;
1853			txq->vxtxq_stats.vmtxs_obytes += m->m_pkthdr.len;
1854			if (m->m_flags & M_MCAST)
1855				txq->vxtxq_stats.vmtxs_omcasts++;
1856
1857			m_freem(m);
1858			txb->vtxb_m = NULL;
1859		}
1860
1861		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
1862	}
1863
1864	if (txr->vxtxr_head == txr->vxtxr_next)
1865		txq->vxtxq_watchdog = 0;
1866}
1867
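/*
 * Attach a fresh mbuf cluster to the given ring slot. On the first command
 * ring, a slot that starts a frame chain gets a normal cluster (head
 * buffer); all other slots get jumbo page-sized clusters (body buffers).
 * Loading through the spare dmamap keeps the old buffer mapped if the new
 * one cannot be loaded.
 */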
1868static int
1869vmxnet3_newbuf(struct vmxnet3_softc *sc, struct vmxnet3_rxring *rxr)
1870{
1871	struct ifnet *ifp;
1872	struct mbuf *m;
1873	struct vmxnet3_rxdesc *rxd;
1874	struct vmxnet3_rxbuf *rxb;
1875	bus_dma_tag_t tag;
1876	bus_dmamap_t dmap;
1877	bus_dma_segment_t segs[1];
1878	int idx, clsize, btype, flags, nsegs, error;
1879
1880	ifp = sc->vmx_ifp;
1881	tag = rxr->vxrxr_rxtag;
1882	dmap = rxr->vxrxr_spare_dmap;
1883	idx = rxr->vxrxr_fill;
1884	rxd = &rxr->vxrxr_rxd[idx];
1885	rxb = &rxr->vxrxr_rxbuf[idx];
1886
1887#ifdef VMXNET3_FAILPOINTS
1888	KFAIL_POINT_CODE(VMXNET3_FP, newbuf, return ENOBUFS);
1889	if (rxr->vxrxr_rid != 0)
1890		KFAIL_POINT_CODE(VMXNET3_FP, newbuf_body_only, return ENOBUFS);
1891#endif
1892
1893	if (rxr->vxrxr_rid == 0 && (idx % sc->vmx_rx_max_chain) == 0) {
1894		flags = M_PKTHDR;
1895		clsize = MCLBYTES;
1896		btype = VMXNET3_BTYPE_HEAD;
1897	} else {
1898#if __FreeBSD_version < 902001
		/*
		 * These mbufs will never be used for the start of a frame.
		 * Roughly prior to branching releng/9.2,
		 * bus_dmamap_load_mbuf_sg() required the mbuf to always be a
		 * packet header. Avoid unnecessary mbuf initialization in
		 * newer versions where that is not the case.
		 */
1906		flags = M_PKTHDR;
1907#else
1908		flags = 0;
1909#endif
1910		clsize = MJUMPAGESIZE;
1911		btype = VMXNET3_BTYPE_BODY;
1912	}
1913
1914	m = m_getjcl(M_NOWAIT, MT_DATA, flags, clsize);
1915	if (m == NULL) {
1916		sc->vmx_stats.vmst_mgetcl_failed++;
1917		return (ENOBUFS);
1918	}
1919
1920	if (btype == VMXNET3_BTYPE_HEAD) {
1921		m->m_len = m->m_pkthdr.len = clsize;
1922		m_adj(m, ETHER_ALIGN);
1923	} else
1924		m->m_len = clsize;
1925
1926	error = bus_dmamap_load_mbuf_sg(tag, dmap, m, &segs[0], &nsegs,
1927	    BUS_DMA_NOWAIT);
1928	if (error) {
1929		m_freem(m);
1930		sc->vmx_stats.vmst_mbuf_load_failed++;
1931		return (error);
1932	}
1933	KASSERT(nsegs == 1,
1934	    ("%s: mbuf %p with too many segments %d", __func__, m, nsegs));
1935#if __FreeBSD_version < 902001
1936	if (btype == VMXNET3_BTYPE_BODY)
1937		m->m_flags &= ~M_PKTHDR;
1938#endif
1939
1940	if (rxb->vrxb_m != NULL) {
1941		bus_dmamap_sync(tag, rxb->vrxb_dmamap, BUS_DMASYNC_POSTREAD);
1942		bus_dmamap_unload(tag, rxb->vrxb_dmamap);
1943	}
1944
1945	rxr->vxrxr_spare_dmap = rxb->vrxb_dmamap;
1946	rxb->vrxb_dmamap = dmap;
1947	rxb->vrxb_m = m;
1948
1949	rxd->addr = segs[0].ds_addr;
1950	rxd->len = segs[0].ds_len;
1951	rxd->btype = btype;
1952	rxd->gen = rxr->vxrxr_gen;
1953
1954	vmxnet3_rxr_increment_fill(rxr);
1955	return (0);
1956}
1957
1958static void
1959vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *rxq,
1960    struct vmxnet3_rxring *rxr, int idx)
1961{
1962	struct vmxnet3_rxdesc *rxd;
1963
1964	rxd = &rxr->vxrxr_rxd[idx];
1965	rxd->gen = rxr->vxrxr_gen;
1966	vmxnet3_rxr_increment_fill(rxr);
1967}
1968
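/*
 * Discard the remaining completion descriptors of a frame we cannot
 * accept: walk the completion ring up to the end-of-packet entry and
 * recycle each corresponding Rx descriptor back to its command ring.
 */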
1969static void
1970vmxnet3_rxq_discard_chain(struct vmxnet3_rxqueue *rxq)
1971{
1972	struct vmxnet3_softc *sc;
1973	struct vmxnet3_rxring *rxr;
1974	struct vmxnet3_comp_ring *rxc;
1975	struct vmxnet3_rxcompdesc *rxcd;
1976	int idx, eof;
1977
1978	sc = rxq->vxrxq_sc;
1979	rxc = &rxq->vxrxq_comp_ring;
1980
1981	do {
1982		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
1983		if (rxcd->gen != rxc->vxcr_gen)
1984			break;		/* Not expected. */
1985		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1986
1987		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
1988			rxc->vxcr_next = 0;
1989			rxc->vxcr_gen ^= 1;
1990		}
1991
1992		idx = rxcd->rxd_idx;
1993		eof = rxcd->eop;
1994		if (rxcd->qid < sc->vmx_nrxqueues)
1995			rxr = &rxq->vxrxq_cmd_ring[0];
1996		else
1997			rxr = &rxq->vxrxq_cmd_ring[1];
1998		vmxnet3_rxq_eof_discard(rxq, rxr, idx);
1999	} while (!eof);
2000}
2001
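/*
 * Translate the checksum results reported in the Rx completion descriptor
 * into mbuf csum_flags so the stack can skip software verification.
 */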
2002static void
2003vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2004{
2005
2006	if (rxcd->ipv4) {
2007		m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
2008		if (rxcd->ipcsum_ok)
2009			m->m_pkthdr.csum_flags |= CSUM_IP_VALID;
2010	}
2011
2012	if (!rxcd->fragment) {
2013		if (rxcd->csum_ok && (rxcd->tcp || rxcd->udp)) {
2014			m->m_pkthdr.csum_flags |= CSUM_DATA_VALID |
2015			    CSUM_PSEUDO_HDR;
2016			m->m_pkthdr.csum_data = 0xFFFF;
2017		}
2018	}
2019}
2020
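/*
 * Deliver a completed frame to the stack: drop it on a device-reported
 * error, otherwise set the flowid, checksum and VLAN information, update
 * the queue counters, and call if_input() with the Rx lock dropped.
 */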
2021static void
2022vmxnet3_rxq_input(struct vmxnet3_rxqueue *rxq,
2023    struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2024{
2025	struct vmxnet3_softc *sc;
2026	struct ifnet *ifp;
2027
2028	sc = rxq->vxrxq_sc;
2029	ifp = sc->vmx_ifp;
2030
2031	if (rxcd->error) {
2032		rxq->vxrxq_stats.vmrxs_ierrors++;
2033		m_freem(m);
2034		return;
2035	}
2036
2037#ifdef notyet
2038	switch (rxcd->rss_type) {
2039	case VMXNET3_RCD_RSS_TYPE_IPV4:
2040		m->m_pkthdr.flowid = rxcd->rss_hash;
2041		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV4);
2042		break;
2043	case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
2044		m->m_pkthdr.flowid = rxcd->rss_hash;
2045		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV4);
2046		break;
2047	case VMXNET3_RCD_RSS_TYPE_IPV6:
2048		m->m_pkthdr.flowid = rxcd->rss_hash;
2049		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV6);
2050		break;
2051	case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
2052		m->m_pkthdr.flowid = rxcd->rss_hash;
2053		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV6);
2054		break;
2055	default: /* VMXNET3_RCD_RSS_TYPE_NONE */
2056		m->m_pkthdr.flowid = rxq->vxrxq_id;
2057		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2058		break;
2059	}
2060#else
2061	m->m_pkthdr.flowid = rxq->vxrxq_id;
2062	M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2063#endif
2064
2065	if (!rxcd->no_csum)
2066		vmxnet3_rx_csum(rxcd, m);
2067	if (rxcd->vlan) {
2068		m->m_flags |= M_VLANTAG;
2069		m->m_pkthdr.ether_vtag = rxcd->vtag;
2070	}
2071
2072	rxq->vxrxq_stats.vmrxs_ipackets++;
2073	rxq->vxrxq_stats.vmrxs_ibytes += m->m_pkthdr.len;
2074
2075	VMXNET3_RXQ_UNLOCK(rxq);
2076	(*ifp->if_input)(ifp, m);
2077	VMXNET3_RXQ_LOCK(rxq);
2078}
2079
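/*
 * Process the Rx completion ring: reassemble frames that may span a head
 * buffer in ring 0 and body buffers in ring 1, replace each consumed
 * buffer, and hand completed frames to vmxnet3_rxq_input().
 */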
2080static void
2081vmxnet3_rxq_eof(struct vmxnet3_rxqueue *rxq)
2082{
2083	struct vmxnet3_softc *sc;
2084	struct ifnet *ifp;
2085	struct vmxnet3_rxring *rxr;
2086	struct vmxnet3_comp_ring *rxc;
2087	struct vmxnet3_rxdesc *rxd;
2088	struct vmxnet3_rxcompdesc *rxcd;
2089	struct mbuf *m, *m_head, *m_tail;
2090	int idx, length;
2091
2092	sc = rxq->vxrxq_sc;
2093	ifp = sc->vmx_ifp;
2094	rxc = &rxq->vxrxq_comp_ring;
2095
2096	VMXNET3_RXQ_LOCK_ASSERT(rxq);
2097
2098	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2099		return;
2100
2101	m_head = rxq->vxrxq_mhead;
2102	rxq->vxrxq_mhead = NULL;
2103	m_tail = rxq->vxrxq_mtail;
2104	rxq->vxrxq_mtail = NULL;
2105	MPASS(m_head == NULL || m_tail != NULL);
2106
2107	for (;;) {
2108		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
2109		if (rxcd->gen != rxc->vxcr_gen) {
2110			rxq->vxrxq_mhead = m_head;
2111			rxq->vxrxq_mtail = m_tail;
2112			break;
2113		}
2114		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
2115
2116		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
2117			rxc->vxcr_next = 0;
2118			rxc->vxcr_gen ^= 1;
2119		}
2120
2121		idx = rxcd->rxd_idx;
2122		length = rxcd->len;
2123		if (rxcd->qid < sc->vmx_nrxqueues)
2124			rxr = &rxq->vxrxq_cmd_ring[0];
2125		else
2126			rxr = &rxq->vxrxq_cmd_ring[1];
2127		rxd = &rxr->vxrxr_rxd[idx];
2128
2129		m = rxr->vxrxr_rxbuf[idx].vrxb_m;
2130		KASSERT(m != NULL, ("%s: queue %d idx %d without mbuf",
2131		    __func__, rxcd->qid, idx));
2132
2133		/*
2134		 * The host may skip descriptors. We detect this when this
2135		 * descriptor's index does not match the ring's fill index.
2136		 * Catch up with the host now.
2137		 */
2138		if (__predict_false(rxr->vxrxr_fill != idx)) {
2139			while (rxr->vxrxr_fill != idx) {
2140				rxr->vxrxr_rxd[rxr->vxrxr_fill].gen =
2141				    rxr->vxrxr_gen;
2142				vmxnet3_rxr_increment_fill(rxr);
2143			}
2144		}
2145
2146		if (rxcd->sop) {
2147			KASSERT(rxd->btype == VMXNET3_BTYPE_HEAD,
2148			    ("%s: start of frame w/o head buffer", __func__));
2149			KASSERT(rxr == &rxq->vxrxq_cmd_ring[0],
2150			    ("%s: start of frame not in ring 0", __func__));
2151			KASSERT((idx % sc->vmx_rx_max_chain) == 0,
2152			    ("%s: start of frame at unexpected index %d (%d)",
2153			     __func__, idx, sc->vmx_rx_max_chain));
2154			KASSERT(m_head == NULL,
2155			    ("%s: duplicate start of frame?", __func__));
2156
2157			if (length == 0) {
2158				/* Just ignore this descriptor. */
2159				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2160				goto nextp;
2161			}
2162
2163			if (vmxnet3_newbuf(sc, rxr) != 0) {
2164				rxq->vxrxq_stats.vmrxs_iqdrops++;
2165				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2166				if (!rxcd->eop)
2167					vmxnet3_rxq_discard_chain(rxq);
2168				goto nextp;
2169			}
2170
2171			m->m_pkthdr.rcvif = ifp;
2172			m->m_pkthdr.len = m->m_len = length;
2173			m->m_pkthdr.csum_flags = 0;
2174			m_head = m_tail = m;
2175
2176		} else {
2177			KASSERT(rxd->btype == VMXNET3_BTYPE_BODY,
2178			    ("%s: non-start of frame w/o body buffer", __func__));
2179			KASSERT(m_head != NULL,
2180			    ("%s: frame not started?", __func__));
2181
2182			if (vmxnet3_newbuf(sc, rxr) != 0) {
2183				rxq->vxrxq_stats.vmrxs_iqdrops++;
2184				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2185				if (!rxcd->eop)
2186					vmxnet3_rxq_discard_chain(rxq);
2187				m_freem(m_head);
2188				m_head = m_tail = NULL;
2189				goto nextp;
2190			}
2191
2192			m->m_len = length;
2193			m_head->m_pkthdr.len += length;
2194			m_tail->m_next = m;
2195			m_tail = m;
2196		}
2197
2198		if (rxcd->eop) {
2199			vmxnet3_rxq_input(rxq, rxcd, m_head);
2200			m_head = m_tail = NULL;
2201
2202			/* Must recheck after dropping the Rx lock. */
2203			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2204				break;
2205		}
2206
2207nextp:
2208		if (__predict_false(rxq->vxrxq_rs->update_rxhead)) {
2209			int qid = rxcd->qid;
2210			bus_size_t r;
2211
2212			idx = (idx + 1) % rxr->vxrxr_ndesc;
2213			if (qid >= sc->vmx_nrxqueues) {
2214				qid -= sc->vmx_nrxqueues;
2215				r = VMXNET3_BAR0_RXH2(qid);
2216			} else
2217				r = VMXNET3_BAR0_RXH1(qid);
2218			vmxnet3_write_bar0(sc, r, idx);
2219		}
2220	}
2221}
2222
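/*
 * Interrupt handler used when the driver has a single vector: check for
 * pending device events, then service the first Rx and Tx queues.
 */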
2223static void
2224vmxnet3_legacy_intr(void *xsc)
2225{
2226	struct vmxnet3_softc *sc;
2227	struct vmxnet3_rxqueue *rxq;
2228	struct vmxnet3_txqueue *txq;
2229
2230	sc = xsc;
2231	rxq = &sc->vmx_rxq[0];
2232	txq = &sc->vmx_txq[0];
2233
2234	if (sc->vmx_intr_type == VMXNET3_IT_LEGACY) {
2235		if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
2236			return;
2237	}
2238	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2239		vmxnet3_disable_all_intrs(sc);
2240
2241	if (sc->vmx_ds->event != 0)
2242		vmxnet3_evintr(sc);
2243
2244	VMXNET3_RXQ_LOCK(rxq);
2245	vmxnet3_rxq_eof(rxq);
2246	VMXNET3_RXQ_UNLOCK(rxq);
2247
2248	VMXNET3_TXQ_LOCK(txq);
2249	vmxnet3_txq_eof(txq);
2250	vmxnet3_txq_start(txq);
2251	VMXNET3_TXQ_UNLOCK(txq);
2252
2253	vmxnet3_enable_all_intrs(sc);
2254}
2255
2256static void
2257vmxnet3_txq_intr(void *xtxq)
2258{
2259	struct vmxnet3_softc *sc;
2260	struct vmxnet3_txqueue *txq;
2261
2262	txq = xtxq;
2263	sc = txq->vxtxq_sc;
2264
2265	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2266		vmxnet3_disable_intr(sc, txq->vxtxq_intr_idx);
2267
2268	VMXNET3_TXQ_LOCK(txq);
2269	vmxnet3_txq_eof(txq);
2270	vmxnet3_txq_start(txq);
2271	VMXNET3_TXQ_UNLOCK(txq);
2272
2273	vmxnet3_enable_intr(sc, txq->vxtxq_intr_idx);
2274}
2275
2276static void
2277vmxnet3_rxq_intr(void *xrxq)
2278{
2279	struct vmxnet3_softc *sc;
2280	struct vmxnet3_rxqueue *rxq;
2281
2282	rxq = xrxq;
2283	sc = rxq->vxrxq_sc;
2284
2285	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2286		vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx);
2287
2288	VMXNET3_RXQ_LOCK(rxq);
2289	vmxnet3_rxq_eof(rxq);
2290	VMXNET3_RXQ_UNLOCK(rxq);
2291
2292	vmxnet3_enable_intr(sc, rxq->vxrxq_intr_idx);
2293}
2294
2295static void
2296vmxnet3_event_intr(void *xsc)
2297{
2298	struct vmxnet3_softc *sc;
2299
2300	sc = xsc;
2301
2302	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2303		vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
2304
2305	if (sc->vmx_ds->event != 0)
2306		vmxnet3_evintr(sc);
2307
2308	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2309}
2310
2311static void
2312vmxnet3_txstop(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2313{
2314	struct vmxnet3_txring *txr;
2315	struct vmxnet3_txbuf *txb;
2316	int i;
2317
2318	txr = &txq->vxtxq_cmd_ring;
2319
2320	for (i = 0; i < txr->vxtxr_ndesc; i++) {
2321		txb = &txr->vxtxr_txbuf[i];
2322
2323		if (txb->vtxb_m == NULL)
2324			continue;
2325
2326		bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
2327		    BUS_DMASYNC_POSTWRITE);
2328		bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap);
2329		m_freem(txb->vtxb_m);
2330		txb->vtxb_m = NULL;
2331	}
2332}
2333
2334static void
2335vmxnet3_rxstop(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2336{
2337	struct vmxnet3_rxring *rxr;
2338	struct vmxnet3_rxbuf *rxb;
2339	int i, j;
2340
2341	if (rxq->vxrxq_mhead != NULL) {
2342		m_freem(rxq->vxrxq_mhead);
2343		rxq->vxrxq_mhead = NULL;
2344		rxq->vxrxq_mtail = NULL;
2345	}
2346
2347	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
2348		rxr = &rxq->vxrxq_cmd_ring[i];
2349
2350		for (j = 0; j < rxr->vxrxr_ndesc; j++) {
2351			rxb = &rxr->vxrxr_rxbuf[j];
2352
2353			if (rxb->vrxb_m == NULL)
2354				continue;
2355
2356			bus_dmamap_sync(rxr->vxrxr_rxtag, rxb->vrxb_dmamap,
2357			    BUS_DMASYNC_POSTREAD);
2358			bus_dmamap_unload(rxr->vxrxr_rxtag, rxb->vrxb_dmamap);
2359			m_freem(rxb->vrxb_m);
2360			rxb->vrxb_m = NULL;
2361		}
2362	}
2363}
2364
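/*
 * Acquire and release each queue lock in turn so that any interrupt
 * handler or transmit path still inside a queue's critical section has
 * drained before the rings are torn down.
 */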
2365static void
2366vmxnet3_stop_rendezvous(struct vmxnet3_softc *sc)
2367{
2368	struct vmxnet3_rxqueue *rxq;
2369	struct vmxnet3_txqueue *txq;
2370	int i;
2371
2372	for (i = 0; i < sc->vmx_nrxqueues; i++) {
2373		rxq = &sc->vmx_rxq[i];
2374		VMXNET3_RXQ_LOCK(rxq);
2375		VMXNET3_RXQ_UNLOCK(rxq);
2376	}
2377
2378	for (i = 0; i < sc->vmx_ntxqueues; i++) {
2379		txq = &sc->vmx_txq[i];
2380		VMXNET3_TXQ_LOCK(txq);
2381		VMXNET3_TXQ_UNLOCK(txq);
2382	}
2383}
2384
2385static void
2386vmxnet3_stop(struct vmxnet3_softc *sc)
2387{
2388	struct ifnet *ifp;
2389	int q;
2390
2391	ifp = sc->vmx_ifp;
2392	VMXNET3_CORE_LOCK_ASSERT(sc);
2393
2394	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2395	sc->vmx_link_active = 0;
2396	callout_stop(&sc->vmx_tick);
2397
2398	/* Disable interrupts. */
2399	vmxnet3_disable_all_intrs(sc);
2400	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
2401
2402	vmxnet3_stop_rendezvous(sc);
2403
2404	for (q = 0; q < sc->vmx_ntxqueues; q++)
2405		vmxnet3_txstop(sc, &sc->vmx_txq[q]);
2406	for (q = 0; q < sc->vmx_nrxqueues; q++)
2407		vmxnet3_rxstop(sc, &sc->vmx_rxq[q]);
2408
2409	vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
2410}
2411
2412static void
2413vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2414{
2415	struct vmxnet3_txring *txr;
2416	struct vmxnet3_comp_ring *txc;
2417
2418	txr = &txq->vxtxq_cmd_ring;
2419	txr->vxtxr_head = 0;
2420	txr->vxtxr_next = 0;
2421	txr->vxtxr_gen = VMXNET3_INIT_GEN;
2422	bzero(txr->vxtxr_txd,
2423	    txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc));
2424
2425	txc = &txq->vxtxq_comp_ring;
2426	txc->vxcr_next = 0;
2427	txc->vxcr_gen = VMXNET3_INIT_GEN;
2428	bzero(txc->vxcr_u.txcd,
2429	    txc->vxcr_ndesc * sizeof(struct vmxnet3_txcompdesc));
2430}
2431
2432static int
2433vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2434{
2435	struct ifnet *ifp;
2436	struct vmxnet3_rxring *rxr;
2437	struct vmxnet3_comp_ring *rxc;
2438	int i, populate, idx, frame_size, error;
2439
2440	ifp = sc->vmx_ifp;
2441	frame_size = ETHER_ALIGN + sizeof(struct ether_vlan_header) +
2442	    ifp->if_mtu;
2443
2444	/*
2445	 * If the MTU causes us to exceed what a regular sized cluster can
2446	 * handle, we allocate a second MJUMPAGESIZE cluster after it in
2447	 * ring 0. If in use, ring 1 always contains MJUMPAGESIZE clusters.
2448	 *
2449	 * Keep rx_max_chain a divisor of the maximum Rx ring size to make
2450	 * our life easier. We do not support changing the ring size after
2451	 * the attach.
2452	 */
2453	if (frame_size <= MCLBYTES)
2454		sc->vmx_rx_max_chain = 1;
2455	else
2456		sc->vmx_rx_max_chain = 2;
2457
2458	/*
2459	 * Only populate ring 1 if the configuration will take advantage
2460	 * of it. That is either when LRO is enabled or the frame size
2461	 * exceeds what ring 0 can contain.
2462	 */
2463	if ((ifp->if_capenable & IFCAP_LRO) == 0 &&
2464	    frame_size <= MCLBYTES + MJUMPAGESIZE)
2465		populate = 1;
2466	else
2467		populate = VMXNET3_RXRINGS_PERQ;
2468
2469	for (i = 0; i < populate; i++) {
2470		rxr = &rxq->vxrxq_cmd_ring[i];
2471		rxr->vxrxr_fill = 0;
2472		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
2473		bzero(rxr->vxrxr_rxd,
2474		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2475
2476		for (idx = 0; idx < rxr->vxrxr_ndesc; idx++) {
2477			error = vmxnet3_newbuf(sc, rxr);
2478			if (error)
2479				return (error);
2480		}
2481	}
2482
2483	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
2484		rxr = &rxq->vxrxq_cmd_ring[i];
2485		rxr->vxrxr_fill = 0;
2486		rxr->vxrxr_gen = 0;
2487		bzero(rxr->vxrxr_rxd,
2488		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2489	}
2490
2491	rxc = &rxq->vxrxq_comp_ring;
2492	rxc->vxcr_next = 0;
2493	rxc->vxcr_gen = VMXNET3_INIT_GEN;
2494	bzero(rxc->vxcr_u.rxcd,
2495	    rxc->vxcr_ndesc * sizeof(struct vmxnet3_rxcompdesc));
2496
2497	return (0);
2498}
2499
2500static int
2501vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
2502{
2503	device_t dev;
2504	int q, error;
2505
2506	dev = sc->vmx_dev;
2507
2508	for (q = 0; q < sc->vmx_ntxqueues; q++)
2509		vmxnet3_txinit(sc, &sc->vmx_txq[q]);
2510
2511	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2512		error = vmxnet3_rxinit(sc, &sc->vmx_rxq[q]);
2513		if (error) {
2514			device_printf(dev, "cannot populate Rx queue %d\n", q);
2515			return (error);
2516		}
2517	}
2518
2519	return (0);
2520}
2521
2522static int
2523vmxnet3_enable_device(struct vmxnet3_softc *sc)
2524{
2525	int q;
2526
2527	if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
2528		device_printf(sc->vmx_dev, "device enable command failed!\n");
2529		return (1);
2530	}
2531
2532	/* Reset the Rx queue heads. */
2533	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2534		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
2535		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
2536	}
2537
2538	return (0);
2539}
2540
2541static void
2542vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
2543{
2544	struct ifnet *ifp;
2545
2546	ifp = sc->vmx_ifp;
2547
2548	vmxnet3_set_rxfilter(sc);
2549
2550	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2551		bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter,
2552		    sizeof(sc->vmx_ds->vlan_filter));
2553	else
2554		bzero(sc->vmx_ds->vlan_filter,
2555		    sizeof(sc->vmx_ds->vlan_filter));
2556	vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
2557}
2558
2559static int
2560vmxnet3_reinit(struct vmxnet3_softc *sc)
2561{
2562
2563	vmxnet3_reinit_interface(sc);
2564	vmxnet3_reinit_shared_data(sc);
2565
2566	if (vmxnet3_reinit_queues(sc) != 0)
2567		return (ENXIO);
2568
2569	if (vmxnet3_enable_device(sc) != 0)
2570		return (ENXIO);
2571
2572	vmxnet3_reinit_rxfilters(sc);
2573
2574	return (0);
2575}
2576
2577static void
2578vmxnet3_init_locked(struct vmxnet3_softc *sc)
2579{
2580	struct ifnet *ifp;
2581
2582	ifp = sc->vmx_ifp;
2583
2584	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2585		return;
2586
2587	vmxnet3_stop(sc);
2588
2589	if (vmxnet3_reinit(sc) != 0) {
2590		vmxnet3_stop(sc);
2591		return;
2592	}
2593
2594	ifp->if_drv_flags |= IFF_DRV_RUNNING;
2595	vmxnet3_link_status(sc);
2596
2597	vmxnet3_enable_all_intrs(sc);
2598	callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
2599}
2600
2601static void
2602vmxnet3_init(void *xsc)
2603{
2604	struct vmxnet3_softc *sc;
2605
2606	sc = xsc;
2607
2608	VMXNET3_CORE_LOCK(sc);
2609	vmxnet3_init_locked(sc);
2610	VMXNET3_CORE_UNLOCK(sc);
2611}
2612
2613/*
2614 * BMV: Much of this can go away once we finally have offsets in
2615 * the mbuf packet header. Bug andre@.
2616 */
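/*
 * Parse the Ethernet and IP/IPv6 headers to determine the L3 protocol
 * and the offset of the L4 header for the offload descriptor. For TSO,
 * also seed th_sum with the pseudo-header checksum.
 */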
2617static int
2618vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *txq, struct mbuf *m,
2619    int *etype, int *proto, int *start)
2620{
2621	struct ether_vlan_header *evh;
2622	int offset;
2623#if defined(INET)
2624	struct ip *ip = NULL;
2625	struct ip iphdr;
2626#endif
2627#if defined(INET6)
2628	struct ip6_hdr *ip6 = NULL;
2629	struct ip6_hdr ip6hdr;
2630#endif
2631
2632	evh = mtod(m, struct ether_vlan_header *);
2633	if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2634		/* BMV: We should handle nested VLAN tags too. */
2635		*etype = ntohs(evh->evl_proto);
2636		offset = sizeof(struct ether_vlan_header);
2637	} else {
2638		*etype = ntohs(evh->evl_encap_proto);
2639		offset = sizeof(struct ether_header);
2640	}
2641
2642	switch (*etype) {
2643#if defined(INET)
2644	case ETHERTYPE_IP:
2645		if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
2646			m_copydata(m, offset, sizeof(struct ip),
2647			    (caddr_t) &iphdr);
2648			ip = &iphdr;
2649		} else
2650			ip = mtodo(m, offset);
2651		*proto = ip->ip_p;
2652		*start = offset + (ip->ip_hl << 2);
2653		break;
2654#endif
2655#if defined(INET6)
2656	case ETHERTYPE_IPV6:
2657		if (__predict_false(m->m_len <
2658		    offset + sizeof(struct ip6_hdr))) {
2659			m_copydata(m, offset, sizeof(struct ip6_hdr),
2660			    (caddr_t) &ip6hdr);
2661			ip6 = &ip6hdr;
2662		} else
2663			ip6 = mtodo(m, offset);
2664		*proto = -1;
2665		*start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
2666		/* Assert the network stack sent us a valid packet. */
2667		KASSERT(*start > offset,
2668		    ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
2669		    *start, offset, *proto));
2670		break;
2671#endif
2672	default:
2673		return (EINVAL);
2674	}
2675
2676	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
2677		struct tcphdr *tcp, tcphdr;
2678		uint16_t sum;
2679
2680		if (__predict_false(*proto != IPPROTO_TCP)) {
2681			/* Likely failed to correctly parse the mbuf. */
2682			return (EINVAL);
2683		}
2684
2685		txq->vxtxq_stats.vmtxs_tso++;
2686
2687		switch (*etype) {
2688#if defined(INET)
2689		case ETHERTYPE_IP:
2690			sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2691			    htons(IPPROTO_TCP));
2692			break;
2693#endif
2694#if defined(INET6)
2695		case ETHERTYPE_IPV6:
2696			sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
2697			break;
2698#endif
2699		default:
2700			sum = 0;
2701			break;
2702		}
2703
2704		if (m->m_len < *start + sizeof(struct tcphdr)) {
2705			m_copyback(m, *start + offsetof(struct tcphdr, th_sum),
2706			    sizeof(uint16_t), (caddr_t) &sum);
2707			m_copydata(m, *start, sizeof(struct tcphdr),
2708			    (caddr_t) &tcphdr);
2709			tcp = &tcphdr;
2710		} else {
2711			tcp = mtodo(m, *start);
2712			tcp->th_sum = sum;
2713		}
2714
2715		/*
2716		 * For TSO, the size of the protocol header is also
2717		 * included in the descriptor header size.
2718		 */
2719		*start += (tcp->th_off << 2);
2720	} else
2721		txq->vxtxq_stats.vmtxs_csum++;
2722
2723	return (0);
2724}
2725
2726static int
2727vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *txq, struct mbuf **m0,
2728    bus_dmamap_t dmap, bus_dma_segment_t segs[], int *nsegs)
2729{
2730	struct vmxnet3_txring *txr;
2731	struct mbuf *m;
2732	bus_dma_tag_t tag;
2733	int error;
2734
2735	txr = &txq->vxtxq_cmd_ring;
2736	m = *m0;
2737	tag = txr->vxtxr_txtag;
2738
2739	error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0);
2740	if (error == 0 || error != EFBIG)
2741		return (error);
2742
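	/* Too many segments (EFBIG); defragment the chain and retry once. */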
2743	m = m_defrag(m, M_NOWAIT);
2744	if (m != NULL) {
2745		*m0 = m;
2746		error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0);
2747	} else
2748		error = ENOBUFS;
2749
2750	if (error) {
2751		m_freem(*m0);
2752		*m0 = NULL;
2753		txq->vxtxq_sc->vmx_stats.vmst_defrag_failed++;
2754	} else
2755		txq->vxtxq_sc->vmx_stats.vmst_defragged++;
2756
2757	return (error);
2758}
2759
2760static void
2761vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *txq, bus_dmamap_t dmap)
2762{
2763	struct vmxnet3_txring *txr;
2764
2765	txr = &txq->vxtxq_cmd_ring;
2766	bus_dmamap_unload(txr->vxtxr_txtag, dmap);
2767}
2768
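/*
 * Map the mbuf chain and fill one Tx descriptor per DMA segment. The
 * start-of-packet descriptor is created with an inverted generation bit
 * and only flipped once the rest of the chain and any offload state are
 * in place, so the device never sees a partially built packet.
 */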
2769static int
2770vmxnet3_txq_encap(struct vmxnet3_txqueue *txq, struct mbuf **m0)
2771{
2772	struct vmxnet3_softc *sc;
2773	struct vmxnet3_txring *txr;
2774	struct vmxnet3_txdesc *txd, *sop;
2775	struct mbuf *m;
2776	bus_dmamap_t dmap;
2777	bus_dma_segment_t segs[VMXNET3_TX_MAXSEGS];
2778	int i, gen, nsegs, etype, proto, start, error;
2779
2780	sc = txq->vxtxq_sc;
2781	start = 0;
2782	txd = NULL;
2783	txr = &txq->vxtxq_cmd_ring;
2784	dmap = txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_dmamap;
2785
2786	error = vmxnet3_txq_load_mbuf(txq, m0, dmap, segs, &nsegs);
2787	if (error)
2788		return (error);
2789
2790	m = *m0;
2791	M_ASSERTPKTHDR(m);
2792	KASSERT(nsegs <= VMXNET3_TX_MAXSEGS,
2793	    ("%s: mbuf %p with too many segments %d", __func__, m, nsegs));
2794
2795	if (VMXNET3_TXRING_AVAIL(txr) < nsegs) {
2796		txq->vxtxq_stats.vmtxs_full++;
2797		vmxnet3_txq_unload_mbuf(txq, dmap);
2798		return (ENOSPC);
2799	} else if (m->m_pkthdr.csum_flags & VMXNET3_CSUM_ALL_OFFLOAD) {
2800		error = vmxnet3_txq_offload_ctx(txq, m, &etype, &proto, &start);
2801		if (error) {
2802			txq->vxtxq_stats.vmtxs_offload_failed++;
2803			vmxnet3_txq_unload_mbuf(txq, dmap);
2804			m_freem(m);
2805			*m0 = NULL;
2806			return (error);
2807		}
2808	}
2809
2810	txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_m = m;
2811	sop = &txr->vxtxr_txd[txr->vxtxr_head];
2812	gen = txr->vxtxr_gen ^ 1;	/* Not yet owned by the device. */
2813
2814	for (i = 0; i < nsegs; i++) {
2815		txd = &txr->vxtxr_txd[txr->vxtxr_head];
2816
2817		txd->addr = segs[i].ds_addr;
2818		txd->len = segs[i].ds_len;
2819		txd->gen = gen;
2820		txd->dtype = 0;
2821		txd->offload_mode = VMXNET3_OM_NONE;
2822		txd->offload_pos = 0;
2823		txd->hlen = 0;
2824		txd->eop = 0;
2825		txd->compreq = 0;
2826		txd->vtag_mode = 0;
2827		txd->vtag = 0;
2828
2829		if (++txr->vxtxr_head == txr->vxtxr_ndesc) {
2830			txr->vxtxr_head = 0;
2831			txr->vxtxr_gen ^= 1;
2832		}
2833		gen = txr->vxtxr_gen;
2834	}
2835	txd->eop = 1;
2836	txd->compreq = 1;
2837
2838	if (m->m_flags & M_VLANTAG) {
2839		sop->vtag_mode = 1;
2840		sop->vtag = m->m_pkthdr.ether_vtag;
2841	}
2842
2843	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
2844		sop->offload_mode = VMXNET3_OM_TSO;
2845		sop->hlen = start;
2846		sop->offload_pos = m->m_pkthdr.tso_segsz;
2847	} else if (m->m_pkthdr.csum_flags & (VMXNET3_CSUM_OFFLOAD |
2848	    VMXNET3_CSUM_OFFLOAD_IPV6)) {
2849		sop->offload_mode = VMXNET3_OM_CSUM;
2850		sop->hlen = start;
2851		sop->offload_pos = start + m->m_pkthdr.csum_data;
2852	}
2853
2854	/* Finally, change the ownership. */
2855	vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
2856	sop->gen ^= 1;
2857
2858	txq->vxtxq_ts->npending += nsegs;
2859	if (txq->vxtxq_ts->npending >= txq->vxtxq_ts->intr_threshold) {
2860		txq->vxtxq_ts->npending = 0;
2861		vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id),
2862		    txr->vxtxr_head);
2863	}
2864
2865	return (0);
2866}
2867
2868#ifdef VMXNET3_LEGACY_TX
2869
2870static void
2871vmxnet3_start_locked(struct ifnet *ifp)
2872{
2873	struct vmxnet3_softc *sc;
2874	struct vmxnet3_txqueue *txq;
2875	struct vmxnet3_txring *txr;
2876	struct mbuf *m_head;
2877	int tx, avail;
2878
2879	sc = ifp->if_softc;
2880	txq = &sc->vmx_txq[0];
2881	txr = &txq->vxtxq_cmd_ring;
2882	tx = 0;
2883
2884	VMXNET3_TXQ_LOCK_ASSERT(txq);
2885
2886	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2887	    sc->vmx_link_active == 0)
2888		return;
2889
2890	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
2891		if ((avail = VMXNET3_TXRING_AVAIL(txr)) < 2)
2892			break;
2893
2894		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
2895		if (m_head == NULL)
2896			break;
2897
2898		/* Assume the worst case if this mbuf is the head of a chain. */
2899		if (m_head->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) {
2900			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2901			break;
2902		}
2903
2904		if (vmxnet3_txq_encap(txq, &m_head) != 0) {
2905			if (m_head != NULL)
2906				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2907			break;
2908		}
2909
2910		tx++;
2911		ETHER_BPF_MTAP(ifp, m_head);
2912	}
2913
2914	if (tx > 0)
2915		txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
2916}
2917
2918static void
2919vmxnet3_start(struct ifnet *ifp)
2920{
2921	struct vmxnet3_softc *sc;
2922	struct vmxnet3_txqueue *txq;
2923
2924	sc = ifp->if_softc;
2925	txq = &sc->vmx_txq[0];
2926
2927	VMXNET3_TXQ_LOCK(txq);
2928	vmxnet3_start_locked(ifp);
2929	VMXNET3_TXQ_UNLOCK(txq);
2930}
2931
2932#else /* !VMXNET3_LEGACY_TX */
2933
2934static int
2935vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *txq, struct mbuf *m)
2936{
2937	struct vmxnet3_softc *sc;
2938	struct vmxnet3_txring *txr;
2939	struct buf_ring *br;
2940	struct ifnet *ifp;
2941	int tx, avail, error;
2942
2943	sc = txq->vxtxq_sc;
2944	br = txq->vxtxq_br;
2945	ifp = sc->vmx_ifp;
2946	txr = &txq->vxtxq_cmd_ring;
2947	tx = 0;
2948	error = 0;
2949
2950	VMXNET3_TXQ_LOCK_ASSERT(txq);
2951
2952	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2953	    sc->vmx_link_active == 0) {
2954		if (m != NULL)
2955			error = drbr_enqueue(ifp, br, m);
2956		return (error);
2957	}
2958
2959	if (m != NULL) {
2960		error = drbr_enqueue(ifp, br, m);
2961		if (error)
2962			return (error);
2963	}
2964
2965	while ((avail = VMXNET3_TXRING_AVAIL(txr)) >= 2) {
2966		m = drbr_peek(ifp, br);
2967		if (m == NULL)
2968			break;
2969
2970		/* Assume the worst case if this mbuf is the head of a chain. */
2971		if (m->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) {
2972			drbr_putback(ifp, br, m);
2973			break;
2974		}
2975
2976		if (vmxnet3_txq_encap(txq, &m) != 0) {
2977			if (m != NULL)
2978				drbr_putback(ifp, br, m);
2979			else
2980				drbr_advance(ifp, br);
2981			break;
2982		}
2983		drbr_advance(ifp, br);
2984
2985		tx++;
2986		ETHER_BPF_MTAP(ifp, m);
2987	}
2988
2989	if (tx > 0)
2990		txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
2991
2992	return (0);
2993}
2994
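/*
 * Multiqueue transmit entry point: select a Tx queue from the mbuf
 * flowid (or the current CPU), then either transmit directly or enqueue
 * the mbuf for the deferred task if the queue lock is contended.
 */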
2995static int
2996vmxnet3_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
2997{
2998	struct vmxnet3_softc *sc;
2999	struct vmxnet3_txqueue *txq;
3000	int i, ntxq, error;
3001
3002	sc = ifp->if_softc;
3003	ntxq = sc->vmx_ntxqueues;
3004
3005	/* Use the flowid to select the Tx queue when one is set. */
3006	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
3007		i = m->m_pkthdr.flowid % ntxq;
3008	else
3009		i = curcpu % ntxq;
3010
3011	txq = &sc->vmx_txq[i];
3012
3013	if (VMXNET3_TXQ_TRYLOCK(txq) != 0) {
3014		error = vmxnet3_txq_mq_start_locked(txq, m);
3015		VMXNET3_TXQ_UNLOCK(txq);
3016	} else {
3017		error = drbr_enqueue(ifp, txq->vxtxq_br, m);
3018		taskqueue_enqueue(sc->vmx_tq, &txq->vxtxq_defrtask);
3019	}
3020
3021	return (error);
3022}
3023
3024static void
3025vmxnet3_txq_tq_deferred(void *xtxq, int pending)
3026{
3027	struct vmxnet3_softc *sc;
3028	struct vmxnet3_txqueue *txq;
3029
3030	txq = xtxq;
3031	sc = txq->vxtxq_sc;
3032
3033	VMXNET3_TXQ_LOCK(txq);
3034	if (!drbr_empty(sc->vmx_ifp, txq->vxtxq_br))
3035		vmxnet3_txq_mq_start_locked(txq, NULL);
3036	VMXNET3_TXQ_UNLOCK(txq);
3037}
3038
3039#endif /* VMXNET3_LEGACY_TX */
3040
3041static void
3042vmxnet3_txq_start(struct vmxnet3_txqueue *txq)
3043{
3044	struct vmxnet3_softc *sc;
3045	struct ifnet *ifp;
3046
3047	sc = txq->vxtxq_sc;
3048	ifp = sc->vmx_ifp;
3049
3050#ifdef VMXNET3_LEGACY_TX
3051	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
3052		vmxnet3_start_locked(ifp);
3053#else
3054	if (!drbr_empty(ifp, txq->vxtxq_br))
3055		vmxnet3_txq_mq_start_locked(txq, NULL);
3056#endif
3057}
3058
3059static void
3060vmxnet3_tx_start_all(struct vmxnet3_softc *sc)
3061{
3062	struct vmxnet3_txqueue *txq;
3063	int i;
3064
3065	VMXNET3_CORE_LOCK_ASSERT(sc);
3066
3067	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3068		txq = &sc->vmx_txq[i];
3069
3070		VMXNET3_TXQ_LOCK(txq);
3071		vmxnet3_txq_start(txq);
3072		VMXNET3_TXQ_UNLOCK(txq);
3073	}
3074}
3075
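/*
 * Set or clear the bit for a VLAN tag in the driver's private 4096-bit
 * filter bitmap and, when hardware VLAN filtering is enabled, in the
 * shared copy pushed to the device.
 */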
3076static void
3077vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
3078{
3079	struct ifnet *ifp;
3080	int idx, bit;
3081
3082	ifp = sc->vmx_ifp;
3083	idx = (tag >> 5) & 0x7F;
3084	bit = tag & 0x1F;
3085
3086	if (tag == 0 || tag > 4095)
3087		return;
3088
3089	VMXNET3_CORE_LOCK(sc);
3090
3091	/* Update our private VLAN bitvector. */
3092	if (add)
3093		sc->vmx_vlan_filter[idx] |= (1 << bit);
3094	else
3095		sc->vmx_vlan_filter[idx] &= ~(1 << bit);
3096
3097	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
3098		if (add)
3099			sc->vmx_ds->vlan_filter[idx] |= (1 << bit);
3100		else
3101			sc->vmx_ds->vlan_filter[idx] &= ~(1 << bit);
3102		vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
3103	}
3104
3105	VMXNET3_CORE_UNLOCK(sc);
3106}
3107
3108static void
3109vmxnet3_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3110{
3111
3112	if (ifp->if_softc == arg)
3113		vmxnet3_update_vlan_filter(arg, 1, tag);
3114}
3115
3116static void
3117vmxnet3_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3118{
3119
3120	if (ifp->if_softc == arg)
3121		vmxnet3_update_vlan_filter(arg, 0, tag);
3122}
3123
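/*
 * Program the device Rx mode and multicast filter from the interface
 * flags and multicast address list, falling back to all-multicast when
 * the list exceeds what the device filter can hold.
 */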
3124static void
3125vmxnet3_set_rxfilter(struct vmxnet3_softc *sc)
3126{
3127	struct ifnet *ifp;
3128	struct vmxnet3_driver_shared *ds;
3129	struct ifmultiaddr *ifma;
3130	u_int mode;
3131
3132	ifp = sc->vmx_ifp;
3133	ds = sc->vmx_ds;
3134
3135	mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
3136	if (ifp->if_flags & IFF_PROMISC)
3137		mode |= VMXNET3_RXMODE_PROMISC;
3138	if (ifp->if_flags & IFF_ALLMULTI)
3139		mode |= VMXNET3_RXMODE_ALLMULTI;
3140	else {
3141		int cnt = 0, overflow = 0;
3142
3143		if_maddr_rlock(ifp);
3144		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3145			if (ifma->ifma_addr->sa_family != AF_LINK)
3146				continue;
3147			else if (cnt == VMXNET3_MULTICAST_MAX) {
3148				overflow = 1;
3149				break;
3150			}
3151
3152			bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
3153			    &sc->vmx_mcast[cnt * ETHER_ADDR_LEN], ETHER_ADDR_LEN);
3154			cnt++;
3155		}
3156		if_maddr_runlock(ifp);
3157
3158		if (overflow != 0) {
3159			cnt = 0;
3160			mode |= VMXNET3_RXMODE_ALLMULTI;
3161		} else if (cnt > 0)
3162			mode |= VMXNET3_RXMODE_MCAST;
3163		ds->mcast_tablelen = cnt * ETHER_ADDR_LEN;
3164	}
3165
3166	ds->rxmode = mode;
3167
3168	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
3169	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
3170}
3171
3172static int
3173vmxnet3_change_mtu(struct vmxnet3_softc *sc, int mtu)
3174{
3175	struct ifnet *ifp;
3176
3177	ifp = sc->vmx_ifp;
3178
3179	if (mtu < VMXNET3_MIN_MTU || mtu > VMXNET3_MAX_MTU)
3180		return (EINVAL);
3181
3182	ifp->if_mtu = mtu;
3183
3184	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
3185		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3186		vmxnet3_init_locked(sc);
3187	}
3188
3189	return (0);
3190}
3191
3192static int
3193vmxnet3_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
3194{
3195	struct vmxnet3_softc *sc;
3196	struct ifreq *ifr;
3197	int reinit, mask, error;
3198
3199	sc = ifp->if_softc;
3200	ifr = (struct ifreq *) data;
3201	error = 0;
3202
3203	switch (cmd) {
3204	case SIOCSIFMTU:
3205		if (ifp->if_mtu != ifr->ifr_mtu) {
3206			VMXNET3_CORE_LOCK(sc);
3207			error = vmxnet3_change_mtu(sc, ifr->ifr_mtu);
3208			VMXNET3_CORE_UNLOCK(sc);
3209		}
3210		break;
3211
3212	case SIOCSIFFLAGS:
3213		VMXNET3_CORE_LOCK(sc);
3214		if (ifp->if_flags & IFF_UP) {
3215			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3216				if ((ifp->if_flags ^ sc->vmx_if_flags) &
3217				    (IFF_PROMISC | IFF_ALLMULTI)) {
3218					vmxnet3_set_rxfilter(sc);
3219				}
3220			} else
3221				vmxnet3_init_locked(sc);
3222		} else {
3223			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3224				vmxnet3_stop(sc);
3225		}
3226		sc->vmx_if_flags = ifp->if_flags;
3227		VMXNET3_CORE_UNLOCK(sc);
3228		break;
3229
3230	case SIOCADDMULTI:
3231	case SIOCDELMULTI:
3232		VMXNET3_CORE_LOCK(sc);
3233		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3234			vmxnet3_set_rxfilter(sc);
3235		VMXNET3_CORE_UNLOCK(sc);
3236		break;
3237
3238	case SIOCSIFMEDIA:
3239	case SIOCGIFMEDIA:
3240		error = ifmedia_ioctl(ifp, ifr, &sc->vmx_media, cmd);
3241		break;
3242
3243	case SIOCSIFCAP:
3244		VMXNET3_CORE_LOCK(sc);
3245		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3246
3247		if (mask & IFCAP_TXCSUM)
3248			ifp->if_capenable ^= IFCAP_TXCSUM;
3249		if (mask & IFCAP_TXCSUM_IPV6)
3250			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
3251		if (mask & IFCAP_TSO4)
3252			ifp->if_capenable ^= IFCAP_TSO4;
3253		if (mask & IFCAP_TSO6)
3254			ifp->if_capenable ^= IFCAP_TSO6;
3255
3256		if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO |
3257		    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWFILTER)) {
3258			/* Changing these features requires us to reinit. */
3259			reinit = 1;
3260
3261			if (mask & IFCAP_RXCSUM)
3262				ifp->if_capenable ^= IFCAP_RXCSUM;
3263			if (mask & IFCAP_RXCSUM_IPV6)
3264				ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
3265			if (mask & IFCAP_LRO)
3266				ifp->if_capenable ^= IFCAP_LRO;
3267			if (mask & IFCAP_VLAN_HWTAGGING)
3268				ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
3269			if (mask & IFCAP_VLAN_HWFILTER)
3270				ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
3271		} else
3272			reinit = 0;
3273
3274		if (mask & IFCAP_VLAN_HWTSO)
3275			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
3276
3277		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3278			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3279			vmxnet3_init_locked(sc);
3280		}
3281
3282		VMXNET3_CORE_UNLOCK(sc);
3283		VLAN_CAPABILITIES(ifp);
3284		break;
3285
3286	default:
3287		error = ether_ioctl(ifp, cmd, data);
3288		break;
3289	}
3290
3291	VMXNET3_CORE_LOCK_ASSERT_NOTOWNED(sc);
3292
3293	return (error);
3294}
3295
3296#ifndef VMXNET3_LEGACY_TX
3297static void
3298vmxnet3_qflush(struct ifnet *ifp)
3299{
3300	struct vmxnet3_softc *sc;
3301	struct vmxnet3_txqueue *txq;
3302	struct mbuf *m;
3303	int i;
3304
3305	sc = ifp->if_softc;
3306
3307	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3308		txq = &sc->vmx_txq[i];
3309
3310		VMXNET3_TXQ_LOCK(txq);
3311		while ((m = buf_ring_dequeue_sc(txq->vxtxq_br)) != NULL)
3312			m_freem(m);
3313		VMXNET3_TXQ_UNLOCK(txq);
3314	}
3315
3316	if_qflush(ifp);
3317}
3318#endif
3319
3320static int
3321vmxnet3_watchdog(struct vmxnet3_txqueue *txq)
3322{
3323	struct vmxnet3_softc *sc;
3324
3325	sc = txq->vxtxq_sc;
3326
3327	VMXNET3_TXQ_LOCK(txq);
3328	if (txq->vxtxq_watchdog == 0 || --txq->vxtxq_watchdog) {
3329		VMXNET3_TXQ_UNLOCK(txq);
3330		return (0);
3331	}
3332	VMXNET3_TXQ_UNLOCK(txq);
3333
3334	if_printf(sc->vmx_ifp, "watchdog timeout on queue %d\n",
3335	    txq->vxtxq_id);
3336	return (1);
3337}
3338
3339static void
3340vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
3341{
3342
3343	vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
3344}
3345
3346static void
3347vmxnet3_txq_accum_stats(struct vmxnet3_txqueue *txq,
3348    struct vmxnet3_txq_stats *accum)
3349{
3350	struct vmxnet3_txq_stats *st;
3351
3352	st = &txq->vxtxq_stats;
3353
3354	accum->vmtxs_opackets += st->vmtxs_opackets;
3355	accum->vmtxs_obytes += st->vmtxs_obytes;
3356	accum->vmtxs_omcasts += st->vmtxs_omcasts;
3357	accum->vmtxs_csum += st->vmtxs_csum;
3358	accum->vmtxs_tso += st->vmtxs_tso;
3359	accum->vmtxs_full += st->vmtxs_full;
3360	accum->vmtxs_offload_failed += st->vmtxs_offload_failed;
3361}
3362
3363static void
3364vmxnet3_rxq_accum_stats(struct vmxnet3_rxqueue *rxq,
3365    struct vmxnet3_rxq_stats *accum)
3366{
3367	struct vmxnet3_rxq_stats *st;
3368
3369	st = &rxq->vxrxq_stats;
3370
3371	accum->vmrxs_ipackets += st->vmrxs_ipackets;
3372	accum->vmrxs_ibytes += st->vmrxs_ibytes;
3373	accum->vmrxs_iqdrops += st->vmrxs_iqdrops;
3374	accum->vmrxs_ierrors += st->vmrxs_ierrors;
3375}
3376
3377static void
3378vmxnet3_accumulate_stats(struct vmxnet3_softc *sc)
3379{
3380	struct ifnet *ifp;
3381	struct vmxnet3_statistics *st;
3382	struct vmxnet3_txq_stats txaccum;
3383	struct vmxnet3_rxq_stats rxaccum;
3384	int i;
3385
3386	ifp = sc->vmx_ifp;
3387	st = &sc->vmx_stats;
3388
3389	bzero(&txaccum, sizeof(struct vmxnet3_txq_stats));
3390	bzero(&rxaccum, sizeof(struct vmxnet3_rxq_stats));
3391
3392	for (i = 0; i < sc->vmx_ntxqueues; i++)
3393		vmxnet3_txq_accum_stats(&sc->vmx_txq[i], &txaccum);
3394	for (i = 0; i < sc->vmx_nrxqueues; i++)
3395		vmxnet3_rxq_accum_stats(&sc->vmx_rxq[i], &rxaccum);
3396
3397	/*
3398	 * With the exception of if_ierrors, these ifnet statistics are
3399	 * only updated in the driver, so just set them to our accumulated
3400	 * values. if_ierrors is updated in ether_input() for malformed
3401	 * frames that we should have already discarded.
3402	 */
3403	ifp->if_ipackets = rxaccum.vmrxs_ipackets;
3404	ifp->if_iqdrops = rxaccum.vmrxs_iqdrops;
3405	ifp->if_ierrors = rxaccum.vmrxs_ierrors;
3406	ifp->if_opackets = txaccum.vmtxs_opackets;
3407#ifndef VMXNET3_LEGACY_TX
3408	ifp->if_obytes = txaccum.vmtxs_obytes;
3409	ifp->if_omcasts = txaccum.vmtxs_omcasts;
3410#endif
3411}
3412
3413static void
3414vmxnet3_tick(void *xsc)
3415{
3416	struct vmxnet3_softc *sc;
3417	struct ifnet *ifp;
3418	int i, timedout;
3419
3420	sc = xsc;
3421	ifp = sc->vmx_ifp;
3422	timedout = 0;
3423
3424	VMXNET3_CORE_LOCK_ASSERT(sc);
3425
3426	vmxnet3_accumulate_stats(sc);
3427	vmxnet3_refresh_host_stats(sc);
3428
3429	for (i = 0; i < sc->vmx_ntxqueues; i++)
3430		timedout |= vmxnet3_watchdog(&sc->vmx_txq[i]);
3431
3432	if (timedout != 0) {
3433		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3434		vmxnet3_init_locked(sc);
3435	} else
3436		callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
3437}
3438
3439static int
3440vmxnet3_link_is_up(struct vmxnet3_softc *sc)
3441{
3442	uint32_t status;
3443
3444	/* Also update the link speed while here. */
3445	status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
3446	sc->vmx_link_speed = status >> 16;
3447	return (!!(status & 0x1));
3448}
3449
3450static void
3451vmxnet3_link_status(struct vmxnet3_softc *sc)
3452{
3453	struct ifnet *ifp;
3454	int link;
3455
3456	ifp = sc->vmx_ifp;
3457	link = vmxnet3_link_is_up(sc);
3458
3459	if (link != 0 && sc->vmx_link_active == 0) {
3460		sc->vmx_link_active = 1;
3461		if_link_state_change(ifp, LINK_STATE_UP);
3462	} else if (link == 0 && sc->vmx_link_active != 0) {
3463		sc->vmx_link_active = 0;
3464		if_link_state_change(ifp, LINK_STATE_DOWN);
3465	}
3466}
3467
3468static void
3469vmxnet3_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3470{
3471	struct vmxnet3_softc *sc;
3472
3473	sc = ifp->if_softc;
3474
3475	ifmr->ifm_active = IFM_ETHER | IFM_AUTO;
3476	ifmr->ifm_status = IFM_AVALID;
3477
3478	VMXNET3_CORE_LOCK(sc);
3479	if (vmxnet3_link_is_up(sc) != 0)
3480		ifmr->ifm_status |= IFM_ACTIVE;
3481	else
3482		ifmr->ifm_status |= IFM_NONE;
3483	VMXNET3_CORE_UNLOCK(sc);
3484}
3485
3486static int
3487vmxnet3_media_change(struct ifnet *ifp)
3488{
3489
3490	/* Ignore. */
3491	return (0);
3492}
3493
3494static void
3495vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
3496{
3497	uint32_t ml, mh;
3498
3499	ml  = sc->vmx_lladdr[0];
3500	ml |= sc->vmx_lladdr[1] << 8;
3501	ml |= sc->vmx_lladdr[2] << 16;
3502	ml |= sc->vmx_lladdr[3] << 24;
3503	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
3504
3505	mh  = sc->vmx_lladdr[4];
3506	mh |= sc->vmx_lladdr[5] << 8;
3507	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
3508}
3509
3510static void
3511vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
3512{
3513	uint32_t ml, mh;
3514
3515	ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
3516	mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
3517
3518	sc->vmx_lladdr[0] = ml;
3519	sc->vmx_lladdr[1] = ml >> 8;
3520	sc->vmx_lladdr[2] = ml >> 16;
3521	sc->vmx_lladdr[3] = ml >> 24;
3522	sc->vmx_lladdr[4] = mh;
3523	sc->vmx_lladdr[5] = mh >> 8;
3524}
3525
3526static void
3527vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
3528    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3529{
3530	struct sysctl_oid *node, *txsnode;
3531	struct sysctl_oid_list *list, *txslist;
3532	struct vmxnet3_txq_stats *stats;
3533	struct UPT1_TxStats *txstats;
3534	char namebuf[16];
3535
3536	stats = &txq->vxtxq_stats;
3537	txstats = &txq->vxtxq_ts->stats;
3538
3539	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id);
3540	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
3541	    NULL, "Transmit Queue");
3542	txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);
3543
3544	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
3545	    &stats->vmtxs_opackets, "Transmit packets");
3546	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
3547	    &stats->vmtxs_obytes, "Transmit bytes");
3548	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
3549	    &stats->vmtxs_omcasts, "Transmit multicasts");
3550	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3551	    &stats->vmtxs_csum, "Transmit checksum offloaded");
3552	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
3553	    &stats->vmtxs_tso, "Transmit TCP segmentation offloaded");
3554	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ringfull", CTLFLAG_RD,
3555	    &stats->vmtxs_full, "Transmit ring full");
3556	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "offload_failed", CTLFLAG_RD,
3557	    &stats->vmtxs_offload_failed, "Transmit checksum offload failed");
3558
3559	/*
3560	 * Add statistics reported by the host. These are updated once
3561	 * per second.
3562	 */
3563	txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
3564	    NULL, "Host Statistics");
3565	txslist = SYSCTL_CHILDREN(txsnode);
3566	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
3567	    &txstats->TSO_packets, "TSO packets");
3568	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
3569	    &txstats->TSO_bytes, "TSO bytes");
3570	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
3571	    &txstats->ucast_packets, "Unicast packets");
3572	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
3573	    &txstats->ucast_bytes, "Unicast bytes");
3574	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
3575	    &txstats->mcast_packets, "Multicast packets");
3576	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
3577	    &txstats->mcast_bytes, "Multicast bytes");
3578	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
3579	    &txstats->error, "Errors");
3580	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
3581	    &txstats->discard, "Discards");
3582}
3583
3584static void
3585vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
3586    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3587{
3588	struct sysctl_oid *node, *rxsnode;
3589	struct sysctl_oid_list *list, *rxslist;
3590	struct vmxnet3_rxq_stats *stats;
3591	struct UPT1_RxStats *rxstats;
3592	char namebuf[16];
3593
3594	stats = &rxq->vxrxq_stats;
3595	rxstats = &rxq->vxrxq_rs->stats;
3596
3597	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
3598	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
3599	    NULL, "Receive Queue");
3600	rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);
3601
3602	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
3603	    &stats->vmrxs_ipackets, "Receive packets");
3604	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
3605	    &stats->vmrxs_ibytes, "Receive bytes");
3606	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
3607	    &stats->vmrxs_iqdrops, "Receive drops");
3608	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
3609	    &stats->vmrxs_ierrors, "Receive errors");
3610
3611	/*
3612	 * Add statistics reported by the host. These are updated once
3613	 * per second.
3614	 */
3615	rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
3616	    NULL, "Host Statistics");
3617	rxslist = SYSCTL_CHILDREN(rxsnode);
3618	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
3619	    &rxstats->LRO_packets, "LRO packets");
3620	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
3621	    &rxstats->LRO_bytes, "LRO bytes");
3622	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
3623	    &rxstats->ucast_packets, "Unicast packets");
3624	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
3625	    &rxstats->ucast_bytes, "Unicast bytes");
3626	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
3627	    &rxstats->mcast_packets, "Multicast packets");
3628	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
3629	    &rxstats->mcast_bytes, "Multicast bytes");
3630	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
3631	    &rxstats->bcast_packets, "Broadcast packets");
3632	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
3633	    &rxstats->bcast_bytes, "Broadcast bytes");
3634	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
3635	    &rxstats->nobuffer, "No buffer");
3636	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
3637	    &rxstats->error, "Errors");
3638}
3639
3640static void
3641vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
3642    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3643{
3644	struct sysctl_oid *node;
3645	struct sysctl_oid_list *list;
3646	int i;
3647
3648	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3649		struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];
3650
3651		node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
3652		    "debug", CTLFLAG_RD, NULL, "");
3653		list = SYSCTL_CHILDREN(node);
3654
3655		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_head", CTLFLAG_RD,
3656		    &txq->vxtxq_cmd_ring.vxtxr_head, 0, "");
3657		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
3658		    &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
3659		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
3660		    &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
3661		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
3662		    &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
3663		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
3664		    &txq->vxtxq_comp_ring.vxcr_next, 0, "");
3665		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
3666		    &txq->vxtxq_comp_ring.vxcr_ndesc, 0, "");
3667		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
3668		    &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
3669	}
3670
3671	for (i = 0; i < sc->vmx_nrxqueues; i++) {
3672		struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];
3673
3674		node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
3675		    "debug", CTLFLAG_RD, NULL, "");
3676		list = SYSCTL_CHILDREN(node);
3677
3678		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_fill", CTLFLAG_RD,
3679		    &rxq->vxrxq_cmd_ring[0].vxrxr_fill, 0, "");
3680		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
3681		    &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
3682		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
3683		    &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
3684		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_fill", CTLFLAG_RD,
3685		    &rxq->vxrxq_cmd_ring[1].vxrxr_fill, 0, "");
3686		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
3687		    &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
3688		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
3689		    &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
3690		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
3691		    &rxq->vxrxq_comp_ring.vxcr_next, 0, "");
3692		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
3693		    &rxq->vxrxq_comp_ring.vxcr_ndesc, 0, "");
3694		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
3695		    &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
3696	}
3697}
3698
3699static void
3700vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
3701    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3702{
3703	int i;
3704
3705	for (i = 0; i < sc->vmx_ntxqueues; i++)
3706		vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
3707	for (i = 0; i < sc->vmx_nrxqueues; i++)
3708		vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);
3709
3710	vmxnet3_setup_debug_sysctl(sc, ctx, child);
3711}
3712
3713static void
3714vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
3715{
3716	device_t dev;
3717	struct vmxnet3_statistics *stats;
3718	struct sysctl_ctx_list *ctx;
3719	struct sysctl_oid *tree;
3720	struct sysctl_oid_list *child;
3721
3722	dev = sc->vmx_dev;
3723	ctx = device_get_sysctl_ctx(dev);
3724	tree = device_get_sysctl_tree(dev);
3725	child = SYSCTL_CHILDREN(tree);
3726
3727	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_ntxqueues", CTLFLAG_RD,
3728	    &sc->vmx_max_ntxqueues, 0, "Maximum number of Tx queues");
3729	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_nrxqueues", CTLFLAG_RD,
3730	    &sc->vmx_max_nrxqueues, 0, "Maximum number of Rx queues");
3731	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "ntxqueues", CTLFLAG_RD,
3732	    &sc->vmx_ntxqueues, 0, "Number of Tx queues");
3733	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "nrxqueues", CTLFLAG_RD,
3734	    &sc->vmx_nrxqueues, 0, "Number of Rx queues");
3735
3736	stats = &sc->vmx_stats;
3737	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defragged", CTLFLAG_RD,
3738	    &stats->vmst_defragged, 0, "Tx mbuf chains defragged");
3739	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defrag_failed", CTLFLAG_RD,
3740	    &stats->vmst_defrag_failed, 0,
3741	    "Tx mbuf dropped because defrag failed");
3742	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mgetcl_failed", CTLFLAG_RD,
3743	    &stats->vmst_mgetcl_failed, 0, "mbuf cluster allocation failed");
3744	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mbuf_load_failed", CTLFLAG_RD,
3745	    &stats->vmst_mbuf_load_failed, 0, "mbuf load segments failed");
3746
3747	vmxnet3_setup_queue_sysctl(sc, ctx, child);
3748}
3749
3750static void
3751vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
3752{
3753
3754	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
3755}
3756
3757static uint32_t
3758vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
3759{
3760
3761	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
3762}
3763
3764static void
3765vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
3766{
3767
3768	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
3769}
3770
3771static void
3772vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
3773{
3774
3775	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
3776}
3777
3778static uint32_t
3779vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
3780{
3781
3782	vmxnet3_write_cmd(sc, cmd);
3783	bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
3784	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
3785	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
3786}
3787
3788static void
3789vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
3790{
3791
3792	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
3793}
3794
3795static void
3796vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
3797{
3798
3799	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
3800}
3801
3802static void
3803vmxnet3_enable_all_intrs(struct vmxnet3_softc *sc)
3804{
3805	int i;
3806
3807	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
3808	for (i = 0; i < sc->vmx_nintrs; i++)
3809		vmxnet3_enable_intr(sc, i);
3810}
3811
3812static void
3813vmxnet3_disable_all_intrs(struct vmxnet3_softc *sc)
3814{
3815	int i;
3816
3817	sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
3818	for (i = 0; i < sc->vmx_nintrs; i++)
3819		vmxnet3_disable_intr(sc, i);
3820}
3821
3822static void
3823vmxnet3_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3824{
3825	bus_addr_t *baddr = arg;
3826
3827	if (error == 0)
3828		*baddr = segs->ds_addr;
3829}
3830
3831static int
3832vmxnet3_dma_malloc(struct vmxnet3_softc *sc, bus_size_t size, bus_size_t align,
3833    struct vmxnet3_dma_alloc *dma)
3834{
3835	device_t dev;
3836	int error;
3837
3838	dev = sc->vmx_dev;
3839	bzero(dma, sizeof(struct vmxnet3_dma_alloc));
3840
3841	error = bus_dma_tag_create(bus_get_dma_tag(dev),
3842	    align, 0,		/* alignment, bounds */
3843	    BUS_SPACE_MAXADDR,	/* lowaddr */
3844	    BUS_SPACE_MAXADDR,	/* highaddr */
3845	    NULL, NULL,		/* filter, filterarg */
3846	    size,		/* maxsize */
3847	    1,			/* nsegments */
3848	    size,		/* maxsegsize */
3849	    BUS_DMA_ALLOCNOW,	/* flags */
3850	    NULL,		/* lockfunc */
3851	    NULL,		/* lockfuncarg */
3852	    &dma->dma_tag);
3853	if (error) {
3854		device_printf(dev, "bus_dma_tag_create failed: %d\n", error);
3855		goto fail;
3856	}
3857
3858	error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
3859	    BUS_DMA_ZERO | BUS_DMA_NOWAIT, &dma->dma_map);
3860	if (error) {
3861		device_printf(dev, "bus_dmamem_alloc failed: %d\n", error);
3862		goto fail;
3863	}
3864
3865	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3866	    size, vmxnet3_dmamap_cb, &dma->dma_paddr, BUS_DMA_NOWAIT);
3867	if (error) {
3868		device_printf(dev, "bus_dmamap_load failed: %d\n", error);
3869		goto fail;
3870	}
3871
3872	dma->dma_size = size;
3873
3874fail:
3875	if (error)
3876		vmxnet3_dma_free(sc, dma);
3877
3878	return (error);
3879}
3880
3881static void
3882vmxnet3_dma_free(struct vmxnet3_softc *sc, struct vmxnet3_dma_alloc *dma)
3883{
3884
3885	if (dma->dma_tag != NULL) {
3886		if (dma->dma_map != NULL) {
3887			bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3888			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3889			bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3890		}
3891
3892		if (dma->dma_vaddr != NULL) {
3893			bus_dmamem_free(dma->dma_tag, dma->dma_vaddr,
3894			    dma->dma_map);
3895		}
3896
3897		bus_dma_tag_destroy(dma->dma_tag);
3898	}
3899	bzero(dma, sizeof(struct vmxnet3_dma_alloc));
3900}
3901
3902static int
3903vmxnet3_tunable_int(struct vmxnet3_softc *sc, const char *knob, int def)
3904{
3905	char path[64];
3906
3907	snprintf(path, sizeof(path),
3908	    "hw.vmx.%d.%s", device_get_unit(sc->vmx_dev), knob);
3909	TUNABLE_INT_FETCH(path, &def);
3910
3911	return (def);
3912}
3913
3914/*
3915 * Since this is a purely paravirtualized device, we do not have
3916 * to worry about DMA coherency. But at times, we must make sure
3917 * both the compiler and CPU do not reorder memory operations.
3918 */
3919static inline void
3920vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
3921{
3922
3923	switch (type) {
3924	case VMXNET3_BARRIER_RD:
3925		rmb();
3926		break;
3927	case VMXNET3_BARRIER_WR:
3928		wmb();
3929		break;
3930	case VMXNET3_BARRIER_RDWR:
3931		mb();
3932		break;
3933	default:
3934		panic("%s: bad barrier type %d", __func__, type);
3935	}
3936}
3937