1/*-
2 * Copyright (c) 2013 Tsubai Masanari
3 * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 *
17 * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $
18 */
19
20/* Driver for VMware vmxnet3 virtual ethernet devices. */
21
22#include <sys/cdefs.h>
23__FBSDID("$FreeBSD: stable/11/sys/dev/vmware/vmxnet3/if_vmx.c 344272 2019-02-19 10:07:48Z vmaffione $");
24
25#include <sys/param.h>
26#include <sys/systm.h>
27#include <sys/eventhandler.h>
28#include <sys/kernel.h>
29#include <sys/endian.h>
30#include <sys/sockio.h>
31#include <sys/mbuf.h>
32#include <sys/malloc.h>
33#include <sys/module.h>
34#include <sys/socket.h>
35#include <sys/sysctl.h>
36#include <sys/smp.h>
37#include <sys/taskqueue.h>
38#include <vm/vm.h>
39#include <vm/pmap.h>
40
41#include <net/ethernet.h>
42#include <net/if.h>
43#include <net/if_var.h>
44#include <net/if_arp.h>
45#include <net/if_dl.h>
46#include <net/if_types.h>
47#include <net/if_media.h>
48#include <net/if_vlan_var.h>
49
50#include <net/bpf.h>
51
52#include <netinet/in_systm.h>
53#include <netinet/in.h>
54#include <netinet/ip.h>
55#include <netinet/ip6.h>
56#include <netinet6/ip6_var.h>
57#include <netinet/udp.h>
58#include <netinet/tcp.h>
59
60#include <machine/in_cksum.h>
61
62#include <machine/bus.h>
63#include <machine/resource.h>
64#include <sys/bus.h>
65#include <sys/rman.h>
66
67#include <dev/pci/pcireg.h>
68#include <dev/pci/pcivar.h>
69
70#include "if_vmxreg.h"
71#include "if_vmxvar.h"
72
73#include "opt_inet.h"
74#include "opt_inet6.h"
75
76#ifdef VMXNET3_FAILPOINTS
77#include <sys/fail.h>
78static SYSCTL_NODE(DEBUG_FP, OID_AUTO, vmxnet3, CTLFLAG_RW, 0,
79    "vmxnet3 fail points");
80#define VMXNET3_FP	_debug_fail_point_vmxnet3
81#endif
82
83static int	vmxnet3_probe(device_t);
84static int	vmxnet3_attach(device_t);
85static int	vmxnet3_detach(device_t);
86static int	vmxnet3_shutdown(device_t);
87
88static int	vmxnet3_alloc_resources(struct vmxnet3_softc *);
89static void	vmxnet3_free_resources(struct vmxnet3_softc *);
90static int	vmxnet3_check_version(struct vmxnet3_softc *);
91static void	vmxnet3_initial_config(struct vmxnet3_softc *);
92static void	vmxnet3_check_multiqueue(struct vmxnet3_softc *);
93
94static int	vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *);
95static int	vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *);
96static int	vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *);
97static int	vmxnet3_alloc_interrupt(struct vmxnet3_softc *, int, int,
98		    struct vmxnet3_interrupt *);
99static int	vmxnet3_alloc_intr_resources(struct vmxnet3_softc *);
100static int	vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *);
101static int	vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *);
102static int	vmxnet3_setup_interrupts(struct vmxnet3_softc *);
103static int	vmxnet3_alloc_interrupts(struct vmxnet3_softc *);
104
105static void	vmxnet3_free_interrupt(struct vmxnet3_softc *,
106		    struct vmxnet3_interrupt *);
107static void	vmxnet3_free_interrupts(struct vmxnet3_softc *);
108
109#ifndef VMXNET3_LEGACY_TX
110static int	vmxnet3_alloc_taskqueue(struct vmxnet3_softc *);
111static void	vmxnet3_start_taskqueue(struct vmxnet3_softc *);
112static void	vmxnet3_drain_taskqueue(struct vmxnet3_softc *);
113static void	vmxnet3_free_taskqueue(struct vmxnet3_softc *);
114#endif
115
116static int	vmxnet3_init_rxq(struct vmxnet3_softc *, int);
117static int	vmxnet3_init_txq(struct vmxnet3_softc *, int);
118static int	vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *);
119static void	vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *);
120static void	vmxnet3_destroy_txq(struct vmxnet3_txqueue *);
121static void	vmxnet3_free_rxtx_queues(struct vmxnet3_softc *);
122
123static int	vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
124static void	vmxnet3_free_shared_data(struct vmxnet3_softc *);
125static int	vmxnet3_alloc_txq_data(struct vmxnet3_softc *);
126static void	vmxnet3_free_txq_data(struct vmxnet3_softc *);
127static int	vmxnet3_alloc_rxq_data(struct vmxnet3_softc *);
128static void	vmxnet3_free_rxq_data(struct vmxnet3_softc *);
129static int	vmxnet3_alloc_queue_data(struct vmxnet3_softc *);
130static void	vmxnet3_free_queue_data(struct vmxnet3_softc *);
131static int	vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
132static void	vmxnet3_init_shared_data(struct vmxnet3_softc *);
133static void	vmxnet3_init_hwassist(struct vmxnet3_softc *);
134static void	vmxnet3_reinit_interface(struct vmxnet3_softc *);
135static void	vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
136static void	vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
137static int	vmxnet3_alloc_data(struct vmxnet3_softc *);
138static void	vmxnet3_free_data(struct vmxnet3_softc *);
139static int	vmxnet3_setup_interface(struct vmxnet3_softc *);
140
141static void	vmxnet3_evintr(struct vmxnet3_softc *);
142static void	vmxnet3_txq_eof(struct vmxnet3_txqueue *);
143static void	vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *, struct mbuf *);
144static int	vmxnet3_newbuf(struct vmxnet3_softc *, struct vmxnet3_rxring *);
145static void	vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *,
146		    struct vmxnet3_rxring *, int);
147static void	vmxnet3_rxq_eof(struct vmxnet3_rxqueue *);
148static void	vmxnet3_legacy_intr(void *);
149static void	vmxnet3_txq_intr(void *);
150static void	vmxnet3_rxq_intr(void *);
151static void	vmxnet3_event_intr(void *);
152
153static void	vmxnet3_txstop(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
154static void	vmxnet3_rxstop(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
155static void	vmxnet3_stop(struct vmxnet3_softc *);
156
157static void	vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
158static int	vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
159static int	vmxnet3_reinit_queues(struct vmxnet3_softc *);
160static int	vmxnet3_enable_device(struct vmxnet3_softc *);
161static void	vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
162static int	vmxnet3_reinit(struct vmxnet3_softc *);
163static void	vmxnet3_init_locked(struct vmxnet3_softc *);
164static void	vmxnet3_init(void *);
165
166static int	vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *,struct mbuf *,
167		    int *, int *, int *);
168static int	vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *, struct mbuf **,
169		    bus_dmamap_t, bus_dma_segment_t [], int *);
170static void	vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *, bus_dmamap_t);
171static int	vmxnet3_txq_encap(struct vmxnet3_txqueue *, struct mbuf **);
172#ifdef VMXNET3_LEGACY_TX
173static void	vmxnet3_start_locked(struct ifnet *);
174static void	vmxnet3_start(struct ifnet *);
175#else
176static int	vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *,
177		    struct mbuf *);
178static int	vmxnet3_txq_mq_start(struct ifnet *, struct mbuf *);
179static void	vmxnet3_txq_tq_deferred(void *, int);
180#endif
181static void	vmxnet3_txq_start(struct vmxnet3_txqueue *);
182static void	vmxnet3_tx_start_all(struct vmxnet3_softc *);
183
184static void	vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
185		    uint16_t);
186static void	vmxnet3_register_vlan(void *, struct ifnet *, uint16_t);
187static void	vmxnet3_unregister_vlan(void *, struct ifnet *, uint16_t);
188static void	vmxnet3_set_rxfilter(struct vmxnet3_softc *);
189static int	vmxnet3_change_mtu(struct vmxnet3_softc *, int);
190static int	vmxnet3_ioctl(struct ifnet *, u_long, caddr_t);
191static uint64_t	vmxnet3_get_counter(struct ifnet *, ift_counter);
192
193#ifndef VMXNET3_LEGACY_TX
194static void	vmxnet3_qflush(struct ifnet *);
195#endif
196
197static int	vmxnet3_watchdog(struct vmxnet3_txqueue *);
198static void	vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
199static void	vmxnet3_tick(void *);
200static void	vmxnet3_link_status(struct vmxnet3_softc *);
201static void	vmxnet3_media_status(struct ifnet *, struct ifmediareq *);
202static int	vmxnet3_media_change(struct ifnet *);
203static void	vmxnet3_set_lladdr(struct vmxnet3_softc *);
204static void	vmxnet3_get_lladdr(struct vmxnet3_softc *);
205
206static void	vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *,
207		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
208static void	vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *,
209		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
210static void	vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *,
211		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
212static void	vmxnet3_setup_sysctl(struct vmxnet3_softc *);
213
214static void	vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t,
215		    uint32_t);
216static uint32_t	vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t);
217static void	vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t,
218		    uint32_t);
219static void	vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t);
220static uint32_t	vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t);
221
222static void	vmxnet3_enable_intr(struct vmxnet3_softc *, int);
223static void	vmxnet3_disable_intr(struct vmxnet3_softc *, int);
224static void	vmxnet3_enable_all_intrs(struct vmxnet3_softc *);
225static void	vmxnet3_disable_all_intrs(struct vmxnet3_softc *);
226
227static int	vmxnet3_dma_malloc(struct vmxnet3_softc *, bus_size_t,
228		    bus_size_t, struct vmxnet3_dma_alloc *);
229static void	vmxnet3_dma_free(struct vmxnet3_softc *,
230		    struct vmxnet3_dma_alloc *);
231static int	vmxnet3_tunable_int(struct vmxnet3_softc *,
232		    const char *, int);
233
234typedef enum {
235	VMXNET3_BARRIER_RD,
236	VMXNET3_BARRIER_WR,
237	VMXNET3_BARRIER_RDWR,
238} vmxnet3_barrier_t;
239
240static void	vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);
241
242#ifdef DEV_NETMAP
243#include "vmx_netmap.h"
244#endif
245
246/* Tunables. */
247static int vmxnet3_mq_disable = 0;
248TUNABLE_INT("hw.vmx.mq_disable", &vmxnet3_mq_disable);
249static int vmxnet3_default_txnqueue = VMXNET3_DEF_TX_QUEUES;
250TUNABLE_INT("hw.vmx.txnqueue", &vmxnet3_default_txnqueue);
251static int vmxnet3_default_rxnqueue = VMXNET3_DEF_RX_QUEUES;
252TUNABLE_INT("hw.vmx.rxnqueue", &vmxnet3_default_rxnqueue);
253static int vmxnet3_default_txndesc = VMXNET3_DEF_TX_NDESC;
254TUNABLE_INT("hw.vmx.txndesc", &vmxnet3_default_txndesc);
255static int vmxnet3_default_rxndesc = VMXNET3_DEF_RX_NDESC;
256TUNABLE_INT("hw.vmx.rxndesc", &vmxnet3_default_rxndesc);
257
258static device_method_t vmxnet3_methods[] = {
259	/* Device interface. */
260	DEVMETHOD(device_probe,		vmxnet3_probe),
261	DEVMETHOD(device_attach,	vmxnet3_attach),
262	DEVMETHOD(device_detach,	vmxnet3_detach),
263	DEVMETHOD(device_shutdown,	vmxnet3_shutdown),
264
265	DEVMETHOD_END
266};
267
268static driver_t vmxnet3_driver = {
269	"vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc)
270};
271
272static devclass_t vmxnet3_devclass;
273DRIVER_MODULE(vmx, pci, vmxnet3_driver, vmxnet3_devclass, 0, 0);
274
275MODULE_DEPEND(vmx, pci, 1, 1, 1);
276MODULE_DEPEND(vmx, ether, 1, 1, 1);
277#ifdef DEV_NETMAP
278MODULE_DEPEND(vmx, netmap, 1, 1, 1);
279#endif
280
281#define VMXNET3_VMWARE_VENDOR_ID	0x15AD
282#define VMXNET3_VMWARE_DEVICE_ID	0x07B0
283
284static int
285vmxnet3_probe(device_t dev)
286{
287
288	if (pci_get_vendor(dev) == VMXNET3_VMWARE_VENDOR_ID &&
289	    pci_get_device(dev) == VMXNET3_VMWARE_DEVICE_ID) {
290		device_set_desc(dev, "VMware VMXNET3 Ethernet Adapter");
291		return (BUS_PROBE_DEFAULT);
292	}
293
294	return (ENXIO);
295}
296
297static int
298vmxnet3_attach(device_t dev)
299{
300	struct vmxnet3_softc *sc;
301	int error;
302
303	sc = device_get_softc(dev);
304	sc->vmx_dev = dev;
305
306	pci_enable_busmaster(dev);
307
308	VMXNET3_CORE_LOCK_INIT(sc, device_get_nameunit(dev));
309	callout_init_mtx(&sc->vmx_tick, &sc->vmx_mtx, 0);
310
311	vmxnet3_initial_config(sc);
312
313	error = vmxnet3_alloc_resources(sc);
314	if (error)
315		goto fail;
316
317	error = vmxnet3_check_version(sc);
318	if (error)
319		goto fail;
320
321	error = vmxnet3_alloc_rxtx_queues(sc);
322	if (error)
323		goto fail;
324
325#ifndef VMXNET3_LEGACY_TX
326	error = vmxnet3_alloc_taskqueue(sc);
327	if (error)
328		goto fail;
329#endif
330
331	error = vmxnet3_alloc_interrupts(sc);
332	if (error)
333		goto fail;
334
335	vmxnet3_check_multiqueue(sc);
336
337	error = vmxnet3_alloc_data(sc);
338	if (error)
339		goto fail;
340
341	error = vmxnet3_setup_interface(sc);
342	if (error)
343		goto fail;
344
345	error = vmxnet3_setup_interrupts(sc);
346	if (error) {
347		ether_ifdetach(sc->vmx_ifp);
348		device_printf(dev, "could not set up interrupt\n");
349		goto fail;
350	}
351
352	vmxnet3_setup_sysctl(sc);
353#ifndef VMXNET3_LEGACY_TX
354	vmxnet3_start_taskqueue(sc);
355#endif
356
357#ifdef DEV_NETMAP
358	vmxnet3_netmap_attach(sc);
359#endif
360
361fail:
362	if (error)
363		vmxnet3_detach(dev);
364
365	return (error);
366}
367
368static int
369vmxnet3_detach(device_t dev)
370{
371	struct vmxnet3_softc *sc;
372	struct ifnet *ifp;
373
374	sc = device_get_softc(dev);
375	ifp = sc->vmx_ifp;
376
377	if (device_is_attached(dev)) {
378		VMXNET3_CORE_LOCK(sc);
379		vmxnet3_stop(sc);
380		VMXNET3_CORE_UNLOCK(sc);
381
382		callout_drain(&sc->vmx_tick);
383#ifndef VMXNET3_LEGACY_TX
384		vmxnet3_drain_taskqueue(sc);
385#endif
386
387		ether_ifdetach(ifp);
388	}
389
390	if (sc->vmx_vlan_attach != NULL) {
391		EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_attach);
392		sc->vmx_vlan_attach = NULL;
393	}
394	if (sc->vmx_vlan_detach != NULL) {
395		EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_detach);
396		sc->vmx_vlan_detach = NULL;
397	}
398
399#ifndef VMXNET3_LEGACY_TX
400	vmxnet3_free_taskqueue(sc);
401#endif
402	vmxnet3_free_interrupts(sc);
403
404#ifdef DEV_NETMAP
405	netmap_detach(ifp);
406#endif
407
408	if (ifp != NULL) {
409		if_free(ifp);
410		sc->vmx_ifp = NULL;
411	}
412
413	ifmedia_removeall(&sc->vmx_media);
414
415	vmxnet3_free_data(sc);
416	vmxnet3_free_resources(sc);
417	vmxnet3_free_rxtx_queues(sc);
418
419	VMXNET3_CORE_LOCK_DESTROY(sc);
420
421	return (0);
422}
423
424static int
425vmxnet3_shutdown(device_t dev)
426{
427
428	return (0);
429}
430
431static int
432vmxnet3_alloc_resources(struct vmxnet3_softc *sc)
433{
434	device_t dev;
435	int rid;
436
437	dev = sc->vmx_dev;
438
439	rid = PCIR_BAR(0);
440	sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
441	    RF_ACTIVE);
442	if (sc->vmx_res0 == NULL) {
443		device_printf(dev,
444		    "could not map BAR0 memory\n");
445		return (ENXIO);
446	}
447
448	sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0);
449	sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0);
450
451	rid = PCIR_BAR(1);
452	sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
453	    RF_ACTIVE);
454	if (sc->vmx_res1 == NULL) {
455		device_printf(dev,
456		    "could not map BAR1 memory\n");
457		return (ENXIO);
458	}
459
460	sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1);
461	sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1);
462
463	if (pci_find_cap(dev, PCIY_MSIX, NULL) == 0) {
464		rid = PCIR_BAR(2);
465		sc->vmx_msix_res = bus_alloc_resource_any(dev,
466		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
467	}
468
469	if (sc->vmx_msix_res == NULL)
470		sc->vmx_flags |= VMXNET3_FLAG_NO_MSIX;
471
472	return (0);
473}
474
475static void
476vmxnet3_free_resources(struct vmxnet3_softc *sc)
477{
478	device_t dev;
479	int rid;
480
481	dev = sc->vmx_dev;
482
483	if (sc->vmx_res0 != NULL) {
484		rid = PCIR_BAR(0);
485		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res0);
486		sc->vmx_res0 = NULL;
487	}
488
489	if (sc->vmx_res1 != NULL) {
490		rid = PCIR_BAR(1);
491		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res1);
492		sc->vmx_res1 = NULL;
493	}
494
495	if (sc->vmx_msix_res != NULL) {
496		rid = PCIR_BAR(2);
497		bus_release_resource(dev, SYS_RES_MEMORY, rid,
498		    sc->vmx_msix_res);
499		sc->vmx_msix_res = NULL;
500	}
501}
502
503static int
504vmxnet3_check_version(struct vmxnet3_softc *sc)
505{
506	device_t dev;
507	uint32_t version;
508
509	dev = sc->vmx_dev;
510
511	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
512	if ((version & 0x01) == 0) {
513		device_printf(dev, "unsupported hardware version %#x\n",
514		    version);
515		return (ENOTSUP);
516	}
517	vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
518
519	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
520	if ((version & 0x01) == 0) {
521		device_printf(dev, "unsupported UPT version %#x\n", version);
522		return (ENOTSUP);
523	}
524	vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
525
526	return (0);
527}
528
529static int
530trunc_powerof2(int val)
531{
532
533	return (1U << (fls(val) - 1));
534}
535
536static void
537vmxnet3_initial_config(struct vmxnet3_softc *sc)
538{
539	int nqueue, ndesc;
540
541	nqueue = vmxnet3_tunable_int(sc, "txnqueue", vmxnet3_default_txnqueue);
542	if (nqueue > VMXNET3_MAX_TX_QUEUES || nqueue < 1)
543		nqueue = VMXNET3_DEF_TX_QUEUES;
544	if (nqueue > mp_ncpus)
545		nqueue = mp_ncpus;
546	sc->vmx_max_ntxqueues = trunc_powerof2(nqueue);
547
548	nqueue = vmxnet3_tunable_int(sc, "rxnqueue", vmxnet3_default_rxnqueue);
549	if (nqueue > VMXNET3_MAX_RX_QUEUES || nqueue < 1)
550		nqueue = VMXNET3_DEF_RX_QUEUES;
551	if (nqueue > mp_ncpus)
552		nqueue = mp_ncpus;
553	sc->vmx_max_nrxqueues = trunc_powerof2(nqueue);
554
555	if (vmxnet3_tunable_int(sc, "mq_disable", vmxnet3_mq_disable)) {
556		sc->vmx_max_nrxqueues = 1;
557		sc->vmx_max_ntxqueues = 1;
558	}
559
560	ndesc = vmxnet3_tunable_int(sc, "txd", vmxnet3_default_txndesc);
561	if (ndesc > VMXNET3_MAX_TX_NDESC || ndesc < VMXNET3_MIN_TX_NDESC)
562		ndesc = VMXNET3_DEF_TX_NDESC;
563	if (ndesc & VMXNET3_MASK_TX_NDESC)
564		ndesc &= ~VMXNET3_MASK_TX_NDESC;
565	sc->vmx_ntxdescs = ndesc;
566
567	ndesc = vmxnet3_tunable_int(sc, "rxd", vmxnet3_default_rxndesc);
568	if (ndesc > VMXNET3_MAX_RX_NDESC || ndesc < VMXNET3_MIN_RX_NDESC)
569		ndesc = VMXNET3_DEF_RX_NDESC;
570	if (ndesc & VMXNET3_MASK_RX_NDESC)
571		ndesc &= ~VMXNET3_MASK_RX_NDESC;
572	sc->vmx_nrxdescs = ndesc;
573	sc->vmx_max_rxsegs = VMXNET3_MAX_RX_SEGS;
574}
575
576static void
577vmxnet3_check_multiqueue(struct vmxnet3_softc *sc)
578{
579
580	if (sc->vmx_intr_type != VMXNET3_IT_MSIX)
581		goto out;
582
583	/* BMV: Just use the maximum configured for now. */
584	sc->vmx_nrxqueues = sc->vmx_max_nrxqueues;
585	sc->vmx_ntxqueues = sc->vmx_max_ntxqueues;
586
587	if (sc->vmx_nrxqueues > 1)
588		sc->vmx_flags |= VMXNET3_FLAG_RSS;
589
590	return;
591
592out:
593	sc->vmx_ntxqueues = 1;
594	sc->vmx_nrxqueues = 1;
595}
596
597static int
598vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *sc)
599{
600	device_t dev;
601	int nmsix, cnt, required;
602
603	dev = sc->vmx_dev;
604
605	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX)
606		return (1);
607
608	/* Allocate an additional vector for the events interrupt. */
609	required = sc->vmx_max_nrxqueues + sc->vmx_max_ntxqueues + 1;
610
611	nmsix = pci_msix_count(dev);
612	if (nmsix < required)
613		return (1);
614
615	cnt = required;
616	if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) {
617		sc->vmx_nintrs = required;
618		return (0);
619	} else
620		pci_release_msi(dev);
621
622	/* BMV TODO Fallback to sharing MSIX vectors if possible. */
623
624	return (1);
625}
626
627static int
628vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *sc)
629{
630	device_t dev;
631	int nmsi, cnt, required;
632
633	dev = sc->vmx_dev;
634	required = 1;
635
636	nmsi = pci_msi_count(dev);
637	if (nmsi < required)
638		return (1);
639
640	cnt = required;
641	if (pci_alloc_msi(dev, &cnt) == 0 && cnt >= required) {
642		sc->vmx_nintrs = 1;
643		return (0);
644	} else
645		pci_release_msi(dev);
646
647	return (1);
648}
649
650static int
651vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *sc)
652{
653
654	sc->vmx_nintrs = 1;
655	return (0);
656}
657
658static int
659vmxnet3_alloc_interrupt(struct vmxnet3_softc *sc, int rid, int flags,
660    struct vmxnet3_interrupt *intr)
661{
662	struct resource *irq;
663
664	irq = bus_alloc_resource_any(sc->vmx_dev, SYS_RES_IRQ, &rid, flags);
665	if (irq == NULL)
666		return (ENXIO);
667
668	intr->vmxi_irq = irq;
669	intr->vmxi_rid = rid;
670
671	return (0);
672}
673
674static int
675vmxnet3_alloc_intr_resources(struct vmxnet3_softc *sc)
676{
677	int i, rid, flags, error;
678
679	rid = 0;
680	flags = RF_ACTIVE;
681
682	if (sc->vmx_intr_type == VMXNET3_IT_LEGACY)
683		flags |= RF_SHAREABLE;
684	else
685		rid = 1;
686
687	for (i = 0; i < sc->vmx_nintrs; i++, rid++) {
688		error = vmxnet3_alloc_interrupt(sc, rid, flags,
689		    &sc->vmx_intrs[i]);
690		if (error)
691			return (error);
692	}
693
694	return (0);
695}
696
697static int
698vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *sc)
699{
700	device_t dev;
701	struct vmxnet3_txqueue *txq;
702	struct vmxnet3_rxqueue *rxq;
703	struct vmxnet3_interrupt *intr;
704	enum intr_type type;
705	int i, error;
706
707	dev = sc->vmx_dev;
708	intr = &sc->vmx_intrs[0];
709	type = INTR_TYPE_NET | INTR_MPSAFE;
710
711	for (i = 0; i < sc->vmx_ntxqueues; i++, intr++) {
712		txq = &sc->vmx_txq[i];
713		error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
714		     vmxnet3_txq_intr, txq, &intr->vmxi_handler);
715		if (error)
716			return (error);
717		bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler,
718		    "tq%d", i);
719		txq->vxtxq_intr_idx = intr->vmxi_rid - 1;
720	}
721
722	for (i = 0; i < sc->vmx_nrxqueues; i++, intr++) {
723		rxq = &sc->vmx_rxq[i];
724		error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
725		    vmxnet3_rxq_intr, rxq, &intr->vmxi_handler);
726		if (error)
727			return (error);
728		bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler,
729		    "rq%d", i);
730		rxq->vxrxq_intr_idx = intr->vmxi_rid - 1;
731	}
732
733	error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
734	    vmxnet3_event_intr, sc, &intr->vmxi_handler);
735	if (error)
736		return (error);
737	bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler, "event");
738	sc->vmx_event_intr_idx = intr->vmxi_rid - 1;
739
740	return (0);
741}
742
743static int
744vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *sc)
745{
746	struct vmxnet3_interrupt *intr;
747	int i, error;
748
749	intr = &sc->vmx_intrs[0];
750	error = bus_setup_intr(sc->vmx_dev, intr->vmxi_irq,
751	    INTR_TYPE_NET | INTR_MPSAFE, NULL, vmxnet3_legacy_intr, sc,
752	    &intr->vmxi_handler);
753
754	for (i = 0; i < sc->vmx_ntxqueues; i++)
755		sc->vmx_txq[i].vxtxq_intr_idx = 0;
756	for (i = 0; i < sc->vmx_nrxqueues; i++)
757		sc->vmx_rxq[i].vxrxq_intr_idx = 0;
758	sc->vmx_event_intr_idx = 0;
759
760	return (error);
761}
762
763static void
764vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
765{
766	struct vmxnet3_txqueue *txq;
767	struct vmxnet3_txq_shared *txs;
768	struct vmxnet3_rxqueue *rxq;
769	struct vmxnet3_rxq_shared *rxs;
770	int i;
771
772	sc->vmx_ds->evintr = sc->vmx_event_intr_idx;
773
774	for (i = 0; i < sc->vmx_ntxqueues; i++) {
775		txq = &sc->vmx_txq[i];
776		txs = txq->vxtxq_ts;
777		txs->intr_idx = txq->vxtxq_intr_idx;
778	}
779
780	for (i = 0; i < sc->vmx_nrxqueues; i++) {
781		rxq = &sc->vmx_rxq[i];
782		rxs = rxq->vxrxq_rs;
783		rxs->intr_idx = rxq->vxrxq_intr_idx;
784	}
785}
786
787static int
788vmxnet3_setup_interrupts(struct vmxnet3_softc *sc)
789{
790	int error;
791
792	error = vmxnet3_alloc_intr_resources(sc);
793	if (error)
794		return (error);
795
796	switch (sc->vmx_intr_type) {
797	case VMXNET3_IT_MSIX:
798		error = vmxnet3_setup_msix_interrupts(sc);
799		break;
800	case VMXNET3_IT_MSI:
801	case VMXNET3_IT_LEGACY:
802		error = vmxnet3_setup_legacy_interrupt(sc);
803		break;
804	default:
805		panic("%s: invalid interrupt type %d", __func__,
806		    sc->vmx_intr_type);
807	}
808
809	if (error == 0)
810		vmxnet3_set_interrupt_idx(sc);
811
812	return (error);
813}
814
815static int
816vmxnet3_alloc_interrupts(struct vmxnet3_softc *sc)
817{
818	device_t dev;
819	uint32_t config;
820	int error;
821
822	dev = sc->vmx_dev;
823	config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
824
825	sc->vmx_intr_type = config & 0x03;
826	sc->vmx_intr_mask_mode = (config >> 2) & 0x03;
827
828	switch (sc->vmx_intr_type) {
829	case VMXNET3_IT_AUTO:
830		sc->vmx_intr_type = VMXNET3_IT_MSIX;
831		/* FALLTHROUGH */
832	case VMXNET3_IT_MSIX:
833		error = vmxnet3_alloc_msix_interrupts(sc);
834		if (error == 0)
835			break;
836		sc->vmx_intr_type = VMXNET3_IT_MSI;
837		/* FALLTHROUGH */
838	case VMXNET3_IT_MSI:
839		error = vmxnet3_alloc_msi_interrupts(sc);
840		if (error == 0)
841			break;
842		sc->vmx_intr_type = VMXNET3_IT_LEGACY;
843		/* FALLTHROUGH */
844	case VMXNET3_IT_LEGACY:
845		error = vmxnet3_alloc_legacy_interrupts(sc);
846		if (error == 0)
847			break;
848		/* FALLTHROUGH */
849	default:
850		sc->vmx_intr_type = -1;
851		device_printf(dev, "cannot allocate any interrupt resources\n");
852		return (ENXIO);
853	}
854
855	return (error);
856}
857
858static void
859vmxnet3_free_interrupt(struct vmxnet3_softc *sc,
860    struct vmxnet3_interrupt *intr)
861{
862	device_t dev;
863
864	dev = sc->vmx_dev;
865
866	if (intr->vmxi_handler != NULL) {
867		bus_teardown_intr(dev, intr->vmxi_irq, intr->vmxi_handler);
868		intr->vmxi_handler = NULL;
869	}
870
871	if (intr->vmxi_irq != NULL) {
872		bus_release_resource(dev, SYS_RES_IRQ, intr->vmxi_rid,
873		    intr->vmxi_irq);
874		intr->vmxi_irq = NULL;
875		intr->vmxi_rid = -1;
876	}
877}
878
879static void
880vmxnet3_free_interrupts(struct vmxnet3_softc *sc)
881{
882	int i;
883
884	for (i = 0; i < sc->vmx_nintrs; i++)
885		vmxnet3_free_interrupt(sc, &sc->vmx_intrs[i]);
886
887	if (sc->vmx_intr_type == VMXNET3_IT_MSI ||
888	    sc->vmx_intr_type == VMXNET3_IT_MSIX)
889		pci_release_msi(sc->vmx_dev);
890}
891
892#ifndef VMXNET3_LEGACY_TX
893static int
894vmxnet3_alloc_taskqueue(struct vmxnet3_softc *sc)
895{
896	device_t dev;
897
898	dev = sc->vmx_dev;
899
900	sc->vmx_tq = taskqueue_create(device_get_nameunit(dev), M_NOWAIT,
901	    taskqueue_thread_enqueue, &sc->vmx_tq);
902	if (sc->vmx_tq == NULL)
903		return (ENOMEM);
904
905	return (0);
906}
907
908static void
909vmxnet3_start_taskqueue(struct vmxnet3_softc *sc)
910{
911	device_t dev;
912	int nthreads, error;
913
914	dev = sc->vmx_dev;
915
916	/*
917	 * The taskqueue is typically not frequently used, so a dedicated
918	 * thread for each queue is unnecessary.
919	 */
920	nthreads = MAX(1, sc->vmx_ntxqueues / 2);
921
922	/*
923	 * Most drivers just ignore the return value - it only fails
924	 * with ENOMEM so an error is not likely. It is hard for us
925	 * to recover from an error here.
926	 */
927	error = taskqueue_start_threads(&sc->vmx_tq, nthreads, PI_NET,
928	    "%s taskq", device_get_nameunit(dev));
929	if (error)
930		device_printf(dev, "failed to start taskqueue: %d", error);
931}
932
933static void
934vmxnet3_drain_taskqueue(struct vmxnet3_softc *sc)
935{
936	struct vmxnet3_txqueue *txq;
937	int i;
938
939	if (sc->vmx_tq != NULL) {
940		for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
941			txq = &sc->vmx_txq[i];
942			taskqueue_drain(sc->vmx_tq, &txq->vxtxq_defrtask);
943		}
944	}
945}
946
947static void
948vmxnet3_free_taskqueue(struct vmxnet3_softc *sc)
949{
950	if (sc->vmx_tq != NULL) {
951		taskqueue_free(sc->vmx_tq);
952		sc->vmx_tq = NULL;
953	}
954}
955#endif
956
957static int
958vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q)
959{
960	struct vmxnet3_rxqueue *rxq;
961	struct vmxnet3_rxring *rxr;
962	int i;
963
964	rxq = &sc->vmx_rxq[q];
965
966	snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
967	    device_get_nameunit(sc->vmx_dev), q);
968	mtx_init(&rxq->vxrxq_mtx, rxq->vxrxq_name, NULL, MTX_DEF);
969
970	rxq->vxrxq_sc = sc;
971	rxq->vxrxq_id = q;
972
973	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
974		rxr = &rxq->vxrxq_cmd_ring[i];
975		rxr->vxrxr_rid = i;
976		rxr->vxrxr_ndesc = sc->vmx_nrxdescs;
977		rxr->vxrxr_rxbuf = malloc(rxr->vxrxr_ndesc *
978		    sizeof(struct vmxnet3_rxbuf), M_DEVBUF, M_NOWAIT | M_ZERO);
979		if (rxr->vxrxr_rxbuf == NULL)
980			return (ENOMEM);
981
982		rxq->vxrxq_comp_ring.vxcr_ndesc += sc->vmx_nrxdescs;
983	}
984
985	return (0);
986}
987
988static int
989vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
990{
991	struct vmxnet3_txqueue *txq;
992	struct vmxnet3_txring *txr;
993
994	txq = &sc->vmx_txq[q];
995	txr = &txq->vxtxq_cmd_ring;
996
997	snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
998	    device_get_nameunit(sc->vmx_dev), q);
999	mtx_init(&txq->vxtxq_mtx, txq->vxtxq_name, NULL, MTX_DEF);
1000
1001	txq->vxtxq_sc = sc;
1002	txq->vxtxq_id = q;
1003
1004	txr->vxtxr_ndesc = sc->vmx_ntxdescs;
1005	txr->vxtxr_txbuf = malloc(txr->vxtxr_ndesc *
1006	    sizeof(struct vmxnet3_txbuf), M_DEVBUF, M_NOWAIT | M_ZERO);
1007	if (txr->vxtxr_txbuf == NULL)
1008		return (ENOMEM);
1009
1010	txq->vxtxq_comp_ring.vxcr_ndesc = sc->vmx_ntxdescs;
1011
1012#ifndef VMXNET3_LEGACY_TX
1013	TASK_INIT(&txq->vxtxq_defrtask, 0, vmxnet3_txq_tq_deferred, txq);
1014
1015	txq->vxtxq_br = buf_ring_alloc(VMXNET3_DEF_BUFRING_SIZE, M_DEVBUF,
1016	    M_NOWAIT, &txq->vxtxq_mtx);
1017	if (txq->vxtxq_br == NULL)
1018		return (ENOMEM);
1019#endif
1020
1021	return (0);
1022}
1023
1024static int
1025vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *sc)
1026{
1027	int i, error;
1028
1029	/*
1030	 * Only attempt to create multiple queues if MSIX is available. MSIX is
1031	 * disabled by default because its apparently broken for devices passed
1032	 * through by at least ESXi 5.1. The hw.pci.honor_msi_blacklist tunable
1033	 * must be set to zero for MSIX. This check prevents us from allocating
1034	 * queue structures that we will not use.
1035	 */
1036	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX) {
1037		sc->vmx_max_nrxqueues = 1;
1038		sc->vmx_max_ntxqueues = 1;
1039	}
1040
1041	sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) *
1042	    sc->vmx_max_nrxqueues, M_DEVBUF, M_NOWAIT | M_ZERO);
1043	sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) *
1044	    sc->vmx_max_ntxqueues, M_DEVBUF, M_NOWAIT | M_ZERO);
1045	if (sc->vmx_rxq == NULL || sc->vmx_txq == NULL)
1046		return (ENOMEM);
1047
1048	for (i = 0; i < sc->vmx_max_nrxqueues; i++) {
1049		error = vmxnet3_init_rxq(sc, i);
1050		if (error)
1051			return (error);
1052	}
1053
1054	for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
1055		error = vmxnet3_init_txq(sc, i);
1056		if (error)
1057			return (error);
1058	}
1059
1060	return (0);
1061}
1062
1063static void
1064vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *rxq)
1065{
1066	struct vmxnet3_rxring *rxr;
1067	int i;
1068
1069	rxq->vxrxq_sc = NULL;
1070	rxq->vxrxq_id = -1;
1071
1072	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1073		rxr = &rxq->vxrxq_cmd_ring[i];
1074
1075		if (rxr->vxrxr_rxbuf != NULL) {
1076			free(rxr->vxrxr_rxbuf, M_DEVBUF);
1077			rxr->vxrxr_rxbuf = NULL;
1078		}
1079	}
1080
1081	if (mtx_initialized(&rxq->vxrxq_mtx) != 0)
1082		mtx_destroy(&rxq->vxrxq_mtx);
1083}
1084
1085static void
1086vmxnet3_destroy_txq(struct vmxnet3_txqueue *txq)
1087{
1088	struct vmxnet3_txring *txr;
1089
1090	txr = &txq->vxtxq_cmd_ring;
1091
1092	txq->vxtxq_sc = NULL;
1093	txq->vxtxq_id = -1;
1094
1095#ifndef VMXNET3_LEGACY_TX
1096	if (txq->vxtxq_br != NULL) {
1097		buf_ring_free(txq->vxtxq_br, M_DEVBUF);
1098		txq->vxtxq_br = NULL;
1099	}
1100#endif
1101
1102	if (txr->vxtxr_txbuf != NULL) {
1103		free(txr->vxtxr_txbuf, M_DEVBUF);
1104		txr->vxtxr_txbuf = NULL;
1105	}
1106
1107	if (mtx_initialized(&txq->vxtxq_mtx) != 0)
1108		mtx_destroy(&txq->vxtxq_mtx);
1109}
1110
1111static void
1112vmxnet3_free_rxtx_queues(struct vmxnet3_softc *sc)
1113{
1114	int i;
1115
1116	if (sc->vmx_rxq != NULL) {
1117		for (i = 0; i < sc->vmx_max_nrxqueues; i++)
1118			vmxnet3_destroy_rxq(&sc->vmx_rxq[i]);
1119		free(sc->vmx_rxq, M_DEVBUF);
1120		sc->vmx_rxq = NULL;
1121	}
1122
1123	if (sc->vmx_txq != NULL) {
1124		for (i = 0; i < sc->vmx_max_ntxqueues; i++)
1125			vmxnet3_destroy_txq(&sc->vmx_txq[i]);
1126		free(sc->vmx_txq, M_DEVBUF);
1127		sc->vmx_txq = NULL;
1128	}
1129}
1130
1131static int
1132vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
1133{
1134	device_t dev;
1135	uint8_t *kva;
1136	size_t size;
1137	int i, error;
1138
1139	dev = sc->vmx_dev;
1140
1141	size = sizeof(struct vmxnet3_driver_shared);
1142	error = vmxnet3_dma_malloc(sc, size, 1, &sc->vmx_ds_dma);
1143	if (error) {
1144		device_printf(dev, "cannot alloc shared memory\n");
1145		return (error);
1146	}
1147	sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.dma_vaddr;
1148
1149	size = sc->vmx_ntxqueues * sizeof(struct vmxnet3_txq_shared) +
1150	    sc->vmx_nrxqueues * sizeof(struct vmxnet3_rxq_shared);
1151	error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_qs_dma);
1152	if (error) {
1153		device_printf(dev, "cannot alloc queue shared memory\n");
1154		return (error);
1155	}
1156	sc->vmx_qs = (void *) sc->vmx_qs_dma.dma_vaddr;
1157	kva = sc->vmx_qs;
1158
1159	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1160		sc->vmx_txq[i].vxtxq_ts = (struct vmxnet3_txq_shared *) kva;
1161		kva += sizeof(struct vmxnet3_txq_shared);
1162	}
1163	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1164		sc->vmx_rxq[i].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva;
1165		kva += sizeof(struct vmxnet3_rxq_shared);
1166	}
1167
1168	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1169		size = sizeof(struct vmxnet3_rss_shared);
1170		error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_rss_dma);
1171		if (error) {
1172			device_printf(dev, "cannot alloc rss shared memory\n");
1173			return (error);
1174		}
1175		sc->vmx_rss =
1176		    (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.dma_vaddr;
1177	}
1178
1179	return (0);
1180}
1181
1182static void
1183vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
1184{
1185
1186	if (sc->vmx_rss != NULL) {
1187		vmxnet3_dma_free(sc, &sc->vmx_rss_dma);
1188		sc->vmx_rss = NULL;
1189	}
1190
1191	if (sc->vmx_qs != NULL) {
1192		vmxnet3_dma_free(sc, &sc->vmx_qs_dma);
1193		sc->vmx_qs = NULL;
1194	}
1195
1196	if (sc->vmx_ds != NULL) {
1197		vmxnet3_dma_free(sc, &sc->vmx_ds_dma);
1198		sc->vmx_ds = NULL;
1199	}
1200}
1201
1202static int
1203vmxnet3_alloc_txq_data(struct vmxnet3_softc *sc)
1204{
1205	device_t dev;
1206	struct vmxnet3_txqueue *txq;
1207	struct vmxnet3_txring *txr;
1208	struct vmxnet3_comp_ring *txc;
1209	size_t descsz, compsz;
1210	int i, q, error;
1211
1212	dev = sc->vmx_dev;
1213
1214	for (q = 0; q < sc->vmx_ntxqueues; q++) {
1215		txq = &sc->vmx_txq[q];
1216		txr = &txq->vxtxq_cmd_ring;
1217		txc = &txq->vxtxq_comp_ring;
1218
1219		descsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc);
1220		compsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txcompdesc);
1221
1222		error = bus_dma_tag_create(bus_get_dma_tag(dev),
1223		    1, 0,			/* alignment, boundary */
1224		    BUS_SPACE_MAXADDR,		/* lowaddr */
1225		    BUS_SPACE_MAXADDR,		/* highaddr */
1226		    NULL, NULL,			/* filter, filterarg */
1227		    VMXNET3_TX_MAXSIZE,		/* maxsize */
1228		    VMXNET3_TX_MAXSEGS,		/* nsegments */
1229		    VMXNET3_TX_MAXSEGSIZE,	/* maxsegsize */
1230		    0,				/* flags */
1231		    NULL, NULL,			/* lockfunc, lockarg */
1232		    &txr->vxtxr_txtag);
1233		if (error) {
1234			device_printf(dev,
1235			    "unable to create Tx buffer tag for queue %d\n", q);
1236			return (error);
1237		}
1238
1239		error = vmxnet3_dma_malloc(sc, descsz, 512, &txr->vxtxr_dma);
1240		if (error) {
1241			device_printf(dev, "cannot alloc Tx descriptors for "
1242			    "queue %d error %d\n", q, error);
1243			return (error);
1244		}
1245		txr->vxtxr_txd =
1246		    (struct vmxnet3_txdesc *) txr->vxtxr_dma.dma_vaddr;
1247
1248		error = vmxnet3_dma_malloc(sc, compsz, 512, &txc->vxcr_dma);
1249		if (error) {
1250			device_printf(dev, "cannot alloc Tx comp descriptors "
1251			   "for queue %d error %d\n", q, error);
1252			return (error);
1253		}
1254		txc->vxcr_u.txcd =
1255		    (struct vmxnet3_txcompdesc *) txc->vxcr_dma.dma_vaddr;
1256
1257		for (i = 0; i < txr->vxtxr_ndesc; i++) {
1258			error = bus_dmamap_create(txr->vxtxr_txtag, 0,
1259			    &txr->vxtxr_txbuf[i].vtxb_dmamap);
1260			if (error) {
1261				device_printf(dev, "unable to create Tx buf "
1262				    "dmamap for queue %d idx %d\n", q, i);
1263				return (error);
1264			}
1265		}
1266	}
1267
1268	return (0);
1269}
1270
1271static void
1272vmxnet3_free_txq_data(struct vmxnet3_softc *sc)
1273{
1274	device_t dev;
1275	struct vmxnet3_txqueue *txq;
1276	struct vmxnet3_txring *txr;
1277	struct vmxnet3_comp_ring *txc;
1278	struct vmxnet3_txbuf *txb;
1279	int i, q;
1280
1281	dev = sc->vmx_dev;
1282
1283	for (q = 0; q < sc->vmx_ntxqueues; q++) {
1284		txq = &sc->vmx_txq[q];
1285		txr = &txq->vxtxq_cmd_ring;
1286		txc = &txq->vxtxq_comp_ring;
1287
1288		for (i = 0; i < txr->vxtxr_ndesc; i++) {
1289			txb = &txr->vxtxr_txbuf[i];
1290			if (txb->vtxb_dmamap != NULL) {
1291				bus_dmamap_destroy(txr->vxtxr_txtag,
1292				    txb->vtxb_dmamap);
1293				txb->vtxb_dmamap = NULL;
1294			}
1295		}
1296
1297		if (txc->vxcr_u.txcd != NULL) {
1298			vmxnet3_dma_free(sc, &txc->vxcr_dma);
1299			txc->vxcr_u.txcd = NULL;
1300		}
1301
1302		if (txr->vxtxr_txd != NULL) {
1303			vmxnet3_dma_free(sc, &txr->vxtxr_dma);
1304			txr->vxtxr_txd = NULL;
1305		}
1306
1307		if (txr->vxtxr_txtag != NULL) {
1308			bus_dma_tag_destroy(txr->vxtxr_txtag);
1309			txr->vxtxr_txtag = NULL;
1310		}
1311	}
1312}
1313
1314static int
1315vmxnet3_alloc_rxq_data(struct vmxnet3_softc *sc)
1316{
1317	device_t dev;
1318	struct vmxnet3_rxqueue *rxq;
1319	struct vmxnet3_rxring *rxr;
1320	struct vmxnet3_comp_ring *rxc;
1321	int descsz, compsz;
1322	int i, j, q, error;
1323
1324	dev = sc->vmx_dev;
1325
1326	for (q = 0; q < sc->vmx_nrxqueues; q++) {
1327		rxq = &sc->vmx_rxq[q];
1328		rxc = &rxq->vxrxq_comp_ring;
1329		compsz = 0;
1330
1331		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1332			rxr = &rxq->vxrxq_cmd_ring[i];
1333
1334			descsz = rxr->vxrxr_ndesc *
1335			    sizeof(struct vmxnet3_rxdesc);
1336			compsz += rxr->vxrxr_ndesc *
1337			    sizeof(struct vmxnet3_rxcompdesc);
1338
1339			error = bus_dma_tag_create(bus_get_dma_tag(dev),
1340			    1, 0,		/* alignment, boundary */
1341			    BUS_SPACE_MAXADDR,	/* lowaddr */
1342			    BUS_SPACE_MAXADDR,	/* highaddr */
1343			    NULL, NULL,		/* filter, filterarg */
1344			    MJUMPAGESIZE,	/* maxsize */
1345			    1,			/* nsegments */
1346			    MJUMPAGESIZE,	/* maxsegsize */
1347			    0,			/* flags */
1348			    NULL, NULL,		/* lockfunc, lockarg */
1349			    &rxr->vxrxr_rxtag);
1350			if (error) {
1351				device_printf(dev,
1352				    "unable to create Rx buffer tag for "
1353				    "queue %d\n", q);
1354				return (error);
1355			}
1356
1357			error = vmxnet3_dma_malloc(sc, descsz, 512,
1358			    &rxr->vxrxr_dma);
1359			if (error) {
1360				device_printf(dev, "cannot allocate Rx "
1361				    "descriptors for queue %d/%d error %d\n",
1362				    i, q, error);
1363				return (error);
1364			}
1365			rxr->vxrxr_rxd =
1366			    (struct vmxnet3_rxdesc *) rxr->vxrxr_dma.dma_vaddr;
1367		}
1368
1369		error = vmxnet3_dma_malloc(sc, compsz, 512, &rxc->vxcr_dma);
1370		if (error) {
1371			device_printf(dev, "cannot alloc Rx comp descriptors "
1372			    "for queue %d error %d\n", q, error);
1373			return (error);
1374		}
1375		rxc->vxcr_u.rxcd =
1376		    (struct vmxnet3_rxcompdesc *) rxc->vxcr_dma.dma_vaddr;
1377
1378		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1379			rxr = &rxq->vxrxq_cmd_ring[i];
1380
1381			error = bus_dmamap_create(rxr->vxrxr_rxtag, 0,
1382			    &rxr->vxrxr_spare_dmap);
1383			if (error) {
1384				device_printf(dev, "unable to create spare "
1385				    "dmamap for queue %d/%d error %d\n",
1386				    q, i, error);
1387				return (error);
1388			}
1389
1390			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1391				error = bus_dmamap_create(rxr->vxrxr_rxtag, 0,
1392				    &rxr->vxrxr_rxbuf[j].vrxb_dmamap);
1393				if (error) {
1394					device_printf(dev, "unable to create "
1395					    "dmamap for queue %d/%d slot %d "
1396					    "error %d\n",
1397					    q, i, j, error);
1398					return (error);
1399				}
1400			}
1401		}
1402	}
1403
1404	return (0);
1405}
1406
1407static void
1408vmxnet3_free_rxq_data(struct vmxnet3_softc *sc)
1409{
1410	device_t dev;
1411	struct vmxnet3_rxqueue *rxq;
1412	struct vmxnet3_rxring *rxr;
1413	struct vmxnet3_comp_ring *rxc;
1414	struct vmxnet3_rxbuf *rxb;
1415	int i, j, q;
1416
1417	dev = sc->vmx_dev;
1418
1419	for (q = 0; q < sc->vmx_nrxqueues; q++) {
1420		rxq = &sc->vmx_rxq[q];
1421		rxc = &rxq->vxrxq_comp_ring;
1422
1423		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1424			rxr = &rxq->vxrxq_cmd_ring[i];
1425
1426			if (rxr->vxrxr_spare_dmap != NULL) {
1427				bus_dmamap_destroy(rxr->vxrxr_rxtag,
1428				    rxr->vxrxr_spare_dmap);
1429				rxr->vxrxr_spare_dmap = NULL;
1430			}
1431
1432			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1433				rxb = &rxr->vxrxr_rxbuf[j];
1434				if (rxb->vrxb_dmamap != NULL) {
1435					bus_dmamap_destroy(rxr->vxrxr_rxtag,
1436					    rxb->vrxb_dmamap);
1437					rxb->vrxb_dmamap = NULL;
1438				}
1439			}
1440		}
1441
1442		if (rxc->vxcr_u.rxcd != NULL) {
1443			vmxnet3_dma_free(sc, &rxc->vxcr_dma);
1444			rxc->vxcr_u.rxcd = NULL;
1445		}
1446
1447		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1448			rxr = &rxq->vxrxq_cmd_ring[i];
1449
1450			if (rxr->vxrxr_rxd != NULL) {
1451				vmxnet3_dma_free(sc, &rxr->vxrxr_dma);
1452				rxr->vxrxr_rxd = NULL;
1453			}
1454
1455			if (rxr->vxrxr_rxtag != NULL) {
1456				bus_dma_tag_destroy(rxr->vxrxr_rxtag);
1457				rxr->vxrxr_rxtag = NULL;
1458			}
1459		}
1460	}
1461}
1462
1463static int
1464vmxnet3_alloc_queue_data(struct vmxnet3_softc *sc)
1465{
1466	int error;
1467
1468	error = vmxnet3_alloc_txq_data(sc);
1469	if (error)
1470		return (error);
1471
1472	error = vmxnet3_alloc_rxq_data(sc);
1473	if (error)
1474		return (error);
1475
1476	return (0);
1477}
1478
1479static void
1480vmxnet3_free_queue_data(struct vmxnet3_softc *sc)
1481{
1482
1483	if (sc->vmx_rxq != NULL)
1484		vmxnet3_free_rxq_data(sc);
1485
1486	if (sc->vmx_txq != NULL)
1487		vmxnet3_free_txq_data(sc);
1488}
1489
1490static int
1491vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1492{
1493	int error;
1494
1495	error = vmxnet3_dma_malloc(sc, VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN,
1496	    32, &sc->vmx_mcast_dma);
1497	if (error)
1498		device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1499	else
1500		sc->vmx_mcast = sc->vmx_mcast_dma.dma_vaddr;
1501
1502	return (error);
1503}
1504
1505static void
1506vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1507{
1508
1509	if (sc->vmx_mcast != NULL) {
1510		vmxnet3_dma_free(sc, &sc->vmx_mcast_dma);
1511		sc->vmx_mcast = NULL;
1512	}
1513}
1514
1515static void
1516vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1517{
1518	struct vmxnet3_driver_shared *ds;
1519	struct vmxnet3_txqueue *txq;
1520	struct vmxnet3_txq_shared *txs;
1521	struct vmxnet3_rxqueue *rxq;
1522	struct vmxnet3_rxq_shared *rxs;
1523	int i;
1524
1525	ds = sc->vmx_ds;
1526
1527	/*
1528	 * Initialize fields of the shared data that remains the same across
1529	 * reinits. Note the shared data is zero'd when allocated.
1530	 */
1531
1532	ds->magic = VMXNET3_REV1_MAGIC;
1533
1534	/* DriverInfo */
1535	ds->version = VMXNET3_DRIVER_VERSION;
1536	ds->guest = VMXNET3_GOS_FREEBSD |
1537#ifdef __LP64__
1538	    VMXNET3_GOS_64BIT;
1539#else
1540	    VMXNET3_GOS_32BIT;
1541#endif
1542	ds->vmxnet3_revision = 1;
1543	ds->upt_version = 1;
1544
1545	/* Misc. conf */
1546	ds->driver_data = vtophys(sc);
1547	ds->driver_data_len = sizeof(struct vmxnet3_softc);
1548	ds->queue_shared = sc->vmx_qs_dma.dma_paddr;
1549	ds->queue_shared_len = sc->vmx_qs_dma.dma_size;
1550	ds->nrxsg_max = sc->vmx_max_rxsegs;
1551
1552	/* RSS conf */
1553	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1554		ds->rss.version = 1;
1555		ds->rss.paddr = sc->vmx_rss_dma.dma_paddr;
1556		ds->rss.len = sc->vmx_rss_dma.dma_size;
1557	}
1558
1559	/* Interrupt control. */
1560	ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1561	ds->nintr = sc->vmx_nintrs;
1562	ds->evintr = sc->vmx_event_intr_idx;
1563	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1564
1565	for (i = 0; i < sc->vmx_nintrs; i++)
1566		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1567
1568	/* Receive filter. */
1569	ds->mcast_table = sc->vmx_mcast_dma.dma_paddr;
1570	ds->mcast_tablelen = sc->vmx_mcast_dma.dma_size;
1571
1572	/* Tx queues */
1573	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1574		txq = &sc->vmx_txq[i];
1575		txs = txq->vxtxq_ts;
1576
1577		txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_dma.dma_paddr;
1578		txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1579		txs->comp_ring = txq->vxtxq_comp_ring.vxcr_dma.dma_paddr;
1580		txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1581		txs->driver_data = vtophys(txq);
1582		txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1583	}
1584
1585	/* Rx queues */
1586	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1587		rxq = &sc->vmx_rxq[i];
1588		rxs = rxq->vxrxq_rs;
1589
1590		rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_dma.dma_paddr;
1591		rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1592		rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_dma.dma_paddr;
1593		rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1594		rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_dma.dma_paddr;
1595		rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1596		rxs->driver_data = vtophys(rxq);
1597		rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1598	}
1599}
1600
1601static void
1602vmxnet3_init_hwassist(struct vmxnet3_softc *sc)
1603{
1604	struct ifnet *ifp = sc->vmx_ifp;
1605	uint64_t hwassist;
1606
1607	hwassist = 0;
1608	if (ifp->if_capenable & IFCAP_TXCSUM)
1609		hwassist |= VMXNET3_CSUM_OFFLOAD;
1610	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
1611		hwassist |= VMXNET3_CSUM_OFFLOAD_IPV6;
1612	if (ifp->if_capenable & IFCAP_TSO4)
1613		hwassist |= CSUM_IP_TSO;
1614	if (ifp->if_capenable & IFCAP_TSO6)
1615		hwassist |= CSUM_IP6_TSO;
1616	ifp->if_hwassist = hwassist;
1617}
1618
1619static void
1620vmxnet3_reinit_interface(struct vmxnet3_softc *sc)
1621{
1622	struct ifnet *ifp;
1623
1624	ifp = sc->vmx_ifp;
1625
1626	/* Use the current MAC address. */
1627	bcopy(IF_LLADDR(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
1628	vmxnet3_set_lladdr(sc);
1629
1630	vmxnet3_init_hwassist(sc);
1631}
1632
1633static void
1634vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1635{
1636	/*
1637	 * Use the same key as the Linux driver until FreeBSD can do
1638	 * RSS (presumably Toeplitz) in software.
1639	 */
1640	static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
1641	    0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
1642	    0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
1643	    0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
1644	    0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
1645	    0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
1646	};
1647
1648	struct vmxnet3_driver_shared *ds;
1649	struct vmxnet3_rss_shared *rss;
1650	int i;
1651
1652	ds = sc->vmx_ds;
1653	rss = sc->vmx_rss;
1654
1655	rss->hash_type =
1656	    UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1657	    UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1658	rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1659	rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1660	rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1661	memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
1662
1663	for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1664		rss->ind_table[i] = i % sc->vmx_nrxqueues;
1665}
1666
1667static void
1668vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1669{
1670	struct ifnet *ifp;
1671	struct vmxnet3_driver_shared *ds;
1672
1673	ifp = sc->vmx_ifp;
1674	ds = sc->vmx_ds;
1675
1676	ds->mtu = ifp->if_mtu;
1677	ds->ntxqueue = sc->vmx_ntxqueues;
1678	ds->nrxqueue = sc->vmx_nrxqueues;
1679
1680	ds->upt_features = 0;
1681	if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
1682		ds->upt_features |= UPT1_F_CSUM;
1683	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1684		ds->upt_features |= UPT1_F_VLAN;
1685	if (ifp->if_capenable & IFCAP_LRO)
1686		ds->upt_features |= UPT1_F_LRO;
1687
1688	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1689		ds->upt_features |= UPT1_F_RSS;
1690		vmxnet3_reinit_rss_shared_data(sc);
1691	}
1692
1693	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.dma_paddr);
1694	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1695	    (uint64_t) sc->vmx_ds_dma.dma_paddr >> 32);
1696}
1697
1698static int
1699vmxnet3_alloc_data(struct vmxnet3_softc *sc)
1700{
1701	int error;
1702
1703	error = vmxnet3_alloc_shared_data(sc);
1704	if (error)
1705		return (error);
1706
1707	error = vmxnet3_alloc_queue_data(sc);
1708	if (error)
1709		return (error);
1710
1711	error = vmxnet3_alloc_mcast_table(sc);
1712	if (error)
1713		return (error);
1714
1715	vmxnet3_init_shared_data(sc);
1716
1717	return (0);
1718}
1719
1720static void
1721vmxnet3_free_data(struct vmxnet3_softc *sc)
1722{
1723
1724	vmxnet3_free_mcast_table(sc);
1725	vmxnet3_free_queue_data(sc);
1726	vmxnet3_free_shared_data(sc);
1727}
1728
1729static int
1730vmxnet3_setup_interface(struct vmxnet3_softc *sc)
1731{
1732	device_t dev;
1733	struct ifnet *ifp;
1734
1735	dev = sc->vmx_dev;
1736
1737	ifp = sc->vmx_ifp = if_alloc(IFT_ETHER);
1738	if (ifp == NULL) {
1739		device_printf(dev, "cannot allocate ifnet structure\n");
1740		return (ENOSPC);
1741	}
1742
1743	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1744#if __FreeBSD_version < 1000025
1745	ifp->if_baudrate = 1000000000;
1746#elif __FreeBSD_version < 1100011
1747	if_initbaudrate(ifp, IF_Gbps(10));
1748#else
1749	ifp->if_baudrate = IF_Gbps(10);
1750#endif
1751	ifp->if_softc = sc;
1752	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1753	ifp->if_init = vmxnet3_init;
1754	ifp->if_ioctl = vmxnet3_ioctl;
1755	ifp->if_get_counter = vmxnet3_get_counter;
1756	ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
1757	ifp->if_hw_tsomaxsegcount = VMXNET3_TX_MAXSEGS;
1758	ifp->if_hw_tsomaxsegsize = VMXNET3_TX_MAXSEGSIZE;
1759
1760#ifdef VMXNET3_LEGACY_TX
1761	ifp->if_start = vmxnet3_start;
1762	ifp->if_snd.ifq_drv_maxlen = sc->vmx_ntxdescs - 1;
1763	IFQ_SET_MAXLEN(&ifp->if_snd, sc->vmx_ntxdescs - 1);
1764	IFQ_SET_READY(&ifp->if_snd);
1765#else
1766	ifp->if_transmit = vmxnet3_txq_mq_start;
1767	ifp->if_qflush = vmxnet3_qflush;
1768#endif
1769
1770	vmxnet3_get_lladdr(sc);
1771	ether_ifattach(ifp, sc->vmx_lladdr);
1772
1773	ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_TXCSUM;
1774	ifp->if_capabilities |= IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6;
1775	ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6;
1776	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
1777	    IFCAP_VLAN_HWCSUM;
1778	ifp->if_capenable = ifp->if_capabilities;
1779
1780	/* These capabilities are not enabled by default. */
1781	ifp->if_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER;
1782
1783	sc->vmx_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
1784	    vmxnet3_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
1785	sc->vmx_vlan_detach = EVENTHANDLER_REGISTER(vlan_config,
1786	    vmxnet3_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
1787
1788	ifmedia_init(&sc->vmx_media, 0, vmxnet3_media_change,
1789	    vmxnet3_media_status);
1790	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
1791	ifmedia_set(&sc->vmx_media, IFM_ETHER | IFM_AUTO);
1792
1793	return (0);
1794}
1795
1796static void
1797vmxnet3_evintr(struct vmxnet3_softc *sc)
1798{
1799	device_t dev;
1800	struct ifnet *ifp;
1801	struct vmxnet3_txq_shared *ts;
1802	struct vmxnet3_rxq_shared *rs;
1803	uint32_t event;
1804	int reset;
1805
1806	dev = sc->vmx_dev;
1807	ifp = sc->vmx_ifp;
1808	reset = 0;
1809
1810	VMXNET3_CORE_LOCK(sc);
1811
1812	/* Clear events. */
1813	event = sc->vmx_ds->event;
1814	vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
1815
1816	if (event & VMXNET3_EVENT_LINK) {
1817		vmxnet3_link_status(sc);
1818		if (sc->vmx_link_active != 0)
1819			vmxnet3_tx_start_all(sc);
1820	}
1821
1822	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
1823		reset = 1;
1824		vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
1825		ts = sc->vmx_txq[0].vxtxq_ts;
1826		if (ts->stopped != 0)
1827			device_printf(dev, "Tx queue error %#x\n", ts->error);
1828		rs = sc->vmx_rxq[0].vxrxq_rs;
1829		if (rs->stopped != 0)
1830			device_printf(dev, "Rx queue error %#x\n", rs->error);
1831		device_printf(dev, "Rx/Tx queue error event ... resetting\n");
1832	}
1833
1834	if (event & VMXNET3_EVENT_DIC)
1835		device_printf(dev, "device implementation change event\n");
1836	if (event & VMXNET3_EVENT_DEBUG)
1837		device_printf(dev, "debug event\n");
1838
1839	if (reset != 0) {
1840		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1841		vmxnet3_init_locked(sc);
1842	}
1843
1844	VMXNET3_CORE_UNLOCK(sc);
1845}
1846
1847static void
1848vmxnet3_txq_eof(struct vmxnet3_txqueue *txq)
1849{
1850	struct vmxnet3_softc *sc;
1851	struct ifnet *ifp;
1852	struct vmxnet3_txring *txr;
1853	struct vmxnet3_comp_ring *txc;
1854	struct vmxnet3_txcompdesc *txcd;
1855	struct vmxnet3_txbuf *txb;
1856	struct mbuf *m;
1857	u_int sop;
1858
1859	sc = txq->vxtxq_sc;
1860	ifp = sc->vmx_ifp;
1861	txr = &txq->vxtxq_cmd_ring;
1862	txc = &txq->vxtxq_comp_ring;
1863
1864#ifdef DEV_NETMAP
1865	if (netmap_tx_irq(sc->vmx_ifp, txq - sc->vmx_txq) != NM_IRQ_PASS)
1866		return;
1867#endif
1868
1869	VMXNET3_TXQ_LOCK_ASSERT(txq);
1870
1871	for (;;) {
1872		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
1873		if (txcd->gen != txc->vxcr_gen)
1874			break;
1875		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1876
1877		if (++txc->vxcr_next == txc->vxcr_ndesc) {
1878			txc->vxcr_next = 0;
1879			txc->vxcr_gen ^= 1;
1880		}
1881
1882		sop = txr->vxtxr_next;
1883		txb = &txr->vxtxr_txbuf[sop];
1884
1885		if ((m = txb->vtxb_m) != NULL) {
1886			bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
1887			    BUS_DMASYNC_POSTWRITE);
1888			bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap);
1889
1890			txq->vxtxq_stats.vmtxs_opackets++;
1891			txq->vxtxq_stats.vmtxs_obytes += m->m_pkthdr.len;
1892			if (m->m_flags & M_MCAST)
1893				txq->vxtxq_stats.vmtxs_omcasts++;
1894
1895			m_freem(m);
1896			txb->vtxb_m = NULL;
1897		}
1898
1899		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
1900	}
1901
1902	if (txr->vxtxr_head == txr->vxtxr_next)
1903		txq->vxtxq_watchdog = 0;
1904}
1905
1906static int
1907vmxnet3_newbuf(struct vmxnet3_softc *sc, struct vmxnet3_rxring *rxr)
1908{
1909	struct ifnet *ifp;
1910	struct mbuf *m;
1911	struct vmxnet3_rxdesc *rxd;
1912	struct vmxnet3_rxbuf *rxb;
1913	bus_dma_tag_t tag;
1914	bus_dmamap_t dmap;
1915	bus_dma_segment_t segs[1];
1916	int idx, clsize, btype, flags, nsegs, error;
1917
1918	ifp = sc->vmx_ifp;
1919	tag = rxr->vxrxr_rxtag;
1920	dmap = rxr->vxrxr_spare_dmap;
1921	idx = rxr->vxrxr_fill;
1922	rxd = &rxr->vxrxr_rxd[idx];
1923	rxb = &rxr->vxrxr_rxbuf[idx];
1924
1925#ifdef VMXNET3_FAILPOINTS
1926	KFAIL_POINT_CODE(VMXNET3_FP, newbuf, return ENOBUFS);
1927	if (rxr->vxrxr_rid != 0)
1928		KFAIL_POINT_CODE(VMXNET3_FP, newbuf_body_only, return ENOBUFS);
1929#endif
1930
1931	if (rxr->vxrxr_rid == 0 && (idx % sc->vmx_rx_max_chain) == 0) {
1932		flags = M_PKTHDR;
1933		clsize = MCLBYTES;
1934		btype = VMXNET3_BTYPE_HEAD;
1935	} else {
1936#if __FreeBSD_version < 902001
1937		/*
1938		 * These mbufs will never be used for the start of a frame.
1939		 * Roughly prior to branching releng/9.2, the load_mbuf_sg()
1940		 * required the mbuf to always be a packet header. Avoid
1941		 * unnecessary mbuf initialization in newer versions where
1942		 * that is not the case.
1943		 */
1944		flags = M_PKTHDR;
1945#else
1946		flags = 0;
1947#endif
1948		clsize = MJUMPAGESIZE;
1949		btype = VMXNET3_BTYPE_BODY;
1950	}
1951
1952	m = m_getjcl(M_NOWAIT, MT_DATA, flags, clsize);
1953	if (m == NULL) {
1954		sc->vmx_stats.vmst_mgetcl_failed++;
1955		return (ENOBUFS);
1956	}
1957
1958	if (btype == VMXNET3_BTYPE_HEAD) {
1959		m->m_len = m->m_pkthdr.len = clsize;
1960		m_adj(m, ETHER_ALIGN);
1961	} else
1962		m->m_len = clsize;
1963
1964	error = bus_dmamap_load_mbuf_sg(tag, dmap, m, &segs[0], &nsegs,
1965	    BUS_DMA_NOWAIT);
1966	if (error) {
1967		m_freem(m);
1968		sc->vmx_stats.vmst_mbuf_load_failed++;
1969		return (error);
1970	}
1971	KASSERT(nsegs == 1,
1972	    ("%s: mbuf %p with too many segments %d", __func__, m, nsegs));
1973#if __FreeBSD_version < 902001
1974	if (btype == VMXNET3_BTYPE_BODY)
1975		m->m_flags &= ~M_PKTHDR;
1976#endif
1977
1978	if (rxb->vrxb_m != NULL) {
1979		bus_dmamap_sync(tag, rxb->vrxb_dmamap, BUS_DMASYNC_POSTREAD);
1980		bus_dmamap_unload(tag, rxb->vrxb_dmamap);
1981	}
1982
1983	rxr->vxrxr_spare_dmap = rxb->vrxb_dmamap;
1984	rxb->vrxb_dmamap = dmap;
1985	rxb->vrxb_m = m;
1986
1987	rxd->addr = segs[0].ds_addr;
1988	rxd->len = segs[0].ds_len;
1989	rxd->btype = btype;
1990	rxd->gen = rxr->vxrxr_gen;
1991
1992	vmxnet3_rxr_increment_fill(rxr);
1993	return (0);
1994}
1995
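/*
 * Return the Rx descriptor at idx to the host without replacing its mbuf;
 * used when a frame, or the remainder of one, is being dropped.
 */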
1996static void
1997vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *rxq,
1998    struct vmxnet3_rxring *rxr, int idx)
1999{
2000	struct vmxnet3_rxdesc *rxd;
2001
2002	rxd = &rxr->vxrxr_rxd[idx];
2003	rxd->gen = rxr->vxrxr_gen;
2004	vmxnet3_rxr_increment_fill(rxr);
2005}
2006
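/*
 * Consume and discard the remaining completion descriptors of a frame
 * whose start has already been dropped.
 */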
2007static void
2008vmxnet3_rxq_discard_chain(struct vmxnet3_rxqueue *rxq)
2009{
2010	struct vmxnet3_softc *sc;
2011	struct vmxnet3_rxring *rxr;
2012	struct vmxnet3_comp_ring *rxc;
2013	struct vmxnet3_rxcompdesc *rxcd;
2014	int idx, eof;
2015
2016	sc = rxq->vxrxq_sc;
2017	rxc = &rxq->vxrxq_comp_ring;
2018
2019	do {
2020		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
2021		if (rxcd->gen != rxc->vxcr_gen)
2022			break;		/* Not expected. */
2023		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
2024
2025		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
2026			rxc->vxcr_next = 0;
2027			rxc->vxcr_gen ^= 1;
2028		}
2029
2030		idx = rxcd->rxd_idx;
2031		eof = rxcd->eop;
2032		if (rxcd->qid < sc->vmx_nrxqueues)
2033			rxr = &rxq->vxrxq_cmd_ring[0];
2034		else
2035			rxr = &rxq->vxrxq_cmd_ring[1];
2036		vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2037	} while (!eof);
2038}
2039
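/*
 * Translate the checksum bits of the Rx completion descriptor into
 * mbuf csum_flags for the stack.
 */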
2040static void
2041vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2042{
2043
2044	if (rxcd->ipv4) {
2045		m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
2046		if (rxcd->ipcsum_ok)
2047			m->m_pkthdr.csum_flags |= CSUM_IP_VALID;
2048	}
2049
2050	if (!rxcd->fragment) {
2051		if (rxcd->csum_ok && (rxcd->tcp || rxcd->udp)) {
2052			m->m_pkthdr.csum_flags |= CSUM_DATA_VALID |
2053			    CSUM_PSEUDO_HDR;
2054			m->m_pkthdr.csum_data = 0xFFFF;
2055		}
2056	}
2057}
2058
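/*
 * Finish up a received frame (flowid, checksum flags, VLAN tag, counters)
 * and pass it to the stack. The Rx queue lock is dropped around if_input().
 */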
2059static void
2060vmxnet3_rxq_input(struct vmxnet3_rxqueue *rxq,
2061    struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2062{
2063	struct vmxnet3_softc *sc;
2064	struct ifnet *ifp;
2065
2066	sc = rxq->vxrxq_sc;
2067	ifp = sc->vmx_ifp;
2068
2069	if (rxcd->error) {
2070		rxq->vxrxq_stats.vmrxs_ierrors++;
2071		m_freem(m);
2072		return;
2073	}
2074
2075#ifdef notyet
2076	switch (rxcd->rss_type) {
2077	case VMXNET3_RCD_RSS_TYPE_IPV4:
2078		m->m_pkthdr.flowid = rxcd->rss_hash;
2079		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV4);
2080		break;
2081	case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
2082		m->m_pkthdr.flowid = rxcd->rss_hash;
2083		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV4);
2084		break;
2085	case VMXNET3_RCD_RSS_TYPE_IPV6:
2086		m->m_pkthdr.flowid = rxcd->rss_hash;
2087		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV6);
2088		break;
2089	case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
2090		m->m_pkthdr.flowid = rxcd->rss_hash;
2091		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV6);
2092		break;
2093	default: /* VMXNET3_RCD_RSS_TYPE_NONE */
2094		m->m_pkthdr.flowid = rxq->vxrxq_id;
2095		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2096		break;
2097	}
2098#else
2099	m->m_pkthdr.flowid = rxq->vxrxq_id;
2100	M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2101#endif
2102
2103	if (!rxcd->no_csum)
2104		vmxnet3_rx_csum(rxcd, m);
2105	if (rxcd->vlan) {
2106		m->m_flags |= M_VLANTAG;
2107		m->m_pkthdr.ether_vtag = rxcd->vtag;
2108	}
2109
2110	rxq->vxrxq_stats.vmrxs_ipackets++;
2111	rxq->vxrxq_stats.vmrxs_ibytes += m->m_pkthdr.len;
2112
2113	VMXNET3_RXQ_UNLOCK(rxq);
2114	(*ifp->if_input)(ifp, m);
2115	VMXNET3_RXQ_LOCK(rxq);
2116}
2117
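/*
 * Process the Rx completion ring: reassemble frames that span multiple
 * descriptors, replenish ring buffers with vmxnet3_newbuf(), and hand
 * completed frames to vmxnet3_rxq_input().
 */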
2118static void
2119vmxnet3_rxq_eof(struct vmxnet3_rxqueue *rxq)
2120{
2121	struct vmxnet3_softc *sc;
2122	struct ifnet *ifp;
2123	struct vmxnet3_rxring *rxr;
2124	struct vmxnet3_comp_ring *rxc;
2125	struct vmxnet3_rxdesc *rxd;
2126	struct vmxnet3_rxcompdesc *rxcd;
2127	struct mbuf *m, *m_head, *m_tail;
2128	int idx, length;
2129
2130	sc = rxq->vxrxq_sc;
2131	ifp = sc->vmx_ifp;
2132	rxc = &rxq->vxrxq_comp_ring;
2133
2134#ifdef DEV_NETMAP
2135	{
2136		int dummy;
2137		if (netmap_rx_irq(ifp, rxq - sc->vmx_rxq, &dummy) !=
2138		    NM_IRQ_PASS)
2139			return;
2140	}
2141#endif
2142
2143	VMXNET3_RXQ_LOCK_ASSERT(rxq);
2144
2145	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2146		return;
2147
2148	m_head = rxq->vxrxq_mhead;
2149	rxq->vxrxq_mhead = NULL;
2150	m_tail = rxq->vxrxq_mtail;
2151	rxq->vxrxq_mtail = NULL;
2152	MPASS(m_head == NULL || m_tail != NULL);
2153
2154	for (;;) {
2155		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
2156		if (rxcd->gen != rxc->vxcr_gen) {
2157			rxq->vxrxq_mhead = m_head;
2158			rxq->vxrxq_mtail = m_tail;
2159			break;
2160		}
2161		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
2162
2163		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
2164			rxc->vxcr_next = 0;
2165			rxc->vxcr_gen ^= 1;
2166		}
2167
2168		idx = rxcd->rxd_idx;
2169		length = rxcd->len;
2170		if (rxcd->qid < sc->vmx_nrxqueues)
2171			rxr = &rxq->vxrxq_cmd_ring[0];
2172		else
2173			rxr = &rxq->vxrxq_cmd_ring[1];
2174		rxd = &rxr->vxrxr_rxd[idx];
2175
2176		m = rxr->vxrxr_rxbuf[idx].vrxb_m;
2177		KASSERT(m != NULL, ("%s: queue %d idx %d without mbuf",
2178		    __func__, rxcd->qid, idx));
2179
2180		/*
2181		 * The host may skip descriptors. We detect this when this
2182		 * descriptor does not match the previous fill index. Catch
2183		 * up with the host now.
2184		 */
2185		if (__predict_false(rxr->vxrxr_fill != idx)) {
2186			while (rxr->vxrxr_fill != idx) {
2187				rxr->vxrxr_rxd[rxr->vxrxr_fill].gen =
2188				    rxr->vxrxr_gen;
2189				vmxnet3_rxr_increment_fill(rxr);
2190			}
2191		}
2192
2193		if (rxcd->sop) {
2194			KASSERT(rxd->btype == VMXNET3_BTYPE_HEAD,
2195			    ("%s: start of frame w/o head buffer", __func__));
2196			KASSERT(rxr == &rxq->vxrxq_cmd_ring[0],
2197			    ("%s: start of frame not in ring 0", __func__));
2198			KASSERT((idx % sc->vmx_rx_max_chain) == 0,
2199			    ("%s: start of frame at unexpected index %d (%d)",
2200			     __func__, idx, sc->vmx_rx_max_chain));
2201			KASSERT(m_head == NULL,
2202			    ("%s: duplicate start of frame?", __func__));
2203
2204			if (length == 0) {
2205				/* Just ignore this descriptor. */
2206				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2207				goto nextp;
2208			}
2209
2210			if (vmxnet3_newbuf(sc, rxr) != 0) {
2211				rxq->vxrxq_stats.vmrxs_iqdrops++;
2212				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2213				if (!rxcd->eop)
2214					vmxnet3_rxq_discard_chain(rxq);
2215				goto nextp;
2216			}
2217
2218			m->m_pkthdr.rcvif = ifp;
2219			m->m_pkthdr.len = m->m_len = length;
2220			m->m_pkthdr.csum_flags = 0;
2221			m_head = m_tail = m;
2222
2223		} else {
2224			KASSERT(rxd->btype == VMXNET3_BTYPE_BODY,
2225			    ("%s: non start of frame w/o body buffer", __func__));
2226
2227			if (m_head == NULL && m_tail == NULL) {
2228				/*
2229				 * This is a continuation of a packet that we
2230				 * started to drop, but could not drop entirely
2231				 * because this segment was still owned by the
2232				 * host.  So, drop the remainder now.
2233				 */
2234				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2235				if (!rxcd->eop)
2236					vmxnet3_rxq_discard_chain(rxq);
2237				goto nextp;
2238			}
2239
2240			KASSERT(m_head != NULL,
2241			    ("%s: frame not started?", __func__));
2242
2243			if (vmxnet3_newbuf(sc, rxr) != 0) {
2244				rxq->vxrxq_stats.vmrxs_iqdrops++;
2245				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2246				if (!rxcd->eop)
2247					vmxnet3_rxq_discard_chain(rxq);
2248				m_freem(m_head);
2249				m_head = m_tail = NULL;
2250				goto nextp;
2251			}
2252
2253			m->m_len = length;
2254			m_head->m_pkthdr.len += length;
2255			m_tail->m_next = m;
2256			m_tail = m;
2257		}
2258
2259		if (rxcd->eop) {
2260			vmxnet3_rxq_input(rxq, rxcd, m_head);
2261			m_head = m_tail = NULL;
2262
2263			/* Must recheck after dropping the Rx lock. */
2264			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2265				break;
2266		}
2267
2268nextp:
2269		if (__predict_false(rxq->vxrxq_rs->update_rxhead)) {
2270			int qid = rxcd->qid;
2271			bus_size_t r;
2272
2273			idx = (idx + 1) % rxr->vxrxr_ndesc;
2274			if (qid >= sc->vmx_nrxqueues) {
2275				qid -= sc->vmx_nrxqueues;
2276				r = VMXNET3_BAR0_RXH2(qid);
2277			} else
2278				r = VMXNET3_BAR0_RXH1(qid);
2279			vmxnet3_write_bar0(sc, r, idx);
2280		}
2281	}
2282}
2283
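/*
 * Interrupt handler used when all interrupt sources share one vector:
 * service any pending events, then the Rx and Tx rings of queue 0.
 */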
2284static void
2285vmxnet3_legacy_intr(void *xsc)
2286{
2287	struct vmxnet3_softc *sc;
2288	struct vmxnet3_rxqueue *rxq;
2289	struct vmxnet3_txqueue *txq;
2290
2291	sc = xsc;
2292	rxq = &sc->vmx_rxq[0];
2293	txq = &sc->vmx_txq[0];
2294
2295	if (sc->vmx_intr_type == VMXNET3_IT_LEGACY) {
2296		if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
2297			return;
2298	}
2299	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2300		vmxnet3_disable_all_intrs(sc);
2301
2302	if (sc->vmx_ds->event != 0)
2303		vmxnet3_evintr(sc);
2304
2305	VMXNET3_RXQ_LOCK(rxq);
2306	vmxnet3_rxq_eof(rxq);
2307	VMXNET3_RXQ_UNLOCK(rxq);
2308
2309	VMXNET3_TXQ_LOCK(txq);
2310	vmxnet3_txq_eof(txq);
2311	vmxnet3_txq_start(txq);
2312	VMXNET3_TXQ_UNLOCK(txq);
2313
2314	vmxnet3_enable_all_intrs(sc);
2315}
2316
2317static void
2318vmxnet3_txq_intr(void *xtxq)
2319{
2320	struct vmxnet3_softc *sc;
2321	struct vmxnet3_txqueue *txq;
2322
2323	txq = xtxq;
2324	sc = txq->vxtxq_sc;
2325
2326	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2327		vmxnet3_disable_intr(sc, txq->vxtxq_intr_idx);
2328
2329	VMXNET3_TXQ_LOCK(txq);
2330	vmxnet3_txq_eof(txq);
2331	vmxnet3_txq_start(txq);
2332	VMXNET3_TXQ_UNLOCK(txq);
2333
2334	vmxnet3_enable_intr(sc, txq->vxtxq_intr_idx);
2335}
2336
2337static void
2338vmxnet3_rxq_intr(void *xrxq)
2339{
2340	struct vmxnet3_softc *sc;
2341	struct vmxnet3_rxqueue *rxq;
2342
2343	rxq = xrxq;
2344	sc = rxq->vxrxq_sc;
2345
2346	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2347		vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx);
2348
2349	VMXNET3_RXQ_LOCK(rxq);
2350	vmxnet3_rxq_eof(rxq);
2351	VMXNET3_RXQ_UNLOCK(rxq);
2352
2353	vmxnet3_enable_intr(sc, rxq->vxrxq_intr_idx);
2354}
2355
2356static void
2357vmxnet3_event_intr(void *xsc)
2358{
2359	struct vmxnet3_softc *sc;
2360
2361	sc = xsc;
2362
2363	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2364		vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
2365
2366	if (sc->vmx_ds->event != 0)
2367		vmxnet3_evintr(sc);
2368
2369	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2370}
2371
2372static void
2373vmxnet3_txstop(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2374{
2375	struct vmxnet3_txring *txr;
2376	struct vmxnet3_txbuf *txb;
2377	int i;
2378
2379	txr = &txq->vxtxq_cmd_ring;
2380
2381	for (i = 0; i < txr->vxtxr_ndesc; i++) {
2382		txb = &txr->vxtxr_txbuf[i];
2383
2384		if (txb->vtxb_m == NULL)
2385			continue;
2386
2387		bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
2388		    BUS_DMASYNC_POSTWRITE);
2389		bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap);
2390		m_freem(txb->vtxb_m);
2391		txb->vtxb_m = NULL;
2392	}
2393}
2394
2395static void
2396vmxnet3_rxstop(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2397{
2398	struct vmxnet3_rxring *rxr;
2399	struct vmxnet3_rxbuf *rxb;
2400	int i, j;
2401
2402	if (rxq->vxrxq_mhead != NULL) {
2403		m_freem(rxq->vxrxq_mhead);
2404		rxq->vxrxq_mhead = NULL;
2405		rxq->vxrxq_mtail = NULL;
2406	}
2407
2408	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
2409		rxr = &rxq->vxrxq_cmd_ring[i];
2410
2411		for (j = 0; j < rxr->vxrxr_ndesc; j++) {
2412			rxb = &rxr->vxrxr_rxbuf[j];
2413
2414			if (rxb->vrxb_m == NULL)
2415				continue;
2416
2417			bus_dmamap_sync(rxr->vxrxr_rxtag, rxb->vrxb_dmamap,
2418			    BUS_DMASYNC_POSTREAD);
2419			bus_dmamap_unload(rxr->vxrxr_rxtag, rxb->vrxb_dmamap);
2420			m_freem(rxb->vrxb_m);
2421			rxb->vrxb_m = NULL;
2422		}
2423	}
2424}
2425
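/*
 * Rendezvous with the queue interrupt handlers: taking and dropping each
 * queue lock guarantees no handler is still active in the Rx or Tx paths
 * once this returns.
 */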
2426static void
2427vmxnet3_stop_rendezvous(struct vmxnet3_softc *sc)
2428{
2429	struct vmxnet3_rxqueue *rxq;
2430	struct vmxnet3_txqueue *txq;
2431	int i;
2432
2433#ifdef DEV_NETMAP
2434	netmap_disable_all_rings(sc->vmx_ifp);
2435#endif
2436
2437	for (i = 0; i < sc->vmx_nrxqueues; i++) {
2438		rxq = &sc->vmx_rxq[i];
2439		VMXNET3_RXQ_LOCK(rxq);
2440		VMXNET3_RXQ_UNLOCK(rxq);
2441	}
2442
2443	for (i = 0; i < sc->vmx_ntxqueues; i++) {
2444		txq = &sc->vmx_txq[i];
2445		VMXNET3_TXQ_LOCK(txq);
2446		VMXNET3_TXQ_UNLOCK(txq);
2447	}
2448}
2449
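/*
 * Bring the interface down: disable interrupts and the device, wait for
 * the queues to quiesce, release all pending mbufs, and reset the device.
 */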
2450static void
2451vmxnet3_stop(struct vmxnet3_softc *sc)
2452{
2453	struct ifnet *ifp;
2454	int q;
2455
2456	ifp = sc->vmx_ifp;
2457	VMXNET3_CORE_LOCK_ASSERT(sc);
2458
2459	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2460	sc->vmx_link_active = 0;
2461	callout_stop(&sc->vmx_tick);
2462
2463	/* Disable interrupts. */
2464	vmxnet3_disable_all_intrs(sc);
2465	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
2466
2467	vmxnet3_stop_rendezvous(sc);
2468
2469	for (q = 0; q < sc->vmx_ntxqueues; q++)
2470		vmxnet3_txstop(sc, &sc->vmx_txq[q]);
2471	for (q = 0; q < sc->vmx_nrxqueues; q++)
2472		vmxnet3_rxstop(sc, &sc->vmx_rxq[q]);
2473
2474	vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
2475}
2476
2477static void
2478vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2479{
2480	struct vmxnet3_txring *txr;
2481	struct vmxnet3_comp_ring *txc;
2482
2483	txr = &txq->vxtxq_cmd_ring;
2484	txr->vxtxr_head = 0;
2485	txr->vxtxr_next = 0;
2486	txr->vxtxr_gen = VMXNET3_INIT_GEN;
2487	bzero(txr->vxtxr_txd,
2488	    txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc));
2489
2490#ifdef DEV_NETMAP
2491	vmxnet3_netmap_txq_init(sc, txq);
2492#endif
2493
2494	txc = &txq->vxtxq_comp_ring;
2495	txc->vxcr_next = 0;
2496	txc->vxcr_gen = VMXNET3_INIT_GEN;
2497	bzero(txc->vxcr_u.txcd,
2498	    txc->vxcr_ndesc * sizeof(struct vmxnet3_txcompdesc));
2499}
2500
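/*
 * Reset an Rx queue's command and completion rings and pre-populate the
 * command rings that this configuration will actually use.
 */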
2501static int
2502vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2503{
2504	struct ifnet *ifp;
2505	struct vmxnet3_rxring *rxr;
2506	struct vmxnet3_comp_ring *rxc;
2507	int i, populate, idx, frame_size, error;
2508#ifdef DEV_NETMAP
2509	struct netmap_adapter *na;
2510	struct netmap_slot *slot;
2511#endif
2512
2513	ifp = sc->vmx_ifp;
2514	frame_size = ETHER_ALIGN + sizeof(struct ether_vlan_header) +
2515	    ifp->if_mtu;
2516
2517	/*
2518	 * If the MTU causes us to exceed what a regular sized cluster can
2519	 * handle, we allocate a second MJUMPAGESIZE cluster after it in
2520	 * ring 0. If in use, ring 1 always contains MJUMPAGESIZE clusters.
2521	 *
2522	 * Keep rx_max_chain a divisor of the maximum Rx ring size to make
2523	 * our life easier. We do not support changing the ring size after
2524	 * the attach.
2525	 */
2526	if (frame_size <= MCLBYTES)
2527		sc->vmx_rx_max_chain = 1;
2528	else
2529		sc->vmx_rx_max_chain = 2;
2530
2531	/*
2532	 * Only populate ring 1 if the configuration will take advantage
2533	 * of it. That is either when LRO is enabled or the frame size
2534	 * exceeds what ring 0 can contain.
2535	 */
2536	if ((ifp->if_capenable & IFCAP_LRO) == 0 &&
2537	    frame_size <= MCLBYTES + MJUMPAGESIZE)
2538		populate = 1;
2539	else
2540		populate = VMXNET3_RXRINGS_PERQ;
2541
2542#ifdef DEV_NETMAP
2543	na = NA(ifp);
2544	slot = netmap_reset(na, NR_RX, rxq - sc->vmx_rxq, 0);
2545#endif
2546
2547	for (i = 0; i < populate; i++) {
2548		rxr = &rxq->vxrxq_cmd_ring[i];
2549		rxr->vxrxr_fill = 0;
2550		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
2551		bzero(rxr->vxrxr_rxd,
2552		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2553#ifdef DEV_NETMAP
2554		if (slot != NULL) {
2555			vmxnet3_netmap_rxq_init(sc, rxq, rxr, slot);
2556			i = populate;
2557			break;
2558		}
2559#endif
2560
2561		for (idx = 0; idx < rxr->vxrxr_ndesc; idx++) {
2562			error = vmxnet3_newbuf(sc, rxr);
2563			if (error)
2564				return (error);
2565		}
2566	}
2567
2568	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
2569		rxr = &rxq->vxrxq_cmd_ring[i];
2570		rxr->vxrxr_fill = 0;
2571		rxr->vxrxr_gen = 0;
2572		bzero(rxr->vxrxr_rxd,
2573		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2574	}
2575
2576	rxc = &rxq->vxrxq_comp_ring;
2577	rxc->vxcr_next = 0;
2578	rxc->vxcr_gen = VMXNET3_INIT_GEN;
2579	bzero(rxc->vxcr_u.rxcd,
2580	    rxc->vxcr_ndesc * sizeof(struct vmxnet3_rxcompdesc));
2581
2582	return (0);
2583}
2584
2585static int
2586vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
2587{
2588	device_t dev;
2589	int q, error;
2590
2591	dev = sc->vmx_dev;
2592
2593	for (q = 0; q < sc->vmx_ntxqueues; q++)
2594		vmxnet3_txinit(sc, &sc->vmx_txq[q]);
2595
2596	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2597		error = vmxnet3_rxinit(sc, &sc->vmx_rxq[q]);
2598		if (error) {
2599			device_printf(dev, "cannot populate Rx queue %d\n", q);
2600			return (error);
2601		}
2602	}
2603
2604	return (0);
2605}
2606
2607static int
2608vmxnet3_enable_device(struct vmxnet3_softc *sc)
2609{
2610	int q;
2611
2612	if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
2613		device_printf(sc->vmx_dev, "device enable command failed!\n");
2614		return (1);
2615	}
2616
2617	/* Reset the Rx queue heads. */
2618	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2619		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
2620		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
2621	}
2622
2623	return (0);
2624}
2625
2626static void
2627vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
2628{
2629	struct ifnet *ifp;
2630
2631	ifp = sc->vmx_ifp;
2632
2633	vmxnet3_set_rxfilter(sc);
2634
2635	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2636		bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter,
2637		    sizeof(sc->vmx_ds->vlan_filter));
2638	else
2639		bzero(sc->vmx_ds->vlan_filter,
2640		    sizeof(sc->vmx_ds->vlan_filter));
2641	vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
2642}
2643
2644static int
2645vmxnet3_reinit(struct vmxnet3_softc *sc)
2646{
2647
2648	vmxnet3_reinit_interface(sc);
2649	vmxnet3_reinit_shared_data(sc);
2650
2651	if (vmxnet3_reinit_queues(sc) != 0)
2652		return (ENXIO);
2653
2654	if (vmxnet3_enable_device(sc) != 0)
2655		return (ENXIO);
2656
2657	vmxnet3_reinit_rxfilters(sc);
2658
2659	return (0);
2660}
2661
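/*
 * (Re)initialize the interface with the core lock held: stop the device,
 * reprogram the shared state and queues, and restart the tick callout.
 */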
2662static void
2663vmxnet3_init_locked(struct vmxnet3_softc *sc)
2664{
2665	struct ifnet *ifp;
2666
2667	ifp = sc->vmx_ifp;
2668
2669	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2670		return;
2671
2672	vmxnet3_stop(sc);
2673
2674	if (vmxnet3_reinit(sc) != 0) {
2675		vmxnet3_stop(sc);
2676		return;
2677	}
2678
2679	ifp->if_drv_flags |= IFF_DRV_RUNNING;
2680	vmxnet3_link_status(sc);
2681
2682#ifdef DEV_NETMAP
2683	netmap_enable_all_rings(ifp);
2684#endif
2685
2686	vmxnet3_enable_all_intrs(sc);
2687	callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
2688}
2689
2690static void
2691vmxnet3_init(void *xsc)
2692{
2693	struct vmxnet3_softc *sc;
2694
2695	sc = xsc;
2696
2697	VMXNET3_CORE_LOCK(sc);
2698	vmxnet3_init_locked(sc);
2699	VMXNET3_CORE_UNLOCK(sc);
2700}
2701
2702/*
2703 * BMV: Much of this can go away once we finally have offsets in
2704 * the mbuf packet header. Bug andre@.
2705 */
2706static int
2707vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *txq, struct mbuf *m,
2708    int *etype, int *proto, int *start)
2709{
2710	struct ether_vlan_header *evh;
2711	int offset;
2712#if defined(INET)
2713	struct ip *ip = NULL;
2714	struct ip iphdr;
2715#endif
2716#if defined(INET6)
2717	struct ip6_hdr *ip6 = NULL;
2718	struct ip6_hdr ip6hdr;
2719#endif
2720
2721	evh = mtod(m, struct ether_vlan_header *);
2722	if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2723		/* BMV: We should handle nested VLAN tags too. */
2724		*etype = ntohs(evh->evl_proto);
2725		offset = sizeof(struct ether_vlan_header);
2726	} else {
2727		*etype = ntohs(evh->evl_encap_proto);
2728		offset = sizeof(struct ether_header);
2729	}
2730
2731	switch (*etype) {
2732#if defined(INET)
2733	case ETHERTYPE_IP:
2734		if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
2735			m_copydata(m, offset, sizeof(struct ip),
2736			    (caddr_t) &iphdr);
2737			ip = &iphdr;
2738		} else
2739			ip = mtodo(m, offset);
2740		*proto = ip->ip_p;
2741		*start = offset + (ip->ip_hl << 2);
2742		break;
2743#endif
2744#if defined(INET6)
2745	case ETHERTYPE_IPV6:
2746		if (__predict_false(m->m_len <
2747		    offset + sizeof(struct ip6_hdr))) {
2748			m_copydata(m, offset, sizeof(struct ip6_hdr),
2749			    (caddr_t) &ip6hdr);
2750			ip6 = &ip6hdr;
2751		} else
2752			ip6 = mtodo(m, offset);
2753		*proto = -1;
2754		*start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
2755		/* Assert the network stack sent us a valid packet. */
2756		KASSERT(*start > offset,
2757		    ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
2758		    *start, offset, *proto));
2759		break;
2760#endif
2761	default:
2762		return (EINVAL);
2763	}
2764
2765	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
2766		struct tcphdr *tcp, tcphdr;
2767		uint16_t sum;
2768
2769		if (__predict_false(*proto != IPPROTO_TCP)) {
2770			/* Likely failed to correctly parse the mbuf. */
2771			return (EINVAL);
2772		}
2773
2774		txq->vxtxq_stats.vmtxs_tso++;
2775
2776		switch (*etype) {
2777#if defined(INET)
2778		case ETHERTYPE_IP:
2779			sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2780			    htons(IPPROTO_TCP));
2781			break;
2782#endif
2783#if defined(INET6)
2784		case ETHERTYPE_IPV6:
2785			sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
2786			break;
2787#endif
2788		default:
2789			sum = 0;
2790			break;
2791		}
2792
2793		if (m->m_len < *start + sizeof(struct tcphdr)) {
2794			m_copyback(m, *start + offsetof(struct tcphdr, th_sum),
2795			    sizeof(uint16_t), (caddr_t) &sum);
2796			m_copydata(m, *start, sizeof(struct tcphdr),
2797			    (caddr_t) &tcphdr);
2798			tcp = &tcphdr;
2799		} else {
2800			tcp = mtodo(m, *start);
2801			tcp->th_sum = sum;
2802		}
2803
2804		/*
2805		 * For TSO, the size of the protocol header is also
2806		 * included in the descriptor header size.
2807		 */
2808		*start += (tcp->th_off << 2);
2809	} else
2810		txq->vxtxq_stats.vmtxs_csum++;
2811
2812	return (0);
2813}
2814
2815static int
2816vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *txq, struct mbuf **m0,
2817    bus_dmamap_t dmap, bus_dma_segment_t segs[], int *nsegs)
2818{
2819	struct vmxnet3_txring *txr;
2820	struct mbuf *m;
2821	bus_dma_tag_t tag;
2822	int error;
2823
2824	txr = &txq->vxtxq_cmd_ring;
2825	m = *m0;
2826	tag = txr->vxtxr_txtag;
2827
2828	error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0);
2829	if (error == 0 || error != EFBIG)
2830		return (error);
2831
2832	m = m_defrag(m, M_NOWAIT);
2833	if (m != NULL) {
2834		*m0 = m;
2835		error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0);
2836	} else
2837		error = ENOBUFS;
2838
2839	if (error) {
2840		m_freem(*m0);
2841		*m0 = NULL;
2842		txq->vxtxq_sc->vmx_stats.vmst_defrag_failed++;
2843	} else
2844		txq->vxtxq_sc->vmx_stats.vmst_defragged++;
2845
2846	return (error);
2847}
2848
2849static void
2850vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *txq, bus_dmamap_t dmap)
2851{
2852	struct vmxnet3_txring *txr;
2853
2854	txr = &txq->vxtxq_cmd_ring;
2855	bus_dmamap_unload(txr->vxtxr_txtag, dmap);
2856}
2857
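/*
 * Encapsulate an mbuf chain into Tx descriptors: one descriptor per DMA
 * segment, with any VLAN, checksum, or TSO state placed in the
 * start-of-packet descriptor. Its generation bit is flipped last so the
 * host only sees a fully formed packet, and the doorbell is written once
 * enough descriptors are pending.
 */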
2858static int
2859vmxnet3_txq_encap(struct vmxnet3_txqueue *txq, struct mbuf **m0)
2860{
2861	struct vmxnet3_softc *sc;
2862	struct vmxnet3_txring *txr;
2863	struct vmxnet3_txdesc *txd, *sop;
2864	struct mbuf *m;
2865	bus_dmamap_t dmap;
2866	bus_dma_segment_t segs[VMXNET3_TX_MAXSEGS];
2867	int i, gen, nsegs, etype, proto, start, error;
2868
2869	sc = txq->vxtxq_sc;
2870	start = 0;
2871	txd = NULL;
2872	txr = &txq->vxtxq_cmd_ring;
2873	dmap = txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_dmamap;
2874
2875	error = vmxnet3_txq_load_mbuf(txq, m0, dmap, segs, &nsegs);
2876	if (error)
2877		return (error);
2878
2879	m = *m0;
2880	M_ASSERTPKTHDR(m);
2881	KASSERT(nsegs <= VMXNET3_TX_MAXSEGS,
2882	    ("%s: mbuf %p with too many segments %d", __func__, m, nsegs));
2883
2884	if (VMXNET3_TXRING_AVAIL(txr) < nsegs) {
2885		txq->vxtxq_stats.vmtxs_full++;
2886		vmxnet3_txq_unload_mbuf(txq, dmap);
2887		return (ENOSPC);
2888	} else if (m->m_pkthdr.csum_flags & VMXNET3_CSUM_ALL_OFFLOAD) {
2889		error = vmxnet3_txq_offload_ctx(txq, m, &etype, &proto, &start);
2890		if (error) {
2891			txq->vxtxq_stats.vmtxs_offload_failed++;
2892			vmxnet3_txq_unload_mbuf(txq, dmap);
2893			m_freem(m);
2894			*m0 = NULL;
2895			return (error);
2896		}
2897	}
2898
2899	txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_m = m;
2900	sop = &txr->vxtxr_txd[txr->vxtxr_head];
2901	gen = txr->vxtxr_gen ^ 1;	/* Still owned by the CPU. */
2902
2903	for (i = 0; i < nsegs; i++) {
2904		txd = &txr->vxtxr_txd[txr->vxtxr_head];
2905
2906		txd->addr = segs[i].ds_addr;
2907		txd->len = segs[i].ds_len;
2908		txd->gen = gen;
2909		txd->dtype = 0;
2910		txd->offload_mode = VMXNET3_OM_NONE;
2911		txd->offload_pos = 0;
2912		txd->hlen = 0;
2913		txd->eop = 0;
2914		txd->compreq = 0;
2915		txd->vtag_mode = 0;
2916		txd->vtag = 0;
2917
2918		if (++txr->vxtxr_head == txr->vxtxr_ndesc) {
2919			txr->vxtxr_head = 0;
2920			txr->vxtxr_gen ^= 1;
2921		}
2922		gen = txr->vxtxr_gen;
2923	}
2924	txd->eop = 1;
2925	txd->compreq = 1;
2926
2927	if (m->m_flags & M_VLANTAG) {
2928		sop->vtag_mode = 1;
2929		sop->vtag = m->m_pkthdr.ether_vtag;
2930	}
2931
2932	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
2933		sop->offload_mode = VMXNET3_OM_TSO;
2934		sop->hlen = start;
2935		sop->offload_pos = m->m_pkthdr.tso_segsz;
2936	} else if (m->m_pkthdr.csum_flags & (VMXNET3_CSUM_OFFLOAD |
2937	    VMXNET3_CSUM_OFFLOAD_IPV6)) {
2938		sop->offload_mode = VMXNET3_OM_CSUM;
2939		sop->hlen = start;
2940		sop->offload_pos = start + m->m_pkthdr.csum_data;
2941	}
2942
2943	/* Finally, change the ownership. */
2944	vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
2945	sop->gen ^= 1;
2946
2947	txq->vxtxq_ts->npending += nsegs;
2948	if (txq->vxtxq_ts->npending >= txq->vxtxq_ts->intr_threshold) {
2949		txq->vxtxq_ts->npending = 0;
2950		vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id),
2951		    txr->vxtxr_head);
2952	}
2953
2954	return (0);
2955}
2956
2957#ifdef VMXNET3_LEGACY_TX
2958
2959static void
2960vmxnet3_start_locked(struct ifnet *ifp)
2961{
2962	struct vmxnet3_softc *sc;
2963	struct vmxnet3_txqueue *txq;
2964	struct vmxnet3_txring *txr;
2965	struct mbuf *m_head;
2966	int tx, avail;
2967
2968	sc = ifp->if_softc;
2969	txq = &sc->vmx_txq[0];
2970	txr = &txq->vxtxq_cmd_ring;
2971	tx = 0;
2972
2973	VMXNET3_TXQ_LOCK_ASSERT(txq);
2974
2975	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2976	    sc->vmx_link_active == 0)
2977		return;
2978
2979	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
2980		if ((avail = VMXNET3_TXRING_AVAIL(txr)) < 2)
2981			break;
2982
2983		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
2984		if (m_head == NULL)
2985			break;
2986
2987		/* Assume the worst case if this mbuf is the head of a chain. */
2988		if (m_head->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) {
2989			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2990			break;
2991		}
2992
2993		if (vmxnet3_txq_encap(txq, &m_head) != 0) {
2994			if (m_head != NULL)
2995				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2996			break;
2997		}
2998
2999		tx++;
3000		ETHER_BPF_MTAP(ifp, m_head);
3001	}
3002
3003	if (tx > 0)
3004		txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
3005}
3006
3007static void
3008vmxnet3_start(struct ifnet *ifp)
3009{
3010	struct vmxnet3_softc *sc;
3011	struct vmxnet3_txqueue *txq;
3012
3013	sc = ifp->if_softc;
3014	txq = &sc->vmx_txq[0];
3015
3016	VMXNET3_TXQ_LOCK(txq);
3017	vmxnet3_start_locked(ifp);
3018	VMXNET3_TXQ_UNLOCK(txq);
3019}
3020
3021#else /* !VMXNET3_LEGACY_TX */
3022
3023static int
3024vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *txq, struct mbuf *m)
3025{
3026	struct vmxnet3_softc *sc;
3027	struct vmxnet3_txring *txr;
3028	struct buf_ring *br;
3029	struct ifnet *ifp;
3030	int tx, avail, error;
3031
3032	sc = txq->vxtxq_sc;
3033	br = txq->vxtxq_br;
3034	ifp = sc->vmx_ifp;
3035	txr = &txq->vxtxq_cmd_ring;
3036	tx = 0;
3037	error = 0;
3038
3039	VMXNET3_TXQ_LOCK_ASSERT(txq);
3040
3041	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
3042	    sc->vmx_link_active == 0) {
3043		if (m != NULL)
3044			error = drbr_enqueue(ifp, br, m);
3045		return (error);
3046	}
3047
3048	if (m != NULL) {
3049		error = drbr_enqueue(ifp, br, m);
3050		if (error)
3051			return (error);
3052	}
3053
3054	while ((avail = VMXNET3_TXRING_AVAIL(txr)) >= 2) {
3055		m = drbr_peek(ifp, br);
3056		if (m == NULL)
3057			break;
3058
3059		/* Assume the worst case if this mbuf is the head of a chain. */
3060		if (m->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) {
3061			drbr_putback(ifp, br, m);
3062			break;
3063		}
3064
3065		if (vmxnet3_txq_encap(txq, &m) != 0) {
3066			if (m != NULL)
3067				drbr_putback(ifp, br, m);
3068			else
3069				drbr_advance(ifp, br);
3070			break;
3071		}
3072		drbr_advance(ifp, br);
3073
3074		tx++;
3075		ETHER_BPF_MTAP(ifp, m);
3076	}
3077
3078	if (tx > 0)
3079		txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
3080
3081	return (0);
3082}
3083
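/*
 * Multiqueue transmit entry point: pick a Tx queue from the mbuf's flowid
 * (or the current CPU when no hash is set) and transmit directly, or defer
 * to the queue's task when the lock is contended.
 */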
3084static int
3085vmxnet3_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
3086{
3087	struct vmxnet3_softc *sc;
3088	struct vmxnet3_txqueue *txq;
3089	int i, ntxq, error;
3090
3091	sc = ifp->if_softc;
3092	ntxq = sc->vmx_ntxqueues;
3093
3094	/* Check if the flowid is set. */
3095	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
3096		i = m->m_pkthdr.flowid % ntxq;
3097	else
3098		i = curcpu % ntxq;
3099
3100	txq = &sc->vmx_txq[i];
3101
3102	if (VMXNET3_TXQ_TRYLOCK(txq) != 0) {
3103		error = vmxnet3_txq_mq_start_locked(txq, m);
3104		VMXNET3_TXQ_UNLOCK(txq);
3105	} else {
3106		error = drbr_enqueue(ifp, txq->vxtxq_br, m);
3107		taskqueue_enqueue(sc->vmx_tq, &txq->vxtxq_defrtask);
3108	}
3109
3110	return (error);
3111}
3112
3113static void
3114vmxnet3_txq_tq_deferred(void *xtxq, int pending)
3115{
3116	struct vmxnet3_softc *sc;
3117	struct vmxnet3_txqueue *txq;
3118
3119	txq = xtxq;
3120	sc = txq->vxtxq_sc;
3121
3122	VMXNET3_TXQ_LOCK(txq);
3123	if (!drbr_empty(sc->vmx_ifp, txq->vxtxq_br))
3124		vmxnet3_txq_mq_start_locked(txq, NULL);
3125	VMXNET3_TXQ_UNLOCK(txq);
3126}
3127
3128#endif /* VMXNET3_LEGACY_TX */
3129
3130static void
3131vmxnet3_txq_start(struct vmxnet3_txqueue *txq)
3132{
3133	struct vmxnet3_softc *sc;
3134	struct ifnet *ifp;
3135
3136	sc = txq->vxtxq_sc;
3137	ifp = sc->vmx_ifp;
3138
3139#ifdef VMXNET3_LEGACY_TX
3140	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
3141		vmxnet3_start_locked(ifp);
3142#else
3143	if (!drbr_empty(ifp, txq->vxtxq_br))
3144		vmxnet3_txq_mq_start_locked(txq, NULL);
3145#endif
3146}
3147
3148static void
3149vmxnet3_tx_start_all(struct vmxnet3_softc *sc)
3150{
3151	struct vmxnet3_txqueue *txq;
3152	int i;
3153
3154	VMXNET3_CORE_LOCK_ASSERT(sc);
3155
3156	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3157		txq = &sc->vmx_txq[i];
3158
3159		VMXNET3_TXQ_LOCK(txq);
3160		vmxnet3_txq_start(txq);
3161		VMXNET3_TXQ_UNLOCK(txq);
3162	}
3163}
3164
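/*
 * Set or clear the bit for a VLAN tag in the driver's filter bitmap and,
 * when hardware VLAN filtering is enabled, push the change to the device.
 */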
3165static void
3166vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
3167{
3168	struct ifnet *ifp;
3169	int idx, bit;
3170
3171	ifp = sc->vmx_ifp;
3172	idx = (tag >> 5) & 0x7F;
3173	bit = tag & 0x1F;
3174
3175	if (tag == 0 || tag > 4095)
3176		return;
3177
3178	VMXNET3_CORE_LOCK(sc);
3179
3180	/* Update our private VLAN bitvector. */
3181	if (add)
3182		sc->vmx_vlan_filter[idx] |= (1 << bit);
3183	else
3184		sc->vmx_vlan_filter[idx] &= ~(1 << bit);
3185
3186	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
3187		if (add)
3188			sc->vmx_ds->vlan_filter[idx] |= (1 << bit);
3189		else
3190			sc->vmx_ds->vlan_filter[idx] &= ~(1 << bit);
3191		vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
3192	}
3193
3194	VMXNET3_CORE_UNLOCK(sc);
3195}
3196
3197static void
3198vmxnet3_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3199{
3200
3201	if (ifp->if_softc == arg)
3202		vmxnet3_update_vlan_filter(arg, 1, tag);
3203}
3204
3205static void
3206vmxnet3_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3207{
3208
3209	if (ifp->if_softc == arg)
3210		vmxnet3_update_vlan_filter(arg, 0, tag);
3211}
3212
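/*
 * Program the Rx mode (unicast and broadcast always, plus promiscuous,
 * all-multicast, or an explicit multicast table) from the interface flags
 * and multicast address list.
 */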
3213static void
3214vmxnet3_set_rxfilter(struct vmxnet3_softc *sc)
3215{
3216	struct ifnet *ifp;
3217	struct vmxnet3_driver_shared *ds;
3218	struct ifmultiaddr *ifma;
3219	u_int mode;
3220
3221	ifp = sc->vmx_ifp;
3222	ds = sc->vmx_ds;
3223
3224	mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
3225	if (ifp->if_flags & IFF_PROMISC)
3226		mode |= VMXNET3_RXMODE_PROMISC;
3227	if (ifp->if_flags & IFF_ALLMULTI)
3228		mode |= VMXNET3_RXMODE_ALLMULTI;
3229	else {
3230		int cnt = 0, overflow = 0;
3231
3232		if_maddr_rlock(ifp);
3233		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3234			if (ifma->ifma_addr->sa_family != AF_LINK)
3235				continue;
3236			else if (cnt == VMXNET3_MULTICAST_MAX) {
3237				overflow = 1;
3238				break;
3239			}
3240
3241			bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
3242			   &sc->vmx_mcast[cnt*ETHER_ADDR_LEN], ETHER_ADDR_LEN);
3243			cnt++;
3244		}
3245		if_maddr_runlock(ifp);
3246
3247		if (overflow != 0) {
3248			cnt = 0;
3249			mode |= VMXNET3_RXMODE_ALLMULTI;
3250		} else if (cnt > 0)
3251			mode |= VMXNET3_RXMODE_MCAST;
3252		ds->mcast_tablelen = cnt * ETHER_ADDR_LEN;
3253	}
3254
3255	ds->rxmode = mode;
3256
3257	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
3258	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
3259}
3260
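/*
 * Validate and apply a new MTU, reinitializing a running interface so the
 * Rx rings are repopulated with appropriately sized clusters.
 */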
3261static int
3262vmxnet3_change_mtu(struct vmxnet3_softc *sc, int mtu)
3263{
3264	struct ifnet *ifp;
3265
3266	ifp = sc->vmx_ifp;
3267
3268	if (mtu < VMXNET3_MIN_MTU || mtu > VMXNET3_MAX_MTU)
3269		return (EINVAL);
3270
3271	ifp->if_mtu = mtu;
3272
3273	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
3274		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3275		vmxnet3_init_locked(sc);
3276	}
3277
3278	return (0);
3279}
3280
3281static int
3282vmxnet3_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
3283{
3284	struct vmxnet3_softc *sc;
3285	struct ifreq *ifr;
3286	int reinit, mask, error;
3287
3288	sc = ifp->if_softc;
3289	ifr = (struct ifreq *) data;
3290	error = 0;
3291
3292	switch (cmd) {
3293	case SIOCSIFMTU:
3294		if (ifp->if_mtu != ifr->ifr_mtu) {
3295			VMXNET3_CORE_LOCK(sc);
3296			error = vmxnet3_change_mtu(sc, ifr->ifr_mtu);
3297			VMXNET3_CORE_UNLOCK(sc);
3298		}
3299		break;
3300
3301	case SIOCSIFFLAGS:
3302		VMXNET3_CORE_LOCK(sc);
3303		if (ifp->if_flags & IFF_UP) {
3304			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3305				if ((ifp->if_flags ^ sc->vmx_if_flags) &
3306				    (IFF_PROMISC | IFF_ALLMULTI)) {
3307					vmxnet3_set_rxfilter(sc);
3308				}
3309			} else
3310				vmxnet3_init_locked(sc);
3311		} else {
3312			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3313				vmxnet3_stop(sc);
3314		}
3315		sc->vmx_if_flags = ifp->if_flags;
3316		VMXNET3_CORE_UNLOCK(sc);
3317		break;
3318
3319	case SIOCADDMULTI:
3320	case SIOCDELMULTI:
3321		VMXNET3_CORE_LOCK(sc);
3322		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3323			vmxnet3_set_rxfilter(sc);
3324		VMXNET3_CORE_UNLOCK(sc);
3325		break;
3326
3327	case SIOCSIFMEDIA:
3328	case SIOCGIFMEDIA:
3329		error = ifmedia_ioctl(ifp, ifr, &sc->vmx_media, cmd);
3330		break;
3331
3332	case SIOCSIFCAP:
3333		VMXNET3_CORE_LOCK(sc);
3334		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3335
3336		if (mask & IFCAP_TXCSUM)
3337			ifp->if_capenable ^= IFCAP_TXCSUM;
3338		if (mask & IFCAP_TXCSUM_IPV6)
3339			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
3340		if (mask & IFCAP_TSO4)
3341			ifp->if_capenable ^= IFCAP_TSO4;
3342		if (mask & IFCAP_TSO6)
3343			ifp->if_capenable ^= IFCAP_TSO6;
3344
3345		if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO |
3346		    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWFILTER)) {
3347			/* Changing these features requires us to reinit. */
3348			reinit = 1;
3349
3350			if (mask & IFCAP_RXCSUM)
3351				ifp->if_capenable ^= IFCAP_RXCSUM;
3352			if (mask & IFCAP_RXCSUM_IPV6)
3353				ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
3354			if (mask & IFCAP_LRO)
3355				ifp->if_capenable ^= IFCAP_LRO;
3356			if (mask & IFCAP_VLAN_HWTAGGING)
3357				ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
3358			if (mask & IFCAP_VLAN_HWFILTER)
3359				ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
3360		} else
3361			reinit = 0;
3362
3363		if (mask & IFCAP_VLAN_HWTSO)
3364			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
3365
3366		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3367			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3368			vmxnet3_init_locked(sc);
3369		} else {
3370			vmxnet3_init_hwassist(sc);
3371		}
3372
3373		VMXNET3_CORE_UNLOCK(sc);
3374		VLAN_CAPABILITIES(ifp);
3375		break;
3376
3377	default:
3378		error = ether_ioctl(ifp, cmd, data);
3379		break;
3380	}
3381
3382	VMXNET3_CORE_LOCK_ASSERT_NOTOWNED(sc);
3383
3384	return (error);
3385}
3386
3387#ifndef VMXNET3_LEGACY_TX
3388static void
3389vmxnet3_qflush(struct ifnet *ifp)
3390{
3391	struct vmxnet3_softc *sc;
3392	struct vmxnet3_txqueue *txq;
3393	struct mbuf *m;
3394	int i;
3395
3396	sc = ifp->if_softc;
3397
3398	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3399		txq = &sc->vmx_txq[i];
3400
3401		VMXNET3_TXQ_LOCK(txq);
3402		while ((m = buf_ring_dequeue_sc(txq->vxtxq_br)) != NULL)
3403			m_freem(m);
3404		VMXNET3_TXQ_UNLOCK(txq);
3405	}
3406
3407	if_qflush(ifp);
3408}
3409#endif
3410
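/*
 * Decrement the Tx queue watchdog and return nonzero once it expires,
 * signaling the caller to reinitialize the interface.
 */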
3411static int
3412vmxnet3_watchdog(struct vmxnet3_txqueue *txq)
3413{
3414	struct vmxnet3_softc *sc;
3415
3416	sc = txq->vxtxq_sc;
3417
3418	VMXNET3_TXQ_LOCK(txq);
3419	if (txq->vxtxq_watchdog == 0 || --txq->vxtxq_watchdog) {
3420		VMXNET3_TXQ_UNLOCK(txq);
3421		return (0);
3422	}
3423	VMXNET3_TXQ_UNLOCK(txq);
3424
3425	if_printf(sc->vmx_ifp, "watchdog timeout on queue %d\n",
3426	    txq->vxtxq_id);
3427	return (1);
3428}
3429
3430static void
3431vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
3432{
3433
3434	vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
3435}
3436
3437static uint64_t
3438vmxnet3_get_counter(struct ifnet *ifp, ift_counter cnt)
3439{
3440	struct vmxnet3_softc *sc;
3441	uint64_t rv;
3442
3443	sc = if_getsoftc(ifp);
3444	rv = 0;
3445
3446	/*
3447	 * With the exception of if_ierrors, these ifnet statistics are
3448	 * only updated in the driver, so just set them to our accumulated
3449	 * values. if_ierrors is updated in ether_input() for malformed
3450	 * frames that we should have already discarded.
3451	 */
3452	switch (cnt) {
3453	case IFCOUNTER_IPACKETS:
3454		for (int i = 0; i < sc->vmx_nrxqueues; i++)
3455			rv += sc->vmx_rxq[i].vxrxq_stats.vmrxs_ipackets;
3456		return (rv);
3457	case IFCOUNTER_IQDROPS:
3458		for (int i = 0; i < sc->vmx_nrxqueues; i++)
3459			rv += sc->vmx_rxq[i].vxrxq_stats.vmrxs_iqdrops;
3460		return (rv);
3461	case IFCOUNTER_IERRORS:
3462		for (int i = 0; i < sc->vmx_nrxqueues; i++)
3463			rv += sc->vmx_rxq[i].vxrxq_stats.vmrxs_ierrors;
3464		return (rv);
3465	case IFCOUNTER_OPACKETS:
3466		for (int i = 0; i < sc->vmx_ntxqueues; i++)
3467			rv += sc->vmx_txq[i].vxtxq_stats.vmtxs_opackets;
3468		return (rv);
3469#ifndef VMXNET3_LEGACY_TX
3470	case IFCOUNTER_OBYTES:
3471		for (int i = 0; i < sc->vmx_ntxqueues; i++)
3472			rv += sc->vmx_txq[i].vxtxq_stats.vmtxs_obytes;
3473		return (rv);
3474	case IFCOUNTER_OMCASTS:
3475		for (int i = 0; i < sc->vmx_ntxqueues; i++)
3476			rv += sc->vmx_txq[i].vxtxq_stats.vmtxs_omcasts;
3477		return (rv);
3478#endif
3479	default:
3480		return (if_get_counter_default(ifp, cnt));
3481	}
3482}
3483
3484static void
3485vmxnet3_tick(void *xsc)
3486{
3487	struct vmxnet3_softc *sc;
3488	struct ifnet *ifp;
3489	int i, timedout;
3490
3491	sc = xsc;
3492	ifp = sc->vmx_ifp;
3493	timedout = 0;
3494
3495	VMXNET3_CORE_LOCK_ASSERT(sc);
3496
3497	vmxnet3_refresh_host_stats(sc);
3498
3499	for (i = 0; i < sc->vmx_ntxqueues; i++)
3500		timedout |= vmxnet3_watchdog(&sc->vmx_txq[i]);
3501
3502	if (timedout != 0) {
3503		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3504		vmxnet3_init_locked(sc);
3505	} else
3506		callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
3507}
3508
3509static int
3510vmxnet3_link_is_up(struct vmxnet3_softc *sc)
3511{
3512	uint32_t status;
3513
3514	/* Also update the link speed while here. */
3515	status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
3516	sc->vmx_link_speed = status >> 16;
3517	return !!(status & 0x1);
3518}
3519
3520static void
3521vmxnet3_link_status(struct vmxnet3_softc *sc)
3522{
3523	struct ifnet *ifp;
3524	int link;
3525
3526	ifp = sc->vmx_ifp;
3527	link = vmxnet3_link_is_up(sc);
3528
3529	if (link != 0 && sc->vmx_link_active == 0) {
3530		sc->vmx_link_active = 1;
3531		if_link_state_change(ifp, LINK_STATE_UP);
3532	} else if (link == 0 && sc->vmx_link_active != 0) {
3533		sc->vmx_link_active = 0;
3534		if_link_state_change(ifp, LINK_STATE_DOWN);
3535	}
3536}
3537
3538static void
3539vmxnet3_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3540{
3541	struct vmxnet3_softc *sc;
3542
3543	sc = ifp->if_softc;
3544
3545	ifmr->ifm_status = IFM_AVALID;
3546	ifmr->ifm_active = IFM_ETHER;
3547
3548	VMXNET3_CORE_LOCK(sc);
3549	if (vmxnet3_link_is_up(sc) != 0) {
3550		ifmr->ifm_status |= IFM_ACTIVE;
3551		ifmr->ifm_active |= IFM_AUTO;
3552	} else
3553		ifmr->ifm_active |= IFM_NONE;
3554	VMXNET3_CORE_UNLOCK(sc);
3555}
3556
3557static int
3558vmxnet3_media_change(struct ifnet *ifp)
3559{
3560
3561	/* Ignore. */
3562	return (0);
3563}
3564
3565static void
3566vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
3567{
3568	uint32_t ml, mh;
3569
3570	ml  = sc->vmx_lladdr[0];
3571	ml |= sc->vmx_lladdr[1] << 8;
3572	ml |= sc->vmx_lladdr[2] << 16;
3573	ml |= sc->vmx_lladdr[3] << 24;
3574	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
3575
3576	mh  = sc->vmx_lladdr[4];
3577	mh |= sc->vmx_lladdr[5] << 8;
3578	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
3579}
3580
3581static void
3582vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
3583{
3584	uint32_t ml, mh;
3585
3586	ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
3587	mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
3588
3589	sc->vmx_lladdr[0] = ml;
3590	sc->vmx_lladdr[1] = ml >> 8;
3591	sc->vmx_lladdr[2] = ml >> 16;
3592	sc->vmx_lladdr[3] = ml >> 24;
3593	sc->vmx_lladdr[4] = mh;
3594	sc->vmx_lladdr[5] = mh >> 8;
3595}
3596
3597static void
3598vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
3599    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3600{
3601	struct sysctl_oid *node, *txsnode;
3602	struct sysctl_oid_list *list, *txslist;
3603	struct vmxnet3_txq_stats *stats;
3604	struct UPT1_TxStats *txstats;
3605	char namebuf[16];
3606
3607	stats = &txq->vxtxq_stats;
3608	txstats = &txq->vxtxq_ts->stats;
3609
3610	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id);
3611	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
3612	    NULL, "Transmit Queue");
3613	txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);
3614
3615	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
3616	    &stats->vmtxs_opackets, "Transmit packets");
3617	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
3618	    &stats->vmtxs_obytes, "Transmit bytes");
3619	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
3620	    &stats->vmtxs_omcasts, "Transmit multicasts");
3621	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3622	    &stats->vmtxs_csum, "Transmit checksum offloaded");
3623	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
3624	    &stats->vmtxs_tso, "Transmit TCP segmentation offloaded");
3625	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ringfull", CTLFLAG_RD,
3626	    &stats->vmtxs_full, "Transmit ring full");
3627	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "offload_failed", CTLFLAG_RD,
3628	    &stats->vmtxs_offload_failed, "Transmit checksum offload failed");
3629
3630	/*
3631	 * Add statistics reported by the host. These are updated once
3632	 * per second.
3633	 */
3634	txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
3635	    NULL, "Host Statistics");
3636	txslist = SYSCTL_CHILDREN(txsnode);
3637	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
3638	    &txstats->TSO_packets, "TSO packets");
3639	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
3640	    &txstats->TSO_bytes, "TSO bytes");
3641	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
3642	    &txstats->ucast_packets, "Unicast packets");
3643	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
3644	    &txstats->ucast_bytes, "Unicast bytes");
3645	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
3646	    &txstats->mcast_packets, "Multicast packets");
3647	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
3648	    &txstats->mcast_bytes, "Multicast bytes");
3649	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
3650	    &txstats->error, "Errors");
3651	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
3652	    &txstats->discard, "Discards");
3653}
3654
3655static void
3656vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
3657    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3658{
3659	struct sysctl_oid *node, *rxsnode;
3660	struct sysctl_oid_list *list, *rxslist;
3661	struct vmxnet3_rxq_stats *stats;
3662	struct UPT1_RxStats *rxstats;
3663	char namebuf[16];
3664
3665	stats = &rxq->vxrxq_stats;
3666	rxstats = &rxq->vxrxq_rs->stats;
3667
3668	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
3669	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
3670	    NULL, "Receive Queue");
3671	rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);
3672
3673	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
3674	    &stats->vmrxs_ipackets, "Receive packets");
3675	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
3676	    &stats->vmrxs_ibytes, "Receive bytes");
3677	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
3678	    &stats->vmrxs_iqdrops, "Receive drops");
3679	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
3680	    &stats->vmrxs_ierrors, "Receive errors");
3681
3682	/*
3683	 * Add statistics reported by the host. These are updated once
3684	 * per second.
3685	 */
3686	rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
3687	    NULL, "Host Statistics");
3688	rxslist = SYSCTL_CHILDREN(rxsnode);
3689	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
3690	    &rxstats->LRO_packets, "LRO packets");
3691	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
3692	    &rxstats->LRO_bytes, "LRO bytes");
3693	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
3694	    &rxstats->ucast_packets, "Unicast packets");
3695	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
3696	    &rxstats->ucast_bytes, "Unicast bytes");
3697	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
3698	    &rxstats->mcast_packets, "Multicast packets");
3699	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
3700	    &rxstats->mcast_bytes, "Multicast bytes");
3701	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
3702	    &rxstats->bcast_packets, "Broadcast packets");
3703	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
3704	    &rxstats->bcast_bytes, "Broadcast bytes");
3705	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
3706	    &rxstats->nobuffer, "No buffer");
3707	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
3708	    &rxstats->error, "Errors");
3709}
3710
3711static void
3712vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
3713    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3714{
3715	struct sysctl_oid *node;
3716	struct sysctl_oid_list *list;
3717	int i;
3718
3719	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3720		struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];
3721
3722		node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
3723		    "debug", CTLFLAG_RD, NULL, "");
3724		list = SYSCTL_CHILDREN(node);
3725
3726		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_head", CTLFLAG_RD,
3727		    &txq->vxtxq_cmd_ring.vxtxr_head, 0, "");
3728		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
3729		    &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
3730		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
3731		    &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
3732		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
3733		    &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
3734		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
3735		    &txq->vxtxq_comp_ring.vxcr_next, 0, "");
3736		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
3737		    &txq->vxtxq_comp_ring.vxcr_ndesc, 0, "");
3738		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
3739		    &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
3740	}
3741
3742	for (i = 0; i < sc->vmx_nrxqueues; i++) {
3743		struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];
3744
3745		node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
3746		    "debug", CTLFLAG_RD, NULL, "");
3747		list = SYSCTL_CHILDREN(node);
3748
3749		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_fill", CTLFLAG_RD,
3750		    &rxq->vxrxq_cmd_ring[0].vxrxr_fill, 0, "");
3751		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
3752		    &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
3753		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
3754		    &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
3755		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_fill", CTLFLAG_RD,
3756		    &rxq->vxrxq_cmd_ring[1].vxrxr_fill, 0, "");
3757		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
3758		    &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
3759		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
3760		    &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
3761		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
3762		    &rxq->vxrxq_comp_ring.vxcr_next, 0, "");
3763		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
3764		    &rxq->vxrxq_comp_ring.vxcr_ndesc, 0, "");
3765		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
3766		    &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
3767	}
3768}
3769
3770static void
3771vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
3772    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3773{
3774	int i;
3775
3776	for (i = 0; i < sc->vmx_ntxqueues; i++)
3777		vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
3778	for (i = 0; i < sc->vmx_nrxqueues; i++)
3779		vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);
3780
3781	vmxnet3_setup_debug_sysctl(sc, ctx, child);
3782}
3783
3784static void
3785vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
3786{
3787	device_t dev;
3788	struct vmxnet3_statistics *stats;
3789	struct sysctl_ctx_list *ctx;
3790	struct sysctl_oid *tree;
3791	struct sysctl_oid_list *child;
3792
3793	dev = sc->vmx_dev;
3794	ctx = device_get_sysctl_ctx(dev);
3795	tree = device_get_sysctl_tree(dev);
3796	child = SYSCTL_CHILDREN(tree);
3797
3798	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_ntxqueues", CTLFLAG_RD,
3799	    &sc->vmx_max_ntxqueues, 0, "Maximum number of Tx queues");
3800	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_nrxqueues", CTLFLAG_RD,
3801	    &sc->vmx_max_nrxqueues, 0, "Maximum number of Rx queues");
3802	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "ntxqueues", CTLFLAG_RD,
3803	    &sc->vmx_ntxqueues, 0, "Number of Tx queues");
3804	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "nrxqueues", CTLFLAG_RD,
3805	    &sc->vmx_nrxqueues, 0, "Number of Rx queues");
3806
3807	stats = &sc->vmx_stats;
3808	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defragged", CTLFLAG_RD,
3809	    &stats->vmst_defragged, 0, "Tx mbuf chains defragged");
3810	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defrag_failed", CTLFLAG_RD,
3811	    &stats->vmst_defrag_failed, 0,
3812	    "Tx mbuf dropped because defrag failed");
3813	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mgetcl_failed", CTLFLAG_RD,
3814	    &stats->vmst_mgetcl_failed, 0, "mbuf cluster allocation failed");
3815	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mbuf_load_failed", CTLFLAG_RD,
3816	    &stats->vmst_mbuf_load_failed, 0, "mbuf load segments failed");
3817
3818	vmxnet3_setup_queue_sysctl(sc, ctx, child);
3819}
3820
3821static void
3822vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
3823{
3824
3825	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
3826}
3827
3828static uint32_t
3829vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
3830{
3831
3832	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
3833}
3834
3835static void
3836vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
3837{
3838
3839	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
3840}
3841
3842static void
3843vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
3844{
3845
3846	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
3847}
3848
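/*
 * Issue a command by writing the BAR1 command register, then read the
 * register back to retrieve the result.
 */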
3849static uint32_t
3850vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
3851{
3852
3853	vmxnet3_write_cmd(sc, cmd);
3854	bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
3855	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
3856	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
3857}
3858
3859static void
3860vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
3861{
3862
3863	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
3864}
3865
3866static void
3867vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
3868{
3869
3870	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
3871}
3872
3873static void
3874vmxnet3_enable_all_intrs(struct vmxnet3_softc *sc)
3875{
3876	int i;
3877
3878	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
3879	for (i = 0; i < sc->vmx_nintrs; i++)
3880		vmxnet3_enable_intr(sc, i);
3881}
3882
3883static void
3884vmxnet3_disable_all_intrs(struct vmxnet3_softc *sc)
3885{
3886	int i;
3887
3888	sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
3889	for (i = 0; i < sc->vmx_nintrs; i++)
3890		vmxnet3_disable_intr(sc, i);
3891}
3892
3893static void
3894vmxnet3_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3895{
3896	bus_addr_t *baddr = arg;
3897
3898	if (error == 0)
3899		*baddr = segs->ds_addr;
3900}
3901
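/*
 * Allocate a single physically contiguous, aligned DMA buffer and record
 * its tag, map, and bus/virtual addresses in *dma.
 */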
3902static int
3903vmxnet3_dma_malloc(struct vmxnet3_softc *sc, bus_size_t size, bus_size_t align,
3904    struct vmxnet3_dma_alloc *dma)
3905{
3906	device_t dev;
3907	int error;
3908
3909	dev = sc->vmx_dev;
3910	bzero(dma, sizeof(struct vmxnet3_dma_alloc));
3911
3912	error = bus_dma_tag_create(bus_get_dma_tag(dev),
3913	    align, 0,		/* alignment, bounds */
3914	    BUS_SPACE_MAXADDR,	/* lowaddr */
3915	    BUS_SPACE_MAXADDR,	/* highaddr */
3916	    NULL, NULL,		/* filter, filterarg */
3917	    size,		/* maxsize */
3918	    1,			/* nsegments */
3919	    size,		/* maxsegsize */
3920	    BUS_DMA_ALLOCNOW,	/* flags */
3921	    NULL,		/* lockfunc */
3922	    NULL,		/* lockfuncarg */
3923	    &dma->dma_tag);
3924	if (error) {
3925		device_printf(dev, "bus_dma_tag_create failed: %d\n", error);
3926		goto fail;
3927	}
3928
3929	error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
3930	    BUS_DMA_ZERO | BUS_DMA_NOWAIT, &dma->dma_map);
3931	if (error) {
3932		device_printf(dev, "bus_dmamem_alloc failed: %d\n", error);
3933		goto fail;
3934	}
3935
3936	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3937	    size, vmxnet3_dmamap_cb, &dma->dma_paddr, BUS_DMA_NOWAIT);
3938	if (error) {
3939		device_printf(dev, "bus_dmamap_load failed: %d\n", error);
3940		goto fail;
3941	}
3942
3943	dma->dma_size = size;
3944
3945fail:
3946	if (error)
3947		vmxnet3_dma_free(sc, dma);
3948
3949	return (error);
3950}
3951
3952static void
3953vmxnet3_dma_free(struct vmxnet3_softc *sc, struct vmxnet3_dma_alloc *dma)
3954{
3955
3956	if (dma->dma_tag != NULL) {
3957		if (dma->dma_paddr != 0) {
3958			bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3959			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3960			bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3961		}
3962
3963		if (dma->dma_vaddr != NULL) {
3964			bus_dmamem_free(dma->dma_tag, dma->dma_vaddr,
3965			    dma->dma_map);
3966		}
3967
3968		bus_dma_tag_destroy(dma->dma_tag);
3969	}
3970	bzero(dma, sizeof(struct vmxnet3_dma_alloc));
3971}
3972
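/*
 * Fetch the per-device loader tunable hw.vmx.<unit>.<knob>, returning the
 * supplied default when it is not set.
 */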
3973static int
3974vmxnet3_tunable_int(struct vmxnet3_softc *sc, const char *knob, int def)
3975{
3976	char path[64];
3977
3978	snprintf(path, sizeof(path),
3979	    "hw.vmx.%d.%s", device_get_unit(sc->vmx_dev), knob);
3980	TUNABLE_INT_FETCH(path, &def);
3981
3982	return (def);
3983}
3984
3985/*
3986 * Since this is a purely paravirtualized device, we do not have
3987 * to worry about DMA coherency. But at times, we must make sure
3988 * both the compiler and CPU do not reorder memory operations.
3989 */
3990static inline void
3991vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
3992{
3993
3994	switch (type) {
3995	case VMXNET3_BARRIER_RD:
3996		rmb();
3997		break;
3998	case VMXNET3_BARRIER_WR:
3999		wmb();
4000		break;
4001	case VMXNET3_BARRIER_RDWR:
4002		mb();
4003		break;
4004	default:
4005		panic("%s: bad barrier type %d", __func__, type);
4006	}
4007}
4008