1/*-
2 * Copyright (c) 2013 Tsubai Masanari
3 * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 *
17 * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $
18 */
19
20/* Driver for VMware vmxnet3 virtual ethernet devices. */
21
22#include <sys/cdefs.h>
23__FBSDID("$FreeBSD$");
24
25#include <sys/param.h>
26#include <sys/systm.h>
27#include <sys/kernel.h>
28#include <sys/endian.h>
29#include <sys/sockio.h>
30#include <sys/mbuf.h>
31#include <sys/malloc.h>
32#include <sys/module.h>
33#include <sys/socket.h>
34#include <sys/sysctl.h>
35#include <sys/smp.h>
36#include <sys/taskqueue.h>
37#include <vm/vm.h>
38#include <vm/pmap.h>
39
40#include <net/ethernet.h>
41#include <net/if.h>
42#include <net/if_arp.h>
43#include <net/if_dl.h>
44#include <net/if_types.h>
45#include <net/if_media.h>
46#include <net/if_vlan_var.h>
47
48#include <net/bpf.h>
49
50#include <netinet/in_systm.h>
51#include <netinet/in.h>
52#include <netinet/ip.h>
53#include <netinet/ip6.h>
54#include <netinet6/ip6_var.h>
55#include <netinet/udp.h>
56#include <netinet/tcp.h>
57
58#include <machine/in_cksum.h>
59
60#include <machine/bus.h>
61#include <machine/resource.h>
62#include <sys/bus.h>
63#include <sys/rman.h>
64
65#include <dev/pci/pcireg.h>
66#include <dev/pci/pcivar.h>
67
68#include "if_vmxreg.h"
69#include "if_vmxvar.h"
70
71#include "opt_inet.h"
72#include "opt_inet6.h"
73
74#ifdef VMXNET3_FAILPOINTS
75#include <sys/fail.h>
76static SYSCTL_NODE(DEBUG_FP, OID_AUTO, vmxnet3, CTLFLAG_RW, 0,
77    "vmxnet3 fail points");
78#define VMXNET3_FP	_debug_fail_point_vmxnet3
79#endif
80
81static int	vmxnet3_probe(device_t);
82static int	vmxnet3_attach(device_t);
83static int	vmxnet3_detach(device_t);
84static int	vmxnet3_shutdown(device_t);
85
86static int	vmxnet3_alloc_resources(struct vmxnet3_softc *);
87static void	vmxnet3_free_resources(struct vmxnet3_softc *);
88static int	vmxnet3_check_version(struct vmxnet3_softc *);
89static void	vmxnet3_initial_config(struct vmxnet3_softc *);
90static void	vmxnet3_check_multiqueue(struct vmxnet3_softc *);
91
92static int	vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *);
93static int	vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *);
94static int	vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *);
95static int	vmxnet3_alloc_interrupt(struct vmxnet3_softc *, int, int,
96		    struct vmxnet3_interrupt *);
97static int	vmxnet3_alloc_intr_resources(struct vmxnet3_softc *);
98static int	vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *);
99static int	vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *);
100static int	vmxnet3_setup_interrupts(struct vmxnet3_softc *);
101static int	vmxnet3_alloc_interrupts(struct vmxnet3_softc *);
102
103static void	vmxnet3_free_interrupt(struct vmxnet3_softc *,
104		    struct vmxnet3_interrupt *);
105static void	vmxnet3_free_interrupts(struct vmxnet3_softc *);
106
107#ifndef VMXNET3_LEGACY_TX
108static int	vmxnet3_alloc_taskqueue(struct vmxnet3_softc *);
109static void	vmxnet3_start_taskqueue(struct vmxnet3_softc *);
110static void	vmxnet3_drain_taskqueue(struct vmxnet3_softc *);
111static void	vmxnet3_free_taskqueue(struct vmxnet3_softc *);
112#endif
113
114static int	vmxnet3_init_rxq(struct vmxnet3_softc *, int);
115static int	vmxnet3_init_txq(struct vmxnet3_softc *, int);
116static int	vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *);
117static void	vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *);
118static void	vmxnet3_destroy_txq(struct vmxnet3_txqueue *);
119static void	vmxnet3_free_rxtx_queues(struct vmxnet3_softc *);
120
121static int	vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
122static void	vmxnet3_free_shared_data(struct vmxnet3_softc *);
123static int	vmxnet3_alloc_txq_data(struct vmxnet3_softc *);
124static void	vmxnet3_free_txq_data(struct vmxnet3_softc *);
125static int	vmxnet3_alloc_rxq_data(struct vmxnet3_softc *);
126static void	vmxnet3_free_rxq_data(struct vmxnet3_softc *);
127static int	vmxnet3_alloc_queue_data(struct vmxnet3_softc *);
128static void	vmxnet3_free_queue_data(struct vmxnet3_softc *);
129static int	vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
130static void	vmxnet3_init_shared_data(struct vmxnet3_softc *);
131static void	vmxnet3_reinit_interface(struct vmxnet3_softc *);
132static void	vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
133static void	vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
134static int	vmxnet3_alloc_data(struct vmxnet3_softc *);
135static void	vmxnet3_free_data(struct vmxnet3_softc *);
136static int	vmxnet3_setup_interface(struct vmxnet3_softc *);
137
138static void	vmxnet3_evintr(struct vmxnet3_softc *);
139static void	vmxnet3_txq_eof(struct vmxnet3_txqueue *);
140static void	vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *, struct mbuf *);
141static int	vmxnet3_newbuf(struct vmxnet3_softc *, struct vmxnet3_rxring *);
142static void	vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *,
143		    struct vmxnet3_rxring *, int);
144static void	vmxnet3_rxq_eof(struct vmxnet3_rxqueue *);
145static void	vmxnet3_legacy_intr(void *);
146static void	vmxnet3_txq_intr(void *);
147static void	vmxnet3_rxq_intr(void *);
148static void	vmxnet3_event_intr(void *);
149
150static void	vmxnet3_txstop(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
151static void	vmxnet3_rxstop(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
152static void	vmxnet3_stop(struct vmxnet3_softc *);
153
154static void	vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
155static int	vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
156static int	vmxnet3_reinit_queues(struct vmxnet3_softc *);
157static int	vmxnet3_enable_device(struct vmxnet3_softc *);
158static void	vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
159static int	vmxnet3_reinit(struct vmxnet3_softc *);
160static void	vmxnet3_init_locked(struct vmxnet3_softc *);
161static void	vmxnet3_init(void *);
162
static int	vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *, struct mbuf *,
		    int *, int *, int *);
165static int	vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *, struct mbuf **,
166		    bus_dmamap_t, bus_dma_segment_t [], int *);
167static void	vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *, bus_dmamap_t);
168static int	vmxnet3_txq_encap(struct vmxnet3_txqueue *, struct mbuf **);
169#ifdef VMXNET3_LEGACY_TX
170static void	vmxnet3_start_locked(struct ifnet *);
171static void	vmxnet3_start(struct ifnet *);
172#else
173static int	vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *,
174		    struct mbuf *);
175static int	vmxnet3_txq_mq_start(struct ifnet *, struct mbuf *);
176static void	vmxnet3_txq_tq_deferred(void *, int);
177#endif
178static void	vmxnet3_txq_start(struct vmxnet3_txqueue *);
179static void	vmxnet3_tx_start_all(struct vmxnet3_softc *);
180
181static void	vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
182		    uint16_t);
183static void	vmxnet3_register_vlan(void *, struct ifnet *, uint16_t);
184static void	vmxnet3_unregister_vlan(void *, struct ifnet *, uint16_t);
185static void	vmxnet3_set_rxfilter(struct vmxnet3_softc *);
186static int	vmxnet3_change_mtu(struct vmxnet3_softc *, int);
187static int	vmxnet3_ioctl(struct ifnet *, u_long, caddr_t);
188
189#ifndef VMXNET3_LEGACY_TX
190static void	vmxnet3_qflush(struct ifnet *);
191#endif
192
193static int	vmxnet3_watchdog(struct vmxnet3_txqueue *);
194static void	vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
195static void	vmxnet3_txq_accum_stats(struct vmxnet3_txqueue *,
196		    struct vmxnet3_txq_stats *);
197static void	vmxnet3_rxq_accum_stats(struct vmxnet3_rxqueue *,
198		    struct vmxnet3_rxq_stats *);
199static void	vmxnet3_tick(void *);
200static void	vmxnet3_link_status(struct vmxnet3_softc *);
201static void	vmxnet3_media_status(struct ifnet *, struct ifmediareq *);
202static int	vmxnet3_media_change(struct ifnet *);
203static void	vmxnet3_set_lladdr(struct vmxnet3_softc *);
204static void	vmxnet3_get_lladdr(struct vmxnet3_softc *);
205
206static void	vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *,
207		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
208static void	vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *,
209		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
210static void	vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *,
211		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
212static void	vmxnet3_setup_sysctl(struct vmxnet3_softc *);
213
214static void	vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t,
215		    uint32_t);
216static uint32_t	vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t);
217static void	vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t,
218		    uint32_t);
219static void	vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t);
220static uint32_t	vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t);
221
222static void	vmxnet3_enable_intr(struct vmxnet3_softc *, int);
223static void	vmxnet3_disable_intr(struct vmxnet3_softc *, int);
224static void	vmxnet3_enable_all_intrs(struct vmxnet3_softc *);
225static void	vmxnet3_disable_all_intrs(struct vmxnet3_softc *);
226
227static int	vmxnet3_dma_malloc(struct vmxnet3_softc *, bus_size_t,
228		    bus_size_t, struct vmxnet3_dma_alloc *);
229static void	vmxnet3_dma_free(struct vmxnet3_softc *,
230		    struct vmxnet3_dma_alloc *);
231static int	vmxnet3_tunable_int(struct vmxnet3_softc *,
232		    const char *, int);
233
234typedef enum {
235	VMXNET3_BARRIER_RD,
236	VMXNET3_BARRIER_WR,
237	VMXNET3_BARRIER_RDWR,
238} vmxnet3_barrier_t;
239
240static void	vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);
241
242/* Tunables. */
243static int vmxnet3_mq_disable = 0;
244TUNABLE_INT("hw.vmx.mq_disable", &vmxnet3_mq_disable);
245static int vmxnet3_default_txnqueue = VMXNET3_DEF_TX_QUEUES;
246TUNABLE_INT("hw.vmx.txnqueue", &vmxnet3_default_txnqueue);
247static int vmxnet3_default_rxnqueue = VMXNET3_DEF_RX_QUEUES;
248TUNABLE_INT("hw.vmx.rxnqueue", &vmxnet3_default_rxnqueue);
249static int vmxnet3_default_txndesc = VMXNET3_DEF_TX_NDESC;
250TUNABLE_INT("hw.vmx.txndesc", &vmxnet3_default_txndesc);
251static int vmxnet3_default_rxndesc = VMXNET3_DEF_RX_NDESC;
252TUNABLE_INT("hw.vmx.rxndesc", &vmxnet3_default_rxndesc);
253
254static device_method_t vmxnet3_methods[] = {
255	/* Device interface. */
256	DEVMETHOD(device_probe,		vmxnet3_probe),
257	DEVMETHOD(device_attach,	vmxnet3_attach),
258	DEVMETHOD(device_detach,	vmxnet3_detach),
259	DEVMETHOD(device_shutdown,	vmxnet3_shutdown),
260
261	DEVMETHOD_END
262};
263
264static driver_t vmxnet3_driver = {
265	"vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc)
266};
267
268static devclass_t vmxnet3_devclass;
269DRIVER_MODULE(vmx, pci, vmxnet3_driver, vmxnet3_devclass, 0, 0);
270
271MODULE_DEPEND(vmx, pci, 1, 1, 1);
272MODULE_DEPEND(vmx, ether, 1, 1, 1);
273
274#define VMXNET3_VMWARE_VENDOR_ID	0x15AD
275#define VMXNET3_VMWARE_DEVICE_ID	0x07B0
276
277static int
278vmxnet3_probe(device_t dev)
279{
280
281	if (pci_get_vendor(dev) == VMXNET3_VMWARE_VENDOR_ID &&
282	    pci_get_device(dev) == VMXNET3_VMWARE_DEVICE_ID) {
283		device_set_desc(dev, "VMware VMXNET3 Ethernet Adapter");
284		return (BUS_PROBE_DEFAULT);
285	}
286
287	return (ENXIO);
288}
289
290static int
291vmxnet3_attach(device_t dev)
292{
293	struct vmxnet3_softc *sc;
294	int error;
295
296	sc = device_get_softc(dev);
297	sc->vmx_dev = dev;
298
299	pci_enable_busmaster(dev);
300
301	VMXNET3_CORE_LOCK_INIT(sc, device_get_nameunit(dev));
302	callout_init_mtx(&sc->vmx_tick, &sc->vmx_mtx, 0);
303
304	vmxnet3_initial_config(sc);
305
306	error = vmxnet3_alloc_resources(sc);
307	if (error)
308		goto fail;
309
310	error = vmxnet3_check_version(sc);
311	if (error)
312		goto fail;
313
314	error = vmxnet3_alloc_rxtx_queues(sc);
315	if (error)
316		goto fail;
317
318#ifndef VMXNET3_LEGACY_TX
319	error = vmxnet3_alloc_taskqueue(sc);
320	if (error)
321		goto fail;
322#endif
323
324	error = vmxnet3_alloc_interrupts(sc);
325	if (error)
326		goto fail;
327
328	vmxnet3_check_multiqueue(sc);
329
330	error = vmxnet3_alloc_data(sc);
331	if (error)
332		goto fail;
333
334	error = vmxnet3_setup_interface(sc);
335	if (error)
336		goto fail;
337
338	error = vmxnet3_setup_interrupts(sc);
339	if (error) {
340		ether_ifdetach(sc->vmx_ifp);
341		device_printf(dev, "could not set up interrupt\n");
342		goto fail;
343	}
344
345	vmxnet3_setup_sysctl(sc);
346#ifndef VMXNET3_LEGACY_TX
347	vmxnet3_start_taskqueue(sc);
348#endif
349
350fail:
351	if (error)
352		vmxnet3_detach(dev);
353
354	return (error);
355}
356
357static int
358vmxnet3_detach(device_t dev)
359{
360	struct vmxnet3_softc *sc;
361	struct ifnet *ifp;
362
363	sc = device_get_softc(dev);
364	ifp = sc->vmx_ifp;
365
366	if (device_is_attached(dev)) {
367		VMXNET3_CORE_LOCK(sc);
368		vmxnet3_stop(sc);
369		VMXNET3_CORE_UNLOCK(sc);
370
371		callout_drain(&sc->vmx_tick);
372#ifndef VMXNET3_LEGACY_TX
373		vmxnet3_drain_taskqueue(sc);
374#endif
375
376		ether_ifdetach(ifp);
377	}
378
379	if (sc->vmx_vlan_attach != NULL) {
380		EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_attach);
381		sc->vmx_vlan_attach = NULL;
382	}
383	if (sc->vmx_vlan_detach != NULL) {
384		EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_detach);
385		sc->vmx_vlan_detach = NULL;
386	}
387
388#ifndef VMXNET3_LEGACY_TX
389	vmxnet3_free_taskqueue(sc);
390#endif
391	vmxnet3_free_interrupts(sc);
392
393	if (ifp != NULL) {
394		if_free(ifp);
395		sc->vmx_ifp = NULL;
396	}
397
398	ifmedia_removeall(&sc->vmx_media);
399
400	vmxnet3_free_data(sc);
401	vmxnet3_free_resources(sc);
402	vmxnet3_free_rxtx_queues(sc);
403
404	VMXNET3_CORE_LOCK_DESTROY(sc);
405
406	return (0);
407}
408
409static int
410vmxnet3_shutdown(device_t dev)
411{
412
413	return (0);
414}
415
416static int
417vmxnet3_alloc_resources(struct vmxnet3_softc *sc)
418{
419	device_t dev;
420	int rid;
421
422	dev = sc->vmx_dev;
423
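	/*
	 * BAR0 holds the queue doorbell and interrupt mask registers; BAR1
	 * holds the command and configuration registers accessed through
	 * vmxnet3_read_bar1()/vmxnet3_write_bar1().  A third BAR is mapped
	 * only when the device advertises MSI-X capability.
	 */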
424	rid = PCIR_BAR(0);
425	sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
426	    RF_ACTIVE);
427	if (sc->vmx_res0 == NULL) {
428		device_printf(dev,
429		    "could not map BAR0 memory\n");
430		return (ENXIO);
431	}
432
433	sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0);
434	sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0);
435
436	rid = PCIR_BAR(1);
437	sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
438	    RF_ACTIVE);
439	if (sc->vmx_res1 == NULL) {
440		device_printf(dev,
441		    "could not map BAR1 memory\n");
442		return (ENXIO);
443	}
444
445	sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1);
446	sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1);
447
448	if (pci_find_cap(dev, PCIY_MSIX, NULL) == 0) {
449		rid = PCIR_BAR(2);
450		sc->vmx_msix_res = bus_alloc_resource_any(dev,
451		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
452	}
453
454	if (sc->vmx_msix_res == NULL)
455		sc->vmx_flags |= VMXNET3_FLAG_NO_MSIX;
456
457	return (0);
458}
459
460static void
461vmxnet3_free_resources(struct vmxnet3_softc *sc)
462{
463	device_t dev;
464	int rid;
465
466	dev = sc->vmx_dev;
467
468	if (sc->vmx_res0 != NULL) {
469		rid = PCIR_BAR(0);
470		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res0);
471		sc->vmx_res0 = NULL;
472	}
473
474	if (sc->vmx_res1 != NULL) {
475		rid = PCIR_BAR(1);
476		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res1);
477		sc->vmx_res1 = NULL;
478	}
479
480	if (sc->vmx_msix_res != NULL) {
481		rid = PCIR_BAR(2);
482		bus_release_resource(dev, SYS_RES_MEMORY, rid,
483		    sc->vmx_msix_res);
484		sc->vmx_msix_res = NULL;
485	}
486}
487
488static int
489vmxnet3_check_version(struct vmxnet3_softc *sc)
490{
491	device_t dev;
492	uint32_t version;
493
494	dev = sc->vmx_dev;
495
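	/*
	 * The VRRS and UVRS registers advertise the device and UPT API
	 * revisions supported by the hypervisor as a bitmask; bit 0 means
	 * revision 1.  Writing the revision back selects it.
	 */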
496	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
497	if ((version & 0x01) == 0) {
498		device_printf(dev, "unsupported hardware version %#x\n",
499		    version);
500		return (ENOTSUP);
501	}
502	vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
503
504	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
505	if ((version & 0x01) == 0) {
506		device_printf(dev, "unsupported UPT version %#x\n", version);
507		return (ENOTSUP);
508	}
509	vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
510
511	return (0);
512}
513
514static void
515vmxnet3_initial_config(struct vmxnet3_softc *sc)
516{
517	int nqueue, ndesc;
518
519	nqueue = vmxnet3_tunable_int(sc, "txnqueue", vmxnet3_default_txnqueue);
520	if (nqueue > VMXNET3_MAX_TX_QUEUES || nqueue < 1)
521		nqueue = VMXNET3_DEF_TX_QUEUES;
522	if (nqueue > mp_ncpus)
523		nqueue = mp_ncpus;
524	sc->vmx_max_ntxqueues = nqueue;
525
526	nqueue = vmxnet3_tunable_int(sc, "rxnqueue", vmxnet3_default_rxnqueue);
527	if (nqueue > VMXNET3_MAX_RX_QUEUES || nqueue < 1)
528		nqueue = VMXNET3_DEF_RX_QUEUES;
529	if (nqueue > mp_ncpus)
530		nqueue = mp_ncpus;
531	sc->vmx_max_nrxqueues = nqueue;
532
533	if (vmxnet3_tunable_int(sc, "mq_disable", vmxnet3_mq_disable)) {
534		sc->vmx_max_nrxqueues = 1;
535		sc->vmx_max_ntxqueues = 1;
536	}
537
538	ndesc = vmxnet3_tunable_int(sc, "txd", vmxnet3_default_txndesc);
539	if (ndesc > VMXNET3_MAX_TX_NDESC || ndesc < VMXNET3_MIN_TX_NDESC)
540		ndesc = VMXNET3_DEF_TX_NDESC;
541	if (ndesc & VMXNET3_MASK_TX_NDESC)
542		ndesc &= ~VMXNET3_MASK_TX_NDESC;
543	sc->vmx_ntxdescs = ndesc;
544
545	ndesc = vmxnet3_tunable_int(sc, "rxd", vmxnet3_default_rxndesc);
546	if (ndesc > VMXNET3_MAX_RX_NDESC || ndesc < VMXNET3_MIN_RX_NDESC)
547		ndesc = VMXNET3_DEF_RX_NDESC;
548	if (ndesc & VMXNET3_MASK_RX_NDESC)
549		ndesc &= ~VMXNET3_MASK_RX_NDESC;
550	sc->vmx_nrxdescs = ndesc;
551	sc->vmx_max_rxsegs = VMXNET3_MAX_RX_SEGS;
552}
553
554static void
555vmxnet3_check_multiqueue(struct vmxnet3_softc *sc)
556{
557
558	if (sc->vmx_intr_type != VMXNET3_IT_MSIX)
559		goto out;
560
561	/* BMV: Just use the maximum configured for now. */
562	sc->vmx_nrxqueues = sc->vmx_max_nrxqueues;
563	sc->vmx_ntxqueues = sc->vmx_max_ntxqueues;
564
565	if (sc->vmx_nrxqueues > 1)
566		sc->vmx_flags |= VMXNET3_FLAG_RSS;
567
568	return;
569
570out:
571	sc->vmx_ntxqueues = 1;
572	sc->vmx_nrxqueues = 1;
573}
574
575static int
576vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *sc)
577{
578	device_t dev;
579	int nmsix, cnt, required;
580
581	dev = sc->vmx_dev;
582
583	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX)
584		return (1);
585
586	/* Allocate an additional vector for the events interrupt. */
587	required = sc->vmx_max_nrxqueues + sc->vmx_max_ntxqueues + 1;
588
589	nmsix = pci_msix_count(dev);
590	if (nmsix < required)
591		return (1);
592
593	cnt = required;
594	if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) {
595		sc->vmx_nintrs = required;
596		return (0);
597	} else
598		pci_release_msi(dev);
599
600	/* BMV TODO Fallback to sharing MSIX vectors if possible. */
601
602	return (1);
603}
604
605static int
606vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *sc)
607{
608	device_t dev;
609	int nmsi, cnt, required;
610
611	dev = sc->vmx_dev;
612	required = 1;
613
614	nmsi = pci_msi_count(dev);
615	if (nmsi < required)
616		return (1);
617
618	cnt = required;
619	if (pci_alloc_msi(dev, &cnt) == 0 && cnt >= required) {
620		sc->vmx_nintrs = 1;
621		return (0);
622	} else
623		pci_release_msi(dev);
624
625	return (1);
626}
627
628static int
629vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *sc)
630{
631
632	sc->vmx_nintrs = 1;
633	return (0);
634}
635
636static int
637vmxnet3_alloc_interrupt(struct vmxnet3_softc *sc, int rid, int flags,
638    struct vmxnet3_interrupt *intr)
639{
640	struct resource *irq;
641
642	irq = bus_alloc_resource_any(sc->vmx_dev, SYS_RES_IRQ, &rid, flags);
643	if (irq == NULL)
644		return (ENXIO);
645
646	intr->vmxi_irq = irq;
647	intr->vmxi_rid = rid;
648
649	return (0);
650}
651
652static int
653vmxnet3_alloc_intr_resources(struct vmxnet3_softc *sc)
654{
655	int i, rid, flags, error;
656
657	rid = 0;
658	flags = RF_ACTIVE;
659
660	if (sc->vmx_intr_type == VMXNET3_IT_LEGACY)
661		flags |= RF_SHAREABLE;
662	else
663		rid = 1;
664
665	for (i = 0; i < sc->vmx_nintrs; i++, rid++) {
666		error = vmxnet3_alloc_interrupt(sc, rid, flags,
667		    &sc->vmx_intrs[i]);
668		if (error)
669			return (error);
670	}
671
672	return (0);
673}
674
675static int
676vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *sc)
677{
678	device_t dev;
679	struct vmxnet3_txqueue *txq;
680	struct vmxnet3_rxqueue *rxq;
681	struct vmxnet3_interrupt *intr;
682	enum intr_type type;
683	int i, error;
684
685	dev = sc->vmx_dev;
686	intr = &sc->vmx_intrs[0];
687	type = INTR_TYPE_NET | INTR_MPSAFE;
688
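	/*
	 * Vectors are assigned in order: one per Tx queue, one per Rx queue,
	 * and a final vector for events.  The MSI-X resource IDs start at
	 * one, so the interrupt index programmed into each queue is rid - 1.
	 */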
689	for (i = 0; i < sc->vmx_ntxqueues; i++, intr++) {
690		txq = &sc->vmx_txq[i];
691		error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
692		     vmxnet3_txq_intr, txq, &intr->vmxi_handler);
693		if (error)
694			return (error);
695		bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler,
696		    "tq%d", i);
697		txq->vxtxq_intr_idx = intr->vmxi_rid - 1;
698	}
699
700	for (i = 0; i < sc->vmx_nrxqueues; i++, intr++) {
701		rxq = &sc->vmx_rxq[i];
702		error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
703		    vmxnet3_rxq_intr, rxq, &intr->vmxi_handler);
704		if (error)
705			return (error);
706		bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler,
707		    "rq%d", i);
708		rxq->vxrxq_intr_idx = intr->vmxi_rid - 1;
709	}
710
711	error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
712	    vmxnet3_event_intr, sc, &intr->vmxi_handler);
713	if (error)
714		return (error);
715	bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler, "event");
716	sc->vmx_event_intr_idx = intr->vmxi_rid - 1;
717
718	return (0);
719}
720
721static int
722vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *sc)
723{
724	struct vmxnet3_interrupt *intr;
725	int i, error;
726
727	intr = &sc->vmx_intrs[0];
728	error = bus_setup_intr(sc->vmx_dev, intr->vmxi_irq,
729	    INTR_TYPE_NET | INTR_MPSAFE, NULL, vmxnet3_legacy_intr, sc,
730	    &intr->vmxi_handler);
731
732	for (i = 0; i < sc->vmx_ntxqueues; i++)
733		sc->vmx_txq[i].vxtxq_intr_idx = 0;
734	for (i = 0; i < sc->vmx_nrxqueues; i++)
735		sc->vmx_rxq[i].vxrxq_intr_idx = 0;
736	sc->vmx_event_intr_idx = 0;
737
738	return (error);
739}
740
741static void
742vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
743{
744	struct vmxnet3_txqueue *txq;
745	struct vmxnet3_txq_shared *txs;
746	struct vmxnet3_rxqueue *rxq;
747	struct vmxnet3_rxq_shared *rxs;
748	int i;
749
750	sc->vmx_ds->evintr = sc->vmx_event_intr_idx;
751
752	for (i = 0; i < sc->vmx_ntxqueues; i++) {
753		txq = &sc->vmx_txq[i];
754		txs = txq->vxtxq_ts;
755		txs->intr_idx = txq->vxtxq_intr_idx;
756	}
757
758	for (i = 0; i < sc->vmx_nrxqueues; i++) {
759		rxq = &sc->vmx_rxq[i];
760		rxs = rxq->vxrxq_rs;
761		rxs->intr_idx = rxq->vxrxq_intr_idx;
762	}
763}
764
765static int
766vmxnet3_setup_interrupts(struct vmxnet3_softc *sc)
767{
768	int error;
769
770	error = vmxnet3_alloc_intr_resources(sc);
771	if (error)
772		return (error);
773
774	switch (sc->vmx_intr_type) {
775	case VMXNET3_IT_MSIX:
776		error = vmxnet3_setup_msix_interrupts(sc);
777		break;
778	case VMXNET3_IT_MSI:
779	case VMXNET3_IT_LEGACY:
780		error = vmxnet3_setup_legacy_interrupt(sc);
781		break;
782	default:
783		panic("%s: invalid interrupt type %d", __func__,
784		    sc->vmx_intr_type);
785	}
786
787	if (error == 0)
788		vmxnet3_set_interrupt_idx(sc);
789
790	return (error);
791}
792
793static int
794vmxnet3_alloc_interrupts(struct vmxnet3_softc *sc)
795{
796	device_t dev;
797	uint32_t config;
798	int error;
799
800	dev = sc->vmx_dev;
801	config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
802
803	sc->vmx_intr_type = config & 0x03;
804	sc->vmx_intr_mask_mode = (config >> 2) & 0x03;
805
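	/*
	 * Prefer MSI-X (also selected when the device reports IT_AUTO), then
	 * fall back to MSI, and finally to a shared legacy interrupt.  The
	 * case fallthroughs below implement this chain.
	 */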
806	switch (sc->vmx_intr_type) {
807	case VMXNET3_IT_AUTO:
808		sc->vmx_intr_type = VMXNET3_IT_MSIX;
809		/* FALLTHROUGH */
810	case VMXNET3_IT_MSIX:
811		error = vmxnet3_alloc_msix_interrupts(sc);
812		if (error == 0)
813			break;
814		sc->vmx_intr_type = VMXNET3_IT_MSI;
815		/* FALLTHROUGH */
816	case VMXNET3_IT_MSI:
817		error = vmxnet3_alloc_msi_interrupts(sc);
818		if (error == 0)
819			break;
820		sc->vmx_intr_type = VMXNET3_IT_LEGACY;
821		/* FALLTHROUGH */
822	case VMXNET3_IT_LEGACY:
823		error = vmxnet3_alloc_legacy_interrupts(sc);
824		if (error == 0)
825			break;
826		/* FALLTHROUGH */
827	default:
828		sc->vmx_intr_type = -1;
829		device_printf(dev, "cannot allocate any interrupt resources\n");
830		return (ENXIO);
831	}
832
833	return (error);
834}
835
836static void
837vmxnet3_free_interrupt(struct vmxnet3_softc *sc,
838    struct vmxnet3_interrupt *intr)
839{
840	device_t dev;
841
842	dev = sc->vmx_dev;
843
844	if (intr->vmxi_handler != NULL) {
845		bus_teardown_intr(dev, intr->vmxi_irq, intr->vmxi_handler);
846		intr->vmxi_handler = NULL;
847	}
848
849	if (intr->vmxi_irq != NULL) {
850		bus_release_resource(dev, SYS_RES_IRQ, intr->vmxi_rid,
851		    intr->vmxi_irq);
852		intr->vmxi_irq = NULL;
853		intr->vmxi_rid = -1;
854	}
855}
856
857static void
858vmxnet3_free_interrupts(struct vmxnet3_softc *sc)
859{
860	int i;
861
862	for (i = 0; i < sc->vmx_nintrs; i++)
863		vmxnet3_free_interrupt(sc, &sc->vmx_intrs[i]);
864
865	if (sc->vmx_intr_type == VMXNET3_IT_MSI ||
866	    sc->vmx_intr_type == VMXNET3_IT_MSIX)
867		pci_release_msi(sc->vmx_dev);
868}
869
870#ifndef VMXNET3_LEGACY_TX
871static int
872vmxnet3_alloc_taskqueue(struct vmxnet3_softc *sc)
873{
874	device_t dev;
875
876	dev = sc->vmx_dev;
877
878	sc->vmx_tq = taskqueue_create(device_get_nameunit(dev), M_NOWAIT,
879	    taskqueue_thread_enqueue, &sc->vmx_tq);
880	if (sc->vmx_tq == NULL)
881		return (ENOMEM);
882
883	return (0);
884}
885
886static void
887vmxnet3_start_taskqueue(struct vmxnet3_softc *sc)
888{
889	device_t dev;
890	int nthreads, error;
891
892	dev = sc->vmx_dev;
893
	/*
	 * The taskqueue only handles deferred transmits and is not expected
	 * to be busy, so a dedicated thread for each queue is unnecessary.
	 */
898	nthreads = MAX(1, sc->vmx_ntxqueues / 2);
899
900	/*
901	 * Most drivers just ignore the return value - it only fails
902	 * with ENOMEM so an error is not likely. It is hard for us
903	 * to recover from an error here.
904	 */
905	error = taskqueue_start_threads(&sc->vmx_tq, nthreads, PI_NET,
906	    "%s taskq", device_get_nameunit(dev));
	if (error)
		device_printf(dev, "failed to start taskqueue: %d\n", error);
909}
910
911static void
912vmxnet3_drain_taskqueue(struct vmxnet3_softc *sc)
913{
914	struct vmxnet3_txqueue *txq;
915	int i;
916
917	if (sc->vmx_tq != NULL) {
918		for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
919			txq = &sc->vmx_txq[i];
920			taskqueue_drain(sc->vmx_tq, &txq->vxtxq_defrtask);
921		}
922	}
923}
924
925static void
926vmxnet3_free_taskqueue(struct vmxnet3_softc *sc)
927{
928	if (sc->vmx_tq != NULL) {
929		taskqueue_free(sc->vmx_tq);
930		sc->vmx_tq = NULL;
931	}
932}
933#endif
934
935static int
936vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q)
937{
938	struct vmxnet3_rxqueue *rxq;
939	struct vmxnet3_rxring *rxr;
940	int i;
941
942	rxq = &sc->vmx_rxq[q];
943
944	snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
945	    device_get_nameunit(sc->vmx_dev), q);
946	mtx_init(&rxq->vxrxq_mtx, rxq->vxrxq_name, NULL, MTX_DEF);
947
948	rxq->vxrxq_sc = sc;
949	rxq->vxrxq_id = q;
950
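	/*
	 * Each Rx queue is fed by VMXNET3_RXRINGS_PERQ command rings.  All
	 * completions are reported on a single completion ring, so it must
	 * be large enough to hold an entry for every command descriptor.
	 */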
951	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
952		rxr = &rxq->vxrxq_cmd_ring[i];
953		rxr->vxrxr_rid = i;
954		rxr->vxrxr_ndesc = sc->vmx_nrxdescs;
955		rxr->vxrxr_rxbuf = malloc(rxr->vxrxr_ndesc *
956		    sizeof(struct vmxnet3_rxbuf), M_DEVBUF, M_NOWAIT | M_ZERO);
957		if (rxr->vxrxr_rxbuf == NULL)
958			return (ENOMEM);
959
960		rxq->vxrxq_comp_ring.vxcr_ndesc += sc->vmx_nrxdescs;
961	}
962
963	return (0);
964}
965
966static int
967vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
968{
969	struct vmxnet3_txqueue *txq;
970	struct vmxnet3_txring *txr;
971
972	txq = &sc->vmx_txq[q];
973	txr = &txq->vxtxq_cmd_ring;
974
975	snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
976	    device_get_nameunit(sc->vmx_dev), q);
977	mtx_init(&txq->vxtxq_mtx, txq->vxtxq_name, NULL, MTX_DEF);
978
979	txq->vxtxq_sc = sc;
980	txq->vxtxq_id = q;
981
982	txr->vxtxr_ndesc = sc->vmx_ntxdescs;
983	txr->vxtxr_txbuf = malloc(txr->vxtxr_ndesc *
984	    sizeof(struct vmxnet3_txbuf), M_DEVBUF, M_NOWAIT | M_ZERO);
985	if (txr->vxtxr_txbuf == NULL)
986		return (ENOMEM);
987
988	txq->vxtxq_comp_ring.vxcr_ndesc = sc->vmx_ntxdescs;
989
990#ifndef VMXNET3_LEGACY_TX
991	TASK_INIT(&txq->vxtxq_defrtask, 0, vmxnet3_txq_tq_deferred, txq);
992
993	txq->vxtxq_br = buf_ring_alloc(VMXNET3_DEF_BUFRING_SIZE, M_DEVBUF,
994	    M_NOWAIT, &txq->vxtxq_mtx);
995	if (txq->vxtxq_br == NULL)
996		return (ENOMEM);
997#endif
998
999	return (0);
1000}
1001
1002static int
1003vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *sc)
1004{
1005	int i, error;
1006
	/*
	 * Only attempt to create multiple queues if MSIX is available. MSIX
	 * is disabled by default because it is apparently broken for devices
	 * passed through by at least ESXi 5.1. The hw.pci.honor_msi_blacklist
	 * tunable must be set to zero for MSIX. This check prevents us from
	 * allocating queue structures that we will not use.
	 */
1014	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX) {
1015		sc->vmx_max_nrxqueues = 1;
1016		sc->vmx_max_ntxqueues = 1;
1017	}
1018
1019	sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) *
1020	    sc->vmx_max_nrxqueues, M_DEVBUF, M_NOWAIT | M_ZERO);
1021	sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) *
1022	    sc->vmx_max_ntxqueues, M_DEVBUF, M_NOWAIT | M_ZERO);
1023	if (sc->vmx_rxq == NULL || sc->vmx_txq == NULL)
1024		return (ENOMEM);
1025
1026	for (i = 0; i < sc->vmx_max_nrxqueues; i++) {
1027		error = vmxnet3_init_rxq(sc, i);
1028		if (error)
1029			return (error);
1030	}
1031
1032	for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
1033		error = vmxnet3_init_txq(sc, i);
1034		if (error)
1035			return (error);
1036	}
1037
1038	return (0);
1039}
1040
1041static void
1042vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *rxq)
1043{
1044	struct vmxnet3_rxring *rxr;
1045	int i;
1046
1047	rxq->vxrxq_sc = NULL;
1048	rxq->vxrxq_id = -1;
1049
1050	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1051		rxr = &rxq->vxrxq_cmd_ring[i];
1052
1053		if (rxr->vxrxr_rxbuf != NULL) {
1054			free(rxr->vxrxr_rxbuf, M_DEVBUF);
1055			rxr->vxrxr_rxbuf = NULL;
1056		}
1057	}
1058
1059	if (mtx_initialized(&rxq->vxrxq_mtx) != 0)
1060		mtx_destroy(&rxq->vxrxq_mtx);
1061}
1062
1063static void
1064vmxnet3_destroy_txq(struct vmxnet3_txqueue *txq)
1065{
1066	struct vmxnet3_txring *txr;
1067
1068	txr = &txq->vxtxq_cmd_ring;
1069
1070	txq->vxtxq_sc = NULL;
1071	txq->vxtxq_id = -1;
1072
1073#ifndef VMXNET3_LEGACY_TX
1074	if (txq->vxtxq_br != NULL) {
1075		buf_ring_free(txq->vxtxq_br, M_DEVBUF);
1076		txq->vxtxq_br = NULL;
1077	}
1078#endif
1079
1080	if (txr->vxtxr_txbuf != NULL) {
1081		free(txr->vxtxr_txbuf, M_DEVBUF);
1082		txr->vxtxr_txbuf = NULL;
1083	}
1084
1085	if (mtx_initialized(&txq->vxtxq_mtx) != 0)
1086		mtx_destroy(&txq->vxtxq_mtx);
1087}
1088
1089static void
1090vmxnet3_free_rxtx_queues(struct vmxnet3_softc *sc)
1091{
1092	int i;
1093
1094	if (sc->vmx_rxq != NULL) {
1095		for (i = 0; i < sc->vmx_max_nrxqueues; i++)
1096			vmxnet3_destroy_rxq(&sc->vmx_rxq[i]);
1097		free(sc->vmx_rxq, M_DEVBUF);
1098		sc->vmx_rxq = NULL;
1099	}
1100
1101	if (sc->vmx_txq != NULL) {
1102		for (i = 0; i < sc->vmx_max_ntxqueues; i++)
1103			vmxnet3_destroy_txq(&sc->vmx_txq[i]);
1104		free(sc->vmx_txq, M_DEVBUF);
1105		sc->vmx_txq = NULL;
1106	}
1107}
1108
1109static int
1110vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
1111{
1112	device_t dev;
1113	uint8_t *kva;
1114	size_t size;
1115	int i, error;
1116
1117	dev = sc->vmx_dev;
1118
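	/*
	 * The driver-shared area, the per-queue shared areas, and (when RSS
	 * is enabled) the RSS configuration are referenced by the device by
	 * physical address, so they are carved out of DMA-able allocations.
	 */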
1119	size = sizeof(struct vmxnet3_driver_shared);
1120	error = vmxnet3_dma_malloc(sc, size, 1, &sc->vmx_ds_dma);
1121	if (error) {
1122		device_printf(dev, "cannot alloc shared memory\n");
1123		return (error);
1124	}
1125	sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.dma_vaddr;
1126
1127	size = sc->vmx_ntxqueues * sizeof(struct vmxnet3_txq_shared) +
1128	    sc->vmx_nrxqueues * sizeof(struct vmxnet3_rxq_shared);
1129	error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_qs_dma);
1130	if (error) {
1131		device_printf(dev, "cannot alloc queue shared memory\n");
1132		return (error);
1133	}
1134	sc->vmx_qs = (void *) sc->vmx_qs_dma.dma_vaddr;
1135	kva = sc->vmx_qs;
1136
1137	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1138		sc->vmx_txq[i].vxtxq_ts = (struct vmxnet3_txq_shared *) kva;
1139		kva += sizeof(struct vmxnet3_txq_shared);
1140	}
1141	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1142		sc->vmx_rxq[i].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva;
1143		kva += sizeof(struct vmxnet3_rxq_shared);
1144	}
1145
1146	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1147		size = sizeof(struct vmxnet3_rss_shared);
1148		error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_rss_dma);
1149		if (error) {
1150			device_printf(dev, "cannot alloc rss shared memory\n");
1151			return (error);
1152		}
1153		sc->vmx_rss =
1154		    (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.dma_vaddr;
1155	}
1156
1157	return (0);
1158}
1159
1160static void
1161vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
1162{
1163
1164	if (sc->vmx_rss != NULL) {
1165		vmxnet3_dma_free(sc, &sc->vmx_rss_dma);
1166		sc->vmx_rss = NULL;
1167	}
1168
1169	if (sc->vmx_qs != NULL) {
1170		vmxnet3_dma_free(sc, &sc->vmx_qs_dma);
1171		sc->vmx_qs = NULL;
1172	}
1173
1174	if (sc->vmx_ds != NULL) {
1175		vmxnet3_dma_free(sc, &sc->vmx_ds_dma);
1176		sc->vmx_ds = NULL;
1177	}
1178}
1179
1180static int
1181vmxnet3_alloc_txq_data(struct vmxnet3_softc *sc)
1182{
1183	device_t dev;
1184	struct vmxnet3_txqueue *txq;
1185	struct vmxnet3_txring *txr;
1186	struct vmxnet3_comp_ring *txc;
1187	size_t descsz, compsz;
1188	int i, q, error;
1189
1190	dev = sc->vmx_dev;
1191
1192	for (q = 0; q < sc->vmx_ntxqueues; q++) {
1193		txq = &sc->vmx_txq[q];
1194		txr = &txq->vxtxq_cmd_ring;
1195		txc = &txq->vxtxq_comp_ring;
1196
1197		descsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc);
1198		compsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txcompdesc);
1199
1200		error = bus_dma_tag_create(bus_get_dma_tag(dev),
1201		    1, 0,			/* alignment, boundary */
1202		    BUS_SPACE_MAXADDR,		/* lowaddr */
1203		    BUS_SPACE_MAXADDR,		/* highaddr */
1204		    NULL, NULL,			/* filter, filterarg */
1205		    VMXNET3_TX_MAXSIZE,		/* maxsize */
1206		    VMXNET3_TX_MAXSEGS,		/* nsegments */
1207		    VMXNET3_TX_MAXSEGSIZE,	/* maxsegsize */
1208		    0,				/* flags */
1209		    NULL, NULL,			/* lockfunc, lockarg */
1210		    &txr->vxtxr_txtag);
1211		if (error) {
1212			device_printf(dev,
1213			    "unable to create Tx buffer tag for queue %d\n", q);
1214			return (error);
1215		}
1216
1217		error = vmxnet3_dma_malloc(sc, descsz, 512, &txr->vxtxr_dma);
1218		if (error) {
1219			device_printf(dev, "cannot alloc Tx descriptors for "
1220			    "queue %d error %d\n", q, error);
1221			return (error);
1222		}
1223		txr->vxtxr_txd =
1224		    (struct vmxnet3_txdesc *) txr->vxtxr_dma.dma_vaddr;
1225
1226		error = vmxnet3_dma_malloc(sc, compsz, 512, &txc->vxcr_dma);
1227		if (error) {
1228			device_printf(dev, "cannot alloc Tx comp descriptors "
1229			   "for queue %d error %d\n", q, error);
1230			return (error);
1231		}
1232		txc->vxcr_u.txcd =
1233		    (struct vmxnet3_txcompdesc *) txc->vxcr_dma.dma_vaddr;
1234
1235		for (i = 0; i < txr->vxtxr_ndesc; i++) {
1236			error = bus_dmamap_create(txr->vxtxr_txtag, 0,
1237			    &txr->vxtxr_txbuf[i].vtxb_dmamap);
1238			if (error) {
1239				device_printf(dev, "unable to create Tx buf "
1240				    "dmamap for queue %d idx %d\n", q, i);
1241				return (error);
1242			}
1243		}
1244	}
1245
1246	return (0);
1247}
1248
1249static void
1250vmxnet3_free_txq_data(struct vmxnet3_softc *sc)
1251{
1252	device_t dev;
1253	struct vmxnet3_txqueue *txq;
1254	struct vmxnet3_txring *txr;
1255	struct vmxnet3_comp_ring *txc;
1256	struct vmxnet3_txbuf *txb;
1257	int i, q;
1258
1259	dev = sc->vmx_dev;
1260
1261	for (q = 0; q < sc->vmx_ntxqueues; q++) {
1262		txq = &sc->vmx_txq[q];
1263		txr = &txq->vxtxq_cmd_ring;
1264		txc = &txq->vxtxq_comp_ring;
1265
1266		for (i = 0; i < txr->vxtxr_ndesc; i++) {
1267			txb = &txr->vxtxr_txbuf[i];
1268			if (txb->vtxb_dmamap != NULL) {
1269				bus_dmamap_destroy(txr->vxtxr_txtag,
1270				    txb->vtxb_dmamap);
1271				txb->vtxb_dmamap = NULL;
1272			}
1273		}
1274
1275		if (txc->vxcr_u.txcd != NULL) {
1276			vmxnet3_dma_free(sc, &txc->vxcr_dma);
1277			txc->vxcr_u.txcd = NULL;
1278		}
1279
1280		if (txr->vxtxr_txd != NULL) {
1281			vmxnet3_dma_free(sc, &txr->vxtxr_dma);
1282			txr->vxtxr_txd = NULL;
1283		}
1284
1285		if (txr->vxtxr_txtag != NULL) {
1286			bus_dma_tag_destroy(txr->vxtxr_txtag);
1287			txr->vxtxr_txtag = NULL;
1288		}
1289	}
1290}
1291
1292static int
1293vmxnet3_alloc_rxq_data(struct vmxnet3_softc *sc)
1294{
1295	device_t dev;
1296	struct vmxnet3_rxqueue *rxq;
1297	struct vmxnet3_rxring *rxr;
1298	struct vmxnet3_comp_ring *rxc;
1299	int descsz, compsz;
1300	int i, j, q, error;
1301
1302	dev = sc->vmx_dev;
1303
1304	for (q = 0; q < sc->vmx_nrxqueues; q++) {
1305		rxq = &sc->vmx_rxq[q];
1306		rxc = &rxq->vxrxq_comp_ring;
1307		compsz = 0;
1308
1309		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1310			rxr = &rxq->vxrxq_cmd_ring[i];
1311
1312			descsz = rxr->vxrxr_ndesc *
1313			    sizeof(struct vmxnet3_rxdesc);
1314			compsz += rxr->vxrxr_ndesc *
1315			    sizeof(struct vmxnet3_rxcompdesc);
1316
1317			error = bus_dma_tag_create(bus_get_dma_tag(dev),
1318			    1, 0,		/* alignment, boundary */
1319			    BUS_SPACE_MAXADDR,	/* lowaddr */
1320			    BUS_SPACE_MAXADDR,	/* highaddr */
1321			    NULL, NULL,		/* filter, filterarg */
1322			    MJUMPAGESIZE,	/* maxsize */
1323			    1,			/* nsegments */
1324			    MJUMPAGESIZE,	/* maxsegsize */
1325			    0,			/* flags */
1326			    NULL, NULL,		/* lockfunc, lockarg */
1327			    &rxr->vxrxr_rxtag);
1328			if (error) {
1329				device_printf(dev,
1330				    "unable to create Rx buffer tag for "
1331				    "queue %d\n", q);
1332				return (error);
1333			}
1334
1335			error = vmxnet3_dma_malloc(sc, descsz, 512,
1336			    &rxr->vxrxr_dma);
1337			if (error) {
				device_printf(dev, "cannot allocate Rx "
				    "descriptors for queue %d/%d error %d\n",
				    q, i, error);
1341				return (error);
1342			}
1343			rxr->vxrxr_rxd =
1344			    (struct vmxnet3_rxdesc *) rxr->vxrxr_dma.dma_vaddr;
1345		}
1346
1347		error = vmxnet3_dma_malloc(sc, compsz, 512, &rxc->vxcr_dma);
1348		if (error) {
1349			device_printf(dev, "cannot alloc Rx comp descriptors "
1350			    "for queue %d error %d\n", q, error);
1351			return (error);
1352		}
1353		rxc->vxcr_u.rxcd =
1354		    (struct vmxnet3_rxcompdesc *) rxc->vxcr_dma.dma_vaddr;
1355
1356		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1357			rxr = &rxq->vxrxq_cmd_ring[i];
1358
1359			error = bus_dmamap_create(rxr->vxrxr_rxtag, 0,
1360			    &rxr->vxrxr_spare_dmap);
1361			if (error) {
1362				device_printf(dev, "unable to create spare "
1363				    "dmamap for queue %d/%d error %d\n",
1364				    q, i, error);
1365				return (error);
1366			}
1367
1368			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1369				error = bus_dmamap_create(rxr->vxrxr_rxtag, 0,
1370				    &rxr->vxrxr_rxbuf[j].vrxb_dmamap);
1371				if (error) {
1372					device_printf(dev, "unable to create "
1373					    "dmamap for queue %d/%d slot %d "
1374					    "error %d\n",
1375					    q, i, j, error);
1376					return (error);
1377				}
1378			}
1379		}
1380	}
1381
1382	return (0);
1383}
1384
1385static void
1386vmxnet3_free_rxq_data(struct vmxnet3_softc *sc)
1387{
1388	device_t dev;
1389	struct vmxnet3_rxqueue *rxq;
1390	struct vmxnet3_rxring *rxr;
1391	struct vmxnet3_comp_ring *rxc;
1392	struct vmxnet3_rxbuf *rxb;
1393	int i, j, q;
1394
1395	dev = sc->vmx_dev;
1396
1397	for (q = 0; q < sc->vmx_nrxqueues; q++) {
1398		rxq = &sc->vmx_rxq[q];
1399		rxc = &rxq->vxrxq_comp_ring;
1400
1401		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1402			rxr = &rxq->vxrxq_cmd_ring[i];
1403
1404			if (rxr->vxrxr_spare_dmap != NULL) {
1405				bus_dmamap_destroy(rxr->vxrxr_rxtag,
1406				    rxr->vxrxr_spare_dmap);
1407				rxr->vxrxr_spare_dmap = NULL;
1408			}
1409
1410			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1411				rxb = &rxr->vxrxr_rxbuf[j];
1412				if (rxb->vrxb_dmamap != NULL) {
1413					bus_dmamap_destroy(rxr->vxrxr_rxtag,
1414					    rxb->vrxb_dmamap);
1415					rxb->vrxb_dmamap = NULL;
1416				}
1417			}
1418		}
1419
1420		if (rxc->vxcr_u.rxcd != NULL) {
1421			vmxnet3_dma_free(sc, &rxc->vxcr_dma);
1422			rxc->vxcr_u.rxcd = NULL;
1423		}
1424
1425		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1426			rxr = &rxq->vxrxq_cmd_ring[i];
1427
1428			if (rxr->vxrxr_rxd != NULL) {
1429				vmxnet3_dma_free(sc, &rxr->vxrxr_dma);
1430				rxr->vxrxr_rxd = NULL;
1431			}
1432
1433			if (rxr->vxrxr_rxtag != NULL) {
1434				bus_dma_tag_destroy(rxr->vxrxr_rxtag);
1435				rxr->vxrxr_rxtag = NULL;
1436			}
1437		}
1438	}
1439}
1440
1441static int
1442vmxnet3_alloc_queue_data(struct vmxnet3_softc *sc)
1443{
1444	int error;
1445
1446	error = vmxnet3_alloc_txq_data(sc);
1447	if (error)
1448		return (error);
1449
1450	error = vmxnet3_alloc_rxq_data(sc);
1451	if (error)
1452		return (error);
1453
1454	return (0);
1455}
1456
1457static void
1458vmxnet3_free_queue_data(struct vmxnet3_softc *sc)
1459{
1460
1461	if (sc->vmx_rxq != NULL)
1462		vmxnet3_free_rxq_data(sc);
1463
1464	if (sc->vmx_txq != NULL)
1465		vmxnet3_free_txq_data(sc);
1466}
1467
1468static int
1469vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1470{
1471	int error;
1472
1473	error = vmxnet3_dma_malloc(sc, VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN,
1474	    32, &sc->vmx_mcast_dma);
1475	if (error)
1476		device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1477	else
1478		sc->vmx_mcast = sc->vmx_mcast_dma.dma_vaddr;
1479
1480	return (error);
1481}
1482
1483static void
1484vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1485{
1486
1487	if (sc->vmx_mcast != NULL) {
1488		vmxnet3_dma_free(sc, &sc->vmx_mcast_dma);
1489		sc->vmx_mcast = NULL;
1490	}
1491}
1492
1493static void
1494vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1495{
1496	struct vmxnet3_driver_shared *ds;
1497	struct vmxnet3_txqueue *txq;
1498	struct vmxnet3_txq_shared *txs;
1499	struct vmxnet3_rxqueue *rxq;
1500	struct vmxnet3_rxq_shared *rxs;
1501	int i;
1502
1503	ds = sc->vmx_ds;
1504
	/*
	 * Initialize the fields of the shared data that remain the same
	 * across reinits. Note the shared data is zeroed when allocated.
	 */
1509
1510	ds->magic = VMXNET3_REV1_MAGIC;
1511
1512	/* DriverInfo */
1513	ds->version = VMXNET3_DRIVER_VERSION;
1514	ds->guest = VMXNET3_GOS_FREEBSD |
1515#ifdef __LP64__
1516	    VMXNET3_GOS_64BIT;
1517#else
1518	    VMXNET3_GOS_32BIT;
1519#endif
1520	ds->vmxnet3_revision = 1;
1521	ds->upt_version = 1;
1522
1523	/* Misc. conf */
1524	ds->driver_data = vtophys(sc);
1525	ds->driver_data_len = sizeof(struct vmxnet3_softc);
1526	ds->queue_shared = sc->vmx_qs_dma.dma_paddr;
1527	ds->queue_shared_len = sc->vmx_qs_dma.dma_size;
1528	ds->nrxsg_max = sc->vmx_max_rxsegs;
1529
1530	/* RSS conf */
1531	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1532		ds->rss.version = 1;
1533		ds->rss.paddr = sc->vmx_rss_dma.dma_paddr;
1534		ds->rss.len = sc->vmx_rss_dma.dma_size;
1535	}
1536
1537	/* Interrupt control. */
1538	ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1539	ds->nintr = sc->vmx_nintrs;
1540	ds->evintr = sc->vmx_event_intr_idx;
1541	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1542
1543	for (i = 0; i < sc->vmx_nintrs; i++)
1544		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1545
1546	/* Receive filter. */
1547	ds->mcast_table = sc->vmx_mcast_dma.dma_paddr;
1548	ds->mcast_tablelen = sc->vmx_mcast_dma.dma_size;
1549
1550	/* Tx queues */
1551	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1552		txq = &sc->vmx_txq[i];
1553		txs = txq->vxtxq_ts;
1554
1555		txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_dma.dma_paddr;
1556		txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1557		txs->comp_ring = txq->vxtxq_comp_ring.vxcr_dma.dma_paddr;
1558		txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1559		txs->driver_data = vtophys(txq);
1560		txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1561	}
1562
1563	/* Rx queues */
1564	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1565		rxq = &sc->vmx_rxq[i];
1566		rxs = rxq->vxrxq_rs;
1567
1568		rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_dma.dma_paddr;
1569		rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1570		rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_dma.dma_paddr;
1571		rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1572		rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_dma.dma_paddr;
1573		rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1574		rxs->driver_data = vtophys(rxq);
1575		rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1576	}
1577}
1578
1579static void
1580vmxnet3_reinit_interface(struct vmxnet3_softc *sc)
1581{
1582	struct ifnet *ifp;
1583
1584	ifp = sc->vmx_ifp;
1585
1586	/* Use the current MAC address. */
1587	bcopy(IF_LLADDR(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
1588	vmxnet3_set_lladdr(sc);
1589
1590	ifp->if_hwassist = 0;
1591	if (ifp->if_capenable & IFCAP_TXCSUM)
1592		ifp->if_hwassist |= VMXNET3_CSUM_OFFLOAD;
1593	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
1594		ifp->if_hwassist |= VMXNET3_CSUM_OFFLOAD_IPV6;
1595	if (ifp->if_capenable & IFCAP_TSO4)
1596		ifp->if_hwassist |= CSUM_IP_TSO;
1597	if (ifp->if_capenable & IFCAP_TSO6)
1598		ifp->if_hwassist |= CSUM_IP6_TSO;
1599}
1600
1601static void
1602vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1603{
1604	/*
1605	 * Use the same key as the Linux driver until FreeBSD can do
1606	 * RSS (presumably Toeplitz) in software.
1607	 */
1608	static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
1609	    0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
1610	    0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
1611	    0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
1612	    0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
1613	    0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
1614	};
1615
1616	struct vmxnet3_driver_shared *ds;
1617	struct vmxnet3_rss_shared *rss;
1618	int i;
1619
1620	ds = sc->vmx_ds;
1621	rss = sc->vmx_rss;
1622
1623	rss->hash_type =
1624	    UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1625	    UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1626	rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1627	rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1628	rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1629	memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
1630
1631	for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1632		rss->ind_table[i] = i % sc->vmx_nrxqueues;
1633}
1634
1635static void
1636vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1637{
1638	struct ifnet *ifp;
1639	struct vmxnet3_driver_shared *ds;
1640
1641	ifp = sc->vmx_ifp;
1642	ds = sc->vmx_ds;
1643
1644	ds->mtu = ifp->if_mtu;
1645	ds->ntxqueue = sc->vmx_ntxqueues;
1646	ds->nrxqueue = sc->vmx_nrxqueues;
1647
1648	ds->upt_features = 0;
1649	if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
1650		ds->upt_features |= UPT1_F_CSUM;
1651	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1652		ds->upt_features |= UPT1_F_VLAN;
1653	if (ifp->if_capenable & IFCAP_LRO)
1654		ds->upt_features |= UPT1_F_LRO;
1655
1656	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1657		ds->upt_features |= UPT1_F_RSS;
1658		vmxnet3_reinit_rss_shared_data(sc);
1659	}
1660
1661	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.dma_paddr);
1662	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1663	    (uint64_t) sc->vmx_ds_dma.dma_paddr >> 32);
1664}
1665
1666static int
1667vmxnet3_alloc_data(struct vmxnet3_softc *sc)
1668{
1669	int error;
1670
1671	error = vmxnet3_alloc_shared_data(sc);
1672	if (error)
1673		return (error);
1674
1675	error = vmxnet3_alloc_queue_data(sc);
1676	if (error)
1677		return (error);
1678
1679	error = vmxnet3_alloc_mcast_table(sc);
1680	if (error)
1681		return (error);
1682
1683	vmxnet3_init_shared_data(sc);
1684
1685	return (0);
1686}
1687
1688static void
1689vmxnet3_free_data(struct vmxnet3_softc *sc)
1690{
1691
1692	vmxnet3_free_mcast_table(sc);
1693	vmxnet3_free_queue_data(sc);
1694	vmxnet3_free_shared_data(sc);
1695}
1696
1697static int
1698vmxnet3_setup_interface(struct vmxnet3_softc *sc)
1699{
1700	device_t dev;
1701	struct ifnet *ifp;
1702
1703	dev = sc->vmx_dev;
1704
1705	ifp = sc->vmx_ifp = if_alloc(IFT_ETHER);
1706	if (ifp == NULL) {
1707		device_printf(dev, "cannot allocate ifnet structure\n");
1708		return (ENOSPC);
1709	}
1710
1711	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1712#if __FreeBSD_version < 1000025
1713	ifp->if_baudrate = 1000000000;
1714#elif __FreeBSD_version < 1100011
1715	if_initbaudrate(ifp, IF_Gbps(10));
1716#else
1717	ifp->if_baudrate = IF_Gbps(10);
1718#endif
1719	ifp->if_softc = sc;
1720	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1721	ifp->if_init = vmxnet3_init;
1722	ifp->if_ioctl = vmxnet3_ioctl;
1723	ifp->if_hw_tsomax = VMXNET3_TSO_MAXSIZE;
1724
1725#ifdef VMXNET3_LEGACY_TX
1726	ifp->if_start = vmxnet3_start;
1727	ifp->if_snd.ifq_drv_maxlen = sc->vmx_ntxdescs - 1;
1728	IFQ_SET_MAXLEN(&ifp->if_snd, sc->vmx_ntxdescs - 1);
1729	IFQ_SET_READY(&ifp->if_snd);
1730#else
1731	ifp->if_transmit = vmxnet3_txq_mq_start;
1732	ifp->if_qflush = vmxnet3_qflush;
1733#endif
1734
1735	vmxnet3_get_lladdr(sc);
1736	ether_ifattach(ifp, sc->vmx_lladdr);
1737
1738	ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_TXCSUM;
1739	ifp->if_capabilities |= IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6;
1740	ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6;
1741	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
1742	    IFCAP_VLAN_HWCSUM;
1743	ifp->if_capenable = ifp->if_capabilities;
1744
1745	/* These capabilities are not enabled by default. */
1746	ifp->if_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER;
1747
1748	sc->vmx_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
1749	    vmxnet3_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
1750	sc->vmx_vlan_detach = EVENTHANDLER_REGISTER(vlan_config,
1751	    vmxnet3_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
1752
1753	ifmedia_init(&sc->vmx_media, 0, vmxnet3_media_change,
1754	    vmxnet3_media_status);
1755	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
1756	ifmedia_set(&sc->vmx_media, IFM_ETHER | IFM_AUTO);
1757
1758	return (0);
1759}
1760
1761static void
1762vmxnet3_evintr(struct vmxnet3_softc *sc)
1763{
1764	device_t dev;
1765	struct ifnet *ifp;
1766	struct vmxnet3_txq_shared *ts;
1767	struct vmxnet3_rxq_shared *rs;
1768	uint32_t event;
1769	int reset;
1770
1771	dev = sc->vmx_dev;
1772	ifp = sc->vmx_ifp;
1773	reset = 0;
1774
1775	VMXNET3_CORE_LOCK(sc);
1776
1777	/* Clear events. */
1778	event = sc->vmx_ds->event;
1779	vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
1780
1781	if (event & VMXNET3_EVENT_LINK) {
1782		vmxnet3_link_status(sc);
1783		if (sc->vmx_link_active != 0)
1784			vmxnet3_tx_start_all(sc);
1785	}
1786
1787	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
1788		reset = 1;
1789		vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
1790		ts = sc->vmx_txq[0].vxtxq_ts;
1791		if (ts->stopped != 0)
1792			device_printf(dev, "Tx queue error %#x\n", ts->error);
1793		rs = sc->vmx_rxq[0].vxrxq_rs;
1794		if (rs->stopped != 0)
1795			device_printf(dev, "Rx queue error %#x\n", rs->error);
1796		device_printf(dev, "Rx/Tx queue error event ... resetting\n");
1797	}
1798
1799	if (event & VMXNET3_EVENT_DIC)
1800		device_printf(dev, "device implementation change event\n");
1801	if (event & VMXNET3_EVENT_DEBUG)
1802		device_printf(dev, "debug event\n");
1803
1804	if (reset != 0) {
1805		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1806		vmxnet3_init_locked(sc);
1807	}
1808
1809	VMXNET3_CORE_UNLOCK(sc);
1810}
1811
1812static void
1813vmxnet3_txq_eof(struct vmxnet3_txqueue *txq)
1814{
1815	struct vmxnet3_softc *sc;
1816	struct ifnet *ifp;
1817	struct vmxnet3_txring *txr;
1818	struct vmxnet3_comp_ring *txc;
1819	struct vmxnet3_txcompdesc *txcd;
1820	struct vmxnet3_txbuf *txb;
1821	struct mbuf *m;
1822	u_int sop;
1823
1824	sc = txq->vxtxq_sc;
1825	ifp = sc->vmx_ifp;
1826	txr = &txq->vxtxq_cmd_ring;
1827	txc = &txq->vxtxq_comp_ring;
1828
1829	VMXNET3_TXQ_LOCK_ASSERT(txq);
1830
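	/*
	 * Process completed transmits.  A completion descriptor is valid
	 * only once its generation bit matches vxcr_gen; the expected
	 * generation flips each time the ring wraps.
	 */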
1831	for (;;) {
1832		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
1833		if (txcd->gen != txc->vxcr_gen)
1834			break;
1835		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1836
1837		if (++txc->vxcr_next == txc->vxcr_ndesc) {
1838			txc->vxcr_next = 0;
1839			txc->vxcr_gen ^= 1;
1840		}
1841
1842		sop = txr->vxtxr_next;
1843		txb = &txr->vxtxr_txbuf[sop];
1844
1845		if ((m = txb->vtxb_m) != NULL) {
1846			bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
1847			    BUS_DMASYNC_POSTWRITE);
1848			bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap);
1849
1850			txq->vxtxq_stats.vmtxs_opackets++;
1851			txq->vxtxq_stats.vmtxs_obytes += m->m_pkthdr.len;
1852			if (m->m_flags & M_MCAST)
1853				txq->vxtxq_stats.vmtxs_omcasts++;
1854
1855			m_freem(m);
1856			txb->vtxb_m = NULL;
1857		}
1858
1859		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
1860	}
1861
1862	if (txr->vxtxr_head == txr->vxtxr_next)
1863		txq->vxtxq_watchdog = 0;
1864}
1865
1866static int
1867vmxnet3_newbuf(struct vmxnet3_softc *sc, struct vmxnet3_rxring *rxr)
1868{
1869	struct ifnet *ifp;
1870	struct mbuf *m;
1871	struct vmxnet3_rxdesc *rxd;
1872	struct vmxnet3_rxbuf *rxb;
1873	bus_dma_tag_t tag;
1874	bus_dmamap_t dmap;
1875	bus_dma_segment_t segs[1];
1876	int idx, clsize, btype, flags, nsegs, error;
1877
1878	ifp = sc->vmx_ifp;
1879	tag = rxr->vxrxr_rxtag;
1880	dmap = rxr->vxrxr_spare_dmap;
1881	idx = rxr->vxrxr_fill;
1882	rxd = &rxr->vxrxr_rxd[idx];
1883	rxb = &rxr->vxrxr_rxbuf[idx];
1884
1885#ifdef VMXNET3_FAILPOINTS
1886	KFAIL_POINT_CODE(VMXNET3_FP, newbuf, return ENOBUFS);
1887	if (rxr->vxrxr_rid != 0)
1888		KFAIL_POINT_CODE(VMXNET3_FP, newbuf_body_only, return ENOBUFS);
1889#endif
1890
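	/*
	 * Slots in ring zero that begin a new frame chain receive a regular
	 * cluster, offset by ETHER_ALIGN; every other slot, and all of ring
	 * one, receives a page-sized body buffer used when a frame spans
	 * multiple descriptors.
	 */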
1891	if (rxr->vxrxr_rid == 0 && (idx % sc->vmx_rx_max_chain) == 0) {
1892		flags = M_PKTHDR;
1893		clsize = MCLBYTES;
1894		btype = VMXNET3_BTYPE_HEAD;
1895	} else {
1896#if __FreeBSD_version < 902001
		/*
		 * These mbufs will never be used for the start of a frame.
		 * Roughly prior to branching releng/9.2,
		 * bus_dmamap_load_mbuf_sg() required the mbuf to always be a
		 * packet header. Avoid unnecessary mbuf initialization in
		 * newer versions where that is not the case.
		 */
1904		flags = M_PKTHDR;
1905#else
1906		flags = 0;
1907#endif
1908		clsize = MJUMPAGESIZE;
1909		btype = VMXNET3_BTYPE_BODY;
1910	}
1911
1912	m = m_getjcl(M_NOWAIT, MT_DATA, flags, clsize);
1913	if (m == NULL) {
1914		sc->vmx_stats.vmst_mgetcl_failed++;
1915		return (ENOBUFS);
1916	}
1917
1918	if (btype == VMXNET3_BTYPE_HEAD) {
1919		m->m_len = m->m_pkthdr.len = clsize;
1920		m_adj(m, ETHER_ALIGN);
1921	} else
1922		m->m_len = clsize;
1923
1924	error = bus_dmamap_load_mbuf_sg(tag, dmap, m, &segs[0], &nsegs,
1925	    BUS_DMA_NOWAIT);
1926	if (error) {
1927		m_freem(m);
1928		sc->vmx_stats.vmst_mbuf_load_failed++;
1929		return (error);
1930	}
1931	KASSERT(nsegs == 1,
1932	    ("%s: mbuf %p with too many segments %d", __func__, m, nsegs));
1933#if __FreeBSD_version < 902001
1934	if (btype == VMXNET3_BTYPE_BODY)
1935		m->m_flags &= ~M_PKTHDR;
1936#endif
1937
1938	if (rxb->vrxb_m != NULL) {
1939		bus_dmamap_sync(tag, rxb->vrxb_dmamap, BUS_DMASYNC_POSTREAD);
1940		bus_dmamap_unload(tag, rxb->vrxb_dmamap);
1941	}
1942
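	/*
	 * The map just loaded stays with the new buffer; the slot's previous
	 * map becomes the spare for the next replacement.
	 */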
1943	rxr->vxrxr_spare_dmap = rxb->vrxb_dmamap;
1944	rxb->vrxb_dmamap = dmap;
1945	rxb->vrxb_m = m;
1946
1947	rxd->addr = segs[0].ds_addr;
1948	rxd->len = segs[0].ds_len;
1949	rxd->btype = btype;
1950	rxd->gen = rxr->vxrxr_gen;
1951
1952	vmxnet3_rxr_increment_fill(rxr);
1953	return (0);
1954}
1955
1956static void
1957vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *rxq,
1958    struct vmxnet3_rxring *rxr, int idx)
1959{
1960	struct vmxnet3_rxdesc *rxd;
1961
1962	rxd = &rxr->vxrxr_rxd[idx];
1963	rxd->gen = rxr->vxrxr_gen;
1964	vmxnet3_rxr_increment_fill(rxr);
1965}
1966
1967static void
1968vmxnet3_rxq_discard_chain(struct vmxnet3_rxqueue *rxq)
1969{
1970	struct vmxnet3_softc *sc;
1971	struct vmxnet3_rxring *rxr;
1972	struct vmxnet3_comp_ring *rxc;
1973	struct vmxnet3_rxcompdesc *rxcd;
1974	int idx, eof;
1975
1976	sc = rxq->vxrxq_sc;
1977	rxc = &rxq->vxrxq_comp_ring;
1978
1979	do {
1980		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
1981		if (rxcd->gen != rxc->vxcr_gen)
1982			break;		/* Not expected. */
1983		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1984
1985		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
1986			rxc->vxcr_next = 0;
1987			rxc->vxcr_gen ^= 1;
1988		}
1989
1990		idx = rxcd->rxd_idx;
1991		eof = rxcd->eop;
1992		if (rxcd->qid < sc->vmx_nrxqueues)
1993			rxr = &rxq->vxrxq_cmd_ring[0];
1994		else
1995			rxr = &rxq->vxrxq_cmd_ring[1];
1996		vmxnet3_rxq_eof_discard(rxq, rxr, idx);
1997	} while (!eof);
1998}
1999
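/*
 * Convert the checksum bits of an Rx completion descriptor into mbuf
 * csum_flags.  For non-fragmented TCP/UDP packets with a good checksum,
 * both CSUM_DATA_VALID and CSUM_PSEUDO_HDR are set so the stack skips
 * its own verification.
 */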
2000static void
2001vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2002{
2003
2004	if (rxcd->ipv4) {
2005		m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
2006		if (rxcd->ipcsum_ok)
2007			m->m_pkthdr.csum_flags |= CSUM_IP_VALID;
2008	}
2009
2010	if (!rxcd->fragment) {
2011		if (rxcd->csum_ok && (rxcd->tcp || rxcd->udp)) {
2012			m->m_pkthdr.csum_flags |= CSUM_DATA_VALID |
2013			    CSUM_PSEUDO_HDR;
2014			m->m_pkthdr.csum_data = 0xFFFF;
2015		}
2016	}
2017}
2018
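/*
 * Deliver a completed frame to the network stack.  Flow ID, checksum
 * flags and any VLAN tag are taken from the completion descriptor, and
 * the Rx queue lock is dropped around the call to if_input().
 */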
2019static void
2020vmxnet3_rxq_input(struct vmxnet3_rxqueue *rxq,
2021    struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2022{
2023	struct vmxnet3_softc *sc;
2024	struct ifnet *ifp;
2025
2026	sc = rxq->vxrxq_sc;
2027	ifp = sc->vmx_ifp;
2028
2029	if (rxcd->error) {
2030		rxq->vxrxq_stats.vmrxs_ierrors++;
2031		m_freem(m);
2032		return;
2033	}
2034
2035#ifdef notyet
2036	switch (rxcd->rss_type) {
2037	case VMXNET3_RCD_RSS_TYPE_IPV4:
2038		m->m_pkthdr.flowid = rxcd->rss_hash;
2039		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV4);
2040		break;
2041	case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
2042		m->m_pkthdr.flowid = rxcd->rss_hash;
2043		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV4);
2044		break;
2045	case VMXNET3_RCD_RSS_TYPE_IPV6:
2046		m->m_pkthdr.flowid = rxcd->rss_hash;
2047		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV6);
2048		break;
2049	case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
2050		m->m_pkthdr.flowid = rxcd->rss_hash;
2051		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV6);
2052		break;
2053	default: /* VMXNET3_RCD_RSS_TYPE_NONE */
2054		m->m_pkthdr.flowid = rxq->vxrxq_id;
2055		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2056		break;
2057	}
2058#else
2059	m->m_pkthdr.flowid = rxq->vxrxq_id;
2060	m->m_flags |= M_FLOWID;
2061#endif
2062
2063	if (!rxcd->no_csum)
2064		vmxnet3_rx_csum(rxcd, m);
2065	if (rxcd->vlan) {
2066		m->m_flags |= M_VLANTAG;
2067		m->m_pkthdr.ether_vtag = rxcd->vtag;
2068	}
2069
2070	rxq->vxrxq_stats.vmrxs_ipackets++;
2071	rxq->vxrxq_stats.vmrxs_ibytes += m->m_pkthdr.len;
2072
2073	VMXNET3_RXQ_UNLOCK(rxq);
2074	(*ifp->if_input)(ifp, m);
2075	VMXNET3_RXQ_LOCK(rxq);
2076}
2077
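/*
 * Process the Rx completion ring.  A frame may span several buffers: a
 * head buffer from ring 0 followed by body buffers from either ring.
 * If the completion ring runs dry mid-frame, the partial mbuf chain is
 * stashed in the queue and resumed the next time we are called.
 */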
2078static void
2079vmxnet3_rxq_eof(struct vmxnet3_rxqueue *rxq)
2080{
2081	struct vmxnet3_softc *sc;
2082	struct ifnet *ifp;
2083	struct vmxnet3_rxring *rxr;
2084	struct vmxnet3_comp_ring *rxc;
2085	struct vmxnet3_rxdesc *rxd;
2086	struct vmxnet3_rxcompdesc *rxcd;
2087	struct mbuf *m, *m_head, *m_tail;
2088	int idx, length;
2089
2090	sc = rxq->vxrxq_sc;
2091	ifp = sc->vmx_ifp;
2092	rxc = &rxq->vxrxq_comp_ring;
2093
2094	VMXNET3_RXQ_LOCK_ASSERT(rxq);
2095
2096	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2097		return;
2098
2099	m_head = rxq->vxrxq_mhead;
2100	rxq->vxrxq_mhead = NULL;
2101	m_tail = rxq->vxrxq_mtail;
2102	rxq->vxrxq_mtail = NULL;
2103	MPASS(m_head == NULL || m_tail != NULL);
2104
2105	for (;;) {
2106		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
2107		if (rxcd->gen != rxc->vxcr_gen) {
2108			rxq->vxrxq_mhead = m_head;
2109			rxq->vxrxq_mtail = m_tail;
2110			break;
2111		}
2112		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
2113
2114		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
2115			rxc->vxcr_next = 0;
2116			rxc->vxcr_gen ^= 1;
2117		}
2118
2119		idx = rxcd->rxd_idx;
2120		length = rxcd->len;
2121		if (rxcd->qid < sc->vmx_nrxqueues)
2122			rxr = &rxq->vxrxq_cmd_ring[0];
2123		else
2124			rxr = &rxq->vxrxq_cmd_ring[1];
2125		rxd = &rxr->vxrxr_rxd[idx];
2126
2127		m = rxr->vxrxr_rxbuf[idx].vrxb_m;
2128		KASSERT(m != NULL, ("%s: queue %d idx %d without mbuf",
2129		    __func__, rxcd->qid, idx));
2130
2131		/*
2132		 * The host may skip descriptors. We detect this when the
2133		 * completed descriptor's index does not match the ring's
2134		 * current fill index. Catch up with the host now.
2135		 */
2136		if (__predict_false(rxr->vxrxr_fill != idx)) {
2137			while (rxr->vxrxr_fill != idx) {
2138				rxr->vxrxr_rxd[rxr->vxrxr_fill].gen =
2139				    rxr->vxrxr_gen;
2140				vmxnet3_rxr_increment_fill(rxr);
2141			}
2142		}
2143
2144		if (rxcd->sop) {
2145			KASSERT(rxd->btype == VMXNET3_BTYPE_HEAD,
2146			    ("%s: start of frame w/o head buffer", __func__));
2147			KASSERT(rxr == &rxq->vxrxq_cmd_ring[0],
2148			    ("%s: start of frame not in ring 0", __func__));
2149			KASSERT((idx % sc->vmx_rx_max_chain) == 0,
2150			    ("%s: start of frame at unexpected index %d (%d)",
2151			     __func__, idx, sc->vmx_rx_max_chain));
2152			KASSERT(m_head == NULL,
2153			    ("%s: duplicate start of frame?", __func__));
2154
2155			if (length == 0) {
2156				/* Just ignore this descriptor. */
2157				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2158				goto nextp;
2159			}
2160
2161			if (vmxnet3_newbuf(sc, rxr) != 0) {
2162				rxq->vxrxq_stats.vmrxs_iqdrops++;
2163				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2164				if (!rxcd->eop)
2165					vmxnet3_rxq_discard_chain(rxq);
2166				goto nextp;
2167			}
2168
2169			m->m_pkthdr.rcvif = ifp;
2170			m->m_pkthdr.len = m->m_len = length;
2171			m->m_pkthdr.csum_flags = 0;
2172			m_head = m_tail = m;
2173
2174		} else {
2175			KASSERT(rxd->btype == VMXNET3_BTYPE_BODY,
2176			    ("%s: non start of frame w/o body buffer", __func__));
2177			KASSERT(m_head != NULL,
2178			    ("%s: frame not started?", __func__));
2179
2180			if (vmxnet3_newbuf(sc, rxr) != 0) {
2181				rxq->vxrxq_stats.vmrxs_iqdrops++;
2182				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2183				if (!rxcd->eop)
2184					vmxnet3_rxq_discard_chain(rxq);
2185				m_freem(m_head);
2186				m_head = m_tail = NULL;
2187				goto nextp;
2188			}
2189
2190			m->m_len = length;
2191			m_head->m_pkthdr.len += length;
2192			m_tail->m_next = m;
2193			m_tail = m;
2194		}
2195
2196		if (rxcd->eop) {
2197			vmxnet3_rxq_input(rxq, rxcd, m_head);
2198			m_head = m_tail = NULL;
2199
2200			/* Must recheck after dropping the Rx lock. */
2201			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2202				break;
2203		}
2204
2205nextp:
2206		if (__predict_false(rxq->vxrxq_rs->update_rxhead)) {
2207			int qid = rxcd->qid;
2208			bus_size_t r;
2209
2210			idx = (idx + 1) % rxr->vxrxr_ndesc;
2211			if (qid >= sc->vmx_nrxqueues) {
2212				qid -= sc->vmx_nrxqueues;
2213				r = VMXNET3_BAR0_RXH2(qid);
2214			} else
2215				r = VMXNET3_BAR0_RXH1(qid);
2216			vmxnet3_write_bar0(sc, r, idx);
2217		}
2218	}
2219}
2220
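/*
 * Interrupt handler used for INTx and single-vector MSI: it services
 * pending events plus the first Rx and Tx queues.  In the INTx case the
 * interrupt status register is read first to check whether the
 * interrupt is actually ours.
 */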
2221static void
2222vmxnet3_legacy_intr(void *xsc)
2223{
2224	struct vmxnet3_softc *sc;
2225	struct vmxnet3_rxqueue *rxq;
2226	struct vmxnet3_txqueue *txq;
2227
2228	sc = xsc;
2229	rxq = &sc->vmx_rxq[0];
2230	txq = &sc->vmx_txq[0];
2231
2232	if (sc->vmx_intr_type == VMXNET3_IT_LEGACY) {
2233		if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
2234			return;
2235	}
2236	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2237		vmxnet3_disable_all_intrs(sc);
2238
2239	if (sc->vmx_ds->event != 0)
2240		vmxnet3_evintr(sc);
2241
2242	VMXNET3_RXQ_LOCK(rxq);
2243	vmxnet3_rxq_eof(rxq);
2244	VMXNET3_RXQ_UNLOCK(rxq);
2245
2246	VMXNET3_TXQ_LOCK(txq);
2247	vmxnet3_txq_eof(txq);
2248	vmxnet3_txq_start(txq);
2249	VMXNET3_TXQ_UNLOCK(txq);
2250
2251	vmxnet3_enable_all_intrs(sc);
2252}
2253
2254static void
2255vmxnet3_txq_intr(void *xtxq)
2256{
2257	struct vmxnet3_softc *sc;
2258	struct vmxnet3_txqueue *txq;
2259
2260	txq = xtxq;
2261	sc = txq->vxtxq_sc;
2262
2263	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2264		vmxnet3_disable_intr(sc, txq->vxtxq_intr_idx);
2265
2266	VMXNET3_TXQ_LOCK(txq);
2267	vmxnet3_txq_eof(txq);
2268	vmxnet3_txq_start(txq);
2269	VMXNET3_TXQ_UNLOCK(txq);
2270
2271	vmxnet3_enable_intr(sc, txq->vxtxq_intr_idx);
2272}
2273
2274static void
2275vmxnet3_rxq_intr(void *xrxq)
2276{
2277	struct vmxnet3_softc *sc;
2278	struct vmxnet3_rxqueue *rxq;
2279
2280	rxq = xrxq;
2281	sc = rxq->vxrxq_sc;
2282
2283	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2284		vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx);
2285
2286	VMXNET3_RXQ_LOCK(rxq);
2287	vmxnet3_rxq_eof(rxq);
2288	VMXNET3_RXQ_UNLOCK(rxq);
2289
2290	vmxnet3_enable_intr(sc, rxq->vxrxq_intr_idx);
2291}
2292
2293static void
2294vmxnet3_event_intr(void *xsc)
2295{
2296	struct vmxnet3_softc *sc;
2297
2298	sc = xsc;
2299
2300	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2301		vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
2302
2303	if (sc->vmx_ds->event != 0)
2304		vmxnet3_evintr(sc);
2305
2306	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2307}
2308
2309static void
2310vmxnet3_txstop(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2311{
2312	struct vmxnet3_txring *txr;
2313	struct vmxnet3_txbuf *txb;
2314	int i;
2315
2316	txr = &txq->vxtxq_cmd_ring;
2317
2318	for (i = 0; i < txr->vxtxr_ndesc; i++) {
2319		txb = &txr->vxtxr_txbuf[i];
2320
2321		if (txb->vtxb_m == NULL)
2322			continue;
2323
2324		bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
2325		    BUS_DMASYNC_POSTWRITE);
2326		bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap);
2327		m_freem(txb->vtxb_m);
2328		txb->vtxb_m = NULL;
2329	}
2330}
2331
2332static void
2333vmxnet3_rxstop(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2334{
2335	struct vmxnet3_rxring *rxr;
2336	struct vmxnet3_rxbuf *rxb;
2337	int i, j;
2338
2339	if (rxq->vxrxq_mhead != NULL) {
2340		m_freem(rxq->vxrxq_mhead);
2341		rxq->vxrxq_mhead = NULL;
2342		rxq->vxrxq_mtail = NULL;
2343	}
2344
2345	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
2346		rxr = &rxq->vxrxq_cmd_ring[i];
2347
2348		for (j = 0; j < rxr->vxrxr_ndesc; j++) {
2349			rxb = &rxr->vxrxr_rxbuf[j];
2350
2351			if (rxb->vrxb_m == NULL)
2352				continue;
2353
2354			bus_dmamap_sync(rxr->vxrxr_rxtag, rxb->vrxb_dmamap,
2355			    BUS_DMASYNC_POSTREAD);
2356			bus_dmamap_unload(rxr->vxrxr_rxtag, rxb->vrxb_dmamap);
2357			m_freem(rxb->vrxb_m);
2358			rxb->vrxb_m = NULL;
2359		}
2360	}
2361}
2362
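/*
 * Acquire and immediately release each queue lock.  The queue interrupt
 * handlers run under these locks, so any handler already inside a queue
 * has finished before we return and the rings can be safely torn down.
 */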
2363static void
2364vmxnet3_stop_rendezvous(struct vmxnet3_softc *sc)
2365{
2366	struct vmxnet3_rxqueue *rxq;
2367	struct vmxnet3_txqueue *txq;
2368	int i;
2369
2370	for (i = 0; i < sc->vmx_nrxqueues; i++) {
2371		rxq = &sc->vmx_rxq[i];
2372		VMXNET3_RXQ_LOCK(rxq);
2373		VMXNET3_RXQ_UNLOCK(rxq);
2374	}
2375
2376	for (i = 0; i < sc->vmx_ntxqueues; i++) {
2377		txq = &sc->vmx_txq[i];
2378		VMXNET3_TXQ_LOCK(txq);
2379		VMXNET3_TXQ_UNLOCK(txq);
2380	}
2381}
2382
2383static void
2384vmxnet3_stop(struct vmxnet3_softc *sc)
2385{
2386	struct ifnet *ifp;
2387	int q;
2388
2389	ifp = sc->vmx_ifp;
2390	VMXNET3_CORE_LOCK_ASSERT(sc);
2391
2392	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2393	sc->vmx_link_active = 0;
2394	callout_stop(&sc->vmx_tick);
2395
2396	/* Disable interrupts. */
2397	vmxnet3_disable_all_intrs(sc);
2398	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
2399
2400	vmxnet3_stop_rendezvous(sc);
2401
2402	for (q = 0; q < sc->vmx_ntxqueues; q++)
2403		vmxnet3_txstop(sc, &sc->vmx_txq[q]);
2404	for (q = 0; q < sc->vmx_nrxqueues; q++)
2405		vmxnet3_rxstop(sc, &sc->vmx_rxq[q]);
2406
2407	vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
2408}
2409
2410static void
2411vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2412{
2413	struct vmxnet3_txring *txr;
2414	struct vmxnet3_comp_ring *txc;
2415
2416	txr = &txq->vxtxq_cmd_ring;
2417	txr->vxtxr_head = 0;
2418	txr->vxtxr_next = 0;
2419	txr->vxtxr_gen = VMXNET3_INIT_GEN;
2420	bzero(txr->vxtxr_txd,
2421	    txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc));
2422
2423	txc = &txq->vxtxq_comp_ring;
2424	txc->vxcr_next = 0;
2425	txc->vxcr_gen = VMXNET3_INIT_GEN;
2426	bzero(txc->vxcr_u.txcd,
2427	    txc->vxcr_ndesc * sizeof(struct vmxnet3_txcompdesc));
2428}
2429
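/*
 * (Re)initialize the Rx rings of a queue and post fresh buffers.  Ring
 * 1 is populated only when LRO is enabled or the frame size is too
 * large for ring 0 alone; otherwise it is left empty.
 */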
2430static int
2431vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2432{
2433	struct ifnet *ifp;
2434	struct vmxnet3_rxring *rxr;
2435	struct vmxnet3_comp_ring *rxc;
2436	int i, populate, idx, frame_size, error;
2437
2438	ifp = sc->vmx_ifp;
2439	frame_size = ETHER_ALIGN + sizeof(struct ether_vlan_header) +
2440	    ifp->if_mtu;
2441
2442	/*
2443	 * If the MTU causes us to exceed what a regular-sized cluster can
2444	 * handle, we allocate a second MJUMPAGESIZE cluster after it in
2445	 * ring 0. If in use, ring 1 always contains MJUMPAGESIZE clusters.
2446	 *
2447	 * Keep rx_max_chain a divisor of the maximum Rx ring size to make
2448	 * our life easier. We do not support changing the ring size after
2449	 * the attach.
2450	 */
2451	if (frame_size <= MCLBYTES)
2452		sc->vmx_rx_max_chain = 1;
2453	else
2454		sc->vmx_rx_max_chain = 2;
2455
2456	/*
2457	 * Only populate ring 1 if the configuration will take advantage
2458	 * of it. That is either when LRO is enabled or the frame size
2459	 * exceeds what ring 0 can contain.
2460	 */
2461	if ((ifp->if_capenable & IFCAP_LRO) == 0 &&
2462	    frame_size <= MCLBYTES + MJUMPAGESIZE)
2463		populate = 1;
2464	else
2465		populate = VMXNET3_RXRINGS_PERQ;
2466
2467	for (i = 0; i < populate; i++) {
2468		rxr = &rxq->vxrxq_cmd_ring[i];
2469		rxr->vxrxr_fill = 0;
2470		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
2471		bzero(rxr->vxrxr_rxd,
2472		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2473
2474		for (idx = 0; idx < rxr->vxrxr_ndesc; idx++) {
2475			error = vmxnet3_newbuf(sc, rxr);
2476			if (error)
2477				return (error);
2478		}
2479	}
2480
2481	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
2482		rxr = &rxq->vxrxq_cmd_ring[i];
2483		rxr->vxrxr_fill = 0;
2484		rxr->vxrxr_gen = 0;
2485		bzero(rxr->vxrxr_rxd,
2486		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2487	}
2488
2489	rxc = &rxq->vxrxq_comp_ring;
2490	rxc->vxcr_next = 0;
2491	rxc->vxcr_gen = VMXNET3_INIT_GEN;
2492	bzero(rxc->vxcr_u.rxcd,
2493	    rxc->vxcr_ndesc * sizeof(struct vmxnet3_rxcompdesc));
2494
2495	return (0);
2496}
2497
2498static int
2499vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
2500{
2501	device_t dev;
2502	int q, error;
2503
2504	dev = sc->vmx_dev;
2505
2506	for (q = 0; q < sc->vmx_ntxqueues; q++)
2507		vmxnet3_txinit(sc, &sc->vmx_txq[q]);
2508
2509	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2510		error = vmxnet3_rxinit(sc, &sc->vmx_rxq[q]);
2511		if (error) {
2512			device_printf(dev, "cannot populate Rx queue %d\n", q);
2513			return (error);
2514		}
2515	}
2516
2517	return (0);
2518}
2519
2520static int
2521vmxnet3_enable_device(struct vmxnet3_softc *sc)
2522{
2523	int q;
2524
2525	if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
2526		device_printf(sc->vmx_dev, "device enable command failed!\n");
2527		return (1);
2528	}
2529
2530	/* Reset the Rx queue heads. */
2531	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2532		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
2533		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
2534	}
2535
2536	return (0);
2537}
2538
2539static void
2540vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
2541{
2542	struct ifnet *ifp;
2543
2544	ifp = sc->vmx_ifp;
2545
2546	vmxnet3_set_rxfilter(sc);
2547
2548	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2549		bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter,
2550		    sizeof(sc->vmx_ds->vlan_filter));
2551	else
2552		bzero(sc->vmx_ds->vlan_filter,
2553		    sizeof(sc->vmx_ds->vlan_filter));
2554	vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
2555}
2556
2557static int
2558vmxnet3_reinit(struct vmxnet3_softc *sc)
2559{
2560
2561	vmxnet3_reinit_interface(sc);
2562	vmxnet3_reinit_shared_data(sc);
2563
2564	if (vmxnet3_reinit_queues(sc) != 0)
2565		return (ENXIO);
2566
2567	if (vmxnet3_enable_device(sc) != 0)
2568		return (ENXIO);
2569
2570	vmxnet3_reinit_rxfilters(sc);
2571
2572	return (0);
2573}
2574
2575static void
2576vmxnet3_init_locked(struct vmxnet3_softc *sc)
2577{
2578	struct ifnet *ifp;
2579
2580	ifp = sc->vmx_ifp;
2581
2582	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2583		return;
2584
2585	vmxnet3_stop(sc);
2586
2587	if (vmxnet3_reinit(sc) != 0) {
2588		vmxnet3_stop(sc);
2589		return;
2590	}
2591
2592	ifp->if_drv_flags |= IFF_DRV_RUNNING;
2593	vmxnet3_link_status(sc);
2594
2595	vmxnet3_enable_all_intrs(sc);
2596	callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
2597}
2598
2599static void
2600vmxnet3_init(void *xsc)
2601{
2602	struct vmxnet3_softc *sc;
2603
2604	sc = xsc;
2605
2606	VMXNET3_CORE_LOCK(sc);
2607	vmxnet3_init_locked(sc);
2608	VMXNET3_CORE_UNLOCK(sc);
2609}
2610
2611/*
2612 * BMV: Much of this can go away once we finally have offsets in
2613 * the mbuf packet header. Bug andre@.
2614 */
2615static int
2616vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *txq, struct mbuf *m,
2617    int *etype, int *proto, int *start)
2618{
2619	struct ether_vlan_header *evh;
2620	int offset;
2621#if defined(INET)
2622	struct ip *ip = NULL;
2623	struct ip iphdr;
2624#endif
2625#if defined(INET6)
2626	struct ip6_hdr *ip6 = NULL;
2627	struct ip6_hdr ip6hdr;
2628#endif
2629
2630	evh = mtod(m, struct ether_vlan_header *);
2631	if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2632		/* BMV: We should handle nested VLAN tags too. */
2633		*etype = ntohs(evh->evl_proto);
2634		offset = sizeof(struct ether_vlan_header);
2635	} else {
2636		*etype = ntohs(evh->evl_encap_proto);
2637		offset = sizeof(struct ether_header);
2638	}
2639
2640	switch (*etype) {
2641#if defined(INET)
2642	case ETHERTYPE_IP:
2643		if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
2644			m_copydata(m, offset, sizeof(struct ip),
2645			    (caddr_t) &iphdr);
2646			ip = &iphdr;
2647		} else
2648			ip = mtodo(m, offset);
2649		*proto = ip->ip_p;
2650		*start = offset + (ip->ip_hl << 2);
2651		break;
2652#endif
2653#if defined(INET6)
2654	case ETHERTYPE_IPV6:
2655		if (__predict_false(m->m_len <
2656		    offset + sizeof(struct ip6_hdr))) {
2657			m_copydata(m, offset, sizeof(struct ip6_hdr),
2658			    (caddr_t) &ip6hdr);
2659			ip6 = &ip6hdr;
2660		} else
2661			ip6 = mtodo(m, offset);
2662		*proto = -1;
2663		*start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
2664		/* Assert the network stack sent us a valid packet. */
2665		KASSERT(*start > offset,
2666		    ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
2667		    *start, offset, *proto));
2668		break;
2669#endif
2670	default:
2671		return (EINVAL);
2672	}
2673
2674	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
2675		struct tcphdr *tcp, tcphdr;
2676		uint16_t sum;
2677
2678		if (__predict_false(*proto != IPPROTO_TCP)) {
2679			/* Likely failed to correctly parse the mbuf. */
2680			return (EINVAL);
2681		}
2682
2683		txq->vxtxq_stats.vmtxs_tso++;
2684
2685		switch (*etype) {
2686#if defined(INET)
2687		case ETHERTYPE_IP:
2688			sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2689			    htons(IPPROTO_TCP));
2690			break;
2691#endif
2692#if defined(INET6)
2693		case ETHERTYPE_IPV6:
2694			sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
2695			break;
2696#endif
2697		default:
2698			sum = 0;
2699			break;
2700		}
2701
2702		if (m->m_len < *start + sizeof(struct tcphdr)) {
2703			m_copyback(m, *start + offsetof(struct tcphdr, th_sum),
2704			    sizeof(uint16_t), (caddr_t) &sum);
2705			m_copydata(m, *start, sizeof(struct tcphdr),
2706			    (caddr_t) &tcphdr);
2707			tcp = &tcphdr;
2708		} else {
2709			tcp = mtodo(m, *start);
2710			tcp->th_sum = sum;
2711		}
2712
2713		/*
2714		 * For TSO, the size of the protocol header is also
2715		 * included in the descriptor header size.
2716		 */
2717		*start += (tcp->th_off << 2);
2718	} else
2719		txq->vxtxq_stats.vmtxs_csum++;
2720
2721	return (0);
2722}
2723
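/*
 * DMA-map an outgoing mbuf chain.  If the chain has too many segments
 * (EFBIG), it is defragmented once and the load retried; should that
 * also fail, the chain is freed and the failure counted.
 */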
2724static int
2725vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *txq, struct mbuf **m0,
2726    bus_dmamap_t dmap, bus_dma_segment_t segs[], int *nsegs)
2727{
2728	struct vmxnet3_txring *txr;
2729	struct mbuf *m;
2730	bus_dma_tag_t tag;
2731	int error;
2732
2733	txr = &txq->vxtxq_cmd_ring;
2734	m = *m0;
2735	tag = txr->vxtxr_txtag;
2736
2737	error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0);
2738	if (error == 0 || error != EFBIG)
2739		return (error);
2740
2741	m = m_defrag(m, M_NOWAIT);
2742	if (m != NULL) {
2743		*m0 = m;
2744		error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0);
2745	} else
2746		error = ENOBUFS;
2747
2748	if (error) {
2749		m_freem(*m0);
2750		*m0 = NULL;
2751		txq->vxtxq_sc->vmx_stats.vmst_defrag_failed++;
2752	} else
2753		txq->vxtxq_sc->vmx_stats.vmst_defragged++;
2754
2755	return (error);
2756}
2757
2758static void
2759vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *txq, bus_dmamap_t dmap)
2760{
2761	struct vmxnet3_txring *txr;
2762
2763	txr = &txq->vxtxq_cmd_ring;
2764	bus_dmamap_unload(txr->vxtxr_txtag, dmap);
2765}
2766
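/*
 * Encapsulate one mbuf chain into Tx descriptors.  The start-of-packet
 * descriptor is initially written with an inverted generation bit and
 * is only flipped to the ring's generation after a write barrier, so
 * the host never sees a partially constructed chain.  VLAN, checksum
 * and TSO offload parameters are recorded in the start-of-packet
 * descriptor.
 */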
2767static int
2768vmxnet3_txq_encap(struct vmxnet3_txqueue *txq, struct mbuf **m0)
2769{
2770	struct vmxnet3_softc *sc;
2771	struct vmxnet3_txring *txr;
2772	struct vmxnet3_txdesc *txd, *sop;
2773	struct mbuf *m;
2774	bus_dmamap_t dmap;
2775	bus_dma_segment_t segs[VMXNET3_TX_MAXSEGS];
2776	int i, gen, nsegs, etype, proto, start, error;
2777
2778	sc = txq->vxtxq_sc;
2779	start = 0;
2780	txd = NULL;
2781	txr = &txq->vxtxq_cmd_ring;
2782	dmap = txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_dmamap;
2783
2784	error = vmxnet3_txq_load_mbuf(txq, m0, dmap, segs, &nsegs);
2785	if (error)
2786		return (error);
2787
2788	m = *m0;
2789	M_ASSERTPKTHDR(m);
2790	KASSERT(nsegs <= VMXNET3_TX_MAXSEGS,
2791	    ("%s: mbuf %p with too many segments %d", __func__, m, nsegs));
2792
2793	if (VMXNET3_TXRING_AVAIL(txr) < nsegs) {
2794		txq->vxtxq_stats.vmtxs_full++;
2795		vmxnet3_txq_unload_mbuf(txq, dmap);
2796		return (ENOSPC);
2797	} else if (m->m_pkthdr.csum_flags & VMXNET3_CSUM_ALL_OFFLOAD) {
2798		error = vmxnet3_txq_offload_ctx(txq, m, &etype, &proto, &start);
2799		if (error) {
2800			txq->vxtxq_stats.vmtxs_offload_failed++;
2801			vmxnet3_txq_unload_mbuf(txq, dmap);
2802			m_freem(m);
2803			*m0 = NULL;
2804			return (error);
2805		}
2806	}
2807
2808	txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_m = m;
2809	sop = &txr->vxtxr_txd[txr->vxtxr_head];
2810	gen = txr->vxtxr_gen ^ 1;	/* Not yet owned by the device. */
2811
2812	for (i = 0; i < nsegs; i++) {
2813		txd = &txr->vxtxr_txd[txr->vxtxr_head];
2814
2815		txd->addr = segs[i].ds_addr;
2816		txd->len = segs[i].ds_len;
2817		txd->gen = gen;
2818		txd->dtype = 0;
2819		txd->offload_mode = VMXNET3_OM_NONE;
2820		txd->offload_pos = 0;
2821		txd->hlen = 0;
2822		txd->eop = 0;
2823		txd->compreq = 0;
2824		txd->vtag_mode = 0;
2825		txd->vtag = 0;
2826
2827		if (++txr->vxtxr_head == txr->vxtxr_ndesc) {
2828			txr->vxtxr_head = 0;
2829			txr->vxtxr_gen ^= 1;
2830		}
2831		gen = txr->vxtxr_gen;
2832	}
2833	txd->eop = 1;
2834	txd->compreq = 1;
2835
2836	if (m->m_flags & M_VLANTAG) {
2837		sop->vtag_mode = 1;
2838		sop->vtag = m->m_pkthdr.ether_vtag;
2839	}
2840
2841	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
2842		sop->offload_mode = VMXNET3_OM_TSO;
2843		sop->hlen = start;
2844		sop->offload_pos = m->m_pkthdr.tso_segsz;
2845	} else if (m->m_pkthdr.csum_flags & (VMXNET3_CSUM_OFFLOAD |
2846	    VMXNET3_CSUM_OFFLOAD_IPV6)) {
2847		sop->offload_mode = VMXNET3_OM_CSUM;
2848		sop->hlen = start;
2849		sop->offload_pos = start + m->m_pkthdr.csum_data;
2850	}
2851
2852	/* Finally, change the ownership. */
2853	vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
2854	sop->gen ^= 1;
2855
2856	txq->vxtxq_ts->npending += nsegs;
2857	if (txq->vxtxq_ts->npending >= txq->vxtxq_ts->intr_threshold) {
2858		txq->vxtxq_ts->npending = 0;
2859		vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id),
2860		    txr->vxtxr_head);
2861	}
2862
2863	return (0);
2864}
2865
2866#ifdef VMXNET3_LEGACY_TX
2867
2868static void
2869vmxnet3_start_locked(struct ifnet *ifp)
2870{
2871	struct vmxnet3_softc *sc;
2872	struct vmxnet3_txqueue *txq;
2873	struct vmxnet3_txring *txr;
2874	struct mbuf *m_head;
2875	int tx, avail;
2876
2877	sc = ifp->if_softc;
2878	txq = &sc->vmx_txq[0];
2879	txr = &txq->vxtxq_cmd_ring;
2880	tx = 0;
2881
2882	VMXNET3_TXQ_LOCK_ASSERT(txq);
2883
2884	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2885	    sc->vmx_link_active == 0)
2886		return;
2887
2888	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
2889		if ((avail = VMXNET3_TXRING_AVAIL(txr)) < 2)
2890			break;
2891
2892		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
2893		if (m_head == NULL)
2894			break;
2895
2896		/* Assume the worst case if this mbuf is the head of a chain. */
2897		if (m_head->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) {
2898			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2899			break;
2900		}
2901
2902		if (vmxnet3_txq_encap(txq, &m_head) != 0) {
2903			if (m_head != NULL)
2904				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2905			break;
2906		}
2907
2908		tx++;
2909		ETHER_BPF_MTAP(ifp, m_head);
2910	}
2911
2912	if (tx > 0)
2913		txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
2914}
2915
2916static void
2917vmxnet3_start(struct ifnet *ifp)
2918{
2919	struct vmxnet3_softc *sc;
2920	struct vmxnet3_txqueue *txq;
2921
2922	sc = ifp->if_softc;
2923	txq = &sc->vmx_txq[0];
2924
2925	VMXNET3_TXQ_LOCK(txq);
2926	vmxnet3_start_locked(ifp);
2927	VMXNET3_TXQ_UNLOCK(txq);
2928}
2929
2930#else /* !VMXNET3_LEGACY_TX */
2931
2932static int
2933vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *txq, struct mbuf *m)
2934{
2935	struct vmxnet3_softc *sc;
2936	struct vmxnet3_txring *txr;
2937	struct buf_ring *br;
2938	struct ifnet *ifp;
2939	int tx, avail, error;
2940
2941	sc = txq->vxtxq_sc;
2942	br = txq->vxtxq_br;
2943	ifp = sc->vmx_ifp;
2944	txr = &txq->vxtxq_cmd_ring;
2945	tx = 0;
2946	error = 0;
2947
2948	VMXNET3_TXQ_LOCK_ASSERT(txq);
2949
2950	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2951	    sc->vmx_link_active == 0) {
2952		if (m != NULL)
2953			error = drbr_enqueue(ifp, br, m);
2954		return (error);
2955	}
2956
2957	if (m != NULL) {
2958		error = drbr_enqueue(ifp, br, m);
2959		if (error)
2960			return (error);
2961	}
2962
2963	while ((avail = VMXNET3_TXRING_AVAIL(txr)) >= 2) {
2964		m = drbr_peek(ifp, br);
2965		if (m == NULL)
2966			break;
2967
2968		/* Assume the worst case if this mbuf is the head of a chain. */
2969		if (m->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) {
2970			drbr_putback(ifp, br, m);
2971			break;
2972		}
2973
2974		if (vmxnet3_txq_encap(txq, &m) != 0) {
2975			if (m != NULL)
2976				drbr_putback(ifp, br, m);
2977			else
2978				drbr_advance(ifp, br);
2979			break;
2980		}
2981		drbr_advance(ifp, br);
2982
2983		tx++;
2984		ETHER_BPF_MTAP(ifp, m);
2985	}
2986
2987	if (tx > 0)
2988		txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
2989
2990	return (0);
2991}
2992
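/*
 * Multiqueue transmit entry point.  The Tx queue is chosen from the
 * mbuf's flow ID when present, otherwise from the current CPU.  If the
 * queue lock cannot be taken right away, the mbuf is placed on the
 * queue's buf_ring and the deferred start task is scheduled.
 */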
2993static int
2994vmxnet3_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
2995{
2996	struct vmxnet3_softc *sc;
2997	struct vmxnet3_txqueue *txq;
2998	int i, ntxq, error;
2999
3000	sc = ifp->if_softc;
3001	ntxq = sc->vmx_ntxqueues;
3002
3003	if (m->m_flags & M_FLOWID)
3004		i = m->m_pkthdr.flowid % ntxq;
3005	else
3006		i = curcpu % ntxq;
3007
3008	txq = &sc->vmx_txq[i];
3009
3010	if (VMXNET3_TXQ_TRYLOCK(txq) != 0) {
3011		error = vmxnet3_txq_mq_start_locked(txq, m);
3012		VMXNET3_TXQ_UNLOCK(txq);
3013	} else {
3014		error = drbr_enqueue(ifp, txq->vxtxq_br, m);
3015		taskqueue_enqueue(sc->vmx_tq, &txq->vxtxq_defrtask);
3016	}
3017
3018	return (error);
3019}
3020
3021static void
3022vmxnet3_txq_tq_deferred(void *xtxq, int pending)
3023{
3024	struct vmxnet3_softc *sc;
3025	struct vmxnet3_txqueue *txq;
3026
3027	txq = xtxq;
3028	sc = txq->vxtxq_sc;
3029
3030	VMXNET3_TXQ_LOCK(txq);
3031	if (!drbr_empty(sc->vmx_ifp, txq->vxtxq_br))
3032		vmxnet3_txq_mq_start_locked(txq, NULL);
3033	VMXNET3_TXQ_UNLOCK(txq);
3034}
3035
3036#endif /* VMXNET3_LEGACY_TX */
3037
3038static void
3039vmxnet3_txq_start(struct vmxnet3_txqueue *txq)
3040{
3041	struct vmxnet3_softc *sc;
3042	struct ifnet *ifp;
3043
3044	sc = txq->vxtxq_sc;
3045	ifp = sc->vmx_ifp;
3046
3047#ifdef VMXNET3_LEGACY_TX
3048	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
3049		vmxnet3_start_locked(ifp);
3050#else
3051	if (!drbr_empty(ifp, txq->vxtxq_br))
3052		vmxnet3_txq_mq_start_locked(txq, NULL);
3053#endif
3054}
3055
3056static void
3057vmxnet3_tx_start_all(struct vmxnet3_softc *sc)
3058{
3059	struct vmxnet3_txqueue *txq;
3060	int i;
3061
3062	VMXNET3_CORE_LOCK_ASSERT(sc);
3063
3064	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3065		txq = &sc->vmx_txq[i];
3066
3067		VMXNET3_TXQ_LOCK(txq);
3068		vmxnet3_txq_start(txq);
3069		VMXNET3_TXQ_UNLOCK(txq);
3070	}
3071}
3072
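/*
 * Set or clear one bit in the 4096-bit VLAN filter bitmap.  Our private
 * copy is always updated; the copy shared with the host is only updated
 * and pushed when hardware VLAN filtering is enabled.
 */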
3073static void
3074vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
3075{
3076	struct ifnet *ifp;
3077	int idx, bit;
3078
3079	ifp = sc->vmx_ifp;
3080	idx = (tag >> 5) & 0x7F;
3081	bit = tag & 0x1F;
3082
3083	if (tag == 0 || tag > 4095)
3084		return;
3085
3086	VMXNET3_CORE_LOCK(sc);
3087
3088	/* Update our private VLAN bitvector. */
3089	if (add)
3090		sc->vmx_vlan_filter[idx] |= (1 << bit);
3091	else
3092		sc->vmx_vlan_filter[idx] &= ~(1 << bit);
3093
3094	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
3095		if (add)
3096			sc->vmx_ds->vlan_filter[idx] |= (1 << bit);
3097		else
3098			sc->vmx_ds->vlan_filter[idx] &= ~(1 << bit);
3099		vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
3100	}
3101
3102	VMXNET3_CORE_UNLOCK(sc);
3103}
3104
3105static void
3106vmxnet3_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3107{
3108
3109	if (ifp->if_softc == arg)
3110		vmxnet3_update_vlan_filter(arg, 1, tag);
3111}
3112
3113static void
3114vmxnet3_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3115{
3116
3117	if (ifp->if_softc == arg)
3118		vmxnet3_update_vlan_filter(arg, 0, tag);
3119}
3120
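/*
 * Program the receive mode and the multicast address table.  Unicast
 * and broadcast frames are always accepted; promiscuous and
 * all-multicast modes follow the interface flags.  If the multicast
 * list exceeds VMXNET3_MULTICAST_MAX entries, fall back to
 * all-multicast.
 */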
3121static void
3122vmxnet3_set_rxfilter(struct vmxnet3_softc *sc)
3123{
3124	struct ifnet *ifp;
3125	struct vmxnet3_driver_shared *ds;
3126	struct ifmultiaddr *ifma;
3127	u_int mode;
3128
3129	ifp = sc->vmx_ifp;
3130	ds = sc->vmx_ds;
3131
3132	mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
3133	if (ifp->if_flags & IFF_PROMISC)
3134		mode |= VMXNET3_RXMODE_PROMISC;
3135	if (ifp->if_flags & IFF_ALLMULTI)
3136		mode |= VMXNET3_RXMODE_ALLMULTI;
3137	else {
3138		int cnt = 0, overflow = 0;
3139
3140		if_maddr_rlock(ifp);
3141		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3142			if (ifma->ifma_addr->sa_family != AF_LINK)
3143				continue;
3144			else if (cnt == VMXNET3_MULTICAST_MAX) {
3145				overflow = 1;
3146				break;
3147			}
3148
3149			bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
3150			   &sc->vmx_mcast[cnt*ETHER_ADDR_LEN], ETHER_ADDR_LEN);
3151			cnt++;
3152		}
3153		if_maddr_runlock(ifp);
3154
3155		if (overflow != 0) {
3156			cnt = 0;
3157			mode |= VMXNET3_RXMODE_ALLMULTI;
3158		} else if (cnt > 0)
3159			mode |= VMXNET3_RXMODE_MCAST;
3160		ds->mcast_tablelen = cnt * ETHER_ADDR_LEN;
3161	}
3162
3163	ds->rxmode = mode;
3164
3165	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
3166	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
3167}
3168
3169static int
3170vmxnet3_change_mtu(struct vmxnet3_softc *sc, int mtu)
3171{
3172	struct ifnet *ifp;
3173
3174	ifp = sc->vmx_ifp;
3175
3176	if (mtu < VMXNET3_MIN_MTU || mtu > VMXNET3_MAX_MTU)
3177		return (EINVAL);
3178
3179	ifp->if_mtu = mtu;
3180
3181	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
3182		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3183		vmxnet3_init_locked(sc);
3184	}
3185
3186	return (0);
3187}
3188
3189static int
3190vmxnet3_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
3191{
3192	struct vmxnet3_softc *sc;
3193	struct ifreq *ifr;
3194	int reinit, mask, error;
3195
3196	sc = ifp->if_softc;
3197	ifr = (struct ifreq *) data;
3198	error = 0;
3199
3200	switch (cmd) {
3201	case SIOCSIFMTU:
3202		if (ifp->if_mtu != ifr->ifr_mtu) {
3203			VMXNET3_CORE_LOCK(sc);
3204			error = vmxnet3_change_mtu(sc, ifr->ifr_mtu);
3205			VMXNET3_CORE_UNLOCK(sc);
3206		}
3207		break;
3208
3209	case SIOCSIFFLAGS:
3210		VMXNET3_CORE_LOCK(sc);
3211		if (ifp->if_flags & IFF_UP) {
3212			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3213				if ((ifp->if_flags ^ sc->vmx_if_flags) &
3214				    (IFF_PROMISC | IFF_ALLMULTI)) {
3215					vmxnet3_set_rxfilter(sc);
3216				}
3217			} else
3218				vmxnet3_init_locked(sc);
3219		} else {
3220			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3221				vmxnet3_stop(sc);
3222		}
3223		sc->vmx_if_flags = ifp->if_flags;
3224		VMXNET3_CORE_UNLOCK(sc);
3225		break;
3226
3227	case SIOCADDMULTI:
3228	case SIOCDELMULTI:
3229		VMXNET3_CORE_LOCK(sc);
3230		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3231			vmxnet3_set_rxfilter(sc);
3232		VMXNET3_CORE_UNLOCK(sc);
3233		break;
3234
3235	case SIOCSIFMEDIA:
3236	case SIOCGIFMEDIA:
3237		error = ifmedia_ioctl(ifp, ifr, &sc->vmx_media, cmd);
3238		break;
3239
3240	case SIOCSIFCAP:
3241		VMXNET3_CORE_LOCK(sc);
3242		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3243
3244		if (mask & IFCAP_TXCSUM)
3245			ifp->if_capenable ^= IFCAP_TXCSUM;
3246		if (mask & IFCAP_TXCSUM_IPV6)
3247			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
3248		if (mask & IFCAP_TSO4)
3249			ifp->if_capenable ^= IFCAP_TSO4;
3250		if (mask & IFCAP_TSO6)
3251			ifp->if_capenable ^= IFCAP_TSO6;
3252
3253		if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO |
3254		    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWFILTER)) {
3255			/* Changing these features requires us to reinit. */
3256			reinit = 1;
3257
3258			if (mask & IFCAP_RXCSUM)
3259				ifp->if_capenable ^= IFCAP_RXCSUM;
3260			if (mask & IFCAP_RXCSUM_IPV6)
3261				ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
3262			if (mask & IFCAP_LRO)
3263				ifp->if_capenable ^= IFCAP_LRO;
3264			if (mask & IFCAP_VLAN_HWTAGGING)
3265				ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
3266			if (mask & IFCAP_VLAN_HWFILTER)
3267				ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
3268		} else
3269			reinit = 0;
3270
3271		if (mask & IFCAP_VLAN_HWTSO)
3272			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
3273
3274		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3275			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3276			vmxnet3_init_locked(sc);
3277		}
3278
3279		VMXNET3_CORE_UNLOCK(sc);
3280		VLAN_CAPABILITIES(ifp);
3281		break;
3282
3283	default:
3284		error = ether_ioctl(ifp, cmd, data);
3285		break;
3286	}
3287
3288	VMXNET3_CORE_LOCK_ASSERT_NOTOWNED(sc);
3289
3290	return (error);
3291}
3292
3293#ifndef VMXNET3_LEGACY_TX
3294static void
3295vmxnet3_qflush(struct ifnet *ifp)
3296{
3297	struct vmxnet3_softc *sc;
3298	struct vmxnet3_txqueue *txq;
3299	struct mbuf *m;
3300	int i;
3301
3302	sc = ifp->if_softc;
3303
3304	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3305		txq = &sc->vmx_txq[i];
3306
3307		VMXNET3_TXQ_LOCK(txq);
3308		while ((m = buf_ring_dequeue_sc(txq->vxtxq_br)) != NULL)
3309			m_freem(m);
3310		VMXNET3_TXQ_UNLOCK(txq);
3311	}
3312
3313	if_qflush(ifp);
3314}
3315#endif
3316
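/*
 * Per-queue transmit watchdog, driven from the tick callout.  Returns
 * nonzero once the queue's watchdog counter expires, at which point the
 * caller reinitializes the interface.
 */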
3317static int
3318vmxnet3_watchdog(struct vmxnet3_txqueue *txq)
3319{
3320	struct vmxnet3_softc *sc;
3321
3322	sc = txq->vxtxq_sc;
3323
3324	VMXNET3_TXQ_LOCK(txq);
3325	if (txq->vxtxq_watchdog == 0 || --txq->vxtxq_watchdog) {
3326		VMXNET3_TXQ_UNLOCK(txq);
3327		return (0);
3328	}
3329	VMXNET3_TXQ_UNLOCK(txq);
3330
3331	if_printf(sc->vmx_ifp, "watchdog timeout on queue %d\n",
3332	    txq->vxtxq_id);
3333	return (1);
3334}
3335
3336static void
3337vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
3338{
3339
3340	vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
3341}
3342
3343static void
3344vmxnet3_txq_accum_stats(struct vmxnet3_txqueue *txq,
3345    struct vmxnet3_txq_stats *accum)
3346{
3347	struct vmxnet3_txq_stats *st;
3348
3349	st = &txq->vxtxq_stats;
3350
3351	accum->vmtxs_opackets += st->vmtxs_opackets;
3352	accum->vmtxs_obytes += st->vmtxs_obytes;
3353	accum->vmtxs_omcasts += st->vmtxs_omcasts;
3354	accum->vmtxs_csum += st->vmtxs_csum;
3355	accum->vmtxs_tso += st->vmtxs_tso;
3356	accum->vmtxs_full += st->vmtxs_full;
3357	accum->vmtxs_offload_failed += st->vmtxs_offload_failed;
3358}
3359
3360static void
3361vmxnet3_rxq_accum_stats(struct vmxnet3_rxqueue *rxq,
3362    struct vmxnet3_rxq_stats *accum)
3363{
3364	struct vmxnet3_rxq_stats *st;
3365
3366	st = &rxq->vxrxq_stats;
3367
3368	accum->vmrxs_ipackets += st->vmrxs_ipackets;
3369	accum->vmrxs_ibytes += st->vmrxs_ibytes;
3370	accum->vmrxs_iqdrops += st->vmrxs_iqdrops;
3371	accum->vmrxs_ierrors += st->vmrxs_ierrors;
3372}
3373
3374static void
3375vmxnet3_accumulate_stats(struct vmxnet3_softc *sc)
3376{
3377	struct ifnet *ifp;
3378	struct vmxnet3_statistics *st;
3379	struct vmxnet3_txq_stats txaccum;
3380	struct vmxnet3_rxq_stats rxaccum;
3381	int i;
3382
3383	ifp = sc->vmx_ifp;
3384	st = &sc->vmx_stats;
3385
3386	bzero(&txaccum, sizeof(struct vmxnet3_txq_stats));
3387	bzero(&rxaccum, sizeof(struct vmxnet3_rxq_stats));
3388
3389	for (i = 0; i < sc->vmx_ntxqueues; i++)
3390		vmxnet3_txq_accum_stats(&sc->vmx_txq[i], &txaccum);
3391	for (i = 0; i < sc->vmx_nrxqueues; i++)
3392		vmxnet3_rxq_accum_stats(&sc->vmx_rxq[i], &rxaccum);
3393
3394	/*
3395	 * With the exception of if_ierrors, these ifnet statistics are
3396	 * only updated in the driver, so just set them to our accumulated
3397	 * values. if_ierrors is updated in ether_input() for malformed
3398	 * frames that we should have already discarded.
3399	 */
3400	ifp->if_ipackets = rxaccum.vmrxs_ipackets;
3401	ifp->if_iqdrops = rxaccum.vmrxs_iqdrops;
3402	ifp->if_ierrors = rxaccum.vmrxs_ierrors;
3403	ifp->if_opackets = txaccum.vmtxs_opackets;
3404#ifndef VMXNET3_LEGACY_TX
3405	ifp->if_obytes = txaccum.vmtxs_obytes;
3406	ifp->if_omcasts = txaccum.vmtxs_omcasts;
3407#endif
3408}
3409
3410static void
3411vmxnet3_tick(void *xsc)
3412{
3413	struct vmxnet3_softc *sc;
3414	struct ifnet *ifp;
3415	int i, timedout;
3416
3417	sc = xsc;
3418	ifp = sc->vmx_ifp;
3419	timedout = 0;
3420
3421	VMXNET3_CORE_LOCK_ASSERT(sc);
3422
3423	vmxnet3_accumulate_stats(sc);
3424	vmxnet3_refresh_host_stats(sc);
3425
3426	for (i = 0; i < sc->vmx_ntxqueues; i++)
3427		timedout |= vmxnet3_watchdog(&sc->vmx_txq[i]);
3428
3429	if (timedout != 0) {
3430		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3431		vmxnet3_init_locked(sc);
3432	} else
3433		callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
3434}
3435
3436static int
3437vmxnet3_link_is_up(struct vmxnet3_softc *sc)
3438{
3439	uint32_t status;
3440
3441	/* Also update the link speed while here. */
3442	status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
3443	sc->vmx_link_speed = status >> 16;
3444	return (!!(status & 0x1));
3445}
3446
3447static void
3448vmxnet3_link_status(struct vmxnet3_softc *sc)
3449{
3450	struct ifnet *ifp;
3451	int link;
3452
3453	ifp = sc->vmx_ifp;
3454	link = vmxnet3_link_is_up(sc);
3455
3456	if (link != 0 && sc->vmx_link_active == 0) {
3457		sc->vmx_link_active = 1;
3458		if_link_state_change(ifp, LINK_STATE_UP);
3459	} else if (link == 0 && sc->vmx_link_active != 0) {
3460		sc->vmx_link_active = 0;
3461		if_link_state_change(ifp, LINK_STATE_DOWN);
3462	}
3463}
3464
3465static void
3466vmxnet3_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3467{
3468	struct vmxnet3_softc *sc;
3469
3470	sc = ifp->if_softc;
3471
3472	ifmr->ifm_active = IFM_ETHER | IFM_AUTO;
3473	ifmr->ifm_status = IFM_AVALID;
3474
3475	VMXNET3_CORE_LOCK(sc);
3476	if (vmxnet3_link_is_up(sc) != 0)
3477		ifmr->ifm_status |= IFM_ACTIVE;
3478	else
3479		ifmr->ifm_status |= IFM_NONE;
3480	VMXNET3_CORE_UNLOCK(sc);
3481}
3482
3483static int
3484vmxnet3_media_change(struct ifnet *ifp)
3485{
3486
3487	/* Ignore. */
3488	return (0);
3489}
3490
3491static void
3492vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
3493{
3494	uint32_t ml, mh;
3495
3496	ml  = sc->vmx_lladdr[0];
3497	ml |= sc->vmx_lladdr[1] << 8;
3498	ml |= sc->vmx_lladdr[2] << 16;
3499	ml |= sc->vmx_lladdr[3] << 24;
3500	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
3501
3502	mh  = sc->vmx_lladdr[4];
3503	mh |= sc->vmx_lladdr[5] << 8;
3504	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
3505}
3506
3507static void
3508vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
3509{
3510	uint32_t ml, mh;
3511
3512	ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
3513	mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
3514
3515	sc->vmx_lladdr[0] = ml;
3516	sc->vmx_lladdr[1] = ml >> 8;
3517	sc->vmx_lladdr[2] = ml >> 16;
3518	sc->vmx_lladdr[3] = ml >> 24;
3519	sc->vmx_lladdr[4] = mh;
3520	sc->vmx_lladdr[5] = mh >> 8;
3521}
3522
3523static void
3524vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
3525    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3526{
3527	struct sysctl_oid *node, *txsnode;
3528	struct sysctl_oid_list *list, *txslist;
3529	struct vmxnet3_txq_stats *stats;
3530	struct UPT1_TxStats *txstats;
3531	char namebuf[16];
3532
3533	stats = &txq->vxtxq_stats;
3534	txstats = &txq->vxtxq_ts->stats;
3535
3536	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id);
3537	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
3538	    NULL, "Transmit Queue");
3539	txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);
3540
3541	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
3542	    &stats->vmtxs_opackets, "Transmit packets");
3543	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
3544	    &stats->vmtxs_obytes, "Transmit bytes");
3545	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
3546	    &stats->vmtxs_omcasts, "Transmit multicasts");
3547	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3548	    &stats->vmtxs_csum, "Transmit checksum offloaded");
3549	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
3550	    &stats->vmtxs_tso, "Transmit TCP segmentation offloaded");
3551	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ringfull", CTLFLAG_RD,
3552	    &stats->vmtxs_full, "Transmit ring full");
3553	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "offload_failed", CTLFLAG_RD,
3554	    &stats->vmtxs_offload_failed, "Transmit checksum offload failed");
3555
3556	/*
3557	 * Add statistics reported by the host. These are updated once
3558	 * per second.
3559	 */
3560	txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
3561	    NULL, "Host Statistics");
3562	txslist = SYSCTL_CHILDREN(txsnode);
3563	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
3564	    &txstats->TSO_packets, "TSO packets");
3565	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
3566	    &txstats->TSO_bytes, "TSO bytes");
3567	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
3568	    &txstats->ucast_packets, "Unicast packets");
3569	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
3570	    &txstats->ucast_bytes, "Unicast bytes");
3571	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
3572	    &txstats->mcast_packets, "Multicast packets");
3573	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
3574	    &txstats->mcast_bytes, "Multicast bytes");
3575	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
3576	    &txstats->error, "Errors");
3577	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
3578	    &txstats->discard, "Discards");
3579}
3580
3581static void
3582vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
3583    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3584{
3585	struct sysctl_oid *node, *rxsnode;
3586	struct sysctl_oid_list *list, *rxslist;
3587	struct vmxnet3_rxq_stats *stats;
3588	struct UPT1_RxStats *rxstats;
3589	char namebuf[16];
3590
3591	stats = &rxq->vxrxq_stats;
3592	rxstats = &rxq->vxrxq_rs->stats;
3593
3594	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
3595	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
3596	    NULL, "Receive Queue");
3597	rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);
3598
3599	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
3600	    &stats->vmrxs_ipackets, "Receive packets");
3601	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
3602	    &stats->vmrxs_ibytes, "Receive bytes");
3603	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
3604	    &stats->vmrxs_iqdrops, "Receive drops");
3605	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
3606	    &stats->vmrxs_ierrors, "Receive errors");
3607
3608	/*
3609	 * Add statistics reported by the host. These are updated once
3610	 * per second.
3611	 */
3612	rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
3613	    NULL, "Host Statistics");
3614	rxslist = SYSCTL_CHILDREN(rxsnode);
3615	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
3616	    &rxstats->LRO_packets, "LRO packets");
3617	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
3618	    &rxstats->LRO_bytes, "LRO bytes");
3619	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
3620	    &rxstats->ucast_packets, "Unicast packets");
3621	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
3622	    &rxstats->ucast_bytes, "Unicast bytes");
3623	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
3624	    &rxstats->mcast_packets, "Multicast packets");
3625	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
3626	    &rxstats->mcast_bytes, "Multicast bytes");
3627	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
3628	    &rxstats->bcast_packets, "Broadcast packets");
3629	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
3630	    &rxstats->bcast_bytes, "Broadcast bytes");
3631	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
3632	    &rxstats->nobuffer, "No buffer");
3633	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
3634	    &rxstats->error, "Errors");
3635}
3636
3637static void
3638vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
3639    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3640{
3641	struct sysctl_oid *node;
3642	struct sysctl_oid_list *list;
3643	int i;
3644
3645	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3646		struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];
3647
3648		node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
3649		    "debug", CTLFLAG_RD, NULL, "");
3650		list = SYSCTL_CHILDREN(node);
3651
3652		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_head", CTLFLAG_RD,
3653		    &txq->vxtxq_cmd_ring.vxtxr_head, 0, "");
3654		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
3655		    &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
3656		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
3657		    &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
3658		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
3659		    &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
3660		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
3661		    &txq->vxtxq_comp_ring.vxcr_next, 0, "");
3662		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
3663		    &txq->vxtxq_comp_ring.vxcr_ndesc, 0, "");
3664		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
3665		    &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
3666	}
3667
3668	for (i = 0; i < sc->vmx_nrxqueues; i++) {
3669		struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];
3670
3671		node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
3672		    "debug", CTLFLAG_RD, NULL, "");
3673		list = SYSCTL_CHILDREN(node);
3674
3675		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_fill", CTLFLAG_RD,
3676		    &rxq->vxrxq_cmd_ring[0].vxrxr_fill, 0, "");
3677		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
3678		    &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
3679		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
3680		    &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
3681		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_fill", CTLFLAG_RD,
3682		    &rxq->vxrxq_cmd_ring[1].vxrxr_fill, 0, "");
3683		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
3684		    &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
3685		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
3686		    &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
3687		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
3688		    &rxq->vxrxq_comp_ring.vxcr_next, 0, "");
3689		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
3690		    &rxq->vxrxq_comp_ring.vxcr_ndesc, 0, "");
3691		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
3692		    &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
3693	}
3694}
3695
3696static void
3697vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
3698    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3699{
3700	int i;
3701
3702	for (i = 0; i < sc->vmx_ntxqueues; i++)
3703		vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
3704	for (i = 0; i < sc->vmx_nrxqueues; i++)
3705		vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);
3706
3707	vmxnet3_setup_debug_sysctl(sc, ctx, child);
3708}
3709
3710static void
3711vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
3712{
3713	device_t dev;
3714	struct vmxnet3_statistics *stats;
3715	struct sysctl_ctx_list *ctx;
3716	struct sysctl_oid *tree;
3717	struct sysctl_oid_list *child;
3718
3719	dev = sc->vmx_dev;
3720	ctx = device_get_sysctl_ctx(dev);
3721	tree = device_get_sysctl_tree(dev);
3722	child = SYSCTL_CHILDREN(tree);
3723
3724	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_ntxqueues", CTLFLAG_RD,
3725	    &sc->vmx_max_ntxqueues, 0, "Maximum number of Tx queues");
3726	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_nrxqueues", CTLFLAG_RD,
3727	    &sc->vmx_max_nrxqueues, 0, "Maximum number of Rx queues");
3728	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "ntxqueues", CTLFLAG_RD,
3729	    &sc->vmx_ntxqueues, 0, "Number of Tx queues");
3730	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "nrxqueues", CTLFLAG_RD,
3731	    &sc->vmx_nrxqueues, 0, "Number of Rx queues");
3732
3733	stats = &sc->vmx_stats;
3734	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defragged", CTLFLAG_RD,
3735	    &stats->vmst_defragged, 0, "Tx mbuf chains defragged");
3736	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defrag_failed", CTLFLAG_RD,
3737	    &stats->vmst_defrag_failed, 0,
3738	    "Tx mbuf dropped because defrag failed");
3739	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mgetcl_failed", CTLFLAG_RD,
3740	    &stats->vmst_mgetcl_failed, 0, "mbuf cluster allocation failed");
3741	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mbuf_load_failed", CTLFLAG_RD,
3742	    &stats->vmst_mbuf_load_failed, 0, "mbuf load segments failed");
3743
3744	vmxnet3_setup_queue_sysctl(sc, ctx, child);
3745}
3746
3747static void
3748vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
3749{
3750
3751	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
3752}
3753
3754static uint32_t
3755vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
3756{
3757
3758	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
3759}
3760
3761static void
3762vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
3763{
3764
3765	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
3766}
3767
3768static void
3769vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
3770{
3771
3772	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
3773}
3774
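/*
 * Issue a command and read back its result.  The command register also
 * returns the result, so a bus space barrier separates the write from
 * the following read.
 */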
3775static uint32_t
3776vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
3777{
3778
3779	vmxnet3_write_cmd(sc, cmd);
3780	bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
3781	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
3782	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
3783}
3784
3785static void
3786vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
3787{
3788
3789	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
3790}
3791
3792static void
3793vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
3794{
3795
3796	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
3797}
3798
3799static void
3800vmxnet3_enable_all_intrs(struct vmxnet3_softc *sc)
3801{
3802	int i;
3803
3804	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
3805	for (i = 0; i < sc->vmx_nintrs; i++)
3806		vmxnet3_enable_intr(sc, i);
3807}
3808
3809static void
3810vmxnet3_disable_all_intrs(struct vmxnet3_softc *sc)
3811{
3812	int i;
3813
3814	sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
3815	for (i = 0; i < sc->vmx_nintrs; i++)
3816		vmxnet3_disable_intr(sc, i);
3817}
3818
3819static void
3820vmxnet3_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3821{
3822	bus_addr_t *baddr = arg;
3823
3824	if (error == 0)
3825		*baddr = segs->ds_addr;
3826}
3827
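/*
 * Allocate a zeroed, physically contiguous DMA buffer of the requested
 * size and alignment, recording the tag, map, and bus address in *dma.
 * On failure, any partially created resources are released through
 * vmxnet3_dma_free().
 */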
3828static int
3829vmxnet3_dma_malloc(struct vmxnet3_softc *sc, bus_size_t size, bus_size_t align,
3830    struct vmxnet3_dma_alloc *dma)
3831{
3832	device_t dev;
3833	int error;
3834
3835	dev = sc->vmx_dev;
3836	bzero(dma, sizeof(struct vmxnet3_dma_alloc));
3837
3838	error = bus_dma_tag_create(bus_get_dma_tag(dev),
3839	    align, 0,		/* alignment, bounds */
3840	    BUS_SPACE_MAXADDR,	/* lowaddr */
3841	    BUS_SPACE_MAXADDR,	/* highaddr */
3842	    NULL, NULL,		/* filter, filterarg */
3843	    size,		/* maxsize */
3844	    1,			/* nsegments */
3845	    size,		/* maxsegsize */
3846	    BUS_DMA_ALLOCNOW,	/* flags */
3847	    NULL,		/* lockfunc */
3848	    NULL,		/* lockfuncarg */
3849	    &dma->dma_tag);
3850	if (error) {
3851		device_printf(dev, "bus_dma_tag_create failed: %d\n", error);
3852		goto fail;
3853	}
3854
3855	error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
3856	    BUS_DMA_ZERO | BUS_DMA_NOWAIT, &dma->dma_map);
3857	if (error) {
3858		device_printf(dev, "bus_dmamem_alloc failed: %d\n", error);
3859		goto fail;
3860	}
3861
3862	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3863	    size, vmxnet3_dmamap_cb, &dma->dma_paddr, BUS_DMA_NOWAIT);
3864	if (error) {
3865		device_printf(dev, "bus_dmamap_load failed: %d\n", error);
3866		goto fail;
3867	}
3868
3869	dma->dma_size = size;
3870
3871fail:
3872	if (error)
3873		vmxnet3_dma_free(sc, dma);
3874
3875	return (error);
3876}
3877
3878static void
3879vmxnet3_dma_free(struct vmxnet3_softc *sc, struct vmxnet3_dma_alloc *dma)
3880{
3881
3882	if (dma->dma_tag != NULL) {
3883		if (dma->dma_map != NULL) {
3884			bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3885			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3886			bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3887		}
3888
3889		if (dma->dma_vaddr != NULL) {
3890			bus_dmamem_free(dma->dma_tag, dma->dma_vaddr,
3891			    dma->dma_map);
3892		}
3893
3894		bus_dma_tag_destroy(dma->dma_tag);
3895	}
3896	bzero(dma, sizeof(struct vmxnet3_dma_alloc));
3897}
3898
3899static int
3900vmxnet3_tunable_int(struct vmxnet3_softc *sc, const char *knob, int def)
3901{
3902	char path[64];
3903
3904	snprintf(path, sizeof(path),
3905	    "hw.vmx.%d.%s", device_get_unit(sc->vmx_dev), knob);
3906	TUNABLE_INT_FETCH(path, &def);
3907
3908	return (def);
3909}
3910
3911/*
3912 * Since this is a purely paravirtualized device, we do not have
3913 * to worry about DMA coherency. But at times, we must make sure
3914 * both the compiler and CPU do not reorder memory operations.
3915 */
3916static inline void
3917vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
3918{
3919
3920	switch (type) {
3921	case VMXNET3_BARRIER_RD:
3922		rmb();
3923		break;
3924	case VMXNET3_BARRIER_WR:
3925		wmb();
3926		break;
3927	case VMXNET3_BARRIER_RDWR:
3928		mb();
3929		break;
3930	default:
3931		panic("%s: bad barrier type %d", __func__, type);
3932	}
3933}
3934