1/*-
2 * Copyright (c) 2013 Tsubai Masanari
3 * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 *
17 * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $
18 */
19
20/* Driver for VMware vmxnet3 virtual ethernet devices. */
21
22#include <sys/cdefs.h>
23__FBSDID("$FreeBSD: releng/10.3/sys/dev/vmware/vmxnet3/if_vmx.c 292568 2015-12-21 20:40:17Z jhb $");
24
25#include <sys/param.h>
26#include <sys/systm.h>
27#include <sys/kernel.h>
28#include <sys/endian.h>
29#include <sys/sockio.h>
30#include <sys/mbuf.h>
31#include <sys/malloc.h>
32#include <sys/module.h>
33#include <sys/socket.h>
34#include <sys/sysctl.h>
35#include <sys/smp.h>
36#include <sys/taskqueue.h>
37#include <vm/vm.h>
38#include <vm/pmap.h>
39
40#include <net/ethernet.h>
41#include <net/if.h>
42#include <net/if_arp.h>
43#include <net/if_dl.h>
44#include <net/if_types.h>
45#include <net/if_media.h>
46#include <net/if_vlan_var.h>
47
48#include <net/bpf.h>
49
50#include <netinet/in_systm.h>
51#include <netinet/in.h>
52#include <netinet/ip.h>
53#include <netinet/ip6.h>
54#include <netinet6/ip6_var.h>
55#include <netinet/udp.h>
56#include <netinet/tcp.h>
57
58#include <machine/in_cksum.h>
59
60#include <machine/bus.h>
61#include <machine/resource.h>
62#include <sys/bus.h>
63#include <sys/rman.h>
64
65#include <dev/pci/pcireg.h>
66#include <dev/pci/pcivar.h>
67
68#include "if_vmxreg.h"
69#include "if_vmxvar.h"
70
71#include "opt_inet.h"
72#include "opt_inet6.h"
73
74#ifdef VMXNET3_FAILPOINTS
75#include <sys/fail.h>
76static SYSCTL_NODE(DEBUG_FP, OID_AUTO, vmxnet3, CTLFLAG_RW, 0,
77    "vmxnet3 fail points");
78#define VMXNET3_FP	_debug_fail_point_vmxnet3
79#endif
80
81static int	vmxnet3_probe(device_t);
82static int	vmxnet3_attach(device_t);
83static int	vmxnet3_detach(device_t);
84static int	vmxnet3_shutdown(device_t);
85
86static int	vmxnet3_alloc_resources(struct vmxnet3_softc *);
87static void	vmxnet3_free_resources(struct vmxnet3_softc *);
88static int	vmxnet3_check_version(struct vmxnet3_softc *);
89static void	vmxnet3_initial_config(struct vmxnet3_softc *);
90static void	vmxnet3_check_multiqueue(struct vmxnet3_softc *);
91
92static int	vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *);
93static int	vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *);
94static int	vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *);
95static int	vmxnet3_alloc_interrupt(struct vmxnet3_softc *, int, int,
96		    struct vmxnet3_interrupt *);
97static int	vmxnet3_alloc_intr_resources(struct vmxnet3_softc *);
98static int	vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *);
99static int	vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *);
100static int	vmxnet3_setup_interrupts(struct vmxnet3_softc *);
101static int	vmxnet3_alloc_interrupts(struct vmxnet3_softc *);
102
103static void	vmxnet3_free_interrupt(struct vmxnet3_softc *,
104		    struct vmxnet3_interrupt *);
105static void	vmxnet3_free_interrupts(struct vmxnet3_softc *);
106
107#ifndef VMXNET3_LEGACY_TX
108static int	vmxnet3_alloc_taskqueue(struct vmxnet3_softc *);
109static void	vmxnet3_start_taskqueue(struct vmxnet3_softc *);
110static void	vmxnet3_drain_taskqueue(struct vmxnet3_softc *);
111static void	vmxnet3_free_taskqueue(struct vmxnet3_softc *);
112#endif
113
114static int	vmxnet3_init_rxq(struct vmxnet3_softc *, int);
115static int	vmxnet3_init_txq(struct vmxnet3_softc *, int);
116static int	vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *);
117static void	vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *);
118static void	vmxnet3_destroy_txq(struct vmxnet3_txqueue *);
119static void	vmxnet3_free_rxtx_queues(struct vmxnet3_softc *);
120
121static int	vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
122static void	vmxnet3_free_shared_data(struct vmxnet3_softc *);
123static int	vmxnet3_alloc_txq_data(struct vmxnet3_softc *);
124static void	vmxnet3_free_txq_data(struct vmxnet3_softc *);
125static int	vmxnet3_alloc_rxq_data(struct vmxnet3_softc *);
126static void	vmxnet3_free_rxq_data(struct vmxnet3_softc *);
127static int	vmxnet3_alloc_queue_data(struct vmxnet3_softc *);
128static void	vmxnet3_free_queue_data(struct vmxnet3_softc *);
129static int	vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
130static void	vmxnet3_init_shared_data(struct vmxnet3_softc *);
131static void	vmxnet3_reinit_interface(struct vmxnet3_softc *);
132static void	vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
133static void	vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
134static int	vmxnet3_alloc_data(struct vmxnet3_softc *);
135static void	vmxnet3_free_data(struct vmxnet3_softc *);
136static int	vmxnet3_setup_interface(struct vmxnet3_softc *);
137
138static void	vmxnet3_evintr(struct vmxnet3_softc *);
139static void	vmxnet3_txq_eof(struct vmxnet3_txqueue *);
140static void	vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *, struct mbuf *);
141static int	vmxnet3_newbuf(struct vmxnet3_softc *, struct vmxnet3_rxring *);
142static void	vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *,
143		    struct vmxnet3_rxring *, int);
144static void	vmxnet3_rxq_eof(struct vmxnet3_rxqueue *);
145static void	vmxnet3_legacy_intr(void *);
146static void	vmxnet3_txq_intr(void *);
147static void	vmxnet3_rxq_intr(void *);
148static void	vmxnet3_event_intr(void *);
149
150static void	vmxnet3_txstop(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
151static void	vmxnet3_rxstop(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
152static void	vmxnet3_stop(struct vmxnet3_softc *);
153
154static void	vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
155static int	vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
156static int	vmxnet3_reinit_queues(struct vmxnet3_softc *);
157static int	vmxnet3_enable_device(struct vmxnet3_softc *);
158static void	vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
159static int	vmxnet3_reinit(struct vmxnet3_softc *);
160static void	vmxnet3_init_locked(struct vmxnet3_softc *);
161static void	vmxnet3_init(void *);
162
static int	vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *, struct mbuf *,
164		    int *, int *, int *);
165static int	vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *, struct mbuf **,
166		    bus_dmamap_t, bus_dma_segment_t [], int *);
167static void	vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *, bus_dmamap_t);
168static int	vmxnet3_txq_encap(struct vmxnet3_txqueue *, struct mbuf **);
169#ifdef VMXNET3_LEGACY_TX
170static void	vmxnet3_start_locked(struct ifnet *);
171static void	vmxnet3_start(struct ifnet *);
172#else
173static int	vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *,
174		    struct mbuf *);
175static int	vmxnet3_txq_mq_start(struct ifnet *, struct mbuf *);
176static void	vmxnet3_txq_tq_deferred(void *, int);
177#endif
178static void	vmxnet3_txq_start(struct vmxnet3_txqueue *);
179static void	vmxnet3_tx_start_all(struct vmxnet3_softc *);
180
181static void	vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
182		    uint16_t);
183static void	vmxnet3_register_vlan(void *, struct ifnet *, uint16_t);
184static void	vmxnet3_unregister_vlan(void *, struct ifnet *, uint16_t);
185static void	vmxnet3_set_rxfilter(struct vmxnet3_softc *);
186static int	vmxnet3_change_mtu(struct vmxnet3_softc *, int);
187static int	vmxnet3_ioctl(struct ifnet *, u_long, caddr_t);
188
189#ifndef VMXNET3_LEGACY_TX
190static void	vmxnet3_qflush(struct ifnet *);
191#endif
192
193static int	vmxnet3_watchdog(struct vmxnet3_txqueue *);
194static void	vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
195static void	vmxnet3_txq_accum_stats(struct vmxnet3_txqueue *,
196		    struct vmxnet3_txq_stats *);
197static void	vmxnet3_rxq_accum_stats(struct vmxnet3_rxqueue *,
198		    struct vmxnet3_rxq_stats *);
199static void	vmxnet3_tick(void *);
200static void	vmxnet3_link_status(struct vmxnet3_softc *);
201static void	vmxnet3_media_status(struct ifnet *, struct ifmediareq *);
202static int	vmxnet3_media_change(struct ifnet *);
203static void	vmxnet3_set_lladdr(struct vmxnet3_softc *);
204static void	vmxnet3_get_lladdr(struct vmxnet3_softc *);
205
206static void	vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *,
207		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
208static void	vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *,
209		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
210static void	vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *,
211		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
212static void	vmxnet3_setup_sysctl(struct vmxnet3_softc *);
213
214static void	vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t,
215		    uint32_t);
216static uint32_t	vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t);
217static void	vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t,
218		    uint32_t);
219static void	vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t);
220static uint32_t	vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t);
221
222static void	vmxnet3_enable_intr(struct vmxnet3_softc *, int);
223static void	vmxnet3_disable_intr(struct vmxnet3_softc *, int);
224static void	vmxnet3_enable_all_intrs(struct vmxnet3_softc *);
225static void	vmxnet3_disable_all_intrs(struct vmxnet3_softc *);
226
227static int	vmxnet3_dma_malloc(struct vmxnet3_softc *, bus_size_t,
228		    bus_size_t, struct vmxnet3_dma_alloc *);
229static void	vmxnet3_dma_free(struct vmxnet3_softc *,
230		    struct vmxnet3_dma_alloc *);
231static int	vmxnet3_tunable_int(struct vmxnet3_softc *,
232		    const char *, int);
233
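/*
 * Barrier direction passed to vmxnet3_barrier(), which orders driver
 * accesses to the descriptor rings and shared memory against the device.
 */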
234typedef enum {
235	VMXNET3_BARRIER_RD,
236	VMXNET3_BARRIER_WR,
237	VMXNET3_BARRIER_RDWR,
238} vmxnet3_barrier_t;
239
240static void	vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);
241
242/* Tunables. */
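/*
 * The hw.vmx.* defaults below may be overridden from the loader, e.g. by
 * setting hw.vmx.txnqueue=4 or hw.vmx.rxndesc=1024 in /boot/loader.conf.
 * Out-of-range values are replaced with the defaults in
 * vmxnet3_initial_config().
 */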
243static int vmxnet3_mq_disable = 0;
244TUNABLE_INT("hw.vmx.mq_disable", &vmxnet3_mq_disable);
245static int vmxnet3_default_txnqueue = VMXNET3_DEF_TX_QUEUES;
246TUNABLE_INT("hw.vmx.txnqueue", &vmxnet3_default_txnqueue);
247static int vmxnet3_default_rxnqueue = VMXNET3_DEF_RX_QUEUES;
248TUNABLE_INT("hw.vmx.rxnqueue", &vmxnet3_default_rxnqueue);
249static int vmxnet3_default_txndesc = VMXNET3_DEF_TX_NDESC;
250TUNABLE_INT("hw.vmx.txndesc", &vmxnet3_default_txndesc);
251static int vmxnet3_default_rxndesc = VMXNET3_DEF_RX_NDESC;
252TUNABLE_INT("hw.vmx.rxndesc", &vmxnet3_default_rxndesc);
253
254static device_method_t vmxnet3_methods[] = {
255	/* Device interface. */
256	DEVMETHOD(device_probe,		vmxnet3_probe),
257	DEVMETHOD(device_attach,	vmxnet3_attach),
258	DEVMETHOD(device_detach,	vmxnet3_detach),
259	DEVMETHOD(device_shutdown,	vmxnet3_shutdown),
260
261	DEVMETHOD_END
262};
263
264static driver_t vmxnet3_driver = {
265	"vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc)
266};
267
268static devclass_t vmxnet3_devclass;
269DRIVER_MODULE(vmx, pci, vmxnet3_driver, vmxnet3_devclass, 0, 0);
270
271MODULE_DEPEND(vmx, pci, 1, 1, 1);
272MODULE_DEPEND(vmx, ether, 1, 1, 1);
273
274#define VMXNET3_VMWARE_VENDOR_ID	0x15AD
275#define VMXNET3_VMWARE_DEVICE_ID	0x07B0
276
277static int
278vmxnet3_probe(device_t dev)
279{
280
281	if (pci_get_vendor(dev) == VMXNET3_VMWARE_VENDOR_ID &&
282	    pci_get_device(dev) == VMXNET3_VMWARE_DEVICE_ID) {
283		device_set_desc(dev, "VMware VMXNET3 Ethernet Adapter");
284		return (BUS_PROBE_DEFAULT);
285	}
286
287	return (ENXIO);
288}
289
290static int
291vmxnet3_attach(device_t dev)
292{
293	struct vmxnet3_softc *sc;
294	int error;
295
296	sc = device_get_softc(dev);
297	sc->vmx_dev = dev;
298
299	pci_enable_busmaster(dev);
300
301	VMXNET3_CORE_LOCK_INIT(sc, device_get_nameunit(dev));
302	callout_init_mtx(&sc->vmx_tick, &sc->vmx_mtx, 0);
303
304	vmxnet3_initial_config(sc);
305
306	error = vmxnet3_alloc_resources(sc);
307	if (error)
308		goto fail;
309
310	error = vmxnet3_check_version(sc);
311	if (error)
312		goto fail;
313
314	error = vmxnet3_alloc_rxtx_queues(sc);
315	if (error)
316		goto fail;
317
318#ifndef VMXNET3_LEGACY_TX
319	error = vmxnet3_alloc_taskqueue(sc);
320	if (error)
321		goto fail;
322#endif
323
324	error = vmxnet3_alloc_interrupts(sc);
325	if (error)
326		goto fail;
327
328	vmxnet3_check_multiqueue(sc);
329
330	error = vmxnet3_alloc_data(sc);
331	if (error)
332		goto fail;
333
334	error = vmxnet3_setup_interface(sc);
335	if (error)
336		goto fail;
337
338	error = vmxnet3_setup_interrupts(sc);
339	if (error) {
340		ether_ifdetach(sc->vmx_ifp);
341		device_printf(dev, "could not set up interrupt\n");
342		goto fail;
343	}
344
345	vmxnet3_setup_sysctl(sc);
346#ifndef VMXNET3_LEGACY_TX
347	vmxnet3_start_taskqueue(sc);
348#endif
349
350fail:
351	if (error)
352		vmxnet3_detach(dev);
353
354	return (error);
355}
356
357static int
358vmxnet3_detach(device_t dev)
359{
360	struct vmxnet3_softc *sc;
361	struct ifnet *ifp;
362
363	sc = device_get_softc(dev);
364	ifp = sc->vmx_ifp;
365
366	if (device_is_attached(dev)) {
367		VMXNET3_CORE_LOCK(sc);
368		vmxnet3_stop(sc);
369		VMXNET3_CORE_UNLOCK(sc);
370
371		callout_drain(&sc->vmx_tick);
372#ifndef VMXNET3_LEGACY_TX
373		vmxnet3_drain_taskqueue(sc);
374#endif
375
376		ether_ifdetach(ifp);
377	}
378
379	if (sc->vmx_vlan_attach != NULL) {
380		EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_attach);
381		sc->vmx_vlan_attach = NULL;
382	}
383	if (sc->vmx_vlan_detach != NULL) {
384		EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_detach);
385		sc->vmx_vlan_detach = NULL;
386	}
387
388#ifndef VMXNET3_LEGACY_TX
389	vmxnet3_free_taskqueue(sc);
390#endif
391	vmxnet3_free_interrupts(sc);
392
393	if (ifp != NULL) {
394		if_free(ifp);
395		sc->vmx_ifp = NULL;
396	}
397
398	ifmedia_removeall(&sc->vmx_media);
399
400	vmxnet3_free_data(sc);
401	vmxnet3_free_resources(sc);
402	vmxnet3_free_rxtx_queues(sc);
403
404	VMXNET3_CORE_LOCK_DESTROY(sc);
405
406	return (0);
407}
408
409static int
410vmxnet3_shutdown(device_t dev)
411{
412
413	return (0);
414}
415
416static int
417vmxnet3_alloc_resources(struct vmxnet3_softc *sc)
418{
419	device_t dev;
420	int rid;
421
422	dev = sc->vmx_dev;
423
424	rid = PCIR_BAR(0);
425	sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
426	    RF_ACTIVE);
427	if (sc->vmx_res0 == NULL) {
428		device_printf(dev,
429		    "could not map BAR0 memory\n");
430		return (ENXIO);
431	}
432
433	sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0);
434	sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0);
435
436	rid = PCIR_BAR(1);
437	sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
438	    RF_ACTIVE);
439	if (sc->vmx_res1 == NULL) {
440		device_printf(dev,
441		    "could not map BAR1 memory\n");
442		return (ENXIO);
443	}
444
445	sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1);
446	sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1);
447
448	if (pci_find_cap(dev, PCIY_MSIX, NULL) == 0) {
449		rid = PCIR_BAR(2);
450		sc->vmx_msix_res = bus_alloc_resource_any(dev,
451		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
452	}
453
454	if (sc->vmx_msix_res == NULL)
455		sc->vmx_flags |= VMXNET3_FLAG_NO_MSIX;
456
457	return (0);
458}
459
460static void
461vmxnet3_free_resources(struct vmxnet3_softc *sc)
462{
463	device_t dev;
464	int rid;
465
466	dev = sc->vmx_dev;
467
468	if (sc->vmx_res0 != NULL) {
469		rid = PCIR_BAR(0);
470		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res0);
471		sc->vmx_res0 = NULL;
472	}
473
474	if (sc->vmx_res1 != NULL) {
475		rid = PCIR_BAR(1);
476		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res1);
477		sc->vmx_res1 = NULL;
478	}
479
480	if (sc->vmx_msix_res != NULL) {
481		rid = PCIR_BAR(2);
482		bus_release_resource(dev, SYS_RES_MEMORY, rid,
483		    sc->vmx_msix_res);
484		sc->vmx_msix_res = NULL;
485	}
486}
487
488static int
489vmxnet3_check_version(struct vmxnet3_softc *sc)
490{
491	device_t dev;
492	uint32_t version;
493
494	dev = sc->vmx_dev;
495
496	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
497	if ((version & 0x01) == 0) {
498		device_printf(dev, "unsupported hardware version %#x\n",
499		    version);
500		return (ENOTSUP);
501	}
502	vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
503
504	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
505	if ((version & 0x01) == 0) {
506		device_printf(dev, "unsupported UPT version %#x\n", version);
507		return (ENOTSUP);
508	}
509	vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
510
511	return (0);
512}
513
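/*
 * Return the largest power of two that is less than or equal to val, e.g.
 * trunc_powerof2(6) == 4. Callers guarantee val is at least one, so fls()
 * never sees zero.
 */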
514static int
515trunc_powerof2(int val)
516{
517
518	return (1U << (fls(val) - 1));
519}
520
521static void
522vmxnet3_initial_config(struct vmxnet3_softc *sc)
523{
524	int nqueue, ndesc;
525
526	nqueue = vmxnet3_tunable_int(sc, "txnqueue", vmxnet3_default_txnqueue);
527	if (nqueue > VMXNET3_MAX_TX_QUEUES || nqueue < 1)
528		nqueue = VMXNET3_DEF_TX_QUEUES;
529	if (nqueue > mp_ncpus)
530		nqueue = mp_ncpus;
531	sc->vmx_max_ntxqueues = trunc_powerof2(nqueue);
532
533	nqueue = vmxnet3_tunable_int(sc, "rxnqueue", vmxnet3_default_rxnqueue);
534	if (nqueue > VMXNET3_MAX_RX_QUEUES || nqueue < 1)
535		nqueue = VMXNET3_DEF_RX_QUEUES;
536	if (nqueue > mp_ncpus)
537		nqueue = mp_ncpus;
538	sc->vmx_max_nrxqueues = trunc_powerof2(nqueue);
539
540	if (vmxnet3_tunable_int(sc, "mq_disable", vmxnet3_mq_disable)) {
541		sc->vmx_max_nrxqueues = 1;
542		sc->vmx_max_ntxqueues = 1;
543	}
544
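	/*
	 * The descriptor counts below are rounded down to a multiple of
	 * (mask + 1), the ring size granularity implied by
	 * VMXNET3_MASK_TX_NDESC and VMXNET3_MASK_RX_NDESC.
	 */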
545	ndesc = vmxnet3_tunable_int(sc, "txd", vmxnet3_default_txndesc);
546	if (ndesc > VMXNET3_MAX_TX_NDESC || ndesc < VMXNET3_MIN_TX_NDESC)
547		ndesc = VMXNET3_DEF_TX_NDESC;
548	if (ndesc & VMXNET3_MASK_TX_NDESC)
549		ndesc &= ~VMXNET3_MASK_TX_NDESC;
550	sc->vmx_ntxdescs = ndesc;
551
552	ndesc = vmxnet3_tunable_int(sc, "rxd", vmxnet3_default_rxndesc);
553	if (ndesc > VMXNET3_MAX_RX_NDESC || ndesc < VMXNET3_MIN_RX_NDESC)
554		ndesc = VMXNET3_DEF_RX_NDESC;
555	if (ndesc & VMXNET3_MASK_RX_NDESC)
556		ndesc &= ~VMXNET3_MASK_RX_NDESC;
557	sc->vmx_nrxdescs = ndesc;
558	sc->vmx_max_rxsegs = VMXNET3_MAX_RX_SEGS;
559}
560
561static void
562vmxnet3_check_multiqueue(struct vmxnet3_softc *sc)
563{
564
565	if (sc->vmx_intr_type != VMXNET3_IT_MSIX)
566		goto out;
567
568	/* BMV: Just use the maximum configured for now. */
569	sc->vmx_nrxqueues = sc->vmx_max_nrxqueues;
570	sc->vmx_ntxqueues = sc->vmx_max_ntxqueues;
571
572	if (sc->vmx_nrxqueues > 1)
573		sc->vmx_flags |= VMXNET3_FLAG_RSS;
574
575	return;
576
577out:
578	sc->vmx_ntxqueues = 1;
579	sc->vmx_nrxqueues = 1;
580}
581
582static int
583vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *sc)
584{
585	device_t dev;
586	int nmsix, cnt, required;
587
588	dev = sc->vmx_dev;
589
590	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX)
591		return (1);
592
593	/* Allocate an additional vector for the events interrupt. */
594	required = sc->vmx_max_nrxqueues + sc->vmx_max_ntxqueues + 1;
595
596	nmsix = pci_msix_count(dev);
597	if (nmsix < required)
598		return (1);
599
600	cnt = required;
601	if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) {
602		sc->vmx_nintrs = required;
603		return (0);
604	} else
605		pci_release_msi(dev);
606
607	/* BMV TODO Fallback to sharing MSIX vectors if possible. */
608
609	return (1);
610}
611
612static int
613vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *sc)
614{
615	device_t dev;
616	int nmsi, cnt, required;
617
618	dev = sc->vmx_dev;
619	required = 1;
620
621	nmsi = pci_msi_count(dev);
622	if (nmsi < required)
623		return (1);
624
625	cnt = required;
626	if (pci_alloc_msi(dev, &cnt) == 0 && cnt >= required) {
627		sc->vmx_nintrs = 1;
628		return (0);
629	} else
630		pci_release_msi(dev);
631
632	return (1);
633}
634
635static int
636vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *sc)
637{
638
639	sc->vmx_nintrs = 1;
640	return (0);
641}
642
643static int
644vmxnet3_alloc_interrupt(struct vmxnet3_softc *sc, int rid, int flags,
645    struct vmxnet3_interrupt *intr)
646{
647	struct resource *irq;
648
649	irq = bus_alloc_resource_any(sc->vmx_dev, SYS_RES_IRQ, &rid, flags);
650	if (irq == NULL)
651		return (ENXIO);
652
653	intr->vmxi_irq = irq;
654	intr->vmxi_rid = rid;
655
656	return (0);
657}
658
659static int
660vmxnet3_alloc_intr_resources(struct vmxnet3_softc *sc)
661{
662	int i, rid, flags, error;
663
664	rid = 0;
665	flags = RF_ACTIVE;
666
667	if (sc->vmx_intr_type == VMXNET3_IT_LEGACY)
668		flags |= RF_SHAREABLE;
669	else
670		rid = 1;
671
672	for (i = 0; i < sc->vmx_nintrs; i++, rid++) {
673		error = vmxnet3_alloc_interrupt(sc, rid, flags,
674		    &sc->vmx_intrs[i]);
675		if (error)
676			return (error);
677	}
678
679	return (0);
680}
681
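/*
 * With MSIX, interrupt resource IDs start at one (see
 * vmxnet3_alloc_intr_resources()), so the interrupt index programmed into
 * each queue below is the rid less one: vectors 0..ntxqueues-1 service the
 * Tx queues, the next nrxqueues vectors service the Rx queues, and the
 * final vector handles device events.
 */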
682static int
683vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *sc)
684{
685	device_t dev;
686	struct vmxnet3_txqueue *txq;
687	struct vmxnet3_rxqueue *rxq;
688	struct vmxnet3_interrupt *intr;
689	enum intr_type type;
690	int i, error;
691
692	dev = sc->vmx_dev;
693	intr = &sc->vmx_intrs[0];
694	type = INTR_TYPE_NET | INTR_MPSAFE;
695
696	for (i = 0; i < sc->vmx_ntxqueues; i++, intr++) {
697		txq = &sc->vmx_txq[i];
698		error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
699		     vmxnet3_txq_intr, txq, &intr->vmxi_handler);
700		if (error)
701			return (error);
702		bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler,
703		    "tq%d", i);
704		txq->vxtxq_intr_idx = intr->vmxi_rid - 1;
705	}
706
707	for (i = 0; i < sc->vmx_nrxqueues; i++, intr++) {
708		rxq = &sc->vmx_rxq[i];
709		error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
710		    vmxnet3_rxq_intr, rxq, &intr->vmxi_handler);
711		if (error)
712			return (error);
713		bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler,
714		    "rq%d", i);
715		rxq->vxrxq_intr_idx = intr->vmxi_rid - 1;
716	}
717
718	error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
719	    vmxnet3_event_intr, sc, &intr->vmxi_handler);
720	if (error)
721		return (error);
722	bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler, "event");
723	sc->vmx_event_intr_idx = intr->vmxi_rid - 1;
724
725	return (0);
726}
727
728static int
729vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *sc)
730{
731	struct vmxnet3_interrupt *intr;
732	int i, error;
733
734	intr = &sc->vmx_intrs[0];
735	error = bus_setup_intr(sc->vmx_dev, intr->vmxi_irq,
736	    INTR_TYPE_NET | INTR_MPSAFE, NULL, vmxnet3_legacy_intr, sc,
737	    &intr->vmxi_handler);
738
739	for (i = 0; i < sc->vmx_ntxqueues; i++)
740		sc->vmx_txq[i].vxtxq_intr_idx = 0;
741	for (i = 0; i < sc->vmx_nrxqueues; i++)
742		sc->vmx_rxq[i].vxrxq_intr_idx = 0;
743	sc->vmx_event_intr_idx = 0;
744
745	return (error);
746}
747
748static void
749vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
750{
751	struct vmxnet3_txqueue *txq;
752	struct vmxnet3_txq_shared *txs;
753	struct vmxnet3_rxqueue *rxq;
754	struct vmxnet3_rxq_shared *rxs;
755	int i;
756
757	sc->vmx_ds->evintr = sc->vmx_event_intr_idx;
758
759	for (i = 0; i < sc->vmx_ntxqueues; i++) {
760		txq = &sc->vmx_txq[i];
761		txs = txq->vxtxq_ts;
762		txs->intr_idx = txq->vxtxq_intr_idx;
763	}
764
765	for (i = 0; i < sc->vmx_nrxqueues; i++) {
766		rxq = &sc->vmx_rxq[i];
767		rxs = rxq->vxrxq_rs;
768		rxs->intr_idx = rxq->vxrxq_intr_idx;
769	}
770}
771
772static int
773vmxnet3_setup_interrupts(struct vmxnet3_softc *sc)
774{
775	int error;
776
777	error = vmxnet3_alloc_intr_resources(sc);
778	if (error)
779		return (error);
780
781	switch (sc->vmx_intr_type) {
782	case VMXNET3_IT_MSIX:
783		error = vmxnet3_setup_msix_interrupts(sc);
784		break;
785	case VMXNET3_IT_MSI:
786	case VMXNET3_IT_LEGACY:
787		error = vmxnet3_setup_legacy_interrupt(sc);
788		break;
789	default:
790		panic("%s: invalid interrupt type %d", __func__,
791		    sc->vmx_intr_type);
792	}
793
794	if (error == 0)
795		vmxnet3_set_interrupt_idx(sc);
796
797	return (error);
798}
799
800static int
801vmxnet3_alloc_interrupts(struct vmxnet3_softc *sc)
802{
803	device_t dev;
804	uint32_t config;
805	int error;
806
807	dev = sc->vmx_dev;
808	config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
809
810	sc->vmx_intr_type = config & 0x03;
811	sc->vmx_intr_mask_mode = (config >> 2) & 0x03;
812
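	/*
	 * Start with the interrupt type recommended by the device and fall
	 * through from MSIX to MSI to a legacy INTx interrupt as each
	 * allocation attempt fails.
	 */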
813	switch (sc->vmx_intr_type) {
814	case VMXNET3_IT_AUTO:
815		sc->vmx_intr_type = VMXNET3_IT_MSIX;
816		/* FALLTHROUGH */
817	case VMXNET3_IT_MSIX:
818		error = vmxnet3_alloc_msix_interrupts(sc);
819		if (error == 0)
820			break;
821		sc->vmx_intr_type = VMXNET3_IT_MSI;
822		/* FALLTHROUGH */
823	case VMXNET3_IT_MSI:
824		error = vmxnet3_alloc_msi_interrupts(sc);
825		if (error == 0)
826			break;
827		sc->vmx_intr_type = VMXNET3_IT_LEGACY;
828		/* FALLTHROUGH */
829	case VMXNET3_IT_LEGACY:
830		error = vmxnet3_alloc_legacy_interrupts(sc);
831		if (error == 0)
832			break;
833		/* FALLTHROUGH */
834	default:
835		sc->vmx_intr_type = -1;
836		device_printf(dev, "cannot allocate any interrupt resources\n");
837		return (ENXIO);
838	}
839
840	return (error);
841}
842
843static void
844vmxnet3_free_interrupt(struct vmxnet3_softc *sc,
845    struct vmxnet3_interrupt *intr)
846{
847	device_t dev;
848
849	dev = sc->vmx_dev;
850
851	if (intr->vmxi_handler != NULL) {
852		bus_teardown_intr(dev, intr->vmxi_irq, intr->vmxi_handler);
853		intr->vmxi_handler = NULL;
854	}
855
856	if (intr->vmxi_irq != NULL) {
857		bus_release_resource(dev, SYS_RES_IRQ, intr->vmxi_rid,
858		    intr->vmxi_irq);
859		intr->vmxi_irq = NULL;
860		intr->vmxi_rid = -1;
861	}
862}
863
864static void
865vmxnet3_free_interrupts(struct vmxnet3_softc *sc)
866{
867	int i;
868
869	for (i = 0; i < sc->vmx_nintrs; i++)
870		vmxnet3_free_interrupt(sc, &sc->vmx_intrs[i]);
871
872	if (sc->vmx_intr_type == VMXNET3_IT_MSI ||
873	    sc->vmx_intr_type == VMXNET3_IT_MSIX)
874		pci_release_msi(sc->vmx_dev);
875}
876
877#ifndef VMXNET3_LEGACY_TX
878static int
879vmxnet3_alloc_taskqueue(struct vmxnet3_softc *sc)
880{
881	device_t dev;
882
883	dev = sc->vmx_dev;
884
885	sc->vmx_tq = taskqueue_create(device_get_nameunit(dev), M_NOWAIT,
886	    taskqueue_thread_enqueue, &sc->vmx_tq);
887	if (sc->vmx_tq == NULL)
888		return (ENOMEM);
889
890	return (0);
891}
892
893static void
894vmxnet3_start_taskqueue(struct vmxnet3_softc *sc)
895{
896	device_t dev;
897	int nthreads, error;
898
899	dev = sc->vmx_dev;
900
901	/*
902	 * The taskqueue is typically not frequently used, so a dedicated
903	 * thread for each queue is unnecessary.
904	 */
905	nthreads = MAX(1, sc->vmx_ntxqueues / 2);
906
	/*
	 * Most drivers just ignore the return value; it only fails with
	 * ENOMEM, so an error is not likely. It is also hard for us to
	 * recover from an error here.
	 */
	error = taskqueue_start_threads(&sc->vmx_tq, nthreads, PI_NET,
	    "%s taskq", device_get_nameunit(dev));
	if (error)
		device_printf(dev, "failed to start taskqueue: %d\n", error);
916}
917
918static void
919vmxnet3_drain_taskqueue(struct vmxnet3_softc *sc)
920{
921	struct vmxnet3_txqueue *txq;
922	int i;
923
924	if (sc->vmx_tq != NULL) {
925		for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
926			txq = &sc->vmx_txq[i];
927			taskqueue_drain(sc->vmx_tq, &txq->vxtxq_defrtask);
928		}
929	}
930}
931
932static void
933vmxnet3_free_taskqueue(struct vmxnet3_softc *sc)
934{
935	if (sc->vmx_tq != NULL) {
936		taskqueue_free(sc->vmx_tq);
937		sc->vmx_tq = NULL;
938	}
939}
940#endif
941
942static int
943vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q)
944{
945	struct vmxnet3_rxqueue *rxq;
946	struct vmxnet3_rxring *rxr;
947	int i;
948
949	rxq = &sc->vmx_rxq[q];
950
951	snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
952	    device_get_nameunit(sc->vmx_dev), q);
953	mtx_init(&rxq->vxrxq_mtx, rxq->vxrxq_name, NULL, MTX_DEF);
954
955	rxq->vxrxq_sc = sc;
956	rxq->vxrxq_id = q;
957
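	/*
	 * Each Rx queue is fed by VMXNET3_RXRINGS_PERQ command rings: ring 0
	 * supplies the buffers that may start a frame while ring 1 supplies
	 * body-only buffers (see vmxnet3_newbuf()). The completion ring must
	 * be able to cover the descriptors of every command ring.
	 */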
958	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
959		rxr = &rxq->vxrxq_cmd_ring[i];
960		rxr->vxrxr_rid = i;
961		rxr->vxrxr_ndesc = sc->vmx_nrxdescs;
962		rxr->vxrxr_rxbuf = malloc(rxr->vxrxr_ndesc *
963		    sizeof(struct vmxnet3_rxbuf), M_DEVBUF, M_NOWAIT | M_ZERO);
964		if (rxr->vxrxr_rxbuf == NULL)
965			return (ENOMEM);
966
967		rxq->vxrxq_comp_ring.vxcr_ndesc += sc->vmx_nrxdescs;
968	}
969
970	return (0);
971}
972
973static int
974vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
975{
976	struct vmxnet3_txqueue *txq;
977	struct vmxnet3_txring *txr;
978
979	txq = &sc->vmx_txq[q];
980	txr = &txq->vxtxq_cmd_ring;
981
982	snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
983	    device_get_nameunit(sc->vmx_dev), q);
984	mtx_init(&txq->vxtxq_mtx, txq->vxtxq_name, NULL, MTX_DEF);
985
986	txq->vxtxq_sc = sc;
987	txq->vxtxq_id = q;
988
989	txr->vxtxr_ndesc = sc->vmx_ntxdescs;
990	txr->vxtxr_txbuf = malloc(txr->vxtxr_ndesc *
991	    sizeof(struct vmxnet3_txbuf), M_DEVBUF, M_NOWAIT | M_ZERO);
992	if (txr->vxtxr_txbuf == NULL)
993		return (ENOMEM);
994
995	txq->vxtxq_comp_ring.vxcr_ndesc = sc->vmx_ntxdescs;
996
997#ifndef VMXNET3_LEGACY_TX
998	TASK_INIT(&txq->vxtxq_defrtask, 0, vmxnet3_txq_tq_deferred, txq);
999
1000	txq->vxtxq_br = buf_ring_alloc(VMXNET3_DEF_BUFRING_SIZE, M_DEVBUF,
1001	    M_NOWAIT, &txq->vxtxq_mtx);
1002	if (txq->vxtxq_br == NULL)
1003		return (ENOMEM);
1004#endif
1005
1006	return (0);
1007}
1008
1009static int
1010vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *sc)
1011{
1012	int i, error;
1013
	/*
	 * Only attempt to create multiple queues if MSIX is available. MSIX
	 * is disabled by default because it's apparently broken for devices
	 * passed through by at least ESXi 5.1. The hw.pci.honor_msi_blacklist
	 * tunable must be set to zero for MSIX. This check prevents us from
	 * allocating queue structures that we will not use.
	 */
1021	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX) {
1022		sc->vmx_max_nrxqueues = 1;
1023		sc->vmx_max_ntxqueues = 1;
1024	}
1025
1026	sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) *
1027	    sc->vmx_max_nrxqueues, M_DEVBUF, M_NOWAIT | M_ZERO);
1028	sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) *
1029	    sc->vmx_max_ntxqueues, M_DEVBUF, M_NOWAIT | M_ZERO);
1030	if (sc->vmx_rxq == NULL || sc->vmx_txq == NULL)
1031		return (ENOMEM);
1032
1033	for (i = 0; i < sc->vmx_max_nrxqueues; i++) {
1034		error = vmxnet3_init_rxq(sc, i);
1035		if (error)
1036			return (error);
1037	}
1038
1039	for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
1040		error = vmxnet3_init_txq(sc, i);
1041		if (error)
1042			return (error);
1043	}
1044
1045	return (0);
1046}
1047
1048static void
1049vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *rxq)
1050{
1051	struct vmxnet3_rxring *rxr;
1052	int i;
1053
1054	rxq->vxrxq_sc = NULL;
1055	rxq->vxrxq_id = -1;
1056
1057	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1058		rxr = &rxq->vxrxq_cmd_ring[i];
1059
1060		if (rxr->vxrxr_rxbuf != NULL) {
1061			free(rxr->vxrxr_rxbuf, M_DEVBUF);
1062			rxr->vxrxr_rxbuf = NULL;
1063		}
1064	}
1065
1066	if (mtx_initialized(&rxq->vxrxq_mtx) != 0)
1067		mtx_destroy(&rxq->vxrxq_mtx);
1068}
1069
1070static void
1071vmxnet3_destroy_txq(struct vmxnet3_txqueue *txq)
1072{
1073	struct vmxnet3_txring *txr;
1074
1075	txr = &txq->vxtxq_cmd_ring;
1076
1077	txq->vxtxq_sc = NULL;
1078	txq->vxtxq_id = -1;
1079
1080#ifndef VMXNET3_LEGACY_TX
1081	if (txq->vxtxq_br != NULL) {
1082		buf_ring_free(txq->vxtxq_br, M_DEVBUF);
1083		txq->vxtxq_br = NULL;
1084	}
1085#endif
1086
1087	if (txr->vxtxr_txbuf != NULL) {
1088		free(txr->vxtxr_txbuf, M_DEVBUF);
1089		txr->vxtxr_txbuf = NULL;
1090	}
1091
1092	if (mtx_initialized(&txq->vxtxq_mtx) != 0)
1093		mtx_destroy(&txq->vxtxq_mtx);
1094}
1095
1096static void
1097vmxnet3_free_rxtx_queues(struct vmxnet3_softc *sc)
1098{
1099	int i;
1100
1101	if (sc->vmx_rxq != NULL) {
1102		for (i = 0; i < sc->vmx_max_nrxqueues; i++)
1103			vmxnet3_destroy_rxq(&sc->vmx_rxq[i]);
1104		free(sc->vmx_rxq, M_DEVBUF);
1105		sc->vmx_rxq = NULL;
1106	}
1107
1108	if (sc->vmx_txq != NULL) {
1109		for (i = 0; i < sc->vmx_max_ntxqueues; i++)
1110			vmxnet3_destroy_txq(&sc->vmx_txq[i]);
1111		free(sc->vmx_txq, M_DEVBUF);
1112		sc->vmx_txq = NULL;
1113	}
1114}
1115
1116static int
1117vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
1118{
1119	device_t dev;
1120	uint8_t *kva;
1121	size_t size;
1122	int i, error;
1123
1124	dev = sc->vmx_dev;
1125
1126	size = sizeof(struct vmxnet3_driver_shared);
1127	error = vmxnet3_dma_malloc(sc, size, 1, &sc->vmx_ds_dma);
1128	if (error) {
1129		device_printf(dev, "cannot alloc shared memory\n");
1130		return (error);
1131	}
1132	sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.dma_vaddr;
1133
1134	size = sc->vmx_ntxqueues * sizeof(struct vmxnet3_txq_shared) +
1135	    sc->vmx_nrxqueues * sizeof(struct vmxnet3_rxq_shared);
1136	error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_qs_dma);
1137	if (error) {
1138		device_printf(dev, "cannot alloc queue shared memory\n");
1139		return (error);
1140	}
1141	sc->vmx_qs = (void *) sc->vmx_qs_dma.dma_vaddr;
1142	kva = sc->vmx_qs;
1143
1144	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1145		sc->vmx_txq[i].vxtxq_ts = (struct vmxnet3_txq_shared *) kva;
1146		kva += sizeof(struct vmxnet3_txq_shared);
1147	}
1148	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1149		sc->vmx_rxq[i].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva;
1150		kva += sizeof(struct vmxnet3_rxq_shared);
1151	}
1152
1153	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1154		size = sizeof(struct vmxnet3_rss_shared);
1155		error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_rss_dma);
1156		if (error) {
1157			device_printf(dev, "cannot alloc rss shared memory\n");
1158			return (error);
1159		}
1160		sc->vmx_rss =
1161		    (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.dma_vaddr;
1162	}
1163
1164	return (0);
1165}
1166
1167static void
1168vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
1169{
1170
1171	if (sc->vmx_rss != NULL) {
1172		vmxnet3_dma_free(sc, &sc->vmx_rss_dma);
1173		sc->vmx_rss = NULL;
1174	}
1175
1176	if (sc->vmx_qs != NULL) {
1177		vmxnet3_dma_free(sc, &sc->vmx_qs_dma);
1178		sc->vmx_qs = NULL;
1179	}
1180
1181	if (sc->vmx_ds != NULL) {
1182		vmxnet3_dma_free(sc, &sc->vmx_ds_dma);
1183		sc->vmx_ds = NULL;
1184	}
1185}
1186
1187static int
1188vmxnet3_alloc_txq_data(struct vmxnet3_softc *sc)
1189{
1190	device_t dev;
1191	struct vmxnet3_txqueue *txq;
1192	struct vmxnet3_txring *txr;
1193	struct vmxnet3_comp_ring *txc;
1194	size_t descsz, compsz;
1195	int i, q, error;
1196
1197	dev = sc->vmx_dev;
1198
1199	for (q = 0; q < sc->vmx_ntxqueues; q++) {
1200		txq = &sc->vmx_txq[q];
1201		txr = &txq->vxtxq_cmd_ring;
1202		txc = &txq->vxtxq_comp_ring;
1203
1204		descsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc);
1205		compsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txcompdesc);
1206
1207		error = bus_dma_tag_create(bus_get_dma_tag(dev),
1208		    1, 0,			/* alignment, boundary */
1209		    BUS_SPACE_MAXADDR,		/* lowaddr */
1210		    BUS_SPACE_MAXADDR,		/* highaddr */
1211		    NULL, NULL,			/* filter, filterarg */
1212		    VMXNET3_TX_MAXSIZE,		/* maxsize */
1213		    VMXNET3_TX_MAXSEGS,		/* nsegments */
1214		    VMXNET3_TX_MAXSEGSIZE,	/* maxsegsize */
1215		    0,				/* flags */
1216		    NULL, NULL,			/* lockfunc, lockarg */
1217		    &txr->vxtxr_txtag);
1218		if (error) {
1219			device_printf(dev,
1220			    "unable to create Tx buffer tag for queue %d\n", q);
1221			return (error);
1222		}
1223
1224		error = vmxnet3_dma_malloc(sc, descsz, 512, &txr->vxtxr_dma);
1225		if (error) {
1226			device_printf(dev, "cannot alloc Tx descriptors for "
1227			    "queue %d error %d\n", q, error);
1228			return (error);
1229		}
1230		txr->vxtxr_txd =
1231		    (struct vmxnet3_txdesc *) txr->vxtxr_dma.dma_vaddr;
1232
1233		error = vmxnet3_dma_malloc(sc, compsz, 512, &txc->vxcr_dma);
1234		if (error) {
1235			device_printf(dev, "cannot alloc Tx comp descriptors "
1236			   "for queue %d error %d\n", q, error);
1237			return (error);
1238		}
1239		txc->vxcr_u.txcd =
1240		    (struct vmxnet3_txcompdesc *) txc->vxcr_dma.dma_vaddr;
1241
1242		for (i = 0; i < txr->vxtxr_ndesc; i++) {
1243			error = bus_dmamap_create(txr->vxtxr_txtag, 0,
1244			    &txr->vxtxr_txbuf[i].vtxb_dmamap);
1245			if (error) {
1246				device_printf(dev, "unable to create Tx buf "
1247				    "dmamap for queue %d idx %d\n", q, i);
1248				return (error);
1249			}
1250		}
1251	}
1252
1253	return (0);
1254}
1255
1256static void
1257vmxnet3_free_txq_data(struct vmxnet3_softc *sc)
1258{
1259	device_t dev;
1260	struct vmxnet3_txqueue *txq;
1261	struct vmxnet3_txring *txr;
1262	struct vmxnet3_comp_ring *txc;
1263	struct vmxnet3_txbuf *txb;
1264	int i, q;
1265
1266	dev = sc->vmx_dev;
1267
1268	for (q = 0; q < sc->vmx_ntxqueues; q++) {
1269		txq = &sc->vmx_txq[q];
1270		txr = &txq->vxtxq_cmd_ring;
1271		txc = &txq->vxtxq_comp_ring;
1272
1273		for (i = 0; i < txr->vxtxr_ndesc; i++) {
1274			txb = &txr->vxtxr_txbuf[i];
1275			if (txb->vtxb_dmamap != NULL) {
1276				bus_dmamap_destroy(txr->vxtxr_txtag,
1277				    txb->vtxb_dmamap);
1278				txb->vtxb_dmamap = NULL;
1279			}
1280		}
1281
1282		if (txc->vxcr_u.txcd != NULL) {
1283			vmxnet3_dma_free(sc, &txc->vxcr_dma);
1284			txc->vxcr_u.txcd = NULL;
1285		}
1286
1287		if (txr->vxtxr_txd != NULL) {
1288			vmxnet3_dma_free(sc, &txr->vxtxr_dma);
1289			txr->vxtxr_txd = NULL;
1290		}
1291
1292		if (txr->vxtxr_txtag != NULL) {
1293			bus_dma_tag_destroy(txr->vxtxr_txtag);
1294			txr->vxtxr_txtag = NULL;
1295		}
1296	}
1297}
1298
1299static int
1300vmxnet3_alloc_rxq_data(struct vmxnet3_softc *sc)
1301{
1302	device_t dev;
1303	struct vmxnet3_rxqueue *rxq;
1304	struct vmxnet3_rxring *rxr;
1305	struct vmxnet3_comp_ring *rxc;
1306	int descsz, compsz;
1307	int i, j, q, error;
1308
1309	dev = sc->vmx_dev;
1310
1311	for (q = 0; q < sc->vmx_nrxqueues; q++) {
1312		rxq = &sc->vmx_rxq[q];
1313		rxc = &rxq->vxrxq_comp_ring;
1314		compsz = 0;
1315
1316		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1317			rxr = &rxq->vxrxq_cmd_ring[i];
1318
1319			descsz = rxr->vxrxr_ndesc *
1320			    sizeof(struct vmxnet3_rxdesc);
1321			compsz += rxr->vxrxr_ndesc *
1322			    sizeof(struct vmxnet3_rxcompdesc);
1323
1324			error = bus_dma_tag_create(bus_get_dma_tag(dev),
1325			    1, 0,		/* alignment, boundary */
1326			    BUS_SPACE_MAXADDR,	/* lowaddr */
1327			    BUS_SPACE_MAXADDR,	/* highaddr */
1328			    NULL, NULL,		/* filter, filterarg */
1329			    MJUMPAGESIZE,	/* maxsize */
1330			    1,			/* nsegments */
1331			    MJUMPAGESIZE,	/* maxsegsize */
1332			    0,			/* flags */
1333			    NULL, NULL,		/* lockfunc, lockarg */
1334			    &rxr->vxrxr_rxtag);
1335			if (error) {
1336				device_printf(dev,
1337				    "unable to create Rx buffer tag for "
1338				    "queue %d\n", q);
1339				return (error);
1340			}
1341
1342			error = vmxnet3_dma_malloc(sc, descsz, 512,
1343			    &rxr->vxrxr_dma);
1344			if (error) {
				device_printf(dev, "cannot allocate Rx "
				    "descriptors for queue %d/%d error %d\n",
				    q, i, error);
1348				return (error);
1349			}
1350			rxr->vxrxr_rxd =
1351			    (struct vmxnet3_rxdesc *) rxr->vxrxr_dma.dma_vaddr;
1352		}
1353
1354		error = vmxnet3_dma_malloc(sc, compsz, 512, &rxc->vxcr_dma);
1355		if (error) {
1356			device_printf(dev, "cannot alloc Rx comp descriptors "
1357			    "for queue %d error %d\n", q, error);
1358			return (error);
1359		}
1360		rxc->vxcr_u.rxcd =
1361		    (struct vmxnet3_rxcompdesc *) rxc->vxcr_dma.dma_vaddr;
1362
1363		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1364			rxr = &rxq->vxrxq_cmd_ring[i];
1365
1366			error = bus_dmamap_create(rxr->vxrxr_rxtag, 0,
1367			    &rxr->vxrxr_spare_dmap);
1368			if (error) {
1369				device_printf(dev, "unable to create spare "
1370				    "dmamap for queue %d/%d error %d\n",
1371				    q, i, error);
1372				return (error);
1373			}
1374
1375			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1376				error = bus_dmamap_create(rxr->vxrxr_rxtag, 0,
1377				    &rxr->vxrxr_rxbuf[j].vrxb_dmamap);
1378				if (error) {
1379					device_printf(dev, "unable to create "
1380					    "dmamap for queue %d/%d slot %d "
1381					    "error %d\n",
1382					    q, i, j, error);
1383					return (error);
1384				}
1385			}
1386		}
1387	}
1388
1389	return (0);
1390}
1391
1392static void
1393vmxnet3_free_rxq_data(struct vmxnet3_softc *sc)
1394{
1395	device_t dev;
1396	struct vmxnet3_rxqueue *rxq;
1397	struct vmxnet3_rxring *rxr;
1398	struct vmxnet3_comp_ring *rxc;
1399	struct vmxnet3_rxbuf *rxb;
1400	int i, j, q;
1401
1402	dev = sc->vmx_dev;
1403
1404	for (q = 0; q < sc->vmx_nrxqueues; q++) {
1405		rxq = &sc->vmx_rxq[q];
1406		rxc = &rxq->vxrxq_comp_ring;
1407
1408		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1409			rxr = &rxq->vxrxq_cmd_ring[i];
1410
1411			if (rxr->vxrxr_spare_dmap != NULL) {
1412				bus_dmamap_destroy(rxr->vxrxr_rxtag,
1413				    rxr->vxrxr_spare_dmap);
1414				rxr->vxrxr_spare_dmap = NULL;
1415			}
1416
1417			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1418				rxb = &rxr->vxrxr_rxbuf[j];
1419				if (rxb->vrxb_dmamap != NULL) {
1420					bus_dmamap_destroy(rxr->vxrxr_rxtag,
1421					    rxb->vrxb_dmamap);
1422					rxb->vrxb_dmamap = NULL;
1423				}
1424			}
1425		}
1426
1427		if (rxc->vxcr_u.rxcd != NULL) {
1428			vmxnet3_dma_free(sc, &rxc->vxcr_dma);
1429			rxc->vxcr_u.rxcd = NULL;
1430		}
1431
1432		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1433			rxr = &rxq->vxrxq_cmd_ring[i];
1434
1435			if (rxr->vxrxr_rxd != NULL) {
1436				vmxnet3_dma_free(sc, &rxr->vxrxr_dma);
1437				rxr->vxrxr_rxd = NULL;
1438			}
1439
1440			if (rxr->vxrxr_rxtag != NULL) {
1441				bus_dma_tag_destroy(rxr->vxrxr_rxtag);
1442				rxr->vxrxr_rxtag = NULL;
1443			}
1444		}
1445	}
1446}
1447
1448static int
1449vmxnet3_alloc_queue_data(struct vmxnet3_softc *sc)
1450{
1451	int error;
1452
1453	error = vmxnet3_alloc_txq_data(sc);
1454	if (error)
1455		return (error);
1456
1457	error = vmxnet3_alloc_rxq_data(sc);
1458	if (error)
1459		return (error);
1460
1461	return (0);
1462}
1463
1464static void
1465vmxnet3_free_queue_data(struct vmxnet3_softc *sc)
1466{
1467
1468	if (sc->vmx_rxq != NULL)
1469		vmxnet3_free_rxq_data(sc);
1470
1471	if (sc->vmx_txq != NULL)
1472		vmxnet3_free_txq_data(sc);
1473}
1474
1475static int
1476vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1477{
1478	int error;
1479
1480	error = vmxnet3_dma_malloc(sc, VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN,
1481	    32, &sc->vmx_mcast_dma);
1482	if (error)
1483		device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1484	else
1485		sc->vmx_mcast = sc->vmx_mcast_dma.dma_vaddr;
1486
1487	return (error);
1488}
1489
1490static void
1491vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1492{
1493
1494	if (sc->vmx_mcast != NULL) {
1495		vmxnet3_dma_free(sc, &sc->vmx_mcast_dma);
1496		sc->vmx_mcast = NULL;
1497	}
1498}
1499
1500static void
1501vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1502{
1503	struct vmxnet3_driver_shared *ds;
1504	struct vmxnet3_txqueue *txq;
1505	struct vmxnet3_txq_shared *txs;
1506	struct vmxnet3_rxqueue *rxq;
1507	struct vmxnet3_rxq_shared *rxs;
1508	int i;
1509
1510	ds = sc->vmx_ds;
1511
	/*
	 * Initialize fields of the shared data that remain the same across
	 * reinits. Note the shared data is zeroed when allocated.
	 */
1516
1517	ds->magic = VMXNET3_REV1_MAGIC;
1518
1519	/* DriverInfo */
1520	ds->version = VMXNET3_DRIVER_VERSION;
1521	ds->guest = VMXNET3_GOS_FREEBSD |
1522#ifdef __LP64__
1523	    VMXNET3_GOS_64BIT;
1524#else
1525	    VMXNET3_GOS_32BIT;
1526#endif
1527	ds->vmxnet3_revision = 1;
1528	ds->upt_version = 1;
1529
1530	/* Misc. conf */
1531	ds->driver_data = vtophys(sc);
1532	ds->driver_data_len = sizeof(struct vmxnet3_softc);
1533	ds->queue_shared = sc->vmx_qs_dma.dma_paddr;
1534	ds->queue_shared_len = sc->vmx_qs_dma.dma_size;
1535	ds->nrxsg_max = sc->vmx_max_rxsegs;
1536
1537	/* RSS conf */
1538	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1539		ds->rss.version = 1;
1540		ds->rss.paddr = sc->vmx_rss_dma.dma_paddr;
1541		ds->rss.len = sc->vmx_rss_dma.dma_size;
1542	}
1543
1544	/* Interrupt control. */
1545	ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1546	ds->nintr = sc->vmx_nintrs;
1547	ds->evintr = sc->vmx_event_intr_idx;
1548	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1549
1550	for (i = 0; i < sc->vmx_nintrs; i++)
1551		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1552
1553	/* Receive filter. */
1554	ds->mcast_table = sc->vmx_mcast_dma.dma_paddr;
1555	ds->mcast_tablelen = sc->vmx_mcast_dma.dma_size;
1556
1557	/* Tx queues */
1558	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1559		txq = &sc->vmx_txq[i];
1560		txs = txq->vxtxq_ts;
1561
1562		txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_dma.dma_paddr;
1563		txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1564		txs->comp_ring = txq->vxtxq_comp_ring.vxcr_dma.dma_paddr;
1565		txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1566		txs->driver_data = vtophys(txq);
1567		txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1568	}
1569
1570	/* Rx queues */
1571	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1572		rxq = &sc->vmx_rxq[i];
1573		rxs = rxq->vxrxq_rs;
1574
1575		rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_dma.dma_paddr;
1576		rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1577		rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_dma.dma_paddr;
1578		rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1579		rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_dma.dma_paddr;
1580		rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1581		rxs->driver_data = vtophys(rxq);
1582		rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1583	}
1584}
1585
1586static void
1587vmxnet3_reinit_interface(struct vmxnet3_softc *sc)
1588{
1589	struct ifnet *ifp;
1590
1591	ifp = sc->vmx_ifp;
1592
1593	/* Use the current MAC address. */
1594	bcopy(IF_LLADDR(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
1595	vmxnet3_set_lladdr(sc);
1596
1597	ifp->if_hwassist = 0;
1598	if (ifp->if_capenable & IFCAP_TXCSUM)
1599		ifp->if_hwassist |= VMXNET3_CSUM_OFFLOAD;
1600	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
1601		ifp->if_hwassist |= VMXNET3_CSUM_OFFLOAD_IPV6;
1602	if (ifp->if_capenable & IFCAP_TSO4)
1603		ifp->if_hwassist |= CSUM_IP_TSO;
1604	if (ifp->if_capenable & IFCAP_TSO6)
1605		ifp->if_hwassist |= CSUM_IP6_TSO;
1606}
1607
1608static void
1609vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1610{
1611	/*
1612	 * Use the same key as the Linux driver until FreeBSD can do
1613	 * RSS (presumably Toeplitz) in software.
1614	 */
1615	static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
1616	    0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
1617	    0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
1618	    0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
1619	    0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
1620	    0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
1621	};
1622
1623	struct vmxnet3_driver_shared *ds;
1624	struct vmxnet3_rss_shared *rss;
1625	int i;
1626
1627	ds = sc->vmx_ds;
1628	rss = sc->vmx_rss;
1629
1630	rss->hash_type =
1631	    UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1632	    UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1633	rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1634	rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1635	rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1636	memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
1637
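	/*
	 * Distribute hash buckets round-robin across the active Rx queues;
	 * with four queues, for example, the indirection table repeats the
	 * pattern 0, 1, 2, 3.
	 */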
1638	for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1639		rss->ind_table[i] = i % sc->vmx_nrxqueues;
1640}
1641
1642static void
1643vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1644{
1645	struct ifnet *ifp;
1646	struct vmxnet3_driver_shared *ds;
1647
1648	ifp = sc->vmx_ifp;
1649	ds = sc->vmx_ds;
1650
1651	ds->mtu = ifp->if_mtu;
1652	ds->ntxqueue = sc->vmx_ntxqueues;
1653	ds->nrxqueue = sc->vmx_nrxqueues;
1654
1655	ds->upt_features = 0;
1656	if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
1657		ds->upt_features |= UPT1_F_CSUM;
1658	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1659		ds->upt_features |= UPT1_F_VLAN;
1660	if (ifp->if_capenable & IFCAP_LRO)
1661		ds->upt_features |= UPT1_F_LRO;
1662
1663	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1664		ds->upt_features |= UPT1_F_RSS;
1665		vmxnet3_reinit_rss_shared_data(sc);
1666	}
1667
1668	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.dma_paddr);
1669	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1670	    (uint64_t) sc->vmx_ds_dma.dma_paddr >> 32);
1671}
1672
1673static int
1674vmxnet3_alloc_data(struct vmxnet3_softc *sc)
1675{
1676	int error;
1677
1678	error = vmxnet3_alloc_shared_data(sc);
1679	if (error)
1680		return (error);
1681
1682	error = vmxnet3_alloc_queue_data(sc);
1683	if (error)
1684		return (error);
1685
1686	error = vmxnet3_alloc_mcast_table(sc);
1687	if (error)
1688		return (error);
1689
1690	vmxnet3_init_shared_data(sc);
1691
1692	return (0);
1693}
1694
1695static void
1696vmxnet3_free_data(struct vmxnet3_softc *sc)
1697{
1698
1699	vmxnet3_free_mcast_table(sc);
1700	vmxnet3_free_queue_data(sc);
1701	vmxnet3_free_shared_data(sc);
1702}
1703
1704static int
1705vmxnet3_setup_interface(struct vmxnet3_softc *sc)
1706{
1707	device_t dev;
1708	struct ifnet *ifp;
1709
1710	dev = sc->vmx_dev;
1711
1712	ifp = sc->vmx_ifp = if_alloc(IFT_ETHER);
1713	if (ifp == NULL) {
1714		device_printf(dev, "cannot allocate ifnet structure\n");
1715		return (ENOSPC);
1716	}
1717
1718	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1719#if __FreeBSD_version < 1000025
1720	ifp->if_baudrate = 1000000000;
1721#elif __FreeBSD_version < 1100011
1722	if_initbaudrate(ifp, IF_Gbps(10));
1723#else
1724	ifp->if_baudrate = IF_Gbps(10);
1725#endif
1726	ifp->if_softc = sc;
1727	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1728	ifp->if_init = vmxnet3_init;
1729	ifp->if_ioctl = vmxnet3_ioctl;
1730	ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
1731	ifp->if_hw_tsomaxsegcount = VMXNET3_TX_MAXSEGS;
1732	ifp->if_hw_tsomaxsegsize = VMXNET3_TX_MAXSEGSIZE;
1733
1734#ifdef VMXNET3_LEGACY_TX
1735	ifp->if_start = vmxnet3_start;
1736	ifp->if_snd.ifq_drv_maxlen = sc->vmx_ntxdescs - 1;
1737	IFQ_SET_MAXLEN(&ifp->if_snd, sc->vmx_ntxdescs - 1);
1738	IFQ_SET_READY(&ifp->if_snd);
1739#else
1740	ifp->if_transmit = vmxnet3_txq_mq_start;
1741	ifp->if_qflush = vmxnet3_qflush;
1742#endif
1743
1744	vmxnet3_get_lladdr(sc);
1745	ether_ifattach(ifp, sc->vmx_lladdr);
1746
1747	ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_TXCSUM;
1748	ifp->if_capabilities |= IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6;
1749	ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6;
1750	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
1751	    IFCAP_VLAN_HWCSUM;
1752	ifp->if_capenable = ifp->if_capabilities;
1753
1754	/* These capabilities are not enabled by default. */
1755	ifp->if_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER;
1756
1757	sc->vmx_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
1758	    vmxnet3_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
1759	sc->vmx_vlan_detach = EVENTHANDLER_REGISTER(vlan_config,
1760	    vmxnet3_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
1761
1762	ifmedia_init(&sc->vmx_media, 0, vmxnet3_media_change,
1763	    vmxnet3_media_status);
1764	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
1765	ifmedia_set(&sc->vmx_media, IFM_ETHER | IFM_AUTO);
1766
1767	return (0);
1768}
1769
1770static void
1771vmxnet3_evintr(struct vmxnet3_softc *sc)
1772{
1773	device_t dev;
1774	struct ifnet *ifp;
1775	struct vmxnet3_txq_shared *ts;
1776	struct vmxnet3_rxq_shared *rs;
1777	uint32_t event;
1778	int reset;
1779
1780	dev = sc->vmx_dev;
1781	ifp = sc->vmx_ifp;
1782	reset = 0;
1783
1784	VMXNET3_CORE_LOCK(sc);
1785
1786	/* Clear events. */
1787	event = sc->vmx_ds->event;
1788	vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
1789
1790	if (event & VMXNET3_EVENT_LINK) {
1791		vmxnet3_link_status(sc);
1792		if (sc->vmx_link_active != 0)
1793			vmxnet3_tx_start_all(sc);
1794	}
1795
1796	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
1797		reset = 1;
1798		vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
1799		ts = sc->vmx_txq[0].vxtxq_ts;
1800		if (ts->stopped != 0)
1801			device_printf(dev, "Tx queue error %#x\n", ts->error);
1802		rs = sc->vmx_rxq[0].vxrxq_rs;
1803		if (rs->stopped != 0)
1804			device_printf(dev, "Rx queue error %#x\n", rs->error);
1805		device_printf(dev, "Rx/Tx queue error event ... resetting\n");
1806	}
1807
1808	if (event & VMXNET3_EVENT_DIC)
1809		device_printf(dev, "device implementation change event\n");
1810	if (event & VMXNET3_EVENT_DEBUG)
1811		device_printf(dev, "debug event\n");
1812
1813	if (reset != 0) {
1814		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1815		vmxnet3_init_locked(sc);
1816	}
1817
1818	VMXNET3_CORE_UNLOCK(sc);
1819}
1820
1821static void
1822vmxnet3_txq_eof(struct vmxnet3_txqueue *txq)
1823{
1824	struct vmxnet3_softc *sc;
1825	struct ifnet *ifp;
1826	struct vmxnet3_txring *txr;
1827	struct vmxnet3_comp_ring *txc;
1828	struct vmxnet3_txcompdesc *txcd;
1829	struct vmxnet3_txbuf *txb;
1830	struct mbuf *m;
1831	u_int sop;
1832
1833	sc = txq->vxtxq_sc;
1834	ifp = sc->vmx_ifp;
1835	txr = &txq->vxtxq_cmd_ring;
1836	txc = &txq->vxtxq_comp_ring;
1837
1838	VMXNET3_TXQ_LOCK_ASSERT(txq);
1839
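	/*
	 * A completion descriptor belongs to the driver only while its gen
	 * bit matches the ring's current generation, which flips each time
	 * the ring wraps. The read barrier keeps the remaining descriptor
	 * fields from being read before the gen check.
	 */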
1840	for (;;) {
1841		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
1842		if (txcd->gen != txc->vxcr_gen)
1843			break;
1844		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1845
1846		if (++txc->vxcr_next == txc->vxcr_ndesc) {
1847			txc->vxcr_next = 0;
1848			txc->vxcr_gen ^= 1;
1849		}
1850
1851		sop = txr->vxtxr_next;
1852		txb = &txr->vxtxr_txbuf[sop];
1853
1854		if ((m = txb->vtxb_m) != NULL) {
1855			bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
1856			    BUS_DMASYNC_POSTWRITE);
1857			bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap);
1858
1859			txq->vxtxq_stats.vmtxs_opackets++;
1860			txq->vxtxq_stats.vmtxs_obytes += m->m_pkthdr.len;
1861			if (m->m_flags & M_MCAST)
1862				txq->vxtxq_stats.vmtxs_omcasts++;
1863
1864			m_freem(m);
1865			txb->vtxb_m = NULL;
1866		}
1867
1868		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
1869	}
1870
1871	if (txr->vxtxr_head == txr->vxtxr_next)
1872		txq->vxtxq_watchdog = 0;
1873}
1874
1875static int
1876vmxnet3_newbuf(struct vmxnet3_softc *sc, struct vmxnet3_rxring *rxr)
1877{
1878	struct ifnet *ifp;
1879	struct mbuf *m;
1880	struct vmxnet3_rxdesc *rxd;
1881	struct vmxnet3_rxbuf *rxb;
1882	bus_dma_tag_t tag;
1883	bus_dmamap_t dmap;
1884	bus_dma_segment_t segs[1];
1885	int idx, clsize, btype, flags, nsegs, error;
1886
1887	ifp = sc->vmx_ifp;
1888	tag = rxr->vxrxr_rxtag;
1889	dmap = rxr->vxrxr_spare_dmap;
1890	idx = rxr->vxrxr_fill;
1891	rxd = &rxr->vxrxr_rxd[idx];
1892	rxb = &rxr->vxrxr_rxbuf[idx];
1893
1894#ifdef VMXNET3_FAILPOINTS
1895	KFAIL_POINT_CODE(VMXNET3_FP, newbuf, return ENOBUFS);
1896	if (rxr->vxrxr_rid != 0)
1897		KFAIL_POINT_CODE(VMXNET3_FP, newbuf_body_only, return ENOBUFS);
1898#endif
1899
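	/*
	 * Only ring 0 receives head buffers, and only at every
	 * vmx_rx_max_chain'th slot; every other slot, and all of ring 1,
	 * is filled with page-sized body buffers used to continue a frame.
	 */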
1900	if (rxr->vxrxr_rid == 0 && (idx % sc->vmx_rx_max_chain) == 0) {
1901		flags = M_PKTHDR;
1902		clsize = MCLBYTES;
1903		btype = VMXNET3_BTYPE_HEAD;
1904	} else {
1905#if __FreeBSD_version < 902001
		/*
		 * These mbufs will never be used for the start of a frame.
		 * Roughly prior to branching releng/9.2,
		 * bus_dmamap_load_mbuf_sg() required the mbuf to always be a
		 * packet header. Avoid unnecessary mbuf initialization in
		 * newer versions where that is not the case.
		 */
1913		flags = M_PKTHDR;
1914#else
1915		flags = 0;
1916#endif
1917		clsize = MJUMPAGESIZE;
1918		btype = VMXNET3_BTYPE_BODY;
1919	}
1920
1921	m = m_getjcl(M_NOWAIT, MT_DATA, flags, clsize);
1922	if (m == NULL) {
1923		sc->vmx_stats.vmst_mgetcl_failed++;
1924		return (ENOBUFS);
1925	}
1926
1927	if (btype == VMXNET3_BTYPE_HEAD) {
1928		m->m_len = m->m_pkthdr.len = clsize;
1929		m_adj(m, ETHER_ALIGN);
1930	} else
1931		m->m_len = clsize;
1932
1933	error = bus_dmamap_load_mbuf_sg(tag, dmap, m, &segs[0], &nsegs,
1934	    BUS_DMA_NOWAIT);
1935	if (error) {
1936		m_freem(m);
1937		sc->vmx_stats.vmst_mbuf_load_failed++;
1938		return (error);
1939	}
1940	KASSERT(nsegs == 1,
1941	    ("%s: mbuf %p with too many segments %d", __func__, m, nsegs));
1942#if __FreeBSD_version < 902001
1943	if (btype == VMXNET3_BTYPE_BODY)
1944		m->m_flags &= ~M_PKTHDR;
1945#endif
1946
1947	if (rxb->vrxb_m != NULL) {
1948		bus_dmamap_sync(tag, rxb->vrxb_dmamap, BUS_DMASYNC_POSTREAD);
1949		bus_dmamap_unload(tag, rxb->vrxb_dmamap);
1950	}
1951
1952	rxr->vxrxr_spare_dmap = rxb->vrxb_dmamap;
1953	rxb->vrxb_dmamap = dmap;
1954	rxb->vrxb_m = m;
1955
1956	rxd->addr = segs[0].ds_addr;
1957	rxd->len = segs[0].ds_len;
1958	rxd->btype = btype;
1959	rxd->gen = rxr->vxrxr_gen;
1960
1961	vmxnet3_rxr_increment_fill(rxr);
1962	return (0);
1963}
1964
1965static void
1966vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *rxq,
1967    struct vmxnet3_rxring *rxr, int idx)
1968{
1969	struct vmxnet3_rxdesc *rxd;
1970
1971	rxd = &rxr->vxrxr_rxd[idx];
1972	rxd->gen = rxr->vxrxr_gen;
1973	vmxnet3_rxr_increment_fill(rxr);
1974}
1975
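/*
 * Discard the remaining completion descriptors of a frame that is being
 * dropped, recycling their Rx buffers, up to and including end-of-packet.
 */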
1976static void
1977vmxnet3_rxq_discard_chain(struct vmxnet3_rxqueue *rxq)
1978{
1979	struct vmxnet3_softc *sc;
1980	struct vmxnet3_rxring *rxr;
1981	struct vmxnet3_comp_ring *rxc;
1982	struct vmxnet3_rxcompdesc *rxcd;
1983	int idx, eof;
1984
1985	sc = rxq->vxrxq_sc;
1986	rxc = &rxq->vxrxq_comp_ring;
1987
1988	do {
1989		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
1990		if (rxcd->gen != rxc->vxcr_gen)
1991			break;		/* Not expected. */
1992		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1993
1994		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
1995			rxc->vxcr_next = 0;
1996			rxc->vxcr_gen ^= 1;
1997		}
1998
1999		idx = rxcd->rxd_idx;
2000		eof = rxcd->eop;
2001		if (rxcd->qid < sc->vmx_nrxqueues)
2002			rxr = &rxq->vxrxq_cmd_ring[0];
2003		else
2004			rxr = &rxq->vxrxq_cmd_ring[1];
2005		vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2006	} while (!eof);
2007}
2008
2009static void
2010vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2011{
2012
2013	if (rxcd->ipv4) {
2014		m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
2015		if (rxcd->ipcsum_ok)
2016			m->m_pkthdr.csum_flags |= CSUM_IP_VALID;
2017	}
2018
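	/*
	 * If the device verified the TCP/UDP checksum of a non-fragmented
	 * packet, report a valid pseudo-header checksum so the stack does
	 * not verify it again.
	 */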
2019	if (!rxcd->fragment) {
2020		if (rxcd->csum_ok && (rxcd->tcp || rxcd->udp)) {
2021			m->m_pkthdr.csum_flags |= CSUM_DATA_VALID |
2022			    CSUM_PSEUDO_HDR;
2023			m->m_pkthdr.csum_data = 0xFFFF;
2024		}
2025	}
2026}
2027
2028static void
2029vmxnet3_rxq_input(struct vmxnet3_rxqueue *rxq,
2030    struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2031{
2032	struct vmxnet3_softc *sc;
2033	struct ifnet *ifp;
2034
2035	sc = rxq->vxrxq_sc;
2036	ifp = sc->vmx_ifp;
2037
2038	if (rxcd->error) {
2039		rxq->vxrxq_stats.vmrxs_ierrors++;
2040		m_freem(m);
2041		return;
2042	}
2043
2044#ifdef notyet
2045	switch (rxcd->rss_type) {
2046	case VMXNET3_RCD_RSS_TYPE_IPV4:
2047		m->m_pkthdr.flowid = rxcd->rss_hash;
2048		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV4);
2049		break;
2050	case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
2051		m->m_pkthdr.flowid = rxcd->rss_hash;
2052		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV4);
2053		break;
2054	case VMXNET3_RCD_RSS_TYPE_IPV6:
2055		m->m_pkthdr.flowid = rxcd->rss_hash;
2056		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV6);
2057		break;
2058	case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
2059		m->m_pkthdr.flowid = rxcd->rss_hash;
2060		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV6);
2061		break;
2062	default: /* VMXNET3_RCD_RSS_TYPE_NONE */
2063		m->m_pkthdr.flowid = rxq->vxrxq_id;
2064		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2065		break;
2066	}
2067#else
2068	m->m_pkthdr.flowid = rxq->vxrxq_id;
2069	M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2070#endif
2071
2072	if (!rxcd->no_csum)
2073		vmxnet3_rx_csum(rxcd, m);
2074	if (rxcd->vlan) {
2075		m->m_flags |= M_VLANTAG;
2076		m->m_pkthdr.ether_vtag = rxcd->vtag;
2077	}
2078
2079	rxq->vxrxq_stats.vmrxs_ipackets++;
2080	rxq->vxrxq_stats.vmrxs_ibytes += m->m_pkthdr.len;
2081
2082	VMXNET3_RXQ_UNLOCK(rxq);
2083	(*ifp->if_input)(ifp, m);
2084	VMXNET3_RXQ_LOCK(rxq);
2085}
2086
2087static void
2088vmxnet3_rxq_eof(struct vmxnet3_rxqueue *rxq)
2089{
2090	struct vmxnet3_softc *sc;
2091	struct ifnet *ifp;
2092	struct vmxnet3_rxring *rxr;
2093	struct vmxnet3_comp_ring *rxc;
2094	struct vmxnet3_rxdesc *rxd;
2095	struct vmxnet3_rxcompdesc *rxcd;
2096	struct mbuf *m, *m_head, *m_tail;
2097	int idx, length;
2098
2099	sc = rxq->vxrxq_sc;
2100	ifp = sc->vmx_ifp;
2101	rxc = &rxq->vxrxq_comp_ring;
2102
2103	VMXNET3_RXQ_LOCK_ASSERT(rxq);
2104
2105	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2106		return;
2107
2108	m_head = rxq->vxrxq_mhead;
2109	rxq->vxrxq_mhead = NULL;
2110	m_tail = rxq->vxrxq_mtail;
2111	rxq->vxrxq_mtail = NULL;
2112	MPASS(m_head == NULL || m_tail != NULL);
2113
2114	for (;;) {
2115		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
2116		if (rxcd->gen != rxc->vxcr_gen) {
2117			rxq->vxrxq_mhead = m_head;
2118			rxq->vxrxq_mtail = m_tail;
2119			break;
2120		}
2121		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
2122
2123		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
2124			rxc->vxcr_next = 0;
2125			rxc->vxcr_gen ^= 1;
2126		}
2127
2128		idx = rxcd->rxd_idx;
2129		length = rxcd->len;
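		/*
		 * Completions for a queue's second command ring report a
		 * qid offset by the total number of Rx queues.
		 */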
2130		if (rxcd->qid < sc->vmx_nrxqueues)
2131			rxr = &rxq->vxrxq_cmd_ring[0];
2132		else
2133			rxr = &rxq->vxrxq_cmd_ring[1];
2134		rxd = &rxr->vxrxr_rxd[idx];
2135
2136		m = rxr->vxrxr_rxbuf[idx].vrxb_m;
2137		KASSERT(m != NULL, ("%s: queue %d idx %d without mbuf",
2138		    __func__, rxcd->qid, idx));
2139
2140		/*
2141		 * The host may skip descriptors. We detect this when the
2142		 * completed descriptor's index does not match our next fill
2143		 * index. Catch up with the host now.
2144		 */
2145		if (__predict_false(rxr->vxrxr_fill != idx)) {
2146			while (rxr->vxrxr_fill != idx) {
2147				rxr->vxrxr_rxd[rxr->vxrxr_fill].gen =
2148				    rxr->vxrxr_gen;
2149				vmxnet3_rxr_increment_fill(rxr);
2150			}
2151		}
2152
2153		if (rxcd->sop) {
2154			KASSERT(rxd->btype == VMXNET3_BTYPE_HEAD,
2155			    ("%s: start of frame w/o head buffer", __func__));
2156			KASSERT(rxr == &rxq->vxrxq_cmd_ring[0],
2157			    ("%s: start of frame not in ring 0", __func__));
2158			KASSERT((idx % sc->vmx_rx_max_chain) == 0,
2159			    ("%s: start of frame at unexpected index %d (%d)",
2160			     __func__, idx, sc->vmx_rx_max_chain));
2161			KASSERT(m_head == NULL,
2162			    ("%s: duplicate start of frame?", __func__));
2163
2164			if (length == 0) {
2165				/* Just ignore this descriptor. */
2166				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2167				goto nextp;
2168			}
2169
2170			if (vmxnet3_newbuf(sc, rxr) != 0) {
2171				rxq->vxrxq_stats.vmrxs_iqdrops++;
2172				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2173				if (!rxcd->eop)
2174					vmxnet3_rxq_discard_chain(rxq);
2175				goto nextp;
2176			}
2177
2178			m->m_pkthdr.rcvif = ifp;
2179			m->m_pkthdr.len = m->m_len = length;
2180			m->m_pkthdr.csum_flags = 0;
2181			m_head = m_tail = m;
2182
2183		} else {
2184			KASSERT(rxd->btype == VMXNET3_BTYPE_BODY,
2185			    ("%s: non start of frame w/o body buffer", __func__));
2186			KASSERT(m_head != NULL,
2187			    ("%s: frame not started?", __func__));
2188
2189			if (vmxnet3_newbuf(sc, rxr) != 0) {
2190				rxq->vxrxq_stats.vmrxs_iqdrops++;
2191				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2192				if (!rxcd->eop)
2193					vmxnet3_rxq_discard_chain(rxq);
2194				m_freem(m_head);
2195				m_head = m_tail = NULL;
2196				goto nextp;
2197			}
2198
2199			m->m_len = length;
2200			m_head->m_pkthdr.len += length;
2201			m_tail->m_next = m;
2202			m_tail = m;
2203		}
2204
2205		if (rxcd->eop) {
2206			vmxnet3_rxq_input(rxq, rxcd, m_head);
2207			m_head = m_tail = NULL;
2208
2209			/* Must recheck after dropping the Rx lock. */
2210			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2211				break;
2212		}
2213
2214nextp:
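		/*
		 * When the device requests it, report how far this command
		 * ring has been consumed by updating its Rx head register.
		 */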
2215		if (__predict_false(rxq->vxrxq_rs->update_rxhead)) {
2216			int qid = rxcd->qid;
2217			bus_size_t r;
2218
2219			idx = (idx + 1) % rxr->vxrxr_ndesc;
2220			if (qid >= sc->vmx_nrxqueues) {
2221				qid -= sc->vmx_nrxqueues;
2222				r = VMXNET3_BAR0_RXH2(qid);
2223			} else
2224				r = VMXNET3_BAR0_RXH1(qid);
2225			vmxnet3_write_bar0(sc, r, idx);
2226		}
2227	}
2228}
2229
2230static void
2231vmxnet3_legacy_intr(void *xsc)
2232{
2233	struct vmxnet3_softc *sc;
2234	struct vmxnet3_rxqueue *rxq;
2235	struct vmxnet3_txqueue *txq;
2236
2237	sc = xsc;
2238	rxq = &sc->vmx_rxq[0];
2239	txq = &sc->vmx_txq[0];
2240
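	/*
	 * With a shared legacy INTx line, the interrupt status register
	 * tells us whether this device actually raised the interrupt;
	 * an MSI vector is always ours.
	 */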
2241	if (sc->vmx_intr_type == VMXNET3_IT_LEGACY) {
2242		if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
2243			return;
2244	}
2245	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2246		vmxnet3_disable_all_intrs(sc);
2247
2248	if (sc->vmx_ds->event != 0)
2249		vmxnet3_evintr(sc);
2250
2251	VMXNET3_RXQ_LOCK(rxq);
2252	vmxnet3_rxq_eof(rxq);
2253	VMXNET3_RXQ_UNLOCK(rxq);
2254
2255	VMXNET3_TXQ_LOCK(txq);
2256	vmxnet3_txq_eof(txq);
2257	vmxnet3_txq_start(txq);
2258	VMXNET3_TXQ_UNLOCK(txq);
2259
2260	vmxnet3_enable_all_intrs(sc);
2261}
2262
2263static void
2264vmxnet3_txq_intr(void *xtxq)
2265{
2266	struct vmxnet3_softc *sc;
2267	struct vmxnet3_txqueue *txq;
2268
2269	txq = xtxq;
2270	sc = txq->vxtxq_sc;
2271
2272	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2273		vmxnet3_disable_intr(sc, txq->vxtxq_intr_idx);
2274
2275	VMXNET3_TXQ_LOCK(txq);
2276	vmxnet3_txq_eof(txq);
2277	vmxnet3_txq_start(txq);
2278	VMXNET3_TXQ_UNLOCK(txq);
2279
2280	vmxnet3_enable_intr(sc, txq->vxtxq_intr_idx);
2281}
2282
2283static void
2284vmxnet3_rxq_intr(void *xrxq)
2285{
2286	struct vmxnet3_softc *sc;
2287	struct vmxnet3_rxqueue *rxq;
2288
2289	rxq = xrxq;
2290	sc = rxq->vxrxq_sc;
2291
2292	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2293		vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx);
2294
2295	VMXNET3_RXQ_LOCK(rxq);
2296	vmxnet3_rxq_eof(rxq);
2297	VMXNET3_RXQ_UNLOCK(rxq);
2298
2299	vmxnet3_enable_intr(sc, rxq->vxrxq_intr_idx);
2300}
2301
2302static void
2303vmxnet3_event_intr(void *xsc)
2304{
2305	struct vmxnet3_softc *sc;
2306
2307	sc = xsc;
2308
2309	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2310		vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
2311
2312	if (sc->vmx_ds->event != 0)
2313		vmxnet3_evintr(sc);
2314
2315	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2316}
2317
2318static void
2319vmxnet3_txstop(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2320{
2321	struct vmxnet3_txring *txr;
2322	struct vmxnet3_txbuf *txb;
2323	int i;
2324
2325	txr = &txq->vxtxq_cmd_ring;
2326
2327	for (i = 0; i < txr->vxtxr_ndesc; i++) {
2328		txb = &txr->vxtxr_txbuf[i];
2329
2330		if (txb->vtxb_m == NULL)
2331			continue;
2332
2333		bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
2334		    BUS_DMASYNC_POSTWRITE);
2335		bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap);
2336		m_freem(txb->vtxb_m);
2337		txb->vtxb_m = NULL;
2338	}
2339}
2340
2341static void
2342vmxnet3_rxstop(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2343{
2344	struct vmxnet3_rxring *rxr;
2345	struct vmxnet3_rxbuf *rxb;
2346	int i, j;
2347
2348	if (rxq->vxrxq_mhead != NULL) {
2349		m_freem(rxq->vxrxq_mhead);
2350		rxq->vxrxq_mhead = NULL;
2351		rxq->vxrxq_mtail = NULL;
2352	}
2353
2354	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
2355		rxr = &rxq->vxrxq_cmd_ring[i];
2356
2357		for (j = 0; j < rxr->vxrxr_ndesc; j++) {
2358			rxb = &rxr->vxrxr_rxbuf[j];
2359
2360			if (rxb->vrxb_m == NULL)
2361				continue;
2362
2363			bus_dmamap_sync(rxr->vxrxr_rxtag, rxb->vrxb_dmamap,
2364			    BUS_DMASYNC_POSTREAD);
2365			bus_dmamap_unload(rxr->vxrxr_rxtag, rxb->vrxb_dmamap);
2366			m_freem(rxb->vrxb_m);
2367			rxb->vrxb_m = NULL;
2368		}
2369	}
2370}
2371
2372static void
2373vmxnet3_stop_rendezvous(struct vmxnet3_softc *sc)
2374{
2375	struct vmxnet3_rxqueue *rxq;
2376	struct vmxnet3_txqueue *txq;
2377	int i;
2378
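	/*
	 * Briefly take and release each queue lock so that any queue
	 * processing still running on another CPU drains before the
	 * rings are torn down.
	 */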
2379	for (i = 0; i < sc->vmx_nrxqueues; i++) {
2380		rxq = &sc->vmx_rxq[i];
2381		VMXNET3_RXQ_LOCK(rxq);
2382		VMXNET3_RXQ_UNLOCK(rxq);
2383	}
2384
2385	for (i = 0; i < sc->vmx_ntxqueues; i++) {
2386		txq = &sc->vmx_txq[i];
2387		VMXNET3_TXQ_LOCK(txq);
2388		VMXNET3_TXQ_UNLOCK(txq);
2389	}
2390}
2391
2392static void
2393vmxnet3_stop(struct vmxnet3_softc *sc)
2394{
2395	struct ifnet *ifp;
2396	int q;
2397
2398	ifp = sc->vmx_ifp;
2399	VMXNET3_CORE_LOCK_ASSERT(sc);
2400
2401	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2402	sc->vmx_link_active = 0;
2403	callout_stop(&sc->vmx_tick);
2404
2405	/* Disable interrupts. */
2406	vmxnet3_disable_all_intrs(sc);
2407	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
2408
2409	vmxnet3_stop_rendezvous(sc);
2410
2411	for (q = 0; q < sc->vmx_ntxqueues; q++)
2412		vmxnet3_txstop(sc, &sc->vmx_txq[q]);
2413	for (q = 0; q < sc->vmx_nrxqueues; q++)
2414		vmxnet3_rxstop(sc, &sc->vmx_rxq[q]);
2415
2416	vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
2417}
2418
2419static void
2420vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2421{
2422	struct vmxnet3_txring *txr;
2423	struct vmxnet3_comp_ring *txc;
2424
2425	txr = &txq->vxtxq_cmd_ring;
2426	txr->vxtxr_head = 0;
2427	txr->vxtxr_next = 0;
2428	txr->vxtxr_gen = VMXNET3_INIT_GEN;
2429	bzero(txr->vxtxr_txd,
2430	    txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc));
2431
2432	txc = &txq->vxtxq_comp_ring;
2433	txc->vxcr_next = 0;
2434	txc->vxcr_gen = VMXNET3_INIT_GEN;
2435	bzero(txc->vxcr_u.txcd,
2436	    txc->vxcr_ndesc * sizeof(struct vmxnet3_txcompdesc));
2437}
2438
2439static int
2440vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2441{
2442	struct ifnet *ifp;
2443	struct vmxnet3_rxring *rxr;
2444	struct vmxnet3_comp_ring *rxc;
2445	int i, populate, idx, frame_size, error;
2446
2447	ifp = sc->vmx_ifp;
2448	frame_size = ETHER_ALIGN + sizeof(struct ether_vlan_header) +
2449	    ifp->if_mtu;
2450
2451	/*
2452	 * If the MTU causes us to exceed what a regular sized cluster can
2453	 * handle, we allocate a second MJUMPAGESIZE cluster after it in
2454	 * ring 0. If in use, ring 1 always contains MJUMPAGESIZE clusters.
2455	 *
2456	 * Keep rx_max_chain a divisor of the maximum Rx ring size to make
2457	 * our life easier. We do not support changing the ring size after
2458	 * the attach.
2459	 */
2460	if (frame_size <= MCLBYTES)
2461		sc->vmx_rx_max_chain = 1;
2462	else
2463		sc->vmx_rx_max_chain = 2;
2464
2465	/*
2466	 * Only populate ring 1 if the configuration will take advantage
2467	 * of it. That is either when LRO is enabled or the frame size
2468	 * exceeds what ring 0 can contain.
2469	 */
2470	if ((ifp->if_capenable & IFCAP_LRO) == 0 &&
2471	    frame_size <= MCLBYTES + MJUMPAGESIZE)
2472		populate = 1;
2473	else
2474		populate = VMXNET3_RXRINGS_PERQ;
2475
2476	for (i = 0; i < populate; i++) {
2477		rxr = &rxq->vxrxq_cmd_ring[i];
2478		rxr->vxrxr_fill = 0;
2479		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
2480		bzero(rxr->vxrxr_rxd,
2481		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2482
2483		for (idx = 0; idx < rxr->vxrxr_ndesc; idx++) {
2484			error = vmxnet3_newbuf(sc, rxr);
2485			if (error)
2486				return (error);
2487		}
2488	}
2489
2490	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
2491		rxr = &rxq->vxrxq_cmd_ring[i];
2492		rxr->vxrxr_fill = 0;
2493		rxr->vxrxr_gen = 0;
2494		bzero(rxr->vxrxr_rxd,
2495		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2496	}
2497
2498	rxc = &rxq->vxrxq_comp_ring;
2499	rxc->vxcr_next = 0;
2500	rxc->vxcr_gen = VMXNET3_INIT_GEN;
2501	bzero(rxc->vxcr_u.rxcd,
2502	    rxc->vxcr_ndesc * sizeof(struct vmxnet3_rxcompdesc));
2503
2504	return (0);
2505}
2506
2507static int
2508vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
2509{
2510	device_t dev;
2511	int q, error;
2512
2513	dev = sc->vmx_dev;
2514
2515	for (q = 0; q < sc->vmx_ntxqueues; q++)
2516		vmxnet3_txinit(sc, &sc->vmx_txq[q]);
2517
2518	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2519		error = vmxnet3_rxinit(sc, &sc->vmx_rxq[q]);
2520		if (error) {
2521			device_printf(dev, "cannot populate Rx queue %d\n", q);
2522			return (error);
2523		}
2524	}
2525
2526	return (0);
2527}
2528
2529static int
2530vmxnet3_enable_device(struct vmxnet3_softc *sc)
2531{
2532	int q;
2533
2534	if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
2535		device_printf(sc->vmx_dev, "device enable command failed!\n");
2536		return (1);
2537	}
2538
2539	/* Reset the Rx queue heads. */
2540	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2541		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
2542		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
2543	}
2544
2545	return (0);
2546}
2547
2548static void
2549vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
2550{
2551	struct ifnet *ifp;
2552
2553	ifp = sc->vmx_ifp;
2554
2555	vmxnet3_set_rxfilter(sc);
2556
2557	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2558		bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter,
2559		    sizeof(sc->vmx_ds->vlan_filter));
2560	else
2561		bzero(sc->vmx_ds->vlan_filter,
2562		    sizeof(sc->vmx_ds->vlan_filter));
2563	vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
2564}
2565
2566static int
2567vmxnet3_reinit(struct vmxnet3_softc *sc)
2568{
2569
2570	vmxnet3_reinit_interface(sc);
2571	vmxnet3_reinit_shared_data(sc);
2572
2573	if (vmxnet3_reinit_queues(sc) != 0)
2574		return (ENXIO);
2575
2576	if (vmxnet3_enable_device(sc) != 0)
2577		return (ENXIO);
2578
2579	vmxnet3_reinit_rxfilters(sc);
2580
2581	return (0);
2582}
2583
2584static void
2585vmxnet3_init_locked(struct vmxnet3_softc *sc)
2586{
2587	struct ifnet *ifp;
2588
2589	ifp = sc->vmx_ifp;
2590
2591	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2592		return;
2593
2594	vmxnet3_stop(sc);
2595
2596	if (vmxnet3_reinit(sc) != 0) {
2597		vmxnet3_stop(sc);
2598		return;
2599	}
2600
2601	ifp->if_drv_flags |= IFF_DRV_RUNNING;
2602	vmxnet3_link_status(sc);
2603
2604	vmxnet3_enable_all_intrs(sc);
2605	callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
2606}
2607
2608static void
2609vmxnet3_init(void *xsc)
2610{
2611	struct vmxnet3_softc *sc;
2612
2613	sc = xsc;
2614
2615	VMXNET3_CORE_LOCK(sc);
2616	vmxnet3_init_locked(sc);
2617	VMXNET3_CORE_UNLOCK(sc);
2618}
2619
2620/*
2621 * BMV: Much of this can go away once we finally have offsets in
2622 * the mbuf packet header. Bug andre@.
2623 */
2624static int
2625vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *txq, struct mbuf *m,
2626    int *etype, int *proto, int *start)
2627{
2628	struct ether_vlan_header *evh;
2629	int offset;
2630#if defined(INET)
2631	struct ip *ip = NULL;
2632	struct ip iphdr;
2633#endif
2634#if defined(INET6)
2635	struct ip6_hdr *ip6 = NULL;
2636	struct ip6_hdr ip6hdr;
2637#endif
2638
2639	evh = mtod(m, struct ether_vlan_header *);
2640	if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2641		/* BMV: We should handle nested VLAN tags too. */
2642		*etype = ntohs(evh->evl_proto);
2643		offset = sizeof(struct ether_vlan_header);
2644	} else {
2645		*etype = ntohs(evh->evl_encap_proto);
2646		offset = sizeof(struct ether_header);
2647	}
2648
2649	switch (*etype) {
2650#if defined(INET)
2651	case ETHERTYPE_IP:
2652		if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
2653			m_copydata(m, offset, sizeof(struct ip),
2654			    (caddr_t) &iphdr);
2655			ip = &iphdr;
2656		} else
2657			ip = mtodo(m, offset);
2658		*proto = ip->ip_p;
2659		*start = offset + (ip->ip_hl << 2);
2660		break;
2661#endif
2662#if defined(INET6)
2663	case ETHERTYPE_IPV6:
2664		if (__predict_false(m->m_len <
2665		    offset + sizeof(struct ip6_hdr))) {
2666			m_copydata(m, offset, sizeof(struct ip6_hdr),
2667			    (caddr_t) &ip6hdr);
2668			ip6 = &ip6hdr;
2669		} else
2670			ip6 = mtodo(m, offset);
2671		*proto = -1;
2672		*start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
2673		/* Assert the network stack sent us a valid packet. */
2674		KASSERT(*start > offset,
2675		    ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
2676		    *start, offset, *proto));
2677		break;
2678#endif
2679	default:
2680		return (EINVAL);
2681	}
2682
2683	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
2684		struct tcphdr *tcp, tcphdr;
2685		uint16_t sum;
2686
2687		if (__predict_false(*proto != IPPROTO_TCP)) {
2688			/* Likely failed to correctly parse the mbuf. */
2689			return (EINVAL);
2690		}
2691
2692		txq->vxtxq_stats.vmtxs_tso++;
2693
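		/*
		 * Seed the TCP checksum field with the pseudo-header
		 * checksum, computed without the length, as the device
		 * expects for TSO.
		 */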
2694		switch (*etype) {
2695#if defined(INET)
2696		case ETHERTYPE_IP:
2697			sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2698			    htons(IPPROTO_TCP));
2699			break;
2700#endif
2701#if defined(INET6)
2702		case ETHERTYPE_IPV6:
2703			sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
2704			break;
2705#endif
2706		default:
2707			sum = 0;
2708			break;
2709		}
2710
2711		if (m->m_len < *start + sizeof(struct tcphdr)) {
2712			m_copyback(m, *start + offsetof(struct tcphdr, th_sum),
2713			    sizeof(uint16_t), (caddr_t) &sum);
2714			m_copydata(m, *start, sizeof(struct tcphdr),
2715			    (caddr_t) &tcphdr);
2716			tcp = &tcphdr;
2717		} else {
2718			tcp = mtodo(m, *start);
2719			tcp->th_sum = sum;
2720		}
2721
2722		/*
2723		 * For TSO, the size of the TCP header is also included
2724		 * in the header length written to the descriptor.
2725		 */
2726		*start += (tcp->th_off << 2);
2727	} else
2728		txq->vxtxq_stats.vmtxs_csum++;
2729
2730	return (0);
2731}
2732
2733static int
2734vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *txq, struct mbuf **m0,
2735    bus_dmamap_t dmap, bus_dma_segment_t segs[], int *nsegs)
2736{
2737	struct vmxnet3_txring *txr;
2738	struct mbuf *m;
2739	bus_dma_tag_t tag;
2740	int error;
2741
2742	txr = &txq->vxtxq_cmd_ring;
2743	m = *m0;
2744	tag = txr->vxtxr_txtag;
2745
2746	error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0);
2747	if (error == 0 || error != EFBIG)
2748		return (error);
2749
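	/*
	 * EFBIG means the chain has more segments than the DMA tag
	 * allows; collapse the chain and retry the load once.
	 */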
2750	m = m_defrag(m, M_NOWAIT);
2751	if (m != NULL) {
2752		*m0 = m;
2753		error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0);
2754	} else
2755		error = ENOBUFS;
2756
2757	if (error) {
2758		m_freem(*m0);
2759		*m0 = NULL;
2760		txq->vxtxq_sc->vmx_stats.vmst_defrag_failed++;
2761	} else
2762		txq->vxtxq_sc->vmx_stats.vmst_defragged++;
2763
2764	return (error);
2765}
2766
2767static void
2768vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *txq, bus_dmamap_t dmap)
2769{
2770	struct vmxnet3_txring *txr;
2771
2772	txr = &txq->vxtxq_cmd_ring;
2773	bus_dmamap_unload(txr->vxtxr_txtag, dmap);
2774}
2775
2776static int
2777vmxnet3_txq_encap(struct vmxnet3_txqueue *txq, struct mbuf **m0)
2778{
2779	struct vmxnet3_softc *sc;
2780	struct vmxnet3_txring *txr;
2781	struct vmxnet3_txdesc *txd, *sop;
2782	struct mbuf *m;
2783	bus_dmamap_t dmap;
2784	bus_dma_segment_t segs[VMXNET3_TX_MAXSEGS];
2785	int i, gen, nsegs, etype, proto, start, error;
2786
2787	sc = txq->vxtxq_sc;
2788	start = 0;
2789	txd = NULL;
2790	txr = &txq->vxtxq_cmd_ring;
2791	dmap = txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_dmamap;
2792
2793	error = vmxnet3_txq_load_mbuf(txq, m0, dmap, segs, &nsegs);
2794	if (error)
2795		return (error);
2796
2797	m = *m0;
2798	M_ASSERTPKTHDR(m);
2799	KASSERT(nsegs <= VMXNET3_TX_MAXSEGS,
2800	    ("%s: mbuf %p with too many segments %d", __func__, m, nsegs));
2801
2802	if (VMXNET3_TXRING_AVAIL(txr) < nsegs) {
2803		txq->vxtxq_stats.vmtxs_full++;
2804		vmxnet3_txq_unload_mbuf(txq, dmap);
2805		return (ENOSPC);
2806	} else if (m->m_pkthdr.csum_flags & VMXNET3_CSUM_ALL_OFFLOAD) {
2807		error = vmxnet3_txq_offload_ctx(txq, m, &etype, &proto, &start);
2808		if (error) {
2809			txq->vxtxq_stats.vmtxs_offload_failed++;
2810			vmxnet3_txq_unload_mbuf(txq, dmap);
2811			m_freem(m);
2812			*m0 = NULL;
2813			return (error);
2814		}
2815	}
2816
2817	txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_m = m;
2818	sop = &txr->vxtxr_txd[txr->vxtxr_head];
2819	gen = txr->vxtxr_gen ^ 1;	/* Owned by the CPU until sop->gen flips. */
2820
2821	for (i = 0; i < nsegs; i++) {
2822		txd = &txr->vxtxr_txd[txr->vxtxr_head];
2823
2824		txd->addr = segs[i].ds_addr;
2825		txd->len = segs[i].ds_len;
2826		txd->gen = gen;
2827		txd->dtype = 0;
2828		txd->offload_mode = VMXNET3_OM_NONE;
2829		txd->offload_pos = 0;
2830		txd->hlen = 0;
2831		txd->eop = 0;
2832		txd->compreq = 0;
2833		txd->vtag_mode = 0;
2834		txd->vtag = 0;
2835
2836		if (++txr->vxtxr_head == txr->vxtxr_ndesc) {
2837			txr->vxtxr_head = 0;
2838			txr->vxtxr_gen ^= 1;
2839		}
2840		gen = txr->vxtxr_gen;
2841	}
2842	txd->eop = 1;
2843	txd->compreq = 1;
2844
2845	if (m->m_flags & M_VLANTAG) {
2846		sop->vtag_mode = 1;
2847		sop->vtag = m->m_pkthdr.ether_vtag;
2848	}
2849
2850	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
2851		sop->offload_mode = VMXNET3_OM_TSO;
2852		sop->hlen = start;
2853		sop->offload_pos = m->m_pkthdr.tso_segsz;
2854	} else if (m->m_pkthdr.csum_flags & (VMXNET3_CSUM_OFFLOAD |
2855	    VMXNET3_CSUM_OFFLOAD_IPV6)) {
2856		sop->offload_mode = VMXNET3_OM_CSUM;
2857		sop->hlen = start;
2858		sop->offload_pos = start + m->m_pkthdr.csum_data;
2859	}
2860
2861	/* Finally, pass ownership of the packet to the device. */
2862	vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
2863	sop->gen ^= 1;
2864
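	/*
	 * Batch doorbell writes: only hand the new descriptors to the
	 * device once the number pending reaches the host-provided
	 * threshold.
	 */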
2865	txq->vxtxq_ts->npending += nsegs;
2866	if (txq->vxtxq_ts->npending >= txq->vxtxq_ts->intr_threshold) {
2867		txq->vxtxq_ts->npending = 0;
2868		vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id),
2869		    txr->vxtxr_head);
2870	}
2871
2872	return (0);
2873}
2874
2875#ifdef VMXNET3_LEGACY_TX
2876
2877static void
2878vmxnet3_start_locked(struct ifnet *ifp)
2879{
2880	struct vmxnet3_softc *sc;
2881	struct vmxnet3_txqueue *txq;
2882	struct vmxnet3_txring *txr;
2883	struct mbuf *m_head;
2884	int tx, avail;
2885
2886	sc = ifp->if_softc;
2887	txq = &sc->vmx_txq[0];
2888	txr = &txq->vxtxq_cmd_ring;
2889	tx = 0;
2890
2891	VMXNET3_TXQ_LOCK_ASSERT(txq);
2892
2893	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2894	    sc->vmx_link_active == 0)
2895		return;
2896
2897	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
2898		if ((avail = VMXNET3_TXRING_AVAIL(txr)) < 2)
2899			break;
2900
2901		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
2902		if (m_head == NULL)
2903			break;
2904
2905		/* Assume the worst case if this mbuf is the head of a chain. */
2906		if (m_head->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) {
2907			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2908			break;
2909		}
2910
2911		if (vmxnet3_txq_encap(txq, &m_head) != 0) {
2912			if (m_head != NULL)
2913				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2914			break;
2915		}
2916
2917		tx++;
2918		ETHER_BPF_MTAP(ifp, m_head);
2919	}
2920
2921	if (tx > 0)
2922		txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
2923}
2924
2925static void
2926vmxnet3_start(struct ifnet *ifp)
2927{
2928	struct vmxnet3_softc *sc;
2929	struct vmxnet3_txqueue *txq;
2930
2931	sc = ifp->if_softc;
2932	txq = &sc->vmx_txq[0];
2933
2934	VMXNET3_TXQ_LOCK(txq);
2935	vmxnet3_start_locked(ifp);
2936	VMXNET3_TXQ_UNLOCK(txq);
2937}
2938
2939#else /* !VMXNET3_LEGACY_TX */
2940
2941static int
2942vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *txq, struct mbuf *m)
2943{
2944	struct vmxnet3_softc *sc;
2945	struct vmxnet3_txring *txr;
2946	struct buf_ring *br;
2947	struct ifnet *ifp;
2948	int tx, avail, error;
2949
2950	sc = txq->vxtxq_sc;
2951	br = txq->vxtxq_br;
2952	ifp = sc->vmx_ifp;
2953	txr = &txq->vxtxq_cmd_ring;
2954	tx = 0;
2955	error = 0;
2956
2957	VMXNET3_TXQ_LOCK_ASSERT(txq);
2958
2959	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2960	    sc->vmx_link_active == 0) {
2961		if (m != NULL)
2962			error = drbr_enqueue(ifp, br, m);
2963		return (error);
2964	}
2965
2966	if (m != NULL) {
2967		error = drbr_enqueue(ifp, br, m);
2968		if (error)
2969			return (error);
2970	}
2971
2972	while ((avail = VMXNET3_TXRING_AVAIL(txr)) >= 2) {
2973		m = drbr_peek(ifp, br);
2974		if (m == NULL)
2975			break;
2976
2977		/* Assume the worst case if this mbuf is the head of a chain. */
2978		if (m->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) {
2979			drbr_putback(ifp, br, m);
2980			break;
2981		}
2982
2983		if (vmxnet3_txq_encap(txq, &m) != 0) {
2984			if (m != NULL)
2985				drbr_putback(ifp, br, m);
2986			else
2987				drbr_advance(ifp, br);
2988			break;
2989		}
2990		drbr_advance(ifp, br);
2991
2992		tx++;
2993		ETHER_BPF_MTAP(ifp, m);
2994	}
2995
2996	if (tx > 0)
2997		txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
2998
2999	return (0);
3000}
3001
3002static int
3003vmxnet3_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
3004{
3005	struct vmxnet3_softc *sc;
3006	struct vmxnet3_txqueue *txq;
3007	int i, ntxq, error;
3008
3009	sc = ifp->if_softc;
3010	ntxq = sc->vmx_ntxqueues;
3011
3012	/* Check if the flowid is set. */
3013	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
3014		i = m->m_pkthdr.flowid % ntxq;
3015	else
3016		i = curcpu % ntxq;
3017
3018	txq = &sc->vmx_txq[i];
3019
3020	if (VMXNET3_TXQ_TRYLOCK(txq) != 0) {
3021		error = vmxnet3_txq_mq_start_locked(txq, m);
3022		VMXNET3_TXQ_UNLOCK(txq);
3023	} else {
3024		error = drbr_enqueue(ifp, txq->vxtxq_br, m);
3025		taskqueue_enqueue(sc->vmx_tq, &txq->vxtxq_defrtask);
3026	}
3027
3028	return (error);
3029}
3030
3031static void
3032vmxnet3_txq_tq_deferred(void *xtxq, int pending)
3033{
3034	struct vmxnet3_softc *sc;
3035	struct vmxnet3_txqueue *txq;
3036
3037	txq = xtxq;
3038	sc = txq->vxtxq_sc;
3039
3040	VMXNET3_TXQ_LOCK(txq);
3041	if (!drbr_empty(sc->vmx_ifp, txq->vxtxq_br))
3042		vmxnet3_txq_mq_start_locked(txq, NULL);
3043	VMXNET3_TXQ_UNLOCK(txq);
3044}
3045
3046#endif /* VMXNET3_LEGACY_TX */
3047
3048static void
3049vmxnet3_txq_start(struct vmxnet3_txqueue *txq)
3050{
3051	struct vmxnet3_softc *sc;
3052	struct ifnet *ifp;
3053
3054	sc = txq->vxtxq_sc;
3055	ifp = sc->vmx_ifp;
3056
3057#ifdef VMXNET3_LEGACY_TX
3058	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
3059		vmxnet3_start_locked(ifp);
3060#else
3061	if (!drbr_empty(ifp, txq->vxtxq_br))
3062		vmxnet3_txq_mq_start_locked(txq, NULL);
3063#endif
3064}
3065
3066static void
3067vmxnet3_tx_start_all(struct vmxnet3_softc *sc)
3068{
3069	struct vmxnet3_txqueue *txq;
3070	int i;
3071
3072	VMXNET3_CORE_LOCK_ASSERT(sc);
3073
3074	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3075		txq = &sc->vmx_txq[i];
3076
3077		VMXNET3_TXQ_LOCK(txq);
3078		vmxnet3_txq_start(txq);
3079		VMXNET3_TXQ_UNLOCK(txq);
3080	}
3081}
3082
3083static void
3084vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
3085{
3086	struct ifnet *ifp;
3087	int idx, bit;
3088
3089	ifp = sc->vmx_ifp;
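	/* The 4096-entry VLAN filter is a bitmap of 32-bit words. */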
3090	idx = (tag >> 5) & 0x7F;
3091	bit = tag & 0x1F;
3092
3093	if (tag == 0 || tag > 4095)
3094		return;
3095
3096	VMXNET3_CORE_LOCK(sc);
3097
3098	/* Update our private VLAN bitvector. */
3099	if (add)
3100		sc->vmx_vlan_filter[idx] |= (1 << bit);
3101	else
3102		sc->vmx_vlan_filter[idx] &= ~(1 << bit);
3103
3104	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
3105		if (add)
3106			sc->vmx_ds->vlan_filter[idx] |= (1 << bit);
3107		else
3108			sc->vmx_ds->vlan_filter[idx] &= ~(1 << bit);
3109		vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
3110	}
3111
3112	VMXNET3_CORE_UNLOCK(sc);
3113}
3114
3115static void
3116vmxnet3_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3117{
3118
3119	if (ifp->if_softc == arg)
3120		vmxnet3_update_vlan_filter(arg, 1, tag);
3121}
3122
3123static void
3124vmxnet3_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3125{
3126
3127	if (ifp->if_softc == arg)
3128		vmxnet3_update_vlan_filter(arg, 0, tag);
3129}
3130
3131static void
3132vmxnet3_set_rxfilter(struct vmxnet3_softc *sc)
3133{
3134	struct ifnet *ifp;
3135	struct vmxnet3_driver_shared *ds;
3136	struct ifmultiaddr *ifma;
3137	u_int mode;
3138
3139	ifp = sc->vmx_ifp;
3140	ds = sc->vmx_ds;
3141
3142	mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
3143	if (ifp->if_flags & IFF_PROMISC)
3144		mode |= VMXNET3_RXMODE_PROMISC;
3145	if (ifp->if_flags & IFF_ALLMULTI)
3146		mode |= VMXNET3_RXMODE_ALLMULTI;
3147	else {
3148		int cnt = 0, overflow = 0;
3149
3150		if_maddr_rlock(ifp);
3151		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3152			if (ifma->ifma_addr->sa_family != AF_LINK)
3153				continue;
3154			else if (cnt == VMXNET3_MULTICAST_MAX) {
3155				overflow = 1;
3156				break;
3157			}
3158
3159			bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
3160			   &sc->vmx_mcast[cnt*ETHER_ADDR_LEN], ETHER_ADDR_LEN);
3161			cnt++;
3162		}
3163		if_maddr_runlock(ifp);
3164
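		/*
		 * If the multicast table overflowed, fall back to
		 * receiving all multicast frames.
		 */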
3165		if (overflow != 0) {
3166			cnt = 0;
3167			mode |= VMXNET3_RXMODE_ALLMULTI;
3168		} else if (cnt > 0)
3169			mode |= VMXNET3_RXMODE_MCAST;
3170		ds->mcast_tablelen = cnt * ETHER_ADDR_LEN;
3171	}
3172
3173	ds->rxmode = mode;
3174
3175	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
3176	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
3177}
3178
3179static int
3180vmxnet3_change_mtu(struct vmxnet3_softc *sc, int mtu)
3181{
3182	struct ifnet *ifp;
3183
3184	ifp = sc->vmx_ifp;
3185
3186	if (mtu < VMXNET3_MIN_MTU || mtu > VMXNET3_MAX_MTU)
3187		return (EINVAL);
3188
3189	ifp->if_mtu = mtu;
3190
3191	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
3192		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3193		vmxnet3_init_locked(sc);
3194	}
3195
3196	return (0);
3197}
3198
3199static int
3200vmxnet3_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
3201{
3202	struct vmxnet3_softc *sc;
3203	struct ifreq *ifr;
3204	int reinit, mask, error;
3205
3206	sc = ifp->if_softc;
3207	ifr = (struct ifreq *) data;
3208	error = 0;
3209
3210	switch (cmd) {
3211	case SIOCSIFMTU:
3212		if (ifp->if_mtu != ifr->ifr_mtu) {
3213			VMXNET3_CORE_LOCK(sc);
3214			error = vmxnet3_change_mtu(sc, ifr->ifr_mtu);
3215			VMXNET3_CORE_UNLOCK(sc);
3216		}
3217		break;
3218
3219	case SIOCSIFFLAGS:
3220		VMXNET3_CORE_LOCK(sc);
3221		if (ifp->if_flags & IFF_UP) {
3222			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3223				if ((ifp->if_flags ^ sc->vmx_if_flags) &
3224				    (IFF_PROMISC | IFF_ALLMULTI)) {
3225					vmxnet3_set_rxfilter(sc);
3226				}
3227			} else
3228				vmxnet3_init_locked(sc);
3229		} else {
3230			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3231				vmxnet3_stop(sc);
3232		}
3233		sc->vmx_if_flags = ifp->if_flags;
3234		VMXNET3_CORE_UNLOCK(sc);
3235		break;
3236
3237	case SIOCADDMULTI:
3238	case SIOCDELMULTI:
3239		VMXNET3_CORE_LOCK(sc);
3240		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3241			vmxnet3_set_rxfilter(sc);
3242		VMXNET3_CORE_UNLOCK(sc);
3243		break;
3244
3245	case SIOCSIFMEDIA:
3246	case SIOCGIFMEDIA:
3247		error = ifmedia_ioctl(ifp, ifr, &sc->vmx_media, cmd);
3248		break;
3249
3250	case SIOCSIFCAP:
3251		VMXNET3_CORE_LOCK(sc);
3252		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3253
3254		if (mask & IFCAP_TXCSUM)
3255			ifp->if_capenable ^= IFCAP_TXCSUM;
3256		if (mask & IFCAP_TXCSUM_IPV6)
3257			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
3258		if (mask & IFCAP_TSO4)
3259			ifp->if_capenable ^= IFCAP_TSO4;
3260		if (mask & IFCAP_TSO6)
3261			ifp->if_capenable ^= IFCAP_TSO6;
3262
3263		if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO |
3264		    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWFILTER)) {
3265			/* Changing these features requires us to reinit. */
3266			reinit = 1;
3267
3268			if (mask & IFCAP_RXCSUM)
3269				ifp->if_capenable ^= IFCAP_RXCSUM;
3270			if (mask & IFCAP_RXCSUM_IPV6)
3271				ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
3272			if (mask & IFCAP_LRO)
3273				ifp->if_capenable ^= IFCAP_LRO;
3274			if (mask & IFCAP_VLAN_HWTAGGING)
3275				ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
3276			if (mask & IFCAP_VLAN_HWFILTER)
3277				ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
3278		} else
3279			reinit = 0;
3280
3281		if (mask & IFCAP_VLAN_HWTSO)
3282			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
3283
3284		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3285			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3286			vmxnet3_init_locked(sc);
3287		}
3288
3289		VMXNET3_CORE_UNLOCK(sc);
3290		VLAN_CAPABILITIES(ifp);
3291		break;
3292
3293	default:
3294		error = ether_ioctl(ifp, cmd, data);
3295		break;
3296	}
3297
3298	VMXNET3_CORE_LOCK_ASSERT_NOTOWNED(sc);
3299
3300	return (error);
3301}
3302
3303#ifndef VMXNET3_LEGACY_TX
3304static void
3305vmxnet3_qflush(struct ifnet *ifp)
3306{
3307	struct vmxnet3_softc *sc;
3308	struct vmxnet3_txqueue *txq;
3309	struct mbuf *m;
3310	int i;
3311
3312	sc = ifp->if_softc;
3313
3314	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3315		txq = &sc->vmx_txq[i];
3316
3317		VMXNET3_TXQ_LOCK(txq);
3318		while ((m = buf_ring_dequeue_sc(txq->vxtxq_br)) != NULL)
3319			m_freem(m);
3320		VMXNET3_TXQ_UNLOCK(txq);
3321	}
3322
3323	if_qflush(ifp);
3324}
3325#endif
3326
3327static int
3328vmxnet3_watchdog(struct vmxnet3_txqueue *txq)
3329{
3330	struct vmxnet3_softc *sc;
3331
3332	sc = txq->vxtxq_sc;
3333
3334	VMXNET3_TXQ_LOCK(txq);
3335	if (txq->vxtxq_watchdog == 0 || --txq->vxtxq_watchdog) {
3336		VMXNET3_TXQ_UNLOCK(txq);
3337		return (0);
3338	}
3339	VMXNET3_TXQ_UNLOCK(txq);
3340
3341	if_printf(sc->vmx_ifp, "watchdog timeout on queue %d\n",
3342	    txq->vxtxq_id);
3343	return (1);
3344}
3345
3346static void
3347vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
3348{
3349
3350	vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
3351}
3352
3353static void
3354vmxnet3_txq_accum_stats(struct vmxnet3_txqueue *txq,
3355    struct vmxnet3_txq_stats *accum)
3356{
3357	struct vmxnet3_txq_stats *st;
3358
3359	st = &txq->vxtxq_stats;
3360
3361	accum->vmtxs_opackets += st->vmtxs_opackets;
3362	accum->vmtxs_obytes += st->vmtxs_obytes;
3363	accum->vmtxs_omcasts += st->vmtxs_omcasts;
3364	accum->vmtxs_csum += st->vmtxs_csum;
3365	accum->vmtxs_tso += st->vmtxs_tso;
3366	accum->vmtxs_full += st->vmtxs_full;
3367	accum->vmtxs_offload_failed += st->vmtxs_offload_failed;
3368}
3369
3370static void
3371vmxnet3_rxq_accum_stats(struct vmxnet3_rxqueue *rxq,
3372    struct vmxnet3_rxq_stats *accum)
3373{
3374	struct vmxnet3_rxq_stats *st;
3375
3376	st = &rxq->vxrxq_stats;
3377
3378	accum->vmrxs_ipackets += st->vmrxs_ipackets;
3379	accum->vmrxs_ibytes += st->vmrxs_ibytes;
3380	accum->vmrxs_iqdrops += st->vmrxs_iqdrops;
3381	accum->vmrxs_ierrors += st->vmrxs_ierrors;
3382}
3383
3384static void
3385vmxnet3_accumulate_stats(struct vmxnet3_softc *sc)
3386{
3387	struct ifnet *ifp;
3388	struct vmxnet3_statistics *st;
3389	struct vmxnet3_txq_stats txaccum;
3390	struct vmxnet3_rxq_stats rxaccum;
3391	int i;
3392
3393	ifp = sc->vmx_ifp;
3394	st = &sc->vmx_stats;
3395
3396	bzero(&txaccum, sizeof(struct vmxnet3_txq_stats));
3397	bzero(&rxaccum, sizeof(struct vmxnet3_rxq_stats));
3398
3399	for (i = 0; i < sc->vmx_ntxqueues; i++)
3400		vmxnet3_txq_accum_stats(&sc->vmx_txq[i], &txaccum);
3401	for (i = 0; i < sc->vmx_nrxqueues; i++)
3402		vmxnet3_rxq_accum_stats(&sc->vmx_rxq[i], &rxaccum);
3403
3404	/*
3405	 * With the exception of if_ierrors, these ifnet statistics are
3406	 * only updated in the driver, so just set them to our accumulated
3407	 * values. if_ierrors is updated in ether_input() for malformed
3408	 * frames that we should have already discarded.
3409	 */
3410	ifp->if_ipackets = rxaccum.vmrxs_ipackets;
3411	ifp->if_iqdrops = rxaccum.vmrxs_iqdrops;
3412	ifp->if_ierrors = rxaccum.vmrxs_ierrors;
3413	ifp->if_opackets = txaccum.vmtxs_opackets;
3414#ifndef VMXNET3_LEGACY_TX
3415	ifp->if_obytes = txaccum.vmtxs_obytes;
3416	ifp->if_omcasts = txaccum.vmtxs_omcasts;
3417#endif
3418}
3419
3420static void
3421vmxnet3_tick(void *xsc)
3422{
3423	struct vmxnet3_softc *sc;
3424	struct ifnet *ifp;
3425	int i, timedout;
3426
3427	sc = xsc;
3428	ifp = sc->vmx_ifp;
3429	timedout = 0;
3430
3431	VMXNET3_CORE_LOCK_ASSERT(sc);
3432
3433	vmxnet3_accumulate_stats(sc);
3434	vmxnet3_refresh_host_stats(sc);
3435
3436	for (i = 0; i < sc->vmx_ntxqueues; i++)
3437		timedout |= vmxnet3_watchdog(&sc->vmx_txq[i]);
3438
3439	if (timedout != 0) {
3440		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3441		vmxnet3_init_locked(sc);
3442	} else
3443		callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
3444}
3445
3446static int
3447vmxnet3_link_is_up(struct vmxnet3_softc *sc)
3448{
3449	uint32_t status;
3450
3451	/* Also update the link speed while here. */
3452	status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
3453	sc->vmx_link_speed = status >> 16;
3454	return !!(status & 0x1);
3455}
3456
3457static void
3458vmxnet3_link_status(struct vmxnet3_softc *sc)
3459{
3460	struct ifnet *ifp;
3461	int link;
3462
3463	ifp = sc->vmx_ifp;
3464	link = vmxnet3_link_is_up(sc);
3465
3466	if (link != 0 && sc->vmx_link_active == 0) {
3467		sc->vmx_link_active = 1;
3468		if_link_state_change(ifp, LINK_STATE_UP);
3469	} else if (link == 0 && sc->vmx_link_active != 0) {
3470		sc->vmx_link_active = 0;
3471		if_link_state_change(ifp, LINK_STATE_DOWN);
3472	}
3473}
3474
3475static void
3476vmxnet3_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3477{
3478	struct vmxnet3_softc *sc;
3479
3480	sc = ifp->if_softc;
3481
3482	ifmr->ifm_active = IFM_ETHER | IFM_AUTO;
3483	ifmr->ifm_status = IFM_AVALID;
3484
3485	VMXNET3_CORE_LOCK(sc);
3486	if (vmxnet3_link_is_up(sc) != 0)
3487		ifmr->ifm_status |= IFM_ACTIVE;
3488	else
3489		ifmr->ifm_status |= IFM_NONE;
3490	VMXNET3_CORE_UNLOCK(sc);
3491}
3492
3493static int
3494vmxnet3_media_change(struct ifnet *ifp)
3495{
3496
3497	/* Ignore. */
3498	return (0);
3499}
3500
3501static void
3502vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
3503{
3504	uint32_t ml, mh;
3505
3506	ml  = sc->vmx_lladdr[0];
3507	ml |= sc->vmx_lladdr[1] << 8;
3508	ml |= sc->vmx_lladdr[2] << 16;
3509	ml |= sc->vmx_lladdr[3] << 24;
3510	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
3511
3512	mh  = sc->vmx_lladdr[4];
3513	mh |= sc->vmx_lladdr[5] << 8;
3514	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
3515}
3516
3517static void
3518vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
3519{
3520	uint32_t ml, mh;
3521
3522	ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
3523	mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
3524
3525	sc->vmx_lladdr[0] = ml;
3526	sc->vmx_lladdr[1] = ml >> 8;
3527	sc->vmx_lladdr[2] = ml >> 16;
3528	sc->vmx_lladdr[3] = ml >> 24;
3529	sc->vmx_lladdr[4] = mh;
3530	sc->vmx_lladdr[5] = mh >> 8;
3531}
3532
3533static void
3534vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
3535    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3536{
3537	struct sysctl_oid *node, *txsnode;
3538	struct sysctl_oid_list *list, *txslist;
3539	struct vmxnet3_txq_stats *stats;
3540	struct UPT1_TxStats *txstats;
3541	char namebuf[16];
3542
3543	stats = &txq->vxtxq_stats;
3544	txstats = &txq->vxtxq_ts->stats;
3545
3546	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id);
3547	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
3548	    NULL, "Transmit Queue");
3549	txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);
3550
3551	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
3552	    &stats->vmtxs_opackets, "Transmit packets");
3553	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
3554	    &stats->vmtxs_obytes, "Transmit bytes");
3555	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
3556	    &stats->vmtxs_omcasts, "Transmit multicasts");
3557	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3558	    &stats->vmtxs_csum, "Transmit checksum offloaded");
3559	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
3560	    &stats->vmtxs_tso, "Transmit TCP segmentation offloaded");
3561	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ringfull", CTLFLAG_RD,
3562	    &stats->vmtxs_full, "Transmit ring full");
3563	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "offload_failed", CTLFLAG_RD,
3564	    &stats->vmtxs_offload_failed, "Transmit checksum offload failed");
3565
3566	/*
3567	 * Add statistics reported by the host. These are updated once
3568	 * per second.
3569	 */
3570	txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
3571	    NULL, "Host Statistics");
3572	txslist = SYSCTL_CHILDREN(txsnode);
3573	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
3574	    &txstats->TSO_packets, "TSO packets");
3575	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
3576	    &txstats->TSO_bytes, "TSO bytes");
3577	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
3578	    &txstats->ucast_packets, "Unicast packets");
3579	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
3580	    &txstats->ucast_bytes, "Unicast bytes");
3581	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
3582	    &txstats->mcast_packets, "Multicast packets");
3583	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
3584	    &txstats->mcast_bytes, "Multicast bytes");
3585	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
3586	    &txstats->error, "Errors");
3587	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
3588	    &txstats->discard, "Discards");
3589}
3590
3591static void
3592vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
3593    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3594{
3595	struct sysctl_oid *node, *rxsnode;
3596	struct sysctl_oid_list *list, *rxslist;
3597	struct vmxnet3_rxq_stats *stats;
3598	struct UPT1_RxStats *rxstats;
3599	char namebuf[16];
3600
3601	stats = &rxq->vxrxq_stats;
3602	rxstats = &rxq->vxrxq_rs->stats;
3603
3604	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
3605	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
3606	    NULL, "Receive Queue");
3607	rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);
3608
3609	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
3610	    &stats->vmrxs_ipackets, "Receive packets");
3611	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
3612	    &stats->vmrxs_ibytes, "Receive bytes");
3613	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
3614	    &stats->vmrxs_iqdrops, "Receive drops");
3615	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
3616	    &stats->vmrxs_ierrors, "Receive errors");
3617
3618	/*
3619	 * Add statistics reported by the host. These are updated once
3620	 * per second.
3621	 */
3622	rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
3623	    NULL, "Host Statistics");
3624	rxslist = SYSCTL_CHILDREN(rxsnode);
3625	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
3626	    &rxstats->LRO_packets, "LRO packets");
3627	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
3628	    &rxstats->LRO_bytes, "LRO bytes");
3629	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
3630	    &rxstats->ucast_packets, "Unicast packets");
3631	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
3632	    &rxstats->ucast_bytes, "Unicast bytes");
3633	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
3634	    &rxstats->mcast_packets, "Multicast packets");
3635	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
3636	    &rxstats->mcast_bytes, "Multicast bytes");
3637	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
3638	    &rxstats->bcast_packets, "Broadcast packets");
3639	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
3640	    &rxstats->bcast_bytes, "Broadcast bytes");
3641	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
3642	    &rxstats->nobuffer, "No buffer");
3643	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
3644	    &rxstats->error, "Errors");
3645}
3646
3647static void
3648vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
3649    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3650{
3651	struct sysctl_oid *node;
3652	struct sysctl_oid_list *list;
3653	int i;
3654
3655	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3656		struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];
3657
3658		node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
3659		    "debug", CTLFLAG_RD, NULL, "");
3660		list = SYSCTL_CHILDREN(node);
3661
3662		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_head", CTLFLAG_RD,
3663		    &txq->vxtxq_cmd_ring.vxtxr_head, 0, "");
3664		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
3665		    &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
3666		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
3667		    &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
3668		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
3669		    &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
3670		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
3671		    &txq->vxtxq_comp_ring.vxcr_next, 0, "");
3672		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
3673		    &txq->vxtxq_comp_ring.vxcr_ndesc, 0,"");
3674		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
3675		    &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
3676	}
3677
3678	for (i = 0; i < sc->vmx_nrxqueues; i++) {
3679		struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];
3680
3681		node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
3682		    "debug", CTLFLAG_RD, NULL, "");
3683		list = SYSCTL_CHILDREN(node);
3684
3685		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_fill", CTLFLAG_RD,
3686		    &rxq->vxrxq_cmd_ring[0].vxrxr_fill, 0, "");
3687		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
3688		    &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
3689		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
3690		    &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
3691		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_fill", CTLFLAG_RD,
3692		    &rxq->vxrxq_cmd_ring[1].vxrxr_fill, 0, "");
3693		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
3694		    &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
3695		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
3696		    &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
3697		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
3698		    &rxq->vxrxq_comp_ring.vxcr_next, 0, "");
3699		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
3700		    &rxq->vxrxq_comp_ring.vxcr_ndesc, 0,"");
3701		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
3702		    &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
3703	}
3704}
3705
3706static void
3707vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
3708    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3709{
3710	int i;
3711
3712	for (i = 0; i < sc->vmx_ntxqueues; i++)
3713		vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
3714	for (i = 0; i < sc->vmx_nrxqueues; i++)
3715		vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);
3716
3717	vmxnet3_setup_debug_sysctl(sc, ctx, child);
3718}
3719
3720static void
3721vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
3722{
3723	device_t dev;
3724	struct vmxnet3_statistics *stats;
3725	struct sysctl_ctx_list *ctx;
3726	struct sysctl_oid *tree;
3727	struct sysctl_oid_list *child;
3728
3729	dev = sc->vmx_dev;
3730	ctx = device_get_sysctl_ctx(dev);
3731	tree = device_get_sysctl_tree(dev);
3732	child = SYSCTL_CHILDREN(tree);
3733
3734	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_ntxqueues", CTLFLAG_RD,
3735	    &sc->vmx_max_ntxqueues, 0, "Maximum number of Tx queues");
3736	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_nrxqueues", CTLFLAG_RD,
3737	    &sc->vmx_max_nrxqueues, 0, "Maximum number of Rx queues");
3738	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "ntxqueues", CTLFLAG_RD,
3739	    &sc->vmx_ntxqueues, 0, "Number of Tx queues");
3740	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "nrxqueues", CTLFLAG_RD,
3741	    &sc->vmx_nrxqueues, 0, "Number of Rx queues");
3742
3743	stats = &sc->vmx_stats;
3744	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defragged", CTLFLAG_RD,
3745	    &stats->vmst_defragged, 0, "Tx mbuf chains defragged");
3746	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defrag_failed", CTLFLAG_RD,
3747	    &stats->vmst_defrag_failed, 0,
3748	    "Tx mbuf dropped because defrag failed");
3749	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mgetcl_failed", CTLFLAG_RD,
3750	    &stats->vmst_mgetcl_failed, 0, "mbuf cluster allocation failed");
3751	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mbuf_load_failed", CTLFLAG_RD,
3752	    &stats->vmst_mbuf_load_failed, 0, "mbuf load segments failed");
3753
3754	vmxnet3_setup_queue_sysctl(sc, ctx, child);
3755}
3756
3757static void
3758vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
3759{
3760
3761	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
3762}
3763
3764static uint32_t
3765vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
3766{
3767
3768	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
3769}
3770
3771static void
3772vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
3773{
3774
3775	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
3776}
3777
3778static void
3779vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
3780{
3781
3782	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
3783}
3784
3785static uint32_t
3786vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
3787{
3788
3789	vmxnet3_write_cmd(sc, cmd);
3790	bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
3791	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
3792	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
3793}
3794
3795static void
3796vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
3797{
3798
3799	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
3800}
3801
3802static void
3803vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
3804{
3805
3806	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
3807}
3808
3809static void
3810vmxnet3_enable_all_intrs(struct vmxnet3_softc *sc)
3811{
3812	int i;
3813
3814	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
3815	for (i = 0; i < sc->vmx_nintrs; i++)
3816		vmxnet3_enable_intr(sc, i);
3817}
3818
3819static void
3820vmxnet3_disable_all_intrs(struct vmxnet3_softc *sc)
3821{
3822	int i;
3823
3824	sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
3825	for (i = 0; i < sc->vmx_nintrs; i++)
3826		vmxnet3_disable_intr(sc, i);
3827}
3828
3829static void
3830vmxnet3_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3831{
3832	bus_addr_t *baddr = arg;
3833
3834	if (error == 0)
3835		*baddr = segs->ds_addr;
3836}
3837
3838static int
3839vmxnet3_dma_malloc(struct vmxnet3_softc *sc, bus_size_t size, bus_size_t align,
3840    struct vmxnet3_dma_alloc *dma)
3841{
3842	device_t dev;
3843	int error;
3844
3845	dev = sc->vmx_dev;
3846	bzero(dma, sizeof(struct vmxnet3_dma_alloc));
3847
3848	error = bus_dma_tag_create(bus_get_dma_tag(dev),
3849	    align, 0,		/* alignment, bounds */
3850	    BUS_SPACE_MAXADDR,	/* lowaddr */
3851	    BUS_SPACE_MAXADDR,	/* highaddr */
3852	    NULL, NULL,		/* filter, filterarg */
3853	    size,		/* maxsize */
3854	    1,			/* nsegments */
3855	    size,		/* maxsegsize */
3856	    BUS_DMA_ALLOCNOW,	/* flags */
3857	    NULL,		/* lockfunc */
3858	    NULL,		/* lockfuncarg */
3859	    &dma->dma_tag);
3860	if (error) {
3861		device_printf(dev, "bus_dma_tag_create failed: %d\n", error);
3862		goto fail;
3863	}
3864
3865	error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
3866	    BUS_DMA_ZERO | BUS_DMA_NOWAIT, &dma->dma_map);
3867	if (error) {
3868		device_printf(dev, "bus_dmamem_alloc failed: %d\n", error);
3869		goto fail;
3870	}
3871
3872	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3873	    size, vmxnet3_dmamap_cb, &dma->dma_paddr, BUS_DMA_NOWAIT);
3874	if (error) {
3875		device_printf(dev, "bus_dmamap_load failed: %d\n", error);
3876		goto fail;
3877	}
3878
3879	dma->dma_size = size;
3880
3881fail:
3882	if (error)
3883		vmxnet3_dma_free(sc, dma);
3884
3885	return (error);
3886}
3887
3888static void
3889vmxnet3_dma_free(struct vmxnet3_softc *sc, struct vmxnet3_dma_alloc *dma)
3890{
3891
3892	if (dma->dma_tag != NULL) {
3893		if (dma->dma_map != NULL) {
3894			bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3895			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3896			bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3897		}
3898
3899		if (dma->dma_vaddr != NULL) {
3900			bus_dmamem_free(dma->dma_tag, dma->dma_vaddr,
3901			    dma->dma_map);
3902		}
3903
3904		bus_dma_tag_destroy(dma->dma_tag);
3905	}
3906	bzero(dma, sizeof(struct vmxnet3_dma_alloc));
3907}
3908
3909static int
3910vmxnet3_tunable_int(struct vmxnet3_softc *sc, const char *knob, int def)
3911{
3912	char path[64];
3913
3914	snprintf(path, sizeof(path),
3915	    "hw.vmx.%d.%s", device_get_unit(sc->vmx_dev), knob);
3916	TUNABLE_INT_FETCH(path, &def);
3917
3918	return (def);
3919}
3920
3921/*
3922 * Since this is a purely paravirtualized device, we do not have
3923 * to worry about DMA coherency. But at times, we must make sure
3924 * both the compiler and CPU do not reorder memory operations.
3925 */
3926static inline void
3927vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
3928{
3929
3930	switch (type) {
3931	case VMXNET3_BARRIER_RD:
3932		rmb();
3933		break;
3934	case VMXNET3_BARRIER_WR:
3935		wmb();
3936		break;
3937	case VMXNET3_BARRIER_RDWR:
3938		mb();
3939		break;
3940	default:
3941		panic("%s: bad barrier type %d", __func__, type);
3942	}
3943}
3944