1/*-
2 * Copyright (c) 2013 Tsubai Masanari
3 * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 *
17 * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $
18 */
19
20/* Driver for VMware vmxnet3 virtual ethernet devices. */
21
22#include <sys/cdefs.h>
23__FBSDID("$FreeBSD: head/sys/dev/vmware/vmxnet3/if_vmx.c 267662 2014-06-20 02:49:03Z bryanv $");
24
25#include <sys/param.h>
26#include <sys/systm.h>
27#include <sys/eventhandler.h>
28#include <sys/kernel.h>
29#include <sys/endian.h>
30#include <sys/sockio.h>
31#include <sys/mbuf.h>
32#include <sys/malloc.h>
33#include <sys/module.h>
34#include <sys/socket.h>
35#include <sys/sysctl.h>
36#include <sys/smp.h>
37#include <sys/taskqueue.h>
38#include <vm/vm.h>
39#include <vm/pmap.h>
40
41#include <net/ethernet.h>
42#include <net/if.h>
43#include <net/if_var.h>
44#include <net/if_arp.h>
45#include <net/if_dl.h>
46#include <net/if_types.h>
47#include <net/if_media.h>
48#include <net/if_vlan_var.h>
49
50#include <net/bpf.h>
51
52#include <netinet/in_systm.h>
53#include <netinet/in.h>
54#include <netinet/ip.h>
55#include <netinet/ip6.h>
56#include <netinet6/ip6_var.h>
57#include <netinet/udp.h>
58#include <netinet/tcp.h>
59
60#include <machine/in_cksum.h>
61
62#include <machine/bus.h>
63#include <machine/resource.h>
64#include <sys/bus.h>
65#include <sys/rman.h>
66
67#include <dev/pci/pcireg.h>
68#include <dev/pci/pcivar.h>
69
70#include "if_vmxreg.h"
71#include "if_vmxvar.h"
72
73#include "opt_inet.h"
74#include "opt_inet6.h"
75
76#ifdef VMXNET3_FAILPOINTS
77#include <sys/fail.h>
78static SYSCTL_NODE(DEBUG_FP, OID_AUTO, vmxnet3, CTLFLAG_RW, 0,
79    "vmxnet3 fail points");
80#define VMXNET3_FP	_debug_fail_point_vmxnet3
81#endif
82
83static int	vmxnet3_probe(device_t);
84static int	vmxnet3_attach(device_t);
85static int	vmxnet3_detach(device_t);
86static int	vmxnet3_shutdown(device_t);
87
88static int	vmxnet3_alloc_resources(struct vmxnet3_softc *);
89static void	vmxnet3_free_resources(struct vmxnet3_softc *);
90static int	vmxnet3_check_version(struct vmxnet3_softc *);
91static void	vmxnet3_initial_config(struct vmxnet3_softc *);
92static void	vmxnet3_check_multiqueue(struct vmxnet3_softc *);
93
94static int	vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *);
95static int	vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *);
96static int	vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *);
97static int	vmxnet3_alloc_interrupt(struct vmxnet3_softc *, int, int,
98		    struct vmxnet3_interrupt *);
99static int	vmxnet3_alloc_intr_resources(struct vmxnet3_softc *);
100static int	vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *);
101static int	vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *);
102static int	vmxnet3_setup_interrupts(struct vmxnet3_softc *);
103static int	vmxnet3_alloc_interrupts(struct vmxnet3_softc *);
104
105static void	vmxnet3_free_interrupt(struct vmxnet3_softc *,
106		    struct vmxnet3_interrupt *);
107static void	vmxnet3_free_interrupts(struct vmxnet3_softc *);
108
109#ifndef VMXNET3_LEGACY_TX
110static int	vmxnet3_alloc_taskqueue(struct vmxnet3_softc *);
111static void	vmxnet3_start_taskqueue(struct vmxnet3_softc *);
112static void	vmxnet3_drain_taskqueue(struct vmxnet3_softc *);
113static void	vmxnet3_free_taskqueue(struct vmxnet3_softc *);
114#endif
115
116static int	vmxnet3_init_rxq(struct vmxnet3_softc *, int);
117static int	vmxnet3_init_txq(struct vmxnet3_softc *, int);
118static int	vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *);
119static void	vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *);
120static void	vmxnet3_destroy_txq(struct vmxnet3_txqueue *);
121static void	vmxnet3_free_rxtx_queues(struct vmxnet3_softc *);
122
123static int	vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
124static void	vmxnet3_free_shared_data(struct vmxnet3_softc *);
125static int	vmxnet3_alloc_txq_data(struct vmxnet3_softc *);
126static void	vmxnet3_free_txq_data(struct vmxnet3_softc *);
127static int	vmxnet3_alloc_rxq_data(struct vmxnet3_softc *);
128static void	vmxnet3_free_rxq_data(struct vmxnet3_softc *);
129static int	vmxnet3_alloc_queue_data(struct vmxnet3_softc *);
130static void	vmxnet3_free_queue_data(struct vmxnet3_softc *);
131static int	vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
132static void	vmxnet3_init_shared_data(struct vmxnet3_softc *);
133static void	vmxnet3_reinit_interface(struct vmxnet3_softc *);
134static void	vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
135static void	vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
136static int	vmxnet3_alloc_data(struct vmxnet3_softc *);
137static void	vmxnet3_free_data(struct vmxnet3_softc *);
138static int	vmxnet3_setup_interface(struct vmxnet3_softc *);
139
140static void	vmxnet3_evintr(struct vmxnet3_softc *);
141static void	vmxnet3_txq_eof(struct vmxnet3_txqueue *);
142static void	vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *, struct mbuf *);
143static int	vmxnet3_newbuf(struct vmxnet3_softc *, struct vmxnet3_rxring *);
144static void	vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *,
145		    struct vmxnet3_rxring *, int);
146static void	vmxnet3_rxq_eof(struct vmxnet3_rxqueue *);
147static void	vmxnet3_legacy_intr(void *);
148static void	vmxnet3_txq_intr(void *);
149static void	vmxnet3_rxq_intr(void *);
150static void	vmxnet3_event_intr(void *);
151
152static void	vmxnet3_txstop(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
153static void	vmxnet3_rxstop(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
154static void	vmxnet3_stop(struct vmxnet3_softc *);
155
156static void	vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
157static int	vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
158static int	vmxnet3_reinit_queues(struct vmxnet3_softc *);
159static int	vmxnet3_enable_device(struct vmxnet3_softc *);
160static void	vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
161static int	vmxnet3_reinit(struct vmxnet3_softc *);
162static void	vmxnet3_init_locked(struct vmxnet3_softc *);
163static void	vmxnet3_init(void *);
164
static int	vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *, struct mbuf *,
		    int *, int *, int *);
167static int	vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *, struct mbuf **,
168		    bus_dmamap_t, bus_dma_segment_t [], int *);
169static void	vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *, bus_dmamap_t);
170static int	vmxnet3_txq_encap(struct vmxnet3_txqueue *, struct mbuf **);
171#ifdef VMXNET3_LEGACY_TX
172static void	vmxnet3_start_locked(struct ifnet *);
173static void	vmxnet3_start(struct ifnet *);
174#else
175static int	vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *,
176		    struct mbuf *);
177static int	vmxnet3_txq_mq_start(struct ifnet *, struct mbuf *);
178static void	vmxnet3_txq_tq_deferred(void *, int);
179#endif
180static void	vmxnet3_txq_start(struct vmxnet3_txqueue *);
181static void	vmxnet3_tx_start_all(struct vmxnet3_softc *);
182
183static void	vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
184		    uint16_t);
185static void	vmxnet3_register_vlan(void *, struct ifnet *, uint16_t);
186static void	vmxnet3_unregister_vlan(void *, struct ifnet *, uint16_t);
187static void	vmxnet3_set_rxfilter(struct vmxnet3_softc *);
188static int	vmxnet3_change_mtu(struct vmxnet3_softc *, int);
189static int	vmxnet3_ioctl(struct ifnet *, u_long, caddr_t);
190
191#ifndef VMXNET3_LEGACY_TX
192static void	vmxnet3_qflush(struct ifnet *);
193#endif
194
195static int	vmxnet3_watchdog(struct vmxnet3_txqueue *);
196static void	vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
197static void	vmxnet3_txq_accum_stats(struct vmxnet3_txqueue *,
198		    struct vmxnet3_txq_stats *);
199static void	vmxnet3_rxq_accum_stats(struct vmxnet3_rxqueue *,
200		    struct vmxnet3_rxq_stats *);
201static void	vmxnet3_tick(void *);
202static void	vmxnet3_link_status(struct vmxnet3_softc *);
203static void	vmxnet3_media_status(struct ifnet *, struct ifmediareq *);
204static int	vmxnet3_media_change(struct ifnet *);
205static void	vmxnet3_set_lladdr(struct vmxnet3_softc *);
206static void	vmxnet3_get_lladdr(struct vmxnet3_softc *);
207
208static void	vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *,
209		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
210static void	vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *,
211		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
212static void	vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *,
213		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
214static void	vmxnet3_setup_sysctl(struct vmxnet3_softc *);
215
216static void	vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t,
217		    uint32_t);
218static uint32_t	vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t);
219static void	vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t,
220		    uint32_t);
221static void	vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t);
222static uint32_t	vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t);
223
224static void	vmxnet3_enable_intr(struct vmxnet3_softc *, int);
225static void	vmxnet3_disable_intr(struct vmxnet3_softc *, int);
226static void	vmxnet3_enable_all_intrs(struct vmxnet3_softc *);
227static void	vmxnet3_disable_all_intrs(struct vmxnet3_softc *);
228
229static int	vmxnet3_dma_malloc(struct vmxnet3_softc *, bus_size_t,
230		    bus_size_t, struct vmxnet3_dma_alloc *);
231static void	vmxnet3_dma_free(struct vmxnet3_softc *,
232		    struct vmxnet3_dma_alloc *);
233static int	vmxnet3_tunable_int(struct vmxnet3_softc *,
234		    const char *, int);
235
236typedef enum {
237	VMXNET3_BARRIER_RD,
238	VMXNET3_BARRIER_WR,
239	VMXNET3_BARRIER_RDWR,
240} vmxnet3_barrier_t;
241
242static void	vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);
243
244/* Tunables. */
245static int vmxnet3_mq_disable = 0;
246TUNABLE_INT("hw.vmx.mq_disable", &vmxnet3_mq_disable);
247static int vmxnet3_default_txnqueue = VMXNET3_DEF_TX_QUEUES;
248TUNABLE_INT("hw.vmx.txnqueue", &vmxnet3_default_txnqueue);
249static int vmxnet3_default_rxnqueue = VMXNET3_DEF_RX_QUEUES;
250TUNABLE_INT("hw.vmx.rxnqueue", &vmxnet3_default_rxnqueue);
251static int vmxnet3_default_txndesc = VMXNET3_DEF_TX_NDESC;
252TUNABLE_INT("hw.vmx.txndesc", &vmxnet3_default_txndesc);
253static int vmxnet3_default_rxndesc = VMXNET3_DEF_RX_NDESC;
254TUNABLE_INT("hw.vmx.rxndesc", &vmxnet3_default_rxndesc);
255
256static device_method_t vmxnet3_methods[] = {
257	/* Device interface. */
258	DEVMETHOD(device_probe,		vmxnet3_probe),
259	DEVMETHOD(device_attach,	vmxnet3_attach),
260	DEVMETHOD(device_detach,	vmxnet3_detach),
261	DEVMETHOD(device_shutdown,	vmxnet3_shutdown),
262
263	DEVMETHOD_END
264};
265
266static driver_t vmxnet3_driver = {
267	"vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc)
268};
269
270static devclass_t vmxnet3_devclass;
271DRIVER_MODULE(vmx, pci, vmxnet3_driver, vmxnet3_devclass, 0, 0);
272
273MODULE_DEPEND(vmx, pci, 1, 1, 1);
274MODULE_DEPEND(vmx, ether, 1, 1, 1);
275
276#define VMXNET3_VMWARE_VENDOR_ID	0x15AD
277#define VMXNET3_VMWARE_DEVICE_ID	0x07B0
278
279static int
280vmxnet3_probe(device_t dev)
281{
282
283	if (pci_get_vendor(dev) == VMXNET3_VMWARE_VENDOR_ID &&
284	    pci_get_device(dev) == VMXNET3_VMWARE_DEVICE_ID) {
285		device_set_desc(dev, "VMware VMXNET3 Ethernet Adapter");
286		return (BUS_PROBE_DEFAULT);
287	}
288
289	return (ENXIO);
290}
291
292static int
293vmxnet3_attach(device_t dev)
294{
295	struct vmxnet3_softc *sc;
296	int error;
297
298	sc = device_get_softc(dev);
299	sc->vmx_dev = dev;
300
301	pci_enable_busmaster(dev);
302
303	VMXNET3_CORE_LOCK_INIT(sc, device_get_nameunit(dev));
304	callout_init_mtx(&sc->vmx_tick, &sc->vmx_mtx, 0);
305
306	vmxnet3_initial_config(sc);
307
308	error = vmxnet3_alloc_resources(sc);
309	if (error)
310		goto fail;
311
312	error = vmxnet3_check_version(sc);
313	if (error)
314		goto fail;
315
316	error = vmxnet3_alloc_rxtx_queues(sc);
317	if (error)
318		goto fail;
319
320#ifndef VMXNET3_LEGACY_TX
321	error = vmxnet3_alloc_taskqueue(sc);
322	if (error)
323		goto fail;
324#endif
325
326	error = vmxnet3_alloc_interrupts(sc);
327	if (error)
328		goto fail;
329
330	vmxnet3_check_multiqueue(sc);
331
332	error = vmxnet3_alloc_data(sc);
333	if (error)
334		goto fail;
335
336	error = vmxnet3_setup_interface(sc);
337	if (error)
338		goto fail;
339
340	error = vmxnet3_setup_interrupts(sc);
341	if (error) {
342		ether_ifdetach(sc->vmx_ifp);
343		device_printf(dev, "could not set up interrupt\n");
344		goto fail;
345	}
346
347	vmxnet3_setup_sysctl(sc);
348#ifndef VMXNET3_LEGACY_TX
349	vmxnet3_start_taskqueue(sc);
350#endif
351
352fail:
353	if (error)
354		vmxnet3_detach(dev);
355
356	return (error);
357}
358
359static int
360vmxnet3_detach(device_t dev)
361{
362	struct vmxnet3_softc *sc;
363	struct ifnet *ifp;
364
365	sc = device_get_softc(dev);
366	ifp = sc->vmx_ifp;
367
368	if (device_is_attached(dev)) {
369		VMXNET3_CORE_LOCK(sc);
370		vmxnet3_stop(sc);
371		VMXNET3_CORE_UNLOCK(sc);
372
373		callout_drain(&sc->vmx_tick);
374#ifndef VMXNET3_LEGACY_TX
375		vmxnet3_drain_taskqueue(sc);
376#endif
377
378		ether_ifdetach(ifp);
379	}
380
381	if (sc->vmx_vlan_attach != NULL) {
382		EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_attach);
383		sc->vmx_vlan_attach = NULL;
384	}
385	if (sc->vmx_vlan_detach != NULL) {
386		EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_detach);
387		sc->vmx_vlan_detach = NULL;
388	}
389
390#ifndef VMXNET3_LEGACY_TX
391	vmxnet3_free_taskqueue(sc);
392#endif
393	vmxnet3_free_interrupts(sc);
394
395	if (ifp != NULL) {
396		if_free(ifp);
397		sc->vmx_ifp = NULL;
398	}
399
400	ifmedia_removeall(&sc->vmx_media);
401
402	vmxnet3_free_data(sc);
403	vmxnet3_free_resources(sc);
404	vmxnet3_free_rxtx_queues(sc);
405
406	VMXNET3_CORE_LOCK_DESTROY(sc);
407
408	return (0);
409}
410
411static int
412vmxnet3_shutdown(device_t dev)
413{
414
415	return (0);
416}
417
418static int
419vmxnet3_alloc_resources(struct vmxnet3_softc *sc)
420{
421	device_t dev;
422	int rid;
423
424	dev = sc->vmx_dev;
425
426	rid = PCIR_BAR(0);
427	sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
428	    RF_ACTIVE);
429	if (sc->vmx_res0 == NULL) {
430		device_printf(dev,
431		    "could not map BAR0 memory\n");
432		return (ENXIO);
433	}
434
435	sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0);
436	sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0);
437
438	rid = PCIR_BAR(1);
439	sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
440	    RF_ACTIVE);
441	if (sc->vmx_res1 == NULL) {
442		device_printf(dev,
443		    "could not map BAR1 memory\n");
444		return (ENXIO);
445	}
446
447	sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1);
448	sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1);
449
450	if (pci_find_cap(dev, PCIY_MSIX, NULL) == 0) {
451		rid = PCIR_BAR(2);
452		sc->vmx_msix_res = bus_alloc_resource_any(dev,
453		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
454	}
455
456	if (sc->vmx_msix_res == NULL)
457		sc->vmx_flags |= VMXNET3_FLAG_NO_MSIX;
458
459	return (0);
460}
461
462static void
463vmxnet3_free_resources(struct vmxnet3_softc *sc)
464{
465	device_t dev;
466	int rid;
467
468	dev = sc->vmx_dev;
469
470	if (sc->vmx_res0 != NULL) {
471		rid = PCIR_BAR(0);
472		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res0);
473		sc->vmx_res0 = NULL;
474	}
475
476	if (sc->vmx_res1 != NULL) {
477		rid = PCIR_BAR(1);
478		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res1);
479		sc->vmx_res1 = NULL;
480	}
481
482	if (sc->vmx_msix_res != NULL) {
483		rid = PCIR_BAR(2);
484		bus_release_resource(dev, SYS_RES_MEMORY, rid,
485		    sc->vmx_msix_res);
486		sc->vmx_msix_res = NULL;
487	}
488}
489
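/*
 * Negotiate the device revision: read the supported hardware revision and
 * UPT version registers from BAR1, verify that revision 1 is offered, and
 * select it by writing 1 back. Only revision 1 is supported by this driver.
 */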
490static int
491vmxnet3_check_version(struct vmxnet3_softc *sc)
492{
493	device_t dev;
494	uint32_t version;
495
496	dev = sc->vmx_dev;
497
498	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
499	if ((version & 0x01) == 0) {
500		device_printf(dev, "unsupported hardware version %#x\n",
501		    version);
502		return (ENOTSUP);
503	}
504	vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
505
506	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
507	if ((version & 0x01) == 0) {
508		device_printf(dev, "unsupported UPT version %#x\n", version);
509		return (ENOTSUP);
510	}
511	vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
512
513	return (0);
514}
515
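/*
 * Pull in the loader tunables and clamp them to sane values: the Tx and Rx
 * queue counts are limited by the device maximums and the number of CPUs,
 * and the descriptor counts are rounded down with the VMXNET3_MASK_*_NDESC
 * masks so each ring size is a supported multiple.
 */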
516static void
517vmxnet3_initial_config(struct vmxnet3_softc *sc)
518{
519	int nqueue, ndesc;
520
521	nqueue = vmxnet3_tunable_int(sc, "txnqueue", vmxnet3_default_txnqueue);
522	if (nqueue > VMXNET3_MAX_TX_QUEUES || nqueue < 1)
523		nqueue = VMXNET3_DEF_TX_QUEUES;
524	if (nqueue > mp_ncpus)
525		nqueue = mp_ncpus;
526	sc->vmx_max_ntxqueues = nqueue;
527
528	nqueue = vmxnet3_tunable_int(sc, "rxnqueue", vmxnet3_default_rxnqueue);
529	if (nqueue > VMXNET3_MAX_RX_QUEUES || nqueue < 1)
530		nqueue = VMXNET3_DEF_RX_QUEUES;
531	if (nqueue > mp_ncpus)
532		nqueue = mp_ncpus;
533	sc->vmx_max_nrxqueues = nqueue;
534
535	if (vmxnet3_tunable_int(sc, "mq_disable", vmxnet3_mq_disable)) {
536		sc->vmx_max_nrxqueues = 1;
537		sc->vmx_max_ntxqueues = 1;
538	}
539
540	ndesc = vmxnet3_tunable_int(sc, "txd", vmxnet3_default_txndesc);
541	if (ndesc > VMXNET3_MAX_TX_NDESC || ndesc < VMXNET3_MIN_TX_NDESC)
542		ndesc = VMXNET3_DEF_TX_NDESC;
543	if (ndesc & VMXNET3_MASK_TX_NDESC)
544		ndesc &= ~VMXNET3_MASK_TX_NDESC;
545	sc->vmx_ntxdescs = ndesc;
546
547	ndesc = vmxnet3_tunable_int(sc, "rxd", vmxnet3_default_rxndesc);
548	if (ndesc > VMXNET3_MAX_RX_NDESC || ndesc < VMXNET3_MIN_RX_NDESC)
549		ndesc = VMXNET3_DEF_RX_NDESC;
550	if (ndesc & VMXNET3_MASK_RX_NDESC)
551		ndesc &= ~VMXNET3_MASK_RX_NDESC;
552	sc->vmx_nrxdescs = ndesc;
553	sc->vmx_max_rxsegs = VMXNET3_MAX_RX_SEGS;
554}
555
556static void
557vmxnet3_check_multiqueue(struct vmxnet3_softc *sc)
558{
559
560	if (sc->vmx_intr_type != VMXNET3_IT_MSIX)
561		goto out;
562
563	/* BMV: Just use the maximum configured for now. */
564	sc->vmx_nrxqueues = sc->vmx_max_nrxqueues;
565	sc->vmx_ntxqueues = sc->vmx_max_ntxqueues;
566
567	if (sc->vmx_nrxqueues > 1)
568		sc->vmx_flags |= VMXNET3_FLAG_RSS;
569
570	return;
571
572out:
573	sc->vmx_ntxqueues = 1;
574	sc->vmx_nrxqueues = 1;
575}
576
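/*
 * MSIX requires one vector per Tx queue and one per Rx queue, plus one more
 * for events. If the full set cannot be allocated, fail so the caller can
 * fall back to MSI or a legacy interrupt.
 */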
577static int
578vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *sc)
579{
580	device_t dev;
581	int nmsix, cnt, required;
582
583	dev = sc->vmx_dev;
584
585	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX)
586		return (1);
587
588	/* Allocate an additional vector for the events interrupt. */
589	required = sc->vmx_max_nrxqueues + sc->vmx_max_ntxqueues + 1;
590
591	nmsix = pci_msix_count(dev);
592	if (nmsix < required)
593		return (1);
594
595	cnt = required;
596	if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) {
597		sc->vmx_nintrs = required;
598		return (0);
599	} else
600		pci_release_msi(dev);
601
	/* BMV: TODO: Fall back to sharing MSIX vectors if possible. */
603
604	return (1);
605}
606
607static int
608vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *sc)
609{
610	device_t dev;
611	int nmsi, cnt, required;
612
613	dev = sc->vmx_dev;
614	required = 1;
615
616	nmsi = pci_msi_count(dev);
617	if (nmsi < required)
618		return (1);
619
620	cnt = required;
621	if (pci_alloc_msi(dev, &cnt) == 0 && cnt >= required) {
622		sc->vmx_nintrs = 1;
623		return (0);
624	} else
625		pci_release_msi(dev);
626
627	return (1);
628}
629
630static int
631vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *sc)
632{
633
634	sc->vmx_nintrs = 1;
635	return (0);
636}
637
638static int
639vmxnet3_alloc_interrupt(struct vmxnet3_softc *sc, int rid, int flags,
640    struct vmxnet3_interrupt *intr)
641{
642	struct resource *irq;
643
644	irq = bus_alloc_resource_any(sc->vmx_dev, SYS_RES_IRQ, &rid, flags);
645	if (irq == NULL)
646		return (ENXIO);
647
648	intr->vmxi_irq = irq;
649	intr->vmxi_rid = rid;
650
651	return (0);
652}
653
654static int
655vmxnet3_alloc_intr_resources(struct vmxnet3_softc *sc)
656{
657	int i, rid, flags, error;
658
659	rid = 0;
660	flags = RF_ACTIVE;
661
662	if (sc->vmx_intr_type == VMXNET3_IT_LEGACY)
663		flags |= RF_SHAREABLE;
664	else
665		rid = 1;
666
667	for (i = 0; i < sc->vmx_nintrs; i++, rid++) {
668		error = vmxnet3_alloc_interrupt(sc, rid, flags,
669		    &sc->vmx_intrs[i]);
670		if (error)
671			return (error);
672	}
673
674	return (0);
675}
676
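/*
 * With MSIX, the vectors are wired up in order: one per Tx queue, then one
 * per Rx queue, with the last vector handling events. The interrupt index
 * recorded for the device is the zero-based vector number (rid - 1).
 */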
677static int
678vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *sc)
679{
680	device_t dev;
681	struct vmxnet3_txqueue *txq;
682	struct vmxnet3_rxqueue *rxq;
683	struct vmxnet3_interrupt *intr;
684	enum intr_type type;
685	int i, error;
686
687	dev = sc->vmx_dev;
688	intr = &sc->vmx_intrs[0];
689	type = INTR_TYPE_NET | INTR_MPSAFE;
690
691	for (i = 0; i < sc->vmx_ntxqueues; i++, intr++) {
692		txq = &sc->vmx_txq[i];
693		error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
694		     vmxnet3_txq_intr, txq, &intr->vmxi_handler);
695		if (error)
696			return (error);
697		txq->vxtxq_intr_idx = intr->vmxi_rid - 1;
698	}
699
700	for (i = 0; i < sc->vmx_nrxqueues; i++, intr++) {
701		rxq = &sc->vmx_rxq[i];
702		error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
703		    vmxnet3_rxq_intr, rxq, &intr->vmxi_handler);
704		if (error)
705			return (error);
706		rxq->vxrxq_intr_idx = intr->vmxi_rid - 1;
707	}
708
709	error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
710	    vmxnet3_event_intr, sc, &intr->vmxi_handler);
711	if (error)
712		return (error);
713	sc->vmx_event_intr_idx = intr->vmxi_rid - 1;
714
715	return (0);
716}
717
718static int
719vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *sc)
720{
721	struct vmxnet3_interrupt *intr;
722	int i, error;
723
724	intr = &sc->vmx_intrs[0];
725	error = bus_setup_intr(sc->vmx_dev, intr->vmxi_irq,
726	    INTR_TYPE_NET | INTR_MPSAFE, NULL, vmxnet3_legacy_intr, sc,
727	    &intr->vmxi_handler);
728
729	for (i = 0; i < sc->vmx_ntxqueues; i++)
730		sc->vmx_txq[i].vxtxq_intr_idx = 0;
731	for (i = 0; i < sc->vmx_nrxqueues; i++)
732		sc->vmx_rxq[i].vxrxq_intr_idx = 0;
733	sc->vmx_event_intr_idx = 0;
734
735	return (error);
736}
737
738static void
739vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
740{
741	struct vmxnet3_txqueue *txq;
742	struct vmxnet3_txq_shared *txs;
743	struct vmxnet3_rxqueue *rxq;
744	struct vmxnet3_rxq_shared *rxs;
745	int i;
746
747	sc->vmx_ds->evintr = sc->vmx_event_intr_idx;
748
749	for (i = 0; i < sc->vmx_ntxqueues; i++) {
750		txq = &sc->vmx_txq[i];
751		txs = txq->vxtxq_ts;
752		txs->intr_idx = txq->vxtxq_intr_idx;
753	}
754
755	for (i = 0; i < sc->vmx_nrxqueues; i++) {
756		rxq = &sc->vmx_rxq[i];
757		rxs = rxq->vxrxq_rs;
758		rxs->intr_idx = rxq->vxrxq_intr_idx;
759	}
760}
761
762static int
763vmxnet3_setup_interrupts(struct vmxnet3_softc *sc)
764{
765	int error;
766
767	error = vmxnet3_alloc_intr_resources(sc);
768	if (error)
769		return (error);
770
771	switch (sc->vmx_intr_type) {
772	case VMXNET3_IT_MSIX:
773		error = vmxnet3_setup_msix_interrupts(sc);
774		break;
775	case VMXNET3_IT_MSI:
776	case VMXNET3_IT_LEGACY:
777		error = vmxnet3_setup_legacy_interrupt(sc);
778		break;
779	default:
780		panic("%s: invalid interrupt type %d", __func__,
781		    sc->vmx_intr_type);
782	}
783
784	if (error == 0)
785		vmxnet3_set_interrupt_idx(sc);
786
787	return (error);
788}
789
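/*
 * The device reports its preferred interrupt type and masking mode in the
 * low bits of the INTRCFG command result. Starting from the preferred type,
 * fall through the cases from MSIX to MSI to a legacy interrupt until one
 * of the allocations succeeds.
 */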
790static int
791vmxnet3_alloc_interrupts(struct vmxnet3_softc *sc)
792{
793	device_t dev;
794	uint32_t config;
795	int error;
796
797	dev = sc->vmx_dev;
798	config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
799
800	sc->vmx_intr_type = config & 0x03;
801	sc->vmx_intr_mask_mode = (config >> 2) & 0x03;
802
803	switch (sc->vmx_intr_type) {
804	case VMXNET3_IT_AUTO:
805		sc->vmx_intr_type = VMXNET3_IT_MSIX;
806		/* FALLTHROUGH */
807	case VMXNET3_IT_MSIX:
808		error = vmxnet3_alloc_msix_interrupts(sc);
809		if (error == 0)
810			break;
811		sc->vmx_intr_type = VMXNET3_IT_MSI;
812		/* FALLTHROUGH */
813	case VMXNET3_IT_MSI:
814		error = vmxnet3_alloc_msi_interrupts(sc);
815		if (error == 0)
816			break;
817		sc->vmx_intr_type = VMXNET3_IT_LEGACY;
818		/* FALLTHROUGH */
819	case VMXNET3_IT_LEGACY:
820		error = vmxnet3_alloc_legacy_interrupts(sc);
821		if (error == 0)
822			break;
823		/* FALLTHROUGH */
824	default:
825		sc->vmx_intr_type = -1;
826		device_printf(dev, "cannot allocate any interrupt resources\n");
827		return (ENXIO);
828	}
829
830	return (error);
831}
832
833static void
834vmxnet3_free_interrupt(struct vmxnet3_softc *sc,
835    struct vmxnet3_interrupt *intr)
836{
837	device_t dev;
838
839	dev = sc->vmx_dev;
840
841	if (intr->vmxi_handler != NULL) {
842		bus_teardown_intr(dev, intr->vmxi_irq, intr->vmxi_handler);
843		intr->vmxi_handler = NULL;
844	}
845
846	if (intr->vmxi_irq != NULL) {
847		bus_release_resource(dev, SYS_RES_IRQ, intr->vmxi_rid,
848		    intr->vmxi_irq);
849		intr->vmxi_irq = NULL;
850		intr->vmxi_rid = -1;
851	}
852}
853
854static void
855vmxnet3_free_interrupts(struct vmxnet3_softc *sc)
856{
857	int i;
858
859	for (i = 0; i < sc->vmx_nintrs; i++)
860		vmxnet3_free_interrupt(sc, &sc->vmx_intrs[i]);
861
862	if (sc->vmx_intr_type == VMXNET3_IT_MSI ||
863	    sc->vmx_intr_type == VMXNET3_IT_MSIX)
864		pci_release_msi(sc->vmx_dev);
865}
866
867#ifndef VMXNET3_LEGACY_TX
868static int
869vmxnet3_alloc_taskqueue(struct vmxnet3_softc *sc)
870{
871	device_t dev;
872
873	dev = sc->vmx_dev;
874
875	sc->vmx_tq = taskqueue_create(device_get_nameunit(dev), M_NOWAIT,
876	    taskqueue_thread_enqueue, &sc->vmx_tq);
877	if (sc->vmx_tq == NULL)
878		return (ENOMEM);
879
880	return (0);
881}
882
883static void
884vmxnet3_start_taskqueue(struct vmxnet3_softc *sc)
885{
886	device_t dev;
887	int nthreads, error;
888
889	dev = sc->vmx_dev;
890
891	/*
892	 * The taskqueue is typically not frequently used, so a dedicated
893	 * thread for each queue is unnecessary.
894	 */
895	nthreads = MAX(1, sc->vmx_ntxqueues / 2);
896
897	/*
	 * Most drivers just ignore the return value; it can only fail with
	 * ENOMEM, so an error is unlikely. It would be hard for us to recover
	 * from an error here in any case.
901	 */
902	error = taskqueue_start_threads(&sc->vmx_tq, nthreads, PI_NET,
903	    "%s taskq", device_get_nameunit(dev));
904	if (error)
		device_printf(dev, "failed to start taskqueue: %d\n", error);
906}
907
908static void
909vmxnet3_drain_taskqueue(struct vmxnet3_softc *sc)
910{
911	struct vmxnet3_txqueue *txq;
912	int i;
913
914	if (sc->vmx_tq != NULL) {
915		for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
916			txq = &sc->vmx_txq[i];
917			taskqueue_drain(sc->vmx_tq, &txq->vxtxq_defrtask);
918		}
919	}
920}
921
922static void
923vmxnet3_free_taskqueue(struct vmxnet3_softc *sc)
924{
925	if (sc->vmx_tq != NULL) {
926		taskqueue_free(sc->vmx_tq);
927		sc->vmx_tq = NULL;
928	}
929}
930#endif
931
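/*
 * Each Rx queue has VMXNET3_RXRINGS_PERQ command rings sharing one
 * completion ring: ring 0 supplies the buffers that start a frame and
 * ring 1 supplies body buffers used when a frame spans multiple buffers
 * (see vmxnet3_newbuf()), so the completion ring must cover both rings.
 */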
932static int
933vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q)
934{
935	struct vmxnet3_rxqueue *rxq;
936	struct vmxnet3_rxring *rxr;
937	int i;
938
939	rxq = &sc->vmx_rxq[q];
940
941	snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
942	    device_get_nameunit(sc->vmx_dev), q);
943	mtx_init(&rxq->vxrxq_mtx, rxq->vxrxq_name, NULL, MTX_DEF);
944
945	rxq->vxrxq_sc = sc;
946	rxq->vxrxq_id = q;
947
948	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
949		rxr = &rxq->vxrxq_cmd_ring[i];
950		rxr->vxrxr_rid = i;
951		rxr->vxrxr_ndesc = sc->vmx_nrxdescs;
952		rxr->vxrxr_rxbuf = malloc(rxr->vxrxr_ndesc *
953		    sizeof(struct vmxnet3_rxbuf), M_DEVBUF, M_NOWAIT | M_ZERO);
954		if (rxr->vxrxr_rxbuf == NULL)
955			return (ENOMEM);
956
957		rxq->vxrxq_comp_ring.vxcr_ndesc += sc->vmx_nrxdescs;
958	}
959
960	return (0);
961}
962
963static int
964vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
965{
966	struct vmxnet3_txqueue *txq;
967	struct vmxnet3_txring *txr;
968
969	txq = &sc->vmx_txq[q];
970	txr = &txq->vxtxq_cmd_ring;
971
972	snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
973	    device_get_nameunit(sc->vmx_dev), q);
974	mtx_init(&txq->vxtxq_mtx, txq->vxtxq_name, NULL, MTX_DEF);
975
976	txq->vxtxq_sc = sc;
977	txq->vxtxq_id = q;
978
979	txr->vxtxr_ndesc = sc->vmx_ntxdescs;
980	txr->vxtxr_txbuf = malloc(txr->vxtxr_ndesc *
981	    sizeof(struct vmxnet3_txbuf), M_DEVBUF, M_NOWAIT | M_ZERO);
982	if (txr->vxtxr_txbuf == NULL)
983		return (ENOMEM);
984
985	txq->vxtxq_comp_ring.vxcr_ndesc = sc->vmx_ntxdescs;
986
987#ifndef VMXNET3_LEGACY_TX
988	TASK_INIT(&txq->vxtxq_defrtask, 0, vmxnet3_txq_tq_deferred, txq);
989
990	txq->vxtxq_br = buf_ring_alloc(VMXNET3_DEF_BUFRING_SIZE, M_DEVBUF,
991	    M_NOWAIT, &txq->vxtxq_mtx);
992	if (txq->vxtxq_br == NULL)
993		return (ENOMEM);
994#endif
995
996	return (0);
997}
998
999static int
1000vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *sc)
1001{
1002	int i, error;
1003
	/*
	 * Only attempt to create multiple queues if MSIX is available. MSIX is
	 * disabled by default because it is apparently broken for devices
	 * passed through by at least ESXi 5.1. The hw.pci.honor_msi_blacklist
	 * tunable must be set to zero for MSIX to be used. This check prevents
	 * us from allocating queue structures that we will not use.
	 */
1011	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX) {
1012		sc->vmx_max_nrxqueues = 1;
1013		sc->vmx_max_ntxqueues = 1;
1014	}
1015
1016	sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) *
1017	    sc->vmx_max_nrxqueues, M_DEVBUF, M_NOWAIT | M_ZERO);
1018	sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) *
1019	    sc->vmx_max_ntxqueues, M_DEVBUF, M_NOWAIT | M_ZERO);
1020	if (sc->vmx_rxq == NULL || sc->vmx_txq == NULL)
1021		return (ENOMEM);
1022
1023	for (i = 0; i < sc->vmx_max_nrxqueues; i++) {
1024		error = vmxnet3_init_rxq(sc, i);
1025		if (error)
1026			return (error);
1027	}
1028
1029	for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
1030		error = vmxnet3_init_txq(sc, i);
1031		if (error)
1032			return (error);
1033	}
1034
1035	return (0);
1036}
1037
1038static void
1039vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *rxq)
1040{
1041	struct vmxnet3_rxring *rxr;
1042	int i;
1043
1044	rxq->vxrxq_sc = NULL;
1045	rxq->vxrxq_id = -1;
1046
1047	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1048		rxr = &rxq->vxrxq_cmd_ring[i];
1049
1050		if (rxr->vxrxr_rxbuf != NULL) {
1051			free(rxr->vxrxr_rxbuf, M_DEVBUF);
1052			rxr->vxrxr_rxbuf = NULL;
1053		}
1054	}
1055
1056	if (mtx_initialized(&rxq->vxrxq_mtx) != 0)
1057		mtx_destroy(&rxq->vxrxq_mtx);
1058}
1059
1060static void
1061vmxnet3_destroy_txq(struct vmxnet3_txqueue *txq)
1062{
1063	struct vmxnet3_txring *txr;
1064
1065	txr = &txq->vxtxq_cmd_ring;
1066
1067	txq->vxtxq_sc = NULL;
1068	txq->vxtxq_id = -1;
1069
1070#ifndef VMXNET3_LEGACY_TX
1071	if (txq->vxtxq_br != NULL) {
1072		buf_ring_free(txq->vxtxq_br, M_DEVBUF);
1073		txq->vxtxq_br = NULL;
1074	}
1075#endif
1076
1077	if (txr->vxtxr_txbuf != NULL) {
1078		free(txr->vxtxr_txbuf, M_DEVBUF);
1079		txr->vxtxr_txbuf = NULL;
1080	}
1081
1082	if (mtx_initialized(&txq->vxtxq_mtx) != 0)
1083		mtx_destroy(&txq->vxtxq_mtx);
1084}
1085
1086static void
1087vmxnet3_free_rxtx_queues(struct vmxnet3_softc *sc)
1088{
1089	int i;
1090
1091	if (sc->vmx_rxq != NULL) {
1092		for (i = 0; i < sc->vmx_max_nrxqueues; i++)
1093			vmxnet3_destroy_rxq(&sc->vmx_rxq[i]);
1094		free(sc->vmx_rxq, M_DEVBUF);
1095		sc->vmx_rxq = NULL;
1096	}
1097
1098	if (sc->vmx_txq != NULL) {
1099		for (i = 0; i < sc->vmx_max_ntxqueues; i++)
1100			vmxnet3_destroy_txq(&sc->vmx_txq[i]);
1101		free(sc->vmx_txq, M_DEVBUF);
1102		sc->vmx_txq = NULL;
1103	}
1104}
1105
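/*
 * Allocate the DMA memory shared with the device: the driver_shared
 * structure itself, a single region carved up into the per-queue Tx and Rx
 * shared structures, and, when RSS is enabled, the RSS configuration.
 */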
1106static int
1107vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
1108{
1109	device_t dev;
1110	uint8_t *kva;
1111	size_t size;
1112	int i, error;
1113
1114	dev = sc->vmx_dev;
1115
1116	size = sizeof(struct vmxnet3_driver_shared);
1117	error = vmxnet3_dma_malloc(sc, size, 1, &sc->vmx_ds_dma);
1118	if (error) {
1119		device_printf(dev, "cannot alloc shared memory\n");
1120		return (error);
1121	}
1122	sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.dma_vaddr;
1123
1124	size = sc->vmx_ntxqueues * sizeof(struct vmxnet3_txq_shared) +
1125	    sc->vmx_nrxqueues * sizeof(struct vmxnet3_rxq_shared);
1126	error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_qs_dma);
1127	if (error) {
1128		device_printf(dev, "cannot alloc queue shared memory\n");
1129		return (error);
1130	}
1131	sc->vmx_qs = (void *) sc->vmx_qs_dma.dma_vaddr;
1132	kva = sc->vmx_qs;
1133
1134	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1135		sc->vmx_txq[i].vxtxq_ts = (struct vmxnet3_txq_shared *) kva;
1136		kva += sizeof(struct vmxnet3_txq_shared);
1137	}
1138	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1139		sc->vmx_rxq[i].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva;
1140		kva += sizeof(struct vmxnet3_rxq_shared);
1141	}
1142
1143	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1144		size = sizeof(struct vmxnet3_rss_shared);
1145		error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_rss_dma);
1146		if (error) {
1147			device_printf(dev, "cannot alloc rss shared memory\n");
1148			return (error);
1149		}
1150		sc->vmx_rss =
1151		    (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.dma_vaddr;
1152	}
1153
1154	return (0);
1155}
1156
1157static void
1158vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
1159{
1160
1161	if (sc->vmx_rss != NULL) {
1162		vmxnet3_dma_free(sc, &sc->vmx_rss_dma);
1163		sc->vmx_rss = NULL;
1164	}
1165
1166	if (sc->vmx_qs != NULL) {
1167		vmxnet3_dma_free(sc, &sc->vmx_qs_dma);
1168		sc->vmx_qs = NULL;
1169	}
1170
1171	if (sc->vmx_ds != NULL) {
1172		vmxnet3_dma_free(sc, &sc->vmx_ds_dma);
1173		sc->vmx_ds = NULL;
1174	}
1175}
1176
1177static int
1178vmxnet3_alloc_txq_data(struct vmxnet3_softc *sc)
1179{
1180	device_t dev;
1181	struct vmxnet3_txqueue *txq;
1182	struct vmxnet3_txring *txr;
1183	struct vmxnet3_comp_ring *txc;
1184	size_t descsz, compsz;
1185	int i, q, error;
1186
1187	dev = sc->vmx_dev;
1188
1189	for (q = 0; q < sc->vmx_ntxqueues; q++) {
1190		txq = &sc->vmx_txq[q];
1191		txr = &txq->vxtxq_cmd_ring;
1192		txc = &txq->vxtxq_comp_ring;
1193
1194		descsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc);
1195		compsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txcompdesc);
1196
1197		error = bus_dma_tag_create(bus_get_dma_tag(dev),
1198		    1, 0,			/* alignment, boundary */
1199		    BUS_SPACE_MAXADDR,		/* lowaddr */
1200		    BUS_SPACE_MAXADDR,		/* highaddr */
1201		    NULL, NULL,			/* filter, filterarg */
1202		    VMXNET3_TX_MAXSIZE,		/* maxsize */
1203		    VMXNET3_TX_MAXSEGS,		/* nsegments */
1204		    VMXNET3_TX_MAXSEGSIZE,	/* maxsegsize */
1205		    0,				/* flags */
1206		    NULL, NULL,			/* lockfunc, lockarg */
1207		    &txr->vxtxr_txtag);
1208		if (error) {
1209			device_printf(dev,
1210			    "unable to create Tx buffer tag for queue %d\n", q);
1211			return (error);
1212		}
1213
1214		error = vmxnet3_dma_malloc(sc, descsz, 512, &txr->vxtxr_dma);
1215		if (error) {
1216			device_printf(dev, "cannot alloc Tx descriptors for "
1217			    "queue %d error %d\n", q, error);
1218			return (error);
1219		}
1220		txr->vxtxr_txd =
1221		    (struct vmxnet3_txdesc *) txr->vxtxr_dma.dma_vaddr;
1222
1223		error = vmxnet3_dma_malloc(sc, compsz, 512, &txc->vxcr_dma);
1224		if (error) {
1225			device_printf(dev, "cannot alloc Tx comp descriptors "
1226			   "for queue %d error %d\n", q, error);
1227			return (error);
1228		}
1229		txc->vxcr_u.txcd =
1230		    (struct vmxnet3_txcompdesc *) txc->vxcr_dma.dma_vaddr;
1231
1232		for (i = 0; i < txr->vxtxr_ndesc; i++) {
1233			error = bus_dmamap_create(txr->vxtxr_txtag, 0,
1234			    &txr->vxtxr_txbuf[i].vtxb_dmamap);
1235			if (error) {
1236				device_printf(dev, "unable to create Tx buf "
1237				    "dmamap for queue %d idx %d\n", q, i);
1238				return (error);
1239			}
1240		}
1241	}
1242
1243	return (0);
1244}
1245
1246static void
1247vmxnet3_free_txq_data(struct vmxnet3_softc *sc)
1248{
1249	device_t dev;
1250	struct vmxnet3_txqueue *txq;
1251	struct vmxnet3_txring *txr;
1252	struct vmxnet3_comp_ring *txc;
1253	struct vmxnet3_txbuf *txb;
1254	int i, q;
1255
1256	dev = sc->vmx_dev;
1257
1258	for (q = 0; q < sc->vmx_ntxqueues; q++) {
1259		txq = &sc->vmx_txq[q];
1260		txr = &txq->vxtxq_cmd_ring;
1261		txc = &txq->vxtxq_comp_ring;
1262
1263		for (i = 0; i < txr->vxtxr_ndesc; i++) {
1264			txb = &txr->vxtxr_txbuf[i];
1265			if (txb->vtxb_dmamap != NULL) {
1266				bus_dmamap_destroy(txr->vxtxr_txtag,
1267				    txb->vtxb_dmamap);
1268				txb->vtxb_dmamap = NULL;
1269			}
1270		}
1271
1272		if (txc->vxcr_u.txcd != NULL) {
1273			vmxnet3_dma_free(sc, &txc->vxcr_dma);
1274			txc->vxcr_u.txcd = NULL;
1275		}
1276
1277		if (txr->vxtxr_txd != NULL) {
1278			vmxnet3_dma_free(sc, &txr->vxtxr_dma);
1279			txr->vxtxr_txd = NULL;
1280		}
1281
1282		if (txr->vxtxr_txtag != NULL) {
1283			bus_dma_tag_destroy(txr->vxtxr_txtag);
1284			txr->vxtxr_txtag = NULL;
1285		}
1286	}
1287}
1288
1289static int
1290vmxnet3_alloc_rxq_data(struct vmxnet3_softc *sc)
1291{
1292	device_t dev;
1293	struct vmxnet3_rxqueue *rxq;
1294	struct vmxnet3_rxring *rxr;
1295	struct vmxnet3_comp_ring *rxc;
1296	int descsz, compsz;
1297	int i, j, q, error;
1298
1299	dev = sc->vmx_dev;
1300
1301	for (q = 0; q < sc->vmx_nrxqueues; q++) {
1302		rxq = &sc->vmx_rxq[q];
1303		rxc = &rxq->vxrxq_comp_ring;
1304		compsz = 0;
1305
1306		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1307			rxr = &rxq->vxrxq_cmd_ring[i];
1308
1309			descsz = rxr->vxrxr_ndesc *
1310			    sizeof(struct vmxnet3_rxdesc);
1311			compsz += rxr->vxrxr_ndesc *
1312			    sizeof(struct vmxnet3_rxcompdesc);
1313
1314			error = bus_dma_tag_create(bus_get_dma_tag(dev),
1315			    1, 0,		/* alignment, boundary */
1316			    BUS_SPACE_MAXADDR,	/* lowaddr */
1317			    BUS_SPACE_MAXADDR,	/* highaddr */
1318			    NULL, NULL,		/* filter, filterarg */
1319			    MJUMPAGESIZE,	/* maxsize */
1320			    1,			/* nsegments */
1321			    MJUMPAGESIZE,	/* maxsegsize */
1322			    0,			/* flags */
1323			    NULL, NULL,		/* lockfunc, lockarg */
1324			    &rxr->vxrxr_rxtag);
1325			if (error) {
1326				device_printf(dev,
1327				    "unable to create Rx buffer tag for "
1328				    "queue %d\n", q);
1329				return (error);
1330			}
1331
1332			error = vmxnet3_dma_malloc(sc, descsz, 512,
1333			    &rxr->vxrxr_dma);
1334			if (error) {
1335				device_printf(dev, "cannot allocate Rx "
1336				    "descriptors for queue %d/%d error %d\n",
				    q, i, error);
1338				return (error);
1339			}
1340			rxr->vxrxr_rxd =
1341			    (struct vmxnet3_rxdesc *) rxr->vxrxr_dma.dma_vaddr;
1342		}
1343
1344		error = vmxnet3_dma_malloc(sc, compsz, 512, &rxc->vxcr_dma);
1345		if (error) {
1346			device_printf(dev, "cannot alloc Rx comp descriptors "
1347			    "for queue %d error %d\n", q, error);
1348			return (error);
1349		}
1350		rxc->vxcr_u.rxcd =
1351		    (struct vmxnet3_rxcompdesc *) rxc->vxcr_dma.dma_vaddr;
1352
1353		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1354			rxr = &rxq->vxrxq_cmd_ring[i];
1355
1356			error = bus_dmamap_create(rxr->vxrxr_rxtag, 0,
1357			    &rxr->vxrxr_spare_dmap);
1358			if (error) {
1359				device_printf(dev, "unable to create spare "
1360				    "dmamap for queue %d/%d error %d\n",
1361				    q, i, error);
1362				return (error);
1363			}
1364
1365			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1366				error = bus_dmamap_create(rxr->vxrxr_rxtag, 0,
1367				    &rxr->vxrxr_rxbuf[j].vrxb_dmamap);
1368				if (error) {
1369					device_printf(dev, "unable to create "
1370					    "dmamap for queue %d/%d slot %d "
1371					    "error %d\n",
1372					    q, i, j, error);
1373					return (error);
1374				}
1375			}
1376		}
1377	}
1378
1379	return (0);
1380}
1381
1382static void
1383vmxnet3_free_rxq_data(struct vmxnet3_softc *sc)
1384{
1385	device_t dev;
1386	struct vmxnet3_rxqueue *rxq;
1387	struct vmxnet3_rxring *rxr;
1388	struct vmxnet3_comp_ring *rxc;
1389	struct vmxnet3_rxbuf *rxb;
1390	int i, j, q;
1391
1392	dev = sc->vmx_dev;
1393
1394	for (q = 0; q < sc->vmx_nrxqueues; q++) {
1395		rxq = &sc->vmx_rxq[q];
1396		rxc = &rxq->vxrxq_comp_ring;
1397
1398		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1399			rxr = &rxq->vxrxq_cmd_ring[i];
1400
1401			if (rxr->vxrxr_spare_dmap != NULL) {
1402				bus_dmamap_destroy(rxr->vxrxr_rxtag,
1403				    rxr->vxrxr_spare_dmap);
1404				rxr->vxrxr_spare_dmap = NULL;
1405			}
1406
1407			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1408				rxb = &rxr->vxrxr_rxbuf[j];
1409				if (rxb->vrxb_dmamap != NULL) {
1410					bus_dmamap_destroy(rxr->vxrxr_rxtag,
1411					    rxb->vrxb_dmamap);
1412					rxb->vrxb_dmamap = NULL;
1413				}
1414			}
1415		}
1416
1417		if (rxc->vxcr_u.rxcd != NULL) {
1418			vmxnet3_dma_free(sc, &rxc->vxcr_dma);
1419			rxc->vxcr_u.rxcd = NULL;
1420		}
1421
1422		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1423			rxr = &rxq->vxrxq_cmd_ring[i];
1424
1425			if (rxr->vxrxr_rxd != NULL) {
1426				vmxnet3_dma_free(sc, &rxr->vxrxr_dma);
1427				rxr->vxrxr_rxd = NULL;
1428			}
1429
1430			if (rxr->vxrxr_rxtag != NULL) {
1431				bus_dma_tag_destroy(rxr->vxrxr_rxtag);
1432				rxr->vxrxr_rxtag = NULL;
1433			}
1434		}
1435	}
1436}
1437
1438static int
1439vmxnet3_alloc_queue_data(struct vmxnet3_softc *sc)
1440{
1441	int error;
1442
1443	error = vmxnet3_alloc_txq_data(sc);
1444	if (error)
1445		return (error);
1446
1447	error = vmxnet3_alloc_rxq_data(sc);
1448	if (error)
1449		return (error);
1450
1451	return (0);
1452}
1453
1454static void
1455vmxnet3_free_queue_data(struct vmxnet3_softc *sc)
1456{
1457
1458	if (sc->vmx_rxq != NULL)
1459		vmxnet3_free_rxq_data(sc);
1460
1461	if (sc->vmx_txq != NULL)
1462		vmxnet3_free_txq_data(sc);
1463}
1464
1465static int
1466vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1467{
1468	int error;
1469
1470	error = vmxnet3_dma_malloc(sc, VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN,
1471	    32, &sc->vmx_mcast_dma);
1472	if (error)
1473		device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1474	else
1475		sc->vmx_mcast = sc->vmx_mcast_dma.dma_vaddr;
1476
1477	return (error);
1478}
1479
1480static void
1481vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1482{
1483
1484	if (sc->vmx_mcast != NULL) {
1485		vmxnet3_dma_free(sc, &sc->vmx_mcast_dma);
1486		sc->vmx_mcast = NULL;
1487	}
1488}
1489
1490static void
1491vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1492{
1493	struct vmxnet3_driver_shared *ds;
1494	struct vmxnet3_txqueue *txq;
1495	struct vmxnet3_txq_shared *txs;
1496	struct vmxnet3_rxqueue *rxq;
1497	struct vmxnet3_rxq_shared *rxs;
1498	int i;
1499
1500	ds = sc->vmx_ds;
1501
1502	/*
	 * Initialize the fields of the shared data that remain the same across
	 * reinits. Note that the shared data is zeroed when allocated.
1505	 */
1506
1507	ds->magic = VMXNET3_REV1_MAGIC;
1508
1509	/* DriverInfo */
1510	ds->version = VMXNET3_DRIVER_VERSION;
1511	ds->guest = VMXNET3_GOS_FREEBSD |
1512#ifdef __LP64__
1513	    VMXNET3_GOS_64BIT;
1514#else
1515	    VMXNET3_GOS_32BIT;
1516#endif
1517	ds->vmxnet3_revision = 1;
1518	ds->upt_version = 1;
1519
1520	/* Misc. conf */
1521	ds->driver_data = vtophys(sc);
1522	ds->driver_data_len = sizeof(struct vmxnet3_softc);
1523	ds->queue_shared = sc->vmx_qs_dma.dma_paddr;
1524	ds->queue_shared_len = sc->vmx_qs_dma.dma_size;
1525	ds->nrxsg_max = sc->vmx_max_rxsegs;
1526
1527	/* RSS conf */
1528	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1529		ds->rss.version = 1;
1530		ds->rss.paddr = sc->vmx_rss_dma.dma_paddr;
1531		ds->rss.len = sc->vmx_rss_dma.dma_size;
1532	}
1533
1534	/* Interrupt control. */
1535	ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1536	ds->nintr = sc->vmx_nintrs;
1537	ds->evintr = sc->vmx_event_intr_idx;
1538	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1539
1540	for (i = 0; i < sc->vmx_nintrs; i++)
1541		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1542
1543	/* Receive filter. */
1544	ds->mcast_table = sc->vmx_mcast_dma.dma_paddr;
1545	ds->mcast_tablelen = sc->vmx_mcast_dma.dma_size;
1546
1547	/* Tx queues */
1548	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1549		txq = &sc->vmx_txq[i];
1550		txs = txq->vxtxq_ts;
1551
1552		txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_dma.dma_paddr;
1553		txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1554		txs->comp_ring = txq->vxtxq_comp_ring.vxcr_dma.dma_paddr;
1555		txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1556		txs->driver_data = vtophys(txq);
1557		txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1558	}
1559
1560	/* Rx queues */
1561	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1562		rxq = &sc->vmx_rxq[i];
1563		rxs = rxq->vxrxq_rs;
1564
1565		rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_dma.dma_paddr;
1566		rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1567		rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_dma.dma_paddr;
1568		rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1569		rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_dma.dma_paddr;
1570		rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1571		rxs->driver_data = vtophys(rxq);
1572		rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1573	}
1574}
1575
1576static void
1577vmxnet3_reinit_interface(struct vmxnet3_softc *sc)
1578{
1579	struct ifnet *ifp;
1580
1581	ifp = sc->vmx_ifp;
1582
1583	/* Use the current MAC address. */
1584	bcopy(IF_LLADDR(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
1585	vmxnet3_set_lladdr(sc);
1586
1587	ifp->if_hwassist = 0;
1588	if (ifp->if_capenable & IFCAP_TXCSUM)
1589		ifp->if_hwassist |= VMXNET3_CSUM_OFFLOAD;
1590	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
1591		ifp->if_hwassist |= VMXNET3_CSUM_OFFLOAD_IPV6;
1592	if (ifp->if_capenable & IFCAP_TSO4)
1593		ifp->if_hwassist |= CSUM_IP_TSO;
1594	if (ifp->if_capenable & IFCAP_TSO6)
1595		ifp->if_hwassist |= CSUM_IP6_TSO;
1596}
1597
1598static void
1599vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1600{
1601	/*
1602	 * Use the same key as the Linux driver until FreeBSD can do
1603	 * RSS (presumably Toeplitz) in software.
1604	 */
1605	static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
1606	    0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
1607	    0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
1608	    0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
1609	    0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
1610	    0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
1611	};
1612
1613	struct vmxnet3_driver_shared *ds;
1614	struct vmxnet3_rss_shared *rss;
1615	int i;
1616
1617	ds = sc->vmx_ds;
1618	rss = sc->vmx_rss;
1619
1620	rss->hash_type =
1621	    UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1622	    UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1623	rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1624	rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1625	rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1626	memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
1627
1628	for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1629		rss->ind_table[i] = i % sc->vmx_nrxqueues;
1630}
1631
1632static void
1633vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1634{
1635	struct ifnet *ifp;
1636	struct vmxnet3_driver_shared *ds;
1637
1638	ifp = sc->vmx_ifp;
1639	ds = sc->vmx_ds;
1640
1641	ds->mtu = ifp->if_mtu;
1642	ds->ntxqueue = sc->vmx_ntxqueues;
1643	ds->nrxqueue = sc->vmx_nrxqueues;
1644
1645	ds->upt_features = 0;
1646	if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
1647		ds->upt_features |= UPT1_F_CSUM;
1648	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1649		ds->upt_features |= UPT1_F_VLAN;
1650	if (ifp->if_capenable & IFCAP_LRO)
1651		ds->upt_features |= UPT1_F_LRO;
1652
1653	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1654		ds->upt_features |= UPT1_F_RSS;
1655		vmxnet3_reinit_rss_shared_data(sc);
1656	}
1657
1658	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.dma_paddr);
1659	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1660	    (uint64_t) sc->vmx_ds_dma.dma_paddr >> 32);
1661}
1662
1663static int
1664vmxnet3_alloc_data(struct vmxnet3_softc *sc)
1665{
1666	int error;
1667
1668	error = vmxnet3_alloc_shared_data(sc);
1669	if (error)
1670		return (error);
1671
1672	error = vmxnet3_alloc_queue_data(sc);
1673	if (error)
1674		return (error);
1675
1676	error = vmxnet3_alloc_mcast_table(sc);
1677	if (error)
1678		return (error);
1679
1680	vmxnet3_init_shared_data(sc);
1681
1682	return (0);
1683}
1684
1685static void
1686vmxnet3_free_data(struct vmxnet3_softc *sc)
1687{
1688
1689	vmxnet3_free_mcast_table(sc);
1690	vmxnet3_free_queue_data(sc);
1691	vmxnet3_free_shared_data(sc);
1692}
1693
1694static int
1695vmxnet3_setup_interface(struct vmxnet3_softc *sc)
1696{
1697	device_t dev;
1698	struct ifnet *ifp;
1699
1700	dev = sc->vmx_dev;
1701
1702	ifp = sc->vmx_ifp = if_alloc(IFT_ETHER);
1703	if (ifp == NULL) {
1704		device_printf(dev, "cannot allocate ifnet structure\n");
1705		return (ENOSPC);
1706	}
1707
1708	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1709#if __FreeBSD_version < 1000025
1710	ifp->if_baudrate = 1000000000;
1711#elif __FreeBSD_version < 1100011
1712	if_initbaudrate(ifp, IF_Gbps(10));
1713#else
1714	ifp->if_baudrate = IF_Gbps(10);
1715#endif
1716	ifp->if_softc = sc;
1717	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1718	ifp->if_init = vmxnet3_init;
1719	ifp->if_ioctl = vmxnet3_ioctl;
1720	ifp->if_hw_tsomax = VMXNET3_TSO_MAXSIZE;
1721
1722#ifdef VMXNET3_LEGACY_TX
1723	ifp->if_start = vmxnet3_start;
1724	ifp->if_snd.ifq_drv_maxlen = sc->vmx_ntxdescs - 1;
1725	IFQ_SET_MAXLEN(&ifp->if_snd, sc->vmx_ntxdescs - 1);
1726	IFQ_SET_READY(&ifp->if_snd);
1727#else
1728	ifp->if_transmit = vmxnet3_txq_mq_start;
1729	ifp->if_qflush = vmxnet3_qflush;
1730#endif
1731
1732	vmxnet3_get_lladdr(sc);
1733	ether_ifattach(ifp, sc->vmx_lladdr);
1734
1735	ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_TXCSUM;
1736	ifp->if_capabilities |= IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6;
1737	ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6;
1738	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
1739	    IFCAP_VLAN_HWCSUM;
1740	ifp->if_capenable = ifp->if_capabilities;
1741
1742	/* These capabilities are not enabled by default. */
1743	ifp->if_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER;
1744
1745	sc->vmx_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
1746	    vmxnet3_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
1747	sc->vmx_vlan_detach = EVENTHANDLER_REGISTER(vlan_config,
1748	    vmxnet3_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
1749
1750	ifmedia_init(&sc->vmx_media, 0, vmxnet3_media_change,
1751	    vmxnet3_media_status);
1752	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
1753	ifmedia_set(&sc->vmx_media, IFM_ETHER | IFM_AUTO);
1754
1755	return (0);
1756}
1757
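/*
 * Handle an event interrupt: acknowledge the pending events, update the
 * link state on a link event, and reinitialize the device if either of the
 * queues reported an error.
 */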
1758static void
1759vmxnet3_evintr(struct vmxnet3_softc *sc)
1760{
1761	device_t dev;
1762	struct ifnet *ifp;
1763	struct vmxnet3_txq_shared *ts;
1764	struct vmxnet3_rxq_shared *rs;
1765	uint32_t event;
1766	int reset;
1767
1768	dev = sc->vmx_dev;
1769	ifp = sc->vmx_ifp;
1770	reset = 0;
1771
1772	VMXNET3_CORE_LOCK(sc);
1773
1774	/* Clear events. */
1775	event = sc->vmx_ds->event;
1776	vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
1777
1778	if (event & VMXNET3_EVENT_LINK) {
1779		vmxnet3_link_status(sc);
1780		if (sc->vmx_link_active != 0)
1781			vmxnet3_tx_start_all(sc);
1782	}
1783
1784	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
1785		reset = 1;
1786		vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
1787		ts = sc->vmx_txq[0].vxtxq_ts;
1788		if (ts->stopped != 0)
1789			device_printf(dev, "Tx queue error %#x\n", ts->error);
1790		rs = sc->vmx_rxq[0].vxrxq_rs;
1791		if (rs->stopped != 0)
1792			device_printf(dev, "Rx queue error %#x\n", rs->error);
1793		device_printf(dev, "Rx/Tx queue error event ... resetting\n");
1794	}
1795
1796	if (event & VMXNET3_EVENT_DIC)
1797		device_printf(dev, "device implementation change event\n");
1798	if (event & VMXNET3_EVENT_DEBUG)
1799		device_printf(dev, "debug event\n");
1800
1801	if (reset != 0) {
1802		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1803		vmxnet3_init_locked(sc);
1804	}
1805
1806	VMXNET3_CORE_UNLOCK(sc);
1807}
1808
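/*
 * Reclaim completed transmits. Ownership of a completion descriptor is
 * indicated by its generation bit matching the ring's current generation,
 * which flips each time the ring wraps. Each completion covers a whole
 * packet, so everything up to and including eop_idx can be freed.
 */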
1809static void
1810vmxnet3_txq_eof(struct vmxnet3_txqueue *txq)
1811{
1812	struct vmxnet3_softc *sc;
1813	struct ifnet *ifp;
1814	struct vmxnet3_txring *txr;
1815	struct vmxnet3_comp_ring *txc;
1816	struct vmxnet3_txcompdesc *txcd;
1817	struct vmxnet3_txbuf *txb;
1818	struct mbuf *m;
1819	u_int sop;
1820
1821	sc = txq->vxtxq_sc;
1822	ifp = sc->vmx_ifp;
1823	txr = &txq->vxtxq_cmd_ring;
1824	txc = &txq->vxtxq_comp_ring;
1825
1826	VMXNET3_TXQ_LOCK_ASSERT(txq);
1827
1828	for (;;) {
1829		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
1830		if (txcd->gen != txc->vxcr_gen)
1831			break;
1832		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1833
1834		if (++txc->vxcr_next == txc->vxcr_ndesc) {
1835			txc->vxcr_next = 0;
1836			txc->vxcr_gen ^= 1;
1837		}
1838
1839		sop = txr->vxtxr_next;
1840		txb = &txr->vxtxr_txbuf[sop];
1841
1842		if ((m = txb->vtxb_m) != NULL) {
1843			bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
1844			    BUS_DMASYNC_POSTWRITE);
1845			bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap);
1846
1847			txq->vxtxq_stats.vmtxs_opackets++;
1848			txq->vxtxq_stats.vmtxs_obytes += m->m_pkthdr.len;
1849			if (m->m_flags & M_MCAST)
1850				txq->vxtxq_stats.vmtxs_omcasts++;
1851
1852			m_freem(m);
1853			txb->vtxb_m = NULL;
1854		}
1855
1856		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
1857	}
1858
1859	if (txr->vxtxr_head == txr->vxtxr_next)
1860		txq->vxtxq_watchdog = 0;
1861}
1862
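/*
 * Post a fresh buffer at the ring's fill index. The new mbuf is loaded into
 * the ring's spare DMA map first; only if that succeeds is the spare map
 * swapped with the slot's map, so a failed allocation never disturbs the
 * buffer already posted at that slot.
 */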
1863static int
1864vmxnet3_newbuf(struct vmxnet3_softc *sc, struct vmxnet3_rxring *rxr)
1865{
1866	struct ifnet *ifp;
1867	struct mbuf *m;
1868	struct vmxnet3_rxdesc *rxd;
1869	struct vmxnet3_rxbuf *rxb;
1870	bus_dma_tag_t tag;
1871	bus_dmamap_t dmap;
1872	bus_dma_segment_t segs[1];
1873	int idx, clsize, btype, flags, nsegs, error;
1874
1875	ifp = sc->vmx_ifp;
1876	tag = rxr->vxrxr_rxtag;
1877	dmap = rxr->vxrxr_spare_dmap;
1878	idx = rxr->vxrxr_fill;
1879	rxd = &rxr->vxrxr_rxd[idx];
1880	rxb = &rxr->vxrxr_rxbuf[idx];
1881
1882#ifdef VMXNET3_FAILPOINTS
1883	KFAIL_POINT_CODE(VMXNET3_FP, newbuf, return ENOBUFS);
1884	if (rxr->vxrxr_rid != 0)
1885		KFAIL_POINT_CODE(VMXNET3_FP, newbuf_body_only, return ENOBUFS);
1886#endif
1887
1888	if (rxr->vxrxr_rid == 0 && (idx % sc->vmx_rx_max_chain) == 0) {
1889		flags = M_PKTHDR;
1890		clsize = MCLBYTES;
1891		btype = VMXNET3_BTYPE_HEAD;
1892	} else {
1893#if __FreeBSD_version < 902001
1894		/*
		 * These mbufs will never be used for the start of a frame.
		 * Prior to roughly the releng/9.2 branch,
		 * bus_dmamap_load_mbuf_sg() required the mbuf to always be a
		 * packet header. Avoid unnecessary mbuf initialization in
		 * newer versions where that is not the case.
1900		 */
1901		flags = M_PKTHDR;
1902#else
1903		flags = 0;
1904#endif
1905		clsize = MJUMPAGESIZE;
1906		btype = VMXNET3_BTYPE_BODY;
1907	}
1908
1909	m = m_getjcl(M_NOWAIT, MT_DATA, flags, clsize);
1910	if (m == NULL) {
1911		sc->vmx_stats.vmst_mgetcl_failed++;
1912		return (ENOBUFS);
1913	}
1914
1915	if (btype == VMXNET3_BTYPE_HEAD) {
1916		m->m_len = m->m_pkthdr.len = clsize;
1917		m_adj(m, ETHER_ALIGN);
1918	} else
1919		m->m_len = clsize;
1920
1921	error = bus_dmamap_load_mbuf_sg(tag, dmap, m, &segs[0], &nsegs,
1922	    BUS_DMA_NOWAIT);
1923	if (error) {
1924		m_freem(m);
1925		sc->vmx_stats.vmst_mbuf_load_failed++;
1926		return (error);
1927	}
1928	KASSERT(nsegs == 1,
1929	    ("%s: mbuf %p with too many segments %d", __func__, m, nsegs));
1930#if __FreeBSD_version < 902001
1931	if (btype == VMXNET3_BTYPE_BODY)
1932		m->m_flags &= ~M_PKTHDR;
1933#endif
1934
1935	if (rxb->vrxb_m != NULL) {
1936		bus_dmamap_sync(tag, rxb->vrxb_dmamap, BUS_DMASYNC_POSTREAD);
1937		bus_dmamap_unload(tag, rxb->vrxb_dmamap);
1938	}
1939
1940	rxr->vxrxr_spare_dmap = rxb->vrxb_dmamap;
1941	rxb->vrxb_dmamap = dmap;
1942	rxb->vrxb_m = m;
1943
1944	rxd->addr = segs[0].ds_addr;
1945	rxd->len = segs[0].ds_len;
1946	rxd->btype = btype;
1947	rxd->gen = rxr->vxrxr_gen;
1948
1949	vmxnet3_rxr_increment_fill(rxr);
1950	return (0);
1951}
1952
1953static void
1954vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *rxq,
1955    struct vmxnet3_rxring *rxr, int idx)
1956{
1957	struct vmxnet3_rxdesc *rxd;
1958
1959	rxd = &rxr->vxrxr_rxd[idx];
1960	rxd->gen = rxr->vxrxr_gen;
1961	vmxnet3_rxr_increment_fill(rxr);
1962}
1963
1964static void
1965vmxnet3_rxq_discard_chain(struct vmxnet3_rxqueue *rxq)
1966{
1967	struct vmxnet3_softc *sc;
1968	struct vmxnet3_rxring *rxr;
1969	struct vmxnet3_comp_ring *rxc;
1970	struct vmxnet3_rxcompdesc *rxcd;
1971	int idx, eof;
1972
1973	sc = rxq->vxrxq_sc;
1974	rxc = &rxq->vxrxq_comp_ring;
1975
1976	do {
1977		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
1978		if (rxcd->gen != rxc->vxcr_gen)
1979			break;		/* Not expected. */
1980		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1981
1982		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
1983			rxc->vxcr_next = 0;
1984			rxc->vxcr_gen ^= 1;
1985		}
1986
1987		idx = rxcd->rxd_idx;
1988		eof = rxcd->eop;
1989		if (rxcd->qid < sc->vmx_nrxqueues)
1990			rxr = &rxq->vxrxq_cmd_ring[0];
1991		else
1992			rxr = &rxq->vxrxq_cmd_ring[1];
1993		vmxnet3_rxq_eof_discard(rxq, rxr, idx);
1994	} while (!eof);
1995}
1996
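/*
 * Set the receive checksum state on the mbuf. A csum_data of 0xFFFF
 * combined with CSUM_DATA_VALID | CSUM_PSEUDO_HDR tells the stack the
 * TCP/UDP checksum was fully verified by the device, so no software
 * checksumming is needed.
 */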
1997static void
1998vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
1999{
2000
2001	if (rxcd->ipv4) {
2002		m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
2003		if (rxcd->ipcsum_ok)
2004			m->m_pkthdr.csum_flags |= CSUM_IP_VALID;
2005	}
2006
2007	if (!rxcd->fragment) {
2008		if (rxcd->csum_ok && (rxcd->tcp || rxcd->udp)) {
2009			m->m_pkthdr.csum_flags |= CSUM_DATA_VALID |
2010			    CSUM_PSEUDO_HDR;
2011			m->m_pkthdr.csum_data = 0xFFFF;
2012		}
2013	}
2014}
2015
2016static void
2017vmxnet3_rxq_input(struct vmxnet3_rxqueue *rxq,
2018    struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2019{
2020	struct vmxnet3_softc *sc;
2021	struct ifnet *ifp;
2022
2023	sc = rxq->vxrxq_sc;
2024	ifp = sc->vmx_ifp;
2025
2026	if (rxcd->error) {
2027		rxq->vxrxq_stats.vmrxs_ierrors++;
2028		m_freem(m);
2029		return;
2030	}
2031
2032#ifdef notyet
2033	switch (rxcd->rss_type) {
2034	case VMXNET3_RCD_RSS_TYPE_IPV4:
2035		m->m_pkthdr.flowid = rxcd->rss_hash;
2036		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV4);
2037		break;
2038	case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
2039		m->m_pkthdr.flowid = rxcd->rss_hash;
2040		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV4);
2041		break;
2042	case VMXNET3_RCD_RSS_TYPE_IPV6:
2043		m->m_pkthdr.flowid = rxcd->rss_hash;
2044		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV6);
2045		break;
2046	case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
2047		m->m_pkthdr.flowid = rxcd->rss_hash;
2048		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV6);
2049		break;
2050	default: /* VMXNET3_RCD_RSS_TYPE_NONE */
2051		m->m_pkthdr.flowid = rxq->vxrxq_id;
2052		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2053		break;
2054	}
2055#else
2056	m->m_pkthdr.flowid = rxq->vxrxq_id;
2057	m->m_flags |= M_FLOWID;
2058#endif
2059
2060	if (!rxcd->no_csum)
2061		vmxnet3_rx_csum(rxcd, m);
2062	if (rxcd->vlan) {
2063		m->m_flags |= M_VLANTAG;
2064		m->m_pkthdr.ether_vtag = rxcd->vtag;
2065	}
2066
2067	rxq->vxrxq_stats.vmrxs_ipackets++;
2068	rxq->vxrxq_stats.vmrxs_ibytes += m->m_pkthdr.len;
2069
2070	VMXNET3_RXQ_UNLOCK(rxq);
2071	(*ifp->if_input)(ifp, m);
2072	VMXNET3_RXQ_LOCK(rxq);
2073}
2074
2075static void
2076vmxnet3_rxq_eof(struct vmxnet3_rxqueue *rxq)
2077{
2078	struct vmxnet3_softc *sc;
2079	struct ifnet *ifp;
2080	struct vmxnet3_rxring *rxr;
2081	struct vmxnet3_comp_ring *rxc;
2082	struct vmxnet3_rxdesc *rxd;
2083	struct vmxnet3_rxcompdesc *rxcd;
2084	struct mbuf *m, *m_head, *m_tail;
2085	int idx, length;
2086
2087	sc = rxq->vxrxq_sc;
2088	ifp = sc->vmx_ifp;
2089	rxc = &rxq->vxrxq_comp_ring;
2090
2091	VMXNET3_RXQ_LOCK_ASSERT(rxq);
2092
2093	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2094		return;
2095
2096	m_head = rxq->vxrxq_mhead;
2097	rxq->vxrxq_mhead = NULL;
2098	m_tail = rxq->vxrxq_mtail;
2099	rxq->vxrxq_mtail = NULL;
2100	MPASS(m_head == NULL || m_tail != NULL);
2101
2102	for (;;) {
2103		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
2104		if (rxcd->gen != rxc->vxcr_gen) {
2105			rxq->vxrxq_mhead = m_head;
2106			rxq->vxrxq_mtail = m_tail;
2107			break;
2108		}
2109		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
2110
2111		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
2112			rxc->vxcr_next = 0;
2113			rxc->vxcr_gen ^= 1;
2114		}
2115
2116		idx = rxcd->rxd_idx;
2117		length = rxcd->len;
2118		if (rxcd->qid < sc->vmx_nrxqueues)
2119			rxr = &rxq->vxrxq_cmd_ring[0];
2120		else
2121			rxr = &rxq->vxrxq_cmd_ring[1];
2122		rxd = &rxr->vxrxr_rxd[idx];
2123
2124		m = rxr->vxrxr_rxbuf[idx].vrxb_m;
2125		KASSERT(m != NULL, ("%s: queue %d idx %d without mbuf",
2126		    __func__, rxcd->qid, idx));
2127
2128		/*
2129		 * The host may skip descriptors. We detect this when the
2130		 * completion's buffer index does not match our current fill
2131		 * index. Catch up with the host now.
2132		 */
2133		if (__predict_false(rxr->vxrxr_fill != idx)) {
2134			while (rxr->vxrxr_fill != idx) {
2135				rxr->vxrxr_rxd[rxr->vxrxr_fill].gen =
2136				    rxr->vxrxr_gen;
2137				vmxnet3_rxr_increment_fill(rxr);
2138			}
2139		}
2140
2141		if (rxcd->sop) {
2142			KASSERT(rxd->btype == VMXNET3_BTYPE_HEAD,
2143			    ("%s: start of frame w/o head buffer", __func__));
2144			KASSERT(rxr == &rxq->vxrxq_cmd_ring[0],
2145			    ("%s: start of frame not in ring 0", __func__));
2146			KASSERT((idx % sc->vmx_rx_max_chain) == 0,
2147			    ("%s: start of frame at unexpected index %d (%d)",
2148			     __func__, idx, sc->vmx_rx_max_chain));
2149			KASSERT(m_head == NULL,
2150			    ("%s: duplicate start of frame?", __func__));
2151
2152			if (length == 0) {
2153				/* Just ignore this descriptor. */
2154				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2155				goto nextp;
2156			}
2157
2158			if (vmxnet3_newbuf(sc, rxr) != 0) {
2159				rxq->vxrxq_stats.vmrxs_iqdrops++;
2160				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2161				if (!rxcd->eop)
2162					vmxnet3_rxq_discard_chain(rxq);
2163				goto nextp;
2164			}
2165
2166			m->m_pkthdr.rcvif = ifp;
2167			m->m_pkthdr.len = m->m_len = length;
2168			m->m_pkthdr.csum_flags = 0;
2169			m_head = m_tail = m;
2170
2171		} else {
2172			KASSERT(rxd->btype == VMXNET3_BTYPE_BODY,
2173			    ("%s: non start of frame w/o body buffer", __func__));
2174			KASSERT(m_head != NULL,
2175			    ("%s: frame not started?", __func__));
2176
2177			if (vmxnet3_newbuf(sc, rxr) != 0) {
2178				rxq->vxrxq_stats.vmrxs_iqdrops++;
2179				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2180				if (!rxcd->eop)
2181					vmxnet3_rxq_discard_chain(rxq);
2182				m_freem(m_head);
2183				m_head = m_tail = NULL;
2184				goto nextp;
2185			}
2186
2187			m->m_len = length;
2188			m_head->m_pkthdr.len += length;
2189			m_tail->m_next = m;
2190			m_tail = m;
2191		}
2192
2193		if (rxcd->eop) {
2194			vmxnet3_rxq_input(rxq, rxcd, m_head);
2195			m_head = m_tail = NULL;
2196
2197			/* Must recheck after dropping the Rx lock. */
2198			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2199				break;
2200		}
2201
2202nextp:
2203		if (__predict_false(rxq->vxrxq_rs->update_rxhead)) {
2204			int qid = rxcd->qid;
2205			bus_size_t r;
2206
2207			idx = (idx + 1) % rxr->vxrxr_ndesc;
2208			if (qid >= sc->vmx_nrxqueues) {
2209				qid -= sc->vmx_nrxqueues;
2210				r = VMXNET3_BAR0_RXH2(qid);
2211			} else
2212				r = VMXNET3_BAR0_RXH1(qid);
2213			vmxnet3_write_bar0(sc, r, idx);
2214		}
2215	}
2216}
2217
2218static void
2219vmxnet3_legacy_intr(void *xsc)
2220{
2221	struct vmxnet3_softc *sc;
2222	struct vmxnet3_rxqueue *rxq;
2223	struct vmxnet3_txqueue *txq;
2224	struct ifnet *ifp;
2225
2226	sc = xsc;
2227	rxq = &sc->vmx_rxq[0];
2228	txq = &sc->vmx_txq[0];
2229	ifp = sc->vmx_ifp;
2230
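	/*
	 * With a shared INTx line, check the interrupt cause register
	 * first so we return quickly when the interrupt was not ours.
	 */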
2231	if (sc->vmx_intr_type == VMXNET3_IT_LEGACY) {
2232		if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
2233			return;
2234	}
2235	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2236		vmxnet3_disable_all_intrs(sc);
2237
2238	if (sc->vmx_ds->event != 0)
2239		vmxnet3_evintr(sc);
2240
2241	VMXNET3_RXQ_LOCK(rxq);
2242	vmxnet3_rxq_eof(rxq);
2243	VMXNET3_RXQ_UNLOCK(rxq);
2244
2245	VMXNET3_TXQ_LOCK(txq);
2246	vmxnet3_txq_eof(txq);
2247	vmxnet3_txq_start(txq);
2248	VMXNET3_TXQ_UNLOCK(txq);
2249
2250	vmxnet3_enable_all_intrs(sc);
2251}
2252
2253static void
2254vmxnet3_txq_intr(void *xtxq)
2255{
2256	struct vmxnet3_softc *sc;
2257	struct vmxnet3_txqueue *txq;
2258	struct ifnet *ifp;
2259
2260	txq = xtxq;
2261	sc = txq->vxtxq_sc;
2262	ifp = sc->vmx_ifp;
2263
2264	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2265		vmxnet3_disable_intr(sc, txq->vxtxq_intr_idx);
2266
2267	VMXNET3_TXQ_LOCK(txq);
2268	vmxnet3_txq_eof(txq);
2269	vmxnet3_txq_start(txq);
2270	VMXNET3_TXQ_UNLOCK(txq);
2271
2272	vmxnet3_enable_intr(sc, txq->vxtxq_intr_idx);
2273}
2274
2275static void
2276vmxnet3_rxq_intr(void *xrxq)
2277{
2278	struct vmxnet3_softc *sc;
2279	struct vmxnet3_rxqueue *rxq;
2280
2281	rxq = xrxq;
2282	sc = rxq->vxrxq_sc;
2283
2284	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2285		vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx);
2286
2287	VMXNET3_RXQ_LOCK(rxq);
2288	vmxnet3_rxq_eof(rxq);
2289	VMXNET3_RXQ_UNLOCK(rxq);
2290
2291	vmxnet3_enable_intr(sc, rxq->vxrxq_intr_idx);
2292}
2293
2294static void
2295vmxnet3_event_intr(void *xsc)
2296{
2297	struct vmxnet3_softc *sc;
2298
2299	sc = xsc;
2300
2301	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2302		vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
2303
2304	if (sc->vmx_ds->event != 0)
2305		vmxnet3_evintr(sc);
2306
2307	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2308}
2309
2310static void
2311vmxnet3_txstop(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2312{
2313	struct vmxnet3_txring *txr;
2314	struct vmxnet3_txbuf *txb;
2315	int i;
2316
2317	txr = &txq->vxtxq_cmd_ring;
2318
2319	for (i = 0; i < txr->vxtxr_ndesc; i++) {
2320		txb = &txr->vxtxr_txbuf[i];
2321
2322		if (txb->vtxb_m == NULL)
2323			continue;
2324
2325		bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
2326		    BUS_DMASYNC_POSTWRITE);
2327		bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap);
2328		m_freem(txb->vtxb_m);
2329		txb->vtxb_m = NULL;
2330	}
2331}
2332
2333static void
2334vmxnet3_rxstop(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2335{
2336	struct vmxnet3_rxring *rxr;
2337	struct vmxnet3_rxbuf *rxb;
2338	int i, j;
2339
2340	if (rxq->vxrxq_mhead != NULL) {
2341		m_freem(rxq->vxrxq_mhead);
2342		rxq->vxrxq_mhead = NULL;
2343		rxq->vxrxq_mtail = NULL;
2344	}
2345
2346	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
2347		rxr = &rxq->vxrxq_cmd_ring[i];
2348
2349		for (j = 0; j < rxr->vxrxr_ndesc; j++) {
2350			rxb = &rxr->vxrxr_rxbuf[j];
2351
2352			if (rxb->vrxb_m == NULL)
2353				continue;
2354
2355			bus_dmamap_sync(rxr->vxrxr_rxtag, rxb->vrxb_dmamap,
2356			    BUS_DMASYNC_POSTREAD);
2357			bus_dmamap_unload(rxr->vxrxr_rxtag, rxb->vrxb_dmamap);
2358			m_freem(rxb->vrxb_m);
2359			rxb->vrxb_m = NULL;
2360		}
2361	}
2362}
2363
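/*
 * Briefly take and drop each queue lock so that any interrupt handler
 * still running in a queue on another CPU drains out before the rings
 * are torn down.
 */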
2364static void
2365vmxnet3_stop_rendezvous(struct vmxnet3_softc *sc)
2366{
2367	struct vmxnet3_rxqueue *rxq;
2368	struct vmxnet3_txqueue *txq;
2369	int i;
2370
2371	for (i = 0; i < sc->vmx_nrxqueues; i++) {
2372		rxq = &sc->vmx_rxq[i];
2373		VMXNET3_RXQ_LOCK(rxq);
2374		VMXNET3_RXQ_UNLOCK(rxq);
2375	}
2376
2377	for (i = 0; i < sc->vmx_ntxqueues; i++) {
2378		txq = &sc->vmx_txq[i];
2379		VMXNET3_TXQ_LOCK(txq);
2380		VMXNET3_TXQ_UNLOCK(txq);
2381	}
2382}
2383
2384static void
2385vmxnet3_stop(struct vmxnet3_softc *sc)
2386{
2387	struct ifnet *ifp;
2388	int q;
2389
2390	ifp = sc->vmx_ifp;
2391	VMXNET3_CORE_LOCK_ASSERT(sc);
2392
2393	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2394	sc->vmx_link_active = 0;
2395	callout_stop(&sc->vmx_tick);
2396
2397	/* Disable interrupts. */
2398	vmxnet3_disable_all_intrs(sc);
2399	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
2400
2401	vmxnet3_stop_rendezvous(sc);
2402
2403	for (q = 0; q < sc->vmx_ntxqueues; q++)
2404		vmxnet3_txstop(sc, &sc->vmx_txq[q]);
2405	for (q = 0; q < sc->vmx_nrxqueues; q++)
2406		vmxnet3_rxstop(sc, &sc->vmx_rxq[q]);
2407
2408	vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
2409}
2410
2411static void
2412vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2413{
2414	struct vmxnet3_txring *txr;
2415	struct vmxnet3_comp_ring *txc;
2416
2417	txr = &txq->vxtxq_cmd_ring;
2418	txr->vxtxr_head = 0;
2419	txr->vxtxr_next = 0;
2420	txr->vxtxr_gen = VMXNET3_INIT_GEN;
2421	bzero(txr->vxtxr_txd,
2422	    txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc));
2423
2424	txc = &txq->vxtxq_comp_ring;
2425	txc->vxcr_next = 0;
2426	txc->vxcr_gen = VMXNET3_INIT_GEN;
2427	bzero(txc->vxcr_u.txcd,
2428	    txc->vxcr_ndesc * sizeof(struct vmxnet3_txcompdesc));
2429}
2430
2431static int
2432vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2433{
2434	struct ifnet *ifp;
2435	struct vmxnet3_rxring *rxr;
2436	struct vmxnet3_comp_ring *rxc;
2437	int i, populate, idx, frame_size, error;
2438
2439	ifp = sc->vmx_ifp;
2440	frame_size = ETHER_ALIGN + sizeof(struct ether_vlan_header) +
2441	    ifp->if_mtu;
2442
2443	/*
2444	 * If the MTU causes us to exceed what a regular-sized cluster can
2445	 * handle, we allocate a second MJUMPAGESIZE cluster after it in
2446	 * ring 0. If in use, ring 1 always contains MJUMPAGESIZE clusters.
2447	 *
2448	 * Keep rx_max_chain a divisor of the maximum Rx ring size to make
2449	 * our life easier. We do not support changing the ring size after
2450	 * attach.
2451	 */
2452	if (frame_size <= MCLBYTES)
2453		sc->vmx_rx_max_chain = 1;
2454	else
2455		sc->vmx_rx_max_chain = 2;
2456
2457	/*
2458	 * Only populate ring 1 if the configuration will take advantage
2459	 * of it. That is either when LRO is enabled or the frame size
2460	 * exceeds what ring 0 can contain.
2461	 */
2462	if ((ifp->if_capenable & IFCAP_LRO) == 0 &&
2463	    frame_size <= MCLBYTES + MJUMPAGESIZE)
2464		populate = 1;
2465	else
2466		populate = VMXNET3_RXRINGS_PERQ;
2467
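	/*
	 * Worked example (assuming MCLBYTES = 2048, MJUMPAGESIZE = 4096 and
	 * LRO disabled): an MTU of 1500 gives a frame_size of 1520, so
	 * single-cluster chains suffice and only ring 0 is populated. An
	 * MTU of 9000 gives 9020, which exceeds MCLBYTES + MJUMPAGESIZE, so
	 * ring 0 uses two-cluster chains and ring 1 is populated as well.
	 */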
2468	for (i = 0; i < populate; i++) {
2469		rxr = &rxq->vxrxq_cmd_ring[i];
2470		rxr->vxrxr_fill = 0;
2471		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
2472		bzero(rxr->vxrxr_rxd,
2473		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2474
2475		for (idx = 0; idx < rxr->vxrxr_ndesc; idx++) {
2476			error = vmxnet3_newbuf(sc, rxr);
2477			if (error)
2478				return (error);
2479		}
2480	}
2481
2482	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
2483		rxr = &rxq->vxrxq_cmd_ring[i];
2484		rxr->vxrxr_fill = 0;
2485		rxr->vxrxr_gen = 0;
2486		bzero(rxr->vxrxr_rxd,
2487		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2488	}
2489
2490	rxc = &rxq->vxrxq_comp_ring;
2491	rxc->vxcr_next = 0;
2492	rxc->vxcr_gen = VMXNET3_INIT_GEN;
2493	bzero(rxc->vxcr_u.rxcd,
2494	    rxc->vxcr_ndesc * sizeof(struct vmxnet3_rxcompdesc));
2495
2496	return (0);
2497}
2498
2499static int
2500vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
2501{
2502	device_t dev;
2503	int q, error;
2504
2505	dev = sc->vmx_dev;
2506
2507	for (q = 0; q < sc->vmx_ntxqueues; q++)
2508		vmxnet3_txinit(sc, &sc->vmx_txq[q]);
2509
2510	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2511		error = vmxnet3_rxinit(sc, &sc->vmx_rxq[q]);
2512		if (error) {
2513			device_printf(dev, "cannot populate Rx queue %d\n", q);
2514			return (error);
2515		}
2516	}
2517
2518	return (0);
2519}
2520
2521static int
2522vmxnet3_enable_device(struct vmxnet3_softc *sc)
2523{
2524	int q;
2525
2526	if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
2527		device_printf(sc->vmx_dev, "device enable command failed!\n");
2528		return (1);
2529	}
2530
2531	/* Reset the Rx queue heads. */
2532	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2533		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
2534		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
2535	}
2536
2537	return (0);
2538}
2539
2540static void
2541vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
2542{
2543	struct ifnet *ifp;
2544
2545	ifp = sc->vmx_ifp;
2546
2547	vmxnet3_set_rxfilter(sc);
2548
2549	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2550		bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter,
2551		    sizeof(sc->vmx_ds->vlan_filter));
2552	else
2553		bzero(sc->vmx_ds->vlan_filter,
2554		    sizeof(sc->vmx_ds->vlan_filter));
2555	vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
2556}
2557
2558static int
2559vmxnet3_reinit(struct vmxnet3_softc *sc)
2560{
2561
2562	vmxnet3_reinit_interface(sc);
2563	vmxnet3_reinit_shared_data(sc);
2564
2565	if (vmxnet3_reinit_queues(sc) != 0)
2566		return (ENXIO);
2567
2568	if (vmxnet3_enable_device(sc) != 0)
2569		return (ENXIO);
2570
2571	vmxnet3_reinit_rxfilters(sc);
2572
2573	return (0);
2574}
2575
2576static void
2577vmxnet3_init_locked(struct vmxnet3_softc *sc)
2578{
2579	struct ifnet *ifp;
2580
2581	ifp = sc->vmx_ifp;
2582
2583	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2584		return;
2585
2586	vmxnet3_stop(sc);
2587
2588	if (vmxnet3_reinit(sc) != 0) {
2589		vmxnet3_stop(sc);
2590		return;
2591	}
2592
2593	ifp->if_drv_flags |= IFF_DRV_RUNNING;
2594	vmxnet3_link_status(sc);
2595
2596	vmxnet3_enable_all_intrs(sc);
2597	callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
2598}
2599
2600static void
2601vmxnet3_init(void *xsc)
2602{
2603	struct vmxnet3_softc *sc;
2604
2605	sc = xsc;
2606
2607	VMXNET3_CORE_LOCK(sc);
2608	vmxnet3_init_locked(sc);
2609	VMXNET3_CORE_UNLOCK(sc);
2610}
2611
2612/*
2613 * BMV: Much of this can go away once we finally have offsets in
2614 * the mbuf packet header. Bug andre@.
2615 */
2616static int
2617vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *txq, struct mbuf *m,
2618    int *etype, int *proto, int *start)
2619{
2620	struct ether_vlan_header *evh;
2621	int offset;
2622#if defined(INET)
2623	struct ip *ip = NULL;
2624	struct ip iphdr;
2625#endif
2626#if defined(INET6)
2627	struct ip6_hdr *ip6 = NULL;
2628	struct ip6_hdr ip6hdr;
2629#endif
2630
2631	evh = mtod(m, struct ether_vlan_header *);
2632	if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2633		/* BMV: We should handle nested VLAN tags too. */
2634		*etype = ntohs(evh->evl_proto);
2635		offset = sizeof(struct ether_vlan_header);
2636	} else {
2637		*etype = ntohs(evh->evl_encap_proto);
2638		offset = sizeof(struct ether_header);
2639	}
2640
2641	switch (*etype) {
2642#if defined(INET)
2643	case ETHERTYPE_IP:
2644		if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
2645			m_copydata(m, offset, sizeof(struct ip),
2646			    (caddr_t) &iphdr);
2647			ip = &iphdr;
2648		} else
2649			ip = mtodo(m, offset);
2650		*proto = ip->ip_p;
2651		*start = offset + (ip->ip_hl << 2);
2652		break;
2653#endif
2654#if defined(INET6)
2655	case ETHERTYPE_IPV6:
2656		if (__predict_false(m->m_len <
2657		    offset + sizeof(struct ip6_hdr))) {
2658			m_copydata(m, offset, sizeof(struct ip6_hdr),
2659			    (caddr_t) &ip6hdr);
2660			ip6 = &ip6hdr;
2661		} else
2662			ip6 = mtodo(m, offset);
2663		*proto = -1;
2664		*start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
2665		/* Assert the network stack sent us a valid packet. */
2666		KASSERT(*start > offset,
2667		    ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
2668		    *start, offset, *proto));
2669		break;
2670#endif
2671	default:
2672		return (EINVAL);
2673	}
2674
2675	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
2676		struct tcphdr *tcp, tcphdr;
2677		uint16_t sum;
2678
2679		if (__predict_false(*proto != IPPROTO_TCP)) {
2680			/* Likely failed to correctly parse the mbuf. */
2681			return (EINVAL);
2682		}
2683
2684		txq->vxtxq_stats.vmtxs_tso++;
2685
2686		switch (*etype) {
2687#if defined(INET)
2688		case ETHERTYPE_IP:
2689			sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2690			    htons(IPPROTO_TCP));
2691			break;
2692#endif
2693#if defined(INET6)
2694		case ETHERTYPE_IPV6:
2695			sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
2696			break;
2697#endif
2698		default:
2699			sum = 0;
2700			break;
2701		}
2702
2703		if (m->m_len < *start + sizeof(struct tcphdr)) {
2704			m_copyback(m, *start + offsetof(struct tcphdr, th_sum),
2705			    sizeof(uint16_t), (caddr_t) &sum);
2706			m_copydata(m, *start, sizeof(struct tcphdr),
2707			    (caddr_t) &tcphdr);
2708			tcp = &tcphdr;
2709		} else {
2710			tcp = mtodo(m, *start);
2711			tcp->th_sum = sum;
2712		}
2713
2714		/*
2715		 * For TSO, the size of the protocol header is also
2716		 * included in the descriptor header size.
2717		 */
2718		*start += (tcp->th_off << 2);
2719	} else
2720		txq->vxtxq_stats.vmtxs_csum++;
2721
2722	return (0);
2723}
2724
2725static int
2726vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *txq, struct mbuf **m0,
2727    bus_dmamap_t dmap, bus_dma_segment_t segs[], int *nsegs)
2728{
2729	struct vmxnet3_txring *txr;
2730	struct mbuf *m;
2731	bus_dma_tag_t tag;
2732	int error;
2733
2734	txr = &txq->vxtxq_cmd_ring;
2735	m = *m0;
2736	tag = txr->vxtxr_txtag;
2737
2738	error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0);
2739	if (error == 0 || error != EFBIG)
2740		return (error);
2741
2742	m = m_defrag(m, M_NOWAIT);
2743	if (m != NULL) {
2744		*m0 = m;
2745		error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0);
2746	} else
2747		error = ENOBUFS;
2748
2749	if (error) {
2750		m_freem(*m0);
2751		*m0 = NULL;
2752		txq->vxtxq_sc->vmx_stats.vmst_defrag_failed++;
2753	} else
2754		txq->vxtxq_sc->vmx_stats.vmst_defragged++;
2755
2756	return (error);
2757}
2758
2759static void
2760vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *txq, bus_dmamap_t dmap)
2761{
2762	struct vmxnet3_txring *txr;
2763
2764	txr = &txq->vxtxq_cmd_ring;
2765	bus_dmamap_unload(txr->vxtxr_txtag, dmap);
2766}
2767
2768static int
2769vmxnet3_txq_encap(struct vmxnet3_txqueue *txq, struct mbuf **m0)
2770{
2771	struct vmxnet3_softc *sc;
2772	struct ifnet *ifp;
2773	struct vmxnet3_txring *txr;
2774	struct vmxnet3_txdesc *txd, *sop;
2775	struct mbuf *m;
2776	bus_dmamap_t dmap;
2777	bus_dma_segment_t segs[VMXNET3_TX_MAXSEGS];
2778	int i, gen, nsegs, etype, proto, start, error;
2779
2780	sc = txq->vxtxq_sc;
2781	ifp = sc->vmx_ifp;
2782	start = 0;
2783	txd = NULL;
2784	txr = &txq->vxtxq_cmd_ring;
2785	dmap = txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_dmamap;
2786
2787	error = vmxnet3_txq_load_mbuf(txq, m0, dmap, segs, &nsegs);
2788	if (error)
2789		return (error);
2790
2791	m = *m0;
2792	M_ASSERTPKTHDR(m);
2793	KASSERT(nsegs <= VMXNET3_TX_MAXSEGS,
2794	    ("%s: mbuf %p with too many segments %d", __func__, m, nsegs));
2795
2796	if (VMXNET3_TXRING_AVAIL(txr) < nsegs) {
2797		txq->vxtxq_stats.vmtxs_full++;
2798		vmxnet3_txq_unload_mbuf(txq, dmap);
2799		return (ENOSPC);
2800	} else if (m->m_pkthdr.csum_flags & VMXNET3_CSUM_ALL_OFFLOAD) {
2801		error = vmxnet3_txq_offload_ctx(txq, m, &etype, &proto, &start);
2802		if (error) {
2803			txq->vxtxq_stats.vmtxs_offload_failed++;
2804			vmxnet3_txq_unload_mbuf(txq, dmap);
2805			m_freem(m);
2806			*m0 = NULL;
2807			return (error);
2808		}
2809	}
2810
2811	txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_m = m;
2812	sop = &txr->vxtxr_txd[txr->vxtxr_head];
2813	gen = txr->vxtxr_gen ^ 1;	/* Still owned by the CPU for now. */
2814
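	/*
	 * Build the chain with the inverted generation bit so the device
	 * ignores these descriptors until the SOP descriptor's gen bit is
	 * flipped below, after the write barrier, publishing the entire
	 * chain to the device at once.
	 */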
2815	for (i = 0; i < nsegs; i++) {
2816		txd = &txr->vxtxr_txd[txr->vxtxr_head];
2817
2818		txd->addr = segs[i].ds_addr;
2819		txd->len = segs[i].ds_len;
2820		txd->gen = gen;
2821		txd->dtype = 0;
2822		txd->offload_mode = VMXNET3_OM_NONE;
2823		txd->offload_pos = 0;
2824		txd->hlen = 0;
2825		txd->eop = 0;
2826		txd->compreq = 0;
2827		txd->vtag_mode = 0;
2828		txd->vtag = 0;
2829
2830		if (++txr->vxtxr_head == txr->vxtxr_ndesc) {
2831			txr->vxtxr_head = 0;
2832			txr->vxtxr_gen ^= 1;
2833		}
2834		gen = txr->vxtxr_gen;
2835	}
2836	txd->eop = 1;
2837	txd->compreq = 1;
2838
2839	if (m->m_flags & M_VLANTAG) {
2840		sop->vtag_mode = 1;
2841		sop->vtag = m->m_pkthdr.ether_vtag;
2842	}
2843
2844	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
2845		sop->offload_mode = VMXNET3_OM_TSO;
2846		sop->hlen = start;
2847		sop->offload_pos = m->m_pkthdr.tso_segsz;
2848	} else if (m->m_pkthdr.csum_flags & (VMXNET3_CSUM_OFFLOAD |
2849	    VMXNET3_CSUM_OFFLOAD_IPV6)) {
2850		sop->offload_mode = VMXNET3_OM_CSUM;
2851		sop->hlen = start;
2852		sop->offload_pos = start + m->m_pkthdr.csum_data;
2853	}
2854
2855	/* Finally, change the ownership. */
2856	vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
2857	sop->gen ^= 1;
2858
2859	if (++txq->vxtxq_ts->npending >= txq->vxtxq_ts->intr_threshold) {
2860		txq->vxtxq_ts->npending = 0;
2861		vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id),
2862		    txr->vxtxr_head);
2863	}
2864
2865	return (0);
2866}
2867
2868#ifdef VMXNET3_LEGACY_TX
2869
2870static void
2871vmxnet3_start_locked(struct ifnet *ifp)
2872{
2873	struct vmxnet3_softc *sc;
2874	struct vmxnet3_txqueue *txq;
2875	struct vmxnet3_txring *txr;
2876	struct mbuf *m_head;
2877	int tx, avail;
2878
2879	sc = ifp->if_softc;
2880	txq = &sc->vmx_txq[0];
2881	txr = &txq->vxtxq_cmd_ring;
2882	tx = 0;
2883
2884	VMXNET3_TXQ_LOCK_ASSERT(txq);
2885
2886	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2887	    sc->vmx_link_active == 0)
2888		return;
2889
2890	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
2891		if ((avail = VMXNET3_TXRING_AVAIL(txr)) < 2)
2892			break;
2893
2894		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
2895		if (m_head == NULL)
2896			break;
2897
2898		/* Assume the worst case if this mbuf is the head of a chain. */
2899		if (m_head->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) {
2900			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2901			break;
2902		}
2903
2904		if (vmxnet3_txq_encap(txq, &m_head) != 0) {
2905			if (m_head != NULL)
2906				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2907			break;
2908		}
2909
2910		tx++;
2911		ETHER_BPF_MTAP(ifp, m_head);
2912	}
2913
2914	if (tx > 0)
2915		txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
2916}
2917
2918static void
2919vmxnet3_start(struct ifnet *ifp)
2920{
2921	struct vmxnet3_softc *sc;
2922	struct vmxnet3_txqueue *txq;
2923
2924	sc = ifp->if_softc;
2925	txq = &sc->vmx_txq[0];
2926
2927	VMXNET3_TXQ_LOCK(txq);
2928	vmxnet3_start_locked(ifp);
2929	VMXNET3_TXQ_UNLOCK(txq);
2930}
2931
2932#else /* !VMXNET3_LEGACY_TX */
2933
2934static int
2935vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *txq, struct mbuf *m)
2936{
2937	struct vmxnet3_softc *sc;
2938	struct vmxnet3_txring *txr;
2939	struct buf_ring *br;
2940	struct ifnet *ifp;
2941	int tx, avail, error;
2942
2943	sc = txq->vxtxq_sc;
2944	br = txq->vxtxq_br;
2945	ifp = sc->vmx_ifp;
2946	txr = &txq->vxtxq_cmd_ring;
2947	tx = 0;
2948	error = 0;
2949
2950	VMXNET3_TXQ_LOCK_ASSERT(txq);
2951
2952	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2953	    sc->vmx_link_active == 0) {
2954		if (m != NULL)
2955			error = drbr_enqueue(ifp, br, m);
2956		return (error);
2957	}
2958
2959	if (m != NULL) {
2960		error = drbr_enqueue(ifp, br, m);
2961		if (error)
2962			return (error);
2963	}
2964
2965	while ((avail = VMXNET3_TXRING_AVAIL(txr)) >= 2) {
2966		m = drbr_peek(ifp, br);
2967		if (m == NULL)
2968			break;
2969
2970		/* Assume the worst case if this mbuf is the head of a chain. */
2971		if (m->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) {
2972			drbr_putback(ifp, br, m);
2973			break;
2974		}
2975
2976		error = vmxnet3_txq_encap(txq, &m);
2977		if (error) {
2978			if (m != NULL)
2979				drbr_putback(ifp, br, m);
2980			else
2981				drbr_advance(ifp, br);
2982			break;
2983		}
2984		drbr_advance(ifp, br);
2985
2986		tx++;
2987		ETHER_BPF_MTAP(ifp, m);
2988	}
2989
2990	if (tx > 0)
2991		txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
2992
2993	return (0);
2994}
2995
2996static int
2997vmxnet3_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
2998{
2999	struct vmxnet3_softc *sc;
3000	struct vmxnet3_txqueue *txq;
3001	int i, ntxq, error;
3002
3003	sc = ifp->if_softc;
3004	ntxq = sc->vmx_ntxqueues;
3005
3006	if (m->m_flags & M_FLOWID)
3007		i = m->m_pkthdr.flowid % ntxq;
3008	else
3009		i = curcpu % ntxq;
3010
3011	txq = &sc->vmx_txq[i];
3012
3013	if (VMXNET3_TXQ_TRYLOCK(txq) != 0) {
3014		error = vmxnet3_txq_mq_start_locked(txq, m);
3015		VMXNET3_TXQ_UNLOCK(txq);
3016	} else {
3017		error = drbr_enqueue(ifp, txq->vxtxq_br, m);
3018		taskqueue_enqueue(sc->vmx_tq, &txq->vxtxq_defrtask);
3019	}
3020
3021	return (error);
3022}
3023
3024static void
3025vmxnet3_txq_tq_deferred(void *xtxq, int pending)
3026{
3027	struct vmxnet3_softc *sc;
3028	struct vmxnet3_txqueue *txq;
3029
3030	txq = xtxq;
3031	sc = txq->vxtxq_sc;
3032
3033	VMXNET3_TXQ_LOCK(txq);
3034	if (!drbr_empty(sc->vmx_ifp, txq->vxtxq_br))
3035		vmxnet3_txq_mq_start_locked(txq, NULL);
3036	VMXNET3_TXQ_UNLOCK(txq);
3037}
3038
3039#endif /* VMXNET3_LEGACY_TX */
3040
3041static void
3042vmxnet3_txq_start(struct vmxnet3_txqueue *txq)
3043{
3044	struct vmxnet3_softc *sc;
3045	struct ifnet *ifp;
3046
3047	sc = txq->vxtxq_sc;
3048	ifp = sc->vmx_ifp;
3049
3050#ifdef VMXNET3_LEGACY_TX
3051	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
3052		vmxnet3_start_locked(ifp);
3053#else
3054	if (!drbr_empty(ifp, txq->vxtxq_br))
3055		vmxnet3_txq_mq_start_locked(txq, NULL);
3056#endif
3057}
3058
3059static void
3060vmxnet3_tx_start_all(struct vmxnet3_softc *sc)
3061{
3062	struct vmxnet3_txqueue *txq;
3063	int i;
3064
3065	VMXNET3_CORE_LOCK_ASSERT(sc);
3066
3067	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3068		txq = &sc->vmx_txq[i];
3069
3070		VMXNET3_TXQ_LOCK(txq);
3071		vmxnet3_txq_start(txq);
3072		VMXNET3_TXQ_UNLOCK(txq);
3073	}
3074}
3075
3076static void
3077vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
3078{
3079	struct ifnet *ifp;
3080	int idx, bit;
3081
3082	ifp = sc->vmx_ifp;
3083	idx = (tag >> 5) & 0x7F;
3084	bit = tag & 0x1F;
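	/*
	 * Worked example: tag 100 maps to bit 4 of 32-bit word 3, so the
	 * 128-word bitvector covers all 4096 possible VLAN IDs.
	 */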
3085
3086	if (tag == 0 || tag > 4095)
3087		return;
3088
3089	VMXNET3_CORE_LOCK(sc);
3090
3091	/* Update our private VLAN bitvector. */
3092	if (add)
3093		sc->vmx_vlan_filter[idx] |= (1 << bit);
3094	else
3095		sc->vmx_vlan_filter[idx] &= ~(1 << bit);
3096
3097	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
3098		if (add)
3099			sc->vmx_ds->vlan_filter[idx] |= (1 << bit);
3100		else
3101			sc->vmx_ds->vlan_filter[idx] &= ~(1 << bit);
3102		vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
3103	}
3104
3105	VMXNET3_CORE_UNLOCK(sc);
3106}
3107
3108static void
3109vmxnet3_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3110{
3111
3112	if (ifp->if_softc == arg)
3113		vmxnet3_update_vlan_filter(arg, 1, tag);
3114}
3115
3116static void
3117vmxnet3_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3118{
3119
3120	if (ifp->if_softc == arg)
3121		vmxnet3_update_vlan_filter(arg, 0, tag);
3122}
3123
3124static void
3125vmxnet3_set_rxfilter(struct vmxnet3_softc *sc)
3126{
3127	struct ifnet *ifp;
3128	struct vmxnet3_driver_shared *ds;
3129	struct ifmultiaddr *ifma;
3130	u_int mode;
3131
3132	ifp = sc->vmx_ifp;
3133	ds = sc->vmx_ds;
3134
3135	mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
3136	if (ifp->if_flags & IFF_PROMISC)
3137		mode |= VMXNET3_RXMODE_PROMISC;
3138	if (ifp->if_flags & IFF_ALLMULTI)
3139		mode |= VMXNET3_RXMODE_ALLMULTI;
3140	else {
3141		int cnt = 0, overflow = 0;
3142
3143		if_maddr_rlock(ifp);
3144		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3145			if (ifma->ifma_addr->sa_family != AF_LINK)
3146				continue;
3147			else if (cnt == VMXNET3_MULTICAST_MAX) {
3148				overflow = 1;
3149				break;
3150			}
3151
3152			bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
3153			    &sc->vmx_mcast[cnt * ETHER_ADDR_LEN], ETHER_ADDR_LEN);
3154			cnt++;
3155		}
3156		if_maddr_runlock(ifp);
3157
3158		if (overflow != 0) {
3159			cnt = 0;
3160			mode |= VMXNET3_RXMODE_ALLMULTI;
3161		} else if (cnt > 0)
3162			mode |= VMXNET3_RXMODE_MCAST;
3163		ds->mcast_tablelen = cnt * ETHER_ADDR_LEN;
3164	}
3165
3166	ds->rxmode = mode;
3167
3168	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
3169	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
3170}
3171
3172static int
3173vmxnet3_change_mtu(struct vmxnet3_softc *sc, int mtu)
3174{
3175	struct ifnet *ifp;
3176
3177	ifp = sc->vmx_ifp;
3178
3179	if (mtu < VMXNET3_MIN_MTU || mtu > VMXNET3_MAX_MTU)
3180		return (EINVAL);
3181
3182	ifp->if_mtu = mtu;
3183
3184	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
3185		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3186		vmxnet3_init_locked(sc);
3187	}
3188
3189	return (0);
3190}
3191
3192static int
3193vmxnet3_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
3194{
3195	struct vmxnet3_softc *sc;
3196	struct ifreq *ifr;
3197	int reinit, mask, error;
3198
3199	sc = ifp->if_softc;
3200	ifr = (struct ifreq *) data;
3201	error = 0;
3202
3203	switch (cmd) {
3204	case SIOCSIFMTU:
3205		if (ifp->if_mtu != ifr->ifr_mtu) {
3206			VMXNET3_CORE_LOCK(sc);
3207			error = vmxnet3_change_mtu(sc, ifr->ifr_mtu);
3208			VMXNET3_CORE_UNLOCK(sc);
3209		}
3210		break;
3211
3212	case SIOCSIFFLAGS:
3213		VMXNET3_CORE_LOCK(sc);
3214		if (ifp->if_flags & IFF_UP) {
3215			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3216				if ((ifp->if_flags ^ sc->vmx_if_flags) &
3217				    (IFF_PROMISC | IFF_ALLMULTI)) {
3218					vmxnet3_set_rxfilter(sc);
3219				}
3220			} else
3221				vmxnet3_init_locked(sc);
3222		} else {
3223			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3224				vmxnet3_stop(sc);
3225		}
3226		sc->vmx_if_flags = ifp->if_flags;
3227		VMXNET3_CORE_UNLOCK(sc);
3228		break;
3229
3230	case SIOCADDMULTI:
3231	case SIOCDELMULTI:
3232		VMXNET3_CORE_LOCK(sc);
3233		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3234			vmxnet3_set_rxfilter(sc);
3235		VMXNET3_CORE_UNLOCK(sc);
3236		break;
3237
3238	case SIOCSIFMEDIA:
3239	case SIOCGIFMEDIA:
3240		error = ifmedia_ioctl(ifp, ifr, &sc->vmx_media, cmd);
3241		break;
3242
3243	case SIOCSIFCAP:
3244		VMXNET3_CORE_LOCK(sc);
3245		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3246
3247		if (mask & IFCAP_TXCSUM)
3248			ifp->if_capenable ^= IFCAP_TXCSUM;
3249		if (mask & IFCAP_TXCSUM_IPV6)
3250			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
3251		if (mask & IFCAP_TSO4)
3252			ifp->if_capenable ^= IFCAP_TSO4;
3253		if (mask & IFCAP_TSO6)
3254			ifp->if_capenable ^= IFCAP_TSO6;
3255
3256		if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO |
3257		    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWFILTER)) {
3258			/* Changing these features requires us to reinit. */
3259			reinit = 1;
3260
3261			if (mask & IFCAP_RXCSUM)
3262				ifp->if_capenable ^= IFCAP_RXCSUM;
3263			if (mask & IFCAP_RXCSUM_IPV6)
3264				ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
3265			if (mask & IFCAP_LRO)
3266				ifp->if_capenable ^= IFCAP_LRO;
3267			if (mask & IFCAP_VLAN_HWTAGGING)
3268				ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
3269			if (mask & IFCAP_VLAN_HWFILTER)
3270				ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
3271		} else
3272			reinit = 0;
3273
3274		if (mask & IFCAP_VLAN_HWTSO)
3275			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
3276
3277		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3278			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3279			vmxnet3_init_locked(sc);
3280		}
3281
3282		VMXNET3_CORE_UNLOCK(sc);
3283		VLAN_CAPABILITIES(ifp);
3284		break;
3285
3286	default:
3287		error = ether_ioctl(ifp, cmd, data);
3288		break;
3289	}
3290
3291	VMXNET3_CORE_LOCK_ASSERT_NOTOWNED(sc);
3292
3293	return (error);
3294}
3295
3296#ifndef VMXNET3_LEGACY_TX
3297static void
3298vmxnet3_qflush(struct ifnet *ifp)
3299{
3300	struct vmxnet3_softc *sc;
3301	struct vmxnet3_txqueue *txq;
3302	struct mbuf *m;
3303	int i;
3304
3305	sc = ifp->if_softc;
3306
3307	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3308		txq = &sc->vmx_txq[i];
3309
3310		VMXNET3_TXQ_LOCK(txq);
3311		while ((m = buf_ring_dequeue_sc(txq->vxtxq_br)) != NULL)
3312			m_freem(m);
3313		VMXNET3_TXQ_UNLOCK(txq);
3314	}
3315
3316	if_qflush(ifp);
3317}
3318#endif
3319
3320static int
3321vmxnet3_watchdog(struct vmxnet3_txqueue *txq)
3322{
3323	struct vmxnet3_softc *sc;
3324
3325	sc = txq->vxtxq_sc;
3326
3327	VMXNET3_TXQ_LOCK(txq);
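	/*
	 * The watchdog counter is armed when frames are queued, cleared
	 * when the ring drains, and decremented here once per tick; only
	 * report a timeout when the countdown reaches zero.
	 */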
3328	if (txq->vxtxq_watchdog == 0 || --txq->vxtxq_watchdog) {
3329		VMXNET3_TXQ_UNLOCK(txq);
3330		return (0);
3331	}
3332	VMXNET3_TXQ_UNLOCK(txq);
3333
3334	if_printf(sc->vmx_ifp, "watchdog timeout on queue %d\n",
3335	    txq->vxtxq_id);
3336	return (1);
3337}
3338
3339static void
3340vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
3341{
3342
3343	vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
3344}
3345
3346static void
3347vmxnet3_txq_accum_stats(struct vmxnet3_txqueue *txq,
3348    struct vmxnet3_txq_stats *accum)
3349{
3350	struct vmxnet3_txq_stats *st;
3351
3352	st = &txq->vxtxq_stats;
3353
3354	accum->vmtxs_opackets += st->vmtxs_opackets;
3355	accum->vmtxs_obytes += st->vmtxs_obytes;
3356	accum->vmtxs_omcasts += st->vmtxs_omcasts;
3357	accum->vmtxs_csum += st->vmtxs_csum;
3358	accum->vmtxs_tso += st->vmtxs_tso;
3359	accum->vmtxs_full += st->vmtxs_full;
3360	accum->vmtxs_offload_failed += st->vmtxs_offload_failed;
3361}
3362
3363static void
3364vmxnet3_rxq_accum_stats(struct vmxnet3_rxqueue *rxq,
3365    struct vmxnet3_rxq_stats *accum)
3366{
3367	struct vmxnet3_rxq_stats *st;
3368
3369	st = &rxq->vxrxq_stats;
3370
3371	accum->vmrxs_ipackets += st->vmrxs_ipackets;
3372	accum->vmrxs_ibytes += st->vmrxs_ibytes;
3373	accum->vmrxs_iqdrops += st->vmrxs_iqdrops;
3374	accum->vmrxs_ierrors += st->vmrxs_ierrors;
3375}
3376
3377static void
3378vmxnet3_accumulate_stats(struct vmxnet3_softc *sc)
3379{
3380	struct ifnet *ifp;
3381	struct vmxnet3_statistics *st;
3382	struct vmxnet3_txq_stats txaccum;
3383	struct vmxnet3_rxq_stats rxaccum;
3384	int i;
3385
3386	ifp = sc->vmx_ifp;
3387	st = &sc->vmx_stats;
3388
3389	bzero(&txaccum, sizeof(struct vmxnet3_txq_stats));
3390	bzero(&rxaccum, sizeof(struct vmxnet3_rxq_stats));
3391
3392	for (i = 0; i < sc->vmx_ntxqueues; i++)
3393		vmxnet3_txq_accum_stats(&sc->vmx_txq[i], &txaccum);
3394	for (i = 0; i < sc->vmx_nrxqueues; i++)
3395		vmxnet3_rxq_accum_stats(&sc->vmx_rxq[i], &rxaccum);
3396
3397	/*
3398	 * With the exception of if_ierrors, these ifnet statistics are
3399	 * only updated in the driver, so just set them to our accumulated
3400	 * values. if_ierrors is updated in ether_input() for malformed
3401	 * frames that we should have already discarded.
3402	 */
3403	ifp->if_ipackets = rxaccum.vmrxs_ipackets;
3404	ifp->if_iqdrops = rxaccum.vmrxs_iqdrops;
3405	ifp->if_ierrors = rxaccum.vmrxs_ierrors;
3406	ifp->if_opackets = txaccum.vmtxs_opackets;
3407#ifndef VMXNET3_LEGACY_TX
3408	ifp->if_obytes = txaccum.vmtxs_obytes;
3409	ifp->if_omcasts = txaccum.vmtxs_omcasts;
3410#endif
3411}
3412
3413static void
3414vmxnet3_tick(void *xsc)
3415{
3416	struct vmxnet3_softc *sc;
3417	struct ifnet *ifp;
3418	int i, timedout;
3419
3420	sc = xsc;
3421	ifp = sc->vmx_ifp;
3422	timedout = 0;
3423
3424	VMXNET3_CORE_LOCK_ASSERT(sc);
3425
3426	vmxnet3_accumulate_stats(sc);
3427	vmxnet3_refresh_host_stats(sc);
3428
3429	for (i = 0; i < sc->vmx_ntxqueues; i++)
3430		timedout |= vmxnet3_watchdog(&sc->vmx_txq[i]);
3431
3432	if (timedout != 0) {
3433		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3434		vmxnet3_init_locked(sc);
3435	} else
3436		callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
3437}
3438
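/*
 * The GET_LINK result packs the link speed (in Mbps) into the upper 16
 * bits and the link-up flag into bit 0; e.g. a status of 0x27100001
 * would indicate an active 10000 Mb/s link.
 */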
3439static int
3440vmxnet3_link_is_up(struct vmxnet3_softc *sc)
3441{
3442	uint32_t status;
3443
3444	/* Also update the link speed while here. */
3445	status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
3446	sc->vmx_link_speed = status >> 16;
3447	return !!(status & 0x1);
3448}
3449
3450static void
3451vmxnet3_link_status(struct vmxnet3_softc *sc)
3452{
3453	struct ifnet *ifp;
3454	int link;
3455
3456	ifp = sc->vmx_ifp;
3457	link = vmxnet3_link_is_up(sc);
3458
3459	if (link != 0 && sc->vmx_link_active == 0) {
3460		sc->vmx_link_active = 1;
3461		if_link_state_change(ifp, LINK_STATE_UP);
3462	} else if (link == 0 && sc->vmx_link_active != 0) {
3463		sc->vmx_link_active = 0;
3464		if_link_state_change(ifp, LINK_STATE_DOWN);
3465	}
3466}
3467
3468static void
3469vmxnet3_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3470{
3471	struct vmxnet3_softc *sc;
3472
3473	sc = ifp->if_softc;
3474
3475	ifmr->ifm_active = IFM_ETHER | IFM_AUTO;
3476	ifmr->ifm_status = IFM_AVALID;
3477
3478	VMXNET3_CORE_LOCK(sc);
3479	if (vmxnet3_link_is_up(sc) != 0)
3480		ifmr->ifm_status |= IFM_ACTIVE;
3481	else
3482		ifmr->ifm_status |= IFM_NONE;
3483	VMXNET3_CORE_UNLOCK(sc);
3484}
3485
3486static int
3487vmxnet3_media_change(struct ifnet *ifp)
3488{
3489
3490	/* Ignore. */
3491	return (0);
3492}
3493
3494static void
3495vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
3496{
3497	uint32_t ml, mh;
3498
3499	ml  = sc->vmx_lladdr[0];
3500	ml |= sc->vmx_lladdr[1] << 8;
3501	ml |= sc->vmx_lladdr[2] << 16;
3502	ml |= sc->vmx_lladdr[3] << 24;
3503	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
3504
3505	mh  = sc->vmx_lladdr[4];
3506	mh |= sc->vmx_lladdr[5] << 8;
3507	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
3508}
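/*
 * Example of the register layout used above and below with the
 * (hypothetical) MAC address 00:0c:29:aa:bb:cc: MACL holds 0xaa290c00
 * and MACH holds 0x0000ccbb, i.e. the address is stored byte 0 first
 * across the two registers.
 */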
3509
3510static void
3511vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
3512{
3513	uint32_t ml, mh;
3514
3515	ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
3516	mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
3517
3518	sc->vmx_lladdr[0] = ml;
3519	sc->vmx_lladdr[1] = ml >> 8;
3520	sc->vmx_lladdr[2] = ml >> 16;
3521	sc->vmx_lladdr[3] = ml >> 24;
3522	sc->vmx_lladdr[4] = mh;
3523	sc->vmx_lladdr[5] = mh >> 8;
3524}
3525
3526static void
3527vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
3528    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3529{
3530	struct sysctl_oid *node, *txsnode;
3531	struct sysctl_oid_list *list, *txslist;
3532	struct vmxnet3_txq_stats *stats;
3533	struct UPT1_TxStats *txstats;
3534	char namebuf[16];
3535
3536	stats = &txq->vxtxq_stats;
3537	txstats = &txq->vxtxq_ts->stats;
3538
3539	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id);
3540	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
3541	    NULL, "Transmit Queue");
3542	txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);
3543
3544	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
3545	    &stats->vmtxs_opackets, "Transmit packets");
3546	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
3547	    &stats->vmtxs_obytes, "Transmit bytes");
3548	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
3549	    &stats->vmtxs_omcasts, "Transmit multicasts");
3550	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3551	    &stats->vmtxs_csum, "Transmit checksum offloaded");
3552	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
3553	    &stats->vmtxs_tso, "Transmit TCP segmentation offloaded");
3554	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ringfull", CTLFLAG_RD,
3555	    &stats->vmtxs_full, "Transmit ring full");
3556	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "offload_failed", CTLFLAG_RD,
3557	    &stats->vmtxs_offload_failed, "Transmit checksum offload failed");
3558
3559	/*
3560	 * Add statistics reported by the host. These are updated once
3561	 * per second.
3562	 */
3563	txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
3564	    NULL, "Host Statistics");
3565	txslist = SYSCTL_CHILDREN(txsnode);
3566	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
3567	    &txstats->TSO_packets, "TSO packets");
3568	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
3569	    &txstats->TSO_bytes, "TSO bytes");
3570	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
3571	    &txstats->ucast_packets, "Unicast packets");
3572	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
3573	    &txstats->ucast_bytes, "Unicast bytes");
3574	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
3575	    &txstats->mcast_packets, "Multicast packets");
3576	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
3577	    &txstats->mcast_bytes, "Multicast bytes");
3578	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
3579	    &txstats->error, "Errors");
3580	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
3581	    &txstats->discard, "Discards");
3582}
3583
3584static void
3585vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
3586    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3587{
3588	struct sysctl_oid *node, *rxsnode;
3589	struct sysctl_oid_list *list, *rxslist;
3590	struct vmxnet3_rxq_stats *stats;
3591	struct UPT1_RxStats *rxstats;
3592	char namebuf[16];
3593
3594	stats = &rxq->vxrxq_stats;
3595	rxstats = &rxq->vxrxq_rs->stats;
3596
3597	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
3598	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
3599	    NULL, "Receive Queue");
3600	rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);
3601
3602	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
3603	    &stats->vmrxs_ipackets, "Receive packets");
3604	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
3605	    &stats->vmrxs_ibytes, "Receive bytes");
3606	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
3607	    &stats->vmrxs_iqdrops, "Receive drops");
3608	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
3609	    &stats->vmrxs_ierrors, "Receive errors");
3610
3611	/*
3612	 * Add statistics reported by the host. These are updated once
3613	 * per second.
3614	 */
3615	rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
3616	    NULL, "Host Statistics");
3617	rxslist = SYSCTL_CHILDREN(rxsnode);
3618	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
3619	    &rxstats->LRO_packets, "LRO packets");
3620	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
3621	    &rxstats->LRO_bytes, "LRO bytes");
3622	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
3623	    &rxstats->ucast_packets, "Unicast packets");
3624	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
3625	    &rxstats->ucast_bytes, "Unicast bytes");
3626	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
3627	    &rxstats->mcast_packets, "Multicast packets");
3628	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
3629	    &rxstats->mcast_bytes, "Multicast bytes");
3630	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
3631	    &rxstats->bcast_packets, "Broadcast packets");
3632	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
3633	    &rxstats->bcast_bytes, "Broadcast bytes");
3634	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
3635	    &rxstats->nobuffer, "No buffer");
3636	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
3637	    &rxstats->error, "Errors");
3638}
3639
3640static void
3641vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
3642    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3643{
3644	struct sysctl_oid *node;
3645	struct sysctl_oid_list *list;
3646	int i;
3647
3648	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3649		struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];
3650
3651		node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
3652		    "debug", CTLFLAG_RD, NULL, "");
3653		list = SYSCTL_CHILDREN(node);
3654
3655		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_head", CTLFLAG_RD,
3656		    &txq->vxtxq_cmd_ring.vxtxr_head, 0, "");
3657		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
3658		    &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
3659		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
3660		    &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
3661		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
3662		    &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
3663		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
3664		    &txq->vxtxq_comp_ring.vxcr_next, 0, "");
3665		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
3666		    &txq->vxtxq_comp_ring.vxcr_ndesc, 0, "");
3667		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
3668		    &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
3669	}
3670
3671	for (i = 0; i < sc->vmx_nrxqueues; i++) {
3672		struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];
3673
3674		node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
3675		    "debug", CTLFLAG_RD, NULL, "");
3676		list = SYSCTL_CHILDREN(node);
3677
3678		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_fill", CTLFLAG_RD,
3679		    &rxq->vxrxq_cmd_ring[0].vxrxr_fill, 0, "");
3680		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
3681		    &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
3682		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
3683		    &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
3684		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_fill", CTLFLAG_RD,
3685		    &rxq->vxrxq_cmd_ring[1].vxrxr_fill, 0, "");
3686		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
3687		    &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
3688		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
3689		    &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
3690		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
3691		    &rxq->vxrxq_comp_ring.vxcr_next, 0, "");
3692		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
3693		    &rxq->vxrxq_comp_ring.vxcr_ndesc, 0, "");
3694		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
3695		    &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
3696	}
3697}
3698
3699static void
3700vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
3701    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3702{
3703	int i;
3704
3705	for (i = 0; i < sc->vmx_ntxqueues; i++)
3706		vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
3707	for (i = 0; i < sc->vmx_nrxqueues; i++)
3708		vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);
3709
3710	vmxnet3_setup_debug_sysctl(sc, ctx, child);
3711}
3712
3713static void
3714vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
3715{
3716	device_t dev;
3717	struct vmxnet3_statistics *stats;
3718	struct sysctl_ctx_list *ctx;
3719	struct sysctl_oid *tree;
3720	struct sysctl_oid_list *child;
3721
3722	dev = sc->vmx_dev;
3723	ctx = device_get_sysctl_ctx(dev);
3724	tree = device_get_sysctl_tree(dev);
3725	child = SYSCTL_CHILDREN(tree);
3726
3727	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_ntxqueues", CTLFLAG_RD,
3728	    &sc->vmx_max_ntxqueues, 0, "Maximum number of Tx queues");
3729	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_nrxqueues", CTLFLAG_RD,
3730	    &sc->vmx_max_nrxqueues, 0, "Maximum number of Rx queues");
3731	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "ntxqueues", CTLFLAG_RD,
3732	    &sc->vmx_ntxqueues, 0, "Number of Tx queues");
3733	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "nrxqueues", CTLFLAG_RD,
3734	    &sc->vmx_nrxqueues, 0, "Number of Rx queues");
3735
3736	stats = &sc->vmx_stats;
3737	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defragged", CTLFLAG_RD,
3738	    &stats->vmst_defragged, 0, "Tx mbuf chains defragged");
3739	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defrag_failed", CTLFLAG_RD,
3740	    &stats->vmst_defrag_failed, 0,
3741	    "Tx mbuf dropped because defrag failed");
3742	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mgetcl_failed", CTLFLAG_RD,
3743	    &stats->vmst_mgetcl_failed, 0, "mbuf cluster allocation failed");
3744	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mbuf_load_failed", CTLFLAG_RD,
3745	    &stats->vmst_mbuf_load_failed, 0, "mbuf load segments failed");
3746
3747	vmxnet3_setup_queue_sysctl(sc, ctx, child);
3748}
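/*
 * The nodes above land under the device's sysctl tree, so for unit 0
 * something like "sysctl dev.vmx.0.txq0.opackets" should report the
 * per-queue transmit packet count, assuming the standard dev.<driver>
 * naming provided by device_get_sysctl_tree().
 */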
3749
3750static void
3751vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
3752{
3753
3754	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
3755}
3756
3757static uint32_t
3758vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
3759{
3760
3761	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
3762}
3763
3764static void
3765vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
3766{
3767
3768	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
3769}
3770
3771static void
3772vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
3773{
3774
3775	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
3776}
3777
3778static uint32_t
3779vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
3780{
3781
3782	vmxnet3_write_cmd(sc, cmd);
3783	bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
3784	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
3785	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
3786}
3787
3788static void
3789vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
3790{
3791
3792	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
3793}
3794
3795static void
3796vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
3797{
3798
3799	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
3800}
3801
3802static void
3803vmxnet3_enable_all_intrs(struct vmxnet3_softc *sc)
3804{
3805	int i;
3806
3807	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
3808	for (i = 0; i < sc->vmx_nintrs; i++)
3809		vmxnet3_enable_intr(sc, i);
3810}
3811
3812static void
3813vmxnet3_disable_all_intrs(struct vmxnet3_softc *sc)
3814{
3815	int i;
3816
3817	sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
3818	for (i = 0; i < sc->vmx_nintrs; i++)
3819		vmxnet3_disable_intr(sc, i);
3820}
3821
3822static void
3823vmxnet3_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3824{
3825	bus_addr_t *baddr = arg;
3826
3827	if (error == 0)
3828		*baddr = segs->ds_addr;
3829}
3830
3831static int
3832vmxnet3_dma_malloc(struct vmxnet3_softc *sc, bus_size_t size, bus_size_t align,
3833    struct vmxnet3_dma_alloc *dma)
3834{
3835	device_t dev;
3836	int error;
3837
3838	dev = sc->vmx_dev;
3839	bzero(dma, sizeof(struct vmxnet3_dma_alloc));
3840
3841	error = bus_dma_tag_create(bus_get_dma_tag(dev),
3842	    align, 0,		/* alignment, bounds */
3843	    BUS_SPACE_MAXADDR,	/* lowaddr */
3844	    BUS_SPACE_MAXADDR,	/* highaddr */
3845	    NULL, NULL,		/* filter, filterarg */
3846	    size,		/* maxsize */
3847	    1,			/* nsegments */
3848	    size,		/* maxsegsize */
3849	    BUS_DMA_ALLOCNOW,	/* flags */
3850	    NULL,		/* lockfunc */
3851	    NULL,		/* lockfuncarg */
3852	    &dma->dma_tag);
3853	if (error) {
3854		device_printf(dev, "bus_dma_tag_create failed: %d\n", error);
3855		goto fail;
3856	}
3857
3858	error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
3859	    BUS_DMA_ZERO | BUS_DMA_NOWAIT, &dma->dma_map);
3860	if (error) {
3861		device_printf(dev, "bus_dmamem_alloc failed: %d\n", error);
3862		goto fail;
3863	}
3864
3865	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3866	    size, vmxnet3_dmamap_cb, &dma->dma_paddr, BUS_DMA_NOWAIT);
3867	if (error) {
3868		device_printf(dev, "bus_dmamap_load failed: %d\n", error);
3869		goto fail;
3870	}
3871
3872	dma->dma_size = size;
3873
3874fail:
3875	if (error)
3876		vmxnet3_dma_free(sc, dma);
3877
3878	return (error);
3879}
3880
3881static void
3882vmxnet3_dma_free(struct vmxnet3_softc *sc, struct vmxnet3_dma_alloc *dma)
3883{
3884
3885	if (dma->dma_tag != NULL) {
3886		if (dma->dma_paddr != 0) {
3887			bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3888			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3889			bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3890		}
3891
3892		if (dma->dma_vaddr != NULL) {
3893			bus_dmamem_free(dma->dma_tag, dma->dma_vaddr,
3894			    dma->dma_map);
3895		}
3896
3897		bus_dma_tag_destroy(dma->dma_tag);
3898	}
3899	bzero(dma, sizeof(struct vmxnet3_dma_alloc));
3900}
3901
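/*
 * Fetch a per-device loader tunable, e.g. a hypothetical knob "foo" on
 * unit 0 is overridden by setting hw.vmx.0.foo=2 in /boot/loader.conf;
 * when the tunable is absent the compiled-in default is returned
 * unchanged.
 */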
3902static int
3903vmxnet3_tunable_int(struct vmxnet3_softc *sc, const char *knob, int def)
3904{
3905	char path[64];
3906
3907	snprintf(path, sizeof(path),
3908	    "hw.vmx.%d.%s", device_get_unit(sc->vmx_dev), knob);
3909	TUNABLE_INT_FETCH(path, &def);
3910
3911	return (def);
3912}
3913
3914/*
3915 * Since this is a purely paravirtualized device, we do not have
3916 * to worry about DMA coherency. But at times, we must make sure
3917 * both the compiler and CPU do not reorder memory operations.
3918 */
3919static inline void
3920vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
3921{
3922
3923	switch (type) {
3924	case VMXNET3_BARRIER_RD:
3925		rmb();
3926		break;
3927	case VMXNET3_BARRIER_WR:
3928		wmb();
3929		break;
3930	case VMXNET3_BARRIER_RDWR:
3931		mb();
3932		break;
3933	default:
3934		panic("%s: bad barrier type %d", __func__, type);
3935	}
3936}
3937