/*-
 * Copyright (c) 2013 Tsubai Masanari
 * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $
 */

/* Driver for VMware vmxnet3 virtual ethernet devices. */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/vmware/vmxnet3/if_vmx.c 268012 2014-06-29 01:04:11Z bryanv $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/endian.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/smp.h>
#include <sys/taskqueue.h>
#include <vm/vm.h>
#include <vm/pmap.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/if_media.h>
#include <net/if_vlan_var.h>

#include <net/bpf.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>

#include <machine/in_cksum.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include "if_vmxreg.h"
#include "if_vmxvar.h"

#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef VMXNET3_FAILPOINTS
#include <sys/fail.h>
static SYSCTL_NODE(DEBUG_FP, OID_AUTO, vmxnet3, CTLFLAG_RW, 0,
    "vmxnet3 fail points");
#define VMXNET3_FP	_debug_fail_point_vmxnet3
#endif

static int	vmxnet3_probe(device_t);
static int	vmxnet3_attach(device_t);
static int	vmxnet3_detach(device_t);
static int	vmxnet3_shutdown(device_t);

static int	vmxnet3_alloc_resources(struct vmxnet3_softc *);
static void	vmxnet3_free_resources(struct vmxnet3_softc *);
static int	vmxnet3_check_version(struct vmxnet3_softc *);
static void	vmxnet3_initial_config(struct vmxnet3_softc *);
static void	vmxnet3_check_multiqueue(struct vmxnet3_softc *);

static int	vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *);
static int	vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *);
static int	vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *);
static int	vmxnet3_alloc_interrupt(struct vmxnet3_softc *, int, int,
		    struct vmxnet3_interrupt *);
static int	vmxnet3_alloc_intr_resources(struct vmxnet3_softc *);
static int	vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *);
static int	vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *);
static int	vmxnet3_setup_interrupts(struct vmxnet3_softc *);
static int	vmxnet3_alloc_interrupts(struct vmxnet3_softc *);

static void	vmxnet3_free_interrupt(struct vmxnet3_softc *,
		    struct vmxnet3_interrupt *);
static void	vmxnet3_free_interrupts(struct vmxnet3_softc *);

#ifndef VMXNET3_LEGACY_TX
static int	vmxnet3_alloc_taskqueue(struct vmxnet3_softc *);
static void	vmxnet3_start_taskqueue(struct vmxnet3_softc *);
static void	vmxnet3_drain_taskqueue(struct vmxnet3_softc *);
static void	vmxnet3_free_taskqueue(struct vmxnet3_softc *);
#endif

static int	vmxnet3_init_rxq(struct vmxnet3_softc *, int);
static int	vmxnet3_init_txq(struct vmxnet3_softc *, int);
static int	vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *);
static void	vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *);
static void	vmxnet3_destroy_txq(struct vmxnet3_txqueue *);
static void	vmxnet3_free_rxtx_queues(struct vmxnet3_softc *);

static int	vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
static void	vmxnet3_free_shared_data(struct vmxnet3_softc *);
static int	vmxnet3_alloc_txq_data(struct vmxnet3_softc *);
static void	vmxnet3_free_txq_data(struct vmxnet3_softc *);
static int	vmxnet3_alloc_rxq_data(struct vmxnet3_softc *);
static void	vmxnet3_free_rxq_data(struct vmxnet3_softc *);
static int	vmxnet3_alloc_queue_data(struct vmxnet3_softc *);
static void	vmxnet3_free_queue_data(struct vmxnet3_softc *);
static int	vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
static void	vmxnet3_init_shared_data(struct vmxnet3_softc *);
static void	vmxnet3_reinit_interface(struct vmxnet3_softc *);
static void	vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
static void	vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
static int	vmxnet3_alloc_data(struct vmxnet3_softc *);
static void	vmxnet3_free_data(struct vmxnet3_softc *);
static int	vmxnet3_setup_interface(struct vmxnet3_softc *);

static void	vmxnet3_evintr(struct vmxnet3_softc *);
static void	vmxnet3_txq_eof(struct vmxnet3_txqueue *);
static void	vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *, struct mbuf *);
static int	vmxnet3_newbuf(struct vmxnet3_softc *, struct vmxnet3_rxring *);
static void	vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *,
		    struct vmxnet3_rxring *, int);
static void	vmxnet3_rxq_eof(struct vmxnet3_rxqueue *);
static void	vmxnet3_legacy_intr(void *);
static void	vmxnet3_txq_intr(void *);
static void	vmxnet3_rxq_intr(void *);
static void	vmxnet3_event_intr(void *);

static void	vmxnet3_txstop(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
static void	vmxnet3_rxstop(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
static void	vmxnet3_stop(struct vmxnet3_softc *);

static void	vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
static int	vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
static int	vmxnet3_reinit_queues(struct vmxnet3_softc *);
static int	vmxnet3_enable_device(struct vmxnet3_softc *);
static void	vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
static int	vmxnet3_reinit(struct vmxnet3_softc *);
static void	vmxnet3_init_locked(struct vmxnet3_softc *);
static void	vmxnet3_init(void *);

static int	vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *,struct mbuf *,
		    int *, int *, int *);
static int	vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *, struct mbuf **,
		    bus_dmamap_t, bus_dma_segment_t [], int *);
static void	vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *, bus_dmamap_t);
static int	vmxnet3_txq_encap(struct vmxnet3_txqueue *, struct mbuf **);
#ifdef VMXNET3_LEGACY_TX
static void	vmxnet3_start_locked(struct ifnet *);
static void	vmxnet3_start(struct ifnet *);
#else
static int	vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *,
		    struct mbuf *);
static int	vmxnet3_txq_mq_start(struct ifnet *, struct mbuf *);
static void	vmxnet3_txq_tq_deferred(void *, int);
#endif
static void	vmxnet3_txq_start(struct vmxnet3_txqueue *);
static void	vmxnet3_tx_start_all(struct vmxnet3_softc *);

static void	vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
		    uint16_t);
static void	vmxnet3_register_vlan(void *, struct ifnet *, uint16_t);
static void	vmxnet3_unregister_vlan(void *, struct ifnet *, uint16_t);
static void	vmxnet3_set_rxfilter(struct vmxnet3_softc *);
static int	vmxnet3_change_mtu(struct vmxnet3_softc *, int);
static int	vmxnet3_ioctl(struct ifnet *, u_long, caddr_t);

#ifndef VMXNET3_LEGACY_TX
static void	vmxnet3_qflush(struct ifnet *);
#endif

static int	vmxnet3_watchdog(struct vmxnet3_txqueue *);
static void	vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
static void	vmxnet3_txq_accum_stats(struct vmxnet3_txqueue *,
		    struct vmxnet3_txq_stats *);
static void	vmxnet3_rxq_accum_stats(struct vmxnet3_rxqueue *,
		    struct vmxnet3_rxq_stats *);
static void	vmxnet3_tick(void *);
static void	vmxnet3_link_status(struct vmxnet3_softc *);
static void	vmxnet3_media_status(struct ifnet *, struct ifmediareq *);
static int	vmxnet3_media_change(struct ifnet *);
static void	vmxnet3_set_lladdr(struct vmxnet3_softc *);
static void	vmxnet3_get_lladdr(struct vmxnet3_softc *);

static void	vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *,
		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
static void	vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *,
		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
static void	vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *,
		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
static void	vmxnet3_setup_sysctl(struct vmxnet3_softc *);

static void	vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t,
		    uint32_t);
static uint32_t	vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t);
static void	vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t,
		    uint32_t);
static void	vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t);
static uint32_t	vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t);

static void	vmxnet3_enable_intr(struct vmxnet3_softc *, int);
static void	vmxnet3_disable_intr(struct vmxnet3_softc *, int);
static void	vmxnet3_enable_all_intrs(struct vmxnet3_softc *);
static void	vmxnet3_disable_all_intrs(struct vmxnet3_softc *);

static int	vmxnet3_dma_malloc(struct vmxnet3_softc *, bus_size_t,
		    bus_size_t, struct vmxnet3_dma_alloc *);
static void	vmxnet3_dma_free(struct vmxnet3_softc *,
		    struct vmxnet3_dma_alloc *);
static int	vmxnet3_tunable_int(struct vmxnet3_softc *,
		    const char *, int);

typedef enum {
	VMXNET3_BARRIER_RD,
	VMXNET3_BARRIER_WR,
	VMXNET3_BARRIER_RDWR,
} vmxnet3_barrier_t;

static void	vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);

/* Tunables. */
static int vmxnet3_mq_disable = 0;
TUNABLE_INT("hw.vmx.mq_disable", &vmxnet3_mq_disable);
static int vmxnet3_default_txnqueue = VMXNET3_DEF_TX_QUEUES;
TUNABLE_INT("hw.vmx.txnqueue", &vmxnet3_default_txnqueue);
static int vmxnet3_default_rxnqueue = VMXNET3_DEF_RX_QUEUES;
TUNABLE_INT("hw.vmx.rxnqueue", &vmxnet3_default_rxnqueue);
static int vmxnet3_default_txndesc = VMXNET3_DEF_TX_NDESC;
TUNABLE_INT("hw.vmx.txndesc", &vmxnet3_default_txndesc);
static int vmxnet3_default_rxndesc = VMXNET3_DEF_RX_NDESC;
TUNABLE_INT("hw.vmx.rxndesc", &vmxnet3_default_rxndesc);

static device_method_t vmxnet3_methods[] = {
	/* Device interface. */
	DEVMETHOD(device_probe,		vmxnet3_probe),
	DEVMETHOD(device_attach,	vmxnet3_attach),
	DEVMETHOD(device_detach,	vmxnet3_detach),
	DEVMETHOD(device_shutdown,	vmxnet3_shutdown),

	DEVMETHOD_END
};

static driver_t vmxnet3_driver = {
	"vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc)
};

static devclass_t vmxnet3_devclass;
DRIVER_MODULE(vmx, pci, vmxnet3_driver, vmxnet3_devclass, 0, 0);

MODULE_DEPEND(vmx, pci, 1, 1, 1);
MODULE_DEPEND(vmx, ether, 1, 1, 1);

#define VMXNET3_VMWARE_VENDOR_ID	0x15AD
#define VMXNET3_VMWARE_DEVICE_ID	0x07B0

280vmxnet3_probe(device_t dev)
281{
282
283	if (pci_get_vendor(dev) == VMXNET3_VMWARE_VENDOR_ID &&
284	    pci_get_device(dev) == VMXNET3_VMWARE_DEVICE_ID) {
285		device_set_desc(dev, "VMware VMXNET3 Ethernet Adapter");
286		return (BUS_PROBE_DEFAULT);
287	}
288
289	return (ENXIO);
290}
291
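/*
 * Attach: map the BARs, negotiate the device version, size the Rx/Tx
 * queues, allocate interrupts and DMA'able shared state, and create the
 * ifnet. Any failure falls through to vmxnet3_detach() for cleanup.
 */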
static int
vmxnet3_attach(device_t dev)
{
	struct vmxnet3_softc *sc;
	int error;

	sc = device_get_softc(dev);
	sc->vmx_dev = dev;

	pci_enable_busmaster(dev);

	VMXNET3_CORE_LOCK_INIT(sc, device_get_nameunit(dev));
	callout_init_mtx(&sc->vmx_tick, &sc->vmx_mtx, 0);

	vmxnet3_initial_config(sc);

	error = vmxnet3_alloc_resources(sc);
	if (error)
		goto fail;

	error = vmxnet3_check_version(sc);
	if (error)
		goto fail;

	error = vmxnet3_alloc_rxtx_queues(sc);
	if (error)
		goto fail;

#ifndef VMXNET3_LEGACY_TX
	error = vmxnet3_alloc_taskqueue(sc);
	if (error)
		goto fail;
#endif

	error = vmxnet3_alloc_interrupts(sc);
	if (error)
		goto fail;

	vmxnet3_check_multiqueue(sc);

	error = vmxnet3_alloc_data(sc);
	if (error)
		goto fail;

	error = vmxnet3_setup_interface(sc);
	if (error)
		goto fail;

	error = vmxnet3_setup_interrupts(sc);
	if (error) {
		ether_ifdetach(sc->vmx_ifp);
		device_printf(dev, "could not set up interrupt\n");
		goto fail;
	}

	vmxnet3_setup_sysctl(sc);
#ifndef VMXNET3_LEGACY_TX
	vmxnet3_start_taskqueue(sc);
#endif

fail:
	if (error)
		vmxnet3_detach(dev);

	return (error);
}

static int
vmxnet3_detach(device_t dev)
{
	struct vmxnet3_softc *sc;
	struct ifnet *ifp;

	sc = device_get_softc(dev);
	ifp = sc->vmx_ifp;

	if (device_is_attached(dev)) {
		VMXNET3_CORE_LOCK(sc);
		vmxnet3_stop(sc);
		VMXNET3_CORE_UNLOCK(sc);

		callout_drain(&sc->vmx_tick);
#ifndef VMXNET3_LEGACY_TX
		vmxnet3_drain_taskqueue(sc);
#endif

		ether_ifdetach(ifp);
	}

	if (sc->vmx_vlan_attach != NULL) {
		EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_attach);
		sc->vmx_vlan_attach = NULL;
	}
	if (sc->vmx_vlan_detach != NULL) {
		EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_detach);
		sc->vmx_vlan_detach = NULL;
	}

#ifndef VMXNET3_LEGACY_TX
	vmxnet3_free_taskqueue(sc);
#endif
	vmxnet3_free_interrupts(sc);

	if (ifp != NULL) {
		if_free(ifp);
		sc->vmx_ifp = NULL;
	}

	ifmedia_removeall(&sc->vmx_media);

	vmxnet3_free_data(sc);
	vmxnet3_free_resources(sc);
	vmxnet3_free_rxtx_queues(sc);

	VMXNET3_CORE_LOCK_DESTROY(sc);

	return (0);
}

static int
vmxnet3_shutdown(device_t dev)
{

	return (0);
}

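/*
 * BAR0 provides the Tx/Rx doorbell and interrupt mask registers and BAR1
 * the configuration/command registers; the optional MSI-X vector table
 * sits behind BAR2. If BAR2 cannot be mapped, MSI-X is disabled for this
 * device.
 */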
static int
vmxnet3_alloc_resources(struct vmxnet3_softc *sc)
{
	device_t dev;
	int rid;

	dev = sc->vmx_dev;

	rid = PCIR_BAR(0);
	sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
	    RF_ACTIVE);
	if (sc->vmx_res0 == NULL) {
		device_printf(dev,
		    "could not map BAR0 memory\n");
		return (ENXIO);
	}

	sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0);
	sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0);

	rid = PCIR_BAR(1);
	sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
	    RF_ACTIVE);
	if (sc->vmx_res1 == NULL) {
		device_printf(dev,
		    "could not map BAR1 memory\n");
		return (ENXIO);
	}

	sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1);
	sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1);

	if (pci_find_cap(dev, PCIY_MSIX, NULL) == 0) {
		rid = PCIR_BAR(2);
		sc->vmx_msix_res = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
	}

	if (sc->vmx_msix_res == NULL)
		sc->vmx_flags |= VMXNET3_FLAG_NO_MSIX;

	return (0);
}

static void
vmxnet3_free_resources(struct vmxnet3_softc *sc)
{
	device_t dev;
	int rid;

	dev = sc->vmx_dev;

	if (sc->vmx_res0 != NULL) {
		rid = PCIR_BAR(0);
		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res0);
		sc->vmx_res0 = NULL;
	}

	if (sc->vmx_res1 != NULL) {
		rid = PCIR_BAR(1);
		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res1);
		sc->vmx_res1 = NULL;
	}

	if (sc->vmx_msix_res != NULL) {
		rid = PCIR_BAR(2);
		bus_release_resource(dev, SYS_RES_MEMORY, rid,
		    sc->vmx_msix_res);
		sc->vmx_msix_res = NULL;
	}
}

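/*
 * Version handshake: read the supported revision masks from BAR1 and
 * select revision 1 of both the device and UPT interfaces by writing the
 * selection back.
 */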
static int
vmxnet3_check_version(struct vmxnet3_softc *sc)
{
	device_t dev;
	uint32_t version;

	dev = sc->vmx_dev;

	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
	if ((version & 0x01) == 0) {
		device_printf(dev, "unsupported hardware version %#x\n",
		    version);
		return (ENOTSUP);
	}
	vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);

	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
	if ((version & 0x01) == 0) {
		device_printf(dev, "unsupported UPT version %#x\n", version);
		return (ENOTSUP);
	}
	vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);

	return (0);
}

static void
vmxnet3_initial_config(struct vmxnet3_softc *sc)
{
	int nqueue, ndesc;

	nqueue = vmxnet3_tunable_int(sc, "txnqueue", vmxnet3_default_txnqueue);
	if (nqueue > VMXNET3_MAX_TX_QUEUES || nqueue < 1)
		nqueue = VMXNET3_DEF_TX_QUEUES;
	if (nqueue > mp_ncpus)
		nqueue = mp_ncpus;
	sc->vmx_max_ntxqueues = nqueue;

	nqueue = vmxnet3_tunable_int(sc, "rxnqueue", vmxnet3_default_rxnqueue);
	if (nqueue > VMXNET3_MAX_RX_QUEUES || nqueue < 1)
		nqueue = VMXNET3_DEF_RX_QUEUES;
	if (nqueue > mp_ncpus)
		nqueue = mp_ncpus;
	sc->vmx_max_nrxqueues = nqueue;

	if (vmxnet3_tunable_int(sc, "mq_disable", vmxnet3_mq_disable)) {
		sc->vmx_max_nrxqueues = 1;
		sc->vmx_max_ntxqueues = 1;
	}

	ndesc = vmxnet3_tunable_int(sc, "txd", vmxnet3_default_txndesc);
	if (ndesc > VMXNET3_MAX_TX_NDESC || ndesc < VMXNET3_MIN_TX_NDESC)
		ndesc = VMXNET3_DEF_TX_NDESC;
	if (ndesc & VMXNET3_MASK_TX_NDESC)
		ndesc &= ~VMXNET3_MASK_TX_NDESC;
	sc->vmx_ntxdescs = ndesc;

	ndesc = vmxnet3_tunable_int(sc, "rxd", vmxnet3_default_rxndesc);
	if (ndesc > VMXNET3_MAX_RX_NDESC || ndesc < VMXNET3_MIN_RX_NDESC)
		ndesc = VMXNET3_DEF_RX_NDESC;
	if (ndesc & VMXNET3_MASK_RX_NDESC)
		ndesc &= ~VMXNET3_MASK_RX_NDESC;
	sc->vmx_nrxdescs = ndesc;
	sc->vmx_max_rxsegs = VMXNET3_MAX_RX_SEGS;
}

static void
vmxnet3_check_multiqueue(struct vmxnet3_softc *sc)
{

	if (sc->vmx_intr_type != VMXNET3_IT_MSIX)
		goto out;

	/* BMV: Just use the maximum configured for now. */
	sc->vmx_nrxqueues = sc->vmx_max_nrxqueues;
	sc->vmx_ntxqueues = sc->vmx_max_ntxqueues;

	if (sc->vmx_nrxqueues > 1)
		sc->vmx_flags |= VMXNET3_FLAG_RSS;

	return;

out:
	sc->vmx_ntxqueues = 1;
	sc->vmx_nrxqueues = 1;
}

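/*
 * MSI-X needs one vector per Tx queue and per Rx queue, plus one more for
 * device events; with fewer vectors available the driver falls back to a
 * simpler interrupt mode.
 */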
static int
vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *sc)
{
	device_t dev;
	int nmsix, cnt, required;

	dev = sc->vmx_dev;

	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX)
		return (1);

	/* Allocate an additional vector for the events interrupt. */
	required = sc->vmx_max_nrxqueues + sc->vmx_max_ntxqueues + 1;

	nmsix = pci_msix_count(dev);
	if (nmsix < required)
		return (1);

	cnt = required;
	if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) {
		sc->vmx_nintrs = required;
		return (0);
	} else
		pci_release_msi(dev);

	/* BMV TODO Fallback to sharing MSIX vectors if possible. */

	return (1);
}

static int
vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *sc)
{
	device_t dev;
	int nmsi, cnt, required;

	dev = sc->vmx_dev;
	required = 1;

	nmsi = pci_msi_count(dev);
	if (nmsi < required)
		return (1);

	cnt = required;
	if (pci_alloc_msi(dev, &cnt) == 0 && cnt >= required) {
		sc->vmx_nintrs = 1;
		return (0);
	} else
		pci_release_msi(dev);

	return (1);
}

static int
vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *sc)
{

	sc->vmx_nintrs = 1;
	return (0);
}

static int
vmxnet3_alloc_interrupt(struct vmxnet3_softc *sc, int rid, int flags,
    struct vmxnet3_interrupt *intr)
{
	struct resource *irq;

	irq = bus_alloc_resource_any(sc->vmx_dev, SYS_RES_IRQ, &rid, flags);
	if (irq == NULL)
		return (ENXIO);

	intr->vmxi_irq = irq;
	intr->vmxi_rid = rid;

	return (0);
}

static int
vmxnet3_alloc_intr_resources(struct vmxnet3_softc *sc)
{
	int i, rid, flags, error;

	rid = 0;
	flags = RF_ACTIVE;

	if (sc->vmx_intr_type == VMXNET3_IT_LEGACY)
		flags |= RF_SHAREABLE;
	else
		rid = 1;

	for (i = 0; i < sc->vmx_nintrs; i++, rid++) {
		error = vmxnet3_alloc_interrupt(sc, rid, flags,
		    &sc->vmx_intrs[i]);
		if (error)
			return (error);
	}

	return (0);
}

static int
vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *sc)
{
	device_t dev;
	struct vmxnet3_txqueue *txq;
	struct vmxnet3_rxqueue *rxq;
	struct vmxnet3_interrupt *intr;
	enum intr_type type;
	int i, error;

	dev = sc->vmx_dev;
	intr = &sc->vmx_intrs[0];
	type = INTR_TYPE_NET | INTR_MPSAFE;

	for (i = 0; i < sc->vmx_ntxqueues; i++, intr++) {
		txq = &sc->vmx_txq[i];
		error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
		     vmxnet3_txq_intr, txq, &intr->vmxi_handler);
		if (error)
			return (error);
		bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler,
		    "tq%d", i);
		txq->vxtxq_intr_idx = intr->vmxi_rid - 1;
	}

	for (i = 0; i < sc->vmx_nrxqueues; i++, intr++) {
		rxq = &sc->vmx_rxq[i];
		error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
		    vmxnet3_rxq_intr, rxq, &intr->vmxi_handler);
		if (error)
			return (error);
		bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler,
		    "rq%d", i);
		rxq->vxrxq_intr_idx = intr->vmxi_rid - 1;
	}

	error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
	    vmxnet3_event_intr, sc, &intr->vmxi_handler);
	if (error)
		return (error);
	bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler, "event");
	sc->vmx_event_intr_idx = intr->vmxi_rid - 1;

	return (0);
}

static int
vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *sc)
{
	struct vmxnet3_interrupt *intr;
	int i, error;

	intr = &sc->vmx_intrs[0];
	error = bus_setup_intr(sc->vmx_dev, intr->vmxi_irq,
	    INTR_TYPE_NET | INTR_MPSAFE, NULL, vmxnet3_legacy_intr, sc,
	    &intr->vmxi_handler);

	for (i = 0; i < sc->vmx_ntxqueues; i++)
		sc->vmx_txq[i].vxtxq_intr_idx = 0;
	for (i = 0; i < sc->vmx_nrxqueues; i++)
		sc->vmx_rxq[i].vxrxq_intr_idx = 0;
	sc->vmx_event_intr_idx = 0;

	return (error);
}

static void
vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
{
	struct vmxnet3_txqueue *txq;
	struct vmxnet3_txq_shared *txs;
	struct vmxnet3_rxqueue *rxq;
	struct vmxnet3_rxq_shared *rxs;
	int i;

	sc->vmx_ds->evintr = sc->vmx_event_intr_idx;

	for (i = 0; i < sc->vmx_ntxqueues; i++) {
		txq = &sc->vmx_txq[i];
		txs = txq->vxtxq_ts;
		txs->intr_idx = txq->vxtxq_intr_idx;
	}

	for (i = 0; i < sc->vmx_nrxqueues; i++) {
		rxq = &sc->vmx_rxq[i];
		rxs = rxq->vxrxq_rs;
		rxs->intr_idx = rxq->vxrxq_intr_idx;
	}
}

static int
vmxnet3_setup_interrupts(struct vmxnet3_softc *sc)
{
	int error;

	error = vmxnet3_alloc_intr_resources(sc);
	if (error)
		return (error);

	switch (sc->vmx_intr_type) {
	case VMXNET3_IT_MSIX:
		error = vmxnet3_setup_msix_interrupts(sc);
		break;
	case VMXNET3_IT_MSI:
	case VMXNET3_IT_LEGACY:
		error = vmxnet3_setup_legacy_interrupt(sc);
		break;
	default:
		panic("%s: invalid interrupt type %d", __func__,
		    sc->vmx_intr_type);
	}

	if (error == 0)
		vmxnet3_set_interrupt_idx(sc);

	return (error);
}

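/*
 * The device reports its preferred interrupt mode via GET_INTRCFG; the
 * switch below intentionally falls through so MSI-X degrades to MSI and
 * then to a legacy INTx interrupt when allocation fails.
 */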
static int
vmxnet3_alloc_interrupts(struct vmxnet3_softc *sc)
{
	device_t dev;
	uint32_t config;
	int error;

	dev = sc->vmx_dev;
	config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);

	sc->vmx_intr_type = config & 0x03;
	sc->vmx_intr_mask_mode = (config >> 2) & 0x03;

	switch (sc->vmx_intr_type) {
	case VMXNET3_IT_AUTO:
		sc->vmx_intr_type = VMXNET3_IT_MSIX;
		/* FALLTHROUGH */
	case VMXNET3_IT_MSIX:
		error = vmxnet3_alloc_msix_interrupts(sc);
		if (error == 0)
			break;
		sc->vmx_intr_type = VMXNET3_IT_MSI;
		/* FALLTHROUGH */
	case VMXNET3_IT_MSI:
		error = vmxnet3_alloc_msi_interrupts(sc);
		if (error == 0)
			break;
		sc->vmx_intr_type = VMXNET3_IT_LEGACY;
		/* FALLTHROUGH */
	case VMXNET3_IT_LEGACY:
		error = vmxnet3_alloc_legacy_interrupts(sc);
		if (error == 0)
			break;
		/* FALLTHROUGH */
	default:
		sc->vmx_intr_type = -1;
		device_printf(dev, "cannot allocate any interrupt resources\n");
		return (ENXIO);
	}

	return (error);
}

static void
vmxnet3_free_interrupt(struct vmxnet3_softc *sc,
    struct vmxnet3_interrupt *intr)
{
	device_t dev;

	dev = sc->vmx_dev;

	if (intr->vmxi_handler != NULL) {
		bus_teardown_intr(dev, intr->vmxi_irq, intr->vmxi_handler);
		intr->vmxi_handler = NULL;
	}

	if (intr->vmxi_irq != NULL) {
		bus_release_resource(dev, SYS_RES_IRQ, intr->vmxi_rid,
		    intr->vmxi_irq);
		intr->vmxi_irq = NULL;
		intr->vmxi_rid = -1;
	}
}

static void
vmxnet3_free_interrupts(struct vmxnet3_softc *sc)
{
	int i;

	for (i = 0; i < sc->vmx_nintrs; i++)
		vmxnet3_free_interrupt(sc, &sc->vmx_intrs[i]);

	if (sc->vmx_intr_type == VMXNET3_IT_MSI ||
	    sc->vmx_intr_type == VMXNET3_IT_MSIX)
		pci_release_msi(sc->vmx_dev);
}

#ifndef VMXNET3_LEGACY_TX
static int
vmxnet3_alloc_taskqueue(struct vmxnet3_softc *sc)
{
	device_t dev;

	dev = sc->vmx_dev;

	sc->vmx_tq = taskqueue_create(device_get_nameunit(dev), M_NOWAIT,
	    taskqueue_thread_enqueue, &sc->vmx_tq);
	if (sc->vmx_tq == NULL)
		return (ENOMEM);

	return (0);
}

static void
vmxnet3_start_taskqueue(struct vmxnet3_softc *sc)
{
	device_t dev;
	int nthreads, error;

	dev = sc->vmx_dev;

	/*
	 * The taskqueue is typically not frequently used, so a dedicated
	 * thread for each queue is unnecessary.
	 */
	nthreads = MAX(1, sc->vmx_ntxqueues / 2);

	/*
	 * Most drivers just ignore the return value - it only fails
	 * with ENOMEM so an error is not likely. It is hard for us
	 * to recover from an error here.
	 */
	error = taskqueue_start_threads(&sc->vmx_tq, nthreads, PI_NET,
	    "%s taskq", device_get_nameunit(dev));
	if (error)
		device_printf(dev, "failed to start taskqueue: %d", error);
}

static void
vmxnet3_drain_taskqueue(struct vmxnet3_softc *sc)
{
	struct vmxnet3_txqueue *txq;
	int i;

	if (sc->vmx_tq != NULL) {
		for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
			txq = &sc->vmx_txq[i];
			taskqueue_drain(sc->vmx_tq, &txq->vxtxq_defrtask);
		}
	}
}

static void
vmxnet3_free_taskqueue(struct vmxnet3_softc *sc)
{
	if (sc->vmx_tq != NULL) {
		taskqueue_free(sc->vmx_tq);
		sc->vmx_tq = NULL;
	}
}
#endif

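/*
 * Each Rx queue is backed by VMXNET3_RXRINGS_PERQ command rings and a
 * single completion ring sized to cover all of them.
 */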
static int
vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q)
{
	struct vmxnet3_rxqueue *rxq;
	struct vmxnet3_rxring *rxr;
	int i;

	rxq = &sc->vmx_rxq[q];

	snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
	    device_get_nameunit(sc->vmx_dev), q);
	mtx_init(&rxq->vxrxq_mtx, rxq->vxrxq_name, NULL, MTX_DEF);

	rxq->vxrxq_sc = sc;
	rxq->vxrxq_id = q;

	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
		rxr = &rxq->vxrxq_cmd_ring[i];
		rxr->vxrxr_rid = i;
		rxr->vxrxr_ndesc = sc->vmx_nrxdescs;
		rxr->vxrxr_rxbuf = malloc(rxr->vxrxr_ndesc *
		    sizeof(struct vmxnet3_rxbuf), M_DEVBUF, M_NOWAIT | M_ZERO);
		if (rxr->vxrxr_rxbuf == NULL)
			return (ENOMEM);

		rxq->vxrxq_comp_ring.vxcr_ndesc += sc->vmx_nrxdescs;
	}

	return (0);
}

static int
vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
{
	struct vmxnet3_txqueue *txq;
	struct vmxnet3_txring *txr;

	txq = &sc->vmx_txq[q];
	txr = &txq->vxtxq_cmd_ring;

	snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
	    device_get_nameunit(sc->vmx_dev), q);
	mtx_init(&txq->vxtxq_mtx, txq->vxtxq_name, NULL, MTX_DEF);

	txq->vxtxq_sc = sc;
	txq->vxtxq_id = q;

	txr->vxtxr_ndesc = sc->vmx_ntxdescs;
	txr->vxtxr_txbuf = malloc(txr->vxtxr_ndesc *
	    sizeof(struct vmxnet3_txbuf), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (txr->vxtxr_txbuf == NULL)
		return (ENOMEM);

	txq->vxtxq_comp_ring.vxcr_ndesc = sc->vmx_ntxdescs;

#ifndef VMXNET3_LEGACY_TX
	TASK_INIT(&txq->vxtxq_defrtask, 0, vmxnet3_txq_tq_deferred, txq);

	txq->vxtxq_br = buf_ring_alloc(VMXNET3_DEF_BUFRING_SIZE, M_DEVBUF,
	    M_NOWAIT, &txq->vxtxq_mtx);
	if (txq->vxtxq_br == NULL)
		return (ENOMEM);
#endif

	return (0);
}

static int
vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *sc)
{
	int i, error;

	/*
	 * Only attempt to create multiple queues if MSIX is available. MSIX is
	 * disabled by default because it's apparently broken for devices passed
	 * through by at least ESXi 5.1. The hw.pci.honor_msi_blacklist tunable
	 * must be set to zero for MSIX. This check prevents us from allocating
	 * queue structures that we will not use.
	 */
	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX) {
		sc->vmx_max_nrxqueues = 1;
		sc->vmx_max_ntxqueues = 1;
	}

	sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) *
	    sc->vmx_max_nrxqueues, M_DEVBUF, M_NOWAIT | M_ZERO);
	sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) *
	    sc->vmx_max_ntxqueues, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (sc->vmx_rxq == NULL || sc->vmx_txq == NULL)
		return (ENOMEM);

	for (i = 0; i < sc->vmx_max_nrxqueues; i++) {
		error = vmxnet3_init_rxq(sc, i);
		if (error)
			return (error);
	}

	for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
		error = vmxnet3_init_txq(sc, i);
		if (error)
			return (error);
	}

	return (0);
}

static void
vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *rxq)
{
	struct vmxnet3_rxring *rxr;
	int i;

	rxq->vxrxq_sc = NULL;
	rxq->vxrxq_id = -1;

	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
		rxr = &rxq->vxrxq_cmd_ring[i];

		if (rxr->vxrxr_rxbuf != NULL) {
			free(rxr->vxrxr_rxbuf, M_DEVBUF);
			rxr->vxrxr_rxbuf = NULL;
		}
	}

	if (mtx_initialized(&rxq->vxrxq_mtx) != 0)
		mtx_destroy(&rxq->vxrxq_mtx);
}

static void
vmxnet3_destroy_txq(struct vmxnet3_txqueue *txq)
{
	struct vmxnet3_txring *txr;

	txr = &txq->vxtxq_cmd_ring;

	txq->vxtxq_sc = NULL;
	txq->vxtxq_id = -1;

#ifndef VMXNET3_LEGACY_TX
	if (txq->vxtxq_br != NULL) {
		buf_ring_free(txq->vxtxq_br, M_DEVBUF);
		txq->vxtxq_br = NULL;
	}
#endif

	if (txr->vxtxr_txbuf != NULL) {
		free(txr->vxtxr_txbuf, M_DEVBUF);
		txr->vxtxr_txbuf = NULL;
	}

	if (mtx_initialized(&txq->vxtxq_mtx) != 0)
		mtx_destroy(&txq->vxtxq_mtx);
}

static void
vmxnet3_free_rxtx_queues(struct vmxnet3_softc *sc)
{
	int i;

	if (sc->vmx_rxq != NULL) {
		for (i = 0; i < sc->vmx_max_nrxqueues; i++)
			vmxnet3_destroy_rxq(&sc->vmx_rxq[i]);
		free(sc->vmx_rxq, M_DEVBUF);
		sc->vmx_rxq = NULL;
	}

	if (sc->vmx_txq != NULL) {
		for (i = 0; i < sc->vmx_max_ntxqueues; i++)
			vmxnet3_destroy_txq(&sc->vmx_txq[i]);
		free(sc->vmx_txq, M_DEVBUF);
		sc->vmx_txq = NULL;
	}
}

static int
vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
{
	device_t dev;
	uint8_t *kva;
	size_t size;
	int i, error;

	dev = sc->vmx_dev;

	size = sizeof(struct vmxnet3_driver_shared);
	error = vmxnet3_dma_malloc(sc, size, 1, &sc->vmx_ds_dma);
	if (error) {
		device_printf(dev, "cannot alloc shared memory\n");
		return (error);
	}
	sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.dma_vaddr;

	size = sc->vmx_ntxqueues * sizeof(struct vmxnet3_txq_shared) +
	    sc->vmx_nrxqueues * sizeof(struct vmxnet3_rxq_shared);
	error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_qs_dma);
	if (error) {
		device_printf(dev, "cannot alloc queue shared memory\n");
		return (error);
	}
	sc->vmx_qs = (void *) sc->vmx_qs_dma.dma_vaddr;
	kva = sc->vmx_qs;

	for (i = 0; i < sc->vmx_ntxqueues; i++) {
		sc->vmx_txq[i].vxtxq_ts = (struct vmxnet3_txq_shared *) kva;
		kva += sizeof(struct vmxnet3_txq_shared);
	}
	for (i = 0; i < sc->vmx_nrxqueues; i++) {
		sc->vmx_rxq[i].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva;
		kva += sizeof(struct vmxnet3_rxq_shared);
	}

	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
		size = sizeof(struct vmxnet3_rss_shared);
		error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_rss_dma);
		if (error) {
			device_printf(dev, "cannot alloc rss shared memory\n");
			return (error);
		}
		sc->vmx_rss =
		    (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.dma_vaddr;
	}

	return (0);
}

static void
vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
{

	if (sc->vmx_rss != NULL) {
		vmxnet3_dma_free(sc, &sc->vmx_rss_dma);
		sc->vmx_rss = NULL;
	}

	if (sc->vmx_qs != NULL) {
		vmxnet3_dma_free(sc, &sc->vmx_qs_dma);
		sc->vmx_qs = NULL;
	}

	if (sc->vmx_ds != NULL) {
		vmxnet3_dma_free(sc, &sc->vmx_ds_dma);
		sc->vmx_ds = NULL;
	}
}

static int
vmxnet3_alloc_txq_data(struct vmxnet3_softc *sc)
{
	device_t dev;
	struct vmxnet3_txqueue *txq;
	struct vmxnet3_txring *txr;
	struct vmxnet3_comp_ring *txc;
	size_t descsz, compsz;
	int i, q, error;

	dev = sc->vmx_dev;

	for (q = 0; q < sc->vmx_ntxqueues; q++) {
		txq = &sc->vmx_txq[q];
		txr = &txq->vxtxq_cmd_ring;
		txc = &txq->vxtxq_comp_ring;

		descsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc);
		compsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txcompdesc);

		error = bus_dma_tag_create(bus_get_dma_tag(dev),
		    1, 0,			/* alignment, boundary */
		    BUS_SPACE_MAXADDR,		/* lowaddr */
		    BUS_SPACE_MAXADDR,		/* highaddr */
		    NULL, NULL,			/* filter, filterarg */
		    VMXNET3_TX_MAXSIZE,		/* maxsize */
		    VMXNET3_TX_MAXSEGS,		/* nsegments */
		    VMXNET3_TX_MAXSEGSIZE,	/* maxsegsize */
		    0,				/* flags */
		    NULL, NULL,			/* lockfunc, lockarg */
		    &txr->vxtxr_txtag);
		if (error) {
			device_printf(dev,
			    "unable to create Tx buffer tag for queue %d\n", q);
			return (error);
		}

		error = vmxnet3_dma_malloc(sc, descsz, 512, &txr->vxtxr_dma);
		if (error) {
			device_printf(dev, "cannot alloc Tx descriptors for "
			    "queue %d error %d\n", q, error);
			return (error);
		}
		txr->vxtxr_txd =
		    (struct vmxnet3_txdesc *) txr->vxtxr_dma.dma_vaddr;

		error = vmxnet3_dma_malloc(sc, compsz, 512, &txc->vxcr_dma);
		if (error) {
			device_printf(dev, "cannot alloc Tx comp descriptors "
			   "for queue %d error %d\n", q, error);
			return (error);
		}
		txc->vxcr_u.txcd =
		    (struct vmxnet3_txcompdesc *) txc->vxcr_dma.dma_vaddr;

		for (i = 0; i < txr->vxtxr_ndesc; i++) {
			error = bus_dmamap_create(txr->vxtxr_txtag, 0,
			    &txr->vxtxr_txbuf[i].vtxb_dmamap);
			if (error) {
				device_printf(dev, "unable to create Tx buf "
				    "dmamap for queue %d idx %d\n", q, i);
				return (error);
			}
		}
	}

	return (0);
}

static void
vmxnet3_free_txq_data(struct vmxnet3_softc *sc)
{
	device_t dev;
	struct vmxnet3_txqueue *txq;
	struct vmxnet3_txring *txr;
	struct vmxnet3_comp_ring *txc;
	struct vmxnet3_txbuf *txb;
	int i, q;

	dev = sc->vmx_dev;

	for (q = 0; q < sc->vmx_ntxqueues; q++) {
		txq = &sc->vmx_txq[q];
		txr = &txq->vxtxq_cmd_ring;
		txc = &txq->vxtxq_comp_ring;

		for (i = 0; i < txr->vxtxr_ndesc; i++) {
			txb = &txr->vxtxr_txbuf[i];
			if (txb->vtxb_dmamap != NULL) {
				bus_dmamap_destroy(txr->vxtxr_txtag,
				    txb->vtxb_dmamap);
				txb->vtxb_dmamap = NULL;
			}
		}

		if (txc->vxcr_u.txcd != NULL) {
			vmxnet3_dma_free(sc, &txc->vxcr_dma);
			txc->vxcr_u.txcd = NULL;
		}

		if (txr->vxtxr_txd != NULL) {
			vmxnet3_dma_free(sc, &txr->vxtxr_dma);
			txr->vxtxr_txd = NULL;
		}

		if (txr->vxtxr_txtag != NULL) {
			bus_dma_tag_destroy(txr->vxtxr_txtag);
			txr->vxtxr_txtag = NULL;
		}
	}
}

static int
vmxnet3_alloc_rxq_data(struct vmxnet3_softc *sc)
{
	device_t dev;
	struct vmxnet3_rxqueue *rxq;
	struct vmxnet3_rxring *rxr;
	struct vmxnet3_comp_ring *rxc;
	int descsz, compsz;
	int i, j, q, error;

	dev = sc->vmx_dev;

	for (q = 0; q < sc->vmx_nrxqueues; q++) {
		rxq = &sc->vmx_rxq[q];
		rxc = &rxq->vxrxq_comp_ring;
		compsz = 0;

		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
			rxr = &rxq->vxrxq_cmd_ring[i];

			descsz = rxr->vxrxr_ndesc *
			    sizeof(struct vmxnet3_rxdesc);
			compsz += rxr->vxrxr_ndesc *
			    sizeof(struct vmxnet3_rxcompdesc);

			error = bus_dma_tag_create(bus_get_dma_tag(dev),
			    1, 0,		/* alignment, boundary */
			    BUS_SPACE_MAXADDR,	/* lowaddr */
			    BUS_SPACE_MAXADDR,	/* highaddr */
			    NULL, NULL,		/* filter, filterarg */
			    MJUMPAGESIZE,	/* maxsize */
			    1,			/* nsegments */
			    MJUMPAGESIZE,	/* maxsegsize */
			    0,			/* flags */
			    NULL, NULL,		/* lockfunc, lockarg */
			    &rxr->vxrxr_rxtag);
			if (error) {
				device_printf(dev,
				    "unable to create Rx buffer tag for "
				    "queue %d\n", q);
				return (error);
			}

			error = vmxnet3_dma_malloc(sc, descsz, 512,
			    &rxr->vxrxr_dma);
			if (error) {
				device_printf(dev, "cannot allocate Rx "
				    "descriptors for queue %d/%d error %d\n",
				    i, q, error);
				return (error);
			}
			rxr->vxrxr_rxd =
			    (struct vmxnet3_rxdesc *) rxr->vxrxr_dma.dma_vaddr;
		}

		error = vmxnet3_dma_malloc(sc, compsz, 512, &rxc->vxcr_dma);
		if (error) {
			device_printf(dev, "cannot alloc Rx comp descriptors "
			    "for queue %d error %d\n", q, error);
			return (error);
		}
		rxc->vxcr_u.rxcd =
		    (struct vmxnet3_rxcompdesc *) rxc->vxcr_dma.dma_vaddr;

		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
			rxr = &rxq->vxrxq_cmd_ring[i];

			error = bus_dmamap_create(rxr->vxrxr_rxtag, 0,
			    &rxr->vxrxr_spare_dmap);
			if (error) {
				device_printf(dev, "unable to create spare "
				    "dmamap for queue %d/%d error %d\n",
				    q, i, error);
				return (error);
			}

			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
				error = bus_dmamap_create(rxr->vxrxr_rxtag, 0,
				    &rxr->vxrxr_rxbuf[j].vrxb_dmamap);
				if (error) {
					device_printf(dev, "unable to create "
					    "dmamap for queue %d/%d slot %d "
					    "error %d\n",
					    q, i, j, error);
					return (error);
				}
			}
		}
	}

	return (0);
}

static void
vmxnet3_free_rxq_data(struct vmxnet3_softc *sc)
{
	device_t dev;
	struct vmxnet3_rxqueue *rxq;
	struct vmxnet3_rxring *rxr;
	struct vmxnet3_comp_ring *rxc;
	struct vmxnet3_rxbuf *rxb;
	int i, j, q;

	dev = sc->vmx_dev;

	for (q = 0; q < sc->vmx_nrxqueues; q++) {
		rxq = &sc->vmx_rxq[q];
		rxc = &rxq->vxrxq_comp_ring;

		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
			rxr = &rxq->vxrxq_cmd_ring[i];

			if (rxr->vxrxr_spare_dmap != NULL) {
				bus_dmamap_destroy(rxr->vxrxr_rxtag,
				    rxr->vxrxr_spare_dmap);
				rxr->vxrxr_spare_dmap = NULL;
			}

			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
				rxb = &rxr->vxrxr_rxbuf[j];
				if (rxb->vrxb_dmamap != NULL) {
					bus_dmamap_destroy(rxr->vxrxr_rxtag,
					    rxb->vrxb_dmamap);
					rxb->vrxb_dmamap = NULL;
				}
			}
		}

		if (rxc->vxcr_u.rxcd != NULL) {
			vmxnet3_dma_free(sc, &rxc->vxcr_dma);
			rxc->vxcr_u.rxcd = NULL;
		}

		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
			rxr = &rxq->vxrxq_cmd_ring[i];

			if (rxr->vxrxr_rxd != NULL) {
				vmxnet3_dma_free(sc, &rxr->vxrxr_dma);
				rxr->vxrxr_rxd = NULL;
			}

			if (rxr->vxrxr_rxtag != NULL) {
				bus_dma_tag_destroy(rxr->vxrxr_rxtag);
				rxr->vxrxr_rxtag = NULL;
			}
		}
	}
}

static int
vmxnet3_alloc_queue_data(struct vmxnet3_softc *sc)
{
	int error;

	error = vmxnet3_alloc_txq_data(sc);
	if (error)
		return (error);

	error = vmxnet3_alloc_rxq_data(sc);
	if (error)
		return (error);

	return (0);
}

static void
vmxnet3_free_queue_data(struct vmxnet3_softc *sc)
{

	if (sc->vmx_rxq != NULL)
		vmxnet3_free_rxq_data(sc);

	if (sc->vmx_txq != NULL)
		vmxnet3_free_txq_data(sc);
}

static int
vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
{
	int error;

	error = vmxnet3_dma_malloc(sc, VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN,
	    32, &sc->vmx_mcast_dma);
	if (error)
		device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
	else
		sc->vmx_mcast = sc->vmx_mcast_dma.dma_vaddr;

	return (error);
}

static void
vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
{

	if (sc->vmx_mcast != NULL) {
		vmxnet3_dma_free(sc, &sc->vmx_mcast_dma);
		sc->vmx_mcast = NULL;
	}
}

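/*
 * The driver_shared structure is the main mailbox between the driver and
 * the device: it carries the guest OS/driver version, the physical
 * addresses of the queue and RSS shared areas, the interrupt layout, and
 * the receive filter state.
 */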
static void
vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
{
	struct vmxnet3_driver_shared *ds;
	struct vmxnet3_txqueue *txq;
	struct vmxnet3_txq_shared *txs;
	struct vmxnet3_rxqueue *rxq;
	struct vmxnet3_rxq_shared *rxs;
	int i;

	ds = sc->vmx_ds;

	/*
	 * Initialize fields of the shared data that remain the same across
	 * reinits. Note the shared data is zeroed when allocated.
	 */

	ds->magic = VMXNET3_REV1_MAGIC;

	/* DriverInfo */
	ds->version = VMXNET3_DRIVER_VERSION;
	ds->guest = VMXNET3_GOS_FREEBSD |
#ifdef __LP64__
	    VMXNET3_GOS_64BIT;
#else
	    VMXNET3_GOS_32BIT;
#endif
	ds->vmxnet3_revision = 1;
	ds->upt_version = 1;

	/* Misc. conf */
	ds->driver_data = vtophys(sc);
	ds->driver_data_len = sizeof(struct vmxnet3_softc);
	ds->queue_shared = sc->vmx_qs_dma.dma_paddr;
	ds->queue_shared_len = sc->vmx_qs_dma.dma_size;
	ds->nrxsg_max = sc->vmx_max_rxsegs;

	/* RSS conf */
	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
		ds->rss.version = 1;
		ds->rss.paddr = sc->vmx_rss_dma.dma_paddr;
		ds->rss.len = sc->vmx_rss_dma.dma_size;
	}

	/* Interrupt control. */
	ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
	ds->nintr = sc->vmx_nintrs;
	ds->evintr = sc->vmx_event_intr_idx;
	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;

	for (i = 0; i < sc->vmx_nintrs; i++)
		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;

	/* Receive filter. */
	ds->mcast_table = sc->vmx_mcast_dma.dma_paddr;
	ds->mcast_tablelen = sc->vmx_mcast_dma.dma_size;

	/* Tx queues */
	for (i = 0; i < sc->vmx_ntxqueues; i++) {
		txq = &sc->vmx_txq[i];
		txs = txq->vxtxq_ts;

		txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_dma.dma_paddr;
		txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
		txs->comp_ring = txq->vxtxq_comp_ring.vxcr_dma.dma_paddr;
		txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
		txs->driver_data = vtophys(txq);
		txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
	}

	/* Rx queues */
	for (i = 0; i < sc->vmx_nrxqueues; i++) {
		rxq = &sc->vmx_rxq[i];
		rxs = rxq->vxrxq_rs;

		rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_dma.dma_paddr;
		rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
		rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_dma.dma_paddr;
		rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
		rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_dma.dma_paddr;
		rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
		rxs->driver_data = vtophys(rxq);
		rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
	}
}

static void
vmxnet3_reinit_interface(struct vmxnet3_softc *sc)
{
	struct ifnet *ifp;

	ifp = sc->vmx_ifp;

	/* Use the current MAC address. */
	bcopy(IF_LLADDR(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
	vmxnet3_set_lladdr(sc);

	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= VMXNET3_CSUM_OFFLOAD;
	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
		ifp->if_hwassist |= VMXNET3_CSUM_OFFLOAD_IPV6;
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_IP_TSO;
	if (ifp->if_capenable & IFCAP_TSO6)
		ifp->if_hwassist |= CSUM_IP6_TSO;
}

static void
vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
{
	/*
	 * Use the same key as the Linux driver until FreeBSD can do
	 * RSS (presumably Toeplitz) in software.
	 */
	static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
	    0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
	    0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
	    0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
	    0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
	    0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
	};

	struct vmxnet3_driver_shared *ds;
	struct vmxnet3_rss_shared *rss;
	int i;

	ds = sc->vmx_ds;
	rss = sc->vmx_rss;

	rss->hash_type =
	    UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
	    UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
	rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
	rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
	rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
	memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);

	for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
		rss->ind_table[i] = i % sc->vmx_nrxqueues;
}

static void
vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
{
	struct ifnet *ifp;
	struct vmxnet3_driver_shared *ds;

	ifp = sc->vmx_ifp;
	ds = sc->vmx_ds;

	ds->mtu = ifp->if_mtu;
	ds->ntxqueue = sc->vmx_ntxqueues;
	ds->nrxqueue = sc->vmx_nrxqueues;

	ds->upt_features = 0;
	if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
		ds->upt_features |= UPT1_F_CSUM;
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		ds->upt_features |= UPT1_F_VLAN;
	if (ifp->if_capenable & IFCAP_LRO)
		ds->upt_features |= UPT1_F_LRO;

	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
		ds->upt_features |= UPT1_F_RSS;
		vmxnet3_reinit_rss_shared_data(sc);
	}

	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.dma_paddr);
	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
	    (uint64_t) sc->vmx_ds_dma.dma_paddr >> 32);
}

static int
vmxnet3_alloc_data(struct vmxnet3_softc *sc)
{
	int error;

	error = vmxnet3_alloc_shared_data(sc);
	if (error)
		return (error);

	error = vmxnet3_alloc_queue_data(sc);
	if (error)
		return (error);

	error = vmxnet3_alloc_mcast_table(sc);
	if (error)
		return (error);

	vmxnet3_init_shared_data(sc);

	return (0);
}

static void
vmxnet3_free_data(struct vmxnet3_softc *sc)
{

	vmxnet3_free_mcast_table(sc);
	vmxnet3_free_queue_data(sc);
	vmxnet3_free_shared_data(sc);
}

static int
vmxnet3_setup_interface(struct vmxnet3_softc *sc)
{
	device_t dev;
	struct ifnet *ifp;

	dev = sc->vmx_dev;

	ifp = sc->vmx_ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "cannot allocate ifnet structure\n");
		return (ENOSPC);
	}

	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
#if __FreeBSD_version < 1000025
	ifp->if_baudrate = 1000000000;
#elif __FreeBSD_version < 1100011
	if_initbaudrate(ifp, IF_Gbps(10));
#else
	ifp->if_baudrate = IF_Gbps(10);
#endif
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_init = vmxnet3_init;
	ifp->if_ioctl = vmxnet3_ioctl;
	ifp->if_hw_tsomax = VMXNET3_TSO_MAXSIZE;

#ifdef VMXNET3_LEGACY_TX
	ifp->if_start = vmxnet3_start;
	ifp->if_snd.ifq_drv_maxlen = sc->vmx_ntxdescs - 1;
	IFQ_SET_MAXLEN(&ifp->if_snd, sc->vmx_ntxdescs - 1);
	IFQ_SET_READY(&ifp->if_snd);
#else
	ifp->if_transmit = vmxnet3_txq_mq_start;
	ifp->if_qflush = vmxnet3_qflush;
#endif

	vmxnet3_get_lladdr(sc);
	ether_ifattach(ifp, sc->vmx_lladdr);

	ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_TXCSUM;
	ifp->if_capabilities |= IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6;
	ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6;
	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
	    IFCAP_VLAN_HWCSUM;
	ifp->if_capenable = ifp->if_capabilities;

	/* These capabilities are not enabled by default. */
	ifp->if_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER;

	sc->vmx_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    vmxnet3_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
	sc->vmx_vlan_detach = EVENTHANDLER_REGISTER(vlan_config,
	    vmxnet3_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);

	ifmedia_init(&sc->vmx_media, 0, vmxnet3_media_change,
	    vmxnet3_media_status);
	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->vmx_media, IFM_ETHER | IFM_AUTO);

	return (0);
}

static void
vmxnet3_evintr(struct vmxnet3_softc *sc)
{
	device_t dev;
	struct ifnet *ifp;
	struct vmxnet3_txq_shared *ts;
	struct vmxnet3_rxq_shared *rs;
	uint32_t event;
	int reset;

	dev = sc->vmx_dev;
	ifp = sc->vmx_ifp;
	reset = 0;

	VMXNET3_CORE_LOCK(sc);

	/* Clear events. */
	event = sc->vmx_ds->event;
	vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);

	if (event & VMXNET3_EVENT_LINK) {
		vmxnet3_link_status(sc);
		if (sc->vmx_link_active != 0)
			vmxnet3_tx_start_all(sc);
	}

	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
		reset = 1;
		vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
		ts = sc->vmx_txq[0].vxtxq_ts;
		if (ts->stopped != 0)
			device_printf(dev, "Tx queue error %#x\n", ts->error);
		rs = sc->vmx_rxq[0].vxrxq_rs;
		if (rs->stopped != 0)
			device_printf(dev, "Rx queue error %#x\n", rs->error);
		device_printf(dev, "Rx/Tx queue error event ... resetting\n");
	}

	if (event & VMXNET3_EVENT_DIC)
		device_printf(dev, "device implementation change event\n");
	if (event & VMXNET3_EVENT_DEBUG)
		device_printf(dev, "debug event\n");

	if (reset != 0) {
		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
		vmxnet3_init_locked(sc);
	}

	VMXNET3_CORE_UNLOCK(sc);
}

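/*
 * Reclaim transmitted mbufs: walk the Tx completion ring until the
 * generation bit no longer matches, unloading and freeing the completed
 * buffers and advancing the command ring's next index.
 */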
static void
vmxnet3_txq_eof(struct vmxnet3_txqueue *txq)
{
	struct vmxnet3_softc *sc;
	struct ifnet *ifp;
	struct vmxnet3_txring *txr;
	struct vmxnet3_comp_ring *txc;
	struct vmxnet3_txcompdesc *txcd;
	struct vmxnet3_txbuf *txb;
	struct mbuf *m;
	u_int sop;

	sc = txq->vxtxq_sc;
	ifp = sc->vmx_ifp;
	txr = &txq->vxtxq_cmd_ring;
	txc = &txq->vxtxq_comp_ring;

	VMXNET3_TXQ_LOCK_ASSERT(txq);

	for (;;) {
		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
		if (txcd->gen != txc->vxcr_gen)
			break;
		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);

		if (++txc->vxcr_next == txc->vxcr_ndesc) {
			txc->vxcr_next = 0;
			txc->vxcr_gen ^= 1;
		}

		sop = txr->vxtxr_next;
		txb = &txr->vxtxr_txbuf[sop];

		if ((m = txb->vtxb_m) != NULL) {
			bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap);

			txq->vxtxq_stats.vmtxs_opackets++;
			txq->vxtxq_stats.vmtxs_obytes += m->m_pkthdr.len;
			if (m->m_flags & M_MCAST)
				txq->vxtxq_stats.vmtxs_omcasts++;

			m_freem(m);
			txb->vtxb_m = NULL;
		}

		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
	}

	if (txr->vxtxr_head == txr->vxtxr_next)
		txq->vxtxq_watchdog = 0;
}

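/*
 * Post a fresh receive buffer at the ring's fill index. Ring 0 uses
 * MCLBYTES "head" buffers at chain-start slots; all other slots get
 * MJUMPAGESIZE "body" buffers for the remainder of a chained frame.
 */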
1868static int
1869vmxnet3_newbuf(struct vmxnet3_softc *sc, struct vmxnet3_rxring *rxr)
1870{
1871	struct ifnet *ifp;
1872	struct mbuf *m;
1873	struct vmxnet3_rxdesc *rxd;
1874	struct vmxnet3_rxbuf *rxb;
1875	bus_dma_tag_t tag;
1876	bus_dmamap_t dmap;
1877	bus_dma_segment_t segs[1];
1878	int idx, clsize, btype, flags, nsegs, error;
1879
1880	ifp = sc->vmx_ifp;
1881	tag = rxr->vxrxr_rxtag;
1882	dmap = rxr->vxrxr_spare_dmap;
1883	idx = rxr->vxrxr_fill;
1884	rxd = &rxr->vxrxr_rxd[idx];
1885	rxb = &rxr->vxrxr_rxbuf[idx];
1886
1887#ifdef VMXNET3_FAILPOINTS
1888	KFAIL_POINT_CODE(VMXNET3_FP, newbuf, return ENOBUFS);
1889	if (rxr->vxrxr_rid != 0)
1890		KFAIL_POINT_CODE(VMXNET3_FP, newbuf_body_only, return ENOBUFS);
1891#endif
1892
1893	if (rxr->vxrxr_rid == 0 && (idx % sc->vmx_rx_max_chain) == 0) {
1894		flags = M_PKTHDR;
1895		clsize = MCLBYTES;
1896		btype = VMXNET3_BTYPE_HEAD;
1897	} else {
1898#if __FreeBSD_version < 902001
1899		/*
1900		 * These mbufs will never be used for the start of a frame.
1901		 * Roughly prior to branching releng/9.2, the load_mbuf_sg()
1902		 * required the mbuf to always be a packet header. Avoid
1903		 * unnecessary mbuf initialization in newer versions where
1904		 * that is not the case.
1905		 */
1906		flags = M_PKTHDR;
1907#else
1908		flags = 0;
1909#endif
1910		clsize = MJUMPAGESIZE;
1911		btype = VMXNET3_BTYPE_BODY;
1912	}
1913
1914	m = m_getjcl(M_NOWAIT, MT_DATA, flags, clsize);
1915	if (m == NULL) {
1916		sc->vmx_stats.vmst_mgetcl_failed++;
1917		return (ENOBUFS);
1918	}
1919
1920	if (btype == VMXNET3_BTYPE_HEAD) {
1921		m->m_len = m->m_pkthdr.len = clsize;
1922		m_adj(m, ETHER_ALIGN);
1923	} else
1924		m->m_len = clsize;
1925
1926	error = bus_dmamap_load_mbuf_sg(tag, dmap, m, &segs[0], &nsegs,
1927	    BUS_DMA_NOWAIT);
1928	if (error) {
1929		m_freem(m);
1930		sc->vmx_stats.vmst_mbuf_load_failed++;
1931		return (error);
1932	}
1933	KASSERT(nsegs == 1,
1934	    ("%s: mbuf %p with too many segments %d", __func__, m, nsegs));
1935#if __FreeBSD_version < 902001
1936	if (btype == VMXNET3_BTYPE_BODY)
1937		m->m_flags &= ~M_PKTHDR;
1938#endif
1939
1940	if (rxb->vrxb_m != NULL) {
1941		bus_dmamap_sync(tag, rxb->vrxb_dmamap, BUS_DMASYNC_POSTREAD);
1942		bus_dmamap_unload(tag, rxb->vrxb_dmamap);
1943	}
1944
1945	rxr->vxrxr_spare_dmap = rxb->vrxb_dmamap;
1946	rxb->vrxb_dmamap = dmap;
1947	rxb->vrxb_m = m;
1948
1949	rxd->addr = segs[0].ds_addr;
1950	rxd->len = segs[0].ds_len;
1951	rxd->btype = btype;
1952	rxd->gen = rxr->vxrxr_gen;
1953
1954	vmxnet3_rxr_increment_fill(rxr);
1955	return (0);
1956}
1957
1958static void
1959vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *rxq,
1960    struct vmxnet3_rxring *rxr, int idx)
1961{
1962	struct vmxnet3_rxdesc *rxd;
1963
1964	rxd = &rxr->vxrxr_rxd[idx];
1965	rxd->gen = rxr->vxrxr_gen;
1966	vmxnet3_rxr_increment_fill(rxr);
1967}
1968
1969static void
1970vmxnet3_rxq_discard_chain(struct vmxnet3_rxqueue *rxq)
1971{
1972	struct vmxnet3_softc *sc;
1973	struct vmxnet3_rxring *rxr;
1974	struct vmxnet3_comp_ring *rxc;
1975	struct vmxnet3_rxcompdesc *rxcd;
1976	int idx, eof;
1977
1978	sc = rxq->vxrxq_sc;
1979	rxc = &rxq->vxrxq_comp_ring;
1980
1981	do {
1982		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
1983		if (rxcd->gen != rxc->vxcr_gen)
1984			break;		/* Not expected. */
1985		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1986
1987		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
1988			rxc->vxcr_next = 0;
1989			rxc->vxcr_gen ^= 1;
1990		}
1991
1992		idx = rxcd->rxd_idx;
1993		eof = rxcd->eop;
1994		if (rxcd->qid < sc->vmx_nrxqueues)
1995			rxr = &rxq->vxrxq_cmd_ring[0];
1996		else
1997			rxr = &rxq->vxrxq_cmd_ring[1];
1998		vmxnet3_rxq_eof_discard(rxq, rxr, idx);
1999	} while (!eof);
2000}
2001
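/*
 * Translate the checksum bits of the Rx completion descriptor into mbuf
 * checksum flags. When the device has verified a TCP or UDP checksum,
 * the mbuf is marked CSUM_DATA_VALID | CSUM_PSEUDO_HDR with csum_data
 * set to 0xFFFF so the stack does not recompute it.
 */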
2002static void
2003vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2004{
2005
2006	if (rxcd->ipv4) {
2007		m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
2008		if (rxcd->ipcsum_ok)
2009			m->m_pkthdr.csum_flags |= CSUM_IP_VALID;
2010	}
2011
2012	if (!rxcd->fragment) {
2013		if (rxcd->csum_ok && (rxcd->tcp || rxcd->udp)) {
2014			m->m_pkthdr.csum_flags |= CSUM_DATA_VALID |
2015			    CSUM_PSEUDO_HDR;
2016			m->m_pkthdr.csum_data = 0xFFFF;
2017		}
2018	}
2019}
2020
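/*
 * Hand a completed frame to the network stack. The Rx queue lock is
 * dropped around if_input() so the stack is never entered with a driver
 * lock held, and reacquired before returning to the completion loop.
 */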
2021static void
2022vmxnet3_rxq_input(struct vmxnet3_rxqueue *rxq,
2023    struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2024{
2025	struct vmxnet3_softc *sc;
2026	struct ifnet *ifp;
2027
2028	sc = rxq->vxrxq_sc;
2029	ifp = sc->vmx_ifp;
2030
2031	if (rxcd->error) {
2032		rxq->vxrxq_stats.vmrxs_ierrors++;
2033		m_freem(m);
2034		return;
2035	}
2036
2037#ifdef notyet
2038	switch (rxcd->rss_type) {
2039	case VMXNET3_RCD_RSS_TYPE_IPV4:
2040		m->m_pkthdr.flowid = rxcd->rss_hash;
2041		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV4);
2042		break;
2043	case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
2044		m->m_pkthdr.flowid = rxcd->rss_hash;
2045		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV4);
2046		break;
2047	case VMXNET3_RCD_RSS_TYPE_IPV6:
2048		m->m_pkthdr.flowid = rxcd->rss_hash;
2049		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV6);
2050		break;
2051	case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
2052		m->m_pkthdr.flowid = rxcd->rss_hash;
2053		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV6);
2054		break;
2055	default: /* VMXNET3_RCD_RSS_TYPE_NONE */
2056		m->m_pkthdr.flowid = rxq->vxrxq_id;
2057		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2058		break;
2059	}
2060#else
2061	m->m_pkthdr.flowid = rxq->vxrxq_id;
2062	m->m_flags |= M_FLOWID;
2063#endif
2064
2065	if (!rxcd->no_csum)
2066		vmxnet3_rx_csum(rxcd, m);
2067	if (rxcd->vlan) {
2068		m->m_flags |= M_VLANTAG;
2069		m->m_pkthdr.ether_vtag = rxcd->vtag;
2070	}
2071
2072	rxq->vxrxq_stats.vmrxs_ipackets++;
2073	rxq->vxrxq_stats.vmrxs_ibytes += m->m_pkthdr.len;
2074
2075	VMXNET3_RXQ_UNLOCK(rxq);
2076	(*ifp->if_input)(ifp, m);
2077	VMXNET3_RXQ_LOCK(rxq);
2078}
2079
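/*
 * Process the Rx completion ring. Descriptors belong to the driver while
 * their generation bit matches vxcr_gen. A frame may span several
 * completions: the SOP completion supplies the head mbuf and each body
 * completion is appended via m_tail. A partially assembled chain is
 * saved in vxrxq_mhead/vxrxq_mtail when the ring runs dry.
 */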
2080static void
2081vmxnet3_rxq_eof(struct vmxnet3_rxqueue *rxq)
2082{
2083	struct vmxnet3_softc *sc;
2084	struct ifnet *ifp;
2085	struct vmxnet3_rxring *rxr;
2086	struct vmxnet3_comp_ring *rxc;
2087	struct vmxnet3_rxdesc *rxd;
2088	struct vmxnet3_rxcompdesc *rxcd;
2089	struct mbuf *m, *m_head, *m_tail;
2090	int idx, length;
2091
2092	sc = rxq->vxrxq_sc;
2093	ifp = sc->vmx_ifp;
2094	rxc = &rxq->vxrxq_comp_ring;
2095
2096	VMXNET3_RXQ_LOCK_ASSERT(rxq);
2097
2098	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2099		return;
2100
2101	m_head = rxq->vxrxq_mhead;
2102	rxq->vxrxq_mhead = NULL;
2103	m_tail = rxq->vxrxq_mtail;
2104	rxq->vxrxq_mtail = NULL;
2105	MPASS(m_head == NULL || m_tail != NULL);
2106
2107	for (;;) {
2108		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
2109		if (rxcd->gen != rxc->vxcr_gen) {
2110			rxq->vxrxq_mhead = m_head;
2111			rxq->vxrxq_mtail = m_tail;
2112			break;
2113		}
2114		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
2115
2116		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
2117			rxc->vxcr_next = 0;
2118			rxc->vxcr_gen ^= 1;
2119		}
2120
2121		idx = rxcd->rxd_idx;
2122		length = rxcd->len;
2123		if (rxcd->qid < sc->vmx_nrxqueues)
2124			rxr = &rxq->vxrxq_cmd_ring[0];
2125		else
2126			rxr = &rxq->vxrxq_cmd_ring[1];
2127		rxd = &rxr->vxrxr_rxd[idx];
2128
2129		m = rxr->vxrxr_rxbuf[idx].vrxb_m;
2130		KASSERT(m != NULL, ("%s: queue %d idx %d without mbuf",
2131		    __func__, rxcd->qid, idx));
2132
2133		/*
2134		 * The host may skip descriptors. We detect this when this
2135		 * descriptor does not match the previous fill index. Catch
2136		 * up with the host now.
2137		 */
2138		if (__predict_false(rxr->vxrxr_fill != idx)) {
2139			while (rxr->vxrxr_fill != idx) {
2140				rxr->vxrxr_rxd[rxr->vxrxr_fill].gen =
2141				    rxr->vxrxr_gen;
2142				vmxnet3_rxr_increment_fill(rxr);
2143			}
2144		}
2145
2146		if (rxcd->sop) {
2147			KASSERT(rxd->btype == VMXNET3_BTYPE_HEAD,
2148			    ("%s: start of frame w/o head buffer", __func__));
2149			KASSERT(rxr == &rxq->vxrxq_cmd_ring[0],
2150			    ("%s: start of frame not in ring 0", __func__));
2151			KASSERT((idx % sc->vmx_rx_max_chain) == 0,
2152			    ("%s: start of frame at unexpected index %d (%d)",
2153			     __func__, idx, sc->vmx_rx_max_chain));
2154			KASSERT(m_head == NULL,
2155			    ("%s: duplicate start of frame?", __func__));
2156
2157			if (length == 0) {
2158				/* Just ignore this descriptor. */
2159				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2160				goto nextp;
2161			}
2162
2163			if (vmxnet3_newbuf(sc, rxr) != 0) {
2164				rxq->vxrxq_stats.vmrxs_iqdrops++;
2165				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2166				if (!rxcd->eop)
2167					vmxnet3_rxq_discard_chain(rxq);
2168				goto nextp;
2169			}
2170
2171			m->m_pkthdr.rcvif = ifp;
2172			m->m_pkthdr.len = m->m_len = length;
2173			m->m_pkthdr.csum_flags = 0;
2174			m_head = m_tail = m;
2175
2176		} else {
2177			KASSERT(rxd->btype == VMXNET3_BTYPE_BODY,
2178			    ("%s: non-start of frame w/o body buffer", __func__));
2179			KASSERT(m_head != NULL,
2180			    ("%s: frame not started?", __func__));
2181
2182			if (vmxnet3_newbuf(sc, rxr) != 0) {
2183				rxq->vxrxq_stats.vmrxs_iqdrops++;
2184				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2185				if (!rxcd->eop)
2186					vmxnet3_rxq_discard_chain(rxq);
2187				m_freem(m_head);
2188				m_head = m_tail = NULL;
2189				goto nextp;
2190			}
2191
2192			m->m_len = length;
2193			m_head->m_pkthdr.len += length;
2194			m_tail->m_next = m;
2195			m_tail = m;
2196		}
2197
2198		if (rxcd->eop) {
2199			vmxnet3_rxq_input(rxq, rxcd, m_head);
2200			m_head = m_tail = NULL;
2201
2202			/* Must recheck after dropping the Rx lock. */
2203			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2204				break;
2205		}
2206
2207nextp:
2208		if (__predict_false(rxq->vxrxq_rs->update_rxhead)) {
2209			int qid = rxcd->qid;
2210			bus_size_t r;
2211
2212			idx = (idx + 1) % rxr->vxrxr_ndesc;
2213			if (qid >= sc->vmx_nrxqueues) {
2214				qid -= sc->vmx_nrxqueues;
2215				r = VMXNET3_BAR0_RXH2(qid);
2216			} else
2217				r = VMXNET3_BAR0_RXH1(qid);
2218			vmxnet3_write_bar0(sc, r, idx);
2219		}
2220	}
2221}
2222
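/*
 * Single-vector (INTx or MSI) interrupt handler: one vector services
 * events plus the first Rx and Tx queues. With INTx the BAR1 interrupt
 * register is read first; a value of zero indicates the interrupt is not
 * ours.
 */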
2223static void
2224vmxnet3_legacy_intr(void *xsc)
2225{
2226	struct vmxnet3_softc *sc;
2227	struct vmxnet3_rxqueue *rxq;
2228	struct vmxnet3_txqueue *txq;
2229	struct ifnet *ifp;
2230
2231	sc = xsc;
2232	rxq = &sc->vmx_rxq[0];
2233	txq = &sc->vmx_txq[0];
2234	ifp = sc->vmx_ifp;
2235
2236	if (sc->vmx_intr_type == VMXNET3_IT_LEGACY) {
2237		if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
2238			return;
2239	}
2240	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2241		vmxnet3_disable_all_intrs(sc);
2242
2243	if (sc->vmx_ds->event != 0)
2244		vmxnet3_evintr(sc);
2245
2246	VMXNET3_RXQ_LOCK(rxq);
2247	vmxnet3_rxq_eof(rxq);
2248	VMXNET3_RXQ_UNLOCK(rxq);
2249
2250	VMXNET3_TXQ_LOCK(txq);
2251	vmxnet3_txq_eof(txq);
2252	vmxnet3_txq_start(txq);
2253	VMXNET3_TXQ_UNLOCK(txq);
2254
2255	vmxnet3_enable_all_intrs(sc);
2256}
2257
2258static void
2259vmxnet3_txq_intr(void *xtxq)
2260{
2261	struct vmxnet3_softc *sc;
2262	struct vmxnet3_txqueue *txq;
2263	struct ifnet *ifp;
2264
2265	txq = xtxq;
2266	sc = txq->vxtxq_sc;
2267	ifp = sc->vmx_ifp;
2268
2269	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2270		vmxnet3_disable_intr(sc, txq->vxtxq_intr_idx);
2271
2272	VMXNET3_TXQ_LOCK(txq);
2273	vmxnet3_txq_eof(txq);
2274	vmxnet3_txq_start(txq);
2275	VMXNET3_TXQ_UNLOCK(txq);
2276
2277	vmxnet3_enable_intr(sc, txq->vxtxq_intr_idx);
2278}
2279
2280static void
2281vmxnet3_rxq_intr(void *xrxq)
2282{
2283	struct vmxnet3_softc *sc;
2284	struct vmxnet3_rxqueue *rxq;
2285
2286	rxq = xrxq;
2287	sc = rxq->vxrxq_sc;
2288
2289	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2290		vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx);
2291
2292	VMXNET3_RXQ_LOCK(rxq);
2293	vmxnet3_rxq_eof(rxq);
2294	VMXNET3_RXQ_UNLOCK(rxq);
2295
2296	vmxnet3_enable_intr(sc, rxq->vxrxq_intr_idx);
2297}
2298
2299static void
2300vmxnet3_event_intr(void *xsc)
2301{
2302	struct vmxnet3_softc *sc;
2303
2304	sc = xsc;
2305
2306	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2307		vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
2308
2309	if (sc->vmx_ds->event != 0)
2310		vmxnet3_evintr(sc);
2311
2312	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2313}
2314
2315static void
2316vmxnet3_txstop(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2317{
2318	struct vmxnet3_txring *txr;
2319	struct vmxnet3_txbuf *txb;
2320	int i;
2321
2322	txr = &txq->vxtxq_cmd_ring;
2323
2324	for (i = 0; i < txr->vxtxr_ndesc; i++) {
2325		txb = &txr->vxtxr_txbuf[i];
2326
2327		if (txb->vtxb_m == NULL)
2328			continue;
2329
2330		bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
2331		    BUS_DMASYNC_POSTWRITE);
2332		bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap);
2333		m_freem(txb->vtxb_m);
2334		txb->vtxb_m = NULL;
2335	}
2336}
2337
2338static void
2339vmxnet3_rxstop(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2340{
2341	struct vmxnet3_rxring *rxr;
2342	struct vmxnet3_rxbuf *rxb;
2343	int i, j;
2344
2345	if (rxq->vxrxq_mhead != NULL) {
2346		m_freem(rxq->vxrxq_mhead);
2347		rxq->vxrxq_mhead = NULL;
2348		rxq->vxrxq_mtail = NULL;
2349	}
2350
2351	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
2352		rxr = &rxq->vxrxq_cmd_ring[i];
2353
2354		for (j = 0; j < rxr->vxrxr_ndesc; j++) {
2355			rxb = &rxr->vxrxr_rxbuf[j];
2356
2357			if (rxb->vrxb_m == NULL)
2358				continue;
2359
2360			bus_dmamap_sync(rxr->vxrxr_rxtag, rxb->vrxb_dmamap,
2361			    BUS_DMASYNC_POSTREAD);
2362			bus_dmamap_unload(rxr->vxrxr_rxtag, rxb->vrxb_dmamap);
2363			m_freem(rxb->vrxb_m);
2364			rxb->vrxb_m = NULL;
2365		}
2366	}
2367}
2368
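/*
 * Rendezvous with the interrupt handlers: taking and dropping each queue
 * lock guarantees that any handler still running on another CPU has
 * finished with the queue before it is torn down.
 */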
2369static void
2370vmxnet3_stop_rendezvous(struct vmxnet3_softc *sc)
2371{
2372	struct vmxnet3_rxqueue *rxq;
2373	struct vmxnet3_txqueue *txq;
2374	int i;
2375
2376	for (i = 0; i < sc->vmx_nrxqueues; i++) {
2377		rxq = &sc->vmx_rxq[i];
2378		VMXNET3_RXQ_LOCK(rxq);
2379		VMXNET3_RXQ_UNLOCK(rxq);
2380	}
2381
2382	for (i = 0; i < sc->vmx_ntxqueues; i++) {
2383		txq = &sc->vmx_txq[i];
2384		VMXNET3_TXQ_LOCK(txq);
2385		VMXNET3_TXQ_UNLOCK(txq);
2386	}
2387}
2388
2389static void
2390vmxnet3_stop(struct vmxnet3_softc *sc)
2391{
2392	struct ifnet *ifp;
2393	int q;
2394
2395	ifp = sc->vmx_ifp;
2396	VMXNET3_CORE_LOCK_ASSERT(sc);
2397
2398	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2399	sc->vmx_link_active = 0;
2400	callout_stop(&sc->vmx_tick);
2401
2402	/* Disable interrupts. */
2403	vmxnet3_disable_all_intrs(sc);
2404	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
2405
2406	vmxnet3_stop_rendezvous(sc);
2407
2408	for (q = 0; q < sc->vmx_ntxqueues; q++)
2409		vmxnet3_txstop(sc, &sc->vmx_txq[q]);
2410	for (q = 0; q < sc->vmx_nrxqueues; q++)
2411		vmxnet3_rxstop(sc, &sc->vmx_rxq[q]);
2412
2413	vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
2414}
2415
2416static void
2417vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2418{
2419	struct vmxnet3_txring *txr;
2420	struct vmxnet3_comp_ring *txc;
2421
2422	txr = &txq->vxtxq_cmd_ring;
2423	txr->vxtxr_head = 0;
2424	txr->vxtxr_next = 0;
2425	txr->vxtxr_gen = VMXNET3_INIT_GEN;
2426	bzero(txr->vxtxr_txd,
2427	    txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc));
2428
2429	txc = &txq->vxtxq_comp_ring;
2430	txc->vxcr_next = 0;
2431	txc->vxcr_gen = VMXNET3_INIT_GEN;
2432	bzero(txc->vxcr_u.txcd,
2433	    txc->vxcr_ndesc * sizeof(struct vmxnet3_txcompdesc));
2434}
2435
2436static int
2437vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2438{
2439	struct ifnet *ifp;
2440	struct vmxnet3_rxring *rxr;
2441	struct vmxnet3_comp_ring *rxc;
2442	int i, populate, idx, frame_size, error;
2443
2444	ifp = sc->vmx_ifp;
2445	frame_size = ETHER_ALIGN + sizeof(struct ether_vlan_header) +
2446	    ifp->if_mtu;
2447
2448	/*
2449	 * If the MTU causes us to exceed what a regular sized cluster can
2450	 * handle, we allocate a second MJUMPAGESIZE cluster after it in
2451	 * ring 0. If in use, ring 1 always contains MJUMPAGESIZE clusters.
2452	 *
2453	 * Keep rx_max_chain a divisor of the maximum Rx ring size to make
2454	 * our life easier. We do not support changing the ring size after
2455	 * the attach.
2456	 */
2457	if (frame_size <= MCLBYTES)
2458		sc->vmx_rx_max_chain = 1;
2459	else
2460		sc->vmx_rx_max_chain = 2;
2461
2462	/*
2463	 * Only populate ring 1 if the configuration will take advantage
2464	 * of it. That is either when LRO is enabled or the frame size
2465	 * exceeds what ring 0 can contain.
2466	 */
2467	if ((ifp->if_capenable & IFCAP_LRO) == 0 &&
2468	    frame_size <= MCLBYTES + MJUMPAGESIZE)
2469		populate = 1;
2470	else
2471		populate = VMXNET3_RXRINGS_PERQ;
2472
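	/*
	 * For example (assuming MJUMPAGESIZE is the typical 4KB page): with
	 * the default 1500 byte MTU the frame fits in a single MCLBYTES
	 * cluster, so rx_max_chain is 1 and only ring 0 is populated unless
	 * LRO is enabled. With a 9000 byte MTU the frame exceeds even
	 * MCLBYTES + MJUMPAGESIZE, so rx_max_chain is 2 and both rings are
	 * filled.
	 */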
2473	for (i = 0; i < populate; i++) {
2474		rxr = &rxq->vxrxq_cmd_ring[i];
2475		rxr->vxrxr_fill = 0;
2476		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
2477		bzero(rxr->vxrxr_rxd,
2478		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2479
2480		for (idx = 0; idx < rxr->vxrxr_ndesc; idx++) {
2481			error = vmxnet3_newbuf(sc, rxr);
2482			if (error)
2483				return (error);
2484		}
2485	}
2486
2487	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
2488		rxr = &rxq->vxrxq_cmd_ring[i];
2489		rxr->vxrxr_fill = 0;
2490		rxr->vxrxr_gen = 0;
2491		bzero(rxr->vxrxr_rxd,
2492		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2493	}
2494
2495	rxc = &rxq->vxrxq_comp_ring;
2496	rxc->vxcr_next = 0;
2497	rxc->vxcr_gen = VMXNET3_INIT_GEN;
2498	bzero(rxc->vxcr_u.rxcd,
2499	    rxc->vxcr_ndesc * sizeof(struct vmxnet3_rxcompdesc));
2500
2501	return (0);
2502}
2503
2504static int
2505vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
2506{
2507	device_t dev;
2508	int q, error;
2509
2510	dev = sc->vmx_dev;
2511
2512	for (q = 0; q < sc->vmx_ntxqueues; q++)
2513		vmxnet3_txinit(sc, &sc->vmx_txq[q]);
2514
2515	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2516		error = vmxnet3_rxinit(sc, &sc->vmx_rxq[q]);
2517		if (error) {
2518			device_printf(dev, "cannot populate Rx queue %d\n", q);
2519			return (error);
2520		}
2521	}
2522
2523	return (0);
2524}
2525
2526static int
2527vmxnet3_enable_device(struct vmxnet3_softc *sc)
2528{
2529	int q;
2530
2531	if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
2532		device_printf(sc->vmx_dev, "device enable command failed!\n");
2533		return (1);
2534	}
2535
2536	/* Reset the Rx queue heads. */
2537	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2538		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
2539		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
2540	}
2541
2542	return (0);
2543}
2544
2545static void
2546vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
2547{
2548	struct ifnet *ifp;
2549
2550	ifp = sc->vmx_ifp;
2551
2552	vmxnet3_set_rxfilter(sc);
2553
2554	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2555		bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter,
2556		    sizeof(sc->vmx_ds->vlan_filter));
2557	else
2558		bzero(sc->vmx_ds->vlan_filter,
2559		    sizeof(sc->vmx_ds->vlan_filter));
2560	vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
2561}
2562
2563static int
2564vmxnet3_reinit(struct vmxnet3_softc *sc)
2565{
2566
2567	vmxnet3_reinit_interface(sc);
2568	vmxnet3_reinit_shared_data(sc);
2569
2570	if (vmxnet3_reinit_queues(sc) != 0)
2571		return (ENXIO);
2572
2573	if (vmxnet3_enable_device(sc) != 0)
2574		return (ENXIO);
2575
2576	vmxnet3_reinit_rxfilters(sc);
2577
2578	return (0);
2579}
2580
2581static void
2582vmxnet3_init_locked(struct vmxnet3_softc *sc)
2583{
2584	struct ifnet *ifp;
2585
2586	ifp = sc->vmx_ifp;
2587
2588	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2589		return;
2590
2591	vmxnet3_stop(sc);
2592
2593	if (vmxnet3_reinit(sc) != 0) {
2594		vmxnet3_stop(sc);
2595		return;
2596	}
2597
2598	ifp->if_drv_flags |= IFF_DRV_RUNNING;
2599	vmxnet3_link_status(sc);
2600
2601	vmxnet3_enable_all_intrs(sc);
2602	callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
2603}
2604
2605static void
2606vmxnet3_init(void *xsc)
2607{
2608	struct vmxnet3_softc *sc;
2609
2610	sc = xsc;
2611
2612	VMXNET3_CORE_LOCK(sc);
2613	vmxnet3_init_locked(sc);
2614	VMXNET3_CORE_UNLOCK(sc);
2615}
2616
2617/*
2618 * BMV: Much of this can go away once we finally have offsets in
2619 * the mbuf packet header. Bug andre@.
2620 */
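/*
 * Parse the frame headers to determine the ether type, L4 protocol and
 * the header length ("start") the device needs for offload. For plain
 * checksum offload, start is the offset of the L4 header. For TSO, the
 * TCP checksum field is additionally seeded with the pseudo-header
 * checksum and start is extended past the TCP header, since the device
 * expects the full protocol header length in that case.
 */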
2621static int
2622vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *txq, struct mbuf *m,
2623    int *etype, int *proto, int *start)
2624{
2625	struct ether_vlan_header *evh;
2626	int offset;
2627#if defined(INET)
2628	struct ip *ip = NULL;
2629	struct ip iphdr;
2630#endif
2631#if defined(INET6)
2632	struct ip6_hdr *ip6 = NULL;
2633	struct ip6_hdr ip6hdr;
2634#endif
2635
2636	evh = mtod(m, struct ether_vlan_header *);
2637	if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2638		/* BMV: We should handle nested VLAN tags too. */
2639		*etype = ntohs(evh->evl_proto);
2640		offset = sizeof(struct ether_vlan_header);
2641	} else {
2642		*etype = ntohs(evh->evl_encap_proto);
2643		offset = sizeof(struct ether_header);
2644	}
2645
2646	switch (*etype) {
2647#if defined(INET)
2648	case ETHERTYPE_IP:
2649		if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
2650			m_copydata(m, offset, sizeof(struct ip),
2651			    (caddr_t) &iphdr);
2652			ip = &iphdr;
2653		} else
2654			ip = mtodo(m, offset);
2655		*proto = ip->ip_p;
2656		*start = offset + (ip->ip_hl << 2);
2657		break;
2658#endif
2659#if defined(INET6)
2660	case ETHERTYPE_IPV6:
2661		if (__predict_false(m->m_len <
2662		    offset + sizeof(struct ip6_hdr))) {
2663			m_copydata(m, offset, sizeof(struct ip6_hdr),
2664			    (caddr_t) &ip6hdr);
2665			ip6 = &ip6hdr;
2666		} else
2667			ip6 = mtodo(m, offset);
2668		*proto = -1;
2669		*start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
2670		/* Assert the network stack sent us a valid packet. */
2671		KASSERT(*start > offset,
2672		    ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
2673		    *start, offset, *proto));
2674		break;
2675#endif
2676	default:
2677		return (EINVAL);
2678	}
2679
2680	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
2681		struct tcphdr *tcp, tcphdr;
2682		uint16_t sum;
2683
2684		if (__predict_false(*proto != IPPROTO_TCP)) {
2685			/* Likely failed to correctly parse the mbuf. */
2686			return (EINVAL);
2687		}
2688
2689		txq->vxtxq_stats.vmtxs_tso++;
2690
2691		switch (*etype) {
2692#if defined(INET)
2693		case ETHERTYPE_IP:
2694			sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2695			    htons(IPPROTO_TCP));
2696			break;
2697#endif
2698#if defined(INET6)
2699		case ETHERTYPE_IPV6:
2700			sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
2701			break;
2702#endif
2703		default:
2704			sum = 0;
2705			break;
2706		}
2707
2708		if (m->m_len < *start + sizeof(struct tcphdr)) {
2709			m_copyback(m, *start + offsetof(struct tcphdr, th_sum),
2710			    sizeof(uint16_t), (caddr_t) &sum);
2711			m_copydata(m, *start, sizeof(struct tcphdr),
2712			    (caddr_t) &tcphdr);
2713			tcp = &tcphdr;
2714		} else {
2715			tcp = mtodo(m, *start);
2716			tcp->th_sum = sum;
2717		}
2718
2719		/*
2720		 * For TSO, the size of the protocol header is also
2721		 * included in the descriptor header size.
2722		 */
2723		*start += (tcp->th_off << 2);
2724	} else
2725		txq->vxtxq_stats.vmtxs_csum++;
2726
2727	return (0);
2728}
2729
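/*
 * Load an mbuf chain for DMA. If the chain has too many segments
 * (EFBIG), it is defragmented into a single cluster and the load is
 * retried once before giving up.
 */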
2730static int
2731vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *txq, struct mbuf **m0,
2732    bus_dmamap_t dmap, bus_dma_segment_t segs[], int *nsegs)
2733{
2734	struct vmxnet3_txring *txr;
2735	struct mbuf *m;
2736	bus_dma_tag_t tag;
2737	int error;
2738
2739	txr = &txq->vxtxq_cmd_ring;
2740	m = *m0;
2741	tag = txr->vxtxr_txtag;
2742
2743	error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0);
2744	if (error == 0 || error != EFBIG)
2745		return (error);
2746
2747	m = m_defrag(m, M_NOWAIT);
2748	if (m != NULL) {
2749		*m0 = m;
2750		error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0);
2751	} else
2752		error = ENOBUFS;
2753
2754	if (error) {
2755		m_freem(*m0);
2756		*m0 = NULL;
2757		txq->vxtxq_sc->vmx_stats.vmst_defrag_failed++;
2758	} else
2759		txq->vxtxq_sc->vmx_stats.vmst_defragged++;
2760
2761	return (error);
2762}
2763
2764static void
2765vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *txq, bus_dmamap_t dmap)
2766{
2767	struct vmxnet3_txring *txr;
2768
2769	txr = &txq->vxtxq_cmd_ring;
2770	bus_dmamap_unload(txr->vxtxr_txtag, dmap);
2771}
2772
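/*
 * Encapsulate an mbuf chain into Tx descriptors. Each descriptor is
 * written with the generation bit inverted so the device ignores the
 * partially built chain; only after a write barrier is the generation of
 * the start-of-packet descriptor flipped, handing the entire frame to
 * the device in one step.
 */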
2773static int
2774vmxnet3_txq_encap(struct vmxnet3_txqueue *txq, struct mbuf **m0)
2775{
2776	struct vmxnet3_softc *sc;
2777	struct ifnet *ifp;
2778	struct vmxnet3_txring *txr;
2779	struct vmxnet3_txdesc *txd, *sop;
2780	struct mbuf *m;
2781	bus_dmamap_t dmap;
2782	bus_dma_segment_t segs[VMXNET3_TX_MAXSEGS];
2783	int i, gen, nsegs, etype, proto, start, error;
2784
2785	sc = txq->vxtxq_sc;
2786	ifp = sc->vmx_ifp;
2787	start = 0;
2788	txd = NULL;
2789	txr = &txq->vxtxq_cmd_ring;
2790	dmap = txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_dmamap;
2791
2792	error = vmxnet3_txq_load_mbuf(txq, m0, dmap, segs, &nsegs);
2793	if (error)
2794		return (error);
2795
2796	m = *m0;
2797	M_ASSERTPKTHDR(m);
2798	KASSERT(nsegs <= VMXNET3_TX_MAXSEGS,
2799	    ("%s: mbuf %p with too many segments %d", __func__, m, nsegs));
2800
2801	if (VMXNET3_TXRING_AVAIL(txr) < nsegs) {
2802		txq->vxtxq_stats.vmtxs_full++;
2803		vmxnet3_txq_unload_mbuf(txq, dmap);
2804		return (ENOSPC);
2805	} else if (m->m_pkthdr.csum_flags & VMXNET3_CSUM_ALL_OFFLOAD) {
2806		error = vmxnet3_txq_offload_ctx(txq, m, &etype, &proto, &start);
2807		if (error) {
2808			txq->vxtxq_stats.vmtxs_offload_failed++;
2809			vmxnet3_txq_unload_mbuf(txq, dmap);
2810			m_freem(m);
2811			*m0 = NULL;
2812			return (error);
2813		}
2814	}
2815
2816	txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_m = m;
2817	sop = &txr->vxtxr_txd[txr->vxtxr_head];
2818	gen = txr->vxtxr_gen ^ 1;	/* Still owned by the CPU. */
2819
2820	for (i = 0; i < nsegs; i++) {
2821		txd = &txr->vxtxr_txd[txr->vxtxr_head];
2822
2823		txd->addr = segs[i].ds_addr;
2824		txd->len = segs[i].ds_len;
2825		txd->gen = gen;
2826		txd->dtype = 0;
2827		txd->offload_mode = VMXNET3_OM_NONE;
2828		txd->offload_pos = 0;
2829		txd->hlen = 0;
2830		txd->eop = 0;
2831		txd->compreq = 0;
2832		txd->vtag_mode = 0;
2833		txd->vtag = 0;
2834
2835		if (++txr->vxtxr_head == txr->vxtxr_ndesc) {
2836			txr->vxtxr_head = 0;
2837			txr->vxtxr_gen ^= 1;
2838		}
2839		gen = txr->vxtxr_gen;
2840	}
2841	txd->eop = 1;
2842	txd->compreq = 1;
2843
2844	if (m->m_flags & M_VLANTAG) {
2845		sop->vtag_mode = 1;
2846		sop->vtag = m->m_pkthdr.ether_vtag;
2847	}
2848
2849	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
2850		sop->offload_mode = VMXNET3_OM_TSO;
2851		sop->hlen = start;
2852		sop->offload_pos = m->m_pkthdr.tso_segsz;
2853	} else if (m->m_pkthdr.csum_flags & (VMXNET3_CSUM_OFFLOAD |
2854	    VMXNET3_CSUM_OFFLOAD_IPV6)) {
2855		sop->offload_mode = VMXNET3_OM_CSUM;
2856		sop->hlen = start;
2857		sop->offload_pos = start + m->m_pkthdr.csum_data;
2858	}
2859
2860	/* Finally, change the ownership. */
2861	vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
2862	sop->gen ^= 1;
2863
2864	txq->vxtxq_ts->npending += nsegs;
2865	if (txq->vxtxq_ts->npending >= txq->vxtxq_ts->intr_threshold) {
2866		txq->vxtxq_ts->npending = 0;
2867		vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id),
2868		    txr->vxtxr_head);
2869	}
2870
2871	return (0);
2872}
2873
2874#ifdef VMXNET3_LEGACY_TX
2875
2876static void
2877vmxnet3_start_locked(struct ifnet *ifp)
2878{
2879	struct vmxnet3_softc *sc;
2880	struct vmxnet3_txqueue *txq;
2881	struct vmxnet3_txring *txr;
2882	struct mbuf *m_head;
2883	int tx, avail;
2884
2885	sc = ifp->if_softc;
2886	txq = &sc->vmx_txq[0];
2887	txr = &txq->vxtxq_cmd_ring;
2888	tx = 0;
2889
2890	VMXNET3_TXQ_LOCK_ASSERT(txq);
2891
2892	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2893	    sc->vmx_link_active == 0)
2894		return;
2895
2896	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
2897		if ((avail = VMXNET3_TXRING_AVAIL(txr)) < 2)
2898			break;
2899
2900		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
2901		if (m_head == NULL)
2902			break;
2903
2904		/* Assume the worst case if this mbuf is the head of a chain. */
2905		if (m_head->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) {
2906			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2907			break;
2908		}
2909
2910		if (vmxnet3_txq_encap(txq, &m_head) != 0) {
2911			if (m_head != NULL)
2912				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2913			break;
2914		}
2915
2916		tx++;
2917		ETHER_BPF_MTAP(ifp, m_head);
2918	}
2919
2920	if (tx > 0)
2921		txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
2922}
2923
2924static void
2925vmxnet3_start(struct ifnet *ifp)
2926{
2927	struct vmxnet3_softc *sc;
2928	struct vmxnet3_txqueue *txq;
2929
2930	sc = ifp->if_softc;
2931	txq = &sc->vmx_txq[0];
2932
2933	VMXNET3_TXQ_LOCK(txq);
2934	vmxnet3_start_locked(ifp);
2935	VMXNET3_TXQ_UNLOCK(txq);
2936}
2937
2938#else /* !VMXNET3_LEGACY_TX */
2939
2940static int
2941vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *txq, struct mbuf *m)
2942{
2943	struct vmxnet3_softc *sc;
2944	struct vmxnet3_txring *txr;
2945	struct buf_ring *br;
2946	struct ifnet *ifp;
2947	int tx, avail, error;
2948
2949	sc = txq->vxtxq_sc;
2950	br = txq->vxtxq_br;
2951	ifp = sc->vmx_ifp;
2952	txr = &txq->vxtxq_cmd_ring;
2953	tx = 0;
2954	error = 0;
2955
2956	VMXNET3_TXQ_LOCK_ASSERT(txq);
2957
2958	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2959	    sc->vmx_link_active == 0) {
2960		if (m != NULL)
2961			error = drbr_enqueue(ifp, br, m);
2962		return (error);
2963	}
2964
2965	if (m != NULL) {
2966		error = drbr_enqueue(ifp, br, m);
2967		if (error)
2968			return (error);
2969	}
2970
2971	while ((avail = VMXNET3_TXRING_AVAIL(txr)) >= 2) {
2972		m = drbr_peek(ifp, br);
2973		if (m == NULL)
2974			break;
2975
2976		/* Assume the worst case if this mbuf is the head of a chain. */
2977		if (m->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) {
2978			drbr_putback(ifp, br, m);
2979			break;
2980		}
2981
2982		if (vmxnet3_txq_encap(txq, &m) != 0) {
2983			if (m != NULL)
2984				drbr_putback(ifp, br, m);
2985			else
2986				drbr_advance(ifp, br);
2987			break;
2988		}
2989		drbr_advance(ifp, br);
2990
2991		tx++;
2992		ETHER_BPF_MTAP(ifp, m);
2993	}
2994
2995	if (tx > 0)
2996		txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
2997
2998	return (0);
2999}
3000
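/*
 * Multiqueue transmit entry point: frames carrying a flow id are steered
 * to the corresponding Tx queue, others hash on the current CPU. If the
 * queue lock cannot be taken immediately, the mbuf is queued on the
 * buf_ring and drained later by the deferred task.
 */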
3001static int
3002vmxnet3_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
3003{
3004	struct vmxnet3_softc *sc;
3005	struct vmxnet3_txqueue *txq;
3006	int i, ntxq, error;
3007
3008	sc = ifp->if_softc;
3009	ntxq = sc->vmx_ntxqueues;
3010
3011	if (m->m_flags & M_FLOWID)
3012		i = m->m_pkthdr.flowid % ntxq;
3013	else
3014		i = curcpu % ntxq;
3015
3016	txq = &sc->vmx_txq[i];
3017
3018	if (VMXNET3_TXQ_TRYLOCK(txq) != 0) {
3019		error = vmxnet3_txq_mq_start_locked(txq, m);
3020		VMXNET3_TXQ_UNLOCK(txq);
3021	} else {
3022		error = drbr_enqueue(ifp, txq->vxtxq_br, m);
3023		taskqueue_enqueue(sc->vmx_tq, &txq->vxtxq_defrtask);
3024	}
3025
3026	return (error);
3027}
3028
3029static void
3030vmxnet3_txq_tq_deferred(void *xtxq, int pending)
3031{
3032	struct vmxnet3_softc *sc;
3033	struct vmxnet3_txqueue *txq;
3034
3035	txq = xtxq;
3036	sc = txq->vxtxq_sc;
3037
3038	VMXNET3_TXQ_LOCK(txq);
3039	if (!drbr_empty(sc->vmx_ifp, txq->vxtxq_br))
3040		vmxnet3_txq_mq_start_locked(txq, NULL);
3041	VMXNET3_TXQ_UNLOCK(txq);
3042}
3043
3044#endif /* VMXNET3_LEGACY_TX */
3045
3046static void
3047vmxnet3_txq_start(struct vmxnet3_txqueue *txq)
3048{
3049	struct vmxnet3_softc *sc;
3050	struct ifnet *ifp;
3051
3052	sc = txq->vxtxq_sc;
3053	ifp = sc->vmx_ifp;
3054
3055#ifdef VMXNET3_LEGACY_TX
3056	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
3057		vmxnet3_start_locked(ifp);
3058#else
3059	if (!drbr_empty(ifp, txq->vxtxq_br))
3060		vmxnet3_txq_mq_start_locked(txq, NULL);
3061#endif
3062}
3063
3064static void
3065vmxnet3_tx_start_all(struct vmxnet3_softc *sc)
3066{
3067	struct vmxnet3_txqueue *txq;
3068	int i;
3069
3070	VMXNET3_CORE_LOCK_ASSERT(sc);
3071
3072	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3073		txq = &sc->vmx_txq[i];
3074
3075		VMXNET3_TXQ_LOCK(txq);
3076		vmxnet3_txq_start(txq);
3077		VMXNET3_TXQ_UNLOCK(txq);
3078	}
3079}
3080
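/*
 * The VLAN filter is a 4096-bit vector stored as 128 32-bit words:
 * "idx" selects the word (tag >> 5) and "bit" the position within it
 * (tag & 0x1F). For example, VLAN tag 100 maps to bit 4 of word 3.
 */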
3081static void
3082vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
3083{
3084	struct ifnet *ifp;
3085	int idx, bit;
3086
3087	ifp = sc->vmx_ifp;
3088	idx = (tag >> 5) & 0x7F;
3089	bit = tag & 0x1F;
3090
3091	if (tag == 0 || tag > 4095)
3092		return;
3093
3094	VMXNET3_CORE_LOCK(sc);
3095
3096	/* Update our private VLAN bitvector. */
3097	if (add)
3098		sc->vmx_vlan_filter[idx] |= (1 << bit);
3099	else
3100		sc->vmx_vlan_filter[idx] &= ~(1 << bit);
3101
3102	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
3103		if (add)
3104			sc->vmx_ds->vlan_filter[idx] |= (1 << bit);
3105		else
3106			sc->vmx_ds->vlan_filter[idx] &= ~(1 << bit);
3107		vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
3108	}
3109
3110	VMXNET3_CORE_UNLOCK(sc);
3111}
3112
3113static void
3114vmxnet3_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3115{
3116
3117	if (ifp->if_softc == arg)
3118		vmxnet3_update_vlan_filter(arg, 1, tag);
3119}
3120
3121static void
3122vmxnet3_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3123{
3124
3125	if (ifp->if_softc == arg)
3126		vmxnet3_update_vlan_filter(arg, 0, tag);
3127}
3128
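/*
 * Program the device receive filter: unicast and broadcast frames are
 * always accepted, promiscuous and allmulti follow the interface flags,
 * and up to VMXNET3_MULTICAST_MAX multicast addresses are copied into
 * the shared table. If the table overflows, fall back to allmulti.
 */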
3129static void
3130vmxnet3_set_rxfilter(struct vmxnet3_softc *sc)
3131{
3132	struct ifnet *ifp;
3133	struct vmxnet3_driver_shared *ds;
3134	struct ifmultiaddr *ifma;
3135	u_int mode;
3136
3137	ifp = sc->vmx_ifp;
3138	ds = sc->vmx_ds;
3139
3140	mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
3141	if (ifp->if_flags & IFF_PROMISC)
3142		mode |= VMXNET3_RXMODE_PROMISC;
3143	if (ifp->if_flags & IFF_ALLMULTI)
3144		mode |= VMXNET3_RXMODE_ALLMULTI;
3145	else {
3146		int cnt = 0, overflow = 0;
3147
3148		if_maddr_rlock(ifp);
3149		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3150			if (ifma->ifma_addr->sa_family != AF_LINK)
3151				continue;
3152			else if (cnt == VMXNET3_MULTICAST_MAX) {
3153				overflow = 1;
3154				break;
3155			}
3156
3157			bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
3158			    &sc->vmx_mcast[cnt * ETHER_ADDR_LEN], ETHER_ADDR_LEN);
3159			cnt++;
3160		}
3161		if_maddr_runlock(ifp);
3162
3163		if (overflow != 0) {
3164			cnt = 0;
3165			mode |= VMXNET3_RXMODE_ALLMULTI;
3166		} else if (cnt > 0)
3167			mode |= VMXNET3_RXMODE_MCAST;
3168		ds->mcast_tablelen = cnt * ETHER_ADDR_LEN;
3169	}
3170
3171	ds->rxmode = mode;
3172
3173	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
3174	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
3175}
3176
3177static int
3178vmxnet3_change_mtu(struct vmxnet3_softc *sc, int mtu)
3179{
3180	struct ifnet *ifp;
3181
3182	ifp = sc->vmx_ifp;
3183
3184	if (mtu < VMXNET3_MIN_MTU || mtu > VMXNET3_MAX_MTU)
3185		return (EINVAL);
3186
3187	ifp->if_mtu = mtu;
3188
3189	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
3190		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3191		vmxnet3_init_locked(sc);
3192	}
3193
3194	return (0);
3195}
3196
3197static int
3198vmxnet3_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
3199{
3200	struct vmxnet3_softc *sc;
3201	struct ifreq *ifr;
3202	int reinit, mask, error;
3203
3204	sc = ifp->if_softc;
3205	ifr = (struct ifreq *) data;
3206	error = 0;
3207
3208	switch (cmd) {
3209	case SIOCSIFMTU:
3210		if (ifp->if_mtu != ifr->ifr_mtu) {
3211			VMXNET3_CORE_LOCK(sc);
3212			error = vmxnet3_change_mtu(sc, ifr->ifr_mtu);
3213			VMXNET3_CORE_UNLOCK(sc);
3214		}
3215		break;
3216
3217	case SIOCSIFFLAGS:
3218		VMXNET3_CORE_LOCK(sc);
3219		if (ifp->if_flags & IFF_UP) {
3220			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3221				if ((ifp->if_flags ^ sc->vmx_if_flags) &
3222				    (IFF_PROMISC | IFF_ALLMULTI)) {
3223					vmxnet3_set_rxfilter(sc);
3224				}
3225			} else
3226				vmxnet3_init_locked(sc);
3227		} else {
3228			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3229				vmxnet3_stop(sc);
3230		}
3231		sc->vmx_if_flags = ifp->if_flags;
3232		VMXNET3_CORE_UNLOCK(sc);
3233		break;
3234
3235	case SIOCADDMULTI:
3236	case SIOCDELMULTI:
3237		VMXNET3_CORE_LOCK(sc);
3238		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3239			vmxnet3_set_rxfilter(sc);
3240		VMXNET3_CORE_UNLOCK(sc);
3241		break;
3242
3243	case SIOCSIFMEDIA:
3244	case SIOCGIFMEDIA:
3245		error = ifmedia_ioctl(ifp, ifr, &sc->vmx_media, cmd);
3246		break;
3247
3248	case SIOCSIFCAP:
3249		VMXNET3_CORE_LOCK(sc);
3250		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3251
3252		if (mask & IFCAP_TXCSUM)
3253			ifp->if_capenable ^= IFCAP_TXCSUM;
3254		if (mask & IFCAP_TXCSUM_IPV6)
3255			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
3256		if (mask & IFCAP_TSO4)
3257			ifp->if_capenable ^= IFCAP_TSO4;
3258		if (mask & IFCAP_TSO6)
3259			ifp->if_capenable ^= IFCAP_TSO6;
3260
3261		if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO |
3262		    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWFILTER)) {
3263			/* Changing these features requires us to reinit. */
3264			reinit = 1;
3265
3266			if (mask & IFCAP_RXCSUM)
3267				ifp->if_capenable ^= IFCAP_RXCSUM;
3268			if (mask & IFCAP_RXCSUM_IPV6)
3269				ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
3270			if (mask & IFCAP_LRO)
3271				ifp->if_capenable ^= IFCAP_LRO;
3272			if (mask & IFCAP_VLAN_HWTAGGING)
3273				ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
3274			if (mask & IFCAP_VLAN_HWFILTER)
3275				ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
3276		} else
3277			reinit = 0;
3278
3279		if (mask & IFCAP_VLAN_HWTSO)
3280			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
3281
3282		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3283			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3284			vmxnet3_init_locked(sc);
3285		}
3286
3287		VMXNET3_CORE_UNLOCK(sc);
3288		VLAN_CAPABILITIES(ifp);
3289		break;
3290
3291	default:
3292		error = ether_ioctl(ifp, cmd, data);
3293		break;
3294	}
3295
3296	VMXNET3_CORE_LOCK_ASSERT_NOTOWNED(sc);
3297
3298	return (error);
3299}
3300
3301#ifndef VMXNET3_LEGACY_TX
3302static void
3303vmxnet3_qflush(struct ifnet *ifp)
3304{
3305	struct vmxnet3_softc *sc;
3306	struct vmxnet3_txqueue *txq;
3307	struct mbuf *m;
3308	int i;
3309
3310	sc = ifp->if_softc;
3311
3312	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3313		txq = &sc->vmx_txq[i];
3314
3315		VMXNET3_TXQ_LOCK(txq);
3316		while ((m = buf_ring_dequeue_sc(txq->vxtxq_br)) != NULL)
3317			m_freem(m);
3318		VMXNET3_TXQ_UNLOCK(txq);
3319	}
3320
3321	if_qflush(ifp);
3322}
3323#endif
3324
3325static int
3326vmxnet3_watchdog(struct vmxnet3_txqueue *txq)
3327{
3328	struct vmxnet3_softc *sc;
3329
3330	sc = txq->vxtxq_sc;
3331
3332	VMXNET3_TXQ_LOCK(txq);
3333	if (txq->vxtxq_watchdog == 0 || --txq->vxtxq_watchdog) {
3334		VMXNET3_TXQ_UNLOCK(txq);
3335		return (0);
3336	}
3337	VMXNET3_TXQ_UNLOCK(txq);
3338
3339	if_printf(sc->vmx_ifp, "watchdog timeout on queue %d\n",
3340	    txq->vxtxq_id);
3341	return (1);
3342}
3343
3344static void
3345vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
3346{
3347
3348	vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
3349}
3350
3351static void
3352vmxnet3_txq_accum_stats(struct vmxnet3_txqueue *txq,
3353    struct vmxnet3_txq_stats *accum)
3354{
3355	struct vmxnet3_txq_stats *st;
3356
3357	st = &txq->vxtxq_stats;
3358
3359	accum->vmtxs_opackets += st->vmtxs_opackets;
3360	accum->vmtxs_obytes += st->vmtxs_obytes;
3361	accum->vmtxs_omcasts += st->vmtxs_omcasts;
3362	accum->vmtxs_csum += st->vmtxs_csum;
3363	accum->vmtxs_tso += st->vmtxs_tso;
3364	accum->vmtxs_full += st->vmtxs_full;
3365	accum->vmtxs_offload_failed += st->vmtxs_offload_failed;
3366}
3367
3368static void
3369vmxnet3_rxq_accum_stats(struct vmxnet3_rxqueue *rxq,
3370    struct vmxnet3_rxq_stats *accum)
3371{
3372	struct vmxnet3_rxq_stats *st;
3373
3374	st = &rxq->vxrxq_stats;
3375
3376	accum->vmrxs_ipackets += st->vmrxs_ipackets;
3377	accum->vmrxs_ibytes += st->vmrxs_ibytes;
3378	accum->vmrxs_iqdrops += st->vmrxs_iqdrops;
3379	accum->vmrxs_ierrors += st->vmrxs_ierrors;
3380}
3381
3382static void
3383vmxnet3_accumulate_stats(struct vmxnet3_softc *sc)
3384{
3385	struct ifnet *ifp;
3386	struct vmxnet3_statistics *st;
3387	struct vmxnet3_txq_stats txaccum;
3388	struct vmxnet3_rxq_stats rxaccum;
3389	int i;
3390
3391	ifp = sc->vmx_ifp;
3392	st = &sc->vmx_stats;
3393
3394	bzero(&txaccum, sizeof(struct vmxnet3_txq_stats));
3395	bzero(&rxaccum, sizeof(struct vmxnet3_rxq_stats));
3396
3397	for (i = 0; i < sc->vmx_ntxqueues; i++)
3398		vmxnet3_txq_accum_stats(&sc->vmx_txq[i], &txaccum);
3399	for (i = 0; i < sc->vmx_nrxqueues; i++)
3400		vmxnet3_rxq_accum_stats(&sc->vmx_rxq[i], &rxaccum);
3401
3402	/*
3403	 * With the exception of if_ierrors, these ifnet statistics are
3404	 * only updated in the driver, so just set them to our accumulated
3405	 * values. if_ierrors is updated in ether_input() for malformed
3406	 * frames that we should have already discarded.
3407	 */
3408	ifp->if_ipackets = rxaccum.vmrxs_ipackets;
3409	ifp->if_iqdrops = rxaccum.vmrxs_iqdrops;
3410	ifp->if_ierrors = rxaccum.vmrxs_ierrors;
3411	ifp->if_opackets = txaccum.vmtxs_opackets;
3412#ifndef VMXNET3_LEGACY_TX
3413	ifp->if_obytes = txaccum.vmtxs_obytes;
3414	ifp->if_omcasts = txaccum.vmtxs_omcasts;
3415#endif
3416}
3417
3418static void
3419vmxnet3_tick(void *xsc)
3420{
3421	struct vmxnet3_softc *sc;
3422	struct ifnet *ifp;
3423	int i, timedout;
3424
3425	sc = xsc;
3426	ifp = sc->vmx_ifp;
3427	timedout = 0;
3428
3429	VMXNET3_CORE_LOCK_ASSERT(sc);
3430
3431	vmxnet3_accumulate_stats(sc);
3432	vmxnet3_refresh_host_stats(sc);
3433
3434	for (i = 0; i < sc->vmx_ntxqueues; i++)
3435		timedout |= vmxnet3_watchdog(&sc->vmx_txq[i]);
3436
3437	if (timedout != 0) {
3438		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3439		vmxnet3_init_locked(sc);
3440	} else
3441		callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
3442}
3443
3444static int
3445vmxnet3_link_is_up(struct vmxnet3_softc *sc)
3446{
3447	uint32_t status;
3448
3449	/* Also update the link speed while here. */
3450	status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
3451	sc->vmx_link_speed = status >> 16;
3452	return (!!(status & 0x1));
3453}
3454
3455static void
3456vmxnet3_link_status(struct vmxnet3_softc *sc)
3457{
3458	struct ifnet *ifp;
3459	int link;
3460
3461	ifp = sc->vmx_ifp;
3462	link = vmxnet3_link_is_up(sc);
3463
3464	if (link != 0 && sc->vmx_link_active == 0) {
3465		sc->vmx_link_active = 1;
3466		if_link_state_change(ifp, LINK_STATE_UP);
3467	} else if (link == 0 && sc->vmx_link_active != 0) {
3468		sc->vmx_link_active = 0;
3469		if_link_state_change(ifp, LINK_STATE_DOWN);
3470	}
3471}
3472
3473static void
3474vmxnet3_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3475{
3476	struct vmxnet3_softc *sc;
3477
3478	sc = ifp->if_softc;
3479
3480	ifmr->ifm_active = IFM_ETHER | IFM_AUTO;
3481	ifmr->ifm_status = IFM_AVALID;
3482
3483	VMXNET3_CORE_LOCK(sc);
3484	if (vmxnet3_link_is_up(sc) != 0)
3485		ifmr->ifm_status |= IFM_ACTIVE;
3486	else
3487		ifmr->ifm_status |= IFM_NONE;
3488	VMXNET3_CORE_UNLOCK(sc);
3489}
3490
3491static int
3492vmxnet3_media_change(struct ifnet *ifp)
3493{
3494
3495	/* Ignore. */
3496	return (0);
3497}
3498
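/*
 * The MAC address is exchanged with the device through two BAR1
 * registers: MACL carries bytes 0-3 and MACH bytes 4-5, least
 * significant byte first.
 */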
3499static void
3500vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
3501{
3502	uint32_t ml, mh;
3503
3504	ml  = sc->vmx_lladdr[0];
3505	ml |= sc->vmx_lladdr[1] << 8;
3506	ml |= sc->vmx_lladdr[2] << 16;
3507	ml |= sc->vmx_lladdr[3] << 24;
3508	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
3509
3510	mh  = sc->vmx_lladdr[4];
3511	mh |= sc->vmx_lladdr[5] << 8;
3512	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
3513}
3514
3515static void
3516vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
3517{
3518	uint32_t ml, mh;
3519
3520	ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
3521	mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
3522
3523	sc->vmx_lladdr[0] = ml;
3524	sc->vmx_lladdr[1] = ml >> 8;
3525	sc->vmx_lladdr[2] = ml >> 16;
3526	sc->vmx_lladdr[3] = ml >> 24;
3527	sc->vmx_lladdr[4] = mh;
3528	sc->vmx_lladdr[5] = mh >> 8;
3529}
3530
3531static void
3532vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
3533    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3534{
3535	struct sysctl_oid *node, *txsnode;
3536	struct sysctl_oid_list *list, *txslist;
3537	struct vmxnet3_txq_stats *stats;
3538	struct UPT1_TxStats *txstats;
3539	char namebuf[16];
3540
3541	stats = &txq->vxtxq_stats;
3542	txstats = &txq->vxtxq_ts->stats;
3543
3544	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id);
3545	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
3546	    NULL, "Transmit Queue");
3547	txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);
3548
3549	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
3550	    &stats->vmtxs_opackets, "Transmit packets");
3551	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
3552	    &stats->vmtxs_obytes, "Transmit bytes");
3553	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
3554	    &stats->vmtxs_omcasts, "Transmit multicasts");
3555	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3556	    &stats->vmtxs_csum, "Transmit checksum offloaded");
3557	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
3558	    &stats->vmtxs_tso, "Transmit TCP segmentation offloaded");
3559	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ringfull", CTLFLAG_RD,
3560	    &stats->vmtxs_full, "Transmit ring full");
3561	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "offload_failed", CTLFLAG_RD,
3562	    &stats->vmtxs_offload_failed, "Transmit checksum offload failed");
3563
3564	/*
3565	 * Add statistics reported by the host. These are updated once
3566	 * per second.
3567	 */
3568	txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
3569	    NULL, "Host Statistics");
3570	txslist = SYSCTL_CHILDREN(txsnode);
3571	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
3572	    &txstats->TSO_packets, "TSO packets");
3573	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
3574	    &txstats->TSO_bytes, "TSO bytes");
3575	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
3576	    &txstats->ucast_packets, "Unicast packets");
3577	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
3578	    &txstats->ucast_bytes, "Unicast bytes");
3579	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
3580	    &txstats->mcast_packets, "Multicast packets");
3581	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
3582	    &txstats->mcast_bytes, "Multicast bytes");
3583	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
3584	    &txstats->error, "Errors");
3585	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
3586	    &txstats->discard, "Discards");
3587}
3588
3589static void
3590vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
3591    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3592{
3593	struct sysctl_oid *node, *rxsnode;
3594	struct sysctl_oid_list *list, *rxslist;
3595	struct vmxnet3_rxq_stats *stats;
3596	struct UPT1_RxStats *rxstats;
3597	char namebuf[16];
3598
3599	stats = &rxq->vxrxq_stats;
3600	rxstats = &rxq->vxrxq_rs->stats;
3601
3602	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
3603	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
3604	    NULL, "Receive Queue");
3605	rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);
3606
3607	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
3608	    &stats->vmrxs_ipackets, "Receive packets");
3609	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
3610	    &stats->vmrxs_ibytes, "Receive bytes");
3611	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
3612	    &stats->vmrxs_iqdrops, "Receive drops");
3613	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
3614	    &stats->vmrxs_ierrors, "Receive errors");
3615
3616	/*
3617	 * Add statistics reported by the host. These are updated once
3618	 * per second.
3619	 */
3620	rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
3621	    NULL, "Host Statistics");
3622	rxslist = SYSCTL_CHILDREN(rxsnode);
3623	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
3624	    &rxstats->LRO_packets, "LRO packets");
3625	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
3626	    &rxstats->LRO_bytes, "LRO bytes");
3627	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
3628	    &rxstats->ucast_packets, "Unicast packets");
3629	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
3630	    &rxstats->ucast_bytes, "Unicast bytes");
3631	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
3632	    &rxstats->mcast_packets, "Multicast packets");
3633	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
3634	    &rxstats->mcast_bytes, "Multicast bytes");
3635	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
3636	    &rxstats->bcast_packets, "Broadcast packets");
3637	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
3638	    &rxstats->bcast_bytes, "Broadcast bytes");
3639	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
3640	    &rxstats->nobuffer, "No buffer");
3641	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
3642	    &rxstats->error, "Errors");
3643}
3644
3645static void
3646vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
3647    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3648{
3649	struct sysctl_oid *node;
3650	struct sysctl_oid_list *list;
3651	int i;
3652
3653	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3654		struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];
3655
3656		node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
3657		    "debug", CTLFLAG_RD, NULL, "");
3658		list = SYSCTL_CHILDREN(node);
3659
3660		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_head", CTLFLAG_RD,
3661		    &txq->vxtxq_cmd_ring.vxtxr_head, 0, "");
3662		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
3663		    &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
3664		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
3665		    &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
3666		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
3667		    &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
3668		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
3669		    &txq->vxtxq_comp_ring.vxcr_next, 0, "");
3670		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
3671		    &txq->vxtxq_comp_ring.vxcr_ndesc, 0, "");
3672		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
3673		    &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
3674	}
3675
3676	for (i = 0; i < sc->vmx_nrxqueues; i++) {
3677		struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];
3678
3679		node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
3680		    "debug", CTLFLAG_RD, NULL, "");
3681		list = SYSCTL_CHILDREN(node);
3682
3683		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_fill", CTLFLAG_RD,
3684		    &rxq->vxrxq_cmd_ring[0].vxrxr_fill, 0, "");
3685		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
3686		    &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
3687		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
3688		    &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
3689		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_fill", CTLFLAG_RD,
3690		    &rxq->vxrxq_cmd_ring[1].vxrxr_fill, 0, "");
3691		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
3692		    &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
3693		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
3694		    &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
3695		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
3696		    &rxq->vxrxq_comp_ring.vxcr_next, 0, "");
3697		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
3698		    &rxq->vxrxq_comp_ring.vxcr_ndesc, 0, "");
3699		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
3700		    &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
3701	}
3702}
3703
3704static void
3705vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
3706    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3707{
3708	int i;
3709
3710	for (i = 0; i < sc->vmx_ntxqueues; i++)
3711		vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
3712	for (i = 0; i < sc->vmx_nrxqueues; i++)
3713		vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);
3714
3715	vmxnet3_setup_debug_sysctl(sc, ctx, child);
3716}
3717
3718static void
3719vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
3720{
3721	device_t dev;
3722	struct vmxnet3_statistics *stats;
3723	struct sysctl_ctx_list *ctx;
3724	struct sysctl_oid *tree;
3725	struct sysctl_oid_list *child;
3726
3727	dev = sc->vmx_dev;
3728	ctx = device_get_sysctl_ctx(dev);
3729	tree = device_get_sysctl_tree(dev);
3730	child = SYSCTL_CHILDREN(tree);
3731
3732	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_ntxqueues", CTLFLAG_RD,
3733	    &sc->vmx_max_ntxqueues, 0, "Maximum number of Tx queues");
3734	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_nrxqueues", CTLFLAG_RD,
3735	    &sc->vmx_max_nrxqueues, 0, "Maximum number of Rx queues");
3736	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "ntxqueues", CTLFLAG_RD,
3737	    &sc->vmx_ntxqueues, 0, "Number of Tx queues");
3738	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "nrxqueues", CTLFLAG_RD,
3739	    &sc->vmx_nrxqueues, 0, "Number of Rx queues");
3740
3741	stats = &sc->vmx_stats;
3742	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defragged", CTLFLAG_RD,
3743	    &stats->vmst_defragged, 0, "Tx mbuf chains defragged");
3744	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defrag_failed", CTLFLAG_RD,
3745	    &stats->vmst_defrag_failed, 0,
3746	    "Tx mbuf dropped because defrag failed");
3747	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mgetcl_failed", CTLFLAG_RD,
3748	    &stats->vmst_mgetcl_failed, 0, "mbuf cluster allocation failed");
3749	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mbuf_load_failed", CTLFLAG_RD,
3750	    &stats->vmst_mbuf_load_failed, 0, "mbuf load segments failed");
3751
3752	vmxnet3_setup_queue_sysctl(sc, ctx, child);
3753}
3754
3755static void
3756vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
3757{
3758
3759	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
3760}
3761
3762static uint32_t
3763vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
3764{
3765
3766	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
3767}
3768
3769static void
3770vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
3771{
3772
3773	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
3774}
3775
3776static void
3777vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
3778{
3779
3780	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
3781}
3782
3783static uint32_t
3784vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
3785{
3786
3787	vmxnet3_write_cmd(sc, cmd);
3788	bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
3789	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
3790	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
3791}
3792
3793static void
3794vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
3795{
3796
3797	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
3798}
3799
3800static void
3801vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
3802{
3803
3804	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
3805}
3806
3807static void
3808vmxnet3_enable_all_intrs(struct vmxnet3_softc *sc)
3809{
3810	int i;
3811
3812	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
3813	for (i = 0; i < sc->vmx_nintrs; i++)
3814		vmxnet3_enable_intr(sc, i);
3815}
3816
3817static void
3818vmxnet3_disable_all_intrs(struct vmxnet3_softc *sc)
3819{
3820	int i;
3821
3822	sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
3823	for (i = 0; i < sc->vmx_nintrs; i++)
3824		vmxnet3_disable_intr(sc, i);
3825}
3826
3827static void
3828vmxnet3_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3829{
3830	bus_addr_t *baddr = arg;
3831
3832	if (error == 0)
3833		*baddr = segs->ds_addr;
3834}
3835
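/*
 * Allocate a physically contiguous DMA buffer of the requested size and
 * alignment: create a single-segment tag, allocate zeroed memory, and
 * load the map to record the bus address in dma_paddr.
 */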
3836static int
3837vmxnet3_dma_malloc(struct vmxnet3_softc *sc, bus_size_t size, bus_size_t align,
3838    struct vmxnet3_dma_alloc *dma)
3839{
3840	device_t dev;
3841	int error;
3842
3843	dev = sc->vmx_dev;
3844	bzero(dma, sizeof(struct vmxnet3_dma_alloc));
3845
3846	error = bus_dma_tag_create(bus_get_dma_tag(dev),
3847	    align, 0,		/* alignment, bounds */
3848	    BUS_SPACE_MAXADDR,	/* lowaddr */
3849	    BUS_SPACE_MAXADDR,	/* highaddr */
3850	    NULL, NULL,		/* filter, filterarg */
3851	    size,		/* maxsize */
3852	    1,			/* nsegments */
3853	    size,		/* maxsegsize */
3854	    BUS_DMA_ALLOCNOW,	/* flags */
3855	    NULL,		/* lockfunc */
3856	    NULL,		/* lockfuncarg */
3857	    &dma->dma_tag);
3858	if (error) {
3859		device_printf(dev, "bus_dma_tag_create failed: %d\n", error);
3860		goto fail;
3861	}
3862
3863	error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
3864	    BUS_DMA_ZERO | BUS_DMA_NOWAIT, &dma->dma_map);
3865	if (error) {
3866		device_printf(dev, "bus_dmamem_alloc failed: %d\n", error);
3867		goto fail;
3868	}
3869
3870	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3871	    size, vmxnet3_dmamap_cb, &dma->dma_paddr, BUS_DMA_NOWAIT);
3872	if (error) {
3873		device_printf(dev, "bus_dmamap_load failed: %d\n", error);
3874		goto fail;
3875	}
3876
3877	dma->dma_size = size;
3878
3879fail:
3880	if (error)
3881		vmxnet3_dma_free(sc, dma);
3882
3883	return (error);
3884}
3885
3886static void
3887vmxnet3_dma_free(struct vmxnet3_softc *sc, struct vmxnet3_dma_alloc *dma)
3888{
3889
3890	if (dma->dma_tag != NULL) {
3891		if (dma->dma_paddr != 0) {
3892			bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3893			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3894			bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3895		}
3896
3897		if (dma->dma_vaddr != NULL) {
3898			bus_dmamem_free(dma->dma_tag, dma->dma_vaddr,
3899			    dma->dma_map);
3900		}
3901
3902		bus_dma_tag_destroy(dma->dma_tag);
3903	}
3904	bzero(dma, sizeof(struct vmxnet3_dma_alloc));
3905}
3906
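/*
 * Fetch a per-device loader tunable of the form hw.vmx.<unit>.<knob>,
 * returning "def" when the tunable is not set.
 */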
3907static int
3908vmxnet3_tunable_int(struct vmxnet3_softc *sc, const char *knob, int def)
3909{
3910	char path[64];
3911
3912	snprintf(path, sizeof(path),
3913	    "hw.vmx.%d.%s", device_get_unit(sc->vmx_dev), knob);
3914	TUNABLE_INT_FETCH(path, &def);
3915
3916	return (def);
3917}
3918
3919/*
3920 * Since this is a purely paravirtualized device, we do not have
3921 * to worry about DMA coherency. But at times, we must make sure
3922 * both the compiler and CPU do not reorder memory operations.
3923 */
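/*
 * A read barrier follows the generation-bit check on completion
 * descriptors so the descriptor body is not read ahead of its gen bit,
 * and a write barrier precedes flipping the SOP generation bit in
 * vmxnet3_txq_encap so the device never observes a half-written chain.
 */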
3924static inline void
3925vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
3926{
3927
3928	switch (type) {
3929	case VMXNET3_BARRIER_RD:
3930		rmb();
3931		break;
3932	case VMXNET3_BARRIER_WR:
3933		wmb();
3934		break;
3935	case VMXNET3_BARRIER_RDWR:
3936		mb();
3937		break;
3938	default:
3939		panic("%s: bad barrier type %d", __func__, type);
3940	}
3941}
3942