1/*-
2 * Copyright (c) 2013 Tsubai Masanari
3 * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 *
17 * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $
18 */
19
20/* Driver for VMware vmxnet3 virtual ethernet devices. */
21
22#include <sys/cdefs.h>
23__FBSDID("$FreeBSD: head/sys/dev/vmware/vmxnet3/if_vmx.c 263259 2014-03-17 05:45:29Z bryanv $");
24
25#include <sys/param.h>
26#include <sys/systm.h>
27#include <sys/eventhandler.h>
28#include <sys/kernel.h>
29#include <sys/endian.h>
30#include <sys/sockio.h>
31#include <sys/mbuf.h>
32#include <sys/malloc.h>
33#include <sys/module.h>
34#include <sys/socket.h>
35#include <sys/sysctl.h>
36#include <sys/smp.h>
37#include <sys/taskqueue.h>
38#include <vm/vm.h>
39#include <vm/pmap.h>
40
41#include <net/ethernet.h>
42#include <net/if.h>
43#include <net/if_var.h>
44#include <net/if_arp.h>
45#include <net/if_dl.h>
46#include <net/if_types.h>
47#include <net/if_media.h>
48#include <net/if_vlan_var.h>
49
50#include <net/bpf.h>
51
52#include <netinet/in_systm.h>
53#include <netinet/in.h>
54#include <netinet/ip.h>
55#include <netinet/ip6.h>
56#include <netinet6/ip6_var.h>
57#include <netinet/udp.h>
58#include <netinet/tcp.h>
59
60#include <machine/bus.h>
61#include <machine/resource.h>
62#include <sys/bus.h>
63#include <sys/rman.h>
64
65#include <dev/pci/pcireg.h>
66#include <dev/pci/pcivar.h>
67
68#include "if_vmxreg.h"
69#include "if_vmxvar.h"
70
71#include "opt_inet.h"
72#include "opt_inet6.h"
73
74#ifdef VMXNET3_FAILPOINTS
75#include <sys/fail.h>
76static SYSCTL_NODE(DEBUG_FP, OID_AUTO, vmxnet3, CTLFLAG_RW, 0,
77    "vmxnet3 fail points");
78#define VMXNET3_FP	_debug_fail_point_vmxnet3
79#endif
80
81static int	vmxnet3_probe(device_t);
82static int	vmxnet3_attach(device_t);
83static int	vmxnet3_detach(device_t);
84static int	vmxnet3_shutdown(device_t);
85
86static int	vmxnet3_alloc_resources(struct vmxnet3_softc *);
87static void	vmxnet3_free_resources(struct vmxnet3_softc *);
88static int	vmxnet3_check_version(struct vmxnet3_softc *);
89static void	vmxnet3_initial_config(struct vmxnet3_softc *);
90static void	vmxnet3_check_multiqueue(struct vmxnet3_softc *);
91
92static int	vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *);
93static int	vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *);
94static int	vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *);
95static int	vmxnet3_alloc_interrupt(struct vmxnet3_softc *, int, int,
96		    struct vmxnet3_interrupt *);
97static int	vmxnet3_alloc_intr_resources(struct vmxnet3_softc *);
98static int	vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *);
99static int	vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *);
100static int	vmxnet3_setup_interrupts(struct vmxnet3_softc *);
101static int	vmxnet3_alloc_interrupts(struct vmxnet3_softc *);
102
103static void	vmxnet3_free_interrupt(struct vmxnet3_softc *,
104		    struct vmxnet3_interrupt *);
105static void	vmxnet3_free_interrupts(struct vmxnet3_softc *);
106
107#ifndef VMXNET3_LEGACY_TX
108static int	vmxnet3_alloc_taskqueue(struct vmxnet3_softc *);
109static void	vmxnet3_start_taskqueue(struct vmxnet3_softc *);
110static void	vmxnet3_drain_taskqueue(struct vmxnet3_softc *);
111static void	vmxnet3_free_taskqueue(struct vmxnet3_softc *);
112#endif
113
114static int	vmxnet3_init_rxq(struct vmxnet3_softc *, int);
115static int	vmxnet3_init_txq(struct vmxnet3_softc *, int);
116static int	vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *);
117static void	vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *);
118static void	vmxnet3_destroy_txq(struct vmxnet3_txqueue *);
119static void	vmxnet3_free_rxtx_queues(struct vmxnet3_softc *);
120
121static int	vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
122static void	vmxnet3_free_shared_data(struct vmxnet3_softc *);
123static int	vmxnet3_alloc_txq_data(struct vmxnet3_softc *);
124static void	vmxnet3_free_txq_data(struct vmxnet3_softc *);
125static int	vmxnet3_alloc_rxq_data(struct vmxnet3_softc *);
126static void	vmxnet3_free_rxq_data(struct vmxnet3_softc *);
127static int	vmxnet3_alloc_queue_data(struct vmxnet3_softc *);
128static void	vmxnet3_free_queue_data(struct vmxnet3_softc *);
129static int	vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
130static void	vmxnet3_init_shared_data(struct vmxnet3_softc *);
131static void	vmxnet3_reinit_interface(struct vmxnet3_softc *);
132static void	vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
133static void	vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
134static int	vmxnet3_alloc_data(struct vmxnet3_softc *);
135static void	vmxnet3_free_data(struct vmxnet3_softc *);
136static int	vmxnet3_setup_interface(struct vmxnet3_softc *);
137
138static void	vmxnet3_evintr(struct vmxnet3_softc *);
139static void	vmxnet3_txq_eof(struct vmxnet3_txqueue *);
140static void	vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *, struct mbuf *);
141static int	vmxnet3_newbuf(struct vmxnet3_softc *, struct vmxnet3_rxring *);
142static void	vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *,
143		    struct vmxnet3_rxring *, int);
144static void	vmxnet3_rxq_eof(struct vmxnet3_rxqueue *);
145static void	vmxnet3_legacy_intr(void *);
146static void	vmxnet3_txq_intr(void *);
147static void	vmxnet3_rxq_intr(void *);
148static void	vmxnet3_event_intr(void *);
149
150static void	vmxnet3_txstop(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
151static void	vmxnet3_rxstop(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
152static void	vmxnet3_stop(struct vmxnet3_softc *);
153
154static void	vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
155static int	vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
156static int	vmxnet3_reinit_queues(struct vmxnet3_softc *);
157static int	vmxnet3_enable_device(struct vmxnet3_softc *);
158static void	vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
159static int	vmxnet3_reinit(struct vmxnet3_softc *);
160static void	vmxnet3_init_locked(struct vmxnet3_softc *);
161static void	vmxnet3_init(void *);
162
163static int	vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *, struct mbuf *,
164		    int *, int *, int *);
165static int	vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *, struct mbuf **,
166		    bus_dmamap_t, bus_dma_segment_t [], int *);
167static void	vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *, bus_dmamap_t);
168static int	vmxnet3_txq_encap(struct vmxnet3_txqueue *, struct mbuf **);
169static void	vmxnet3_txq_update_pending(struct vmxnet3_txqueue *);
170#ifdef VMXNET3_LEGACY_TX
171static void	vmxnet3_start_locked(struct ifnet *);
172static void	vmxnet3_start(struct ifnet *);
173#else
174static int	vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *,
175		    struct mbuf *);
176static int	vmxnet3_txq_mq_start(struct ifnet *, struct mbuf *);
177static void	vmxnet3_txq_tq_deferred(void *, int);
178#endif
179static void	vmxnet3_txq_start(struct vmxnet3_txqueue *);
180static void	vmxnet3_tx_start_all(struct vmxnet3_softc *);
181
182static void	vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
183		    uint16_t);
184static void	vmxnet3_register_vlan(void *, struct ifnet *, uint16_t);
185static void	vmxnet3_unregister_vlan(void *, struct ifnet *, uint16_t);
186static void	vmxnet3_set_rxfilter(struct vmxnet3_softc *);
187static int	vmxnet3_change_mtu(struct vmxnet3_softc *, int);
188static int	vmxnet3_ioctl(struct ifnet *, u_long, caddr_t);
189
190#ifndef VMXNET3_LEGACY_TX
191static void	vmxnet3_qflush(struct ifnet *);
192#endif
193
194static int	vmxnet3_watchdog(struct vmxnet3_txqueue *);
195static void	vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
196static void	vmxnet3_txq_accum_stats(struct vmxnet3_txqueue *,
197		    struct vmxnet3_txq_stats *);
198static void	vmxnet3_rxq_accum_stats(struct vmxnet3_rxqueue *,
199		    struct vmxnet3_rxq_stats *);
200static void	vmxnet3_tick(void *);
201static void	vmxnet3_link_status(struct vmxnet3_softc *);
202static void	vmxnet3_media_status(struct ifnet *, struct ifmediareq *);
203static int	vmxnet3_media_change(struct ifnet *);
204static void	vmxnet3_set_lladdr(struct vmxnet3_softc *);
205static void	vmxnet3_get_lladdr(struct vmxnet3_softc *);
206
207static void	vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *,
208		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
209static void	vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *,
210		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
211static void	vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *,
212		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
213static void	vmxnet3_setup_sysctl(struct vmxnet3_softc *);
214
215static void	vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t,
216		    uint32_t);
217static uint32_t	vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t);
218static void	vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t,
219		    uint32_t);
220static void	vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t);
221static uint32_t	vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t);
222
223static void	vmxnet3_enable_intr(struct vmxnet3_softc *, int);
224static void	vmxnet3_disable_intr(struct vmxnet3_softc *, int);
225static void	vmxnet3_enable_all_intrs(struct vmxnet3_softc *);
226static void	vmxnet3_disable_all_intrs(struct vmxnet3_softc *);
227
228static int	vmxnet3_dma_malloc(struct vmxnet3_softc *, bus_size_t,
229		    bus_size_t, struct vmxnet3_dma_alloc *);
230static void	vmxnet3_dma_free(struct vmxnet3_softc *,
231		    struct vmxnet3_dma_alloc *);
232static int	vmxnet3_tunable_int(struct vmxnet3_softc *,
233		    const char *, int);
234
235typedef enum {
236	VMXNET3_BARRIER_RD,
237	VMXNET3_BARRIER_WR,
238	VMXNET3_BARRIER_RDWR,
239} vmxnet3_barrier_t;
240
241static void	vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);
242
243/* Tunables. */
244static int vmxnet3_mq_disable = 0;
245TUNABLE_INT("hw.vmx.mq_disable", &vmxnet3_mq_disable);
246static int vmxnet3_default_txnqueue = VMXNET3_DEF_TX_QUEUES;
247TUNABLE_INT("hw.vmx.txnqueue", &vmxnet3_default_txnqueue);
248static int vmxnet3_default_rxnqueue = VMXNET3_DEF_RX_QUEUES;
249TUNABLE_INT("hw.vmx.rxnqueue", &vmxnet3_default_rxnqueue);
250static int vmxnet3_default_txndesc = VMXNET3_DEF_TX_NDESC;
251TUNABLE_INT("hw.vmx.txndesc", &vmxnet3_default_txndesc);
252static int vmxnet3_default_rxndesc = VMXNET3_DEF_RX_NDESC;
253TUNABLE_INT("hw.vmx.rxndesc", &vmxnet3_default_rxndesc);
254
255static device_method_t vmxnet3_methods[] = {
256	/* Device interface. */
257	DEVMETHOD(device_probe,		vmxnet3_probe),
258	DEVMETHOD(device_attach,	vmxnet3_attach),
259	DEVMETHOD(device_detach,	vmxnet3_detach),
260	DEVMETHOD(device_shutdown,	vmxnet3_shutdown),
261
262	DEVMETHOD_END
263};
264
265static driver_t vmxnet3_driver = {
266	"vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc)
267};
268
269static devclass_t vmxnet3_devclass;
270DRIVER_MODULE(vmx, pci, vmxnet3_driver, vmxnet3_devclass, 0, 0);
271
272MODULE_DEPEND(vmx, pci, 1, 1, 1);
273MODULE_DEPEND(vmx, ether, 1, 1, 1);
274
275#define VMXNET3_VMWARE_VENDOR_ID	0x15AD
276#define VMXNET3_VMWARE_DEVICE_ID	0x07B0
277
278static int
279vmxnet3_probe(device_t dev)
280{
281
282	if (pci_get_vendor(dev) == VMXNET3_VMWARE_VENDOR_ID &&
283	    pci_get_device(dev) == VMXNET3_VMWARE_DEVICE_ID) {
284		device_set_desc(dev, "VMware VMXNET3 Ethernet Adapter");
285		return (BUS_PROBE_DEFAULT);
286	}
287
288	return (ENXIO);
289}
290
291static int
292vmxnet3_attach(device_t dev)
293{
294	struct vmxnet3_softc *sc;
295	int error;
296
297	sc = device_get_softc(dev);
298	sc->vmx_dev = dev;
299
300	pci_enable_busmaster(dev);
301
302	VMXNET3_CORE_LOCK_INIT(sc, device_get_nameunit(dev));
303	callout_init_mtx(&sc->vmx_tick, &sc->vmx_mtx, 0);
304
305	vmxnet3_initial_config(sc);
306
307	error = vmxnet3_alloc_resources(sc);
308	if (error)
309		goto fail;
310
311	error = vmxnet3_check_version(sc);
312	if (error)
313		goto fail;
314
315	error = vmxnet3_alloc_rxtx_queues(sc);
316	if (error)
317		goto fail;
318
319#ifndef VMXNET3_LEGACY_TX
320	error = vmxnet3_alloc_taskqueue(sc);
321	if (error)
322		goto fail;
323#endif
324
325	error = vmxnet3_alloc_interrupts(sc);
326	if (error)
327		goto fail;
328
329	vmxnet3_check_multiqueue(sc);
330
331	error = vmxnet3_alloc_data(sc);
332	if (error)
333		goto fail;
334
335	error = vmxnet3_setup_interface(sc);
336	if (error)
337		goto fail;
338
339	error = vmxnet3_setup_interrupts(sc);
340	if (error) {
341		ether_ifdetach(sc->vmx_ifp);
342		device_printf(dev, "could not set up interrupt\n");
343		goto fail;
344	}
345
346	vmxnet3_setup_sysctl(sc);
347#ifndef VMXNET3_LEGACY_TX
348	vmxnet3_start_taskqueue(sc);
349#endif
350
351fail:
352	if (error)
353		vmxnet3_detach(dev);
354
355	return (error);
356}
357
358static int
359vmxnet3_detach(device_t dev)
360{
361	struct vmxnet3_softc *sc;
362	struct ifnet *ifp;
363
364	sc = device_get_softc(dev);
365	ifp = sc->vmx_ifp;
366
367	if (device_is_attached(dev)) {
368		VMXNET3_CORE_LOCK(sc);
369		vmxnet3_stop(sc);
370		VMXNET3_CORE_UNLOCK(sc);
371
372		callout_drain(&sc->vmx_tick);
373#ifndef VMXNET3_LEGACY_TX
374		vmxnet3_drain_taskqueue(sc);
375#endif
376
377		ether_ifdetach(ifp);
378	}
379
380	if (sc->vmx_vlan_attach != NULL) {
381		EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_attach);
382		sc->vmx_vlan_attach = NULL;
383	}
384	if (sc->vmx_vlan_detach != NULL) {
385		EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_detach);
386		sc->vmx_vlan_detach = NULL;
387	}
388
389#ifndef VMXNET3_LEGACY_TX
390	vmxnet3_free_taskqueue(sc);
391#endif
392	vmxnet3_free_interrupts(sc);
393
394	if (ifp != NULL) {
395		if_free(ifp);
396		sc->vmx_ifp = NULL;
397	}
398
399	ifmedia_removeall(&sc->vmx_media);
400
401	vmxnet3_free_data(sc);
402	vmxnet3_free_resources(sc);
403	vmxnet3_free_rxtx_queues(sc);
404
405	VMXNET3_CORE_LOCK_DESTROY(sc);
406
407	return (0);
408}
409
410static int
411vmxnet3_shutdown(device_t dev)
412{
413
414	return (0);
415}
416
417static int
418vmxnet3_alloc_resources(struct vmxnet3_softc *sc)
419{
420	device_t dev;
421	int rid;
422
423	dev = sc->vmx_dev;
424
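	/*
	 * The device exposes its register space through two memory BARs
	 * (BAR0 and BAR1), both of which must map successfully; a third
	 * BAR must also be mappable for MSIX to be usable.
	 */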
425	rid = PCIR_BAR(0);
426	sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
427	    RF_ACTIVE);
428	if (sc->vmx_res0 == NULL) {
429		device_printf(dev,
430		    "could not map BAR0 memory\n");
431		return (ENXIO);
432	}
433
434	sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0);
435	sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0);
436
437	rid = PCIR_BAR(1);
438	sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
439	    RF_ACTIVE);
440	if (sc->vmx_res1 == NULL) {
441		device_printf(dev,
442		    "could not map BAR1 memory\n");
443		return (ENXIO);
444	}
445
446	sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1);
447	sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1);
448
449	if (pci_find_cap(dev, PCIY_MSIX, NULL) == 0) {
450		rid = PCIR_BAR(2);
451		sc->vmx_msix_res = bus_alloc_resource_any(dev,
452		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
453	}
454
455	if (sc->vmx_msix_res == NULL)
456		sc->vmx_flags |= VMXNET3_FLAG_NO_MSIX;
457
458	return (0);
459}
460
461static void
462vmxnet3_free_resources(struct vmxnet3_softc *sc)
463{
464	device_t dev;
465	int rid;
466
467	dev = sc->vmx_dev;
468
469	if (sc->vmx_res0 != NULL) {
470		rid = PCIR_BAR(0);
471		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res0);
472		sc->vmx_res0 = NULL;
473	}
474
475	if (sc->vmx_res1 != NULL) {
476		rid = PCIR_BAR(1);
477		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res1);
478		sc->vmx_res1 = NULL;
479	}
480
481	if (sc->vmx_msix_res != NULL) {
482		rid = PCIR_BAR(2);
483		bus_release_resource(dev, SYS_RES_MEMORY, rid,
484		    sc->vmx_msix_res);
485		sc->vmx_msix_res = NULL;
486	}
487}
488
489static int
490vmxnet3_check_version(struct vmxnet3_softc *sc)
491{
492	device_t dev;
493	uint32_t version;
494
495	dev = sc->vmx_dev;
496
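	/*
	 * The VRRS and UVRS registers report the device and UPT revisions
	 * supported by the hypervisor; the driver requires revision 1 of
	 * each and writes 1 back to select it.
	 */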
497	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
498	if ((version & 0x01) == 0) {
499		device_printf(dev, "unsupported hardware version %#x\n",
500		    version);
501		return (ENOTSUP);
502	}
503	vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
504
505	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
506	if ((version & 0x01) == 0) {
507		device_printf(dev, "unsupported UPT version %#x\n", version);
508		return (ENOTSUP);
509	}
510	vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
511
512	return (0);
513}
514
515static void
516vmxnet3_initial_config(struct vmxnet3_softc *sc)
517{
518	int nqueue, ndesc;
519
520	nqueue = vmxnet3_tunable_int(sc, "txnqueue", vmxnet3_default_txnqueue);
521	if (nqueue > VMXNET3_MAX_TX_QUEUES || nqueue < 1)
522		nqueue = VMXNET3_DEF_TX_QUEUES;
523	if (nqueue > mp_ncpus)
524		nqueue = mp_ncpus;
525	sc->vmx_max_ntxqueues = nqueue;
526
527	nqueue = vmxnet3_tunable_int(sc, "rxnqueue", vmxnet3_default_rxnqueue);
528	if (nqueue > VMXNET3_MAX_RX_QUEUES || nqueue < 1)
529		nqueue = VMXNET3_DEF_RX_QUEUES;
530	if (nqueue > mp_ncpus)
531		nqueue = mp_ncpus;
532	sc->vmx_max_nrxqueues = nqueue;
533
534	if (vmxnet3_tunable_int(sc, "mq_disable", vmxnet3_mq_disable)) {
535		sc->vmx_max_nrxqueues = 1;
536		sc->vmx_max_ntxqueues = 1;
537	}
538
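	/*
	 * Descriptor counts are clamped to the supported range and rounded
	 * down to the required multiple for each ring.
	 */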
539	ndesc = vmxnet3_tunable_int(sc, "txd", vmxnet3_default_txndesc);
540	if (ndesc > VMXNET3_MAX_TX_NDESC || ndesc < VMXNET3_MIN_TX_NDESC)
541		ndesc = VMXNET3_DEF_TX_NDESC;
542	if (ndesc & VMXNET3_MASK_TX_NDESC)
543		ndesc &= ~VMXNET3_MASK_TX_NDESC;
544	sc->vmx_ntxdescs = ndesc;
545
546	ndesc = vmxnet3_tunable_int(sc, "rxd", vmxnet3_default_rxndesc);
547	if (ndesc > VMXNET3_MAX_RX_NDESC || ndesc < VMXNET3_MIN_RX_NDESC)
548		ndesc = VMXNET3_DEF_RX_NDESC;
549	if (ndesc & VMXNET3_MASK_RX_NDESC)
550		ndesc &= ~VMXNET3_MASK_RX_NDESC;
551	sc->vmx_nrxdescs = ndesc;
552	sc->vmx_max_rxsegs = VMXNET3_MAX_RX_SEGS;
553}
554
555static void
556vmxnet3_check_multiqueue(struct vmxnet3_softc *sc)
557{
558
559	if (sc->vmx_intr_type != VMXNET3_IT_MSIX)
560		goto out;
561
562	/* BMV: Just use the maximum configured for now. */
563	sc->vmx_nrxqueues = sc->vmx_max_nrxqueues;
564	sc->vmx_ntxqueues = sc->vmx_max_ntxqueues;
565
566	if (sc->vmx_nrxqueues > 1)
567		sc->vmx_flags |= VMXNET3_FLAG_RSS;
568
569	return;
570
571out:
572	sc->vmx_ntxqueues = 1;
573	sc->vmx_nrxqueues = 1;
574}
575
576static int
577vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *sc)
578{
579	device_t dev;
580	int nmsix, cnt, required;
581
582	dev = sc->vmx_dev;
583
584	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX)
585		return (1);
586
587	/* Allocate an additional vector for the events interrupt. */
588	required = sc->vmx_max_nrxqueues + sc->vmx_max_ntxqueues + 1;
589
590	nmsix = pci_msix_count(dev);
591	if (nmsix < required)
592		return (1);
593
594	cnt = required;
595	if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) {
596		sc->vmx_nintrs = required;
597		return (0);
598	} else
599		pci_release_msi(dev);
600
 601	/* BMV TODO Fall back to sharing MSIX vectors if possible. */
602
603	return (1);
604}
605
606static int
607vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *sc)
608{
609	device_t dev;
610	int nmsi, cnt, required;
611
612	dev = sc->vmx_dev;
613	required = 1;
614
615	nmsi = pci_msi_count(dev);
616	if (nmsi < required)
617		return (1);
618
619	cnt = required;
620	if (pci_alloc_msi(dev, &cnt) == 0 && cnt >= required) {
621		sc->vmx_nintrs = 1;
622		return (0);
623	} else
624		pci_release_msi(dev);
625
626	return (1);
627}
628
629static int
630vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *sc)
631{
632
633	sc->vmx_nintrs = 1;
634	return (0);
635}
636
637static int
638vmxnet3_alloc_interrupt(struct vmxnet3_softc *sc, int rid, int flags,
639    struct vmxnet3_interrupt *intr)
640{
641	struct resource *irq;
642
643	irq = bus_alloc_resource_any(sc->vmx_dev, SYS_RES_IRQ, &rid, flags);
644	if (irq == NULL)
645		return (ENXIO);
646
647	intr->vmxi_irq = irq;
648	intr->vmxi_rid = rid;
649
650	return (0);
651}
652
653static int
654vmxnet3_alloc_intr_resources(struct vmxnet3_softc *sc)
655{
656	int i, rid, flags, error;
657
658	rid = 0;
659	flags = RF_ACTIVE;
660
661	if (sc->vmx_intr_type == VMXNET3_IT_LEGACY)
662		flags |= RF_SHAREABLE;
663	else
664		rid = 1;
665
666	for (i = 0; i < sc->vmx_nintrs; i++, rid++) {
667		error = vmxnet3_alloc_interrupt(sc, rid, flags,
668		    &sc->vmx_intrs[i]);
669		if (error)
670			return (error);
671	}
672
673	return (0);
674}
675
676static int
677vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *sc)
678{
679	device_t dev;
680	struct vmxnet3_txqueue *txq;
681	struct vmxnet3_rxqueue *rxq;
682	struct vmxnet3_interrupt *intr;
683	enum intr_type type;
684	int i, error;
685
686	dev = sc->vmx_dev;
687	intr = &sc->vmx_intrs[0];
688	type = INTR_TYPE_NET | INTR_MPSAFE;
689
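	/*
	 * Vectors are assigned in order: one per Tx queue, one per Rx queue,
	 * and a final vector for events. MSIX rids are 1-based, so the
	 * interrupt index seen by the device is the rid minus one.
	 */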
690	for (i = 0; i < sc->vmx_ntxqueues; i++, intr++) {
691		txq = &sc->vmx_txq[i];
692		error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
693		     vmxnet3_txq_intr, txq, &intr->vmxi_handler);
694		if (error)
695			return (error);
696		txq->vxtxq_intr_idx = intr->vmxi_rid - 1;
697	}
698
699	for (i = 0; i < sc->vmx_nrxqueues; i++, intr++) {
700		rxq = &sc->vmx_rxq[i];
701		error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
702		    vmxnet3_rxq_intr, rxq, &intr->vmxi_handler);
703		if (error)
704			return (error);
705		rxq->vxrxq_intr_idx = intr->vmxi_rid - 1;
706	}
707
708	error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
709	    vmxnet3_event_intr, sc, &intr->vmxi_handler);
710	if (error)
711		return (error);
712	sc->vmx_event_intr_idx = intr->vmxi_rid - 1;
713
714	return (0);
715}
716
717static int
718vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *sc)
719{
720	struct vmxnet3_interrupt *intr;
721	int i, error;
722
723	intr = &sc->vmx_intrs[0];
724	error = bus_setup_intr(sc->vmx_dev, intr->vmxi_irq,
725	    INTR_TYPE_NET | INTR_MPSAFE, NULL, vmxnet3_legacy_intr, sc,
726	    &intr->vmxi_handler);
727
728	for (i = 0; i < sc->vmx_ntxqueues; i++)
729		sc->vmx_txq[i].vxtxq_intr_idx = 0;
730	for (i = 0; i < sc->vmx_nrxqueues; i++)
731		sc->vmx_rxq[i].vxrxq_intr_idx = 0;
732	sc->vmx_event_intr_idx = 0;
733
734	return (error);
735}
736
737static void
738vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
739{
740	struct vmxnet3_txqueue *txq;
741	struct vmxnet3_txq_shared *txs;
742	struct vmxnet3_rxqueue *rxq;
743	struct vmxnet3_rxq_shared *rxs;
744	int i;
745
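	/*
	 * Record the assigned interrupt indices in the shared data so the
	 * device knows which vector each queue and the event interrupt use.
	 */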
746	sc->vmx_ds->evintr = sc->vmx_event_intr_idx;
747
748	for (i = 0; i < sc->vmx_ntxqueues; i++) {
749		txq = &sc->vmx_txq[i];
750		txs = txq->vxtxq_ts;
751		txs->intr_idx = txq->vxtxq_intr_idx;
752	}
753
754	for (i = 0; i < sc->vmx_nrxqueues; i++) {
755		rxq = &sc->vmx_rxq[i];
756		rxs = rxq->vxrxq_rs;
757		rxs->intr_idx = rxq->vxrxq_intr_idx;
758	}
759}
760
761static int
762vmxnet3_setup_interrupts(struct vmxnet3_softc *sc)
763{
764	int error;
765
766	error = vmxnet3_alloc_intr_resources(sc);
767	if (error)
768		return (error);
769
770	switch (sc->vmx_intr_type) {
771	case VMXNET3_IT_MSIX:
772		error = vmxnet3_setup_msix_interrupts(sc);
773		break;
774	case VMXNET3_IT_MSI:
775	case VMXNET3_IT_LEGACY:
776		error = vmxnet3_setup_legacy_interrupt(sc);
777		break;
778	default:
779		panic("%s: invalid interrupt type %d", __func__,
780		    sc->vmx_intr_type);
781	}
782
783	if (error == 0)
784		vmxnet3_set_interrupt_idx(sc);
785
786	return (error);
787}
788
789static int
790vmxnet3_alloc_interrupts(struct vmxnet3_softc *sc)
791{
792	device_t dev;
793	uint32_t config;
794	int error;
795
796	dev = sc->vmx_dev;
797	config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
798
799	sc->vmx_intr_type = config & 0x03;
800	sc->vmx_intr_mask_mode = (config >> 2) & 0x03;
801
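	/*
	 * Start from the interrupt type recommended by the device and fall
	 * back from MSIX to MSI to a legacy interrupt as needed.
	 */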
802	switch (sc->vmx_intr_type) {
803	case VMXNET3_IT_AUTO:
804		sc->vmx_intr_type = VMXNET3_IT_MSIX;
805		/* FALLTHROUGH */
806	case VMXNET3_IT_MSIX:
807		error = vmxnet3_alloc_msix_interrupts(sc);
808		if (error == 0)
809			break;
810		sc->vmx_intr_type = VMXNET3_IT_MSI;
811		/* FALLTHROUGH */
812	case VMXNET3_IT_MSI:
813		error = vmxnet3_alloc_msi_interrupts(sc);
814		if (error == 0)
815			break;
816		sc->vmx_intr_type = VMXNET3_IT_LEGACY;
817		/* FALLTHROUGH */
818	case VMXNET3_IT_LEGACY:
819		error = vmxnet3_alloc_legacy_interrupts(sc);
820		if (error == 0)
821			break;
822		/* FALLTHROUGH */
823	default:
824		sc->vmx_intr_type = -1;
825		device_printf(dev, "cannot allocate any interrupt resources\n");
826		return (ENXIO);
827	}
828
829	return (error);
830}
831
832static void
833vmxnet3_free_interrupt(struct vmxnet3_softc *sc,
834    struct vmxnet3_interrupt *intr)
835{
836	device_t dev;
837
838	dev = sc->vmx_dev;
839
840	if (intr->vmxi_handler != NULL) {
841		bus_teardown_intr(dev, intr->vmxi_irq, intr->vmxi_handler);
842		intr->vmxi_handler = NULL;
843	}
844
845	if (intr->vmxi_irq != NULL) {
846		bus_release_resource(dev, SYS_RES_IRQ, intr->vmxi_rid,
847		    intr->vmxi_irq);
848		intr->vmxi_irq = NULL;
849		intr->vmxi_rid = -1;
850	}
851}
852
853static void
854vmxnet3_free_interrupts(struct vmxnet3_softc *sc)
855{
856	int i;
857
858	for (i = 0; i < sc->vmx_nintrs; i++)
859		vmxnet3_free_interrupt(sc, &sc->vmx_intrs[i]);
860
861	if (sc->vmx_intr_type == VMXNET3_IT_MSI ||
862	    sc->vmx_intr_type == VMXNET3_IT_MSIX)
863		pci_release_msi(sc->vmx_dev);
864}
865
866#ifndef VMXNET3_LEGACY_TX
867static int
868vmxnet3_alloc_taskqueue(struct vmxnet3_softc *sc)
869{
870	device_t dev;
871
872	dev = sc->vmx_dev;
873
874	sc->vmx_tq = taskqueue_create(device_get_nameunit(dev), M_NOWAIT,
875	    taskqueue_thread_enqueue, &sc->vmx_tq);
876	if (sc->vmx_tq == NULL)
877		return (ENOMEM);
878
879	return (0);
880}
881
882static void
883vmxnet3_start_taskqueue(struct vmxnet3_softc *sc)
884{
885	device_t dev;
886	int nthreads, error;
887
888	dev = sc->vmx_dev;
889
890	/*
 891	 * The taskqueue is not expected to be used frequently, so a
 892	 * dedicated thread for each queue is unnecessary.
893	 */
894	nthreads = MAX(1, sc->vmx_ntxqueues / 2);
895
896	/*
897	 * Most drivers just ignore the return value - it only fails
898	 * with ENOMEM so an error is not likely. It is hard for us
899	 * to recover from an error here.
900	 */
901	error = taskqueue_start_threads(&sc->vmx_tq, nthreads, PI_NET,
902	    "%s taskq", device_get_nameunit(dev));
903	if (error)
 904		device_printf(dev, "failed to start taskqueue: %d\n", error);
905}
906
907static void
908vmxnet3_drain_taskqueue(struct vmxnet3_softc *sc)
909{
910	struct vmxnet3_txqueue *txq;
911	int i;
912
913	if (sc->vmx_tq != NULL) {
914		for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
915			txq = &sc->vmx_txq[i];
916			taskqueue_drain(sc->vmx_tq, &txq->vxtxq_defrtask);
917		}
918	}
919}
920
921static void
922vmxnet3_free_taskqueue(struct vmxnet3_softc *sc)
923{
924	if (sc->vmx_tq != NULL) {
925		taskqueue_free(sc->vmx_tq);
926		sc->vmx_tq = NULL;
927	}
928}
929#endif
930
931static int
932vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q)
933{
934	struct vmxnet3_rxqueue *rxq;
935	struct vmxnet3_rxring *rxr;
936	int i;
937
938	rxq = &sc->vmx_rxq[q];
939
940	snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
941	    device_get_nameunit(sc->vmx_dev), q);
942	mtx_init(&rxq->vxrxq_mtx, rxq->vxrxq_name, NULL, MTX_DEF);
943
944	rxq->vxrxq_sc = sc;
945	rxq->vxrxq_id = q;
946
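	/*
	 * Each Rx queue uses two command rings; the completion ring must be
	 * large enough to hold entries from both.
	 */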
947	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
948		rxr = &rxq->vxrxq_cmd_ring[i];
949		rxr->vxrxr_rid = i;
950		rxr->vxrxr_ndesc = sc->vmx_nrxdescs;
951		rxr->vxrxr_rxbuf = malloc(rxr->vxrxr_ndesc *
952		    sizeof(struct vmxnet3_rxbuf), M_DEVBUF, M_NOWAIT | M_ZERO);
953		if (rxr->vxrxr_rxbuf == NULL)
954			return (ENOMEM);
955
956		rxq->vxrxq_comp_ring.vxcr_ndesc += sc->vmx_nrxdescs;
957	}
958
959	return (0);
960}
961
962static int
963vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
964{
965	struct vmxnet3_txqueue *txq;
966	struct vmxnet3_txring *txr;
967
968	txq = &sc->vmx_txq[q];
969	txr = &txq->vxtxq_cmd_ring;
970
971	snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
972	    device_get_nameunit(sc->vmx_dev), q);
973	mtx_init(&txq->vxtxq_mtx, txq->vxtxq_name, NULL, MTX_DEF);
974
975	txq->vxtxq_sc = sc;
976	txq->vxtxq_id = q;
977
978	txr->vxtxr_ndesc = sc->vmx_ntxdescs;
979	txr->vxtxr_txbuf = malloc(txr->vxtxr_ndesc *
980	    sizeof(struct vmxnet3_txbuf), M_DEVBUF, M_NOWAIT | M_ZERO);
981	if (txr->vxtxr_txbuf == NULL)
982		return (ENOMEM);
983
984	txq->vxtxq_comp_ring.vxcr_ndesc = sc->vmx_ntxdescs;
985
986#ifndef VMXNET3_LEGACY_TX
987	TASK_INIT(&txq->vxtxq_defrtask, 0, vmxnet3_txq_tq_deferred, txq);
988
989	txq->vxtxq_br = buf_ring_alloc(VMXNET3_DEF_BUFRING_SIZE, M_DEVBUF,
990	    M_NOWAIT, &txq->vxtxq_mtx);
991	if (txq->vxtxq_br == NULL)
992		return (ENOMEM);
993#endif
994
995	return (0);
996}
997
998static int
999vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *sc)
1000{
1001	int i, error;
1002
1003	/*
1004	 * Only attempt to create multiple queues if MSIX is available. MSIX is
 1005	 * disabled by default because it's apparently broken for devices passed
1006	 * through by at least ESXi 5.1. The hw.pci.honor_msi_blacklist tunable
1007	 * must be set to zero for MSIX. This check prevents us from allocating
1008	 * queue structures that we will not use.
1009	 */
1010	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX) {
1011		sc->vmx_max_nrxqueues = 1;
1012		sc->vmx_max_ntxqueues = 1;
1013	}
1014
1015	sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) *
1016	    sc->vmx_max_nrxqueues, M_DEVBUF, M_NOWAIT | M_ZERO);
1017	sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) *
1018	    sc->vmx_max_ntxqueues, M_DEVBUF, M_NOWAIT | M_ZERO);
1019	if (sc->vmx_rxq == NULL || sc->vmx_txq == NULL)
1020		return (ENOMEM);
1021
1022	for (i = 0; i < sc->vmx_max_nrxqueues; i++) {
1023		error = vmxnet3_init_rxq(sc, i);
1024		if (error)
1025			return (error);
1026	}
1027
1028	for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
1029		error = vmxnet3_init_txq(sc, i);
1030		if (error)
1031			return (error);
1032	}
1033
1034	return (0);
1035}
1036
1037static void
1038vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *rxq)
1039{
1040	struct vmxnet3_rxring *rxr;
1041	int i;
1042
1043	rxq->vxrxq_sc = NULL;
1044	rxq->vxrxq_id = -1;
1045
1046	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1047		rxr = &rxq->vxrxq_cmd_ring[i];
1048
1049		if (rxr->vxrxr_rxbuf != NULL) {
1050			free(rxr->vxrxr_rxbuf, M_DEVBUF);
1051			rxr->vxrxr_rxbuf = NULL;
1052		}
1053	}
1054
1055	if (mtx_initialized(&rxq->vxrxq_mtx) != 0)
1056		mtx_destroy(&rxq->vxrxq_mtx);
1057}
1058
1059static void
1060vmxnet3_destroy_txq(struct vmxnet3_txqueue *txq)
1061{
1062	struct vmxnet3_txring *txr;
1063
1064	txr = &txq->vxtxq_cmd_ring;
1065
1066	txq->vxtxq_sc = NULL;
1067	txq->vxtxq_id = -1;
1068
1069#ifndef VMXNET3_LEGACY_TX
1070	if (txq->vxtxq_br != NULL) {
1071		buf_ring_free(txq->vxtxq_br, M_DEVBUF);
1072		txq->vxtxq_br = NULL;
1073	}
1074#endif
1075
1076	if (txr->vxtxr_txbuf != NULL) {
1077		free(txr->vxtxr_txbuf, M_DEVBUF);
1078		txr->vxtxr_txbuf = NULL;
1079	}
1080
1081	if (mtx_initialized(&txq->vxtxq_mtx) != 0)
1082		mtx_destroy(&txq->vxtxq_mtx);
1083}
1084
1085static void
1086vmxnet3_free_rxtx_queues(struct vmxnet3_softc *sc)
1087{
1088	int i;
1089
1090	if (sc->vmx_rxq != NULL) {
1091		for (i = 0; i < sc->vmx_max_nrxqueues; i++)
1092			vmxnet3_destroy_rxq(&sc->vmx_rxq[i]);
1093		free(sc->vmx_rxq, M_DEVBUF);
1094		sc->vmx_rxq = NULL;
1095	}
1096
1097	if (sc->vmx_txq != NULL) {
1098		for (i = 0; i < sc->vmx_max_ntxqueues; i++)
1099			vmxnet3_destroy_txq(&sc->vmx_txq[i]);
1100		free(sc->vmx_txq, M_DEVBUF);
1101		sc->vmx_txq = NULL;
1102	}
1103}
1104
1105static int
1106vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
1107{
1108	device_t dev;
1109	uint8_t *kva;
1110	size_t size;
1111	int i, error;
1112
1113	dev = sc->vmx_dev;
1114
1115	size = sizeof(struct vmxnet3_driver_shared);
1116	error = vmxnet3_dma_malloc(sc, size, 1, &sc->vmx_ds_dma);
1117	if (error) {
1118		device_printf(dev, "cannot alloc shared memory\n");
1119		return (error);
1120	}
1121	sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.dma_vaddr;
1122
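	/*
	 * The per-queue shared structures are carved out of a single DMA
	 * allocation, with all Tx queue structures followed by the Rx ones.
	 */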
1123	size = sc->vmx_ntxqueues * sizeof(struct vmxnet3_txq_shared) +
1124	    sc->vmx_nrxqueues * sizeof(struct vmxnet3_rxq_shared);
1125	error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_qs_dma);
1126	if (error) {
1127		device_printf(dev, "cannot alloc queue shared memory\n");
1128		return (error);
1129	}
1130	sc->vmx_qs = (void *) sc->vmx_qs_dma.dma_vaddr;
1131	kva = sc->vmx_qs;
1132
1133	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1134		sc->vmx_txq[i].vxtxq_ts = (struct vmxnet3_txq_shared *) kva;
1135		kva += sizeof(struct vmxnet3_txq_shared);
1136	}
1137	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1138		sc->vmx_rxq[i].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva;
1139		kva += sizeof(struct vmxnet3_rxq_shared);
1140	}
1141
1142	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1143		size = sizeof(struct vmxnet3_rss_shared);
1144		error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_rss_dma);
1145		if (error) {
1146			device_printf(dev, "cannot alloc rss shared memory\n");
1147			return (error);
1148		}
1149		sc->vmx_rss =
1150		    (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.dma_vaddr;
1151	}
1152
1153	return (0);
1154}
1155
1156static void
1157vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
1158{
1159
1160	if (sc->vmx_rss != NULL) {
1161		vmxnet3_dma_free(sc, &sc->vmx_rss_dma);
1162		sc->vmx_rss = NULL;
1163	}
1164
1165	if (sc->vmx_qs != NULL) {
1166		vmxnet3_dma_free(sc, &sc->vmx_qs_dma);
1167		sc->vmx_qs = NULL;
1168	}
1169
1170	if (sc->vmx_ds != NULL) {
1171		vmxnet3_dma_free(sc, &sc->vmx_ds_dma);
1172		sc->vmx_ds = NULL;
1173	}
1174}
1175
1176static int
1177vmxnet3_alloc_txq_data(struct vmxnet3_softc *sc)
1178{
1179	device_t dev;
1180	struct vmxnet3_txqueue *txq;
1181	struct vmxnet3_txring *txr;
1182	struct vmxnet3_comp_ring *txc;
1183	size_t descsz, compsz;
1184	int i, q, error;
1185
1186	dev = sc->vmx_dev;
1187
1188	for (q = 0; q < sc->vmx_ntxqueues; q++) {
1189		txq = &sc->vmx_txq[q];
1190		txr = &txq->vxtxq_cmd_ring;
1191		txc = &txq->vxtxq_comp_ring;
1192
1193		descsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc);
1194		compsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txcompdesc);
1195
1196		error = bus_dma_tag_create(bus_get_dma_tag(dev),
1197		    1, 0,			/* alignment, boundary */
1198		    BUS_SPACE_MAXADDR,		/* lowaddr */
1199		    BUS_SPACE_MAXADDR,		/* highaddr */
1200		    NULL, NULL,			/* filter, filterarg */
1201		    VMXNET3_TX_MAXSIZE,		/* maxsize */
1202		    VMXNET3_TX_MAXSEGS,		/* nsegments */
1203		    VMXNET3_TX_MAXSEGSIZE,	/* maxsegsize */
1204		    0,				/* flags */
1205		    NULL, NULL,			/* lockfunc, lockarg */
1206		    &txr->vxtxr_txtag);
1207		if (error) {
1208			device_printf(dev,
1209			    "unable to create Tx buffer tag for queue %d\n", q);
1210			return (error);
1211		}
1212
1213		error = vmxnet3_dma_malloc(sc, descsz, 512, &txr->vxtxr_dma);
1214		if (error) {
1215			device_printf(dev, "cannot alloc Tx descriptors for "
1216			    "queue %d error %d\n", q, error);
1217			return (error);
1218		}
1219		txr->vxtxr_txd =
1220		    (struct vmxnet3_txdesc *) txr->vxtxr_dma.dma_vaddr;
1221
1222		error = vmxnet3_dma_malloc(sc, compsz, 512, &txc->vxcr_dma);
1223		if (error) {
1224			device_printf(dev, "cannot alloc Tx comp descriptors "
1225			   "for queue %d error %d\n", q, error);
1226			return (error);
1227		}
1228		txc->vxcr_u.txcd =
1229		    (struct vmxnet3_txcompdesc *) txc->vxcr_dma.dma_vaddr;
1230
1231		for (i = 0; i < txr->vxtxr_ndesc; i++) {
1232			error = bus_dmamap_create(txr->vxtxr_txtag, 0,
1233			    &txr->vxtxr_txbuf[i].vtxb_dmamap);
1234			if (error) {
1235				device_printf(dev, "unable to create Tx buf "
1236				    "dmamap for queue %d idx %d\n", q, i);
1237				return (error);
1238			}
1239		}
1240	}
1241
1242	return (0);
1243}
1244
1245static void
1246vmxnet3_free_txq_data(struct vmxnet3_softc *sc)
1247{
1248	device_t dev;
1249	struct vmxnet3_txqueue *txq;
1250	struct vmxnet3_txring *txr;
1251	struct vmxnet3_comp_ring *txc;
1252	struct vmxnet3_txbuf *txb;
1253	int i, q;
1254
1255	dev = sc->vmx_dev;
1256
1257	for (q = 0; q < sc->vmx_ntxqueues; q++) {
1258		txq = &sc->vmx_txq[q];
1259		txr = &txq->vxtxq_cmd_ring;
1260		txc = &txq->vxtxq_comp_ring;
1261
1262		for (i = 0; i < txr->vxtxr_ndesc; i++) {
1263			txb = &txr->vxtxr_txbuf[i];
1264			if (txb->vtxb_dmamap != NULL) {
1265				bus_dmamap_destroy(txr->vxtxr_txtag,
1266				    txb->vtxb_dmamap);
1267				txb->vtxb_dmamap = NULL;
1268			}
1269		}
1270
1271		if (txc->vxcr_u.txcd != NULL) {
1272			vmxnet3_dma_free(sc, &txc->vxcr_dma);
1273			txc->vxcr_u.txcd = NULL;
1274		}
1275
1276		if (txr->vxtxr_txd != NULL) {
1277			vmxnet3_dma_free(sc, &txr->vxtxr_dma);
1278			txr->vxtxr_txd = NULL;
1279		}
1280
1281		if (txr->vxtxr_txtag != NULL) {
1282			bus_dma_tag_destroy(txr->vxtxr_txtag);
1283			txr->vxtxr_txtag = NULL;
1284		}
1285	}
1286}
1287
1288static int
1289vmxnet3_alloc_rxq_data(struct vmxnet3_softc *sc)
1290{
1291	device_t dev;
1292	struct vmxnet3_rxqueue *rxq;
1293	struct vmxnet3_rxring *rxr;
1294	struct vmxnet3_comp_ring *rxc;
1295	int descsz, compsz;
1296	int i, j, q, error;
1297
1298	dev = sc->vmx_dev;
1299
1300	for (q = 0; q < sc->vmx_nrxqueues; q++) {
1301		rxq = &sc->vmx_rxq[q];
1302		rxc = &rxq->vxrxq_comp_ring;
1303		compsz = 0;
1304
1305		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1306			rxr = &rxq->vxrxq_cmd_ring[i];
1307
1308			descsz = rxr->vxrxr_ndesc *
1309			    sizeof(struct vmxnet3_rxdesc);
1310			compsz += rxr->vxrxr_ndesc *
1311			    sizeof(struct vmxnet3_rxcompdesc);
1312
1313			error = bus_dma_tag_create(bus_get_dma_tag(dev),
1314			    1, 0,		/* alignment, boundary */
1315			    BUS_SPACE_MAXADDR,	/* lowaddr */
1316			    BUS_SPACE_MAXADDR,	/* highaddr */
1317			    NULL, NULL,		/* filter, filterarg */
1318			    MJUMPAGESIZE,	/* maxsize */
1319			    1,			/* nsegments */
1320			    MJUMPAGESIZE,	/* maxsegsize */
1321			    0,			/* flags */
1322			    NULL, NULL,		/* lockfunc, lockarg */
1323			    &rxr->vxrxr_rxtag);
1324			if (error) {
1325				device_printf(dev,
1326				    "unable to create Rx buffer tag for "
1327				    "queue %d\n", q);
1328				return (error);
1329			}
1330
1331			error = vmxnet3_dma_malloc(sc, descsz, 512,
1332			    &rxr->vxrxr_dma);
1333			if (error) {
1334				device_printf(dev, "cannot allocate Rx "
1335				    "descriptors for queue %d/%d error %d\n",
 1336				    q, i, error);
1337				return (error);
1338			}
1339			rxr->vxrxr_rxd =
1340			    (struct vmxnet3_rxdesc *) rxr->vxrxr_dma.dma_vaddr;
1341		}
1342
1343		error = vmxnet3_dma_malloc(sc, compsz, 512, &rxc->vxcr_dma);
1344		if (error) {
1345			device_printf(dev, "cannot alloc Rx comp descriptors "
1346			    "for queue %d error %d\n", q, error);
1347			return (error);
1348		}
1349		rxc->vxcr_u.rxcd =
1350		    (struct vmxnet3_rxcompdesc *) rxc->vxcr_dma.dma_vaddr;
1351
1352		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1353			rxr = &rxq->vxrxq_cmd_ring[i];
1354
1355			error = bus_dmamap_create(rxr->vxrxr_rxtag, 0,
1356			    &rxr->vxrxr_spare_dmap);
1357			if (error) {
1358				device_printf(dev, "unable to create spare "
1359				    "dmamap for queue %d/%d error %d\n",
1360				    q, i, error);
1361				return (error);
1362			}
1363
1364			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1365				error = bus_dmamap_create(rxr->vxrxr_rxtag, 0,
1366				    &rxr->vxrxr_rxbuf[j].vrxb_dmamap);
1367				if (error) {
1368					device_printf(dev, "unable to create "
1369					    "dmamap for queue %d/%d slot %d "
1370					    "error %d\n",
1371					    q, i, j, error);
1372					return (error);
1373				}
1374			}
1375		}
1376	}
1377
1378	return (0);
1379}
1380
1381static void
1382vmxnet3_free_rxq_data(struct vmxnet3_softc *sc)
1383{
1384	device_t dev;
1385	struct vmxnet3_rxqueue *rxq;
1386	struct vmxnet3_rxring *rxr;
1387	struct vmxnet3_comp_ring *rxc;
1388	struct vmxnet3_rxbuf *rxb;
1389	int i, j, q;
1390
1391	dev = sc->vmx_dev;
1392
1393	for (q = 0; q < sc->vmx_nrxqueues; q++) {
1394		rxq = &sc->vmx_rxq[q];
1395		rxc = &rxq->vxrxq_comp_ring;
1396
1397		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1398			rxr = &rxq->vxrxq_cmd_ring[i];
1399
1400			if (rxr->vxrxr_spare_dmap != NULL) {
1401				bus_dmamap_destroy(rxr->vxrxr_rxtag,
1402				    rxr->vxrxr_spare_dmap);
1403				rxr->vxrxr_spare_dmap = NULL;
1404			}
1405
1406			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1407				rxb = &rxr->vxrxr_rxbuf[j];
1408				if (rxb->vrxb_dmamap != NULL) {
1409					bus_dmamap_destroy(rxr->vxrxr_rxtag,
1410					    rxb->vrxb_dmamap);
1411					rxb->vrxb_dmamap = NULL;
1412				}
1413			}
1414		}
1415
1416		if (rxc->vxcr_u.rxcd != NULL) {
1417			vmxnet3_dma_free(sc, &rxc->vxcr_dma);
1418			rxc->vxcr_u.rxcd = NULL;
1419		}
1420
1421		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1422			rxr = &rxq->vxrxq_cmd_ring[i];
1423
1424			if (rxr->vxrxr_rxd != NULL) {
1425				vmxnet3_dma_free(sc, &rxr->vxrxr_dma);
1426				rxr->vxrxr_rxd = NULL;
1427			}
1428
1429			if (rxr->vxrxr_rxtag != NULL) {
1430				bus_dma_tag_destroy(rxr->vxrxr_rxtag);
1431				rxr->vxrxr_rxtag = NULL;
1432			}
1433		}
1434	}
1435}
1436
1437static int
1438vmxnet3_alloc_queue_data(struct vmxnet3_softc *sc)
1439{
1440	int error;
1441
1442	error = vmxnet3_alloc_txq_data(sc);
1443	if (error)
1444		return (error);
1445
1446	error = vmxnet3_alloc_rxq_data(sc);
1447	if (error)
1448		return (error);
1449
1450	return (0);
1451}
1452
1453static void
1454vmxnet3_free_queue_data(struct vmxnet3_softc *sc)
1455{
1456
1457	if (sc->vmx_rxq != NULL)
1458		vmxnet3_free_rxq_data(sc);
1459
1460	if (sc->vmx_txq != NULL)
1461		vmxnet3_free_txq_data(sc);
1462}
1463
1464static int
1465vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1466{
1467	int error;
1468
1469	error = vmxnet3_dma_malloc(sc, VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN,
1470	    32, &sc->vmx_mcast_dma);
1471	if (error)
1472		device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1473	else
1474		sc->vmx_mcast = sc->vmx_mcast_dma.dma_vaddr;
1475
1476	return (error);
1477}
1478
1479static void
1480vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1481{
1482
1483	if (sc->vmx_mcast != NULL) {
1484		vmxnet3_dma_free(sc, &sc->vmx_mcast_dma);
1485		sc->vmx_mcast = NULL;
1486	}
1487}
1488
1489static void
1490vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1491{
1492	struct vmxnet3_driver_shared *ds;
1493	struct vmxnet3_txqueue *txq;
1494	struct vmxnet3_txq_shared *txs;
1495	struct vmxnet3_rxqueue *rxq;
1496	struct vmxnet3_rxq_shared *rxs;
1497	int i;
1498
1499	ds = sc->vmx_ds;
1500
1501	/*
 1502	 * Initialize the fields of the shared data that remain the same across
 1503	 * reinits. Note the shared data is zeroed when allocated.
1504	 */
1505
1506	ds->magic = VMXNET3_REV1_MAGIC;
1507
1508	/* DriverInfo */
1509	ds->version = VMXNET3_DRIVER_VERSION;
1510	ds->guest = VMXNET3_GOS_FREEBSD |
1511#ifdef __LP64__
1512	    VMXNET3_GOS_64BIT;
1513#else
1514	    VMXNET3_GOS_32BIT;
1515#endif
1516	ds->vmxnet3_revision = 1;
1517	ds->upt_version = 1;
1518
1519	/* Misc. conf */
1520	ds->driver_data = vtophys(sc);
1521	ds->driver_data_len = sizeof(struct vmxnet3_softc);
1522	ds->queue_shared = sc->vmx_qs_dma.dma_paddr;
1523	ds->queue_shared_len = sc->vmx_qs_dma.dma_size;
1524	ds->nrxsg_max = sc->vmx_max_rxsegs;
1525
1526	/* RSS conf */
1527	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1528		ds->rss.version = 1;
1529		ds->rss.paddr = sc->vmx_rss_dma.dma_paddr;
1530		ds->rss.len = sc->vmx_rss_dma.dma_size;
1531	}
1532
1533	/* Interrupt control. */
1534	ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1535	ds->nintr = sc->vmx_nintrs;
1536	ds->evintr = sc->vmx_event_intr_idx;
1537	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1538
1539	for (i = 0; i < sc->vmx_nintrs; i++)
1540		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1541
1542	/* Receive filter. */
1543	ds->mcast_table = sc->vmx_mcast_dma.dma_paddr;
1544	ds->mcast_tablelen = sc->vmx_mcast_dma.dma_size;
1545
1546	/* Tx queues */
1547	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1548		txq = &sc->vmx_txq[i];
1549		txs = txq->vxtxq_ts;
1550
1551		txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_dma.dma_paddr;
1552		txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1553		txs->comp_ring = txq->vxtxq_comp_ring.vxcr_dma.dma_paddr;
1554		txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1555		txs->driver_data = vtophys(txq);
1556		txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1557	}
1558
1559	/* Rx queues */
1560	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1561		rxq = &sc->vmx_rxq[i];
1562		rxs = rxq->vxrxq_rs;
1563
1564		rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_dma.dma_paddr;
1565		rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1566		rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_dma.dma_paddr;
1567		rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1568		rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_dma.dma_paddr;
1569		rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1570		rxs->driver_data = vtophys(rxq);
1571		rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1572	}
1573}
1574
1575static void
1576vmxnet3_reinit_interface(struct vmxnet3_softc *sc)
1577{
1578	struct ifnet *ifp;
1579
1580	ifp = sc->vmx_ifp;
1581
1582	/* Use the current MAC address. */
1583	bcopy(IF_LLADDR(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
1584	vmxnet3_set_lladdr(sc);
1585
1586	ifp->if_hwassist = 0;
1587	if (ifp->if_capenable & IFCAP_TXCSUM)
1588		ifp->if_hwassist |= VMXNET3_CSUM_OFFLOAD;
1589	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
1590		ifp->if_hwassist |= VMXNET3_CSUM_OFFLOAD_IPV6;
1591	if (ifp->if_capenable & IFCAP_TSO4)
1592		ifp->if_hwassist |= CSUM_IP_TSO;
1593	if (ifp->if_capenable & IFCAP_TSO6)
1594		ifp->if_hwassist |= CSUM_IP6_TSO;
1595}
1596
1597static void
1598vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1599{
1600	/*
1601	 * Use the same key as the Linux driver until FreeBSD can do
1602	 * RSS (presumably Toeplitz) in software.
1603	 */
1604	static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
1605	    0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
1606	    0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
1607	    0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
1608	    0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
1609	    0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
1610	};
1611
1612	struct vmxnet3_driver_shared *ds;
1613	struct vmxnet3_rss_shared *rss;
1614	int i;
1615
1616	ds = sc->vmx_ds;
1617	rss = sc->vmx_rss;
1618
1619	rss->hash_type =
1620	    UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1621	    UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1622	rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1623	rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1624	rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1625	memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
1626
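	/* Spread the indirection table entries evenly across the Rx queues. */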
1627	for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1628		rss->ind_table[i] = i % sc->vmx_nrxqueues;
1629}
1630
1631static void
1632vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1633{
1634	struct ifnet *ifp;
1635	struct vmxnet3_driver_shared *ds;
1636
1637	ifp = sc->vmx_ifp;
1638	ds = sc->vmx_ds;
1639
1640	ds->mtu = ifp->if_mtu;
1641	ds->ntxqueue = sc->vmx_ntxqueues;
1642	ds->nrxqueue = sc->vmx_nrxqueues;
1643
1644	ds->upt_features = 0;
1645	if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
1646		ds->upt_features |= UPT1_F_CSUM;
1647	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1648		ds->upt_features |= UPT1_F_VLAN;
1649	if (ifp->if_capenable & IFCAP_LRO)
1650		ds->upt_features |= UPT1_F_LRO;
1651
1652	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1653		ds->upt_features |= UPT1_F_RSS;
1654		vmxnet3_reinit_rss_shared_data(sc);
1655	}
1656
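	/* Pass the physical address of the shared data area to the device. */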
1657	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.dma_paddr);
1658	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1659	    (uint64_t) sc->vmx_ds_dma.dma_paddr >> 32);
1660}
1661
1662static int
1663vmxnet3_alloc_data(struct vmxnet3_softc *sc)
1664{
1665	int error;
1666
1667	error = vmxnet3_alloc_shared_data(sc);
1668	if (error)
1669		return (error);
1670
1671	error = vmxnet3_alloc_queue_data(sc);
1672	if (error)
1673		return (error);
1674
1675	error = vmxnet3_alloc_mcast_table(sc);
1676	if (error)
1677		return (error);
1678
1679	vmxnet3_init_shared_data(sc);
1680
1681	return (0);
1682}
1683
1684static void
1685vmxnet3_free_data(struct vmxnet3_softc *sc)
1686{
1687
1688	vmxnet3_free_mcast_table(sc);
1689	vmxnet3_free_queue_data(sc);
1690	vmxnet3_free_shared_data(sc);
1691}
1692
1693static int
1694vmxnet3_setup_interface(struct vmxnet3_softc *sc)
1695{
1696	device_t dev;
1697	struct ifnet *ifp;
1698
1699	dev = sc->vmx_dev;
1700
1701	ifp = sc->vmx_ifp = if_alloc(IFT_ETHER);
1702	if (ifp == NULL) {
1703		device_printf(dev, "cannot allocate ifnet structure\n");
1704		return (ENOSPC);
1705	}
1706
1707	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1708#if __FreeBSD_version < 1000025
1709	ifp->if_baudrate = 1000000000;
1710#elif __FreeBSD_version < 1100011
1711	if_initbaudrate(ifp, IF_Gbps(10));
1712#else
1713	ifp->if_baudrate = IF_Gbps(10);
1714#endif
1715	ifp->if_softc = sc;
1716	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1717	ifp->if_init = vmxnet3_init;
1718	ifp->if_ioctl = vmxnet3_ioctl;
1719	ifp->if_hw_tsomax = VMXNET3_TSO_MAXSIZE;
1720
1721#ifdef VMXNET3_LEGACY_TX
1722	ifp->if_start = vmxnet3_start;
1723	ifp->if_snd.ifq_drv_maxlen = sc->vmx_ntxdescs - 1;
1724	IFQ_SET_MAXLEN(&ifp->if_snd, sc->vmx_ntxdescs - 1);
1725	IFQ_SET_READY(&ifp->if_snd);
1726#else
1727	ifp->if_transmit = vmxnet3_txq_mq_start;
1728	ifp->if_qflush = vmxnet3_qflush;
1729#endif
1730
1731	vmxnet3_get_lladdr(sc);
1732	ether_ifattach(ifp, sc->vmx_lladdr);
1733
1734	ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_TXCSUM;
1735	ifp->if_capabilities |= IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6;
1736	ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6;
1737	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
1738	    IFCAP_VLAN_HWCSUM;
1739	ifp->if_capenable = ifp->if_capabilities;
1740
1741	/* These capabilities are not enabled by default. */
1742	ifp->if_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER;
1743
1744	sc->vmx_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
1745	    vmxnet3_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
1746	sc->vmx_vlan_detach = EVENTHANDLER_REGISTER(vlan_config,
1747	    vmxnet3_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
1748
1749	ifmedia_init(&sc->vmx_media, 0, vmxnet3_media_change,
1750	    vmxnet3_media_status);
1751	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
1752	ifmedia_set(&sc->vmx_media, IFM_ETHER | IFM_AUTO);
1753
1754	return (0);
1755}
1756
1757static void
1758vmxnet3_evintr(struct vmxnet3_softc *sc)
1759{
1760	device_t dev;
1761	struct ifnet *ifp;
1762	struct vmxnet3_txq_shared *ts;
1763	struct vmxnet3_rxq_shared *rs;
1764	uint32_t event;
1765	int reset;
1766
1767	dev = sc->vmx_dev;
1768	ifp = sc->vmx_ifp;
1769	reset = 0;
1770
1771	VMXNET3_CORE_LOCK(sc);
1772
1773	/* Clear events. */
1774	event = sc->vmx_ds->event;
1775	vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
1776
1777	if (event & VMXNET3_EVENT_LINK) {
1778		vmxnet3_link_status(sc);
1779		if (sc->vmx_link_active != 0)
1780			vmxnet3_tx_start_all(sc);
1781	}
1782
1783	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
1784		reset = 1;
1785		vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
1786		ts = sc->vmx_txq[0].vxtxq_ts;
1787		if (ts->stopped != 0)
1788			device_printf(dev, "Tx queue error %#x\n", ts->error);
1789		rs = sc->vmx_rxq[0].vxrxq_rs;
1790		if (rs->stopped != 0)
1791			device_printf(dev, "Rx queue error %#x\n", rs->error);
1792		device_printf(dev, "Rx/Tx queue error event ... resetting\n");
1793	}
1794
1795	if (event & VMXNET3_EVENT_DIC)
1796		device_printf(dev, "device implementation change event\n");
1797	if (event & VMXNET3_EVENT_DEBUG)
1798		device_printf(dev, "debug event\n");
1799
1800	if (reset != 0) {
1801		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1802		vmxnet3_init_locked(sc);
1803	}
1804
1805	VMXNET3_CORE_UNLOCK(sc);
1806}
1807
1808static void
1809vmxnet3_txq_eof(struct vmxnet3_txqueue *txq)
1810{
1811	struct vmxnet3_softc *sc;
1812	struct ifnet *ifp;
1813	struct vmxnet3_txring *txr;
1814	struct vmxnet3_comp_ring *txc;
1815	struct vmxnet3_txcompdesc *txcd;
1816	struct vmxnet3_txbuf *txb;
1817	struct mbuf *m;
1818	u_int sop;
1819
1820	sc = txq->vxtxq_sc;
1821	ifp = sc->vmx_ifp;
1822	txr = &txq->vxtxq_cmd_ring;
1823	txc = &txq->vxtxq_comp_ring;
1824
1825	VMXNET3_TXQ_LOCK_ASSERT(txq);
1826
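	/*
	 * Process completed transmits. A completion descriptor belongs to
	 * the driver only when its generation bit matches ours; the bit is
	 * toggled each time the ring wraps.
	 */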
1827	for (;;) {
1828		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
1829		if (txcd->gen != txc->vxcr_gen)
1830			break;
1831		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1832
1833		if (++txc->vxcr_next == txc->vxcr_ndesc) {
1834			txc->vxcr_next = 0;
1835			txc->vxcr_gen ^= 1;
1836		}
1837
1838		sop = txr->vxtxr_next;
1839		txb = &txr->vxtxr_txbuf[sop];
1840
1841		if ((m = txb->vtxb_m) != NULL) {
1842			bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
1843			    BUS_DMASYNC_POSTWRITE);
1844			bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap);
1845
1846			txq->vxtxq_stats.vmtxs_opackets++;
1847			txq->vxtxq_stats.vmtxs_obytes += m->m_pkthdr.len;
1848			if (m->m_flags & M_MCAST)
1849				txq->vxtxq_stats.vmtxs_omcasts++;
1850
1851			m_freem(m);
1852			txb->vtxb_m = NULL;
1853		}
1854
1855		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
1856	}
1857
1858	if (txr->vxtxr_head == txr->vxtxr_next)
1859		txq->vxtxq_watchdog = 0;
1860}
1861
1862static int
1863vmxnet3_newbuf(struct vmxnet3_softc *sc, struct vmxnet3_rxring *rxr)
1864{
1865	struct ifnet *ifp;
1866	struct mbuf *m;
1867	struct vmxnet3_rxdesc *rxd;
1868	struct vmxnet3_rxbuf *rxb;
1869	bus_dma_tag_t tag;
1870	bus_dmamap_t dmap;
1871	bus_dma_segment_t segs[1];
1872	int idx, clsize, btype, flags, nsegs, error;
1873
1874	ifp = sc->vmx_ifp;
1875	tag = rxr->vxrxr_rxtag;
1876	dmap = rxr->vxrxr_spare_dmap;
1877	idx = rxr->vxrxr_fill;
1878	rxd = &rxr->vxrxr_rxd[idx];
1879	rxb = &rxr->vxrxr_rxbuf[idx];
1880
1881#ifdef VMXNET3_FAILPOINTS
1882	KFAIL_POINT_CODE(VMXNET3_FP, newbuf, return ENOBUFS);
1883	if (rxr->vxrxr_rid != 0)
1884		KFAIL_POINT_CODE(VMXNET3_FP, newbuf_body_only, return ENOBUFS);
1885#endif
1886
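	/*
	 * The slot at the start of each chain in ring 0 gets a head buffer
	 * (a regular cluster, adjusted for alignment); all remaining slots
	 * are filled with larger body buffers for frame continuations.
	 */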
1887	if (rxr->vxrxr_rid == 0 && (idx % sc->vmx_rx_max_chain) == 0) {
1888		flags = M_PKTHDR;
1889		clsize = MCLBYTES;
1890		btype = VMXNET3_BTYPE_HEAD;
1891	} else {
1892#if __FreeBSD_version < 902001
1893		/*
1894		 * These mbufs will never be used for the start of a frame.
1895		 * Roughly prior to branching releng/9.2, the load_mbuf_sg()
1896		 * required the mbuf to always be a packet header. Avoid
1897		 * unnecessary mbuf initialization in newer versions where
1898		 * that is not the case.
1899		 */
1900		flags = M_PKTHDR;
1901#else
1902		flags = 0;
1903#endif
1904		clsize = MJUMPAGESIZE;
1905		btype = VMXNET3_BTYPE_BODY;
1906	}
1907
1908	m = m_getjcl(M_NOWAIT, MT_DATA, flags, clsize);
1909	if (m == NULL) {
1910		sc->vmx_stats.vmst_mgetcl_failed++;
1911		return (ENOBUFS);
1912	}
1913
1914	if (btype == VMXNET3_BTYPE_HEAD) {
1915		m->m_len = m->m_pkthdr.len = clsize;
1916		m_adj(m, ETHER_ALIGN);
1917	} else
1918		m->m_len = clsize;
1919
1920	error = bus_dmamap_load_mbuf_sg(tag, dmap, m, &segs[0], &nsegs,
1921	    BUS_DMA_NOWAIT);
1922	if (error) {
1923		m_freem(m);
1924		sc->vmx_stats.vmst_mbuf_load_failed++;
1925		return (error);
1926	}
1927	KASSERT(nsegs == 1,
1928	    ("%s: mbuf %p with too many segments %d", __func__, m, nsegs));
1929#if __FreeBSD_version < 902001
1930	if (btype == VMXNET3_BTYPE_BODY)
1931		m->m_flags &= ~M_PKTHDR;
1932#endif
1933
1934	if (rxb->vrxb_m != NULL) {
1935		bus_dmamap_sync(tag, rxb->vrxb_dmamap, BUS_DMASYNC_POSTREAD);
1936		bus_dmamap_unload(tag, rxb->vrxb_dmamap);
1937	}
1938
1939	rxr->vxrxr_spare_dmap = rxb->vrxb_dmamap;
1940	rxb->vrxb_dmamap = dmap;
1941	rxb->vrxb_m = m;
1942
1943	rxd->addr = segs[0].ds_addr;
1944	rxd->len = segs[0].ds_len;
1945	rxd->btype = btype;
1946	rxd->gen = rxr->vxrxr_gen;
1947
1948	vmxnet3_rxr_increment_fill(rxr);
1949	return (0);
1950}
1951
1952static void
1953vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *rxq,
1954    struct vmxnet3_rxring *rxr, int idx)
1955{
1956	struct vmxnet3_rxdesc *rxd;
1957
1958	rxd = &rxr->vxrxr_rxd[idx];
1959	rxd->gen = rxr->vxrxr_gen;
1960	vmxnet3_rxr_increment_fill(rxr);
1961}
1962
1963static void
1964vmxnet3_rxq_discard_chain(struct vmxnet3_rxqueue *rxq)
1965{
1966	struct vmxnet3_softc *sc;
1967	struct vmxnet3_rxring *rxr;
1968	struct vmxnet3_comp_ring *rxc;
1969	struct vmxnet3_rxcompdesc *rxcd;
1970	int idx, eof;
1971
1972	sc = rxq->vxrxq_sc;
1973	rxc = &rxq->vxrxq_comp_ring;
1974
1975	do {
1976		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
1977		if (rxcd->gen != rxc->vxcr_gen)
1978			break;		/* Not expected. */
1979		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1980
1981		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
1982			rxc->vxcr_next = 0;
1983			rxc->vxcr_gen ^= 1;
1984		}
1985
1986		idx = rxcd->rxd_idx;
1987		eof = rxcd->eop;
1988		if (rxcd->qid < sc->vmx_nrxqueues)
1989			rxr = &rxq->vxrxq_cmd_ring[0];
1990		else
1991			rxr = &rxq->vxrxq_cmd_ring[1];
1992		vmxnet3_rxq_eof_discard(rxq, rxr, idx);
1993	} while (!eof);
1994}
1995
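/*
 * Translate the checksum results in the Rx completion descriptor into mbuf
 * csum_flags.  A good TCP/UDP checksum on a non-fragmented packet is
 * reported to the stack as a validated pseudo-header checksum.
 */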
1996static void
1997vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
1998{
1999
2000	if (rxcd->ipv4) {
2001		m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
2002		if (rxcd->ipcsum_ok)
2003			m->m_pkthdr.csum_flags |= CSUM_IP_VALID;
2004	}
2005
2006	if (!rxcd->fragment) {
2007		if (rxcd->csum_ok && (rxcd->tcp || rxcd->udp)) {
2008			m->m_pkthdr.csum_flags |= CSUM_DATA_VALID |
2009			    CSUM_PSEUDO_HDR;
2010			m->m_pkthdr.csum_data = 0xFFFF;
2011		}
2012	}
2013}
2014
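/*
 * Finish a fully assembled frame: drop it on a device-reported error,
 * otherwise set the flow ID, checksum results, and VLAN tag, update the
 * queue statistics, and pass it to the stack with the Rx lock dropped
 * around if_input().
 */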
2015static void
2016vmxnet3_rxq_input(struct vmxnet3_rxqueue *rxq,
2017    struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2018{
2019	struct vmxnet3_softc *sc;
2020	struct ifnet *ifp;
2021
2022	sc = rxq->vxrxq_sc;
2023	ifp = sc->vmx_ifp;
2024
2025	if (rxcd->error) {
2026		rxq->vxrxq_stats.vmrxs_ierrors++;
2027		m_freem(m);
2028		return;
2029	}
2030
2031#ifdef notyet
2032	switch (rxcd->rss_type) {
2033	case VMXNET3_RCD_RSS_TYPE_IPV4:
2034		m->m_pkthdr.flowid = rxcd->rss_hash;
2035		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV4);
2036		break;
2037	case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
2038		m->m_pkthdr.flowid = rxcd->rss_hash;
2039		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV4);
2040		break;
2041	case VMXNET3_RCD_RSS_TYPE_IPV6:
2042		m->m_pkthdr.flowid = rxcd->rss_hash;
2043		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV6);
2044		break;
2045	case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
2046		m->m_pkthdr.flowid = rxcd->rss_hash;
2047		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV6);
2048		break;
2049	default: /* VMXNET3_RCD_RSS_TYPE_NONE */
2050		m->m_pkthdr.flowid = rxq->vxrxq_id;
2051		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2052		break;
2053	}
2054#else
2055	m->m_pkthdr.flowid = rxq->vxrxq_id;
2056	m->m_flags |= M_FLOWID;
2057#endif
2058
2059	if (!rxcd->no_csum)
2060		vmxnet3_rx_csum(rxcd, m);
2061	if (rxcd->vlan) {
2062		m->m_flags |= M_VLANTAG;
2063		m->m_pkthdr.ether_vtag = rxcd->vtag;
2064	}
2065
2066	rxq->vxrxq_stats.vmrxs_ipackets++;
2067	rxq->vxrxq_stats.vmrxs_ibytes += m->m_pkthdr.len;
2068
2069	VMXNET3_RXQ_UNLOCK(rxq);
2070	(*ifp->if_input)(ifp, m);
2071	VMXNET3_RXQ_LOCK(rxq);
2072}
2073
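/*
 * Drain the Rx completion ring.  A completion descriptor belongs to the
 * driver when its generation bit matches vxcr_gen; the generation flips
 * each time the ring wraps.  A frame starts in a head buffer from ring 0
 * and may continue in body buffers, which are chained into a single mbuf
 * chain and handed to the stack once the end-of-packet descriptor is seen.
 */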
2074static void
2075vmxnet3_rxq_eof(struct vmxnet3_rxqueue *rxq)
2076{
2077	struct vmxnet3_softc *sc;
2078	struct ifnet *ifp;
2079	struct vmxnet3_rxring *rxr;
2080	struct vmxnet3_comp_ring *rxc;
2081	struct vmxnet3_rxdesc *rxd;
2082	struct vmxnet3_rxcompdesc *rxcd;
2083	struct mbuf *m, *m_head, *m_tail;
2084	int idx, length;
2085
2086	sc = rxq->vxrxq_sc;
2087	ifp = sc->vmx_ifp;
2088	rxc = &rxq->vxrxq_comp_ring;
2089	m_head = m_tail = NULL;
2090
2091	VMXNET3_RXQ_LOCK_ASSERT(rxq);
2092
2093	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2094		return;
2095
2096	for (;;) {
2097		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
2098		if (rxcd->gen != rxc->vxcr_gen)
2099			break;
2100		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
2101
2102		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
2103			rxc->vxcr_next = 0;
2104			rxc->vxcr_gen ^= 1;
2105		}
2106
2107		idx = rxcd->rxd_idx;
2108		length = rxcd->len;
2109		if (rxcd->qid < sc->vmx_nrxqueues)
2110			rxr = &rxq->vxrxq_cmd_ring[0];
2111		else
2112			rxr = &rxq->vxrxq_cmd_ring[1];
2113		rxd = &rxr->vxrxr_rxd[idx];
2114
2115		m = rxr->vxrxr_rxbuf[idx].vrxb_m;
2116		KASSERT(m != NULL, ("%s: queue %d idx %d without mbuf",
2117		    __func__, rxcd->qid, idx));
2118
2119		/*
2120		 * The host may skip descriptors. We detect this when this
2121		 * descriptor does not match the previous fill index. Catch
2122		 * up with the host now.
2123		 */
2124		if (__predict_false(rxr->vxrxr_fill != idx)) {
2125			while (rxr->vxrxr_fill != idx) {
2126				rxr->vxrxr_rxd[rxr->vxrxr_fill].gen =
2127				    rxr->vxrxr_gen;
2128				vmxnet3_rxr_increment_fill(rxr);
2129			}
2130		}
2131
2132		if (rxcd->sop) {
2133			KASSERT(rxd->btype == VMXNET3_BTYPE_HEAD,
2134			    ("%s: start of frame w/o head buffer", __func__));
2135			KASSERT(rxr == &rxq->vxrxq_cmd_ring[0],
2136			    ("%s: start of frame not in ring 0", __func__));
2137			KASSERT((idx % sc->vmx_rx_max_chain) == 0,
2138			    ("%s: start of frame at unexcepted index %d (%d)",
2139			    ("%s: start of frame at unexpected index %d (%d)",
2140			KASSERT(m_head == NULL,
2141			    ("%s: duplicate start of frame?", __func__));
2142
2143			if (length == 0) {
2144				/* Just ignore this descriptor. */
2145				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2146				goto nextp;
2147			}
2148
2149			if (vmxnet3_newbuf(sc, rxr) != 0) {
2150				rxq->vxrxq_stats.vmrxs_iqdrops++;
2151				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2152				if (!rxcd->eop)
2153					vmxnet3_rxq_discard_chain(rxq);
2154				goto nextp;
2155			}
2156
2157			m->m_pkthdr.rcvif = ifp;
2158			m->m_pkthdr.len = m->m_len = length;
2159			m->m_pkthdr.csum_flags = 0;
2160			m_head = m_tail = m;
2161
2162		} else {
2163			KASSERT(rxd->btype == VMXNET3_BTYPE_BODY,
2164			    ("%s: non start of frame w/o body buffer", __func__));
2165			KASSERT(m_head != NULL,
2166			    ("%s: frame not started?", __func__));
2167
2168			if (vmxnet3_newbuf(sc, rxr) != 0) {
2169				rxq->vxrxq_stats.vmrxs_iqdrops++;
2170				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2171				if (!rxcd->eop)
2172					vmxnet3_rxq_discard_chain(rxq);
2173				m_freem(m_head);
2174				m_head = m_tail = NULL;
2175				goto nextp;
2176			}
2177
2178			m->m_len = length;
2179			m_head->m_pkthdr.len += length;
2180			m_tail->m_next = m;
2181			m_tail = m;
2182		}
2183
2184		if (rxcd->eop) {
2185			vmxnet3_rxq_input(rxq, rxcd, m_head);
2186			m_head = m_tail = NULL;
2187
2188			/* Must recheck after dropping the Rx lock. */
2189			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2190				break;
2191		}
2192
2193nextp:
2194		if (__predict_false(rxq->vxrxq_rs->update_rxhead)) {
2195			int qid = rxcd->qid;
2196			bus_size_t r;
2197
2198			idx = (idx + 1) % rxr->vxrxr_ndesc;
2199			if (qid >= sc->vmx_nrxqueues) {
2200				qid -= sc->vmx_nrxqueues;
2201				r = VMXNET3_BAR0_RXH2(qid);
2202			} else
2203				r = VMXNET3_BAR0_RXH1(qid);
2204			vmxnet3_write_bar0(sc, r, idx);
2205		}
2206	}
2207}
2208
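/*
 * Single-vector (INTx or MSI) interrupt handler.  For a shared INTx line,
 * the BAR1 interrupt register is read first to see whether this device
 * raised the interrupt.  Pending events, the single Rx queue, and the
 * single Tx queue are then all serviced from this one handler.
 */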
2209static void
2210vmxnet3_legacy_intr(void *xsc)
2211{
2212	struct vmxnet3_softc *sc;
2213	struct vmxnet3_rxqueue *rxq;
2214	struct vmxnet3_txqueue *txq;
2215	struct ifnet *ifp;
2216
2217	sc = xsc;
2218	rxq = &sc->vmx_rxq[0];
2219	txq = &sc->vmx_txq[0];
2220	ifp = sc->vmx_ifp;
2221
2222	if (sc->vmx_intr_type == VMXNET3_IT_LEGACY) {
2223		if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
2224			return;
2225	}
2226	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2227		vmxnet3_disable_all_intrs(sc);
2228
2229	if (sc->vmx_ds->event != 0)
2230		vmxnet3_evintr(sc);
2231
2232	VMXNET3_RXQ_LOCK(rxq);
2233	vmxnet3_rxq_eof(rxq);
2234	VMXNET3_RXQ_UNLOCK(rxq);
2235
2236	VMXNET3_TXQ_LOCK(txq);
2237	vmxnet3_txq_eof(txq);
2238	vmxnet3_txq_start(txq);
2239	VMXNET3_TXQ_UNLOCK(txq);
2240
2241	vmxnet3_enable_all_intrs(sc);
2242}
2243
2244static void
2245vmxnet3_txq_intr(void *xtxq)
2246{
2247	struct vmxnet3_softc *sc;
2248	struct vmxnet3_txqueue *txq;
2249	struct ifnet *ifp;
2250
2251	txq = xtxq;
2252	sc = txq->vxtxq_sc;
2253	ifp = sc->vmx_ifp;
2254
2255	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2256		vmxnet3_disable_intr(sc, txq->vxtxq_intr_idx);
2257
2258	VMXNET3_TXQ_LOCK(txq);
2259	vmxnet3_txq_eof(txq);
2260	vmxnet3_txq_start(txq);
2261	VMXNET3_TXQ_UNLOCK(txq);
2262
2263	vmxnet3_enable_intr(sc, txq->vxtxq_intr_idx);
2264}
2265
2266static void
2267vmxnet3_rxq_intr(void *xrxq)
2268{
2269	struct vmxnet3_softc *sc;
2270	struct vmxnet3_rxqueue *rxq;
2271
2272	rxq = xrxq;
2273	sc = rxq->vxrxq_sc;
2274
2275	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2276		vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx);
2277
2278	VMXNET3_RXQ_LOCK(rxq);
2279	vmxnet3_rxq_eof(rxq);
2280	VMXNET3_RXQ_UNLOCK(rxq);
2281
2282	vmxnet3_enable_intr(sc, rxq->vxrxq_intr_idx);
2283}
2284
2285static void
2286vmxnet3_event_intr(void *xsc)
2287{
2288	struct vmxnet3_softc *sc;
2289
2290	sc = xsc;
2291
2292	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2293		vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
2294
2295	if (sc->vmx_ds->event != 0)
2296		vmxnet3_evintr(sc);
2297
2298	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2299}
2300
2301static void
2302vmxnet3_txstop(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2303{
2304	struct vmxnet3_txring *txr;
2305	struct vmxnet3_txbuf *txb;
2306	int i;
2307
2308	txr = &txq->vxtxq_cmd_ring;
2309
2310	for (i = 0; i < txr->vxtxr_ndesc; i++) {
2311		txb = &txr->vxtxr_txbuf[i];
2312
2313		if (txb->vtxb_m == NULL)
2314			continue;
2315
2316		bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
2317		    BUS_DMASYNC_POSTWRITE);
2318		bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap);
2319		m_freem(txb->vtxb_m);
2320		txb->vtxb_m = NULL;
2321	}
2322}
2323
2324static void
2325vmxnet3_rxstop(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2326{
2327	struct vmxnet3_rxring *rxr;
2328	struct vmxnet3_rxbuf *rxb;
2329	int i, j;
2330
2331	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
2332		rxr = &rxq->vxrxq_cmd_ring[i];
2333
2334		for (j = 0; j < rxr->vxrxr_ndesc; j++) {
2335			rxb = &rxr->vxrxr_rxbuf[j];
2336
2337			if (rxb->vrxb_m == NULL)
2338				continue;
2339
2340			bus_dmamap_sync(rxr->vxrxr_rxtag, rxb->vrxb_dmamap,
2341			    BUS_DMASYNC_POSTREAD);
2342			bus_dmamap_unload(rxr->vxrxr_rxtag, rxb->vrxb_dmamap);
2343			m_freem(rxb->vrxb_m);
2344			rxb->vrxb_m = NULL;
2345		}
2346	}
2347}
2348
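/*
 * Acquire and immediately release each queue lock.  This serves as a
 * rendezvous: any interrupt handler or transmit path that was already
 * running under a queue lock is guaranteed to have finished before the
 * rings are torn down.
 */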
2349static void
2350vmxnet3_stop_rendezvous(struct vmxnet3_softc *sc)
2351{
2352	struct vmxnet3_rxqueue *rxq;
2353	struct vmxnet3_txqueue *txq;
2354	int i;
2355
2356	for (i = 0; i < sc->vmx_nrxqueues; i++) {
2357		rxq = &sc->vmx_rxq[i];
2358		VMXNET3_RXQ_LOCK(rxq);
2359		VMXNET3_RXQ_UNLOCK(rxq);
2360	}
2361
2362	for (i = 0; i < sc->vmx_ntxqueues; i++) {
2363		txq = &sc->vmx_txq[i];
2364		VMXNET3_TXQ_LOCK(txq);
2365		VMXNET3_TXQ_UNLOCK(txq);
2366	}
2367}
2368
2369static void
2370vmxnet3_stop(struct vmxnet3_softc *sc)
2371{
2372	struct ifnet *ifp;
2373	int q;
2374
2375	ifp = sc->vmx_ifp;
2376	VMXNET3_CORE_LOCK_ASSERT(sc);
2377
2378	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2379	sc->vmx_link_active = 0;
2380	callout_stop(&sc->vmx_tick);
2381
2382	/* Disable interrupts. */
2383	vmxnet3_disable_all_intrs(sc);
2384	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
2385
2386	vmxnet3_stop_rendezvous(sc);
2387
2388	for (q = 0; q < sc->vmx_ntxqueues; q++)
2389		vmxnet3_txstop(sc, &sc->vmx_txq[q]);
2390	for (q = 0; q < sc->vmx_nrxqueues; q++)
2391		vmxnet3_rxstop(sc, &sc->vmx_rxq[q]);
2392
2393	vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
2394}
2395
2396static void
2397vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2398{
2399	struct vmxnet3_txring *txr;
2400	struct vmxnet3_comp_ring *txc;
2401
2402	txr = &txq->vxtxq_cmd_ring;
2403	txr->vxtxr_head = 0;
2404	txr->vxtxr_next = 0;
2405	txr->vxtxr_gen = VMXNET3_INIT_GEN;
2406	bzero(txr->vxtxr_txd,
2407	    txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc));
2408
2409	txc = &txq->vxtxq_comp_ring;
2410	txc->vxcr_next = 0;
2411	txc->vxcr_gen = VMXNET3_INIT_GEN;
2412	bzero(txc->vxcr_u.txcd,
2413	    txc->vxcr_ndesc * sizeof(struct vmxnet3_txcompdesc));
2414}
2415
2416static int
2417vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2418{
2419	struct ifnet *ifp;
2420	struct vmxnet3_rxring *rxr;
2421	struct vmxnet3_comp_ring *rxc;
2422	int i, populate, idx, frame_size, error;
2423
2424	ifp = sc->vmx_ifp;
2425	frame_size = ETHER_ALIGN + sizeof(struct ether_vlan_header) +
2426	    ifp->if_mtu;
2427
2428	/*
2429	 * If the MTU causes us to exceed what a regular sized cluster can
2430	 * handle, we allocate a second MJUMPAGESIZE cluster after it in
2431	 * ring 0. If in use, ring 1 always contains MJUMPAGESIZE clusters.
2432	 *
2433	 * Keep rx_max_chain a divisor of the maximum Rx ring size to make
2434	 * our life easier. We do not support changing the ring size after
2435	 * the attach.
2436	 */
2437	if (frame_size <= MCLBYTES)
2438		sc->vmx_rx_max_chain = 1;
2439	else
2440		sc->vmx_rx_max_chain = 2;
2441
2442	/*
2443	 * Only populate ring 1 if the configuration will take advantage
2444	 * of it. That is either when LRO is enabled or the frame size
2445	 * exceeds what ring 0 can contain.
2446	 */
2447	if ((ifp->if_capenable & IFCAP_LRO) == 0 &&
2448	    frame_size <= MCLBYTES + MJUMPAGESIZE)
2449		populate = 1;
2450	else
2451		populate = VMXNET3_RXRINGS_PERQ;
2452
2453	for (i = 0; i < populate; i++) {
2454		rxr = &rxq->vxrxq_cmd_ring[i];
2455		rxr->vxrxr_fill = 0;
2456		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
2457		bzero(rxr->vxrxr_rxd,
2458		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2459
2460		for (idx = 0; idx < rxr->vxrxr_ndesc; idx++) {
2461			error = vmxnet3_newbuf(sc, rxr);
2462			if (error)
2463				return (error);
2464		}
2465	}
2466
2467	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
2468		rxr = &rxq->vxrxq_cmd_ring[i];
2469		rxr->vxrxr_fill = 0;
2470		rxr->vxrxr_gen = 0;
2471		bzero(rxr->vxrxr_rxd,
2472		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2473	}
2474
2475	rxc = &rxq->vxrxq_comp_ring;
2476	rxc->vxcr_next = 0;
2477	rxc->vxcr_gen = VMXNET3_INIT_GEN;
2478	bzero(rxc->vxcr_u.rxcd,
2479	    rxc->vxcr_ndesc * sizeof(struct vmxnet3_rxcompdesc));
2480
2481	return (0);
2482}
2483
2484static int
2485vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
2486{
2487	device_t dev;
2488	int q, error;
2489
2490	dev = sc->vmx_dev;
2491
2492	for (q = 0; q < sc->vmx_ntxqueues; q++)
2493		vmxnet3_txinit(sc, &sc->vmx_txq[q]);
2494
2495	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2496		error = vmxnet3_rxinit(sc, &sc->vmx_rxq[q]);
2497		if (error) {
2498			device_printf(dev, "cannot populate Rx queue %d\n", q);
2499			return (error);
2500		}
2501	}
2502
2503	return (0);
2504}
2505
2506static int
2507vmxnet3_enable_device(struct vmxnet3_softc *sc)
2508{
2509	int q;
2510
2511	if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
2512		device_printf(sc->vmx_dev, "device enable command failed!\n");
2513		return (1);
2514	}
2515
2516	/* Reset the Rx queue heads. */
2517	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2518		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
2519		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
2520	}
2521
2522	return (0);
2523}
2524
2525static void
2526vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
2527{
2528	struct ifnet *ifp;
2529
2530	ifp = sc->vmx_ifp;
2531
2532	vmxnet3_set_rxfilter(sc);
2533
2534	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2535		bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter,
2536		    sizeof(sc->vmx_ds->vlan_filter));
2537	else
2538		bzero(sc->vmx_ds->vlan_filter,
2539		    sizeof(sc->vmx_ds->vlan_filter));
2540	vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
2541}
2542
2543static int
2544vmxnet3_reinit(struct vmxnet3_softc *sc)
2545{
2546
2547	vmxnet3_reinit_interface(sc);
2548	vmxnet3_reinit_shared_data(sc);
2549
2550	if (vmxnet3_reinit_queues(sc) != 0)
2551		return (ENXIO);
2552
2553	if (vmxnet3_enable_device(sc) != 0)
2554		return (ENXIO);
2555
2556	vmxnet3_reinit_rxfilters(sc);
2557
2558	return (0);
2559}
2560
2561static void
2562vmxnet3_init_locked(struct vmxnet3_softc *sc)
2563{
2564	struct ifnet *ifp;
2565
2566	ifp = sc->vmx_ifp;
2567
2568	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2569		return;
2570
2571	vmxnet3_stop(sc);
2572
2573	if (vmxnet3_reinit(sc) != 0) {
2574		vmxnet3_stop(sc);
2575		return;
2576	}
2577
2578	ifp->if_drv_flags |= IFF_DRV_RUNNING;
2579	vmxnet3_link_status(sc);
2580
2581	vmxnet3_enable_all_intrs(sc);
2582	callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
2583}
2584
2585static void
2586vmxnet3_init(void *xsc)
2587{
2588	struct vmxnet3_softc *sc;
2589
2590	sc = xsc;
2591
2592	VMXNET3_CORE_LOCK(sc);
2593	vmxnet3_init_locked(sc);
2594	VMXNET3_CORE_UNLOCK(sc);
2595}
2596
2597/*
2598 * BMV: Much of this can go away once we finally have offsets in
2599 * the mbuf packet header. Bug andre@.
2600 */
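/*
 * Examine the headers of an outgoing frame and return its ethertype, its
 * L4 protocol, and in *start the offset of the L4 header.  For TSO the
 * TCP header length is added as well, since the SOP descriptor's hlen
 * must cover the complete protocol header.
 */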
2601static int
2602vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *txq, struct mbuf *m,
2603    int *etype, int *proto, int *start)
2604{
2605	struct ether_vlan_header *evh;
2606	int offset;
2607
2608	evh = mtod(m, struct ether_vlan_header *);
2609	if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2610		/* BMV: We should handle nested VLAN tags too. */
2611		*etype = ntohs(evh->evl_proto);
2612		offset = sizeof(struct ether_vlan_header);
2613	} else {
2614		*etype = ntohs(evh->evl_encap_proto);
2615		offset = sizeof(struct ether_header);
2616	}
2617
2618	switch (*etype) {
2619#if defined(INET)
2620	case ETHERTYPE_IP: {
2621		struct ip *ip, iphdr;
2622		if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
2623			m_copydata(m, offset, sizeof(struct ip),
2624			    (caddr_t) &iphdr);
2625			ip = &iphdr;
2626		} else
2627			ip = mtodo(m, offset);
2628		*proto = ip->ip_p;
2629		*start = offset + (ip->ip_hl << 2);
2630		break;
2631	}
2632#endif
2633#if defined(INET6)
2634	case ETHERTYPE_IPV6:
2635		*proto = -1;
2636		*start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
2637		/* Assert the network stack sent us a valid packet. */
2638		KASSERT(*start > offset,
2639		    ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
2640		    *start, offset, *proto));
2641		break;
2642#endif
2643	default:
2644		return (EINVAL);
2645	}
2646
2647	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
2648		struct tcphdr *tcp, tcphdr;
2649
2650		if (__predict_false(*proto != IPPROTO_TCP)) {
2651			/* Likely failed to correctly parse the mbuf. */
2652			return (EINVAL);
2653		}
2654
2655		txq->vxtxq_stats.vmtxs_tso++;
2656
2657		/*
2658		 * For TSO, the size of the protocol header is also
2659		 * included in the descriptor header size.
2660		 */
2661		if (m->m_len < *start + sizeof(struct tcphdr)) {
2662			m_copydata(m, *start, sizeof(struct tcphdr),
2663			    (caddr_t) &tcphdr);
2664			tcp = &tcphdr;
2665		} else
2666			tcp = mtodo(m, *start);
2667		*start += (tcp->th_off << 2);
2668	} else
2669		txq->vxtxq_stats.vmtxs_csum++;
2670
2671	return (0);
2672}
2673
2674static int
2675vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *txq, struct mbuf **m0,
2676    bus_dmamap_t dmap, bus_dma_segment_t segs[], int *nsegs)
2677{
2678	struct vmxnet3_txring *txr;
2679	struct mbuf *m;
2680	bus_dma_tag_t tag;
2681	int error;
2682
2683	txr = &txq->vxtxq_cmd_ring;
2684	m = *m0;
2685	tag = txr->vxtxr_txtag;
2686
2687	error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0);
2688	if (error == 0 || error != EFBIG)
2689		return (error);
2690
2691	m = m_defrag(m, M_NOWAIT);
2692	if (m != NULL) {
2693		*m0 = m;
2694		error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0);
2695	} else
2696		error = ENOBUFS;
2697
2698	if (error) {
2699		m_freem(*m0);
2700		*m0 = NULL;
2701		txq->vxtxq_sc->vmx_stats.vmst_defrag_failed++;
2702	} else
2703		txq->vxtxq_sc->vmx_stats.vmst_defragged++;
2704
2705	return (error);
2706}
2707
2708static void
2709vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *txq, bus_dmamap_t dmap)
2710{
2711	struct vmxnet3_txring *txr;
2712
2713	txr = &txq->vxtxq_cmd_ring;
2714	bus_dmamap_unload(txr->vxtxr_txtag, dmap);
2715}
2716
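/*
 * Encapsulate an mbuf chain into the Tx ring.  The SOP descriptor is
 * written with an inverted generation bit so the device ignores the frame
 * while it is being built; once all segments are filled in, a write
 * barrier is issued and the SOP generation is flipped to hand the frame
 * over.  VLAN and checksum/TSO offload state is carried only in the SOP
 * descriptor.
 */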
2717static int
2718vmxnet3_txq_encap(struct vmxnet3_txqueue *txq, struct mbuf **m0)
2719{
2720	struct vmxnet3_softc *sc;
2721	struct ifnet *ifp;
2722	struct vmxnet3_txring *txr;
2723	struct vmxnet3_txdesc *txd, *sop;
2724	struct mbuf *m;
2725	bus_dmamap_t dmap;
2726	bus_dma_segment_t segs[VMXNET3_TX_MAXSEGS];
2727	int i, gen, nsegs, etype, proto, start, error;
2728
2729	sc = txq->vxtxq_sc;
2730	ifp = sc->vmx_ifp;
2731	start = 0;
2732	txd = NULL;
2733	txr = &txq->vxtxq_cmd_ring;
2734	dmap = txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_dmamap;
2735
2736	error = vmxnet3_txq_load_mbuf(txq, m0, dmap, segs, &nsegs);
2737	if (error)
2738		return (error);
2739
2740	m = *m0;
2741	M_ASSERTPKTHDR(m);
2742	KASSERT(nsegs <= VMXNET3_TX_MAXSEGS,
2743	    ("%s: mbuf %p with too many segments %d", __func__, m, nsegs));
2744
2745	if (VMXNET3_TXRING_AVAIL(txr) < nsegs) {
2746		txq->vxtxq_stats.vmtxs_full++;
2747		vmxnet3_txq_unload_mbuf(txq, dmap);
2748		return (ENOSPC);
2749	} else if (m->m_pkthdr.csum_flags & VMXNET3_CSUM_ALL_OFFLOAD) {
2750		error = vmxnet3_txq_offload_ctx(txq, m, &etype, &proto, &start);
2751		if (error) {
2752			txq->vxtxq_stats.vmtxs_offload_failed++;
2753			vmxnet3_txq_unload_mbuf(txq, dmap);
2754			m_freem(m);
2755			*m0 = NULL;
2756			return (error);
2757		}
2758	}
2759
2760	txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_m = m = *m0;
2761	sop = &txr->vxtxr_txd[txr->vxtxr_head];
2762	gen = txr->vxtxr_gen ^ 1;	/* SOP stays cpu-owned for now. */
2763
2764	for (i = 0; i < nsegs; i++) {
2765		txd = &txr->vxtxr_txd[txr->vxtxr_head];
2766
2767		txd->addr = segs[i].ds_addr;
2768		txd->len = segs[i].ds_len;
2769		txd->gen = gen;
2770		txd->dtype = 0;
2771		txd->offload_mode = VMXNET3_OM_NONE;
2772		txd->offload_pos = 0;
2773		txd->hlen = 0;
2774		txd->eop = 0;
2775		txd->compreq = 0;
2776		txd->vtag_mode = 0;
2777		txd->vtag = 0;
2778
2779		if (++txr->vxtxr_head == txr->vxtxr_ndesc) {
2780			txr->vxtxr_head = 0;
2781			txr->vxtxr_gen ^= 1;
2782		}
2783		gen = txr->vxtxr_gen;
2784	}
2785	txd->eop = 1;
2786	txd->compreq = 1;
2787
2788	if (m->m_flags & M_VLANTAG) {
2789		sop->vtag_mode = 1;
2790		sop->vtag = m->m_pkthdr.ether_vtag;
2791	}
2792
2793	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
2794		sop->offload_mode = VMXNET3_OM_TSO;
2795		sop->hlen = start;
2796		sop->offload_pos = m->m_pkthdr.tso_segsz;
2797	} else if (m->m_pkthdr.csum_flags & (VMXNET3_CSUM_OFFLOAD |
2798	    VMXNET3_CSUM_OFFLOAD_IPV6)) {
2799		sop->offload_mode = VMXNET3_OM_CSUM;
2800		sop->hlen = start;
2801		sop->offload_pos = start + m->m_pkthdr.csum_data;
2802	}
2803
2804	/* Finally, change the ownership. */
2805	vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
2806	sop->gen ^= 1;
2807
2808	if (++txq->vxtxq_ts->npending >= txq->vxtxq_ts->intr_threshold) {
2809		txq->vxtxq_ts->npending = 0;
2810		vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id),
2811		    txr->vxtxr_head);
2812	}
2813
2814	return (0);
2815}
2816
2817static void
2818vmxnet3_txq_update_pending(struct vmxnet3_txqueue *txq)
2819{
2820	struct vmxnet3_txring *txr;
2821
2822	txr = &txq->vxtxq_cmd_ring;
2823
2824	if (txq->vxtxq_ts->npending > 0) {
2825		txq->vxtxq_ts->npending = 0;
2826		vmxnet3_write_bar0(txq->vxtxq_sc,
2827		    VMXNET3_BAR0_TXH(txq->vxtxq_id), txr->vxtxr_head);
2828	}
2829}
2830
2831#ifdef VMXNET3_LEGACY_TX
2832
2833static void
2834vmxnet3_start_locked(struct ifnet *ifp)
2835{
2836	struct vmxnet3_softc *sc;
2837	struct vmxnet3_txqueue *txq;
2838	struct vmxnet3_txring *txr;
2839	struct mbuf *m_head;
2840	int tx, avail;
2841
2842	sc = ifp->if_softc;
2843	txq = &sc->vmx_txq[0];
2844	txr = &txq->vxtxq_cmd_ring;
2845	tx = 0;
2846
2847	VMXNET3_TXQ_LOCK_ASSERT(txq);
2848
2849	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2850	    sc->vmx_link_active == 0)
2851		return;
2852
2853	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
2854		if ((avail = VMXNET3_TXRING_AVAIL(txr)) < 2)
2855			break;
2856
2857		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
2858		if (m_head == NULL)
2859			break;
2860
2861		/* Assume the worst case if this mbuf is the head of a chain. */
2862		if (m_head->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) {
2863			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2864			break;
2865		}
2866
2867		if (vmxnet3_txq_encap(txq, &m_head) != 0) {
2868			if (m_head != NULL)
2869				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2870			break;
2871		}
2872
2873		tx++;
2874		ETHER_BPF_MTAP(ifp, m_head);
2875	}
2876
2877	if (tx > 0) {
2878		vmxnet3_txq_update_pending(txq);
2879		txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
2880	}
2881}
2882
2883static void
2884vmxnet3_start(struct ifnet *ifp)
2885{
2886	struct vmxnet3_softc *sc;
2887	struct vmxnet3_txqueue *txq;
2888
2889	sc = ifp->if_softc;
2890	txq = &sc->vmx_txq[0];
2891
2892	VMXNET3_TXQ_LOCK(txq);
2893	vmxnet3_start_locked(ifp);
2894	VMXNET3_TXQ_UNLOCK(txq);
2895}
2896
2897#else /* !VMXNET3_LEGACY_TX */
2898
2899static int
2900vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *txq, struct mbuf *m)
2901{
2902	struct vmxnet3_softc *sc;
2903	struct vmxnet3_txring *txr;
2904	struct buf_ring *br;
2905	struct ifnet *ifp;
2906	int tx, avail, error;
2907
2908	sc = txq->vxtxq_sc;
2909	br = txq->vxtxq_br;
2910	ifp = sc->vmx_ifp;
2911	txr = &txq->vxtxq_cmd_ring;
2912	tx = 0;
2913	error = 0;
2914
2915	VMXNET3_TXQ_LOCK_ASSERT(txq);
2916
2917	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2918	    sc->vmx_link_active == 0) {
2919		if (m != NULL)
2920			error = drbr_enqueue(ifp, br, m);
2921		return (error);
2922	}
2923
2924	if (m != NULL) {
2925		error = drbr_enqueue(ifp, br, m);
2926		if (error)
2927			return (error);
2928	}
2929
2930	while ((avail = VMXNET3_TXRING_AVAIL(txr)) >= 2) {
2931		m = drbr_peek(ifp, br);
2932		if (m == NULL)
2933			break;
2934
2935		/* Assume the worst case if this mbuf is the head of a chain. */
2936		if (m->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) {
2937			drbr_putback(ifp, br, m);
2938			error = ENOBUFS;
2939			break;
2940		}
2941
2942		error = vmxnet3_txq_encap(txq, &m);
2943		if (error) {
2944			if (m != NULL)
2945				drbr_putback(ifp, br, m);
2946			else
2947				drbr_advance(ifp, br);
2948			break;
2949		}
2950		drbr_advance(ifp, br);
2951
2952		tx++;
2953		ETHER_BPF_MTAP(ifp, m);
2954	}
2955
2956	if (tx > 0) {
2957		vmxnet3_txq_update_pending(txq);
2958		txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
2959	}
2960
2961	return (error);
2962}
2963
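/*
 * Multiqueue transmit entry point.  The mbuf's flow ID, when present,
 * selects the Tx queue; otherwise the current CPU does.  If the queue
 * lock cannot be taken immediately, the mbuf is left on the queue's
 * buf_ring and the deferred transmit task is scheduled rather than
 * blocking here.
 */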
2964static int
2965vmxnet3_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
2966{
2967	struct vmxnet3_softc *sc;
2968	struct vmxnet3_txqueue *txq;
2969	int i, ntxq, error;
2970
2971	sc = ifp->if_softc;
2972	ntxq = sc->vmx_ntxqueues;
2973
2974	if (m->m_flags & M_FLOWID)
2975		i = m->m_pkthdr.flowid % ntxq;
2976	else
2977		i = curcpu % ntxq;
2978
2979	txq = &sc->vmx_txq[i];
2980
2981	if (VMXNET3_TXQ_TRYLOCK(txq) != 0) {
2982		error = vmxnet3_txq_mq_start_locked(txq, m);
2983		VMXNET3_TXQ_UNLOCK(txq);
2984	} else {
2985		error = drbr_enqueue(ifp, txq->vxtxq_br, m);
2986		taskqueue_enqueue(sc->vmx_tq, &txq->vxtxq_defrtask);
2987	}
2988
2989	return (error);
2990}
2991
2992static void
2993vmxnet3_txq_tq_deferred(void *xtxq, int pending)
2994{
2995	struct vmxnet3_softc *sc;
2996	struct vmxnet3_txqueue *txq;
2997
2998	txq = xtxq;
2999	sc = txq->vxtxq_sc;
3000
3001	VMXNET3_TXQ_LOCK(txq);
3002	if (!drbr_empty(sc->vmx_ifp, txq->vxtxq_br))
3003		vmxnet3_txq_mq_start_locked(txq, NULL);
3004	VMXNET3_TXQ_UNLOCK(txq);
3005}
3006
3007#endif /* VMXNET3_LEGACY_TX */
3008
3009static void
3010vmxnet3_txq_start(struct vmxnet3_txqueue *txq)
3011{
3012	struct vmxnet3_softc *sc;
3013	struct ifnet *ifp;
3014
3015	sc = txq->vxtxq_sc;
3016	ifp = sc->vmx_ifp;
3017
3018#ifdef VMXNET3_LEGACY_TX
3019	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
3020		vmxnet3_start_locked(ifp);
3021#else
3022	if (!drbr_empty(ifp, txq->vxtxq_br))
3023		vmxnet3_txq_mq_start_locked(txq, NULL);
3024#endif
3025}
3026
3027static void
3028vmxnet3_tx_start_all(struct vmxnet3_softc *sc)
3029{
3030	struct vmxnet3_txqueue *txq;
3031	int i;
3032
3033	VMXNET3_CORE_LOCK_ASSERT(sc);
3034
3035	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3036		txq = &sc->vmx_txq[i];
3037
3038		VMXNET3_TXQ_LOCK(txq);
3039		vmxnet3_txq_start(txq);
3040		VMXNET3_TXQ_UNLOCK(txq);
3041	}
3042}
3043
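/*
 * The VLAN filter is a 4096-bit bitmap stored as 32-bit words: tag >> 5
 * selects the word and tag & 0x1F the bit within it.  The private copy is
 * always updated; the shared copy is pushed to the device only while
 * hardware VLAN filtering is enabled.
 */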
3044static void
3045vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
3046{
3047	struct ifnet *ifp;
3048	int idx, bit;
3049
3050	ifp = sc->vmx_ifp;
3051	idx = (tag >> 5) & 0x7F;
3052	bit = tag & 0x1F;
3053
3054	if (tag == 0 || tag > 4095)
3055		return;
3056
3057	VMXNET3_CORE_LOCK(sc);
3058
3059	/* Update our private VLAN bitvector. */
3060	if (add)
3061		sc->vmx_vlan_filter[idx] |= (1 << bit);
3062	else
3063		sc->vmx_vlan_filter[idx] &= ~(1 << bit);
3064
3065	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
3066		if (add)
3067			sc->vmx_ds->vlan_filter[idx] |= (1 << bit);
3068		else
3069			sc->vmx_ds->vlan_filter[idx] &= ~(1 << bit);
3070		vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
3071	}
3072
3073	VMXNET3_CORE_UNLOCK(sc);
3074}
3075
3076static void
3077vmxnet3_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3078{
3079
3080	if (ifp->if_softc == arg)
3081		vmxnet3_update_vlan_filter(arg, 1, tag);
3082}
3083
3084static void
3085vmxnet3_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3086{
3087
3088	if (ifp->if_softc == arg)
3089		vmxnet3_update_vlan_filter(arg, 0, tag);
3090}
3091
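/*
 * Program the receive filter.  Unicast and broadcast frames are always
 * accepted; promiscuous and allmulti modes track the interface flags.  Up
 * to VMXNET3_MULTICAST_MAX multicast addresses are copied into the shared
 * table, and on overflow the device falls back to accepting all multicast.
 */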
3092static void
3093vmxnet3_set_rxfilter(struct vmxnet3_softc *sc)
3094{
3095	struct ifnet *ifp;
3096	struct vmxnet3_driver_shared *ds;
3097	struct ifmultiaddr *ifma;
3098	u_int mode;
3099
3100	ifp = sc->vmx_ifp;
3101	ds = sc->vmx_ds;
3102
3103	mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
3104	if (ifp->if_flags & IFF_PROMISC)
3105		mode |= VMXNET3_RXMODE_PROMISC;
3106	if (ifp->if_flags & IFF_ALLMULTI)
3107		mode |= VMXNET3_RXMODE_ALLMULTI;
3108	else {
3109		int cnt = 0, overflow = 0;
3110
3111		if_maddr_rlock(ifp);
3112		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3113			if (ifma->ifma_addr->sa_family != AF_LINK)
3114				continue;
3115			else if (cnt == VMXNET3_MULTICAST_MAX) {
3116				overflow = 1;
3117				break;
3118			}
3119
3120			bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
3121			   &sc->vmx_mcast[cnt*ETHER_ADDR_LEN], ETHER_ADDR_LEN);
3122			cnt++;
3123		}
3124		if_maddr_runlock(ifp);
3125
3126		if (overflow != 0) {
3127			cnt = 0;
3128			mode |= VMXNET3_RXMODE_ALLMULTI;
3129		} else if (cnt > 0)
3130			mode |= VMXNET3_RXMODE_MCAST;
3131		ds->mcast_tablelen = cnt * ETHER_ADDR_LEN;
3132	}
3133
3134	ds->rxmode = mode;
3135
3136	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
3137	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
3138}
3139
3140static int
3141vmxnet3_change_mtu(struct vmxnet3_softc *sc, int mtu)
3142{
3143	struct ifnet *ifp;
3144
3145	ifp = sc->vmx_ifp;
3146
3147	if (mtu < VMXNET3_MIN_MTU || mtu > VMXNET3_MAX_MTU)
3148		return (EINVAL);
3149
3150	ifp->if_mtu = mtu;
3151
3152	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
3153		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3154		vmxnet3_init_locked(sc);
3155	}
3156
3157	return (0);
3158}
3159
3160static int
3161vmxnet3_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
3162{
3163	struct vmxnet3_softc *sc;
3164	struct ifreq *ifr;
3165	int reinit, mask, error;
3166
3167	sc = ifp->if_softc;
3168	ifr = (struct ifreq *) data;
3169	error = 0;
3170
3171	switch (cmd) {
3172	case SIOCSIFMTU:
3173		if (ifp->if_mtu != ifr->ifr_mtu) {
3174			VMXNET3_CORE_LOCK(sc);
3175			error = vmxnet3_change_mtu(sc, ifr->ifr_mtu);
3176			VMXNET3_CORE_UNLOCK(sc);
3177		}
3178		break;
3179
3180	case SIOCSIFFLAGS:
3181		VMXNET3_CORE_LOCK(sc);
3182		if (ifp->if_flags & IFF_UP) {
3183			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3184				if ((ifp->if_flags ^ sc->vmx_if_flags) &
3185				    (IFF_PROMISC | IFF_ALLMULTI)) {
3186					vmxnet3_set_rxfilter(sc);
3187				}
3188			} else
3189				vmxnet3_init_locked(sc);
3190		} else {
3191			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3192				vmxnet3_stop(sc);
3193		}
3194		sc->vmx_if_flags = ifp->if_flags;
3195		VMXNET3_CORE_UNLOCK(sc);
3196		break;
3197
3198	case SIOCADDMULTI:
3199	case SIOCDELMULTI:
3200		VMXNET3_CORE_LOCK(sc);
3201		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3202			vmxnet3_set_rxfilter(sc);
3203		VMXNET3_CORE_UNLOCK(sc);
3204		break;
3205
3206	case SIOCSIFMEDIA:
3207	case SIOCGIFMEDIA:
3208		error = ifmedia_ioctl(ifp, ifr, &sc->vmx_media, cmd);
3209		break;
3210
3211	case SIOCSIFCAP:
3212		VMXNET3_CORE_LOCK(sc);
3213		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3214
3215		if (mask & IFCAP_TXCSUM)
3216			ifp->if_capenable ^= IFCAP_TXCSUM;
3217		if (mask & IFCAP_TXCSUM_IPV6)
3218			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
3219		if (mask & IFCAP_TSO4)
3220			ifp->if_capenable ^= IFCAP_TSO4;
3221		if (mask & IFCAP_TSO6)
3222			ifp->if_capenable ^= IFCAP_TSO6;
3223
3224		if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO |
3225		    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWFILTER)) {
3226			/* Changing these features requires us to reinit. */
3227			reinit = 1;
3228
3229			if (mask & IFCAP_RXCSUM)
3230				ifp->if_capenable ^= IFCAP_RXCSUM;
3231			if (mask & IFCAP_RXCSUM_IPV6)
3232				ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
3233			if (mask & IFCAP_LRO)
3234				ifp->if_capenable ^= IFCAP_LRO;
3235			if (mask & IFCAP_VLAN_HWTAGGING)
3236				ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
3237			if (mask & IFCAP_VLAN_HWFILTER)
3238				ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
3239		} else
3240			reinit = 0;
3241
3242		if (mask & IFCAP_VLAN_HWTSO)
3243			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
3244
3245		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3246			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3247			vmxnet3_init_locked(sc);
3248		}
3249
3250		VMXNET3_CORE_UNLOCK(sc);
3251		VLAN_CAPABILITIES(ifp);
3252		break;
3253
3254	default:
3255		error = ether_ioctl(ifp, cmd, data);
3256		break;
3257	}
3258
3259	VMXNET3_CORE_LOCK_ASSERT_NOTOWNED(sc);
3260
3261	return (error);
3262}
3263
3264#ifndef VMXNET3_LEGACY_TX
3265static void
3266vmxnet3_qflush(struct ifnet *ifp)
3267{
3268	struct vmxnet3_softc *sc;
3269	struct vmxnet3_txqueue *txq;
3270	struct mbuf *m;
3271	int i;
3272
3273	sc = ifp->if_softc;
3274
3275	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3276		txq = &sc->vmx_txq[i];
3277
3278		VMXNET3_TXQ_LOCK(txq);
3279		while ((m = buf_ring_dequeue_sc(txq->vxtxq_br)) != NULL)
3280			m_freem(m);
3281		VMXNET3_TXQ_UNLOCK(txq);
3282	}
3283
3284	if_qflush(ifp);
3285}
3286#endif
3287
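/*
 * Per-queue Tx watchdog, decremented from the one second tick.  A timeout
 * is reported only when the counter was armed by a transmit and reaches
 * zero before the completion path clears it.
 */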
3288static int
3289vmxnet3_watchdog(struct vmxnet3_txqueue *txq)
3290{
3291	struct vmxnet3_softc *sc;
3292
3293	sc = txq->vxtxq_sc;
3294
3295	VMXNET3_TXQ_LOCK(txq);
3296	if (txq->vxtxq_watchdog == 0 || --txq->vxtxq_watchdog) {
3297		VMXNET3_TXQ_UNLOCK(txq);
3298		return (0);
3299	}
3300	VMXNET3_TXQ_UNLOCK(txq);
3301
3302	if_printf(sc->vmx_ifp, "watchdog timeout on queue %d\n",
3303	    txq->vxtxq_id);
3304	return (1);
3305}
3306
3307static void
3308vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
3309{
3310
3311	vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
3312}
3313
3314static void
3315vmxnet3_txq_accum_stats(struct vmxnet3_txqueue *txq,
3316    struct vmxnet3_txq_stats *accum)
3317{
3318	struct vmxnet3_txq_stats *st;
3319
3320	st = &txq->vxtxq_stats;
3321
3322	accum->vmtxs_opackets += st->vmtxs_opackets;
3323	accum->vmtxs_obytes += st->vmtxs_obytes;
3324	accum->vmtxs_omcasts += st->vmtxs_omcasts;
3325	accum->vmtxs_csum += st->vmtxs_csum;
3326	accum->vmtxs_tso += st->vmtxs_tso;
3327	accum->vmtxs_full += st->vmtxs_full;
3328	accum->vmtxs_offload_failed += st->vmtxs_offload_failed;
3329}
3330
3331static void
3332vmxnet3_rxq_accum_stats(struct vmxnet3_rxqueue *rxq,
3333    struct vmxnet3_rxq_stats *accum)
3334{
3335	struct vmxnet3_rxq_stats *st;
3336
3337	st = &rxq->vxrxq_stats;
3338
3339	accum->vmrxs_ipackets += st->vmrxs_ipackets;
3340	accum->vmrxs_ibytes += st->vmrxs_ibytes;
3341	accum->vmrxs_iqdrops += st->vmrxs_iqdrops;
3342	accum->vmrxs_ierrors += st->vmrxs_ierrors;
3343}
3344
3345static void
3346vmxnet3_accumulate_stats(struct vmxnet3_softc *sc)
3347{
3348	struct ifnet *ifp;
3349	struct vmxnet3_statistics *st;
3350	struct vmxnet3_txq_stats txaccum;
3351	struct vmxnet3_rxq_stats rxaccum;
3352	int i;
3353
3354	ifp = sc->vmx_ifp;
3355	st = &sc->vmx_stats;
3356
3357	bzero(&txaccum, sizeof(struct vmxnet3_txq_stats));
3358	bzero(&rxaccum, sizeof(struct vmxnet3_rxq_stats));
3359
3360	for (i = 0; i < sc->vmx_ntxqueues; i++)
3361		vmxnet3_txq_accum_stats(&sc->vmx_txq[i], &txaccum);
3362	for (i = 0; i < sc->vmx_nrxqueues; i++)
3363		vmxnet3_rxq_accum_stats(&sc->vmx_rxq[i], &rxaccum);
3364
3365	/*
3366	 * With the exception of if_ierrors, these ifnet statistics are
3367	 * only updated in the driver, so just set them to our accumulated
3368	 * values. if_ierrors is updated in ether_input() for malformed
3369	 * frames that we should have already discarded.
3370	 */
3371	ifp->if_ipackets = rxaccum.vmrxs_ipackets;
3372	ifp->if_iqdrops = rxaccum.vmrxs_iqdrops;
3373	ifp->if_ierrors = rxaccum.vmrxs_ierrors;
3374	ifp->if_opackets = txaccum.vmtxs_opackets;
3375#ifndef VMXNET3_LEGACY_TX
3376	ifp->if_obytes = txaccum.vmtxs_obytes;
3377	ifp->if_omcasts = txaccum.vmtxs_omcasts;
3378#endif
3379}
3380
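/*
 * Once-a-second callout: fold the per-queue software statistics into the
 * ifnet counters, ask the device to refresh its statistics, and run the
 * Tx watchdogs.  A watchdog timeout forces a full reinitialization.
 */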
3381static void
3382vmxnet3_tick(void *xsc)
3383{
3384	struct vmxnet3_softc *sc;
3385	struct ifnet *ifp;
3386	int i, timedout;
3387
3388	sc = xsc;
3389	ifp = sc->vmx_ifp;
3390	timedout = 0;
3391
3392	VMXNET3_CORE_LOCK_ASSERT(sc);
3393
3394	vmxnet3_accumulate_stats(sc);
3395	vmxnet3_refresh_host_stats(sc);
3396
3397	for (i = 0; i < sc->vmx_ntxqueues; i++)
3398		timedout |= vmxnet3_watchdog(&sc->vmx_txq[i]);
3399
3400	if (timedout != 0) {
3401		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3402		vmxnet3_init_locked(sc);
3403	} else
3404		callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
3405}
3406
3407static int
3408vmxnet3_link_is_up(struct vmxnet3_softc *sc)
3409{
3410	uint32_t status;
3411
3412	/* Also update the link speed while here. */
3413	status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
3414	sc->vmx_link_speed = status >> 16;
3415	return !!(status & 0x1);
3416}
3417
3418static void
3419vmxnet3_link_status(struct vmxnet3_softc *sc)
3420{
3421	struct ifnet *ifp;
3422	int link;
3423
3424	ifp = sc->vmx_ifp;
3425	link = vmxnet3_link_is_up(sc);
3426
3427	if (link != 0 && sc->vmx_link_active == 0) {
3428		sc->vmx_link_active = 1;
3429		if_link_state_change(ifp, LINK_STATE_UP);
3430	} else if (link == 0 && sc->vmx_link_active != 0) {
3431		sc->vmx_link_active = 0;
3432		if_link_state_change(ifp, LINK_STATE_DOWN);
3433	}
3434}
3435
3436static void
3437vmxnet3_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3438{
3439	struct vmxnet3_softc *sc;
3440
3441	sc = ifp->if_softc;
3442
3443	ifmr->ifm_active = IFM_ETHER | IFM_AUTO;
3444	ifmr->ifm_status = IFM_AVALID;
3445
3446	VMXNET3_CORE_LOCK(sc);
3447	if (vmxnet3_link_is_up(sc) != 0)
3448		ifmr->ifm_status |= IFM_ACTIVE;
3449	else
3450		ifmr->ifm_status |= IFM_NONE;
3451	VMXNET3_CORE_UNLOCK(sc);
3452}
3453
3454static int
3455vmxnet3_media_change(struct ifnet *ifp)
3456{
3457
3458	/* Ignore. */
3459	return (0);
3460}
3461
3462static void
3463vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
3464{
3465	uint32_t ml, mh;
3466
3467	ml  = sc->vmx_lladdr[0];
3468	ml |= sc->vmx_lladdr[1] << 8;
3469	ml |= sc->vmx_lladdr[2] << 16;
3470	ml |= sc->vmx_lladdr[3] << 24;
3471	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
3472
3473	mh  = sc->vmx_lladdr[4];
3474	mh |= sc->vmx_lladdr[5] << 8;
3475	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
3476}
3477
3478static void
3479vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
3480{
3481	uint32_t ml, mh;
3482
3483	ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
3484	mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
3485
3486	sc->vmx_lladdr[0] = ml;
3487	sc->vmx_lladdr[1] = ml >> 8;
3488	sc->vmx_lladdr[2] = ml >> 16;
3489	sc->vmx_lladdr[3] = ml >> 24;
3490	sc->vmx_lladdr[4] = mh;
3491	sc->vmx_lladdr[5] = mh >> 8;
3492}
3493
3494static void
3495vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
3496    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3497{
3498	struct sysctl_oid *node, *txsnode;
3499	struct sysctl_oid_list *list, *txslist;
3500	struct vmxnet3_txq_stats *stats;
3501	struct UPT1_TxStats *txstats;
3502	char namebuf[16];
3503
3504	stats = &txq->vxtxq_stats;
3505	txstats = &txq->vxtxq_ts->stats;
3506
3507	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id);
3508	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
3509	    NULL, "Transmit Queue");
3510	txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);
3511
3512	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
3513	    &stats->vmtxs_opackets, "Transmit packets");
3514	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
3515	    &stats->vmtxs_obytes, "Transmit bytes");
3516	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
3517	    &stats->vmtxs_omcasts, "Transmit multicasts");
3518	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3519	    &stats->vmtxs_csum, "Transmit checksum offloaded");
3520	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
3521	    &stats->vmtxs_tso, "Transmit TCP segmentation offloaded");
3522	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ringfull", CTLFLAG_RD,
3523	    &stats->vmtxs_full, "Transmit ring full");
3524	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "offload_failed", CTLFLAG_RD,
3525	    &stats->vmtxs_offload_failed, "Transmit checksum offload failed");
3526
3527	/*
3528	 * Add statistics reported by the host. These are updated once
3529	 * per second.
3530	 */
3531	txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
3532	    NULL, "Host Statistics");
3533	txslist = SYSCTL_CHILDREN(txsnode);
3534	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
3535	    &txstats->TSO_packets, "TSO packets");
3536	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
3537	    &txstats->TSO_bytes, "TSO bytes");
3538	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
3539	    &txstats->ucast_packets, "Unicast packets");
3540	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
3541	    &txstats->ucast_bytes, "Unicast bytes");
3542	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
3543	    &txstats->mcast_packets, "Multicast packets");
3544	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
3545	    &txstats->mcast_bytes, "Multicast bytes");
3546	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
3547	    &txstats->error, "Errors");
3548	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
3549	    &txstats->discard, "Discards");
3550}
3551
3552static void
3553vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
3554    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3555{
3556	struct sysctl_oid *node, *rxsnode;
3557	struct sysctl_oid_list *list, *rxslist;
3558	struct vmxnet3_rxq_stats *stats;
3559	struct UPT1_RxStats *rxstats;
3560	char namebuf[16];
3561
3562	stats = &rxq->vxrxq_stats;
3563	rxstats = &rxq->vxrxq_rs->stats;
3564
3565	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
3566	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
3567	    NULL, "Receive Queue");
3568	rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);
3569
3570	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
3571	    &stats->vmrxs_ipackets, "Receive packets");
3572	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
3573	    &stats->vmrxs_ibytes, "Receive bytes");
3574	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
3575	    &stats->vmrxs_iqdrops, "Receive drops");
3576	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
3577	    &stats->vmrxs_ierrors, "Receive errors");
3578
3579	/*
3580	 * Add statistics reported by the host. These are updated once
3581	 * per second.
3582	 */
3583	rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
3584	    NULL, "Host Statistics");
3585	rxslist = SYSCTL_CHILDREN(rxsnode);
3586	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
3587	    &rxstats->LRO_packets, "LRO packets");
3588	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
3589	    &rxstats->LRO_bytes, "LRO bytes");
3590	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
3591	    &rxstats->ucast_packets, "Unicast packets");
3592	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
3593	    &rxstats->ucast_bytes, "Unicast bytes");
3594	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
3595	    &rxstats->mcast_packets, "Multicast packets");
3596	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
3597	    &rxstats->mcast_bytes, "Multicast bytes");
3598	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
3599	    &rxstats->bcast_packets, "Broadcast packets");
3600	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
3601	    &rxstats->bcast_bytes, "Broadcast bytes");
3602	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
3603	    &rxstats->nobuffer, "No buffer");
3604	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
3605	    &rxstats->error, "Errors");
3606}
3607
3608static void
3609vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
3610    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3611{
3612	struct sysctl_oid *node;
3613	struct sysctl_oid_list *list;
3614	int i;
3615
3616	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3617		struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];
3618
3619		node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
3620		    "debug", CTLFLAG_RD, NULL, "");
3621		list = SYSCTL_CHILDREN(node);
3622
3623		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_head", CTLFLAG_RD,
3624		    &txq->vxtxq_cmd_ring.vxtxr_head, 0, "");
3625		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
3626		    &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
3627		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
3628		    &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
3629		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
3630		    &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
3631		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
3632		    &txq->vxtxq_comp_ring.vxcr_next, 0, "");
3633		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
3634		    &txq->vxtxq_comp_ring.vxcr_ndesc, 0, "");
3635		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
3636		    &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
3637	}
3638
3639	for (i = 0; i < sc->vmx_nrxqueues; i++) {
3640		struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];
3641
3642		node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
3643		    "debug", CTLFLAG_RD, NULL, "");
3644		list = SYSCTL_CHILDREN(node);
3645
3646		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_fill", CTLFLAG_RD,
3647		    &rxq->vxrxq_cmd_ring[0].vxrxr_fill, 0, "");
3648		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
3649		    &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
3650		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
3651		    &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
3652		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_fill", CTLFLAG_RD,
3653		    &rxq->vxrxq_cmd_ring[1].vxrxr_fill, 0, "");
3654		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
3655		    &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
3656		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
3657		    &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
3658		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
3659		    &rxq->vxrxq_comp_ring.vxcr_next, 0, "");
3660		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
3661		    &rxq->vxrxq_comp_ring.vxcr_ndesc, 0, "");
3662		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
3663		    &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
3664	}
3665}
3666
3667static void
3668vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
3669    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3670{
3671	int i;
3672
3673	for (i = 0; i < sc->vmx_ntxqueues; i++)
3674		vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
3675	for (i = 0; i < sc->vmx_nrxqueues; i++)
3676		vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);
3677
3678	vmxnet3_setup_debug_sysctl(sc, ctx, child);
3679}
3680
3681static void
3682vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
3683{
3684	device_t dev;
3685	struct vmxnet3_statistics *stats;
3686	struct sysctl_ctx_list *ctx;
3687	struct sysctl_oid *tree;
3688	struct sysctl_oid_list *child;
3689
3690	dev = sc->vmx_dev;
3691	ctx = device_get_sysctl_ctx(dev);
3692	tree = device_get_sysctl_tree(dev);
3693	child = SYSCTL_CHILDREN(tree);
3694
3695	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_ntxqueues", CTLFLAG_RD,
3696	    &sc->vmx_max_ntxqueues, 0, "Maximum number of Tx queues");
3697	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_nrxqueues", CTLFLAG_RD,
3698	    &sc->vmx_max_nrxqueues, 0, "Maximum number of Rx queues");
3699	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "ntxqueues", CTLFLAG_RD,
3700	    &sc->vmx_ntxqueues, 0, "Number of Tx queues");
3701	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "nrxqueues", CTLFLAG_RD,
3702	    &sc->vmx_nrxqueues, 0, "Number of Rx queues");
3703
3704	stats = &sc->vmx_stats;
3705	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defragged", CTLFLAG_RD,
3706	    &stats->vmst_defragged, 0, "Tx mbuf chains defragged");
3707	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defrag_failed", CTLFLAG_RD,
3708	    &stats->vmst_defrag_failed, 0,
3709	    "Tx mbuf dropped because defrag failed");
3710	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mgetcl_failed", CTLFLAG_RD,
3711	    &stats->vmst_mgetcl_failed, 0, "mbuf cluster allocation failed");
3712	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mbuf_load_failed", CTLFLAG_RD,
3713	    &stats->vmst_mbuf_load_failed, 0, "mbuf load segments failed");
3714
3715	vmxnet3_setup_queue_sysctl(sc, ctx, child);
3716}
3717
3718static void
3719vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
3720{
3721
3722	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
3723}
3724
3725static uint32_t
3726vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
3727{
3728
3729	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
3730}
3731
3732static void
3733vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
3734{
3735
3736	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
3737}
3738
3739static void
3740vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
3741{
3742
3743	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
3744}
3745
3746static uint32_t
3747vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
3748{
3749
3750	vmxnet3_write_cmd(sc, cmd);
3751	bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
3752	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
3753	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
3754}
3755
3756static void
3757vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
3758{
3759
3760	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
3761}
3762
3763static void
3764vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
3765{
3766
3767	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
3768}
3769
3770static void
3771vmxnet3_enable_all_intrs(struct vmxnet3_softc *sc)
3772{
3773	int i;
3774
3775	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
3776	for (i = 0; i < sc->vmx_nintrs; i++)
3777		vmxnet3_enable_intr(sc, i);
3778}
3779
3780static void
3781vmxnet3_disable_all_intrs(struct vmxnet3_softc *sc)
3782{
3783	int i;
3784
3785	sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
3786	for (i = 0; i < sc->vmx_nintrs; i++)
3787		vmxnet3_disable_intr(sc, i);
3788}
3789
3790static void
3791vmxnet3_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3792{
3793	bus_addr_t *baddr = arg;
3794
3795	if (error == 0)
3796		*baddr = segs->ds_addr;
3797}
3798
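/*
 * Allocate a physically contiguous, suitably aligned DMA region for the
 * structures shared with the device.  The tag is restricted to a single
 * segment so the region's bus address can be handed to the device directly.
 */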
3799static int
3800vmxnet3_dma_malloc(struct vmxnet3_softc *sc, bus_size_t size, bus_size_t align,
3801    struct vmxnet3_dma_alloc *dma)
3802{
3803	device_t dev;
3804	int error;
3805
3806	dev = sc->vmx_dev;
3807	bzero(dma, sizeof(struct vmxnet3_dma_alloc));
3808
3809	error = bus_dma_tag_create(bus_get_dma_tag(dev),
3810	    align, 0,		/* alignment, bounds */
3811	    BUS_SPACE_MAXADDR,	/* lowaddr */
3812	    BUS_SPACE_MAXADDR,	/* highaddr */
3813	    NULL, NULL,		/* filter, filterarg */
3814	    size,		/* maxsize */
3815	    1,			/* nsegments */
3816	    size,		/* maxsegsize */
3817	    BUS_DMA_ALLOCNOW,	/* flags */
3818	    NULL,		/* lockfunc */
3819	    NULL,		/* lockfuncarg */
3820	    &dma->dma_tag);
3821	if (error) {
3822		device_printf(dev, "bus_dma_tag_create failed: %d\n", error);
3823		goto fail;
3824	}
3825
3826	error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
3827	    BUS_DMA_ZERO | BUS_DMA_NOWAIT, &dma->dma_map);
3828	if (error) {
3829		device_printf(dev, "bus_dmamem_alloc failed: %d\n", error);
3830		goto fail;
3831	}
3832
3833	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3834	    size, vmxnet3_dmamap_cb, &dma->dma_paddr, BUS_DMA_NOWAIT);
3835	if (error) {
3836		device_printf(dev, "bus_dmamap_load failed: %d\n", error);
3837		goto fail;
3838	}
3839
3840	dma->dma_size = size;
3841
3842fail:
3843	if (error)
3844		vmxnet3_dma_free(sc, dma);
3845
3846	return (error);
3847}
3848
3849static void
3850vmxnet3_dma_free(struct vmxnet3_softc *sc, struct vmxnet3_dma_alloc *dma)
3851{
3852
3853	if (dma->dma_tag != NULL) {
3854		if (dma->dma_map != NULL) {
3855			bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3856			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3857			bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3858		}
3859
3860		if (dma->dma_vaddr != NULL) {
3861			bus_dmamem_free(dma->dma_tag, dma->dma_vaddr,
3862			    dma->dma_map);
3863		}
3864
3865		bus_dma_tag_destroy(dma->dma_tag);
3866	}
3867	bzero(dma, sizeof(struct vmxnet3_dma_alloc));
3868}
3869
3870static int
3871vmxnet3_tunable_int(struct vmxnet3_softc *sc, const char *knob, int def)
3872{
3873	char path[64];
3874
3875	snprintf(path, sizeof(path),
3876	    "hw.vmx.%d.%s", device_get_unit(sc->vmx_dev), knob);
3877	TUNABLE_INT_FETCH(path, &def);
3878
3879	return (def);
3880}
3881
3882/*
3883 * Since this is a purely paravirtualized device, we do not have
3884 * to worry about DMA coherency. But at times, we must make sure
3885 * both the compiler and CPU do not reorder memory operations.
3886 */
3887static inline void
3888vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
3889{
3890
3891	switch (type) {
3892	case VMXNET3_BARRIER_RD:
3893		rmb();
3894		break;
3895	case VMXNET3_BARRIER_WR:
3896		wmb();
3897		break;
3898	case VMXNET3_BARRIER_RDWR:
3899		mb();
3900		break;
3901	default:
3902		panic("%s: bad barrier type %d", __func__, type);
3903	}
3904}
3905