1/*-
2 * Copyright (c) 2013 Tsubai Masanari
3 * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 *
17 * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $
18 */
19
20/* Driver for VMware vmxnet3 virtual ethernet devices. */
21
22#include <sys/cdefs.h>
23__FBSDID("$FreeBSD: stable/10/sys/dev/vmware/vmxnet3/if_vmx.c 267279 2014-06-09 15:09:05Z luigi $");
24
25#include <sys/param.h>
26#include <sys/systm.h>
27#include <sys/kernel.h>
28#include <sys/endian.h>
29#include <sys/sockio.h>
30#include <sys/mbuf.h>
31#include <sys/malloc.h>
32#include <sys/module.h>
33#include <sys/socket.h>
34#include <sys/sysctl.h>
35#include <sys/smp.h>
36#include <sys/taskqueue.h>
37#include <vm/vm.h>
38#include <vm/pmap.h>
39
40#include <net/ethernet.h>
41#include <net/if.h>
42#include <net/if_arp.h>
43#include <net/if_dl.h>
44#include <net/if_types.h>
45#include <net/if_media.h>
46#include <net/if_vlan_var.h>
47
48#include <net/bpf.h>
49
50#include <netinet/in_systm.h>
51#include <netinet/in.h>
52#include <netinet/ip.h>
53#include <netinet/ip6.h>
54#include <netinet6/ip6_var.h>
55#include <netinet/udp.h>
56#include <netinet/tcp.h>
57
58#include <machine/bus.h>
59#include <machine/resource.h>
60#include <sys/bus.h>
61#include <sys/rman.h>
62
63#include <dev/pci/pcireg.h>
64#include <dev/pci/pcivar.h>
65
66#include "if_vmxreg.h"
67#include "if_vmxvar.h"
68
69#include "opt_inet.h"
70#include "opt_inet6.h"
71
72#ifdef VMXNET3_FAILPOINTS
73#include <sys/fail.h>
74static SYSCTL_NODE(DEBUG_FP, OID_AUTO, vmxnet3, CTLFLAG_RW, 0,
75    "vmxnet3 fail points");
76#define VMXNET3_FP	_debug_fail_point_vmxnet3
77#endif
78
79static int	vmxnet3_probe(device_t);
80static int	vmxnet3_attach(device_t);
81static int	vmxnet3_detach(device_t);
82static int	vmxnet3_shutdown(device_t);
83
84static int	vmxnet3_alloc_resources(struct vmxnet3_softc *);
85static void	vmxnet3_free_resources(struct vmxnet3_softc *);
86static int	vmxnet3_check_version(struct vmxnet3_softc *);
87static void	vmxnet3_initial_config(struct vmxnet3_softc *);
88static void	vmxnet3_check_multiqueue(struct vmxnet3_softc *);
89
90static int	vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *);
91static int	vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *);
92static int	vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *);
93static int	vmxnet3_alloc_interrupt(struct vmxnet3_softc *, int, int,
94		    struct vmxnet3_interrupt *);
95static int	vmxnet3_alloc_intr_resources(struct vmxnet3_softc *);
96static int	vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *);
97static int	vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *);
98static int	vmxnet3_setup_interrupts(struct vmxnet3_softc *);
99static int	vmxnet3_alloc_interrupts(struct vmxnet3_softc *);
100
101static void	vmxnet3_free_interrupt(struct vmxnet3_softc *,
102		    struct vmxnet3_interrupt *);
103static void	vmxnet3_free_interrupts(struct vmxnet3_softc *);
104
105#ifndef VMXNET3_LEGACY_TX
106static int	vmxnet3_alloc_taskqueue(struct vmxnet3_softc *);
107static void	vmxnet3_start_taskqueue(struct vmxnet3_softc *);
108static void	vmxnet3_drain_taskqueue(struct vmxnet3_softc *);
109static void	vmxnet3_free_taskqueue(struct vmxnet3_softc *);
110#endif
111
112static int	vmxnet3_init_rxq(struct vmxnet3_softc *, int);
113static int	vmxnet3_init_txq(struct vmxnet3_softc *, int);
114static int	vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *);
115static void	vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *);
116static void	vmxnet3_destroy_txq(struct vmxnet3_txqueue *);
117static void	vmxnet3_free_rxtx_queues(struct vmxnet3_softc *);
118
119static int	vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
120static void	vmxnet3_free_shared_data(struct vmxnet3_softc *);
121static int	vmxnet3_alloc_txq_data(struct vmxnet3_softc *);
122static void	vmxnet3_free_txq_data(struct vmxnet3_softc *);
123static int	vmxnet3_alloc_rxq_data(struct vmxnet3_softc *);
124static void	vmxnet3_free_rxq_data(struct vmxnet3_softc *);
125static int	vmxnet3_alloc_queue_data(struct vmxnet3_softc *);
126static void	vmxnet3_free_queue_data(struct vmxnet3_softc *);
127static int	vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
128static void	vmxnet3_init_shared_data(struct vmxnet3_softc *);
129static void	vmxnet3_reinit_interface(struct vmxnet3_softc *);
130static void	vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
131static void	vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
132static int	vmxnet3_alloc_data(struct vmxnet3_softc *);
133static void	vmxnet3_free_data(struct vmxnet3_softc *);
134static int	vmxnet3_setup_interface(struct vmxnet3_softc *);
135
136static void	vmxnet3_evintr(struct vmxnet3_softc *);
137static void	vmxnet3_txq_eof(struct vmxnet3_txqueue *);
138static void	vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *, struct mbuf *);
139static int	vmxnet3_newbuf(struct vmxnet3_softc *, struct vmxnet3_rxring *);
140static void	vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *,
141		    struct vmxnet3_rxring *, int);
142static void	vmxnet3_rxq_eof(struct vmxnet3_rxqueue *);
143static void	vmxnet3_legacy_intr(void *);
144static void	vmxnet3_txq_intr(void *);
145static void	vmxnet3_rxq_intr(void *);
146static void	vmxnet3_event_intr(void *);
147
148static void	vmxnet3_txstop(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
149static void	vmxnet3_rxstop(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
150static void	vmxnet3_stop(struct vmxnet3_softc *);
151
152static void	vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
153static int	vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
154static int	vmxnet3_reinit_queues(struct vmxnet3_softc *);
155static int	vmxnet3_enable_device(struct vmxnet3_softc *);
156static void	vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
157static int	vmxnet3_reinit(struct vmxnet3_softc *);
158static void	vmxnet3_init_locked(struct vmxnet3_softc *);
159static void	vmxnet3_init(void *);
160
161static int	vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *, struct mbuf *,
162		    int *, int *, int *);
163static int	vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *, struct mbuf **,
164		    bus_dmamap_t, bus_dma_segment_t [], int *);
165static void	vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *, bus_dmamap_t);
166static int	vmxnet3_txq_encap(struct vmxnet3_txqueue *, struct mbuf **);
167static void	vmxnet3_txq_update_pending(struct vmxnet3_txqueue *);
168#ifdef VMXNET3_LEGACY_TX
169static void	vmxnet3_start_locked(struct ifnet *);
170static void	vmxnet3_start(struct ifnet *);
171#else
172static int	vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *,
173		    struct mbuf *);
174static int	vmxnet3_txq_mq_start(struct ifnet *, struct mbuf *);
175static void	vmxnet3_txq_tq_deferred(void *, int);
176#endif
177static void	vmxnet3_txq_start(struct vmxnet3_txqueue *);
178static void	vmxnet3_tx_start_all(struct vmxnet3_softc *);
179
180static void	vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
181		    uint16_t);
182static void	vmxnet3_register_vlan(void *, struct ifnet *, uint16_t);
183static void	vmxnet3_unregister_vlan(void *, struct ifnet *, uint16_t);
184static void	vmxnet3_set_rxfilter(struct vmxnet3_softc *);
185static int	vmxnet3_change_mtu(struct vmxnet3_softc *, int);
186static int	vmxnet3_ioctl(struct ifnet *, u_long, caddr_t);
187
188#ifndef VMXNET3_LEGACY_TX
189static void	vmxnet3_qflush(struct ifnet *);
190#endif
191
192static int	vmxnet3_watchdog(struct vmxnet3_txqueue *);
193static void	vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
194static void	vmxnet3_txq_accum_stats(struct vmxnet3_txqueue *,
195		    struct vmxnet3_txq_stats *);
196static void	vmxnet3_rxq_accum_stats(struct vmxnet3_rxqueue *,
197		    struct vmxnet3_rxq_stats *);
198static void	vmxnet3_tick(void *);
199static void	vmxnet3_link_status(struct vmxnet3_softc *);
200static void	vmxnet3_media_status(struct ifnet *, struct ifmediareq *);
201static int	vmxnet3_media_change(struct ifnet *);
202static void	vmxnet3_set_lladdr(struct vmxnet3_softc *);
203static void	vmxnet3_get_lladdr(struct vmxnet3_softc *);
204
205static void	vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *,
206		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
207static void	vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *,
208		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
209static void	vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *,
210		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
211static void	vmxnet3_setup_sysctl(struct vmxnet3_softc *);
212
213static void	vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t,
214		    uint32_t);
215static uint32_t	vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t);
216static void	vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t,
217		    uint32_t);
218static void	vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t);
219static uint32_t	vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t);
220
221static void	vmxnet3_enable_intr(struct vmxnet3_softc *, int);
222static void	vmxnet3_disable_intr(struct vmxnet3_softc *, int);
223static void	vmxnet3_enable_all_intrs(struct vmxnet3_softc *);
224static void	vmxnet3_disable_all_intrs(struct vmxnet3_softc *);
225
226static int	vmxnet3_dma_malloc(struct vmxnet3_softc *, bus_size_t,
227		    bus_size_t, struct vmxnet3_dma_alloc *);
228static void	vmxnet3_dma_free(struct vmxnet3_softc *,
229		    struct vmxnet3_dma_alloc *);
230static int	vmxnet3_tunable_int(struct vmxnet3_softc *,
231		    const char *, int);
232
233typedef enum {
234	VMXNET3_BARRIER_RD,
235	VMXNET3_BARRIER_WR,
236	VMXNET3_BARRIER_RDWR,
237} vmxnet3_barrier_t;
238
239static void	vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);
240
241/* Tunables. */
242static int vmxnet3_mq_disable = 0;
243TUNABLE_INT("hw.vmx.mq_disable", &vmxnet3_mq_disable);
244static int vmxnet3_default_txnqueue = VMXNET3_DEF_TX_QUEUES;
245TUNABLE_INT("hw.vmx.txnqueue", &vmxnet3_default_txnqueue);
246static int vmxnet3_default_rxnqueue = VMXNET3_DEF_RX_QUEUES;
247TUNABLE_INT("hw.vmx.rxnqueue", &vmxnet3_default_rxnqueue);
248static int vmxnet3_default_txndesc = VMXNET3_DEF_TX_NDESC;
249TUNABLE_INT("hw.vmx.txndesc", &vmxnet3_default_txndesc);
250static int vmxnet3_default_rxndesc = VMXNET3_DEF_RX_NDESC;
251TUNABLE_INT("hw.vmx.rxndesc", &vmxnet3_default_rxndesc);
252
253static device_method_t vmxnet3_methods[] = {
254	/* Device interface. */
255	DEVMETHOD(device_probe,		vmxnet3_probe),
256	DEVMETHOD(device_attach,	vmxnet3_attach),
257	DEVMETHOD(device_detach,	vmxnet3_detach),
258	DEVMETHOD(device_shutdown,	vmxnet3_shutdown),
259
260	DEVMETHOD_END
261};
262
263static driver_t vmxnet3_driver = {
264	"vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc)
265};
266
267static devclass_t vmxnet3_devclass;
268DRIVER_MODULE(vmx, pci, vmxnet3_driver, vmxnet3_devclass, 0, 0);
269
270MODULE_DEPEND(vmx, pci, 1, 1, 1);
271MODULE_DEPEND(vmx, ether, 1, 1, 1);
272
273#define VMXNET3_VMWARE_VENDOR_ID	0x15AD
274#define VMXNET3_VMWARE_DEVICE_ID	0x07B0
275
276static int
277vmxnet3_probe(device_t dev)
278{
279
280	if (pci_get_vendor(dev) == VMXNET3_VMWARE_VENDOR_ID &&
281	    pci_get_device(dev) == VMXNET3_VMWARE_DEVICE_ID) {
282		device_set_desc(dev, "VMware VMXNET3 Ethernet Adapter");
283		return (BUS_PROBE_DEFAULT);
284	}
285
286	return (ENXIO);
287}
288
289static int
290vmxnet3_attach(device_t dev)
291{
292	struct vmxnet3_softc *sc;
293	int error;
294
295	sc = device_get_softc(dev);
296	sc->vmx_dev = dev;
297
298	pci_enable_busmaster(dev);
299
300	VMXNET3_CORE_LOCK_INIT(sc, device_get_nameunit(dev));
301	callout_init_mtx(&sc->vmx_tick, &sc->vmx_mtx, 0);
302
303	vmxnet3_initial_config(sc);
304
305	error = vmxnet3_alloc_resources(sc);
306	if (error)
307		goto fail;
308
309	error = vmxnet3_check_version(sc);
310	if (error)
311		goto fail;
312
313	error = vmxnet3_alloc_rxtx_queues(sc);
314	if (error)
315		goto fail;
316
317#ifndef VMXNET3_LEGACY_TX
318	error = vmxnet3_alloc_taskqueue(sc);
319	if (error)
320		goto fail;
321#endif
322
323	error = vmxnet3_alloc_interrupts(sc);
324	if (error)
325		goto fail;
326
327	vmxnet3_check_multiqueue(sc);
328
329	error = vmxnet3_alloc_data(sc);
330	if (error)
331		goto fail;
332
333	error = vmxnet3_setup_interface(sc);
334	if (error)
335		goto fail;
336
337	error = vmxnet3_setup_interrupts(sc);
338	if (error) {
339		ether_ifdetach(sc->vmx_ifp);
340		device_printf(dev, "could not set up interrupt\n");
341		goto fail;
342	}
343
344	vmxnet3_setup_sysctl(sc);
345#ifndef VMXNET3_LEGACY_TX
346	vmxnet3_start_taskqueue(sc);
347#endif
348
349fail:
350	if (error)
351		vmxnet3_detach(dev);
352
353	return (error);
354}
355
356static int
357vmxnet3_detach(device_t dev)
358{
359	struct vmxnet3_softc *sc;
360	struct ifnet *ifp;
361
362	sc = device_get_softc(dev);
363	ifp = sc->vmx_ifp;
364
365	if (device_is_attached(dev)) {
366		VMXNET3_CORE_LOCK(sc);
367		vmxnet3_stop(sc);
368		VMXNET3_CORE_UNLOCK(sc);
369
370		callout_drain(&sc->vmx_tick);
371#ifndef VMXNET3_LEGACY_TX
372		vmxnet3_drain_taskqueue(sc);
373#endif
374
375		ether_ifdetach(ifp);
376	}
377
378	if (sc->vmx_vlan_attach != NULL) {
379		EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_attach);
380		sc->vmx_vlan_attach = NULL;
381	}
382	if (sc->vmx_vlan_detach != NULL) {
383		EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_detach);
384		sc->vmx_vlan_detach = NULL;
385	}
386
387#ifndef VMXNET3_LEGACY_TX
388	vmxnet3_free_taskqueue(sc);
389#endif
390	vmxnet3_free_interrupts(sc);
391
392	if (ifp != NULL) {
393		if_free(ifp);
394		sc->vmx_ifp = NULL;
395	}
396
397	ifmedia_removeall(&sc->vmx_media);
398
399	vmxnet3_free_data(sc);
400	vmxnet3_free_resources(sc);
401	vmxnet3_free_rxtx_queues(sc);
402
403	VMXNET3_CORE_LOCK_DESTROY(sc);
404
405	return (0);
406}
407
408static int
409vmxnet3_shutdown(device_t dev)
410{
411
412	return (0);
413}
414
415static int
416vmxnet3_alloc_resources(struct vmxnet3_softc *sc)
417{
418	device_t dev;
419	int rid;
420
421	dev = sc->vmx_dev;
422
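	/*
	 * BAR0 contains the interrupt mask and queue doorbell registers,
	 * BAR1 the device registers; BAR2 is only needed when using MSIX.
	 */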
423	rid = PCIR_BAR(0);
424	sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
425	    RF_ACTIVE);
426	if (sc->vmx_res0 == NULL) {
427		device_printf(dev,
428		    "could not map BAR0 memory\n");
429		return (ENXIO);
430	}
431
432	sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0);
433	sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0);
434
435	rid = PCIR_BAR(1);
436	sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
437	    RF_ACTIVE);
438	if (sc->vmx_res1 == NULL) {
439		device_printf(dev,
440		    "could not map BAR1 memory\n");
441		return (ENXIO);
442	}
443
444	sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1);
445	sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1);
446
447	if (pci_find_cap(dev, PCIY_MSIX, NULL) == 0) {
448		rid = PCIR_BAR(2);
449		sc->vmx_msix_res = bus_alloc_resource_any(dev,
450		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
451	}
452
453	if (sc->vmx_msix_res == NULL)
454		sc->vmx_flags |= VMXNET3_FLAG_NO_MSIX;
455
456	return (0);
457}
458
459static void
460vmxnet3_free_resources(struct vmxnet3_softc *sc)
461{
462	device_t dev;
463	int rid;
464
465	dev = sc->vmx_dev;
466
467	if (sc->vmx_res0 != NULL) {
468		rid = PCIR_BAR(0);
469		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res0);
470		sc->vmx_res0 = NULL;
471	}
472
473	if (sc->vmx_res1 != NULL) {
474		rid = PCIR_BAR(1);
475		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res1);
476		sc->vmx_res1 = NULL;
477	}
478
479	if (sc->vmx_msix_res != NULL) {
480		rid = PCIR_BAR(2);
481		bus_release_resource(dev, SYS_RES_MEMORY, rid,
482		    sc->vmx_msix_res);
483		sc->vmx_msix_res = NULL;
484	}
485}
486
487static int
488vmxnet3_check_version(struct vmxnet3_softc *sc)
489{
490	device_t dev;
491	uint32_t version;
492
493	dev = sc->vmx_dev;
494
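	/* The driver supports only revision 1 of the device and UPT interfaces. */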
495	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
496	if ((version & 0x01) == 0) {
497		device_printf(dev, "unsupported hardware version %#x\n",
498		    version);
499		return (ENOTSUP);
500	}
501	vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
502
503	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
504	if ((version & 0x01) == 0) {
505		device_printf(dev, "unsupported UPT version %#x\n", version);
506		return (ENOTSUP);
507	}
508	vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
509
510	return (0);
511}
512
513static void
514vmxnet3_initial_config(struct vmxnet3_softc *sc)
515{
516	int nqueue, ndesc;
517
518	nqueue = vmxnet3_tunable_int(sc, "txnqueue", vmxnet3_default_txnqueue);
519	if (nqueue > VMXNET3_MAX_TX_QUEUES || nqueue < 1)
520		nqueue = VMXNET3_DEF_TX_QUEUES;
521	if (nqueue > mp_ncpus)
522		nqueue = mp_ncpus;
523	sc->vmx_max_ntxqueues = nqueue;
524
525	nqueue = vmxnet3_tunable_int(sc, "rxnqueue", vmxnet3_default_rxnqueue);
526	if (nqueue > VMXNET3_MAX_RX_QUEUES || nqueue < 1)
527		nqueue = VMXNET3_DEF_RX_QUEUES;
528	if (nqueue > mp_ncpus)
529		nqueue = mp_ncpus;
530	sc->vmx_max_nrxqueues = nqueue;
531
532	if (vmxnet3_tunable_int(sc, "mq_disable", vmxnet3_mq_disable)) {
533		sc->vmx_max_nrxqueues = 1;
534		sc->vmx_max_ntxqueues = 1;
535	}
536
537	ndesc = vmxnet3_tunable_int(sc, "txd", vmxnet3_default_txndesc);
538	if (ndesc > VMXNET3_MAX_TX_NDESC || ndesc < VMXNET3_MIN_TX_NDESC)
539		ndesc = VMXNET3_DEF_TX_NDESC;
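	/* Round an unaligned descriptor count down to the required ring alignment. */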
540	if (ndesc & VMXNET3_MASK_TX_NDESC)
541		ndesc &= ~VMXNET3_MASK_TX_NDESC;
542	sc->vmx_ntxdescs = ndesc;
543
544	ndesc = vmxnet3_tunable_int(sc, "rxd", vmxnet3_default_rxndesc);
545	if (ndesc > VMXNET3_MAX_RX_NDESC || ndesc < VMXNET3_MIN_RX_NDESC)
546		ndesc = VMXNET3_DEF_RX_NDESC;
547	if (ndesc & VMXNET3_MASK_RX_NDESC)
548		ndesc &= ~VMXNET3_MASK_RX_NDESC;
549	sc->vmx_nrxdescs = ndesc;
550	sc->vmx_max_rxsegs = VMXNET3_MAX_RX_SEGS;
551}
552
553static void
554vmxnet3_check_multiqueue(struct vmxnet3_softc *sc)
555{
556
557	if (sc->vmx_intr_type != VMXNET3_IT_MSIX)
558		goto out;
559
560	/* BMV: Just use the maximum configured for now. */
561	sc->vmx_nrxqueues = sc->vmx_max_nrxqueues;
562	sc->vmx_ntxqueues = sc->vmx_max_ntxqueues;
563
564	if (sc->vmx_nrxqueues > 1)
565		sc->vmx_flags |= VMXNET3_FLAG_RSS;
566
567	return;
568
569out:
570	sc->vmx_ntxqueues = 1;
571	sc->vmx_nrxqueues = 1;
572}
573
574static int
575vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *sc)
576{
577	device_t dev;
578	int nmsix, cnt, required;
579
580	dev = sc->vmx_dev;
581
582	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX)
583		return (1);
584
585	/* Allocate an additional vector for the event interrupt. */
586	required = sc->vmx_max_nrxqueues + sc->vmx_max_ntxqueues + 1;
587
588	nmsix = pci_msix_count(dev);
589	if (nmsix < required)
590		return (1);
591
592	cnt = required;
593	if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) {
594		sc->vmx_nintrs = required;
595		return (0);
596	} else
597		pci_release_msi(dev);
598
599	/* BMV: TODO - Fall back to sharing MSIX vectors if possible. */
600
601	return (1);
602}
603
604static int
605vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *sc)
606{
607	device_t dev;
608	int nmsi, cnt, required;
609
610	dev = sc->vmx_dev;
611	required = 1;
612
613	nmsi = pci_msi_count(dev);
614	if (nmsi < required)
615		return (1);
616
617	cnt = required;
618	if (pci_alloc_msi(dev, &cnt) == 0 && cnt >= required) {
619		sc->vmx_nintrs = 1;
620		return (0);
621	} else
622		pci_release_msi(dev);
623
624	return (1);
625}
626
627static int
628vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *sc)
629{
630
631	sc->vmx_nintrs = 1;
632	return (0);
633}
634
635static int
636vmxnet3_alloc_interrupt(struct vmxnet3_softc *sc, int rid, int flags,
637    struct vmxnet3_interrupt *intr)
638{
639	struct resource *irq;
640
641	irq = bus_alloc_resource_any(sc->vmx_dev, SYS_RES_IRQ, &rid, flags);
642	if (irq == NULL)
643		return (ENXIO);
644
645	intr->vmxi_irq = irq;
646	intr->vmxi_rid = rid;
647
648	return (0);
649}
650
651static int
652vmxnet3_alloc_intr_resources(struct vmxnet3_softc *sc)
653{
654	int i, rid, flags, error;
655
656	rid = 0;
657	flags = RF_ACTIVE;
658
659	if (sc->vmx_intr_type == VMXNET3_IT_LEGACY)
660		flags |= RF_SHAREABLE;
661	else
662		rid = 1;
663
664	for (i = 0; i < sc->vmx_nintrs; i++, rid++) {
665		error = vmxnet3_alloc_interrupt(sc, rid, flags,
666		    &sc->vmx_intrs[i]);
667		if (error)
668			return (error);
669	}
670
671	return (0);
672}
673
674static int
675vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *sc)
676{
677	device_t dev;
678	struct vmxnet3_txqueue *txq;
679	struct vmxnet3_rxqueue *rxq;
680	struct vmxnet3_interrupt *intr;
681	enum intr_type type;
682	int i, error;
683
684	dev = sc->vmx_dev;
685	intr = &sc->vmx_intrs[0];
686	type = INTR_TYPE_NET | INTR_MPSAFE;
687
688	for (i = 0; i < sc->vmx_ntxqueues; i++, intr++) {
689		txq = &sc->vmx_txq[i];
690		error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
691		     vmxnet3_txq_intr, txq, &intr->vmxi_handler);
692		if (error)
693			return (error);
694		txq->vxtxq_intr_idx = intr->vmxi_rid - 1;
695	}
696
697	for (i = 0; i < sc->vmx_nrxqueues; i++, intr++) {
698		rxq = &sc->vmx_rxq[i];
699		error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
700		    vmxnet3_rxq_intr, rxq, &intr->vmxi_handler);
701		if (error)
702			return (error);
703		rxq->vxrxq_intr_idx = intr->vmxi_rid - 1;
704	}
705
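	/* The last allocated vector handles the event interrupt. */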
706	error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
707	    vmxnet3_event_intr, sc, &intr->vmxi_handler);
708	if (error)
709		return (error);
710	sc->vmx_event_intr_idx = intr->vmxi_rid - 1;
711
712	return (0);
713}
714
715static int
716vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *sc)
717{
718	struct vmxnet3_interrupt *intr;
719	int i, error;
720
721	intr = &sc->vmx_intrs[0];
722	error = bus_setup_intr(sc->vmx_dev, intr->vmxi_irq,
723	    INTR_TYPE_NET | INTR_MPSAFE, NULL, vmxnet3_legacy_intr, sc,
724	    &intr->vmxi_handler);
725
726	for (i = 0; i < sc->vmx_ntxqueues; i++)
727		sc->vmx_txq[i].vxtxq_intr_idx = 0;
728	for (i = 0; i < sc->vmx_nrxqueues; i++)
729		sc->vmx_rxq[i].vxrxq_intr_idx = 0;
730	sc->vmx_event_intr_idx = 0;
731
732	return (error);
733}
734
735static void
736vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
737{
738	struct vmxnet3_txqueue *txq;
739	struct vmxnet3_txq_shared *txs;
740	struct vmxnet3_rxqueue *rxq;
741	struct vmxnet3_rxq_shared *rxs;
742	int i;
743
744	sc->vmx_ds->evintr = sc->vmx_event_intr_idx;
745
746	for (i = 0; i < sc->vmx_ntxqueues; i++) {
747		txq = &sc->vmx_txq[i];
748		txs = txq->vxtxq_ts;
749		txs->intr_idx = txq->vxtxq_intr_idx;
750	}
751
752	for (i = 0; i < sc->vmx_nrxqueues; i++) {
753		rxq = &sc->vmx_rxq[i];
754		rxs = rxq->vxrxq_rs;
755		rxs->intr_idx = rxq->vxrxq_intr_idx;
756	}
757}
758
759static int
760vmxnet3_setup_interrupts(struct vmxnet3_softc *sc)
761{
762	int error;
763
764	error = vmxnet3_alloc_intr_resources(sc);
765	if (error)
766		return (error);
767
768	switch (sc->vmx_intr_type) {
769	case VMXNET3_IT_MSIX:
770		error = vmxnet3_setup_msix_interrupts(sc);
771		break;
772	case VMXNET3_IT_MSI:
773	case VMXNET3_IT_LEGACY:
774		error = vmxnet3_setup_legacy_interrupt(sc);
775		break;
776	default:
777		panic("%s: invalid interrupt type %d", __func__,
778		    sc->vmx_intr_type);
779	}
780
781	if (error == 0)
782		vmxnet3_set_interrupt_idx(sc);
783
784	return (error);
785}
786
787static int
788vmxnet3_alloc_interrupts(struct vmxnet3_softc *sc)
789{
790	device_t dev;
791	uint32_t config;
792	int error;
793
794	dev = sc->vmx_dev;
795	config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
796
797	sc->vmx_intr_type = config & 0x03;
798	sc->vmx_intr_mask_mode = (config >> 2) & 0x03;
799
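	/* Prefer MSIX, falling back to MSI and then to a legacy INTx interrupt. */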
800	switch (sc->vmx_intr_type) {
801	case VMXNET3_IT_AUTO:
802		sc->vmx_intr_type = VMXNET3_IT_MSIX;
803		/* FALLTHROUGH */
804	case VMXNET3_IT_MSIX:
805		error = vmxnet3_alloc_msix_interrupts(sc);
806		if (error == 0)
807			break;
808		sc->vmx_intr_type = VMXNET3_IT_MSI;
809		/* FALLTHROUGH */
810	case VMXNET3_IT_MSI:
811		error = vmxnet3_alloc_msi_interrupts(sc);
812		if (error == 0)
813			break;
814		sc->vmx_intr_type = VMXNET3_IT_LEGACY;
815		/* FALLTHROUGH */
816	case VMXNET3_IT_LEGACY:
817		error = vmxnet3_alloc_legacy_interrupts(sc);
818		if (error == 0)
819			break;
820		/* FALLTHROUGH */
821	default:
822		sc->vmx_intr_type = -1;
823		device_printf(dev, "cannot allocate any interrupt resources\n");
824		return (ENXIO);
825	}
826
827	return (error);
828}
829
830static void
831vmxnet3_free_interrupt(struct vmxnet3_softc *sc,
832    struct vmxnet3_interrupt *intr)
833{
834	device_t dev;
835
836	dev = sc->vmx_dev;
837
838	if (intr->vmxi_handler != NULL) {
839		bus_teardown_intr(dev, intr->vmxi_irq, intr->vmxi_handler);
840		intr->vmxi_handler = NULL;
841	}
842
843	if (intr->vmxi_irq != NULL) {
844		bus_release_resource(dev, SYS_RES_IRQ, intr->vmxi_rid,
845		    intr->vmxi_irq);
846		intr->vmxi_irq = NULL;
847		intr->vmxi_rid = -1;
848	}
849}
850
851static void
852vmxnet3_free_interrupts(struct vmxnet3_softc *sc)
853{
854	int i;
855
856	for (i = 0; i < sc->vmx_nintrs; i++)
857		vmxnet3_free_interrupt(sc, &sc->vmx_intrs[i]);
858
859	if (sc->vmx_intr_type == VMXNET3_IT_MSI ||
860	    sc->vmx_intr_type == VMXNET3_IT_MSIX)
861		pci_release_msi(sc->vmx_dev);
862}
863
864#ifndef VMXNET3_LEGACY_TX
865static int
866vmxnet3_alloc_taskqueue(struct vmxnet3_softc *sc)
867{
868	device_t dev;
869
870	dev = sc->vmx_dev;
871
872	sc->vmx_tq = taskqueue_create(device_get_nameunit(dev), M_NOWAIT,
873	    taskqueue_thread_enqueue, &sc->vmx_tq);
874	if (sc->vmx_tq == NULL)
875		return (ENOMEM);
876
877	return (0);
878}
879
880static void
881vmxnet3_start_taskqueue(struct vmxnet3_softc *sc)
882{
883	device_t dev;
884	int nthreads, error;
885
886	dev = sc->vmx_dev;
887
888	/*
889	 * The taskqueue is typically not frequently used, so a dedicated
890	 * thread for each queue is unnecessary.
891	 */
892	nthreads = MAX(1, sc->vmx_ntxqueues / 2);
893
894	/*
895	 * Most drivers just ignore the return value - it only fails
896	 * with ENOMEM so an error is not likely. It is hard for us
897	 * to recover from an error here.
898	 */
899	error = taskqueue_start_threads(&sc->vmx_tq, nthreads, PI_NET,
900	    "%s taskq", device_get_nameunit(dev));
901	if (error)
902		device_printf(dev, "failed to start taskqueue: %d", error);
903}
904
905static void
906vmxnet3_drain_taskqueue(struct vmxnet3_softc *sc)
907{
908	struct vmxnet3_txqueue *txq;
909	int i;
910
911	if (sc->vmx_tq != NULL) {
912		for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
913			txq = &sc->vmx_txq[i];
914			taskqueue_drain(sc->vmx_tq, &txq->vxtxq_defrtask);
915		}
916	}
917}
918
919static void
920vmxnet3_free_taskqueue(struct vmxnet3_softc *sc)
921{
922	if (sc->vmx_tq != NULL) {
923		taskqueue_free(sc->vmx_tq);
924		sc->vmx_tq = NULL;
925	}
926}
927#endif
928
929static int
930vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q)
931{
932	struct vmxnet3_rxqueue *rxq;
933	struct vmxnet3_rxring *rxr;
934	int i;
935
936	rxq = &sc->vmx_rxq[q];
937
938	snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
939	    device_get_nameunit(sc->vmx_dev), q);
940	mtx_init(&rxq->vxrxq_mtx, rxq->vxrxq_name, NULL, MTX_DEF);
941
942	rxq->vxrxq_sc = sc;
943	rxq->vxrxq_id = q;
944
945	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
946		rxr = &rxq->vxrxq_cmd_ring[i];
947		rxr->vxrxr_rid = i;
948		rxr->vxrxr_ndesc = sc->vmx_nrxdescs;
949		rxr->vxrxr_rxbuf = malloc(rxr->vxrxr_ndesc *
950		    sizeof(struct vmxnet3_rxbuf), M_DEVBUF, M_NOWAIT | M_ZERO);
951		if (rxr->vxrxr_rxbuf == NULL)
952			return (ENOMEM);
953
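		/* Size the completion ring to cover every descriptor in both command rings. */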
954		rxq->vxrxq_comp_ring.vxcr_ndesc += sc->vmx_nrxdescs;
955	}
956
957	return (0);
958}
959
960static int
961vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
962{
963	struct vmxnet3_txqueue *txq;
964	struct vmxnet3_txring *txr;
965
966	txq = &sc->vmx_txq[q];
967	txr = &txq->vxtxq_cmd_ring;
968
969	snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
970	    device_get_nameunit(sc->vmx_dev), q);
971	mtx_init(&txq->vxtxq_mtx, txq->vxtxq_name, NULL, MTX_DEF);
972
973	txq->vxtxq_sc = sc;
974	txq->vxtxq_id = q;
975
976	txr->vxtxr_ndesc = sc->vmx_ntxdescs;
977	txr->vxtxr_txbuf = malloc(txr->vxtxr_ndesc *
978	    sizeof(struct vmxnet3_txbuf), M_DEVBUF, M_NOWAIT | M_ZERO);
979	if (txr->vxtxr_txbuf == NULL)
980		return (ENOMEM);
981
982	txq->vxtxq_comp_ring.vxcr_ndesc = sc->vmx_ntxdescs;
983
984#ifndef VMXNET3_LEGACY_TX
985	TASK_INIT(&txq->vxtxq_defrtask, 0, vmxnet3_txq_tq_deferred, txq);
986
987	txq->vxtxq_br = buf_ring_alloc(VMXNET3_DEF_BUFRING_SIZE, M_DEVBUF,
988	    M_NOWAIT, &txq->vxtxq_mtx);
989	if (txq->vxtxq_br == NULL)
990		return (ENOMEM);
991#endif
992
993	return (0);
994}
995
996static int
997vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *sc)
998{
999	int i, error;
1000
1001	/*
1002	 * Only attempt to create multiple queues if MSIX is available. MSIX is
1003	 * disabled by default because it's apparently broken for devices passed
1004	 * through by at least ESXi 5.1. The hw.pci.honor_msi_blacklist tunable
1005	 * must be set to zero to enable MSIX. This check prevents us from
1006	 * allocating queue structures that we will not use.
1007	 */
1008	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX) {
1009		sc->vmx_max_nrxqueues = 1;
1010		sc->vmx_max_ntxqueues = 1;
1011	}
1012
1013	sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) *
1014	    sc->vmx_max_nrxqueues, M_DEVBUF, M_NOWAIT | M_ZERO);
1015	sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) *
1016	    sc->vmx_max_ntxqueues, M_DEVBUF, M_NOWAIT | M_ZERO);
1017	if (sc->vmx_rxq == NULL || sc->vmx_txq == NULL)
1018		return (ENOMEM);
1019
1020	for (i = 0; i < sc->vmx_max_nrxqueues; i++) {
1021		error = vmxnet3_init_rxq(sc, i);
1022		if (error)
1023			return (error);
1024	}
1025
1026	for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
1027		error = vmxnet3_init_txq(sc, i);
1028		if (error)
1029			return (error);
1030	}
1031
1032	return (0);
1033}
1034
1035static void
1036vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *rxq)
1037{
1038	struct vmxnet3_rxring *rxr;
1039	int i;
1040
1041	rxq->vxrxq_sc = NULL;
1042	rxq->vxrxq_id = -1;
1043
1044	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1045		rxr = &rxq->vxrxq_cmd_ring[i];
1046
1047		if (rxr->vxrxr_rxbuf != NULL) {
1048			free(rxr->vxrxr_rxbuf, M_DEVBUF);
1049			rxr->vxrxr_rxbuf = NULL;
1050		}
1051	}
1052
1053	if (mtx_initialized(&rxq->vxrxq_mtx) != 0)
1054		mtx_destroy(&rxq->vxrxq_mtx);
1055}
1056
1057static void
1058vmxnet3_destroy_txq(struct vmxnet3_txqueue *txq)
1059{
1060	struct vmxnet3_txring *txr;
1061
1062	txr = &txq->vxtxq_cmd_ring;
1063
1064	txq->vxtxq_sc = NULL;
1065	txq->vxtxq_id = -1;
1066
1067#ifndef VMXNET3_LEGACY_TX
1068	if (txq->vxtxq_br != NULL) {
1069		buf_ring_free(txq->vxtxq_br, M_DEVBUF);
1070		txq->vxtxq_br = NULL;
1071	}
1072#endif
1073
1074	if (txr->vxtxr_txbuf != NULL) {
1075		free(txr->vxtxr_txbuf, M_DEVBUF);
1076		txr->vxtxr_txbuf = NULL;
1077	}
1078
1079	if (mtx_initialized(&txq->vxtxq_mtx) != 0)
1080		mtx_destroy(&txq->vxtxq_mtx);
1081}
1082
1083static void
1084vmxnet3_free_rxtx_queues(struct vmxnet3_softc *sc)
1085{
1086	int i;
1087
1088	if (sc->vmx_rxq != NULL) {
1089		for (i = 0; i < sc->vmx_max_nrxqueues; i++)
1090			vmxnet3_destroy_rxq(&sc->vmx_rxq[i]);
1091		free(sc->vmx_rxq, M_DEVBUF);
1092		sc->vmx_rxq = NULL;
1093	}
1094
1095	if (sc->vmx_txq != NULL) {
1096		for (i = 0; i < sc->vmx_max_ntxqueues; i++)
1097			vmxnet3_destroy_txq(&sc->vmx_txq[i]);
1098		free(sc->vmx_txq, M_DEVBUF);
1099		sc->vmx_txq = NULL;
1100	}
1101}
1102
1103static int
1104vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
1105{
1106	device_t dev;
1107	uint8_t *kva;
1108	size_t size;
1109	int i, error;
1110
1111	dev = sc->vmx_dev;
1112
1113	size = sizeof(struct vmxnet3_driver_shared);
1114	error = vmxnet3_dma_malloc(sc, size, 1, &sc->vmx_ds_dma);
1115	if (error) {
1116		device_printf(dev, "cannot alloc shared memory\n");
1117		return (error);
1118	}
1119	sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.dma_vaddr;
1120
1121	size = sc->vmx_ntxqueues * sizeof(struct vmxnet3_txq_shared) +
1122	    sc->vmx_nrxqueues * sizeof(struct vmxnet3_rxq_shared);
1123	error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_qs_dma);
1124	if (error) {
1125		device_printf(dev, "cannot alloc queue shared memory\n");
1126		return (error);
1127	}
1128	sc->vmx_qs = (void *) sc->vmx_qs_dma.dma_vaddr;
1129	kva = sc->vmx_qs;
1130
1131	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1132		sc->vmx_txq[i].vxtxq_ts = (struct vmxnet3_txq_shared *) kva;
1133		kva += sizeof(struct vmxnet3_txq_shared);
1134	}
1135	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1136		sc->vmx_rxq[i].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva;
1137		kva += sizeof(struct vmxnet3_rxq_shared);
1138	}
1139
1140	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1141		size = sizeof(struct vmxnet3_rss_shared);
1142		error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_rss_dma);
1143		if (error) {
1144			device_printf(dev, "cannot alloc rss shared memory\n");
1145			return (error);
1146		}
1147		sc->vmx_rss =
1148		    (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.dma_vaddr;
1149	}
1150
1151	return (0);
1152}
1153
1154static void
1155vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
1156{
1157
1158	if (sc->vmx_rss != NULL) {
1159		vmxnet3_dma_free(sc, &sc->vmx_rss_dma);
1160		sc->vmx_rss = NULL;
1161	}
1162
1163	if (sc->vmx_qs != NULL) {
1164		vmxnet3_dma_free(sc, &sc->vmx_qs_dma);
1165		sc->vmx_qs = NULL;
1166	}
1167
1168	if (sc->vmx_ds != NULL) {
1169		vmxnet3_dma_free(sc, &sc->vmx_ds_dma);
1170		sc->vmx_ds = NULL;
1171	}
1172}
1173
1174static int
1175vmxnet3_alloc_txq_data(struct vmxnet3_softc *sc)
1176{
1177	device_t dev;
1178	struct vmxnet3_txqueue *txq;
1179	struct vmxnet3_txring *txr;
1180	struct vmxnet3_comp_ring *txc;
1181	size_t descsz, compsz;
1182	int i, q, error;
1183
1184	dev = sc->vmx_dev;
1185
1186	for (q = 0; q < sc->vmx_ntxqueues; q++) {
1187		txq = &sc->vmx_txq[q];
1188		txr = &txq->vxtxq_cmd_ring;
1189		txc = &txq->vxtxq_comp_ring;
1190
1191		descsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc);
1192		compsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txcompdesc);
1193
1194		error = bus_dma_tag_create(bus_get_dma_tag(dev),
1195		    1, 0,			/* alignment, boundary */
1196		    BUS_SPACE_MAXADDR,		/* lowaddr */
1197		    BUS_SPACE_MAXADDR,		/* highaddr */
1198		    NULL, NULL,			/* filter, filterarg */
1199		    VMXNET3_TX_MAXSIZE,		/* maxsize */
1200		    VMXNET3_TX_MAXSEGS,		/* nsegments */
1201		    VMXNET3_TX_MAXSEGSIZE,	/* maxsegsize */
1202		    0,				/* flags */
1203		    NULL, NULL,			/* lockfunc, lockarg */
1204		    &txr->vxtxr_txtag);
1205		if (error) {
1206			device_printf(dev,
1207			    "unable to create Tx buffer tag for queue %d\n", q);
1208			return (error);
1209		}
1210
1211		error = vmxnet3_dma_malloc(sc, descsz, 512, &txr->vxtxr_dma);
1212		if (error) {
1213			device_printf(dev, "cannot alloc Tx descriptors for "
1214			    "queue %d error %d\n", q, error);
1215			return (error);
1216		}
1217		txr->vxtxr_txd =
1218		    (struct vmxnet3_txdesc *) txr->vxtxr_dma.dma_vaddr;
1219
1220		error = vmxnet3_dma_malloc(sc, compsz, 512, &txc->vxcr_dma);
1221		if (error) {
1222			device_printf(dev, "cannot alloc Tx comp descriptors "
1223			   "for queue %d error %d\n", q, error);
1224			return (error);
1225		}
1226		txc->vxcr_u.txcd =
1227		    (struct vmxnet3_txcompdesc *) txc->vxcr_dma.dma_vaddr;
1228
1229		for (i = 0; i < txr->vxtxr_ndesc; i++) {
1230			error = bus_dmamap_create(txr->vxtxr_txtag, 0,
1231			    &txr->vxtxr_txbuf[i].vtxb_dmamap);
1232			if (error) {
1233				device_printf(dev, "unable to create Tx buf "
1234				    "dmamap for queue %d idx %d\n", q, i);
1235				return (error);
1236			}
1237		}
1238	}
1239
1240	return (0);
1241}
1242
1243static void
1244vmxnet3_free_txq_data(struct vmxnet3_softc *sc)
1245{
1246	device_t dev;
1247	struct vmxnet3_txqueue *txq;
1248	struct vmxnet3_txring *txr;
1249	struct vmxnet3_comp_ring *txc;
1250	struct vmxnet3_txbuf *txb;
1251	int i, q;
1252
1253	dev = sc->vmx_dev;
1254
1255	for (q = 0; q < sc->vmx_ntxqueues; q++) {
1256		txq = &sc->vmx_txq[q];
1257		txr = &txq->vxtxq_cmd_ring;
1258		txc = &txq->vxtxq_comp_ring;
1259
1260		for (i = 0; i < txr->vxtxr_ndesc; i++) {
1261			txb = &txr->vxtxr_txbuf[i];
1262			if (txb->vtxb_dmamap != NULL) {
1263				bus_dmamap_destroy(txr->vxtxr_txtag,
1264				    txb->vtxb_dmamap);
1265				txb->vtxb_dmamap = NULL;
1266			}
1267		}
1268
1269		if (txc->vxcr_u.txcd != NULL) {
1270			vmxnet3_dma_free(sc, &txc->vxcr_dma);
1271			txc->vxcr_u.txcd = NULL;
1272		}
1273
1274		if (txr->vxtxr_txd != NULL) {
1275			vmxnet3_dma_free(sc, &txr->vxtxr_dma);
1276			txr->vxtxr_txd = NULL;
1277		}
1278
1279		if (txr->vxtxr_txtag != NULL) {
1280			bus_dma_tag_destroy(txr->vxtxr_txtag);
1281			txr->vxtxr_txtag = NULL;
1282		}
1283	}
1284}
1285
1286static int
1287vmxnet3_alloc_rxq_data(struct vmxnet3_softc *sc)
1288{
1289	device_t dev;
1290	struct vmxnet3_rxqueue *rxq;
1291	struct vmxnet3_rxring *rxr;
1292	struct vmxnet3_comp_ring *rxc;
1293	int descsz, compsz;
1294	int i, j, q, error;
1295
1296	dev = sc->vmx_dev;
1297
1298	for (q = 0; q < sc->vmx_nrxqueues; q++) {
1299		rxq = &sc->vmx_rxq[q];
1300		rxc = &rxq->vxrxq_comp_ring;
1301		compsz = 0;
1302
1303		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1304			rxr = &rxq->vxrxq_cmd_ring[i];
1305
1306			descsz = rxr->vxrxr_ndesc *
1307			    sizeof(struct vmxnet3_rxdesc);
1308			compsz += rxr->vxrxr_ndesc *
1309			    sizeof(struct vmxnet3_rxcompdesc);
1310
1311			error = bus_dma_tag_create(bus_get_dma_tag(dev),
1312			    1, 0,		/* alignment, boundary */
1313			    BUS_SPACE_MAXADDR,	/* lowaddr */
1314			    BUS_SPACE_MAXADDR,	/* highaddr */
1315			    NULL, NULL,		/* filter, filterarg */
1316			    MJUMPAGESIZE,	/* maxsize */
1317			    1,			/* nsegments */
1318			    MJUMPAGESIZE,	/* maxsegsize */
1319			    0,			/* flags */
1320			    NULL, NULL,		/* lockfunc, lockarg */
1321			    &rxr->vxrxr_rxtag);
1322			if (error) {
1323				device_printf(dev,
1324				    "unable to create Rx buffer tag for "
1325				    "queue %d\n", q);
1326				return (error);
1327			}
1328
1329			error = vmxnet3_dma_malloc(sc, descsz, 512,
1330			    &rxr->vxrxr_dma);
1331			if (error) {
1332				device_printf(dev, "cannot allocate Rx "
1333				    "descriptors for queue %d/%d error %d\n",
1334				    q, i, error);
1335				return (error);
1336			}
1337			rxr->vxrxr_rxd =
1338			    (struct vmxnet3_rxdesc *) rxr->vxrxr_dma.dma_vaddr;
1339		}
1340
1341		error = vmxnet3_dma_malloc(sc, compsz, 512, &rxc->vxcr_dma);
1342		if (error) {
1343			device_printf(dev, "cannot alloc Rx comp descriptors "
1344			    "for queue %d error %d\n", q, error);
1345			return (error);
1346		}
1347		rxc->vxcr_u.rxcd =
1348		    (struct vmxnet3_rxcompdesc *) rxc->vxcr_dma.dma_vaddr;
1349
1350		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1351			rxr = &rxq->vxrxq_cmd_ring[i];
1352
1353			error = bus_dmamap_create(rxr->vxrxr_rxtag, 0,
1354			    &rxr->vxrxr_spare_dmap);
1355			if (error) {
1356				device_printf(dev, "unable to create spare "
1357				    "dmamap for queue %d/%d error %d\n",
1358				    q, i, error);
1359				return (error);
1360			}
1361
1362			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1363				error = bus_dmamap_create(rxr->vxrxr_rxtag, 0,
1364				    &rxr->vxrxr_rxbuf[j].vrxb_dmamap);
1365				if (error) {
1366					device_printf(dev, "unable to create "
1367					    "dmamap for queue %d/%d slot %d "
1368					    "error %d\n",
1369					    q, i, j, error);
1370					return (error);
1371				}
1372			}
1373		}
1374	}
1375
1376	return (0);
1377}
1378
1379static void
1380vmxnet3_free_rxq_data(struct vmxnet3_softc *sc)
1381{
1382	device_t dev;
1383	struct vmxnet3_rxqueue *rxq;
1384	struct vmxnet3_rxring *rxr;
1385	struct vmxnet3_comp_ring *rxc;
1386	struct vmxnet3_rxbuf *rxb;
1387	int i, j, q;
1388
1389	dev = sc->vmx_dev;
1390
1391	for (q = 0; q < sc->vmx_nrxqueues; q++) {
1392		rxq = &sc->vmx_rxq[q];
1393		rxc = &rxq->vxrxq_comp_ring;
1394
1395		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1396			rxr = &rxq->vxrxq_cmd_ring[i];
1397
1398			if (rxr->vxrxr_spare_dmap != NULL) {
1399				bus_dmamap_destroy(rxr->vxrxr_rxtag,
1400				    rxr->vxrxr_spare_dmap);
1401				rxr->vxrxr_spare_dmap = NULL;
1402			}
1403
1404			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1405				rxb = &rxr->vxrxr_rxbuf[j];
1406				if (rxb->vrxb_dmamap != NULL) {
1407					bus_dmamap_destroy(rxr->vxrxr_rxtag,
1408					    rxb->vrxb_dmamap);
1409					rxb->vrxb_dmamap = NULL;
1410				}
1411			}
1412		}
1413
1414		if (rxc->vxcr_u.rxcd != NULL) {
1415			vmxnet3_dma_free(sc, &rxc->vxcr_dma);
1416			rxc->vxcr_u.rxcd = NULL;
1417		}
1418
1419		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1420			rxr = &rxq->vxrxq_cmd_ring[i];
1421
1422			if (rxr->vxrxr_rxd != NULL) {
1423				vmxnet3_dma_free(sc, &rxr->vxrxr_dma);
1424				rxr->vxrxr_rxd = NULL;
1425			}
1426
1427			if (rxr->vxrxr_rxtag != NULL) {
1428				bus_dma_tag_destroy(rxr->vxrxr_rxtag);
1429				rxr->vxrxr_rxtag = NULL;
1430			}
1431		}
1432	}
1433}
1434
1435static int
1436vmxnet3_alloc_queue_data(struct vmxnet3_softc *sc)
1437{
1438	int error;
1439
1440	error = vmxnet3_alloc_txq_data(sc);
1441	if (error)
1442		return (error);
1443
1444	error = vmxnet3_alloc_rxq_data(sc);
1445	if (error)
1446		return (error);
1447
1448	return (0);
1449}
1450
1451static void
1452vmxnet3_free_queue_data(struct vmxnet3_softc *sc)
1453{
1454
1455	if (sc->vmx_rxq != NULL)
1456		vmxnet3_free_rxq_data(sc);
1457
1458	if (sc->vmx_txq != NULL)
1459		vmxnet3_free_txq_data(sc);
1460}
1461
1462static int
1463vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1464{
1465	int error;
1466
1467	error = vmxnet3_dma_malloc(sc, VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN,
1468	    32, &sc->vmx_mcast_dma);
1469	if (error)
1470		device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1471	else
1472		sc->vmx_mcast = sc->vmx_mcast_dma.dma_vaddr;
1473
1474	return (error);
1475}
1476
1477static void
1478vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1479{
1480
1481	if (sc->vmx_mcast != NULL) {
1482		vmxnet3_dma_free(sc, &sc->vmx_mcast_dma);
1483		sc->vmx_mcast = NULL;
1484	}
1485}
1486
1487static void
1488vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1489{
1490	struct vmxnet3_driver_shared *ds;
1491	struct vmxnet3_txqueue *txq;
1492	struct vmxnet3_txq_shared *txs;
1493	struct vmxnet3_rxqueue *rxq;
1494	struct vmxnet3_rxq_shared *rxs;
1495	int i;
1496
1497	ds = sc->vmx_ds;
1498
1499	/*
1500	 * Initialize fields of the shared data that remain the same across
1501	 * reinits. Note the shared data is zeroed when allocated.
1502	 */
1503
1504	ds->magic = VMXNET3_REV1_MAGIC;
1505
1506	/* DriverInfo */
1507	ds->version = VMXNET3_DRIVER_VERSION;
1508	ds->guest = VMXNET3_GOS_FREEBSD |
1509#ifdef __LP64__
1510	    VMXNET3_GOS_64BIT;
1511#else
1512	    VMXNET3_GOS_32BIT;
1513#endif
1514	ds->vmxnet3_revision = 1;
1515	ds->upt_version = 1;
1516
1517	/* Misc. conf */
1518	ds->driver_data = vtophys(sc);
1519	ds->driver_data_len = sizeof(struct vmxnet3_softc);
1520	ds->queue_shared = sc->vmx_qs_dma.dma_paddr;
1521	ds->queue_shared_len = sc->vmx_qs_dma.dma_size;
1522	ds->nrxsg_max = sc->vmx_max_rxsegs;
1523
1524	/* RSS conf */
1525	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1526		ds->rss.version = 1;
1527		ds->rss.paddr = sc->vmx_rss_dma.dma_paddr;
1528		ds->rss.len = sc->vmx_rss_dma.dma_size;
1529	}
1530
1531	/* Interrupt control. */
1532	ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1533	ds->nintr = sc->vmx_nintrs;
1534	ds->evintr = sc->vmx_event_intr_idx;
1535	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1536
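	/* Default every vector to adaptive interrupt moderation. */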
1537	for (i = 0; i < sc->vmx_nintrs; i++)
1538		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1539
1540	/* Receive filter. */
1541	ds->mcast_table = sc->vmx_mcast_dma.dma_paddr;
1542	ds->mcast_tablelen = sc->vmx_mcast_dma.dma_size;
1543
1544	/* Tx queues */
1545	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1546		txq = &sc->vmx_txq[i];
1547		txs = txq->vxtxq_ts;
1548
1549		txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_dma.dma_paddr;
1550		txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1551		txs->comp_ring = txq->vxtxq_comp_ring.vxcr_dma.dma_paddr;
1552		txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1553		txs->driver_data = vtophys(txq);
1554		txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1555	}
1556
1557	/* Rx queues */
1558	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1559		rxq = &sc->vmx_rxq[i];
1560		rxs = rxq->vxrxq_rs;
1561
1562		rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_dma.dma_paddr;
1563		rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1564		rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_dma.dma_paddr;
1565		rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1566		rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_dma.dma_paddr;
1567		rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1568		rxs->driver_data = vtophys(rxq);
1569		rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1570	}
1571}
1572
1573static void
1574vmxnet3_reinit_interface(struct vmxnet3_softc *sc)
1575{
1576	struct ifnet *ifp;
1577
1578	ifp = sc->vmx_ifp;
1579
1580	/* Use the current MAC address. */
1581	bcopy(IF_LLADDR(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
1582	vmxnet3_set_lladdr(sc);
1583
1584	ifp->if_hwassist = 0;
1585	if (ifp->if_capenable & IFCAP_TXCSUM)
1586		ifp->if_hwassist |= VMXNET3_CSUM_OFFLOAD;
1587	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
1588		ifp->if_hwassist |= VMXNET3_CSUM_OFFLOAD_IPV6;
1589	if (ifp->if_capenable & IFCAP_TSO4)
1590		ifp->if_hwassist |= CSUM_IP_TSO;
1591	if (ifp->if_capenable & IFCAP_TSO6)
1592		ifp->if_hwassist |= CSUM_IP6_TSO;
1593}
1594
1595static void
1596vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1597{
1598	/*
1599	 * Use the same key as the Linux driver until FreeBSD can do
1600	 * RSS (presumably Toeplitz) in software.
1601	 */
1602	static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
1603	    0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
1604	    0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
1605	    0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
1606	    0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
1607	    0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
1608	};
1609
1610	struct vmxnet3_driver_shared *ds;
1611	struct vmxnet3_rss_shared *rss;
1612	int i;
1613
1614	ds = sc->vmx_ds;
1615	rss = sc->vmx_rss;
1616
1617	rss->hash_type =
1618	    UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1619	    UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1620	rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1621	rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1622	rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1623	memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
1624
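	/* Distribute the indirection table entries across the Rx queues. */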
1625	for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1626		rss->ind_table[i] = i % sc->vmx_nrxqueues;
1627}
1628
1629static void
1630vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1631{
1632	struct ifnet *ifp;
1633	struct vmxnet3_driver_shared *ds;
1634
1635	ifp = sc->vmx_ifp;
1636	ds = sc->vmx_ds;
1637
1638	ds->mtu = ifp->if_mtu;
1639	ds->ntxqueue = sc->vmx_ntxqueues;
1640	ds->nrxqueue = sc->vmx_nrxqueues;
1641
1642	ds->upt_features = 0;
1643	if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
1644		ds->upt_features |= UPT1_F_CSUM;
1645	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1646		ds->upt_features |= UPT1_F_VLAN;
1647	if (ifp->if_capenable & IFCAP_LRO)
1648		ds->upt_features |= UPT1_F_LRO;
1649
1650	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1651		ds->upt_features |= UPT1_F_RSS;
1652		vmxnet3_reinit_rss_shared_data(sc);
1653	}
1654
1655	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.dma_paddr);
1656	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1657	    (uint64_t) sc->vmx_ds_dma.dma_paddr >> 32);
1658}
1659
1660static int
1661vmxnet3_alloc_data(struct vmxnet3_softc *sc)
1662{
1663	int error;
1664
1665	error = vmxnet3_alloc_shared_data(sc);
1666	if (error)
1667		return (error);
1668
1669	error = vmxnet3_alloc_queue_data(sc);
1670	if (error)
1671		return (error);
1672
1673	error = vmxnet3_alloc_mcast_table(sc);
1674	if (error)
1675		return (error);
1676
1677	vmxnet3_init_shared_data(sc);
1678
1679	return (0);
1680}
1681
1682static void
1683vmxnet3_free_data(struct vmxnet3_softc *sc)
1684{
1685
1686	vmxnet3_free_mcast_table(sc);
1687	vmxnet3_free_queue_data(sc);
1688	vmxnet3_free_shared_data(sc);
1689}
1690
1691static int
1692vmxnet3_setup_interface(struct vmxnet3_softc *sc)
1693{
1694	device_t dev;
1695	struct ifnet *ifp;
1696
1697	dev = sc->vmx_dev;
1698
1699	ifp = sc->vmx_ifp = if_alloc(IFT_ETHER);
1700	if (ifp == NULL) {
1701		device_printf(dev, "cannot allocate ifnet structure\n");
1702		return (ENOSPC);
1703	}
1704
1705	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1706#if __FreeBSD_version < 1000025
1707	ifp->if_baudrate = 1000000000;
1708#elif __FreeBSD_version < 1100011
1709	if_initbaudrate(ifp, IF_Gbps(10));
1710#else
1711	ifp->if_baudrate = IF_Gbps(10);
1712#endif
1713	ifp->if_softc = sc;
1714	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1715	ifp->if_init = vmxnet3_init;
1716	ifp->if_ioctl = vmxnet3_ioctl;
1717	ifp->if_hw_tsomax = VMXNET3_TSO_MAXSIZE;
1718
1719#ifdef VMXNET3_LEGACY_TX
1720	ifp->if_start = vmxnet3_start;
1721	ifp->if_snd.ifq_drv_maxlen = sc->vmx_ntxdescs - 1;
1722	IFQ_SET_MAXLEN(&ifp->if_snd, sc->vmx_ntxdescs - 1);
1723	IFQ_SET_READY(&ifp->if_snd);
1724#else
1725	ifp->if_transmit = vmxnet3_txq_mq_start;
1726	ifp->if_qflush = vmxnet3_qflush;
1727#endif
1728
1729	vmxnet3_get_lladdr(sc);
1730	ether_ifattach(ifp, sc->vmx_lladdr);
1731
1732	ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_TXCSUM;
1733	ifp->if_capabilities |= IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6;
1734	ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6;
1735	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
1736	    IFCAP_VLAN_HWCSUM;
1737	ifp->if_capenable = ifp->if_capabilities;
1738
1739	/* These capabilities are not enabled by default. */
1740	ifp->if_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER;
1741
1742	sc->vmx_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
1743	    vmxnet3_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
1744	sc->vmx_vlan_detach = EVENTHANDLER_REGISTER(vlan_config,
1745	    vmxnet3_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
1746
1747	ifmedia_init(&sc->vmx_media, 0, vmxnet3_media_change,
1748	    vmxnet3_media_status);
1749	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
1750	ifmedia_set(&sc->vmx_media, IFM_ETHER | IFM_AUTO);
1751
1752	return (0);
1753}
1754
1755static void
1756vmxnet3_evintr(struct vmxnet3_softc *sc)
1757{
1758	device_t dev;
1759	struct ifnet *ifp;
1760	struct vmxnet3_txq_shared *ts;
1761	struct vmxnet3_rxq_shared *rs;
1762	uint32_t event;
1763	int reset;
1764
1765	dev = sc->vmx_dev;
1766	ifp = sc->vmx_ifp;
1767	reset = 0;
1768
1769	VMXNET3_CORE_LOCK(sc);
1770
1771	/* Clear events. */
1772	event = sc->vmx_ds->event;
1773	vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
1774
1775	if (event & VMXNET3_EVENT_LINK) {
1776		vmxnet3_link_status(sc);
1777		if (sc->vmx_link_active != 0)
1778			vmxnet3_tx_start_all(sc);
1779	}
1780
1781	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
1782		reset = 1;
1783		vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
1784		ts = sc->vmx_txq[0].vxtxq_ts;
1785		if (ts->stopped != 0)
1786			device_printf(dev, "Tx queue error %#x\n", ts->error);
1787		rs = sc->vmx_rxq[0].vxrxq_rs;
1788		if (rs->stopped != 0)
1789			device_printf(dev, "Rx queue error %#x\n", rs->error);
1790		device_printf(dev, "Rx/Tx queue error event ... resetting\n");
1791	}
1792
1793	if (event & VMXNET3_EVENT_DIC)
1794		device_printf(dev, "device implementation change event\n");
1795	if (event & VMXNET3_EVENT_DEBUG)
1796		device_printf(dev, "debug event\n");
1797
1798	if (reset != 0) {
1799		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1800		vmxnet3_init_locked(sc);
1801	}
1802
1803	VMXNET3_CORE_UNLOCK(sc);
1804}
1805
1806static void
1807vmxnet3_txq_eof(struct vmxnet3_txqueue *txq)
1808{
1809	struct vmxnet3_softc *sc;
1810	struct ifnet *ifp;
1811	struct vmxnet3_txring *txr;
1812	struct vmxnet3_comp_ring *txc;
1813	struct vmxnet3_txcompdesc *txcd;
1814	struct vmxnet3_txbuf *txb;
1815	struct mbuf *m;
1816	u_int sop;
1817
1818	sc = txq->vxtxq_sc;
1819	ifp = sc->vmx_ifp;
1820	txr = &txq->vxtxq_cmd_ring;
1821	txc = &txq->vxtxq_comp_ring;
1822
1823	VMXNET3_TXQ_LOCK_ASSERT(txq);
1824
1825	for (;;) {
1826		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
1827		if (txcd->gen != txc->vxcr_gen)
1828			break;
1829		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1830
1831		if (++txc->vxcr_next == txc->vxcr_ndesc) {
1832			txc->vxcr_next = 0;
1833			txc->vxcr_gen ^= 1;
1834		}
1835
1836		sop = txr->vxtxr_next;
1837		txb = &txr->vxtxr_txbuf[sop];
1838
1839		if ((m = txb->vtxb_m) != NULL) {
1840			bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
1841			    BUS_DMASYNC_POSTWRITE);
1842			bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap);
1843
1844			txq->vxtxq_stats.vmtxs_opackets++;
1845			txq->vxtxq_stats.vmtxs_obytes += m->m_pkthdr.len;
1846			if (m->m_flags & M_MCAST)
1847				txq->vxtxq_stats.vmtxs_omcasts++;
1848
1849			m_freem(m);
1850			txb->vtxb_m = NULL;
1851		}
1852
1853		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
1854	}
1855
1856	if (txr->vxtxr_head == txr->vxtxr_next)
1857		txq->vxtxq_watchdog = 0;
1858}
1859
1860static int
1861vmxnet3_newbuf(struct vmxnet3_softc *sc, struct vmxnet3_rxring *rxr)
1862{
1863	struct ifnet *ifp;
1864	struct mbuf *m;
1865	struct vmxnet3_rxdesc *rxd;
1866	struct vmxnet3_rxbuf *rxb;
1867	bus_dma_tag_t tag;
1868	bus_dmamap_t dmap;
1869	bus_dma_segment_t segs[1];
1870	int idx, clsize, btype, flags, nsegs, error;
1871
1872	ifp = sc->vmx_ifp;
1873	tag = rxr->vxrxr_rxtag;
1874	dmap = rxr->vxrxr_spare_dmap;
1875	idx = rxr->vxrxr_fill;
1876	rxd = &rxr->vxrxr_rxd[idx];
1877	rxb = &rxr->vxrxr_rxbuf[idx];
1878
1879#ifdef VMXNET3_FAILPOINTS
1880	KFAIL_POINT_CODE(VMXNET3_FP, newbuf, return ENOBUFS);
1881	if (rxr->vxrxr_rid != 0)
1882		KFAIL_POINT_CODE(VMXNET3_FP, newbuf_body_only, return ENOBUFS);
1883#endif
1884
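	/* Frame-starting slots use a regular cluster; body-only slots use a jumbo page cluster. */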
1885	if (rxr->vxrxr_rid == 0 && (idx % sc->vmx_rx_max_chain) == 0) {
1886		flags = M_PKTHDR;
1887		clsize = MCLBYTES;
1888		btype = VMXNET3_BTYPE_HEAD;
1889	} else {
1890#if __FreeBSD_version < 902001
1891		/*
1892		 * These mbufs will never be used for the start of a frame.
1893		 * Roughly prior to branching releng/9.2, the load_mbuf_sg()
1894		 * routine required the mbuf to always be a packet header. Avoid
1895		 * unnecessary mbuf initialization in newer versions where
1896		 * that is not the case.
1897		 */
1898		flags = M_PKTHDR;
1899#else
1900		flags = 0;
1901#endif
1902		clsize = MJUMPAGESIZE;
1903		btype = VMXNET3_BTYPE_BODY;
1904	}
1905
1906	m = m_getjcl(M_NOWAIT, MT_DATA, flags, clsize);
1907	if (m == NULL) {
1908		sc->vmx_stats.vmst_mgetcl_failed++;
1909		return (ENOBUFS);
1910	}
1911
1912	if (btype == VMXNET3_BTYPE_HEAD) {
1913		m->m_len = m->m_pkthdr.len = clsize;
1914		m_adj(m, ETHER_ALIGN);
1915	} else
1916		m->m_len = clsize;
1917
1918	error = bus_dmamap_load_mbuf_sg(tag, dmap, m, &segs[0], &nsegs,
1919	    BUS_DMA_NOWAIT);
1920	if (error) {
1921		m_freem(m);
1922		sc->vmx_stats.vmst_mbuf_load_failed++;
1923		return (error);
1924	}
1925	KASSERT(nsegs == 1,
1926	    ("%s: mbuf %p with too many segments %d", __func__, m, nsegs));
1927#if __FreeBSD_version < 902001
1928	if (btype == VMXNET3_BTYPE_BODY)
1929		m->m_flags &= ~M_PKTHDR;
1930#endif
1931
1932	if (rxb->vrxb_m != NULL) {
1933		bus_dmamap_sync(tag, rxb->vrxb_dmamap, BUS_DMASYNC_POSTREAD);
1934		bus_dmamap_unload(tag, rxb->vrxb_dmamap);
1935	}
1936
1937	rxr->vxrxr_spare_dmap = rxb->vrxb_dmamap;
1938	rxb->vrxb_dmamap = dmap;
1939	rxb->vrxb_m = m;
1940
1941	rxd->addr = segs[0].ds_addr;
1942	rxd->len = segs[0].ds_len;
1943	rxd->btype = btype;
1944	rxd->gen = rxr->vxrxr_gen;
1945
1946	vmxnet3_rxr_increment_fill(rxr);
1947	return (0);
1948}
1949
1950static void
1951vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *rxq,
1952    struct vmxnet3_rxring *rxr, int idx)
1953{
1954	struct vmxnet3_rxdesc *rxd;
1955
1956	rxd = &rxr->vxrxr_rxd[idx];
1957	rxd->gen = rxr->vxrxr_gen;
1958	vmxnet3_rxr_increment_fill(rxr);
1959}
1960
1961static void
1962vmxnet3_rxq_discard_chain(struct vmxnet3_rxqueue *rxq)
1963{
1964	struct vmxnet3_softc *sc;
1965	struct vmxnet3_rxring *rxr;
1966	struct vmxnet3_comp_ring *rxc;
1967	struct vmxnet3_rxcompdesc *rxcd;
1968	int idx, eof;
1969
1970	sc = rxq->vxrxq_sc;
1971	rxc = &rxq->vxrxq_comp_ring;
1972
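	/*
	 * Walk the completion ring up to the end-of-packet descriptor,
	 * handing each Rx descriptor of the partially received frame back
	 * to the host so the chain is dropped cleanly.
	 */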
1973	do {
1974		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
1975		if (rxcd->gen != rxc->vxcr_gen)
1976			break;		/* Not expected. */
1977		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1978
1979		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
1980			rxc->vxcr_next = 0;
1981			rxc->vxcr_gen ^= 1;
1982		}
1983
1984		idx = rxcd->rxd_idx;
1985		eof = rxcd->eop;
1986		if (rxcd->qid < sc->vmx_nrxqueues)
1987			rxr = &rxq->vxrxq_cmd_ring[0];
1988		else
1989			rxr = &rxq->vxrxq_cmd_ring[1];
1990		vmxnet3_rxq_eof_discard(rxq, rxr, idx);
1991	} while (!eof);
1992}
1993
1994static void
1995vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
1996{
1997
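	/*
	 * Translate the completion descriptor's checksum bits into mbuf
	 * csum_flags; when the device verified a TCP or UDP checksum,
	 * csum_data is set to 0xFFFF so the stack skips its own check.
	 */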
1998	if (rxcd->ipv4) {
1999		m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
2000		if (rxcd->ipcsum_ok)
2001			m->m_pkthdr.csum_flags |= CSUM_IP_VALID;
2002	}
2003
2004	if (!rxcd->fragment) {
2005		if (rxcd->csum_ok && (rxcd->tcp || rxcd->udp)) {
2006			m->m_pkthdr.csum_flags |= CSUM_DATA_VALID |
2007			    CSUM_PSEUDO_HDR;
2008			m->m_pkthdr.csum_data = 0xFFFF;
2009		}
2010	}
2011}
2012
2013static void
2014vmxnet3_rxq_input(struct vmxnet3_rxqueue *rxq,
2015    struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2016{
2017	struct vmxnet3_softc *sc;
2018	struct ifnet *ifp;
2019
2020	sc = rxq->vxrxq_sc;
2021	ifp = sc->vmx_ifp;
2022
2023	if (rxcd->error) {
2024		rxq->vxrxq_stats.vmrxs_ierrors++;
2025		m_freem(m);
2026		return;
2027	}
2028
2029#ifdef notyet
2030	switch (rxcd->rss_type) {
2031	case VMXNET3_RCD_RSS_TYPE_IPV4:
2032		m->m_pkthdr.flowid = rxcd->rss_hash;
2033		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV4);
2034		break;
2035	case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
2036		m->m_pkthdr.flowid = rxcd->rss_hash;
2037		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV4);
2038		break;
2039	case VMXNET3_RCD_RSS_TYPE_IPV6:
2040		m->m_pkthdr.flowid = rxcd->rss_hash;
2041		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV6);
2042		break;
2043	case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
2044		m->m_pkthdr.flowid = rxcd->rss_hash;
2045		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV6);
2046		break;
2047	default: /* VMXNET3_RCD_RSS_TYPE_NONE */
2048		m->m_pkthdr.flowid = rxq->vxrxq_id;
2049		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2050		break;
2051	}
2052#else
2053	m->m_pkthdr.flowid = rxq->vxrxq_id;
2054	m->m_flags |= M_FLOWID;
2055#endif
2056
2057	if (!rxcd->no_csum)
2058		vmxnet3_rx_csum(rxcd, m);
2059	if (rxcd->vlan) {
2060		m->m_flags |= M_VLANTAG;
2061		m->m_pkthdr.ether_vtag = rxcd->vtag;
2062	}
2063
2064	rxq->vxrxq_stats.vmrxs_ipackets++;
2065	rxq->vxrxq_stats.vmrxs_ibytes += m->m_pkthdr.len;
2066
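	/*
	 * Hand the frame to the stack without holding the Rx queue lock;
	 * the caller rechecks IFF_DRV_RUNNING after this function returns.
	 */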
2067	VMXNET3_RXQ_UNLOCK(rxq);
2068	(*ifp->if_input)(ifp, m);
2069	VMXNET3_RXQ_LOCK(rxq);
2070}
2071
2072static void
2073vmxnet3_rxq_eof(struct vmxnet3_rxqueue *rxq)
2074{
2075	struct vmxnet3_softc *sc;
2076	struct ifnet *ifp;
2077	struct vmxnet3_rxring *rxr;
2078	struct vmxnet3_comp_ring *rxc;
2079	struct vmxnet3_rxdesc *rxd;
2080	struct vmxnet3_rxcompdesc *rxcd;
2081	struct mbuf *m, *m_head, *m_tail;
2082	int idx, length;
2083
2084	sc = rxq->vxrxq_sc;
2085	ifp = sc->vmx_ifp;
2086	rxc = &rxq->vxrxq_comp_ring;
2087	m_head = m_tail = NULL;
2088
2089	VMXNET3_RXQ_LOCK_ASSERT(rxq);
2090
2091	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2092		return;
2093
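	/*
	 * A completion descriptor is owned by the driver only while its gen
	 * bit matches the ring's current generation; the read barrier below
	 * keeps that check ordered before the rest of the descriptor reads.
	 */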
2094	for (;;) {
2095		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
2096		if (rxcd->gen != rxc->vxcr_gen)
2097			break;
2098		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
2099
2100		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
2101			rxc->vxcr_next = 0;
2102			rxc->vxcr_gen ^= 1;
2103		}
2104
2105		idx = rxcd->rxd_idx;
2106		length = rxcd->len;
2107		if (rxcd->qid < sc->vmx_nrxqueues)
2108			rxr = &rxq->vxrxq_cmd_ring[0];
2109		else
2110			rxr = &rxq->vxrxq_cmd_ring[1];
2111		rxd = &rxr->vxrxr_rxd[idx];
2112
2113		m = rxr->vxrxr_rxbuf[idx].vrxb_m;
2114		KASSERT(m != NULL, ("%s: queue %d idx %d without mbuf",
2115		    __func__, rxcd->qid, idx));
2116
2117		/*
2118		 * The host may skip descriptors. We detect this when the
2119		 * completed descriptor's index does not match our fill index.
2120		 * Catch up with the host now.
2121		 */
2122		if (__predict_false(rxr->vxrxr_fill != idx)) {
2123			while (rxr->vxrxr_fill != idx) {
2124				rxr->vxrxr_rxd[rxr->vxrxr_fill].gen =
2125				    rxr->vxrxr_gen;
2126				vmxnet3_rxr_increment_fill(rxr);
2127			}
2128		}
2129
2130		if (rxcd->sop) {
2131			KASSERT(rxd->btype == VMXNET3_BTYPE_HEAD,
2132			    ("%s: start of frame w/o head buffer", __func__));
2133			KASSERT(rxr == &rxq->vxrxq_cmd_ring[0],
2134			    ("%s: start of frame not in ring 0", __func__));
2135			KASSERT((idx % sc->vmx_rx_max_chain) == 0,
2136			    ("%s: start of frame at unexcepted index %d (%d)",
2137			    ("%s: start of frame at unexpected index %d (%d)",
2138			KASSERT(m_head == NULL,
2139			    ("%s: duplicate start of frame?", __func__));
2140
2141			if (length == 0) {
2142				/* Just ignore this descriptor. */
2143				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2144				goto nextp;
2145			}
2146
2147			if (vmxnet3_newbuf(sc, rxr) != 0) {
2148				rxq->vxrxq_stats.vmrxs_iqdrops++;
2149				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2150				if (!rxcd->eop)
2151					vmxnet3_rxq_discard_chain(rxq);
2152				goto nextp;
2153			}
2154
2155			m->m_pkthdr.rcvif = ifp;
2156			m->m_pkthdr.len = m->m_len = length;
2157			m->m_pkthdr.csum_flags = 0;
2158			m_head = m_tail = m;
2159
2160		} else {
2161			KASSERT(rxd->btype == VMXNET3_BTYPE_BODY,
2162			    ("%s: non start of frame w/o body buffer", __func__));
2163			KASSERT(m_head != NULL,
2164			    ("%s: frame not started?", __func__));
2165
2166			if (vmxnet3_newbuf(sc, rxr) != 0) {
2167				rxq->vxrxq_stats.vmrxs_iqdrops++;
2168				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2169				if (!rxcd->eop)
2170					vmxnet3_rxq_discard_chain(rxq);
2171				m_freem(m_head);
2172				m_head = m_tail = NULL;
2173				goto nextp;
2174			}
2175
2176			m->m_len = length;
2177			m_head->m_pkthdr.len += length;
2178			m_tail->m_next = m;
2179			m_tail = m;
2180		}
2181
2182		if (rxcd->eop) {
2183			vmxnet3_rxq_input(rxq, rxcd, m_head);
2184			m_head = m_tail = NULL;
2185
2186			/* Must recheck after dropping the Rx lock. */
2187			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2188				break;
2189		}
2190
2191nextp:
2192		if (__predict_false(rxq->vxrxq_rs->update_rxhead)) {
2193			int qid = rxcd->qid;
2194			bus_size_t r;
2195
2196			idx = (idx + 1) % rxr->vxrxr_ndesc;
2197			if (qid >= sc->vmx_nrxqueues) {
2198				qid -= sc->vmx_nrxqueues;
2199				r = VMXNET3_BAR0_RXH2(qid);
2200			} else
2201				r = VMXNET3_BAR0_RXH1(qid);
2202			vmxnet3_write_bar0(sc, r, idx);
2203		}
2204	}
2205}
2206
2207static void
2208vmxnet3_legacy_intr(void *xsc)
2209{
2210	struct vmxnet3_softc *sc;
2211	struct vmxnet3_rxqueue *rxq;
2212	struct vmxnet3_txqueue *txq;
2213	struct ifnet *ifp;
2214
2215	sc = xsc;
2216	rxq = &sc->vmx_rxq[0];
2217	txq = &sc->vmx_txq[0];
2218	ifp = sc->vmx_ifp;
2219
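	/*
	 * A legacy INTx line may be shared with other devices, so first ask
	 * the device whether it actually raised this interrupt.
	 */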
2220	if (sc->vmx_intr_type == VMXNET3_IT_LEGACY) {
2221		if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
2222			return;
2223	}
2224	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2225		vmxnet3_disable_all_intrs(sc);
2226
2227	if (sc->vmx_ds->event != 0)
2228		vmxnet3_evintr(sc);
2229
2230	VMXNET3_RXQ_LOCK(rxq);
2231	vmxnet3_rxq_eof(rxq);
2232	VMXNET3_RXQ_UNLOCK(rxq);
2233
2234	VMXNET3_TXQ_LOCK(txq);
2235	vmxnet3_txq_eof(txq);
2236	vmxnet3_txq_start(txq);
2237	VMXNET3_TXQ_UNLOCK(txq);
2238
2239	vmxnet3_enable_all_intrs(sc);
2240}
2241
2242static void
2243vmxnet3_txq_intr(void *xtxq)
2244{
2245	struct vmxnet3_softc *sc;
2246	struct vmxnet3_txqueue *txq;
2247	struct ifnet *ifp;
2248
2249	txq = xtxq;
2250	sc = txq->vxtxq_sc;
2251	ifp = sc->vmx_ifp;
2252
2253	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2254		vmxnet3_disable_intr(sc, txq->vxtxq_intr_idx);
2255
2256	VMXNET3_TXQ_LOCK(txq);
2257	vmxnet3_txq_eof(txq);
2258	vmxnet3_txq_start(txq);
2259	VMXNET3_TXQ_UNLOCK(txq);
2260
2261	vmxnet3_enable_intr(sc, txq->vxtxq_intr_idx);
2262}
2263
2264static void
2265vmxnet3_rxq_intr(void *xrxq)
2266{
2267	struct vmxnet3_softc *sc;
2268	struct vmxnet3_rxqueue *rxq;
2269
2270	rxq = xrxq;
2271	sc = rxq->vxrxq_sc;
2272
2273	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2274		vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx);
2275
2276	VMXNET3_RXQ_LOCK(rxq);
2277	vmxnet3_rxq_eof(rxq);
2278	VMXNET3_RXQ_UNLOCK(rxq);
2279
2280	vmxnet3_enable_intr(sc, rxq->vxrxq_intr_idx);
2281}
2282
2283static void
2284vmxnet3_event_intr(void *xsc)
2285{
2286	struct vmxnet3_softc *sc;
2287
2288	sc = xsc;
2289
2290	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2291		vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
2292
2293	if (sc->vmx_ds->event != 0)
2294		vmxnet3_evintr(sc);
2295
2296	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2297}
2298
2299static void
2300vmxnet3_txstop(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2301{
2302	struct vmxnet3_txring *txr;
2303	struct vmxnet3_txbuf *txb;
2304	int i;
2305
2306	txr = &txq->vxtxq_cmd_ring;
2307
2308	for (i = 0; i < txr->vxtxr_ndesc; i++) {
2309		txb = &txr->vxtxr_txbuf[i];
2310
2311		if (txb->vtxb_m == NULL)
2312			continue;
2313
2314		bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
2315		    BUS_DMASYNC_POSTWRITE);
2316		bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap);
2317		m_freem(txb->vtxb_m);
2318		txb->vtxb_m = NULL;
2319	}
2320}
2321
2322static void
2323vmxnet3_rxstop(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2324{
2325	struct vmxnet3_rxring *rxr;
2326	struct vmxnet3_rxbuf *rxb;
2327	int i, j;
2328
2329	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
2330		rxr = &rxq->vxrxq_cmd_ring[i];
2331
2332		for (j = 0; j < rxr->vxrxr_ndesc; j++) {
2333			rxb = &rxr->vxrxr_rxbuf[j];
2334
2335			if (rxb->vrxb_m == NULL)
2336				continue;
2337
2338			bus_dmamap_sync(rxr->vxrxr_rxtag, rxb->vrxb_dmamap,
2339			    BUS_DMASYNC_POSTREAD);
2340			bus_dmamap_unload(rxr->vxrxr_rxtag, rxb->vrxb_dmamap);
2341			m_freem(rxb->vrxb_m);
2342			rxb->vrxb_m = NULL;
2343		}
2344	}
2345}
2346
2347static void
2348vmxnet3_stop_rendezvous(struct vmxnet3_softc *sc)
2349{
2350	struct vmxnet3_rxqueue *rxq;
2351	struct vmxnet3_txqueue *txq;
2352	int i;
2353
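	/*
	 * Acquire and immediately release each queue lock. Any interrupt
	 * handler that was active in a queue when the device was disabled
	 * has finished by the time its lock can be taken here, so the
	 * queues may be torn down safely afterwards.
	 */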
2354	for (i = 0; i < sc->vmx_nrxqueues; i++) {
2355		rxq = &sc->vmx_rxq[i];
2356		VMXNET3_RXQ_LOCK(rxq);
2357		VMXNET3_RXQ_UNLOCK(rxq);
2358	}
2359
2360	for (i = 0; i < sc->vmx_ntxqueues; i++) {
2361		txq = &sc->vmx_txq[i];
2362		VMXNET3_TXQ_LOCK(txq);
2363		VMXNET3_TXQ_UNLOCK(txq);
2364	}
2365}
2366
2367static void
2368vmxnet3_stop(struct vmxnet3_softc *sc)
2369{
2370	struct ifnet *ifp;
2371	int q;
2372
2373	ifp = sc->vmx_ifp;
2374	VMXNET3_CORE_LOCK_ASSERT(sc);
2375
2376	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2377	sc->vmx_link_active = 0;
2378	callout_stop(&sc->vmx_tick);
2379
2380	/* Disable interrupts. */
2381	vmxnet3_disable_all_intrs(sc);
2382	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
2383
2384	vmxnet3_stop_rendezvous(sc);
2385
2386	for (q = 0; q < sc->vmx_ntxqueues; q++)
2387		vmxnet3_txstop(sc, &sc->vmx_txq[q]);
2388	for (q = 0; q < sc->vmx_nrxqueues; q++)
2389		vmxnet3_rxstop(sc, &sc->vmx_rxq[q]);
2390
2391	vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
2392}
2393
2394static void
2395vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2396{
2397	struct vmxnet3_txring *txr;
2398	struct vmxnet3_comp_ring *txc;
2399
2400	txr = &txq->vxtxq_cmd_ring;
2401	txr->vxtxr_head = 0;
2402	txr->vxtxr_next = 0;
2403	txr->vxtxr_gen = VMXNET3_INIT_GEN;
2404	bzero(txr->vxtxr_txd,
2405	    txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc));
2406
2407	txc = &txq->vxtxq_comp_ring;
2408	txc->vxcr_next = 0;
2409	txc->vxcr_gen = VMXNET3_INIT_GEN;
2410	bzero(txc->vxcr_u.txcd,
2411	    txc->vxcr_ndesc * sizeof(struct vmxnet3_txcompdesc));
2412}
2413
2414static int
2415vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2416{
2417	struct ifnet *ifp;
2418	struct vmxnet3_rxring *rxr;
2419	struct vmxnet3_comp_ring *rxc;
2420	int i, populate, idx, frame_size, error;
2421
2422	ifp = sc->vmx_ifp;
2423	frame_size = ETHER_ALIGN + sizeof(struct ether_vlan_header) +
2424	    ifp->if_mtu;
2425
2426	/*
2427	 * If the MTU causes us to exceed what a regular sized cluster can
2428	 * handle, we allocate a second MJUMPAGESIZE cluster after it in
2429	 * ring 0. If in use, ring 1 always contains MJUMPAGESIZE clusters.
2430	 *
2431	 * Keep rx_max_chain a divisor of the maximum Rx ring size to make
2432	 * our life easier. We do not support changing the ring size after
2433	 * the attach.
2434	 */
2435	if (frame_size <= MCLBYTES)
2436		sc->vmx_rx_max_chain = 1;
2437	else
2438		sc->vmx_rx_max_chain = 2;
2439
2440	/*
2441	 * Only populate ring 1 if the configuration will take advantage
2442	 * of it. That is either when LRO is enabled or the frame size
2443	 * exceeds what ring 0 can contain.
2444	 */
2445	if ((ifp->if_capenable & IFCAP_LRO) == 0 &&
2446	    frame_size <= MCLBYTES + MJUMPAGESIZE)
2447		populate = 1;
2448	else
2449		populate = VMXNET3_RXRINGS_PERQ;
2450
2451	for (i = 0; i < populate; i++) {
2452		rxr = &rxq->vxrxq_cmd_ring[i];
2453		rxr->vxrxr_fill = 0;
2454		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
2455		bzero(rxr->vxrxr_rxd,
2456		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2457
2458		for (idx = 0; idx < rxr->vxrxr_ndesc; idx++) {
2459			error = vmxnet3_newbuf(sc, rxr);
2460			if (error)
2461				return (error);
2462		}
2463	}
2464
2465	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
2466		rxr = &rxq->vxrxq_cmd_ring[i];
2467		rxr->vxrxr_fill = 0;
2468		rxr->vxrxr_gen = 0;
2469		bzero(rxr->vxrxr_rxd,
2470		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2471	}
2472
2473	rxc = &rxq->vxrxq_comp_ring;
2474	rxc->vxcr_next = 0;
2475	rxc->vxcr_gen = VMXNET3_INIT_GEN;
2476	bzero(rxc->vxcr_u.rxcd,
2477	    rxc->vxcr_ndesc * sizeof(struct vmxnet3_rxcompdesc));
2478
2479	return (0);
2480}
2481
2482static int
2483vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
2484{
2485	device_t dev;
2486	int q, error;
2487
2488	dev = sc->vmx_dev;
2489
2490	for (q = 0; q < sc->vmx_ntxqueues; q++)
2491		vmxnet3_txinit(sc, &sc->vmx_txq[q]);
2492
2493	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2494		error = vmxnet3_rxinit(sc, &sc->vmx_rxq[q]);
2495		if (error) {
2496			device_printf(dev, "cannot populate Rx queue %d\n", q);
2497			return (error);
2498		}
2499	}
2500
2501	return (0);
2502}
2503
2504static int
2505vmxnet3_enable_device(struct vmxnet3_softc *sc)
2506{
2507	int q;
2508
2509	if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
2510		device_printf(sc->vmx_dev, "device enable command failed!\n");
2511		return (1);
2512	}
2513
2514	/* Reset the Rx queue heads. */
2515	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2516		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
2517		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
2518	}
2519
2520	return (0);
2521}
2522
2523static void
2524vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
2525{
2526	struct ifnet *ifp;
2527
2528	ifp = sc->vmx_ifp;
2529
2530	vmxnet3_set_rxfilter(sc);
2531
2532	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2533		bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter,
2534		    sizeof(sc->vmx_ds->vlan_filter));
2535	else
2536		bzero(sc->vmx_ds->vlan_filter,
2537		    sizeof(sc->vmx_ds->vlan_filter));
2538	vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
2539}
2540
2541static int
2542vmxnet3_reinit(struct vmxnet3_softc *sc)
2543{
2544
2545	vmxnet3_reinit_interface(sc);
2546	vmxnet3_reinit_shared_data(sc);
2547
2548	if (vmxnet3_reinit_queues(sc) != 0)
2549		return (ENXIO);
2550
2551	if (vmxnet3_enable_device(sc) != 0)
2552		return (ENXIO);
2553
2554	vmxnet3_reinit_rxfilters(sc);
2555
2556	return (0);
2557}
2558
2559static void
2560vmxnet3_init_locked(struct vmxnet3_softc *sc)
2561{
2562	struct ifnet *ifp;
2563
2564	ifp = sc->vmx_ifp;
2565
2566	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2567		return;
2568
2569	vmxnet3_stop(sc);
2570
2571	if (vmxnet3_reinit(sc) != 0) {
2572		vmxnet3_stop(sc);
2573		return;
2574	}
2575
2576	ifp->if_drv_flags |= IFF_DRV_RUNNING;
2577	vmxnet3_link_status(sc);
2578
2579	vmxnet3_enable_all_intrs(sc);
2580	callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
2581}
2582
2583static void
2584vmxnet3_init(void *xsc)
2585{
2586	struct vmxnet3_softc *sc;
2587
2588	sc = xsc;
2589
2590	VMXNET3_CORE_LOCK(sc);
2591	vmxnet3_init_locked(sc);
2592	VMXNET3_CORE_UNLOCK(sc);
2593}
2594
2595/*
2596 * BMV: Much of this can go away once we finally have offsets in
2597 * the mbuf packet header. Bug andre@.
2598 */
2599static int
2600vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *txq, struct mbuf *m,
2601    int *etype, int *proto, int *start)
2602{
2603	struct ether_vlan_header *evh;
2604	int offset;
2605
2606	evh = mtod(m, struct ether_vlan_header *);
2607	if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2608		/* BMV: We should handle nested VLAN tags too. */
2609		*etype = ntohs(evh->evl_proto);
2610		offset = sizeof(struct ether_vlan_header);
2611	} else {
2612		*etype = ntohs(evh->evl_encap_proto);
2613		offset = sizeof(struct ether_header);
2614	}
2615
2616	switch (*etype) {
2617#if defined(INET)
2618	case ETHERTYPE_IP: {
2619		struct ip *ip, iphdr;
2620		if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
2621			m_copydata(m, offset, sizeof(struct ip),
2622			    (caddr_t) &iphdr);
2623			ip = &iphdr;
2624		} else
2625			ip = mtodo(m, offset);
2626		*proto = ip->ip_p;
2627		*start = offset + (ip->ip_hl << 2);
2628		break;
2629	}
2630#endif
2631#if defined(INET6)
2632	case ETHERTYPE_IPV6:
2633		*proto = -1;
2634		*start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
2635		/* Assert the network stack sent us a valid packet. */
2636		KASSERT(*start > offset,
2637		    ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
2638		    *start, offset, *proto));
2639		break;
2640#endif
2641	default:
2642		return (EINVAL);
2643	}
2644
2645	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
2646		struct tcphdr *tcp, tcphdr;
2647
2648		if (__predict_false(*proto != IPPROTO_TCP)) {
2649			/* Likely failed to correctly parse the mbuf. */
2650			return (EINVAL);
2651		}
2652
2653		txq->vxtxq_stats.vmtxs_tso++;
2654
2655		/*
2656		 * For TSO, the size of the protocol header is also
2657		 * included in the descriptor header size.
2658		 */
2659		if (m->m_len < *start + sizeof(struct tcphdr)) {
2660			m_copydata(m, *start, sizeof(struct tcphdr),
2661			    (caddr_t) &tcphdr);
2662			tcp = &tcphdr;
2663		} else
2664			tcp = mtodo(m, *start);
2665		*start += (tcp->th_off << 2);
2666	} else
2667		txq->vxtxq_stats.vmtxs_csum++;
2668
2669	return (0);
2670}
2671
2672static int
2673vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *txq, struct mbuf **m0,
2674    bus_dmamap_t dmap, bus_dma_segment_t segs[], int *nsegs)
2675{
2676	struct vmxnet3_txring *txr;
2677	struct mbuf *m;
2678	bus_dma_tag_t tag;
2679	int error;
2680
2681	txr = &txq->vxtxq_cmd_ring;
2682	m = *m0;
2683	tag = txr->vxtxr_txtag;
2684
2685	error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0);
2686	if (error == 0 || error != EFBIG)
2687		return (error);
2688
2689	m = m_defrag(m, M_NOWAIT);
2690	if (m != NULL) {
2691		*m0 = m;
2692		error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0);
2693	} else
2694		error = ENOBUFS;
2695
2696	if (error) {
2697		m_freem(*m0);
2698		*m0 = NULL;
2699		txq->vxtxq_sc->vmx_stats.vmst_defrag_failed++;
2700	} else
2701		txq->vxtxq_sc->vmx_stats.vmst_defragged++;
2702
2703	return (error);
2704}
2705
2706static void
2707vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *txq, bus_dmamap_t dmap)
2708{
2709	struct vmxnet3_txring *txr;
2710
2711	txr = &txq->vxtxq_cmd_ring;
2712	bus_dmamap_unload(txr->vxtxr_txtag, dmap);
2713}
2714
2715static int
2716vmxnet3_txq_encap(struct vmxnet3_txqueue *txq, struct mbuf **m0)
2717{
2718	struct vmxnet3_softc *sc;
2719	struct ifnet *ifp;
2720	struct vmxnet3_txring *txr;
2721	struct vmxnet3_txdesc *txd, *sop;
2722	struct mbuf *m;
2723	bus_dmamap_t dmap;
2724	bus_dma_segment_t segs[VMXNET3_TX_MAXSEGS];
2725	int i, gen, nsegs, etype, proto, start, error;
2726
2727	sc = txq->vxtxq_sc;
2728	ifp = sc->vmx_ifp;
2729	start = 0;
2730	txd = NULL;
2731	txr = &txq->vxtxq_cmd_ring;
2732	dmap = txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_dmamap;
2733
2734	error = vmxnet3_txq_load_mbuf(txq, m0, dmap, segs, &nsegs);
2735	if (error)
2736		return (error);
2737
2738	m = *m0;
2739	M_ASSERTPKTHDR(m);
2740	KASSERT(nsegs <= VMXNET3_TX_MAXSEGS,
2741	    ("%s: mbuf %p with too many segments %d", __func__, m, nsegs));
2742
2743	if (VMXNET3_TXRING_AVAIL(txr) < nsegs) {
2744		txq->vxtxq_stats.vmtxs_full++;
2745		vmxnet3_txq_unload_mbuf(txq, dmap);
2746		return (ENOSPC);
2747	} else if (m->m_pkthdr.csum_flags & VMXNET3_CSUM_ALL_OFFLOAD) {
2748		error = vmxnet3_txq_offload_ctx(txq, m, &etype, &proto, &start);
2749		if (error) {
2750			txq->vxtxq_stats.vmtxs_offload_failed++;
2751			vmxnet3_txq_unload_mbuf(txq, dmap);
2752			m_freem(m);
2753			*m0 = NULL;
2754			return (error);
2755		}
2756	}
2757
2758	txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_m = m = *m0;
2759	sop = &txr->vxtxr_txd[txr->vxtxr_head];
2760	gen = txr->vxtxr_gen ^ 1;	/* Not owned by the device yet */
2761
2762	for (i = 0; i < nsegs; i++) {
2763		txd = &txr->vxtxr_txd[txr->vxtxr_head];
2764
2765		txd->addr = segs[i].ds_addr;
2766		txd->len = segs[i].ds_len;
2767		txd->gen = gen;
2768		txd->dtype = 0;
2769		txd->offload_mode = VMXNET3_OM_NONE;
2770		txd->offload_pos = 0;
2771		txd->hlen = 0;
2772		txd->eop = 0;
2773		txd->compreq = 0;
2774		txd->vtag_mode = 0;
2775		txd->vtag = 0;
2776
2777		if (++txr->vxtxr_head == txr->vxtxr_ndesc) {
2778			txr->vxtxr_head = 0;
2779			txr->vxtxr_gen ^= 1;
2780		}
2781		gen = txr->vxtxr_gen;
2782	}
2783	txd->eop = 1;
2784	txd->compreq = 1;
2785
2786	if (m->m_flags & M_VLANTAG) {
2787		sop->vtag_mode = 1;
2788		sop->vtag = m->m_pkthdr.ether_vtag;
2789	}
2790
2791	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
2792		sop->offload_mode = VMXNET3_OM_TSO;
2793		sop->hlen = start;
2794		sop->offload_pos = m->m_pkthdr.tso_segsz;
2795	} else if (m->m_pkthdr.csum_flags & (VMXNET3_CSUM_OFFLOAD |
2796	    VMXNET3_CSUM_OFFLOAD_IPV6)) {
2797		sop->offload_mode = VMXNET3_OM_CSUM;
2798		sop->hlen = start;
2799		sop->offload_pos = start + m->m_pkthdr.csum_data;
2800	}
2801
2802	/* Finally, change the ownership. */
2803	vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
2804	sop->gen ^= 1;
2805
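	/*
	 * Batch the BAR0 writes: only tell the device about the new Tx head
	 * once enough descriptors are pending rather than on every packet;
	 * vmxnet3_txq_update_pending() flushes whatever remains.
	 */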
2806	if (++txq->vxtxq_ts->npending >= txq->vxtxq_ts->intr_threshold) {
2807		txq->vxtxq_ts->npending = 0;
2808		vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id),
2809		    txr->vxtxr_head);
2810	}
2811
2812	return (0);
2813}
2814
2815static void
2816vmxnet3_txq_update_pending(struct vmxnet3_txqueue *txq)
2817{
2818	struct vmxnet3_txring *txr;
2819
2820	txr = &txq->vxtxq_cmd_ring;
2821
2822	if (txq->vxtxq_ts->npending > 0) {
2823		txq->vxtxq_ts->npending = 0;
2824		vmxnet3_write_bar0(txq->vxtxq_sc,
2825		    VMXNET3_BAR0_TXH(txq->vxtxq_id), txr->vxtxr_head);
2826	}
2827}
2828
2829#ifdef VMXNET3_LEGACY_TX
2830
2831static void
2832vmxnet3_start_locked(struct ifnet *ifp)
2833{
2834	struct vmxnet3_softc *sc;
2835	struct vmxnet3_txqueue *txq;
2836	struct vmxnet3_txring *txr;
2837	struct mbuf *m_head;
2838	int tx, avail;
2839
2840	sc = ifp->if_softc;
2841	txq = &sc->vmx_txq[0];
2842	txr = &txq->vxtxq_cmd_ring;
2843	tx = 0;
2844
2845	VMXNET3_TXQ_LOCK_ASSERT(txq);
2846
2847	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2848	    sc->vmx_link_active == 0)
2849		return;
2850
2851	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
2852		if ((avail = VMXNET3_TXRING_AVAIL(txr)) < 2)
2853			break;
2854
2855		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
2856		if (m_head == NULL)
2857			break;
2858
2859		/* Assume the worst case if this mbuf is the head of a chain. */
2860		if (m_head->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) {
2861			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2862			break;
2863		}
2864
2865		if (vmxnet3_txq_encap(txq, &m_head) != 0) {
2866			if (m_head != NULL)
2867				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2868			break;
2869		}
2870
2871		tx++;
2872		ETHER_BPF_MTAP(ifp, m_head);
2873	}
2874
2875	if (tx > 0) {
2876		vmxnet3_txq_update_pending(txq);
2877		txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
2878	}
2879}
2880
2881static void
2882vmxnet3_start(struct ifnet *ifp)
2883{
2884	struct vmxnet3_softc *sc;
2885	struct vmxnet3_txqueue *txq;
2886
2887	sc = ifp->if_softc;
2888	txq = &sc->vmx_txq[0];
2889
2890	VMXNET3_TXQ_LOCK(txq);
2891	vmxnet3_start_locked(ifp);
2892	VMXNET3_TXQ_UNLOCK(txq);
2893}
2894
2895#else /* !VMXNET3_LEGACY_TX */
2896
2897static int
2898vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *txq, struct mbuf *m)
2899{
2900	struct vmxnet3_softc *sc;
2901	struct vmxnet3_txring *txr;
2902	struct buf_ring *br;
2903	struct ifnet *ifp;
2904	int tx, avail, error;
2905
2906	sc = txq->vxtxq_sc;
2907	br = txq->vxtxq_br;
2908	ifp = sc->vmx_ifp;
2909	txr = &txq->vxtxq_cmd_ring;
2910	tx = 0;
2911	error = 0;
2912
2913	VMXNET3_TXQ_LOCK_ASSERT(txq);
2914
2915	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2916	    sc->vmx_link_active == 0) {
2917		if (m != NULL)
2918			error = drbr_enqueue(ifp, br, m);
2919		return (error);
2920	}
2921
2922	if (m != NULL) {
2923		error = drbr_enqueue(ifp, br, m);
2924		if (error)
2925			return (error);
2926	}
2927
2928	while ((avail = VMXNET3_TXRING_AVAIL(txr)) >= 2) {
2929		m = drbr_peek(ifp, br);
2930		if (m == NULL)
2931			break;
2932
2933		/* Assume the worst case if this mbuf is the head of a chain. */
2934		if (m->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) {
2935			drbr_putback(ifp, br, m);
2936			break;
2937		}
2938
2939		error = vmxnet3_txq_encap(txq, &m);
2940		if (error) {
2941			if (m != NULL)
2942				drbr_putback(ifp, br, m);
2943			else
2944				drbr_advance(ifp, br);
2945			break;
2946		}
2947		drbr_advance(ifp, br);
2948
2949		tx++;
2950		ETHER_BPF_MTAP(ifp, m);
2951	}
2952
2953	if (tx > 0) {
2954		vmxnet3_txq_update_pending(txq);
2955		txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
2956	}
2957
2958	return (0);
2959}
2960
2961static int
2962vmxnet3_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
2963{
2964	struct vmxnet3_softc *sc;
2965	struct vmxnet3_txqueue *txq;
2966	int i, ntxq, error;
2967
2968	sc = ifp->if_softc;
2969	ntxq = sc->vmx_ntxqueues;
2970
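	/*
	 * Spread flows across the Tx queues: use the packet's flow id when
	 * one is present, otherwise fall back to the current CPU.
	 */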
2971	if (m->m_flags & M_FLOWID)
2972		i = m->m_pkthdr.flowid % ntxq;
2973	else
2974		i = curcpu % ntxq;
2975
2976	txq = &sc->vmx_txq[i];
2977
2978	if (VMXNET3_TXQ_TRYLOCK(txq) != 0) {
2979		error = vmxnet3_txq_mq_start_locked(txq, m);
2980		VMXNET3_TXQ_UNLOCK(txq);
2981	} else {
2982		error = drbr_enqueue(ifp, txq->vxtxq_br, m);
2983		taskqueue_enqueue(sc->vmx_tq, &txq->vxtxq_defrtask);
2984	}
2985
2986	return (error);
2987}
2988
2989static void
2990vmxnet3_txq_tq_deferred(void *xtxq, int pending)
2991{
2992	struct vmxnet3_softc *sc;
2993	struct vmxnet3_txqueue *txq;
2994
2995	txq = xtxq;
2996	sc = txq->vxtxq_sc;
2997
2998	VMXNET3_TXQ_LOCK(txq);
2999	if (!drbr_empty(sc->vmx_ifp, txq->vxtxq_br))
3000		vmxnet3_txq_mq_start_locked(txq, NULL);
3001	VMXNET3_TXQ_UNLOCK(txq);
3002}
3003
3004#endif /* VMXNET3_LEGACY_TX */
3005
3006static void
3007vmxnet3_txq_start(struct vmxnet3_txqueue *txq)
3008{
3009	struct vmxnet3_softc *sc;
3010	struct ifnet *ifp;
3011
3012	sc = txq->vxtxq_sc;
3013	ifp = sc->vmx_ifp;
3014
3015#ifdef VMXNET3_LEGACY_TX
3016	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
3017		vmxnet3_start_locked(ifp);
3018#else
3019	if (!drbr_empty(ifp, txq->vxtxq_br))
3020		vmxnet3_txq_mq_start_locked(txq, NULL);
3021#endif
3022}
3023
3024static void
3025vmxnet3_tx_start_all(struct vmxnet3_softc *sc)
3026{
3027	struct vmxnet3_txqueue *txq;
3028	int i;
3029
3030	VMXNET3_CORE_LOCK_ASSERT(sc);
3031
3032	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3033		txq = &sc->vmx_txq[i];
3034
3035		VMXNET3_TXQ_LOCK(txq);
3036		vmxnet3_txq_start(txq);
3037		VMXNET3_TXQ_UNLOCK(txq);
3038	}
3039}
3040
3041static void
3042vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
3043{
3044	struct ifnet *ifp;
3045	int idx, bit;
3046
3047	ifp = sc->vmx_ifp;
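	/*
	 * The VLAN filter is a 4096-bit bitmap kept as 128 32-bit words;
	 * locate the word index and bit position for this tag.
	 */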
3048	idx = (tag >> 5) & 0x7F;
3049	bit = tag & 0x1F;
3050
3051	if (tag == 0 || tag > 4095)
3052		return;
3053
3054	VMXNET3_CORE_LOCK(sc);
3055
3056	/* Update our private VLAN bitvector. */
3057	if (add)
3058		sc->vmx_vlan_filter[idx] |= (1 << bit);
3059	else
3060		sc->vmx_vlan_filter[idx] &= ~(1 << bit);
3061
3062	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
3063		if (add)
3064			sc->vmx_ds->vlan_filter[idx] |= (1 << bit);
3065		else
3066			sc->vmx_ds->vlan_filter[idx] &= ~(1 << bit);
3067		vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
3068	}
3069
3070	VMXNET3_CORE_UNLOCK(sc);
3071}
3072
3073static void
3074vmxnet3_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3075{
3076
3077	if (ifp->if_softc == arg)
3078		vmxnet3_update_vlan_filter(arg, 1, tag);
3079}
3080
3081static void
3082vmxnet3_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3083{
3084
3085	if (ifp->if_softc == arg)
3086		vmxnet3_update_vlan_filter(arg, 0, tag);
3087}
3088
3089static void
3090vmxnet3_set_rxfilter(struct vmxnet3_softc *sc)
3091{
3092	struct ifnet *ifp;
3093	struct vmxnet3_driver_shared *ds;
3094	struct ifmultiaddr *ifma;
3095	u_int mode;
3096
3097	ifp = sc->vmx_ifp;
3098	ds = sc->vmx_ds;
3099
3100	mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
3101	if (ifp->if_flags & IFF_PROMISC)
3102		mode |= VMXNET3_RXMODE_PROMISC;
3103	if (ifp->if_flags & IFF_ALLMULTI)
3104		mode |= VMXNET3_RXMODE_ALLMULTI;
3105	else {
3106		int cnt = 0, overflow = 0;
3107
3108		if_maddr_rlock(ifp);
3109		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3110			if (ifma->ifma_addr->sa_family != AF_LINK)
3111				continue;
3112			else if (cnt == VMXNET3_MULTICAST_MAX) {
3113				overflow = 1;
3114				break;
3115			}
3116
3117			bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
3118			   &sc->vmx_mcast[cnt*ETHER_ADDR_LEN], ETHER_ADDR_LEN);
3119			cnt++;
3120		}
3121		if_maddr_runlock(ifp);
3122
3123		if (overflow != 0) {
3124			cnt = 0;
3125			mode |= VMXNET3_RXMODE_ALLMULTI;
3126		} else if (cnt > 0)
3127			mode |= VMXNET3_RXMODE_MCAST;
3128		ds->mcast_tablelen = cnt * ETHER_ADDR_LEN;
3129	}
3130
3131	ds->rxmode = mode;
3132
3133	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
3134	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
3135}
3136
3137static int
3138vmxnet3_change_mtu(struct vmxnet3_softc *sc, int mtu)
3139{
3140	struct ifnet *ifp;
3141
3142	ifp = sc->vmx_ifp;
3143
3144	if (mtu < VMXNET3_MIN_MTU || mtu > VMXNET3_MAX_MTU)
3145		return (EINVAL);
3146
3147	ifp->if_mtu = mtu;
3148
3149	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
3150		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3151		vmxnet3_init_locked(sc);
3152	}
3153
3154	return (0);
3155}
3156
3157static int
3158vmxnet3_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
3159{
3160	struct vmxnet3_softc *sc;
3161	struct ifreq *ifr;
3162	int reinit, mask, error;
3163
3164	sc = ifp->if_softc;
3165	ifr = (struct ifreq *) data;
3166	error = 0;
3167
3168	switch (cmd) {
3169	case SIOCSIFMTU:
3170		if (ifp->if_mtu != ifr->ifr_mtu) {
3171			VMXNET3_CORE_LOCK(sc);
3172			error = vmxnet3_change_mtu(sc, ifr->ifr_mtu);
3173			VMXNET3_CORE_UNLOCK(sc);
3174		}
3175		break;
3176
3177	case SIOCSIFFLAGS:
3178		VMXNET3_CORE_LOCK(sc);
3179		if (ifp->if_flags & IFF_UP) {
3180			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3181				if ((ifp->if_flags ^ sc->vmx_if_flags) &
3182				    (IFF_PROMISC | IFF_ALLMULTI)) {
3183					vmxnet3_set_rxfilter(sc);
3184				}
3185			} else
3186				vmxnet3_init_locked(sc);
3187		} else {
3188			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3189				vmxnet3_stop(sc);
3190		}
3191		sc->vmx_if_flags = ifp->if_flags;
3192		VMXNET3_CORE_UNLOCK(sc);
3193		break;
3194
3195	case SIOCADDMULTI:
3196	case SIOCDELMULTI:
3197		VMXNET3_CORE_LOCK(sc);
3198		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3199			vmxnet3_set_rxfilter(sc);
3200		VMXNET3_CORE_UNLOCK(sc);
3201		break;
3202
3203	case SIOCSIFMEDIA:
3204	case SIOCGIFMEDIA:
3205		error = ifmedia_ioctl(ifp, ifr, &sc->vmx_media, cmd);
3206		break;
3207
3208	case SIOCSIFCAP:
3209		VMXNET3_CORE_LOCK(sc);
3210		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3211
3212		if (mask & IFCAP_TXCSUM)
3213			ifp->if_capenable ^= IFCAP_TXCSUM;
3214		if (mask & IFCAP_TXCSUM_IPV6)
3215			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
3216		if (mask & IFCAP_TSO4)
3217			ifp->if_capenable ^= IFCAP_TSO4;
3218		if (mask & IFCAP_TSO6)
3219			ifp->if_capenable ^= IFCAP_TSO6;
3220
3221		if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO |
3222		    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWFILTER)) {
3223			/* Changing these features requires us to reinit. */
3224			reinit = 1;
3225
3226			if (mask & IFCAP_RXCSUM)
3227				ifp->if_capenable ^= IFCAP_RXCSUM;
3228			if (mask & IFCAP_RXCSUM_IPV6)
3229				ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
3230			if (mask & IFCAP_LRO)
3231				ifp->if_capenable ^= IFCAP_LRO;
3232			if (mask & IFCAP_VLAN_HWTAGGING)
3233				ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
3234			if (mask & IFCAP_VLAN_HWFILTER)
3235				ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
3236		} else
3237			reinit = 0;
3238
3239		if (mask & IFCAP_VLAN_HWTSO)
3240			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
3241
3242		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3243			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3244			vmxnet3_init_locked(sc);
3245		}
3246
3247		VMXNET3_CORE_UNLOCK(sc);
3248		VLAN_CAPABILITIES(ifp);
3249		break;
3250
3251	default:
3252		error = ether_ioctl(ifp, cmd, data);
3253		break;
3254	}
3255
3256	VMXNET3_CORE_LOCK_ASSERT_NOTOWNED(sc);
3257
3258	return (error);
3259}
3260
3261#ifndef VMXNET3_LEGACY_TX
3262static void
3263vmxnet3_qflush(struct ifnet *ifp)
3264{
3265	struct vmxnet3_softc *sc;
3266	struct vmxnet3_txqueue *txq;
3267	struct mbuf *m;
3268	int i;
3269
3270	sc = ifp->if_softc;
3271
3272	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3273		txq = &sc->vmx_txq[i];
3274
3275		VMXNET3_TXQ_LOCK(txq);
3276		while ((m = buf_ring_dequeue_sc(txq->vxtxq_br)) != NULL)
3277			m_freem(m);
3278		VMXNET3_TXQ_UNLOCK(txq);
3279	}
3280
3281	if_qflush(ifp);
3282}
3283#endif
3284
3285static int
3286vmxnet3_watchdog(struct vmxnet3_txqueue *txq)
3287{
3288	struct vmxnet3_softc *sc;
3289
3290	sc = txq->vxtxq_sc;
3291
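	/*
	 * The per-queue watchdog is armed when packets are enqueued and
	 * cleared once the Tx ring drains; if it counts down to zero here,
	 * the queue has stalled and the caller reinitializes the interface.
	 */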
3292	VMXNET3_TXQ_LOCK(txq);
3293	if (txq->vxtxq_watchdog == 0 || --txq->vxtxq_watchdog) {
3294		VMXNET3_TXQ_UNLOCK(txq);
3295		return (0);
3296	}
3297	VMXNET3_TXQ_UNLOCK(txq);
3298
3299	if_printf(sc->vmx_ifp, "watchdog timeout on queue %d\n",
3300	    txq->vxtxq_id);
3301	return (1);
3302}
3303
3304static void
3305vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
3306{
3307
3308	vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
3309}
3310
3311static void
3312vmxnet3_txq_accum_stats(struct vmxnet3_txqueue *txq,
3313    struct vmxnet3_txq_stats *accum)
3314{
3315	struct vmxnet3_txq_stats *st;
3316
3317	st = &txq->vxtxq_stats;
3318
3319	accum->vmtxs_opackets += st->vmtxs_opackets;
3320	accum->vmtxs_obytes += st->vmtxs_obytes;
3321	accum->vmtxs_omcasts += st->vmtxs_omcasts;
3322	accum->vmtxs_csum += st->vmtxs_csum;
3323	accum->vmtxs_tso += st->vmtxs_tso;
3324	accum->vmtxs_full += st->vmtxs_full;
3325	accum->vmtxs_offload_failed += st->vmtxs_offload_failed;
3326}
3327
3328static void
3329vmxnet3_rxq_accum_stats(struct vmxnet3_rxqueue *rxq,
3330    struct vmxnet3_rxq_stats *accum)
3331{
3332	struct vmxnet3_rxq_stats *st;
3333
3334	st = &rxq->vxrxq_stats;
3335
3336	accum->vmrxs_ipackets += st->vmrxs_ipackets;
3337	accum->vmrxs_ibytes += st->vmrxs_ibytes;
3338	accum->vmrxs_iqdrops += st->vmrxs_iqdrops;
3339	accum->vmrxs_ierrors += st->vmrxs_ierrors;
3340}
3341
3342static void
3343vmxnet3_accumulate_stats(struct vmxnet3_softc *sc)
3344{
3345	struct ifnet *ifp;
3346	struct vmxnet3_statistics *st;
3347	struct vmxnet3_txq_stats txaccum;
3348	struct vmxnet3_rxq_stats rxaccum;
3349	int i;
3350
3351	ifp = sc->vmx_ifp;
3352	st = &sc->vmx_stats;
3353
3354	bzero(&txaccum, sizeof(struct vmxnet3_txq_stats));
3355	bzero(&rxaccum, sizeof(struct vmxnet3_rxq_stats));
3356
3357	for (i = 0; i < sc->vmx_ntxqueues; i++)
3358		vmxnet3_txq_accum_stats(&sc->vmx_txq[i], &txaccum);
3359	for (i = 0; i < sc->vmx_nrxqueues; i++)
3360		vmxnet3_rxq_accum_stats(&sc->vmx_rxq[i], &rxaccum);
3361
3362	/*
3363	 * With the exception of if_ierrors, these ifnet statistics are
3364	 * only updated in the driver, so just set them to our accumulated
3365	 * values. if_ierrors is updated in ether_input() for malformed
3366	 * frames that we should have already discarded.
3367	 */
3368	ifp->if_ipackets = rxaccum.vmrxs_ipackets;
3369	ifp->if_iqdrops = rxaccum.vmrxs_iqdrops;
3370	ifp->if_ierrors = rxaccum.vmrxs_ierrors;
3371	ifp->if_opackets = txaccum.vmtxs_opackets;
3372#ifndef VMXNET3_LEGACY_TX
3373	ifp->if_obytes = txaccum.vmtxs_obytes;
3374	ifp->if_omcasts = txaccum.vmtxs_omcasts;
3375#endif
3376}
3377
3378static void
3379vmxnet3_tick(void *xsc)
3380{
3381	struct vmxnet3_softc *sc;
3382	struct ifnet *ifp;
3383	int i, timedout;
3384
3385	sc = xsc;
3386	ifp = sc->vmx_ifp;
3387	timedout = 0;
3388
3389	VMXNET3_CORE_LOCK_ASSERT(sc);
3390
3391	vmxnet3_accumulate_stats(sc);
3392	vmxnet3_refresh_host_stats(sc);
3393
3394	for (i = 0; i < sc->vmx_ntxqueues; i++)
3395		timedout |= vmxnet3_watchdog(&sc->vmx_txq[i]);
3396
3397	if (timedout != 0) {
3398		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3399		vmxnet3_init_locked(sc);
3400	} else
3401		callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
3402}
3403
3404static int
3405vmxnet3_link_is_up(struct vmxnet3_softc *sc)
3406{
3407	uint32_t status;
3408
3409	/* Also update the link speed while here. */
3410	status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
3411	sc->vmx_link_speed = status >> 16;
3412	return !!(status & 0x1);
3413}
3414
3415static void
3416vmxnet3_link_status(struct vmxnet3_softc *sc)
3417{
3418	struct ifnet *ifp;
3419	int link;
3420
3421	ifp = sc->vmx_ifp;
3422	link = vmxnet3_link_is_up(sc);
3423
3424	if (link != 0 && sc->vmx_link_active == 0) {
3425		sc->vmx_link_active = 1;
3426		if_link_state_change(ifp, LINK_STATE_UP);
3427	} else if (link == 0 && sc->vmx_link_active != 0) {
3428		sc->vmx_link_active = 0;
3429		if_link_state_change(ifp, LINK_STATE_DOWN);
3430	}
3431}
3432
3433static void
3434vmxnet3_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3435{
3436	struct vmxnet3_softc *sc;
3437
3438	sc = ifp->if_softc;
3439
3440	ifmr->ifm_active = IFM_ETHER | IFM_AUTO;
3441	ifmr->ifm_status = IFM_AVALID;
3442
3443	VMXNET3_CORE_LOCK(sc);
3444	if (vmxnet3_link_is_up(sc) != 0)
3445		ifmr->ifm_status |= IFM_ACTIVE;
3446	else
3447		ifmr->ifm_status |= IFM_NONE;
3448	VMXNET3_CORE_UNLOCK(sc);
3449}
3450
3451static int
3452vmxnet3_media_change(struct ifnet *ifp)
3453{
3454
3455	/* Ignore. */
3456	return (0);
3457}
3458
3459static void
3460vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
3461{
3462	uint32_t ml, mh;
3463
3464	ml  = sc->vmx_lladdr[0];
3465	ml |= sc->vmx_lladdr[1] << 8;
3466	ml |= sc->vmx_lladdr[2] << 16;
3467	ml |= sc->vmx_lladdr[3] << 24;
3468	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
3469
3470	mh  = sc->vmx_lladdr[4];
3471	mh |= sc->vmx_lladdr[5] << 8;
3472	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
3473}
3474
3475static void
3476vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
3477{
3478	uint32_t ml, mh;
3479
3480	ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
3481	mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
3482
3483	sc->vmx_lladdr[0] = ml;
3484	sc->vmx_lladdr[1] = ml >> 8;
3485	sc->vmx_lladdr[2] = ml >> 16;
3486	sc->vmx_lladdr[3] = ml >> 24;
3487	sc->vmx_lladdr[4] = mh;
3488	sc->vmx_lladdr[5] = mh >> 8;
3489}
3490
3491static void
3492vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
3493    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3494{
3495	struct sysctl_oid *node, *txsnode;
3496	struct sysctl_oid_list *list, *txslist;
3497	struct vmxnet3_txq_stats *stats;
3498	struct UPT1_TxStats *txstats;
3499	char namebuf[16];
3500
3501	stats = &txq->vxtxq_stats;
3502	txstats = &txq->vxtxq_ts->stats;
3503
3504	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id);
3505	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
3506	    NULL, "Transmit Queue");
3507	txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);
3508
3509	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
3510	    &stats->vmtxs_opackets, "Transmit packets");
3511	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
3512	    &stats->vmtxs_obytes, "Transmit bytes");
3513	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
3514	    &stats->vmtxs_omcasts, "Transmit multicasts");
3515	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3516	    &stats->vmtxs_csum, "Transmit checksum offloaded");
3517	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
3518	    &stats->vmtxs_tso, "Transmit TCP segmentation offloaded");
3519	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ringfull", CTLFLAG_RD,
3520	    &stats->vmtxs_full, "Transmit ring full");
3521	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "offload_failed", CTLFLAG_RD,
3522	    &stats->vmtxs_offload_failed, "Transmit checksum offload failed");
3523
3524	/*
3525	 * Add statistics reported by the host. These are updated once
3526	 * per second.
3527	 */
3528	txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
3529	    NULL, "Host Statistics");
3530	txslist = SYSCTL_CHILDREN(txsnode);
3531	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
3532	    &txstats->TSO_packets, "TSO packets");
3533	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
3534	    &txstats->TSO_bytes, "TSO bytes");
3535	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
3536	    &txstats->ucast_packets, "Unicast packets");
3537	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
3538	    &txstats->ucast_bytes, "Unicast bytes");
3539	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
3540	    &txstats->mcast_packets, "Multicast packets");
3541	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
3542	    &txstats->mcast_bytes, "Multicast bytes");
3543	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
3544	    &txstats->error, "Errors");
3545	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
3546	    &txstats->discard, "Discards");
3547}
3548
3549static void
3550vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
3551    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3552{
3553	struct sysctl_oid *node, *rxsnode;
3554	struct sysctl_oid_list *list, *rxslist;
3555	struct vmxnet3_rxq_stats *stats;
3556	struct UPT1_RxStats *rxstats;
3557	char namebuf[16];
3558
3559	stats = &rxq->vxrxq_stats;
3560	rxstats = &rxq->vxrxq_rs->stats;
3561
3562	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
3563	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
3564	    NULL, "Receive Queue");
3565	rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);
3566
3567	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
3568	    &stats->vmrxs_ipackets, "Receive packets");
3569	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
3570	    &stats->vmrxs_ibytes, "Receive bytes");
3571	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
3572	    &stats->vmrxs_iqdrops, "Receive drops");
3573	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
3574	    &stats->vmrxs_ierrors, "Receive errors");
3575
3576	/*
3577	 * Add statistics reported by the host. These are updated once
3578	 * per second.
3579	 */
3580	rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
3581	    NULL, "Host Statistics");
3582	rxslist = SYSCTL_CHILDREN(rxsnode);
3583	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
3584	    &rxstats->LRO_packets, "LRO packets");
3585	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
3586	    &rxstats->LRO_bytes, "LRO bytes");
3587	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
3588	    &rxstats->ucast_packets, "Unicast packets");
3589	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
3590	    &rxstats->ucast_bytes, "Unicast bytes");
3591	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
3592	    &rxstats->mcast_packets, "Multicast packets");
3593	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
3594	    &rxstats->mcast_bytes, "Multicast bytes");
3595	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
3596	    &rxstats->bcast_packets, "Broadcast packets");
3597	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
3598	    &rxstats->bcast_bytes, "Broadcast bytes");
3599	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
3600	    &rxstats->nobuffer, "No buffer");
3601	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
3602	    &rxstats->error, "Errors");
3603}
3604
3605static void
3606vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
3607    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3608{
3609	struct sysctl_oid *node;
3610	struct sysctl_oid_list *list;
3611	int i;
3612
3613	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3614		struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];
3615
3616		node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
3617		    "debug", CTLFLAG_RD, NULL, "");
3618		list = SYSCTL_CHILDREN(node);
3619
3620		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_head", CTLFLAG_RD,
3621		    &txq->vxtxq_cmd_ring.vxtxr_head, 0, "");
3622		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
3623		    &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
3624		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
3625		    &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
3626		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
3627		    &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
3628		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
3629		    &txq->vxtxq_comp_ring.vxcr_next, 0, "");
3630		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
3631		    &txq->vxtxq_comp_ring.vxcr_ndesc, 0,"");
3632		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
3633		    &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
3634	}
3635
3636	for (i = 0; i < sc->vmx_nrxqueues; i++) {
3637		struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];
3638
3639		node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
3640		    "debug", CTLFLAG_RD, NULL, "");
3641		list = SYSCTL_CHILDREN(node);
3642
3643		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_fill", CTLFLAG_RD,
3644		    &rxq->vxrxq_cmd_ring[0].vxrxr_fill, 0, "");
3645		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
3646		    &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
3647		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
3648		    &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
3649		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_fill", CTLFLAG_RD,
3650		    &rxq->vxrxq_cmd_ring[1].vxrxr_fill, 0, "");
3651		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
3652		    &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
3653		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
3654		    &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
3655		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
3656		    &rxq->vxrxq_comp_ring.vxcr_next, 0, "");
3657		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
3658		    &rxq->vxrxq_comp_ring.vxcr_ndesc, 0,"");
3659		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
3660		    &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
3661	}
3662}
3663
3664static void
3665vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
3666    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3667{
3668	int i;
3669
3670	for (i = 0; i < sc->vmx_ntxqueues; i++)
3671		vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
3672	for (i = 0; i < sc->vmx_nrxqueues; i++)
3673		vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);
3674
3675	vmxnet3_setup_debug_sysctl(sc, ctx, child);
3676}
3677
3678static void
3679vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
3680{
3681	device_t dev;
3682	struct vmxnet3_statistics *stats;
3683	struct sysctl_ctx_list *ctx;
3684	struct sysctl_oid *tree;
3685	struct sysctl_oid_list *child;
3686
3687	dev = sc->vmx_dev;
3688	ctx = device_get_sysctl_ctx(dev);
3689	tree = device_get_sysctl_tree(dev);
3690	child = SYSCTL_CHILDREN(tree);
3691
3692	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_ntxqueues", CTLFLAG_RD,
3693	    &sc->vmx_max_ntxqueues, 0, "Maximum number of Tx queues");
3694	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_nrxqueues", CTLFLAG_RD,
3695	    &sc->vmx_max_nrxqueues, 0, "Maximum number of Rx queues");
3696	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "ntxqueues", CTLFLAG_RD,
3697	    &sc->vmx_ntxqueues, 0, "Number of Tx queues");
3698	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "nrxqueues", CTLFLAG_RD,
3699	    &sc->vmx_nrxqueues, 0, "Number of Rx queues");
3700
3701	stats = &sc->vmx_stats;
3702	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defragged", CTLFLAG_RD,
3703	    &stats->vmst_defragged, 0, "Tx mbuf chains defragged");
3704	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defrag_failed", CTLFLAG_RD,
3705	    &stats->vmst_defrag_failed, 0,
3706	    "Tx mbuf dropped because defrag failed");
3707	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mgetcl_failed", CTLFLAG_RD,
3708	    &stats->vmst_mgetcl_failed, 0, "mbuf cluster allocation failed");
3709	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mbuf_load_failed", CTLFLAG_RD,
3710	    &stats->vmst_mbuf_load_failed, 0, "mbuf load segments failed");
3711
3712	vmxnet3_setup_queue_sysctl(sc, ctx, child);
3713}
3714
3715static void
3716vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
3717{
3718
3719	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
3720}
3721
3722static uint32_t
3723vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
3724{
3725
3726	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
3727}
3728
3729static void
3730vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
3731{
3732
3733	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
3734}
3735
3736static void
3737vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
3738{
3739
3740	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
3741}
3742
3743static uint32_t
3744vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
3745{
3746
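	/*
	 * Commands are issued by writing them to the BAR1 command register
	 * and reading the result back from the same register; the barrier
	 * keeps the write and the subsequent read ordered.
	 */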
3747	vmxnet3_write_cmd(sc, cmd);
3748	bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
3749	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
3750	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
3751}
3752
3753static void
3754vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
3755{
3756
3757	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
3758}
3759
3760static void
3761vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
3762{
3763
3764	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
3765}
3766
3767static void
3768vmxnet3_enable_all_intrs(struct vmxnet3_softc *sc)
3769{
3770	int i;
3771
3772	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
3773	for (i = 0; i < sc->vmx_nintrs; i++)
3774		vmxnet3_enable_intr(sc, i);
3775}
3776
3777static void
3778vmxnet3_disable_all_intrs(struct vmxnet3_softc *sc)
3779{
3780	int i;
3781
3782	sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
3783	for (i = 0; i < sc->vmx_nintrs; i++)
3784		vmxnet3_disable_intr(sc, i);
3785}
3786
3787static void
3788vmxnet3_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3789{
3790	bus_addr_t *baddr = arg;
3791
3792	if (error == 0)
3793		*baddr = segs->ds_addr;
3794}
3795
3796static int
3797vmxnet3_dma_malloc(struct vmxnet3_softc *sc, bus_size_t size, bus_size_t align,
3798    struct vmxnet3_dma_alloc *dma)
3799{
3800	device_t dev;
3801	int error;
3802
3803	dev = sc->vmx_dev;
3804	bzero(dma, sizeof(struct vmxnet3_dma_alloc));
3805
3806	error = bus_dma_tag_create(bus_get_dma_tag(dev),
3807	    align, 0,		/* alignment, bounds */
3808	    BUS_SPACE_MAXADDR,	/* lowaddr */
3809	    BUS_SPACE_MAXADDR,	/* highaddr */
3810	    NULL, NULL,		/* filter, filterarg */
3811	    size,		/* maxsize */
3812	    1,			/* nsegments */
3813	    size,		/* maxsegsize */
3814	    BUS_DMA_ALLOCNOW,	/* flags */
3815	    NULL,		/* lockfunc */
3816	    NULL,		/* lockfuncarg */
3817	    &dma->dma_tag);
3818	if (error) {
3819		device_printf(dev, "bus_dma_tag_create failed: %d\n", error);
3820		goto fail;
3821	}
3822
3823	error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
3824	    BUS_DMA_ZERO | BUS_DMA_NOWAIT, &dma->dma_map);
3825	if (error) {
3826		device_printf(dev, "bus_dmamem_alloc failed: %d\n", error);
3827		goto fail;
3828	}
3829
3830	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3831	    size, vmxnet3_dmamap_cb, &dma->dma_paddr, BUS_DMA_NOWAIT);
3832	if (error) {
3833		device_printf(dev, "bus_dmamap_load failed: %d\n", error);
3834		goto fail;
3835	}
3836
3837	dma->dma_size = size;
3838
3839fail:
3840	if (error)
3841		vmxnet3_dma_free(sc, dma);
3842
3843	return (error);
3844}
3845
3846static void
3847vmxnet3_dma_free(struct vmxnet3_softc *sc, struct vmxnet3_dma_alloc *dma)
3848{
3849
3850	if (dma->dma_tag != NULL) {
3851		if (dma->dma_map != NULL) {
3852			bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3853			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3854			bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3855		}
3856
3857		if (dma->dma_vaddr != NULL) {
3858			bus_dmamem_free(dma->dma_tag, dma->dma_vaddr,
3859			    dma->dma_map);
3860		}
3861
3862		bus_dma_tag_destroy(dma->dma_tag);
3863	}
3864	bzero(dma, sizeof(struct vmxnet3_dma_alloc));
3865}
3866
3867static int
3868vmxnet3_tunable_int(struct vmxnet3_softc *sc, const char *knob, int def)
3869{
3870	char path[64];
3871
3872	snprintf(path, sizeof(path),
3873	    "hw.vmx.%d.%s", device_get_unit(sc->vmx_dev), knob);
3874	TUNABLE_INT_FETCH(path, &def);
3875
3876	return (def);
3877}
3878
3879/*
3880 * Since this is a purely paravirtualized device, we do not have
3881 * to worry about DMA coherency. But at times, we must make sure
3882 * both the compiler and CPU do not reorder memory operations.
3883 */
3884static inline void
3885vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
3886{
3887
3888	switch (type) {
3889	case VMXNET3_BARRIER_RD:
3890		rmb();
3891		break;
3892	case VMXNET3_BARRIER_WR:
3893		wmb();
3894		break;
3895	case VMXNET3_BARRIER_RDWR:
3896		mb();
3897		break;
3898	default:
3899		panic("%s: bad barrier type %d", __func__, type);
3900	}
3901}
3902