/*-
 * Copyright (c) 2013 Tsubai Masanari
 * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $
 */

/* Driver for VMware vmxnet3 virtual ethernet devices. */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/vmware/vmxnet3/if_vmx.c 267252 2014-06-09 02:39:05Z bryanv $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/endian.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/smp.h>
#include <sys/taskqueue.h>
#include <vm/vm.h>
#include <vm/pmap.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/if_media.h>
#include <net/if_vlan_var.h>

#include <net/bpf.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include "if_vmxreg.h"
#include "if_vmxvar.h"

#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef VMXNET3_FAILPOINTS
#include <sys/fail.h>
static SYSCTL_NODE(DEBUG_FP, OID_AUTO, vmxnet3, CTLFLAG_RW, 0,
    "vmxnet3 fail points");
#define VMXNET3_FP	_debug_fail_point_vmxnet3
#endif

static int	vmxnet3_probe(device_t);
static int	vmxnet3_attach(device_t);
static int	vmxnet3_detach(device_t);
static int	vmxnet3_shutdown(device_t);

static int	vmxnet3_alloc_resources(struct vmxnet3_softc *);
static void	vmxnet3_free_resources(struct vmxnet3_softc *);
static int	vmxnet3_check_version(struct vmxnet3_softc *);
static void	vmxnet3_initial_config(struct vmxnet3_softc *);
static void	vmxnet3_check_multiqueue(struct vmxnet3_softc *);

static int	vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *);
static int	vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *);
static int	vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *);
static int	vmxnet3_alloc_interrupt(struct vmxnet3_softc *, int, int,
		    struct vmxnet3_interrupt *);
static int	vmxnet3_alloc_intr_resources(struct vmxnet3_softc *);
static int	vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *);
static int	vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *);
static int	vmxnet3_setup_interrupts(struct vmxnet3_softc *);
static int	vmxnet3_alloc_interrupts(struct vmxnet3_softc *);

static void	vmxnet3_free_interrupt(struct vmxnet3_softc *,
		    struct vmxnet3_interrupt *);
static void	vmxnet3_free_interrupts(struct vmxnet3_softc *);

#ifndef VMXNET3_LEGACY_TX
static int	vmxnet3_alloc_taskqueue(struct vmxnet3_softc *);
static void	vmxnet3_start_taskqueue(struct vmxnet3_softc *);
static void	vmxnet3_drain_taskqueue(struct vmxnet3_softc *);
static void	vmxnet3_free_taskqueue(struct vmxnet3_softc *);
#endif

static int	vmxnet3_init_rxq(struct vmxnet3_softc *, int);
static int	vmxnet3_init_txq(struct vmxnet3_softc *, int);
static int	vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *);
static void	vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *);
static void	vmxnet3_destroy_txq(struct vmxnet3_txqueue *);
static void	vmxnet3_free_rxtx_queues(struct vmxnet3_softc *);

static int	vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
static void	vmxnet3_free_shared_data(struct vmxnet3_softc *);
static int	vmxnet3_alloc_txq_data(struct vmxnet3_softc *);
static void	vmxnet3_free_txq_data(struct vmxnet3_softc *);
static int	vmxnet3_alloc_rxq_data(struct vmxnet3_softc *);
static void	vmxnet3_free_rxq_data(struct vmxnet3_softc *);
static int	vmxnet3_alloc_queue_data(struct vmxnet3_softc *);
static void	vmxnet3_free_queue_data(struct vmxnet3_softc *);
static int	vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
static void	vmxnet3_init_shared_data(struct vmxnet3_softc *);
static void	vmxnet3_reinit_interface(struct vmxnet3_softc *);
static void	vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
static void	vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
static int	vmxnet3_alloc_data(struct vmxnet3_softc *);
static void	vmxnet3_free_data(struct vmxnet3_softc *);
static int	vmxnet3_setup_interface(struct vmxnet3_softc *);

static void	vmxnet3_evintr(struct vmxnet3_softc *);
static void	vmxnet3_txq_eof(struct vmxnet3_txqueue *);
static void	vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *, struct mbuf *);
static int	vmxnet3_newbuf(struct vmxnet3_softc *, struct vmxnet3_rxring *);
static void	vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *,
		    struct vmxnet3_rxring *, int);
static void	vmxnet3_rxq_eof(struct vmxnet3_rxqueue *);
static void	vmxnet3_legacy_intr(void *);
static void	vmxnet3_txq_intr(void *);
static void	vmxnet3_rxq_intr(void *);
static void	vmxnet3_event_intr(void *);

static void	vmxnet3_txstop(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
static void	vmxnet3_rxstop(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
static void	vmxnet3_stop(struct vmxnet3_softc *);

static void	vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
static int	vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
static int	vmxnet3_reinit_queues(struct vmxnet3_softc *);
static int	vmxnet3_enable_device(struct vmxnet3_softc *);
static void	vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
static int	vmxnet3_reinit(struct vmxnet3_softc *);
static void	vmxnet3_init_locked(struct vmxnet3_softc *);
static void	vmxnet3_init(void *);

static int	vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *,struct mbuf *,
		    int *, int *, int *);
static int	vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *, struct mbuf **,
		    bus_dmamap_t, bus_dma_segment_t [], int *);
static void	vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *, bus_dmamap_t);
static int	vmxnet3_txq_encap(struct vmxnet3_txqueue *, struct mbuf **);
static void	vmxnet3_txq_update_pending(struct vmxnet3_txqueue *);
#ifdef VMXNET3_LEGACY_TX
static void	vmxnet3_start_locked(struct ifnet *);
static void	vmxnet3_start(struct ifnet *);
#else
static int	vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *,
		    struct mbuf *);
static int	vmxnet3_txq_mq_start(struct ifnet *, struct mbuf *);
static void	vmxnet3_txq_tq_deferred(void *, int);
#endif
static void	vmxnet3_txq_start(struct vmxnet3_txqueue *);
static void	vmxnet3_tx_start_all(struct vmxnet3_softc *);

static void	vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
		    uint16_t);
static void	vmxnet3_register_vlan(void *, struct ifnet *, uint16_t);
static void	vmxnet3_unregister_vlan(void *, struct ifnet *, uint16_t);
static void	vmxnet3_set_rxfilter(struct vmxnet3_softc *);
static int	vmxnet3_change_mtu(struct vmxnet3_softc *, int);
static int	vmxnet3_ioctl(struct ifnet *, u_long, caddr_t);

#ifndef VMXNET3_LEGACY_TX
static void	vmxnet3_qflush(struct ifnet *);
#endif

static int	vmxnet3_watchdog(struct vmxnet3_txqueue *);
static void	vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
static void	vmxnet3_txq_accum_stats(struct vmxnet3_txqueue *,
		    struct vmxnet3_txq_stats *);
static void	vmxnet3_rxq_accum_stats(struct vmxnet3_rxqueue *,
		    struct vmxnet3_rxq_stats *);
static void	vmxnet3_tick(void *);
static void	vmxnet3_link_status(struct vmxnet3_softc *);
static void	vmxnet3_media_status(struct ifnet *, struct ifmediareq *);
static int	vmxnet3_media_change(struct ifnet *);
static void	vmxnet3_set_lladdr(struct vmxnet3_softc *);
static void	vmxnet3_get_lladdr(struct vmxnet3_softc *);

static void	vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *,
		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
static void	vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *,
		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
static void	vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *,
		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
static void	vmxnet3_setup_sysctl(struct vmxnet3_softc *);

static void	vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t,
		    uint32_t);
static uint32_t	vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t);
static void	vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t,
		    uint32_t);
static void	vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t);
static uint32_t	vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t);

static void	vmxnet3_enable_intr(struct vmxnet3_softc *, int);
static void	vmxnet3_disable_intr(struct vmxnet3_softc *, int);
static void	vmxnet3_enable_all_intrs(struct vmxnet3_softc *);
static void	vmxnet3_disable_all_intrs(struct vmxnet3_softc *);

static int	vmxnet3_dma_malloc(struct vmxnet3_softc *, bus_size_t,
		    bus_size_t, struct vmxnet3_dma_alloc *);
static void	vmxnet3_dma_free(struct vmxnet3_softc *,
		    struct vmxnet3_dma_alloc *);
static int	vmxnet3_tunable_int(struct vmxnet3_softc *,
		    const char *, int);

typedef enum {
	VMXNET3_BARRIER_RD,
	VMXNET3_BARRIER_WR,
	VMXNET3_BARRIER_RDWR,
} vmxnet3_barrier_t;

static void	vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);

/* Tunables. */
static int vmxnet3_mq_disable = 0;
TUNABLE_INT("hw.vmx.mq_disable", &vmxnet3_mq_disable);
static int vmxnet3_default_txnqueue = VMXNET3_DEF_TX_QUEUES;
TUNABLE_INT("hw.vmx.txnqueue", &vmxnet3_default_txnqueue);
static int vmxnet3_default_rxnqueue = VMXNET3_DEF_RX_QUEUES;
TUNABLE_INT("hw.vmx.rxnqueue", &vmxnet3_default_rxnqueue);
static int vmxnet3_default_txndesc = VMXNET3_DEF_TX_NDESC;
TUNABLE_INT("hw.vmx.txndesc", &vmxnet3_default_txndesc);
static int vmxnet3_default_rxndesc = VMXNET3_DEF_RX_NDESC;
TUNABLE_INT("hw.vmx.rxndesc", &vmxnet3_default_rxndesc);

static device_method_t vmxnet3_methods[] = {
	/* Device interface. */
	DEVMETHOD(device_probe,		vmxnet3_probe),
	DEVMETHOD(device_attach,	vmxnet3_attach),
	DEVMETHOD(device_detach,	vmxnet3_detach),
	DEVMETHOD(device_shutdown,	vmxnet3_shutdown),

	DEVMETHOD_END
};

static driver_t vmxnet3_driver = {
	"vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc)
};

static devclass_t vmxnet3_devclass;
DRIVER_MODULE(vmx, pci, vmxnet3_driver, vmxnet3_devclass, 0, 0);

MODULE_DEPEND(vmx, pci, 1, 1, 1);
MODULE_DEPEND(vmx, ether, 1, 1, 1);

#define VMXNET3_VMWARE_VENDOR_ID	0x15AD
#define VMXNET3_VMWARE_DEVICE_ID	0x07B0

static int
vmxnet3_probe(device_t dev)
{

	if (pci_get_vendor(dev) == VMXNET3_VMWARE_VENDOR_ID &&
	    pci_get_device(dev) == VMXNET3_VMWARE_DEVICE_ID) {
		device_set_desc(dev, "VMware VMXNET3 Ethernet Adapter");
		return (BUS_PROBE_DEFAULT);
	}

	return (ENXIO);
}

static int
vmxnet3_attach(device_t dev)
{
	struct vmxnet3_softc *sc;
	int error;

	sc = device_get_softc(dev);
	sc->vmx_dev = dev;

	pci_enable_busmaster(dev);

	VMXNET3_CORE_LOCK_INIT(sc, device_get_nameunit(dev));
	callout_init_mtx(&sc->vmx_tick, &sc->vmx_mtx, 0);

	vmxnet3_initial_config(sc);

	error = vmxnet3_alloc_resources(sc);
	if (error)
		goto fail;

	error = vmxnet3_check_version(sc);
	if (error)
		goto fail;

	error = vmxnet3_alloc_rxtx_queues(sc);
	if (error)
		goto fail;

#ifndef VMXNET3_LEGACY_TX
	error = vmxnet3_alloc_taskqueue(sc);
	if (error)
		goto fail;
#endif

	error = vmxnet3_alloc_interrupts(sc);
	if (error)
		goto fail;

	vmxnet3_check_multiqueue(sc);

	error = vmxnet3_alloc_data(sc);
	if (error)
		goto fail;

	error = vmxnet3_setup_interface(sc);
	if (error)
		goto fail;

	error = vmxnet3_setup_interrupts(sc);
	if (error) {
		ether_ifdetach(sc->vmx_ifp);
		device_printf(dev, "could not set up interrupt\n");
		goto fail;
	}

	vmxnet3_setup_sysctl(sc);
#ifndef VMXNET3_LEGACY_TX
	vmxnet3_start_taskqueue(sc);
#endif

fail:
	if (error)
		vmxnet3_detach(dev);

	return (error);
}

static int
vmxnet3_detach(device_t dev)
{
	struct vmxnet3_softc *sc;
	struct ifnet *ifp;

	sc = device_get_softc(dev);
	ifp = sc->vmx_ifp;

	if (device_is_attached(dev)) {
		VMXNET3_CORE_LOCK(sc);
		vmxnet3_stop(sc);
		VMXNET3_CORE_UNLOCK(sc);

		callout_drain(&sc->vmx_tick);
#ifndef VMXNET3_LEGACY_TX
		vmxnet3_drain_taskqueue(sc);
#endif

		ether_ifdetach(ifp);
	}

	if (sc->vmx_vlan_attach != NULL) {
		EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_attach);
		sc->vmx_vlan_attach = NULL;
	}
	if (sc->vmx_vlan_detach != NULL) {
		EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_detach);
		sc->vmx_vlan_detach = NULL;
	}

#ifndef VMXNET3_LEGACY_TX
	vmxnet3_free_taskqueue(sc);
#endif
	vmxnet3_free_interrupts(sc);

	if (ifp != NULL) {
		if_free(ifp);
		sc->vmx_ifp = NULL;
	}

	ifmedia_removeall(&sc->vmx_media);

	vmxnet3_free_data(sc);
	vmxnet3_free_resources(sc);
	vmxnet3_free_rxtx_queues(sc);

	VMXNET3_CORE_LOCK_DESTROY(sc);

	return (0);
}

static int
vmxnet3_shutdown(device_t dev)
{

	return (0);
}

static int
vmxnet3_alloc_resources(struct vmxnet3_softc *sc)
{
	device_t dev;
	int rid;

	dev = sc->vmx_dev;

	rid = PCIR_BAR(0);
	sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
	    RF_ACTIVE);
	if (sc->vmx_res0 == NULL) {
		device_printf(dev,
		    "could not map BAR0 memory\n");
		return (ENXIO);
	}

	sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0);
	sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0);

	rid = PCIR_BAR(1);
	sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
	    RF_ACTIVE);
	if (sc->vmx_res1 == NULL) {
		device_printf(dev,
		    "could not map BAR1 memory\n");
		return (ENXIO);
	}

	sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1);
	sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1);

	if (pci_find_cap(dev, PCIY_MSIX, NULL) == 0) {
		rid = PCIR_BAR(2);
		sc->vmx_msix_res = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
	}

	if (sc->vmx_msix_res == NULL)
		sc->vmx_flags |= VMXNET3_FLAG_NO_MSIX;

	return (0);
}

static void
vmxnet3_free_resources(struct vmxnet3_softc *sc)
{
	device_t dev;
	int rid;

	dev = sc->vmx_dev;

	if (sc->vmx_res0 != NULL) {
		rid = PCIR_BAR(0);
		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res0);
		sc->vmx_res0 = NULL;
	}

	if (sc->vmx_res1 != NULL) {
		rid = PCIR_BAR(1);
		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res1);
		sc->vmx_res1 = NULL;
	}

	if (sc->vmx_msix_res != NULL) {
		rid = PCIR_BAR(2);
		bus_release_resource(dev, SYS_RES_MEMORY, rid,
		    sc->vmx_msix_res);
		sc->vmx_msix_res = NULL;
	}
}

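/*
 * Verify that the device supports revision 1 of both the vmxnet3 and UPT
 * interfaces, and select those revisions by writing 1 back to the
 * corresponding BAR1 version-report registers.
 */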
static int
vmxnet3_check_version(struct vmxnet3_softc *sc)
{
	device_t dev;
	uint32_t version;

	dev = sc->vmx_dev;

	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
	if ((version & 0x01) == 0) {
		device_printf(dev, "unsupported hardware version %#x\n",
		    version);
		return (ENOTSUP);
	}
	vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);

	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
	if ((version & 0x01) == 0) {
		device_printf(dev, "unsupported UPT version %#x\n", version);
		return (ENOTSUP);
	}
	vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);

	return (0);
}

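/*
 * Derive the maximum queue and descriptor counts from the tunables,
 * clamping the queue counts to the supported range and the number of
 * CPUs, and masking the descriptor counts to an allowed alignment.
 */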
516vmxnet3_initial_config(struct vmxnet3_softc *sc)
517{
518	int nqueue, ndesc;
519
520	nqueue = vmxnet3_tunable_int(sc, "txnqueue", vmxnet3_default_txnqueue);
521	if (nqueue > VMXNET3_MAX_TX_QUEUES || nqueue < 1)
522		nqueue = VMXNET3_DEF_TX_QUEUES;
523	if (nqueue > mp_ncpus)
524		nqueue = mp_ncpus;
525	sc->vmx_max_ntxqueues = nqueue;
526
527	nqueue = vmxnet3_tunable_int(sc, "rxnqueue", vmxnet3_default_rxnqueue);
528	if (nqueue > VMXNET3_MAX_RX_QUEUES || nqueue < 1)
529		nqueue = VMXNET3_DEF_RX_QUEUES;
530	if (nqueue > mp_ncpus)
531		nqueue = mp_ncpus;
532	sc->vmx_max_nrxqueues = nqueue;
533
534	if (vmxnet3_tunable_int(sc, "mq_disable", vmxnet3_mq_disable)) {
535		sc->vmx_max_nrxqueues = 1;
536		sc->vmx_max_ntxqueues = 1;
537	}
538
539	ndesc = vmxnet3_tunable_int(sc, "txd", vmxnet3_default_txndesc);
540	if (ndesc > VMXNET3_MAX_TX_NDESC || ndesc < VMXNET3_MIN_TX_NDESC)
541		ndesc = VMXNET3_DEF_TX_NDESC;
542	if (ndesc & VMXNET3_MASK_TX_NDESC)
543		ndesc &= ~VMXNET3_MASK_TX_NDESC;
544	sc->vmx_ntxdescs = ndesc;
545
546	ndesc = vmxnet3_tunable_int(sc, "rxd", vmxnet3_default_rxndesc);
547	if (ndesc > VMXNET3_MAX_RX_NDESC || ndesc < VMXNET3_MIN_RX_NDESC)
548		ndesc = VMXNET3_DEF_RX_NDESC;
549	if (ndesc & VMXNET3_MASK_RX_NDESC)
550		ndesc &= ~VMXNET3_MASK_RX_NDESC;
551	sc->vmx_nrxdescs = ndesc;
552	sc->vmx_max_rxsegs = VMXNET3_MAX_RX_SEGS;
553}
554
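/*
 * Multiple Tx/Rx queues are only used when MSI-X interrupts were
 * allocated; otherwise the driver falls back to a single queue pair.
 * RSS is enabled whenever more than one Rx queue is in use.
 */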
static void
vmxnet3_check_multiqueue(struct vmxnet3_softc *sc)
{

	if (sc->vmx_intr_type != VMXNET3_IT_MSIX)
		goto out;

	/* BMV: Just use the maximum configured for now. */
	sc->vmx_nrxqueues = sc->vmx_max_nrxqueues;
	sc->vmx_ntxqueues = sc->vmx_max_ntxqueues;

	if (sc->vmx_nrxqueues > 1)
		sc->vmx_flags |= VMXNET3_FLAG_RSS;

	return;

out:
	sc->vmx_ntxqueues = 1;
	sc->vmx_nrxqueues = 1;
}

static int
vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *sc)
{
	device_t dev;
	int nmsix, cnt, required;

	dev = sc->vmx_dev;

	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX)
		return (1);

	/* Allocate an additional vector for the events interrupt. */
	required = sc->vmx_max_nrxqueues + sc->vmx_max_ntxqueues + 1;

	nmsix = pci_msix_count(dev);
	if (nmsix < required)
		return (1);

	cnt = required;
	if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) {
		sc->vmx_nintrs = required;
		return (0);
	} else
		pci_release_msi(dev);

	/* BMV TODO Fallback to sharing MSIX vectors if possible. */

	return (1);
}

static int
vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *sc)
{
	device_t dev;
	int nmsi, cnt, required;

	dev = sc->vmx_dev;
	required = 1;

	nmsi = pci_msi_count(dev);
	if (nmsi < required)
		return (1);

	cnt = required;
	if (pci_alloc_msi(dev, &cnt) == 0 && cnt >= required) {
		sc->vmx_nintrs = 1;
		return (0);
	} else
		pci_release_msi(dev);

	return (1);
}

static int
vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *sc)
{

	sc->vmx_nintrs = 1;
	return (0);
}

static int
vmxnet3_alloc_interrupt(struct vmxnet3_softc *sc, int rid, int flags,
    struct vmxnet3_interrupt *intr)
{
	struct resource *irq;

	irq = bus_alloc_resource_any(sc->vmx_dev, SYS_RES_IRQ, &rid, flags);
	if (irq == NULL)
		return (ENXIO);

	intr->vmxi_irq = irq;
	intr->vmxi_rid = rid;

	return (0);
}

static int
vmxnet3_alloc_intr_resources(struct vmxnet3_softc *sc)
{
	int i, rid, flags, error;

	rid = 0;
	flags = RF_ACTIVE;

	if (sc->vmx_intr_type == VMXNET3_IT_LEGACY)
		flags |= RF_SHAREABLE;
	else
		rid = 1;

	for (i = 0; i < sc->vmx_nintrs; i++, rid++) {
		error = vmxnet3_alloc_interrupt(sc, rid, flags,
		    &sc->vmx_intrs[i]);
		if (error)
			return (error);
	}

	return (0);
}

static int
vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *sc)
{
	device_t dev;
	struct vmxnet3_txqueue *txq;
	struct vmxnet3_rxqueue *rxq;
	struct vmxnet3_interrupt *intr;
	enum intr_type type;
	int i, error;

	dev = sc->vmx_dev;
	intr = &sc->vmx_intrs[0];
	type = INTR_TYPE_NET | INTR_MPSAFE;

	for (i = 0; i < sc->vmx_ntxqueues; i++, intr++) {
		txq = &sc->vmx_txq[i];
		error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
		     vmxnet3_txq_intr, txq, &intr->vmxi_handler);
		if (error)
			return (error);
		txq->vxtxq_intr_idx = intr->vmxi_rid - 1;
	}

	for (i = 0; i < sc->vmx_nrxqueues; i++, intr++) {
		rxq = &sc->vmx_rxq[i];
		error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
		    vmxnet3_rxq_intr, rxq, &intr->vmxi_handler);
		if (error)
			return (error);
		rxq->vxrxq_intr_idx = intr->vmxi_rid - 1;
	}

	error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
	    vmxnet3_event_intr, sc, &intr->vmxi_handler);
	if (error)
		return (error);
	sc->vmx_event_intr_idx = intr->vmxi_rid - 1;

	return (0);
}

static int
vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *sc)
{
	struct vmxnet3_interrupt *intr;
	int i, error;

	intr = &sc->vmx_intrs[0];
	error = bus_setup_intr(sc->vmx_dev, intr->vmxi_irq,
	    INTR_TYPE_NET | INTR_MPSAFE, NULL, vmxnet3_legacy_intr, sc,
	    &intr->vmxi_handler);

	for (i = 0; i < sc->vmx_ntxqueues; i++)
		sc->vmx_txq[i].vxtxq_intr_idx = 0;
	for (i = 0; i < sc->vmx_nrxqueues; i++)
		sc->vmx_rxq[i].vxrxq_intr_idx = 0;
	sc->vmx_event_intr_idx = 0;

	return (error);
}

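/*
 * Publish the interrupt index assigned to each queue (and to the event
 * interrupt) in the shared data so the device raises the right vector.
 */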
static void
vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
{
	struct vmxnet3_txqueue *txq;
	struct vmxnet3_txq_shared *txs;
	struct vmxnet3_rxqueue *rxq;
	struct vmxnet3_rxq_shared *rxs;
	int i;

	sc->vmx_ds->evintr = sc->vmx_event_intr_idx;

	for (i = 0; i < sc->vmx_ntxqueues; i++) {
		txq = &sc->vmx_txq[i];
		txs = txq->vxtxq_ts;
		txs->intr_idx = txq->vxtxq_intr_idx;
	}

	for (i = 0; i < sc->vmx_nrxqueues; i++) {
		rxq = &sc->vmx_rxq[i];
		rxs = rxq->vxrxq_rs;
		rxs->intr_idx = rxq->vxrxq_intr_idx;
	}
}

static int
vmxnet3_setup_interrupts(struct vmxnet3_softc *sc)
{
	int error;

	error = vmxnet3_alloc_intr_resources(sc);
	if (error)
		return (error);

	switch (sc->vmx_intr_type) {
	case VMXNET3_IT_MSIX:
		error = vmxnet3_setup_msix_interrupts(sc);
		break;
	case VMXNET3_IT_MSI:
	case VMXNET3_IT_LEGACY:
		error = vmxnet3_setup_legacy_interrupt(sc);
		break;
	default:
		panic("%s: invalid interrupt type %d", __func__,
		    sc->vmx_intr_type);
	}

	if (error == 0)
		vmxnet3_set_interrupt_idx(sc);

	return (error);
}

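/*
 * The interrupt type preferred by the hypervisor is reported in the low
 * two bits of the GET_INTRCFG result and the masking mode in the next
 * two. Try MSI-X first, then MSI, then the shared legacy interrupt.
 */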
static int
vmxnet3_alloc_interrupts(struct vmxnet3_softc *sc)
{
	device_t dev;
	uint32_t config;
	int error;

	dev = sc->vmx_dev;
	config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);

	sc->vmx_intr_type = config & 0x03;
	sc->vmx_intr_mask_mode = (config >> 2) & 0x03;

	switch (sc->vmx_intr_type) {
	case VMXNET3_IT_AUTO:
		sc->vmx_intr_type = VMXNET3_IT_MSIX;
		/* FALLTHROUGH */
	case VMXNET3_IT_MSIX:
		error = vmxnet3_alloc_msix_interrupts(sc);
		if (error == 0)
			break;
		sc->vmx_intr_type = VMXNET3_IT_MSI;
		/* FALLTHROUGH */
	case VMXNET3_IT_MSI:
		error = vmxnet3_alloc_msi_interrupts(sc);
		if (error == 0)
			break;
		sc->vmx_intr_type = VMXNET3_IT_LEGACY;
		/* FALLTHROUGH */
	case VMXNET3_IT_LEGACY:
		error = vmxnet3_alloc_legacy_interrupts(sc);
		if (error == 0)
			break;
		/* FALLTHROUGH */
	default:
		sc->vmx_intr_type = -1;
		device_printf(dev, "cannot allocate any interrupt resources\n");
		return (ENXIO);
	}

	return (error);
}

static void
vmxnet3_free_interrupt(struct vmxnet3_softc *sc,
    struct vmxnet3_interrupt *intr)
{
	device_t dev;

	dev = sc->vmx_dev;

	if (intr->vmxi_handler != NULL) {
		bus_teardown_intr(dev, intr->vmxi_irq, intr->vmxi_handler);
		intr->vmxi_handler = NULL;
	}

	if (intr->vmxi_irq != NULL) {
		bus_release_resource(dev, SYS_RES_IRQ, intr->vmxi_rid,
		    intr->vmxi_irq);
		intr->vmxi_irq = NULL;
		intr->vmxi_rid = -1;
	}
}

static void
vmxnet3_free_interrupts(struct vmxnet3_softc *sc)
{
	int i;

	for (i = 0; i < sc->vmx_nintrs; i++)
		vmxnet3_free_interrupt(sc, &sc->vmx_intrs[i]);

	if (sc->vmx_intr_type == VMXNET3_IT_MSI ||
	    sc->vmx_intr_type == VMXNET3_IT_MSIX)
		pci_release_msi(sc->vmx_dev);
}

#ifndef VMXNET3_LEGACY_TX
static int
vmxnet3_alloc_taskqueue(struct vmxnet3_softc *sc)
{
	device_t dev;

	dev = sc->vmx_dev;

	sc->vmx_tq = taskqueue_create(device_get_nameunit(dev), M_NOWAIT,
	    taskqueue_thread_enqueue, &sc->vmx_tq);
	if (sc->vmx_tq == NULL)
		return (ENOMEM);

	return (0);
}

static void
vmxnet3_start_taskqueue(struct vmxnet3_softc *sc)
{
	device_t dev;
	int nthreads, error;

	dev = sc->vmx_dev;

	/*
	 * The taskqueue is typically not frequently used, so a dedicated
	 * thread for each queue is unnecessary.
	 */
	nthreads = MAX(1, sc->vmx_ntxqueues / 2);

	/*
	 * Most drivers just ignore the return value - it only fails
	 * with ENOMEM so an error is not likely. It is hard for us
	 * to recover from an error here.
	 */
	error = taskqueue_start_threads(&sc->vmx_tq, nthreads, PI_NET,
	    "%s taskq", device_get_nameunit(dev));
	if (error)
		device_printf(dev, "failed to start taskqueue: %d\n", error);
}

static void
vmxnet3_drain_taskqueue(struct vmxnet3_softc *sc)
{
	struct vmxnet3_txqueue *txq;
	int i;

	if (sc->vmx_tq != NULL) {
		for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
			txq = &sc->vmx_txq[i];
			taskqueue_drain(sc->vmx_tq, &txq->vxtxq_defrtask);
		}
	}
}

static void
vmxnet3_free_taskqueue(struct vmxnet3_softc *sc)
{
	if (sc->vmx_tq != NULL) {
		taskqueue_free(sc->vmx_tq);
		sc->vmx_tq = NULL;
	}
}
#endif

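/*
 * Each Rx queue has VMXNET3_RXRINGS_PERQ command rings feeding a single
 * completion ring, so the completion ring is sized to hold the sum of
 * the command ring descriptor counts.
 */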
static int
vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q)
{
	struct vmxnet3_rxqueue *rxq;
	struct vmxnet3_rxring *rxr;
	int i;

	rxq = &sc->vmx_rxq[q];

	snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
	    device_get_nameunit(sc->vmx_dev), q);
	mtx_init(&rxq->vxrxq_mtx, rxq->vxrxq_name, NULL, MTX_DEF);

	rxq->vxrxq_sc = sc;
	rxq->vxrxq_id = q;

	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
		rxr = &rxq->vxrxq_cmd_ring[i];
		rxr->vxrxr_rid = i;
		rxr->vxrxr_ndesc = sc->vmx_nrxdescs;
		rxr->vxrxr_rxbuf = malloc(rxr->vxrxr_ndesc *
		    sizeof(struct vmxnet3_rxbuf), M_DEVBUF, M_NOWAIT | M_ZERO);
		if (rxr->vxrxr_rxbuf == NULL)
			return (ENOMEM);

		rxq->vxrxq_comp_ring.vxcr_ndesc += sc->vmx_nrxdescs;
	}

	return (0);
}

static int
vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
{
	struct vmxnet3_txqueue *txq;
	struct vmxnet3_txring *txr;

	txq = &sc->vmx_txq[q];
	txr = &txq->vxtxq_cmd_ring;

	snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
	    device_get_nameunit(sc->vmx_dev), q);
	mtx_init(&txq->vxtxq_mtx, txq->vxtxq_name, NULL, MTX_DEF);

	txq->vxtxq_sc = sc;
	txq->vxtxq_id = q;

	txr->vxtxr_ndesc = sc->vmx_ntxdescs;
	txr->vxtxr_txbuf = malloc(txr->vxtxr_ndesc *
	    sizeof(struct vmxnet3_txbuf), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (txr->vxtxr_txbuf == NULL)
		return (ENOMEM);

	txq->vxtxq_comp_ring.vxcr_ndesc = sc->vmx_ntxdescs;

#ifndef VMXNET3_LEGACY_TX
	TASK_INIT(&txq->vxtxq_defrtask, 0, vmxnet3_txq_tq_deferred, txq);

	txq->vxtxq_br = buf_ring_alloc(VMXNET3_DEF_BUFRING_SIZE, M_DEVBUF,
	    M_NOWAIT, &txq->vxtxq_mtx);
	if (txq->vxtxq_br == NULL)
		return (ENOMEM);
#endif

	return (0);
}

static int
vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *sc)
{
	int i, error;

	/*
	 * Only attempt to create multiple queues if MSIX is available. MSIX
	 * is disabled by default because it's apparently broken for devices
	 * passed through by at least ESXi 5.1. The hw.pci.honor_msi_blacklist
	 * tunable must be set to zero for MSIX. This check prevents us from
	 * allocating queue structures that we will not use.
	 */
	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX) {
		sc->vmx_max_nrxqueues = 1;
		sc->vmx_max_ntxqueues = 1;
	}

	sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) *
	    sc->vmx_max_nrxqueues, M_DEVBUF, M_NOWAIT | M_ZERO);
	sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) *
	    sc->vmx_max_ntxqueues, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (sc->vmx_rxq == NULL || sc->vmx_txq == NULL)
		return (ENOMEM);

	for (i = 0; i < sc->vmx_max_nrxqueues; i++) {
		error = vmxnet3_init_rxq(sc, i);
		if (error)
			return (error);
	}

	for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
		error = vmxnet3_init_txq(sc, i);
		if (error)
			return (error);
	}

	return (0);
}

static void
vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *rxq)
{
	struct vmxnet3_rxring *rxr;
	int i;

	rxq->vxrxq_sc = NULL;
	rxq->vxrxq_id = -1;

	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
		rxr = &rxq->vxrxq_cmd_ring[i];

		if (rxr->vxrxr_rxbuf != NULL) {
			free(rxr->vxrxr_rxbuf, M_DEVBUF);
			rxr->vxrxr_rxbuf = NULL;
		}
	}

	if (mtx_initialized(&rxq->vxrxq_mtx) != 0)
		mtx_destroy(&rxq->vxrxq_mtx);
}

static void
vmxnet3_destroy_txq(struct vmxnet3_txqueue *txq)
{
	struct vmxnet3_txring *txr;

	txr = &txq->vxtxq_cmd_ring;

	txq->vxtxq_sc = NULL;
	txq->vxtxq_id = -1;

#ifndef VMXNET3_LEGACY_TX
	if (txq->vxtxq_br != NULL) {
		buf_ring_free(txq->vxtxq_br, M_DEVBUF);
		txq->vxtxq_br = NULL;
	}
#endif

	if (txr->vxtxr_txbuf != NULL) {
		free(txr->vxtxr_txbuf, M_DEVBUF);
		txr->vxtxr_txbuf = NULL;
	}

	if (mtx_initialized(&txq->vxtxq_mtx) != 0)
		mtx_destroy(&txq->vxtxq_mtx);
}

static void
vmxnet3_free_rxtx_queues(struct vmxnet3_softc *sc)
{
	int i;

	if (sc->vmx_rxq != NULL) {
		for (i = 0; i < sc->vmx_max_nrxqueues; i++)
			vmxnet3_destroy_rxq(&sc->vmx_rxq[i]);
		free(sc->vmx_rxq, M_DEVBUF);
		sc->vmx_rxq = NULL;
	}

	if (sc->vmx_txq != NULL) {
		for (i = 0; i < sc->vmx_max_ntxqueues; i++)
			vmxnet3_destroy_txq(&sc->vmx_txq[i]);
		free(sc->vmx_txq, M_DEVBUF);
		sc->vmx_txq = NULL;
	}
}

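/*
 * Allocate the DMA memory shared with the device: the driver_shared
 * structure, the per-queue shared areas (all Tx areas followed by all
 * Rx areas in one contiguous allocation), and, when RSS is enabled,
 * the RSS configuration block.
 */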
static int
vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
{
	device_t dev;
	uint8_t *kva;
	size_t size;
	int i, error;

	dev = sc->vmx_dev;

	size = sizeof(struct vmxnet3_driver_shared);
	error = vmxnet3_dma_malloc(sc, size, 1, &sc->vmx_ds_dma);
	if (error) {
		device_printf(dev, "cannot alloc shared memory\n");
		return (error);
	}
	sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.dma_vaddr;

	size = sc->vmx_ntxqueues * sizeof(struct vmxnet3_txq_shared) +
	    sc->vmx_nrxqueues * sizeof(struct vmxnet3_rxq_shared);
	error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_qs_dma);
	if (error) {
		device_printf(dev, "cannot alloc queue shared memory\n");
		return (error);
	}
	sc->vmx_qs = (void *) sc->vmx_qs_dma.dma_vaddr;
	kva = sc->vmx_qs;

	for (i = 0; i < sc->vmx_ntxqueues; i++) {
		sc->vmx_txq[i].vxtxq_ts = (struct vmxnet3_txq_shared *) kva;
		kva += sizeof(struct vmxnet3_txq_shared);
	}
	for (i = 0; i < sc->vmx_nrxqueues; i++) {
		sc->vmx_rxq[i].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva;
		kva += sizeof(struct vmxnet3_rxq_shared);
	}

	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
		size = sizeof(struct vmxnet3_rss_shared);
		error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_rss_dma);
		if (error) {
			device_printf(dev, "cannot alloc rss shared memory\n");
			return (error);
		}
		sc->vmx_rss =
		    (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.dma_vaddr;
	}

	return (0);
}

static void
vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
{

	if (sc->vmx_rss != NULL) {
		vmxnet3_dma_free(sc, &sc->vmx_rss_dma);
		sc->vmx_rss = NULL;
	}

	if (sc->vmx_qs != NULL) {
		vmxnet3_dma_free(sc, &sc->vmx_qs_dma);
		sc->vmx_qs = NULL;
	}

	if (sc->vmx_ds != NULL) {
		vmxnet3_dma_free(sc, &sc->vmx_ds_dma);
		sc->vmx_ds = NULL;
	}
}

static int
vmxnet3_alloc_txq_data(struct vmxnet3_softc *sc)
{
	device_t dev;
	struct vmxnet3_txqueue *txq;
	struct vmxnet3_txring *txr;
	struct vmxnet3_comp_ring *txc;
	size_t descsz, compsz;
	int i, q, error;

	dev = sc->vmx_dev;

	for (q = 0; q < sc->vmx_ntxqueues; q++) {
		txq = &sc->vmx_txq[q];
		txr = &txq->vxtxq_cmd_ring;
		txc = &txq->vxtxq_comp_ring;

		descsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc);
		compsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txcompdesc);

		error = bus_dma_tag_create(bus_get_dma_tag(dev),
		    1, 0,			/* alignment, boundary */
		    BUS_SPACE_MAXADDR,		/* lowaddr */
		    BUS_SPACE_MAXADDR,		/* highaddr */
		    NULL, NULL,			/* filter, filterarg */
		    VMXNET3_TX_MAXSIZE,		/* maxsize */
		    VMXNET3_TX_MAXSEGS,		/* nsegments */
		    VMXNET3_TX_MAXSEGSIZE,	/* maxsegsize */
		    0,				/* flags */
		    NULL, NULL,			/* lockfunc, lockarg */
		    &txr->vxtxr_txtag);
		if (error) {
			device_printf(dev,
			    "unable to create Tx buffer tag for queue %d\n", q);
			return (error);
		}

		error = vmxnet3_dma_malloc(sc, descsz, 512, &txr->vxtxr_dma);
		if (error) {
			device_printf(dev, "cannot alloc Tx descriptors for "
			    "queue %d error %d\n", q, error);
			return (error);
		}
		txr->vxtxr_txd =
		    (struct vmxnet3_txdesc *) txr->vxtxr_dma.dma_vaddr;

		error = vmxnet3_dma_malloc(sc, compsz, 512, &txc->vxcr_dma);
		if (error) {
			device_printf(dev, "cannot alloc Tx comp descriptors "
			   "for queue %d error %d\n", q, error);
			return (error);
		}
		txc->vxcr_u.txcd =
		    (struct vmxnet3_txcompdesc *) txc->vxcr_dma.dma_vaddr;

		for (i = 0; i < txr->vxtxr_ndesc; i++) {
			error = bus_dmamap_create(txr->vxtxr_txtag, 0,
			    &txr->vxtxr_txbuf[i].vtxb_dmamap);
			if (error) {
				device_printf(dev, "unable to create Tx buf "
				    "dmamap for queue %d idx %d\n", q, i);
				return (error);
			}
		}
	}

	return (0);
}

static void
vmxnet3_free_txq_data(struct vmxnet3_softc *sc)
{
	device_t dev;
	struct vmxnet3_txqueue *txq;
	struct vmxnet3_txring *txr;
	struct vmxnet3_comp_ring *txc;
	struct vmxnet3_txbuf *txb;
	int i, q;

	dev = sc->vmx_dev;

	for (q = 0; q < sc->vmx_ntxqueues; q++) {
		txq = &sc->vmx_txq[q];
		txr = &txq->vxtxq_cmd_ring;
		txc = &txq->vxtxq_comp_ring;

		for (i = 0; i < txr->vxtxr_ndesc; i++) {
			txb = &txr->vxtxr_txbuf[i];
			if (txb->vtxb_dmamap != NULL) {
				bus_dmamap_destroy(txr->vxtxr_txtag,
				    txb->vtxb_dmamap);
				txb->vtxb_dmamap = NULL;
			}
		}

		if (txc->vxcr_u.txcd != NULL) {
			vmxnet3_dma_free(sc, &txc->vxcr_dma);
			txc->vxcr_u.txcd = NULL;
		}

		if (txr->vxtxr_txd != NULL) {
			vmxnet3_dma_free(sc, &txr->vxtxr_dma);
			txr->vxtxr_txd = NULL;
		}

		if (txr->vxtxr_txtag != NULL) {
			bus_dma_tag_destroy(txr->vxtxr_txtag);
			txr->vxtxr_txtag = NULL;
		}
	}
}

static int
vmxnet3_alloc_rxq_data(struct vmxnet3_softc *sc)
{
	device_t dev;
	struct vmxnet3_rxqueue *rxq;
	struct vmxnet3_rxring *rxr;
	struct vmxnet3_comp_ring *rxc;
	int descsz, compsz;
	int i, j, q, error;

	dev = sc->vmx_dev;

	for (q = 0; q < sc->vmx_nrxqueues; q++) {
		rxq = &sc->vmx_rxq[q];
		rxc = &rxq->vxrxq_comp_ring;
		compsz = 0;

		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
			rxr = &rxq->vxrxq_cmd_ring[i];

			descsz = rxr->vxrxr_ndesc *
			    sizeof(struct vmxnet3_rxdesc);
			compsz += rxr->vxrxr_ndesc *
			    sizeof(struct vmxnet3_rxcompdesc);

			error = bus_dma_tag_create(bus_get_dma_tag(dev),
			    1, 0,		/* alignment, boundary */
			    BUS_SPACE_MAXADDR,	/* lowaddr */
			    BUS_SPACE_MAXADDR,	/* highaddr */
			    NULL, NULL,		/* filter, filterarg */
			    MJUMPAGESIZE,	/* maxsize */
			    1,			/* nsegments */
			    MJUMPAGESIZE,	/* maxsegsize */
			    0,			/* flags */
			    NULL, NULL,		/* lockfunc, lockarg */
			    &rxr->vxrxr_rxtag);
			if (error) {
				device_printf(dev,
				    "unable to create Rx buffer tag for "
				    "queue %d\n", q);
				return (error);
			}

			error = vmxnet3_dma_malloc(sc, descsz, 512,
			    &rxr->vxrxr_dma);
			if (error) {
				device_printf(dev, "cannot allocate Rx "
				    "descriptors for queue %d/%d error %d\n",
				    i, q, error);
				return (error);
			}
			rxr->vxrxr_rxd =
			    (struct vmxnet3_rxdesc *) rxr->vxrxr_dma.dma_vaddr;
		}

		error = vmxnet3_dma_malloc(sc, compsz, 512, &rxc->vxcr_dma);
		if (error) {
			device_printf(dev, "cannot alloc Rx comp descriptors "
			    "for queue %d error %d\n", q, error);
			return (error);
		}
		rxc->vxcr_u.rxcd =
		    (struct vmxnet3_rxcompdesc *) rxc->vxcr_dma.dma_vaddr;

		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
			rxr = &rxq->vxrxq_cmd_ring[i];

			error = bus_dmamap_create(rxr->vxrxr_rxtag, 0,
			    &rxr->vxrxr_spare_dmap);
			if (error) {
				device_printf(dev, "unable to create spare "
				    "dmamap for queue %d/%d error %d\n",
				    q, i, error);
				return (error);
			}

			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
				error = bus_dmamap_create(rxr->vxrxr_rxtag, 0,
				    &rxr->vxrxr_rxbuf[j].vrxb_dmamap);
				if (error) {
					device_printf(dev, "unable to create "
					    "dmamap for queue %d/%d slot %d "
					    "error %d\n",
					    q, i, j, error);
					return (error);
				}
			}
		}
	}

	return (0);
}

static void
vmxnet3_free_rxq_data(struct vmxnet3_softc *sc)
{
	device_t dev;
	struct vmxnet3_rxqueue *rxq;
	struct vmxnet3_rxring *rxr;
	struct vmxnet3_comp_ring *rxc;
	struct vmxnet3_rxbuf *rxb;
	int i, j, q;

	dev = sc->vmx_dev;

	for (q = 0; q < sc->vmx_nrxqueues; q++) {
		rxq = &sc->vmx_rxq[q];
		rxc = &rxq->vxrxq_comp_ring;

		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
			rxr = &rxq->vxrxq_cmd_ring[i];

			if (rxr->vxrxr_spare_dmap != NULL) {
				bus_dmamap_destroy(rxr->vxrxr_rxtag,
				    rxr->vxrxr_spare_dmap);
				rxr->vxrxr_spare_dmap = NULL;
			}

			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
				rxb = &rxr->vxrxr_rxbuf[j];
				if (rxb->vrxb_dmamap != NULL) {
					bus_dmamap_destroy(rxr->vxrxr_rxtag,
					    rxb->vrxb_dmamap);
					rxb->vrxb_dmamap = NULL;
				}
			}
		}

		if (rxc->vxcr_u.rxcd != NULL) {
			vmxnet3_dma_free(sc, &rxc->vxcr_dma);
			rxc->vxcr_u.rxcd = NULL;
		}

		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
			rxr = &rxq->vxrxq_cmd_ring[i];

			if (rxr->vxrxr_rxd != NULL) {
				vmxnet3_dma_free(sc, &rxr->vxrxr_dma);
				rxr->vxrxr_rxd = NULL;
			}

			if (rxr->vxrxr_rxtag != NULL) {
				bus_dma_tag_destroy(rxr->vxrxr_rxtag);
				rxr->vxrxr_rxtag = NULL;
			}
		}
	}
}

static int
vmxnet3_alloc_queue_data(struct vmxnet3_softc *sc)
{
	int error;

	error = vmxnet3_alloc_txq_data(sc);
	if (error)
		return (error);

	error = vmxnet3_alloc_rxq_data(sc);
	if (error)
		return (error);

	return (0);
}

static void
vmxnet3_free_queue_data(struct vmxnet3_softc *sc)
{

	if (sc->vmx_rxq != NULL)
		vmxnet3_free_rxq_data(sc);

	if (sc->vmx_txq != NULL)
		vmxnet3_free_txq_data(sc);
}

static int
vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
{
	int error;

	error = vmxnet3_dma_malloc(sc, VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN,
	    32, &sc->vmx_mcast_dma);
	if (error)
		device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
	else
		sc->vmx_mcast = sc->vmx_mcast_dma.dma_vaddr;

	return (error);
}

static void
vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
{

	if (sc->vmx_mcast != NULL) {
		vmxnet3_dma_free(sc, &sc->vmx_mcast_dma);
		sc->vmx_mcast = NULL;
	}
}

static void
vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
{
	struct vmxnet3_driver_shared *ds;
	struct vmxnet3_txqueue *txq;
	struct vmxnet3_txq_shared *txs;
	struct vmxnet3_rxqueue *rxq;
	struct vmxnet3_rxq_shared *rxs;
	int i;

	ds = sc->vmx_ds;

	/*
	 * Initialize the fields of the shared data that remain the same
	 * across reinits. Note the shared data is zeroed when allocated.
	 */

	ds->magic = VMXNET3_REV1_MAGIC;

	/* DriverInfo */
	ds->version = VMXNET3_DRIVER_VERSION;
	ds->guest = VMXNET3_GOS_FREEBSD |
#ifdef __LP64__
	    VMXNET3_GOS_64BIT;
#else
	    VMXNET3_GOS_32BIT;
#endif
	ds->vmxnet3_revision = 1;
	ds->upt_version = 1;

	/* Misc. conf */
	ds->driver_data = vtophys(sc);
	ds->driver_data_len = sizeof(struct vmxnet3_softc);
	ds->queue_shared = sc->vmx_qs_dma.dma_paddr;
	ds->queue_shared_len = sc->vmx_qs_dma.dma_size;
	ds->nrxsg_max = sc->vmx_max_rxsegs;

	/* RSS conf */
	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
		ds->rss.version = 1;
		ds->rss.paddr = sc->vmx_rss_dma.dma_paddr;
		ds->rss.len = sc->vmx_rss_dma.dma_size;
	}

	/* Interrupt control. */
	ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
	ds->nintr = sc->vmx_nintrs;
	ds->evintr = sc->vmx_event_intr_idx;
	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;

	for (i = 0; i < sc->vmx_nintrs; i++)
		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;

	/* Receive filter. */
	ds->mcast_table = sc->vmx_mcast_dma.dma_paddr;
	ds->mcast_tablelen = sc->vmx_mcast_dma.dma_size;

	/* Tx queues */
	for (i = 0; i < sc->vmx_ntxqueues; i++) {
		txq = &sc->vmx_txq[i];
		txs = txq->vxtxq_ts;

		txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_dma.dma_paddr;
		txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
		txs->comp_ring = txq->vxtxq_comp_ring.vxcr_dma.dma_paddr;
		txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
		txs->driver_data = vtophys(txq);
		txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
	}

	/* Rx queues */
	for (i = 0; i < sc->vmx_nrxqueues; i++) {
		rxq = &sc->vmx_rxq[i];
		rxs = rxq->vxrxq_rs;

		rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_dma.dma_paddr;
		rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
		rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_dma.dma_paddr;
		rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
		rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_dma.dma_paddr;
		rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
		rxs->driver_data = vtophys(rxq);
		rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
	}
}

static void
vmxnet3_reinit_interface(struct vmxnet3_softc *sc)
{
	struct ifnet *ifp;

	ifp = sc->vmx_ifp;

	/* Use the current MAC address. */
	bcopy(IF_LLADDR(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
	vmxnet3_set_lladdr(sc);

	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= VMXNET3_CSUM_OFFLOAD;
	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
		ifp->if_hwassist |= VMXNET3_CSUM_OFFLOAD_IPV6;
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_IP_TSO;
	if (ifp->if_capenable & IFCAP_TSO6)
		ifp->if_hwassist |= CSUM_IP6_TSO;
}

static void
vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
{
	/*
	 * Use the same key as the Linux driver until FreeBSD can do
	 * RSS (presumably Toeplitz) in software.
	 */
	static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
	    0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
	    0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
	    0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
	    0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
	    0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
	};

	struct vmxnet3_driver_shared *ds;
	struct vmxnet3_rss_shared *rss;
	int i;

	ds = sc->vmx_ds;
	rss = sc->vmx_rss;

	rss->hash_type =
	    UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
	    UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
	rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
	rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
	rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
	memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);

	for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
		rss->ind_table[i] = i % sc->vmx_nrxqueues;
}

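/*
 * Update the shared data fields that can change across reinits (MTU,
 * queue counts, and the offload features currently enabled on the
 * interface), then hand the device the physical address of the shared
 * data through the BAR1 DSL/DSH registers.
 */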
static void
vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
{
	struct ifnet *ifp;
	struct vmxnet3_driver_shared *ds;

	ifp = sc->vmx_ifp;
	ds = sc->vmx_ds;

	ds->mtu = ifp->if_mtu;
	ds->ntxqueue = sc->vmx_ntxqueues;
	ds->nrxqueue = sc->vmx_nrxqueues;

	ds->upt_features = 0;
	if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
		ds->upt_features |= UPT1_F_CSUM;
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		ds->upt_features |= UPT1_F_VLAN;
	if (ifp->if_capenable & IFCAP_LRO)
		ds->upt_features |= UPT1_F_LRO;

	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
		ds->upt_features |= UPT1_F_RSS;
		vmxnet3_reinit_rss_shared_data(sc);
	}

	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.dma_paddr);
	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
	    (uint64_t) sc->vmx_ds_dma.dma_paddr >> 32);
}

static int
vmxnet3_alloc_data(struct vmxnet3_softc *sc)
{
	int error;

	error = vmxnet3_alloc_shared_data(sc);
	if (error)
		return (error);

	error = vmxnet3_alloc_queue_data(sc);
	if (error)
		return (error);

	error = vmxnet3_alloc_mcast_table(sc);
	if (error)
		return (error);

	vmxnet3_init_shared_data(sc);

	return (0);
}

static void
vmxnet3_free_data(struct vmxnet3_softc *sc)
{

	vmxnet3_free_mcast_table(sc);
	vmxnet3_free_queue_data(sc);
	vmxnet3_free_shared_data(sc);
}

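/*
 * Create and attach the ifnet: set the interface callbacks, advertise
 * the checksum/TSO/VLAN capabilities, register the VLAN config event
 * handlers, and add the single autoselect ifmedia entry.
 */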
static int
vmxnet3_setup_interface(struct vmxnet3_softc *sc)
{
	device_t dev;
	struct ifnet *ifp;

	dev = sc->vmx_dev;

	ifp = sc->vmx_ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "cannot allocate ifnet structure\n");
		return (ENOSPC);
	}

	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
#if __FreeBSD_version < 1000025
	ifp->if_baudrate = 1000000000;
#elif __FreeBSD_version < 1100011
	if_initbaudrate(ifp, IF_Gbps(10));
#else
	ifp->if_baudrate = IF_Gbps(10);
#endif
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_init = vmxnet3_init;
	ifp->if_ioctl = vmxnet3_ioctl;
	ifp->if_hw_tsomax = VMXNET3_TSO_MAXSIZE;

#ifdef VMXNET3_LEGACY_TX
	ifp->if_start = vmxnet3_start;
	ifp->if_snd.ifq_drv_maxlen = sc->vmx_ntxdescs - 1;
	IFQ_SET_MAXLEN(&ifp->if_snd, sc->vmx_ntxdescs - 1);
	IFQ_SET_READY(&ifp->if_snd);
#else
	ifp->if_transmit = vmxnet3_txq_mq_start;
	ifp->if_qflush = vmxnet3_qflush;
#endif

	vmxnet3_get_lladdr(sc);
	ether_ifattach(ifp, sc->vmx_lladdr);

	ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_TXCSUM;
	ifp->if_capabilities |= IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6;
	ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6;
	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
	    IFCAP_VLAN_HWCSUM;
	ifp->if_capenable = ifp->if_capabilities;

	/* These capabilities are not enabled by default. */
	ifp->if_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER;

	sc->vmx_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    vmxnet3_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
	sc->vmx_vlan_detach = EVENTHANDLER_REGISTER(vlan_config,
	    vmxnet3_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);

	ifmedia_init(&sc->vmx_media, 0, vmxnet3_media_change,
	    vmxnet3_media_status);
	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->vmx_media, IFM_ETHER | IFM_AUTO);

	return (0);
}

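/*
 * Process a device event: a link-change event updates the link state
 * and restarts transmit if the link is up; Tx/Rx queue error events
 * log the queue status and force a reinit of the interface.
 */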
static void
vmxnet3_evintr(struct vmxnet3_softc *sc)
{
	device_t dev;
	struct ifnet *ifp;
	struct vmxnet3_txq_shared *ts;
	struct vmxnet3_rxq_shared *rs;
	uint32_t event;
	int reset;

	dev = sc->vmx_dev;
	ifp = sc->vmx_ifp;
	reset = 0;

	VMXNET3_CORE_LOCK(sc);

	/* Clear events. */
	event = sc->vmx_ds->event;
	vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);

	if (event & VMXNET3_EVENT_LINK) {
		vmxnet3_link_status(sc);
		if (sc->vmx_link_active != 0)
			vmxnet3_tx_start_all(sc);
	}

	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
		reset = 1;
		vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
		ts = sc->vmx_txq[0].vxtxq_ts;
		if (ts->stopped != 0)
			device_printf(dev, "Tx queue error %#x\n", ts->error);
		rs = sc->vmx_rxq[0].vxrxq_rs;
		if (rs->stopped != 0)
			device_printf(dev, "Rx queue error %#x\n", rs->error);
		device_printf(dev, "Rx/Tx queue error event ... resetting\n");
	}

	if (event & VMXNET3_EVENT_DIC)
		device_printf(dev, "device implementation change event\n");
	if (event & VMXNET3_EVENT_DEBUG)
		device_printf(dev, "debug event\n");

	if (reset != 0) {
		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
		vmxnet3_init_locked(sc);
	}

	VMXNET3_CORE_UNLOCK(sc);
}

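/*
 * Reclaim completed transmits: walk the Tx completion ring while the
 * generation bit matches, unload and free each completed mbuf chain,
 * and clear the watchdog once the command ring is empty.
 */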
static void
vmxnet3_txq_eof(struct vmxnet3_txqueue *txq)
{
	struct vmxnet3_softc *sc;
	struct ifnet *ifp;
	struct vmxnet3_txring *txr;
	struct vmxnet3_comp_ring *txc;
	struct vmxnet3_txcompdesc *txcd;
	struct vmxnet3_txbuf *txb;
	struct mbuf *m;
	u_int sop;

	sc = txq->vxtxq_sc;
	ifp = sc->vmx_ifp;
	txr = &txq->vxtxq_cmd_ring;
	txc = &txq->vxtxq_comp_ring;

	VMXNET3_TXQ_LOCK_ASSERT(txq);

	for (;;) {
		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
		if (txcd->gen != txc->vxcr_gen)
			break;
		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);

		if (++txc->vxcr_next == txc->vxcr_ndesc) {
			txc->vxcr_next = 0;
			txc->vxcr_gen ^= 1;
		}

		sop = txr->vxtxr_next;
		txb = &txr->vxtxr_txbuf[sop];

		if ((m = txb->vtxb_m) != NULL) {
			bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap);

			txq->vxtxq_stats.vmtxs_opackets++;
			txq->vxtxq_stats.vmtxs_obytes += m->m_pkthdr.len;
			if (m->m_flags & M_MCAST)
				txq->vxtxq_stats.vmtxs_omcasts++;

			m_freem(m);
			txb->vtxb_m = NULL;
		}

		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
	}

	if (txr->vxtxr_head == txr->vxtxr_next)
		txq->vxtxq_watchdog = 0;
}

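/*
 * Allocate and DMA-load a replacement Rx buffer for the current fill
 * slot. The first buffer of each chain in the primary ring is a head
 * buffer backed by an MCLBYTES cluster; all other buffers are body
 * buffers backed by MJUMPAGESIZE clusters. The spare DMA map is used
 * for the load and swapped with the slot's map on success.
 */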
1862static int
1863vmxnet3_newbuf(struct vmxnet3_softc *sc, struct vmxnet3_rxring *rxr)
1864{
1865	struct ifnet *ifp;
1866	struct mbuf *m;
1867	struct vmxnet3_rxdesc *rxd;
1868	struct vmxnet3_rxbuf *rxb;
1869	bus_dma_tag_t tag;
1870	bus_dmamap_t dmap;
1871	bus_dma_segment_t segs[1];
1872	int idx, clsize, btype, flags, nsegs, error;
1873
1874	ifp = sc->vmx_ifp;
1875	tag = rxr->vxrxr_rxtag;
1876	dmap = rxr->vxrxr_spare_dmap;
1877	idx = rxr->vxrxr_fill;
1878	rxd = &rxr->vxrxr_rxd[idx];
1879	rxb = &rxr->vxrxr_rxbuf[idx];
1880
1881#ifdef VMXNET3_FAILPOINTS
1882	KFAIL_POINT_CODE(VMXNET3_FP, newbuf, return ENOBUFS);
1883	if (rxr->vxrxr_rid != 0)
1884		KFAIL_POINT_CODE(VMXNET3_FP, newbuf_body_only, return ENOBUFS);
1885#endif
1886
1887	if (rxr->vxrxr_rid == 0 && (idx % sc->vmx_rx_max_chain) == 0) {
1888		flags = M_PKTHDR;
1889		clsize = MCLBYTES;
1890		btype = VMXNET3_BTYPE_HEAD;
1891	} else {
1892#if __FreeBSD_version < 902001
1893		/*
1894		 * These mbufs will never be used for the start of a frame.
1895		 * Roughly prior to branching releng/9.2, the load_mbuf_sg()
1896		 * required the mbuf to always be a packet header. Avoid
1897		 * unnecessary mbuf initialization in newer versions where
1898		 * that is not the case.
1899		 */
1900		flags = M_PKTHDR;
1901#else
1902		flags = 0;
1903#endif
1904		clsize = MJUMPAGESIZE;
1905		btype = VMXNET3_BTYPE_BODY;
1906	}
1907
1908	m = m_getjcl(M_NOWAIT, MT_DATA, flags, clsize);
1909	if (m == NULL) {
1910		sc->vmx_stats.vmst_mgetcl_failed++;
1911		return (ENOBUFS);
1912	}
1913
1914	if (btype == VMXNET3_BTYPE_HEAD) {
1915		m->m_len = m->m_pkthdr.len = clsize;
1916		m_adj(m, ETHER_ALIGN);
1917	} else
1918		m->m_len = clsize;
1919
1920	error = bus_dmamap_load_mbuf_sg(tag, dmap, m, &segs[0], &nsegs,
1921	    BUS_DMA_NOWAIT);
1922	if (error) {
1923		m_freem(m);
1924		sc->vmx_stats.vmst_mbuf_load_failed++;
1925		return (error);
1926	}
1927	KASSERT(nsegs == 1,
1928	    ("%s: mbuf %p with too many segments %d", __func__, m, nsegs));
1929#if __FreeBSD_version < 902001
1930	if (btype == VMXNET3_BTYPE_BODY)
1931		m->m_flags &= ~M_PKTHDR;
1932#endif
1933
1934	if (rxb->vrxb_m != NULL) {
1935		bus_dmamap_sync(tag, rxb->vrxb_dmamap, BUS_DMASYNC_POSTREAD);
1936		bus_dmamap_unload(tag, rxb->vrxb_dmamap);
1937	}
1938
1939	rxr->vxrxr_spare_dmap = rxb->vrxb_dmamap;
1940	rxb->vrxb_dmamap = dmap;
1941	rxb->vrxb_m = m;
1942
1943	rxd->addr = segs[0].ds_addr;
1944	rxd->len = segs[0].ds_len;
1945	rxd->btype = btype;
1946	rxd->gen = rxr->vxrxr_gen;
1947
1948	vmxnet3_rxr_increment_fill(rxr);
1949	return (0);
1950}
1951
1952static void
1953vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *rxq,
1954    struct vmxnet3_rxring *rxr, int idx)
1955{
1956	struct vmxnet3_rxdesc *rxd;
1957
1958	rxd = &rxr->vxrxr_rxd[idx];
1959	rxd->gen = rxr->vxrxr_gen;
1960	vmxnet3_rxr_increment_fill(rxr);
1961}
1962
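/*
 * Consume and discard the remaining completion descriptors of a frame
 * that is being dropped, returning each command ring slot to the
 * device.
 */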
1963static void
1964vmxnet3_rxq_discard_chain(struct vmxnet3_rxqueue *rxq)
1965{
1966	struct vmxnet3_softc *sc;
1967	struct vmxnet3_rxring *rxr;
1968	struct vmxnet3_comp_ring *rxc;
1969	struct vmxnet3_rxcompdesc *rxcd;
1970	int idx, eof;
1971
1972	sc = rxq->vxrxq_sc;
1973	rxc = &rxq->vxrxq_comp_ring;
1974
1975	do {
1976		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
1977		if (rxcd->gen != rxc->vxcr_gen)
1978			break;		/* Not expected. */
1979		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1980
1981		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
1982			rxc->vxcr_next = 0;
1983			rxc->vxcr_gen ^= 1;
1984		}
1985
1986		idx = rxcd->rxd_idx;
1987		eof = rxcd->eop;
1988		if (rxcd->qid < sc->vmx_nrxqueues)
1989			rxr = &rxq->vxrxq_cmd_ring[0];
1990		else
1991			rxr = &rxq->vxrxq_cmd_ring[1];
1992		vmxnet3_rxq_eof_discard(rxq, rxr, idx);
1993	} while (!eof);
1994}
1995
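/*
 * Translate the checksum status in the Rx completion descriptor into
 * mbuf csum_flags for the stack.
 */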
1996static void
1997vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
1998{
1999
2000	if (rxcd->ipv4) {
2001		m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
2002		if (rxcd->ipcsum_ok)
2003			m->m_pkthdr.csum_flags |= CSUM_IP_VALID;
2004	}
2005
2006	if (!rxcd->fragment) {
2007		if (rxcd->csum_ok && (rxcd->tcp || rxcd->udp)) {
2008			m->m_pkthdr.csum_flags |= CSUM_DATA_VALID |
2009			    CSUM_PSEUDO_HDR;
2010			m->m_pkthdr.csum_data = 0xFFFF;
2011		}
2012	}
2013}
2014
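/*
 * Hand a completed frame to the stack: set the flow id, checksum and
 * VLAN tag information, update the queue statistics, and call the
 * interface input routine with the Rx queue lock dropped.
 */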
2015static void
2016vmxnet3_rxq_input(struct vmxnet3_rxqueue *rxq,
2017    struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2018{
2019	struct vmxnet3_softc *sc;
2020	struct ifnet *ifp;
2021
2022	sc = rxq->vxrxq_sc;
2023	ifp = sc->vmx_ifp;
2024
2025	if (rxcd->error) {
2026		rxq->vxrxq_stats.vmrxs_ierrors++;
2027		m_freem(m);
2028		return;
2029	}
2030
2031#ifdef notyet
2032	switch (rxcd->rss_type) {
2033	case VMXNET3_RCD_RSS_TYPE_IPV4:
2034		m->m_pkthdr.flowid = rxcd->rss_hash;
2035		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV4);
2036		break;
2037	case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
2038		m->m_pkthdr.flowid = rxcd->rss_hash;
2039		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV4);
2040		break;
2041	case VMXNET3_RCD_RSS_TYPE_IPV6:
2042		m->m_pkthdr.flowid = rxcd->rss_hash;
2043		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV6);
2044		break;
2045	case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
2046		m->m_pkthdr.flowid = rxcd->rss_hash;
2047		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV6);
2048		break;
2049	default: /* VMXNET3_RCD_RSS_TYPE_NONE */
2050		m->m_pkthdr.flowid = rxq->vxrxq_id;
2051		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2052		break;
2053	}
2054#else
2055	m->m_pkthdr.flowid = rxq->vxrxq_id;
2056	m->m_flags |= M_FLOWID;
2057#endif
2058
2059	if (!rxcd->no_csum)
2060		vmxnet3_rx_csum(rxcd, m);
2061	if (rxcd->vlan) {
2062		m->m_flags |= M_VLANTAG;
2063		m->m_pkthdr.ether_vtag = rxcd->vtag;
2064	}
2065
2066	rxq->vxrxq_stats.vmrxs_ipackets++;
2067	rxq->vxrxq_stats.vmrxs_ibytes += m->m_pkthdr.len;
2068
2069	VMXNET3_RXQ_UNLOCK(rxq);
2070	(*ifp->if_input)(ifp, m);
2071	VMXNET3_RXQ_LOCK(rxq);
2072}
2073
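/*
 * Service the Rx completion ring: reassemble frames that span the head
 * and body command rings, replace each consumed buffer with
 * vmxnet3_newbuf(), and pass completed frames to the stack. A frame is
 * dropped if a replacement buffer cannot be allocated.
 */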
2074static void
2075vmxnet3_rxq_eof(struct vmxnet3_rxqueue *rxq)
2076{
2077	struct vmxnet3_softc *sc;
2078	struct ifnet *ifp;
2079	struct vmxnet3_rxring *rxr;
2080	struct vmxnet3_comp_ring *rxc;
2081	struct vmxnet3_rxdesc *rxd;
2082	struct vmxnet3_rxcompdesc *rxcd;
2083	struct mbuf *m, *m_head, *m_tail;
2084	int idx, length;
2085
2086	sc = rxq->vxrxq_sc;
2087	ifp = sc->vmx_ifp;
2088	rxc = &rxq->vxrxq_comp_ring;
2089	m_head = m_tail = NULL;
2090
2091	VMXNET3_RXQ_LOCK_ASSERT(rxq);
2092
2093	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2094		return;
2095
2096	for (;;) {
2097		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
2098		if (rxcd->gen != rxc->vxcr_gen)
2099			break;
2100		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
2101
2102		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
2103			rxc->vxcr_next = 0;
2104			rxc->vxcr_gen ^= 1;
2105		}
2106
2107		idx = rxcd->rxd_idx;
2108		length = rxcd->len;
2109		if (rxcd->qid < sc->vmx_nrxqueues)
2110			rxr = &rxq->vxrxq_cmd_ring[0];
2111		else
2112			rxr = &rxq->vxrxq_cmd_ring[1];
2113		rxd = &rxr->vxrxr_rxd[idx];
2114
2115		m = rxr->vxrxr_rxbuf[idx].vrxb_m;
2116		KASSERT(m != NULL, ("%s: queue %d idx %d without mbuf",
2117		    __func__, rxcd->qid, idx));
2118
2119		/*
2120		 * The host may skip descriptors. We detect this when the index
2121		 * of the completed descriptor does not match the ring's current
2122		 * fill index. Catch up with the host now.
2123		 */
2124		if (__predict_false(rxr->vxrxr_fill != idx)) {
2125			while (rxr->vxrxr_fill != idx) {
2126				rxr->vxrxr_rxd[rxr->vxrxr_fill].gen =
2127				    rxr->vxrxr_gen;
2128				vmxnet3_rxr_increment_fill(rxr);
2129			}
2130		}
2131
2132		if (rxcd->sop) {
2133			KASSERT(rxd->btype == VMXNET3_BTYPE_HEAD,
2134			    ("%s: start of frame w/o head buffer", __func__));
2135			KASSERT(rxr == &rxq->vxrxq_cmd_ring[0],
2136			    ("%s: start of frame not in ring 0", __func__));
2137			KASSERT((idx % sc->vmx_rx_max_chain) == 0,
2138			    ("%s: start of frame at unexpected index %d (%d)",
2139			     __func__, idx, sc->vmx_rx_max_chain));
2140			KASSERT(m_head == NULL,
2141			    ("%s: duplicate start of frame?", __func__));
2142
2143			if (length == 0) {
2144				/* Just ignore this descriptor. */
2145				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2146				goto nextp;
2147			}
2148
2149			if (vmxnet3_newbuf(sc, rxr) != 0) {
2150				rxq->vxrxq_stats.vmrxs_iqdrops++;
2151				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2152				if (!rxcd->eop)
2153					vmxnet3_rxq_discard_chain(rxq);
2154				goto nextp;
2155			}
2156
2157			m->m_pkthdr.rcvif = ifp;
2158			m->m_pkthdr.len = m->m_len = length;
2159			m->m_pkthdr.csum_flags = 0;
2160			m_head = m_tail = m;
2161
2162		} else {
2163			KASSERT(rxd->btype == VMXNET3_BTYPE_BODY,
2164			    ("%s: non start of frame w/o body buffer", __func__));
2165			KASSERT(m_head != NULL,
2166			    ("%s: frame not started?", __func__));
2167
2168			if (vmxnet3_newbuf(sc, rxr) != 0) {
2169				rxq->vxrxq_stats.vmrxs_iqdrops++;
2170				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2171				if (!rxcd->eop)
2172					vmxnet3_rxq_discard_chain(rxq);
2173				m_freem(m_head);
2174				m_head = m_tail = NULL;
2175				goto nextp;
2176			}
2177
2178			m->m_len = length;
2179			m_head->m_pkthdr.len += length;
2180			m_tail->m_next = m;
2181			m_tail = m;
2182		}
2183
2184		if (rxcd->eop) {
2185			vmxnet3_rxq_input(rxq, rxcd, m_head);
2186			m_head = m_tail = NULL;
2187
2188			/* Must recheck after dropping the Rx lock. */
2189			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2190				break;
2191		}
2192
2193nextp:
2194		if (__predict_false(rxq->vxrxq_rs->update_rxhead)) {
2195			int qid = rxcd->qid;
2196			bus_size_t r;
2197
2198			idx = (idx + 1) % rxr->vxrxr_ndesc;
2199			if (qid >= sc->vmx_nrxqueues) {
2200				qid -= sc->vmx_nrxqueues;
2201				r = VMXNET3_BAR0_RXH2(qid);
2202			} else
2203				r = VMXNET3_BAR0_RXH1(qid);
2204			vmxnet3_write_bar0(sc, r, idx);
2205		}
2206	}
2207}
2208
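/*
 * Handler for the single shared interrupt (INTx or one MSI vector):
 * verify the interrupt is ours when using INTx, process any pending
 * device events, then service the first Rx and Tx queues and re-enable
 * interrupts.
 */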
2209static void
2210vmxnet3_legacy_intr(void *xsc)
2211{
2212	struct vmxnet3_softc *sc;
2213	struct vmxnet3_rxqueue *rxq;
2214	struct vmxnet3_txqueue *txq;
2215	struct ifnet *ifp;
2216
2217	sc = xsc;
2218	rxq = &sc->vmx_rxq[0];
2219	txq = &sc->vmx_txq[0];
2220	ifp = sc->vmx_ifp;
2221
2222	if (sc->vmx_intr_type == VMXNET3_IT_LEGACY) {
2223		if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
2224			return;
2225	}
2226	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2227		vmxnet3_disable_all_intrs(sc);
2228
2229	if (sc->vmx_ds->event != 0)
2230		vmxnet3_evintr(sc);
2231
2232	VMXNET3_RXQ_LOCK(rxq);
2233	vmxnet3_rxq_eof(rxq);
2234	VMXNET3_RXQ_UNLOCK(rxq);
2235
2236	VMXNET3_TXQ_LOCK(txq);
2237	vmxnet3_txq_eof(txq);
2238	vmxnet3_txq_start(txq);
2239	VMXNET3_TXQ_UNLOCK(txq);
2240
2241	vmxnet3_enable_all_intrs(sc);
2242}
2243
2244static void
2245vmxnet3_txq_intr(void *xtxq)
2246{
2247	struct vmxnet3_softc *sc;
2248	struct vmxnet3_txqueue *txq;
2249	struct ifnet *ifp;
2250
2251	txq = xtxq;
2252	sc = txq->vxtxq_sc;
2253	ifp = sc->vmx_ifp;
2254
2255	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2256		vmxnet3_disable_intr(sc, txq->vxtxq_intr_idx);
2257
2258	VMXNET3_TXQ_LOCK(txq);
2259	vmxnet3_txq_eof(txq);
2260	vmxnet3_txq_start(txq);
2261	VMXNET3_TXQ_UNLOCK(txq);
2262
2263	vmxnet3_enable_intr(sc, txq->vxtxq_intr_idx);
2264}
2265
2266static void
2267vmxnet3_rxq_intr(void *xrxq)
2268{
2269	struct vmxnet3_softc *sc;
2270	struct vmxnet3_rxqueue *rxq;
2271
2272	rxq = xrxq;
2273	sc = rxq->vxrxq_sc;
2274
2275	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2276		vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx);
2277
2278	VMXNET3_RXQ_LOCK(rxq);
2279	vmxnet3_rxq_eof(rxq);
2280	VMXNET3_RXQ_UNLOCK(rxq);
2281
2282	vmxnet3_enable_intr(sc, rxq->vxrxq_intr_idx);
2283}
2284
2285static void
2286vmxnet3_event_intr(void *xsc)
2287{
2288	struct vmxnet3_softc *sc;
2289
2290	sc = xsc;
2291
2292	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2293		vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
2294
2295	if (sc->vmx_ds->event != 0)
2296		vmxnet3_evintr(sc);
2297
2298	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2299}
2300
2301static void
2302vmxnet3_txstop(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2303{
2304	struct vmxnet3_txring *txr;
2305	struct vmxnet3_txbuf *txb;
2306	int i;
2307
2308	txr = &txq->vxtxq_cmd_ring;
2309
2310	for (i = 0; i < txr->vxtxr_ndesc; i++) {
2311		txb = &txr->vxtxr_txbuf[i];
2312
2313		if (txb->vtxb_m == NULL)
2314			continue;
2315
2316		bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
2317		    BUS_DMASYNC_POSTWRITE);
2318		bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap);
2319		m_freem(txb->vtxb_m);
2320		txb->vtxb_m = NULL;
2321	}
2322}
2323
2324static void
2325vmxnet3_rxstop(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2326{
2327	struct vmxnet3_rxring *rxr;
2328	struct vmxnet3_rxbuf *rxb;
2329	int i, j;
2330
2331	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
2332		rxr = &rxq->vxrxq_cmd_ring[i];
2333
2334		for (j = 0; j < rxr->vxrxr_ndesc; j++) {
2335			rxb = &rxr->vxrxr_rxbuf[j];
2336
2337			if (rxb->vrxb_m == NULL)
2338				continue;
2339
2340			bus_dmamap_sync(rxr->vxrxr_rxtag, rxb->vrxb_dmamap,
2341			    BUS_DMASYNC_POSTREAD);
2342			bus_dmamap_unload(rxr->vxrxr_rxtag, rxb->vrxb_dmamap);
2343			m_freem(rxb->vrxb_m);
2344			rxb->vrxb_m = NULL;
2345		}
2346	}
2347}
2348
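/*
 * Briefly acquire and release each queue lock so that any interrupt
 * handler still running on another CPU has finished with the queues
 * before they are torn down.
 */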
2349static void
2350vmxnet3_stop_rendezvous(struct vmxnet3_softc *sc)
2351{
2352	struct vmxnet3_rxqueue *rxq;
2353	struct vmxnet3_txqueue *txq;
2354	int i;
2355
2356	for (i = 0; i < sc->vmx_nrxqueues; i++) {
2357		rxq = &sc->vmx_rxq[i];
2358		VMXNET3_RXQ_LOCK(rxq);
2359		VMXNET3_RXQ_UNLOCK(rxq);
2360	}
2361
2362	for (i = 0; i < sc->vmx_ntxqueues; i++) {
2363		txq = &sc->vmx_txq[i];
2364		VMXNET3_TXQ_LOCK(txq);
2365		VMXNET3_TXQ_UNLOCK(txq);
2366	}
2367}
2368
2369static void
2370vmxnet3_stop(struct vmxnet3_softc *sc)
2371{
2372	struct ifnet *ifp;
2373	int q;
2374
2375	ifp = sc->vmx_ifp;
2376	VMXNET3_CORE_LOCK_ASSERT(sc);
2377
2378	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2379	sc->vmx_link_active = 0;
2380	callout_stop(&sc->vmx_tick);
2381
2382	/* Disable interrupts. */
2383	vmxnet3_disable_all_intrs(sc);
2384	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
2385
2386	vmxnet3_stop_rendezvous(sc);
2387
2388	for (q = 0; q < sc->vmx_ntxqueues; q++)
2389		vmxnet3_txstop(sc, &sc->vmx_txq[q]);
2390	for (q = 0; q < sc->vmx_nrxqueues; q++)
2391		vmxnet3_rxstop(sc, &sc->vmx_rxq[q]);
2392
2393	vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
2394}
2395
2396static void
2397vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2398{
2399	struct vmxnet3_txring *txr;
2400	struct vmxnet3_comp_ring *txc;
2401
2402	txr = &txq->vxtxq_cmd_ring;
2403	txr->vxtxr_head = 0;
2404	txr->vxtxr_next = 0;
2405	txr->vxtxr_gen = VMXNET3_INIT_GEN;
2406	bzero(txr->vxtxr_txd,
2407	    txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc));
2408
2409	txc = &txq->vxtxq_comp_ring;
2410	txc->vxcr_next = 0;
2411	txc->vxcr_gen = VMXNET3_INIT_GEN;
2412	bzero(txc->vxcr_u.txcd,
2413	    txc->vxcr_ndesc * sizeof(struct vmxnet3_txcompdesc));
2414}
2415
2416static int
2417vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2418{
2419	struct ifnet *ifp;
2420	struct vmxnet3_rxring *rxr;
2421	struct vmxnet3_comp_ring *rxc;
2422	int i, populate, idx, frame_size, error;
2423
2424	ifp = sc->vmx_ifp;
2425	frame_size = ETHER_ALIGN + sizeof(struct ether_vlan_header) +
2426	    ifp->if_mtu;
2427
2428	/*
2429	 * If the MTU causes us to exceed what a regular sized cluster can
2430	 * handle, we allocate a second MJUMPAGESIZE cluster after it in
2431	 * ring 0. If in use, ring 1 always contains MJUMPAGESIZE clusters.
2432	 *
2433	 * Keep rx_max_chain a divisor of the maximum Rx ring size to make
2434	 * our life easier. We do not support changing the ring size after
2435	 * the attach.
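	 *
	 * For example (a sketch assuming the usual 2K MCLBYTES and 4K
	 * MJUMPAGESIZE): a 1500 byte MTU gives a frame_size of 1520 and a
	 * single-cluster chain, while a 9000 byte MTU gives 9020, so ring 0
	 * uses two-cluster chains and ring 1 is populated as well.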
2436	 */
2437	if (frame_size <= MCLBYTES)
2438		sc->vmx_rx_max_chain = 1;
2439	else
2440		sc->vmx_rx_max_chain = 2;
2441
2442	/*
2443	 * Only populate ring 1 if the configuration will take advantage
2444	 * of it. That is either when LRO is enabled or the frame size
2445	 * exceeds what ring 0 can contain.
2446	 */
2447	if ((ifp->if_capenable & IFCAP_LRO) == 0 &&
2448	    frame_size <= MCLBYTES + MJUMPAGESIZE)
2449		populate = 1;
2450	else
2451		populate = VMXNET3_RXRINGS_PERQ;
2452
2453	for (i = 0; i < populate; i++) {
2454		rxr = &rxq->vxrxq_cmd_ring[i];
2455		rxr->vxrxr_fill = 0;
2456		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
2457		bzero(rxr->vxrxr_rxd,
2458		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2459
2460		for (idx = 0; idx < rxr->vxrxr_ndesc; idx++) {
2461			error = vmxnet3_newbuf(sc, rxr);
2462			if (error)
2463				return (error);
2464		}
2465	}
2466
2467	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
2468		rxr = &rxq->vxrxq_cmd_ring[i];
2469		rxr->vxrxr_fill = 0;
2470		rxr->vxrxr_gen = 0;
2471		bzero(rxr->vxrxr_rxd,
2472		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2473	}
2474
2475	rxc = &rxq->vxrxq_comp_ring;
2476	rxc->vxcr_next = 0;
2477	rxc->vxcr_gen = VMXNET3_INIT_GEN;
2478	bzero(rxc->vxcr_u.rxcd,
2479	    rxc->vxcr_ndesc * sizeof(struct vmxnet3_rxcompdesc));
2480
2481	return (0);
2482}
2483
2484static int
2485vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
2486{
2487	device_t dev;
2488	int q, error;
2489
2490	dev = sc->vmx_dev;
2491
2492	for (q = 0; q < sc->vmx_ntxqueues; q++)
2493		vmxnet3_txinit(sc, &sc->vmx_txq[q]);
2494
2495	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2496		error = vmxnet3_rxinit(sc, &sc->vmx_rxq[q]);
2497		if (error) {
2498			device_printf(dev, "cannot populate Rx queue %d\n", q);
2499			return (error);
2500		}
2501	}
2502
2503	return (0);
2504}
2505
2506static int
2507vmxnet3_enable_device(struct vmxnet3_softc *sc)
2508{
2509	int q;
2510
2511	if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
2512		device_printf(sc->vmx_dev, "device enable command failed!\n");
2513		return (1);
2514	}
2515
2516	/* Reset the Rx queue heads. */
2517	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2518		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
2519		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
2520	}
2521
2522	return (0);
2523}
2524
2525static void
2526vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
2527{
2528	struct ifnet *ifp;
2529
2530	ifp = sc->vmx_ifp;
2531
2532	vmxnet3_set_rxfilter(sc);
2533
2534	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2535		bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter,
2536		    sizeof(sc->vmx_ds->vlan_filter));
2537	else
2538		bzero(sc->vmx_ds->vlan_filter,
2539		    sizeof(sc->vmx_ds->vlan_filter));
2540	vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
2541}
2542
2543static int
2544vmxnet3_reinit(struct vmxnet3_softc *sc)
2545{
2546
2547	vmxnet3_reinit_interface(sc);
2548	vmxnet3_reinit_shared_data(sc);
2549
2550	if (vmxnet3_reinit_queues(sc) != 0)
2551		return (ENXIO);
2552
2553	if (vmxnet3_enable_device(sc) != 0)
2554		return (ENXIO);
2555
2556	vmxnet3_reinit_rxfilters(sc);
2557
2558	return (0);
2559}
2560
2561static void
2562vmxnet3_init_locked(struct vmxnet3_softc *sc)
2563{
2564	struct ifnet *ifp;
2565
2566	ifp = sc->vmx_ifp;
2567
2568	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2569		return;
2570
2571	vmxnet3_stop(sc);
2572
2573	if (vmxnet3_reinit(sc) != 0) {
2574		vmxnet3_stop(sc);
2575		return;
2576	}
2577
2578	ifp->if_drv_flags |= IFF_DRV_RUNNING;
2579	vmxnet3_link_status(sc);
2580
2581	vmxnet3_enable_all_intrs(sc);
2582	callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
2583}
2584
2585static void
2586vmxnet3_init(void *xsc)
2587{
2588	struct vmxnet3_softc *sc;
2589
2590	sc = xsc;
2591
2592	VMXNET3_CORE_LOCK(sc);
2593	vmxnet3_init_locked(sc);
2594	VMXNET3_CORE_UNLOCK(sc);
2595}
2596
2597/*
2598 * BMV: Much of this can go away once we finally have offsets in
2599 * the mbuf packet header. Bug andre@.
2600 */
2601static int
2602vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *txq, struct mbuf *m,
2603    int *etype, int *proto, int *start)
2604{
2605	struct ether_vlan_header *evh;
2606	int offset;
2607
2608	evh = mtod(m, struct ether_vlan_header *);
2609	if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2610		/* BMV: We should handle nested VLAN tags too. */
2611		*etype = ntohs(evh->evl_proto);
2612		offset = sizeof(struct ether_vlan_header);
2613	} else {
2614		*etype = ntohs(evh->evl_encap_proto);
2615		offset = sizeof(struct ether_header);
2616	}
2617
2618	switch (*etype) {
2619#if defined(INET)
2620	case ETHERTYPE_IP: {
2621		struct ip *ip, iphdr;
2622		if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
2623			m_copydata(m, offset, sizeof(struct ip),
2624			    (caddr_t) &iphdr);
2625			ip = &iphdr;
2626		} else
2627			ip = mtodo(m, offset);
2628		*proto = ip->ip_p;
2629		*start = offset + (ip->ip_hl << 2);
2630		break;
2631	}
2632#endif
2633#if defined(INET6)
2634	case ETHERTYPE_IPV6:
2635		*proto = -1;
2636		*start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
2637		/* Assert the network stack sent us a valid packet. */
2638		KASSERT(*start > offset,
2639		    ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
2640		    *start, offset, *proto));
2641		break;
2642#endif
2643	default:
2644		return (EINVAL);
2645	}
2646
2647	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
2648		struct tcphdr *tcp, tcphdr;
2649
2650		if (__predict_false(*proto != IPPROTO_TCP)) {
2651			/* Likely failed to correctly parse the mbuf. */
2652			return (EINVAL);
2653		}
2654
2655		txq->vxtxq_stats.vmtxs_tso++;
2656
2657		/*
2658		 * For TSO, the size of the protocol header is also
2659		 * included in the descriptor header size.
2660		 */
2661		if (m->m_len < *start + sizeof(struct tcphdr)) {
2662			m_copydata(m, *start, sizeof(struct tcphdr),
2663			    (caddr_t) &tcphdr);
2664			tcp = &tcphdr;
2665		} else
2666			tcp = mtodo(m, *start);
2667		*start += (tcp->th_off << 2);
2668	} else
2669		txq->vxtxq_stats.vmtxs_csum++;
2670
2671	return (0);
2672}
2673
2674static int
2675vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *txq, struct mbuf **m0,
2676    bus_dmamap_t dmap, bus_dma_segment_t segs[], int *nsegs)
2677{
2678	struct vmxnet3_txring *txr;
2679	struct mbuf *m;
2680	bus_dma_tag_t tag;
2681	int error;
2682
2683	txr = &txq->vxtxq_cmd_ring;
2684	m = *m0;
2685	tag = txr->vxtxr_txtag;
2686
2687	error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0);
2688	if (error == 0 || error != EFBIG)
2689		return (error);
2690
2691	m = m_defrag(m, M_NOWAIT);
2692	if (m != NULL) {
2693		*m0 = m;
2694		error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0);
2695	} else
2696		error = ENOBUFS;
2697
2698	if (error) {
2699		m_freem(*m0);
2700		*m0 = NULL;
2701		txq->vxtxq_sc->vmx_stats.vmst_defrag_failed++;
2702	} else
2703		txq->vxtxq_sc->vmx_stats.vmst_defragged++;
2704
2705	return (error);
2706}
2707
2708static void
2709vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *txq, bus_dmamap_t dmap)
2710{
2711	struct vmxnet3_txring *txr;
2712
2713	txr = &txq->vxtxq_cmd_ring;
2714	bus_dmamap_unload(txr->vxtxr_txtag, dmap);
2715}
2716
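/*
 * Enqueue one mbuf chain on the Tx command ring: load it for DMA
 * (defragmenting if necessary), fill one descriptor per segment, set up
 * any VLAN, checksum, or TSO offload in the SOP descriptor, and finally
 * flip the SOP generation bit to hand the packet to the device.
 */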
2717static int
2718vmxnet3_txq_encap(struct vmxnet3_txqueue *txq, struct mbuf **m0)
2719{
2720	struct vmxnet3_softc *sc;
2721	struct ifnet *ifp;
2722	struct vmxnet3_txring *txr;
2723	struct vmxnet3_txdesc *txd, *sop;
2724	struct mbuf *m;
2725	bus_dmamap_t dmap;
2726	bus_dma_segment_t segs[VMXNET3_TX_MAXSEGS];
2727	int i, gen, nsegs, etype, proto, start, error;
2728
2729	sc = txq->vxtxq_sc;
2730	ifp = sc->vmx_ifp;
2731	start = 0;
2732	txd = NULL;
2733	txr = &txq->vxtxq_cmd_ring;
2734	dmap = txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_dmamap;
2735
2736	error = vmxnet3_txq_load_mbuf(txq, m0, dmap, segs, &nsegs);
2737	if (error)
2738		return (error);
2739
2740	m = *m0;
2741	M_ASSERTPKTHDR(m);
2742	KASSERT(nsegs <= VMXNET3_TX_MAXSEGS,
2743	    ("%s: mbuf %p with too many segments %d", __func__, m, nsegs));
2744
2745	if (VMXNET3_TXRING_AVAIL(txr) < nsegs) {
2746		txq->vxtxq_stats.vmtxs_full++;
2747		vmxnet3_txq_unload_mbuf(txq, dmap);
2748		return (ENOSPC);
2749	} else if (m->m_pkthdr.csum_flags & VMXNET3_CSUM_ALL_OFFLOAD) {
2750		error = vmxnet3_txq_offload_ctx(txq, m, &etype, &proto, &start);
2751		if (error) {
2752			txq->vxtxq_stats.vmtxs_offload_failed++;
2753			vmxnet3_txq_unload_mbuf(txq, dmap);
2754			m_freem(m);
2755			*m0 = NULL;
2756			return (error);
2757		}
2758	}
2759
2760	txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_m = m;
2761	sop = &txr->vxtxr_txd[txr->vxtxr_head];
2762	gen = txr->vxtxr_gen ^ 1;	/* Not yet owned by the device; flipped below. */
2763
2764	for (i = 0; i < nsegs; i++) {
2765		txd = &txr->vxtxr_txd[txr->vxtxr_head];
2766
2767		txd->addr = segs[i].ds_addr;
2768		txd->len = segs[i].ds_len;
2769		txd->gen = gen;
2770		txd->dtype = 0;
2771		txd->offload_mode = VMXNET3_OM_NONE;
2772		txd->offload_pos = 0;
2773		txd->hlen = 0;
2774		txd->eop = 0;
2775		txd->compreq = 0;
2776		txd->vtag_mode = 0;
2777		txd->vtag = 0;
2778
2779		if (++txr->vxtxr_head == txr->vxtxr_ndesc) {
2780			txr->vxtxr_head = 0;
2781			txr->vxtxr_gen ^= 1;
2782		}
2783		gen = txr->vxtxr_gen;
2784	}
2785	txd->eop = 1;
2786	txd->compreq = 1;
2787
2788	if (m->m_flags & M_VLANTAG) {
2789		sop->vtag_mode = 1;
2790		sop->vtag = m->m_pkthdr.ether_vtag;
2791	}
2792
2793	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
2794		sop->offload_mode = VMXNET3_OM_TSO;
2795		sop->hlen = start;
2796		sop->offload_pos = m->m_pkthdr.tso_segsz;
2797	} else if (m->m_pkthdr.csum_flags & (VMXNET3_CSUM_OFFLOAD |
2798	    VMXNET3_CSUM_OFFLOAD_IPV6)) {
2799		sop->offload_mode = VMXNET3_OM_CSUM;
2800		sop->hlen = start;
2801		sop->offload_pos = start + m->m_pkthdr.csum_data;
2802	}
2803
2804	/* Finally, change the ownership. */
2805	vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
2806	sop->gen ^= 1;
2807
2808	if (++txq->vxtxq_ts->npending >= txq->vxtxq_ts->intr_threshold) {
2809		txq->vxtxq_ts->npending = 0;
2810		vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id),
2811		    txr->vxtxr_head);
2812	}
2813
2814	return (0);
2815}
2816
2817static void
2818vmxnet3_txq_update_pending(struct vmxnet3_txqueue *txq)
2819{
2820	struct vmxnet3_txring *txr;
2821
2822	txr = &txq->vxtxq_cmd_ring;
2823
2824	if (txq->vxtxq_ts->npending > 0) {
2825		txq->vxtxq_ts->npending = 0;
2826		vmxnet3_write_bar0(txq->vxtxq_sc,
2827		    VMXNET3_BAR0_TXH(txq->vxtxq_id), txr->vxtxr_head);
2828	}
2829}
2830
2831#ifdef VMXNET3_LEGACY_TX
2832
2833static void
2834vmxnet3_start_locked(struct ifnet *ifp)
2835{
2836	struct vmxnet3_softc *sc;
2837	struct vmxnet3_txqueue *txq;
2838	struct vmxnet3_txring *txr;
2839	struct mbuf *m_head;
2840	int tx, avail;
2841
2842	sc = ifp->if_softc;
2843	txq = &sc->vmx_txq[0];
2844	txr = &txq->vxtxq_cmd_ring;
2845	tx = 0;
2846
2847	VMXNET3_TXQ_LOCK_ASSERT(txq);
2848
2849	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2850	    sc->vmx_link_active == 0)
2851		return;
2852
2853	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
2854		if ((avail = VMXNET3_TXRING_AVAIL(txr)) < 2)
2855			break;
2856
2857		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
2858		if (m_head == NULL)
2859			break;
2860
2861		/* Assume the worst case if this mbuf is the head of a chain. */
2862		if (m_head->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) {
2863			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2864			break;
2865		}
2866
2867		if (vmxnet3_txq_encap(txq, &m_head) != 0) {
2868			if (m_head != NULL)
2869				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2870			break;
2871		}
2872
2873		tx++;
2874		ETHER_BPF_MTAP(ifp, m_head);
2875	}
2876
2877	if (tx > 0) {
2878		vmxnet3_txq_update_pending(txq);
2879		txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
2880	}
2881}
2882
2883static void
2884vmxnet3_start(struct ifnet *ifp)
2885{
2886	struct vmxnet3_softc *sc;
2887	struct vmxnet3_txqueue *txq;
2888
2889	sc = ifp->if_softc;
2890	txq = &sc->vmx_txq[0];
2891
2892	VMXNET3_TXQ_LOCK(txq);
2893	vmxnet3_start_locked(ifp);
2894	VMXNET3_TXQ_UNLOCK(txq);
2895}
2896
2897#else /* !VMXNET3_LEGACY_TX */
2898
2899static int
2900vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *txq, struct mbuf *m)
2901{
2902	struct vmxnet3_softc *sc;
2903	struct vmxnet3_txring *txr;
2904	struct buf_ring *br;
2905	struct ifnet *ifp;
2906	int tx, avail, error;
2907
2908	sc = txq->vxtxq_sc;
2909	br = txq->vxtxq_br;
2910	ifp = sc->vmx_ifp;
2911	txr = &txq->vxtxq_cmd_ring;
2912	tx = 0;
2913	error = 0;
2914
2915	VMXNET3_TXQ_LOCK_ASSERT(txq);
2916
2917	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2918	    sc->vmx_link_active == 0) {
2919		if (m != NULL)
2920			error = drbr_enqueue(ifp, br, m);
2921		return (error);
2922	}
2923
2924	if (m != NULL) {
2925		error = drbr_enqueue(ifp, br, m);
2926		if (error)
2927			return (error);
2928	}
2929
2930	while ((avail = VMXNET3_TXRING_AVAIL(txr)) >= 2) {
2931		m = drbr_peek(ifp, br);
2932		if (m == NULL)
2933			break;
2934
2935		/* Assume the worst case if this mbuf is the head of a chain. */
2936		if (m->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) {
2937			drbr_putback(ifp, br, m);
2938			break;
2939		}
2940
2941		error = vmxnet3_txq_encap(txq, &m);
2942		if (error) {
2943			if (m != NULL)
2944				drbr_putback(ifp, br, m);
2945			else
2946				drbr_advance(ifp, br);
2947			break;
2948		}
2949		drbr_advance(ifp, br);
2950
2951		tx++;
2952		ETHER_BPF_MTAP(ifp, m);
2953	}
2954
2955	if (tx > 0) {
2956		vmxnet3_txq_update_pending(txq);
2957		txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
2958	}
2959
2960	return (0);
2961}
2962
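/*
 * Multiqueue if_transmit entry point: select a Tx queue from the mbuf's
 * flow id (or the current CPU), and either transmit immediately if the
 * queue lock is uncontended or enqueue on the buf_ring and defer to the
 * taskqueue.
 */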
2963static int
2964vmxnet3_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
2965{
2966	struct vmxnet3_softc *sc;
2967	struct vmxnet3_txqueue *txq;
2968	int i, ntxq, error;
2969
2970	sc = ifp->if_softc;
2971	ntxq = sc->vmx_ntxqueues;
2972
2973	if (m->m_flags & M_FLOWID)
2974		i = m->m_pkthdr.flowid % ntxq;
2975	else
2976		i = curcpu % ntxq;
2977
2978	txq = &sc->vmx_txq[i];
2979
2980	if (VMXNET3_TXQ_TRYLOCK(txq) != 0) {
2981		error = vmxnet3_txq_mq_start_locked(txq, m);
2982		VMXNET3_TXQ_UNLOCK(txq);
2983	} else {
2984		error = drbr_enqueue(ifp, txq->vxtxq_br, m);
2985		taskqueue_enqueue(sc->vmx_tq, &txq->vxtxq_defrtask);
2986	}
2987
2988	return (error);
2989}
2990
2991static void
2992vmxnet3_txq_tq_deferred(void *xtxq, int pending)
2993{
2994	struct vmxnet3_softc *sc;
2995	struct vmxnet3_txqueue *txq;
2996
2997	txq = xtxq;
2998	sc = txq->vxtxq_sc;
2999
3000	VMXNET3_TXQ_LOCK(txq);
3001	if (!drbr_empty(sc->vmx_ifp, txq->vxtxq_br))
3002		vmxnet3_txq_mq_start_locked(txq, NULL);
3003	VMXNET3_TXQ_UNLOCK(txq);
3004}
3005
3006#endif /* VMXNET3_LEGACY_TX */
3007
3008static void
3009vmxnet3_txq_start(struct vmxnet3_txqueue *txq)
3010{
3011	struct vmxnet3_softc *sc;
3012	struct ifnet *ifp;
3013
3014	sc = txq->vxtxq_sc;
3015	ifp = sc->vmx_ifp;
3016
3017#ifdef VMXNET3_LEGACY_TX
3018	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
3019		vmxnet3_start_locked(ifp);
3020#else
3021	if (!drbr_empty(ifp, txq->vxtxq_br))
3022		vmxnet3_txq_mq_start_locked(txq, NULL);
3023#endif
3024}
3025
3026static void
3027vmxnet3_tx_start_all(struct vmxnet3_softc *sc)
3028{
3029	struct vmxnet3_txqueue *txq;
3030	int i;
3031
3032	VMXNET3_CORE_LOCK_ASSERT(sc);
3033
3034	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3035		txq = &sc->vmx_txq[i];
3036
3037		VMXNET3_TXQ_LOCK(txq);
3038		vmxnet3_txq_start(txq);
3039		VMXNET3_TXQ_UNLOCK(txq);
3040	}
3041}
3042
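/*
 * Add or remove a VLAN tag in the driver's private VLAN bitvector and,
 * if hardware VLAN filtering is enabled, push the change to the device.
 */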
3043static void
3044vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
3045{
3046	struct ifnet *ifp;
3047	int idx, bit;
3048
3049	ifp = sc->vmx_ifp;
3050	idx = (tag >> 5) & 0x7F;
3051	bit = tag & 0x1F;
3052
3053	if (tag == 0 || tag > 4095)
3054		return;
3055
3056	VMXNET3_CORE_LOCK(sc);
3057
3058	/* Update our private VLAN bitvector. */
3059	if (add)
3060		sc->vmx_vlan_filter[idx] |= (1 << bit);
3061	else
3062		sc->vmx_vlan_filter[idx] &= ~(1 << bit);
3063
3064	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
3065		if (add)
3066			sc->vmx_ds->vlan_filter[idx] |= (1 << bit);
3067		else
3068			sc->vmx_ds->vlan_filter[idx] &= ~(1 << bit);
3069		vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
3070	}
3071
3072	VMXNET3_CORE_UNLOCK(sc);
3073}
3074
3075static void
3076vmxnet3_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3077{
3078
3079	if (ifp->if_softc == arg)
3080		vmxnet3_update_vlan_filter(arg, 1, tag);
3081}
3082
3083static void
3084vmxnet3_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3085{
3086
3087	if (ifp->if_softc == arg)
3088		vmxnet3_update_vlan_filter(arg, 0, tag);
3089}
3090
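/*
 * Program the device receive mode and multicast filter table from the
 * interface flags and multicast address list, falling back to
 * all-multicast if the table would overflow.
 */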
3091static void
3092vmxnet3_set_rxfilter(struct vmxnet3_softc *sc)
3093{
3094	struct ifnet *ifp;
3095	struct vmxnet3_driver_shared *ds;
3096	struct ifmultiaddr *ifma;
3097	u_int mode;
3098
3099	ifp = sc->vmx_ifp;
3100	ds = sc->vmx_ds;
3101
3102	mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
3103	if (ifp->if_flags & IFF_PROMISC)
3104		mode |= VMXNET3_RXMODE_PROMISC;
3105	if (ifp->if_flags & IFF_ALLMULTI)
3106		mode |= VMXNET3_RXMODE_ALLMULTI;
3107	else {
3108		int cnt = 0, overflow = 0;
3109
3110		if_maddr_rlock(ifp);
3111		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3112			if (ifma->ifma_addr->sa_family != AF_LINK)
3113				continue;
3114			else if (cnt == VMXNET3_MULTICAST_MAX) {
3115				overflow = 1;
3116				break;
3117			}
3118
3119			bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
3120			   &sc->vmx_mcast[cnt*ETHER_ADDR_LEN], ETHER_ADDR_LEN);
3121			cnt++;
3122		}
3123		if_maddr_runlock(ifp);
3124
3125		if (overflow != 0) {
3126			cnt = 0;
3127			mode |= VMXNET3_RXMODE_ALLMULTI;
3128		} else if (cnt > 0)
3129			mode |= VMXNET3_RXMODE_MCAST;
3130		ds->mcast_tablelen = cnt * ETHER_ADDR_LEN;
3131	}
3132
3133	ds->rxmode = mode;
3134
3135	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
3136	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
3137}
3138
3139static int
3140vmxnet3_change_mtu(struct vmxnet3_softc *sc, int mtu)
3141{
3142	struct ifnet *ifp;
3143
3144	ifp = sc->vmx_ifp;
3145
3146	if (mtu < VMXNET3_MIN_MTU || mtu > VMXNET3_MAX_MTU)
3147		return (EINVAL);
3148
3149	ifp->if_mtu = mtu;
3150
3151	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
3152		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3153		vmxnet3_init_locked(sc);
3154	}
3155
3156	return (0);
3157}
3158
3159static int
3160vmxnet3_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
3161{
3162	struct vmxnet3_softc *sc;
3163	struct ifreq *ifr;
3164	int reinit, mask, error;
3165
3166	sc = ifp->if_softc;
3167	ifr = (struct ifreq *) data;
3168	error = 0;
3169
3170	switch (cmd) {
3171	case SIOCSIFMTU:
3172		if (ifp->if_mtu != ifr->ifr_mtu) {
3173			VMXNET3_CORE_LOCK(sc);
3174			error = vmxnet3_change_mtu(sc, ifr->ifr_mtu);
3175			VMXNET3_CORE_UNLOCK(sc);
3176		}
3177		break;
3178
3179	case SIOCSIFFLAGS:
3180		VMXNET3_CORE_LOCK(sc);
3181		if (ifp->if_flags & IFF_UP) {
3182			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3183				if ((ifp->if_flags ^ sc->vmx_if_flags) &
3184				    (IFF_PROMISC | IFF_ALLMULTI)) {
3185					vmxnet3_set_rxfilter(sc);
3186				}
3187			} else
3188				vmxnet3_init_locked(sc);
3189		} else {
3190			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3191				vmxnet3_stop(sc);
3192		}
3193		sc->vmx_if_flags = ifp->if_flags;
3194		VMXNET3_CORE_UNLOCK(sc);
3195		break;
3196
3197	case SIOCADDMULTI:
3198	case SIOCDELMULTI:
3199		VMXNET3_CORE_LOCK(sc);
3200		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3201			vmxnet3_set_rxfilter(sc);
3202		VMXNET3_CORE_UNLOCK(sc);
3203		break;
3204
3205	case SIOCSIFMEDIA:
3206	case SIOCGIFMEDIA:
3207		error = ifmedia_ioctl(ifp, ifr, &sc->vmx_media, cmd);
3208		break;
3209
3210	case SIOCSIFCAP:
3211		VMXNET3_CORE_LOCK(sc);
3212		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3213
3214		if (mask & IFCAP_TXCSUM)
3215			ifp->if_capenable ^= IFCAP_TXCSUM;
3216		if (mask & IFCAP_TXCSUM_IPV6)
3217			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
3218		if (mask & IFCAP_TSO4)
3219			ifp->if_capenable ^= IFCAP_TSO4;
3220		if (mask & IFCAP_TSO6)
3221			ifp->if_capenable ^= IFCAP_TSO6;
3222
3223		if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO |
3224		    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWFILTER)) {
3225			/* Changing these features requires us to reinit. */
3226			reinit = 1;
3227
3228			if (mask & IFCAP_RXCSUM)
3229				ifp->if_capenable ^= IFCAP_RXCSUM;
3230			if (mask & IFCAP_RXCSUM_IPV6)
3231				ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
3232			if (mask & IFCAP_LRO)
3233				ifp->if_capenable ^= IFCAP_LRO;
3234			if (mask & IFCAP_VLAN_HWTAGGING)
3235				ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
3236			if (mask & IFCAP_VLAN_HWFILTER)
3237				ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
3238		} else
3239			reinit = 0;
3240
3241		if (mask & IFCAP_VLAN_HWTSO)
3242			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
3243
3244		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3245			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3246			vmxnet3_init_locked(sc);
3247		}
3248
3249		VMXNET3_CORE_UNLOCK(sc);
3250		VLAN_CAPABILITIES(ifp);
3251		break;
3252
3253	default:
3254		error = ether_ioctl(ifp, cmd, data);
3255		break;
3256	}
3257
3258	VMXNET3_CORE_LOCK_ASSERT_NOTOWNED(sc);
3259
3260	return (error);
3261}
3262
3263#ifndef VMXNET3_LEGACY_TX
3264static void
3265vmxnet3_qflush(struct ifnet *ifp)
3266{
3267	struct vmxnet3_softc *sc;
3268	struct vmxnet3_txqueue *txq;
3269	struct mbuf *m;
3270	int i;
3271
3272	sc = ifp->if_softc;
3273
3274	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3275		txq = &sc->vmx_txq[i];
3276
3277		VMXNET3_TXQ_LOCK(txq);
3278		while ((m = buf_ring_dequeue_sc(txq->vxtxq_br)) != NULL)
3279			m_freem(m);
3280		VMXNET3_TXQ_UNLOCK(txq);
3281	}
3282
3283	if_qflush(ifp);
3284}
3285#endif
3286
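/*
 * Per-queue Tx watchdog, run once per second from the tick callout.
 * Returns nonzero when the queue's watchdog counter expires, which
 * causes the caller to reinitialize the interface.
 */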
3287static int
3288vmxnet3_watchdog(struct vmxnet3_txqueue *txq)
3289{
3290	struct vmxnet3_softc *sc;
3291
3292	sc = txq->vxtxq_sc;
3293
3294	VMXNET3_TXQ_LOCK(txq);
3295	if (txq->vxtxq_watchdog == 0 || --txq->vxtxq_watchdog) {
3296		VMXNET3_TXQ_UNLOCK(txq);
3297		return (0);
3298	}
3299	VMXNET3_TXQ_UNLOCK(txq);
3300
3301	if_printf(sc->vmx_ifp, "watchdog timeout on queue %d\n",
3302	    txq->vxtxq_id);
3303	return (1);
3304}
3305
3306static void
3307vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
3308{
3309
3310	vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
3311}
3312
3313static void
3314vmxnet3_txq_accum_stats(struct vmxnet3_txqueue *txq,
3315    struct vmxnet3_txq_stats *accum)
3316{
3317	struct vmxnet3_txq_stats *st;
3318
3319	st = &txq->vxtxq_stats;
3320
3321	accum->vmtxs_opackets += st->vmtxs_opackets;
3322	accum->vmtxs_obytes += st->vmtxs_obytes;
3323	accum->vmtxs_omcasts += st->vmtxs_omcasts;
3324	accum->vmtxs_csum += st->vmtxs_csum;
3325	accum->vmtxs_tso += st->vmtxs_tso;
3326	accum->vmtxs_full += st->vmtxs_full;
3327	accum->vmtxs_offload_failed += st->vmtxs_offload_failed;
3328}
3329
3330static void
3331vmxnet3_rxq_accum_stats(struct vmxnet3_rxqueue *rxq,
3332    struct vmxnet3_rxq_stats *accum)
3333{
3334	struct vmxnet3_rxq_stats *st;
3335
3336	st = &rxq->vxrxq_stats;
3337
3338	accum->vmrxs_ipackets += st->vmrxs_ipackets;
3339	accum->vmrxs_ibytes += st->vmrxs_ibytes;
3340	accum->vmrxs_iqdrops += st->vmrxs_iqdrops;
3341	accum->vmrxs_ierrors += st->vmrxs_ierrors;
3342}
3343
3344static void
3345vmxnet3_accumulate_stats(struct vmxnet3_softc *sc)
3346{
3347	struct ifnet *ifp;
3348	struct vmxnet3_statistics *st;
3349	struct vmxnet3_txq_stats txaccum;
3350	struct vmxnet3_rxq_stats rxaccum;
3351	int i;
3352
3353	ifp = sc->vmx_ifp;
3354	st = &sc->vmx_stats;
3355
3356	bzero(&txaccum, sizeof(struct vmxnet3_txq_stats));
3357	bzero(&rxaccum, sizeof(struct vmxnet3_rxq_stats));
3358
3359	for (i = 0; i < sc->vmx_ntxqueues; i++)
3360		vmxnet3_txq_accum_stats(&sc->vmx_txq[i], &txaccum);
3361	for (i = 0; i < sc->vmx_nrxqueues; i++)
3362		vmxnet3_rxq_accum_stats(&sc->vmx_rxq[i], &rxaccum);
3363
3364	/*
3365	 * With the exception of if_ierrors, these ifnet statistics are
3366	 * only updated in the driver, so just set them to our accumulated
3367	 * values. if_ierrors is updated in ether_input() for malformed
3368	 * frames that we should have already discarded.
3369	 */
3370	ifp->if_ipackets = rxaccum.vmrxs_ipackets;
3371	ifp->if_iqdrops = rxaccum.vmrxs_iqdrops;
3372	ifp->if_ierrors = rxaccum.vmrxs_ierrors;
3373	ifp->if_opackets = txaccum.vmtxs_opackets;
3374#ifndef VMXNET3_LEGACY_TX
3375	ifp->if_obytes = txaccum.vmtxs_obytes;
3376	ifp->if_omcasts = txaccum.vmtxs_omcasts;
3377#endif
3378}
3379
3380static void
3381vmxnet3_tick(void *xsc)
3382{
3383	struct vmxnet3_softc *sc;
3384	struct ifnet *ifp;
3385	int i, timedout;
3386
3387	sc = xsc;
3388	ifp = sc->vmx_ifp;
3389	timedout = 0;
3390
3391	VMXNET3_CORE_LOCK_ASSERT(sc);
3392
3393	vmxnet3_accumulate_stats(sc);
3394	vmxnet3_refresh_host_stats(sc);
3395
3396	for (i = 0; i < sc->vmx_ntxqueues; i++)
3397		timedout |= vmxnet3_watchdog(&sc->vmx_txq[i]);
3398
3399	if (timedout != 0) {
3400		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3401		vmxnet3_init_locked(sc);
3402	} else
3403		callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
3404}
3405
3406static int
3407vmxnet3_link_is_up(struct vmxnet3_softc *sc)
3408{
3409	uint32_t status;
3410
3411	/* Also update the link speed while here. */
3412	status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
3413	sc->vmx_link_speed = status >> 16;
3414	return (!!(status & 0x1));
3415}
3416
3417static void
3418vmxnet3_link_status(struct vmxnet3_softc *sc)
3419{
3420	struct ifnet *ifp;
3421	int link;
3422
3423	ifp = sc->vmx_ifp;
3424	link = vmxnet3_link_is_up(sc);
3425
3426	if (link != 0 && sc->vmx_link_active == 0) {
3427		sc->vmx_link_active = 1;
3428		if_link_state_change(ifp, LINK_STATE_UP);
3429	} else if (link == 0 && sc->vmx_link_active != 0) {
3430		sc->vmx_link_active = 0;
3431		if_link_state_change(ifp, LINK_STATE_DOWN);
3432	}
3433}
3434
3435static void
3436vmxnet3_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3437{
3438	struct vmxnet3_softc *sc;
3439
3440	sc = ifp->if_softc;
3441
3442	ifmr->ifm_active = IFM_ETHER | IFM_AUTO;
3443	ifmr->ifm_status = IFM_AVALID;
3444
3445	VMXNET3_CORE_LOCK(sc);
3446	if (vmxnet3_link_is_up(sc) != 0)
3447		ifmr->ifm_status |= IFM_ACTIVE;
3448	else
3449		ifmr->ifm_status |= IFM_NONE;
3450	VMXNET3_CORE_UNLOCK(sc);
3451}
3452
3453static int
3454vmxnet3_media_change(struct ifnet *ifp)
3455{
3456
3457	/* Ignore. */
3458	return (0);
3459}
3460
3461static void
3462vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
3463{
3464	uint32_t ml, mh;
3465
3466	ml  = sc->vmx_lladdr[0];
3467	ml |= sc->vmx_lladdr[1] << 8;
3468	ml |= sc->vmx_lladdr[2] << 16;
3469	ml |= sc->vmx_lladdr[3] << 24;
3470	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
3471
3472	mh  = sc->vmx_lladdr[4];
3473	mh |= sc->vmx_lladdr[5] << 8;
3474	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
3475}
3476
3477static void
3478vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
3479{
3480	uint32_t ml, mh;
3481
3482	ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
3483	mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
3484
3485	sc->vmx_lladdr[0] = ml;
3486	sc->vmx_lladdr[1] = ml >> 8;
3487	sc->vmx_lladdr[2] = ml >> 16;
3488	sc->vmx_lladdr[3] = ml >> 24;
3489	sc->vmx_lladdr[4] = mh;
3490	sc->vmx_lladdr[5] = mh >> 8;
3491}
3492
3493static void
3494vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
3495    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3496{
3497	struct sysctl_oid *node, *txsnode;
3498	struct sysctl_oid_list *list, *txslist;
3499	struct vmxnet3_txq_stats *stats;
3500	struct UPT1_TxStats *txstats;
3501	char namebuf[16];
3502
3503	stats = &txq->vxtxq_stats;
3504	txstats = &txq->vxtxq_ts->stats;
3505
3506	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id);
3507	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
3508	    NULL, "Transmit Queue");
3509	txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);
3510
3511	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
3512	    &stats->vmtxs_opackets, "Transmit packets");
3513	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
3514	    &stats->vmtxs_obytes, "Transmit bytes");
3515	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
3516	    &stats->vmtxs_omcasts, "Transmit multicasts");
3517	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3518	    &stats->vmtxs_csum, "Transmit checksum offloaded");
3519	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
3520	    &stats->vmtxs_tso, "Transmit TCP segmentation offloaded");
3521	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ringfull", CTLFLAG_RD,
3522	    &stats->vmtxs_full, "Transmit ring full");
3523	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "offload_failed", CTLFLAG_RD,
3524	    &stats->vmtxs_offload_failed, "Transmit checksum offload failed");
3525
3526	/*
3527	 * Add statistics reported by the host. These are updated once
3528	 * per second.
3529	 */
3530	txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
3531	    NULL, "Host Statistics");
3532	txslist = SYSCTL_CHILDREN(txsnode);
3533	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
3534	    &txstats->TSO_packets, "TSO packets");
3535	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
3536	    &txstats->TSO_bytes, "TSO bytes");
3537	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
3538	    &txstats->ucast_packets, "Unicast packets");
3539	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
3540	    &txstats->ucast_bytes, "Unicast bytes");
3541	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
3542	    &txstats->mcast_packets, "Multicast packets");
3543	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
3544	    &txstats->mcast_bytes, "Multicast bytes");
3545	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
3546	    &txstats->error, "Errors");
3547	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
3548	    &txstats->discard, "Discards");
3549}
3550
3551static void
3552vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
3553    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3554{
3555	struct sysctl_oid *node, *rxsnode;
3556	struct sysctl_oid_list *list, *rxslist;
3557	struct vmxnet3_rxq_stats *stats;
3558	struct UPT1_RxStats *rxstats;
3559	char namebuf[16];
3560
3561	stats = &rxq->vxrxq_stats;
3562	rxstats = &rxq->vxrxq_rs->stats;
3563
3564	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
3565	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
3566	    NULL, "Receive Queue");
3567	rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);
3568
3569	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
3570	    &stats->vmrxs_ipackets, "Receive packets");
3571	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
3572	    &stats->vmrxs_ibytes, "Receive bytes");
3573	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
3574	    &stats->vmrxs_iqdrops, "Receive drops");
3575	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
3576	    &stats->vmrxs_ierrors, "Receive errors");
3577
3578	/*
3579	 * Add statistics reported by the host. These are updated once
3580	 * per second.
3581	 */
3582	rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
3583	    NULL, "Host Statistics");
3584	rxslist = SYSCTL_CHILDREN(rxsnode);
3585	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
3586	    &rxstats->LRO_packets, "LRO packets");
3587	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
3588	    &rxstats->LRO_bytes, "LRO bytes");
3589	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
3590	    &rxstats->ucast_packets, "Unicast packets");
3591	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
3592	    &rxstats->ucast_bytes, "Unicast bytes");
3593	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
3594	    &rxstats->mcast_packets, "Multicast packets");
3595	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
3596	    &rxstats->mcast_bytes, "Multicast bytes");
3597	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
3598	    &rxstats->bcast_packets, "Broadcast packets");
3599	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
3600	    &rxstats->bcast_bytes, "Broadcast bytes");
3601	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
3602	    &rxstats->nobuffer, "No buffer");
3603	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
3604	    &rxstats->error, "Errors");
3605}
3606
3607static void
3608vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
3609    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3610{
3611	struct sysctl_oid *node;
3612	struct sysctl_oid_list *list;
3613	int i;
3614
3615	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3616		struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];
3617
3618		node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
3619		    "debug", CTLFLAG_RD, NULL, "");
3620		list = SYSCTL_CHILDREN(node);
3621
3622		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_head", CTLFLAG_RD,
3623		    &txq->vxtxq_cmd_ring.vxtxr_head, 0, "");
3624		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
3625		    &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
3626		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
3627		    &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
3628		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
3629		    &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
3630		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
3631		    &txq->vxtxq_comp_ring.vxcr_next, 0, "");
3632		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
3633		    &txq->vxtxq_comp_ring.vxcr_ndesc, 0, "");
3634		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
3635		    &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
3636	}
3637
3638	for (i = 0; i < sc->vmx_nrxqueues; i++) {
3639		struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];
3640
3641		node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
3642		    "debug", CTLFLAG_RD, NULL, "");
3643		list = SYSCTL_CHILDREN(node);
3644
3645		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_fill", CTLFLAG_RD,
3646		    &rxq->vxrxq_cmd_ring[0].vxrxr_fill, 0, "");
3647		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
3648		    &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
3649		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
3650		    &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
3651		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_fill", CTLFLAG_RD,
3652		    &rxq->vxrxq_cmd_ring[1].vxrxr_fill, 0, "");
3653		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
3654		    &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
3655		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
3656		    &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
3657		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
3658		    &rxq->vxrxq_comp_ring.vxcr_next, 0, "");
3659		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
3660		    &rxq->vxrxq_comp_ring.vxcr_ndesc, 0, "");
3661		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
3662		    &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
3663	}
3664}
3665
3666static void
3667vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
3668    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3669{
3670	int i;
3671
3672	for (i = 0; i < sc->vmx_ntxqueues; i++)
3673		vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
3674	for (i = 0; i < sc->vmx_nrxqueues; i++)
3675		vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);
3676
3677	vmxnet3_setup_debug_sysctl(sc, ctx, child);
3678}
3679
3680static void
3681vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
3682{
3683	device_t dev;
3684	struct vmxnet3_statistics *stats;
3685	struct sysctl_ctx_list *ctx;
3686	struct sysctl_oid *tree;
3687	struct sysctl_oid_list *child;
3688
3689	dev = sc->vmx_dev;
3690	ctx = device_get_sysctl_ctx(dev);
3691	tree = device_get_sysctl_tree(dev);
3692	child = SYSCTL_CHILDREN(tree);
3693
3694	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_ntxqueues", CTLFLAG_RD,
3695	    &sc->vmx_max_ntxqueues, 0, "Maximum number of Tx queues");
3696	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_nrxqueues", CTLFLAG_RD,
3697	    &sc->vmx_max_nrxqueues, 0, "Maximum number of Rx queues");
3698	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "ntxqueues", CTLFLAG_RD,
3699	    &sc->vmx_ntxqueues, 0, "Number of Tx queues");
3700	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "nrxqueues", CTLFLAG_RD,
3701	    &sc->vmx_nrxqueues, 0, "Number of Rx queues");
3702
3703	stats = &sc->vmx_stats;
3704	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defragged", CTLFLAG_RD,
3705	    &stats->vmst_defragged, 0, "Tx mbuf chains defragged");
3706	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defrag_failed", CTLFLAG_RD,
3707	    &stats->vmst_defrag_failed, 0,
3708	    "Tx mbuf dropped because defrag failed");
3709	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mgetcl_failed", CTLFLAG_RD,
3710	    &stats->vmst_mgetcl_failed, 0, "mbuf cluster allocation failed");
3711	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mbuf_load_failed", CTLFLAG_RD,
3712	    &stats->vmst_mbuf_load_failed, 0, "mbuf load segments failed");
3713
3714	vmxnet3_setup_queue_sysctl(sc, ctx, child);
3715}
3716
3717static void
3718vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
3719{
3720
3721	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
3722}
3723
3724static uint32_t
3725vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
3726{
3727
3728	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
3729}
3730
3731static void
3732vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
3733{
3734
3735	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
3736}
3737
3738static void
3739vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
3740{
3741
3742	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
3743}
3744
3745static uint32_t
3746vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
3747{
3748
3749	vmxnet3_write_cmd(sc, cmd);
3750	bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
3751	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
3752	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
3753}
3754
3755static void
3756vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
3757{
3758
3759	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
3760}
3761
3762static void
3763vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
3764{
3765
3766	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
3767}
3768
3769static void
3770vmxnet3_enable_all_intrs(struct vmxnet3_softc *sc)
3771{
3772	int i;
3773
3774	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
3775	for (i = 0; i < sc->vmx_nintrs; i++)
3776		vmxnet3_enable_intr(sc, i);
3777}
3778
3779static void
3780vmxnet3_disable_all_intrs(struct vmxnet3_softc *sc)
3781{
3782	int i;
3783
3784	sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
3785	for (i = 0; i < sc->vmx_nintrs; i++)
3786		vmxnet3_disable_intr(sc, i);
3787}
3788
3789static void
3790vmxnet3_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3791{
3792	bus_addr_t *baddr = arg;
3793
3794	if (error == 0)
3795		*baddr = segs->ds_addr;
3796}
3797
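/*
 * Allocate a zeroed, physically contiguous DMA buffer of the requested
 * size and alignment, recording its bus address in the dma_alloc
 * descriptor. Any partially allocated state is released on failure.
 */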
3798static int
3799vmxnet3_dma_malloc(struct vmxnet3_softc *sc, bus_size_t size, bus_size_t align,
3800    struct vmxnet3_dma_alloc *dma)
3801{
3802	device_t dev;
3803	int error;
3804
3805	dev = sc->vmx_dev;
3806	bzero(dma, sizeof(struct vmxnet3_dma_alloc));
3807
3808	error = bus_dma_tag_create(bus_get_dma_tag(dev),
3809	    align, 0,		/* alignment, bounds */
3810	    BUS_SPACE_MAXADDR,	/* lowaddr */
3811	    BUS_SPACE_MAXADDR,	/* highaddr */
3812	    NULL, NULL,		/* filter, filterarg */
3813	    size,		/* maxsize */
3814	    1,			/* nsegments */
3815	    size,		/* maxsegsize */
3816	    BUS_DMA_ALLOCNOW,	/* flags */
3817	    NULL,		/* lockfunc */
3818	    NULL,		/* lockfuncarg */
3819	    &dma->dma_tag);
3820	if (error) {
3821		device_printf(dev, "bus_dma_tag_create failed: %d\n", error);
3822		goto fail;
3823	}
3824
3825	error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
3826	    BUS_DMA_ZERO | BUS_DMA_NOWAIT, &dma->dma_map);
3827	if (error) {
3828		device_printf(dev, "bus_dmamem_alloc failed: %d\n", error);
3829		goto fail;
3830	}
3831
3832	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3833	    size, vmxnet3_dmamap_cb, &dma->dma_paddr, BUS_DMA_NOWAIT);
3834	if (error) {
3835		device_printf(dev, "bus_dmamap_load failed: %d\n", error);
3836		goto fail;
3837	}
3838
3839	dma->dma_size = size;
3840
3841fail:
3842	if (error)
3843		vmxnet3_dma_free(sc, dma);
3844
3845	return (error);
3846}
3847
3848static void
3849vmxnet3_dma_free(struct vmxnet3_softc *sc, struct vmxnet3_dma_alloc *dma)
3850{
3851
3852	if (dma->dma_tag != NULL) {
3853		if (dma->dma_map != NULL) {
3854			bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3855			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3856			bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3857		}
3858
3859		if (dma->dma_vaddr != NULL) {
3860			bus_dmamem_free(dma->dma_tag, dma->dma_vaddr,
3861			    dma->dma_map);
3862		}
3863
3864		bus_dma_tag_destroy(dma->dma_tag);
3865	}
3866	bzero(dma, sizeof(struct vmxnet3_dma_alloc));
3867}
3868
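/*
 * Fetch the per-device hw.vmx.<unit>.<knob> tunable, returning the
 * supplied default when it is not set.
 */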
3869static int
3870vmxnet3_tunable_int(struct vmxnet3_softc *sc, const char *knob, int def)
3871{
3872	char path[64];
3873
3874	snprintf(path, sizeof(path),
3875	    "hw.vmx.%d.%s", device_get_unit(sc->vmx_dev), knob);
3876	TUNABLE_INT_FETCH(path, &def);
3877
3878	return (def);
3879}
3880
3881/*
3882 * Since this is a purely paravirtualized device, we do not have
3883 * to worry about DMA coherency. But at times, we must make sure
3884 * both the compiler and CPU do not reorder memory operations.
3885 */
3886static inline void
3887vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
3888{
3889
3890	switch (type) {
3891	case VMXNET3_BARRIER_RD:
3892		rmb();
3893		break;
3894	case VMXNET3_BARRIER_WR:
3895		wmb();
3896		break;
3897	case VMXNET3_BARRIER_RDWR:
3898		mb();
3899		break;
3900	default:
3901		panic("%s: bad barrier type %d", __func__, type);
3902	}
3903}
3904