if_vtnet.c revision 304081
1/*-
2 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice unmodified, this list of conditions, and the following
10 *    disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27/* Driver for VirtIO network devices. */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/sys/dev/virtio/network/if_vtnet.c 304081 2016-08-14 15:27:59Z smh $");
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/kernel.h>
35#include <sys/sockio.h>
36#include <sys/mbuf.h>
37#include <sys/malloc.h>
38#include <sys/module.h>
39#include <sys/socket.h>
40#include <sys/sysctl.h>
41#include <sys/random.h>
42#include <sys/sglist.h>
43#include <sys/lock.h>
44#include <sys/mutex.h>
45#include <sys/taskqueue.h>
46#include <sys/smp.h>
47#include <machine/smp.h>
48
49#include <vm/uma.h>
50
51#include <net/ethernet.h>
52#include <net/if.h>
53#include <net/if_arp.h>
54#include <net/if_dl.h>
55#include <net/if_types.h>
56#include <net/if_media.h>
57#include <net/if_vlan_var.h>
58
59#include <net/bpf.h>
60
61#include <netinet/in_systm.h>
62#include <netinet/in.h>
63#include <netinet/ip.h>
64#include <netinet/ip6.h>
65#include <netinet6/ip6_var.h>
66#include <netinet/udp.h>
67#include <netinet/tcp.h>
68#include <netinet/sctp.h>
69
70#include <machine/bus.h>
71#include <machine/resource.h>
72#include <sys/bus.h>
73#include <sys/rman.h>
74
75#include <dev/virtio/virtio.h>
76#include <dev/virtio/virtqueue.h>
77#include <dev/virtio/network/virtio_net.h>
78#include <dev/virtio/network/if_vtnetvar.h>
79
80#include "virtio_if.h"
81
82#include "opt_inet.h"
83#include "opt_inet6.h"
84
85static int	vtnet_modevent(module_t, int, void *);
86
87static int	vtnet_probe(device_t);
88static int	vtnet_attach(device_t);
89static int	vtnet_detach(device_t);
90static int	vtnet_suspend(device_t);
91static int	vtnet_resume(device_t);
92static int	vtnet_shutdown(device_t);
93static int	vtnet_attach_completed(device_t);
94static int	vtnet_config_change(device_t);
95
96static void	vtnet_negotiate_features(struct vtnet_softc *);
97static void	vtnet_setup_features(struct vtnet_softc *);
98static int	vtnet_init_rxq(struct vtnet_softc *, int);
99static int	vtnet_init_txq(struct vtnet_softc *, int);
100static int	vtnet_alloc_rxtx_queues(struct vtnet_softc *);
101static void	vtnet_free_rxtx_queues(struct vtnet_softc *);
102static int	vtnet_alloc_rx_filters(struct vtnet_softc *);
103static void	vtnet_free_rx_filters(struct vtnet_softc *);
104static int	vtnet_alloc_virtqueues(struct vtnet_softc *);
105static int	vtnet_setup_interface(struct vtnet_softc *);
106static int	vtnet_change_mtu(struct vtnet_softc *, int);
107static int	vtnet_ioctl(struct ifnet *, u_long, caddr_t);
108
109static int	vtnet_rxq_populate(struct vtnet_rxq *);
110static void	vtnet_rxq_free_mbufs(struct vtnet_rxq *);
111static struct mbuf *
112		vtnet_rx_alloc_buf(struct vtnet_softc *, int , struct mbuf **);
113static int	vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *,
114		    struct mbuf *, int);
115static int	vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
116static int	vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
117static int	vtnet_rxq_new_buf(struct vtnet_rxq *);
118static int	vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
119		     struct virtio_net_hdr *);
120static void	vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
121static void	vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *);
122static int	vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int);
123static void	vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *,
124		    struct virtio_net_hdr *);
125static int	vtnet_rxq_eof(struct vtnet_rxq *);
126static void	vtnet_rx_vq_intr(void *);
127static void	vtnet_rxq_tq_intr(void *, int);
128
129static int	vtnet_txq_below_threshold(struct vtnet_txq *);
130static int	vtnet_txq_notify(struct vtnet_txq *);
131static void	vtnet_txq_free_mbufs(struct vtnet_txq *);
132static int	vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
133		    int *, int *, int *);
134static int	vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int,
135		    int, struct virtio_net_hdr *);
136static struct mbuf *
137		vtnet_txq_offload(struct vtnet_txq *, struct mbuf *,
138		    struct virtio_net_hdr *);
139static int	vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **,
140		    struct vtnet_tx_header *);
141static int	vtnet_txq_encap(struct vtnet_txq *, struct mbuf **);
142#ifdef VTNET_LEGACY_TX
143static void	vtnet_start_locked(struct vtnet_txq *, struct ifnet *);
144static void	vtnet_start(struct ifnet *);
145#else
146static int	vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *);
147static int	vtnet_txq_mq_start(struct ifnet *, struct mbuf *);
148static void	vtnet_txq_tq_deferred(void *, int);
149#endif
150static void	vtnet_txq_start(struct vtnet_txq *);
151static void	vtnet_txq_tq_intr(void *, int);
152static int	vtnet_txq_eof(struct vtnet_txq *);
153static void	vtnet_tx_vq_intr(void *);
154static void	vtnet_tx_start_all(struct vtnet_softc *);
155
156#ifndef VTNET_LEGACY_TX
157static void	vtnet_qflush(struct ifnet *);
158#endif
159
160static int	vtnet_watchdog(struct vtnet_txq *);
161static void	vtnet_rxq_accum_stats(struct vtnet_rxq *,
162		    struct vtnet_rxq_stats *);
163static void	vtnet_txq_accum_stats(struct vtnet_txq *,
164		    struct vtnet_txq_stats *);
165static void	vtnet_accumulate_stats(struct vtnet_softc *);
166static void	vtnet_tick(void *);
167
168static void	vtnet_start_taskqueues(struct vtnet_softc *);
169static void	vtnet_free_taskqueues(struct vtnet_softc *);
170static void	vtnet_drain_taskqueues(struct vtnet_softc *);
171
172static void	vtnet_drain_rxtx_queues(struct vtnet_softc *);
173static void	vtnet_stop_rendezvous(struct vtnet_softc *);
174static void	vtnet_stop(struct vtnet_softc *);
175static int	vtnet_virtio_reinit(struct vtnet_softc *);
176static void	vtnet_init_rx_filters(struct vtnet_softc *);
177static int	vtnet_init_rx_queues(struct vtnet_softc *);
178static int	vtnet_init_tx_queues(struct vtnet_softc *);
179static int	vtnet_init_rxtx_queues(struct vtnet_softc *);
180static void	vtnet_set_active_vq_pairs(struct vtnet_softc *);
181static int	vtnet_reinit(struct vtnet_softc *);
182static void	vtnet_init_locked(struct vtnet_softc *);
183static void	vtnet_init(void *);
184
185static void	vtnet_free_ctrl_vq(struct vtnet_softc *);
186static void	vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
187		    struct sglist *, int, int);
188static int	vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
189static int	vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t);
190static int	vtnet_ctrl_rx_cmd(struct vtnet_softc *, int, int);
191static int	vtnet_set_promisc(struct vtnet_softc *, int);
192static int	vtnet_set_allmulti(struct vtnet_softc *, int);
193static void	vtnet_attach_disable_promisc(struct vtnet_softc *);
194static void	vtnet_rx_filter(struct vtnet_softc *);
195static void	vtnet_rx_filter_mac(struct vtnet_softc *);
196static int	vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
197static void	vtnet_rx_filter_vlan(struct vtnet_softc *);
198static void	vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t);
199static void	vtnet_register_vlan(void *, struct ifnet *, uint16_t);
200static void	vtnet_unregister_vlan(void *, struct ifnet *, uint16_t);
201
202static int	vtnet_is_link_up(struct vtnet_softc *);
203static void	vtnet_update_link_status(struct vtnet_softc *);
204static int	vtnet_ifmedia_upd(struct ifnet *);
205static void	vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *);
206static void	vtnet_get_hwaddr(struct vtnet_softc *);
207static void	vtnet_set_hwaddr(struct vtnet_softc *);
208static void	vtnet_vlan_tag_remove(struct mbuf *);
209static void	vtnet_set_rx_process_limit(struct vtnet_softc *);
210static void	vtnet_set_tx_intr_threshold(struct vtnet_softc *);
211
212static void	vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
213		    struct sysctl_oid_list *, struct vtnet_rxq *);
214static void	vtnet_setup_txq_sysctl(struct sysctl_ctx_list *,
215		    struct sysctl_oid_list *, struct vtnet_txq *);
216static void	vtnet_setup_queue_sysctl(struct vtnet_softc *);
217static void	vtnet_setup_sysctl(struct vtnet_softc *);
218
219static int	vtnet_rxq_enable_intr(struct vtnet_rxq *);
220static void	vtnet_rxq_disable_intr(struct vtnet_rxq *);
221static int	vtnet_txq_enable_intr(struct vtnet_txq *);
222static void	vtnet_txq_disable_intr(struct vtnet_txq *);
223static void	vtnet_enable_rx_interrupts(struct vtnet_softc *);
224static void	vtnet_enable_tx_interrupts(struct vtnet_softc *);
225static void	vtnet_enable_interrupts(struct vtnet_softc *);
226static void	vtnet_disable_rx_interrupts(struct vtnet_softc *);
227static void	vtnet_disable_tx_interrupts(struct vtnet_softc *);
228static void	vtnet_disable_interrupts(struct vtnet_softc *);
229
230static int	vtnet_tunable_int(struct vtnet_softc *, const char *, int);
231
232/* Tunables. */
233static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD, 0, "VNET driver parameters");
234static int vtnet_csum_disable = 0;
235TUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable);
236SYSCTL_INT(_hw_vtnet, OID_AUTO, csum_disable, CTLFLAG_RDTUN,
237    &vtnet_csum_disable, 0, "Disables receive and send checksum offload");
238static int vtnet_tso_disable = 0;
239TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable);
240SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_disable, CTLFLAG_RDTUN, &vtnet_tso_disable,
241    0, "Disables TCP Segmentation Offload");
242static int vtnet_lro_disable = 0;
243TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable);
244SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_disable, CTLFLAG_RDTUN, &vtnet_lro_disable,
245    0, "Disables TCP Large Receive Offload");
246static int vtnet_mq_disable = 0;
247TUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable);
248SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_disable, CTLFLAG_RDTUN, &vtnet_mq_disable,
249    0, "Disables Multi Queue support");
250static int vtnet_mq_max_pairs = VTNET_MAX_QUEUE_PAIRS;
251TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs);
252SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_max_pairs, CTLFLAG_RDTUN,
253    &vtnet_mq_max_pairs, 0, "Sets the maximum number of Multi Queue pairs");
254static int vtnet_rx_process_limit = 512;
255TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit);
256SYSCTL_INT(_hw_vtnet, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
257    &vtnet_rx_process_limit, 0,
258    "Limits the number RX segments processed in a single pass");
259
260static uma_zone_t vtnet_tx_header_zone;
261
262static struct virtio_feature_desc vtnet_feature_desc[] = {
263	{ VIRTIO_NET_F_CSUM,		"TxChecksum"	},
264	{ VIRTIO_NET_F_GUEST_CSUM,	"RxChecksum"	},
265	{ VIRTIO_NET_F_MAC,		"MacAddress"	},
266	{ VIRTIO_NET_F_GSO,		"TxAllGSO"	},
267	{ VIRTIO_NET_F_GUEST_TSO4,	"RxTSOv4"	},
268	{ VIRTIO_NET_F_GUEST_TSO6,	"RxTSOv6"	},
269	{ VIRTIO_NET_F_GUEST_ECN,	"RxECN"		},
270	{ VIRTIO_NET_F_GUEST_UFO,	"RxUFO"		},
271	{ VIRTIO_NET_F_HOST_TSO4,	"TxTSOv4"	},
272	{ VIRTIO_NET_F_HOST_TSO6,	"TxTSOv6"	},
273	{ VIRTIO_NET_F_HOST_ECN,	"TxTSOECN"	},
274	{ VIRTIO_NET_F_HOST_UFO,	"TxUFO"		},
275	{ VIRTIO_NET_F_MRG_RXBUF,	"MrgRxBuf"	},
276	{ VIRTIO_NET_F_STATUS,		"Status"	},
277	{ VIRTIO_NET_F_CTRL_VQ,		"ControlVq"	},
278	{ VIRTIO_NET_F_CTRL_RX,		"RxMode"	},
279	{ VIRTIO_NET_F_CTRL_VLAN,	"VLanFilter"	},
280	{ VIRTIO_NET_F_CTRL_RX_EXTRA,	"RxModeExtra"	},
281	{ VIRTIO_NET_F_GUEST_ANNOUNCE,	"GuestAnnounce"	},
282	{ VIRTIO_NET_F_MQ,		"Multiqueue"	},
283	{ VIRTIO_NET_F_CTRL_MAC_ADDR,	"SetMacAddress"	},
284
285	{ 0, NULL }
286};
287
288static device_method_t vtnet_methods[] = {
289	/* Device methods. */
290	DEVMETHOD(device_probe,			vtnet_probe),
291	DEVMETHOD(device_attach,		vtnet_attach),
292	DEVMETHOD(device_detach,		vtnet_detach),
293	DEVMETHOD(device_suspend,		vtnet_suspend),
294	DEVMETHOD(device_resume,		vtnet_resume),
295	DEVMETHOD(device_shutdown,		vtnet_shutdown),
296
297	/* VirtIO methods. */
298	DEVMETHOD(virtio_attach_completed,	vtnet_attach_completed),
299	DEVMETHOD(virtio_config_change,		vtnet_config_change),
300
301	DEVMETHOD_END
302};
303
304#ifdef DEV_NETMAP
305#include <dev/netmap/if_vtnet_netmap.h>
306#endif /* DEV_NETMAP */
307
308static driver_t vtnet_driver = {
309	"vtnet",
310	vtnet_methods,
311	sizeof(struct vtnet_softc)
312};
313static devclass_t vtnet_devclass;
314
315DRIVER_MODULE(vtnet, virtio_pci, vtnet_driver, vtnet_devclass,
316    vtnet_modevent, 0);
317MODULE_VERSION(vtnet, 1);
318MODULE_DEPEND(vtnet, virtio, 1, 1, 1);
319
320static int
321vtnet_modevent(module_t mod, int type, void *unused)
322{
323	int error;
324
325	error = 0;
326
327	switch (type) {
328	case MOD_LOAD:
329		vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr",
330		    sizeof(struct vtnet_tx_header),
331		    NULL, NULL, NULL, NULL, 0, 0);
332		break;
333	case MOD_QUIESCE:
334	case MOD_UNLOAD:
335		if (uma_zone_get_cur(vtnet_tx_header_zone) > 0)
336			error = EBUSY;
337		else if (type == MOD_UNLOAD) {
338			uma_zdestroy(vtnet_tx_header_zone);
339			vtnet_tx_header_zone = NULL;
340		}
341		break;
342	case MOD_SHUTDOWN:
343		break;
344	default:
345		error = EOPNOTSUPP;
346		break;
347	}
348
349	return (error);
350}
351
352static int
353vtnet_probe(device_t dev)
354{
355
356	if (virtio_get_device_type(dev) != VIRTIO_ID_NETWORK)
357		return (ENXIO);
358
359	device_set_desc(dev, "VirtIO Networking Adapter");
360
361	return (BUS_PROBE_DEFAULT);
362}
363
364static int
365vtnet_attach(device_t dev)
366{
367	struct vtnet_softc *sc;
368	int error;
369
370	sc = device_get_softc(dev);
371	sc->vtnet_dev = dev;
372
373	/* Register our feature descriptions. */
374	virtio_set_feature_desc(dev, vtnet_feature_desc);
375
376	VTNET_CORE_LOCK_INIT(sc);
377	callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0);
378
379	vtnet_setup_sysctl(sc);
380	vtnet_setup_features(sc);
381
382	error = vtnet_alloc_rx_filters(sc);
383	if (error) {
384		device_printf(dev, "cannot allocate Rx filters\n");
385		goto fail;
386	}
387
388	error = vtnet_alloc_rxtx_queues(sc);
389	if (error) {
390		device_printf(dev, "cannot allocate queues\n");
391		goto fail;
392	}
393
394	error = vtnet_alloc_virtqueues(sc);
395	if (error) {
396		device_printf(dev, "cannot allocate virtqueues\n");
397		goto fail;
398	}
399
400	error = vtnet_setup_interface(sc);
401	if (error) {
402		device_printf(dev, "cannot setup interface\n");
403		goto fail;
404	}
405
406	error = virtio_setup_intr(dev, INTR_TYPE_NET);
407	if (error) {
408		device_printf(dev, "cannot setup virtqueue interrupts\n");
409		/* BMV: This will crash if during boot! */
410		ether_ifdetach(sc->vtnet_ifp);
411		goto fail;
412	}
413
414#ifdef DEV_NETMAP
415	vtnet_netmap_attach(sc);
416#endif /* DEV_NETMAP */
417
418	vtnet_start_taskqueues(sc);
419
420fail:
421	if (error)
422		vtnet_detach(dev);
423
424	return (error);
425}
426
427static int
428vtnet_detach(device_t dev)
429{
430	struct vtnet_softc *sc;
431	struct ifnet *ifp;
432
433	sc = device_get_softc(dev);
434	ifp = sc->vtnet_ifp;
435
436	if (device_is_attached(dev)) {
437		VTNET_CORE_LOCK(sc);
438		vtnet_stop(sc);
439		VTNET_CORE_UNLOCK(sc);
440
441		callout_drain(&sc->vtnet_tick_ch);
442		vtnet_drain_taskqueues(sc);
443
444		ether_ifdetach(ifp);
445	}
446
447#ifdef DEV_NETMAP
448	netmap_detach(ifp);
449#endif /* DEV_NETMAP */
450
451	vtnet_free_taskqueues(sc);
452
453	if (sc->vtnet_vlan_attach != NULL) {
454		EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
455		sc->vtnet_vlan_attach = NULL;
456	}
457	if (sc->vtnet_vlan_detach != NULL) {
458		EVENTHANDLER_DEREGISTER(vlan_unconfg, sc->vtnet_vlan_detach);
459		sc->vtnet_vlan_detach = NULL;
460	}
461
462	ifmedia_removeall(&sc->vtnet_media);
463
464	if (ifp != NULL) {
465		if_free(ifp);
466		sc->vtnet_ifp = NULL;
467	}
468
469	vtnet_free_rxtx_queues(sc);
470	vtnet_free_rx_filters(sc);
471
472	if (sc->vtnet_ctrl_vq != NULL)
473		vtnet_free_ctrl_vq(sc);
474
475	VTNET_CORE_LOCK_DESTROY(sc);
476
477	return (0);
478}
479
480static int
481vtnet_suspend(device_t dev)
482{
483	struct vtnet_softc *sc;
484
485	sc = device_get_softc(dev);
486
487	VTNET_CORE_LOCK(sc);
488	vtnet_stop(sc);
489	sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
490	VTNET_CORE_UNLOCK(sc);
491
492	return (0);
493}
494
495static int
496vtnet_resume(device_t dev)
497{
498	struct vtnet_softc *sc;
499	struct ifnet *ifp;
500
501	sc = device_get_softc(dev);
502	ifp = sc->vtnet_ifp;
503
504	VTNET_CORE_LOCK(sc);
505	if (ifp->if_flags & IFF_UP)
506		vtnet_init_locked(sc);
507	sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
508	VTNET_CORE_UNLOCK(sc);
509
510	return (0);
511}
512
513static int
514vtnet_shutdown(device_t dev)
515{
516
517	/*
518	 * Suspend already does all of what we need to
519	 * do here; we just never expect to be resumed.
520	 */
521	return (vtnet_suspend(dev));
522}
523
524static int
525vtnet_attach_completed(device_t dev)
526{
527
528	vtnet_attach_disable_promisc(device_get_softc(dev));
529
530	return (0);
531}
532
533static int
534vtnet_config_change(device_t dev)
535{
536	struct vtnet_softc *sc;
537
538	sc = device_get_softc(dev);
539
540	VTNET_CORE_LOCK(sc);
541	vtnet_update_link_status(sc);
542	if (sc->vtnet_link_active != 0)
543		vtnet_tx_start_all(sc);
544	VTNET_CORE_UNLOCK(sc);
545
546	return (0);
547}
548
549static void
550vtnet_negotiate_features(struct vtnet_softc *sc)
551{
552	device_t dev;
553	uint64_t mask, features;
554
555	dev = sc->vtnet_dev;
556	mask = 0;
557
558	/*
559	 * TSO and LRO are only available when their corresponding checksum
560	 * offload feature is also negotiated.
561	 */
562	if (vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable)) {
563		mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM;
564		mask |= VTNET_TSO_FEATURES | VTNET_LRO_FEATURES;
565	}
566	if (vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable))
567		mask |= VTNET_TSO_FEATURES;
568	if (vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable))
569		mask |= VTNET_LRO_FEATURES;
570#ifndef VTNET_LEGACY_TX
571	if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable))
572		mask |= VIRTIO_NET_F_MQ;
573#else
574	mask |= VIRTIO_NET_F_MQ;
575#endif
576
577	features = VTNET_FEATURES & ~mask;
578	sc->vtnet_features = virtio_negotiate_features(dev, features);
579
580	if (virtio_with_feature(dev, VTNET_LRO_FEATURES) &&
581	    virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) {
582		/*
583		 * LRO without mergeable buffers requires special care. This
584		 * is not ideal because every receive buffer must be large
585		 * enough to hold the maximum TCP packet, the Ethernet header,
586		 * and the header. This requires up to 34 descriptors with
587		 * MCLBYTES clusters. If we do not have indirect descriptors,
588		 * LRO is disabled since the virtqueue will not contain very
589		 * many receive buffers.
590		 */
591		if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) {
592			device_printf(dev,
593			    "LRO disabled due to both mergeable buffers and "
594			    "indirect descriptors not negotiated\n");
595
596			features &= ~VTNET_LRO_FEATURES;
597			sc->vtnet_features =
598			    virtio_negotiate_features(dev, features);
599		} else
600			sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
601	}
602}
603
604static void
605vtnet_setup_features(struct vtnet_softc *sc)
606{
607	device_t dev;
608
609	dev = sc->vtnet_dev;
610
611	vtnet_negotiate_features(sc);
612
613	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
614		sc->vtnet_flags |= VTNET_FLAG_INDIRECT;
615	if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX))
616		sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX;
617
618	if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
619		/* This feature should always be negotiated. */
620		sc->vtnet_flags |= VTNET_FLAG_MAC;
621	}
622
623	if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
624		sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
625		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
626	} else
627		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
628
629	if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
630		sc->vtnet_rx_nsegs = VTNET_MRG_RX_SEGS;
631	else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
632		sc->vtnet_rx_nsegs = VTNET_MAX_RX_SEGS;
633	else
634		sc->vtnet_rx_nsegs = VTNET_MIN_RX_SEGS;
635
636	if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) ||
637	    virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) ||
638	    virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
639		sc->vtnet_tx_nsegs = VTNET_MAX_TX_SEGS;
640	else
641		sc->vtnet_tx_nsegs = VTNET_MIN_TX_SEGS;
642
643	if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
644		sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;
645
646		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
647			sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
648		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
649			sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
650		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR))
651			sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;
652	}
653
654	if (virtio_with_feature(dev, VIRTIO_NET_F_MQ) &&
655	    sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
656		sc->vtnet_max_vq_pairs = virtio_read_dev_config_2(dev,
657		    offsetof(struct virtio_net_config, max_virtqueue_pairs));
658	} else
659		sc->vtnet_max_vq_pairs = 1;
660
661	if (sc->vtnet_max_vq_pairs > 1) {
662		/*
663		 * Limit the maximum number of queue pairs to the lower of
664		 * the number of CPUs and the configured maximum.
665		 * The actual number of queues that get used may be less.
666		 */
667		int max;
668
669		max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs);
670		if (max > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN) {
671			if (max > mp_ncpus)
672				max = mp_ncpus;
673			if (max > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
674				max = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX;
675			if (max > 1) {
676				sc->vtnet_requested_vq_pairs = max;
677				sc->vtnet_flags |= VTNET_FLAG_MULTIQ;
678			}
679		}
680	}
681}
682
683static int
684vtnet_init_rxq(struct vtnet_softc *sc, int id)
685{
686	struct vtnet_rxq *rxq;
687
688	rxq = &sc->vtnet_rxqs[id];
689
690	snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d",
691	    device_get_nameunit(sc->vtnet_dev), id);
692	mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF);
693
694	rxq->vtnrx_sc = sc;
695	rxq->vtnrx_id = id;
696
697	rxq->vtnrx_sg = sglist_alloc(sc->vtnet_rx_nsegs, M_NOWAIT);
698	if (rxq->vtnrx_sg == NULL)
699		return (ENOMEM);
700
701	TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq);
702	rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT,
703	    taskqueue_thread_enqueue, &rxq->vtnrx_tq);
704
705	return (rxq->vtnrx_tq == NULL ? ENOMEM : 0);
706}
707
708static int
709vtnet_init_txq(struct vtnet_softc *sc, int id)
710{
711	struct vtnet_txq *txq;
712
713	txq = &sc->vtnet_txqs[id];
714
715	snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d",
716	    device_get_nameunit(sc->vtnet_dev), id);
717	mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF);
718
719	txq->vtntx_sc = sc;
720	txq->vtntx_id = id;
721
722	txq->vtntx_sg = sglist_alloc(sc->vtnet_tx_nsegs, M_NOWAIT);
723	if (txq->vtntx_sg == NULL)
724		return (ENOMEM);
725
726#ifndef VTNET_LEGACY_TX
727	txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF,
728	    M_NOWAIT, &txq->vtntx_mtx);
729	if (txq->vtntx_br == NULL)
730		return (ENOMEM);
731
732	TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq);
733#endif
734	TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq);
735	txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT,
736	    taskqueue_thread_enqueue, &txq->vtntx_tq);
737	if (txq->vtntx_tq == NULL)
738		return (ENOMEM);
739
740	return (0);
741}
742
743static int
744vtnet_alloc_rxtx_queues(struct vtnet_softc *sc)
745{
746	int i, npairs, error;
747
748	npairs = sc->vtnet_max_vq_pairs;
749
750	sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF,
751	    M_NOWAIT | M_ZERO);
752	sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF,
753	    M_NOWAIT | M_ZERO);
754	if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL)
755		return (ENOMEM);
756
757	for (i = 0; i < npairs; i++) {
758		error = vtnet_init_rxq(sc, i);
759		if (error)
760			return (error);
761		error = vtnet_init_txq(sc, i);
762		if (error)
763			return (error);
764	}
765
766	vtnet_setup_queue_sysctl(sc);
767
768	return (0);
769}
770
771static void
772vtnet_destroy_rxq(struct vtnet_rxq *rxq)
773{
774
775	rxq->vtnrx_sc = NULL;
776	rxq->vtnrx_id = -1;
777
778	if (rxq->vtnrx_sg != NULL) {
779		sglist_free(rxq->vtnrx_sg);
780		rxq->vtnrx_sg = NULL;
781	}
782
783	if (mtx_initialized(&rxq->vtnrx_mtx) != 0)
784		mtx_destroy(&rxq->vtnrx_mtx);
785}
786
787static void
788vtnet_destroy_txq(struct vtnet_txq *txq)
789{
790
791	txq->vtntx_sc = NULL;
792	txq->vtntx_id = -1;
793
794	if (txq->vtntx_sg != NULL) {
795		sglist_free(txq->vtntx_sg);
796		txq->vtntx_sg = NULL;
797	}
798
799#ifndef VTNET_LEGACY_TX
800	if (txq->vtntx_br != NULL) {
801		buf_ring_free(txq->vtntx_br, M_DEVBUF);
802		txq->vtntx_br = NULL;
803	}
804#endif
805
806	if (mtx_initialized(&txq->vtntx_mtx) != 0)
807		mtx_destroy(&txq->vtntx_mtx);
808}
809
810static void
811vtnet_free_rxtx_queues(struct vtnet_softc *sc)
812{
813	int i;
814
815	if (sc->vtnet_rxqs != NULL) {
816		for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
817			vtnet_destroy_rxq(&sc->vtnet_rxqs[i]);
818		free(sc->vtnet_rxqs, M_DEVBUF);
819		sc->vtnet_rxqs = NULL;
820	}
821
822	if (sc->vtnet_txqs != NULL) {
823		for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
824			vtnet_destroy_txq(&sc->vtnet_txqs[i]);
825		free(sc->vtnet_txqs, M_DEVBUF);
826		sc->vtnet_txqs = NULL;
827	}
828}
829
830static int
831vtnet_alloc_rx_filters(struct vtnet_softc *sc)
832{
833
834	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
835		sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter),
836		    M_DEVBUF, M_NOWAIT | M_ZERO);
837		if (sc->vtnet_mac_filter == NULL)
838			return (ENOMEM);
839	}
840
841	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
842		sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) *
843		    VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO);
844		if (sc->vtnet_vlan_filter == NULL)
845			return (ENOMEM);
846	}
847
848	return (0);
849}
850
851static void
852vtnet_free_rx_filters(struct vtnet_softc *sc)
853{
854
855	if (sc->vtnet_mac_filter != NULL) {
856		free(sc->vtnet_mac_filter, M_DEVBUF);
857		sc->vtnet_mac_filter = NULL;
858	}
859
860	if (sc->vtnet_vlan_filter != NULL) {
861		free(sc->vtnet_vlan_filter, M_DEVBUF);
862		sc->vtnet_vlan_filter = NULL;
863	}
864}
865
866static int
867vtnet_alloc_virtqueues(struct vtnet_softc *sc)
868{
869	device_t dev;
870	struct vq_alloc_info *info;
871	struct vtnet_rxq *rxq;
872	struct vtnet_txq *txq;
873	int i, idx, flags, nvqs, error;
874
875	dev = sc->vtnet_dev;
876	flags = 0;
877
878	nvqs = sc->vtnet_max_vq_pairs * 2;
879	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
880		nvqs++;
881
882	info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT);
883	if (info == NULL)
884		return (ENOMEM);
885
886	for (i = 0, idx = 0; i < sc->vtnet_max_vq_pairs; i++, idx+=2) {
887		rxq = &sc->vtnet_rxqs[i];
888		VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs,
889		    vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq,
890		    "%s-%d rx", device_get_nameunit(dev), rxq->vtnrx_id);
891
892		txq = &sc->vtnet_txqs[i];
893		VQ_ALLOC_INFO_INIT(&info[idx+1], sc->vtnet_tx_nsegs,
894		    vtnet_tx_vq_intr, txq, &txq->vtntx_vq,
895		    "%s-%d tx", device_get_nameunit(dev), txq->vtntx_id);
896	}
897
898	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
899		VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL,
900		    &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev));
901	}
902
903	/*
904	 * Enable interrupt binding if this is multiqueue. This only matters
905	 * when per-vq MSIX is available.
906	 */
907	if (sc->vtnet_flags & VTNET_FLAG_MULTIQ)
908		flags |= 0;
909
910	error = virtio_alloc_virtqueues(dev, flags, nvqs, info);
911	free(info, M_TEMP);
912
913	return (error);
914}
915
916static int
917vtnet_setup_interface(struct vtnet_softc *sc)
918{
919	device_t dev;
920	struct ifnet *ifp;
921
922	dev = sc->vtnet_dev;
923
924	ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER);
925	if (ifp == NULL) {
926		device_printf(dev, "cannot allocate ifnet structure\n");
927		return (ENOSPC);
928	}
929
930	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
931	if_initbaudrate(ifp, IF_Gbps(10));	/* Approx. */
932	ifp->if_softc = sc;
933	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
934	ifp->if_init = vtnet_init;
935	ifp->if_ioctl = vtnet_ioctl;
936
937#ifndef VTNET_LEGACY_TX
938	ifp->if_transmit = vtnet_txq_mq_start;
939	ifp->if_qflush = vtnet_qflush;
940#else
941	struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq;
942	ifp->if_start = vtnet_start;
943	IFQ_SET_MAXLEN(&ifp->if_snd, virtqueue_size(vq) - 1);
944	ifp->if_snd.ifq_drv_maxlen = virtqueue_size(vq) - 1;
945	IFQ_SET_READY(&ifp->if_snd);
946#endif
947
948	ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd,
949	    vtnet_ifmedia_sts);
950	ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL);
951	ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE);
952
953	/* Read (or generate) the MAC address for the adapter. */
954	vtnet_get_hwaddr(sc);
955
956	ether_ifattach(ifp, sc->vtnet_hwaddr);
957
958	if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS))
959		ifp->if_capabilities |= IFCAP_LINKSTATE;
960
961	/* Tell the upper layer(s) we support long frames. */
962	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
963	ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU;
964
965	if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
966		ifp->if_capabilities |= IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6;
967
968		if (virtio_with_feature(dev, VIRTIO_NET_F_GSO)) {
969			ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6;
970			sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
971		} else {
972			if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
973				ifp->if_capabilities |= IFCAP_TSO4;
974			if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
975				ifp->if_capabilities |= IFCAP_TSO6;
976			if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
977				sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
978		}
979
980		if (ifp->if_capabilities & IFCAP_TSO)
981			ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
982	}
983
984	if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) {
985		ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6;
986
987		if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) ||
988		    virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6))
989			ifp->if_capabilities |= IFCAP_LRO;
990	}
991
992	if (ifp->if_capabilities & IFCAP_HWCSUM) {
993		/*
994		 * VirtIO does not support VLAN tagging, but we can fake
995		 * it by inserting and removing the 802.1Q header during
996		 * transmit and receive. We are then able to do checksum
997		 * offloading of VLAN frames.
998		 */
999		ifp->if_capabilities |=
1000		    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
1001	}
1002
1003	ifp->if_capenable = ifp->if_capabilities;
1004
1005	/*
1006	 * Capabilities after here are not enabled by default.
1007	 */
1008
1009	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
1010		ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
1011
1012		sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
1013		    vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
1014		sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
1015		    vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
1016	}
1017
1018	vtnet_set_rx_process_limit(sc);
1019	vtnet_set_tx_intr_threshold(sc);
1020
1021	return (0);
1022}
1023
1024static int
1025vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu)
1026{
1027	struct ifnet *ifp;
1028	int frame_size, clsize;
1029
1030	ifp = sc->vtnet_ifp;
1031
1032	if (new_mtu < ETHERMIN || new_mtu > VTNET_MAX_MTU)
1033		return (EINVAL);
1034
1035	frame_size = sc->vtnet_hdr_size + sizeof(struct ether_vlan_header) +
1036	    new_mtu;
1037
1038	/*
1039	 * Based on the new MTU (and hence frame size) determine which
1040	 * cluster size is most appropriate for the receive queues.
1041	 */
1042	if (frame_size <= MCLBYTES) {
1043		clsize = MCLBYTES;
1044	} else if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1045		/* Avoid going past 9K jumbos. */
1046		if (frame_size > MJUM9BYTES)
1047			return (EINVAL);
1048		clsize = MJUM9BYTES;
1049	} else
1050		clsize = MJUMPAGESIZE;
1051
1052	ifp->if_mtu = new_mtu;
1053	sc->vtnet_rx_new_clsize = clsize;
1054
1055	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1056		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1057		vtnet_init_locked(sc);
1058	}
1059
1060	return (0);
1061}
1062
1063static int
1064vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1065{
1066	struct vtnet_softc *sc;
1067	struct ifreq *ifr;
1068	int reinit, mask, error;
1069
1070	sc = ifp->if_softc;
1071	ifr = (struct ifreq *) data;
1072	error = 0;
1073
1074	switch (cmd) {
1075	case SIOCSIFMTU:
1076		if (ifp->if_mtu != ifr->ifr_mtu) {
1077			VTNET_CORE_LOCK(sc);
1078			error = vtnet_change_mtu(sc, ifr->ifr_mtu);
1079			VTNET_CORE_UNLOCK(sc);
1080		}
1081		break;
1082
1083	case SIOCSIFFLAGS:
1084		VTNET_CORE_LOCK(sc);
1085		if ((ifp->if_flags & IFF_UP) == 0) {
1086			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1087				vtnet_stop(sc);
1088		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1089			if ((ifp->if_flags ^ sc->vtnet_if_flags) &
1090			    (IFF_PROMISC | IFF_ALLMULTI)) {
1091				if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)
1092					vtnet_rx_filter(sc);
1093				else {
1094					ifp->if_flags |= IFF_PROMISC;
1095					if ((ifp->if_flags ^ sc->vtnet_if_flags)
1096					    & IFF_ALLMULTI)
1097						error = ENOTSUP;
1098				}
1099			}
1100		} else
1101			vtnet_init_locked(sc);
1102
1103		if (error == 0)
1104			sc->vtnet_if_flags = ifp->if_flags;
1105		VTNET_CORE_UNLOCK(sc);
1106		break;
1107
1108	case SIOCADDMULTI:
1109	case SIOCDELMULTI:
1110		if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0)
1111			break;
1112		VTNET_CORE_LOCK(sc);
1113		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1114			vtnet_rx_filter_mac(sc);
1115		VTNET_CORE_UNLOCK(sc);
1116		break;
1117
1118	case SIOCSIFMEDIA:
1119	case SIOCGIFMEDIA:
1120		error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd);
1121		break;
1122
1123	case SIOCSIFCAP:
1124		VTNET_CORE_LOCK(sc);
1125		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1126
1127		if (mask & IFCAP_TXCSUM)
1128			ifp->if_capenable ^= IFCAP_TXCSUM;
1129		if (mask & IFCAP_TXCSUM_IPV6)
1130			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1131		if (mask & IFCAP_TSO4)
1132			ifp->if_capenable ^= IFCAP_TSO4;
1133		if (mask & IFCAP_TSO6)
1134			ifp->if_capenable ^= IFCAP_TSO6;
1135
1136		if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO |
1137		    IFCAP_VLAN_HWFILTER)) {
1138			/* These Rx features require us to renegotiate. */
1139			reinit = 1;
1140
1141			if (mask & IFCAP_RXCSUM)
1142				ifp->if_capenable ^= IFCAP_RXCSUM;
1143			if (mask & IFCAP_RXCSUM_IPV6)
1144				ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1145			if (mask & IFCAP_LRO)
1146				ifp->if_capenable ^= IFCAP_LRO;
1147			if (mask & IFCAP_VLAN_HWFILTER)
1148				ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1149		} else
1150			reinit = 0;
1151
1152		if (mask & IFCAP_VLAN_HWTSO)
1153			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1154		if (mask & IFCAP_VLAN_HWTAGGING)
1155			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1156
1157		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1158			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1159			vtnet_init_locked(sc);
1160		}
1161
1162		VTNET_CORE_UNLOCK(sc);
1163		VLAN_CAPABILITIES(ifp);
1164
1165		break;
1166
1167	default:
1168		error = ether_ioctl(ifp, cmd, data);
1169		break;
1170	}
1171
1172	VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc);
1173
1174	return (error);
1175}
1176
1177static int
1178vtnet_rxq_populate(struct vtnet_rxq *rxq)
1179{
1180	struct virtqueue *vq;
1181	int nbufs, error;
1182
1183	vq = rxq->vtnrx_vq;
1184	error = ENOSPC;
1185
1186	for (nbufs = 0; !virtqueue_full(vq); nbufs++) {
1187		error = vtnet_rxq_new_buf(rxq);
1188		if (error)
1189			break;
1190	}
1191
1192	if (nbufs > 0) {
1193		virtqueue_notify(vq);
1194		/*
1195		 * EMSGSIZE signifies the virtqueue did not have enough
1196		 * entries available to hold the last mbuf. This is not
1197		 * an error.
1198		 */
1199		if (error == EMSGSIZE)
1200			error = 0;
1201	}
1202
1203	return (error);
1204}
1205
1206static void
1207vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq)
1208{
1209	struct virtqueue *vq;
1210	struct mbuf *m;
1211	int last;
1212
1213	vq = rxq->vtnrx_vq;
1214	last = 0;
1215
1216	while ((m = virtqueue_drain(vq, &last)) != NULL)
1217		m_freem(m);
1218
1219	KASSERT(virtqueue_empty(vq),
1220	    ("%s: mbufs remaining in rx queue %p", __func__, rxq));
1221}
1222
1223static struct mbuf *
1224vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
1225{
1226	struct mbuf *m_head, *m_tail, *m;
1227	int i, clsize;
1228
1229	clsize = sc->vtnet_rx_clsize;
1230
1231	KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1232	    ("%s: chained mbuf %d request without LRO_NOMRG", __func__, nbufs));
1233
1234	m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, clsize);
1235	if (m_head == NULL)
1236		goto fail;
1237
1238	m_head->m_len = clsize;
1239	m_tail = m_head;
1240
1241	/* Allocate the rest of the chain. */
1242	for (i = 1; i < nbufs; i++) {
1243		m = m_getjcl(M_NOWAIT, MT_DATA, 0, clsize);
1244		if (m == NULL)
1245			goto fail;
1246
1247		m->m_len = clsize;
1248		m_tail->m_next = m;
1249		m_tail = m;
1250	}
1251
1252	if (m_tailp != NULL)
1253		*m_tailp = m_tail;
1254
1255	return (m_head);
1256
1257fail:
1258	sc->vtnet_stats.mbuf_alloc_failed++;
1259	m_freem(m_head);
1260
1261	return (NULL);
1262}
1263
1264/*
1265 * Slow path for when LRO without mergeable buffers is negotiated.
1266 */
1267static int
1268vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
1269    int len0)
1270{
1271	struct vtnet_softc *sc;
1272	struct mbuf *m, *m_prev;
1273	struct mbuf *m_new, *m_tail;
1274	int len, clsize, nreplace, error;
1275
1276	sc = rxq->vtnrx_sc;
1277	clsize = sc->vtnet_rx_clsize;
1278
1279	m_prev = NULL;
1280	m_tail = NULL;
1281	nreplace = 0;
1282
1283	m = m0;
1284	len = len0;
1285
1286	/*
1287	 * Since these mbuf chains are so large, we avoid allocating an
1288	 * entire replacement chain if possible. When the received frame
1289	 * did not consume the entire chain, the unused mbufs are moved
1290	 * to the replacement chain.
1291	 */
1292	while (len > 0) {
1293		/*
1294		 * Something is seriously wrong if we received a frame
1295		 * larger than the chain. Drop it.
1296		 */
1297		if (m == NULL) {
1298			sc->vtnet_stats.rx_frame_too_large++;
1299			return (EMSGSIZE);
1300		}
1301
1302		/* We always allocate the same cluster size. */
1303		KASSERT(m->m_len == clsize,
1304		    ("%s: mbuf size %d is not the cluster size %d",
1305		    __func__, m->m_len, clsize));
1306
1307		m->m_len = MIN(m->m_len, len);
1308		len -= m->m_len;
1309
1310		m_prev = m;
1311		m = m->m_next;
1312		nreplace++;
1313	}
1314
1315	KASSERT(nreplace <= sc->vtnet_rx_nmbufs,
1316	    ("%s: too many replacement mbufs %d max %d", __func__, nreplace,
1317	    sc->vtnet_rx_nmbufs));
1318
1319	m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
1320	if (m_new == NULL) {
1321		m_prev->m_len = clsize;
1322		return (ENOBUFS);
1323	}
1324
1325	/*
1326	 * Move any unused mbufs from the received chain onto the end
1327	 * of the new chain.
1328	 */
1329	if (m_prev->m_next != NULL) {
1330		m_tail->m_next = m_prev->m_next;
1331		m_prev->m_next = NULL;
1332	}
1333
1334	error = vtnet_rxq_enqueue_buf(rxq, m_new);
1335	if (error) {
1336		/*
1337		 * BAD! We could not enqueue the replacement mbuf chain. We
1338		 * must restore the m0 chain to the original state if it was
1339		 * modified so we can subsequently discard it.
1340		 *
1341		 * NOTE: The replacement is suppose to be an identical copy
1342		 * to the one just dequeued so this is an unexpected error.
1343		 */
1344		sc->vtnet_stats.rx_enq_replacement_failed++;
1345
1346		if (m_tail->m_next != NULL) {
1347			m_prev->m_next = m_tail->m_next;
1348			m_tail->m_next = NULL;
1349		}
1350
1351		m_prev->m_len = clsize;
1352		m_freem(m_new);
1353	}
1354
1355	return (error);
1356}
1357
1358static int
1359vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len)
1360{
1361	struct vtnet_softc *sc;
1362	struct mbuf *m_new;
1363	int error;
1364
1365	sc = rxq->vtnrx_sc;
1366
1367	KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
1368	    ("%s: chained mbuf without LRO_NOMRG", __func__));
1369
1370	if (m->m_next == NULL) {
1371		/* Fast-path for the common case of just one mbuf. */
1372		if (m->m_len < len)
1373			return (EINVAL);
1374
1375		m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
1376		if (m_new == NULL)
1377			return (ENOBUFS);
1378
1379		error = vtnet_rxq_enqueue_buf(rxq, m_new);
1380		if (error) {
1381			/*
1382			 * The new mbuf is suppose to be an identical
1383			 * copy of the one just dequeued so this is an
1384			 * unexpected error.
1385			 */
1386			m_freem(m_new);
1387			sc->vtnet_stats.rx_enq_replacement_failed++;
1388		} else
1389			m->m_len = len;
1390	} else
1391		error = vtnet_rxq_replace_lro_nomgr_buf(rxq, m, len);
1392
1393	return (error);
1394}
1395
1396static int
1397vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m)
1398{
1399	struct vtnet_softc *sc;
1400	struct sglist *sg;
1401	struct vtnet_rx_header *rxhdr;
1402	uint8_t *mdata;
1403	int offset, error;
1404
1405	sc = rxq->vtnrx_sc;
1406	sg = rxq->vtnrx_sg;
1407	mdata = mtod(m, uint8_t *);
1408
1409	VTNET_RXQ_LOCK_ASSERT(rxq);
1410	KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
1411	    ("%s: chained mbuf without LRO_NOMRG", __func__));
1412	KASSERT(m->m_len == sc->vtnet_rx_clsize,
1413	    ("%s: unexpected cluster size %d/%d", __func__, m->m_len,
1414	     sc->vtnet_rx_clsize));
1415
1416	sglist_reset(sg);
1417	if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1418		MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr));
1419		rxhdr = (struct vtnet_rx_header *) mdata;
1420		sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size);
1421		offset = sizeof(struct vtnet_rx_header);
1422	} else
1423		offset = 0;
1424
1425	sglist_append(sg, mdata + offset, m->m_len - offset);
1426	if (m->m_next != NULL) {
1427		error = sglist_append_mbuf(sg, m->m_next);
1428		MPASS(error == 0);
1429	}
1430
1431	error = virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg);
1432
1433	return (error);
1434}
1435
1436static int
1437vtnet_rxq_new_buf(struct vtnet_rxq *rxq)
1438{
1439	struct vtnet_softc *sc;
1440	struct mbuf *m;
1441	int error;
1442
1443	sc = rxq->vtnrx_sc;
1444
1445	m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL);
1446	if (m == NULL)
1447		return (ENOBUFS);
1448
1449	error = vtnet_rxq_enqueue_buf(rxq, m);
1450	if (error)
1451		m_freem(m);
1452
1453	return (error);
1454}
1455
1456/*
1457 * Use the checksum offset in the VirtIO header to set the
1458 * correct CSUM_* flags.
1459 */
1460static int
1461vtnet_rxq_csum_by_offset(struct vtnet_rxq *rxq, struct mbuf *m,
1462    uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
1463{
1464	struct vtnet_softc *sc;
1465#if defined(INET) || defined(INET6)
1466	int offset = hdr->csum_start + hdr->csum_offset;
1467#endif
1468
1469	sc = rxq->vtnrx_sc;
1470
1471	/* Only do a basic sanity check on the offset. */
1472	switch (eth_type) {
1473#if defined(INET)
1474	case ETHERTYPE_IP:
1475		if (__predict_false(offset < ip_start + sizeof(struct ip)))
1476			return (1);
1477		break;
1478#endif
1479#if defined(INET6)
1480	case ETHERTYPE_IPV6:
1481		if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr)))
1482			return (1);
1483		break;
1484#endif
1485	default:
1486		sc->vtnet_stats.rx_csum_bad_ethtype++;
1487		return (1);
1488	}
1489
1490	/*
1491	 * Use the offset to determine the appropriate CSUM_* flags. This is
1492	 * a bit dirty, but we can get by with it since the checksum offsets
1493	 * happen to be different. We assume the host host does not do IPv4
1494	 * header checksum offloading.
1495	 */
1496	switch (hdr->csum_offset) {
1497	case offsetof(struct udphdr, uh_sum):
1498	case offsetof(struct tcphdr, th_sum):
1499		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1500		m->m_pkthdr.csum_data = 0xFFFF;
1501		break;
1502	case offsetof(struct sctphdr, checksum):
1503		m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
1504		break;
1505	default:
1506		sc->vtnet_stats.rx_csum_bad_offset++;
1507		return (1);
1508	}
1509
1510	return (0);
1511}
1512
1513static int
1514vtnet_rxq_csum_by_parse(struct vtnet_rxq *rxq, struct mbuf *m,
1515    uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
1516{
1517	struct vtnet_softc *sc;
1518	int offset, proto;
1519
1520	sc = rxq->vtnrx_sc;
1521
1522	switch (eth_type) {
1523#if defined(INET)
1524	case ETHERTYPE_IP: {
1525		struct ip *ip;
1526		if (__predict_false(m->m_len < ip_start + sizeof(struct ip)))
1527			return (1);
1528		ip = (struct ip *)(m->m_data + ip_start);
1529		proto = ip->ip_p;
1530		offset = ip_start + (ip->ip_hl << 2);
1531		break;
1532	}
1533#endif
1534#if defined(INET6)
1535	case ETHERTYPE_IPV6:
1536		if (__predict_false(m->m_len < ip_start +
1537		    sizeof(struct ip6_hdr)))
1538			return (1);
1539		offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto);
1540		if (__predict_false(offset < 0))
1541			return (1);
1542		break;
1543#endif
1544	default:
1545		sc->vtnet_stats.rx_csum_bad_ethtype++;
1546		return (1);
1547	}
1548
1549	switch (proto) {
1550	case IPPROTO_TCP:
1551		if (__predict_false(m->m_len < offset + sizeof(struct tcphdr)))
1552			return (1);
1553		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1554		m->m_pkthdr.csum_data = 0xFFFF;
1555		break;
1556	case IPPROTO_UDP:
1557		if (__predict_false(m->m_len < offset + sizeof(struct udphdr)))
1558			return (1);
1559		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1560		m->m_pkthdr.csum_data = 0xFFFF;
1561		break;
1562	case IPPROTO_SCTP:
1563		if (__predict_false(m->m_len < offset + sizeof(struct sctphdr)))
1564			return (1);
1565		m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
1566		break;
1567	default:
1568		/*
1569		 * For the remaining protocols, FreeBSD does not support
1570		 * checksum offloading, so the checksum will be recomputed.
1571		 */
1572#if 0
1573		if_printf(sc->vtnet_ifp, "cksum offload of unsupported "
1574		    "protocol eth_type=%#x proto=%d csum_start=%d "
1575		    "csum_offset=%d\n", __func__, eth_type, proto,
1576		    hdr->csum_start, hdr->csum_offset);
1577#endif
1578		break;
1579	}
1580
1581	return (0);
1582}
1583
1584/*
1585 * Set the appropriate CSUM_* flags. Unfortunately, the information
1586 * provided is not directly useful to us. The VirtIO header gives the
1587 * offset of the checksum, which is all Linux needs, but this is not
1588 * how FreeBSD does things. We are forced to peek inside the packet
1589 * a bit.
1590 *
1591 * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD
1592 * could accept the offsets and let the stack figure it out.
1593 */
1594static int
1595vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
1596    struct virtio_net_hdr *hdr)
1597{
1598	struct ether_header *eh;
1599	struct ether_vlan_header *evh;
1600	uint16_t eth_type;
1601	int offset, error;
1602
1603	eh = mtod(m, struct ether_header *);
1604	eth_type = ntohs(eh->ether_type);
1605	if (eth_type == ETHERTYPE_VLAN) {
1606		/* BMV: We should handle nested VLAN tags too. */
1607		evh = mtod(m, struct ether_vlan_header *);
1608		eth_type = ntohs(evh->evl_proto);
1609		offset = sizeof(struct ether_vlan_header);
1610	} else
1611		offset = sizeof(struct ether_header);
1612
1613	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
1614		error = vtnet_rxq_csum_by_offset(rxq, m, eth_type, offset, hdr);
1615	else
1616		error = vtnet_rxq_csum_by_parse(rxq, m, eth_type, offset, hdr);
1617
1618	return (error);
1619}
1620
1621static void
1622vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs)
1623{
1624	struct mbuf *m;
1625
1626	while (--nbufs > 0) {
1627		m = virtqueue_dequeue(rxq->vtnrx_vq, NULL);
1628		if (m == NULL)
1629			break;
1630		vtnet_rxq_discard_buf(rxq, m);
1631	}
1632}
1633
1634static void
1635vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m)
1636{
1637	int error;
1638
1639	/*
1640	 * Requeue the discarded mbuf. This should always be successful
1641	 * since it was just dequeued.
1642	 */
1643	error = vtnet_rxq_enqueue_buf(rxq, m);
1644	KASSERT(error == 0,
1645	    ("%s: cannot requeue discarded mbuf %d", __func__, error));
1646}
1647
1648static int
1649vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs)
1650{
1651	struct vtnet_softc *sc;
1652	struct ifnet *ifp;
1653	struct virtqueue *vq;
1654	struct mbuf *m, *m_tail;
1655	int len;
1656
1657	sc = rxq->vtnrx_sc;
1658	vq = rxq->vtnrx_vq;
1659	ifp = sc->vtnet_ifp;
1660	m_tail = m_head;
1661
1662	while (--nbufs > 0) {
1663		m = virtqueue_dequeue(vq, &len);
1664		if (m == NULL) {
1665			rxq->vtnrx_stats.vrxs_ierrors++;
1666			goto fail;
1667		}
1668
1669		if (vtnet_rxq_new_buf(rxq) != 0) {
1670			rxq->vtnrx_stats.vrxs_iqdrops++;
1671			vtnet_rxq_discard_buf(rxq, m);
1672			if (nbufs > 1)
1673				vtnet_rxq_discard_merged_bufs(rxq, nbufs);
1674			goto fail;
1675		}
1676
1677		if (m->m_len < len)
1678			len = m->m_len;
1679
1680		m->m_len = len;
1681		m->m_flags &= ~M_PKTHDR;
1682
1683		m_head->m_pkthdr.len += len;
1684		m_tail->m_next = m;
1685		m_tail = m;
1686	}
1687
1688	return (0);
1689
1690fail:
1691	sc->vtnet_stats.rx_mergeable_failed++;
1692	m_freem(m_head);
1693
1694	return (1);
1695}
1696
1697static void
1698vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m,
1699    struct virtio_net_hdr *hdr)
1700{
1701	struct vtnet_softc *sc;
1702	struct ifnet *ifp;
1703	struct ether_header *eh;
1704
1705	sc = rxq->vtnrx_sc;
1706	ifp = sc->vtnet_ifp;
1707
1708	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1709		eh = mtod(m, struct ether_header *);
1710		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1711			vtnet_vlan_tag_remove(m);
1712			/*
1713			 * With the 802.1Q header removed, update the
1714			 * checksum starting location accordingly.
1715			 */
1716			if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
1717				hdr->csum_start -= ETHER_VLAN_ENCAP_LEN;
1718		}
1719	}
1720
1721	m->m_pkthdr.flowid = rxq->vtnrx_id;
1722	M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
1723
1724	/*
1725	 * BMV: FreeBSD does not have the UNNECESSARY and PARTIAL checksum
1726	 * distinction that Linux does. Need to reevaluate if performing
1727	 * offloading for the NEEDS_CSUM case is really appropriate.
1728	 */
1729	if (hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM |
1730	    VIRTIO_NET_HDR_F_DATA_VALID)) {
1731		if (vtnet_rxq_csum(rxq, m, hdr) == 0)
1732			rxq->vtnrx_stats.vrxs_csum++;
1733		else
1734			rxq->vtnrx_stats.vrxs_csum_failed++;
1735	}
1736
1737	rxq->vtnrx_stats.vrxs_ipackets++;
1738	rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len;
1739
1740	VTNET_RXQ_UNLOCK(rxq);
1741	(*ifp->if_input)(ifp, m);
1742	VTNET_RXQ_LOCK(rxq);
1743}
1744
1745static int
1746vtnet_rxq_eof(struct vtnet_rxq *rxq)
1747{
1748	struct virtio_net_hdr lhdr, *hdr;
1749	struct vtnet_softc *sc;
1750	struct ifnet *ifp;
1751	struct virtqueue *vq;
1752	struct mbuf *m;
1753	struct virtio_net_hdr_mrg_rxbuf *mhdr;
1754	int len, deq, nbufs, adjsz, count;
1755
1756	sc = rxq->vtnrx_sc;
1757	vq = rxq->vtnrx_vq;
1758	ifp = sc->vtnet_ifp;
1759	hdr = &lhdr;
1760	deq = 0;
1761	count = sc->vtnet_rx_process_limit;
1762
1763	VTNET_RXQ_LOCK_ASSERT(rxq);
1764
1765#ifdef DEV_NETMAP
1766	if (netmap_rx_irq(ifp, 0, &deq)) {
1767		return (FALSE);
1768	}
1769#endif /* DEV_NETMAP */
1770
1771	while (count-- > 0) {
1772		m = virtqueue_dequeue(vq, &len);
1773		if (m == NULL)
1774			break;
1775		deq++;
1776
1777		if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) {
1778			rxq->vtnrx_stats.vrxs_ierrors++;
1779			vtnet_rxq_discard_buf(rxq, m);
1780			continue;
1781		}
1782
1783		if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1784			nbufs = 1;
1785			adjsz = sizeof(struct vtnet_rx_header);
1786			/*
1787			 * Account for our pad inserted between the header
1788			 * and the actual start of the frame.
1789			 */
1790			len += VTNET_RX_HEADER_PAD;
1791		} else {
1792			mhdr = mtod(m, struct virtio_net_hdr_mrg_rxbuf *);
1793			nbufs = mhdr->num_buffers;
1794			adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1795		}
1796
1797		if (vtnet_rxq_replace_buf(rxq, m, len) != 0) {
1798			rxq->vtnrx_stats.vrxs_iqdrops++;
1799			vtnet_rxq_discard_buf(rxq, m);
1800			if (nbufs > 1)
1801				vtnet_rxq_discard_merged_bufs(rxq, nbufs);
1802			continue;
1803		}
1804
1805		m->m_pkthdr.len = len;
1806		m->m_pkthdr.rcvif = ifp;
1807		m->m_pkthdr.csum_flags = 0;
1808
1809		if (nbufs > 1) {
1810			/* Dequeue the rest of chain. */
1811			if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0)
1812				continue;
1813		}
1814
1815		/*
1816		 * Save copy of header before we strip it. For both mergeable
1817		 * and non-mergeable, the header is at the beginning of the
1818		 * mbuf data. We no longer need num_buffers, so always use a
1819		 * regular header.
1820		 *
1821		 * BMV: Is this memcpy() expensive? We know the mbuf data is
1822		 * still valid even after the m_adj().
1823		 */
1824		memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr));
1825		m_adj(m, adjsz);
1826
1827		vtnet_rxq_input(rxq, m, hdr);
1828
1829		/* Must recheck after dropping the Rx lock. */
1830		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1831			break;
1832	}
1833
1834	if (deq > 0)
1835		virtqueue_notify(vq);
1836
1837	return (count > 0 ? 0 : EAGAIN);
1838}
1839
1840static void
1841vtnet_rx_vq_intr(void *xrxq)
1842{
1843	struct vtnet_softc *sc;
1844	struct vtnet_rxq *rxq;
1845	struct ifnet *ifp;
1846	int tries, more;
1847
1848	rxq = xrxq;
1849	sc = rxq->vtnrx_sc;
1850	ifp = sc->vtnet_ifp;
1851	tries = 0;
1852
1853	if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) {
1854		/*
1855		 * Ignore this interrupt. Either this is a spurious interrupt
1856		 * or multiqueue without per-VQ MSIX so every queue needs to
1857		 * be polled (a brain dead configuration we could try harder
1858		 * to avoid).
1859		 */
1860		vtnet_rxq_disable_intr(rxq);
1861		return;
1862	}
1863
1864	VTNET_RXQ_LOCK(rxq);
1865
1866again:
1867	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1868		VTNET_RXQ_UNLOCK(rxq);
1869		return;
1870	}
1871
1872	more = vtnet_rxq_eof(rxq);
1873	if (more || vtnet_rxq_enable_intr(rxq) != 0) {
1874		if (!more)
1875			vtnet_rxq_disable_intr(rxq);
1876		/*
1877		 * This is an occasional condition or race (when !more),
1878		 * so retry a few times before scheduling the taskqueue.
1879		 */
1880		if (tries++ < VTNET_INTR_DISABLE_RETRIES)
1881			goto again;
1882
1883		VTNET_RXQ_UNLOCK(rxq);
1884		rxq->vtnrx_stats.vrxs_rescheduled++;
1885		taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
1886	} else
1887		VTNET_RXQ_UNLOCK(rxq);
1888}
1889
1890static void
1891vtnet_rxq_tq_intr(void *xrxq, int pending)
1892{
1893	struct vtnet_softc *sc;
1894	struct vtnet_rxq *rxq;
1895	struct ifnet *ifp;
1896	int more;
1897
1898	rxq = xrxq;
1899	sc = rxq->vtnrx_sc;
1900	ifp = sc->vtnet_ifp;
1901
1902	VTNET_RXQ_LOCK(rxq);
1903
1904	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1905		VTNET_RXQ_UNLOCK(rxq);
1906		return;
1907	}
1908
1909	more = vtnet_rxq_eof(rxq);
1910	if (more || vtnet_rxq_enable_intr(rxq) != 0) {
1911		if (!more)
1912			vtnet_rxq_disable_intr(rxq);
1913		rxq->vtnrx_stats.vrxs_rescheduled++;
1914		taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
1915	}
1916
1917	VTNET_RXQ_UNLOCK(rxq);
1918}
1919
1920static int
1921vtnet_txq_below_threshold(struct vtnet_txq *txq)
1922{
1923	struct vtnet_softc *sc;
1924	struct virtqueue *vq;
1925
1926	sc = txq->vtntx_sc;
1927	vq = txq->vtntx_vq;
1928
1929	return (virtqueue_nfree(vq) <= sc->vtnet_tx_intr_thresh);
1930}
1931
1932static int
1933vtnet_txq_notify(struct vtnet_txq *txq)
1934{
1935	struct virtqueue *vq;
1936
1937	vq = txq->vtntx_vq;
1938
1939	txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
1940	virtqueue_notify(vq);
1941
1942	if (vtnet_txq_enable_intr(txq) == 0)
1943		return (0);
1944
1945	/*
1946	 * Drain frames that were completed since last checked. If this
1947	 * causes the queue to go above the threshold, the caller should
1948	 * continue transmitting.
1949	 */
1950	if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) {
1951		virtqueue_disable_intr(vq);
1952		return (1);
1953	}
1954
1955	return (0);
1956}
1957
1958static void
1959vtnet_txq_free_mbufs(struct vtnet_txq *txq)
1960{
1961	struct virtqueue *vq;
1962	struct vtnet_tx_header *txhdr;
1963	int last;
1964
1965	vq = txq->vtntx_vq;
1966	last = 0;
1967
1968	while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
1969		m_freem(txhdr->vth_mbuf);
1970		uma_zfree(vtnet_tx_header_zone, txhdr);
1971	}
1972
1973	KASSERT(virtqueue_empty(vq),
1974	    ("%s: mbufs remaining in tx queue %p", __func__, txq));
1975}
1976
1977/*
1978 * BMV: Much of this can go away once we finally have offsets in
1979 * the mbuf packet header. Bug andre@.
1980 */
1981static int
1982vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m,
1983    int *etype, int *proto, int *start)
1984{
1985	struct vtnet_softc *sc;
1986	struct ether_vlan_header *evh;
1987	int offset;
1988
1989	sc = txq->vtntx_sc;
1990
1991	evh = mtod(m, struct ether_vlan_header *);
1992	if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1993		/* BMV: We should handle nested VLAN tags too. */
1994		*etype = ntohs(evh->evl_proto);
1995		offset = sizeof(struct ether_vlan_header);
1996	} else {
1997		*etype = ntohs(evh->evl_encap_proto);
1998		offset = sizeof(struct ether_header);
1999	}
2000
2001	switch (*etype) {
2002#if defined(INET)
2003	case ETHERTYPE_IP: {
2004		struct ip *ip, iphdr;
2005		if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
2006			m_copydata(m, offset, sizeof(struct ip),
2007			    (caddr_t) &iphdr);
2008			ip = &iphdr;
2009		} else
2010			ip = (struct ip *)(m->m_data + offset);
2011		*proto = ip->ip_p;
2012		*start = offset + (ip->ip_hl << 2);
2013		break;
2014	}
2015#endif
2016#if defined(INET6)
2017	case ETHERTYPE_IPV6:
2018		*proto = -1;
2019		*start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
2020		/* Assert the network stack sent us a valid packet. */
2021		KASSERT(*start > offset,
2022		    ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
2023		    *start, offset, *proto));
2024		break;
2025#endif
2026	default:
2027		sc->vtnet_stats.tx_csum_bad_ethtype++;
2028		return (EINVAL);
2029	}
2030
2031	return (0);
2032}
2033
2034static int
2035vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type,
2036    int offset, struct virtio_net_hdr *hdr)
2037{
2038	static struct timeval lastecn;
2039	static int curecn;
2040	struct vtnet_softc *sc;
2041	struct tcphdr *tcp, tcphdr;
2042
2043	sc = txq->vtntx_sc;
2044
2045	if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) {
2046		m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr);
2047		tcp = &tcphdr;
2048	} else
2049		tcp = (struct tcphdr *)(m->m_data + offset);
2050
2051	hdr->hdr_len = offset + (tcp->th_off << 2);
2052	hdr->gso_size = m->m_pkthdr.tso_segsz;
2053	hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 :
2054	    VIRTIO_NET_HDR_GSO_TCPV6;
2055
2056	if (tcp->th_flags & TH_CWR) {
2057		/*
2058		 * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD,
2059		 * ECN support is not on a per-interface basis, but globally via
2060		 * the net.inet.tcp.ecn.enable sysctl knob. The default is off.
2061		 */
2062		if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
2063			if (ppsratecheck(&lastecn, &curecn, 1))
2064				if_printf(sc->vtnet_ifp,
2065				    "TSO with ECN not negotiated with host\n");
2066			return (ENOTSUP);
2067		}
2068		hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
2069	}
2070
2071	txq->vtntx_stats.vtxs_tso++;
2072
2073	return (0);
2074}
2075
2076static struct mbuf *
2077vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m,
2078    struct virtio_net_hdr *hdr)
2079{
2080	struct vtnet_softc *sc;
2081	int flags, etype, csum_start, proto, error;
2082
2083	sc = txq->vtntx_sc;
2084	flags = m->m_pkthdr.csum_flags;
2085
2086	error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start);
2087	if (error)
2088		goto drop;
2089
2090	if ((etype == ETHERTYPE_IP && flags & VTNET_CSUM_OFFLOAD) ||
2091	    (etype == ETHERTYPE_IPV6 && flags & VTNET_CSUM_OFFLOAD_IPV6)) {
2092		/*
2093		 * We could compare the IP protocol vs the CSUM_ flag too,
2094		 * but that really should not be necessary.
2095		 */
2096		hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
2097		hdr->csum_start = csum_start;
2098		hdr->csum_offset = m->m_pkthdr.csum_data;
2099		txq->vtntx_stats.vtxs_csum++;
2100	}
2101
2102	if (flags & CSUM_TSO) {
2103		if (__predict_false(proto != IPPROTO_TCP)) {
2104			/* Likely failed to correctly parse the mbuf. */
2105			sc->vtnet_stats.tx_tso_not_tcp++;
2106			goto drop;
2107		}
2108
2109		KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM,
2110		    ("%s: mbuf %p TSO without checksum offload %#x",
2111		    __func__, m, flags));
2112
2113		error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr);
2114		if (error)
2115			goto drop;
2116	}
2117
2118	return (m);
2119
2120drop:
2121	m_freem(m);
2122	return (NULL);
2123}
2124
2125static int
2126vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head,
2127    struct vtnet_tx_header *txhdr)
2128{
2129	struct vtnet_softc *sc;
2130	struct virtqueue *vq;
2131	struct sglist *sg;
2132	struct mbuf *m;
2133	int error;
2134
2135	sc = txq->vtntx_sc;
2136	vq = txq->vtntx_vq;
2137	sg = txq->vtntx_sg;
2138	m = *m_head;
2139
2140	sglist_reset(sg);
2141	error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size);
2142	KASSERT(error == 0 && sg->sg_nseg == 1,
2143	    ("%s: error %d adding header to sglist", __func__, error));
2144
2145	error = sglist_append_mbuf(sg, m);
2146	if (error) {
2147		m = m_defrag(m, M_NOWAIT);
2148		if (m == NULL)
2149			goto fail;
2150
2151		*m_head = m;
2152		sc->vtnet_stats.tx_defragged++;
2153
2154		error = sglist_append_mbuf(sg, m);
2155		if (error)
2156			goto fail;
2157	}
2158
2159	txhdr->vth_mbuf = m;
2160	error = virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0);
2161
2162	return (error);
2163
2164fail:
2165	sc->vtnet_stats.tx_defrag_failed++;
2166	m_freem(*m_head);
2167	*m_head = NULL;
2168
2169	return (ENOBUFS);
2170}
2171
2172static int
2173vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head)
2174{
2175	struct vtnet_tx_header *txhdr;
2176	struct virtio_net_hdr *hdr;
2177	struct mbuf *m;
2178	int error;
2179
2180	m = *m_head;
2181	M_ASSERTPKTHDR(m);
2182
2183	txhdr = uma_zalloc(vtnet_tx_header_zone, M_NOWAIT | M_ZERO);
2184	if (txhdr == NULL) {
2185		m_freem(m);
2186		*m_head = NULL;
2187		return (ENOMEM);
2188	}
2189
2190	/*
2191	 * Always use the non-mergeable header, regardless if the feature
2192	 * was negotiated. For transmit, num_buffers is always zero. The
2193	 * vtnet_hdr_size is used to enqueue the correct header size.
2194	 */
2195	hdr = &txhdr->vth_uhdr.hdr;
2196
2197	if (m->m_flags & M_VLANTAG) {
2198		m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
2199		if ((*m_head = m) == NULL) {
2200			error = ENOBUFS;
2201			goto fail;
2202		}
2203		m->m_flags &= ~M_VLANTAG;
2204	}
2205
2206	if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) {
2207		m = vtnet_txq_offload(txq, m, hdr);
2208		if ((*m_head = m) == NULL) {
2209			error = ENOBUFS;
2210			goto fail;
2211		}
2212	}
2213
2214	error = vtnet_txq_enqueue_buf(txq, m_head, txhdr);
2215	if (error == 0)
2216		return (0);
2217
2218fail:
2219	uma_zfree(vtnet_tx_header_zone, txhdr);
2220
2221	return (error);
2222}
2223
2224#ifdef VTNET_LEGACY_TX
2225
2226static void
2227vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp)
2228{
2229	struct vtnet_softc *sc;
2230	struct virtqueue *vq;
2231	struct mbuf *m0;
2232	int tries, enq;
2233
2234	sc = txq->vtntx_sc;
2235	vq = txq->vtntx_vq;
2236	tries = 0;
2237
2238	VTNET_TXQ_LOCK_ASSERT(txq);
2239
2240	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2241	    sc->vtnet_link_active == 0)
2242		return;
2243
2244	vtnet_txq_eof(txq);
2245
2246again:
2247	enq = 0;
2248
2249	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
2250		if (virtqueue_full(vq))
2251			break;
2252
2253		IFQ_DRV_DEQUEUE(&ifp->if_snd, m0);
2254		if (m0 == NULL)
2255			break;
2256
2257		if (vtnet_txq_encap(txq, &m0) != 0) {
2258			if (m0 != NULL)
2259				IFQ_DRV_PREPEND(&ifp->if_snd, m0);
2260			break;
2261		}
2262
2263		enq++;
2264		ETHER_BPF_MTAP(ifp, m0);
2265	}
2266
2267	if (enq > 0 && vtnet_txq_notify(txq) != 0) {
2268		if (tries++ < VTNET_NOTIFY_RETRIES)
2269			goto again;
2270
2271		txq->vtntx_stats.vtxs_rescheduled++;
2272		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
2273	}
2274}
2275
2276static void
2277vtnet_start(struct ifnet *ifp)
2278{
2279	struct vtnet_softc *sc;
2280	struct vtnet_txq *txq;
2281
2282	sc = ifp->if_softc;
2283	txq = &sc->vtnet_txqs[0];
2284
2285	VTNET_TXQ_LOCK(txq);
2286	vtnet_start_locked(txq, ifp);
2287	VTNET_TXQ_UNLOCK(txq);
2288}
2289
2290#else /* !VTNET_LEGACY_TX */
2291
2292static int
2293vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m)
2294{
2295	struct vtnet_softc *sc;
2296	struct virtqueue *vq;
2297	struct buf_ring *br;
2298	struct ifnet *ifp;
2299	int enq, tries, error;
2300
2301	sc = txq->vtntx_sc;
2302	vq = txq->vtntx_vq;
2303	br = txq->vtntx_br;
2304	ifp = sc->vtnet_ifp;
2305	tries = 0;
2306	error = 0;
2307
2308	VTNET_TXQ_LOCK_ASSERT(txq);
2309
2310	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2311	    sc->vtnet_link_active == 0) {
2312		if (m != NULL)
2313			error = drbr_enqueue(ifp, br, m);
2314		return (error);
2315	}
2316
2317	if (m != NULL) {
2318		error = drbr_enqueue(ifp, br, m);
2319		if (error)
2320			return (error);
2321	}
2322
2323	vtnet_txq_eof(txq);
2324
2325again:
2326	enq = 0;
2327
2328	while ((m = drbr_peek(ifp, br)) != NULL) {
2329		if (virtqueue_full(vq)) {
2330			drbr_putback(ifp, br, m);
2331			break;
2332		}
2333
2334		if (vtnet_txq_encap(txq, &m) != 0) {
2335			if (m != NULL)
2336				drbr_putback(ifp, br, m);
2337			else
2338				drbr_advance(ifp, br);
2339			break;
2340		}
2341		drbr_advance(ifp, br);
2342
2343		enq++;
2344		ETHER_BPF_MTAP(ifp, m);
2345	}
2346
2347	if (enq > 0 && vtnet_txq_notify(txq) != 0) {
2348		if (tries++ < VTNET_NOTIFY_RETRIES)
2349			goto again;
2350
2351		txq->vtntx_stats.vtxs_rescheduled++;
2352		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
2353	}
2354
2355	return (0);
2356}
2357
2358static int
2359vtnet_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
2360{
2361	struct vtnet_softc *sc;
2362	struct vtnet_txq *txq;
2363	int i, npairs, error;
2364
2365	sc = ifp->if_softc;
2366	npairs = sc->vtnet_act_vq_pairs;
2367
2368	/* check if flowid is set */
2369	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
2370		i = m->m_pkthdr.flowid % npairs;
2371	else
2372		i = curcpu % npairs;
2373
2374	txq = &sc->vtnet_txqs[i];
2375
2376	if (VTNET_TXQ_TRYLOCK(txq) != 0) {
2377		error = vtnet_txq_mq_start_locked(txq, m);
2378		VTNET_TXQ_UNLOCK(txq);
2379	} else {
2380		error = drbr_enqueue(ifp, txq->vtntx_br, m);
2381		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask);
2382	}
2383
2384	return (error);
2385}
2386
2387static void
2388vtnet_txq_tq_deferred(void *xtxq, int pending)
2389{
2390	struct vtnet_softc *sc;
2391	struct vtnet_txq *txq;
2392
2393	txq = xtxq;
2394	sc = txq->vtntx_sc;
2395
2396	VTNET_TXQ_LOCK(txq);
2397	if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br))
2398		vtnet_txq_mq_start_locked(txq, NULL);
2399	VTNET_TXQ_UNLOCK(txq);
2400}
2401
2402#endif /* VTNET_LEGACY_TX */
2403
2404static void
2405vtnet_txq_start(struct vtnet_txq *txq)
2406{
2407	struct vtnet_softc *sc;
2408	struct ifnet *ifp;
2409
2410	sc = txq->vtntx_sc;
2411	ifp = sc->vtnet_ifp;
2412
2413#ifdef VTNET_LEGACY_TX
2414	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2415		vtnet_start_locked(txq, ifp);
2416#else
2417	if (!drbr_empty(ifp, txq->vtntx_br))
2418		vtnet_txq_mq_start_locked(txq, NULL);
2419#endif
2420}
2421
2422static void
2423vtnet_txq_tq_intr(void *xtxq, int pending)
2424{
2425	struct vtnet_softc *sc;
2426	struct vtnet_txq *txq;
2427	struct ifnet *ifp;
2428
2429	txq = xtxq;
2430	sc = txq->vtntx_sc;
2431	ifp = sc->vtnet_ifp;
2432
2433	VTNET_TXQ_LOCK(txq);
2434
2435	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2436		VTNET_TXQ_UNLOCK(txq);
2437		return;
2438	}
2439
2440	vtnet_txq_eof(txq);
2441	vtnet_txq_start(txq);
2442
2443	VTNET_TXQ_UNLOCK(txq);
2444}
2445
2446static int
2447vtnet_txq_eof(struct vtnet_txq *txq)
2448{
2449	struct virtqueue *vq;
2450	struct vtnet_tx_header *txhdr;
2451	struct mbuf *m;
2452	int deq;
2453
2454	vq = txq->vtntx_vq;
2455	deq = 0;
2456	VTNET_TXQ_LOCK_ASSERT(txq);
2457
2458#ifdef DEV_NETMAP
2459	if (netmap_tx_irq(txq->vtntx_sc->vtnet_ifp, txq->vtntx_id)) {
2460		virtqueue_disable_intr(vq); // XXX luigi
2461		return 0; // XXX or 1 ?
2462	}
2463#endif /* DEV_NETMAP */
2464
2465	while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
2466		m = txhdr->vth_mbuf;
2467		deq++;
2468
2469		txq->vtntx_stats.vtxs_opackets++;
2470		txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len;
2471		if (m->m_flags & M_MCAST)
2472			txq->vtntx_stats.vtxs_omcasts++;
2473
2474		m_freem(m);
2475		uma_zfree(vtnet_tx_header_zone, txhdr);
2476	}
2477
2478	if (virtqueue_empty(vq))
2479		txq->vtntx_watchdog = 0;
2480
2481	return (deq);
2482}
2483
2484static void
2485vtnet_tx_vq_intr(void *xtxq)
2486{
2487	struct vtnet_softc *sc;
2488	struct vtnet_txq *txq;
2489	struct ifnet *ifp;
2490
2491	txq = xtxq;
2492	sc = txq->vtntx_sc;
2493	ifp = sc->vtnet_ifp;
2494
2495	if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) {
2496		/*
2497		 * Ignore this interrupt. Either this is a spurious interrupt
2498		 * or multiqueue without per-VQ MSIX so every queue needs to
2499		 * be polled (a brain dead configuration we could try harder
2500		 * to avoid).
2501		 */
2502		vtnet_txq_disable_intr(txq);
2503		return;
2504	}
2505
2506	VTNET_TXQ_LOCK(txq);
2507
2508	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2509		VTNET_TXQ_UNLOCK(txq);
2510		return;
2511	}
2512
2513	vtnet_txq_eof(txq);
2514	vtnet_txq_start(txq);
2515
2516	VTNET_TXQ_UNLOCK(txq);
2517}
2518
2519static void
2520vtnet_tx_start_all(struct vtnet_softc *sc)
2521{
2522	struct vtnet_txq *txq;
2523	int i;
2524
2525	VTNET_CORE_LOCK_ASSERT(sc);
2526
2527	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2528		txq = &sc->vtnet_txqs[i];
2529
2530		VTNET_TXQ_LOCK(txq);
2531		vtnet_txq_start(txq);
2532		VTNET_TXQ_UNLOCK(txq);
2533	}
2534}
2535
2536#ifndef VTNET_LEGACY_TX
2537static void
2538vtnet_qflush(struct ifnet *ifp)
2539{
2540	struct vtnet_softc *sc;
2541	struct vtnet_txq *txq;
2542	struct mbuf *m;
2543	int i;
2544
2545	sc = ifp->if_softc;
2546
2547	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2548		txq = &sc->vtnet_txqs[i];
2549
2550		VTNET_TXQ_LOCK(txq);
2551		while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL)
2552			m_freem(m);
2553		VTNET_TXQ_UNLOCK(txq);
2554	}
2555
2556	if_qflush(ifp);
2557}
2558#endif
2559
2560static int
2561vtnet_watchdog(struct vtnet_txq *txq)
2562{
2563	struct ifnet *ifp;
2564
2565	ifp = txq->vtntx_sc->vtnet_ifp;
2566
2567	VTNET_TXQ_LOCK(txq);
2568	if (txq->vtntx_watchdog == 1) {
2569		/*
2570		 * Only drain completed frames if the watchdog is about to
2571		 * expire. If any frames were drained, there may be enough
2572		 * free descriptors now available to transmit queued frames.
2573		 * In that case, the timer will immediately be decremented
2574		 * below, but the timeout is generous enough that should not
2575		 * be a problem.
2576		 */
2577		if (vtnet_txq_eof(txq) != 0)
2578			vtnet_txq_start(txq);
2579	}
2580
2581	if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) {
2582		VTNET_TXQ_UNLOCK(txq);
2583		return (0);
2584	}
2585	VTNET_TXQ_UNLOCK(txq);
2586
2587	if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id);
2588	return (1);
2589}
2590
2591static void
2592vtnet_rxq_accum_stats(struct vtnet_rxq *rxq, struct vtnet_rxq_stats *accum)
2593{
2594	struct vtnet_rxq_stats *st;
2595
2596	st = &rxq->vtnrx_stats;
2597
2598	accum->vrxs_ipackets += st->vrxs_ipackets;
2599	accum->vrxs_ibytes += st->vrxs_ibytes;
2600	accum->vrxs_iqdrops += st->vrxs_iqdrops;
2601	accum->vrxs_csum += st->vrxs_csum;
2602	accum->vrxs_csum_failed += st->vrxs_csum_failed;
2603	accum->vrxs_rescheduled += st->vrxs_rescheduled;
2604}
2605
2606static void
2607vtnet_txq_accum_stats(struct vtnet_txq *txq, struct vtnet_txq_stats *accum)
2608{
2609	struct vtnet_txq_stats *st;
2610
2611	st = &txq->vtntx_stats;
2612
2613	accum->vtxs_opackets += st->vtxs_opackets;
2614	accum->vtxs_obytes += st->vtxs_obytes;
2615	accum->vtxs_csum += st->vtxs_csum;
2616	accum->vtxs_tso += st->vtxs_tso;
2617	accum->vtxs_rescheduled += st->vtxs_rescheduled;
2618}
2619
2620static void
2621vtnet_accumulate_stats(struct vtnet_softc *sc)
2622{
2623	struct ifnet *ifp;
2624	struct vtnet_statistics *st;
2625	struct vtnet_rxq_stats rxaccum;
2626	struct vtnet_txq_stats txaccum;
2627	int i;
2628
2629	ifp = sc->vtnet_ifp;
2630	st = &sc->vtnet_stats;
2631	bzero(&rxaccum, sizeof(struct vtnet_rxq_stats));
2632	bzero(&txaccum, sizeof(struct vtnet_txq_stats));
2633
2634	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2635		vtnet_rxq_accum_stats(&sc->vtnet_rxqs[i], &rxaccum);
2636		vtnet_txq_accum_stats(&sc->vtnet_txqs[i], &txaccum);
2637	}
2638
2639	st->rx_csum_offloaded = rxaccum.vrxs_csum;
2640	st->rx_csum_failed = rxaccum.vrxs_csum_failed;
2641	st->rx_task_rescheduled = rxaccum.vrxs_rescheduled;
2642	st->tx_csum_offloaded = txaccum.vtxs_csum;
2643	st->tx_tso_offloaded = txaccum.vtxs_tso;
2644	st->tx_task_rescheduled = txaccum.vtxs_rescheduled;
2645
2646	/*
2647	 * With the exception of if_ierrors, these ifnet statistics are
2648	 * only updated in the driver, so just set them to our accumulated
2649	 * values. if_ierrors is updated in ether_input() for malformed
2650	 * frames that we should have already discarded.
2651	 */
2652	ifp->if_ipackets = rxaccum.vrxs_ipackets;
2653	ifp->if_iqdrops = rxaccum.vrxs_iqdrops;
2654	ifp->if_ierrors = rxaccum.vrxs_ierrors;
2655	ifp->if_opackets = txaccum.vtxs_opackets;
2656#ifndef VTNET_LEGACY_TX
2657	ifp->if_obytes = txaccum.vtxs_obytes;
2658	ifp->if_omcasts = txaccum.vtxs_omcasts;
2659#endif
2660}
2661
2662static void
2663vtnet_tick(void *xsc)
2664{
2665	struct vtnet_softc *sc;
2666	struct ifnet *ifp;
2667	int i, timedout;
2668
2669	sc = xsc;
2670	ifp = sc->vtnet_ifp;
2671	timedout = 0;
2672
2673	VTNET_CORE_LOCK_ASSERT(sc);
2674	vtnet_accumulate_stats(sc);
2675
2676	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
2677		timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]);
2678
2679	if (timedout != 0) {
2680		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2681		vtnet_init_locked(sc);
2682	} else
2683		callout_schedule(&sc->vtnet_tick_ch, hz);
2684}
2685
2686static void
2687vtnet_start_taskqueues(struct vtnet_softc *sc)
2688{
2689	device_t dev;
2690	struct vtnet_rxq *rxq;
2691	struct vtnet_txq *txq;
2692	int i, error;
2693
2694	dev = sc->vtnet_dev;
2695
2696	/*
2697	 * Errors here are very difficult to recover from - we cannot
2698	 * easily fail because, if this is during boot, we will hang
2699	 * when freeing any successfully started taskqueues because
2700	 * the scheduler isn't up yet.
2701	 *
2702	 * Most drivers just ignore the return value - it only fails
2703	 * with ENOMEM so an error is not likely.
2704	 */
2705	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2706		rxq = &sc->vtnet_rxqs[i];
2707		error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET,
2708		    "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id);
2709		if (error) {
2710			device_printf(dev, "failed to start rx taskq %d\n",
2711			    rxq->vtnrx_id);
2712		}
2713
2714		txq = &sc->vtnet_txqs[i];
2715		error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET,
2716		    "%s txq %d", device_get_nameunit(dev), txq->vtntx_id);
2717		if (error) {
2718			device_printf(dev, "failed to start tx taskq %d\n",
2719			    txq->vtntx_id);
2720		}
2721	}
2722}
2723
2724static void
2725vtnet_free_taskqueues(struct vtnet_softc *sc)
2726{
2727	struct vtnet_rxq *rxq;
2728	struct vtnet_txq *txq;
2729	int i;
2730
2731	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2732		rxq = &sc->vtnet_rxqs[i];
2733		if (rxq->vtnrx_tq != NULL) {
2734			taskqueue_free(rxq->vtnrx_tq);
2735			rxq->vtnrx_vq = NULL;
2736		}
2737
2738		txq = &sc->vtnet_txqs[i];
2739		if (txq->vtntx_tq != NULL) {
2740			taskqueue_free(txq->vtntx_tq);
2741			txq->vtntx_tq = NULL;
2742		}
2743	}
2744}
2745
2746static void
2747vtnet_drain_taskqueues(struct vtnet_softc *sc)
2748{
2749	struct vtnet_rxq *rxq;
2750	struct vtnet_txq *txq;
2751	int i;
2752
2753	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2754		rxq = &sc->vtnet_rxqs[i];
2755		if (rxq->vtnrx_tq != NULL)
2756			taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
2757
2758		txq = &sc->vtnet_txqs[i];
2759		if (txq->vtntx_tq != NULL) {
2760			taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask);
2761#ifndef VTNET_LEGACY_TX
2762			taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask);
2763#endif
2764		}
2765	}
2766}
2767
2768static void
2769vtnet_drain_rxtx_queues(struct vtnet_softc *sc)
2770{
2771	struct vtnet_rxq *rxq;
2772	struct vtnet_txq *txq;
2773	int i;
2774
2775#ifdef DEV_NETMAP
2776	if (nm_native_on(NA(sc->vtnet_ifp)))
2777		return;
2778#endif /* DEV_NETMAP */
2779
2780	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2781		rxq = &sc->vtnet_rxqs[i];
2782		vtnet_rxq_free_mbufs(rxq);
2783
2784		txq = &sc->vtnet_txqs[i];
2785		vtnet_txq_free_mbufs(txq);
2786	}
2787}
2788
2789static void
2790vtnet_stop_rendezvous(struct vtnet_softc *sc)
2791{
2792	struct vtnet_rxq *rxq;
2793	struct vtnet_txq *txq;
2794	int i;
2795
2796	/*
2797	 * Lock and unlock the per-queue mutex so we known the stop
2798	 * state is visible. Doing only the active queues should be
2799	 * sufficient, but it does not cost much extra to do all the
2800	 * queues. Note we hold the core mutex here too.
2801	 */
2802	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2803		rxq = &sc->vtnet_rxqs[i];
2804		VTNET_RXQ_LOCK(rxq);
2805		VTNET_RXQ_UNLOCK(rxq);
2806
2807		txq = &sc->vtnet_txqs[i];
2808		VTNET_TXQ_LOCK(txq);
2809		VTNET_TXQ_UNLOCK(txq);
2810	}
2811}
2812
2813static void
2814vtnet_stop(struct vtnet_softc *sc)
2815{
2816	device_t dev;
2817	struct ifnet *ifp;
2818
2819	dev = sc->vtnet_dev;
2820	ifp = sc->vtnet_ifp;
2821
2822	VTNET_CORE_LOCK_ASSERT(sc);
2823
2824	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2825	sc->vtnet_link_active = 0;
2826	callout_stop(&sc->vtnet_tick_ch);
2827
2828	/* Only advisory. */
2829	vtnet_disable_interrupts(sc);
2830
2831	/*
2832	 * Stop the host adapter. This resets it to the pre-initialized
2833	 * state. It will not generate any interrupts until after it is
2834	 * reinitialized.
2835	 */
2836	virtio_stop(dev);
2837	vtnet_stop_rendezvous(sc);
2838
2839	/* Free any mbufs left in the virtqueues. */
2840	vtnet_drain_rxtx_queues(sc);
2841}
2842
2843static int
2844vtnet_virtio_reinit(struct vtnet_softc *sc)
2845{
2846	device_t dev;
2847	struct ifnet *ifp;
2848	uint64_t features;
2849	int mask, error;
2850
2851	dev = sc->vtnet_dev;
2852	ifp = sc->vtnet_ifp;
2853	features = sc->vtnet_features;
2854
2855	mask = 0;
2856#if defined(INET)
2857	mask |= IFCAP_RXCSUM;
2858#endif
2859#if defined (INET6)
2860	mask |= IFCAP_RXCSUM_IPV6;
2861#endif
2862
2863	/*
2864	 * Re-negotiate with the host, removing any disabled receive
2865	 * features. Transmit features are disabled only on our side
2866	 * via if_capenable and if_hwassist.
2867	 */
2868
2869	if (ifp->if_capabilities & mask) {
2870		/*
2871		 * We require both IPv4 and IPv6 offloading to be enabled
2872		 * in order to negotiated it: VirtIO does not distinguish
2873		 * between the two.
2874		 */
2875		if ((ifp->if_capenable & mask) != mask)
2876			features &= ~VIRTIO_NET_F_GUEST_CSUM;
2877	}
2878
2879	if (ifp->if_capabilities & IFCAP_LRO) {
2880		if ((ifp->if_capenable & IFCAP_LRO) == 0)
2881			features &= ~VTNET_LRO_FEATURES;
2882	}
2883
2884	if (ifp->if_capabilities & IFCAP_VLAN_HWFILTER) {
2885		if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)
2886			features &= ~VIRTIO_NET_F_CTRL_VLAN;
2887	}
2888
2889	error = virtio_reinit(dev, features);
2890	if (error)
2891		device_printf(dev, "virtio reinit error %d\n", error);
2892
2893	return (error);
2894}
2895
2896static void
2897vtnet_init_rx_filters(struct vtnet_softc *sc)
2898{
2899	struct ifnet *ifp;
2900
2901	ifp = sc->vtnet_ifp;
2902
2903	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
2904		/* Restore promiscuous and all-multicast modes. */
2905		vtnet_rx_filter(sc);
2906		/* Restore filtered MAC addresses. */
2907		vtnet_rx_filter_mac(sc);
2908	}
2909
2910	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2911		vtnet_rx_filter_vlan(sc);
2912}
2913
2914static int
2915vtnet_init_rx_queues(struct vtnet_softc *sc)
2916{
2917	device_t dev;
2918	struct vtnet_rxq *rxq;
2919	int i, clsize, error;
2920
2921	dev = sc->vtnet_dev;
2922
2923	/*
2924	 * Use the new cluster size if one has been set (via a MTU
2925	 * change). Otherwise, use the standard 2K clusters.
2926	 *
2927	 * BMV: It might make sense to use page sized clusters as
2928	 * the default (depending on the features negotiated).
2929	 */
2930	if (sc->vtnet_rx_new_clsize != 0) {
2931		clsize = sc->vtnet_rx_new_clsize;
2932		sc->vtnet_rx_new_clsize = 0;
2933	} else
2934		clsize = MCLBYTES;
2935
2936	sc->vtnet_rx_clsize = clsize;
2937	sc->vtnet_rx_nmbufs = VTNET_NEEDED_RX_MBUFS(sc, clsize);
2938
2939	KASSERT(sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS ||
2940	    sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs,
2941	    ("%s: too many rx mbufs %d for %d segments", __func__,
2942	    sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs));
2943
2944#ifdef DEV_NETMAP
2945	if (vtnet_netmap_init_rx_buffers(sc))
2946		return 0;
2947#endif /* DEV_NETMAP */
2948
2949	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2950		rxq = &sc->vtnet_rxqs[i];
2951
2952		/* Hold the lock to satisfy asserts. */
2953		VTNET_RXQ_LOCK(rxq);
2954		error = vtnet_rxq_populate(rxq);
2955		VTNET_RXQ_UNLOCK(rxq);
2956
2957		if (error) {
2958			device_printf(dev,
2959			    "cannot allocate mbufs for Rx queue %d\n", i);
2960			return (error);
2961		}
2962	}
2963
2964	return (0);
2965}
2966
2967static int
2968vtnet_init_tx_queues(struct vtnet_softc *sc)
2969{
2970	struct vtnet_txq *txq;
2971	int i;
2972
2973	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2974		txq = &sc->vtnet_txqs[i];
2975		txq->vtntx_watchdog = 0;
2976	}
2977
2978	return (0);
2979}
2980
2981static int
2982vtnet_init_rxtx_queues(struct vtnet_softc *sc)
2983{
2984	int error;
2985
2986	error = vtnet_init_rx_queues(sc);
2987	if (error)
2988		return (error);
2989
2990	error = vtnet_init_tx_queues(sc);
2991	if (error)
2992		return (error);
2993
2994	return (0);
2995}
2996
2997static void
2998vtnet_set_active_vq_pairs(struct vtnet_softc *sc)
2999{
3000	device_t dev;
3001	int npairs;
3002
3003	dev = sc->vtnet_dev;
3004
3005	if ((sc->vtnet_flags & VTNET_FLAG_MULTIQ) == 0) {
3006		sc->vtnet_act_vq_pairs = 1;
3007		return;
3008	}
3009
3010	npairs = sc->vtnet_requested_vq_pairs;
3011
3012	if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) {
3013		device_printf(dev,
3014		    "cannot set active queue pairs to %d\n", npairs);
3015		npairs = 1;
3016	}
3017
3018	sc->vtnet_act_vq_pairs = npairs;
3019}
3020
3021static int
3022vtnet_reinit(struct vtnet_softc *sc)
3023{
3024	struct ifnet *ifp;
3025	int error;
3026
3027	ifp = sc->vtnet_ifp;
3028
3029	/* Use the current MAC address. */
3030	bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN);
3031	vtnet_set_hwaddr(sc);
3032
3033	vtnet_set_active_vq_pairs(sc);
3034
3035	ifp->if_hwassist = 0;
3036	if (ifp->if_capenable & IFCAP_TXCSUM)
3037		ifp->if_hwassist |= VTNET_CSUM_OFFLOAD;
3038	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
3039		ifp->if_hwassist |= VTNET_CSUM_OFFLOAD_IPV6;
3040	if (ifp->if_capenable & IFCAP_TSO4)
3041		ifp->if_hwassist |= CSUM_IP_TSO;
3042	if (ifp->if_capenable & IFCAP_TSO6)
3043		ifp->if_hwassist |= CSUM_IP6_TSO;
3044
3045	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
3046		vtnet_init_rx_filters(sc);
3047
3048	error = vtnet_init_rxtx_queues(sc);
3049	if (error)
3050		return (error);
3051
3052	vtnet_enable_interrupts(sc);
3053	ifp->if_drv_flags |= IFF_DRV_RUNNING;
3054
3055	return (0);
3056}
3057
3058static void
3059vtnet_init_locked(struct vtnet_softc *sc)
3060{
3061	device_t dev;
3062	struct ifnet *ifp;
3063
3064	dev = sc->vtnet_dev;
3065	ifp = sc->vtnet_ifp;
3066
3067	VTNET_CORE_LOCK_ASSERT(sc);
3068
3069	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3070		return;
3071
3072	vtnet_stop(sc);
3073
3074	/* Reinitialize with the host. */
3075	if (vtnet_virtio_reinit(sc) != 0)
3076		goto fail;
3077
3078	if (vtnet_reinit(sc) != 0)
3079		goto fail;
3080
3081	virtio_reinit_complete(dev);
3082
3083	vtnet_update_link_status(sc);
3084	callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
3085
3086	return;
3087
3088fail:
3089	vtnet_stop(sc);
3090}
3091
3092static void
3093vtnet_init(void *xsc)
3094{
3095	struct vtnet_softc *sc;
3096
3097	sc = xsc;
3098
3099#ifdef DEV_NETMAP
3100	if (!NA(sc->vtnet_ifp)) {
3101		D("try to attach again");
3102		vtnet_netmap_attach(sc);
3103	}
3104#endif /* DEV_NETMAP */
3105
3106	VTNET_CORE_LOCK(sc);
3107	vtnet_init_locked(sc);
3108	VTNET_CORE_UNLOCK(sc);
3109}
3110
3111static void
3112vtnet_free_ctrl_vq(struct vtnet_softc *sc)
3113{
3114	struct virtqueue *vq;
3115
3116	vq = sc->vtnet_ctrl_vq;
3117
3118	/*
3119	 * The control virtqueue is only polled and therefore it should
3120	 * already be empty.
3121	 */
3122	KASSERT(virtqueue_empty(vq),
3123	    ("%s: ctrl vq %p not empty", __func__, vq));
3124}
3125
3126static void
3127vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie,
3128    struct sglist *sg, int readable, int writable)
3129{
3130	struct virtqueue *vq;
3131
3132	vq = sc->vtnet_ctrl_vq;
3133
3134	VTNET_CORE_LOCK_ASSERT(sc);
3135	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ,
3136	    ("%s: CTRL_VQ feature not negotiated", __func__));
3137
3138	if (!virtqueue_empty(vq))
3139		return;
3140	if (virtqueue_enqueue(vq, cookie, sg, readable, writable) != 0)
3141		return;
3142
3143	/*
3144	 * Poll for the response, but the command is likely already
3145	 * done when we return from the notify.
3146	 */
3147	virtqueue_notify(vq);
3148	virtqueue_poll(vq, NULL);
3149}
3150
3151static int
3152vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr)
3153{
3154	struct virtio_net_ctrl_hdr hdr __aligned(2);
3155	struct sglist_seg segs[3];
3156	struct sglist sg;
3157	uint8_t ack;
3158	int error;
3159
3160	hdr.class = VIRTIO_NET_CTRL_MAC;
3161	hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
3162	ack = VIRTIO_NET_ERR;
3163
3164	sglist_init(&sg, 3, segs);
3165	error = 0;
3166	error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
3167	error |= sglist_append(&sg, hwaddr, ETHER_ADDR_LEN);
3168	error |= sglist_append(&sg, &ack, sizeof(uint8_t));
3169	KASSERT(error == 0 && sg.sg_nseg == 3,
3170	    ("%s: error %d adding set MAC msg to sglist", __func__, error));
3171
3172	vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
3173
3174	return (ack == VIRTIO_NET_OK ? 0 : EIO);
3175}
3176
3177static int
3178vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs)
3179{
3180	struct sglist_seg segs[3];
3181	struct sglist sg;
3182	struct {
3183		struct virtio_net_ctrl_hdr hdr;
3184		uint8_t pad1;
3185		struct virtio_net_ctrl_mq mq;
3186		uint8_t pad2;
3187		uint8_t ack;
3188	} s __aligned(2);
3189	int error;
3190
3191	s.hdr.class = VIRTIO_NET_CTRL_MQ;
3192	s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
3193	s.mq.virtqueue_pairs = npairs;
3194	s.ack = VIRTIO_NET_ERR;
3195
3196	sglist_init(&sg, 3, segs);
3197	error = 0;
3198	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3199	error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq));
3200	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3201	KASSERT(error == 0 && sg.sg_nseg == 3,
3202	    ("%s: error %d adding MQ message to sglist", __func__, error));
3203
3204	vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3205
3206	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3207}
3208
3209static int
3210vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on)
3211{
3212	struct sglist_seg segs[3];
3213	struct sglist sg;
3214	struct {
3215		struct virtio_net_ctrl_hdr hdr;
3216		uint8_t pad1;
3217		uint8_t onoff;
3218		uint8_t pad2;
3219		uint8_t ack;
3220	} s __aligned(2);
3221	int error;
3222
3223	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
3224	    ("%s: CTRL_RX feature not negotiated", __func__));
3225
3226	s.hdr.class = VIRTIO_NET_CTRL_RX;
3227	s.hdr.cmd = cmd;
3228	s.onoff = !!on;
3229	s.ack = VIRTIO_NET_ERR;
3230
3231	sglist_init(&sg, 3, segs);
3232	error = 0;
3233	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3234	error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t));
3235	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3236	KASSERT(error == 0 && sg.sg_nseg == 3,
3237	    ("%s: error %d adding Rx message to sglist", __func__, error));
3238
3239	vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3240
3241	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3242}
3243
3244static int
3245vtnet_set_promisc(struct vtnet_softc *sc, int on)
3246{
3247
3248	return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on));
3249}
3250
3251static int
3252vtnet_set_allmulti(struct vtnet_softc *sc, int on)
3253{
3254
3255	return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on));
3256}
3257
3258/*
3259 * The device defaults to promiscuous mode for backwards compatibility.
3260 * Turn it off at attach time if possible.
3261 */
3262static void
3263vtnet_attach_disable_promisc(struct vtnet_softc *sc)
3264{
3265	struct ifnet *ifp;
3266
3267	ifp = sc->vtnet_ifp;
3268
3269	VTNET_CORE_LOCK(sc);
3270	if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) {
3271		ifp->if_flags |= IFF_PROMISC;
3272	} else if (vtnet_set_promisc(sc, 0) != 0) {
3273		ifp->if_flags |= IFF_PROMISC;
3274		device_printf(sc->vtnet_dev,
3275		    "cannot disable default promiscuous mode\n");
3276	}
3277	VTNET_CORE_UNLOCK(sc);
3278}
3279
3280static void
3281vtnet_rx_filter(struct vtnet_softc *sc)
3282{
3283	device_t dev;
3284	struct ifnet *ifp;
3285
3286	dev = sc->vtnet_dev;
3287	ifp = sc->vtnet_ifp;
3288
3289	VTNET_CORE_LOCK_ASSERT(sc);
3290
3291	if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0)
3292		device_printf(dev, "cannot %s promiscuous mode\n",
3293		    ifp->if_flags & IFF_PROMISC ? "enable" : "disable");
3294
3295	if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0)
3296		device_printf(dev, "cannot %s all-multicast mode\n",
3297		    ifp->if_flags & IFF_ALLMULTI ? "enable" : "disable");
3298}
3299
3300static void
3301vtnet_rx_filter_mac(struct vtnet_softc *sc)
3302{
3303	struct virtio_net_ctrl_hdr hdr __aligned(2);
3304	struct vtnet_mac_filter *filter;
3305	struct sglist_seg segs[4];
3306	struct sglist sg;
3307	struct ifnet *ifp;
3308	struct ifaddr *ifa;
3309	struct ifmultiaddr *ifma;
3310	int ucnt, mcnt, promisc, allmulti, error;
3311	uint8_t ack;
3312
3313	ifp = sc->vtnet_ifp;
3314	filter = sc->vtnet_mac_filter;
3315	ucnt = 0;
3316	mcnt = 0;
3317	promisc = 0;
3318	allmulti = 0;
3319
3320	VTNET_CORE_LOCK_ASSERT(sc);
3321	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
3322	    ("%s: CTRL_RX feature not negotiated", __func__));
3323
3324	/* Unicast MAC addresses: */
3325	if_addr_rlock(ifp);
3326	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
3327		if (ifa->ifa_addr->sa_family != AF_LINK)
3328			continue;
3329		else if (memcmp(LLADDR((struct sockaddr_dl *)ifa->ifa_addr),
3330		    sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0)
3331			continue;
3332		else if (ucnt == VTNET_MAX_MAC_ENTRIES) {
3333			promisc = 1;
3334			break;
3335		}
3336
3337		bcopy(LLADDR((struct sockaddr_dl *)ifa->ifa_addr),
3338		    &filter->vmf_unicast.macs[ucnt], ETHER_ADDR_LEN);
3339		ucnt++;
3340	}
3341	if_addr_runlock(ifp);
3342
3343	if (promisc != 0) {
3344		filter->vmf_unicast.nentries = 0;
3345		if_printf(ifp, "more than %d MAC addresses assigned, "
3346		    "falling back to promiscuous mode\n",
3347		    VTNET_MAX_MAC_ENTRIES);
3348	} else
3349		filter->vmf_unicast.nentries = ucnt;
3350
3351	/* Multicast MAC addresses: */
3352	if_maddr_rlock(ifp);
3353	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3354		if (ifma->ifma_addr->sa_family != AF_LINK)
3355			continue;
3356		else if (mcnt == VTNET_MAX_MAC_ENTRIES) {
3357			allmulti = 1;
3358			break;
3359		}
3360
3361		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
3362		    &filter->vmf_multicast.macs[mcnt], ETHER_ADDR_LEN);
3363		mcnt++;
3364	}
3365	if_maddr_runlock(ifp);
3366
3367	if (allmulti != 0) {
3368		filter->vmf_multicast.nentries = 0;
3369		if_printf(ifp, "more than %d multicast MAC addresses "
3370		    "assigned, falling back to all-multicast mode\n",
3371		    VTNET_MAX_MAC_ENTRIES);
3372	} else
3373		filter->vmf_multicast.nentries = mcnt;
3374
3375	if (promisc != 0 && allmulti != 0)
3376		goto out;
3377
3378	hdr.class = VIRTIO_NET_CTRL_MAC;
3379	hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
3380	ack = VIRTIO_NET_ERR;
3381
3382	sglist_init(&sg, 4, segs);
3383	error = 0;
3384	error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
3385	error |= sglist_append(&sg, &filter->vmf_unicast,
3386	    sizeof(uint32_t) + filter->vmf_unicast.nentries * ETHER_ADDR_LEN);
3387	error |= sglist_append(&sg, &filter->vmf_multicast,
3388	    sizeof(uint32_t) + filter->vmf_multicast.nentries * ETHER_ADDR_LEN);
3389	error |= sglist_append(&sg, &ack, sizeof(uint8_t));
3390	KASSERT(error == 0 && sg.sg_nseg == 4,
3391	    ("%s: error %d adding MAC filter msg to sglist", __func__, error));
3392
3393	vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
3394
3395	if (ack != VIRTIO_NET_OK)
3396		if_printf(ifp, "error setting host MAC filter table\n");
3397
3398out:
3399	if (promisc != 0 && vtnet_set_promisc(sc, 1) != 0)
3400		if_printf(ifp, "cannot enable promiscuous mode\n");
3401	if (allmulti != 0 && vtnet_set_allmulti(sc, 1) != 0)
3402		if_printf(ifp, "cannot enable all-multicast mode\n");
3403}
3404
3405static int
3406vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
3407{
3408	struct sglist_seg segs[3];
3409	struct sglist sg;
3410	struct {
3411		struct virtio_net_ctrl_hdr hdr;
3412		uint8_t pad1;
3413		uint16_t tag;
3414		uint8_t pad2;
3415		uint8_t ack;
3416	} s __aligned(2);
3417	int error;
3418
3419	s.hdr.class = VIRTIO_NET_CTRL_VLAN;
3420	s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
3421	s.tag = tag;
3422	s.ack = VIRTIO_NET_ERR;
3423
3424	sglist_init(&sg, 3, segs);
3425	error = 0;
3426	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3427	error |= sglist_append(&sg, &s.tag, sizeof(uint16_t));
3428	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3429	KASSERT(error == 0 && sg.sg_nseg == 3,
3430	    ("%s: error %d adding VLAN message to sglist", __func__, error));
3431
3432	vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3433
3434	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3435}
3436
3437static void
3438vtnet_rx_filter_vlan(struct vtnet_softc *sc)
3439{
3440	uint32_t w;
3441	uint16_t tag;
3442	int i, bit;
3443
3444	VTNET_CORE_LOCK_ASSERT(sc);
3445	KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER,
3446	    ("%s: VLAN_FILTER feature not negotiated", __func__));
3447
3448	/* Enable the filter for each configured VLAN. */
3449	for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) {
3450		w = sc->vtnet_vlan_filter[i];
3451
3452		while ((bit = ffs(w) - 1) != -1) {
3453			w &= ~(1 << bit);
3454			tag = sizeof(w) * CHAR_BIT * i + bit;
3455
3456			if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) {
3457				device_printf(sc->vtnet_dev,
3458				    "cannot enable VLAN %d filter\n", tag);
3459			}
3460		}
3461	}
3462}
3463
3464static void
3465vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
3466{
3467	struct ifnet *ifp;
3468	int idx, bit;
3469
3470	ifp = sc->vtnet_ifp;
3471	idx = (tag >> 5) & 0x7F;
3472	bit = tag & 0x1F;
3473
3474	if (tag == 0 || tag > 4095)
3475		return;
3476
3477	VTNET_CORE_LOCK(sc);
3478
3479	if (add)
3480		sc->vtnet_vlan_filter[idx] |= (1 << bit);
3481	else
3482		sc->vtnet_vlan_filter[idx] &= ~(1 << bit);
3483
3484	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER &&
3485	    vtnet_exec_vlan_filter(sc, add, tag) != 0) {
3486		device_printf(sc->vtnet_dev,
3487		    "cannot %s VLAN %d %s the host filter table\n",
3488		    add ? "add" : "remove", tag, add ? "to" : "from");
3489	}
3490
3491	VTNET_CORE_UNLOCK(sc);
3492}
3493
3494static void
3495vtnet_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3496{
3497
3498	if (ifp->if_softc != arg)
3499		return;
3500
3501	vtnet_update_vlan_filter(arg, 1, tag);
3502}
3503
3504static void
3505vtnet_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3506{
3507
3508	if (ifp->if_softc != arg)
3509		return;
3510
3511	vtnet_update_vlan_filter(arg, 0, tag);
3512}
3513
3514static int
3515vtnet_is_link_up(struct vtnet_softc *sc)
3516{
3517	device_t dev;
3518	struct ifnet *ifp;
3519	uint16_t status;
3520
3521	dev = sc->vtnet_dev;
3522	ifp = sc->vtnet_ifp;
3523
3524	if ((ifp->if_capabilities & IFCAP_LINKSTATE) == 0)
3525		status = VIRTIO_NET_S_LINK_UP;
3526	else
3527		status = virtio_read_dev_config_2(dev,
3528		    offsetof(struct virtio_net_config, status));
3529
3530	return ((status & VIRTIO_NET_S_LINK_UP) != 0);
3531}
3532
3533static void
3534vtnet_update_link_status(struct vtnet_softc *sc)
3535{
3536	struct ifnet *ifp;
3537	int link;
3538
3539	ifp = sc->vtnet_ifp;
3540
3541	VTNET_CORE_LOCK_ASSERT(sc);
3542	link = vtnet_is_link_up(sc);
3543
3544	/* Notify if the link status has changed. */
3545	if (link != 0 && sc->vtnet_link_active == 0) {
3546		sc->vtnet_link_active = 1;
3547		if_link_state_change(ifp, LINK_STATE_UP);
3548	} else if (link == 0 && sc->vtnet_link_active != 0) {
3549		sc->vtnet_link_active = 0;
3550		if_link_state_change(ifp, LINK_STATE_DOWN);
3551	}
3552}
3553
3554static int
3555vtnet_ifmedia_upd(struct ifnet *ifp)
3556{
3557	struct vtnet_softc *sc;
3558	struct ifmedia *ifm;
3559
3560	sc = ifp->if_softc;
3561	ifm = &sc->vtnet_media;
3562
3563	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
3564		return (EINVAL);
3565
3566	return (0);
3567}
3568
3569static void
3570vtnet_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
3571{
3572	struct vtnet_softc *sc;
3573
3574	sc = ifp->if_softc;
3575
3576	ifmr->ifm_status = IFM_AVALID;
3577	ifmr->ifm_active = IFM_ETHER;
3578
3579	VTNET_CORE_LOCK(sc);
3580	if (vtnet_is_link_up(sc) != 0) {
3581		ifmr->ifm_status |= IFM_ACTIVE;
3582		ifmr->ifm_active |= VTNET_MEDIATYPE;
3583	} else
3584		ifmr->ifm_active |= IFM_NONE;
3585	VTNET_CORE_UNLOCK(sc);
3586}
3587
3588static void
3589vtnet_set_hwaddr(struct vtnet_softc *sc)
3590{
3591	device_t dev;
3592	int i;
3593
3594	dev = sc->vtnet_dev;
3595
3596	if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) {
3597		if (vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr) != 0)
3598			device_printf(dev, "unable to set MAC address\n");
3599	} else if (sc->vtnet_flags & VTNET_FLAG_MAC) {
3600		for (i = 0; i < ETHER_ADDR_LEN; i++) {
3601			virtio_write_dev_config_1(dev,
3602			    offsetof(struct virtio_net_config, mac) + i,
3603			    sc->vtnet_hwaddr[i]);
3604		}
3605	}
3606}
3607
3608static void
3609vtnet_get_hwaddr(struct vtnet_softc *sc)
3610{
3611	device_t dev;
3612	int i;
3613
3614	dev = sc->vtnet_dev;
3615
3616	if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) {
3617		/*
3618		 * Generate a random locally administered unicast address.
3619		 *
3620		 * It would be nice to generate the same MAC address across
3621		 * reboots, but it seems all the hosts currently available
3622		 * support the MAC feature, so this isn't too important.
3623		 */
3624		sc->vtnet_hwaddr[0] = 0xB2;
3625		arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0);
3626		vtnet_set_hwaddr(sc);
3627		return;
3628	}
3629
3630	for (i = 0; i < ETHER_ADDR_LEN; i++) {
3631		sc->vtnet_hwaddr[i] = virtio_read_dev_config_1(dev,
3632		    offsetof(struct virtio_net_config, mac) + i);
3633	}
3634}
3635
3636static void
3637vtnet_vlan_tag_remove(struct mbuf *m)
3638{
3639	struct ether_vlan_header *evh;
3640
3641	evh = mtod(m, struct ether_vlan_header *);
3642	m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag);
3643	m->m_flags |= M_VLANTAG;
3644
3645	/* Strip the 802.1Q header. */
3646	bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN,
3647	    ETHER_HDR_LEN - ETHER_TYPE_LEN);
3648	m_adj(m, ETHER_VLAN_ENCAP_LEN);
3649}
3650
3651static void
3652vtnet_set_rx_process_limit(struct vtnet_softc *sc)
3653{
3654	int limit;
3655
3656	limit = vtnet_tunable_int(sc, "rx_process_limit",
3657	    vtnet_rx_process_limit);
3658	if (limit < 0)
3659		limit = INT_MAX;
3660	sc->vtnet_rx_process_limit = limit;
3661}
3662
3663static void
3664vtnet_set_tx_intr_threshold(struct vtnet_softc *sc)
3665{
3666	device_t dev;
3667	int size, thresh;
3668
3669	dev = sc->vtnet_dev;
3670	size = virtqueue_size(sc->vtnet_txqs[0].vtntx_vq);
3671
3672	/*
3673	 * The Tx interrupt is disabled until the queue free count falls
3674	 * below our threshold. Completed frames are drained from the Tx
3675	 * virtqueue before transmitting new frames and in the watchdog
3676	 * callout, so the frequency of Tx interrupts is greatly reduced,
3677	 * at the cost of not freeing mbufs as quickly as they otherwise
3678	 * would be.
3679	 *
3680	 * N.B. We assume all the Tx queues are the same size.
3681	 */
3682	thresh = size / 4;
3683
3684	/*
3685	 * Without indirect descriptors, leave enough room for the most
3686	 * segments we handle.
3687	 */
3688	if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0 &&
3689	    thresh < sc->vtnet_tx_nsegs)
3690		thresh = sc->vtnet_tx_nsegs;
3691
3692	sc->vtnet_tx_intr_thresh = thresh;
3693}
3694
3695static void
3696vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx,
3697    struct sysctl_oid_list *child, struct vtnet_rxq *rxq)
3698{
3699	struct sysctl_oid *node;
3700	struct sysctl_oid_list *list;
3701	struct vtnet_rxq_stats *stats;
3702	char namebuf[16];
3703
3704	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id);
3705	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
3706	    CTLFLAG_RD, NULL, "Receive Queue");
3707	list = SYSCTL_CHILDREN(node);
3708
3709	stats = &rxq->vtnrx_stats;
3710
3711	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
3712	    &stats->vrxs_ipackets, "Receive packets");
3713	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
3714	    &stats->vrxs_ibytes, "Receive bytes");
3715	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
3716	    &stats->vrxs_iqdrops, "Receive drops");
3717	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
3718	    &stats->vrxs_ierrors, "Receive errors");
3719	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3720	    &stats->vrxs_csum, "Receive checksum offloaded");
3721	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD,
3722	    &stats->vrxs_csum_failed, "Receive checksum offload failed");
3723	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
3724	    &stats->vrxs_rescheduled,
3725	    "Receive interrupt handler rescheduled");
3726}
3727
3728static void
3729vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx,
3730    struct sysctl_oid_list *child, struct vtnet_txq *txq)
3731{
3732	struct sysctl_oid *node;
3733	struct sysctl_oid_list *list;
3734	struct vtnet_txq_stats *stats;
3735	char namebuf[16];
3736
3737	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id);
3738	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
3739	    CTLFLAG_RD, NULL, "Transmit Queue");
3740	list = SYSCTL_CHILDREN(node);
3741
3742	stats = &txq->vtntx_stats;
3743
3744	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
3745	    &stats->vtxs_opackets, "Transmit packets");
3746	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
3747	    &stats->vtxs_obytes, "Transmit bytes");
3748	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
3749	    &stats->vtxs_omcasts, "Transmit multicasts");
3750	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3751	    &stats->vtxs_csum, "Transmit checksum offloaded");
3752	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
3753	    &stats->vtxs_tso, "Transmit segmentation offloaded");
3754	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
3755	    &stats->vtxs_rescheduled,
3756	    "Transmit interrupt handler rescheduled");
3757}
3758
3759static void
3760vtnet_setup_queue_sysctl(struct vtnet_softc *sc)
3761{
3762	device_t dev;
3763	struct sysctl_ctx_list *ctx;
3764	struct sysctl_oid *tree;
3765	struct sysctl_oid_list *child;
3766	int i;
3767
3768	dev = sc->vtnet_dev;
3769	ctx = device_get_sysctl_ctx(dev);
3770	tree = device_get_sysctl_tree(dev);
3771	child = SYSCTL_CHILDREN(tree);
3772
3773	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3774		vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]);
3775		vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]);
3776	}
3777}
3778
3779static void
3780vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx,
3781    struct sysctl_oid_list *child, struct vtnet_softc *sc)
3782{
3783	struct vtnet_statistics *stats;
3784
3785	stats = &sc->vtnet_stats;
3786
3787	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed",
3788	    CTLFLAG_RD, &stats->mbuf_alloc_failed,
3789	    "Mbuf cluster allocation failures");
3790
3791	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large",
3792	    CTLFLAG_RD, &stats->rx_frame_too_large,
3793	    "Received frame larger than the mbuf chain");
3794	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed",
3795	    CTLFLAG_RD, &stats->rx_enq_replacement_failed,
3796	    "Enqueuing the replacement receive mbuf failed");
3797	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed",
3798	    CTLFLAG_RD, &stats->rx_mergeable_failed,
3799	    "Mergeable buffers receive failures");
3800	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype",
3801	    CTLFLAG_RD, &stats->rx_csum_bad_ethtype,
3802	    "Received checksum offloaded buffer with unsupported "
3803	    "Ethernet type");
3804	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto",
3805	    CTLFLAG_RD, &stats->rx_csum_bad_ipproto,
3806	    "Received checksum offloaded buffer with incorrect IP protocol");
3807	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset",
3808	    CTLFLAG_RD, &stats->rx_csum_bad_offset,
3809	    "Received checksum offloaded buffer with incorrect offset");
3810	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto",
3811	    CTLFLAG_RD, &stats->rx_csum_bad_proto,
3812	    "Received checksum offloaded buffer with incorrect protocol");
3813	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed",
3814	    CTLFLAG_RD, &stats->rx_csum_failed,
3815	    "Received buffer checksum offload failed");
3816	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded",
3817	    CTLFLAG_RD, &stats->rx_csum_offloaded,
3818	    "Received buffer checksum offload succeeded");
3819	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled",
3820	    CTLFLAG_RD, &stats->rx_task_rescheduled,
3821	    "Times the receive interrupt task rescheduled itself");
3822
3823	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_bad_ethtype",
3824	    CTLFLAG_RD, &stats->tx_csum_bad_ethtype,
3825	    "Aborted transmit of checksum offloaded buffer with unknown "
3826	    "Ethernet type");
3827	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_bad_ethtype",
3828	    CTLFLAG_RD, &stats->tx_tso_bad_ethtype,
3829	    "Aborted transmit of TSO buffer with unknown Ethernet type");
3830	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp",
3831	    CTLFLAG_RD, &stats->tx_tso_not_tcp,
3832	    "Aborted transmit of TSO buffer with non TCP protocol");
3833	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged",
3834	    CTLFLAG_RD, &stats->tx_defragged,
3835	    "Transmit mbufs defragged");
3836	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed",
3837	    CTLFLAG_RD, &stats->tx_defrag_failed,
3838	    "Aborted transmit of buffer because defrag failed");
3839	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded",
3840	    CTLFLAG_RD, &stats->tx_csum_offloaded,
3841	    "Offloaded checksum of transmitted buffer");
3842	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded",
3843	    CTLFLAG_RD, &stats->tx_tso_offloaded,
3844	    "Segmentation offload of transmitted buffer");
3845	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled",
3846	    CTLFLAG_RD, &stats->tx_task_rescheduled,
3847	    "Times the transmit interrupt task rescheduled itself");
3848}
3849
3850static void
3851vtnet_setup_sysctl(struct vtnet_softc *sc)
3852{
3853	device_t dev;
3854	struct sysctl_ctx_list *ctx;
3855	struct sysctl_oid *tree;
3856	struct sysctl_oid_list *child;
3857
3858	dev = sc->vtnet_dev;
3859	ctx = device_get_sysctl_ctx(dev);
3860	tree = device_get_sysctl_tree(dev);
3861	child = SYSCTL_CHILDREN(tree);
3862
3863	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs",
3864	    CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0,
3865	    "Maximum number of supported virtqueue pairs");
3866	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "requested_vq_pairs",
3867	    CTLFLAG_RD, &sc->vtnet_requested_vq_pairs, 0,
3868	    "Requested number of virtqueue pairs");
3869	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs",
3870	    CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0,
3871	    "Number of active virtqueue pairs");
3872
3873	vtnet_setup_stat_sysctl(ctx, child, sc);
3874}
3875
3876static int
3877vtnet_rxq_enable_intr(struct vtnet_rxq *rxq)
3878{
3879
3880	return (virtqueue_enable_intr(rxq->vtnrx_vq));
3881}
3882
3883static void
3884vtnet_rxq_disable_intr(struct vtnet_rxq *rxq)
3885{
3886
3887	virtqueue_disable_intr(rxq->vtnrx_vq);
3888}
3889
3890static int
3891vtnet_txq_enable_intr(struct vtnet_txq *txq)
3892{
3893	struct virtqueue *vq;
3894
3895	vq = txq->vtntx_vq;
3896
3897	if (vtnet_txq_below_threshold(txq) != 0)
3898		return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG));
3899
3900	/*
3901	 * The free count is above our threshold. Keep the Tx interrupt
3902	 * disabled until the queue is fuller.
3903	 */
3904	return (0);
3905}
3906
3907static void
3908vtnet_txq_disable_intr(struct vtnet_txq *txq)
3909{
3910
3911	virtqueue_disable_intr(txq->vtntx_vq);
3912}
3913
3914static void
3915vtnet_enable_rx_interrupts(struct vtnet_softc *sc)
3916{
3917	int i;
3918
3919	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3920		vtnet_rxq_enable_intr(&sc->vtnet_rxqs[i]);
3921}
3922
3923static void
3924vtnet_enable_tx_interrupts(struct vtnet_softc *sc)
3925{
3926	int i;
3927
3928	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3929		vtnet_txq_enable_intr(&sc->vtnet_txqs[i]);
3930}
3931
3932static void
3933vtnet_enable_interrupts(struct vtnet_softc *sc)
3934{
3935
3936	vtnet_enable_rx_interrupts(sc);
3937	vtnet_enable_tx_interrupts(sc);
3938}
3939
3940static void
3941vtnet_disable_rx_interrupts(struct vtnet_softc *sc)
3942{
3943	int i;
3944
3945	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3946		vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]);
3947}
3948
3949static void
3950vtnet_disable_tx_interrupts(struct vtnet_softc *sc)
3951{
3952	int i;
3953
3954	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3955		vtnet_txq_disable_intr(&sc->vtnet_txqs[i]);
3956}
3957
3958static void
3959vtnet_disable_interrupts(struct vtnet_softc *sc)
3960{
3961
3962	vtnet_disable_rx_interrupts(sc);
3963	vtnet_disable_tx_interrupts(sc);
3964}
3965
3966static int
3967vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def)
3968{
3969	char path[64];
3970
3971	snprintf(path, sizeof(path),
3972	    "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob);
3973	TUNABLE_INT_FETCH(path, &def);
3974
3975	return (def);
3976}
3977