if_vtnet.c revision 281955
1/*-
2 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice unmodified, this list of conditions, and the following
10 *    disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27/* Driver for VirtIO network devices. */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/sys/dev/virtio/network/if_vtnet.c 281955 2015-04-24 23:26:44Z hiren $");
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/kernel.h>
35#include <sys/sockio.h>
36#include <sys/mbuf.h>
37#include <sys/malloc.h>
38#include <sys/module.h>
39#include <sys/socket.h>
40#include <sys/sysctl.h>
41#include <sys/random.h>
42#include <sys/sglist.h>
43#include <sys/lock.h>
44#include <sys/mutex.h>
45#include <sys/taskqueue.h>
46#include <sys/smp.h>
47#include <machine/smp.h>
48
49#include <vm/uma.h>
50
51#include <net/ethernet.h>
52#include <net/if.h>
53#include <net/if_arp.h>
54#include <net/if_dl.h>
55#include <net/if_types.h>
56#include <net/if_media.h>
57#include <net/if_vlan_var.h>
58
59#include <net/bpf.h>
60
61#include <netinet/in_systm.h>
62#include <netinet/in.h>
63#include <netinet/ip.h>
64#include <netinet/ip6.h>
65#include <netinet6/ip6_var.h>
66#include <netinet/udp.h>
67#include <netinet/tcp.h>
68#include <netinet/sctp.h>
69
70#include <machine/bus.h>
71#include <machine/resource.h>
72#include <sys/bus.h>
73#include <sys/rman.h>
74
75#include <dev/virtio/virtio.h>
76#include <dev/virtio/virtqueue.h>
77#include <dev/virtio/network/virtio_net.h>
78#include <dev/virtio/network/if_vtnetvar.h>
79
80#include "virtio_if.h"
81
82#include "opt_inet.h"
83#include "opt_inet6.h"
84
85static int	vtnet_modevent(module_t, int, void *);
86
87static int	vtnet_probe(device_t);
88static int	vtnet_attach(device_t);
89static int	vtnet_detach(device_t);
90static int	vtnet_suspend(device_t);
91static int	vtnet_resume(device_t);
92static int	vtnet_shutdown(device_t);
93static int	vtnet_attach_completed(device_t);
94static int	vtnet_config_change(device_t);
95
96static void	vtnet_negotiate_features(struct vtnet_softc *);
97static void	vtnet_setup_features(struct vtnet_softc *);
98static int	vtnet_init_rxq(struct vtnet_softc *, int);
99static int	vtnet_init_txq(struct vtnet_softc *, int);
100static int	vtnet_alloc_rxtx_queues(struct vtnet_softc *);
101static void	vtnet_free_rxtx_queues(struct vtnet_softc *);
102static int	vtnet_alloc_rx_filters(struct vtnet_softc *);
103static void	vtnet_free_rx_filters(struct vtnet_softc *);
104static int	vtnet_alloc_virtqueues(struct vtnet_softc *);
105static int	vtnet_setup_interface(struct vtnet_softc *);
106static int	vtnet_change_mtu(struct vtnet_softc *, int);
107static int	vtnet_ioctl(struct ifnet *, u_long, caddr_t);
108
109static int	vtnet_rxq_populate(struct vtnet_rxq *);
110static void	vtnet_rxq_free_mbufs(struct vtnet_rxq *);
111static struct mbuf *
112		vtnet_rx_alloc_buf(struct vtnet_softc *, int, struct mbuf **);
113static int	vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *,
114		    struct mbuf *, int);
115static int	vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
116static int	vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
117static int	vtnet_rxq_new_buf(struct vtnet_rxq *);
118static int	vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
119		     struct virtio_net_hdr *);
120static void	vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
121static void	vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *);
122static int	vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int);
123static void	vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *,
124		    struct virtio_net_hdr *);
125static int	vtnet_rxq_eof(struct vtnet_rxq *);
126static void	vtnet_rx_vq_intr(void *);
127static void	vtnet_rxq_tq_intr(void *, int);
128
129static int	vtnet_txq_below_threshold(struct vtnet_txq *);
130static int	vtnet_txq_notify(struct vtnet_txq *);
131static void	vtnet_txq_free_mbufs(struct vtnet_txq *);
132static int	vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
133		    int *, int *, int *);
134static int	vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int,
135		    int, struct virtio_net_hdr *);
136static struct mbuf *
137		vtnet_txq_offload(struct vtnet_txq *, struct mbuf *,
138		    struct virtio_net_hdr *);
139static int	vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **,
140		    struct vtnet_tx_header *);
141static int	vtnet_txq_encap(struct vtnet_txq *, struct mbuf **);
142#ifdef VTNET_LEGACY_TX
143static void	vtnet_start_locked(struct vtnet_txq *, struct ifnet *);
144static void	vtnet_start(struct ifnet *);
145#else
146static int	vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *);
147static int	vtnet_txq_mq_start(struct ifnet *, struct mbuf *);
148static void	vtnet_txq_tq_deferred(void *, int);
149#endif
150static void	vtnet_txq_start(struct vtnet_txq *);
151static void	vtnet_txq_tq_intr(void *, int);
152static int	vtnet_txq_eof(struct vtnet_txq *);
153static void	vtnet_tx_vq_intr(void *);
154static void	vtnet_tx_start_all(struct vtnet_softc *);
155
156#ifndef VTNET_LEGACY_TX
157static void	vtnet_qflush(struct ifnet *);
158#endif
159
160static int	vtnet_watchdog(struct vtnet_txq *);
161static void	vtnet_rxq_accum_stats(struct vtnet_rxq *,
162		    struct vtnet_rxq_stats *);
163static void	vtnet_txq_accum_stats(struct vtnet_txq *,
164		    struct vtnet_txq_stats *);
165static void	vtnet_accumulate_stats(struct vtnet_softc *);
166static void	vtnet_tick(void *);
167
168static void	vtnet_start_taskqueues(struct vtnet_softc *);
169static void	vtnet_free_taskqueues(struct vtnet_softc *);
170static void	vtnet_drain_taskqueues(struct vtnet_softc *);
171
172static void	vtnet_drain_rxtx_queues(struct vtnet_softc *);
173static void	vtnet_stop_rendezvous(struct vtnet_softc *);
174static void	vtnet_stop(struct vtnet_softc *);
175static int	vtnet_virtio_reinit(struct vtnet_softc *);
176static void	vtnet_init_rx_filters(struct vtnet_softc *);
177static int	vtnet_init_rx_queues(struct vtnet_softc *);
178static int	vtnet_init_tx_queues(struct vtnet_softc *);
179static int	vtnet_init_rxtx_queues(struct vtnet_softc *);
180static void	vtnet_set_active_vq_pairs(struct vtnet_softc *);
181static int	vtnet_reinit(struct vtnet_softc *);
182static void	vtnet_init_locked(struct vtnet_softc *);
183static void	vtnet_init(void *);
184
185static void	vtnet_free_ctrl_vq(struct vtnet_softc *);
186static void	vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
187		    struct sglist *, int, int);
188static int	vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
189static int	vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t);
190static int	vtnet_ctrl_rx_cmd(struct vtnet_softc *, int, int);
191static int	vtnet_set_promisc(struct vtnet_softc *, int);
192static int	vtnet_set_allmulti(struct vtnet_softc *, int);
193static void	vtnet_attach_disable_promisc(struct vtnet_softc *);
194static void	vtnet_rx_filter(struct vtnet_softc *);
195static void	vtnet_rx_filter_mac(struct vtnet_softc *);
196static int	vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
197static void	vtnet_rx_filter_vlan(struct vtnet_softc *);
198static void	vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t);
199static void	vtnet_register_vlan(void *, struct ifnet *, uint16_t);
200static void	vtnet_unregister_vlan(void *, struct ifnet *, uint16_t);
201
202static int	vtnet_is_link_up(struct vtnet_softc *);
203static void	vtnet_update_link_status(struct vtnet_softc *);
204static int	vtnet_ifmedia_upd(struct ifnet *);
205static void	vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *);
206static void	vtnet_get_hwaddr(struct vtnet_softc *);
207static void	vtnet_set_hwaddr(struct vtnet_softc *);
208static void	vtnet_vlan_tag_remove(struct mbuf *);
209static void	vtnet_set_rx_process_limit(struct vtnet_softc *);
210static void	vtnet_set_tx_intr_threshold(struct vtnet_softc *);
211
212static void	vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
213		    struct sysctl_oid_list *, struct vtnet_rxq *);
214static void	vtnet_setup_txq_sysctl(struct sysctl_ctx_list *,
215		    struct sysctl_oid_list *, struct vtnet_txq *);
216static void	vtnet_setup_queue_sysctl(struct vtnet_softc *);
217static void	vtnet_setup_sysctl(struct vtnet_softc *);
218
219static int	vtnet_rxq_enable_intr(struct vtnet_rxq *);
220static void	vtnet_rxq_disable_intr(struct vtnet_rxq *);
221static int	vtnet_txq_enable_intr(struct vtnet_txq *);
222static void	vtnet_txq_disable_intr(struct vtnet_txq *);
223static void	vtnet_enable_rx_interrupts(struct vtnet_softc *);
224static void	vtnet_enable_tx_interrupts(struct vtnet_softc *);
225static void	vtnet_enable_interrupts(struct vtnet_softc *);
226static void	vtnet_disable_rx_interrupts(struct vtnet_softc *);
227static void	vtnet_disable_tx_interrupts(struct vtnet_softc *);
228static void	vtnet_disable_interrupts(struct vtnet_softc *);
229
230static int	vtnet_tunable_int(struct vtnet_softc *, const char *, int);
231
232/* Tunables. */
233static int vtnet_csum_disable = 0;
234TUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable);
235static int vtnet_tso_disable = 0;
236TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable);
237static int vtnet_lro_disable = 0;
238TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable);
239static int vtnet_mq_disable = 0;
240TUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable);
241static int vtnet_mq_max_pairs = 0;
242TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs);
243static int vtnet_rx_process_limit = 512;
244TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit);
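/*
 * Editor's note (illustrative usage, not part of the driver): these are
 * loader tunables, so they are normally set in /boot/loader.conf before the
 * module is loaded, for example:
 *
 *	hw.vtnet.csum_disable="1"
 *	hw.vtnet.mq_max_pairs="4"
 *	hw.vtnet.rx_process_limit="256"
 *
 * Per-device overrides of the form hw.vtnet.<unit>.<knob> are consulted via
 * vtnet_tunable_int(), defined later in this file.
 */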
245
246static uma_zone_t vtnet_tx_header_zone;
247
248static struct virtio_feature_desc vtnet_feature_desc[] = {
249	{ VIRTIO_NET_F_CSUM,		"TxChecksum"	},
250	{ VIRTIO_NET_F_GUEST_CSUM,	"RxChecksum"	},
251	{ VIRTIO_NET_F_MAC,		"MacAddress"	},
252	{ VIRTIO_NET_F_GSO,		"TxAllGSO"	},
253	{ VIRTIO_NET_F_GUEST_TSO4,	"RxTSOv4"	},
254	{ VIRTIO_NET_F_GUEST_TSO6,	"RxTSOv6"	},
255	{ VIRTIO_NET_F_GUEST_ECN,	"RxECN"		},
256	{ VIRTIO_NET_F_GUEST_UFO,	"RxUFO"		},
257	{ VIRTIO_NET_F_HOST_TSO4,	"TxTSOv4"	},
258	{ VIRTIO_NET_F_HOST_TSO6,	"TxTSOv6"	},
259	{ VIRTIO_NET_F_HOST_ECN,	"TxTSOECN"	},
260	{ VIRTIO_NET_F_HOST_UFO,	"TxUFO"		},
261	{ VIRTIO_NET_F_MRG_RXBUF,	"MrgRxBuf"	},
262	{ VIRTIO_NET_F_STATUS,		"Status"	},
263	{ VIRTIO_NET_F_CTRL_VQ,		"ControlVq"	},
264	{ VIRTIO_NET_F_CTRL_RX,		"RxMode"	},
265	{ VIRTIO_NET_F_CTRL_VLAN,	"VLanFilter"	},
266	{ VIRTIO_NET_F_CTRL_RX_EXTRA,	"RxModeExtra"	},
267	{ VIRTIO_NET_F_GUEST_ANNOUNCE,	"GuestAnnounce"	},
268	{ VIRTIO_NET_F_MQ,		"Multiqueue"	},
269	{ VIRTIO_NET_F_CTRL_MAC_ADDR,	"SetMacAddress"	},
270
271	{ 0, NULL }
272};
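/*
 * Editor's note (assumption about the virtio bus code): these strings are
 * purely descriptive; they label the corresponding feature bits when the
 * host and negotiated feature masks are printed at attach time, typically
 * only under bootverbose.
 */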
273
274static device_method_t vtnet_methods[] = {
275	/* Device methods. */
276	DEVMETHOD(device_probe,			vtnet_probe),
277	DEVMETHOD(device_attach,		vtnet_attach),
278	DEVMETHOD(device_detach,		vtnet_detach),
279	DEVMETHOD(device_suspend,		vtnet_suspend),
280	DEVMETHOD(device_resume,		vtnet_resume),
281	DEVMETHOD(device_shutdown,		vtnet_shutdown),
282
283	/* VirtIO methods. */
284	DEVMETHOD(virtio_attach_completed,	vtnet_attach_completed),
285	DEVMETHOD(virtio_config_change,		vtnet_config_change),
286
287	DEVMETHOD_END
288};
289
290#ifdef DEV_NETMAP
291#include <dev/netmap/if_vtnet_netmap.h>
292#endif /* DEV_NETMAP */
293
294static driver_t vtnet_driver = {
295	"vtnet",
296	vtnet_methods,
297	sizeof(struct vtnet_softc)
298};
299static devclass_t vtnet_devclass;
300
301DRIVER_MODULE(vtnet, virtio_pci, vtnet_driver, vtnet_devclass,
302    vtnet_modevent, 0);
303MODULE_VERSION(vtnet, 1);
304MODULE_DEPEND(vtnet, virtio, 1, 1, 1);
305
306static int
307vtnet_modevent(module_t mod, int type, void *unused)
308{
309	int error;
310
311	error = 0;
312
313	switch (type) {
314	case MOD_LOAD:
315		vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr",
316		    sizeof(struct vtnet_tx_header),
317		    NULL, NULL, NULL, NULL, 0, 0);
318		break;
319	case MOD_QUIESCE:
320	case MOD_UNLOAD:
321		if (uma_zone_get_cur(vtnet_tx_header_zone) > 0)
322			error = EBUSY;
323		else if (type == MOD_UNLOAD) {
324			uma_zdestroy(vtnet_tx_header_zone);
325			vtnet_tx_header_zone = NULL;
326		}
327		break;
328	case MOD_SHUTDOWN:
329		break;
330	default:
331		error = EOPNOTSUPP;
332		break;
333	}
334
335	return (error);
336}
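/*
 * Editor's note (illustrative): because MOD_QUIESCE/MOD_UNLOAD check
 * uma_zone_get_cur() above, "kldunload if_vtnet" is refused with EBUSY
 * ("Device busy") while any transmit headers are still outstanding; the UMA
 * zone is only destroyed once that count reaches zero.
 */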
337
338static int
339vtnet_probe(device_t dev)
340{
341
342	if (virtio_get_device_type(dev) != VIRTIO_ID_NETWORK)
343		return (ENXIO);
344
345	device_set_desc(dev, "VirtIO Networking Adapter");
346
347	return (BUS_PROBE_DEFAULT);
348}
349
350static int
351vtnet_attach(device_t dev)
352{
353	struct vtnet_softc *sc;
354	int error;
355
356	sc = device_get_softc(dev);
357	sc->vtnet_dev = dev;
358
359	/* Register our feature descriptions. */
360	virtio_set_feature_desc(dev, vtnet_feature_desc);
361
362	VTNET_CORE_LOCK_INIT(sc);
363	callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0);
364
365	vtnet_setup_sysctl(sc);
366	vtnet_setup_features(sc);
367
368	error = vtnet_alloc_rx_filters(sc);
369	if (error) {
370		device_printf(dev, "cannot allocate Rx filters\n");
371		goto fail;
372	}
373
374	error = vtnet_alloc_rxtx_queues(sc);
375	if (error) {
376		device_printf(dev, "cannot allocate queues\n");
377		goto fail;
378	}
379
380	error = vtnet_alloc_virtqueues(sc);
381	if (error) {
382		device_printf(dev, "cannot allocate virtqueues\n");
383		goto fail;
384	}
385
386	error = vtnet_setup_interface(sc);
387	if (error) {
388		device_printf(dev, "cannot setup interface\n");
389		goto fail;
390	}
391
392	error = virtio_setup_intr(dev, INTR_TYPE_NET);
393	if (error) {
394		device_printf(dev, "cannot setup virtqueue interrupts\n");
395		/* BMV: This will crash if it happens during boot! */
396		ether_ifdetach(sc->vtnet_ifp);
397		goto fail;
398	}
399
400#ifdef DEV_NETMAP
401	vtnet_netmap_attach(sc);
402#endif /* DEV_NETMAP */
403
404	vtnet_start_taskqueues(sc);
405
406fail:
407	if (error)
408		vtnet_detach(dev);
409
410	return (error);
411}
412
413static int
414vtnet_detach(device_t dev)
415{
416	struct vtnet_softc *sc;
417	struct ifnet *ifp;
418
419	sc = device_get_softc(dev);
420	ifp = sc->vtnet_ifp;
421
422	if (device_is_attached(dev)) {
423		VTNET_CORE_LOCK(sc);
424		vtnet_stop(sc);
425		VTNET_CORE_UNLOCK(sc);
426
427		callout_drain(&sc->vtnet_tick_ch);
428		vtnet_drain_taskqueues(sc);
429
430		ether_ifdetach(ifp);
431	}
432
433#ifdef DEV_NETMAP
434	netmap_detach(ifp);
435#endif /* DEV_NETMAP */
436
437	vtnet_free_taskqueues(sc);
438
439	if (sc->vtnet_vlan_attach != NULL) {
440		EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
441		sc->vtnet_vlan_attach = NULL;
442	}
443	if (sc->vtnet_vlan_detach != NULL) {
444		EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vtnet_vlan_detach);
445		sc->vtnet_vlan_detach = NULL;
446	}
447
448	ifmedia_removeall(&sc->vtnet_media);
449
450	if (ifp != NULL) {
451		if_free(ifp);
452		sc->vtnet_ifp = NULL;
453	}
454
455	vtnet_free_rxtx_queues(sc);
456	vtnet_free_rx_filters(sc);
457
458	if (sc->vtnet_ctrl_vq != NULL)
459		vtnet_free_ctrl_vq(sc);
460
461	VTNET_CORE_LOCK_DESTROY(sc);
462
463	return (0);
464}
465
466static int
467vtnet_suspend(device_t dev)
468{
469	struct vtnet_softc *sc;
470
471	sc = device_get_softc(dev);
472
473	VTNET_CORE_LOCK(sc);
474	vtnet_stop(sc);
475	sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
476	VTNET_CORE_UNLOCK(sc);
477
478	return (0);
479}
480
481static int
482vtnet_resume(device_t dev)
483{
484	struct vtnet_softc *sc;
485	struct ifnet *ifp;
486
487	sc = device_get_softc(dev);
488	ifp = sc->vtnet_ifp;
489
490	VTNET_CORE_LOCK(sc);
491	if (ifp->if_flags & IFF_UP)
492		vtnet_init_locked(sc);
493	sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
494	VTNET_CORE_UNLOCK(sc);
495
496	return (0);
497}
498
499static int
500vtnet_shutdown(device_t dev)
501{
502
503	/*
504	 * Suspend already does all of what we need to
505	 * do here; we just never expect to be resumed.
506	 */
507	return (vtnet_suspend(dev));
508}
509
510static int
511vtnet_attach_completed(device_t dev)
512{
513
514	vtnet_attach_disable_promisc(device_get_softc(dev));
515
516	return (0);
517}
518
519static int
520vtnet_config_change(device_t dev)
521{
522	struct vtnet_softc *sc;
523
524	sc = device_get_softc(dev);
525
526	VTNET_CORE_LOCK(sc);
527	vtnet_update_link_status(sc);
528	if (sc->vtnet_link_active != 0)
529		vtnet_tx_start_all(sc);
530	VTNET_CORE_UNLOCK(sc);
531
532	return (0);
533}
534
535static void
536vtnet_negotiate_features(struct vtnet_softc *sc)
537{
538	device_t dev;
539	uint64_t mask, features;
540
541	dev = sc->vtnet_dev;
542	mask = 0;
543
544	/*
545	 * TSO and LRO are only available when their corresponding checksum
546	 * offload feature is also negotiated.
547	 */
548	if (vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable)) {
549		mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM;
550		mask |= VTNET_TSO_FEATURES | VTNET_LRO_FEATURES;
551	}
552	if (vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable))
553		mask |= VTNET_TSO_FEATURES;
554	if (vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable))
555		mask |= VTNET_LRO_FEATURES;
556#ifndef VTNET_LEGACY_TX
557	if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable))
558		mask |= VIRTIO_NET_F_MQ;
559#else
560	mask |= VIRTIO_NET_F_MQ;
561#endif
562
563	features = VTNET_FEATURES & ~mask;
564	sc->vtnet_features = virtio_negotiate_features(dev, features);
565
566	if (virtio_with_feature(dev, VTNET_LRO_FEATURES) &&
567	    virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) {
568		/*
569		 * LRO without mergeable buffers requires special care. This
570		 * is not ideal because every receive buffer must be large
571		 * enough to hold the maximum TCP packet, the Ethernet header,
572		 * and the virtio-net header. This requires up to 34 descriptors with
573		 * MCLBYTES clusters. If we do not have indirect descriptors,
574		 * LRO is disabled since the virtqueue will not contain very
575		 * many receive buffers.
576		 */
577		if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) {
578			device_printf(dev,
579			    "LRO disabled due to both mergeable buffers and "
580			    "indirect descriptors not negotiated\n");
581
582			features &= ~VTNET_LRO_FEATURES;
583			sc->vtnet_features =
584			    virtio_negotiate_features(dev, features);
585		} else
586			sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
587	}
588}
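/*
 * Editor's note (rough arithmetic behind the "34 descriptors" above): a
 * maximum-sized LRO frame is a bit over 64KB including the Ethernet header,
 * which spans about 33 MCLBYTES (2KB) clusters; the separately described
 * virtio-net header adds one more segment, giving the 34-descriptor figure
 * (VTNET_MAX_RX_SEGS).
 */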
589
590static void
591vtnet_setup_features(struct vtnet_softc *sc)
592{
593	device_t dev;
594	int max_pairs, max;
595
596	dev = sc->vtnet_dev;
597
598	vtnet_negotiate_features(sc);
599
600	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
601		sc->vtnet_flags |= VTNET_FLAG_INDIRECT;
602	if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX))
603		sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX;
604
605	if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
606		/* This feature should always be negotiated. */
607		sc->vtnet_flags |= VTNET_FLAG_MAC;
608	}
609
610	if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
611		sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
612		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
613	} else
614		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
615
616	if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
617		sc->vtnet_rx_nsegs = VTNET_MRG_RX_SEGS;
618	else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
619		sc->vtnet_rx_nsegs = VTNET_MAX_RX_SEGS;
620	else
621		sc->vtnet_rx_nsegs = VTNET_MIN_RX_SEGS;
622
623	if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) ||
624	    virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) ||
625	    virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
626		sc->vtnet_tx_nsegs = VTNET_MAX_TX_SEGS;
627	else
628		sc->vtnet_tx_nsegs = VTNET_MIN_TX_SEGS;
629
630	if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
631		sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;
632
633		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
634			sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
635		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
636			sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
637		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR))
638			sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;
639	}
640
641	if (virtio_with_feature(dev, VIRTIO_NET_F_MQ) &&
642	    sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
643		max_pairs = virtio_read_dev_config_2(dev,
644		    offsetof(struct virtio_net_config, max_virtqueue_pairs));
645		if (max_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
646		    max_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
647			max_pairs = 1;
648	} else
649		max_pairs = 1;
650
651	if (max_pairs > 1) {
652		/*
653		 * Limit the maximum number of queue pairs to the number of
654		 * CPUs or the configured maximum. The actual number of
655		 * queues that get used may be less.
656		 */
657		max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs);
658		if (max > 0 && max_pairs > max)
659			max_pairs = max;
660		if (max_pairs > mp_ncpus)
661			max_pairs = mp_ncpus;
662		if (max_pairs > VTNET_MAX_QUEUE_PAIRS)
663			max_pairs = VTNET_MAX_QUEUE_PAIRS;
664		if (max_pairs > 1)
665			sc->vtnet_flags |= VTNET_FLAG_MULTIQ;
666	}
667
668	sc->vtnet_max_vq_pairs = max_pairs;
669}
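/*
 * Editor's note (worked example): on a guest with 8 CPUs whose device
 * reports max_virtqueue_pairs=16, max_pairs is clamped to 8 above (and
 * further by VTNET_MAX_QUEUE_PAIRS or a hw.vtnet.X.mq_max_pairs tunable,
 * if set), so vtnet_max_vq_pairs ends up as 8 and MULTIQ is enabled.
 */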
670
671static int
672vtnet_init_rxq(struct vtnet_softc *sc, int id)
673{
674	struct vtnet_rxq *rxq;
675
676	rxq = &sc->vtnet_rxqs[id];
677
678	snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d",
679	    device_get_nameunit(sc->vtnet_dev), id);
680	mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF);
681
682	rxq->vtnrx_sc = sc;
683	rxq->vtnrx_id = id;
684
685	rxq->vtnrx_sg = sglist_alloc(sc->vtnet_rx_nsegs, M_NOWAIT);
686	if (rxq->vtnrx_sg == NULL)
687		return (ENOMEM);
688
689	TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq);
690	rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT,
691	    taskqueue_thread_enqueue, &rxq->vtnrx_tq);
692
693	return (rxq->vtnrx_tq == NULL ? ENOMEM : 0);
694}
695
696static int
697vtnet_init_txq(struct vtnet_softc *sc, int id)
698{
699	struct vtnet_txq *txq;
700
701	txq = &sc->vtnet_txqs[id];
702
703	snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d",
704	    device_get_nameunit(sc->vtnet_dev), id);
705	mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF);
706
707	txq->vtntx_sc = sc;
708	txq->vtntx_id = id;
709
710	txq->vtntx_sg = sglist_alloc(sc->vtnet_tx_nsegs, M_NOWAIT);
711	if (txq->vtntx_sg == NULL)
712		return (ENOMEM);
713
714#ifndef VTNET_LEGACY_TX
715	txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF,
716	    M_NOWAIT, &txq->vtntx_mtx);
717	if (txq->vtntx_br == NULL)
718		return (ENOMEM);
719
720	TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq);
721#endif
722	TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq);
723	txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT,
724	    taskqueue_thread_enqueue, &txq->vtntx_tq);
725	if (txq->vtntx_tq == NULL)
726		return (ENOMEM);
727
728	return (0);
729}
730
731static int
732vtnet_alloc_rxtx_queues(struct vtnet_softc *sc)
733{
734	int i, npairs, error;
735
736	npairs = sc->vtnet_max_vq_pairs;
737
738	sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF,
739	    M_NOWAIT | M_ZERO);
740	sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF,
741	    M_NOWAIT | M_ZERO);
742	if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL)
743		return (ENOMEM);
744
745	for (i = 0; i < npairs; i++) {
746		error = vtnet_init_rxq(sc, i);
747		if (error)
748			return (error);
749		error = vtnet_init_txq(sc, i);
750		if (error)
751			return (error);
752	}
753
754	vtnet_setup_queue_sysctl(sc);
755
756	return (0);
757}
758
759static void
760vtnet_destroy_rxq(struct vtnet_rxq *rxq)
761{
762
763	rxq->vtnrx_sc = NULL;
764	rxq->vtnrx_id = -1;
765
766	if (rxq->vtnrx_sg != NULL) {
767		sglist_free(rxq->vtnrx_sg);
768		rxq->vtnrx_sg = NULL;
769	}
770
771	if (mtx_initialized(&rxq->vtnrx_mtx) != 0)
772		mtx_destroy(&rxq->vtnrx_mtx);
773}
774
775static void
776vtnet_destroy_txq(struct vtnet_txq *txq)
777{
778
779	txq->vtntx_sc = NULL;
780	txq->vtntx_id = -1;
781
782	if (txq->vtntx_sg != NULL) {
783		sglist_free(txq->vtntx_sg);
784		txq->vtntx_sg = NULL;
785	}
786
787#ifndef VTNET_LEGACY_TX
788	if (txq->vtntx_br != NULL) {
789		buf_ring_free(txq->vtntx_br, M_DEVBUF);
790		txq->vtntx_br = NULL;
791	}
792#endif
793
794	if (mtx_initialized(&txq->vtntx_mtx) != 0)
795		mtx_destroy(&txq->vtntx_mtx);
796}
797
798static void
799vtnet_free_rxtx_queues(struct vtnet_softc *sc)
800{
801	int i;
802
803	if (sc->vtnet_rxqs != NULL) {
804		for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
805			vtnet_destroy_rxq(&sc->vtnet_rxqs[i]);
806		free(sc->vtnet_rxqs, M_DEVBUF);
807		sc->vtnet_rxqs = NULL;
808	}
809
810	if (sc->vtnet_txqs != NULL) {
811		for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
812			vtnet_destroy_txq(&sc->vtnet_txqs[i]);
813		free(sc->vtnet_txqs, M_DEVBUF);
814		sc->vtnet_txqs = NULL;
815	}
816}
817
818static int
819vtnet_alloc_rx_filters(struct vtnet_softc *sc)
820{
821
822	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
823		sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter),
824		    M_DEVBUF, M_NOWAIT | M_ZERO);
825		if (sc->vtnet_mac_filter == NULL)
826			return (ENOMEM);
827	}
828
829	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
830		sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) *
831		    VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO);
832		if (sc->vtnet_vlan_filter == NULL)
833			return (ENOMEM);
834	}
835
836	return (0);
837}
838
839static void
840vtnet_free_rx_filters(struct vtnet_softc *sc)
841{
842
843	if (sc->vtnet_mac_filter != NULL) {
844		free(sc->vtnet_mac_filter, M_DEVBUF);
845		sc->vtnet_mac_filter = NULL;
846	}
847
848	if (sc->vtnet_vlan_filter != NULL) {
849		free(sc->vtnet_vlan_filter, M_DEVBUF);
850		sc->vtnet_vlan_filter = NULL;
851	}
852}
853
854static int
855vtnet_alloc_virtqueues(struct vtnet_softc *sc)
856{
857	device_t dev;
858	struct vq_alloc_info *info;
859	struct vtnet_rxq *rxq;
860	struct vtnet_txq *txq;
861	int i, idx, flags, nvqs, error;
862
863	dev = sc->vtnet_dev;
864	flags = 0;
865
866	nvqs = sc->vtnet_max_vq_pairs * 2;
867	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
868		nvqs++;
869
870	info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT);
871	if (info == NULL)
872		return (ENOMEM);
873
874	for (i = 0, idx = 0; i < sc->vtnet_max_vq_pairs; i++, idx+=2) {
875		rxq = &sc->vtnet_rxqs[i];
876		VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs,
877		    vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq,
878		    "%s-%d rx", device_get_nameunit(dev), rxq->vtnrx_id);
879
880		txq = &sc->vtnet_txqs[i];
881		VQ_ALLOC_INFO_INIT(&info[idx+1], sc->vtnet_tx_nsegs,
882		    vtnet_tx_vq_intr, txq, &txq->vtntx_vq,
883		    "%s-%d tx", device_get_nameunit(dev), txq->vtntx_id);
884	}
885
886	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
887		VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL,
888		    &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev));
889	}
890
891	/*
892	 * Enable interrupt binding if this is multiqueue. This only matters
893	 * when per-vq MSIX is available.
894	 */
895	if (sc->vtnet_flags & VTNET_FLAG_MULTIQ)
896		flags |= 0;
897
898	error = virtio_alloc_virtqueues(dev, flags, nvqs, info);
899	free(info, M_TEMP);
900
901	return (error);
902}
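/*
 * Editor's note (layout sketch): with N queue pairs the vq_alloc_info array
 * built above is ordered rx0, tx0, rx1, tx1, ..., rx(N-1), tx(N-1), with the
 * control virtqueue (when VTNET_FLAG_CTRL_VQ is set) appended last.
 */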
903
904static int
905vtnet_setup_interface(struct vtnet_softc *sc)
906{
907	device_t dev;
908	struct ifnet *ifp;
909
910	dev = sc->vtnet_dev;
911
912	ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER);
913	if (ifp == NULL) {
914		device_printf(dev, "cannot allocate ifnet structure\n");
915		return (ENOSPC);
916	}
917
918	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
919	if_initbaudrate(ifp, IF_Gbps(10));	/* Approx. */
920	ifp->if_softc = sc;
921	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
922	ifp->if_init = vtnet_init;
923	ifp->if_ioctl = vtnet_ioctl;
924
925#ifndef VTNET_LEGACY_TX
926	ifp->if_transmit = vtnet_txq_mq_start;
927	ifp->if_qflush = vtnet_qflush;
928#else
929	struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq;
930	ifp->if_start = vtnet_start;
931	IFQ_SET_MAXLEN(&ifp->if_snd, virtqueue_size(vq) - 1);
932	ifp->if_snd.ifq_drv_maxlen = virtqueue_size(vq) - 1;
933	IFQ_SET_READY(&ifp->if_snd);
934#endif
935
936	ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd,
937	    vtnet_ifmedia_sts);
938	ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL);
939	ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE);
940
941	/* Read (or generate) the MAC address for the adapter. */
942	vtnet_get_hwaddr(sc);
943
944	ether_ifattach(ifp, sc->vtnet_hwaddr);
945
946	if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS))
947		ifp->if_capabilities |= IFCAP_LINKSTATE;
948
949	/* Tell the upper layer(s) we support long frames. */
950	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
951	ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU;
952
953	if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
954		ifp->if_capabilities |= IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6;
955
956		if (virtio_with_feature(dev, VIRTIO_NET_F_GSO)) {
957			ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6;
958			sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
959		} else {
960			if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
961				ifp->if_capabilities |= IFCAP_TSO4;
962			if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
963				ifp->if_capabilities |= IFCAP_TSO6;
964			if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
965				sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
966		}
967
968		if (ifp->if_capabilities & IFCAP_TSO)
969			ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
970	}
971
972	if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) {
973		ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6;
974
975		if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) ||
976		    virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6))
977			ifp->if_capabilities |= IFCAP_LRO;
978	}
979
980	if (ifp->if_capabilities & IFCAP_HWCSUM) {
981		/*
982		 * VirtIO does not support VLAN tagging, but we can fake
983		 * it by inserting and removing the 802.1Q header during
984		 * transmit and receive. We are then able to do checksum
985		 * offloading of VLAN frames.
986		 */
987		ifp->if_capabilities |=
988		    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
989	}
990
991	ifp->if_capenable = ifp->if_capabilities;
992
993	/*
994	 * Capabilities after here are not enabled by default.
995	 */
996
997	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
998		ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
999
1000		sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
1001		    vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
1002		sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
1003		    vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
1004	}
1005
1006	vtnet_set_rx_process_limit(sc);
1007	vtnet_set_tx_intr_threshold(sc);
1008
1009	return (0);
1010}
1011
1012static int
1013vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu)
1014{
1015	struct ifnet *ifp;
1016	int frame_size, clsize;
1017
1018	ifp = sc->vtnet_ifp;
1019
1020	if (new_mtu < ETHERMIN || new_mtu > VTNET_MAX_MTU)
1021		return (EINVAL);
1022
1023	frame_size = sc->vtnet_hdr_size + sizeof(struct ether_vlan_header) +
1024	    new_mtu;
1025
1026	/*
1027	 * Based on the new MTU (and hence frame size) determine which
1028	 * cluster size is most appropriate for the receive queues.
1029	 */
1030	if (frame_size <= MCLBYTES) {
1031		clsize = MCLBYTES;
1032	} else if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1033		/* Avoid going past 9K jumbos. */
1034		if (frame_size > MJUM9BYTES)
1035			return (EINVAL);
1036		clsize = MJUM9BYTES;
1037	} else
1038		clsize = MJUMPAGESIZE;
1039
1040	ifp->if_mtu = new_mtu;
1041	sc->vtnet_rx_new_clsize = clsize;
1042
1043	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1044		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1045		vtnet_init_locked(sc);
1046	}
1047
1048	return (0);
1049}
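/*
 * Editor's note (worked example): with the default 1500-byte MTU the frame
 * (virtio-net header + ether_vlan_header + MTU) fits in an MCLBYTES (2KB)
 * cluster. A 9000-byte MTU selects MJUM9BYTES clusters when mergeable
 * buffers were not negotiated, and page-sized MJUMPAGESIZE clusters when
 * they were.
 */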
1050
1051static int
1052vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1053{
1054	struct vtnet_softc *sc;
1055	struct ifreq *ifr;
1056	int reinit, mask, error;
1057
1058	sc = ifp->if_softc;
1059	ifr = (struct ifreq *) data;
1060	error = 0;
1061
1062	switch (cmd) {
1063	case SIOCSIFMTU:
1064		if (ifp->if_mtu != ifr->ifr_mtu) {
1065			VTNET_CORE_LOCK(sc);
1066			error = vtnet_change_mtu(sc, ifr->ifr_mtu);
1067			VTNET_CORE_UNLOCK(sc);
1068		}
1069		break;
1070
1071	case SIOCSIFFLAGS:
1072		VTNET_CORE_LOCK(sc);
1073		if ((ifp->if_flags & IFF_UP) == 0) {
1074			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1075				vtnet_stop(sc);
1076		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1077			if ((ifp->if_flags ^ sc->vtnet_if_flags) &
1078			    (IFF_PROMISC | IFF_ALLMULTI)) {
1079				if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)
1080					vtnet_rx_filter(sc);
1081				else
1082					error = ENOTSUP;
1083			}
1084		} else
1085			vtnet_init_locked(sc);
1086
1087		if (error == 0)
1088			sc->vtnet_if_flags = ifp->if_flags;
1089		VTNET_CORE_UNLOCK(sc);
1090		break;
1091
1092	case SIOCADDMULTI:
1093	case SIOCDELMULTI:
1094		if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0)
1095			break;
1096		VTNET_CORE_LOCK(sc);
1097		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1098			vtnet_rx_filter_mac(sc);
1099		VTNET_CORE_UNLOCK(sc);
1100		break;
1101
1102	case SIOCSIFMEDIA:
1103	case SIOCGIFMEDIA:
1104		error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd);
1105		break;
1106
1107	case SIOCSIFCAP:
1108		VTNET_CORE_LOCK(sc);
1109		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1110
1111		if (mask & IFCAP_TXCSUM)
1112			ifp->if_capenable ^= IFCAP_TXCSUM;
1113		if (mask & IFCAP_TXCSUM_IPV6)
1114			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1115		if (mask & IFCAP_TSO4)
1116			ifp->if_capenable ^= IFCAP_TSO4;
1117		if (mask & IFCAP_TSO6)
1118			ifp->if_capenable ^= IFCAP_TSO6;
1119
1120		if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO |
1121		    IFCAP_VLAN_HWFILTER)) {
1122			/* These Rx features require us to renegotiate. */
1123			reinit = 1;
1124
1125			if (mask & IFCAP_RXCSUM)
1126				ifp->if_capenable ^= IFCAP_RXCSUM;
1127			if (mask & IFCAP_RXCSUM_IPV6)
1128				ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1129			if (mask & IFCAP_LRO)
1130				ifp->if_capenable ^= IFCAP_LRO;
1131			if (mask & IFCAP_VLAN_HWFILTER)
1132				ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1133		} else
1134			reinit = 0;
1135
1136		if (mask & IFCAP_VLAN_HWTSO)
1137			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1138		if (mask & IFCAP_VLAN_HWTAGGING)
1139			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1140
1141		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1142			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1143			vtnet_init_locked(sc);
1144		}
1145
1146		VTNET_CORE_UNLOCK(sc);
1147		VLAN_CAPABILITIES(ifp);
1148
1149		break;
1150
1151	default:
1152		error = ether_ioctl(ifp, cmd, data);
1153		break;
1154	}
1155
1156	VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc);
1157
1158	return (error);
1159}
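/*
 * Editor's note (illustrative): the SIOCSIFCAP case above is what runs for
 * commands such as "ifconfig vtnet0 -rxcsum" or "ifconfig vtnet0 -lro".
 * Toggling the receive-side capabilities or VLAN_HWFILTER forces a reinit
 * of a running interface, while the transmit-side toggles do not.
 */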
1160
1161static int
1162vtnet_rxq_populate(struct vtnet_rxq *rxq)
1163{
1164	struct virtqueue *vq;
1165	int nbufs, error;
1166
1167	vq = rxq->vtnrx_vq;
1168	error = ENOSPC;
1169
1170	for (nbufs = 0; !virtqueue_full(vq); nbufs++) {
1171		error = vtnet_rxq_new_buf(rxq);
1172		if (error)
1173			break;
1174	}
1175
1176	if (nbufs > 0) {
1177		virtqueue_notify(vq);
1178		/*
1179		 * EMSGSIZE signifies the virtqueue did not have enough
1180		 * entries available to hold the last mbuf. This is not
1181		 * an error.
1182		 */
1183		if (error == EMSGSIZE)
1184			error = 0;
1185	}
1186
1187	return (error);
1188}
1189
1190static void
1191vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq)
1192{
1193	struct virtqueue *vq;
1194	struct mbuf *m;
1195	int last;
1196
1197	vq = rxq->vtnrx_vq;
1198	last = 0;
1199
1200	while ((m = virtqueue_drain(vq, &last)) != NULL)
1201		m_freem(m);
1202
1203	KASSERT(virtqueue_empty(vq),
1204	    ("%s: mbufs remaining in rx queue %p", __func__, rxq));
1205}
1206
1207static struct mbuf *
1208vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
1209{
1210	struct mbuf *m_head, *m_tail, *m;
1211	int i, clsize;
1212
1213	clsize = sc->vtnet_rx_clsize;
1214
1215	KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1216	    ("%s: chained mbuf %d request without LRO_NOMRG", __func__, nbufs));
1217
1218	m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, clsize);
1219	if (m_head == NULL)
1220		goto fail;
1221
1222	m_head->m_len = clsize;
1223	m_tail = m_head;
1224
1225	/* Allocate the rest of the chain. */
1226	for (i = 1; i < nbufs; i++) {
1227		m = m_getjcl(M_NOWAIT, MT_DATA, 0, clsize);
1228		if (m == NULL)
1229			goto fail;
1230
1231		m->m_len = clsize;
1232		m_tail->m_next = m;
1233		m_tail = m;
1234	}
1235
1236	if (m_tailp != NULL)
1237		*m_tailp = m_tail;
1238
1239	return (m_head);
1240
1241fail:
1242	sc->vtnet_stats.mbuf_alloc_failed++;
1243	m_freem(m_head);
1244
1245	return (NULL);
1246}
1247
1248/*
1249 * Slow path for when LRO without mergeable buffers is negotiated.
1250 */
1251static int
1252vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
1253    int len0)
1254{
1255	struct vtnet_softc *sc;
1256	struct mbuf *m, *m_prev;
1257	struct mbuf *m_new, *m_tail;
1258	int len, clsize, nreplace, error;
1259
1260	sc = rxq->vtnrx_sc;
1261	clsize = sc->vtnet_rx_clsize;
1262
1263	m_prev = NULL;
1264	m_tail = NULL;
1265	nreplace = 0;
1266
1267	m = m0;
1268	len = len0;
1269
1270	/*
1271	 * Since these mbuf chains are so large, we avoid allocating an
1272	 * entire replacement chain if possible. When the received frame
1273	 * did not consume the entire chain, the unused mbufs are moved
1274	 * to the replacement chain.
1275	 */
1276	while (len > 0) {
1277		/*
1278		 * Something is seriously wrong if we received a frame
1279		 * larger than the chain. Drop it.
1280		 */
1281		if (m == NULL) {
1282			sc->vtnet_stats.rx_frame_too_large++;
1283			return (EMSGSIZE);
1284		}
1285
1286		/* We always allocate the same cluster size. */
1287		KASSERT(m->m_len == clsize,
1288		    ("%s: mbuf size %d is not the cluster size %d",
1289		    __func__, m->m_len, clsize));
1290
1291		m->m_len = MIN(m->m_len, len);
1292		len -= m->m_len;
1293
1294		m_prev = m;
1295		m = m->m_next;
1296		nreplace++;
1297	}
1298
1299	KASSERT(nreplace <= sc->vtnet_rx_nmbufs,
1300	    ("%s: too many replacement mbufs %d max %d", __func__, nreplace,
1301	    sc->vtnet_rx_nmbufs));
1302
1303	m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
1304	if (m_new == NULL) {
1305		m_prev->m_len = clsize;
1306		return (ENOBUFS);
1307	}
1308
1309	/*
1310	 * Move any unused mbufs from the received chain onto the end
1311	 * of the new chain.
1312	 */
1313	if (m_prev->m_next != NULL) {
1314		m_tail->m_next = m_prev->m_next;
1315		m_prev->m_next = NULL;
1316	}
1317
1318	error = vtnet_rxq_enqueue_buf(rxq, m_new);
1319	if (error) {
1320		/*
1321		 * BAD! We could not enqueue the replacement mbuf chain. We
1322		 * must restore the m0 chain to the original state if it was
1323		 * modified so we can subsequently discard it.
1324		 *
1325		 * NOTE: The replacement is supposed to be an identical copy
1326		 * of the one just dequeued, so this is an unexpected error.
1327		 */
1328		sc->vtnet_stats.rx_enq_replacement_failed++;
1329
1330		if (m_tail->m_next != NULL) {
1331			m_prev->m_next = m_tail->m_next;
1332			m_tail->m_next = NULL;
1333		}
1334
1335		m_prev->m_len = clsize;
1336		m_freem(m_new);
1337	}
1338
1339	return (error);
1340}
1341
1342static int
1343vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len)
1344{
1345	struct vtnet_softc *sc;
1346	struct mbuf *m_new;
1347	int error;
1348
1349	sc = rxq->vtnrx_sc;
1350
1351	KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
1352	    ("%s: chained mbuf without LRO_NOMRG", __func__));
1353
1354	if (m->m_next == NULL) {
1355		/* Fast-path for the common case of just one mbuf. */
1356		if (m->m_len < len)
1357			return (EINVAL);
1358
1359		m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
1360		if (m_new == NULL)
1361			return (ENOBUFS);
1362
1363		error = vtnet_rxq_enqueue_buf(rxq, m_new);
1364		if (error) {
1365			/*
1366			 * The new mbuf is supposed to be an identical
1367			 * copy of the one just dequeued so this is an
1368			 * unexpected error.
1369			 */
1370			m_freem(m_new);
1371			sc->vtnet_stats.rx_enq_replacement_failed++;
1372		} else
1373			m->m_len = len;
1374	} else
1375		error = vtnet_rxq_replace_lro_nomgr_buf(rxq, m, len);
1376
1377	return (error);
1378}
1379
1380static int
1381vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m)
1382{
1383	struct vtnet_softc *sc;
1384	struct sglist *sg;
1385	struct vtnet_rx_header *rxhdr;
1386	uint8_t *mdata;
1387	int offset, error;
1388
1389	sc = rxq->vtnrx_sc;
1390	sg = rxq->vtnrx_sg;
1391	mdata = mtod(m, uint8_t *);
1392
1393	VTNET_RXQ_LOCK_ASSERT(rxq);
1394	KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
1395	    ("%s: chained mbuf without LRO_NOMRG", __func__));
1396	KASSERT(m->m_len == sc->vtnet_rx_clsize,
1397	    ("%s: unexpected cluster size %d/%d", __func__, m->m_len,
1398	     sc->vtnet_rx_clsize));
1399
1400	sglist_reset(sg);
1401	if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1402		MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr));
1403		rxhdr = (struct vtnet_rx_header *) mdata;
1404		sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size);
1405		offset = sizeof(struct vtnet_rx_header);
1406	} else
1407		offset = 0;
1408
1409	sglist_append(sg, mdata + offset, m->m_len - offset);
1410	if (m->m_next != NULL) {
1411		error = sglist_append_mbuf(sg, m->m_next);
1412		MPASS(error == 0);
1413	}
1414
1415	error = virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg);
1416
1417	return (error);
1418}
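/*
 * Editor's note (descriptor layout sketch, per the code above): without
 * mergeable buffers the first sglist entry points at the virtio-net header
 * inside the vtnet_rx_header at the front of the cluster, the second entry
 * covers the rest of that cluster past the header and pad, and any extra
 * LRO_NOMRG clusters follow as additional entries. With mergeable buffers a
 * single entry covers the whole cluster and the host writes the header at
 * its start.
 */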
1419
1420static int
1421vtnet_rxq_new_buf(struct vtnet_rxq *rxq)
1422{
1423	struct vtnet_softc *sc;
1424	struct mbuf *m;
1425	int error;
1426
1427	sc = rxq->vtnrx_sc;
1428
1429	m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL);
1430	if (m == NULL)
1431		return (ENOBUFS);
1432
1433	error = vtnet_rxq_enqueue_buf(rxq, m);
1434	if (error)
1435		m_freem(m);
1436
1437	return (error);
1438}
1439
1440/*
1441 * Use the checksum offset in the VirtIO header to set the
1442 * correct CSUM_* flags.
1443 */
1444static int
1445vtnet_rxq_csum_by_offset(struct vtnet_rxq *rxq, struct mbuf *m,
1446    uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
1447{
1448	struct vtnet_softc *sc;
1449#if defined(INET) || defined(INET6)
1450	int offset = hdr->csum_start + hdr->csum_offset;
1451#endif
1452
1453	sc = rxq->vtnrx_sc;
1454
1455	/* Only do a basic sanity check on the offset. */
1456	switch (eth_type) {
1457#if defined(INET)
1458	case ETHERTYPE_IP:
1459		if (__predict_false(offset < ip_start + sizeof(struct ip)))
1460			return (1);
1461		break;
1462#endif
1463#if defined(INET6)
1464	case ETHERTYPE_IPV6:
1465		if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr)))
1466			return (1);
1467		break;
1468#endif
1469	default:
1470		sc->vtnet_stats.rx_csum_bad_ethtype++;
1471		return (1);
1472	}
1473
1474	/*
1475	 * Use the offset to determine the appropriate CSUM_* flags. This is
1476	 * a bit dirty, but we can get by with it since the checksum offsets
1477	 * happen to be different. We assume the host does not do IPv4
1478	 * header checksum offloading.
1479	 */
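	/*
	 * Editor's note: the three cases below are distinguishable because
	 * the standard checksum field offsets differ: uh_sum is at offset 6
	 * in struct udphdr, th_sum at offset 16 in struct tcphdr, and
	 * checksum at offset 8 in struct sctphdr.
	 */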
1480	switch (hdr->csum_offset) {
1481	case offsetof(struct udphdr, uh_sum):
1482	case offsetof(struct tcphdr, th_sum):
1483		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1484		m->m_pkthdr.csum_data = 0xFFFF;
1485		break;
1486	case offsetof(struct sctphdr, checksum):
1487		m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
1488		break;
1489	default:
1490		sc->vtnet_stats.rx_csum_bad_offset++;
1491		return (1);
1492	}
1493
1494	return (0);
1495}
1496
1497static int
1498vtnet_rxq_csum_by_parse(struct vtnet_rxq *rxq, struct mbuf *m,
1499    uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
1500{
1501	struct vtnet_softc *sc;
1502	int offset, proto;
1503
1504	sc = rxq->vtnrx_sc;
1505
1506	switch (eth_type) {
1507#if defined(INET)
1508	case ETHERTYPE_IP: {
1509		struct ip *ip;
1510		if (__predict_false(m->m_len < ip_start + sizeof(struct ip)))
1511			return (1);
1512		ip = (struct ip *)(m->m_data + ip_start);
1513		proto = ip->ip_p;
1514		offset = ip_start + (ip->ip_hl << 2);
1515		break;
1516	}
1517#endif
1518#if defined(INET6)
1519	case ETHERTYPE_IPV6:
1520		if (__predict_false(m->m_len < ip_start +
1521		    sizeof(struct ip6_hdr)))
1522			return (1);
1523		offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto);
1524		if (__predict_false(offset < 0))
1525			return (1);
1526		break;
1527#endif
1528	default:
1529		sc->vtnet_stats.rx_csum_bad_ethtype++;
1530		return (1);
1531	}
1532
1533	switch (proto) {
1534	case IPPROTO_TCP:
1535		if (__predict_false(m->m_len < offset + sizeof(struct tcphdr)))
1536			return (1);
1537		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1538		m->m_pkthdr.csum_data = 0xFFFF;
1539		break;
1540	case IPPROTO_UDP:
1541		if (__predict_false(m->m_len < offset + sizeof(struct udphdr)))
1542			return (1);
1543		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1544		m->m_pkthdr.csum_data = 0xFFFF;
1545		break;
1546	case IPPROTO_SCTP:
1547		if (__predict_false(m->m_len < offset + sizeof(struct sctphdr)))
1548			return (1);
1549		m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
1550		break;
1551	default:
1552		/*
1553		 * For the remaining protocols, FreeBSD does not support
1554		 * checksum offloading, so the checksum will be recomputed.
1555		 */
1556#if 0
1557		if_printf(sc->vtnet_ifp, "%s: cksum offload of unsupported "
1558		    "protocol eth_type=%#x proto=%d csum_start=%d "
1559		    "csum_offset=%d\n", __func__, eth_type, proto,
1560		    hdr->csum_start, hdr->csum_offset);
1561#endif
1562		break;
1563	}
1564
1565	return (0);
1566}
1567
1568/*
1569 * Set the appropriate CSUM_* flags. Unfortunately, the information
1570 * provided is not directly useful to us. The VirtIO header gives the
1571 * offset of the checksum, which is all Linux needs, but this is not
1572 * how FreeBSD does things. We are forced to peek inside the packet
1573 * a bit.
1574 *
1575 * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD
1576 * could accept the offsets and let the stack figure it out.
1577 */
1578static int
1579vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
1580    struct virtio_net_hdr *hdr)
1581{
1582	struct ether_header *eh;
1583	struct ether_vlan_header *evh;
1584	uint16_t eth_type;
1585	int offset, error;
1586
1587	eh = mtod(m, struct ether_header *);
1588	eth_type = ntohs(eh->ether_type);
1589	if (eth_type == ETHERTYPE_VLAN) {
1590		/* BMV: We should handle nested VLAN tags too. */
1591		evh = mtod(m, struct ether_vlan_header *);
1592		eth_type = ntohs(evh->evl_proto);
1593		offset = sizeof(struct ether_vlan_header);
1594	} else
1595		offset = sizeof(struct ether_header);
1596
1597	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
1598		error = vtnet_rxq_csum_by_offset(rxq, m, eth_type, offset, hdr);
1599	else
1600		error = vtnet_rxq_csum_by_parse(rxq, m, eth_type, offset, hdr);
1601
1602	return (error);
1603}
1604
1605static void
1606vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs)
1607{
1608	struct mbuf *m;
1609
1610	while (--nbufs > 0) {
1611		m = virtqueue_dequeue(rxq->vtnrx_vq, NULL);
1612		if (m == NULL)
1613			break;
1614		vtnet_rxq_discard_buf(rxq, m);
1615	}
1616}
1617
1618static void
1619vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m)
1620{
1621	int error;
1622
1623	/*
1624	 * Requeue the discarded mbuf. This should always be successful
1625	 * since it was just dequeued.
1626	 */
1627	error = vtnet_rxq_enqueue_buf(rxq, m);
1628	KASSERT(error == 0,
1629	    ("%s: cannot requeue discarded mbuf %d", __func__, error));
1630}
1631
1632static int
1633vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs)
1634{
1635	struct vtnet_softc *sc;
1636	struct ifnet *ifp;
1637	struct virtqueue *vq;
1638	struct mbuf *m, *m_tail;
1639	int len;
1640
1641	sc = rxq->vtnrx_sc;
1642	vq = rxq->vtnrx_vq;
1643	ifp = sc->vtnet_ifp;
1644	m_tail = m_head;
1645
1646	while (--nbufs > 0) {
1647		m = virtqueue_dequeue(vq, &len);
1648		if (m == NULL) {
1649			rxq->vtnrx_stats.vrxs_ierrors++;
1650			goto fail;
1651		}
1652
1653		if (vtnet_rxq_new_buf(rxq) != 0) {
1654			rxq->vtnrx_stats.vrxs_iqdrops++;
1655			vtnet_rxq_discard_buf(rxq, m);
1656			if (nbufs > 1)
1657				vtnet_rxq_discard_merged_bufs(rxq, nbufs);
1658			goto fail;
1659		}
1660
1661		if (m->m_len < len)
1662			len = m->m_len;
1663
1664		m->m_len = len;
1665		m->m_flags &= ~M_PKTHDR;
1666
1667		m_head->m_pkthdr.len += len;
1668		m_tail->m_next = m;
1669		m_tail = m;
1670	}
1671
1672	return (0);
1673
1674fail:
1675	sc->vtnet_stats.rx_mergeable_failed++;
1676	m_freem(m_head);
1677
1678	return (1);
1679}
1680
1681static void
1682vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m,
1683    struct virtio_net_hdr *hdr)
1684{
1685	struct vtnet_softc *sc;
1686	struct ifnet *ifp;
1687	struct ether_header *eh;
1688
1689	sc = rxq->vtnrx_sc;
1690	ifp = sc->vtnet_ifp;
1691
1692	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1693		eh = mtod(m, struct ether_header *);
1694		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1695			vtnet_vlan_tag_remove(m);
1696			/*
1697			 * With the 802.1Q header removed, update the
1698			 * checksum starting location accordingly.
1699			 */
1700			if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
1701				hdr->csum_start -= ETHER_VLAN_ENCAP_LEN;
1702		}
1703	}
1704
1705	m->m_pkthdr.flowid = rxq->vtnrx_id;
1706	M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
1707
1708	/*
1709	 * BMV: FreeBSD does not have the UNNECESSARY and PARTIAL checksum
1710	 * distinction that Linux does. Need to reevaluate if performing
1711	 * offloading for the NEEDS_CSUM case is really appropriate.
1712	 */
1713	if (hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM |
1714	    VIRTIO_NET_HDR_F_DATA_VALID)) {
1715		if (vtnet_rxq_csum(rxq, m, hdr) == 0)
1716			rxq->vtnrx_stats.vrxs_csum++;
1717		else
1718			rxq->vtnrx_stats.vrxs_csum_failed++;
1719	}
1720
1721	rxq->vtnrx_stats.vrxs_ipackets++;
1722	rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len;
1723
1724	VTNET_RXQ_UNLOCK(rxq);
1725	(*ifp->if_input)(ifp, m);
1726	VTNET_RXQ_LOCK(rxq);
1727}
1728
1729static int
1730vtnet_rxq_eof(struct vtnet_rxq *rxq)
1731{
1732	struct virtio_net_hdr lhdr, *hdr;
1733	struct vtnet_softc *sc;
1734	struct ifnet *ifp;
1735	struct virtqueue *vq;
1736	struct mbuf *m;
1737	struct virtio_net_hdr_mrg_rxbuf *mhdr;
1738	int len, deq, nbufs, adjsz, count;
1739
1740	sc = rxq->vtnrx_sc;
1741	vq = rxq->vtnrx_vq;
1742	ifp = sc->vtnet_ifp;
1743	hdr = &lhdr;
1744	deq = 0;
1745	count = sc->vtnet_rx_process_limit;
1746
1747	VTNET_RXQ_LOCK_ASSERT(rxq);
1748
1749#ifdef DEV_NETMAP
1750	if (netmap_rx_irq(ifp, 0, &deq)) {
1751		return (FALSE);
1752	}
1753#endif /* DEV_NETMAP */
1754
1755	while (count-- > 0) {
1756		m = virtqueue_dequeue(vq, &len);
1757		if (m == NULL)
1758			break;
1759		deq++;
1760
1761		if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) {
1762			rxq->vtnrx_stats.vrxs_ierrors++;
1763			vtnet_rxq_discard_buf(rxq, m);
1764			continue;
1765		}
1766
1767		if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1768			nbufs = 1;
1769			adjsz = sizeof(struct vtnet_rx_header);
1770			/*
1771			 * Account for our pad inserted between the header
1772			 * and the actual start of the frame.
1773			 */
1774			len += VTNET_RX_HEADER_PAD;
1775		} else {
1776			mhdr = mtod(m, struct virtio_net_hdr_mrg_rxbuf *);
1777			nbufs = mhdr->num_buffers;
1778			adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1779		}
1780
1781		if (vtnet_rxq_replace_buf(rxq, m, len) != 0) {
1782			rxq->vtnrx_stats.vrxs_iqdrops++;
1783			vtnet_rxq_discard_buf(rxq, m);
1784			if (nbufs > 1)
1785				vtnet_rxq_discard_merged_bufs(rxq, nbufs);
1786			continue;
1787		}
1788
1789		m->m_pkthdr.len = len;
1790		m->m_pkthdr.rcvif = ifp;
1791		m->m_pkthdr.csum_flags = 0;
1792
1793		if (nbufs > 1) {
1794			/* Dequeue the rest of the chain. */
1795			if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0)
1796				continue;
1797		}
1798
1799		/*
1800		 * Save copy of header before we strip it. For both mergeable
1801		 * and non-mergeable, the header is at the beginning of the
1802		 * mbuf data. We no longer need num_buffers, so always use a
1803		 * regular header.
1804		 *
1805		 * BMV: Is this memcpy() expensive? We know the mbuf data is
1806		 * still valid even after the m_adj().
1807		 */
1808		memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr));
1809		m_adj(m, adjsz);
1810
1811		vtnet_rxq_input(rxq, m, hdr);
1812
1813		/* Must recheck after dropping the Rx lock. */
1814		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1815			break;
1816	}
1817
1818	if (deq > 0)
1819		virtqueue_notify(vq);
1820
1821	return (count > 0 ? 0 : EAGAIN);
1822}
1823
1824static void
1825vtnet_rx_vq_intr(void *xrxq)
1826{
1827	struct vtnet_softc *sc;
1828	struct vtnet_rxq *rxq;
1829	struct ifnet *ifp;
1830	int tries, more;
1831
1832	rxq = xrxq;
1833	sc = rxq->vtnrx_sc;
1834	ifp = sc->vtnet_ifp;
1835	tries = 0;
1836
1837	if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) {
1838		/*
1839		 * Ignore this interrupt. Either this is a spurious interrupt
1840		 * or multiqueue without per-VQ MSIX so every queue needs to
1841		 * be polled (a brain dead configuration we could try harder
1842		 * to avoid).
1843		 */
1844		vtnet_rxq_disable_intr(rxq);
1845		return;
1846	}
1847
1848	VTNET_RXQ_LOCK(rxq);
1849
1850again:
1851	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1852		VTNET_RXQ_UNLOCK(rxq);
1853		return;
1854	}
1855
1856	more = vtnet_rxq_eof(rxq);
1857	if (more || vtnet_rxq_enable_intr(rxq) != 0) {
1858		if (!more)
1859			vtnet_rxq_disable_intr(rxq);
1860		/*
1861		 * This is an occasional condition or race (when !more),
1862		 * so retry a few times before scheduling the taskqueue.
1863		 */
1864		if (tries++ < VTNET_INTR_DISABLE_RETRIES)
1865			goto again;
1866
1867		VTNET_RXQ_UNLOCK(rxq);
1868		rxq->vtnrx_stats.vrxs_rescheduled++;
1869		taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
1870	} else
1871		VTNET_RXQ_UNLOCK(rxq);
1872}
1873
1874static void
1875vtnet_rxq_tq_intr(void *xrxq, int pending)
1876{
1877	struct vtnet_softc *sc;
1878	struct vtnet_rxq *rxq;
1879	struct ifnet *ifp;
1880	int more;
1881
1882	rxq = xrxq;
1883	sc = rxq->vtnrx_sc;
1884	ifp = sc->vtnet_ifp;
1885
1886	VTNET_RXQ_LOCK(rxq);
1887
1888	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1889		VTNET_RXQ_UNLOCK(rxq);
1890		return;
1891	}
1892
1893	more = vtnet_rxq_eof(rxq);
1894	if (more || vtnet_rxq_enable_intr(rxq) != 0) {
1895		if (!more)
1896			vtnet_rxq_disable_intr(rxq);
1897		rxq->vtnrx_stats.vrxs_rescheduled++;
1898		taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
1899	}
1900
1901	VTNET_RXQ_UNLOCK(rxq);
1902}
1903
1904static int
1905vtnet_txq_below_threshold(struct vtnet_txq *txq)
1906{
1907	struct vtnet_softc *sc;
1908	struct virtqueue *vq;
1909
1910	sc = txq->vtntx_sc;
1911	vq = txq->vtntx_vq;
1912
1913	return (virtqueue_nfree(vq) <= sc->vtnet_tx_intr_thresh);
1914}
1915
1916static int
1917vtnet_txq_notify(struct vtnet_txq *txq)
1918{
1919	struct virtqueue *vq;
1920
1921	vq = txq->vtntx_vq;
1922
1923	txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
1924	virtqueue_notify(vq);
1925
1926	if (vtnet_txq_enable_intr(txq) == 0)
1927		return (0);
1928
1929	/*
1930	 * Drain frames that were completed since last checked. If this
1931	 * causes the queue to go above the threshold, the caller should
1932	 * continue transmitting.
1933	 */
1934	if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) {
1935		virtqueue_disable_intr(vq);
1936		return (1);
1937	}
1938
1939	return (0);
1940}
1941
1942static void
1943vtnet_txq_free_mbufs(struct vtnet_txq *txq)
1944{
1945	struct virtqueue *vq;
1946	struct vtnet_tx_header *txhdr;
1947	int last;
1948
1949	vq = txq->vtntx_vq;
1950	last = 0;
1951
1952	while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
1953		m_freem(txhdr->vth_mbuf);
1954		uma_zfree(vtnet_tx_header_zone, txhdr);
1955	}
1956
1957	KASSERT(virtqueue_empty(vq),
1958	    ("%s: mbufs remaining in tx queue %p", __func__, txq));
1959}
1960
1961/*
1962 * BMV: Much of this can go away once we finally have offsets in
1963 * the mbuf packet header. Bug andre@.
1964 */
1965static int
1966vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m,
1967    int *etype, int *proto, int *start)
1968{
1969	struct vtnet_softc *sc;
1970	struct ether_vlan_header *evh;
1971	int offset;
1972
1973	sc = txq->vtntx_sc;
1974
1975	evh = mtod(m, struct ether_vlan_header *);
1976	if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1977		/* BMV: We should handle nested VLAN tags too. */
1978		*etype = ntohs(evh->evl_proto);
1979		offset = sizeof(struct ether_vlan_header);
1980	} else {
1981		*etype = ntohs(evh->evl_encap_proto);
1982		offset = sizeof(struct ether_header);
1983	}
1984
1985	switch (*etype) {
1986#if defined(INET)
1987	case ETHERTYPE_IP: {
1988		struct ip *ip, iphdr;
1989		if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
1990			m_copydata(m, offset, sizeof(struct ip),
1991			    (caddr_t) &iphdr);
1992			ip = &iphdr;
1993		} else
1994			ip = (struct ip *)(m->m_data + offset);
1995		*proto = ip->ip_p;
1996		*start = offset + (ip->ip_hl << 2);
1997		break;
1998	}
1999#endif
2000#if defined(INET6)
2001	case ETHERTYPE_IPV6:
2002		*proto = -1;
2003		*start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
2004		/* Assert the network stack sent us a valid packet. */
2005		KASSERT(*start > offset,
2006		    ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
2007		    *start, offset, *proto));
2008		break;
2009#endif
2010	default:
2011		sc->vtnet_stats.tx_csum_bad_ethtype++;
2012		return (EINVAL);
2013	}
2014
2015	return (0);
2016}
2017
2018static int
2019vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type,
2020    int offset, struct virtio_net_hdr *hdr)
2021{
2022	static struct timeval lastecn;
2023	static int curecn;
2024	struct vtnet_softc *sc;
2025	struct tcphdr *tcp, tcphdr;
2026
2027	sc = txq->vtntx_sc;
2028
2029	if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) {
2030		m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr);
2031		tcp = &tcphdr;
2032	} else
2033		tcp = (struct tcphdr *)(m->m_data + offset);
2034
2035	hdr->hdr_len = offset + (tcp->th_off << 2);
2036	hdr->gso_size = m->m_pkthdr.tso_segsz;
2037	hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 :
2038	    VIRTIO_NET_HDR_GSO_TCPV6;
2039
2040	if (tcp->th_flags & TH_CWR) {
2041		/*
2042		 * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD,
2043		 * ECN support is not on a per-interface basis, but globally via
2044		 * the net.inet.tcp.ecn.enable sysctl knob. The default is off.
2045		 */
2046		if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
2047			if (ppsratecheck(&lastecn, &curecn, 1))
2048				if_printf(sc->vtnet_ifp,
2049				    "TSO with ECN not negotiated with host\n");
2050			return (ENOTSUP);
2051		}
2052		hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
2053	}
2054
2055	txq->vtntx_stats.vtxs_tso++;
2056
2057	return (0);
2058}
2059
2060static struct mbuf *
2061vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m,
2062    struct virtio_net_hdr *hdr)
2063{
2064	struct vtnet_softc *sc;
2065	int flags, etype, csum_start, proto, error;
2066
2067	sc = txq->vtntx_sc;
2068	flags = m->m_pkthdr.csum_flags;
2069
2070	error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start);
2071	if (error)
2072		goto drop;
2073
2074	if ((etype == ETHERTYPE_IP && flags & VTNET_CSUM_OFFLOAD) ||
2075	    (etype == ETHERTYPE_IPV6 && flags & VTNET_CSUM_OFFLOAD_IPV6)) {
2076		/*
2077		 * We could compare the IP protocol vs the CSUM_ flag too,
2078		 * but that really should not be necessary.
2079		 */
2080		hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
2081		hdr->csum_start = csum_start;
2082		hdr->csum_offset = m->m_pkthdr.csum_data;
2083		txq->vtntx_stats.vtxs_csum++;
2084	}
2085
2086	if (flags & CSUM_TSO) {
2087		if (__predict_false(proto != IPPROTO_TCP)) {
2088			/* Likely failed to correctly parse the mbuf. */
2089			sc->vtnet_stats.tx_tso_not_tcp++;
2090			goto drop;
2091		}
2092
2093		KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM,
2094		    ("%s: mbuf %p TSO without checksum offload %#x",
2095		    __func__, m, flags));
2096
2097		error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr);
2098		if (error)
2099			goto drop;
2100	}
2101
2102	return (m);
2103
2104drop:
2105	m_freem(m);
2106	return (NULL);
2107}
2108
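/*
 * Build the sglist for the frame: the virtio header followed by the mbuf
 * chain. If the chain has too many segments, defragment it once before
 * giving up.
 */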
2109static int
2110vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head,
2111    struct vtnet_tx_header *txhdr)
2112{
2113	struct vtnet_softc *sc;
2114	struct virtqueue *vq;
2115	struct sglist *sg;
2116	struct mbuf *m;
2117	int error;
2118
2119	sc = txq->vtntx_sc;
2120	vq = txq->vtntx_vq;
2121	sg = txq->vtntx_sg;
2122	m = *m_head;
2123
2124	sglist_reset(sg);
2125	error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size);
2126	KASSERT(error == 0 && sg->sg_nseg == 1,
2127	    ("%s: error %d adding header to sglist", __func__, error));
2128
2129	error = sglist_append_mbuf(sg, m);
2130	if (error) {
2131		m = m_defrag(m, M_NOWAIT);
2132		if (m == NULL)
2133			goto fail;
2134
2135		*m_head = m;
2136		sc->vtnet_stats.tx_defragged++;
2137
2138		error = sglist_append_mbuf(sg, m);
2139		if (error)
2140			goto fail;
2141	}
2142
2143	txhdr->vth_mbuf = m;
2144	error = virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0);
2145
2146	return (error);
2147
2148fail:
2149	sc->vtnet_stats.tx_defrag_failed++;
2150	m_freem(*m_head);
2151	*m_head = NULL;
2152
2153	return (ENOBUFS);
2154}
2155
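/*
 * Encapsulate one frame for transmit: allocate a Tx header, software
 * encapsulate any VLAN tag, apply checksum/TSO offloads, and enqueue the
 * result. On error, *m_head is NULL if the mbuf was freed; otherwise the
 * caller still owns it.
 */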
2156static int
2157vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head)
2158{
2159	struct vtnet_tx_header *txhdr;
2160	struct virtio_net_hdr *hdr;
2161	struct mbuf *m;
2162	int error;
2163
2164	m = *m_head;
2165	M_ASSERTPKTHDR(m);
2166
2167	txhdr = uma_zalloc(vtnet_tx_header_zone, M_NOWAIT | M_ZERO);
2168	if (txhdr == NULL) {
2169		m_freem(m);
2170		*m_head = NULL;
2171		return (ENOMEM);
2172	}
2173
2174	/*
2175	 * Always use the non-mergeable header, regardless of whether the
2176	 * feature was negotiated. For transmit, num_buffers is always zero.
2177	 * The vtnet_hdr_size is used to enqueue the correct header size.
2178	 */
2179	hdr = &txhdr->vth_uhdr.hdr;
2180
2181	if (m->m_flags & M_VLANTAG) {
2182		m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
2183		if ((*m_head = m) == NULL) {
2184			error = ENOBUFS;
2185			goto fail;
2186		}
2187		m->m_flags &= ~M_VLANTAG;
2188	}
2189
2190	if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) {
2191		m = vtnet_txq_offload(txq, m, hdr);
2192		if ((*m_head = m) == NULL) {
2193			error = ENOBUFS;
2194			goto fail;
2195		}
2196	}
2197
2198	error = vtnet_txq_enqueue_buf(txq, m_head, txhdr);
2199	if (error == 0)
2200		return (0);
2201
2202fail:
2203	uma_zfree(vtnet_tx_header_zone, txhdr);
2204
2205	return (error);
2206}
2207
2208#ifdef VTNET_LEGACY_TX
2209
2210static void
2211vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp)
2212{
2213	struct vtnet_softc *sc;
2214	struct virtqueue *vq;
2215	struct mbuf *m0;
2216	int tries, enq;
2217
2218	sc = txq->vtntx_sc;
2219	vq = txq->vtntx_vq;
2220	tries = 0;
2221
2222	VTNET_TXQ_LOCK_ASSERT(txq);
2223
2224	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2225	    sc->vtnet_link_active == 0)
2226		return;
2227
2228	vtnet_txq_eof(txq);
2229
2230again:
2231	enq = 0;
2232
2233	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
2234		if (virtqueue_full(vq))
2235			break;
2236
2237		IFQ_DRV_DEQUEUE(&ifp->if_snd, m0);
2238		if (m0 == NULL)
2239			break;
2240
2241		if (vtnet_txq_encap(txq, &m0) != 0) {
2242			if (m0 != NULL)
2243				IFQ_DRV_PREPEND(&ifp->if_snd, m0);
2244			break;
2245		}
2246
2247		enq++;
2248		ETHER_BPF_MTAP(ifp, m0);
2249	}
2250
2251	if (enq > 0 && vtnet_txq_notify(txq) != 0) {
2252		if (tries++ < VTNET_NOTIFY_RETRIES)
2253			goto again;
2254
2255		txq->vtntx_stats.vtxs_rescheduled++;
2256		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
2257	}
2258}
2259
2260static void
2261vtnet_start(struct ifnet *ifp)
2262{
2263	struct vtnet_softc *sc;
2264	struct vtnet_txq *txq;
2265
2266	sc = ifp->if_softc;
2267	txq = &sc->vtnet_txqs[0];
2268
2269	VTNET_TXQ_LOCK(txq);
2270	vtnet_start_locked(txq, ifp);
2271	VTNET_TXQ_UNLOCK(txq);
2272}
2273
2274#else /* !VTNET_LEGACY_TX */
2275
2276static int
2277vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m)
2278{
2279	struct vtnet_softc *sc;
2280	struct virtqueue *vq;
2281	struct buf_ring *br;
2282	struct ifnet *ifp;
2283	int enq, tries, error;
2284
2285	sc = txq->vtntx_sc;
2286	vq = txq->vtntx_vq;
2287	br = txq->vtntx_br;
2288	ifp = sc->vtnet_ifp;
2289	tries = 0;
2290	error = 0;
2291
2292	VTNET_TXQ_LOCK_ASSERT(txq);
2293
2294	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2295	    sc->vtnet_link_active == 0) {
2296		if (m != NULL)
2297			error = drbr_enqueue(ifp, br, m);
2298		return (error);
2299	}
2300
2301	if (m != NULL) {
2302		error = drbr_enqueue(ifp, br, m);
2303		if (error)
2304			return (error);
2305	}
2306
2307	vtnet_txq_eof(txq);
2308
2309again:
2310	enq = 0;
2311
2312	while ((m = drbr_peek(ifp, br)) != NULL) {
2313		if (virtqueue_full(vq)) {
2314			drbr_putback(ifp, br, m);
2315			break;
2316		}
2317
2318		if (vtnet_txq_encap(txq, &m) != 0) {
2319			if (m != NULL)
2320				drbr_putback(ifp, br, m);
2321			else
2322				drbr_advance(ifp, br);
2323			break;
2324		}
2325		drbr_advance(ifp, br);
2326
2327		enq++;
2328		ETHER_BPF_MTAP(ifp, m);
2329	}
2330
2331	if (enq > 0 && vtnet_txq_notify(txq) != 0) {
2332		if (tries++ < VTNET_NOTIFY_RETRIES)
2333			goto again;
2334
2335		txq->vtntx_stats.vtxs_rescheduled++;
2336		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
2337	}
2338
2339	return (0);
2340}
2341
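/*
 * Multiqueue transmit entry point. Select the Tx queue from the mbuf's
 * flowid when one is present, otherwise from the current CPU, and defer
 * to the queue's taskqueue if its lock is contended.
 */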
2342static int
2343vtnet_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
2344{
2345	struct vtnet_softc *sc;
2346	struct vtnet_txq *txq;
2347	int i, npairs, error;
2348
2349	sc = ifp->if_softc;
2350	npairs = sc->vtnet_act_vq_pairs;
2351
2352	/* Check if the flowid is set. */
2353	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
2354		i = m->m_pkthdr.flowid % npairs;
2355	else
2356		i = curcpu % npairs;
2357
2358	txq = &sc->vtnet_txqs[i];
2359
2360	if (VTNET_TXQ_TRYLOCK(txq) != 0) {
2361		error = vtnet_txq_mq_start_locked(txq, m);
2362		VTNET_TXQ_UNLOCK(txq);
2363	} else {
2364		error = drbr_enqueue(ifp, txq->vtntx_br, m);
2365		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask);
2366	}
2367
2368	return (error);
2369}
2370
2371static void
2372vtnet_txq_tq_deferred(void *xtxq, int pending)
2373{
2374	struct vtnet_softc *sc;
2375	struct vtnet_txq *txq;
2376
2377	txq = xtxq;
2378	sc = txq->vtntx_sc;
2379
2380	VTNET_TXQ_LOCK(txq);
2381	if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br))
2382		vtnet_txq_mq_start_locked(txq, NULL);
2383	VTNET_TXQ_UNLOCK(txq);
2384}
2385
2386#endif /* VTNET_LEGACY_TX */
2387
2388static void
2389vtnet_txq_start(struct vtnet_txq *txq)
2390{
2391	struct vtnet_softc *sc;
2392	struct ifnet *ifp;
2393
2394	sc = txq->vtntx_sc;
2395	ifp = sc->vtnet_ifp;
2396
2397#ifdef VTNET_LEGACY_TX
2398	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2399		vtnet_start_locked(txq, ifp);
2400#else
2401	if (!drbr_empty(ifp, txq->vtntx_br))
2402		vtnet_txq_mq_start_locked(txq, NULL);
2403#endif
2404}
2405
2406static void
2407vtnet_txq_tq_intr(void *xtxq, int pending)
2408{
2409	struct vtnet_softc *sc;
2410	struct vtnet_txq *txq;
2411	struct ifnet *ifp;
2412
2413	txq = xtxq;
2414	sc = txq->vtntx_sc;
2415	ifp = sc->vtnet_ifp;
2416
2417	VTNET_TXQ_LOCK(txq);
2418
2419	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2420		VTNET_TXQ_UNLOCK(txq);
2421		return;
2422	}
2423
2424	vtnet_txq_eof(txq);
2425	vtnet_txq_start(txq);
2426
2427	VTNET_TXQ_UNLOCK(txq);
2428}
2429
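/*
 * Drain completed frames from the Tx virtqueue, updating the statistics
 * and freeing the mbufs. Returns the number of entries dequeued.
 */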
2430static int
2431vtnet_txq_eof(struct vtnet_txq *txq)
2432{
2433	struct virtqueue *vq;
2434	struct vtnet_tx_header *txhdr;
2435	struct mbuf *m;
2436	int deq;
2437
2438	vq = txq->vtntx_vq;
2439	deq = 0;
2440	VTNET_TXQ_LOCK_ASSERT(txq);
2441
2442#ifdef DEV_NETMAP
2443	if (netmap_tx_irq(txq->vtntx_sc->vtnet_ifp, txq->vtntx_id)) {
2444		virtqueue_disable_intr(vq);	/* XXX luigi */
2445		return (0);			/* XXX or 1? */
2446	}
2447#endif /* DEV_NETMAP */
2448
2449	while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
2450		m = txhdr->vth_mbuf;
2451		deq++;
2452
2453		txq->vtntx_stats.vtxs_opackets++;
2454		txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len;
2455		if (m->m_flags & M_MCAST)
2456			txq->vtntx_stats.vtxs_omcasts++;
2457
2458		m_freem(m);
2459		uma_zfree(vtnet_tx_header_zone, txhdr);
2460	}
2461
2462	if (virtqueue_empty(vq))
2463		txq->vtntx_watchdog = 0;
2464
2465	return (deq);
2466}
2467
2468static void
2469vtnet_tx_vq_intr(void *xtxq)
2470{
2471	struct vtnet_softc *sc;
2472	struct vtnet_txq *txq;
2473	struct ifnet *ifp;
2474
2475	txq = xtxq;
2476	sc = txq->vtntx_sc;
2477	ifp = sc->vtnet_ifp;
2478
2479	if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) {
2480		/*
2481		 * Ignore this interrupt. Either this is a spurious interrupt
2482		 * or multiqueue without per-VQ MSIX so every queue needs to
2483		 * be polled (a brain dead configuration we could try harder
2484		 * to avoid).
2485		 */
2486		vtnet_txq_disable_intr(txq);
2487		return;
2488	}
2489
2490	VTNET_TXQ_LOCK(txq);
2491
2492	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2493		VTNET_TXQ_UNLOCK(txq);
2494		return;
2495	}
2496
2497	vtnet_txq_eof(txq);
2498	vtnet_txq_start(txq);
2499
2500	VTNET_TXQ_UNLOCK(txq);
2501}
2502
2503static void
2504vtnet_tx_start_all(struct vtnet_softc *sc)
2505{
2506	struct vtnet_txq *txq;
2507	int i;
2508
2509	VTNET_CORE_LOCK_ASSERT(sc);
2510
2511	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2512		txq = &sc->vtnet_txqs[i];
2513
2514		VTNET_TXQ_LOCK(txq);
2515		vtnet_txq_start(txq);
2516		VTNET_TXQ_UNLOCK(txq);
2517	}
2518}
2519
2520#ifndef VTNET_LEGACY_TX
2521static void
2522vtnet_qflush(struct ifnet *ifp)
2523{
2524	struct vtnet_softc *sc;
2525	struct vtnet_txq *txq;
2526	struct mbuf *m;
2527	int i;
2528
2529	sc = ifp->if_softc;
2530
2531	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2532		txq = &sc->vtnet_txqs[i];
2533
2534		VTNET_TXQ_LOCK(txq);
2535		while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL)
2536			m_freem(m);
2537		VTNET_TXQ_UNLOCK(txq);
2538	}
2539
2540	if_qflush(ifp);
2541}
2542#endif
2543
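/*
 * Per-queue transmit watchdog, called from vtnet_tick(). Returns nonzero
 * if the queue timed out and the interface should be reinitialized.
 */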
2544static int
2545vtnet_watchdog(struct vtnet_txq *txq)
2546{
2547	struct ifnet *ifp;
2548
2549	ifp = txq->vtntx_sc->vtnet_ifp;
2550
2551	VTNET_TXQ_LOCK(txq);
2552	if (txq->vtntx_watchdog == 1) {
2553		/*
2554		 * Only drain completed frames if the watchdog is about to
2555		 * expire. If any frames were drained, there may be enough
2556		 * free descriptors now available to transmit queued frames.
2557		 * In that case, the timer will immediately be decremented
2558		 * below, but the timeout is generous enough that this should
2559		 * not be a problem.
2560		 */
2561		if (vtnet_txq_eof(txq) != 0)
2562			vtnet_txq_start(txq);
2563	}
2564
2565	if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) {
2566		VTNET_TXQ_UNLOCK(txq);
2567		return (0);
2568	}
2569	VTNET_TXQ_UNLOCK(txq);
2570
2571	if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id);
2572	return (1);
2573}
2574
2575static void
2576vtnet_rxq_accum_stats(struct vtnet_rxq *rxq, struct vtnet_rxq_stats *accum)
2577{
2578	struct vtnet_rxq_stats *st;
2579
2580	st = &rxq->vtnrx_stats;
2581
2582	accum->vrxs_ipackets += st->vrxs_ipackets;
2583	accum->vrxs_ibytes += st->vrxs_ibytes;
2584	accum->vrxs_iqdrops += st->vrxs_iqdrops;
2585	accum->vrxs_csum += st->vrxs_csum;
2586	accum->vrxs_csum_failed += st->vrxs_csum_failed;
2587	accum->vrxs_rescheduled += st->vrxs_rescheduled;
2588}
2589
2590static void
2591vtnet_txq_accum_stats(struct vtnet_txq *txq, struct vtnet_txq_stats *accum)
2592{
2593	struct vtnet_txq_stats *st;
2594
2595	st = &txq->vtntx_stats;
2596
2597	accum->vtxs_opackets += st->vtxs_opackets;
2598	accum->vtxs_obytes += st->vtxs_obytes;
2599	accum->vtxs_csum += st->vtxs_csum;
2600	accum->vtxs_tso += st->vtxs_tso;
2601	accum->vtxs_rescheduled += st->vtxs_rescheduled;
2602}
2603
2604static void
2605vtnet_accumulate_stats(struct vtnet_softc *sc)
2606{
2607	struct ifnet *ifp;
2608	struct vtnet_statistics *st;
2609	struct vtnet_rxq_stats rxaccum;
2610	struct vtnet_txq_stats txaccum;
2611	int i;
2612
2613	ifp = sc->vtnet_ifp;
2614	st = &sc->vtnet_stats;
2615	bzero(&rxaccum, sizeof(struct vtnet_rxq_stats));
2616	bzero(&txaccum, sizeof(struct vtnet_txq_stats));
2617
2618	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2619		vtnet_rxq_accum_stats(&sc->vtnet_rxqs[i], &rxaccum);
2620		vtnet_txq_accum_stats(&sc->vtnet_txqs[i], &txaccum);
2621	}
2622
2623	st->rx_csum_offloaded = rxaccum.vrxs_csum;
2624	st->rx_csum_failed = rxaccum.vrxs_csum_failed;
2625	st->rx_task_rescheduled = rxaccum.vrxs_rescheduled;
2626	st->tx_csum_offloaded = txaccum.vtxs_csum;
2627	st->tx_tso_offloaded = txaccum.vtxs_tso;
2628	st->tx_task_rescheduled = txaccum.vtxs_rescheduled;
2629
2630	/*
2631	 * With the exception of if_ierrors, these ifnet statistics are
2632	 * only updated in the driver, so just set them to our accumulated
2633	 * values. if_ierrors is updated in ether_input() for malformed
2634	 * frames that we should have already discarded.
2635	 */
2636	ifp->if_ipackets = rxaccum.vrxs_ipackets;
2637	ifp->if_iqdrops = rxaccum.vrxs_iqdrops;
2638	ifp->if_ierrors = rxaccum.vrxs_ierrors;
2639	ifp->if_opackets = txaccum.vtxs_opackets;
2640#ifndef VTNET_LEGACY_TX
2641	ifp->if_obytes = txaccum.vtxs_obytes;
2642	ifp->if_omcasts = txaccum.vtxs_omcasts;
2643#endif
2644}
2645
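/*
 * Periodic callout: accumulate the per-queue statistics and run each
 * active Tx queue's watchdog, reinitializing the interface on a timeout.
 */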
2646static void
2647vtnet_tick(void *xsc)
2648{
2649	struct vtnet_softc *sc;
2650	struct ifnet *ifp;
2651	int i, timedout;
2652
2653	sc = xsc;
2654	ifp = sc->vtnet_ifp;
2655	timedout = 0;
2656
2657	VTNET_CORE_LOCK_ASSERT(sc);
2658	vtnet_accumulate_stats(sc);
2659
2660	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
2661		timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]);
2662
2663	if (timedout != 0) {
2664		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2665		vtnet_init_locked(sc);
2666	} else
2667		callout_schedule(&sc->vtnet_tick_ch, hz);
2668}
2669
2670static void
2671vtnet_start_taskqueues(struct vtnet_softc *sc)
2672{
2673	device_t dev;
2674	struct vtnet_rxq *rxq;
2675	struct vtnet_txq *txq;
2676	int i, error;
2677
2678	dev = sc->vtnet_dev;
2679
2680	/*
2681	 * Errors here are very difficult to recover from - we cannot
2682	 * easily fail because, if this is during boot, we will hang
2683	 * when freeing any successfully started taskqueues because
2684	 * the scheduler isn't up yet.
2685	 *
2686	 * Most drivers just ignore the return value - it only fails
2687	 * with ENOMEM so an error is not likely.
2688	 */
2689	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2690		rxq = &sc->vtnet_rxqs[i];
2691		error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET,
2692		    "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id);
2693		if (error) {
2694			device_printf(dev, "failed to start rx taskq %d\n",
2695			    rxq->vtnrx_id);
2696		}
2697
2698		txq = &sc->vtnet_txqs[i];
2699		error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET,
2700		    "%s txq %d", device_get_nameunit(dev), txq->vtntx_id);
2701		if (error) {
2702			device_printf(dev, "failed to start tx taskq %d\n",
2703			    txq->vtntx_id);
2704		}
2705	}
2706}
2707
2708static void
2709vtnet_free_taskqueues(struct vtnet_softc *sc)
2710{
2711	struct vtnet_rxq *rxq;
2712	struct vtnet_txq *txq;
2713	int i;
2714
2715	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2716		rxq = &sc->vtnet_rxqs[i];
2717		if (rxq->vtnrx_tq != NULL) {
2718			taskqueue_free(rxq->vtnrx_tq);
2719			rxq->vtnrx_tq = NULL;
2720		}
2721
2722		txq = &sc->vtnet_txqs[i];
2723		if (txq->vtntx_tq != NULL) {
2724			taskqueue_free(txq->vtntx_tq);
2725			txq->vtntx_tq = NULL;
2726		}
2727	}
2728}
2729
2730static void
2731vtnet_drain_taskqueues(struct vtnet_softc *sc)
2732{
2733	struct vtnet_rxq *rxq;
2734	struct vtnet_txq *txq;
2735	int i;
2736
2737	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2738		rxq = &sc->vtnet_rxqs[i];
2739		if (rxq->vtnrx_tq != NULL)
2740			taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
2741
2742		txq = &sc->vtnet_txqs[i];
2743		if (txq->vtntx_tq != NULL) {
2744			taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask);
2745#ifndef VTNET_LEGACY_TX
2746			taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask);
2747#endif
2748		}
2749	}
2750}
2751
2752static void
2753vtnet_drain_rxtx_queues(struct vtnet_softc *sc)
2754{
2755	struct vtnet_rxq *rxq;
2756	struct vtnet_txq *txq;
2757	int i;
2758
2759	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2760		rxq = &sc->vtnet_rxqs[i];
2761		vtnet_rxq_free_mbufs(rxq);
2762
2763		txq = &sc->vtnet_txqs[i];
2764		vtnet_txq_free_mbufs(txq);
2765	}
2766}
2767
2768static void
2769vtnet_stop_rendezvous(struct vtnet_softc *sc)
2770{
2771	struct vtnet_rxq *rxq;
2772	struct vtnet_txq *txq;
2773	int i;
2774
2775	/*
2776	 * Lock and unlock the per-queue mutex so we know the stop
2777	 * state is visible. Doing only the active queues should be
2778	 * sufficient, but it does not cost much extra to do all the
2779	 * queues. Note we hold the core mutex here too.
2780	 */
2781	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2782		rxq = &sc->vtnet_rxqs[i];
2783		VTNET_RXQ_LOCK(rxq);
2784		VTNET_RXQ_UNLOCK(rxq);
2785
2786		txq = &sc->vtnet_txqs[i];
2787		VTNET_TXQ_LOCK(txq);
2788		VTNET_TXQ_UNLOCK(txq);
2789	}
2790}
2791
2792static void
2793vtnet_stop(struct vtnet_softc *sc)
2794{
2795	device_t dev;
2796	struct ifnet *ifp;
2797
2798	dev = sc->vtnet_dev;
2799	ifp = sc->vtnet_ifp;
2800
2801	VTNET_CORE_LOCK_ASSERT(sc);
2802
2803	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2804	sc->vtnet_link_active = 0;
2805	callout_stop(&sc->vtnet_tick_ch);
2806
2807	/* Only advisory. */
2808	vtnet_disable_interrupts(sc);
2809
2810	/*
2811	 * Stop the host adapter. This resets it to the pre-initialized
2812	 * state. It will not generate any interrupts until after it is
2813	 * reinitialized.
2814	 */
2815	virtio_stop(dev);
2816	vtnet_stop_rendezvous(sc);
2817
2818	/* Free any mbufs left in the virtqueues. */
2819	vtnet_drain_rxtx_queues(sc);
2820}
2821
2822static int
2823vtnet_virtio_reinit(struct vtnet_softc *sc)
2824{
2825	device_t dev;
2826	struct ifnet *ifp;
2827	uint64_t features;
2828	int mask, error;
2829
2830	dev = sc->vtnet_dev;
2831	ifp = sc->vtnet_ifp;
2832	features = sc->vtnet_features;
2833
2834	mask = 0;
2835#if defined(INET)
2836	mask |= IFCAP_RXCSUM;
2837#endif
2838#if defined (INET6)
2839	mask |= IFCAP_RXCSUM_IPV6;
2840#endif
2841
2842	/*
2843	 * Re-negotiate with the host, removing any disabled receive
2844	 * features. Transmit features are disabled only on our side
2845	 * via if_capenable and if_hwassist.
2846	 */
2847
2848	if (ifp->if_capabilities & mask) {
2849		/*
2850		 * We require both IPv4 and IPv6 offloading to be enabled
2851		 * in order to negotiate it: VirtIO does not distinguish
2852		 * between the two.
2853		 */
2854		if ((ifp->if_capenable & mask) != mask)
2855			features &= ~VIRTIO_NET_F_GUEST_CSUM;
2856	}
2857
2858	if (ifp->if_capabilities & IFCAP_LRO) {
2859		if ((ifp->if_capenable & IFCAP_LRO) == 0)
2860			features &= ~VTNET_LRO_FEATURES;
2861	}
2862
2863	if (ifp->if_capabilities & IFCAP_VLAN_HWFILTER) {
2864		if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)
2865			features &= ~VIRTIO_NET_F_CTRL_VLAN;
2866	}
2867
2868	error = virtio_reinit(dev, features);
2869	if (error)
2870		device_printf(dev, "virtio reinit error %d\n", error);
2871
2872	return (error);
2873}
2874
2875static void
2876vtnet_init_rx_filters(struct vtnet_softc *sc)
2877{
2878	struct ifnet *ifp;
2879
2880	ifp = sc->vtnet_ifp;
2881
2882	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
2883		/* Restore promiscuous and all-multicast modes. */
2884		vtnet_rx_filter(sc);
2885		/* Restore filtered MAC addresses. */
2886		vtnet_rx_filter_mac(sc);
2887	}
2888
2889	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2890		vtnet_rx_filter_vlan(sc);
2891}
2892
2893static int
2894vtnet_init_rx_queues(struct vtnet_softc *sc)
2895{
2896	device_t dev;
2897	struct vtnet_rxq *rxq;
2898	int i, clsize, error;
2899
2900	dev = sc->vtnet_dev;
2901
2902	/*
2903	 * Use the new cluster size if one has been set (via an MTU
2904	 * change). Otherwise, use the standard 2K clusters.
2905	 *
2906	 * BMV: It might make sense to use page sized clusters as
2907	 * the default (depending on the features negotiated).
2908	 */
2909	if (sc->vtnet_rx_new_clsize != 0) {
2910		clsize = sc->vtnet_rx_new_clsize;
2911		sc->vtnet_rx_new_clsize = 0;
2912	} else
2913		clsize = MCLBYTES;
2914
2915	sc->vtnet_rx_clsize = clsize;
2916	sc->vtnet_rx_nmbufs = VTNET_NEEDED_RX_MBUFS(sc, clsize);
2917
2918	KASSERT(sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS ||
2919	    sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs,
2920	    ("%s: too many rx mbufs %d for %d segments", __func__,
2921	    sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs));
2922
2923#ifdef DEV_NETMAP
2924	if (vtnet_netmap_init_rx_buffers(sc))
2925		return (0);
2926#endif /* DEV_NETMAP */
2927
2928	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2929		rxq = &sc->vtnet_rxqs[i];
2930
2931		/* Hold the lock to satisfy asserts. */
2932		VTNET_RXQ_LOCK(rxq);
2933		error = vtnet_rxq_populate(rxq);
2934		VTNET_RXQ_UNLOCK(rxq);
2935
2936		if (error) {
2937			device_printf(dev,
2938			    "cannot allocate mbufs for Rx queue %d\n", i);
2939			return (error);
2940		}
2941	}
2942
2943	return (0);
2944}
2945
2946static int
2947vtnet_init_tx_queues(struct vtnet_softc *sc)
2948{
2949	struct vtnet_txq *txq;
2950	int i;
2951
2952	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2953		txq = &sc->vtnet_txqs[i];
2954		txq->vtntx_watchdog = 0;
2955	}
2956
2957	return (0);
2958}
2959
2960static int
2961vtnet_init_rxtx_queues(struct vtnet_softc *sc)
2962{
2963	int error;
2964
2965	error = vtnet_init_rx_queues(sc);
2966	if (error)
2967		return (error);
2968
2969	error = vtnet_init_tx_queues(sc);
2970	if (error)
2971		return (error);
2972
2973	return (0);
2974}
2975
2976static void
2977vtnet_set_active_vq_pairs(struct vtnet_softc *sc)
2978{
2979	device_t dev;
2980	int npairs;
2981
2982	dev = sc->vtnet_dev;
2983
2984	if ((sc->vtnet_flags & VTNET_FLAG_MULTIQ) == 0) {
2985		MPASS(sc->vtnet_max_vq_pairs == 1);
2986		sc->vtnet_act_vq_pairs = 1;
2987		return;
2988	}
2989
2990	/* BMV: Just use the maximum configured for now. */
2991	npairs = sc->vtnet_max_vq_pairs;
2992
2993	if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) {
2994		device_printf(dev,
2995		    "cannot set active queue pairs to %d\n", npairs);
2996		npairs = 1;
2997	}
2998
2999	sc->vtnet_act_vq_pairs = npairs;
3000}
3001
3002static int
3003vtnet_reinit(struct vtnet_softc *sc)
3004{
3005	struct ifnet *ifp;
3006	int error;
3007
3008	ifp = sc->vtnet_ifp;
3009
3010	/* Use the current MAC address. */
3011	bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN);
3012	vtnet_set_hwaddr(sc);
3013
3014	vtnet_set_active_vq_pairs(sc);
3015
3016	ifp->if_hwassist = 0;
3017	if (ifp->if_capenable & IFCAP_TXCSUM)
3018		ifp->if_hwassist |= VTNET_CSUM_OFFLOAD;
3019	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
3020		ifp->if_hwassist |= VTNET_CSUM_OFFLOAD_IPV6;
3021	if (ifp->if_capenable & IFCAP_TSO4)
3022		ifp->if_hwassist |= CSUM_IP_TSO;
3023	if (ifp->if_capenable & IFCAP_TSO6)
3024		ifp->if_hwassist |= CSUM_IP6_TSO;
3025
3026	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
3027		vtnet_init_rx_filters(sc);
3028
3029	error = vtnet_init_rxtx_queues(sc);
3030	if (error)
3031		return (error);
3032
3033	vtnet_enable_interrupts(sc);
3034	ifp->if_drv_flags |= IFF_DRV_RUNNING;
3035
3036	return (0);
3037}
3038
3039static void
3040vtnet_init_locked(struct vtnet_softc *sc)
3041{
3042	device_t dev;
3043	struct ifnet *ifp;
3044
3045	dev = sc->vtnet_dev;
3046	ifp = sc->vtnet_ifp;
3047
3048	VTNET_CORE_LOCK_ASSERT(sc);
3049
3050	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3051		return;
3052
3053	vtnet_stop(sc);
3054
3055	/* Reinitialize with the host. */
3056	if (vtnet_virtio_reinit(sc) != 0)
3057		goto fail;
3058
3059	if (vtnet_reinit(sc) != 0)
3060		goto fail;
3061
3062	virtio_reinit_complete(dev);
3063
3064	vtnet_update_link_status(sc);
3065	callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
3066
3067	return;
3068
3069fail:
3070	vtnet_stop(sc);
3071}
3072
3073static void
3074vtnet_init(void *xsc)
3075{
3076	struct vtnet_softc *sc;
3077
3078	sc = xsc;
3079
3080#ifdef DEV_NETMAP
3081	if (!NA(sc->vtnet_ifp)) {
3082		D("try to attach again");
3083		vtnet_netmap_attach(sc);
3084	}
3085#endif /* DEV_NETMAP */
3086
3087	VTNET_CORE_LOCK(sc);
3088	vtnet_init_locked(sc);
3089	VTNET_CORE_UNLOCK(sc);
3090}
3091
3092static void
3093vtnet_free_ctrl_vq(struct vtnet_softc *sc)
3094{
3095	struct virtqueue *vq;
3096
3097	vq = sc->vtnet_ctrl_vq;
3098
3099	/*
3100	 * The control virtqueue is only polled and therefore it should
3101	 * already be empty.
3102	 */
3103	KASSERT(virtqueue_empty(vq),
3104	    ("%s: ctrl vq %p not empty", __func__, vq));
3105}
3106
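/*
 * Issue a command on the control virtqueue and poll for the host's
 * response. The caller must hold the core lock and check the ack byte
 * it provided for the result.
 */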
3107static void
3108vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie,
3109    struct sglist *sg, int readable, int writable)
3110{
3111	struct virtqueue *vq;
3112
3113	vq = sc->vtnet_ctrl_vq;
3114
3115	VTNET_CORE_LOCK_ASSERT(sc);
3116	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ,
3117	    ("%s: CTRL_VQ feature not negotiated", __func__));
3118
3119	if (!virtqueue_empty(vq))
3120		return;
3121	if (virtqueue_enqueue(vq, cookie, sg, readable, writable) != 0)
3122		return;
3123
3124	/*
3125	 * Poll for the response, but the command is likely already
3126	 * done when we return from the notify.
3127	 */
3128	virtqueue_notify(vq);
3129	virtqueue_poll(vq, NULL);
3130}
3131
3132static int
3133vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr)
3134{
3135	struct virtio_net_ctrl_hdr hdr __aligned(2);
3136	struct sglist_seg segs[3];
3137	struct sglist sg;
3138	uint8_t ack;
3139	int error;
3140
3141	hdr.class = VIRTIO_NET_CTRL_MAC;
3142	hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
3143	ack = VIRTIO_NET_ERR;
3144
3145	sglist_init(&sg, 3, segs);
3146	error = 0;
3147	error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
3148	error |= sglist_append(&sg, hwaddr, ETHER_ADDR_LEN);
3149	error |= sglist_append(&sg, &ack, sizeof(uint8_t));
3150	KASSERT(error == 0 && sg.sg_nseg == 3,
3151	    ("%s: error %d adding set MAC msg to sglist", __func__, error));
3152
3153	vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
3154
3155	return (ack == VIRTIO_NET_OK ? 0 : EIO);
3156}
3157
3158static int
3159vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs)
3160{
3161	struct sglist_seg segs[3];
3162	struct sglist sg;
3163	struct {
3164		struct virtio_net_ctrl_hdr hdr;
3165		uint8_t pad1;
3166		struct virtio_net_ctrl_mq mq;
3167		uint8_t pad2;
3168		uint8_t ack;
3169	} s __aligned(2);
3170	int error;
3171
3172	s.hdr.class = VIRTIO_NET_CTRL_MQ;
3173	s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
3174	s.mq.virtqueue_pairs = npairs;
3175	s.ack = VIRTIO_NET_ERR;
3176
3177	sglist_init(&sg, 3, segs);
3178	error = 0;
3179	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3180	error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq));
3181	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3182	KASSERT(error == 0 && sg.sg_nseg == 3,
3183	    ("%s: error %d adding MQ message to sglist", __func__, error));
3184
3185	vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3186
3187	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3188}
3189
3190static int
3191vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on)
3192{
3193	struct sglist_seg segs[3];
3194	struct sglist sg;
3195	struct {
3196		struct virtio_net_ctrl_hdr hdr;
3197		uint8_t pad1;
3198		uint8_t onoff;
3199		uint8_t pad2;
3200		uint8_t ack;
3201	} s __aligned(2);
3202	int error;
3203
3204	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
3205	    ("%s: CTRL_RX feature not negotiated", __func__));
3206
3207	s.hdr.class = VIRTIO_NET_CTRL_RX;
3208	s.hdr.cmd = cmd;
3209	s.onoff = !!on;
3210	s.ack = VIRTIO_NET_ERR;
3211
3212	sglist_init(&sg, 3, segs);
3213	error = 0;
3214	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3215	error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t));
3216	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3217	KASSERT(error == 0 && sg.sg_nseg == 3,
3218	    ("%s: error %d adding Rx message to sglist", __func__, error));
3219
3220	vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3221
3222	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3223}
3224
3225static int
3226vtnet_set_promisc(struct vtnet_softc *sc, int on)
3227{
3228
3229	return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on));
3230}
3231
3232static int
3233vtnet_set_allmulti(struct vtnet_softc *sc, int on)
3234{
3235
3236	return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on));
3237}
3238
3239/*
3240 * The device defaults to promiscuous mode for backwards compatibility.
3241 * Turn it off at attach time if possible.
3242 */
3243static void
3244vtnet_attach_disable_promisc(struct vtnet_softc *sc)
3245{
3246	struct ifnet *ifp;
3247
3248	ifp = sc->vtnet_ifp;
3249
3250	VTNET_CORE_LOCK(sc);
3251	if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) {
3252		ifp->if_flags |= IFF_PROMISC;
3253	} else if (vtnet_set_promisc(sc, 0) != 0) {
3254		ifp->if_flags |= IFF_PROMISC;
3255		device_printf(sc->vtnet_dev,
3256		    "cannot disable default promiscuous mode\n");
3257	}
3258	VTNET_CORE_UNLOCK(sc);
3259}
3260
3261static void
3262vtnet_rx_filter(struct vtnet_softc *sc)
3263{
3264	device_t dev;
3265	struct ifnet *ifp;
3266
3267	dev = sc->vtnet_dev;
3268	ifp = sc->vtnet_ifp;
3269
3270	VTNET_CORE_LOCK_ASSERT(sc);
3271
3272	if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0)
3273		device_printf(dev, "cannot %s promiscuous mode\n",
3274		    ifp->if_flags & IFF_PROMISC ? "enable" : "disable");
3275
3276	if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0)
3277		device_printf(dev, "cannot %s all-multicast mode\n",
3278		    ifp->if_flags & IFF_ALLMULTI ? "enable" : "disable");
3279}
3280
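/*
 * Program the host's unicast and multicast MAC filter tables from the
 * interface address lists, falling back to promiscuous or all-multicast
 * mode if a table would overflow.
 */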
3281static void
3282vtnet_rx_filter_mac(struct vtnet_softc *sc)
3283{
3284	struct virtio_net_ctrl_hdr hdr __aligned(2);
3285	struct vtnet_mac_filter *filter;
3286	struct sglist_seg segs[4];
3287	struct sglist sg;
3288	struct ifnet *ifp;
3289	struct ifaddr *ifa;
3290	struct ifmultiaddr *ifma;
3291	int ucnt, mcnt, promisc, allmulti, error;
3292	uint8_t ack;
3293
3294	ifp = sc->vtnet_ifp;
3295	filter = sc->vtnet_mac_filter;
3296	ucnt = 0;
3297	mcnt = 0;
3298	promisc = 0;
3299	allmulti = 0;
3300
3301	VTNET_CORE_LOCK_ASSERT(sc);
3302	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
3303	    ("%s: CTRL_RX feature not negotiated", __func__));
3304
3305	/* Unicast MAC addresses: */
3306	if_addr_rlock(ifp);
3307	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
3308		if (ifa->ifa_addr->sa_family != AF_LINK)
3309			continue;
3310		else if (memcmp(LLADDR((struct sockaddr_dl *)ifa->ifa_addr),
3311		    sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0)
3312			continue;
3313		else if (ucnt == VTNET_MAX_MAC_ENTRIES) {
3314			promisc = 1;
3315			break;
3316		}
3317
3318		bcopy(LLADDR((struct sockaddr_dl *)ifa->ifa_addr),
3319		    &filter->vmf_unicast.macs[ucnt], ETHER_ADDR_LEN);
3320		ucnt++;
3321	}
3322	if_addr_runlock(ifp);
3323
3324	if (promisc != 0) {
3325		filter->vmf_unicast.nentries = 0;
3326		if_printf(ifp, "more than %d MAC addresses assigned, "
3327		    "falling back to promiscuous mode\n",
3328		    VTNET_MAX_MAC_ENTRIES);
3329	} else
3330		filter->vmf_unicast.nentries = ucnt;
3331
3332	/* Multicast MAC addresses: */
3333	if_maddr_rlock(ifp);
3334	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3335		if (ifma->ifma_addr->sa_family != AF_LINK)
3336			continue;
3337		else if (mcnt == VTNET_MAX_MAC_ENTRIES) {
3338			allmulti = 1;
3339			break;
3340		}
3341
3342		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
3343		    &filter->vmf_multicast.macs[mcnt], ETHER_ADDR_LEN);
3344		mcnt++;
3345	}
3346	if_maddr_runlock(ifp);
3347
3348	if (allmulti != 0) {
3349		filter->vmf_multicast.nentries = 0;
3350		if_printf(ifp, "more than %d multicast MAC addresses "
3351		    "assigned, falling back to all-multicast mode\n",
3352		    VTNET_MAX_MAC_ENTRIES);
3353	} else
3354		filter->vmf_multicast.nentries = mcnt;
3355
3356	if (promisc != 0 && allmulti != 0)
3357		goto out;
3358
3359	hdr.class = VIRTIO_NET_CTRL_MAC;
3360	hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
3361	ack = VIRTIO_NET_ERR;
3362
3363	sglist_init(&sg, 4, segs);
3364	error = 0;
3365	error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
3366	error |= sglist_append(&sg, &filter->vmf_unicast,
3367	    sizeof(uint32_t) + filter->vmf_unicast.nentries * ETHER_ADDR_LEN);
3368	error |= sglist_append(&sg, &filter->vmf_multicast,
3369	    sizeof(uint32_t) + filter->vmf_multicast.nentries * ETHER_ADDR_LEN);
3370	error |= sglist_append(&sg, &ack, sizeof(uint8_t));
3371	KASSERT(error == 0 && sg.sg_nseg == 4,
3372	    ("%s: error %d adding MAC filter msg to sglist", __func__, error));
3373
3374	vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
3375
3376	if (ack != VIRTIO_NET_OK)
3377		if_printf(ifp, "error setting host MAC filter table\n");
3378
3379out:
3380	if (promisc != 0 && vtnet_set_promisc(sc, 1) != 0)
3381		if_printf(ifp, "cannot enable promiscuous mode\n");
3382	if (allmulti != 0 && vtnet_set_allmulti(sc, 1) != 0)
3383		if_printf(ifp, "cannot enable all-multicast mode\n");
3384}
3385
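/*
 * Add or remove a single VLAN tag from the host's filter table via the
 * control virtqueue.
 */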
3386static int
3387vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
3388{
3389	struct sglist_seg segs[3];
3390	struct sglist sg;
3391	struct {
3392		struct virtio_net_ctrl_hdr hdr;
3393		uint8_t pad1;
3394		uint16_t tag;
3395		uint8_t pad2;
3396		uint8_t ack;
3397	} s __aligned(2);
3398	int error;
3399
3400	s.hdr.class = VIRTIO_NET_CTRL_VLAN;
3401	s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
3402	s.tag = tag;
3403	s.ack = VIRTIO_NET_ERR;
3404
3405	sglist_init(&sg, 3, segs);
3406	error = 0;
3407	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3408	error |= sglist_append(&sg, &s.tag, sizeof(uint16_t));
3409	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3410	KASSERT(error == 0 && sg.sg_nseg == 3,
3411	    ("%s: error %d adding VLAN message to sglist", __func__, error));
3412
3413	vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3414
3415	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3416}
3417
3418static void
3419vtnet_rx_filter_vlan(struct vtnet_softc *sc)
3420{
3421	uint32_t w;
3422	uint16_t tag;
3423	int i, bit;
3424
3425	VTNET_CORE_LOCK_ASSERT(sc);
3426	KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER,
3427	    ("%s: VLAN_FILTER feature not negotiated", __func__));
3428
3429	/* Enable the filter for each configured VLAN. */
3430	for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) {
3431		w = sc->vtnet_vlan_filter[i];
3432
3433		while ((bit = ffs(w) - 1) != -1) {
3434			w &= ~(1 << bit);
3435			tag = sizeof(w) * CHAR_BIT * i + bit;
3436
3437			if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) {
3438				device_printf(sc->vtnet_dev,
3439				    "cannot enable VLAN %d filter\n", tag);
3440			}
3441		}
3442	}
3443}
3444
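/*
 * Track the VLAN tag in the local shadow bitmap (32 tags per word) and,
 * if hardware VLAN filtering is enabled, update the host filter as well.
 */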
3445static void
3446vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
3447{
3448	struct ifnet *ifp;
3449	int idx, bit;
3450
3451	ifp = sc->vtnet_ifp;
3452	idx = (tag >> 5) & 0x7F;
3453	bit = tag & 0x1F;
3454
3455	if (tag == 0 || tag > 4095)
3456		return;
3457
3458	VTNET_CORE_LOCK(sc);
3459
3460	if (add)
3461		sc->vtnet_vlan_filter[idx] |= (1 << bit);
3462	else
3463		sc->vtnet_vlan_filter[idx] &= ~(1 << bit);
3464
3465	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER &&
3466	    vtnet_exec_vlan_filter(sc, add, tag) != 0) {
3467		device_printf(sc->vtnet_dev,
3468		    "cannot %s VLAN %d %s the host filter table\n",
3469		    add ? "add" : "remove", tag, add ? "to" : "from");
3470	}
3471
3472	VTNET_CORE_UNLOCK(sc);
3473}
3474
3475static void
3476vtnet_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3477{
3478
3479	if (ifp->if_softc != arg)
3480		return;
3481
3482	vtnet_update_vlan_filter(arg, 1, tag);
3483}
3484
3485static void
3486vtnet_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3487{
3488
3489	if (ifp->if_softc != arg)
3490		return;
3491
3492	vtnet_update_vlan_filter(arg, 0, tag);
3493}
3494
3495static int
3496vtnet_is_link_up(struct vtnet_softc *sc)
3497{
3498	device_t dev;
3499	struct ifnet *ifp;
3500	uint16_t status;
3501
3502	dev = sc->vtnet_dev;
3503	ifp = sc->vtnet_ifp;
3504
3505	if ((ifp->if_capabilities & IFCAP_LINKSTATE) == 0)
3506		status = VIRTIO_NET_S_LINK_UP;
3507	else
3508		status = virtio_read_dev_config_2(dev,
3509		    offsetof(struct virtio_net_config, status));
3510
3511	return ((status & VIRTIO_NET_S_LINK_UP) != 0);
3512}
3513
3514static void
3515vtnet_update_link_status(struct vtnet_softc *sc)
3516{
3517	struct ifnet *ifp;
3518	int link;
3519
3520	ifp = sc->vtnet_ifp;
3521
3522	VTNET_CORE_LOCK_ASSERT(sc);
3523	link = vtnet_is_link_up(sc);
3524
3525	/* Notify if the link status has changed. */
3526	if (link != 0 && sc->vtnet_link_active == 0) {
3527		sc->vtnet_link_active = 1;
3528		if_link_state_change(ifp, LINK_STATE_UP);
3529	} else if (link == 0 && sc->vtnet_link_active != 0) {
3530		sc->vtnet_link_active = 0;
3531		if_link_state_change(ifp, LINK_STATE_DOWN);
3532	}
3533}
3534
3535static int
3536vtnet_ifmedia_upd(struct ifnet *ifp)
3537{
3538	struct vtnet_softc *sc;
3539	struct ifmedia *ifm;
3540
3541	sc = ifp->if_softc;
3542	ifm = &sc->vtnet_media;
3543
3544	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
3545		return (EINVAL);
3546
3547	return (0);
3548}
3549
3550static void
3551vtnet_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
3552{
3553	struct vtnet_softc *sc;
3554
3555	sc = ifp->if_softc;
3556
3557	ifmr->ifm_status = IFM_AVALID;
3558	ifmr->ifm_active = IFM_ETHER;
3559
3560	VTNET_CORE_LOCK(sc);
3561	if (vtnet_is_link_up(sc) != 0) {
3562		ifmr->ifm_status |= IFM_ACTIVE;
3563		ifmr->ifm_active |= VTNET_MEDIATYPE;
3564	} else
3565		ifmr->ifm_active |= IFM_NONE;
3566	VTNET_CORE_UNLOCK(sc);
3567}
3568
3569static void
3570vtnet_set_hwaddr(struct vtnet_softc *sc)
3571{
3572	device_t dev;
3573	int i;
3574
3575	dev = sc->vtnet_dev;
3576
3577	if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) {
3578		if (vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr) != 0)
3579			device_printf(dev, "unable to set MAC address\n");
3580	} else if (sc->vtnet_flags & VTNET_FLAG_MAC) {
3581		for (i = 0; i < ETHER_ADDR_LEN; i++) {
3582			virtio_write_dev_config_1(dev,
3583			    offsetof(struct virtio_net_config, mac) + i,
3584			    sc->vtnet_hwaddr[i]);
3585		}
3586	}
3587}
3588
3589static void
3590vtnet_get_hwaddr(struct vtnet_softc *sc)
3591{
3592	device_t dev;
3593	int i;
3594
3595	dev = sc->vtnet_dev;
3596
3597	if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) {
3598		/*
3599		 * Generate a random locally administered unicast address.
3600		 *
3601		 * It would be nice to generate the same MAC address across
3602		 * reboots, but it seems all the hosts currently available
3603		 * support the MAC feature, so this isn't too important.
3604		 */
3605		sc->vtnet_hwaddr[0] = 0xB2;
3606		arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0);
3607		vtnet_set_hwaddr(sc);
3608		return;
3609	}
3610
3611	for (i = 0; i < ETHER_ADDR_LEN; i++) {
3612		sc->vtnet_hwaddr[i] = virtio_read_dev_config_1(dev,
3613		    offsetof(struct virtio_net_config, mac) + i);
3614	}
3615}
3616
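/*
 * Copy the 802.1Q tag into the mbuf packet header and strip the VLAN
 * encapsulation from the frame.
 */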
3617static void
3618vtnet_vlan_tag_remove(struct mbuf *m)
3619{
3620	struct ether_vlan_header *evh;
3621
3622	evh = mtod(m, struct ether_vlan_header *);
3623	m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag);
3624	m->m_flags |= M_VLANTAG;
3625
3626	/* Strip the 802.1Q header. */
3627	bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN,
3628	    ETHER_HDR_LEN - ETHER_TYPE_LEN);
3629	m_adj(m, ETHER_VLAN_ENCAP_LEN);
3630}
3631
3632static void
3633vtnet_set_rx_process_limit(struct vtnet_softc *sc)
3634{
3635	int limit;
3636
3637	limit = vtnet_tunable_int(sc, "rx_process_limit",
3638	    vtnet_rx_process_limit);
3639	if (limit < 0)
3640		limit = INT_MAX;
3641	sc->vtnet_rx_process_limit = limit;
3642}
3643
3644static void
3645vtnet_set_tx_intr_threshold(struct vtnet_softc *sc)
3646{
3647	device_t dev;
3648	int size, thresh;
3649
3650	dev = sc->vtnet_dev;
3651	size = virtqueue_size(sc->vtnet_txqs[0].vtntx_vq);
3652
3653	/*
3654	 * The Tx interrupt is disabled until the queue free count falls
3655	 * below our threshold. Completed frames are drained from the Tx
3656	 * virtqueue before transmitting new frames and in the watchdog
3657	 * callout, so the frequency of Tx interrupts is greatly reduced,
3658	 * at the cost of not freeing mbufs as quickly as they otherwise
3659	 * would be.
3660	 *
3661	 * N.B. We assume all the Tx queues are the same size.
3662	 */
3663	thresh = size / 4;
3664
3665	/*
3666	 * Without indirect descriptors, leave enough room for the most
3667	 * segments we handle.
3668	 */
3669	if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0 &&
3670	    thresh < sc->vtnet_tx_nsegs)
3671		thresh = sc->vtnet_tx_nsegs;
3672
3673	sc->vtnet_tx_intr_thresh = thresh;
3674}
3675
3676static void
3677vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx,
3678    struct sysctl_oid_list *child, struct vtnet_rxq *rxq)
3679{
3680	struct sysctl_oid *node;
3681	struct sysctl_oid_list *list;
3682	struct vtnet_rxq_stats *stats;
3683	char namebuf[16];
3684
3685	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id);
3686	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
3687	    CTLFLAG_RD, NULL, "Receive Queue");
3688	list = SYSCTL_CHILDREN(node);
3689
3690	stats = &rxq->vtnrx_stats;
3691
3692	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
3693	    &stats->vrxs_ipackets, "Receive packets");
3694	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
3695	    &stats->vrxs_ibytes, "Receive bytes");
3696	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
3697	    &stats->vrxs_iqdrops, "Receive drops");
3698	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
3699	    &stats->vrxs_ierrors, "Receive errors");
3700	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3701	    &stats->vrxs_csum, "Receive checksum offloaded");
3702	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD,
3703	    &stats->vrxs_csum_failed, "Receive checksum offload failed");
3704	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
3705	    &stats->vrxs_rescheduled,
3706	    "Receive interrupt handler rescheduled");
3707}
3708
3709static void
3710vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx,
3711    struct sysctl_oid_list *child, struct vtnet_txq *txq)
3712{
3713	struct sysctl_oid *node;
3714	struct sysctl_oid_list *list;
3715	struct vtnet_txq_stats *stats;
3716	char namebuf[16];
3717
3718	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id);
3719	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
3720	    CTLFLAG_RD, NULL, "Transmit Queue");
3721	list = SYSCTL_CHILDREN(node);
3722
3723	stats = &txq->vtntx_stats;
3724
3725	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
3726	    &stats->vtxs_opackets, "Transmit packets");
3727	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
3728	    &stats->vtxs_obytes, "Transmit bytes");
3729	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
3730	    &stats->vtxs_omcasts, "Transmit multicasts");
3731	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3732	    &stats->vtxs_csum, "Transmit checksum offloaded");
3733	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
3734	    &stats->vtxs_tso, "Transmit segmentation offloaded");
3735	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
3736	    &stats->vtxs_rescheduled,
3737	    "Transmit interrupt handler rescheduled");
3738}
3739
3740static void
3741vtnet_setup_queue_sysctl(struct vtnet_softc *sc)
3742{
3743	device_t dev;
3744	struct sysctl_ctx_list *ctx;
3745	struct sysctl_oid *tree;
3746	struct sysctl_oid_list *child;
3747	int i;
3748
3749	dev = sc->vtnet_dev;
3750	ctx = device_get_sysctl_ctx(dev);
3751	tree = device_get_sysctl_tree(dev);
3752	child = SYSCTL_CHILDREN(tree);
3753
3754	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3755		vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]);
3756		vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]);
3757	}
3758}
3759
3760static void
3761vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx,
3762    struct sysctl_oid_list *child, struct vtnet_softc *sc)
3763{
3764	struct vtnet_statistics *stats;
3765
3766	stats = &sc->vtnet_stats;
3767
3768	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed",
3769	    CTLFLAG_RD, &stats->mbuf_alloc_failed,
3770	    "Mbuf cluster allocation failures");
3771
3772	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large",
3773	    CTLFLAG_RD, &stats->rx_frame_too_large,
3774	    "Received frame larger than the mbuf chain");
3775	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed",
3776	    CTLFLAG_RD, &stats->rx_enq_replacement_failed,
3777	    "Enqueuing the replacement receive mbuf failed");
3778	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed",
3779	    CTLFLAG_RD, &stats->rx_mergeable_failed,
3780	    "Mergeable buffers receive failures");
3781	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype",
3782	    CTLFLAG_RD, &stats->rx_csum_bad_ethtype,
3783	    "Received checksum offloaded buffer with unsupported "
3784	    "Ethernet type");
3785	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto",
3786	    CTLFLAG_RD, &stats->rx_csum_bad_ipproto,
3787	    "Received checksum offloaded buffer with incorrect IP protocol");
3788	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset",
3789	    CTLFLAG_RD, &stats->rx_csum_bad_offset,
3790	    "Received checksum offloaded buffer with incorrect offset");
3791	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto",
3792	    CTLFLAG_RD, &stats->rx_csum_bad_proto,
3793	    "Received checksum offloaded buffer with incorrect protocol");
3794	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed",
3795	    CTLFLAG_RD, &stats->rx_csum_failed,
3796	    "Received buffer checksum offload failed");
3797	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded",
3798	    CTLFLAG_RD, &stats->rx_csum_offloaded,
3799	    "Received buffer checksum offload succeeded");
3800	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled",
3801	    CTLFLAG_RD, &stats->rx_task_rescheduled,
3802	    "Times the receive interrupt task rescheduled itself");
3803
3804	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_bad_ethtype",
3805	    CTLFLAG_RD, &stats->tx_csum_bad_ethtype,
3806	    "Aborted transmit of checksum offloaded buffer with unknown "
3807	    "Ethernet type");
3808	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_bad_ethtype",
3809	    CTLFLAG_RD, &stats->tx_tso_bad_ethtype,
3810	    "Aborted transmit of TSO buffer with unknown Ethernet type");
3811	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp",
3812	    CTLFLAG_RD, &stats->tx_tso_not_tcp,
3813	    "Aborted transmit of TSO buffer with non TCP protocol");
3814	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged",
3815	    CTLFLAG_RD, &stats->tx_defragged,
3816	    "Transmit mbufs defragged");
3817	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed",
3818	    CTLFLAG_RD, &stats->tx_defrag_failed,
3819	    "Aborted transmit of buffer because defrag failed");
3820	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded",
3821	    CTLFLAG_RD, &stats->tx_csum_offloaded,
3822	    "Offloaded checksum of transmitted buffer");
3823	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded",
3824	    CTLFLAG_RD, &stats->tx_tso_offloaded,
3825	    "Segmentation offload of transmitted buffer");
3826	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled",
3827	    CTLFLAG_RD, &stats->tx_task_rescheduled,
3828	    "Times the transmit interrupt task rescheduled itself");
3829}
3830
3831static void
3832vtnet_setup_sysctl(struct vtnet_softc *sc)
3833{
3834	device_t dev;
3835	struct sysctl_ctx_list *ctx;
3836	struct sysctl_oid *tree;
3837	struct sysctl_oid_list *child;
3838
3839	dev = sc->vtnet_dev;
3840	ctx = device_get_sysctl_ctx(dev);
3841	tree = device_get_sysctl_tree(dev);
3842	child = SYSCTL_CHILDREN(tree);
3843
3844	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs",
3845	    CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0,
3846	    "Maximum number of supported virtqueue pairs");
3847	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs",
3848	    CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0,
3849	    "Number of active virtqueue pairs");
3850
3851	vtnet_setup_stat_sysctl(ctx, child, sc);
3852}
3853
3854static int
3855vtnet_rxq_enable_intr(struct vtnet_rxq *rxq)
3856{
3857
3858	return (virtqueue_enable_intr(rxq->vtnrx_vq));
3859}
3860
3861static void
3862vtnet_rxq_disable_intr(struct vtnet_rxq *rxq)
3863{
3864
3865	virtqueue_disable_intr(rxq->vtnrx_vq);
3866}
3867
3868static int
3869vtnet_txq_enable_intr(struct vtnet_txq *txq)
3870{
3871	struct virtqueue *vq;
3872
3873	vq = txq->vtntx_vq;
3874
3875	if (vtnet_txq_below_threshold(txq) != 0)
3876		return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG));
3877
3878	/*
3879	 * The free count is above our threshold. Keep the Tx interrupt
3880	 * disabled until the queue is fuller.
3881	 */
3882	return (0);
3883}
3884
3885static void
3886vtnet_txq_disable_intr(struct vtnet_txq *txq)
3887{
3888
3889	virtqueue_disable_intr(txq->vtntx_vq);
3890}
3891
3892static void
3893vtnet_enable_rx_interrupts(struct vtnet_softc *sc)
3894{
3895	int i;
3896
3897	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3898		vtnet_rxq_enable_intr(&sc->vtnet_rxqs[i]);
3899}
3900
3901static void
3902vtnet_enable_tx_interrupts(struct vtnet_softc *sc)
3903{
3904	int i;
3905
3906	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3907		vtnet_txq_enable_intr(&sc->vtnet_txqs[i]);
3908}
3909
3910static void
3911vtnet_enable_interrupts(struct vtnet_softc *sc)
3912{
3913
3914	vtnet_enable_rx_interrupts(sc);
3915	vtnet_enable_tx_interrupts(sc);
3916}
3917
3918static void
3919vtnet_disable_rx_interrupts(struct vtnet_softc *sc)
3920{
3921	int i;
3922
3923	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3924		vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]);
3925}
3926
3927static void
3928vtnet_disable_tx_interrupts(struct vtnet_softc *sc)
3929{
3930	int i;
3931
3932	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3933		vtnet_txq_disable_intr(&sc->vtnet_txqs[i]);
3934}
3935
3936static void
3937vtnet_disable_interrupts(struct vtnet_softc *sc)
3938{
3939
3940	vtnet_disable_rx_interrupts(sc);
3941	vtnet_disable_tx_interrupts(sc);
3942}
3943
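/*
 * Fetch a per-device tunable, e.g. hw.vtnet.0.rx_process_limit, falling
 * back to the supplied default when the tunable is not set.
 */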
3944static int
3945vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def)
3946{
3947	char path[64];
3948
3949	snprintf(path, sizeof(path),
3950	    "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob);
3951	TUNABLE_INT_FETCH(path, &def);
3952
3953	return (def);
3954}
3955