1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/* Driver for VirtIO network devices. */
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD$");
33
34#include <sys/param.h>
35#include <sys/eventhandler.h>
36#include <sys/systm.h>
37#include <sys/kernel.h>
38#include <sys/sockio.h>
39#include <sys/mbuf.h>
40#include <sys/malloc.h>
41#include <sys/module.h>
42#include <sys/socket.h>
43#include <sys/sysctl.h>
44#include <sys/random.h>
45#include <sys/sglist.h>
46#include <sys/lock.h>
47#include <sys/mutex.h>
48#include <sys/taskqueue.h>
49#include <sys/smp.h>
50#include <machine/smp.h>
51
52#include <vm/uma.h>
53
54#include <net/debugnet.h>
55#include <net/ethernet.h>
56#include <net/pfil.h>
57#include <net/if.h>
58#include <net/if_var.h>
59#include <net/if_arp.h>
60#include <net/if_dl.h>
61#include <net/if_types.h>
62#include <net/if_media.h>
63#include <net/if_vlan_var.h>
64
65#include <net/bpf.h>
66
67#include <netinet/in_systm.h>
68#include <netinet/in.h>
69#include <netinet/ip.h>
70#include <netinet/ip6.h>
71#include <netinet6/ip6_var.h>
72#include <netinet/udp.h>
73#include <netinet/tcp.h>
74#include <netinet/tcp_lro.h>
75
76#include <machine/bus.h>
77#include <machine/resource.h>
78#include <sys/bus.h>
79#include <sys/rman.h>
80
81#include <dev/virtio/virtio.h>
82#include <dev/virtio/virtqueue.h>
83#include <dev/virtio/network/virtio_net.h>
84#include <dev/virtio/network/if_vtnetvar.h>
85#include "virtio_if.h"
86
87#include "opt_inet.h"
88#include "opt_inet6.h"
89
90#if defined(INET) || defined(INET6)
91#include <machine/in_cksum.h>
92#endif
93
94static int	vtnet_modevent(module_t, int, void *);
95
96static int	vtnet_probe(device_t);
97static int	vtnet_attach(device_t);
98static int	vtnet_detach(device_t);
99static int	vtnet_suspend(device_t);
100static int	vtnet_resume(device_t);
101static int	vtnet_shutdown(device_t);
102static int	vtnet_attach_completed(device_t);
103static int	vtnet_config_change(device_t);
104
105static int	vtnet_negotiate_features(struct vtnet_softc *);
106static int	vtnet_setup_features(struct vtnet_softc *);
107static int	vtnet_init_rxq(struct vtnet_softc *, int);
108static int	vtnet_init_txq(struct vtnet_softc *, int);
109static int	vtnet_alloc_rxtx_queues(struct vtnet_softc *);
110static void	vtnet_free_rxtx_queues(struct vtnet_softc *);
111static int	vtnet_alloc_rx_filters(struct vtnet_softc *);
112static void	vtnet_free_rx_filters(struct vtnet_softc *);
113static int	vtnet_alloc_virtqueues(struct vtnet_softc *);
114static int	vtnet_alloc_interface(struct vtnet_softc *);
115static int	vtnet_setup_interface(struct vtnet_softc *);
116static int	vtnet_ioctl_mtu(struct vtnet_softc *, int);
117static int	vtnet_ioctl_ifflags(struct vtnet_softc *);
118static int	vtnet_ioctl_multi(struct vtnet_softc *);
119static int	vtnet_ioctl_ifcap(struct vtnet_softc *, struct ifreq *);
120static int	vtnet_ioctl(struct ifnet *, u_long, caddr_t);
121static uint64_t	vtnet_get_counter(struct ifnet *, ift_counter);
122
123static int	vtnet_rxq_populate(struct vtnet_rxq *);
124static void	vtnet_rxq_free_mbufs(struct vtnet_rxq *);
125static struct mbuf *
126		vtnet_rx_alloc_buf(struct vtnet_softc *, int , struct mbuf **);
127static int	vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *,
128		    struct mbuf *, int);
129static int	vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
130static int	vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
131static int	vtnet_rxq_new_buf(struct vtnet_rxq *);
132static int	vtnet_rxq_csum_needs_csum(struct vtnet_rxq *, struct mbuf *,
133		     uint16_t, int, struct virtio_net_hdr *);
134static int	vtnet_rxq_csum_data_valid(struct vtnet_rxq *, struct mbuf *,
135		     uint16_t, int, struct virtio_net_hdr *);
136static int	vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
137		     struct virtio_net_hdr *);
138static void	vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
139static void	vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *);
140static int	vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int);
141static void	vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *,
142		    struct virtio_net_hdr *);
143static int	vtnet_rxq_eof(struct vtnet_rxq *);
144static void	vtnet_rx_vq_process(struct vtnet_rxq *rxq, int tries);
145static void	vtnet_rx_vq_intr(void *);
146static void	vtnet_rxq_tq_intr(void *, int);
147
148static int	vtnet_txq_intr_threshold(struct vtnet_txq *);
149static int	vtnet_txq_below_threshold(struct vtnet_txq *);
150static int	vtnet_txq_notify(struct vtnet_txq *);
151static void	vtnet_txq_free_mbufs(struct vtnet_txq *);
152static int	vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
153		    int *, int *, int *);
154static int	vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int,
155		    int, struct virtio_net_hdr *);
156static struct mbuf *
157		vtnet_txq_offload(struct vtnet_txq *, struct mbuf *,
158		    struct virtio_net_hdr *);
159static int	vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **,
160		    struct vtnet_tx_header *);
161static int	vtnet_txq_encap(struct vtnet_txq *, struct mbuf **, int);
162#ifdef VTNET_LEGACY_TX
163static void	vtnet_start_locked(struct vtnet_txq *, struct ifnet *);
164static void	vtnet_start(struct ifnet *);
165#else
166static int	vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *);
167static int	vtnet_txq_mq_start(struct ifnet *, struct mbuf *);
168static void	vtnet_txq_tq_deferred(void *, int);
169#endif
170static void	vtnet_txq_start(struct vtnet_txq *);
171static void	vtnet_txq_tq_intr(void *, int);
172static int	vtnet_txq_eof(struct vtnet_txq *);
173static void	vtnet_tx_vq_intr(void *);
174static void	vtnet_tx_start_all(struct vtnet_softc *);
175
176#ifndef VTNET_LEGACY_TX
177static void	vtnet_qflush(struct ifnet *);
178#endif
179
180static int	vtnet_watchdog(struct vtnet_txq *);
181static void	vtnet_accum_stats(struct vtnet_softc *,
182		    struct vtnet_rxq_stats *, struct vtnet_txq_stats *);
183static void	vtnet_tick(void *);
184
185static void	vtnet_start_taskqueues(struct vtnet_softc *);
186static void	vtnet_free_taskqueues(struct vtnet_softc *);
187static void	vtnet_drain_taskqueues(struct vtnet_softc *);
188
189static void	vtnet_drain_rxtx_queues(struct vtnet_softc *);
190static void	vtnet_stop_rendezvous(struct vtnet_softc *);
191static void	vtnet_stop(struct vtnet_softc *);
192static int	vtnet_virtio_reinit(struct vtnet_softc *);
193static void	vtnet_init_rx_filters(struct vtnet_softc *);
194static int	vtnet_init_rx_queues(struct vtnet_softc *);
195static int	vtnet_init_tx_queues(struct vtnet_softc *);
196static int	vtnet_init_rxtx_queues(struct vtnet_softc *);
197static void	vtnet_set_active_vq_pairs(struct vtnet_softc *);
198static void	vtnet_update_rx_offloads(struct vtnet_softc *);
199static int	vtnet_reinit(struct vtnet_softc *);
200static void	vtnet_init_locked(struct vtnet_softc *, int);
201static void	vtnet_init(void *);
202
203static void	vtnet_free_ctrl_vq(struct vtnet_softc *);
204static void	vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
205		    struct sglist *, int, int);
206static int	vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
207static int	vtnet_ctrl_guest_offloads(struct vtnet_softc *, uint64_t);
208static int	vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t);
209static int	vtnet_ctrl_rx_cmd(struct vtnet_softc *, uint8_t, int);
210static int	vtnet_set_promisc(struct vtnet_softc *, int);
211static int	vtnet_set_allmulti(struct vtnet_softc *, int);
212static void	vtnet_rx_filter(struct vtnet_softc *);
213static void	vtnet_rx_filter_mac(struct vtnet_softc *);
214static int	vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
215static void	vtnet_rx_filter_vlan(struct vtnet_softc *);
216static void	vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t);
217static void	vtnet_register_vlan(void *, struct ifnet *, uint16_t);
218static void	vtnet_unregister_vlan(void *, struct ifnet *, uint16_t);
219
220static void	vtnet_update_speed_duplex(struct vtnet_softc *);
221static int	vtnet_is_link_up(struct vtnet_softc *);
222static void	vtnet_update_link_status(struct vtnet_softc *);
223static int	vtnet_ifmedia_upd(struct ifnet *);
224static void	vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *);
225static void	vtnet_get_macaddr(struct vtnet_softc *);
226static void	vtnet_set_macaddr(struct vtnet_softc *);
227static void	vtnet_attached_set_macaddr(struct vtnet_softc *);
228static void	vtnet_vlan_tag_remove(struct mbuf *);
229static void	vtnet_set_rx_process_limit(struct vtnet_softc *);
230
231static void	vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
232		    struct sysctl_oid_list *, struct vtnet_rxq *);
233static void	vtnet_setup_txq_sysctl(struct sysctl_ctx_list *,
234		    struct sysctl_oid_list *, struct vtnet_txq *);
235static void	vtnet_setup_queue_sysctl(struct vtnet_softc *);
236static void	vtnet_load_tunables(struct vtnet_softc *);
237static void	vtnet_setup_sysctl(struct vtnet_softc *);
238
239static int	vtnet_rxq_enable_intr(struct vtnet_rxq *);
240static void	vtnet_rxq_disable_intr(struct vtnet_rxq *);
241static int	vtnet_txq_enable_intr(struct vtnet_txq *);
242static void	vtnet_txq_disable_intr(struct vtnet_txq *);
243static void	vtnet_enable_rx_interrupts(struct vtnet_softc *);
244static void	vtnet_enable_tx_interrupts(struct vtnet_softc *);
245static void	vtnet_enable_interrupts(struct vtnet_softc *);
246static void	vtnet_disable_rx_interrupts(struct vtnet_softc *);
247static void	vtnet_disable_tx_interrupts(struct vtnet_softc *);
248static void	vtnet_disable_interrupts(struct vtnet_softc *);
249
250static int	vtnet_tunable_int(struct vtnet_softc *, const char *, int);
251
252DEBUGNET_DEFINE(vtnet);
253
254#define vtnet_htog16(_sc, _val)	virtio_htog16(vtnet_modern(_sc), _val)
255#define vtnet_htog32(_sc, _val)	virtio_htog32(vtnet_modern(_sc), _val)
256#define vtnet_htog64(_sc, _val)	virtio_htog64(vtnet_modern(_sc), _val)
257#define vtnet_gtoh16(_sc, _val)	virtio_gtoh16(vtnet_modern(_sc), _val)
258#define vtnet_gtoh32(_sc, _val)	virtio_gtoh32(vtnet_modern(_sc), _val)
259#define vtnet_gtoh64(_sc, _val)	virtio_gtoh64(vtnet_modern(_sc), _val)
260
261/* Tunables. */
262static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
263    "VirtIO Net driver parameters");
264
265static int vtnet_csum_disable = 0;
266SYSCTL_INT(_hw_vtnet, OID_AUTO, csum_disable, CTLFLAG_RDTUN,
267    &vtnet_csum_disable, 0, "Disables receive and send checksum offload");
268
269static int vtnet_fixup_needs_csum = 0;
270SYSCTL_INT(_hw_vtnet, OID_AUTO, fixup_needs_csum, CTLFLAG_RDTUN,
271    &vtnet_fixup_needs_csum, 0,
272    "Calculate valid checksum for NEEDS_CSUM packets");
273
274static int vtnet_tso_disable = 0;
275SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_disable, CTLFLAG_RDTUN,
276    &vtnet_tso_disable, 0, "Disables TSO");
277
278static int vtnet_lro_disable = 0;
279SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_disable, CTLFLAG_RDTUN,
280    &vtnet_lro_disable, 0, "Disables hardware LRO");
281
282static int vtnet_mq_disable = 0;
283SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_disable, CTLFLAG_RDTUN,
284    &vtnet_mq_disable, 0, "Disables multiqueue support");
285
286static int vtnet_mq_max_pairs = VTNET_MAX_QUEUE_PAIRS;
287SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_max_pairs, CTLFLAG_RDTUN,
288    &vtnet_mq_max_pairs, 0, "Maximum number of multiqueue pairs");
289
290static int vtnet_tso_maxlen = IP_MAXPACKET;
291SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
292    &vtnet_tso_maxlen, 0, "TSO burst limit");
293
294static int vtnet_rx_process_limit = 1024;
295SYSCTL_INT(_hw_vtnet, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
296    &vtnet_rx_process_limit, 0,
297    "Number of RX segments processed in one pass");
298
299static int vtnet_lro_entry_count = 128;
300SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
301    &vtnet_lro_entry_count, 0, "Software LRO entry count");
302
303/* Enable sorted LRO, and the depth of the mbuf queue. */
304static int vtnet_lro_mbufq_depth = 0;
305SYSCTL_UINT(_hw_vtnet, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
306    &vtnet_lro_mbufq_depth, 0, "Depth of software LRO mbuf queue");
307
308static uma_zone_t vtnet_tx_header_zone;
309
310static struct virtio_feature_desc vtnet_feature_desc[] = {
311	{ VIRTIO_NET_F_CSUM,			"TxChecksum"		},
312	{ VIRTIO_NET_F_GUEST_CSUM,		"RxChecksum"		},
313	{ VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,	"CtrlRxOffloads"	},
314	{ VIRTIO_NET_F_MAC,			"MAC"			},
315	{ VIRTIO_NET_F_GSO,			"TxGSO"			},
316	{ VIRTIO_NET_F_GUEST_TSO4,		"RxLROv4"		},
317	{ VIRTIO_NET_F_GUEST_TSO6,		"RxLROv6"		},
318	{ VIRTIO_NET_F_GUEST_ECN,		"RxLROECN"		},
319	{ VIRTIO_NET_F_GUEST_UFO,		"RxUFO"			},
320	{ VIRTIO_NET_F_HOST_TSO4,		"TxTSOv4"		},
321	{ VIRTIO_NET_F_HOST_TSO6,		"TxTSOv6"		},
322	{ VIRTIO_NET_F_HOST_ECN,		"TxTSOECN"		},
323	{ VIRTIO_NET_F_HOST_UFO,		"TxUFO"			},
324	{ VIRTIO_NET_F_MRG_RXBUF,		"MrgRxBuf"		},
325	{ VIRTIO_NET_F_STATUS,			"Status"		},
326	{ VIRTIO_NET_F_CTRL_VQ,			"CtrlVq"		},
327	{ VIRTIO_NET_F_CTRL_RX,			"CtrlRxMode"		},
328	{ VIRTIO_NET_F_CTRL_VLAN,		"CtrlVLANFilter"	},
329	{ VIRTIO_NET_F_CTRL_RX_EXTRA,		"CtrlRxModeExtra"	},
330	{ VIRTIO_NET_F_GUEST_ANNOUNCE,		"GuestAnnounce"		},
331	{ VIRTIO_NET_F_MQ,			"Multiqueue"		},
332	{ VIRTIO_NET_F_CTRL_MAC_ADDR,		"CtrlMacAddr"		},
333	{ VIRTIO_NET_F_SPEED_DUPLEX,		"SpeedDuplex"		},
334
335	{ 0, NULL }
336};
337
338static device_method_t vtnet_methods[] = {
339	/* Device methods. */
340	DEVMETHOD(device_probe,			vtnet_probe),
341	DEVMETHOD(device_attach,		vtnet_attach),
342	DEVMETHOD(device_detach,		vtnet_detach),
343	DEVMETHOD(device_suspend,		vtnet_suspend),
344	DEVMETHOD(device_resume,		vtnet_resume),
345	DEVMETHOD(device_shutdown,		vtnet_shutdown),
346
347	/* VirtIO methods. */
348	DEVMETHOD(virtio_attach_completed,	vtnet_attach_completed),
349	DEVMETHOD(virtio_config_change,		vtnet_config_change),
350
351	DEVMETHOD_END
352};
353
354#ifdef DEV_NETMAP
355#include <dev/netmap/if_vtnet_netmap.h>
356#endif
357
358static driver_t vtnet_driver = {
359    .name = "vtnet",
360    .methods = vtnet_methods,
361    .size = sizeof(struct vtnet_softc)
362};
363static devclass_t vtnet_devclass;
364
365VIRTIO_DRIVER_MODULE(vtnet, vtnet_driver, vtnet_devclass,
366    vtnet_modevent, 0);
367MODULE_VERSION(vtnet, 1);
368MODULE_DEPEND(vtnet, virtio, 1, 1, 1);
369#ifdef DEV_NETMAP
370MODULE_DEPEND(vtnet, netmap, 1, 1, 1);
371#endif
372
373VIRTIO_SIMPLE_PNPINFO(vtnet, VIRTIO_ID_NETWORK, "VirtIO Networking Adapter");
374
375static int
376vtnet_modevent(module_t mod, int type, void *unused)
377{
378	int error = 0;
379	static int loaded = 0;
380
381	switch (type) {
382	case MOD_LOAD:
383		if (loaded++ == 0) {
384			vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr",
385				sizeof(struct vtnet_tx_header),
386				NULL, NULL, NULL, NULL, 0, 0);
387#ifdef DEBUGNET
388			/*
389			 * We need to allocate from this zone in the transmit path, so ensure
390			 * that we have at least one item per header available.
391			 * XXX add a separate zone like we do for mbufs? otherwise we may alloc
392			 * buckets
393			 */
394			uma_zone_reserve(vtnet_tx_header_zone, DEBUGNET_MAX_IN_FLIGHT * 2);
395			uma_prealloc(vtnet_tx_header_zone, DEBUGNET_MAX_IN_FLIGHT * 2);
396#endif
397		}
398		break;
399	case MOD_QUIESCE:
400		if (uma_zone_get_cur(vtnet_tx_header_zone) > 0)
401			error = EBUSY;
402		break;
403	case MOD_UNLOAD:
404		if (--loaded == 0) {
405			uma_zdestroy(vtnet_tx_header_zone);
406			vtnet_tx_header_zone = NULL;
407		}
408		break;
409	case MOD_SHUTDOWN:
410		break;
411	default:
412		error = EOPNOTSUPP;
413		break;
414	}
415
416	return (error);
417}
418
419static int
420vtnet_probe(device_t dev)
421{
422	return (VIRTIO_SIMPLE_PROBE(dev, vtnet));
423}
424
425static int
426vtnet_attach(device_t dev)
427{
428	struct vtnet_softc *sc;
429	int error;
430
431	sc = device_get_softc(dev);
432	sc->vtnet_dev = dev;
433	virtio_set_feature_desc(dev, vtnet_feature_desc);
434
435	VTNET_CORE_LOCK_INIT(sc);
436	callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0);
437	vtnet_load_tunables(sc);
438
439	error = vtnet_alloc_interface(sc);
440	if (error) {
441		device_printf(dev, "cannot allocate interface\n");
442		goto fail;
443	}
444
445	vtnet_setup_sysctl(sc);
446
447	error = vtnet_setup_features(sc);
448	if (error) {
449		device_printf(dev, "cannot setup features\n");
450		goto fail;
451	}
452
453	error = vtnet_alloc_rx_filters(sc);
454	if (error) {
455		device_printf(dev, "cannot allocate Rx filters\n");
456		goto fail;
457	}
458
459	error = vtnet_alloc_rxtx_queues(sc);
460	if (error) {
461		device_printf(dev, "cannot allocate queues\n");
462		goto fail;
463	}
464
465	error = vtnet_alloc_virtqueues(sc);
466	if (error) {
467		device_printf(dev, "cannot allocate virtqueues\n");
468		goto fail;
469	}
470
471	error = vtnet_setup_interface(sc);
472	if (error) {
473		device_printf(dev, "cannot setup interface\n");
474		goto fail;
475	}
476
477	error = virtio_setup_intr(dev, INTR_TYPE_NET);
478	if (error) {
479		device_printf(dev, "cannot setup interrupts\n");
480		ether_ifdetach(sc->vtnet_ifp);
481		goto fail;
482	}
483
484#ifdef DEV_NETMAP
485	vtnet_netmap_attach(sc);
486#endif
487	vtnet_start_taskqueues(sc);
488
489fail:
490	if (error)
491		vtnet_detach(dev);
492
493	return (error);
494}
495
496static int
497vtnet_detach(device_t dev)
498{
499	struct vtnet_softc *sc;
500	struct ifnet *ifp;
501
502	sc = device_get_softc(dev);
503	ifp = sc->vtnet_ifp;
504
505	if (device_is_attached(dev)) {
506		VTNET_CORE_LOCK(sc);
507		vtnet_stop(sc);
508		VTNET_CORE_UNLOCK(sc);
509
510		callout_drain(&sc->vtnet_tick_ch);
511		vtnet_drain_taskqueues(sc);
512
513		ether_ifdetach(ifp);
514	}
515
516#ifdef DEV_NETMAP
517	netmap_detach(ifp);
518#endif
519
520	vtnet_free_taskqueues(sc);
521
522	if (sc->vtnet_vlan_attach != NULL) {
523		EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
524		sc->vtnet_vlan_attach = NULL;
525	}
526	if (sc->vtnet_vlan_detach != NULL) {
527		EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vtnet_vlan_detach);
528		sc->vtnet_vlan_detach = NULL;
529	}
530
531	ifmedia_removeall(&sc->vtnet_media);
532
533	if (ifp != NULL) {
534		if_free(ifp);
535		sc->vtnet_ifp = NULL;
536	}
537
538	vtnet_free_rxtx_queues(sc);
539	vtnet_free_rx_filters(sc);
540
541	if (sc->vtnet_ctrl_vq != NULL)
542		vtnet_free_ctrl_vq(sc);
543
544	VTNET_CORE_LOCK_DESTROY(sc);
545
546	return (0);
547}
548
549static int
550vtnet_suspend(device_t dev)
551{
552	struct vtnet_softc *sc;
553
554	sc = device_get_softc(dev);
555
556	VTNET_CORE_LOCK(sc);
557	vtnet_stop(sc);
558	sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
559	VTNET_CORE_UNLOCK(sc);
560
561	return (0);
562}
563
564static int
565vtnet_resume(device_t dev)
566{
567	struct vtnet_softc *sc;
568	struct ifnet *ifp;
569
570	sc = device_get_softc(dev);
571	ifp = sc->vtnet_ifp;
572
573	VTNET_CORE_LOCK(sc);
574	if (ifp->if_flags & IFF_UP)
575		vtnet_init_locked(sc, 0);
576	sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
577	VTNET_CORE_UNLOCK(sc);
578
579	return (0);
580}
581
582static int
583vtnet_shutdown(device_t dev)
584{
585	/*
586	 * Suspend already does all of what we need to
587	 * do here; we just never expect to be resumed.
588	 */
589	return (vtnet_suspend(dev));
590}
591
592static int
593vtnet_attach_completed(device_t dev)
594{
595	struct vtnet_softc *sc;
596
597	sc = device_get_softc(dev);
598
599	VTNET_CORE_LOCK(sc);
600	vtnet_attached_set_macaddr(sc);
601	VTNET_CORE_UNLOCK(sc);
602
603	return (0);
604}
605
606static int
607vtnet_config_change(device_t dev)
608{
609	struct vtnet_softc *sc;
610
611	sc = device_get_softc(dev);
612
613	VTNET_CORE_LOCK(sc);
614	vtnet_update_link_status(sc);
615	if (sc->vtnet_link_active != 0)
616		vtnet_tx_start_all(sc);
617	VTNET_CORE_UNLOCK(sc);
618
619	return (0);
620}
621
622static int
623vtnet_negotiate_features(struct vtnet_softc *sc)
624{
625	device_t dev;
626	uint64_t features, negotiated_features;
627	int no_csum;
628
629	dev = sc->vtnet_dev;
630	features = virtio_bus_is_modern(dev) ? VTNET_MODERN_FEATURES :
631	    VTNET_LEGACY_FEATURES;
632
633	/*
634	 * TSO and LRO are only available when their corresponding checksum
635	 * offload feature is also negotiated.
636	 */
637	no_csum = vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable);
638	if (no_csum)
639		features &= ~(VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM);
640	if (no_csum || vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable))
641		features &= ~VTNET_TSO_FEATURES;
642	if (no_csum || vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable))
643		features &= ~VTNET_LRO_FEATURES;
644
645#ifndef VTNET_LEGACY_TX
646	if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable))
647		features &= ~VIRTIO_NET_F_MQ;
648#else
649	features &= ~VIRTIO_NET_F_MQ;
650#endif
651
652	negotiated_features = virtio_negotiate_features(dev, features);
653
654	if (virtio_with_feature(dev, VIRTIO_NET_F_MTU)) {
655		uint16_t mtu;
656
657		mtu = virtio_read_dev_config_2(dev,
658		    offsetof(struct virtio_net_config, mtu));
659		if (mtu < VTNET_MIN_MTU /* || mtu > VTNET_MAX_MTU */) {
660			device_printf(dev, "Invalid MTU value: %d. "
661			    "MTU feature disabled.\n", mtu);
662			features &= ~VIRTIO_NET_F_MTU;
663			negotiated_features =
664			    virtio_negotiate_features(dev, features);
665		}
666	}
667
668	if (virtio_with_feature(dev, VIRTIO_NET_F_MQ)) {
669		uint16_t npairs;
670
671		npairs = virtio_read_dev_config_2(dev,
672		    offsetof(struct virtio_net_config, max_virtqueue_pairs));
673		if (npairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
674		    npairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) {
675			device_printf(dev, "Invalid max_virtqueue_pairs value: "
676			    "%d. Multiqueue feature disabled.\n", npairs);
677			features &= ~VIRTIO_NET_F_MQ;
678			negotiated_features =
679			    virtio_negotiate_features(dev, features);
680		}
681	}
682
683	if (virtio_with_feature(dev, VTNET_LRO_FEATURES) &&
684	    virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) {
685		/*
686		 * LRO without mergeable buffers requires special care. This
687		 * is not ideal because every receive buffer must be large
688		 * enough to hold the maximum TCP packet, the Ethernet header,
689		 * and the header. This requires up to 34 descriptors with
690		 * MCLBYTES clusters. If we do not have indirect descriptors,
691		 * LRO is disabled since the virtqueue will not contain very
692		 * many receive buffers.
693		 */
694		if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) {
695			device_printf(dev,
696			    "Host LRO disabled since both mergeable buffers "
697			    "and indirect descriptors were not negotiated\n");
698			features &= ~VTNET_LRO_FEATURES;
699			negotiated_features =
700			    virtio_negotiate_features(dev, features);
701		} else
702			sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
703	}
704
705	sc->vtnet_features = negotiated_features;
706	sc->vtnet_negotiated_features = negotiated_features;
707
708	return (virtio_finalize_features(dev));
709}
710
711static int
712vtnet_setup_features(struct vtnet_softc *sc)
713{
714	device_t dev;
715	int error;
716
717	dev = sc->vtnet_dev;
718
719	error = vtnet_negotiate_features(sc);
720	if (error)
721		return (error);
722
723	if (virtio_with_feature(dev, VIRTIO_F_VERSION_1))
724		sc->vtnet_flags |= VTNET_FLAG_MODERN;
725	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
726		sc->vtnet_flags |= VTNET_FLAG_INDIRECT;
727	if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX))
728		sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX;
729
730	if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
731		/* This feature should always be negotiated. */
732		sc->vtnet_flags |= VTNET_FLAG_MAC;
733	}
734
735	if (virtio_with_feature(dev, VIRTIO_NET_F_MTU)) {
736		sc->vtnet_max_mtu = virtio_read_dev_config_2(dev,
737		    offsetof(struct virtio_net_config, mtu));
738	} else
739		sc->vtnet_max_mtu = VTNET_MAX_MTU;
740
741	if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
742		sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
743		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
744	} else if (vtnet_modern(sc)) {
745		/* This is identical to the mergeable header. */
746		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_v1);
747	} else
748		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
749
750	if (vtnet_modern(sc) || sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
751		sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_INLINE;
752	else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
753		sc->vtnet_rx_nsegs = VTNET_RX_SEGS_LRO_NOMRG;
754	else
755		sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_SEPARATE;
756
757	/*
758	 * Favor "hardware" LRO if negotiated, but support software LRO as
759	 * a fallback; there is usually little benefit (or worse) with both.
760	 */
761	if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) == 0 &&
762	    virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6) == 0)
763		sc->vtnet_flags |= VTNET_FLAG_SW_LRO;
764
765	if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) ||
766	    virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) ||
767	    virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
768		sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MAX;
769	else
770		sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MIN;
771
772	sc->vtnet_req_vq_pairs = 1;
773	sc->vtnet_max_vq_pairs = 1;
774
775	if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
776		sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;
777
778		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
779			sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
780		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
781			sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
782		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR))
783			sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;
784
785		if (virtio_with_feature(dev, VIRTIO_NET_F_MQ)) {
786			sc->vtnet_max_vq_pairs = virtio_read_dev_config_2(dev,
787			    offsetof(struct virtio_net_config,
788			    max_virtqueue_pairs));
789		}
790	}
791
792	if (sc->vtnet_max_vq_pairs > 1) {
793		int req;
794
795		/*
796		 * Limit the maximum number of requested queue pairs to the
797		 * number of CPUs and the configured maximum.
798		 */
799		req = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs);
800		if (req < 0)
801			req = 1;
802		if (req == 0)
803			req = mp_ncpus;
804		if (req > sc->vtnet_max_vq_pairs)
805			req = sc->vtnet_max_vq_pairs;
806		if (req > mp_ncpus)
807			req = mp_ncpus;
808		if (req > 1) {
809			sc->vtnet_req_vq_pairs = req;
810			sc->vtnet_flags |= VTNET_FLAG_MQ;
811		}
812	}
813
814	return (0);
815}
816
817static int
818vtnet_init_rxq(struct vtnet_softc *sc, int id)
819{
820	struct vtnet_rxq *rxq;
821
822	rxq = &sc->vtnet_rxqs[id];
823
824	snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d",
825	    device_get_nameunit(sc->vtnet_dev), id);
826	mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF);
827
828	rxq->vtnrx_sc = sc;
829	rxq->vtnrx_id = id;
830
831	rxq->vtnrx_sg = sglist_alloc(sc->vtnet_rx_nsegs, M_NOWAIT);
832	if (rxq->vtnrx_sg == NULL)
833		return (ENOMEM);
834
835#if defined(INET) || defined(INET6)
836	if (vtnet_software_lro(sc)) {
837		if (tcp_lro_init_args(&rxq->vtnrx_lro, sc->vtnet_ifp,
838		    sc->vtnet_lro_entry_count, sc->vtnet_lro_mbufq_depth) != 0)
839			return (ENOMEM);
840	}
841#endif
842
843	NET_TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq);
844	rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT,
845	    taskqueue_thread_enqueue, &rxq->vtnrx_tq);
846
847	return (rxq->vtnrx_tq == NULL ? ENOMEM : 0);
848}
849
850static int
851vtnet_init_txq(struct vtnet_softc *sc, int id)
852{
853	struct vtnet_txq *txq;
854
855	txq = &sc->vtnet_txqs[id];
856
857	snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d",
858	    device_get_nameunit(sc->vtnet_dev), id);
859	mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF);
860
861	txq->vtntx_sc = sc;
862	txq->vtntx_id = id;
863
864	txq->vtntx_sg = sglist_alloc(sc->vtnet_tx_nsegs, M_NOWAIT);
865	if (txq->vtntx_sg == NULL)
866		return (ENOMEM);
867
868#ifndef VTNET_LEGACY_TX
869	txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF,
870	    M_NOWAIT, &txq->vtntx_mtx);
871	if (txq->vtntx_br == NULL)
872		return (ENOMEM);
873
874	TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq);
875#endif
876	TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq);
877	txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT,
878	    taskqueue_thread_enqueue, &txq->vtntx_tq);
879	if (txq->vtntx_tq == NULL)
880		return (ENOMEM);
881
882	return (0);
883}
884
885static int
886vtnet_alloc_rxtx_queues(struct vtnet_softc *sc)
887{
888	int i, npairs, error;
889
890	npairs = sc->vtnet_max_vq_pairs;
891
892	sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF,
893	    M_NOWAIT | M_ZERO);
894	sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF,
895	    M_NOWAIT | M_ZERO);
896	if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL)
897		return (ENOMEM);
898
899	for (i = 0; i < npairs; i++) {
900		error = vtnet_init_rxq(sc, i);
901		if (error)
902			return (error);
903		error = vtnet_init_txq(sc, i);
904		if (error)
905			return (error);
906	}
907
908	vtnet_set_rx_process_limit(sc);
909	vtnet_setup_queue_sysctl(sc);
910
911	return (0);
912}
913
914static void
915vtnet_destroy_rxq(struct vtnet_rxq *rxq)
916{
917
918	rxq->vtnrx_sc = NULL;
919	rxq->vtnrx_id = -1;
920
921#if defined(INET) || defined(INET6)
922	tcp_lro_free(&rxq->vtnrx_lro);
923#endif
924
925	if (rxq->vtnrx_sg != NULL) {
926		sglist_free(rxq->vtnrx_sg);
927		rxq->vtnrx_sg = NULL;
928	}
929
930	if (mtx_initialized(&rxq->vtnrx_mtx) != 0)
931		mtx_destroy(&rxq->vtnrx_mtx);
932}
933
934static void
935vtnet_destroy_txq(struct vtnet_txq *txq)
936{
937
938	txq->vtntx_sc = NULL;
939	txq->vtntx_id = -1;
940
941	if (txq->vtntx_sg != NULL) {
942		sglist_free(txq->vtntx_sg);
943		txq->vtntx_sg = NULL;
944	}
945
946#ifndef VTNET_LEGACY_TX
947	if (txq->vtntx_br != NULL) {
948		buf_ring_free(txq->vtntx_br, M_DEVBUF);
949		txq->vtntx_br = NULL;
950	}
951#endif
952
953	if (mtx_initialized(&txq->vtntx_mtx) != 0)
954		mtx_destroy(&txq->vtntx_mtx);
955}
956
957static void
958vtnet_free_rxtx_queues(struct vtnet_softc *sc)
959{
960	int i;
961
962	if (sc->vtnet_rxqs != NULL) {
963		for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
964			vtnet_destroy_rxq(&sc->vtnet_rxqs[i]);
965		free(sc->vtnet_rxqs, M_DEVBUF);
966		sc->vtnet_rxqs = NULL;
967	}
968
969	if (sc->vtnet_txqs != NULL) {
970		for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
971			vtnet_destroy_txq(&sc->vtnet_txqs[i]);
972		free(sc->vtnet_txqs, M_DEVBUF);
973		sc->vtnet_txqs = NULL;
974	}
975}
976
977static int
978vtnet_alloc_rx_filters(struct vtnet_softc *sc)
979{
980
981	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
982		sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter),
983		    M_DEVBUF, M_NOWAIT | M_ZERO);
984		if (sc->vtnet_mac_filter == NULL)
985			return (ENOMEM);
986	}
987
988	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
989		sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) *
990		    VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO);
991		if (sc->vtnet_vlan_filter == NULL)
992			return (ENOMEM);
993	}
994
995	return (0);
996}
997
998static void
999vtnet_free_rx_filters(struct vtnet_softc *sc)
1000{
1001
1002	if (sc->vtnet_mac_filter != NULL) {
1003		free(sc->vtnet_mac_filter, M_DEVBUF);
1004		sc->vtnet_mac_filter = NULL;
1005	}
1006
1007	if (sc->vtnet_vlan_filter != NULL) {
1008		free(sc->vtnet_vlan_filter, M_DEVBUF);
1009		sc->vtnet_vlan_filter = NULL;
1010	}
1011}
1012
1013static int
1014vtnet_alloc_virtqueues(struct vtnet_softc *sc)
1015{
1016	device_t dev;
1017	struct vq_alloc_info *info;
1018	struct vtnet_rxq *rxq;
1019	struct vtnet_txq *txq;
1020	int i, idx, flags, nvqs, error;
1021
1022	dev = sc->vtnet_dev;
1023	flags = 0;
1024
1025	nvqs = sc->vtnet_max_vq_pairs * 2;
1026	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
1027		nvqs++;
1028
1029	info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT);
1030	if (info == NULL)
1031		return (ENOMEM);
1032
1033	for (i = 0, idx = 0; i < sc->vtnet_req_vq_pairs; i++, idx += 2) {
1034		rxq = &sc->vtnet_rxqs[i];
1035		VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs,
1036		    vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq,
1037		    "%s-rx%d", device_get_nameunit(dev), rxq->vtnrx_id);
1038
1039		txq = &sc->vtnet_txqs[i];
1040		VQ_ALLOC_INFO_INIT(&info[idx+1], sc->vtnet_tx_nsegs,
1041		    vtnet_tx_vq_intr, txq, &txq->vtntx_vq,
1042		    "%s-tx%d", device_get_nameunit(dev), txq->vtntx_id);
1043	}
1044
1045	/* These queues will not be used so allocate the minimum resources. */
1046	for (/**/; i < sc->vtnet_max_vq_pairs; i++, idx += 2) {
1047		rxq = &sc->vtnet_rxqs[i];
1048		VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, rxq, &rxq->vtnrx_vq,
1049		    "%s-rx%d", device_get_nameunit(dev), rxq->vtnrx_id);
1050
1051		txq = &sc->vtnet_txqs[i];
1052		VQ_ALLOC_INFO_INIT(&info[idx+1], 0, NULL, txq, &txq->vtntx_vq,
1053		    "%s-tx%d", device_get_nameunit(dev), txq->vtntx_id);
1054	}
1055
1056	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
1057		VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL,
1058		    &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev));
1059	}
1060
1061	/*
1062	 * TODO: Enable interrupt binding if this is multiqueue. This will
1063	 * only matter when per-virtqueue MSIX is available.
1064	 */
1065	if (sc->vtnet_flags & VTNET_FLAG_MQ)
1066		flags |= 0;
1067
1068	error = virtio_alloc_virtqueues(dev, flags, nvqs, info);
1069	free(info, M_TEMP);
1070
1071	return (error);
1072}
1073
1074static int
1075vtnet_alloc_interface(struct vtnet_softc *sc)
1076{
1077	device_t dev;
1078	struct ifnet *ifp;
1079
1080	dev = sc->vtnet_dev;
1081
1082	ifp = if_alloc(IFT_ETHER);
1083	if (ifp == NULL)
1084		return (ENOMEM);
1085
1086	sc->vtnet_ifp = ifp;
1087	ifp->if_softc = sc;
1088	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1089
1090	return (0);
1091}
1092
1093static int
1094vtnet_setup_interface(struct vtnet_softc *sc)
1095{
1096	device_t dev;
1097	struct pfil_head_args pa;
1098	struct ifnet *ifp;
1099
1100	dev = sc->vtnet_dev;
1101	ifp = sc->vtnet_ifp;
1102
1103	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST |
1104	    IFF_KNOWSEPOCH;
1105	ifp->if_baudrate = IF_Gbps(10);
1106	ifp->if_init = vtnet_init;
1107	ifp->if_ioctl = vtnet_ioctl;
1108	ifp->if_get_counter = vtnet_get_counter;
1109#ifndef VTNET_LEGACY_TX
1110	ifp->if_transmit = vtnet_txq_mq_start;
1111	ifp->if_qflush = vtnet_qflush;
1112#else
1113	struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq;
1114	ifp->if_start = vtnet_start;
1115	IFQ_SET_MAXLEN(&ifp->if_snd, virtqueue_size(vq) - 1);
1116	ifp->if_snd.ifq_drv_maxlen = virtqueue_size(vq) - 1;
1117	IFQ_SET_READY(&ifp->if_snd);
1118#endif
1119
1120	vtnet_get_macaddr(sc);
1121
1122	if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS))
1123		ifp->if_capabilities |= IFCAP_LINKSTATE;
1124
1125	ifmedia_init(&sc->vtnet_media, 0, vtnet_ifmedia_upd, vtnet_ifmedia_sts);
1126	ifmedia_add(&sc->vtnet_media, IFM_ETHER | IFM_AUTO, 0, NULL);
1127	ifmedia_set(&sc->vtnet_media, IFM_ETHER | IFM_AUTO);
1128
1129	if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
1130		int gso;
1131
1132		ifp->if_capabilities |= IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6;
1133
1134		gso = virtio_with_feature(dev, VIRTIO_NET_F_GSO);
1135		if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
1136			ifp->if_capabilities |= IFCAP_TSO4;
1137		if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
1138			ifp->if_capabilities |= IFCAP_TSO6;
1139		if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
1140			sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
1141
1142		if (ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) {
1143			int tso_maxlen;
1144
1145			ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
1146
1147			tso_maxlen = vtnet_tunable_int(sc, "tso_maxlen",
1148			    vtnet_tso_maxlen);
1149			ifp->if_hw_tsomax = tso_maxlen -
1150			    (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
1151			ifp->if_hw_tsomaxsegcount = sc->vtnet_tx_nsegs - 1;
1152			ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
1153		}
1154	}
1155
1156	if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) {
1157		ifp->if_capabilities |= IFCAP_RXCSUM;
1158#ifdef notyet
1159		/* BMV: Rx checksums not distinguished between IPv4 and IPv6. */
1160		ifp->if_capabilities |= IFCAP_RXCSUM_IPV6;
1161#endif
1162
1163		if (vtnet_tunable_int(sc, "fixup_needs_csum",
1164		    vtnet_fixup_needs_csum) != 0)
1165			sc->vtnet_flags |= VTNET_FLAG_FIXUP_NEEDS_CSUM;
1166
1167		/* Support either "hardware" or software LRO. */
1168		ifp->if_capabilities |= IFCAP_LRO;
1169	}
1170
1171	if (ifp->if_capabilities & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6)) {
1172		/*
1173		 * VirtIO does not support VLAN tagging, but we can fake
1174		 * it by inserting and removing the 802.1Q header during
1175		 * transmit and receive. We are then able to do checksum
1176		 * offloading of VLAN frames.
1177		 */
1178		ifp->if_capabilities |=
1179		    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
1180	}
1181
1182	if (sc->vtnet_max_mtu >= ETHERMTU_JUMBO)
1183		ifp->if_capabilities |= IFCAP_JUMBO_MTU;
1184	ifp->if_capabilities |= IFCAP_VLAN_MTU;
1185
1186	/*
1187	 * Capabilities after here are not enabled by default.
1188	 */
1189	ifp->if_capenable = ifp->if_capabilities;
1190
1191	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
1192		ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
1193
1194		sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
1195		    vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
1196		sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
1197		    vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
1198	}
1199
1200	ether_ifattach(ifp, sc->vtnet_hwaddr);
1201
1202	/* Tell the upper layer(s) we support long frames. */
1203	ifp->if_hdrlen = sizeof(struct ether_vlan_header);
1204
1205	DEBUGNET_SET(ifp, vtnet);
1206
1207	pa.pa_version = PFIL_VERSION;
1208	pa.pa_flags = PFIL_IN;
1209	pa.pa_type = PFIL_TYPE_ETHERNET;
1210	pa.pa_headname = ifp->if_xname;
1211	sc->vtnet_pfil = pfil_head_register(&pa);
1212
1213	return (0);
1214}
1215
1216static int
1217vtnet_rx_cluster_size(struct vtnet_softc *sc, int mtu)
1218{
1219	int framesz;
1220
1221	if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
1222		return (MJUMPAGESIZE);
1223	else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
1224		return (MCLBYTES);
1225
1226	/*
1227	 * Try to scale the receive mbuf cluster size from the MTU. We
1228	 * could also use the VQ size to influence the selected size,
1229	 * but that would only matter for very small queues.
1230	 */
1231	if (vtnet_modern(sc)) {
1232		MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr_v1));
1233		framesz = sizeof(struct virtio_net_hdr_v1);
1234	} else
1235		framesz = sizeof(struct vtnet_rx_header);
1236	framesz += sizeof(struct ether_vlan_header) + mtu;
1237
1238	if (framesz <= MCLBYTES)
1239		return (MCLBYTES);
1240	else if (framesz <= MJUMPAGESIZE)
1241		return (MJUMPAGESIZE);
1242	else if (framesz <= MJUM9BYTES)
1243		return (MJUM9BYTES);
1244
1245	/* Sane default; avoid 16KB clusters. */
1246	return (MCLBYTES);
1247}
1248
1249static int
1250vtnet_ioctl_mtu(struct vtnet_softc *sc, int mtu)
1251{
1252	struct ifnet *ifp;
1253	int clustersz;
1254
1255	ifp = sc->vtnet_ifp;
1256	VTNET_CORE_LOCK_ASSERT(sc);
1257
1258	if (ifp->if_mtu == mtu)
1259		return (0);
1260	else if (mtu < ETHERMIN || mtu > sc->vtnet_max_mtu)
1261		return (EINVAL);
1262
1263	ifp->if_mtu = mtu;
1264	clustersz = vtnet_rx_cluster_size(sc, mtu);
1265
1266	if (clustersz != sc->vtnet_rx_clustersz &&
1267	    ifp->if_drv_flags & IFF_DRV_RUNNING) {
1268		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1269		vtnet_init_locked(sc, 0);
1270	}
1271
1272	return (0);
1273}
1274
1275static int
1276vtnet_ioctl_ifflags(struct vtnet_softc *sc)
1277{
1278	struct ifnet *ifp;
1279	int drv_running;
1280
1281	ifp = sc->vtnet_ifp;
1282	drv_running = (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0;
1283
1284	VTNET_CORE_LOCK_ASSERT(sc);
1285
1286	if ((ifp->if_flags & IFF_UP) == 0) {
1287		if (drv_running)
1288			vtnet_stop(sc);
1289		goto out;
1290	}
1291
1292	if (!drv_running) {
1293		vtnet_init_locked(sc, 0);
1294		goto out;
1295	}
1296
1297	if ((ifp->if_flags ^ sc->vtnet_if_flags) &
1298	    (IFF_PROMISC | IFF_ALLMULTI)) {
1299		if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0)
1300			return (ENOTSUP);
1301		vtnet_rx_filter(sc);
1302	}
1303
1304out:
1305	sc->vtnet_if_flags = ifp->if_flags;
1306	return (0);
1307}
1308
1309static int
1310vtnet_ioctl_multi(struct vtnet_softc *sc)
1311{
1312	struct ifnet *ifp;
1313
1314	ifp = sc->vtnet_ifp;
1315
1316	VTNET_CORE_LOCK_ASSERT(sc);
1317
1318	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX &&
1319	    ifp->if_drv_flags & IFF_DRV_RUNNING)
1320		vtnet_rx_filter_mac(sc);
1321
1322	return (0);
1323}
1324
1325static int
1326vtnet_ioctl_ifcap(struct vtnet_softc *sc, struct ifreq *ifr)
1327{
1328	struct ifnet *ifp;
1329	int mask, reinit, update;
1330
1331	ifp = sc->vtnet_ifp;
1332	mask = (ifr->ifr_reqcap & ifp->if_capabilities) ^ ifp->if_capenable;
1333	reinit = update = 0;
1334
1335	VTNET_CORE_LOCK_ASSERT(sc);
1336
1337	if (mask & IFCAP_TXCSUM)
1338		ifp->if_capenable ^= IFCAP_TXCSUM;
1339	if (mask & IFCAP_TXCSUM_IPV6)
1340		ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1341	if (mask & IFCAP_TSO4)
1342		ifp->if_capenable ^= IFCAP_TSO4;
1343	if (mask & IFCAP_TSO6)
1344		ifp->if_capenable ^= IFCAP_TSO6;
1345
1346	if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO)) {
1347		/*
1348		 * These Rx features require the negotiated features to
1349		 * be updated. Avoid a full reinit if possible.
1350		 */
1351		if (sc->vtnet_features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)
1352			update = 1;
1353		else
1354			reinit = 1;
1355
1356		/* BMV: Avoid needless renegotiation for just software LRO. */
1357		if ((mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO)) ==
1358		    IFCAP_LRO && vtnet_software_lro(sc))
1359			reinit = update = 0;
1360
1361		if (mask & IFCAP_RXCSUM)
1362			ifp->if_capenable ^= IFCAP_RXCSUM;
1363		if (mask & IFCAP_RXCSUM_IPV6)
1364			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1365		if (mask & IFCAP_LRO)
1366			ifp->if_capenable ^= IFCAP_LRO;
1367
1368		/*
1369		 * VirtIO does not distinguish between IPv4 and IPv6 checksums
1370		 * so treat them as a pair. Guest TSO (LRO) requires receive
1371		 * checksums.
1372		 */
1373		if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
1374			ifp->if_capenable |= IFCAP_RXCSUM;
1375#ifdef notyet
1376			ifp->if_capenable |= IFCAP_RXCSUM_IPV6;
1377#endif
1378		} else
1379			ifp->if_capenable &=
1380			    ~(IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO);
1381	}
1382
1383	if (mask & IFCAP_VLAN_HWFILTER) {
1384		/* These Rx features require renegotiation. */
1385		reinit = 1;
1386
1387		if (mask & IFCAP_VLAN_HWFILTER)
1388			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1389	}
1390
1391	if (mask & IFCAP_VLAN_HWTSO)
1392		ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1393	if (mask & IFCAP_VLAN_HWTAGGING)
1394		ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1395
1396	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1397		if (reinit) {
1398			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1399			vtnet_init_locked(sc, 0);
1400		} else if (update)
1401			vtnet_update_rx_offloads(sc);
1402	}
1403
1404	return (0);
1405}
1406
1407static int
1408vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1409{
1410	struct vtnet_softc *sc;
1411	struct ifreq *ifr;
1412	int error;
1413
1414	sc = ifp->if_softc;
1415	ifr = (struct ifreq *) data;
1416	error = 0;
1417
1418	switch (cmd) {
1419	case SIOCSIFMTU:
1420		VTNET_CORE_LOCK(sc);
1421		error = vtnet_ioctl_mtu(sc, ifr->ifr_mtu);
1422		VTNET_CORE_UNLOCK(sc);
1423		break;
1424
1425	case SIOCSIFFLAGS:
1426		VTNET_CORE_LOCK(sc);
1427		error = vtnet_ioctl_ifflags(sc);
1428		VTNET_CORE_UNLOCK(sc);
1429		break;
1430
1431	case SIOCADDMULTI:
1432	case SIOCDELMULTI:
1433		VTNET_CORE_LOCK(sc);
1434		error = vtnet_ioctl_multi(sc);
1435		VTNET_CORE_UNLOCK(sc);
1436		break;
1437
1438	case SIOCSIFMEDIA:
1439	case SIOCGIFMEDIA:
1440		error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd);
1441		break;
1442
1443	case SIOCSIFCAP:
1444		VTNET_CORE_LOCK(sc);
1445		error = vtnet_ioctl_ifcap(sc, ifr);
1446		VTNET_CORE_UNLOCK(sc);
1447		VLAN_CAPABILITIES(ifp);
1448		break;
1449
1450	default:
1451		error = ether_ioctl(ifp, cmd, data);
1452		break;
1453	}
1454
1455	VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc);
1456
1457	return (error);
1458}
1459
1460static int
1461vtnet_rxq_populate(struct vtnet_rxq *rxq)
1462{
1463	struct virtqueue *vq;
1464	int nbufs, error;
1465
1466#ifdef DEV_NETMAP
1467	error = vtnet_netmap_rxq_populate(rxq);
1468	if (error >= 0)
1469		return (error);
1470#endif  /* DEV_NETMAP */
1471
1472	vq = rxq->vtnrx_vq;
1473	error = ENOSPC;
1474
1475	for (nbufs = 0; !virtqueue_full(vq); nbufs++) {
1476		error = vtnet_rxq_new_buf(rxq);
1477		if (error)
1478			break;
1479	}
1480
1481	if (nbufs > 0) {
1482		virtqueue_notify(vq);
1483		/*
1484		 * EMSGSIZE signifies the virtqueue did not have enough
1485		 * entries available to hold the last mbuf. This is not
1486		 * an error.
1487		 */
1488		if (error == EMSGSIZE)
1489			error = 0;
1490	}
1491
1492	return (error);
1493}
1494
1495static void
1496vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq)
1497{
1498	struct virtqueue *vq;
1499	struct mbuf *m;
1500	int last;
1501#ifdef DEV_NETMAP
1502	struct netmap_kring *kring = netmap_kring_on(NA(rxq->vtnrx_sc->vtnet_ifp),
1503							rxq->vtnrx_id, NR_RX);
1504#else  /* !DEV_NETMAP */
1505	void *kring = NULL;
1506#endif /* !DEV_NETMAP */
1507
1508	vq = rxq->vtnrx_vq;
1509	last = 0;
1510
1511	while ((m = virtqueue_drain(vq, &last)) != NULL) {
1512		if (kring == NULL)
1513			m_freem(m);
1514	}
1515
1516	KASSERT(virtqueue_empty(vq),
1517	    ("%s: mbufs remaining in rx queue %p", __func__, rxq));
1518}
1519
1520static struct mbuf *
1521vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
1522{
1523	struct mbuf *m_head, *m_tail, *m;
1524	int i, size;
1525
1526	m_head = NULL;
1527	size = sc->vtnet_rx_clustersz;
1528
1529	KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1530	    ("%s: mbuf %d chain requested without LRO_NOMRG", __func__, nbufs));
1531
1532	for (i = 0; i < nbufs; i++) {
1533		m = m_getjcl(M_NOWAIT, MT_DATA, i == 0 ? M_PKTHDR : 0, size);
1534		if (m == NULL) {
1535			sc->vtnet_stats.mbuf_alloc_failed++;
1536			m_freem(m_head);
1537			return (NULL);
1538		}
1539
1540		m->m_len = size;
1541		if (m_head != NULL) {
1542			m_tail->m_next = m;
1543			m_tail = m;
1544		} else
1545			m_head = m_tail = m;
1546	}
1547
1548	if (m_tailp != NULL)
1549		*m_tailp = m_tail;
1550
1551	return (m_head);
1552}
1553
1554/*
1555 * Slow path for when LRO without mergeable buffers is negotiated.
1556 */
1557static int
1558vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
1559    int len0)
1560{
1561	struct vtnet_softc *sc;
1562	struct mbuf *m, *m_prev, *m_new, *m_tail;
1563	int len, clustersz, nreplace, error;
1564
1565	sc = rxq->vtnrx_sc;
1566	clustersz = sc->vtnet_rx_clustersz;
1567
1568	m_prev = NULL;
1569	m_tail = NULL;
1570	nreplace = 0;
1571
1572	m = m0;
1573	len = len0;
1574
1575	/*
1576	 * Since these mbuf chains are so large, avoid allocating a complete
1577	 * replacement when the received frame did not consume the entire
1578	 * chain. Unused mbufs are moved to the tail of the replacement mbuf.
1579	 */
1580	while (len > 0) {
1581		if (m == NULL) {
1582			sc->vtnet_stats.rx_frame_too_large++;
1583			return (EMSGSIZE);
1584		}
1585
1586		/*
1587		 * Every mbuf should have the expected cluster size since that
1588		 * is also used to allocate the replacements.
1589		 */
1590		KASSERT(m->m_len == clustersz,
1591		    ("%s: mbuf size %d not expected cluster size %d", __func__,
1592		    m->m_len, clustersz));
1593
1594		m->m_len = MIN(m->m_len, len);
1595		len -= m->m_len;
1596
1597		m_prev = m;
1598		m = m->m_next;
1599		nreplace++;
1600	}
1601
1602	KASSERT(nreplace > 0 && nreplace <= sc->vtnet_rx_nmbufs,
1603	    ("%s: invalid replacement mbuf count %d max %d", __func__,
1604	    nreplace, sc->vtnet_rx_nmbufs));
1605
1606	m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
1607	if (m_new == NULL) {
1608		m_prev->m_len = clustersz;
1609		return (ENOBUFS);
1610	}
1611
1612	/*
1613	 * Move any unused mbufs from the received mbuf chain onto the
1614	 * end of the replacement chain.
1615	 */
1616	if (m_prev->m_next != NULL) {
1617		m_tail->m_next = m_prev->m_next;
1618		m_prev->m_next = NULL;
1619	}
1620
1621	error = vtnet_rxq_enqueue_buf(rxq, m_new);
1622	if (error) {
1623		/*
1624		 * The replacement is suppose to be an copy of the one
1625		 * dequeued so this is a very unexpected error.
1626		 *
1627		 * Restore the m0 chain to the original state if it was
1628		 * modified so we can then discard it.
1629		 */
1630		if (m_tail->m_next != NULL) {
1631			m_prev->m_next = m_tail->m_next;
1632			m_tail->m_next = NULL;
1633		}
1634		m_prev->m_len = clustersz;
1635		sc->vtnet_stats.rx_enq_replacement_failed++;
1636		m_freem(m_new);
1637	}
1638
1639	return (error);
1640}
1641
1642static int
1643vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len)
1644{
1645	struct vtnet_softc *sc;
1646	struct mbuf *m_new;
1647	int error;
1648
1649	sc = rxq->vtnrx_sc;
1650
1651	if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
1652		return (vtnet_rxq_replace_lro_nomrg_buf(rxq, m, len));
1653
1654	MPASS(m->m_next == NULL);
1655	if (m->m_len < len)
1656		return (EMSGSIZE);
1657
1658	m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
1659	if (m_new == NULL)
1660		return (ENOBUFS);
1661
1662	error = vtnet_rxq_enqueue_buf(rxq, m_new);
1663	if (error) {
1664		sc->vtnet_stats.rx_enq_replacement_failed++;
1665		m_freem(m_new);
1666	} else
1667		m->m_len = len;
1668
1669	return (error);
1670}
1671
1672static int
1673vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m)
1674{
1675	struct vtnet_softc *sc;
1676	struct sglist *sg;
1677	int header_inlined, error;
1678
1679	sc = rxq->vtnrx_sc;
1680	sg = rxq->vtnrx_sg;
1681
1682	KASSERT(m->m_next == NULL || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1683	    ("%s: mbuf chain without LRO_NOMRG", __func__));
1684	VTNET_RXQ_LOCK_ASSERT(rxq);
1685
1686	sglist_reset(sg);
1687	header_inlined = vtnet_modern(sc) ||
1688	    (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) != 0; /* TODO: ANY_LAYOUT */
1689
1690	if (header_inlined)
1691		error = sglist_append_mbuf(sg, m);
1692	else {
1693		struct vtnet_rx_header *rxhdr =
1694		    mtod(m, struct vtnet_rx_header *);
1695		MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr));
1696
1697		/* Append the header and remaining mbuf data. */
1698		error = sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size);
1699		if (error)
1700			return (error);
1701		error = sglist_append(sg, &rxhdr[1],
1702		    m->m_len - sizeof(struct vtnet_rx_header));
1703		if (error)
1704			return (error);
1705
1706		if (m->m_next != NULL)
1707			error = sglist_append_mbuf(sg, m->m_next);
1708	}
1709
1710	if (error)
1711		return (error);
1712
1713	return (virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg));
1714}
1715
1716static int
1717vtnet_rxq_new_buf(struct vtnet_rxq *rxq)
1718{
1719	struct vtnet_softc *sc;
1720	struct mbuf *m;
1721	int error;
1722
1723	sc = rxq->vtnrx_sc;
1724
1725	m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL);
1726	if (m == NULL)
1727		return (ENOBUFS);
1728
1729	error = vtnet_rxq_enqueue_buf(rxq, m);
1730	if (error)
1731		m_freem(m);
1732
1733	return (error);
1734}
1735
1736static int
1737vtnet_rxq_csum_needs_csum(struct vtnet_rxq *rxq, struct mbuf *m, uint16_t etype,
1738    int hoff, struct virtio_net_hdr *hdr)
1739{
1740	struct vtnet_softc *sc;
1741	int error;
1742
1743	sc = rxq->vtnrx_sc;
1744
1745	/*
1746	 * NEEDS_CSUM corresponds to Linux's CHECKSUM_PARTIAL, but FreeBSD does
1747	 * not have an analogous CSUM flag. The checksum has been validated,
1748	 * but is incomplete (TCP/UDP pseudo header).
1749	 *
1750	 * The packet is likely from another VM on the same host that itself
1751	 * performed checksum offloading so Tx/Rx is basically a memcpy and
1752	 * the checksum has little value.
1753	 *
1754	 * Default to receiving the packet as-is for performance reasons, but
1755	 * this can cause issues if the packet is to be forwarded because it
1756	 * does not contain a valid checksum. This patch may be helpful:
1757	 * https://reviews.freebsd.org/D6611. In the meantime, have the driver
1758	 * compute the checksum if requested.
1759	 *
1760	 * BMV: Need to add an CSUM_PARTIAL flag?
1761	 */
1762	if ((sc->vtnet_flags & VTNET_FLAG_FIXUP_NEEDS_CSUM) == 0) {
1763		error = vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr);
1764		return (error);
1765	}
1766
1767	/*
1768	 * Compute the checksum in the driver so the packet will contain a
1769	 * valid checksum. The checksum is at csum_offset from csum_start.
1770	 */
1771	switch (etype) {
1772#if defined(INET) || defined(INET6)
1773	case ETHERTYPE_IP:
1774	case ETHERTYPE_IPV6: {
1775		int csum_off, csum_end;
1776		uint16_t csum;
1777
1778		csum_off = hdr->csum_start + hdr->csum_offset;
1779		csum_end = csum_off + sizeof(uint16_t);
1780
1781		/* Assume checksum will be in the first mbuf. */
1782		if (m->m_len < csum_end || m->m_pkthdr.len < csum_end)
1783			return (1);
1784
1785		/*
1786		 * Like in_delayed_cksum()/in6_delayed_cksum(), compute the
1787		 * checksum and write it at the specified offset. We could
1788		 * try to verify the packet: csum_start should probably
1789		 * correspond to the start of the TCP/UDP header.
1790		 *
1791		 * BMV: Need to properly handle UDP with zero checksum. Is
1792		 * the IPv4 header checksum implicitly validated?
1793		 */
1794		csum = in_cksum_skip(m, m->m_pkthdr.len, hdr->csum_start);
1795		*(uint16_t *)(mtodo(m, csum_off)) = csum;
1796		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1797		m->m_pkthdr.csum_data = 0xFFFF;
1798		break;
1799	}
1800#endif
1801	default:
1802		sc->vtnet_stats.rx_csum_bad_ethtype++;
1803		return (1);
1804	}
1805
1806	return (0);
1807}
1808
1809static int
1810vtnet_rxq_csum_data_valid(struct vtnet_rxq *rxq, struct mbuf *m,
1811    uint16_t etype, int hoff, struct virtio_net_hdr *hdr)
1812{
1813	struct vtnet_softc *sc;
1814	int protocol;
1815
1816	sc = rxq->vtnrx_sc;
1817
1818	switch (etype) {
1819#if defined(INET)
1820	case ETHERTYPE_IP:
1821		if (__predict_false(m->m_len < hoff + sizeof(struct ip)))
1822			protocol = IPPROTO_DONE;
1823		else {
1824			struct ip *ip = (struct ip *)(m->m_data + hoff);
1825			protocol = ip->ip_p;
1826		}
1827		break;
1828#endif
1829#if defined(INET6)
1830	case ETHERTYPE_IPV6:
1831		if (__predict_false(m->m_len < hoff + sizeof(struct ip6_hdr))
1832		    || ip6_lasthdr(m, hoff, IPPROTO_IPV6, &protocol) < 0)
1833			protocol = IPPROTO_DONE;
1834		break;
1835#endif
1836	default:
1837		protocol = IPPROTO_DONE;
1838		break;
1839	}
1840
1841	switch (protocol) {
1842	case IPPROTO_TCP:
1843	case IPPROTO_UDP:
1844		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1845		m->m_pkthdr.csum_data = 0xFFFF;
1846		break;
1847	default:
1848		/*
1849		 * FreeBSD does not support checksum offloading of this
1850		 * protocol. Let the stack re-verify the checksum later
1851		 * if the protocol is supported.
1852		 */
1853#if 0
1854		if_printf(sc->vtnet_ifp,
1855		    "%s: checksum offload of unsupported protocol "
1856		    "etype=%#x protocol=%d csum_start=%d csum_offset=%d\n",
1857		    __func__, etype, protocol, hdr->csum_start,
1858		    hdr->csum_offset);
1859#endif
1860		break;
1861	}
1862
1863	return (0);
1864}
1865
1866static int
1867vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
1868    struct virtio_net_hdr *hdr)
1869{
1870	const struct ether_header *eh;
1871	int hoff;
1872	uint16_t etype;
1873
1874	eh = mtod(m, const struct ether_header *);
1875	etype = ntohs(eh->ether_type);
1876	if (etype == ETHERTYPE_VLAN) {
1877		/* TODO BMV: Handle QinQ. */
1878		const struct ether_vlan_header *evh =
1879		    mtod(m, const struct ether_vlan_header *);
1880		etype = ntohs(evh->evl_proto);
1881		hoff = sizeof(struct ether_vlan_header);
1882	} else
1883		hoff = sizeof(struct ether_header);
1884
1885	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
1886		return (vtnet_rxq_csum_needs_csum(rxq, m, etype, hoff, hdr));
1887	else /* VIRTIO_NET_HDR_F_DATA_VALID */
1888		return (vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr));
1889}
1890
1891static void
1892vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs)
1893{
1894	struct mbuf *m;
1895
1896	while (--nbufs > 0) {
1897		m = virtqueue_dequeue(rxq->vtnrx_vq, NULL);
1898		if (m == NULL)
1899			break;
1900		vtnet_rxq_discard_buf(rxq, m);
1901	}
1902}
1903
1904static void
1905vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m)
1906{
1907	int error;
1908
1909	/*
1910	 * Requeue the discarded mbuf. This should always be successful
1911	 * since it was just dequeued.
1912	 */
1913	error = vtnet_rxq_enqueue_buf(rxq, m);
1914	KASSERT(error == 0,
1915	    ("%s: cannot requeue discarded mbuf %d", __func__, error));
1916}
1917
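/*
 * Complete a mergeable receive: dequeue the remaining nbufs - 1 buffers the
 * host used for this frame, append them to the chain headed by m_head, and
 * post a replacement buffer for each one dequeued.
 */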
1918static int
1919vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs)
1920{
1921	struct vtnet_softc *sc;
1922	struct virtqueue *vq;
1923	struct mbuf *m_tail;
1924
1925	sc = rxq->vtnrx_sc;
1926	vq = rxq->vtnrx_vq;
1927	m_tail = m_head;
1928
1929	while (--nbufs > 0) {
1930		struct mbuf *m;
1931		int len;
1932
1933		m = virtqueue_dequeue(vq, &len);
1934		if (m == NULL) {
1935			rxq->vtnrx_stats.vrxs_ierrors++;
1936			goto fail;
1937		}
1938
1939		if (vtnet_rxq_new_buf(rxq) != 0) {
1940			rxq->vtnrx_stats.vrxs_iqdrops++;
1941			vtnet_rxq_discard_buf(rxq, m);
1942			if (nbufs > 1)
1943				vtnet_rxq_discard_merged_bufs(rxq, nbufs);
1944			goto fail;
1945		}
1946
1947		if (m->m_len < len)
1948			len = m->m_len;
1949
1950		m->m_len = len;
1951		m->m_flags &= ~M_PKTHDR;
1952
1953		m_head->m_pkthdr.len += len;
1954		m_tail->m_next = m;
1955		m_tail = m;
1956	}
1957
1958	return (0);
1959
1960fail:
1961	sc->vtnet_stats.rx_mergeable_failed++;
1962	m_freem(m_head);
1963
1964	return (1);
1965}
1966
1967#if defined(INET) || defined(INET6)
1968static int
1969vtnet_lro_rx(struct vtnet_rxq *rxq, struct mbuf *m)
1970{
1971	struct lro_ctrl *lro;
1972
1973	lro = &rxq->vtnrx_lro;
1974
1975	if (lro->lro_mbuf_max != 0) {
1976		tcp_lro_queue_mbuf(lro, m);
1977		return (0);
1978	}
1979
1980	return (tcp_lro_rx(lro, m, 0));
1981}
1982#endif
1983
1984static void
1985vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m,
1986    struct virtio_net_hdr *hdr)
1987{
1988	struct vtnet_softc *sc;
1989	struct ifnet *ifp;
1990
1991	sc = rxq->vtnrx_sc;
1992	ifp = sc->vtnet_ifp;
1993
1994	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1995		struct ether_header *eh = mtod(m, struct ether_header *);
1996		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1997			vtnet_vlan_tag_remove(m);
1998			/*
1999			 * With the 802.1Q header removed, update the
2000			 * checksum starting location accordingly.
2001			 */
2002			if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
2003				hdr->csum_start -= ETHER_VLAN_ENCAP_LEN;
2004		}
2005	}
2006
2007	m->m_pkthdr.flowid = rxq->vtnrx_id;
2008	M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2009
2010	if (hdr->flags &
2011	    (VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID)) {
2012		if (vtnet_rxq_csum(rxq, m, hdr) == 0)
2013			rxq->vtnrx_stats.vrxs_csum++;
2014		else
2015			rxq->vtnrx_stats.vrxs_csum_failed++;
2016	}
2017
2018	if (hdr->gso_size != 0) {
2019		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
2020		case VIRTIO_NET_HDR_GSO_TCPV4:
2021		case VIRTIO_NET_HDR_GSO_TCPV6:
2022			m->m_pkthdr.lro_nsegs =
2023			    howmany(m->m_pkthdr.len, hdr->gso_size);
2024			rxq->vtnrx_stats.vrxs_host_lro++;
2025			break;
2026		}
2027	}
2028
2029	rxq->vtnrx_stats.vrxs_ipackets++;
2030	rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len;
2031
2032#if defined(INET) || defined(INET6)
2033	if (vtnet_software_lro(sc) && ifp->if_capenable & IFCAP_LRO) {
2034		if (vtnet_lro_rx(rxq, m) == 0)
2035			return;
2036	}
2037#endif
2038
2039	(*ifp->if_input)(ifp, m);
2040}
2041
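/*
 * Process completed receive descriptors, up to the configured process limit.
 * For each frame: post a replacement buffer, gather any mergeable
 * continuation buffers, save a host-endian copy of the virtio header before
 * stripping it, run pfil hooks, and pass the packet up the stack.
 */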
2042static int
2043vtnet_rxq_eof(struct vtnet_rxq *rxq)
2044{
2045	struct virtio_net_hdr lhdr, *hdr;
2046	struct vtnet_softc *sc;
2047	struct ifnet *ifp;
2048	struct virtqueue *vq;
2049	int deq, count;
2050
2051	sc = rxq->vtnrx_sc;
2052	vq = rxq->vtnrx_vq;
2053	ifp = sc->vtnet_ifp;
2054	deq = 0;
2055	count = sc->vtnet_rx_process_limit;
2056
2057	VTNET_RXQ_LOCK_ASSERT(rxq);
2058
2059	while (count-- > 0) {
2060		struct mbuf *m;
2061		int len, nbufs, adjsz;
2062
2063		m = virtqueue_dequeue(vq, &len);
2064		if (m == NULL)
2065			break;
2066		deq++;
2067
2068		if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) {
2069			rxq->vtnrx_stats.vrxs_ierrors++;
2070			vtnet_rxq_discard_buf(rxq, m);
2071			continue;
2072		}
2073
2074		if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) {
2075			struct virtio_net_hdr_mrg_rxbuf *mhdr =
2076			    mtod(m, struct virtio_net_hdr_mrg_rxbuf *);
2077			nbufs = vtnet_htog16(sc, mhdr->num_buffers);
2078			adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
2079		} else if (vtnet_modern(sc)) {
2080			nbufs = 1; /* num_buffers is always 1 */
2081			adjsz = sizeof(struct virtio_net_hdr_v1);
2082		} else {
2083			nbufs = 1;
2084			adjsz = sizeof(struct vtnet_rx_header);
2085			/*
2086			 * Account for our gap between the header and start of
2087			 * data to keep the segments separated.
2088			 */
2089			len += VTNET_RX_HEADER_PAD;
2090		}
2091
2092		if (vtnet_rxq_replace_buf(rxq, m, len) != 0) {
2093			rxq->vtnrx_stats.vrxs_iqdrops++;
2094			vtnet_rxq_discard_buf(rxq, m);
2095			if (nbufs > 1)
2096				vtnet_rxq_discard_merged_bufs(rxq, nbufs);
2097			continue;
2098		}
2099
2100		m->m_pkthdr.len = len;
2101		m->m_pkthdr.rcvif = ifp;
2102		m->m_pkthdr.csum_flags = 0;
2103
2104		if (nbufs > 1) {
2105			/* Dequeue the rest of the chain. */
2106			if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0)
2107				continue;
2108		}
2109
2110		/*
2111		 * Save a host-endian copy of the header before it is
2112		 * stripped. The header is always at the start of the mbuf
2113		 * data. num_buffers was already read above (and is not
2114		 * needed here), so the standard header layout suffices.
2115		 */
2116		hdr = mtod(m, struct virtio_net_hdr *);
2117		lhdr.flags = hdr->flags;
2118		lhdr.gso_type = hdr->gso_type;
2119		lhdr.hdr_len = vtnet_htog16(sc, hdr->hdr_len);
2120		lhdr.gso_size = vtnet_htog16(sc, hdr->gso_size);
2121		lhdr.csum_start = vtnet_htog16(sc, hdr->csum_start);
2122		lhdr.csum_offset = vtnet_htog16(sc, hdr->csum_offset);
2123		m_adj(m, adjsz);
2124
2125		if (PFIL_HOOKED_IN(sc->vtnet_pfil)) {
2126			pfil_return_t pfil;
2127
2128			pfil = pfil_run_hooks(sc->vtnet_pfil, &m, ifp, PFIL_IN,
2129			    NULL);
2130			switch (pfil) {
2131			case PFIL_REALLOCED:
2132				m = pfil_mem2mbuf(m->m_data);
2133				break;
2134			case PFIL_DROPPED:
2135			case PFIL_CONSUMED:
2136				continue;
2137			default:
2138				KASSERT(pfil == PFIL_PASS,
2139				    ("Filter returned %d!", pfil));
2140			}
2141		}
2142
2143		vtnet_rxq_input(rxq, m, &lhdr);
2144	}
2145
2146	if (deq > 0) {
2147#if defined(INET) || defined(INET6)
2148		tcp_lro_flush_all(&rxq->vtnrx_lro);
2149#endif
2150		virtqueue_notify(vq);
2151	}
2152
2153	return (count > 0 ? 0 : EAGAIN);
2154}
2155
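/*
 * Common receive interrupt and taskqueue handler: drain the ring and
 * re-enable the interrupt, retrying up to 'tries' times before deferring
 * further work to the taskqueue.
 */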
2156static void
2157vtnet_rx_vq_process(struct vtnet_rxq *rxq, int tries)
2158{
2159	struct vtnet_softc *sc;
2160	struct ifnet *ifp;
2161	int more;
2162#ifdef DEV_NETMAP
2163	int nmirq;
2164#endif /* DEV_NETMAP */
2165
2166	sc = rxq->vtnrx_sc;
2167	ifp = sc->vtnet_ifp;
2168
2169	if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) {
2170		/*
2171		 * Ignore this interrupt: either it is spurious, or we are
2172		 * running multiqueue without per-VQ MSIX, in which case every
2173		 * queue must be polled (a brain-dead configuration we could
2174		 * try harder to avoid).
2175		 */
2176		vtnet_rxq_disable_intr(rxq);
2177		return;
2178	}
2179
2180	VTNET_RXQ_LOCK(rxq);
2181
2182#ifdef DEV_NETMAP
2183	/*
2184	 * We call netmap_rx_irq() under lock to prevent concurrent calls.
2185	 * This is not necessary to serialize the access to the RX vq, but
2186	 * rather to avoid races that may happen if this interface is
2187	 * attached to a VALE switch, which would cause received packets
2188	 * to stall in the RX queue (nm_kr_tryget() could find the kring
2189	 * busy when called from netmap_bwrap_intr_notify()).
2190	 */
2191	nmirq = netmap_rx_irq(ifp, rxq->vtnrx_id, &more);
2192	if (nmirq != NM_IRQ_PASS) {
2193		VTNET_RXQ_UNLOCK(rxq);
2194		if (nmirq == NM_IRQ_RESCHED) {
2195			taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
2196		}
2197		return;
2198	}
2199#endif /* DEV_NETMAP */
2200
2201again:
2202	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2203		VTNET_RXQ_UNLOCK(rxq);
2204		return;
2205	}
2206
2207	more = vtnet_rxq_eof(rxq);
2208	if (more || vtnet_rxq_enable_intr(rxq) != 0) {
2209		if (!more)
2210			vtnet_rxq_disable_intr(rxq);
2211		/*
2212		 * This is an occasional condition or race (when !more),
2213		 * so retry a few times before scheduling the taskqueue.
2214		 */
2215		if (tries-- > 0)
2216			goto again;
2217
2218		rxq->vtnrx_stats.vrxs_rescheduled++;
2219		VTNET_RXQ_UNLOCK(rxq);
2220		taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
2221	} else
2222		VTNET_RXQ_UNLOCK(rxq);
2223}
2224
2225static void
2226vtnet_rx_vq_intr(void *xrxq)
2227{
2228	struct vtnet_rxq *rxq;
2229
2230	rxq = xrxq;
2231	vtnet_rx_vq_process(rxq, VTNET_INTR_DISABLE_RETRIES);
2232}
2233
2234static void
2235vtnet_rxq_tq_intr(void *xrxq, int pending)
2236{
2237	struct vtnet_rxq *rxq;
2238
2239	rxq = xrxq;
2240	vtnet_rx_vq_process(rxq, 0);
2241}
2242
2243static int
2244vtnet_txq_intr_threshold(struct vtnet_txq *txq)
2245{
2246	struct vtnet_softc *sc;
2247	int threshold;
2248
2249	sc = txq->vtntx_sc;
2250
2251	/*
2252	 * The Tx interrupt is disabled until the queue free count falls
2253	 * below our threshold. Completed frames are drained from the Tx
2254	 * virtqueue before transmitting new frames and in the watchdog
2255	 * callout, so the frequency of Tx interrupts is greatly reduced,
2256	 * at the cost of not freeing mbufs as quickly as they otherwise
2257	 * would be.
2258	 */
2259	threshold = virtqueue_size(txq->vtntx_vq) / 4;
2260
2261	/*
2262	 * Without indirect descriptors, leave enough room for the most
2263	 * segments we handle.
2264	 */
2265	if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0 &&
2266	    threshold < sc->vtnet_tx_nsegs)
2267		threshold = sc->vtnet_tx_nsegs;
2268
2269	return (threshold);
2270}
2271
2272static int
2273vtnet_txq_below_threshold(struct vtnet_txq *txq)
2274{
2275	struct virtqueue *vq;
2276
2277	vq = txq->vtntx_vq;
2278
2279	return (virtqueue_nfree(vq) <= txq->vtntx_intr_threshold);
2280}
2281
2282static int
2283vtnet_txq_notify(struct vtnet_txq *txq)
2284{
2285	struct virtqueue *vq;
2286
2287	vq = txq->vtntx_vq;
2288
2289	txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
2290	virtqueue_notify(vq);
2291
2292	if (vtnet_txq_enable_intr(txq) == 0)
2293		return (0);
2294
2295	/*
2296	 * Drain frames that were completed since last checked. If this
2297	 * causes the queue to go above the threshold, the caller should
2298	 * continue transmitting.
2299	 */
2300	if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) {
2301		virtqueue_disable_intr(vq);
2302		return (1);
2303	}
2304
2305	return (0);
2306}
2307
2308static void
2309vtnet_txq_free_mbufs(struct vtnet_txq *txq)
2310{
2311	struct virtqueue *vq;
2312	struct vtnet_tx_header *txhdr;
2313	int last;
2314#ifdef DEV_NETMAP
2315	struct netmap_kring *kring = netmap_kring_on(NA(txq->vtntx_sc->vtnet_ifp),
2316							txq->vtntx_id, NR_TX);
2317#else  /* !DEV_NETMAP */
2318	void *kring = NULL;
2319#endif /* !DEV_NETMAP */
2320
2321	vq = txq->vtntx_vq;
2322	last = 0;
2323
2324	while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
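		/*
		 * When the queue is in netmap mode, the ring's buffers are
		 * owned by netmap and the dequeued cookies are not
		 * driver-allocated tx headers, so there is nothing to free.
		 */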
2325		if (kring == NULL) {
2326			m_freem(txhdr->vth_mbuf);
2327			uma_zfree(vtnet_tx_header_zone, txhdr);
2328		}
2329	}
2330
2331	KASSERT(virtqueue_empty(vq),
2332	    ("%s: mbufs remaining in tx queue %p", __func__, txq));
2333}
2334
2335/*
2336 * BMV: This can go away once we finally have offsets in the mbuf header.
2337 */
2338static int
2339vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m, int *etype,
2340    int *proto, int *start)
2341{
2342	struct vtnet_softc *sc;
2343	struct ether_vlan_header *evh;
2344	int offset;
2345
2346	sc = txq->vtntx_sc;
2347
2348	evh = mtod(m, struct ether_vlan_header *);
2349	if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2350		/* BMV: We should handle nested VLAN tags too. */
2351		*etype = ntohs(evh->evl_proto);
2352		offset = sizeof(struct ether_vlan_header);
2353	} else {
2354		*etype = ntohs(evh->evl_encap_proto);
2355		offset = sizeof(struct ether_header);
2356	}
2357
2358	switch (*etype) {
2359#if defined(INET)
2360	case ETHERTYPE_IP: {
2361		struct ip *ip, iphdr;
2362		if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
2363			m_copydata(m, offset, sizeof(struct ip),
2364			    (caddr_t) &iphdr);
2365			ip = &iphdr;
2366		} else
2367			ip = (struct ip *)(m->m_data + offset);
2368		*proto = ip->ip_p;
2369		*start = offset + (ip->ip_hl << 2);
2370		break;
2371	}
2372#endif
2373#if defined(INET6)
2374	case ETHERTYPE_IPV6:
2375		*proto = -1;
2376		*start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
2377		/* Assert the network stack sent us a valid packet. */
2378		KASSERT(*start > offset,
2379		    ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
2380		    *start, offset, *proto));
2381		break;
2382#endif
2383	default:
2384		sc->vtnet_stats.tx_csum_unknown_ethtype++;
2385		return (EINVAL);
2386	}
2387
2388	return (0);
2389}
2390
2391static int
2392vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type,
2393    int offset, struct virtio_net_hdr *hdr)
2394{
2395	static struct timeval lastecn;
2396	static int curecn;
2397	struct vtnet_softc *sc;
2398	struct tcphdr *tcp, tcphdr;
2399
2400	sc = txq->vtntx_sc;
2401
2402	if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) {
2403		m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr);
2404		tcp = &tcphdr;
2405	} else
2406		tcp = (struct tcphdr *)(m->m_data + offset);
2407
2408	hdr->hdr_len = vtnet_gtoh16(sc, offset + (tcp->th_off << 2));
2409	hdr->gso_size = vtnet_gtoh16(sc, m->m_pkthdr.tso_segsz);
2410	hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 :
2411	    VIRTIO_NET_HDR_GSO_TCPV6;
2412
2413	if (__predict_false(tcp->th_flags & TH_CWR)) {
2414		/*
2415		 * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In
2416		 * FreeBSD, ECN support is not on a per-interface basis,
2417		 * but globally via the net.inet.tcp.ecn.enable sysctl
2418		 * knob. The default is off.
2419		 */
2420		if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
2421			if (ppsratecheck(&lastecn, &curecn, 1))
2422				if_printf(sc->vtnet_ifp,
2423				    "TSO with ECN not negotiated with host\n");
2424			return (ENOTSUP);
2425		}
2426		hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
2427	}
2428
2429	txq->vtntx_stats.vtxs_tso++;
2430
2431	return (0);
2432}
2433
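/*
 * Translate the mbuf's checksum and TSO requests into the virtio_net_hdr
 * that precedes the frame. Returns the (possibly unchanged) mbuf, or NULL
 * if the frame was dropped because the request could not be satisfied.
 */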
2434static struct mbuf *
2435vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m,
2436    struct virtio_net_hdr *hdr)
2437{
2438	struct vtnet_softc *sc;
2439	int flags, etype, csum_start, proto, error;
2440
2441	sc = txq->vtntx_sc;
2442	flags = m->m_pkthdr.csum_flags;
2443
2444	error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start);
2445	if (error)
2446		goto drop;
2447
2448	if (flags & (VTNET_CSUM_OFFLOAD | VTNET_CSUM_OFFLOAD_IPV6)) {
2449		/* Sanity check the parsed mbuf matches the offload flags. */
2450		if (__predict_false((flags & VTNET_CSUM_OFFLOAD &&
2451		    etype != ETHERTYPE_IP) || (flags & VTNET_CSUM_OFFLOAD_IPV6
2452		    && etype != ETHERTYPE_IPV6))) {
2453			sc->vtnet_stats.tx_csum_proto_mismatch++;
2454			goto drop;
2455		}
2456
2457		hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
2458		hdr->csum_start = vtnet_gtoh16(sc, csum_start);
2459		hdr->csum_offset = vtnet_gtoh16(sc, m->m_pkthdr.csum_data);
2460		txq->vtntx_stats.vtxs_csum++;
2461	}
2462
2463	if (flags & (CSUM_IP_TSO | CSUM_IP6_TSO)) {
2464		/*
2465		 * Sanity check that the parsed IP protocol is TCP; VirtIO
2466		 * TSO requires the checksum offload requested above.
2467		 */
2468		if (__predict_false(proto != IPPROTO_TCP)) {
2469			sc->vtnet_stats.tx_tso_not_tcp++;
2470			goto drop;
2471		} else if (__predict_false((hdr->flags &
2472		    VIRTIO_NET_HDR_F_NEEDS_CSUM) == 0)) {
2473			sc->vtnet_stats.tx_tso_without_csum++;
2474			goto drop;
2475		}
2476
2477		error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr);
2478		if (error)
2479			goto drop;
2480	}
2481
2482	return (m);
2483
2484drop:
2485	m_freem(m);
2486	return (NULL);
2487}
2488
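/*
 * Build the scatter/gather list for one frame: the virtio header is the
 * first segment, followed by the mbuf chain. If the chain has too many
 * segments, defragment it once and retry before giving up.
 */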
2489static int
2490vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head,
2491    struct vtnet_tx_header *txhdr)
2492{
2493	struct vtnet_softc *sc;
2494	struct virtqueue *vq;
2495	struct sglist *sg;
2496	struct mbuf *m;
2497	int error;
2498
2499	sc = txq->vtntx_sc;
2500	vq = txq->vtntx_vq;
2501	sg = txq->vtntx_sg;
2502	m = *m_head;
2503
2504	sglist_reset(sg);
2505	error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size);
2506	if (error != 0 || sg->sg_nseg != 1) {
2507		KASSERT(0, ("%s: cannot add header to sglist error %d nseg %d",
2508		    __func__, error, sg->sg_nseg));
2509		goto fail;
2510	}
2511
2512	error = sglist_append_mbuf(sg, m);
2513	if (error) {
2514		m = m_defrag(m, M_NOWAIT);
2515		if (m == NULL)
2516			goto fail;
2517
2518		*m_head = m;
2519		sc->vtnet_stats.tx_defragged++;
2520
2521		error = sglist_append_mbuf(sg, m);
2522		if (error)
2523			goto fail;
2524	}
2525
2526	txhdr->vth_mbuf = m;
2527	error = virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0);
2528
2529	return (error);
2530
2531fail:
2532	sc->vtnet_stats.tx_defrag_failed++;
2533	m_freem(*m_head);
2534	*m_head = NULL;
2535
2536	return (ENOBUFS);
2537}
2538
2539static int
2540vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head, int flags)
2541{
2542	struct vtnet_tx_header *txhdr;
2543	struct virtio_net_hdr *hdr;
2544	struct mbuf *m;
2545	int error;
2546
2547	m = *m_head;
2548	M_ASSERTPKTHDR(m);
2549
2550	txhdr = uma_zalloc(vtnet_tx_header_zone, flags | M_ZERO);
2551	if (txhdr == NULL) {
2552		m_freem(m);
2553		*m_head = NULL;
2554		return (ENOMEM);
2555	}
2556
2557	/*
2558	 * Always use the non-mergeable header, regardless of whether mergeable
2559	 * headers were negotiated, because num_buffers is always zero on the
2560	 * transmit path. vtnet_hdr_size selects the header size to enqueue.
2561	 */
2562	hdr = &txhdr->vth_uhdr.hdr;
2563
2564	if (m->m_flags & M_VLANTAG) {
2565		m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
2566		if ((*m_head = m) == NULL) {
2567			error = ENOBUFS;
2568			goto fail;
2569		}
2570		m->m_flags &= ~M_VLANTAG;
2571	}
2572
2573	if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) {
2574		m = vtnet_txq_offload(txq, m, hdr);
2575		if ((*m_head = m) == NULL) {
2576			error = ENOBUFS;
2577			goto fail;
2578		}
2579	}
2580
2581	error = vtnet_txq_enqueue_buf(txq, m_head, txhdr);
2582fail:
2583	if (error)
2584		uma_zfree(vtnet_tx_header_zone, txhdr);
2585
2586	return (error);
2587}
2588
2589#ifdef VTNET_LEGACY_TX
2590
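/* Legacy, single-queue transmit path driven from the ifnet send queue. */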
2591static void
2592vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp)
2593{
2594	struct vtnet_softc *sc;
2595	struct virtqueue *vq;
2596	struct mbuf *m0;
2597	int tries, enq;
2598
2599	sc = txq->vtntx_sc;
2600	vq = txq->vtntx_vq;
2601	tries = 0;
2602
2603	VTNET_TXQ_LOCK_ASSERT(txq);
2604
2605	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2606	    sc->vtnet_link_active == 0)
2607		return;
2608
2609	vtnet_txq_eof(txq);
2610
2611again:
2612	enq = 0;
2613
2614	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
2615		if (virtqueue_full(vq))
2616			break;
2617
2618		IFQ_DRV_DEQUEUE(&ifp->if_snd, m0);
2619		if (m0 == NULL)
2620			break;
2621
2622		if (vtnet_txq_encap(txq, &m0, M_NOWAIT) != 0) {
2623			if (m0 != NULL)
2624				IFQ_DRV_PREPEND(&ifp->if_snd, m0);
2625			break;
2626		}
2627
2628		enq++;
2629		ETHER_BPF_MTAP(ifp, m0);
2630	}
2631
2632	if (enq > 0 && vtnet_txq_notify(txq) != 0) {
2633		if (tries++ < VTNET_NOTIFY_RETRIES)
2634			goto again;
2635
2636		txq->vtntx_stats.vtxs_rescheduled++;
2637		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
2638	}
2639}
2640
2641static void
2642vtnet_start(struct ifnet *ifp)
2643{
2644	struct vtnet_softc *sc;
2645	struct vtnet_txq *txq;
2646
2647	sc = ifp->if_softc;
2648	txq = &sc->vtnet_txqs[0];
2649
2650	VTNET_TXQ_LOCK(txq);
2651	vtnet_start_locked(txq, ifp);
2652	VTNET_TXQ_UNLOCK(txq);
2653}
2654
2655#else /* !VTNET_LEGACY_TX */
2656
2657static int
2658vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m)
2659{
2660	struct vtnet_softc *sc;
2661	struct virtqueue *vq;
2662	struct buf_ring *br;
2663	struct ifnet *ifp;
2664	int enq, tries, error;
2665
2666	sc = txq->vtntx_sc;
2667	vq = txq->vtntx_vq;
2668	br = txq->vtntx_br;
2669	ifp = sc->vtnet_ifp;
2670	tries = 0;
2671	error = 0;
2672
2673	VTNET_TXQ_LOCK_ASSERT(txq);
2674
2675	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2676	    sc->vtnet_link_active == 0) {
2677		if (m != NULL)
2678			error = drbr_enqueue(ifp, br, m);
2679		return (error);
2680	}
2681
2682	if (m != NULL) {
2683		error = drbr_enqueue(ifp, br, m);
2684		if (error)
2685			return (error);
2686	}
2687
2688	vtnet_txq_eof(txq);
2689
2690again:
2691	enq = 0;
2692
2693	while ((m = drbr_peek(ifp, br)) != NULL) {
2694		if (virtqueue_full(vq)) {
2695			drbr_putback(ifp, br, m);
2696			break;
2697		}
2698
2699		if (vtnet_txq_encap(txq, &m, M_NOWAIT) != 0) {
2700			if (m != NULL)
2701				drbr_putback(ifp, br, m);
2702			else
2703				drbr_advance(ifp, br);
2704			break;
2705		}
2706		drbr_advance(ifp, br);
2707
2708		enq++;
2709		ETHER_BPF_MTAP(ifp, m);
2710	}
2711
2712	if (enq > 0 && vtnet_txq_notify(txq) != 0) {
2713		if (tries++ < VTNET_NOTIFY_RETRIES)
2714			goto again;
2715
2716		txq->vtntx_stats.vtxs_rescheduled++;
2717		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
2718	}
2719
2720	return (0);
2721}
2722
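/*
 * Multiqueue transmit entry point: select a queue from the flow ID (or the
 * current CPU when the mbuf carries no hash). If the queue lock is contended,
 * leave the mbuf on the buf_ring and let the deferred task drain it.
 */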
2723static int
2724vtnet_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
2725{
2726	struct vtnet_softc *sc;
2727	struct vtnet_txq *txq;
2728	int i, npairs, error;
2729
2730	sc = ifp->if_softc;
2731	npairs = sc->vtnet_act_vq_pairs;
2732
2733	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
2734		i = m->m_pkthdr.flowid % npairs;
2735	else
2736		i = curcpu % npairs;
2737
2738	txq = &sc->vtnet_txqs[i];
2739
2740	if (VTNET_TXQ_TRYLOCK(txq) != 0) {
2741		error = vtnet_txq_mq_start_locked(txq, m);
2742		VTNET_TXQ_UNLOCK(txq);
2743	} else {
2744		error = drbr_enqueue(ifp, txq->vtntx_br, m);
2745		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask);
2746	}
2747
2748	return (error);
2749}
2750
2751static void
2752vtnet_txq_tq_deferred(void *xtxq, int pending)
2753{
2754	struct vtnet_softc *sc;
2755	struct vtnet_txq *txq;
2756
2757	txq = xtxq;
2758	sc = txq->vtntx_sc;
2759
2760	VTNET_TXQ_LOCK(txq);
2761	if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br))
2762		vtnet_txq_mq_start_locked(txq, NULL);
2763	VTNET_TXQ_UNLOCK(txq);
2764}
2765
2766#endif /* VTNET_LEGACY_TX */
2767
2768static void
2769vtnet_txq_start(struct vtnet_txq *txq)
2770{
2771	struct vtnet_softc *sc;
2772	struct ifnet *ifp;
2773
2774	sc = txq->vtntx_sc;
2775	ifp = sc->vtnet_ifp;
2776
2777#ifdef VTNET_LEGACY_TX
2778	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2779		vtnet_start_locked(txq, ifp);
2780#else
2781	if (!drbr_empty(ifp, txq->vtntx_br))
2782		vtnet_txq_mq_start_locked(txq, NULL);
2783#endif
2784}
2785
2786static void
2787vtnet_txq_tq_intr(void *xtxq, int pending)
2788{
2789	struct vtnet_softc *sc;
2790	struct vtnet_txq *txq;
2791	struct ifnet *ifp;
2792
2793	txq = xtxq;
2794	sc = txq->vtntx_sc;
2795	ifp = sc->vtnet_ifp;
2796
2797	VTNET_TXQ_LOCK(txq);
2798
2799	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2800		VTNET_TXQ_UNLOCK(txq);
2801		return;
2802	}
2803
2804	vtnet_txq_eof(txq);
2805	vtnet_txq_start(txq);
2806
2807	VTNET_TXQ_UNLOCK(txq);
2808}
2809
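/*
 * Reclaim completed transmit descriptors, freeing their mbufs and headers,
 * and return the number of entries drained. The watchdog is cleared once
 * the virtqueue is empty.
 */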
2810static int
2811vtnet_txq_eof(struct vtnet_txq *txq)
2812{
2813	struct virtqueue *vq;
2814	struct vtnet_tx_header *txhdr;
2815	struct mbuf *m;
2816	int deq;
2817
2818	vq = txq->vtntx_vq;
2819	deq = 0;
2820	VTNET_TXQ_LOCK_ASSERT(txq);
2821
2822	while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
2823		m = txhdr->vth_mbuf;
2824		deq++;
2825
2826		txq->vtntx_stats.vtxs_opackets++;
2827		txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len;
2828		if (m->m_flags & M_MCAST)
2829			txq->vtntx_stats.vtxs_omcasts++;
2830
2831		m_freem(m);
2832		uma_zfree(vtnet_tx_header_zone, txhdr);
2833	}
2834
2835	if (virtqueue_empty(vq))
2836		txq->vtntx_watchdog = 0;
2837
2838	return (deq);
2839}
2840
2841static void
2842vtnet_tx_vq_intr(void *xtxq)
2843{
2844	struct vtnet_softc *sc;
2845	struct vtnet_txq *txq;
2846	struct ifnet *ifp;
2847
2848	txq = xtxq;
2849	sc = txq->vtntx_sc;
2850	ifp = sc->vtnet_ifp;
2851
2852	if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) {
2853		/*
2854		 * Ignore this interrupt: either it is spurious, or we are
2855		 * running multiqueue without per-VQ MSIX, in which case every
2856		 * queue must be polled (a brain-dead configuration we could
2857		 * try harder to avoid).
2858		 */
2859		vtnet_txq_disable_intr(txq);
2860		return;
2861	}
2862
2863#ifdef DEV_NETMAP
2864	if (netmap_tx_irq(ifp, txq->vtntx_id) != NM_IRQ_PASS)
2865		return;
2866#endif /* DEV_NETMAP */
2867
2868	VTNET_TXQ_LOCK(txq);
2869
2870	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2871		VTNET_TXQ_UNLOCK(txq);
2872		return;
2873	}
2874
2875	vtnet_txq_eof(txq);
2876	vtnet_txq_start(txq);
2877
2878	VTNET_TXQ_UNLOCK(txq);
2879}
2880
2881static void
2882vtnet_tx_start_all(struct vtnet_softc *sc)
2883{
2884	struct vtnet_txq *txq;
2885	int i;
2886
2887	VTNET_CORE_LOCK_ASSERT(sc);
2888
2889	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2890		txq = &sc->vtnet_txqs[i];
2891
2892		VTNET_TXQ_LOCK(txq);
2893		vtnet_txq_start(txq);
2894		VTNET_TXQ_UNLOCK(txq);
2895	}
2896}
2897
2898#ifndef VTNET_LEGACY_TX
2899static void
2900vtnet_qflush(struct ifnet *ifp)
2901{
2902	struct vtnet_softc *sc;
2903	struct vtnet_txq *txq;
2904	struct mbuf *m;
2905	int i;
2906
2907	sc = ifp->if_softc;
2908
2909	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2910		txq = &sc->vtnet_txqs[i];
2911
2912		VTNET_TXQ_LOCK(txq);
2913		while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL)
2914			m_freem(m);
2915		VTNET_TXQ_UNLOCK(txq);
2916	}
2917
2918	if_qflush(ifp);
2919}
2920#endif
2921
2922static int
2923vtnet_watchdog(struct vtnet_txq *txq)
2924{
2925	struct ifnet *ifp;
2926
2927	ifp = txq->vtntx_sc->vtnet_ifp;
2928
2929	VTNET_TXQ_LOCK(txq);
2930	if (txq->vtntx_watchdog == 1) {
2931		/*
2932		 * Only drain completed frames if the watchdog is about to
2933		 * expire. If any frames were drained, there may be enough
2934		 * free descriptors now available to transmit queued frames.
2935		 * In that case, the timer will immediately be decremented
2936		 * below, but the timeout is generous enough that this should
2937		 * not be a problem.
2938		 */
2939		if (vtnet_txq_eof(txq) != 0)
2940			vtnet_txq_start(txq);
2941	}
2942
2943	if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) {
2944		VTNET_TXQ_UNLOCK(txq);
2945		return (0);
2946	}
2947	VTNET_TXQ_UNLOCK(txq);
2948
2949	if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id);
2950	return (1);
2951}
2952
2953static void
2954vtnet_accum_stats(struct vtnet_softc *sc, struct vtnet_rxq_stats *rxacc,
2955    struct vtnet_txq_stats *txacc)
2956{
2957
2958	bzero(rxacc, sizeof(struct vtnet_rxq_stats));
2959	bzero(txacc, sizeof(struct vtnet_txq_stats));
2960
2961	for (int i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2962		struct vtnet_rxq_stats *rxst;
2963		struct vtnet_txq_stats *txst;
2964
2965		rxst = &sc->vtnet_rxqs[i].vtnrx_stats;
2966		rxacc->vrxs_ipackets += rxst->vrxs_ipackets;
2967		rxacc->vrxs_ibytes += rxst->vrxs_ibytes;
2968		rxacc->vrxs_iqdrops += rxst->vrxs_iqdrops;
2969		rxacc->vrxs_csum += rxst->vrxs_csum;
2970		rxacc->vrxs_csum_failed += rxst->vrxs_csum_failed;
2971		rxacc->vrxs_rescheduled += rxst->vrxs_rescheduled;
2972
2973		txst = &sc->vtnet_txqs[i].vtntx_stats;
2974		txacc->vtxs_opackets += txst->vtxs_opackets;
2975		txacc->vtxs_obytes += txst->vtxs_obytes;
2976		txacc->vtxs_csum += txst->vtxs_csum;
2977		txacc->vtxs_tso += txst->vtxs_tso;
2978		txacc->vtxs_rescheduled += txst->vtxs_rescheduled;
2979	}
2980}
2981
2982static uint64_t
2983vtnet_get_counter(if_t ifp, ift_counter cnt)
2984{
2985	struct vtnet_softc *sc;
2986	struct vtnet_rxq_stats rxaccum;
2987	struct vtnet_txq_stats txaccum;
2988
2989	sc = if_getsoftc(ifp);
2990	vtnet_accum_stats(sc, &rxaccum, &txaccum);
2991
2992	switch (cnt) {
2993	case IFCOUNTER_IPACKETS:
2994		return (rxaccum.vrxs_ipackets);
2995	case IFCOUNTER_IQDROPS:
2996		return (rxaccum.vrxs_iqdrops);
2997	case IFCOUNTER_IERRORS:
2998		return (rxaccum.vrxs_ierrors);
2999	case IFCOUNTER_OPACKETS:
3000		return (txaccum.vtxs_opackets);
3001#ifndef VTNET_LEGACY_TX
3002	case IFCOUNTER_OBYTES:
3003		return (txaccum.vtxs_obytes);
3004	case IFCOUNTER_OMCASTS:
3005		return (txaccum.vtxs_omcasts);
3006#endif
3007	default:
3008		return (if_get_counter_default(ifp, cnt));
3009	}
3010}
3011
3012static void
3013vtnet_tick(void *xsc)
3014{
3015	struct vtnet_softc *sc;
3016	struct ifnet *ifp;
3017	int i, timedout;
3018
3019	sc = xsc;
3020	ifp = sc->vtnet_ifp;
3021	timedout = 0;
3022
3023	VTNET_CORE_LOCK_ASSERT(sc);
3024
3025	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3026		timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]);
3027
3028	if (timedout != 0) {
3029		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3030		vtnet_init_locked(sc, 0);
3031	} else
3032		callout_schedule(&sc->vtnet_tick_ch, hz);
3033}
3034
3035static void
3036vtnet_start_taskqueues(struct vtnet_softc *sc)
3037{
3038	device_t dev;
3039	struct vtnet_rxq *rxq;
3040	struct vtnet_txq *txq;
3041	int i, error;
3042
3043	dev = sc->vtnet_dev;
3044
3045	/*
3046	 * Errors here are very difficult to recover from: we cannot
3047	 * easily fail because, if this happens during boot, we will hang
3048	 * when freeing any successfully started taskqueues since the
3049	 * scheduler is not up yet.
3050	 *
3051	 * Most drivers simply ignore the return value; it can only fail
3052	 * with ENOMEM, so an error is unlikely.
3053	 */
3054	for (i = 0; i < sc->vtnet_req_vq_pairs; i++) {
3055		rxq = &sc->vtnet_rxqs[i];
3056		error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET,
3057		    "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id);
3058		if (error) {
3059			device_printf(dev, "failed to start rx taskq %d\n",
3060			    rxq->vtnrx_id);
3061		}
3062
3063		txq = &sc->vtnet_txqs[i];
3064		error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET,
3065		    "%s txq %d", device_get_nameunit(dev), txq->vtntx_id);
3066		if (error) {
3067			device_printf(dev, "failed to start tx taskq %d\n",
3068			    txq->vtntx_id);
3069		}
3070	}
3071}
3072
3073static void
3074vtnet_free_taskqueues(struct vtnet_softc *sc)
3075{
3076	struct vtnet_rxq *rxq;
3077	struct vtnet_txq *txq;
3078	int i;
3079
3080	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3081		rxq = &sc->vtnet_rxqs[i];
3082		if (rxq->vtnrx_tq != NULL) {
3083			taskqueue_free(rxq->vtnrx_tq);
3084			rxq->vtnrx_tq = NULL;
3085		}
3086
3087		txq = &sc->vtnet_txqs[i];
3088		if (txq->vtntx_tq != NULL) {
3089			taskqueue_free(txq->vtntx_tq);
3090			txq->vtntx_tq = NULL;
3091		}
3092	}
3093}
3094
3095static void
3096vtnet_drain_taskqueues(struct vtnet_softc *sc)
3097{
3098	struct vtnet_rxq *rxq;
3099	struct vtnet_txq *txq;
3100	int i;
3101
3102	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3103		rxq = &sc->vtnet_rxqs[i];
3104		if (rxq->vtnrx_tq != NULL)
3105			taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
3106
3107		txq = &sc->vtnet_txqs[i];
3108		if (txq->vtntx_tq != NULL) {
3109			taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask);
3110#ifndef VTNET_LEGACY_TX
3111			taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask);
3112#endif
3113		}
3114	}
3115}
3116
3117static void
3118vtnet_drain_rxtx_queues(struct vtnet_softc *sc)
3119{
3120	struct vtnet_rxq *rxq;
3121	struct vtnet_txq *txq;
3122	int i;
3123
3124	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3125		rxq = &sc->vtnet_rxqs[i];
3126		vtnet_rxq_free_mbufs(rxq);
3127
3128		txq = &sc->vtnet_txqs[i];
3129		vtnet_txq_free_mbufs(txq);
3130	}
3131}
3132
3133static void
3134vtnet_stop_rendezvous(struct vtnet_softc *sc)
3135{
3136	struct vtnet_rxq *rxq;
3137	struct vtnet_txq *txq;
3138	int i;
3139
3140	VTNET_CORE_LOCK_ASSERT(sc);
3141
3142	/*
3143	 * Lock and unlock the per-queue mutex so we know the stop
3144	 * state is visible. Doing only the active queues should be
3145	 * sufficient, but it does not cost much extra to do all the
3146	 * queues.
3147	 */
3148	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3149		rxq = &sc->vtnet_rxqs[i];
3150		VTNET_RXQ_LOCK(rxq);
3151		VTNET_RXQ_UNLOCK(rxq);
3152
3153		txq = &sc->vtnet_txqs[i];
3154		VTNET_TXQ_LOCK(txq);
3155		VTNET_TXQ_UNLOCK(txq);
3156	}
3157}
3158
3159static void
3160vtnet_stop(struct vtnet_softc *sc)
3161{
3162	device_t dev;
3163	struct ifnet *ifp;
3164
3165	dev = sc->vtnet_dev;
3166	ifp = sc->vtnet_ifp;
3167
3168	VTNET_CORE_LOCK_ASSERT(sc);
3169
3170	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3171	sc->vtnet_link_active = 0;
3172	callout_stop(&sc->vtnet_tick_ch);
3173
3174	/* Only advisory. */
3175	vtnet_disable_interrupts(sc);
3176
3177#ifdef DEV_NETMAP
3178	/* Stop any pending txsync/rxsync and disable them. */
3179	netmap_disable_all_rings(ifp);
3180#endif /* DEV_NETMAP */
3181
3182	/*
3183	 * Stop the host adapter. This resets it to the pre-initialized
3184	 * state. It will not generate any interrupts until after it is
3185	 * reinitialized.
3186	 */
3187	virtio_stop(dev);
3188	vtnet_stop_rendezvous(sc);
3189
3190	vtnet_drain_rxtx_queues(sc);
3191	sc->vtnet_act_vq_pairs = 1;
3192}
3193
3194static int
3195vtnet_virtio_reinit(struct vtnet_softc *sc)
3196{
3197	device_t dev;
3198	struct ifnet *ifp;
3199	uint64_t features;
3200	int error;
3201
3202	dev = sc->vtnet_dev;
3203	ifp = sc->vtnet_ifp;
3204	features = sc->vtnet_negotiated_features;
3205
3206	/*
3207	 * Re-negotiate with the host, removing any disabled receive
3208	 * features. Transmit features are disabled only on our side
3209	 * via if_capenable and if_hwassist.
3210	 */
3211
3212	if ((ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) == 0)
3213		features &= ~(VIRTIO_NET_F_GUEST_CSUM | VTNET_LRO_FEATURES);
3214
3215	if ((ifp->if_capenable & IFCAP_LRO) == 0)
3216		features &= ~VTNET_LRO_FEATURES;
3217
3218	if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)
3219		features &= ~VIRTIO_NET_F_CTRL_VLAN;
3220
3221	error = virtio_reinit(dev, features);
3222	if (error) {
3223		device_printf(dev, "virtio reinit error %d\n", error);
3224		return (error);
3225	}
3226
3227	sc->vtnet_features = features;
3228	virtio_reinit_complete(dev);
3229
3230	return (0);
3231}
3232
3233static void
3234vtnet_init_rx_filters(struct vtnet_softc *sc)
3235{
3236	struct ifnet *ifp;
3237
3238	ifp = sc->vtnet_ifp;
3239
3240	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
3241		vtnet_rx_filter(sc);
3242		vtnet_rx_filter_mac(sc);
3243	}
3244
3245	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
3246		vtnet_rx_filter_vlan(sc);
3247}
3248
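/*
 * Size the receive buffers for the current MTU. Without mergeable buffers
 * but with LRO negotiated, each frame may require a chain of clusters large
 * enough to cover VTNET_MAX_RX_SIZE; otherwise one cluster per frame is used.
 */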
3249static int
3250vtnet_init_rx_queues(struct vtnet_softc *sc)
3251{
3252	device_t dev;
3253	struct ifnet *ifp;
3254	struct vtnet_rxq *rxq;
3255	int i, clustersz, error;
3256
3257	dev = sc->vtnet_dev;
3258	ifp = sc->vtnet_ifp;
3259
3260	clustersz = vtnet_rx_cluster_size(sc, ifp->if_mtu);
3261	sc->vtnet_rx_clustersz = clustersz;
3262
3263	if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) {
3264		sc->vtnet_rx_nmbufs = howmany(sizeof(struct vtnet_rx_header) +
3265		    VTNET_MAX_RX_SIZE, clustersz);
3266		KASSERT(sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs,
3267		    ("%s: too many rx mbufs %d for %d segments", __func__,
3268		    sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs));
3269	} else
3270		sc->vtnet_rx_nmbufs = 1;
3271
3272	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
3273		rxq = &sc->vtnet_rxqs[i];
3274
3275		/* Hold the lock to satisfy asserts. */
3276		VTNET_RXQ_LOCK(rxq);
3277		error = vtnet_rxq_populate(rxq);
3278		VTNET_RXQ_UNLOCK(rxq);
3279
3280		if (error) {
3281			device_printf(dev, "cannot populate Rx queue %d\n", i);
3282			return (error);
3283		}
3284	}
3285
3286	return (0);
3287}
3288
3289static int
3290vtnet_init_tx_queues(struct vtnet_softc *sc)
3291{
3292	struct vtnet_txq *txq;
3293	int i;
3294
3295	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
3296		txq = &sc->vtnet_txqs[i];
3297		txq->vtntx_watchdog = 0;
3298		txq->vtntx_intr_threshold = vtnet_txq_intr_threshold(txq);
3299#ifdef DEV_NETMAP
3300		netmap_reset(NA(sc->vtnet_ifp), NR_TX, i, 0);
3301#endif /* DEV_NETMAP */
3302	}
3303
3304	return (0);
3305}
3306
3307static int
3308vtnet_init_rxtx_queues(struct vtnet_softc *sc)
3309{
3310	int error;
3311
3312	error = vtnet_init_rx_queues(sc);
3313	if (error)
3314		return (error);
3315
3316	error = vtnet_init_tx_queues(sc);
3317	if (error)
3318		return (error);
3319
3320	return (0);
3321}
3322
3323static void
3324vtnet_set_active_vq_pairs(struct vtnet_softc *sc)
3325{
3326	device_t dev;
3327	int npairs;
3328
3329	dev = sc->vtnet_dev;
3330
3331	if ((sc->vtnet_flags & VTNET_FLAG_MQ) == 0) {
3332		sc->vtnet_act_vq_pairs = 1;
3333		return;
3334	}
3335
3336	npairs = sc->vtnet_req_vq_pairs;
3337
3338	if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) {
3339		device_printf(dev, "cannot set active queue pairs to %d, "
3340		    "falling back to 1 queue pair\n", npairs);
3341		npairs = 1;
3342	}
3343
3344	sc->vtnet_act_vq_pairs = npairs;
3345}
3346
3347static void
3348vtnet_update_rx_offloads(struct vtnet_softc *sc)
3349{
3350	struct ifnet *ifp;
3351	uint64_t features;
3352	int error;
3353
3354	ifp = sc->vtnet_ifp;
3355	features = sc->vtnet_features;
3356
3357	VTNET_CORE_LOCK_ASSERT(sc);
3358
3359	if (ifp->if_capabilities & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
3360		if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
3361			features |= VIRTIO_NET_F_GUEST_CSUM;
3362		else
3363			features &= ~VIRTIO_NET_F_GUEST_CSUM;
3364	}
3365
3366	if (ifp->if_capabilities & IFCAP_LRO && !vtnet_software_lro(sc)) {
3367		if (ifp->if_capenable & IFCAP_LRO)
3368			features |= VTNET_LRO_FEATURES;
3369		else
3370			features &= ~VTNET_LRO_FEATURES;
3371	}
3372
3373	error = vtnet_ctrl_guest_offloads(sc,
3374	    features & (VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 |
3375		        VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN  |
3376			VIRTIO_NET_F_GUEST_UFO));
3377	if (error) {
3378		device_printf(sc->vtnet_dev,
3379		    "%s: cannot update Rx features\n", __func__);
3380		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
3381			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3382			vtnet_init_locked(sc, 0);
3383		}
3384	} else
3385		sc->vtnet_features = features;
3386}
3387
3388static int
3389vtnet_reinit(struct vtnet_softc *sc)
3390{
3391	device_t dev;
3392	struct ifnet *ifp;
3393	int error;
3394
3395	dev = sc->vtnet_dev;
3396	ifp = sc->vtnet_ifp;
3397
3398	bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN);
3399
3400	error = vtnet_virtio_reinit(sc);
3401	if (error)
3402		return (error);
3403
3404	vtnet_set_macaddr(sc);
3405	vtnet_set_active_vq_pairs(sc);
3406
3407	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
3408		vtnet_init_rx_filters(sc);
3409
3410	ifp->if_hwassist = 0;
3411	if (ifp->if_capenable & IFCAP_TXCSUM)
3412		ifp->if_hwassist |= VTNET_CSUM_OFFLOAD;
3413	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
3414		ifp->if_hwassist |= VTNET_CSUM_OFFLOAD_IPV6;
3415	if (ifp->if_capenable & IFCAP_TSO4)
3416		ifp->if_hwassist |= CSUM_IP_TSO;
3417	if (ifp->if_capenable & IFCAP_TSO6)
3418		ifp->if_hwassist |= CSUM_IP6_TSO;
3419
3420	error = vtnet_init_rxtx_queues(sc);
3421	if (error)
3422		return (error);
3423
3424	return (0);
3425}
3426
3427static void
3428vtnet_init_locked(struct vtnet_softc *sc, int init_mode)
3429{
3430	device_t dev;
3431	struct ifnet *ifp;
3432
3433	dev = sc->vtnet_dev;
3434	ifp = sc->vtnet_ifp;
3435
3436	VTNET_CORE_LOCK_ASSERT(sc);
3437
3438	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3439		return;
3440
3441	vtnet_stop(sc);
3442
3443#ifdef DEV_NETMAP
3444	/* Once stopped we can update the netmap flags, if necessary. */
3445	switch (init_mode) {
3446	case VTNET_INIT_NETMAP_ENTER:
3447		nm_set_native_flags(NA(ifp));
3448		break;
3449	case VTNET_INIT_NETMAP_EXIT:
3450		nm_clear_native_flags(NA(ifp));
3451		break;
3452	}
3453#endif /* DEV_NETMAP */
3454
3455	if (vtnet_reinit(sc) != 0) {
3456		vtnet_stop(sc);
3457		return;
3458	}
3459
3460	ifp->if_drv_flags |= IFF_DRV_RUNNING;
3461	vtnet_update_link_status(sc);
3462	vtnet_enable_interrupts(sc);
3463	callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
3464
3465#ifdef DEV_NETMAP
3466	/* Re-enable txsync/rxsync. */
3467	netmap_enable_all_rings(ifp);
3468#endif /* DEV_NETMAP */
3469}
3470
3471static void
3472vtnet_init(void *xsc)
3473{
3474	struct vtnet_softc *sc;
3475
3476	sc = xsc;
3477
3478	VTNET_CORE_LOCK(sc);
3479	vtnet_init_locked(sc, 0);
3480	VTNET_CORE_UNLOCK(sc);
3481}
3482
3483static void
3484vtnet_free_ctrl_vq(struct vtnet_softc *sc)
3485{
3486
3487	/*
3488	 * The control virtqueue is only polled and therefore it should
3489	 * already be empty.
3490	 */
3491	KASSERT(virtqueue_empty(sc->vtnet_ctrl_vq),
3492	    ("%s: ctrl vq %p not empty", __func__, sc->vtnet_ctrl_vq));
3493}
3494
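/*
 * Execute one control virtqueue command: the sglist contains the readable
 * segments (header plus payload) followed by a single writable ack byte,
 * and the command is polled to completion under the core lock.
 */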
3495static void
3496vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie,
3497    struct sglist *sg, int readable, int writable)
3498{
3499	struct virtqueue *vq;
3500
3501	vq = sc->vtnet_ctrl_vq;
3502
3503	MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ);
3504	VTNET_CORE_LOCK_ASSERT(sc);
3505
3506	if (!virtqueue_empty(vq))
3507		return;
3508
3509	/*
3510	 * Poll for the response; the command has most likely already completed
3511	 * by the time the notify returns.
3512	 */
3513	if (virtqueue_enqueue(vq, cookie, sg, readable, writable) == 0)  {
3514		virtqueue_notify(vq);
3515		virtqueue_poll(vq, NULL);
3516	}
3517}
3518
3519static int
3520vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr)
3521{
3522	struct sglist_seg segs[3];
3523	struct sglist sg;
3524	struct {
3525		struct virtio_net_ctrl_hdr hdr __aligned(2);
3526		uint8_t pad1;
3527		uint8_t addr[ETHER_ADDR_LEN] __aligned(8);
3528		uint8_t pad2;
3529		uint8_t ack;
3530	} s;
3531	int error;
3532
3533	error = 0;
3534	MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_MAC);
3535
3536	s.hdr.class = VIRTIO_NET_CTRL_MAC;
3537	s.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
3538	bcopy(hwaddr, &s.addr[0], ETHER_ADDR_LEN);
3539	s.ack = VIRTIO_NET_ERR;
3540
3541	sglist_init(&sg, nitems(segs), segs);
3542	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3543	error |= sglist_append(&sg, &s.addr[0], ETHER_ADDR_LEN);
3544	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3545	MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3546
3547	if (error == 0)
3548		vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3549
3550	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3551}
3552
3553static int
3554vtnet_ctrl_guest_offloads(struct vtnet_softc *sc, uint64_t offloads)
3555{
3556	struct sglist_seg segs[3];
3557	struct sglist sg;
3558	struct {
3559		struct virtio_net_ctrl_hdr hdr __aligned(2);
3560		uint8_t pad1;
3561		uint64_t offloads __aligned(8);
3562		uint8_t pad2;
3563		uint8_t ack;
3564	} s;
3565	int error;
3566
3567	error = 0;
3568	MPASS(sc->vtnet_features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3569
3570	s.hdr.class = VIRTIO_NET_CTRL_GUEST_OFFLOADS;
3571	s.hdr.cmd = VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET;
3572	s.offloads = vtnet_gtoh64(sc, offloads);
3573	s.ack = VIRTIO_NET_ERR;
3574
3575	sglist_init(&sg, nitems(segs), segs);
3576	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3577	error |= sglist_append(&sg, &s.offloads, sizeof(uint64_t));
3578	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3579	MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3580
3581	if (error == 0)
3582		vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3583
3584	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3585}
3586
3587static int
3588vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs)
3589{
3590	struct sglist_seg segs[3];
3591	struct sglist sg;
3592	struct {
3593		struct virtio_net_ctrl_hdr hdr __aligned(2);
3594		uint8_t pad1;
3595		struct virtio_net_ctrl_mq mq __aligned(2);
3596		uint8_t pad2;
3597		uint8_t ack;
3598	} s;
3599	int error;
3600
3601	error = 0;
3602	MPASS(sc->vtnet_flags & VTNET_FLAG_MQ);
3603
3604	s.hdr.class = VIRTIO_NET_CTRL_MQ;
3605	s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
3606	s.mq.virtqueue_pairs = vtnet_gtoh16(sc, npairs);
3607	s.ack = VIRTIO_NET_ERR;
3608
3609	sglist_init(&sg, nitems(segs), segs);
3610	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3611	error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq));
3612	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3613	MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3614
3615	if (error == 0)
3616		vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3617
3618	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3619}
3620
3621static int
3622vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, uint8_t cmd, int on)
3623{
3624	struct sglist_seg segs[3];
3625	struct sglist sg;
3626	struct {
3627		struct virtio_net_ctrl_hdr hdr __aligned(2);
3628		uint8_t pad1;
3629		uint8_t onoff;
3630		uint8_t pad2;
3631		uint8_t ack;
3632	} s;
3633	int error;
3634
3635	error = 0;
3636	MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX);
3637
3638	s.hdr.class = VIRTIO_NET_CTRL_RX;
3639	s.hdr.cmd = cmd;
3640	s.onoff = !!on;
3641	s.ack = VIRTIO_NET_ERR;
3642
3643	sglist_init(&sg, nitems(segs), segs);
3644	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3645	error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t));
3646	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3647	MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3648
3649	if (error == 0)
3650		vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3651
3652	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3653}
3654
3655static int
3656vtnet_set_promisc(struct vtnet_softc *sc, int on)
3657{
3658	return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on));
3659}
3660
3661static int
3662vtnet_set_allmulti(struct vtnet_softc *sc, int on)
3663{
3664	return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on));
3665}
3666
3667static void
3668vtnet_rx_filter(struct vtnet_softc *sc)
3669{
3670	device_t dev;
3671	struct ifnet *ifp;
3672
3673	dev = sc->vtnet_dev;
3674	ifp = sc->vtnet_ifp;
3675
3676	VTNET_CORE_LOCK_ASSERT(sc);
3677
3678	if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0) {
3679		device_printf(dev, "cannot %s promiscuous mode\n",
3680		    ifp->if_flags & IFF_PROMISC ? "enable" : "disable");
3681	}
3682
3683	if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0) {
3684		device_printf(dev, "cannot %s all-multicast mode\n",
3685		    ifp->if_flags & IFF_ALLMULTI ? "enable" : "disable");
3686	}
3687}
3688
3689static u_int
3690vtnet_copy_ifaddr(void *arg, struct sockaddr_dl *sdl, u_int ucnt)
3691{
3692	struct vtnet_softc *sc = arg;
3693
3694	if (memcmp(LLADDR(sdl), sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0)
3695		return (0);
3696
3697	if (ucnt < VTNET_MAX_MAC_ENTRIES)
3698		bcopy(LLADDR(sdl),
3699		    &sc->vtnet_mac_filter->vmf_unicast.macs[ucnt],
3700		    ETHER_ADDR_LEN);
3701
3702	return (1);
3703}
3704
3705static u_int
3706vtnet_copy_maddr(void *arg, struct sockaddr_dl *sdl, u_int mcnt)
3707{
3708	struct vtnet_mac_filter *filter = arg;
3709
3710	if (mcnt < VTNET_MAX_MAC_ENTRIES)
3711		bcopy(LLADDR(sdl), &filter->vmf_multicast.macs[mcnt],
3712		    ETHER_ADDR_LEN);
3713
3714	return (1);
3715}
3716
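/*
 * Program the host's MAC filter table with the interface's unicast and
 * multicast addresses. If either table would overflow, fall back to
 * promiscuous or all-multicast mode instead.
 */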
3717static void
3718vtnet_rx_filter_mac(struct vtnet_softc *sc)
3719{
3720	struct virtio_net_ctrl_hdr hdr __aligned(2);
3721	struct vtnet_mac_filter *filter;
3722	struct sglist_seg segs[4];
3723	struct sglist sg;
3724	struct ifnet *ifp;
3725	bool promisc, allmulti;
3726	u_int ucnt, mcnt;
3727	int error;
3728	uint8_t ack;
3729
3730	ifp = sc->vtnet_ifp;
3731	filter = sc->vtnet_mac_filter;
3732	error = 0;
3733
3734	MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX);
3735	VTNET_CORE_LOCK_ASSERT(sc);
3736
3737	/* Unicast MAC addresses: */
3738	ucnt = if_foreach_lladdr(ifp, vtnet_copy_ifaddr, sc);
3739	promisc = (ucnt > VTNET_MAX_MAC_ENTRIES);
3740
3741	if (promisc) {
3742		ucnt = 0;
3743		if_printf(ifp, "more than %d MAC addresses assigned, "
3744		    "falling back to promiscuous mode\n",
3745		    VTNET_MAX_MAC_ENTRIES);
3746	}
3747
3748	/* Multicast MAC addresses: */
3749	mcnt = if_foreach_llmaddr(ifp, vtnet_copy_maddr, filter);
3750	allmulti = (mcnt > VTNET_MAX_MAC_ENTRIES);
3751
3752	if (allmulti) {
3753		mcnt = 0;
3754		if_printf(ifp, "more than %d multicast MAC addresses "
3755		    "assigned, falling back to all-multicast mode\n",
3756		    VTNET_MAX_MAC_ENTRIES);
3757	}
3758
3759	if (promisc && allmulti)
3760		goto out;
3761
3762	filter->vmf_unicast.nentries = vtnet_gtoh32(sc, ucnt);
3763	filter->vmf_multicast.nentries = vtnet_gtoh32(sc, mcnt);
3764
3765	hdr.class = VIRTIO_NET_CTRL_MAC;
3766	hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
3767	ack = VIRTIO_NET_ERR;
3768
3769	sglist_init(&sg, nitems(segs), segs);
3770	error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
3771	error |= sglist_append(&sg, &filter->vmf_unicast,
3772	    sizeof(uint32_t) + ucnt * ETHER_ADDR_LEN);
3773	error |= sglist_append(&sg, &filter->vmf_multicast,
3774	    sizeof(uint32_t) + mcnt * ETHER_ADDR_LEN);
3775	error |= sglist_append(&sg, &ack, sizeof(uint8_t));
3776	MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3777
3778	if (error == 0)
3779		vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
3780	if (ack != VIRTIO_NET_OK)
3781		if_printf(ifp, "error setting host MAC filter table\n");
3782
3783out:
3784	if (promisc != 0 && vtnet_set_promisc(sc, 1) != 0)
3785		if_printf(ifp, "cannot enable promiscuous mode\n");
3786	if (allmulti != 0 && vtnet_set_allmulti(sc, 1) != 0)
3787		if_printf(ifp, "cannot enable all-multicast mode\n");
3788}
3789
3790static int
3791vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
3792{
3793	struct sglist_seg segs[3];
3794	struct sglist sg;
3795	struct {
3796		struct virtio_net_ctrl_hdr hdr __aligned(2);
3797		uint8_t pad1;
3798		uint16_t tag __aligned(2);
3799		uint8_t pad2;
3800		uint8_t ack;
3801	} s;
3802	int error;
3803
3804	error = 0;
3805	MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER);
3806
3807	s.hdr.class = VIRTIO_NET_CTRL_VLAN;
3808	s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
3809	s.tag = vtnet_gtoh16(sc, tag);
3810	s.ack = VIRTIO_NET_ERR;
3811
3812	sglist_init(&sg, nitems(segs), segs);
3813	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3814	error |= sglist_append(&sg, &s.tag, sizeof(uint16_t));
3815	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3816	MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3817
3818	if (error == 0)
3819		vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3820
3821	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3822}
3823
3824static void
3825vtnet_rx_filter_vlan(struct vtnet_softc *sc)
3826{
3827	int i, bit;
3828	uint32_t w;
3829	uint16_t tag;
3830
3831	MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER);
3832	VTNET_CORE_LOCK_ASSERT(sc);
3833
3834	/* Enable the filter for each configured VLAN. */
3835	for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) {
3836		w = sc->vtnet_vlan_filter[i];
3837
3838		while ((bit = ffs(w) - 1) != -1) {
3839			w &= ~(1 << bit);
3840			tag = sizeof(w) * CHAR_BIT * i + bit;
3841
3842			if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) {
3843				device_printf(sc->vtnet_dev,
3844				    "cannot enable VLAN %d filter\n", tag);
3845			}
3846		}
3847	}
3848}
3849
3850static void
3851vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
3852{
3853	struct ifnet *ifp;
3854	int idx, bit;
3855
3856	ifp = sc->vtnet_ifp;
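	/*
	 * The shadow VLAN filter is a 4096-bit bitmap stored as 32-bit words:
	 * locate the word index and bit position for this tag.
	 */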
3857	idx = (tag >> 5) & 0x7F;
3858	bit = tag & 0x1F;
3859
3860	if (tag == 0 || tag > 4095)
3861		return;
3862
3863	VTNET_CORE_LOCK(sc);
3864
3865	if (add)
3866		sc->vtnet_vlan_filter[idx] |= (1 << bit);
3867	else
3868		sc->vtnet_vlan_filter[idx] &= ~(1 << bit);
3869
3870	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER &&
3871	    ifp->if_drv_flags & IFF_DRV_RUNNING &&
3872	    vtnet_exec_vlan_filter(sc, add, tag) != 0) {
3873		device_printf(sc->vtnet_dev,
3874		    "cannot %s VLAN %d %s the host filter table\n",
3875		    add ? "add" : "remove", tag, add ? "to" : "from");
3876	}
3877
3878	VTNET_CORE_UNLOCK(sc);
3879}
3880
3881static void
3882vtnet_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3883{
3884
3885	if (ifp->if_softc != arg)
3886		return;
3887
3888	vtnet_update_vlan_filter(arg, 1, tag);
3889}
3890
3891static void
3892vtnet_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3893{
3894
3895	if (ifp->if_softc != arg)
3896		return;
3897
3898	vtnet_update_vlan_filter(arg, 0, tag);
3899}
3900
3901static void
3902vtnet_update_speed_duplex(struct vtnet_softc *sc)
3903{
3904	struct ifnet *ifp;
3905	uint32_t speed;
3906
3907	ifp = sc->vtnet_ifp;
3908
3909	if ((sc->vtnet_features & VIRTIO_NET_F_SPEED_DUPLEX) == 0)
3910		return;
3911
3912	/* BMV: Ignore duplex. */
3913	speed = virtio_read_dev_config_4(sc->vtnet_dev,
3914	    offsetof(struct virtio_net_config, speed));
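	/* A value of all ones (-1) indicates the device speed is unknown. */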
3915	if (speed != -1)
3916		ifp->if_baudrate = IF_Mbps(speed);
3917}
3918
3919static int
3920vtnet_is_link_up(struct vtnet_softc *sc)
3921{
3922	uint16_t status;
3923
3924	if ((sc->vtnet_features & VIRTIO_NET_F_STATUS) == 0)
3925		return (1);
3926
3927	status = virtio_read_dev_config_2(sc->vtnet_dev,
3928	    offsetof(struct virtio_net_config, status));
3929
3930	return ((status & VIRTIO_NET_S_LINK_UP) != 0);
3931}
3932
3933static void
3934vtnet_update_link_status(struct vtnet_softc *sc)
3935{
3936	struct ifnet *ifp;
3937	int link;
3938
3939	ifp = sc->vtnet_ifp;
3940	VTNET_CORE_LOCK_ASSERT(sc);
3941	link = vtnet_is_link_up(sc);
3942
3943	/* Notify if the link status has changed. */
3944	if (link != 0 && sc->vtnet_link_active == 0) {
3945		vtnet_update_speed_duplex(sc);
3946		sc->vtnet_link_active = 1;
3947		if_link_state_change(ifp, LINK_STATE_UP);
3948	} else if (link == 0 && sc->vtnet_link_active != 0) {
3949		sc->vtnet_link_active = 0;
3950		if_link_state_change(ifp, LINK_STATE_DOWN);
3951	}
3952}
3953
3954static int
3955vtnet_ifmedia_upd(struct ifnet *ifp)
3956{
3957	return (EOPNOTSUPP);
3958}
3959
3960static void
3961vtnet_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
3962{
3963	struct vtnet_softc *sc;
3964
3965	sc = ifp->if_softc;
3966
3967	ifmr->ifm_status = IFM_AVALID;
3968	ifmr->ifm_active = IFM_ETHER;
3969
3970	VTNET_CORE_LOCK(sc);
3971	if (vtnet_is_link_up(sc) != 0) {
3972		ifmr->ifm_status |= IFM_ACTIVE;
3973		ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
3974	} else
3975		ifmr->ifm_active |= IFM_NONE;
3976	VTNET_CORE_UNLOCK(sc);
3977}
3978
3979static void
3980vtnet_get_macaddr(struct vtnet_softc *sc)
3981{
3982
3983	if (sc->vtnet_flags & VTNET_FLAG_MAC) {
3984		virtio_read_device_config_array(sc->vtnet_dev,
3985		    offsetof(struct virtio_net_config, mac),
3986		    &sc->vtnet_hwaddr[0], sizeof(uint8_t), ETHER_ADDR_LEN);
3987	} else {
3988		/* Generate a random locally administered unicast address. */
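		/* 0xB2: locally administered bit set, multicast bit clear. */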
3989		sc->vtnet_hwaddr[0] = 0xB2;
3990		arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0);
3991	}
3992}
3993
3994static void
3995vtnet_set_macaddr(struct vtnet_softc *sc)
3996{
3997	device_t dev;
3998	int error;
3999
4000	dev = sc->vtnet_dev;
4001
4002	if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) {
4003		error = vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr);
4004		if (error)
4005			device_printf(dev, "unable to set MAC address\n");
4006		return;
4007	}
4008
4009	/* MAC in config is read-only in modern VirtIO. */
4010	if (!vtnet_modern(sc) && sc->vtnet_flags & VTNET_FLAG_MAC) {
4011		for (int i = 0; i < ETHER_ADDR_LEN; i++) {
4012			virtio_write_dev_config_1(dev,
4013			    offsetof(struct virtio_net_config, mac) + i,
4014			    sc->vtnet_hwaddr[i]);
4015		}
4016	}
4017}
4018
4019static void
4020vtnet_attached_set_macaddr(struct vtnet_softc *sc)
4021{
4022
4023	/* Assign MAC address if it was generated. */
4024	if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0)
4025		vtnet_set_macaddr(sc);
4026}
4027
4028static void
4029vtnet_vlan_tag_remove(struct mbuf *m)
4030{
4031	struct ether_vlan_header *evh;
4032
4033	evh = mtod(m, struct ether_vlan_header *);
4034	m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag);
4035	m->m_flags |= M_VLANTAG;
4036
4037	/* Strip the 802.1Q header. */
4038	bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN,
4039	    ETHER_HDR_LEN - ETHER_TYPE_LEN);
4040	m_adj(m, ETHER_VLAN_ENCAP_LEN);
4041}
4042
4043static void
4044vtnet_set_rx_process_limit(struct vtnet_softc *sc)
4045{
4046	int limit;
4047
4048	limit = vtnet_tunable_int(sc, "rx_process_limit",
4049	    vtnet_rx_process_limit);
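	/* A negative tunable value removes the receive processing limit. */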
4050	if (limit < 0)
4051		limit = INT_MAX;
4052	sc->vtnet_rx_process_limit = limit;
4053}
4054
4055static void
4056vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx,
4057    struct sysctl_oid_list *child, struct vtnet_rxq *rxq)
4058{
4059	struct sysctl_oid *node;
4060	struct sysctl_oid_list *list;
4061	struct vtnet_rxq_stats *stats;
4062	char namebuf[16];
4063
4064	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id);
4065	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
4066	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Receive Queue");
4067	list = SYSCTL_CHILDREN(node);
4068
4069	stats = &rxq->vtnrx_stats;
4070
4071	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
4072	    &stats->vrxs_ipackets, "Receive packets");
4073	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
4074	    &stats->vrxs_ibytes, "Receive bytes");
4075	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
4076	    &stats->vrxs_iqdrops, "Receive drops");
4077	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
4078	    &stats->vrxs_ierrors, "Receive errors");
4079	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
4080	    &stats->vrxs_csum, "Receive checksum offloaded");
4081	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD,
4082	    &stats->vrxs_csum_failed, "Receive checksum offload failed");
4083	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "host_lro", CTLFLAG_RD,
4084	    &stats->vrxs_host_lro, "Receive host segmentation offloaded");
4085	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
4086	    &stats->vrxs_rescheduled,
4087	    "Receive interrupt handler rescheduled");
4088}
4089
4090static void
4091vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx,
4092    struct sysctl_oid_list *child, struct vtnet_txq *txq)
4093{
4094	struct sysctl_oid *node;
4095	struct sysctl_oid_list *list;
4096	struct vtnet_txq_stats *stats;
4097	char namebuf[16];
4098
4099	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id);
4100	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
4101	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Transmit Queue");
4102	list = SYSCTL_CHILDREN(node);
4103
4104	stats = &txq->vtntx_stats;
4105
4106	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
4107	    &stats->vtxs_opackets, "Transmit packets");
4108	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
4109	    &stats->vtxs_obytes, "Transmit bytes");
4110	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
4111	    &stats->vtxs_omcasts, "Transmit multicasts");
4112	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
4113	    &stats->vtxs_csum, "Transmit checksum offloaded");
4114	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
4115	    &stats->vtxs_tso, "Transmit TCP segmentation offloaded");
4116	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
4117	    &stats->vtxs_rescheduled,
4118	    "Transmit interrupt handler rescheduled");
4119}
4120
4121static void
4122vtnet_setup_queue_sysctl(struct vtnet_softc *sc)
4123{
4124	device_t dev;
4125	struct sysctl_ctx_list *ctx;
4126	struct sysctl_oid *tree;
4127	struct sysctl_oid_list *child;
4128	int i;
4129
4130	dev = sc->vtnet_dev;
4131	ctx = device_get_sysctl_ctx(dev);
4132	tree = device_get_sysctl_tree(dev);
4133	child = SYSCTL_CHILDREN(tree);
4134
4135	for (i = 0; i < sc->vtnet_req_vq_pairs; i++) {
4136		vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]);
4137		vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]);
4138	}
4139}
4140
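/* Create per-queue sysctl nodes for each requested queue pair. */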
4141static void
4142vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx,
4143    struct sysctl_oid_list *child, struct vtnet_softc *sc)
4144{
4145	struct vtnet_statistics *stats;
4146	struct vtnet_rxq_stats rxaccum;
4147	struct vtnet_txq_stats txaccum;
4148
4149	vtnet_accum_stats(sc, &rxaccum, &txaccum);
4150
4151	stats = &sc->vtnet_stats;
4152	stats->rx_csum_offloaded = rxaccum.vrxs_csum;
4153	stats->rx_csum_failed = rxaccum.vrxs_csum_failed;
4154	stats->rx_task_rescheduled = rxaccum.vrxs_rescheduled;
4155	stats->tx_csum_offloaded = txaccum.vtxs_csum;
4156	stats->tx_tso_offloaded = txaccum.vtxs_tso;
4157	stats->tx_task_rescheduled = txaccum.vtxs_rescheduled;
4158
4159	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed",
4160	    CTLFLAG_RD, &stats->mbuf_alloc_failed,
4161	    "Mbuf cluster allocation failures");
4162
4163	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large",
4164	    CTLFLAG_RD, &stats->rx_frame_too_large,
4165	    "Received frame larger than the mbuf chain");
4166	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed",
4167	    CTLFLAG_RD, &stats->rx_enq_replacement_failed,
4168	    "Enqueuing the replacement receive mbuf failed");
4169	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed",
4170	    CTLFLAG_RD, &stats->rx_mergeable_failed,
	    "Mergeable buffer receive failures");
4172	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype",
4173	    CTLFLAG_RD, &stats->rx_csum_bad_ethtype,
4174	    "Received checksum offloaded buffer with unsupported "
4175	    "Ethernet type");
4176	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto",
4177	    CTLFLAG_RD, &stats->rx_csum_bad_ipproto,
4178	    "Received checksum offloaded buffer with incorrect IP protocol");
4179	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset",
4180	    CTLFLAG_RD, &stats->rx_csum_bad_offset,
4181	    "Received checksum offloaded buffer with incorrect offset");
4182	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto",
4183	    CTLFLAG_RD, &stats->rx_csum_bad_proto,
4184	    "Received checksum offloaded buffer with incorrect protocol");
4185	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed",
4186	    CTLFLAG_RD, &stats->rx_csum_failed,
4187	    "Received buffer checksum offload failed");
4188	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded",
4189	    CTLFLAG_RD, &stats->rx_csum_offloaded,
4190	    "Received buffer checksum offload succeeded");
4191	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled",
4192	    CTLFLAG_RD, &stats->rx_task_rescheduled,
4193	    "Times the receive interrupt task rescheduled itself");
4194
4195	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_unknown_ethtype",
4196	    CTLFLAG_RD, &stats->tx_csum_unknown_ethtype,
4197	    "Aborted transmit of checksum offloaded buffer with unknown "
4198	    "Ethernet type");
4199	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_proto_mismatch",
4200	    CTLFLAG_RD, &stats->tx_csum_proto_mismatch,
	    "Aborted transmit of checksum offloaded buffer because of "
	    "mismatched protocols");
4203	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp",
4204	    CTLFLAG_RD, &stats->tx_tso_not_tcp,
	    "Aborted transmit of TSO buffer with non-TCP protocol");
4206	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_without_csum",
4207	    CTLFLAG_RD, &stats->tx_tso_without_csum,
4208	    "Aborted transmit of TSO buffer without TCP checksum offload");
4209	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged",
4210	    CTLFLAG_RD, &stats->tx_defragged,
4211	    "Transmit mbufs defragged");
4212	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed",
4213	    CTLFLAG_RD, &stats->tx_defrag_failed,
4214	    "Aborted transmit of buffer because defrag failed");
4215	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded",
4216	    CTLFLAG_RD, &stats->tx_csum_offloaded,
4217	    "Offloaded checksum of transmitted buffer");
4218	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded",
4219	    CTLFLAG_RD, &stats->tx_tso_offloaded,
4220	    "Segmentation offload of transmitted buffer");
4221	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled",
4222	    CTLFLAG_RD, &stats->tx_task_rescheduled,
4223	    "Times the transmit interrupt task rescheduled itself");
4224}
4225
4226static void
4227vtnet_setup_sysctl(struct vtnet_softc *sc)
4228{
4229	device_t dev;
4230	struct sysctl_ctx_list *ctx;
4231	struct sysctl_oid *tree;
4232	struct sysctl_oid_list *child;
4233
4234	dev = sc->vtnet_dev;
4235	ctx = device_get_sysctl_ctx(dev);
4236	tree = device_get_sysctl_tree(dev);
4237	child = SYSCTL_CHILDREN(tree);
4238
4239	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs",
4240	    CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0,
	    "Maximum number of supported virtqueue pairs");
4242	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "req_vq_pairs",
4243	    CTLFLAG_RD, &sc->vtnet_req_vq_pairs, 0,
4244	    "Number of requested virtqueue pairs");
4245	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs",
4246	    CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0,
4247	    "Number of active virtqueue pairs");
4248
4249	vtnet_setup_stat_sysctl(ctx, child, sc);
4250}
4251
4252static void
4253vtnet_load_tunables(struct vtnet_softc *sc)
4254{
4255
4256	sc->vtnet_lro_entry_count = vtnet_tunable_int(sc,
4257	    "lro_entry_count", vtnet_lro_entry_count);
4258	if (sc->vtnet_lro_entry_count < TCP_LRO_ENTRIES)
4259		sc->vtnet_lro_entry_count = TCP_LRO_ENTRIES;
4260
4261	sc->vtnet_lro_mbufq_depth = vtnet_tunable_int(sc,
4262	    "lro_mbufq_depth", vtnet_lro_mbufq_depth);
4263}
4264
4265static int
4266vtnet_rxq_enable_intr(struct vtnet_rxq *rxq)
4267{
4268
4269	return (virtqueue_enable_intr(rxq->vtnrx_vq));
4270}
4271
4272static void
4273vtnet_rxq_disable_intr(struct vtnet_rxq *rxq)
4274{
4275
4276	virtqueue_disable_intr(rxq->vtnrx_vq);
4277}
4278
4279static int
4280vtnet_txq_enable_intr(struct vtnet_txq *txq)
4281{
4282	struct virtqueue *vq;
4283
4284	vq = txq->vtntx_vq;
4285
4286	if (vtnet_txq_below_threshold(txq) != 0)
4287		return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG));
4288
4289	/*
4290	 * The free count is above our threshold. Keep the Tx interrupt
4291	 * disabled until the queue is fuller.
4292	 */
4293	return (0);
4294}
4295
4296static void
4297vtnet_txq_disable_intr(struct vtnet_txq *txq)
4298{
4299
4300	virtqueue_disable_intr(txq->vtntx_vq);
4301}
4302
4303static void
4304vtnet_enable_rx_interrupts(struct vtnet_softc *sc)
4305{
4306	struct vtnet_rxq *rxq;
4307	int i;
4308
4309	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
4310		rxq = &sc->vtnet_rxqs[i];
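		/*
		 * Reschedule the taskqueue if buffers arrived while the
		 * interrupt was disabled.
		 */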
4311		if (vtnet_rxq_enable_intr(rxq) != 0)
4312			taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
4313	}
4314}
4315
4316static void
4317vtnet_enable_tx_interrupts(struct vtnet_softc *sc)
4318{
4319	int i;
4320
4321	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
4322		vtnet_txq_enable_intr(&sc->vtnet_txqs[i]);
4323}
4324
4325static void
4326vtnet_enable_interrupts(struct vtnet_softc *sc)
4327{
4328
4329	vtnet_enable_rx_interrupts(sc);
4330	vtnet_enable_tx_interrupts(sc);
4331}
4332
4333static void
4334vtnet_disable_rx_interrupts(struct vtnet_softc *sc)
4335{
4336	int i;
4337
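	/* Disable every allocated receive queue, not just the active pairs. */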
4338	for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
4339		vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]);
4340}
4341
4342static void
4343vtnet_disable_tx_interrupts(struct vtnet_softc *sc)
4344{
4345	int i;
4346
4347	for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
4348		vtnet_txq_disable_intr(&sc->vtnet_txqs[i]);
4349}
4350
4351static void
4352vtnet_disable_interrupts(struct vtnet_softc *sc)
4353{
4354
4355	vtnet_disable_rx_interrupts(sc);
4356	vtnet_disable_tx_interrupts(sc);
4357}
4358
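/*
 * Fetch a per-device tunable such as hw.vtnet.<unit>.<knob>, returning the
 * supplied default when the tunable is not set.
 */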
4359static int
4360vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def)
4361{
4362	char path[64];
4363
4364	snprintf(path, sizeof(path),
4365	    "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob);
4366	TUNABLE_INT_FETCH(path, &def);
4367
4368	return (def);
4369}
4370
4371#ifdef DEBUGNET
4372static void
4373vtnet_debugnet_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize)
4374{
4375	struct vtnet_softc *sc;
4376
4377	sc = if_getsoftc(ifp);
4378
4379	VTNET_CORE_LOCK(sc);
4380	*nrxr = sc->vtnet_req_vq_pairs;
4381	*ncl = DEBUGNET_MAX_IN_FLIGHT;
4382	*clsize = sc->vtnet_rx_clustersz;
4383	VTNET_CORE_UNLOCK(sc);
4384}
4385
4386static void
4387vtnet_debugnet_event(struct ifnet *ifp __unused, enum debugnet_ev event __unused)
4388{
4389}
4390
4391static int
4392vtnet_debugnet_transmit(struct ifnet *ifp, struct mbuf *m)
4393{
4394	struct vtnet_softc *sc;
4395	struct vtnet_txq *txq;
4396	int error;
4397
4398	sc = if_getsoftc(ifp);
4399	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
4400	    IFF_DRV_RUNNING)
4401		return (EBUSY);
4402
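	/* Debugnet transmits only on the first queue. */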
4403	txq = &sc->vtnet_txqs[0];
4404	error = vtnet_txq_encap(txq, &m, M_NOWAIT | M_USE_RESERVE);
4405	if (error == 0)
4406		(void)vtnet_txq_notify(txq);
4407	return (error);
4408}
4409
4410static int
4411vtnet_debugnet_poll(struct ifnet *ifp, int count)
4412{
4413	struct vtnet_softc *sc;
4414	int i;
4415
4416	sc = if_getsoftc(ifp);
4417	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
4418	    IFF_DRV_RUNNING)
4419		return (EBUSY);
4420
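	/* Reclaim completed transmits and drain each active receive queue. */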
4421	(void)vtnet_txq_eof(&sc->vtnet_txqs[0]);
4422	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
4423		(void)vtnet_rxq_eof(&sc->vtnet_rxqs[i]);
4424	return (0);
4425}
4426#endif /* DEBUGNET */
4427