1227652Sgrehan/*-
2252707Sbryanv * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
3227652Sgrehan * All rights reserved.
4227652Sgrehan *
5227652Sgrehan * Redistribution and use in source and binary forms, with or without
6227652Sgrehan * modification, are permitted provided that the following conditions
7227652Sgrehan * are met:
8227652Sgrehan * 1. Redistributions of source code must retain the above copyright
9227652Sgrehan *    notice unmodified, this list of conditions, and the following
10227652Sgrehan *    disclaimer.
11227652Sgrehan * 2. Redistributions in binary form must reproduce the above copyright
12227652Sgrehan *    notice, this list of conditions and the following disclaimer in the
13227652Sgrehan *    documentation and/or other materials provided with the distribution.
14227652Sgrehan *
15227652Sgrehan * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16227652Sgrehan * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17227652Sgrehan * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18227652Sgrehan * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19227652Sgrehan * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20227652Sgrehan * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21227652Sgrehan * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22227652Sgrehan * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23227652Sgrehan * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24227652Sgrehan * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25227652Sgrehan */
26227652Sgrehan
27227652Sgrehan/* Driver for VirtIO network devices. */
28227652Sgrehan
29227652Sgrehan#include <sys/cdefs.h>
30227652Sgrehan__FBSDID("$FreeBSD$");
31227652Sgrehan
32227652Sgrehan#include <sys/param.h>
33227652Sgrehan#include <sys/systm.h>
34227652Sgrehan#include <sys/kernel.h>
35227652Sgrehan#include <sys/sockio.h>
36227652Sgrehan#include <sys/mbuf.h>
37227652Sgrehan#include <sys/malloc.h>
38227652Sgrehan#include <sys/module.h>
39227652Sgrehan#include <sys/socket.h>
40227652Sgrehan#include <sys/sysctl.h>
41227652Sgrehan#include <sys/random.h>
42227652Sgrehan#include <sys/sglist.h>
43227652Sgrehan#include <sys/lock.h>
44227652Sgrehan#include <sys/mutex.h>
45255112Sbryanv#include <sys/taskqueue.h>
46255112Sbryanv#include <sys/smp.h>
47255112Sbryanv#include <machine/smp.h>
48227652Sgrehan
49227652Sgrehan#include <vm/uma.h>
50227652Sgrehan
51227652Sgrehan#include <net/ethernet.h>
52227652Sgrehan#include <net/if.h>
53227652Sgrehan#include <net/if_arp.h>
54227652Sgrehan#include <net/if_dl.h>
55227652Sgrehan#include <net/if_types.h>
56227652Sgrehan#include <net/if_media.h>
57227652Sgrehan#include <net/if_vlan_var.h>
58227652Sgrehan
59227652Sgrehan#include <net/bpf.h>
60227652Sgrehan
61227652Sgrehan#include <netinet/in_systm.h>
62227652Sgrehan#include <netinet/in.h>
63227652Sgrehan#include <netinet/ip.h>
64227652Sgrehan#include <netinet/ip6.h>
65255112Sbryanv#include <netinet6/ip6_var.h>
66227652Sgrehan#include <netinet/udp.h>
67227652Sgrehan#include <netinet/tcp.h>
68227652Sgrehan#include <netinet/sctp.h>
69227652Sgrehan
70227652Sgrehan#include <machine/bus.h>
71227652Sgrehan#include <machine/resource.h>
72227652Sgrehan#include <sys/bus.h>
73227652Sgrehan#include <sys/rman.h>
74227652Sgrehan
75227652Sgrehan#include <dev/virtio/virtio.h>
76227652Sgrehan#include <dev/virtio/virtqueue.h>
77227652Sgrehan#include <dev/virtio/network/virtio_net.h>
78227652Sgrehan#include <dev/virtio/network/if_vtnetvar.h>
79227652Sgrehan
80227652Sgrehan#include "virtio_if.h"
81227652Sgrehan
82255112Sbryanv#include "opt_inet.h"
83255112Sbryanv#include "opt_inet6.h"
84255112Sbryanv
85227652Sgrehanstatic int	vtnet_modevent(module_t, int, void *);
86227652Sgrehan
87227652Sgrehanstatic int	vtnet_probe(device_t);
88227652Sgrehanstatic int	vtnet_attach(device_t);
89227652Sgrehanstatic int	vtnet_detach(device_t);
90227652Sgrehanstatic int	vtnet_suspend(device_t);
91227652Sgrehanstatic int	vtnet_resume(device_t);
92227652Sgrehanstatic int	vtnet_shutdown(device_t);
93255112Sbryanvstatic int	vtnet_attach_completed(device_t);
94227652Sgrehanstatic int	vtnet_config_change(device_t);
95227652Sgrehan
96227652Sgrehanstatic void	vtnet_negotiate_features(struct vtnet_softc *);
97255112Sbryanvstatic void	vtnet_setup_features(struct vtnet_softc *);
98255112Sbryanvstatic int	vtnet_init_rxq(struct vtnet_softc *, int);
99255112Sbryanvstatic int	vtnet_init_txq(struct vtnet_softc *, int);
100255112Sbryanvstatic int	vtnet_alloc_rxtx_queues(struct vtnet_softc *);
101255112Sbryanvstatic void	vtnet_free_rxtx_queues(struct vtnet_softc *);
102255112Sbryanvstatic int	vtnet_alloc_rx_filters(struct vtnet_softc *);
103255112Sbryanvstatic void	vtnet_free_rx_filters(struct vtnet_softc *);
104227652Sgrehanstatic int	vtnet_alloc_virtqueues(struct vtnet_softc *);
105255112Sbryanvstatic int	vtnet_setup_interface(struct vtnet_softc *);
106227652Sgrehanstatic int	vtnet_change_mtu(struct vtnet_softc *, int);
107227652Sgrehanstatic int	vtnet_ioctl(struct ifnet *, u_long, caddr_t);
108227652Sgrehan
109255112Sbryanvstatic int	vtnet_rxq_populate(struct vtnet_rxq *);
110255112Sbryanvstatic void	vtnet_rxq_free_mbufs(struct vtnet_rxq *);
111255112Sbryanvstatic struct mbuf *
112255112Sbryanv		vtnet_rx_alloc_buf(struct vtnet_softc *, int , struct mbuf **);
113255112Sbryanvstatic int	vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *,
114227652Sgrehan		    struct mbuf *, int);
115255112Sbryanvstatic int	vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
116255112Sbryanvstatic int	vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
117255112Sbryanvstatic int	vtnet_rxq_new_buf(struct vtnet_rxq *);
118255112Sbryanvstatic int	vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
119255112Sbryanv		     struct virtio_net_hdr *);
120255112Sbryanvstatic void	vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
121255112Sbryanvstatic void	vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *);
122255112Sbryanvstatic int	vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int);
123255112Sbryanvstatic void	vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *,
124227652Sgrehan		    struct virtio_net_hdr *);
125255112Sbryanvstatic int	vtnet_rxq_eof(struct vtnet_rxq *);
126252702Sbryanvstatic void	vtnet_rx_vq_intr(void *);
127255112Sbryanvstatic void	vtnet_rxq_tq_intr(void *, int);
128227652Sgrehan
129270334Sbryanvstatic int	vtnet_txq_below_threshold(struct vtnet_txq *);
130270334Sbryanvstatic int	vtnet_txq_notify(struct vtnet_txq *);
131255112Sbryanvstatic void	vtnet_txq_free_mbufs(struct vtnet_txq *);
132255112Sbryanvstatic int	vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
133255112Sbryanv		    int *, int *, int *);
134255112Sbryanvstatic int	vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int,
135255112Sbryanv		    int, struct virtio_net_hdr *);
136255112Sbryanvstatic struct mbuf *
137255112Sbryanv		vtnet_txq_offload(struct vtnet_txq *, struct mbuf *,
138227652Sgrehan		    struct virtio_net_hdr *);
139255112Sbryanvstatic int	vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **,
140227652Sgrehan		    struct vtnet_tx_header *);
141255112Sbryanvstatic int	vtnet_txq_encap(struct vtnet_txq *, struct mbuf **);
142255112Sbryanv#ifdef VTNET_LEGACY_TX
143255112Sbryanvstatic void	vtnet_start_locked(struct vtnet_txq *, struct ifnet *);
144227652Sgrehanstatic void	vtnet_start(struct ifnet *);
145255112Sbryanv#else
146255112Sbryanvstatic int	vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *);
147255112Sbryanvstatic int	vtnet_txq_mq_start(struct ifnet *, struct mbuf *);
148255112Sbryanvstatic void	vtnet_txq_tq_deferred(void *, int);
149255112Sbryanv#endif
150265286Sbryanvstatic void	vtnet_txq_start(struct vtnet_txq *);
151255112Sbryanvstatic void	vtnet_txq_tq_intr(void *, int);
152270334Sbryanvstatic int	vtnet_txq_eof(struct vtnet_txq *);
153252702Sbryanvstatic void	vtnet_tx_vq_intr(void *);
154255112Sbryanvstatic void	vtnet_tx_start_all(struct vtnet_softc *);
155227652Sgrehan
156255112Sbryanv#ifndef VTNET_LEGACY_TX
157255112Sbryanvstatic void	vtnet_qflush(struct ifnet *);
158255112Sbryanv#endif
159255112Sbryanv
160255112Sbryanvstatic int	vtnet_watchdog(struct vtnet_txq *);
161255112Sbryanvstatic void	vtnet_rxq_accum_stats(struct vtnet_rxq *,
162255112Sbryanv		    struct vtnet_rxq_stats *);
163255112Sbryanvstatic void	vtnet_txq_accum_stats(struct vtnet_txq *,
164255112Sbryanv		    struct vtnet_txq_stats *);
165255112Sbryanvstatic void	vtnet_accumulate_stats(struct vtnet_softc *);
166255112Sbryanvstatic void	vtnet_tick(void *);
167255112Sbryanv
168255112Sbryanvstatic void	vtnet_start_taskqueues(struct vtnet_softc *);
169255112Sbryanvstatic void	vtnet_free_taskqueues(struct vtnet_softc *);
170255112Sbryanvstatic void	vtnet_drain_taskqueues(struct vtnet_softc *);
171255112Sbryanv
172255112Sbryanvstatic void	vtnet_drain_rxtx_queues(struct vtnet_softc *);
173255112Sbryanvstatic void	vtnet_stop_rendezvous(struct vtnet_softc *);
174227652Sgrehanstatic void	vtnet_stop(struct vtnet_softc *);
175255112Sbryanvstatic int	vtnet_virtio_reinit(struct vtnet_softc *);
176255112Sbryanvstatic void	vtnet_init_rx_filters(struct vtnet_softc *);
177255112Sbryanvstatic int	vtnet_init_rx_queues(struct vtnet_softc *);
178255112Sbryanvstatic int	vtnet_init_tx_queues(struct vtnet_softc *);
179255112Sbryanvstatic int	vtnet_init_rxtx_queues(struct vtnet_softc *);
180255112Sbryanvstatic void	vtnet_set_active_vq_pairs(struct vtnet_softc *);
181227652Sgrehanstatic int	vtnet_reinit(struct vtnet_softc *);
182227652Sgrehanstatic void	vtnet_init_locked(struct vtnet_softc *);
183227652Sgrehanstatic void	vtnet_init(void *);
184227652Sgrehan
185255112Sbryanvstatic void	vtnet_free_ctrl_vq(struct vtnet_softc *);
186227652Sgrehanstatic void	vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
187227652Sgrehan		    struct sglist *, int, int);
188255112Sbryanvstatic int	vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
189255112Sbryanvstatic int	vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t);
190227652Sgrehanstatic int	vtnet_ctrl_rx_cmd(struct vtnet_softc *, int, int);
191227652Sgrehanstatic int	vtnet_set_promisc(struct vtnet_softc *, int);
192227652Sgrehanstatic int	vtnet_set_allmulti(struct vtnet_softc *, int);
193255112Sbryanvstatic void	vtnet_attach_disable_promisc(struct vtnet_softc *);
194255112Sbryanvstatic void	vtnet_rx_filter(struct vtnet_softc *);
195227652Sgrehanstatic void	vtnet_rx_filter_mac(struct vtnet_softc *);
196227652Sgrehanstatic int	vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
197227652Sgrehanstatic void	vtnet_rx_filter_vlan(struct vtnet_softc *);
198255112Sbryanvstatic void	vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t);
199227652Sgrehanstatic void	vtnet_register_vlan(void *, struct ifnet *, uint16_t);
200227652Sgrehanstatic void	vtnet_unregister_vlan(void *, struct ifnet *, uint16_t);
201227652Sgrehan
202255112Sbryanvstatic int	vtnet_is_link_up(struct vtnet_softc *);
203255112Sbryanvstatic void	vtnet_update_link_status(struct vtnet_softc *);
204227652Sgrehanstatic int	vtnet_ifmedia_upd(struct ifnet *);
205227652Sgrehanstatic void	vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *);
206255112Sbryanvstatic void	vtnet_get_hwaddr(struct vtnet_softc *);
207255112Sbryanvstatic void	vtnet_set_hwaddr(struct vtnet_softc *);
208255112Sbryanvstatic void	vtnet_vlan_tag_remove(struct mbuf *);
209270334Sbryanvstatic void	vtnet_set_rx_process_limit(struct vtnet_softc *);
210270334Sbryanvstatic void	vtnet_set_tx_intr_threshold(struct vtnet_softc *);
211227652Sgrehan
212255112Sbryanvstatic void	vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
213255112Sbryanv		    struct sysctl_oid_list *, struct vtnet_rxq *);
214255112Sbryanvstatic void	vtnet_setup_txq_sysctl(struct sysctl_ctx_list *,
215255112Sbryanv		    struct sysctl_oid_list *, struct vtnet_txq *);
216255112Sbryanvstatic void	vtnet_setup_queue_sysctl(struct vtnet_softc *);
217255112Sbryanvstatic void	vtnet_setup_sysctl(struct vtnet_softc *);
218227652Sgrehan
219255112Sbryanvstatic int	vtnet_rxq_enable_intr(struct vtnet_rxq *);
220255112Sbryanvstatic void	vtnet_rxq_disable_intr(struct vtnet_rxq *);
221255112Sbryanvstatic int	vtnet_txq_enable_intr(struct vtnet_txq *);
222255112Sbryanvstatic void	vtnet_txq_disable_intr(struct vtnet_txq *);
223255112Sbryanvstatic void	vtnet_enable_rx_interrupts(struct vtnet_softc *);
224255112Sbryanvstatic void	vtnet_enable_tx_interrupts(struct vtnet_softc *);
225255112Sbryanvstatic void	vtnet_enable_interrupts(struct vtnet_softc *);
226255112Sbryanvstatic void	vtnet_disable_rx_interrupts(struct vtnet_softc *);
227255112Sbryanvstatic void	vtnet_disable_tx_interrupts(struct vtnet_softc *);
228255112Sbryanvstatic void	vtnet_disable_interrupts(struct vtnet_softc *);
229227652Sgrehan
230255112Sbryanvstatic int	vtnet_tunable_int(struct vtnet_softc *, const char *, int);
231255112Sbryanv
232227652Sgrehan/* Tunables. */
233304081Ssmhstatic SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD, 0, "VNET driver parameters");
234227652Sgrehanstatic int vtnet_csum_disable = 0;
235227652SgrehanTUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable);
236304081SsmhSYSCTL_INT(_hw_vtnet, OID_AUTO, csum_disable, CTLFLAG_RDTUN,
237304081Ssmh    &vtnet_csum_disable, 0, "Disables receive and send checksum offload");
238227652Sgrehanstatic int vtnet_tso_disable = 0;
239227652SgrehanTUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable);
240304081SsmhSYSCTL_INT(_hw_vtnet, OID_AUTO, tso_disable, CTLFLAG_RDTUN, &vtnet_tso_disable,
241304081Ssmh    0, "Disables TCP Segmentation Offload");
242227652Sgrehanstatic int vtnet_lro_disable = 0;
243227652SgrehanTUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable);
244304081SsmhSYSCTL_INT(_hw_vtnet, OID_AUTO, lro_disable, CTLFLAG_RDTUN, &vtnet_lro_disable,
245304081Ssmh    0, "Disables TCP Large Receive Offload");
246255112Sbryanvstatic int vtnet_mq_disable = 0;
247255112SbryanvTUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable);
248304081SsmhSYSCTL_INT(_hw_vtnet, OID_AUTO, mq_disable, CTLFLAG_RDTUN, &vtnet_mq_disable,
249304081Ssmh    0, "Disables Multi Queue support");
250304081Ssmhstatic int vtnet_mq_max_pairs = VTNET_MAX_QUEUE_PAIRS;
251255112SbryanvTUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs);
252304081SsmhSYSCTL_INT(_hw_vtnet, OID_AUTO, mq_max_pairs, CTLFLAG_RDTUN,
253304081Ssmh    &vtnet_mq_max_pairs, 0, "Sets the maximum number of Multi Queue pairs");
254255112Sbryanvstatic int vtnet_rx_process_limit = 512;
255255112SbryanvTUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit);
256304081SsmhSYSCTL_INT(_hw_vtnet, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
257304081Ssmh    &vtnet_rx_process_limit, 0,
258304081Ssmh    "Limits the number RX segments processed in a single pass");
259227652Sgrehan
260227652Sgrehanstatic uma_zone_t vtnet_tx_header_zone;
261227652Sgrehan
262227652Sgrehanstatic struct virtio_feature_desc vtnet_feature_desc[] = {
263227652Sgrehan	{ VIRTIO_NET_F_CSUM,		"TxChecksum"	},
264227652Sgrehan	{ VIRTIO_NET_F_GUEST_CSUM,	"RxChecksum"	},
265227652Sgrehan	{ VIRTIO_NET_F_MAC,		"MacAddress"	},
266227652Sgrehan	{ VIRTIO_NET_F_GSO,		"TxAllGSO"	},
267227652Sgrehan	{ VIRTIO_NET_F_GUEST_TSO4,	"RxTSOv4"	},
268227652Sgrehan	{ VIRTIO_NET_F_GUEST_TSO6,	"RxTSOv6"	},
269227652Sgrehan	{ VIRTIO_NET_F_GUEST_ECN,	"RxECN"		},
270227652Sgrehan	{ VIRTIO_NET_F_GUEST_UFO,	"RxUFO"		},
271227652Sgrehan	{ VIRTIO_NET_F_HOST_TSO4,	"TxTSOv4"	},
272227652Sgrehan	{ VIRTIO_NET_F_HOST_TSO6,	"TxTSOv6"	},
273227652Sgrehan	{ VIRTIO_NET_F_HOST_ECN,	"TxTSOECN"	},
274227652Sgrehan	{ VIRTIO_NET_F_HOST_UFO,	"TxUFO"		},
275227652Sgrehan	{ VIRTIO_NET_F_MRG_RXBUF,	"MrgRxBuf"	},
276227652Sgrehan	{ VIRTIO_NET_F_STATUS,		"Status"	},
277227652Sgrehan	{ VIRTIO_NET_F_CTRL_VQ,		"ControlVq"	},
278227652Sgrehan	{ VIRTIO_NET_F_CTRL_RX,		"RxMode"	},
279227652Sgrehan	{ VIRTIO_NET_F_CTRL_VLAN,	"VLanFilter"	},
280227652Sgrehan	{ VIRTIO_NET_F_CTRL_RX_EXTRA,	"RxModeExtra"	},
281255112Sbryanv	{ VIRTIO_NET_F_GUEST_ANNOUNCE,	"GuestAnnounce"	},
282255112Sbryanv	{ VIRTIO_NET_F_MQ,		"Multiqueue"	},
283255112Sbryanv	{ VIRTIO_NET_F_CTRL_MAC_ADDR,	"SetMacAddress"	},
284227652Sgrehan
285227652Sgrehan	{ 0, NULL }
286227652Sgrehan};
287227652Sgrehan
288227652Sgrehanstatic device_method_t vtnet_methods[] = {
289227652Sgrehan	/* Device methods. */
290255112Sbryanv	DEVMETHOD(device_probe,			vtnet_probe),
291255112Sbryanv	DEVMETHOD(device_attach,		vtnet_attach),
292255112Sbryanv	DEVMETHOD(device_detach,		vtnet_detach),
293255112Sbryanv	DEVMETHOD(device_suspend,		vtnet_suspend),
294255112Sbryanv	DEVMETHOD(device_resume,		vtnet_resume),
295255112Sbryanv	DEVMETHOD(device_shutdown,		vtnet_shutdown),
296227652Sgrehan
297227652Sgrehan	/* VirtIO methods. */
298255112Sbryanv	DEVMETHOD(virtio_attach_completed,	vtnet_attach_completed),
299255112Sbryanv	DEVMETHOD(virtio_config_change,		vtnet_config_change),
300227652Sgrehan
301234270Sgrehan	DEVMETHOD_END
302227652Sgrehan};
303227652Sgrehan
304270509Sbryanv#ifdef DEV_NETMAP
305270509Sbryanv#include <dev/netmap/if_vtnet_netmap.h>
306270509Sbryanv#endif /* DEV_NETMAP */
307270509Sbryanv
308227652Sgrehanstatic driver_t vtnet_driver = {
309227652Sgrehan	"vtnet",
310227652Sgrehan	vtnet_methods,
311227652Sgrehan	sizeof(struct vtnet_softc)
312227652Sgrehan};
313227652Sgrehanstatic devclass_t vtnet_devclass;
314227652Sgrehan
315227652SgrehanDRIVER_MODULE(vtnet, virtio_pci, vtnet_driver, vtnet_devclass,
316227652Sgrehan    vtnet_modevent, 0);
317227652SgrehanMODULE_VERSION(vtnet, 1);
318227652SgrehanMODULE_DEPEND(vtnet, virtio, 1, 1, 1);
319227652Sgrehan
320227652Sgrehanstatic int
321227652Sgrehanvtnet_modevent(module_t mod, int type, void *unused)
322227652Sgrehan{
323227652Sgrehan	int error;
324227652Sgrehan
325227652Sgrehan	error = 0;
326227652Sgrehan
327227652Sgrehan	switch (type) {
328227652Sgrehan	case MOD_LOAD:
329227652Sgrehan		vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr",
330227652Sgrehan		    sizeof(struct vtnet_tx_header),
331227652Sgrehan		    NULL, NULL, NULL, NULL, 0, 0);
332227652Sgrehan		break;
333227652Sgrehan	case MOD_QUIESCE:
334227652Sgrehan	case MOD_UNLOAD:
335227652Sgrehan		if (uma_zone_get_cur(vtnet_tx_header_zone) > 0)
336227652Sgrehan			error = EBUSY;
337227652Sgrehan		else if (type == MOD_UNLOAD) {
338227652Sgrehan			uma_zdestroy(vtnet_tx_header_zone);
339227652Sgrehan			vtnet_tx_header_zone = NULL;
340227652Sgrehan		}
341227652Sgrehan		break;
342227652Sgrehan	case MOD_SHUTDOWN:
343227652Sgrehan		break;
344227652Sgrehan	default:
345227652Sgrehan		error = EOPNOTSUPP;
346227652Sgrehan		break;
347227652Sgrehan	}
348227652Sgrehan
349227652Sgrehan	return (error);
350227652Sgrehan}
351227652Sgrehan
352227652Sgrehanstatic int
353227652Sgrehanvtnet_probe(device_t dev)
354227652Sgrehan{
355227652Sgrehan
356227652Sgrehan	if (virtio_get_device_type(dev) != VIRTIO_ID_NETWORK)
357227652Sgrehan		return (ENXIO);
358227652Sgrehan
359227652Sgrehan	device_set_desc(dev, "VirtIO Networking Adapter");
360227652Sgrehan
361227652Sgrehan	return (BUS_PROBE_DEFAULT);
362227652Sgrehan}
363227652Sgrehan
364227652Sgrehanstatic int
365227652Sgrehanvtnet_attach(device_t dev)
366227652Sgrehan{
367227652Sgrehan	struct vtnet_softc *sc;
368255112Sbryanv	int error;
369227652Sgrehan
370227652Sgrehan	sc = device_get_softc(dev);
371227652Sgrehan	sc->vtnet_dev = dev;
372227652Sgrehan
373255112Sbryanv	/* Register our feature descriptions. */
374227652Sgrehan	virtio_set_feature_desc(dev, vtnet_feature_desc);
375227652Sgrehan
376255112Sbryanv	VTNET_CORE_LOCK_INIT(sc);
377255112Sbryanv	callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0);
378227652Sgrehan
379255112Sbryanv	vtnet_setup_sysctl(sc);
380255112Sbryanv	vtnet_setup_features(sc);
381227652Sgrehan
382255112Sbryanv	error = vtnet_alloc_rx_filters(sc);
383255112Sbryanv	if (error) {
384255112Sbryanv		device_printf(dev, "cannot allocate Rx filters\n");
385255112Sbryanv		goto fail;
386255112Sbryanv	}
387227652Sgrehan
388255112Sbryanv	error = vtnet_alloc_rxtx_queues(sc);
389255112Sbryanv	if (error) {
390255112Sbryanv		device_printf(dev, "cannot allocate queues\n");
391255112Sbryanv		goto fail;
392227652Sgrehan	}
393227652Sgrehan
394227652Sgrehan	error = vtnet_alloc_virtqueues(sc);
395227652Sgrehan	if (error) {
396227652Sgrehan		device_printf(dev, "cannot allocate virtqueues\n");
397227652Sgrehan		goto fail;
398227652Sgrehan	}
399227652Sgrehan
400255112Sbryanv	error = vtnet_setup_interface(sc);
401255112Sbryanv	if (error) {
402255112Sbryanv		device_printf(dev, "cannot setup interface\n");
403227652Sgrehan		goto fail;
404227652Sgrehan	}
405227652Sgrehan
406227652Sgrehan	error = virtio_setup_intr(dev, INTR_TYPE_NET);
407227652Sgrehan	if (error) {
408227652Sgrehan		device_printf(dev, "cannot setup virtqueue interrupts\n");
409255112Sbryanv		/* BMV: This will crash if during boot! */
410255112Sbryanv		ether_ifdetach(sc->vtnet_ifp);
411227652Sgrehan		goto fail;
412227652Sgrehan	}
413227652Sgrehan
414270509Sbryanv#ifdef DEV_NETMAP
415270509Sbryanv	vtnet_netmap_attach(sc);
416270509Sbryanv#endif /* DEV_NETMAP */
417270509Sbryanv
418255112Sbryanv	vtnet_start_taskqueues(sc);
419227652Sgrehan
420227652Sgrehanfail:
421227652Sgrehan	if (error)
422227652Sgrehan		vtnet_detach(dev);
423227652Sgrehan
424227652Sgrehan	return (error);
425227652Sgrehan}
426227652Sgrehan
427227652Sgrehanstatic int
428227652Sgrehanvtnet_detach(device_t dev)
429227652Sgrehan{
430227652Sgrehan	struct vtnet_softc *sc;
431227652Sgrehan	struct ifnet *ifp;
432227652Sgrehan
433227652Sgrehan	sc = device_get_softc(dev);
434227652Sgrehan	ifp = sc->vtnet_ifp;
435227652Sgrehan
436227652Sgrehan	if (device_is_attached(dev)) {
437255112Sbryanv		VTNET_CORE_LOCK(sc);
438227652Sgrehan		vtnet_stop(sc);
439255112Sbryanv		VTNET_CORE_UNLOCK(sc);
440227652Sgrehan
441227652Sgrehan		callout_drain(&sc->vtnet_tick_ch);
442255112Sbryanv		vtnet_drain_taskqueues(sc);
443227652Sgrehan
444227652Sgrehan		ether_ifdetach(ifp);
445227652Sgrehan	}
446227652Sgrehan
447270509Sbryanv#ifdef DEV_NETMAP
448270509Sbryanv	netmap_detach(ifp);
449270509Sbryanv#endif /* DEV_NETMAP */
450270509Sbryanv
451255112Sbryanv	vtnet_free_taskqueues(sc);
452255112Sbryanv
453227652Sgrehan	if (sc->vtnet_vlan_attach != NULL) {
454227652Sgrehan		EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
455227652Sgrehan		sc->vtnet_vlan_attach = NULL;
456227652Sgrehan	}
457227652Sgrehan	if (sc->vtnet_vlan_detach != NULL) {
458227652Sgrehan		EVENTHANDLER_DEREGISTER(vlan_unconfg, sc->vtnet_vlan_detach);
459227652Sgrehan		sc->vtnet_vlan_detach = NULL;
460227652Sgrehan	}
461227652Sgrehan
462255112Sbryanv	ifmedia_removeall(&sc->vtnet_media);
463228301Sgrehan
464228301Sgrehan	if (ifp != NULL) {
465227652Sgrehan		if_free(ifp);
466227652Sgrehan		sc->vtnet_ifp = NULL;
467227652Sgrehan	}
468227652Sgrehan
469255112Sbryanv	vtnet_free_rxtx_queues(sc);
470255112Sbryanv	vtnet_free_rx_filters(sc);
471255112Sbryanv
472227652Sgrehan	if (sc->vtnet_ctrl_vq != NULL)
473227652Sgrehan		vtnet_free_ctrl_vq(sc);
474227652Sgrehan
475255112Sbryanv	VTNET_CORE_LOCK_DESTROY(sc);
476227652Sgrehan
477227652Sgrehan	return (0);
478227652Sgrehan}
479227652Sgrehan
480227652Sgrehanstatic int
481227652Sgrehanvtnet_suspend(device_t dev)
482227652Sgrehan{
483227652Sgrehan	struct vtnet_softc *sc;
484227652Sgrehan
485227652Sgrehan	sc = device_get_softc(dev);
486227652Sgrehan
487255112Sbryanv	VTNET_CORE_LOCK(sc);
488227652Sgrehan	vtnet_stop(sc);
489227652Sgrehan	sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
490255112Sbryanv	VTNET_CORE_UNLOCK(sc);
491227652Sgrehan
492227652Sgrehan	return (0);
493227652Sgrehan}
494227652Sgrehan
495227652Sgrehanstatic int
496227652Sgrehanvtnet_resume(device_t dev)
497227652Sgrehan{
498227652Sgrehan	struct vtnet_softc *sc;
499227652Sgrehan	struct ifnet *ifp;
500227652Sgrehan
501227652Sgrehan	sc = device_get_softc(dev);
502227652Sgrehan	ifp = sc->vtnet_ifp;
503227652Sgrehan
504255112Sbryanv	VTNET_CORE_LOCK(sc);
505227652Sgrehan	if (ifp->if_flags & IFF_UP)
506227652Sgrehan		vtnet_init_locked(sc);
507227652Sgrehan	sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
508255112Sbryanv	VTNET_CORE_UNLOCK(sc);
509227652Sgrehan
510227652Sgrehan	return (0);
511227652Sgrehan}
512227652Sgrehan
513227652Sgrehanstatic int
514227652Sgrehanvtnet_shutdown(device_t dev)
515227652Sgrehan{
516227652Sgrehan
517227652Sgrehan	/*
518227652Sgrehan	 * Suspend already does all of what we need to
519227652Sgrehan	 * do here; we just never expect to be resumed.
520227652Sgrehan	 */
521227652Sgrehan	return (vtnet_suspend(dev));
522227652Sgrehan}
523227652Sgrehan
524227652Sgrehanstatic int
525255112Sbryanvvtnet_attach_completed(device_t dev)
526255112Sbryanv{
527255112Sbryanv
528255112Sbryanv	vtnet_attach_disable_promisc(device_get_softc(dev));
529255112Sbryanv
530255112Sbryanv	return (0);
531255112Sbryanv}
532255112Sbryanv
533255112Sbryanvstatic int
534227652Sgrehanvtnet_config_change(device_t dev)
535227652Sgrehan{
536227652Sgrehan	struct vtnet_softc *sc;
537227652Sgrehan
538227652Sgrehan	sc = device_get_softc(dev);
539227652Sgrehan
540255112Sbryanv	VTNET_CORE_LOCK(sc);
541252702Sbryanv	vtnet_update_link_status(sc);
542255112Sbryanv	if (sc->vtnet_link_active != 0)
543255112Sbryanv		vtnet_tx_start_all(sc);
544255112Sbryanv	VTNET_CORE_UNLOCK(sc);
545227652Sgrehan
546252702Sbryanv	return (0);
547227652Sgrehan}
548227652Sgrehan
549227652Sgrehanstatic void
550227652Sgrehanvtnet_negotiate_features(struct vtnet_softc *sc)
551227652Sgrehan{
552227652Sgrehan	device_t dev;
553227652Sgrehan	uint64_t mask, features;
554227652Sgrehan
555227652Sgrehan	dev = sc->vtnet_dev;
556227652Sgrehan	mask = 0;
557227652Sgrehan
558255112Sbryanv	/*
559255112Sbryanv	 * TSO and LRO are only available when their corresponding checksum
560255112Sbryanv	 * offload feature is also negotiated.
561255112Sbryanv	 */
562255112Sbryanv	if (vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable)) {
563227652Sgrehan		mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM;
564255112Sbryanv		mask |= VTNET_TSO_FEATURES | VTNET_LRO_FEATURES;
565255112Sbryanv	}
566255112Sbryanv	if (vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable))
567255112Sbryanv		mask |= VTNET_TSO_FEATURES;
568255112Sbryanv	if (vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable))
569255112Sbryanv		mask |= VTNET_LRO_FEATURES;
570268010Sbryanv#ifndef VTNET_LEGACY_TX
571255112Sbryanv	if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable))
572255112Sbryanv		mask |= VIRTIO_NET_F_MQ;
573268010Sbryanv#else
574255112Sbryanv	mask |= VIRTIO_NET_F_MQ;
575255112Sbryanv#endif
576227652Sgrehan
577255112Sbryanv	features = VTNET_FEATURES & ~mask;
578255112Sbryanv	sc->vtnet_features = virtio_negotiate_features(dev, features);
579255112Sbryanv
580268010Sbryanv	if (virtio_with_feature(dev, VTNET_LRO_FEATURES) &&
581268010Sbryanv	    virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) {
582268010Sbryanv		/*
583268010Sbryanv		 * LRO without mergeable buffers requires special care. This
584268010Sbryanv		 * is not ideal because every receive buffer must be large
585268010Sbryanv		 * enough to hold the maximum TCP packet, the Ethernet header,
586268010Sbryanv		 * and the header. This requires up to 34 descriptors with
587268010Sbryanv		 * MCLBYTES clusters. If we do not have indirect descriptors,
588268010Sbryanv		 * LRO is disabled since the virtqueue will not contain very
589268010Sbryanv		 * many receive buffers.
590268010Sbryanv		 */
591268010Sbryanv		if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) {
592268010Sbryanv			device_printf(dev,
593268010Sbryanv			    "LRO disabled due to both mergeable buffers and "
594268010Sbryanv			    "indirect descriptors not negotiated\n");
595255112Sbryanv
596268010Sbryanv			features &= ~VTNET_LRO_FEATURES;
597268010Sbryanv			sc->vtnet_features =
598268010Sbryanv			    virtio_negotiate_features(dev, features);
599268010Sbryanv		} else
600268010Sbryanv			sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
601268010Sbryanv	}
602255112Sbryanv}
603227652Sgrehan
604255112Sbryanvstatic void
605255112Sbryanvvtnet_setup_features(struct vtnet_softc *sc)
606255112Sbryanv{
607255112Sbryanv	device_t dev;
608227652Sgrehan
609255112Sbryanv	dev = sc->vtnet_dev;
610227652Sgrehan
611255112Sbryanv	vtnet_negotiate_features(sc);
612255112Sbryanv
613277389Sbryanv	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
614277389Sbryanv		sc->vtnet_flags |= VTNET_FLAG_INDIRECT;
615255167Sbryanv	if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX))
616255167Sbryanv		sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX;
617255167Sbryanv
618255112Sbryanv	if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
619255112Sbryanv		/* This feature should always be negotiated. */
620255112Sbryanv		sc->vtnet_flags |= VTNET_FLAG_MAC;
621255112Sbryanv	}
622255112Sbryanv
623255112Sbryanv	if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
624255112Sbryanv		sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
625255112Sbryanv		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
626255112Sbryanv	} else
627255112Sbryanv		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
628255112Sbryanv
629265286Sbryanv	if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
630265286Sbryanv		sc->vtnet_rx_nsegs = VTNET_MRG_RX_SEGS;
631265286Sbryanv	else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
632265286Sbryanv		sc->vtnet_rx_nsegs = VTNET_MAX_RX_SEGS;
633265286Sbryanv	else
634265286Sbryanv		sc->vtnet_rx_nsegs = VTNET_MIN_RX_SEGS;
635265286Sbryanv
636265286Sbryanv	if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) ||
637265286Sbryanv	    virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) ||
638265286Sbryanv	    virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
639265286Sbryanv		sc->vtnet_tx_nsegs = VTNET_MAX_TX_SEGS;
640265286Sbryanv	else
641265286Sbryanv		sc->vtnet_tx_nsegs = VTNET_MIN_TX_SEGS;
642265286Sbryanv
643255112Sbryanv	if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
644255112Sbryanv		sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;
645255112Sbryanv
646255112Sbryanv		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
647255112Sbryanv			sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
648255112Sbryanv		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
649255112Sbryanv			sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
650255112Sbryanv		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR))
651255112Sbryanv			sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;
652255112Sbryanv	}
653255112Sbryanv
654255112Sbryanv	if (virtio_with_feature(dev, VIRTIO_NET_F_MQ) &&
655255112Sbryanv	    sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
656304081Ssmh		sc->vtnet_max_vq_pairs = virtio_read_dev_config_2(dev,
657255112Sbryanv		    offsetof(struct virtio_net_config, max_virtqueue_pairs));
658255112Sbryanv	} else
659304081Ssmh		sc->vtnet_max_vq_pairs = 1;
660255112Sbryanv
661304081Ssmh	if (sc->vtnet_max_vq_pairs > 1) {
662227652Sgrehan		/*
663304081Ssmh		 * Limit the maximum number of queue pairs to the lower of
664304081Ssmh		 * the number of CPUs and the configured maximum.
665304081Ssmh		 * The actual number of queues that get used may be less.
666227652Sgrehan		 */
667304081Ssmh		int max;
668304081Ssmh
669255112Sbryanv		max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs);
670304081Ssmh		if (max > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN) {
671304081Ssmh			if (max > mp_ncpus)
672304081Ssmh				max = mp_ncpus;
673304081Ssmh			if (max > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
674304081Ssmh				max = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX;
675304081Ssmh			if (max > 1) {
676304081Ssmh				sc->vtnet_requested_vq_pairs = max;
677304081Ssmh				sc->vtnet_flags |= VTNET_FLAG_MULTIQ;
678304081Ssmh			}
679304081Ssmh		}
680255112Sbryanv	}
681227652Sgrehan}
682227652Sgrehan
683227652Sgrehanstatic int
684255112Sbryanvvtnet_init_rxq(struct vtnet_softc *sc, int id)
685227652Sgrehan{
686255112Sbryanv	struct vtnet_rxq *rxq;
687227652Sgrehan
688255112Sbryanv	rxq = &sc->vtnet_rxqs[id];
689227652Sgrehan
690255112Sbryanv	snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d",
691255112Sbryanv	    device_get_nameunit(sc->vtnet_dev), id);
692255112Sbryanv	mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF);
693227652Sgrehan
694255112Sbryanv	rxq->vtnrx_sc = sc;
695255112Sbryanv	rxq->vtnrx_id = id;
696227652Sgrehan
697265286Sbryanv	rxq->vtnrx_sg = sglist_alloc(sc->vtnet_rx_nsegs, M_NOWAIT);
698265286Sbryanv	if (rxq->vtnrx_sg == NULL)
699265286Sbryanv		return (ENOMEM);
700265286Sbryanv
701255112Sbryanv	TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq);
702255112Sbryanv	rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT,
703255112Sbryanv	    taskqueue_thread_enqueue, &rxq->vtnrx_tq);
704227652Sgrehan
705255112Sbryanv	return (rxq->vtnrx_tq == NULL ? ENOMEM : 0);
706255112Sbryanv}
707227652Sgrehan
708255112Sbryanvstatic int
709255112Sbryanvvtnet_init_txq(struct vtnet_softc *sc, int id)
710255112Sbryanv{
711255112Sbryanv	struct vtnet_txq *txq;
712255112Sbryanv
713255112Sbryanv	txq = &sc->vtnet_txqs[id];
714255112Sbryanv
715255112Sbryanv	snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d",
716255112Sbryanv	    device_get_nameunit(sc->vtnet_dev), id);
717255112Sbryanv	mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF);
718255112Sbryanv
719255112Sbryanv	txq->vtntx_sc = sc;
720255112Sbryanv	txq->vtntx_id = id;
721255112Sbryanv
722265286Sbryanv	txq->vtntx_sg = sglist_alloc(sc->vtnet_tx_nsegs, M_NOWAIT);
723265286Sbryanv	if (txq->vtntx_sg == NULL)
724265286Sbryanv		return (ENOMEM);
725265286Sbryanv
726255112Sbryanv#ifndef VTNET_LEGACY_TX
727255112Sbryanv	txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF,
728255112Sbryanv	    M_NOWAIT, &txq->vtntx_mtx);
729255112Sbryanv	if (txq->vtntx_br == NULL)
730255112Sbryanv		return (ENOMEM);
731255112Sbryanv
732255112Sbryanv	TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq);
733255112Sbryanv#endif
734255112Sbryanv	TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq);
735255112Sbryanv	txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT,
736255112Sbryanv	    taskqueue_thread_enqueue, &txq->vtntx_tq);
737255112Sbryanv	if (txq->vtntx_tq == NULL)
738255112Sbryanv		return (ENOMEM);
739255112Sbryanv
740255112Sbryanv	return (0);
741255112Sbryanv}
742255112Sbryanv
743255112Sbryanvstatic int
744255112Sbryanvvtnet_alloc_rxtx_queues(struct vtnet_softc *sc)
745255112Sbryanv{
746255112Sbryanv	int i, npairs, error;
747255112Sbryanv
748255112Sbryanv	npairs = sc->vtnet_max_vq_pairs;
749255112Sbryanv
750255112Sbryanv	sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF,
751255112Sbryanv	    M_NOWAIT | M_ZERO);
752255112Sbryanv	sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF,
753255112Sbryanv	    M_NOWAIT | M_ZERO);
754255112Sbryanv	if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL)
755255112Sbryanv		return (ENOMEM);
756255112Sbryanv
757255112Sbryanv	for (i = 0; i < npairs; i++) {
758255112Sbryanv		error = vtnet_init_rxq(sc, i);
759255112Sbryanv		if (error)
760255112Sbryanv			return (error);
761255112Sbryanv		error = vtnet_init_txq(sc, i);
762255112Sbryanv		if (error)
763255112Sbryanv			return (error);
764227652Sgrehan	}
765227652Sgrehan
766255112Sbryanv	vtnet_setup_queue_sysctl(sc);
767255112Sbryanv
768255112Sbryanv	return (0);
769227652Sgrehan}
770227652Sgrehan
771227652Sgrehanstatic void
772255112Sbryanvvtnet_destroy_rxq(struct vtnet_rxq *rxq)
773227652Sgrehan{
774227652Sgrehan
775255112Sbryanv	rxq->vtnrx_sc = NULL;
776255112Sbryanv	rxq->vtnrx_id = -1;
777227652Sgrehan
778265286Sbryanv	if (rxq->vtnrx_sg != NULL) {
779265286Sbryanv		sglist_free(rxq->vtnrx_sg);
780265286Sbryanv		rxq->vtnrx_sg = NULL;
781265286Sbryanv	}
782265286Sbryanv
783255112Sbryanv	if (mtx_initialized(&rxq->vtnrx_mtx) != 0)
784255112Sbryanv		mtx_destroy(&rxq->vtnrx_mtx);
785255112Sbryanv}
786227652Sgrehan
787255112Sbryanvstatic void
788255112Sbryanvvtnet_destroy_txq(struct vtnet_txq *txq)
789255112Sbryanv{
790255112Sbryanv
791255112Sbryanv	txq->vtntx_sc = NULL;
792255112Sbryanv	txq->vtntx_id = -1;
793255112Sbryanv
794265286Sbryanv	if (txq->vtntx_sg != NULL) {
795265286Sbryanv		sglist_free(txq->vtntx_sg);
796265286Sbryanv		txq->vtntx_sg = NULL;
797265286Sbryanv	}
798265286Sbryanv
799255112Sbryanv#ifndef VTNET_LEGACY_TX
800255112Sbryanv	if (txq->vtntx_br != NULL) {
801255112Sbryanv		buf_ring_free(txq->vtntx_br, M_DEVBUF);
802255112Sbryanv		txq->vtntx_br = NULL;
803227652Sgrehan	}
804255112Sbryanv#endif
805255112Sbryanv
806255112Sbryanv	if (mtx_initialized(&txq->vtntx_mtx) != 0)
807255112Sbryanv		mtx_destroy(&txq->vtntx_mtx);
808227652Sgrehan}
809227652Sgrehan
810227652Sgrehanstatic void
811255112Sbryanvvtnet_free_rxtx_queues(struct vtnet_softc *sc)
812227652Sgrehan{
813255112Sbryanv	int i;
814227652Sgrehan
815255112Sbryanv	if (sc->vtnet_rxqs != NULL) {
816255112Sbryanv		for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
817255112Sbryanv			vtnet_destroy_rxq(&sc->vtnet_rxqs[i]);
818255112Sbryanv		free(sc->vtnet_rxqs, M_DEVBUF);
819255112Sbryanv		sc->vtnet_rxqs = NULL;
820255112Sbryanv	}
821227652Sgrehan
822255112Sbryanv	if (sc->vtnet_txqs != NULL) {
823255112Sbryanv		for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
824255112Sbryanv			vtnet_destroy_txq(&sc->vtnet_txqs[i]);
825255112Sbryanv		free(sc->vtnet_txqs, M_DEVBUF);
826255112Sbryanv		sc->vtnet_txqs = NULL;
827255112Sbryanv	}
828227652Sgrehan}
829227652Sgrehan
830227652Sgrehanstatic int
831255112Sbryanvvtnet_alloc_rx_filters(struct vtnet_softc *sc)
832227652Sgrehan{
833255112Sbryanv
834255112Sbryanv	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
835255112Sbryanv		sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter),
836255112Sbryanv		    M_DEVBUF, M_NOWAIT | M_ZERO);
837255112Sbryanv		if (sc->vtnet_mac_filter == NULL)
838255112Sbryanv			return (ENOMEM);
839255112Sbryanv	}
840255112Sbryanv
841255112Sbryanv	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
842255112Sbryanv		sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) *
843255112Sbryanv		    VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO);
844255112Sbryanv		if (sc->vtnet_vlan_filter == NULL)
845255112Sbryanv			return (ENOMEM);
846255112Sbryanv	}
847255112Sbryanv
848255112Sbryanv	return (0);
849255112Sbryanv}
850255112Sbryanv
851255112Sbryanvstatic void
852255112Sbryanvvtnet_free_rx_filters(struct vtnet_softc *sc)
853255112Sbryanv{
854255112Sbryanv
855255112Sbryanv	if (sc->vtnet_mac_filter != NULL) {
856255112Sbryanv		free(sc->vtnet_mac_filter, M_DEVBUF);
857255112Sbryanv		sc->vtnet_mac_filter = NULL;
858255112Sbryanv	}
859255112Sbryanv
860255112Sbryanv	if (sc->vtnet_vlan_filter != NULL) {
861255112Sbryanv		free(sc->vtnet_vlan_filter, M_DEVBUF);
862255112Sbryanv		sc->vtnet_vlan_filter = NULL;
863255112Sbryanv	}
864255112Sbryanv}
865255112Sbryanv
/*
 * Allocate the device's virtqueues: one Rx/Tx pair per queue pair plus,
 * when negotiated, a single control virtqueue placed after all of the
 * Rx/Tx queues. Builds a vq_alloc_info array describing each queue and
 * hands it to the VirtIO bus layer.
 *
 * Returns 0 on success or an errno from the allocation.
 */
static int
vtnet_alloc_virtqueues(struct vtnet_softc *sc)
{
	device_t dev;
	struct vq_alloc_info *info;
	struct vtnet_rxq *rxq;
	struct vtnet_txq *txq;
	int i, idx, flags, nvqs, error;

	dev = sc->vtnet_dev;
	flags = 0;

	/* Two virtqueues (one Rx, one Tx) per pair, plus one for control. */
	nvqs = sc->vtnet_max_vq_pairs * 2;
	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
		nvqs++;

	info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT);
	if (info == NULL)
		return (ENOMEM);

	for (i = 0, idx = 0; i < sc->vtnet_max_vq_pairs; i++, idx+=2) {
		rxq = &sc->vtnet_rxqs[i];
		VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs,
		    vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq,
		    "%s-%d rx", device_get_nameunit(dev), rxq->vtnrx_id);

		txq = &sc->vtnet_txqs[i];
		VQ_ALLOC_INFO_INIT(&info[idx+1], sc->vtnet_tx_nsegs,
		    vtnet_tx_vq_intr, txq, &txq->vtntx_vq,
		    "%s-%d tx", device_get_nameunit(dev), txq->vtntx_id);
	}

	/* The control queue, if negotiated, follows all the Rx/Tx queues. */
	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
		VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL,
		    &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev));
	}

	/*
	 * Enable interrupt binding if this is multiqueue. This only matters
	 * when per-vq MSIX is available.
	 *
	 * NOTE(review): "flags |= 0" is a no-op, so no binding flag is
	 * actually set here — confirm the intended VIRTIO_* flag value.
	 */
	if (sc->vtnet_flags & VTNET_FLAG_MULTIQ)
		flags |= 0;

	error = virtio_alloc_virtqueues(dev, flags, nvqs, info);
	free(info, M_TEMP);

	return (error);
}
915227652Sgrehan
/*
 * Create and configure the ifnet for this device: install the driver
 * entry points, attach to the ethernet layer, and advertise the offload
 * capabilities implied by the negotiated VirtIO features.
 *
 * Returns 0 on success or ENOSPC if the ifnet cannot be allocated.
 */
static int
vtnet_setup_interface(struct vtnet_softc *sc)
{
	device_t dev;
	struct ifnet *ifp;

	dev = sc->vtnet_dev;

	ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "cannot allocate ifnet structure\n");
		return (ENOSPC);
	}

	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	if_initbaudrate(ifp, IF_Gbps(10));	/* Approx. */
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_init = vtnet_init;
	ifp->if_ioctl = vtnet_ioctl;

#ifndef VTNET_LEGACY_TX
	ifp->if_transmit = vtnet_txq_mq_start;
	ifp->if_qflush = vtnet_qflush;
#else
	/* Legacy if_start path: bound the send queue by the Tx virtqueue. */
	struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq;
	ifp->if_start = vtnet_start;
	IFQ_SET_MAXLEN(&ifp->if_snd, virtqueue_size(vq) - 1);
	ifp->if_snd.ifq_drv_maxlen = virtqueue_size(vq) - 1;
	IFQ_SET_READY(&ifp->if_snd);
#endif

	ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd,
	    vtnet_ifmedia_sts);
	ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL);
	ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE);

	/* Read (or generate) the MAC address for the adapter. */
	vtnet_get_hwaddr(sc);

	ether_ifattach(ifp, sc->vtnet_hwaddr);

	if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS))
		ifp->if_capabilities |= IFCAP_LINKSTATE;

	/* Tell the upper layer(s) we support long frames. */
	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
	ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU;

	/* Tx checksum/TSO capabilities follow the negotiated features. */
	if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
		ifp->if_capabilities |= IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6;

		if (virtio_with_feature(dev, VIRTIO_NET_F_GSO)) {
			ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6;
			sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
		} else {
			/* No GSO: enable only the individual TSO features. */
			if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
				ifp->if_capabilities |= IFCAP_TSO4;
			if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
				ifp->if_capabilities |= IFCAP_TSO6;
			if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
				sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
		}

		if (ifp->if_capabilities & IFCAP_TSO)
			ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
	}

	/* Rx checksum/LRO capabilities follow the guest-side features. */
	if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) {
		ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6;

		if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) ||
		    virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6))
			ifp->if_capabilities |= IFCAP_LRO;
	}

	if (ifp->if_capabilities & IFCAP_HWCSUM) {
		/*
		 * VirtIO does not support VLAN tagging, but we can fake
		 * it by inserting and removing the 802.1Q header during
		 * transmit and receive. We are then able to do checksum
		 * offloading of VLAN frames.
		 */
		ifp->if_capabilities |=
		    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
	}

	ifp->if_capenable = ifp->if_capabilities;

	/*
	 * Capabilities after here are not enabled by default.
	 */

	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
		ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;

		sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
		    vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
		sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
		    vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
	}

	vtnet_set_rx_process_limit(sc);
	vtnet_set_tx_intr_threshold(sc);

	return (0);
}
1023227652Sgrehan
/*
 * Validate and apply a new MTU. Picks the receive cluster size needed
 * for the resulting frame size and, if the interface is running,
 * reinitializes it so the Rx queues are repopulated with the new
 * clusters. Called from the ioctl path with the core lock held.
 *
 * Returns 0 or EINVAL if the MTU (or resulting frame size) is out of
 * range.
 */
static int
vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu)
{
	struct ifnet *ifp;
	int frame_size, clsize;

	ifp = sc->vtnet_ifp;

	if (new_mtu < ETHERMIN || new_mtu > VTNET_MAX_MTU)
		return (EINVAL);

	/* Full on-wire frame: VirtIO header + ethernet/VLAN header + MTU. */
	frame_size = sc->vtnet_hdr_size + sizeof(struct ether_vlan_header) +
	    new_mtu;

	/*
	 * Based on the new MTU (and hence frame size) determine which
	 * cluster size is most appropriate for the receive queues.
	 */
	if (frame_size <= MCLBYTES) {
		clsize = MCLBYTES;
	} else if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
		/* Avoid going past 9K jumbos. */
		if (frame_size > MJUM9BYTES)
			return (EINVAL);
		clsize = MJUM9BYTES;
	} else
		clsize = MJUMPAGESIZE;

	ifp->if_mtu = new_mtu;
	sc->vtnet_rx_new_clsize = clsize;

	/* Reinitialize so the Rx queues pick up the new cluster size. */
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
		vtnet_init_locked(sc);
	}

	return (0);
}
1062227652Sgrehan
/*
 * Interface ioctl handler. Takes the core lock around any state change
 * and must return without it held (asserted on exit).
 */
static int
vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct vtnet_softc *sc;
	struct ifreq *ifr;
	int reinit, mask, error;

	sc = ifp->if_softc;
	ifr = (struct ifreq *) data;
	error = 0;

	switch (cmd) {
	case SIOCSIFMTU:
		if (ifp->if_mtu != ifr->ifr_mtu) {
			VTNET_CORE_LOCK(sc);
			error = vtnet_change_mtu(sc, ifr->ifr_mtu);
			VTNET_CORE_UNLOCK(sc);
		}
		break;

	case SIOCSIFFLAGS:
		VTNET_CORE_LOCK(sc);
		if ((ifp->if_flags & IFF_UP) == 0) {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				vtnet_stop(sc);
		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			/* Only PROMISC/ALLMULTI changes need action here. */
			if ((ifp->if_flags ^ sc->vtnet_if_flags) &
			    (IFF_PROMISC | IFF_ALLMULTI)) {
				if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)
					vtnet_rx_filter(sc);
				else {
					/*
					 * No Rx filter control queue: we
					 * cannot program filtering, so force
					 * IFF_PROMISC and reject ALLMULTI
					 * changes.
					 */
					ifp->if_flags |= IFF_PROMISC;
					if ((ifp->if_flags ^ sc->vtnet_if_flags)
					    & IFF_ALLMULTI)
						error = ENOTSUP;
				}
			}
		} else
			vtnet_init_locked(sc);

		if (error == 0)
			sc->vtnet_if_flags = ifp->if_flags;
		VTNET_CORE_UNLOCK(sc);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* The multicast filter requires the Rx control queue. */
		if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0)
			break;
		VTNET_CORE_LOCK(sc);
		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
			vtnet_rx_filter_mac(sc);
		VTNET_CORE_UNLOCK(sc);
		break;

	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd);
		break;

	case SIOCSIFCAP:
		VTNET_CORE_LOCK(sc);
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;

		/* Tx offloads can be toggled without a reinit. */
		if (mask & IFCAP_TXCSUM)
			ifp->if_capenable ^= IFCAP_TXCSUM;
		if (mask & IFCAP_TXCSUM_IPV6)
			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
		if (mask & IFCAP_TSO4)
			ifp->if_capenable ^= IFCAP_TSO4;
		if (mask & IFCAP_TSO6)
			ifp->if_capenable ^= IFCAP_TSO6;

		if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO |
		    IFCAP_VLAN_HWFILTER)) {
			/* These Rx features require us to renegotiate. */
			reinit = 1;

			if (mask & IFCAP_RXCSUM)
				ifp->if_capenable ^= IFCAP_RXCSUM;
			if (mask & IFCAP_RXCSUM_IPV6)
				ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
			if (mask & IFCAP_LRO)
				ifp->if_capenable ^= IFCAP_LRO;
			if (mask & IFCAP_VLAN_HWFILTER)
				ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
		} else
			reinit = 0;

		if (mask & IFCAP_VLAN_HWTSO)
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
		if (mask & IFCAP_VLAN_HWTAGGING)
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;

		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
			vtnet_init_locked(sc);
		}

		VTNET_CORE_UNLOCK(sc);
		VLAN_CAPABILITIES(ifp);

		break;

	default:
		error = ether_ioctl(ifp, cmd, data);
		break;
	}

	VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc);

	return (error);
}
1176227652Sgrehan
1177227652Sgrehanstatic int
1178255112Sbryanvvtnet_rxq_populate(struct vtnet_rxq *rxq)
1179227652Sgrehan{
1180227652Sgrehan	struct virtqueue *vq;
1181227652Sgrehan	int nbufs, error;
1182227652Sgrehan
1183255112Sbryanv	vq = rxq->vtnrx_vq;
1184227652Sgrehan	error = ENOSPC;
1185227652Sgrehan
1186255112Sbryanv	for (nbufs = 0; !virtqueue_full(vq); nbufs++) {
1187255112Sbryanv		error = vtnet_rxq_new_buf(rxq);
1188255112Sbryanv		if (error)
1189227652Sgrehan			break;
1190227652Sgrehan	}
1191227652Sgrehan
1192227652Sgrehan	if (nbufs > 0) {
1193227652Sgrehan		virtqueue_notify(vq);
1194227652Sgrehan		/*
1195227652Sgrehan		 * EMSGSIZE signifies the virtqueue did not have enough
1196227652Sgrehan		 * entries available to hold the last mbuf. This is not
1197255112Sbryanv		 * an error.
1198227652Sgrehan		 */
1199227652Sgrehan		if (error == EMSGSIZE)
1200227652Sgrehan			error = 0;
1201227652Sgrehan	}
1202227652Sgrehan
1203227652Sgrehan	return (error);
1204227652Sgrehan}
1205227652Sgrehan
1206227652Sgrehanstatic void
1207255112Sbryanvvtnet_rxq_free_mbufs(struct vtnet_rxq *rxq)
1208227652Sgrehan{
1209227652Sgrehan	struct virtqueue *vq;
1210227652Sgrehan	struct mbuf *m;
1211227652Sgrehan	int last;
1212227652Sgrehan
1213255112Sbryanv	vq = rxq->vtnrx_vq;
1214227652Sgrehan	last = 0;
1215227652Sgrehan
1216227652Sgrehan	while ((m = virtqueue_drain(vq, &last)) != NULL)
1217227652Sgrehan		m_freem(m);
1218227652Sgrehan
1219255112Sbryanv	KASSERT(virtqueue_empty(vq),
1220255112Sbryanv	    ("%s: mbufs remaining in rx queue %p", __func__, rxq));
1221227652Sgrehan}
1222227652Sgrehan
1223227652Sgrehanstatic struct mbuf *
1224255112Sbryanvvtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
1225227652Sgrehan{
1226227652Sgrehan	struct mbuf *m_head, *m_tail, *m;
1227227652Sgrehan	int i, clsize;
1228227652Sgrehan
1229255112Sbryanv	clsize = sc->vtnet_rx_clsize;
1230227652Sgrehan
1231255112Sbryanv	KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1232255112Sbryanv	    ("%s: chained mbuf %d request without LRO_NOMRG", __func__, nbufs));
1233255112Sbryanv
1234243857Sglebius	m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, clsize);
1235227652Sgrehan	if (m_head == NULL)
1236227652Sgrehan		goto fail;
1237227652Sgrehan
1238227652Sgrehan	m_head->m_len = clsize;
1239227652Sgrehan	m_tail = m_head;
1240227652Sgrehan
1241255112Sbryanv	/* Allocate the rest of the chain. */
1242255112Sbryanv	for (i = 1; i < nbufs; i++) {
1243255112Sbryanv		m = m_getjcl(M_NOWAIT, MT_DATA, 0, clsize);
1244255112Sbryanv		if (m == NULL)
1245255112Sbryanv			goto fail;
1246227652Sgrehan
1247255112Sbryanv		m->m_len = clsize;
1248255112Sbryanv		m_tail->m_next = m;
1249255112Sbryanv		m_tail = m;
1250227652Sgrehan	}
1251227652Sgrehan
1252227652Sgrehan	if (m_tailp != NULL)
1253227652Sgrehan		*m_tailp = m_tail;
1254227652Sgrehan
1255227652Sgrehan	return (m_head);
1256227652Sgrehan
1257227652Sgrehanfail:
1258227652Sgrehan	sc->vtnet_stats.mbuf_alloc_failed++;
1259227652Sgrehan	m_freem(m_head);
1260227652Sgrehan
1261227652Sgrehan	return (NULL);
1262227652Sgrehan}
1263227652Sgrehan
/*
 * Slow path for when LRO without mergeable buffers is negotiated:
 * replace a received multi-mbuf chain. Only the mbufs the frame
 * actually consumed are replaced; any unused tail of the old chain is
 * spliced onto the replacement.
 *
 * Returns 0 on success; EMSGSIZE if the frame is larger than the whole
 * chain (frame dropped); ENOBUFS/enqueue errno on failure, with the
 * original chain restored so the caller can discard it.
 */
static int
vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
    int len0)
{
	struct vtnet_softc *sc;
	struct mbuf *m, *m_prev;
	struct mbuf *m_new, *m_tail;
	int len, clsize, nreplace, error;

	sc = rxq->vtnrx_sc;
	clsize = sc->vtnet_rx_clsize;

	m_prev = NULL;
	m_tail = NULL;
	nreplace = 0;

	m = m0;
	len = len0;

	/*
	 * Since these mbuf chains are so large, we avoid allocating an
	 * entire replacement chain if possible. When the received frame
	 * did not consume the entire chain, the unused mbufs are moved
	 * to the replacement chain.
	 */
	while (len > 0) {
		/*
		 * Something is seriously wrong if we received a frame
		 * larger than the chain. Drop it.
		 */
		if (m == NULL) {
			sc->vtnet_stats.rx_frame_too_large++;
			return (EMSGSIZE);
		}

		/* We always allocate the same cluster size. */
		KASSERT(m->m_len == clsize,
		    ("%s: mbuf size %d is not the cluster size %d",
		    __func__, m->m_len, clsize));

		/* Trim the final consumed mbuf to the remaining length. */
		m->m_len = MIN(m->m_len, len);
		len -= m->m_len;

		m_prev = m;
		m = m->m_next;
		nreplace++;
	}

	KASSERT(nreplace <= sc->vtnet_rx_nmbufs,
	    ("%s: too many replacement mbufs %d max %d", __func__, nreplace,
	    sc->vtnet_rx_nmbufs));

	m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
	if (m_new == NULL) {
		/* Undo the m_len trim performed in the loop above. */
		m_prev->m_len = clsize;
		return (ENOBUFS);
	}

	/*
	 * Move any unused mbufs from the received chain onto the end
	 * of the new chain.
	 */
	if (m_prev->m_next != NULL) {
		m_tail->m_next = m_prev->m_next;
		m_prev->m_next = NULL;
	}

	error = vtnet_rxq_enqueue_buf(rxq, m_new);
	if (error) {
		/*
		 * BAD! We could not enqueue the replacement mbuf chain. We
		 * must restore the m0 chain to the original state if it was
		 * modified so we can subsequently discard it.
		 *
		 * NOTE: The replacement is suppose to be an identical copy
		 * to the one just dequeued so this is an unexpected error.
		 */
		sc->vtnet_stats.rx_enq_replacement_failed++;

		if (m_tail->m_next != NULL) {
			m_prev->m_next = m_tail->m_next;
			m_tail->m_next = NULL;
		}

		m_prev->m_len = clsize;
		m_freem(m_new);
	}

	return (error);
}
1357227652Sgrehan
1358227652Sgrehanstatic int
1359255112Sbryanvvtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len)
1360227652Sgrehan{
1361255112Sbryanv	struct vtnet_softc *sc;
1362255112Sbryanv	struct mbuf *m_new;
1363227652Sgrehan	int error;
1364227652Sgrehan
1365255112Sbryanv	sc = rxq->vtnrx_sc;
1366227652Sgrehan
1367255112Sbryanv	KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
1368255112Sbryanv	    ("%s: chained mbuf without LRO_NOMRG", __func__));
1369227652Sgrehan
1370255112Sbryanv	if (m->m_next == NULL) {
1371255112Sbryanv		/* Fast-path for the common case of just one mbuf. */
1372255112Sbryanv		if (m->m_len < len)
1373255112Sbryanv			return (EINVAL);
1374227652Sgrehan
1375255112Sbryanv		m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
1376255112Sbryanv		if (m_new == NULL)
1377255112Sbryanv			return (ENOBUFS);
1378227652Sgrehan
1379255112Sbryanv		error = vtnet_rxq_enqueue_buf(rxq, m_new);
1380255112Sbryanv		if (error) {
1381255112Sbryanv			/*
1382255112Sbryanv			 * The new mbuf is suppose to be an identical
1383255112Sbryanv			 * copy of the one just dequeued so this is an
1384255112Sbryanv			 * unexpected error.
1385255112Sbryanv			 */
1386255112Sbryanv			m_freem(m_new);
1387255112Sbryanv			sc->vtnet_stats.rx_enq_replacement_failed++;
1388255112Sbryanv		} else
1389255112Sbryanv			m->m_len = len;
1390255112Sbryanv	} else
1391255112Sbryanv		error = vtnet_rxq_replace_lro_nomgr_buf(rxq, m, len);
1392227652Sgrehan
1393255112Sbryanv	return (error);
1394227652Sgrehan}
1395227652Sgrehan
/*
 * Enqueue a (possibly chained) receive mbuf onto the Rx virtqueue.
 * Without mergeable Rx buffers, the first segment is the VirtIO net
 * header carved out of the front of the first cluster; with them, the
 * header shares the data segment. Called with the Rx queue lock held.
 *
 * Returns 0 or the errno from virtqueue_enqueue().
 */
static int
vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m)
{
	struct vtnet_softc *sc;
	struct sglist *sg;
	struct vtnet_rx_header *rxhdr;
	uint8_t *mdata;
	int offset, error;

	sc = rxq->vtnrx_sc;
	sg = rxq->vtnrx_sg;
	mdata = mtod(m, uint8_t *);

	VTNET_RXQ_LOCK_ASSERT(rxq);
	KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
	    ("%s: chained mbuf without LRO_NOMRG", __func__));
	KASSERT(m->m_len == sc->vtnet_rx_clsize,
	    ("%s: unexpected cluster size %d/%d", __func__, m->m_len,
	     sc->vtnet_rx_clsize));

	sglist_reset(sg);
	if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
		MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr));
		/* The header leads the first cluster; data follows it. */
		rxhdr = (struct vtnet_rx_header *) mdata;
		sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size);
		offset = sizeof(struct vtnet_rx_header);
	} else
		offset = 0;

	sglist_append(sg, mdata + offset, m->m_len - offset);
	if (m->m_next != NULL) {
		/* Cannot fail: the sglist was sized for a full chain. */
		error = sglist_append_mbuf(sg, m->m_next);
		MPASS(error == 0);
	}

	error = virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg);

	return (error);
}
1435227652Sgrehan
1436255112Sbryanvstatic int
1437255112Sbryanvvtnet_rxq_new_buf(struct vtnet_rxq *rxq)
1438227652Sgrehan{
1439255112Sbryanv	struct vtnet_softc *sc;
1440255112Sbryanv	struct mbuf *m;
1441255112Sbryanv	int error;
1442227652Sgrehan
1443255112Sbryanv	sc = rxq->vtnrx_sc;
1444227652Sgrehan
1445255112Sbryanv	m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL);
1446255112Sbryanv	if (m == NULL)
1447255112Sbryanv		return (ENOBUFS);
1448227652Sgrehan
1449255112Sbryanv	error = vtnet_rxq_enqueue_buf(rxq, m);
1450255112Sbryanv	if (error)
1451255112Sbryanv		m_freem(m);
1452255112Sbryanv
1453255112Sbryanv	return (error);
1454227652Sgrehan}
1455227652Sgrehan
/*
 * Use the checksum offset in the VirtIO header to set the correct
 * CSUM_* flags on the received mbuf.
 *
 * Returns 0 on success or 1 if the offset is implausible for the
 * ethertype or does not match a known protocol checksum field (the
 * relevant bad-checksum statistic is bumped).
 */
static int
vtnet_rxq_csum_by_offset(struct vtnet_rxq *rxq, struct mbuf *m,
    uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
{
	struct vtnet_softc *sc;
#if defined(INET) || defined(INET6)
	int offset = hdr->csum_start + hdr->csum_offset;
#endif

	sc = rxq->vtnrx_sc;

	/* Only do a basic sanity check on the offset. */
	switch (eth_type) {
#if defined(INET)
	case ETHERTYPE_IP:
		if (__predict_false(offset < ip_start + sizeof(struct ip)))
			return (1);
		break;
#endif
#if defined(INET6)
	case ETHERTYPE_IPV6:
		if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr)))
			return (1);
		break;
#endif
	default:
		sc->vtnet_stats.rx_csum_bad_ethtype++;
		return (1);
	}

	/*
	 * Use the offset to determine the appropriate CSUM_* flags. This is
	 * a bit dirty, but we can get by with it since the checksum offsets
	 * happen to be different. We assume the host does not do IPv4
	 * header checksum offloading.
	 */
	switch (hdr->csum_offset) {
	case offsetof(struct udphdr, uh_sum):
	case offsetof(struct tcphdr, th_sum):
		/* TCP/UDP share the same flags; offset disambiguates. */
		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
		m->m_pkthdr.csum_data = 0xFFFF;
		break;
	case offsetof(struct sctphdr, checksum):
		m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
		break;
	default:
		sc->vtnet_stats.rx_csum_bad_offset++;
		return (1);
	}

	return (0);
}
1512227652Sgrehan
1513255112Sbryanvstatic int
1514255112Sbryanvvtnet_rxq_csum_by_parse(struct vtnet_rxq *rxq, struct mbuf *m,
1515255112Sbryanv    uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
1516255112Sbryanv{
1517255112Sbryanv	struct vtnet_softc *sc;
1518255112Sbryanv	int offset, proto;
1519227652Sgrehan
1520255112Sbryanv	sc = rxq->vtnrx_sc;
1521227652Sgrehan
1522255112Sbryanv	switch (eth_type) {
1523255112Sbryanv#if defined(INET)
1524255112Sbryanv	case ETHERTYPE_IP: {
1525255112Sbryanv		struct ip *ip;
1526255112Sbryanv		if (__predict_false(m->m_len < ip_start + sizeof(struct ip)))
1527255112Sbryanv			return (1);
1528255112Sbryanv		ip = (struct ip *)(m->m_data + ip_start);
1529255112Sbryanv		proto = ip->ip_p;
1530255112Sbryanv		offset = ip_start + (ip->ip_hl << 2);
1531255112Sbryanv		break;
1532255112Sbryanv	}
1533255112Sbryanv#endif
1534255112Sbryanv#if defined(INET6)
1535255112Sbryanv	case ETHERTYPE_IPV6:
1536255112Sbryanv		if (__predict_false(m->m_len < ip_start +
1537255112Sbryanv		    sizeof(struct ip6_hdr)))
1538255112Sbryanv			return (1);
1539255112Sbryanv		offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto);
1540255112Sbryanv		if (__predict_false(offset < 0))
1541255112Sbryanv			return (1);
1542255112Sbryanv		break;
1543255112Sbryanv#endif
1544255112Sbryanv	default:
1545255112Sbryanv		sc->vtnet_stats.rx_csum_bad_ethtype++;
1546255112Sbryanv		return (1);
1547255112Sbryanv	}
1548227652Sgrehan
1549255112Sbryanv	switch (proto) {
1550227652Sgrehan	case IPPROTO_TCP:
1551255112Sbryanv		if (__predict_false(m->m_len < offset + sizeof(struct tcphdr)))
1552255112Sbryanv			return (1);
1553227652Sgrehan		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1554227652Sgrehan		m->m_pkthdr.csum_data = 0xFFFF;
1555227652Sgrehan		break;
1556255112Sbryanv	case IPPROTO_UDP:
1557255112Sbryanv		if (__predict_false(m->m_len < offset + sizeof(struct udphdr)))
1558255112Sbryanv			return (1);
1559255112Sbryanv		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1560255112Sbryanv		m->m_pkthdr.csum_data = 0xFFFF;
1561255112Sbryanv		break;
1562227652Sgrehan	case IPPROTO_SCTP:
1563255112Sbryanv		if (__predict_false(m->m_len < offset + sizeof(struct sctphdr)))
1564255112Sbryanv			return (1);
1565227652Sgrehan		m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
1566227652Sgrehan		break;
1567255112Sbryanv	default:
1568255112Sbryanv		/*
1569255112Sbryanv		 * For the remaining protocols, FreeBSD does not support
1570255112Sbryanv		 * checksum offloading, so the checksum will be recomputed.
1571255112Sbryanv		 */
1572255112Sbryanv#if 0
1573255112Sbryanv		if_printf(sc->vtnet_ifp, "cksum offload of unsupported "
1574255131Seadler		    "protocol eth_type=%#x proto=%d csum_start=%d "
1575255112Sbryanv		    "csum_offset=%d\n", __func__, eth_type, proto,
1576255112Sbryanv		    hdr->csum_start, hdr->csum_offset);
1577255112Sbryanv#endif
1578255112Sbryanv		break;
1579227652Sgrehan	}
1580227652Sgrehan
1581227652Sgrehan	return (0);
1582227652Sgrehan}
1583227652Sgrehan
/*
 * Set the appropriate CSUM_* flags. Unfortunately, the information
 * provided is not directly useful to us. The VirtIO header gives the
 * offset of the checksum, which is all Linux needs, but this is not
 * how FreeBSD does things. We are forced to peek inside the packet
 * a bit.
 *
 * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD
 * could accept the offsets and let the stack figure it out.
 *
 * Returns 0 if the mbuf's checksum flags were set, nonzero otherwise.
 */
static int
vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
    struct virtio_net_hdr *hdr)
{
	struct ether_header *eh;
	struct ether_vlan_header *evh;
	uint16_t eth_type;
	int offset, error;

	eh = mtod(m, struct ether_header *);
	eth_type = ntohs(eh->ether_type);
	if (eth_type == ETHERTYPE_VLAN) {
		/* BMV: We should handle nested VLAN tags too. */
		evh = mtod(m, struct ether_vlan_header *);
		eth_type = ntohs(evh->evl_proto);
		offset = sizeof(struct ether_vlan_header);
	} else
		offset = sizeof(struct ether_header);

	/*
	 * NEEDS_CSUM means the host gave us a valid csum_start/csum_offset
	 * pair; otherwise we must parse the headers ourselves.
	 */
	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
		error = vtnet_rxq_csum_by_offset(rxq, m, eth_type, offset, hdr);
	else
		error = vtnet_rxq_csum_by_parse(rxq, m, eth_type, offset, hdr);

	return (error);
}
1620227652Sgrehan
1621255112Sbryanvstatic void
1622255112Sbryanvvtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs)
1623255112Sbryanv{
1624255112Sbryanv	struct mbuf *m;
1625227652Sgrehan
1626255112Sbryanv	while (--nbufs > 0) {
1627255112Sbryanv		m = virtqueue_dequeue(rxq->vtnrx_vq, NULL);
1628255112Sbryanv		if (m == NULL)
1629255112Sbryanv			break;
1630255112Sbryanv		vtnet_rxq_discard_buf(rxq, m);
1631227652Sgrehan	}
1632255112Sbryanv}
1633227652Sgrehan
/*
 * Return a dequeued mbuf back to the Rx virtqueue so it can be reused
 * for a later frame.
 */
static void
vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m)
{
	int error;

	/*
	 * Requeue the discarded mbuf. This should always be successful
	 * since it was just dequeued.
	 */
	error = vtnet_rxq_enqueue_buf(rxq, m);
	KASSERT(error == 0,
	    ("%s: cannot requeue discarded mbuf %d", __func__, error));
}
1647227652Sgrehan
1648227652Sgrehanstatic int
1649255112Sbryanvvtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs)
1650227652Sgrehan{
1651255112Sbryanv	struct vtnet_softc *sc;
1652227652Sgrehan	struct ifnet *ifp;
1653227652Sgrehan	struct virtqueue *vq;
1654227652Sgrehan	struct mbuf *m, *m_tail;
1655227652Sgrehan	int len;
1656227652Sgrehan
1657255112Sbryanv	sc = rxq->vtnrx_sc;
1658255112Sbryanv	vq = rxq->vtnrx_vq;
1659227652Sgrehan	ifp = sc->vtnet_ifp;
1660227652Sgrehan	m_tail = m_head;
1661227652Sgrehan
1662227652Sgrehan	while (--nbufs > 0) {
1663227652Sgrehan		m = virtqueue_dequeue(vq, &len);
1664227652Sgrehan		if (m == NULL) {
1665255112Sbryanv			rxq->vtnrx_stats.vrxs_ierrors++;
1666227652Sgrehan			goto fail;
1667227652Sgrehan		}
1668227652Sgrehan
1669255112Sbryanv		if (vtnet_rxq_new_buf(rxq) != 0) {
1670255112Sbryanv			rxq->vtnrx_stats.vrxs_iqdrops++;
1671255112Sbryanv			vtnet_rxq_discard_buf(rxq, m);
1672227652Sgrehan			if (nbufs > 1)
1673255112Sbryanv				vtnet_rxq_discard_merged_bufs(rxq, nbufs);
1674227652Sgrehan			goto fail;
1675227652Sgrehan		}
1676227652Sgrehan
1677227652Sgrehan		if (m->m_len < len)
1678227652Sgrehan			len = m->m_len;
1679227652Sgrehan
1680227652Sgrehan		m->m_len = len;
1681227652Sgrehan		m->m_flags &= ~M_PKTHDR;
1682227652Sgrehan
1683227652Sgrehan		m_head->m_pkthdr.len += len;
1684227652Sgrehan		m_tail->m_next = m;
1685227652Sgrehan		m_tail = m;
1686227652Sgrehan	}
1687227652Sgrehan
1688227652Sgrehan	return (0);
1689227652Sgrehan
1690227652Sgrehanfail:
1691227652Sgrehan	sc->vtnet_stats.rx_mergeable_failed++;
1692227652Sgrehan	m_freem(m_head);
1693227652Sgrehan
1694227652Sgrehan	return (1);
1695227652Sgrehan}
1696227652Sgrehan
/*
 * Finish receive processing for one complete frame: strip the 802.1Q
 * tag when hardware VLAN tagging is enabled, set the flow id and
 * checksum flags, update queue statistics, and pass the mbuf to the
 * network stack. The Rx lock is dropped across if_input() because the
 * stack may reenter the driver; callers must recheck state afterwards.
 */
static void
vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m,
    struct virtio_net_hdr *hdr)
{
	struct vtnet_softc *sc;
	struct ifnet *ifp;
	struct ether_header *eh;

	sc = rxq->vtnrx_sc;
	ifp = sc->vtnet_ifp;

	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		eh = mtod(m, struct ether_header *);
		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
			vtnet_vlan_tag_remove(m);
			/*
			 * With the 802.1Q header removed, update the
			 * checksum starting location accordingly.
			 */
			if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
				hdr->csum_start -= ETHER_VLAN_ENCAP_LEN;
		}
	}

	/* Use the queue id as an opaque flow id for the stack. */
	m->m_pkthdr.flowid = rxq->vtnrx_id;
	M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);

	/*
	 * BMV: FreeBSD does not have the UNNECESSARY and PARTIAL checksum
	 * distinction that Linux does. Need to reevaluate if performing
	 * offloading for the NEEDS_CSUM case is really appropriate.
	 */
	if (hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM |
	    VIRTIO_NET_HDR_F_DATA_VALID)) {
		if (vtnet_rxq_csum(rxq, m, hdr) == 0)
			rxq->vtnrx_stats.vrxs_csum++;
		else
			rxq->vtnrx_stats.vrxs_csum_failed++;
	}

	rxq->vtnrx_stats.vrxs_ipackets++;
	rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len;

	/* Drop the lock while the stack consumes the mbuf. */
	VTNET_RXQ_UNLOCK(rxq);
	(*ifp->if_input)(ifp, m);
	VTNET_RXQ_LOCK(rxq);
}
1744255112Sbryanv
/*
 * Process completed frames from the Rx virtqueue, up to the per-pass
 * processing limit. Returns 0 when the queue was drained within the
 * limit, EAGAIN when the limit was exhausted and more work may remain.
 * Called with the Rx queue lock held; the lock is dropped temporarily
 * inside vtnet_rxq_input() for each delivered frame.
 */
static int
vtnet_rxq_eof(struct vtnet_rxq *rxq)
{
	struct virtio_net_hdr lhdr, *hdr;
	struct vtnet_softc *sc;
	struct ifnet *ifp;
	struct virtqueue *vq;
	struct mbuf *m;
	struct virtio_net_hdr_mrg_rxbuf *mhdr;
	int len, deq, nbufs, adjsz, count;

	sc = rxq->vtnrx_sc;
	vq = rxq->vtnrx_vq;
	ifp = sc->vtnet_ifp;
	hdr = &lhdr;
	deq = 0;
	count = sc->vtnet_rx_process_limit;

	VTNET_RXQ_LOCK_ASSERT(rxq);

#ifdef DEV_NETMAP
	/* When netmap owns the ring, let it handle the interrupt. */
	if (netmap_rx_irq(ifp, 0, &deq)) {
		return (FALSE);
	}
#endif /* DEV_NETMAP */

	while (count-- > 0) {
		m = virtqueue_dequeue(vq, &len);
		if (m == NULL)
			break;
		deq++;

		/* A frame must hold at least the header and an Ethernet header. */
		if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) {
			rxq->vtnrx_stats.vrxs_ierrors++;
			vtnet_rxq_discard_buf(rxq, m);
			continue;
		}

		if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
			nbufs = 1;
			adjsz = sizeof(struct vtnet_rx_header);
			/*
			 * Account for our pad inserted between the header
			 * and the actual start of the frame.
			 */
			len += VTNET_RX_HEADER_PAD;
		} else {
			/* Mergeable: the header says how many buffers follow. */
			mhdr = mtod(m, struct virtio_net_hdr_mrg_rxbuf *);
			nbufs = mhdr->num_buffers;
			adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
		}

		if (vtnet_rxq_replace_buf(rxq, m, len) != 0) {
			rxq->vtnrx_stats.vrxs_iqdrops++;
			vtnet_rxq_discard_buf(rxq, m);
			if (nbufs > 1)
				vtnet_rxq_discard_merged_bufs(rxq, nbufs);
			continue;
		}

		m->m_pkthdr.len = len;
		m->m_pkthdr.rcvif = ifp;
		m->m_pkthdr.csum_flags = 0;

		if (nbufs > 1) {
			/* Dequeue the rest of chain. */
			if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0)
				continue;
		}

		/*
		 * Save copy of header before we strip it. For both mergeable
		 * and non-mergeable, the header is at the beginning of the
		 * mbuf data. We no longer need num_buffers, so always use a
		 * regular header.
		 *
		 * BMV: Is this memcpy() expensive? We know the mbuf data is
		 * still valid even after the m_adj().
		 */
		memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr));
		m_adj(m, adjsz);

		vtnet_rxq_input(rxq, m, hdr);

		/* Must recheck after dropping the Rx lock. */
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
	}

	/* Tell the host about any buffers we reposted. */
	if (deq > 0)
		virtqueue_notify(vq);

	return (count > 0 ? 0 : EAGAIN);
}
1839227652Sgrehan
/*
 * Rx virtqueue interrupt handler. Drains the queue, re-enabling the
 * interrupt when done; retries a bounded number of times on races
 * before falling back to the taskqueue.
 *
 * @param xrxq  opaque pointer to the struct vtnet_rxq for this vq.
 */
static void
vtnet_rx_vq_intr(void *xrxq)
{
	struct vtnet_softc *sc;
	struct vtnet_rxq *rxq;
	struct ifnet *ifp;
	int tries, more;

	rxq = xrxq;
	sc = rxq->vtnrx_sc;
	ifp = sc->vtnet_ifp;
	tries = 0;

	if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) {
		/*
		 * Ignore this interrupt. Either this is a spurious interrupt
		 * or multiqueue without per-VQ MSIX so every queue needs to
		 * be polled (a brain dead configuration we could try harder
		 * to avoid).
		 */
		vtnet_rxq_disable_intr(rxq);
		return;
	}

	VTNET_RXQ_LOCK(rxq);

again:
	/* The interface may have been stopped while we waited for the lock. */
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		VTNET_RXQ_UNLOCK(rxq);
		return;
	}

	more = vtnet_rxq_eof(rxq);
	if (more || vtnet_rxq_enable_intr(rxq) != 0) {
		if (!more)
			vtnet_rxq_disable_intr(rxq);
		/*
		 * This is an occasional condition or race (when !more),
		 * so retry a few times before scheduling the taskqueue.
		 */
		if (tries++ < VTNET_INTR_DISABLE_RETRIES)
			goto again;

		VTNET_RXQ_UNLOCK(rxq);
		rxq->vtnrx_stats.vrxs_rescheduled++;
		taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
	} else
		VTNET_RXQ_UNLOCK(rxq);
}
1889227652Sgrehan
/*
 * Taskqueue handler for deferred Rx processing. Drains the queue and
 * reschedules itself if more work remains or the interrupt could not
 * be re-enabled.
 *
 * @param xrxq     opaque pointer to the struct vtnet_rxq.
 * @param pending  taskqueue pending count (unused).
 */
static void
vtnet_rxq_tq_intr(void *xrxq, int pending)
{
	struct vtnet_softc *sc;
	struct vtnet_rxq *rxq;
	struct ifnet *ifp;
	int more;

	rxq = xrxq;
	sc = rxq->vtnrx_sc;
	ifp = sc->vtnet_ifp;

	VTNET_RXQ_LOCK(rxq);

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		VTNET_RXQ_UNLOCK(rxq);
		return;
	}

	more = vtnet_rxq_eof(rxq);
	if (more || vtnet_rxq_enable_intr(rxq) != 0) {
		if (!more)
			vtnet_rxq_disable_intr(rxq);
		rxq->vtnrx_stats.vrxs_rescheduled++;
		taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
	}

	VTNET_RXQ_UNLOCK(rxq);
}
1919227652Sgrehan
1920270334Sbryanvstatic int
1921270334Sbryanvvtnet_txq_below_threshold(struct vtnet_txq *txq)
1922270334Sbryanv{
1923270334Sbryanv	struct vtnet_softc *sc;
1924270334Sbryanv	struct virtqueue *vq;
1925270334Sbryanv
1926270334Sbryanv	sc = txq->vtntx_sc;
1927270334Sbryanv	vq = txq->vtntx_vq;
1928270334Sbryanv
1929270334Sbryanv	return (virtqueue_nfree(vq) <= sc->vtnet_tx_intr_thresh);
1930270334Sbryanv}
1931270334Sbryanv
/*
 * Notify the host of newly enqueued transmit buffers and attempt to
 * re-enable the Tx interrupt. Returns 0 when the interrupt was left
 * enabled, 1 when it was disabled again because completed frames were
 * drained and the queue is still above the threshold — in which case
 * the caller should continue transmitting.
 */
static int
vtnet_txq_notify(struct vtnet_txq *txq)
{
	struct virtqueue *vq;

	vq = txq->vtntx_vq;

	/* Arm the watchdog now that frames are in flight. */
	txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
	virtqueue_notify(vq);

	if (vtnet_txq_enable_intr(txq) == 0)
		return (0);

	/*
	 * Drain frames that were completed since last checked. If this
	 * causes the queue to go above the threshold, the caller should
	 * continue transmitting.
	 */
	if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) {
		virtqueue_disable_intr(vq);
		return (1);
	}

	return (0);
}
1957270334Sbryanv
/*
 * Drain the Tx virtqueue, freeing every pending mbuf and its transmit
 * header. Asserts the virtqueue is empty afterwards.
 */
static void
vtnet_txq_free_mbufs(struct vtnet_txq *txq)
{
	struct virtqueue *vq;
	struct vtnet_tx_header *txhdr;
	int last;

	vq = txq->vtntx_vq;
	last = 0;

	/* virtqueue_drain() walks the ring using 'last' as its cursor. */
	while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
		m_freem(txhdr->vth_mbuf);
		uma_zfree(vtnet_tx_header_zone, txhdr);
	}

	KASSERT(virtqueue_empty(vq),
	    ("%s: mbufs remaining in tx queue %p", __func__, txq));
}
1976227652Sgrehan
/*
 * BMV: Much of this can go away once we finally have offsets in
 * the mbuf packet header. Bug andre@.
 *
 * Determine the ethertype, L3 protocol, and the byte offset of the L4
 * header for an outbound mbuf. Returns 0 on success with *etype,
 * *proto, and *start filled in, or EINVAL for unsupported ethertypes.
 */
static int
vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m,
    int *etype, int *proto, int *start)
{
	struct vtnet_softc *sc;
	struct ether_vlan_header *evh;
	int offset;

	sc = txq->vtntx_sc;

	evh = mtod(m, struct ether_vlan_header *);
	if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		/* BMV: We should handle nested VLAN tags too. */
		*etype = ntohs(evh->evl_proto);
		offset = sizeof(struct ether_vlan_header);
	} else {
		*etype = ntohs(evh->evl_encap_proto);
		offset = sizeof(struct ether_header);
	}

	switch (*etype) {
#if defined(INET)
	case ETHERTYPE_IP: {
		struct ip *ip, iphdr;
		/* Copy out the IPv4 header if it is not contiguous. */
		if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
			m_copydata(m, offset, sizeof(struct ip),
			    (caddr_t) &iphdr);
			ip = &iphdr;
		} else
			ip = (struct ip *)(m->m_data + offset);
		*proto = ip->ip_p;
		*start = offset + (ip->ip_hl << 2);
		break;
	}
#endif
#if defined(INET6)
	case ETHERTYPE_IPV6:
		*proto = -1;
		*start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
		/* Assert the network stack sent us a valid packet. */
		KASSERT(*start > offset,
		    ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
		    *start, offset, *proto));
		break;
#endif
	default:
		sc->vtnet_stats.tx_csum_bad_ethtype++;
		return (EINVAL);
	}

	return (0);
}
2033227652Sgrehan
/*
 * Fill in the TSO fields of the VirtIO header for a TCP segment.
 * Returns 0 on success, or ENOTSUP (and logs, rate-limited) when the
 * segment requests ECN but VIRTIO_NET_F_HOST_ECN was not negotiated.
 */
static int
vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type,
    int offset, struct virtio_net_hdr *hdr)
{
	static struct timeval lastecn;
	static int curecn;
	struct vtnet_softc *sc;
	struct tcphdr *tcp, tcphdr;

	sc = txq->vtntx_sc;

	/* Copy out the TCP header if it is not contiguous in the mbuf. */
	if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) {
		m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr);
		tcp = &tcphdr;
	} else
		tcp = (struct tcphdr *)(m->m_data + offset);

	/* hdr_len covers everything up to and including the TCP header. */
	hdr->hdr_len = offset + (tcp->th_off << 2);
	hdr->gso_size = m->m_pkthdr.tso_segsz;
	hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 :
	    VIRTIO_NET_HDR_GSO_TCPV6;

	if (tcp->th_flags & TH_CWR) {
		/*
		 * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD,
		 * ECN support is not on a per-interface basis, but globally via
		 * the net.inet.tcp.ecn.enable sysctl knob. The default is off.
		 */
		if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
			if (ppsratecheck(&lastecn, &curecn, 1))
				if_printf(sc->vtnet_ifp,
				    "TSO with ECN not negotiated with host\n");
			return (ENOTSUP);
		}
		hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
	}

	txq->vtntx_stats.vtxs_tso++;

	return (0);
}
2075255112Sbryanv
/*
 * Populate the VirtIO header's checksum and TSO fields from the mbuf's
 * csum_flags. Returns the (unmodified) mbuf on success, or NULL if the
 * packet could not be offloaded — in which case the mbuf is freed.
 */
static struct mbuf *
vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m,
    struct virtio_net_hdr *hdr)
{
	struct vtnet_softc *sc;
	int flags, etype, csum_start, proto, error;

	sc = txq->vtntx_sc;
	flags = m->m_pkthdr.csum_flags;

	error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start);
	if (error)
		goto drop;

	if ((etype == ETHERTYPE_IP && flags & VTNET_CSUM_OFFLOAD) ||
	    (etype == ETHERTYPE_IPV6 && flags & VTNET_CSUM_OFFLOAD_IPV6)) {
		/*
		 * We could compare the IP protocol vs the CSUM_ flag too,
		 * but that really should not be necessary.
		 */
		hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
		hdr->csum_start = csum_start;
		hdr->csum_offset = m->m_pkthdr.csum_data;
		txq->vtntx_stats.vtxs_csum++;
	}

	if (flags & CSUM_TSO) {
		if (__predict_false(proto != IPPROTO_TCP)) {
			/* Likely failed to correctly parse the mbuf. */
			sc->vtnet_stats.tx_tso_not_tcp++;
			goto drop;
		}

		/* TSO requires checksum offload to have been requested too. */
		KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM,
		    ("%s: mbuf %p TSO without checksum offload %#x",
		    __func__, m, flags));

		error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr);
		if (error)
			goto drop;
	}

	return (m);

drop:
	m_freem(m);
	return (NULL);
}
2124227652Sgrehan
/*
 * Build the scatter/gather list for the header and mbuf chain and
 * enqueue it on the Tx virtqueue. Defragments the chain once if it
 * has too many segments. On failure the mbuf is freed, *m_head is set
 * to NULL, and ENOBUFS is returned; otherwise the virtqueue_enqueue()
 * result is returned.
 */
static int
vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head,
    struct vtnet_tx_header *txhdr)
{
	struct vtnet_softc *sc;
	struct virtqueue *vq;
	struct sglist *sg;
	struct mbuf *m;
	int error;

	sc = txq->vtntx_sc;
	vq = txq->vtntx_vq;
	sg = txq->vtntx_sg;
	m = *m_head;

	/* The VirtIO header always occupies the first segment. */
	sglist_reset(sg);
	error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size);
	KASSERT(error == 0 && sg->sg_nseg == 1,
	    ("%s: error %d adding header to sglist", __func__, error));

	error = sglist_append_mbuf(sg, m);
	if (error) {
		/* Too many segments: collapse the chain and retry once. */
		m = m_defrag(m, M_NOWAIT);
		if (m == NULL)
			goto fail;

		*m_head = m;
		sc->vtnet_stats.tx_defragged++;

		error = sglist_append_mbuf(sg, m);
		if (error)
			goto fail;
	}

	txhdr->vth_mbuf = m;
	error = virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0);

	return (error);

fail:
	sc->vtnet_stats.tx_defrag_failed++;
	m_freem(*m_head);
	*m_head = NULL;

	return (ENOBUFS);
}
2171227652Sgrehan
/*
 * Encapsulate one outbound mbuf: allocate a transmit header, apply
 * software VLAN encapsulation and checksum/TSO offload as requested by
 * the mbuf's flags, and enqueue it on the Tx virtqueue. On failure the
 * mbuf is freed, *m_head is set to NULL, and an errno is returned.
 */
static int
vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head)
{
	struct vtnet_tx_header *txhdr;
	struct virtio_net_hdr *hdr;
	struct mbuf *m;
	int error;

	m = *m_head;
	M_ASSERTPKTHDR(m);

	txhdr = uma_zalloc(vtnet_tx_header_zone, M_NOWAIT | M_ZERO);
	if (txhdr == NULL) {
		m_freem(m);
		*m_head = NULL;
		return (ENOMEM);
	}

	/*
	 * Always use the non-mergeable header, regardless if the feature
	 * was negotiated. For transmit, num_buffers is always zero. The
	 * vtnet_hdr_size is used to enqueue the correct header size.
	 */
	hdr = &txhdr->vth_uhdr.hdr;

	/* Insert the 802.1Q tag in software; the host does not do it. */
	if (m->m_flags & M_VLANTAG) {
		m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
		if ((*m_head = m) == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		m->m_flags &= ~M_VLANTAG;
	}

	if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) {
		m = vtnet_txq_offload(txq, m, hdr);
		if ((*m_head = m) == NULL) {
			error = ENOBUFS;
			goto fail;
		}
	}

	error = vtnet_txq_enqueue_buf(txq, m_head, txhdr);
	if (error == 0)
		return (0);

fail:
	/* vtnet_txq_enqueue_buf() frees the mbuf; free only the header. */
	uma_zfree(vtnet_tx_header_zone, txhdr);

	return (error);
}
2223227652Sgrehan
2224255112Sbryanv#ifdef VTNET_LEGACY_TX
2225227652Sgrehan
/*
 * Legacy transmit path: drain the interface send queue into the Tx
 * virtqueue. Retries the notify/encap loop a bounded number of times
 * before deferring to the taskqueue. Called with the Tx queue lock
 * held.
 */
static void
vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp)
{
	struct vtnet_softc *sc;
	struct virtqueue *vq;
	struct mbuf *m0;
	int tries, enq;

	sc = txq->vtntx_sc;
	vq = txq->vtntx_vq;
	tries = 0;

	VTNET_TXQ_LOCK_ASSERT(txq);

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
	    sc->vtnet_link_active == 0)
		return;

	/* Reclaim descriptors of already-completed frames first. */
	vtnet_txq_eof(txq);

again:
	enq = 0;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (virtqueue_full(vq))
			break;

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m0);
		if (m0 == NULL)
			break;

		/* On encap failure, put a surviving mbuf back for later. */
		if (vtnet_txq_encap(txq, &m0) != 0) {
			if (m0 != NULL)
				IFQ_DRV_PREPEND(&ifp->if_snd, m0);
			break;
		}

		enq++;
		ETHER_BPF_MTAP(ifp, m0);
	}

	/*
	 * Nonzero from vtnet_txq_notify() means more frames completed
	 * while notifying; loop a few times before rescheduling.
	 */
	if (enq > 0 && vtnet_txq_notify(txq) != 0) {
		if (tries++ < VTNET_NOTIFY_RETRIES)
			goto again;

		txq->vtntx_stats.vtxs_rescheduled++;
		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
	}
}
2275227652Sgrehan
2276227652Sgrehanstatic void
2277255112Sbryanvvtnet_start(struct ifnet *ifp)
2278227652Sgrehan{
2279227652Sgrehan	struct vtnet_softc *sc;
2280255112Sbryanv	struct vtnet_txq *txq;
2281227652Sgrehan
2282255112Sbryanv	sc = ifp->if_softc;
2283255112Sbryanv	txq = &sc->vtnet_txqs[0];
2284227652Sgrehan
2285255112Sbryanv	VTNET_TXQ_LOCK(txq);
2286255112Sbryanv	vtnet_start_locked(txq, ifp);
2287255112Sbryanv	VTNET_TXQ_UNLOCK(txq);
2288255112Sbryanv}
2289227652Sgrehan
2290255112Sbryanv#else /* !VTNET_LEGACY_TX */
2291255112Sbryanv
2292255112Sbryanvstatic int
2293255112Sbryanvvtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m)
2294255112Sbryanv{
2295255112Sbryanv	struct vtnet_softc *sc;
2296255112Sbryanv	struct virtqueue *vq;
2297255112Sbryanv	struct buf_ring *br;
2298255112Sbryanv	struct ifnet *ifp;
2299270334Sbryanv	int enq, tries, error;
2300255112Sbryanv
2301255112Sbryanv	sc = txq->vtntx_sc;
2302255112Sbryanv	vq = txq->vtntx_vq;
2303255112Sbryanv	br = txq->vtntx_br;
2304255112Sbryanv	ifp = sc->vtnet_ifp;
2305270334Sbryanv	tries = 0;
2306255112Sbryanv	error = 0;
2307255112Sbryanv
2308255112Sbryanv	VTNET_TXQ_LOCK_ASSERT(txq);
2309255112Sbryanv
2310255112Sbryanv	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2311255112Sbryanv	    sc->vtnet_link_active == 0) {
2312255112Sbryanv		if (m != NULL)
2313255112Sbryanv			error = drbr_enqueue(ifp, br, m);
2314255112Sbryanv		return (error);
2315255112Sbryanv	}
2316255112Sbryanv
2317255112Sbryanv	if (m != NULL) {
2318255112Sbryanv		error = drbr_enqueue(ifp, br, m);
2319255112Sbryanv		if (error)
2320255112Sbryanv			return (error);
2321255112Sbryanv	}
2322255112Sbryanv
2323255167Sbryanv	vtnet_txq_eof(txq);
2324255167Sbryanv
2325270334Sbryanvagain:
2326270334Sbryanv	enq = 0;
2327270334Sbryanv
2328255112Sbryanv	while ((m = drbr_peek(ifp, br)) != NULL) {
2329265286Sbryanv		if (virtqueue_full(vq)) {
2330265286Sbryanv			drbr_putback(ifp, br, m);
2331265286Sbryanv			break;
2332265286Sbryanv		}
2333265286Sbryanv
2334270334Sbryanv		if (vtnet_txq_encap(txq, &m) != 0) {
2335255112Sbryanv			if (m != NULL)
2336255112Sbryanv				drbr_putback(ifp, br, m);
2337255112Sbryanv			else
2338255112Sbryanv				drbr_advance(ifp, br);
2339255112Sbryanv			break;
2340255112Sbryanv		}
2341255112Sbryanv		drbr_advance(ifp, br);
2342255112Sbryanv
2343255112Sbryanv		enq++;
2344255112Sbryanv		ETHER_BPF_MTAP(ifp, m);
2345255112Sbryanv	}
2346255112Sbryanv
2347270334Sbryanv	if (enq > 0 && vtnet_txq_notify(txq) != 0) {
2348270334Sbryanv		if (tries++ < VTNET_NOTIFY_RETRIES)
2349270334Sbryanv			goto again;
2350270334Sbryanv
2351270334Sbryanv		txq->vtntx_stats.vtxs_rescheduled++;
2352270334Sbryanv		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
2353255112Sbryanv	}
2354255112Sbryanv
2355267279Sluigi	return (0);
2356227652Sgrehan}
2357227652Sgrehan
2358255112Sbryanvstatic int
2359255112Sbryanvvtnet_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
2360255112Sbryanv{
2361255112Sbryanv	struct vtnet_softc *sc;
2362255112Sbryanv	struct vtnet_txq *txq;
2363255112Sbryanv	int i, npairs, error;
2364255112Sbryanv
2365255112Sbryanv	sc = ifp->if_softc;
2366255112Sbryanv	npairs = sc->vtnet_act_vq_pairs;
2367255112Sbryanv
2368281955Shiren	/* check if flowid is set */
2369281955Shiren	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
2370255112Sbryanv		i = m->m_pkthdr.flowid % npairs;
2371255112Sbryanv	else
2372255112Sbryanv		i = curcpu % npairs;
2373255112Sbryanv
2374255112Sbryanv	txq = &sc->vtnet_txqs[i];
2375255112Sbryanv
2376255112Sbryanv	if (VTNET_TXQ_TRYLOCK(txq) != 0) {
2377255112Sbryanv		error = vtnet_txq_mq_start_locked(txq, m);
2378255112Sbryanv		VTNET_TXQ_UNLOCK(txq);
2379255112Sbryanv	} else {
2380255112Sbryanv		error = drbr_enqueue(ifp, txq->vtntx_br, m);
2381255112Sbryanv		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask);
2382255112Sbryanv	}
2383255112Sbryanv
2384255112Sbryanv	return (error);
2385255112Sbryanv}
2386255112Sbryanv
2387227652Sgrehanstatic void
2388255112Sbryanvvtnet_txq_tq_deferred(void *xtxq, int pending)
2389227652Sgrehan{
2390227652Sgrehan	struct vtnet_softc *sc;
2391255112Sbryanv	struct vtnet_txq *txq;
2392255112Sbryanv
2393255112Sbryanv	txq = xtxq;
2394255112Sbryanv	sc = txq->vtntx_sc;
2395255112Sbryanv
2396255112Sbryanv	VTNET_TXQ_LOCK(txq);
2397255112Sbryanv	if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br))
2398255112Sbryanv		vtnet_txq_mq_start_locked(txq, NULL);
2399255112Sbryanv	VTNET_TXQ_UNLOCK(txq);
2400255112Sbryanv}
2401255112Sbryanv
2402255112Sbryanv#endif /* VTNET_LEGACY_TX */
2403255112Sbryanv
2404255112Sbryanvstatic void
2405265286Sbryanvvtnet_txq_start(struct vtnet_txq *txq)
2406265286Sbryanv{
2407265286Sbryanv	struct vtnet_softc *sc;
2408265286Sbryanv	struct ifnet *ifp;
2409265286Sbryanv
2410265286Sbryanv	sc = txq->vtntx_sc;
2411265286Sbryanv	ifp = sc->vtnet_ifp;
2412265286Sbryanv
2413265286Sbryanv#ifdef VTNET_LEGACY_TX
2414265286Sbryanv	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2415265286Sbryanv		vtnet_start_locked(txq, ifp);
2416265286Sbryanv#else
2417265286Sbryanv	if (!drbr_empty(ifp, txq->vtntx_br))
2418265286Sbryanv		vtnet_txq_mq_start_locked(txq, NULL);
2419265286Sbryanv#endif
2420265286Sbryanv}
2421265286Sbryanv
2422265286Sbryanvstatic void
2423255112Sbryanvvtnet_txq_tq_intr(void *xtxq, int pending)
2424255112Sbryanv{
2425255112Sbryanv	struct vtnet_softc *sc;
2426255112Sbryanv	struct vtnet_txq *txq;
2427227652Sgrehan	struct ifnet *ifp;
2428227652Sgrehan
2429255112Sbryanv	txq = xtxq;
2430255112Sbryanv	sc = txq->vtntx_sc;
2431227652Sgrehan	ifp = sc->vtnet_ifp;
2432227652Sgrehan
2433255112Sbryanv	VTNET_TXQ_LOCK(txq);
2434227652Sgrehan
2435255112Sbryanv	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2436255112Sbryanv		VTNET_TXQ_UNLOCK(txq);
2437227652Sgrehan		return;
2438227652Sgrehan	}
2439255112Sbryanv
2440255112Sbryanv	vtnet_txq_eof(txq);
2441265286Sbryanv	vtnet_txq_start(txq);
2442227652Sgrehan
2443255112Sbryanv	VTNET_TXQ_UNLOCK(txq);
2444255112Sbryanv}
2445255112Sbryanv
2446270334Sbryanvstatic int
2447255112Sbryanvvtnet_txq_eof(struct vtnet_txq *txq)
2448255112Sbryanv{
2449255112Sbryanv	struct virtqueue *vq;
2450255112Sbryanv	struct vtnet_tx_header *txhdr;
2451255112Sbryanv	struct mbuf *m;
2452270334Sbryanv	int deq;
2453255112Sbryanv
2454255112Sbryanv	vq = txq->vtntx_vq;
2455270334Sbryanv	deq = 0;
2456255112Sbryanv	VTNET_TXQ_LOCK_ASSERT(txq);
2457255112Sbryanv
2458270509Sbryanv#ifdef DEV_NETMAP
2459270509Sbryanv	if (netmap_tx_irq(txq->vtntx_sc->vtnet_ifp, txq->vtntx_id)) {
2460270509Sbryanv		virtqueue_disable_intr(vq); // XXX luigi
2461270509Sbryanv		return 0; // XXX or 1 ?
2462270509Sbryanv	}
2463270509Sbryanv#endif /* DEV_NETMAP */
2464270509Sbryanv
2465255112Sbryanv	while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
2466255112Sbryanv		m = txhdr->vth_mbuf;
2467270334Sbryanv		deq++;
2468255112Sbryanv
2469255112Sbryanv		txq->vtntx_stats.vtxs_opackets++;
2470255112Sbryanv		txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len;
2471255112Sbryanv		if (m->m_flags & M_MCAST)
2472255112Sbryanv			txq->vtntx_stats.vtxs_omcasts++;
2473255112Sbryanv
2474255112Sbryanv		m_freem(m);
2475255112Sbryanv		uma_zfree(vtnet_tx_header_zone, txhdr);
2476255112Sbryanv	}
2477255112Sbryanv
2478255112Sbryanv	if (virtqueue_empty(vq))
2479255112Sbryanv		txq->vtntx_watchdog = 0;
2480270334Sbryanv
2481270334Sbryanv	return (deq);
2482255112Sbryanv}
2483255112Sbryanv
2484255112Sbryanvstatic void
2485255112Sbryanvvtnet_tx_vq_intr(void *xtxq)
2486255112Sbryanv{
2487255112Sbryanv	struct vtnet_softc *sc;
2488255112Sbryanv	struct vtnet_txq *txq;
2489255112Sbryanv	struct ifnet *ifp;
2490255112Sbryanv
2491255112Sbryanv	txq = xtxq;
2492255112Sbryanv	sc = txq->vtntx_sc;
2493255112Sbryanv	ifp = sc->vtnet_ifp;
2494255112Sbryanv
2495255112Sbryanv	if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) {
2496255112Sbryanv		/*
2497255112Sbryanv		 * Ignore this interrupt. Either this is a spurious interrupt
2498255112Sbryanv		 * or multiqueue without per-VQ MSIX so every queue needs to
2499255112Sbryanv		 * be polled (a brain dead configuration we could try harder
2500255112Sbryanv		 * to avoid).
2501255112Sbryanv		 */
2502255112Sbryanv		vtnet_txq_disable_intr(txq);
2503255112Sbryanv		return;
2504255112Sbryanv	}
2505255112Sbryanv
2506255112Sbryanv	VTNET_TXQ_LOCK(txq);
2507255112Sbryanv
2508227652Sgrehan	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2509255112Sbryanv		VTNET_TXQ_UNLOCK(txq);
2510227652Sgrehan		return;
2511227652Sgrehan	}
2512227652Sgrehan
2513255112Sbryanv	vtnet_txq_eof(txq);
2514265286Sbryanv	vtnet_txq_start(txq);
2515227652Sgrehan
2516270334Sbryanv	VTNET_TXQ_UNLOCK(txq);
2517255112Sbryanv}
2518255112Sbryanv
2519255112Sbryanvstatic void
2520255112Sbryanvvtnet_tx_start_all(struct vtnet_softc *sc)
2521255112Sbryanv{
2522255112Sbryanv	struct vtnet_txq *txq;
2523255112Sbryanv	int i;
2524255112Sbryanv
2525255112Sbryanv	VTNET_CORE_LOCK_ASSERT(sc);
2526255112Sbryanv
2527255112Sbryanv	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2528255112Sbryanv		txq = &sc->vtnet_txqs[i];
2529255112Sbryanv
2530255112Sbryanv		VTNET_TXQ_LOCK(txq);
2531265286Sbryanv		vtnet_txq_start(txq);
2532255112Sbryanv		VTNET_TXQ_UNLOCK(txq);
2533227652Sgrehan	}
2534255112Sbryanv}
2535227652Sgrehan
2536255112Sbryanv#ifndef VTNET_LEGACY_TX
2537255112Sbryanvstatic void
2538255112Sbryanvvtnet_qflush(struct ifnet *ifp)
2539255112Sbryanv{
2540255112Sbryanv	struct vtnet_softc *sc;
2541255112Sbryanv	struct vtnet_txq *txq;
2542255112Sbryanv	struct mbuf *m;
2543255112Sbryanv	int i;
2544255112Sbryanv
2545255112Sbryanv	sc = ifp->if_softc;
2546255112Sbryanv
2547255112Sbryanv	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2548255112Sbryanv		txq = &sc->vtnet_txqs[i];
2549255112Sbryanv
2550255112Sbryanv		VTNET_TXQ_LOCK(txq);
2551255112Sbryanv		while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL)
2552255112Sbryanv			m_freem(m);
2553255112Sbryanv		VTNET_TXQ_UNLOCK(txq);
2554255112Sbryanv	}
2555255112Sbryanv
2556255112Sbryanv	if_qflush(ifp);
2557227652Sgrehan}
2558255112Sbryanv#endif
2559227652Sgrehan
2560255112Sbryanvstatic int
2561255112Sbryanvvtnet_watchdog(struct vtnet_txq *txq)
2562255112Sbryanv{
2563270334Sbryanv	struct ifnet *ifp;
2564255112Sbryanv
2565270334Sbryanv	ifp = txq->vtntx_sc->vtnet_ifp;
2566255112Sbryanv
2567255112Sbryanv	VTNET_TXQ_LOCK(txq);
2568270334Sbryanv	if (txq->vtntx_watchdog == 1) {
2569270334Sbryanv		/*
2570270334Sbryanv		 * Only drain completed frames if the watchdog is about to
2571270334Sbryanv		 * expire. If any frames were drained, there may be enough
2572270334Sbryanv		 * free descriptors now available to transmit queued frames.
2573270334Sbryanv		 * In that case, the timer will immediately be decremented
2574270334Sbryanv		 * below, but the timeout is generous enough that should not
2575270334Sbryanv		 * be a problem.
2576270334Sbryanv		 */
2577270334Sbryanv		if (vtnet_txq_eof(txq) != 0)
2578270334Sbryanv			vtnet_txq_start(txq);
2579270334Sbryanv	}
2580270334Sbryanv
2581255112Sbryanv	if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) {
2582255112Sbryanv		VTNET_TXQ_UNLOCK(txq);
2583255112Sbryanv		return (0);
2584255112Sbryanv	}
2585255112Sbryanv	VTNET_TXQ_UNLOCK(txq);
2586255112Sbryanv
2587270334Sbryanv	if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id);
2588255112Sbryanv	return (1);
2589255112Sbryanv}
2590255112Sbryanv
2591227652Sgrehanstatic void
2592255112Sbryanvvtnet_rxq_accum_stats(struct vtnet_rxq *rxq, struct vtnet_rxq_stats *accum)
2593255112Sbryanv{
2594255112Sbryanv	struct vtnet_rxq_stats *st;
2595255112Sbryanv
2596255112Sbryanv	st = &rxq->vtnrx_stats;
2597255112Sbryanv
2598255112Sbryanv	accum->vrxs_ipackets += st->vrxs_ipackets;
2599255112Sbryanv	accum->vrxs_ibytes += st->vrxs_ibytes;
2600255112Sbryanv	accum->vrxs_iqdrops += st->vrxs_iqdrops;
2601255112Sbryanv	accum->vrxs_csum += st->vrxs_csum;
2602255112Sbryanv	accum->vrxs_csum_failed += st->vrxs_csum_failed;
2603255112Sbryanv	accum->vrxs_rescheduled += st->vrxs_rescheduled;
2604255112Sbryanv}
2605255112Sbryanv
2606255112Sbryanvstatic void
2607255112Sbryanvvtnet_txq_accum_stats(struct vtnet_txq *txq, struct vtnet_txq_stats *accum)
2608255112Sbryanv{
2609255112Sbryanv	struct vtnet_txq_stats *st;
2610255112Sbryanv
2611255112Sbryanv	st = &txq->vtntx_stats;
2612255112Sbryanv
2613255112Sbryanv	accum->vtxs_opackets += st->vtxs_opackets;
2614255112Sbryanv	accum->vtxs_obytes += st->vtxs_obytes;
2615255112Sbryanv	accum->vtxs_csum += st->vtxs_csum;
2616255112Sbryanv	accum->vtxs_tso += st->vtxs_tso;
2617255112Sbryanv	accum->vtxs_rescheduled += st->vtxs_rescheduled;
2618255112Sbryanv}
2619255112Sbryanv
2620255112Sbryanvstatic void
2621255112Sbryanvvtnet_accumulate_stats(struct vtnet_softc *sc)
2622255112Sbryanv{
2623255112Sbryanv	struct ifnet *ifp;
2624255112Sbryanv	struct vtnet_statistics *st;
2625255112Sbryanv	struct vtnet_rxq_stats rxaccum;
2626255112Sbryanv	struct vtnet_txq_stats txaccum;
2627255112Sbryanv	int i;
2628255112Sbryanv
2629255112Sbryanv	ifp = sc->vtnet_ifp;
2630255112Sbryanv	st = &sc->vtnet_stats;
2631255112Sbryanv	bzero(&rxaccum, sizeof(struct vtnet_rxq_stats));
2632255112Sbryanv	bzero(&txaccum, sizeof(struct vtnet_txq_stats));
2633255112Sbryanv
2634255112Sbryanv	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2635255112Sbryanv		vtnet_rxq_accum_stats(&sc->vtnet_rxqs[i], &rxaccum);
2636255112Sbryanv		vtnet_txq_accum_stats(&sc->vtnet_txqs[i], &txaccum);
2637255112Sbryanv	}
2638255112Sbryanv
2639255112Sbryanv	st->rx_csum_offloaded = rxaccum.vrxs_csum;
2640255112Sbryanv	st->rx_csum_failed = rxaccum.vrxs_csum_failed;
2641255112Sbryanv	st->rx_task_rescheduled = rxaccum.vrxs_rescheduled;
2642255112Sbryanv	st->tx_csum_offloaded = txaccum.vtxs_csum;
2643255112Sbryanv	st->tx_tso_offloaded = txaccum.vtxs_tso;
2644255112Sbryanv	st->tx_task_rescheduled = txaccum.vtxs_rescheduled;
2645255112Sbryanv
2646255112Sbryanv	/*
2647255112Sbryanv	 * With the exception of if_ierrors, these ifnet statistics are
2648255112Sbryanv	 * only updated in the driver, so just set them to our accumulated
2649255112Sbryanv	 * values. if_ierrors is updated in ether_input() for malformed
2650255112Sbryanv	 * frames that we should have already discarded.
2651255112Sbryanv	 */
2652255112Sbryanv	ifp->if_ipackets = rxaccum.vrxs_ipackets;
2653255112Sbryanv	ifp->if_iqdrops = rxaccum.vrxs_iqdrops;
2654255112Sbryanv	ifp->if_ierrors = rxaccum.vrxs_ierrors;
2655255112Sbryanv	ifp->if_opackets = txaccum.vtxs_opackets;
2656255112Sbryanv#ifndef VTNET_LEGACY_TX
2657255112Sbryanv	ifp->if_obytes = txaccum.vtxs_obytes;
2658255112Sbryanv	ifp->if_omcasts = txaccum.vtxs_omcasts;
2659255112Sbryanv#endif
2660255112Sbryanv}
2661255112Sbryanv
2662255112Sbryanvstatic void
2663255112Sbryanvvtnet_tick(void *xsc)
2664255112Sbryanv{
2665255112Sbryanv	struct vtnet_softc *sc;
2666255112Sbryanv	struct ifnet *ifp;
2667255112Sbryanv	int i, timedout;
2668255112Sbryanv
2669255112Sbryanv	sc = xsc;
2670255112Sbryanv	ifp = sc->vtnet_ifp;
2671255112Sbryanv	timedout = 0;
2672255112Sbryanv
2673255112Sbryanv	VTNET_CORE_LOCK_ASSERT(sc);
2674255112Sbryanv	vtnet_accumulate_stats(sc);
2675255112Sbryanv
2676255112Sbryanv	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
2677255112Sbryanv		timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]);
2678255112Sbryanv
2679255112Sbryanv	if (timedout != 0) {
2680255112Sbryanv		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2681255112Sbryanv		vtnet_init_locked(sc);
2682255112Sbryanv	} else
2683255112Sbryanv		callout_schedule(&sc->vtnet_tick_ch, hz);
2684255112Sbryanv}
2685255112Sbryanv
2686255112Sbryanvstatic void
2687255112Sbryanvvtnet_start_taskqueues(struct vtnet_softc *sc)
2688255112Sbryanv{
2689255112Sbryanv	device_t dev;
2690255112Sbryanv	struct vtnet_rxq *rxq;
2691255112Sbryanv	struct vtnet_txq *txq;
2692255112Sbryanv	int i, error;
2693255112Sbryanv
2694255112Sbryanv	dev = sc->vtnet_dev;
2695255112Sbryanv
2696255112Sbryanv	/*
2697255112Sbryanv	 * Errors here are very difficult to recover from - we cannot
2698255112Sbryanv	 * easily fail because, if this is during boot, we will hang
2699255112Sbryanv	 * when freeing any successfully started taskqueues because
2700255112Sbryanv	 * the scheduler isn't up yet.
2701255112Sbryanv	 *
2702255112Sbryanv	 * Most drivers just ignore the return value - it only fails
2703255112Sbryanv	 * with ENOMEM so an error is not likely.
2704255112Sbryanv	 */
2705255112Sbryanv	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2706255112Sbryanv		rxq = &sc->vtnet_rxqs[i];
2707255112Sbryanv		error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET,
2708255112Sbryanv		    "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id);
2709255112Sbryanv		if (error) {
2710255112Sbryanv			device_printf(dev, "failed to start rx taskq %d\n",
2711255112Sbryanv			    rxq->vtnrx_id);
2712255112Sbryanv		}
2713255112Sbryanv
2714255112Sbryanv		txq = &sc->vtnet_txqs[i];
2715255112Sbryanv		error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET,
2716255112Sbryanv		    "%s txq %d", device_get_nameunit(dev), txq->vtntx_id);
2717255112Sbryanv		if (error) {
2718255112Sbryanv			device_printf(dev, "failed to start tx taskq %d\n",
2719255112Sbryanv			    txq->vtntx_id);
2720255112Sbryanv		}
2721255112Sbryanv	}
2722255112Sbryanv}
2723255112Sbryanv
2724255112Sbryanvstatic void
2725255112Sbryanvvtnet_free_taskqueues(struct vtnet_softc *sc)
2726255112Sbryanv{
2727255112Sbryanv	struct vtnet_rxq *rxq;
2728255112Sbryanv	struct vtnet_txq *txq;
2729255112Sbryanv	int i;
2730255112Sbryanv
2731255112Sbryanv	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2732255112Sbryanv		rxq = &sc->vtnet_rxqs[i];
2733255112Sbryanv		if (rxq->vtnrx_tq != NULL) {
2734255112Sbryanv			taskqueue_free(rxq->vtnrx_tq);
2735255112Sbryanv			rxq->vtnrx_vq = NULL;
2736255112Sbryanv		}
2737255112Sbryanv
2738255112Sbryanv		txq = &sc->vtnet_txqs[i];
2739255112Sbryanv		if (txq->vtntx_tq != NULL) {
2740255112Sbryanv			taskqueue_free(txq->vtntx_tq);
2741255112Sbryanv			txq->vtntx_tq = NULL;
2742255112Sbryanv		}
2743255112Sbryanv	}
2744255112Sbryanv}
2745255112Sbryanv
2746255112Sbryanvstatic void
2747255112Sbryanvvtnet_drain_taskqueues(struct vtnet_softc *sc)
2748255112Sbryanv{
2749255112Sbryanv	struct vtnet_rxq *rxq;
2750255112Sbryanv	struct vtnet_txq *txq;
2751255112Sbryanv	int i;
2752255112Sbryanv
2753255112Sbryanv	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2754255112Sbryanv		rxq = &sc->vtnet_rxqs[i];
2755255112Sbryanv		if (rxq->vtnrx_tq != NULL)
2756255112Sbryanv			taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
2757255112Sbryanv
2758255112Sbryanv		txq = &sc->vtnet_txqs[i];
2759255112Sbryanv		if (txq->vtntx_tq != NULL) {
2760255112Sbryanv			taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask);
2761255112Sbryanv#ifndef VTNET_LEGACY_TX
2762255112Sbryanv			taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask);
2763255112Sbryanv#endif
2764255112Sbryanv		}
2765255112Sbryanv	}
2766255112Sbryanv}
2767255112Sbryanv
2768255112Sbryanvstatic void
2769255112Sbryanvvtnet_drain_rxtx_queues(struct vtnet_softc *sc)
2770255112Sbryanv{
2771255112Sbryanv	struct vtnet_rxq *rxq;
2772255112Sbryanv	struct vtnet_txq *txq;
2773255112Sbryanv	int i;
2774255112Sbryanv
2775283617Sjhb#ifdef DEV_NETMAP
2776283617Sjhb	if (nm_native_on(NA(sc->vtnet_ifp)))
2777283617Sjhb		return;
2778283617Sjhb#endif /* DEV_NETMAP */
2779283617Sjhb
2780255112Sbryanv	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2781255112Sbryanv		rxq = &sc->vtnet_rxqs[i];
2782255112Sbryanv		vtnet_rxq_free_mbufs(rxq);
2783255112Sbryanv
2784255112Sbryanv		txq = &sc->vtnet_txqs[i];
2785255112Sbryanv		vtnet_txq_free_mbufs(txq);
2786255112Sbryanv	}
2787255112Sbryanv}
2788255112Sbryanv
2789255112Sbryanvstatic void
2790255112Sbryanvvtnet_stop_rendezvous(struct vtnet_softc *sc)
2791255112Sbryanv{
2792255112Sbryanv	struct vtnet_rxq *rxq;
2793255112Sbryanv	struct vtnet_txq *txq;
2794255112Sbryanv	int i;
2795255112Sbryanv
2796255112Sbryanv	/*
2797255112Sbryanv	 * Lock and unlock the per-queue mutex so we known the stop
2798255112Sbryanv	 * state is visible. Doing only the active queues should be
2799255112Sbryanv	 * sufficient, but it does not cost much extra to do all the
2800255112Sbryanv	 * queues. Note we hold the core mutex here too.
2801255112Sbryanv	 */
2802255112Sbryanv	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2803255112Sbryanv		rxq = &sc->vtnet_rxqs[i];
2804255112Sbryanv		VTNET_RXQ_LOCK(rxq);
2805255112Sbryanv		VTNET_RXQ_UNLOCK(rxq);
2806255112Sbryanv
2807255112Sbryanv		txq = &sc->vtnet_txqs[i];
2808255112Sbryanv		VTNET_TXQ_LOCK(txq);
2809255112Sbryanv		VTNET_TXQ_UNLOCK(txq);
2810255112Sbryanv	}
2811255112Sbryanv}
2812255112Sbryanv
2813255112Sbryanvstatic void
2814227652Sgrehanvtnet_stop(struct vtnet_softc *sc)
2815227652Sgrehan{
2816227652Sgrehan	device_t dev;
2817227652Sgrehan	struct ifnet *ifp;
2818227652Sgrehan
2819227652Sgrehan	dev = sc->vtnet_dev;
2820227652Sgrehan	ifp = sc->vtnet_ifp;
2821227652Sgrehan
2822255112Sbryanv	VTNET_CORE_LOCK_ASSERT(sc);
2823227652Sgrehan
2824255112Sbryanv	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2825255112Sbryanv	sc->vtnet_link_active = 0;
2826227652Sgrehan	callout_stop(&sc->vtnet_tick_ch);
2827227652Sgrehan
2828255112Sbryanv	/* Only advisory. */
2829255112Sbryanv	vtnet_disable_interrupts(sc);
2830227652Sgrehan
2831227652Sgrehan	/*
2832255112Sbryanv	 * Stop the host adapter. This resets it to the pre-initialized
2833255112Sbryanv	 * state. It will not generate any interrupts until after it is
2834255112Sbryanv	 * reinitialized.
2835227652Sgrehan	 */
2836227652Sgrehan	virtio_stop(dev);
2837255112Sbryanv	vtnet_stop_rendezvous(sc);
2838227652Sgrehan
2839255112Sbryanv	/* Free any mbufs left in the virtqueues. */
2840255112Sbryanv	vtnet_drain_rxtx_queues(sc);
2841227652Sgrehan}
2842227652Sgrehan
2843227652Sgrehanstatic int
2844255112Sbryanvvtnet_virtio_reinit(struct vtnet_softc *sc)
2845227652Sgrehan{
2846255112Sbryanv	device_t dev;
2847227652Sgrehan	struct ifnet *ifp;
2848227652Sgrehan	uint64_t features;
2849255112Sbryanv	int mask, error;
2850227652Sgrehan
2851255112Sbryanv	dev = sc->vtnet_dev;
2852227652Sgrehan	ifp = sc->vtnet_ifp;
2853227652Sgrehan	features = sc->vtnet_features;
2854227652Sgrehan
2855255112Sbryanv	mask = 0;
2856255112Sbryanv#if defined(INET)
2857255112Sbryanv	mask |= IFCAP_RXCSUM;
2858255112Sbryanv#endif
2859255112Sbryanv#if defined (INET6)
2860255112Sbryanv	mask |= IFCAP_RXCSUM_IPV6;
2861255112Sbryanv#endif
2862255112Sbryanv
2863227652Sgrehan	/*
2864227652Sgrehan	 * Re-negotiate with the host, removing any disabled receive
2865227652Sgrehan	 * features. Transmit features are disabled only on our side
2866227652Sgrehan	 * via if_capenable and if_hwassist.
2867227652Sgrehan	 */
2868227652Sgrehan
2869255112Sbryanv	if (ifp->if_capabilities & mask) {
2870255112Sbryanv		/*
2871255112Sbryanv		 * We require both IPv4 and IPv6 offloading to be enabled
2872255112Sbryanv		 * in order to negotiated it: VirtIO does not distinguish
2873255112Sbryanv		 * between the two.
2874255112Sbryanv		 */
2875255112Sbryanv		if ((ifp->if_capenable & mask) != mask)
2876227652Sgrehan			features &= ~VIRTIO_NET_F_GUEST_CSUM;
2877227652Sgrehan	}
2878227652Sgrehan
2879227652Sgrehan	if (ifp->if_capabilities & IFCAP_LRO) {
2880227652Sgrehan		if ((ifp->if_capenable & IFCAP_LRO) == 0)
2881227652Sgrehan			features &= ~VTNET_LRO_FEATURES;
2882227652Sgrehan	}
2883227652Sgrehan
2884227652Sgrehan	if (ifp->if_capabilities & IFCAP_VLAN_HWFILTER) {
2885227652Sgrehan		if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)
2886227652Sgrehan			features &= ~VIRTIO_NET_F_CTRL_VLAN;
2887227652Sgrehan	}
2888227652Sgrehan
2889255112Sbryanv	error = virtio_reinit(dev, features);
2890255112Sbryanv	if (error)
2891255112Sbryanv		device_printf(dev, "virtio reinit error %d\n", error);
2892255112Sbryanv
2893255112Sbryanv	return (error);
2894227652Sgrehan}
2895227652Sgrehan
2896227652Sgrehanstatic void
2897255112Sbryanvvtnet_init_rx_filters(struct vtnet_softc *sc)
2898227652Sgrehan{
2899255112Sbryanv	struct ifnet *ifp;
2900255112Sbryanv
2901255112Sbryanv	ifp = sc->vtnet_ifp;
2902255112Sbryanv
2903255112Sbryanv	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
2904255112Sbryanv		/* Restore promiscuous and all-multicast modes. */
2905255112Sbryanv		vtnet_rx_filter(sc);
2906255112Sbryanv		/* Restore filtered MAC addresses. */
2907255112Sbryanv		vtnet_rx_filter_mac(sc);
2908255112Sbryanv	}
2909255112Sbryanv
2910255112Sbryanv	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2911255112Sbryanv		vtnet_rx_filter_vlan(sc);
2912255112Sbryanv}
2913255112Sbryanv
2914255112Sbryanvstatic int
2915255112Sbryanvvtnet_init_rx_queues(struct vtnet_softc *sc)
2916255112Sbryanv{
2917227652Sgrehan	device_t dev;
2918255112Sbryanv	struct vtnet_rxq *rxq;
2919255112Sbryanv	int i, clsize, error;
2920255112Sbryanv
2921255112Sbryanv	dev = sc->vtnet_dev;
2922255112Sbryanv
2923255112Sbryanv	/*
2924255112Sbryanv	 * Use the new cluster size if one has been set (via a MTU
2925255112Sbryanv	 * change). Otherwise, use the standard 2K clusters.
2926255112Sbryanv	 *
2927255112Sbryanv	 * BMV: It might make sense to use page sized clusters as
2928255112Sbryanv	 * the default (depending on the features negotiated).
2929255112Sbryanv	 */
2930255112Sbryanv	if (sc->vtnet_rx_new_clsize != 0) {
2931255112Sbryanv		clsize = sc->vtnet_rx_new_clsize;
2932255112Sbryanv		sc->vtnet_rx_new_clsize = 0;
2933255112Sbryanv	} else
2934255112Sbryanv		clsize = MCLBYTES;
2935255112Sbryanv
2936255112Sbryanv	sc->vtnet_rx_clsize = clsize;
2937255112Sbryanv	sc->vtnet_rx_nmbufs = VTNET_NEEDED_RX_MBUFS(sc, clsize);
2938255112Sbryanv
2939265286Sbryanv	KASSERT(sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS ||
2940265286Sbryanv	    sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs,
2941265286Sbryanv	    ("%s: too many rx mbufs %d for %d segments", __func__,
2942265286Sbryanv	    sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs));
2943255112Sbryanv
2944270509Sbryanv#ifdef DEV_NETMAP
2945270509Sbryanv	if (vtnet_netmap_init_rx_buffers(sc))
2946270509Sbryanv		return 0;
2947270509Sbryanv#endif /* DEV_NETMAP */
2948270509Sbryanv
2949255112Sbryanv	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2950255112Sbryanv		rxq = &sc->vtnet_rxqs[i];
2951255112Sbryanv
2952255112Sbryanv		/* Hold the lock to satisfy asserts. */
2953255112Sbryanv		VTNET_RXQ_LOCK(rxq);
2954255112Sbryanv		error = vtnet_rxq_populate(rxq);
2955255112Sbryanv		VTNET_RXQ_UNLOCK(rxq);
2956255112Sbryanv
2957255112Sbryanv		if (error) {
2958255112Sbryanv			device_printf(dev,
2959255112Sbryanv			    "cannot allocate mbufs for Rx queue %d\n", i);
2960255112Sbryanv			return (error);
2961255112Sbryanv		}
2962255112Sbryanv	}
2963255112Sbryanv
2964255112Sbryanv	return (0);
2965255112Sbryanv}
2966255112Sbryanv
2967255112Sbryanvstatic int
2968255112Sbryanvvtnet_init_tx_queues(struct vtnet_softc *sc)
2969255112Sbryanv{
2970255112Sbryanv	struct vtnet_txq *txq;
2971255112Sbryanv	int i;
2972255112Sbryanv
2973255112Sbryanv	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2974255112Sbryanv		txq = &sc->vtnet_txqs[i];
2975255112Sbryanv		txq->vtntx_watchdog = 0;
2976255112Sbryanv	}
2977255112Sbryanv
2978255112Sbryanv	return (0);
2979255112Sbryanv}
2980255112Sbryanv
2981255112Sbryanvstatic int
2982255112Sbryanvvtnet_init_rxtx_queues(struct vtnet_softc *sc)
2983255112Sbryanv{
2984227652Sgrehan	int error;
2985227652Sgrehan
2986255112Sbryanv	error = vtnet_init_rx_queues(sc);
2987255112Sbryanv	if (error)
2988255112Sbryanv		return (error);
2989255112Sbryanv
2990255112Sbryanv	error = vtnet_init_tx_queues(sc);
2991255112Sbryanv	if (error)
2992255112Sbryanv		return (error);
2993255112Sbryanv
2994255112Sbryanv	return (0);
2995255112Sbryanv}
2996255112Sbryanv
2997255112Sbryanvstatic void
2998255112Sbryanvvtnet_set_active_vq_pairs(struct vtnet_softc *sc)
2999255112Sbryanv{
3000255112Sbryanv	device_t dev;
3001255112Sbryanv	int npairs;
3002255112Sbryanv
3003227652Sgrehan	dev = sc->vtnet_dev;
3004227652Sgrehan
3005255112Sbryanv	if ((sc->vtnet_flags & VTNET_FLAG_MULTIQ) == 0) {
3006255112Sbryanv		sc->vtnet_act_vq_pairs = 1;
3007227652Sgrehan		return;
3008255112Sbryanv	}
3009227652Sgrehan
3010304081Ssmh	npairs = sc->vtnet_requested_vq_pairs;
3011227652Sgrehan
3012255112Sbryanv	if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) {
3013227652Sgrehan		device_printf(dev,
3014255112Sbryanv		    "cannot set active queue pairs to %d\n", npairs);
3015255112Sbryanv		npairs = 1;
3016227652Sgrehan	}
3017227652Sgrehan
3018255112Sbryanv	sc->vtnet_act_vq_pairs = npairs;
3019255112Sbryanv}
3020255112Sbryanv
3021255112Sbryanvstatic int
3022255112Sbryanvvtnet_reinit(struct vtnet_softc *sc)
3023255112Sbryanv{
3024255112Sbryanv	struct ifnet *ifp;
3025255112Sbryanv	int error;
3026255112Sbryanv
3027255112Sbryanv	ifp = sc->vtnet_ifp;
3028255112Sbryanv
3029255112Sbryanv	/* Use the current MAC address. */
3030227652Sgrehan	bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN);
3031227652Sgrehan	vtnet_set_hwaddr(sc);
3032227652Sgrehan
3033255112Sbryanv	vtnet_set_active_vq_pairs(sc);
3034255112Sbryanv
3035227652Sgrehan	ifp->if_hwassist = 0;
3036227652Sgrehan	if (ifp->if_capenable & IFCAP_TXCSUM)
3037227652Sgrehan		ifp->if_hwassist |= VTNET_CSUM_OFFLOAD;
3038255112Sbryanv	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
3039255112Sbryanv		ifp->if_hwassist |= VTNET_CSUM_OFFLOAD_IPV6;
3040227652Sgrehan	if (ifp->if_capenable & IFCAP_TSO4)
3041277388Sbryanv		ifp->if_hwassist |= CSUM_IP_TSO;
3042255112Sbryanv	if (ifp->if_capenable & IFCAP_TSO6)
3043277388Sbryanv		ifp->if_hwassist |= CSUM_IP6_TSO;
3044227652Sgrehan
3045255112Sbryanv	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
3046255112Sbryanv		vtnet_init_rx_filters(sc);
3047227652Sgrehan
3048255112Sbryanv	error = vtnet_init_rxtx_queues(sc);
3049255112Sbryanv	if (error)
3050255112Sbryanv		return (error);
3051227652Sgrehan
3052255112Sbryanv	vtnet_enable_interrupts(sc);
3053255112Sbryanv	ifp->if_drv_flags |= IFF_DRV_RUNNING;
3054227652Sgrehan
3055255112Sbryanv	return (0);
3056255112Sbryanv}
3057227652Sgrehan
3058255112Sbryanvstatic void
3059255112Sbryanvvtnet_init_locked(struct vtnet_softc *sc)
3060255112Sbryanv{
3061255112Sbryanv	device_t dev;
3062255112Sbryanv	struct ifnet *ifp;
3063227652Sgrehan
3064255112Sbryanv	dev = sc->vtnet_dev;
3065255112Sbryanv	ifp = sc->vtnet_ifp;
3066227652Sgrehan
3067255112Sbryanv	VTNET_CORE_LOCK_ASSERT(sc);
3068255112Sbryanv
3069255112Sbryanv	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3070255112Sbryanv		return;
3071255112Sbryanv
3072255112Sbryanv	vtnet_stop(sc);
3073255112Sbryanv
3074255112Sbryanv	/* Reinitialize with the host. */
3075255112Sbryanv	if (vtnet_virtio_reinit(sc) != 0)
3076255112Sbryanv		goto fail;
3077255112Sbryanv
3078255112Sbryanv	if (vtnet_reinit(sc) != 0)
3079255112Sbryanv		goto fail;
3080255112Sbryanv
3081227652Sgrehan	virtio_reinit_complete(dev);
3082227652Sgrehan
3083227652Sgrehan	vtnet_update_link_status(sc);
3084227652Sgrehan	callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
3085255112Sbryanv
3086255112Sbryanv	return;
3087255112Sbryanv
3088255112Sbryanvfail:
3089255112Sbryanv	vtnet_stop(sc);
3090227652Sgrehan}
3091227652Sgrehan
3092227652Sgrehanstatic void
3093227652Sgrehanvtnet_init(void *xsc)
3094227652Sgrehan{
3095227652Sgrehan	struct vtnet_softc *sc;
3096227652Sgrehan
3097227652Sgrehan	sc = xsc;
3098227652Sgrehan
3099270509Sbryanv#ifdef DEV_NETMAP
3100270509Sbryanv	if (!NA(sc->vtnet_ifp)) {
3101270509Sbryanv		D("try to attach again");
3102270509Sbryanv		vtnet_netmap_attach(sc);
3103270509Sbryanv	}
3104270509Sbryanv#endif /* DEV_NETMAP */
3105270509Sbryanv
3106255112Sbryanv	VTNET_CORE_LOCK(sc);
3107227652Sgrehan	vtnet_init_locked(sc);
3108255112Sbryanv	VTNET_CORE_UNLOCK(sc);
3109227652Sgrehan}
3110227652Sgrehan
3111227652Sgrehanstatic void
3112255112Sbryanvvtnet_free_ctrl_vq(struct vtnet_softc *sc)
3113255112Sbryanv{
3114255112Sbryanv	struct virtqueue *vq;
3115255112Sbryanv
3116255112Sbryanv	vq = sc->vtnet_ctrl_vq;
3117255112Sbryanv
3118255112Sbryanv	/*
3119255112Sbryanv	 * The control virtqueue is only polled and therefore it should
3120255112Sbryanv	 * already be empty.
3121255112Sbryanv	 */
3122255112Sbryanv	KASSERT(virtqueue_empty(vq),
3123255112Sbryanv	    ("%s: ctrl vq %p not empty", __func__, vq));
3124255112Sbryanv}
3125255112Sbryanv
3126255112Sbryanvstatic void
3127227652Sgrehanvtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie,
3128227652Sgrehan    struct sglist *sg, int readable, int writable)
3129227652Sgrehan{
3130227652Sgrehan	struct virtqueue *vq;
3131227652Sgrehan
3132227652Sgrehan	vq = sc->vtnet_ctrl_vq;
3133227652Sgrehan
3134255112Sbryanv	VTNET_CORE_LOCK_ASSERT(sc);
3135227652Sgrehan	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ,
3136255112Sbryanv	    ("%s: CTRL_VQ feature not negotiated", __func__));
3137227652Sgrehan
3138255112Sbryanv	if (!virtqueue_empty(vq))
3139255112Sbryanv		return;
3140227652Sgrehan	if (virtqueue_enqueue(vq, cookie, sg, readable, writable) != 0)
3141227652Sgrehan		return;
3142227652Sgrehan
3143227652Sgrehan	/*
3144255112Sbryanv	 * Poll for the response, but the command is likely already
3145255112Sbryanv	 * done when we return from the notify.
3146227652Sgrehan	 */
3147255112Sbryanv	virtqueue_notify(vq);
3148255112Sbryanv	virtqueue_poll(vq, NULL);
3149227652Sgrehan}
3150227652Sgrehan
3151255112Sbryanvstatic int
3152255112Sbryanvvtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr)
3153227652Sgrehan{
3154268010Sbryanv	struct virtio_net_ctrl_hdr hdr __aligned(2);
3155255112Sbryanv	struct sglist_seg segs[3];
3156255112Sbryanv	struct sglist sg;
3157255112Sbryanv	uint8_t ack;
3158255112Sbryanv	int error;
3159227652Sgrehan
3160255112Sbryanv	hdr.class = VIRTIO_NET_CTRL_MAC;
3161255112Sbryanv	hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
3162255112Sbryanv	ack = VIRTIO_NET_ERR;
3163227652Sgrehan
3164255112Sbryanv	sglist_init(&sg, 3, segs);
3165255112Sbryanv	error = 0;
3166255112Sbryanv	error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
3167255112Sbryanv	error |= sglist_append(&sg, hwaddr, ETHER_ADDR_LEN);
3168255112Sbryanv	error |= sglist_append(&sg, &ack, sizeof(uint8_t));
3169255112Sbryanv	KASSERT(error == 0 && sg.sg_nseg == 3,
3170255112Sbryanv	    ("%s: error %d adding set MAC msg to sglist", __func__, error));
3171227652Sgrehan
3172255112Sbryanv	vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
3173227652Sgrehan
3174255112Sbryanv	return (ack == VIRTIO_NET_OK ? 0 : EIO);
3175227652Sgrehan}
3176227652Sgrehan
3177227652Sgrehanstatic int
3178255112Sbryanvvtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs)
3179227652Sgrehan{
3180227652Sgrehan	struct sglist_seg segs[3];
3181227652Sgrehan	struct sglist sg;
3182255112Sbryanv	struct {
3183255112Sbryanv		struct virtio_net_ctrl_hdr hdr;
3184255112Sbryanv		uint8_t pad1;
3185255112Sbryanv		struct virtio_net_ctrl_mq mq;
3186255112Sbryanv		uint8_t pad2;
3187255112Sbryanv		uint8_t ack;
3188268010Sbryanv	} s __aligned(2);
3189227652Sgrehan	int error;
3190227652Sgrehan
3191255112Sbryanv	s.hdr.class = VIRTIO_NET_CTRL_MQ;
3192255112Sbryanv	s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
3193255112Sbryanv	s.mq.virtqueue_pairs = npairs;
3194255112Sbryanv	s.ack = VIRTIO_NET_ERR;
3195227652Sgrehan
3196255112Sbryanv	sglist_init(&sg, 3, segs);
3197227652Sgrehan	error = 0;
3198255112Sbryanv	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3199255112Sbryanv	error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq));
3200255112Sbryanv	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3201255112Sbryanv	KASSERT(error == 0 && sg.sg_nseg == 3,
3202255112Sbryanv	    ("%s: error %d adding MQ message to sglist", __func__, error));
3203227652Sgrehan
3204255112Sbryanv	vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3205227652Sgrehan
3206255112Sbryanv	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3207255112Sbryanv}
3208255112Sbryanv
3209255112Sbryanvstatic int
3210255112Sbryanvvtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on)
3211255112Sbryanv{
3212255112Sbryanv	struct sglist_seg segs[3];
3213255112Sbryanv	struct sglist sg;
3214255112Sbryanv	struct {
3215255112Sbryanv		struct virtio_net_ctrl_hdr hdr;
3216255112Sbryanv		uint8_t pad1;
3217255112Sbryanv		uint8_t onoff;
3218255112Sbryanv		uint8_t pad2;
3219255112Sbryanv		uint8_t ack;
3220268010Sbryanv	} s __aligned(2);
3221255112Sbryanv	int error;
3222255112Sbryanv
3223255112Sbryanv	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
3224255112Sbryanv	    ("%s: CTRL_RX feature not negotiated", __func__));
3225255112Sbryanv
3226255112Sbryanv	s.hdr.class = VIRTIO_NET_CTRL_RX;
3227255112Sbryanv	s.hdr.cmd = cmd;
3228255112Sbryanv	s.onoff = !!on;
3229255112Sbryanv	s.ack = VIRTIO_NET_ERR;
3230255112Sbryanv
3231227652Sgrehan	sglist_init(&sg, 3, segs);
3232255112Sbryanv	error = 0;
3233255112Sbryanv	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3234255112Sbryanv	error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t));
3235255112Sbryanv	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3236227652Sgrehan	KASSERT(error == 0 && sg.sg_nseg == 3,
3237255112Sbryanv	    ("%s: error %d adding Rx message to sglist", __func__, error));
3238227652Sgrehan
3239255112Sbryanv	vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3240227652Sgrehan
3241255112Sbryanv	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3242227652Sgrehan}
3243227652Sgrehan
3244227652Sgrehanstatic int
3245227652Sgrehanvtnet_set_promisc(struct vtnet_softc *sc, int on)
3246227652Sgrehan{
3247227652Sgrehan
3248227652Sgrehan	return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on));
3249227652Sgrehan}
3250227652Sgrehan
3251227652Sgrehanstatic int
3252227652Sgrehanvtnet_set_allmulti(struct vtnet_softc *sc, int on)
3253227652Sgrehan{
3254227652Sgrehan
3255227652Sgrehan	return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on));
3256227652Sgrehan}
3257227652Sgrehan
3258255112Sbryanv/*
3259255112Sbryanv * The device defaults to promiscuous mode for backwards compatibility.
3260255112Sbryanv * Turn it off at attach time if possible.
3261255112Sbryanv */
3262227652Sgrehanstatic void
3263255112Sbryanvvtnet_attach_disable_promisc(struct vtnet_softc *sc)
3264255112Sbryanv{
3265255112Sbryanv	struct ifnet *ifp;
3266255112Sbryanv
3267255112Sbryanv	ifp = sc->vtnet_ifp;
3268255112Sbryanv
3269255112Sbryanv	VTNET_CORE_LOCK(sc);
3270255112Sbryanv	if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) {
3271255112Sbryanv		ifp->if_flags |= IFF_PROMISC;
3272255112Sbryanv	} else if (vtnet_set_promisc(sc, 0) != 0) {
3273255112Sbryanv		ifp->if_flags |= IFF_PROMISC;
3274255112Sbryanv		device_printf(sc->vtnet_dev,
3275255112Sbryanv		    "cannot disable default promiscuous mode\n");
3276255112Sbryanv	}
3277255112Sbryanv	VTNET_CORE_UNLOCK(sc);
3278255112Sbryanv}
3279255112Sbryanv
3280255112Sbryanvstatic void
3281255112Sbryanvvtnet_rx_filter(struct vtnet_softc *sc)
3282255112Sbryanv{
3283255112Sbryanv	device_t dev;
3284255112Sbryanv	struct ifnet *ifp;
3285255112Sbryanv
3286255112Sbryanv	dev = sc->vtnet_dev;
3287255112Sbryanv	ifp = sc->vtnet_ifp;
3288255112Sbryanv
3289255112Sbryanv	VTNET_CORE_LOCK_ASSERT(sc);
3290255112Sbryanv
3291255112Sbryanv	if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0)
3292255112Sbryanv		device_printf(dev, "cannot %s promiscuous mode\n",
3293255112Sbryanv		    ifp->if_flags & IFF_PROMISC ? "enable" : "disable");
3294255112Sbryanv
3295255112Sbryanv	if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0)
3296255112Sbryanv		device_printf(dev, "cannot %s all-multicast mode\n",
3297255112Sbryanv		    ifp->if_flags & IFF_ALLMULTI ? "enable" : "disable");
3298255112Sbryanv}
3299255112Sbryanv
3300255112Sbryanvstatic void
3301227652Sgrehanvtnet_rx_filter_mac(struct vtnet_softc *sc)
3302227652Sgrehan{
3303268010Sbryanv	struct virtio_net_ctrl_hdr hdr __aligned(2);
3304227652Sgrehan	struct vtnet_mac_filter *filter;
3305227652Sgrehan	struct sglist_seg segs[4];
3306227652Sgrehan	struct sglist sg;
3307227652Sgrehan	struct ifnet *ifp;
3308227652Sgrehan	struct ifaddr *ifa;
3309227652Sgrehan	struct ifmultiaddr *ifma;
3310227652Sgrehan	int ucnt, mcnt, promisc, allmulti, error;
3311227652Sgrehan	uint8_t ack;
3312227652Sgrehan
3313227652Sgrehan	ifp = sc->vtnet_ifp;
3314228301Sgrehan	filter = sc->vtnet_mac_filter;
3315227652Sgrehan	ucnt = 0;
3316227652Sgrehan	mcnt = 0;
3317227652Sgrehan	promisc = 0;
3318227652Sgrehan	allmulti = 0;
3319227652Sgrehan
3320255112Sbryanv	VTNET_CORE_LOCK_ASSERT(sc);
3321227652Sgrehan	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
3322255112Sbryanv	    ("%s: CTRL_RX feature not negotiated", __func__));
3323227652Sgrehan
3324227652Sgrehan	/* Unicast MAC addresses: */
3325227652Sgrehan	if_addr_rlock(ifp);
3326227652Sgrehan	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
3327227652Sgrehan		if (ifa->ifa_addr->sa_family != AF_LINK)
3328227652Sgrehan			continue;
3329255112Sbryanv		else if (memcmp(LLADDR((struct sockaddr_dl *)ifa->ifa_addr),
3330255112Sbryanv		    sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0)
3331255112Sbryanv			continue;
3332255112Sbryanv		else if (ucnt == VTNET_MAX_MAC_ENTRIES) {
3333255112Sbryanv			promisc = 1;
3334227652Sgrehan			break;
3335255112Sbryanv		}
3336227652Sgrehan
3337227652Sgrehan		bcopy(LLADDR((struct sockaddr_dl *)ifa->ifa_addr),
3338227652Sgrehan		    &filter->vmf_unicast.macs[ucnt], ETHER_ADDR_LEN);
3339227652Sgrehan		ucnt++;
3340227652Sgrehan	}
3341227652Sgrehan	if_addr_runlock(ifp);
3342227652Sgrehan
3343255112Sbryanv	if (promisc != 0) {
3344227652Sgrehan		filter->vmf_unicast.nentries = 0;
3345227652Sgrehan		if_printf(ifp, "more than %d MAC addresses assigned, "
3346227652Sgrehan		    "falling back to promiscuous mode\n",
3347227652Sgrehan		    VTNET_MAX_MAC_ENTRIES);
3348227652Sgrehan	} else
3349227652Sgrehan		filter->vmf_unicast.nentries = ucnt;
3350227652Sgrehan
3351227652Sgrehan	/* Multicast MAC addresses: */
3352227652Sgrehan	if_maddr_rlock(ifp);
3353227652Sgrehan	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3354227652Sgrehan		if (ifma->ifma_addr->sa_family != AF_LINK)
3355227652Sgrehan			continue;
3356255112Sbryanv		else if (mcnt == VTNET_MAX_MAC_ENTRIES) {
3357255112Sbryanv			allmulti = 1;
3358227652Sgrehan			break;
3359255112Sbryanv		}
3360227652Sgrehan
3361227652Sgrehan		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
3362227652Sgrehan		    &filter->vmf_multicast.macs[mcnt], ETHER_ADDR_LEN);
3363227652Sgrehan		mcnt++;
3364227652Sgrehan	}
3365227652Sgrehan	if_maddr_runlock(ifp);
3366227652Sgrehan
3367255112Sbryanv	if (allmulti != 0) {
3368227652Sgrehan		filter->vmf_multicast.nentries = 0;
3369227652Sgrehan		if_printf(ifp, "more than %d multicast MAC addresses "
3370227652Sgrehan		    "assigned, falling back to all-multicast mode\n",
3371227652Sgrehan		    VTNET_MAX_MAC_ENTRIES);
3372227652Sgrehan	} else
3373227652Sgrehan		filter->vmf_multicast.nentries = mcnt;
3374227652Sgrehan
3375255112Sbryanv	if (promisc != 0 && allmulti != 0)
3376227652Sgrehan		goto out;
3377227652Sgrehan
3378227652Sgrehan	hdr.class = VIRTIO_NET_CTRL_MAC;
3379227652Sgrehan	hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
3380227652Sgrehan	ack = VIRTIO_NET_ERR;
3381227652Sgrehan
3382227652Sgrehan	sglist_init(&sg, 4, segs);
3383255112Sbryanv	error = 0;
3384227652Sgrehan	error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
3385227652Sgrehan	error |= sglist_append(&sg, &filter->vmf_unicast,
3386251769Sbryanv	    sizeof(uint32_t) + filter->vmf_unicast.nentries * ETHER_ADDR_LEN);
3387227652Sgrehan	error |= sglist_append(&sg, &filter->vmf_multicast,
3388251769Sbryanv	    sizeof(uint32_t) + filter->vmf_multicast.nentries * ETHER_ADDR_LEN);
3389227652Sgrehan	error |= sglist_append(&sg, &ack, sizeof(uint8_t));
3390227652Sgrehan	KASSERT(error == 0 && sg.sg_nseg == 4,
3391255112Sbryanv	    ("%s: error %d adding MAC filter msg to sglist", __func__, error));
3392227652Sgrehan
3393227652Sgrehan	vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
3394227652Sgrehan
3395227652Sgrehan	if (ack != VIRTIO_NET_OK)
3396227652Sgrehan		if_printf(ifp, "error setting host MAC filter table\n");
3397227652Sgrehan
3398227652Sgrehanout:
3399255112Sbryanv	if (promisc != 0 && vtnet_set_promisc(sc, 1) != 0)
3400255112Sbryanv		if_printf(ifp, "cannot enable promiscuous mode\n");
3401255112Sbryanv	if (allmulti != 0 && vtnet_set_allmulti(sc, 1) != 0)
3402255112Sbryanv		if_printf(ifp, "cannot enable all-multicast mode\n");
3403227652Sgrehan}
3404227652Sgrehan
3405227652Sgrehanstatic int
3406227652Sgrehanvtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
3407227652Sgrehan{
3408227652Sgrehan	struct sglist_seg segs[3];
3409227652Sgrehan	struct sglist sg;
3410255112Sbryanv	struct {
3411255112Sbryanv		struct virtio_net_ctrl_hdr hdr;
3412255112Sbryanv		uint8_t pad1;
3413255112Sbryanv		uint16_t tag;
3414255112Sbryanv		uint8_t pad2;
3415255112Sbryanv		uint8_t ack;
3416268010Sbryanv	} s __aligned(2);
3417227652Sgrehan	int error;
3418227652Sgrehan
3419255112Sbryanv	s.hdr.class = VIRTIO_NET_CTRL_VLAN;
3420255112Sbryanv	s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
3421255112Sbryanv	s.tag = tag;
3422255112Sbryanv	s.ack = VIRTIO_NET_ERR;
3423227652Sgrehan
3424227652Sgrehan	sglist_init(&sg, 3, segs);
3425255112Sbryanv	error = 0;
3426255112Sbryanv	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3427255112Sbryanv	error |= sglist_append(&sg, &s.tag, sizeof(uint16_t));
3428255112Sbryanv	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3429227652Sgrehan	KASSERT(error == 0 && sg.sg_nseg == 3,
3430255112Sbryanv	    ("%s: error %d adding VLAN message to sglist", __func__, error));
3431227652Sgrehan
3432255112Sbryanv	vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3433227652Sgrehan
3434255112Sbryanv	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3435227652Sgrehan}
3436227652Sgrehan
3437227652Sgrehanstatic void
3438227652Sgrehanvtnet_rx_filter_vlan(struct vtnet_softc *sc)
3439227652Sgrehan{
3440255112Sbryanv	uint32_t w;
3441227652Sgrehan	uint16_t tag;
3442255112Sbryanv	int i, bit;
3443227652Sgrehan
3444255112Sbryanv	VTNET_CORE_LOCK_ASSERT(sc);
3445227652Sgrehan	KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER,
3446255112Sbryanv	    ("%s: VLAN_FILTER feature not negotiated", __func__));
3447227652Sgrehan
3448255112Sbryanv	/* Enable the filter for each configured VLAN. */
3449255112Sbryanv	for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) {
3450255112Sbryanv		w = sc->vtnet_vlan_filter[i];
3451227652Sgrehan
3452255112Sbryanv		while ((bit = ffs(w) - 1) != -1) {
3453255112Sbryanv			w &= ~(1 << bit);
3454255112Sbryanv			tag = sizeof(w) * CHAR_BIT * i + bit;
3455255112Sbryanv
3456255112Sbryanv			if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) {
3457255112Sbryanv				device_printf(sc->vtnet_dev,
3458255112Sbryanv				    "cannot enable VLAN %d filter\n", tag);
3459227652Sgrehan			}
3460227652Sgrehan		}
3461227652Sgrehan	}
3462227652Sgrehan}
3463227652Sgrehan
3464227652Sgrehanstatic void
3465255112Sbryanvvtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
3466227652Sgrehan{
3467227652Sgrehan	struct ifnet *ifp;
3468227652Sgrehan	int idx, bit;
3469227652Sgrehan
3470227652Sgrehan	ifp = sc->vtnet_ifp;
3471227652Sgrehan	idx = (tag >> 5) & 0x7F;
3472227652Sgrehan	bit = tag & 0x1F;
3473227652Sgrehan
3474255112Sbryanv	if (tag == 0 || tag > 4095)
3475255112Sbryanv		return;
3476227652Sgrehan
3477255112Sbryanv	VTNET_CORE_LOCK(sc);
3478227652Sgrehan
3479255112Sbryanv	if (add)
3480255112Sbryanv		sc->vtnet_vlan_filter[idx] |= (1 << bit);
3481255112Sbryanv	else
3482255112Sbryanv		sc->vtnet_vlan_filter[idx] &= ~(1 << bit);
3483255112Sbryanv
3484255112Sbryanv	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER &&
3485255112Sbryanv	    vtnet_exec_vlan_filter(sc, add, tag) != 0) {
3486255112Sbryanv		device_printf(sc->vtnet_dev,
3487255112Sbryanv		    "cannot %s VLAN %d %s the host filter table\n",
3488255112Sbryanv		    add ? "add" : "remove", tag, add ? "to" : "from");
3489227652Sgrehan	}
3490227652Sgrehan
3491255112Sbryanv	VTNET_CORE_UNLOCK(sc);
3492227652Sgrehan}
3493227652Sgrehan
3494227652Sgrehanstatic void
3495227652Sgrehanvtnet_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3496227652Sgrehan{
3497227652Sgrehan
3498227652Sgrehan	if (ifp->if_softc != arg)
3499227652Sgrehan		return;
3500227652Sgrehan
3501255112Sbryanv	vtnet_update_vlan_filter(arg, 1, tag);
3502227652Sgrehan}
3503227652Sgrehan
3504227652Sgrehanstatic void
3505227652Sgrehanvtnet_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3506227652Sgrehan{
3507227652Sgrehan
3508227652Sgrehan	if (ifp->if_softc != arg)
3509227652Sgrehan		return;
3510227652Sgrehan
3511255112Sbryanv	vtnet_update_vlan_filter(arg, 0, tag);
3512227652Sgrehan}
3513227652Sgrehan
3514227652Sgrehanstatic int
3515255112Sbryanvvtnet_is_link_up(struct vtnet_softc *sc)
3516255112Sbryanv{
3517255112Sbryanv	device_t dev;
3518255112Sbryanv	struct ifnet *ifp;
3519255112Sbryanv	uint16_t status;
3520255112Sbryanv
3521255112Sbryanv	dev = sc->vtnet_dev;
3522255112Sbryanv	ifp = sc->vtnet_ifp;
3523255112Sbryanv
3524255112Sbryanv	if ((ifp->if_capabilities & IFCAP_LINKSTATE) == 0)
3525255112Sbryanv		status = VIRTIO_NET_S_LINK_UP;
3526255112Sbryanv	else
3527255112Sbryanv		status = virtio_read_dev_config_2(dev,
3528255112Sbryanv		    offsetof(struct virtio_net_config, status));
3529255112Sbryanv
3530255112Sbryanv	return ((status & VIRTIO_NET_S_LINK_UP) != 0);
3531255112Sbryanv}
3532255112Sbryanv
3533255112Sbryanvstatic void
3534255112Sbryanvvtnet_update_link_status(struct vtnet_softc *sc)
3535255112Sbryanv{
3536255112Sbryanv	struct ifnet *ifp;
3537255112Sbryanv	int link;
3538255112Sbryanv
3539255112Sbryanv	ifp = sc->vtnet_ifp;
3540255112Sbryanv
3541255112Sbryanv	VTNET_CORE_LOCK_ASSERT(sc);
3542255112Sbryanv	link = vtnet_is_link_up(sc);
3543255112Sbryanv
3544255112Sbryanv	/* Notify if the link status has changed. */
3545255112Sbryanv	if (link != 0 && sc->vtnet_link_active == 0) {
3546255112Sbryanv		sc->vtnet_link_active = 1;
3547255112Sbryanv		if_link_state_change(ifp, LINK_STATE_UP);
3548255112Sbryanv	} else if (link == 0 && sc->vtnet_link_active != 0) {
3549255112Sbryanv		sc->vtnet_link_active = 0;
3550255112Sbryanv		if_link_state_change(ifp, LINK_STATE_DOWN);
3551255112Sbryanv	}
3552255112Sbryanv}
3553255112Sbryanv
3554255112Sbryanvstatic int
3555227652Sgrehanvtnet_ifmedia_upd(struct ifnet *ifp)
3556227652Sgrehan{
3557227652Sgrehan	struct vtnet_softc *sc;
3558227652Sgrehan	struct ifmedia *ifm;
3559227652Sgrehan
3560227652Sgrehan	sc = ifp->if_softc;
3561227652Sgrehan	ifm = &sc->vtnet_media;
3562227652Sgrehan
3563227652Sgrehan	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
3564227652Sgrehan		return (EINVAL);
3565227652Sgrehan
3566227652Sgrehan	return (0);
3567227652Sgrehan}
3568227652Sgrehan
3569227652Sgrehanstatic void
3570227652Sgrehanvtnet_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
3571227652Sgrehan{
3572227652Sgrehan	struct vtnet_softc *sc;
3573227652Sgrehan
3574227652Sgrehan	sc = ifp->if_softc;
3575227652Sgrehan
3576227652Sgrehan	ifmr->ifm_status = IFM_AVALID;
3577227652Sgrehan	ifmr->ifm_active = IFM_ETHER;
3578227652Sgrehan
3579255112Sbryanv	VTNET_CORE_LOCK(sc);
3580227652Sgrehan	if (vtnet_is_link_up(sc) != 0) {
3581227652Sgrehan		ifmr->ifm_status |= IFM_ACTIVE;
3582227652Sgrehan		ifmr->ifm_active |= VTNET_MEDIATYPE;
3583227652Sgrehan	} else
3584227652Sgrehan		ifmr->ifm_active |= IFM_NONE;
3585255112Sbryanv	VTNET_CORE_UNLOCK(sc);
3586227652Sgrehan}
3587227652Sgrehan
3588227652Sgrehanstatic void
3589255112Sbryanvvtnet_set_hwaddr(struct vtnet_softc *sc)
3590227652Sgrehan{
3591227652Sgrehan	device_t dev;
3592265286Sbryanv	int i;
3593255112Sbryanv
3594255112Sbryanv	dev = sc->vtnet_dev;
3595255112Sbryanv
3596255112Sbryanv	if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) {
3597255112Sbryanv		if (vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr) != 0)
3598255112Sbryanv			device_printf(dev, "unable to set MAC address\n");
3599255112Sbryanv	} else if (sc->vtnet_flags & VTNET_FLAG_MAC) {
3600265286Sbryanv		for (i = 0; i < ETHER_ADDR_LEN; i++) {
3601265286Sbryanv			virtio_write_dev_config_1(dev,
3602265286Sbryanv			    offsetof(struct virtio_net_config, mac) + i,
3603265286Sbryanv			    sc->vtnet_hwaddr[i]);
3604265286Sbryanv		}
3605255112Sbryanv	}
3606255112Sbryanv}
3607255112Sbryanv
3608255112Sbryanvstatic void
3609255112Sbryanvvtnet_get_hwaddr(struct vtnet_softc *sc)
3610255112Sbryanv{
3611255112Sbryanv	device_t dev;
3612265286Sbryanv	int i;
3613255112Sbryanv
3614255112Sbryanv	dev = sc->vtnet_dev;
3615255112Sbryanv
3616255112Sbryanv	if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) {
3617255112Sbryanv		/*
3618255112Sbryanv		 * Generate a random locally administered unicast address.
3619255112Sbryanv		 *
3620255112Sbryanv		 * It would be nice to generate the same MAC address across
3621255112Sbryanv		 * reboots, but it seems all the hosts currently available
3622255112Sbryanv		 * support the MAC feature, so this isn't too important.
3623255112Sbryanv		 */
3624255112Sbryanv		sc->vtnet_hwaddr[0] = 0xB2;
3625255112Sbryanv		arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0);
3626255112Sbryanv		vtnet_set_hwaddr(sc);
3627255112Sbryanv		return;
3628255112Sbryanv	}
3629255112Sbryanv
3630265286Sbryanv	for (i = 0; i < ETHER_ADDR_LEN; i++) {
3631265286Sbryanv		sc->vtnet_hwaddr[i] = virtio_read_dev_config_1(dev,
3632265286Sbryanv		    offsetof(struct virtio_net_config, mac) + i);
3633265286Sbryanv	}
3634255112Sbryanv}
3635255112Sbryanv
3636255112Sbryanvstatic void
3637255112Sbryanvvtnet_vlan_tag_remove(struct mbuf *m)
3638255112Sbryanv{
3639255112Sbryanv	struct ether_vlan_header *evh;
3640255112Sbryanv
3641255112Sbryanv	evh = mtod(m, struct ether_vlan_header *);
3642255112Sbryanv	m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag);
3643255112Sbryanv	m->m_flags |= M_VLANTAG;
3644255112Sbryanv
3645255112Sbryanv	/* Strip the 802.1Q header. */
3646255112Sbryanv	bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN,
3647255112Sbryanv	    ETHER_HDR_LEN - ETHER_TYPE_LEN);
3648255112Sbryanv	m_adj(m, ETHER_VLAN_ENCAP_LEN);
3649255112Sbryanv}
3650255112Sbryanv
3651255112Sbryanvstatic void
3652270334Sbryanvvtnet_set_rx_process_limit(struct vtnet_softc *sc)
3653270334Sbryanv{
3654270334Sbryanv	int limit;
3655270334Sbryanv
3656270334Sbryanv	limit = vtnet_tunable_int(sc, "rx_process_limit",
3657270334Sbryanv	    vtnet_rx_process_limit);
3658270334Sbryanv	if (limit < 0)
3659270334Sbryanv		limit = INT_MAX;
3660270334Sbryanv	sc->vtnet_rx_process_limit = limit;
3661270334Sbryanv}
3662270334Sbryanv
3663270334Sbryanvstatic void
3664270334Sbryanvvtnet_set_tx_intr_threshold(struct vtnet_softc *sc)
3665270334Sbryanv{
3666270334Sbryanv	device_t dev;
3667270334Sbryanv	int size, thresh;
3668270334Sbryanv
3669270334Sbryanv	dev = sc->vtnet_dev;
3670270334Sbryanv	size = virtqueue_size(sc->vtnet_txqs[0].vtntx_vq);
3671270334Sbryanv
3672270334Sbryanv	/*
3673270334Sbryanv	 * The Tx interrupt is disabled until the queue free count falls
3674270334Sbryanv	 * below our threshold. Completed frames are drained from the Tx
3675270334Sbryanv	 * virtqueue before transmitting new frames and in the watchdog
3676270334Sbryanv	 * callout, so the frequency of Tx interrupts is greatly reduced,
3677270334Sbryanv	 * at the cost of not freeing mbufs as quickly as they otherwise
3678270334Sbryanv	 * would be.
3679270334Sbryanv	 *
3680270334Sbryanv	 * N.B. We assume all the Tx queues are the same size.
3681270334Sbryanv	 */
3682270334Sbryanv	thresh = size / 4;
3683270334Sbryanv
3684270334Sbryanv	/*
3685270334Sbryanv	 * Without indirect descriptors, leave enough room for the most
3686270334Sbryanv	 * segments we handle.
3687270334Sbryanv	 */
3688277389Sbryanv	if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0 &&
3689270334Sbryanv	    thresh < sc->vtnet_tx_nsegs)
3690270334Sbryanv		thresh = sc->vtnet_tx_nsegs;
3691270334Sbryanv
3692270334Sbryanv	sc->vtnet_tx_intr_thresh = thresh;
3693270334Sbryanv}
3694270334Sbryanv
3695270334Sbryanvstatic void
3696255112Sbryanvvtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx,
3697255112Sbryanv    struct sysctl_oid_list *child, struct vtnet_rxq *rxq)
3698255112Sbryanv{
3699255112Sbryanv	struct sysctl_oid *node;
3700255112Sbryanv	struct sysctl_oid_list *list;
3701255112Sbryanv	struct vtnet_rxq_stats *stats;
3702255112Sbryanv	char namebuf[16];
3703255112Sbryanv
3704255112Sbryanv	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id);
3705255112Sbryanv	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
3706255112Sbryanv	    CTLFLAG_RD, NULL, "Receive Queue");
3707255112Sbryanv	list = SYSCTL_CHILDREN(node);
3708255112Sbryanv
3709255112Sbryanv	stats = &rxq->vtnrx_stats;
3710255112Sbryanv
3711255112Sbryanv	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
3712255112Sbryanv	    &stats->vrxs_ipackets, "Receive packets");
3713255112Sbryanv	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
3714255112Sbryanv	    &stats->vrxs_ibytes, "Receive bytes");
3715255112Sbryanv	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
3716255112Sbryanv	    &stats->vrxs_iqdrops, "Receive drops");
3717255112Sbryanv	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
3718255112Sbryanv	    &stats->vrxs_ierrors, "Receive errors");
3719255112Sbryanv	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3720255112Sbryanv	    &stats->vrxs_csum, "Receive checksum offloaded");
3721255112Sbryanv	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD,
3722255112Sbryanv	    &stats->vrxs_csum_failed, "Receive checksum offload failed");
3723255112Sbryanv	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
3724255112Sbryanv	    &stats->vrxs_rescheduled,
3725255112Sbryanv	    "Receive interrupt handler rescheduled");
3726255112Sbryanv}
3727255112Sbryanv
3728255112Sbryanvstatic void
3729255112Sbryanvvtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx,
3730255112Sbryanv    struct sysctl_oid_list *child, struct vtnet_txq *txq)
3731255112Sbryanv{
3732255112Sbryanv	struct sysctl_oid *node;
3733255112Sbryanv	struct sysctl_oid_list *list;
3734255112Sbryanv	struct vtnet_txq_stats *stats;
3735255112Sbryanv	char namebuf[16];
3736255112Sbryanv
3737255112Sbryanv	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id);
3738255112Sbryanv	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
3739255112Sbryanv	    CTLFLAG_RD, NULL, "Transmit Queue");
3740255112Sbryanv	list = SYSCTL_CHILDREN(node);
3741255112Sbryanv
3742255112Sbryanv	stats = &txq->vtntx_stats;
3743255112Sbryanv
3744255112Sbryanv	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
3745255112Sbryanv	    &stats->vtxs_opackets, "Transmit packets");
3746255112Sbryanv	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
3747255112Sbryanv	    &stats->vtxs_obytes, "Transmit bytes");
3748255112Sbryanv	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
3749255112Sbryanv	    &stats->vtxs_omcasts, "Transmit multicasts");
3750255112Sbryanv	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3751255112Sbryanv	    &stats->vtxs_csum, "Transmit checksum offloaded");
3752255112Sbryanv	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
3753255112Sbryanv	    &stats->vtxs_tso, "Transmit segmentation offloaded");
3754255112Sbryanv	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
3755255112Sbryanv	    &stats->vtxs_rescheduled,
3756255112Sbryanv	    "Transmit interrupt handler rescheduled");
3757255112Sbryanv}
3758255112Sbryanv
3759255112Sbryanvstatic void
3760255112Sbryanvvtnet_setup_queue_sysctl(struct vtnet_softc *sc)
3761255112Sbryanv{
3762255112Sbryanv	device_t dev;
3763255112Sbryanv	struct sysctl_ctx_list *ctx;
3764227652Sgrehan	struct sysctl_oid *tree;
3765227652Sgrehan	struct sysctl_oid_list *child;
3766255112Sbryanv	int i;
3767227652Sgrehan
3768227652Sgrehan	dev = sc->vtnet_dev;
3769227652Sgrehan	ctx = device_get_sysctl_ctx(dev);
3770227652Sgrehan	tree = device_get_sysctl_tree(dev);
3771227652Sgrehan	child = SYSCTL_CHILDREN(tree);
3772227652Sgrehan
3773255112Sbryanv	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3774255112Sbryanv		vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]);
3775255112Sbryanv		vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]);
3776255112Sbryanv	}
3777255112Sbryanv}
3778255112Sbryanv
3779255112Sbryanvstatic void
3780255112Sbryanvvtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx,
3781255112Sbryanv    struct sysctl_oid_list *child, struct vtnet_softc *sc)
3782255112Sbryanv{
3783255112Sbryanv	struct vtnet_statistics *stats;
3784255112Sbryanv
3785255112Sbryanv	stats = &sc->vtnet_stats;
3786255112Sbryanv
3787255112Sbryanv	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed",
3788227652Sgrehan	    CTLFLAG_RD, &stats->mbuf_alloc_failed,
3789227652Sgrehan	    "Mbuf cluster allocation failures");
3790227652Sgrehan
3791255112Sbryanv	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large",
3792227652Sgrehan	    CTLFLAG_RD, &stats->rx_frame_too_large,
3793227652Sgrehan	    "Received frame larger than the mbuf chain");
3794255112Sbryanv	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed",
3795227652Sgrehan	    CTLFLAG_RD, &stats->rx_enq_replacement_failed,
3796227652Sgrehan	    "Enqueuing the replacement receive mbuf failed");
3797255112Sbryanv	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed",
3798227652Sgrehan	    CTLFLAG_RD, &stats->rx_mergeable_failed,
3799227652Sgrehan	    "Mergeable buffers receive failures");
3800255112Sbryanv	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype",
3801227652Sgrehan	    CTLFLAG_RD, &stats->rx_csum_bad_ethtype,
3802227652Sgrehan	    "Received checksum offloaded buffer with unsupported "
3803227652Sgrehan	    "Ethernet type");
3804255112Sbryanv	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto",
3805227652Sgrehan	    CTLFLAG_RD, &stats->rx_csum_bad_ipproto,
3806227652Sgrehan	    "Received checksum offloaded buffer with incorrect IP protocol");
3807255112Sbryanv	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset",
3808227652Sgrehan	    CTLFLAG_RD, &stats->rx_csum_bad_offset,
3809227652Sgrehan	    "Received checksum offloaded buffer with incorrect offset");
3810255112Sbryanv	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto",
3811255112Sbryanv	    CTLFLAG_RD, &stats->rx_csum_bad_proto,
3812255112Sbryanv	    "Received checksum offloaded buffer with incorrect protocol");
3813255112Sbryanv	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed",
3814227652Sgrehan	    CTLFLAG_RD, &stats->rx_csum_failed,
3815227652Sgrehan	    "Received buffer checksum offload failed");
3816255112Sbryanv	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded",
3817227652Sgrehan	    CTLFLAG_RD, &stats->rx_csum_offloaded,
3818227652Sgrehan	    "Received buffer checksum offload succeeded");
3819255112Sbryanv	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled",
3820227652Sgrehan	    CTLFLAG_RD, &stats->rx_task_rescheduled,
3821227652Sgrehan	    "Times the receive interrupt task rescheduled itself");
3822227652Sgrehan
3823255112Sbryanv	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_bad_ethtype",
3824227652Sgrehan	    CTLFLAG_RD, &stats->tx_csum_bad_ethtype,
3825227652Sgrehan	    "Aborted transmit of checksum offloaded buffer with unknown "
3826227652Sgrehan	    "Ethernet type");
3827255112Sbryanv	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_bad_ethtype",
3828227652Sgrehan	    CTLFLAG_RD, &stats->tx_tso_bad_ethtype,
3829227652Sgrehan	    "Aborted transmit of TSO buffer with unknown Ethernet type");
3830255112Sbryanv	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp",
3831255112Sbryanv	    CTLFLAG_RD, &stats->tx_tso_not_tcp,
3832255112Sbryanv	    "Aborted transmit of TSO buffer with non TCP protocol");
3833265286Sbryanv	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged",
3834265286Sbryanv	    CTLFLAG_RD, &stats->tx_defragged,
3835265286Sbryanv	    "Transmit mbufs defragged");
3836265286Sbryanv	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed",
3837265286Sbryanv	    CTLFLAG_RD, &stats->tx_defrag_failed,
3838265286Sbryanv	    "Aborted transmit of buffer because defrag failed");
3839255112Sbryanv	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded",
3840255112Sbryanv	    CTLFLAG_RD, &stats->tx_csum_offloaded,
3841255112Sbryanv	    "Offloaded checksum of transmitted buffer");
3842255112Sbryanv	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded",
3843255112Sbryanv	    CTLFLAG_RD, &stats->tx_tso_offloaded,
3844255112Sbryanv	    "Segmentation offload of transmitted buffer");
3845255112Sbryanv	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled",
3846227652Sgrehan	    CTLFLAG_RD, &stats->tx_task_rescheduled,
3847227652Sgrehan	    "Times the transmit interrupt task rescheduled itself");
3848227652Sgrehan}
3849227652Sgrehan
3850255112Sbryanvstatic void
3851255112Sbryanvvtnet_setup_sysctl(struct vtnet_softc *sc)
3852255112Sbryanv{
3853255112Sbryanv	device_t dev;
3854255112Sbryanv	struct sysctl_ctx_list *ctx;
3855255112Sbryanv	struct sysctl_oid *tree;
3856255112Sbryanv	struct sysctl_oid_list *child;
3857255112Sbryanv
3858255112Sbryanv	dev = sc->vtnet_dev;
3859255112Sbryanv	ctx = device_get_sysctl_ctx(dev);
3860255112Sbryanv	tree = device_get_sysctl_tree(dev);
3861255112Sbryanv	child = SYSCTL_CHILDREN(tree);
3862255112Sbryanv
3863255112Sbryanv	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs",
3864255112Sbryanv	    CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0,
3865255112Sbryanv	    "Maximum number of supported virtqueue pairs");
3866304081Ssmh	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "requested_vq_pairs",
3867304081Ssmh	    CTLFLAG_RD, &sc->vtnet_requested_vq_pairs, 0,
3868304081Ssmh	    "Requested number of virtqueue pairs");
3869255112Sbryanv	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs",
3870255112Sbryanv	    CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0,
3871255112Sbryanv	    "Number of active virtqueue pairs");
3872255112Sbryanv
3873255112Sbryanv	vtnet_setup_stat_sysctl(ctx, child, sc);
3874255112Sbryanv}
3875255112Sbryanv
3876227652Sgrehanstatic int
3877255112Sbryanvvtnet_rxq_enable_intr(struct vtnet_rxq *rxq)
3878227652Sgrehan{
3879227652Sgrehan
3880255112Sbryanv	return (virtqueue_enable_intr(rxq->vtnrx_vq));
3881227652Sgrehan}
3882227652Sgrehan
3883227652Sgrehanstatic void
3884255112Sbryanvvtnet_rxq_disable_intr(struct vtnet_rxq *rxq)
3885227652Sgrehan{
3886227652Sgrehan
3887255112Sbryanv	virtqueue_disable_intr(rxq->vtnrx_vq);
3888227652Sgrehan}
3889227652Sgrehan
3890227652Sgrehanstatic int
3891255112Sbryanvvtnet_txq_enable_intr(struct vtnet_txq *txq)
3892227652Sgrehan{
3893270334Sbryanv	struct virtqueue *vq;
3894227652Sgrehan
3895270334Sbryanv	vq = txq->vtntx_vq;
3896270334Sbryanv
3897270334Sbryanv	if (vtnet_txq_below_threshold(txq) != 0)
3898270334Sbryanv		return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG));
3899270334Sbryanv
3900270334Sbryanv	/*
3901270334Sbryanv	 * The free count is above our threshold. Keep the Tx interrupt
3902270334Sbryanv	 * disabled until the queue is fuller.
3903270334Sbryanv	 */
3904270334Sbryanv	return (0);
3905227652Sgrehan}
3906227652Sgrehan
3907227652Sgrehanstatic void
3908255112Sbryanvvtnet_txq_disable_intr(struct vtnet_txq *txq)
3909227652Sgrehan{
3910227652Sgrehan
3911255112Sbryanv	virtqueue_disable_intr(txq->vtntx_vq);
3912227652Sgrehan}
3913255112Sbryanv
3914255112Sbryanvstatic void
3915255112Sbryanvvtnet_enable_rx_interrupts(struct vtnet_softc *sc)
3916255112Sbryanv{
3917255112Sbryanv	int i;
3918255112Sbryanv
3919255112Sbryanv	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3920255112Sbryanv		vtnet_rxq_enable_intr(&sc->vtnet_rxqs[i]);
3921255112Sbryanv}
3922255112Sbryanv
3923255112Sbryanvstatic void
3924255112Sbryanvvtnet_enable_tx_interrupts(struct vtnet_softc *sc)
3925255112Sbryanv{
3926255112Sbryanv	int i;
3927255112Sbryanv
3928255112Sbryanv	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3929255112Sbryanv		vtnet_txq_enable_intr(&sc->vtnet_txqs[i]);
3930255112Sbryanv}
3931255112Sbryanv
static void
vtnet_enable_interrupts(struct vtnet_softc *sc)
{

	/* Re-arm every active queue pair, Rx queues first. */
	vtnet_enable_rx_interrupts(sc);
	vtnet_enable_tx_interrupts(sc);
}
3939255112Sbryanv
3940255112Sbryanvstatic void
3941255112Sbryanvvtnet_disable_rx_interrupts(struct vtnet_softc *sc)
3942255112Sbryanv{
3943255112Sbryanv	int i;
3944255112Sbryanv
3945255112Sbryanv	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3946255112Sbryanv		vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]);
3947255112Sbryanv}
3948255112Sbryanv
3949255112Sbryanvstatic void
3950255112Sbryanvvtnet_disable_tx_interrupts(struct vtnet_softc *sc)
3951255112Sbryanv{
3952255112Sbryanv	int i;
3953255112Sbryanv
3954255112Sbryanv	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3955255112Sbryanv		vtnet_txq_disable_intr(&sc->vtnet_txqs[i]);
3956255112Sbryanv}
3957255112Sbryanv
static void
vtnet_disable_interrupts(struct vtnet_softc *sc)
{

	/* Mask every active queue pair, Rx queues first. */
	vtnet_disable_rx_interrupts(sc);
	vtnet_disable_tx_interrupts(sc);
}
3965255112Sbryanv
3966255112Sbryanvstatic int
3967255112Sbryanvvtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def)
3968255112Sbryanv{
3969255112Sbryanv	char path[64];
3970255112Sbryanv
3971255112Sbryanv	snprintf(path, sizeof(path),
3972255112Sbryanv	    "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob);
3973255112Sbryanv	TUNABLE_INT_FETCH(path, &def);
3974255112Sbryanv
3975255112Sbryanv	return (def);
3976255112Sbryanv}
3977