if_vioif.c revision 1.110
1/*	$NetBSD: if_vioif.c,v 1.110 2024/02/09 22:08:36 andvar Exp $	*/
2
3/*
4 * Copyright (c) 2020 The NetBSD Foundation, Inc.
5 * Copyright (c) 2010 Minoura Makoto.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__KERNEL_RCSID(0, "$NetBSD: if_vioif.c,v 1.110 2024/02/09 22:08:36 andvar Exp $");
31
32#ifdef _KERNEL_OPT
33#include "opt_net_mpsafe.h"
34#endif
35
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/kernel.h>
39#include <sys/atomic.h>
40#include <sys/bus.h>
41#include <sys/condvar.h>
42#include <sys/device.h>
43#include <sys/evcnt.h>
44#include <sys/intr.h>
45#include <sys/kmem.h>
46#include <sys/mbuf.h>
47#include <sys/mutex.h>
48#include <sys/sockio.h>
49#include <sys/syslog.h>
50#include <sys/cpu.h>
51#include <sys/module.h>
52#include <sys/pcq.h>
53#include <sys/workqueue.h>
54#include <sys/xcall.h>
55
56#include <dev/pci/virtioreg.h>
57#include <dev/pci/virtiovar.h>
58
59#include <net/if.h>
60#include <net/if_dl.h>
61#include <net/if_media.h>
62#include <net/if_ether.h>
63
64#include <net/bpf.h>
65
66#include "ioconf.h"
67
68#ifdef NET_MPSAFE
69#define VIOIF_MPSAFE	1
70#define VIOIF_MULTIQ	1
71#endif
72
73/*
74 * if_vioifreg.h:
75 */
76/* Configuration registers */
77#define VIRTIO_NET_CONFIG_MAC		 0 /* 8bit x 6byte */
78#define VIRTIO_NET_CONFIG_STATUS	 6 /* 16bit */
79#define VIRTIO_NET_CONFIG_MAX_VQ_PAIRS	 8 /* 16bit */
80#define VIRTIO_NET_CONFIG_MTU		10 /* 16bit */
81
82/* Feature bits */
83#define VIRTIO_NET_F_CSUM		__BIT(0)
84#define VIRTIO_NET_F_GUEST_CSUM		__BIT(1)
85#define VIRTIO_NET_F_MAC		__BIT(5)
86#define VIRTIO_NET_F_GSO		__BIT(6)
87#define VIRTIO_NET_F_GUEST_TSO4		__BIT(7)
88#define VIRTIO_NET_F_GUEST_TSO6		__BIT(8)
89#define VIRTIO_NET_F_GUEST_ECN		__BIT(9)
90#define VIRTIO_NET_F_GUEST_UFO		__BIT(10)
91#define VIRTIO_NET_F_HOST_TSO4		__BIT(11)
92#define VIRTIO_NET_F_HOST_TSO6		__BIT(12)
93#define VIRTIO_NET_F_HOST_ECN		__BIT(13)
94#define VIRTIO_NET_F_HOST_UFO		__BIT(14)
95#define VIRTIO_NET_F_MRG_RXBUF		__BIT(15)
96#define VIRTIO_NET_F_STATUS		__BIT(16)
97#define VIRTIO_NET_F_CTRL_VQ		__BIT(17)
98#define VIRTIO_NET_F_CTRL_RX		__BIT(18)
99#define VIRTIO_NET_F_CTRL_VLAN		__BIT(19)
100#define VIRTIO_NET_F_CTRL_RX_EXTRA	__BIT(20)
101#define VIRTIO_NET_F_GUEST_ANNOUNCE	__BIT(21)
102#define VIRTIO_NET_F_MQ			__BIT(22)
103#define VIRTIO_NET_F_CTRL_MAC_ADDR 	__BIT(23)
104
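/*
 * snprintb(3)-style descriptions of the feature bits above, appended to the
 * common virtio flag bits; used to log the negotiated features at attach.
 */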
105#define VIRTIO_NET_FLAG_BITS			\
106	VIRTIO_COMMON_FLAG_BITS			\
107	"b\x17" "CTRL_MAC\0"			\
108	"b\x16" "MQ\0"				\
109	"b\x15" "GUEST_ANNOUNCE\0"		\
110	"b\x14" "CTRL_RX_EXTRA\0"		\
111	"b\x13" "CTRL_VLAN\0"			\
112	"b\x12" "CTRL_RX\0"			\
113	"b\x11" "CTRL_VQ\0"			\
114	"b\x10" "STATUS\0"			\
115	"b\x0f" "MRG_RXBUF\0"			\
116	"b\x0e" "HOST_UFO\0"			\
117	"b\x0d" "HOST_ECN\0"			\
118	"b\x0c" "HOST_TSO6\0"			\
119	"b\x0b" "HOST_TSO4\0"			\
120	"b\x0a" "GUEST_UFO\0"			\
121	"b\x09" "GUEST_ECN\0"			\
122	"b\x08" "GUEST_TSO6\0"			\
123	"b\x07" "GUEST_TSO4\0"			\
124	"b\x06" "GSO\0"				\
125	"b\x05" "MAC\0"				\
126	"b\x01" "GUEST_CSUM\0"			\
127	"b\x00" "CSUM\0"
128
129/* Status */
130#define VIRTIO_NET_S_LINK_UP	1
131
132/* Packet header structure */
133struct virtio_net_hdr {
134	uint8_t		flags;
135	uint8_t		gso_type;
136	uint16_t	hdr_len;
137	uint16_t	gso_size;
138	uint16_t	csum_start;
139	uint16_t	csum_offset;
140
141	uint16_t	num_buffers; /* VIRTIO_NET_F_MRG_RXBUF enabled or v1 */
142} __packed;
143
144#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
145#define VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
146#define VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
147#define VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
148#define VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
149#define VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */
150
151#define VIRTIO_NET_MAX_GSO_LEN		(65536+ETHER_HDR_LEN)
152
153/* Control virtqueue */
154struct virtio_net_ctrl_cmd {
155	uint8_t	class;
156	uint8_t	command;
157} __packed;
158#define VIRTIO_NET_CTRL_RX		0
159# define VIRTIO_NET_CTRL_RX_PROMISC	0
160# define VIRTIO_NET_CTRL_RX_ALLMULTI	1
161
162#define VIRTIO_NET_CTRL_MAC		1
163# define VIRTIO_NET_CTRL_MAC_TABLE_SET	0
164# define  VIRTIO_NET_CTRL_MAC_ADDR_SET	1
165
166#define VIRTIO_NET_CTRL_VLAN		2
167# define VIRTIO_NET_CTRL_VLAN_ADD	0
168# define VIRTIO_NET_CTRL_VLAN_DEL	1
169
170#define VIRTIO_NET_CTRL_MQ			4
171# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET	0
172# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN	1
173# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX	0x8000
174
175struct virtio_net_ctrl_status {
176	uint8_t	ack;
177} __packed;
178#define VIRTIO_NET_OK			0
179#define VIRTIO_NET_ERR			1
180
181struct virtio_net_ctrl_rx {
182	uint8_t	onoff;
183} __packed;
184
185struct virtio_net_ctrl_mac_tbl {
186	uint32_t nentries;
187	uint8_t macs[][ETHER_ADDR_LEN];
188} __packed;
189
190struct virtio_net_ctrl_mac_addr {
191	uint8_t mac[ETHER_ADDR_LEN];
192} __packed;
193
194struct virtio_net_ctrl_vlan {
195	uint16_t id;
196} __packed;
197
198struct virtio_net_ctrl_mq {
199	uint16_t virtqueue_pairs;
200} __packed;
201
202/*
203 * if_vioifvar.h:
204 */
205
206/*
207 * Locking notes:
208 * + a field in vioif_netqueue is protected by netq_lock (a spin mutex)
209 *      - more than one lock cannot be held at once
210 * + a field in vioif_tx_context and vioif_rx_context is also protected
211 *   by netq_lock.
212 * + ctrlq_inuse is protected by ctrlq_wait_lock.
213 *      - other fields in vioif_ctrlqueue are protected by ctrlq_inuse
214 *      - netq_lock cannot be held along with ctrlq_wait_lock
215 * + fields in vioif_softc except queues are protected by
216 *   sc->sc_lock (an adaptive mutex)
217 *      - the lock is held before acquisition of other locks
218 */
219
220struct vioif_ctrl_cmdspec {
221	bus_dmamap_t	dmamap;
222	void		*buf;
223	bus_size_t	bufsize;
224};
225
226struct vioif_work {
227	struct work	 cookie;
228	void		(*func)(void *);
229	void		*arg;
230	unsigned int	 added;
231};
232
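/*
 * Per-slot transfer state: each virtqueue slot has one vioif_net_map that
 * tracks the virtio_net_hdr and the mbuf in flight, together with their
 * DMA maps.
 */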
233struct vioif_net_map {
234	struct virtio_net_hdr	*vnm_hdr;
235	bus_dmamap_t		 vnm_hdr_map;
236	struct mbuf		*vnm_mbuf;
237	bus_dmamap_t		 vnm_mbuf_map;
238};
239
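/*
 * Virtqueues come in rx/tx pairs.  For pair n, the rx queue has id n*2 and
 * the tx queue has id n*2 + 1; the optional control virtqueue is appended
 * after all pairs.
 */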
240#define VIOIF_NETQ_RX		0
241#define VIOIF_NETQ_TX		1
242#define VIOIF_NETQ_IDX		2
243#define VIOIF_NETQ_DIR(n)	((n) % VIOIF_NETQ_IDX)
244#define VIOIF_NETQ_PAIRIDX(n)	((n) / VIOIF_NETQ_IDX)
245#define VIOIF_NETQ_RXQID(n)	((n) * VIOIF_NETQ_IDX + VIOIF_NETQ_RX)
246#define VIOIF_NETQ_TXQID(n)	((n) * VIOIF_NETQ_IDX + VIOIF_NETQ_TX)
247
248struct vioif_netqueue {
249	kmutex_t		 netq_lock;
250	struct virtqueue	*netq_vq;
251	bool			 netq_stopping;
252	bool			 netq_running_handle;
253	void			*netq_maps_kva;
254	struct vioif_net_map	*netq_maps;
255
256	void			*netq_softint;
257	struct vioif_work	 netq_work;
258	bool			 netq_workqueue;
259
260	char			 netq_evgroup[32];
261	struct evcnt		 netq_mbuf_load_failed;
262	struct evcnt		 netq_enqueue_failed;
263
264	void			*netq_ctx;
265};
266
267struct vioif_tx_context {
268	bool			 txc_link_active;
269	bool			 txc_no_free_slots;
270	pcq_t			*txc_intrq;
271	void			*txc_deferred_transmit;
272
273	struct evcnt		 txc_defrag_failed;
274};
275
276struct vioif_rx_context {
277	struct evcnt		 rxc_mbuf_enobufs;
278};
279struct vioif_ctrlqueue {
280	struct virtqueue		*ctrlq_vq;
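	/*
	 * State of the single in-flight control command; protected by
	 * ctrlq_wait_lock and signalled via ctrlq_wait.
	 */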
281	enum {
282		FREE, INUSE, DONE
283	}				ctrlq_inuse;
284	kcondvar_t			ctrlq_wait;
285	kmutex_t			ctrlq_wait_lock;
286	struct lwp			*ctrlq_owner;
287
288	struct virtio_net_ctrl_cmd	*ctrlq_cmd;
289	struct virtio_net_ctrl_status	*ctrlq_status;
290	struct virtio_net_ctrl_rx	*ctrlq_rx;
291	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_uc;
292	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_mc;
293	struct virtio_net_ctrl_mac_addr	*ctrlq_mac_addr;
294	struct virtio_net_ctrl_mq	*ctrlq_mq;
295
296	bus_dmamap_t			ctrlq_cmd_dmamap;
297	bus_dmamap_t			ctrlq_status_dmamap;
298	bus_dmamap_t			ctrlq_rx_dmamap;
299	bus_dmamap_t			ctrlq_tbl_uc_dmamap;
300	bus_dmamap_t			ctrlq_tbl_mc_dmamap;
301	bus_dmamap_t			ctrlq_mac_addr_dmamap;
302	bus_dmamap_t			ctrlq_mq_dmamap;
303
304	struct evcnt			ctrlq_cmd_load_failed;
305	struct evcnt			ctrlq_cmd_failed;
306};
307
308struct vioif_softc {
309	device_t		sc_dev;
310	kmutex_t		sc_lock;
311	struct sysctllog	*sc_sysctllog;
312
313	struct virtio_softc	*sc_virtio;
314	struct virtqueue	*sc_vqs;
315	u_int			 sc_hdr_size;
316
317	int			sc_max_nvq_pairs;
318	int			sc_req_nvq_pairs;
319	int			sc_act_nvq_pairs;
320
321	uint8_t			sc_mac[ETHER_ADDR_LEN];
322	struct ethercom		sc_ethercom;
323	int			sc_link_state;
324
325	struct vioif_netqueue	*sc_netqs;
326
327	bool			sc_has_ctrl;
328	struct vioif_ctrlqueue	sc_ctrlq;
329
330	bus_dma_segment_t	 sc_segs[1];
331	void			*sc_dmamem;
332	void			*sc_kmem;
333
334	void			*sc_cfg_softint;
335
336	struct workqueue	*sc_txrx_workqueue;
337	bool			 sc_txrx_workqueue_sysctl;
338	u_int			 sc_tx_intr_process_limit;
339	u_int			 sc_tx_process_limit;
340	u_int			 sc_rx_intr_process_limit;
341	u_int			 sc_rx_process_limit;
342};
343#define VIRTIO_NET_TX_MAXNSEGS		(16) /* XXX */
344#define VIRTIO_NET_CTRL_MAC_MAXENTRIES	(64) /* XXX */
345
346#define VIOIF_TX_INTR_PROCESS_LIMIT	256
347#define VIOIF_TX_PROCESS_LIMIT		256
348#define VIOIF_RX_INTR_PROCESS_LIMIT	0U
349#define VIOIF_RX_PROCESS_LIMIT		256
350
351#define VIOIF_WORKQUEUE_PRI		PRI_SOFTNET
352#define VIOIF_IS_LINK_ACTIVE(_sc)	((_sc)->sc_link_state == LINK_STATE_UP ? \
353					    true : false)
354
355/* cfattach interface functions */
356static int	vioif_match(device_t, cfdata_t, void *);
357static void	vioif_attach(device_t, device_t, void *);
358static int	vioif_finalize_teardown(device_t);
359
360/* ifnet interface functions */
361static int	vioif_init(struct ifnet *);
362static void	vioif_stop(struct ifnet *, int);
363static void	vioif_start(struct ifnet *);
364static int	vioif_transmit(struct ifnet *, struct mbuf *);
365static int	vioif_ioctl(struct ifnet *, u_long, void *);
366static void	vioif_watchdog(struct ifnet *);
367static int	vioif_ifflags(struct vioif_softc *);
368static int	vioif_ifflags_cb(struct ethercom *);
369
370/* tx & rx */
371static int	vioif_netqueue_init(struct vioif_softc *,
372		    struct virtio_softc *, size_t, u_int);
373static void	vioif_netqueue_teardown(struct vioif_softc *,
374		    struct virtio_softc *, size_t);
375static void	vioif_net_intr_enable(struct vioif_softc *,
376		    struct virtio_softc *);
377static void	vioif_net_intr_disable(struct vioif_softc *,
378		    struct virtio_softc *);
379static void	vioif_net_sched_handle(struct vioif_softc *,
380		    struct vioif_netqueue *);
381
382/* rx */
383static void	vioif_populate_rx_mbufs_locked(struct vioif_softc *,
384		    struct vioif_netqueue *);
385static int	vioif_rx_intr(void *);
386static void	vioif_rx_handle(void *);
387static void	vioif_rx_queue_clear(struct vioif_softc *,
388		    struct virtio_softc *, struct vioif_netqueue *);
389
390/* tx */
391static void	vioif_start_locked(struct ifnet *, struct vioif_netqueue *);
392static void	vioif_transmit_locked(struct ifnet *, struct vioif_netqueue *);
393static void	vioif_deferred_transmit(void *);
394static int	vioif_tx_intr(void *);
395static void	vioif_tx_handle(void *);
396static void	vioif_tx_queue_clear(struct vioif_softc *, struct virtio_softc *,
397		    struct vioif_netqueue *);
398
399/* controls */
400static int	vioif_ctrl_intr(void *);
401static int	vioif_ctrl_rx(struct vioif_softc *, int, bool);
402static int	vioif_set_promisc(struct vioif_softc *, bool);
403static int	vioif_set_allmulti(struct vioif_softc *, bool);
404static int	vioif_set_rx_filter(struct vioif_softc *);
405static int	vioif_rx_filter(struct vioif_softc *);
406static int	vioif_set_mac_addr(struct vioif_softc *);
407static int	vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *, int);
408
409/* config interrupt */
410static int	vioif_config_change(struct virtio_softc *);
411static void	vioif_cfg_softint(void *);
412static void	vioif_update_link_status(struct vioif_softc *);
413
414/* others */
415static void	vioif_alloc_queues(struct vioif_softc *);
416static void	vioif_free_queues(struct vioif_softc *);
417static int	vioif_alloc_mems(struct vioif_softc *);
418static struct workqueue*
419		vioif_workq_create(const char *, pri_t, int, int);
420static void	vioif_workq_destroy(struct workqueue *);
421static void	vioif_work_set(struct vioif_work *, void(*)(void *), void *);
422static void	vioif_work_add(struct workqueue *, struct vioif_work *);
423static void	vioif_work_wait(struct workqueue *, struct vioif_work *);
424static int	vioif_setup_sysctl(struct vioif_softc *);
425static void	vioif_setup_stats(struct vioif_softc *);
426
427CFATTACH_DECL_NEW(vioif, sizeof(struct vioif_softc),
428		  vioif_match, vioif_attach, NULL, NULL);
429
430static void
431vioif_intr_barrier(void)
432{
433
434	/* wait for all interrupt handlers to finish */
435	xc_barrier(0);
436}
437
438static void
439vioif_notify(struct virtio_softc *vsc, struct virtqueue *vq)
440{
441
442	virtio_enqueue_commit(vsc, vq, -1, true);
443}
444
445static int
446vioif_match(device_t parent, cfdata_t match, void *aux)
447{
448	struct virtio_attach_args *va = aux;
449
450	if (va->sc_childdevid == VIRTIO_DEVICE_ID_NETWORK)
451		return 1;
452
453	return 0;
454}
455
456static void
457vioif_attach(device_t parent, device_t self, void *aux)
458{
459	struct vioif_softc *sc = device_private(self);
460	struct virtio_softc *vsc = device_private(parent);
461	struct vioif_netqueue *txq0;
462	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
463	uint64_t features, req_features;
464	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
465	u_int softint_flags;
466	int r, i, req_flags;
467	char xnamebuf[MAXCOMLEN];
468	size_t nvqs;
469
470	if (virtio_child(vsc) != NULL) {
471		aprint_normal(": child already attached for %s; "
472		    "something wrong...\n", device_xname(parent));
473		return;
474	}
475
476	sc->sc_dev = self;
477	sc->sc_virtio = vsc;
478	sc->sc_link_state = LINK_STATE_UNKNOWN;
479
480	sc->sc_max_nvq_pairs = 1;
481	sc->sc_req_nvq_pairs = 1;
482	sc->sc_act_nvq_pairs = 1;
483	sc->sc_txrx_workqueue_sysctl = true;
484	sc->sc_tx_intr_process_limit = VIOIF_TX_INTR_PROCESS_LIMIT;
485	sc->sc_tx_process_limit = VIOIF_TX_PROCESS_LIMIT;
486	sc->sc_rx_intr_process_limit = VIOIF_RX_INTR_PROCESS_LIMIT;
487	sc->sc_rx_process_limit = VIOIF_RX_PROCESS_LIMIT;
488
489	mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);
490
491	snprintf(xnamebuf, sizeof(xnamebuf), "%s_txrx", device_xname(self));
492	sc->sc_txrx_workqueue = vioif_workq_create(xnamebuf, VIOIF_WORKQUEUE_PRI,
493	    IPL_NET, WQ_PERCPU | WQ_MPSAFE);
494	if (sc->sc_txrx_workqueue == NULL)
495		goto err;
496
497	req_flags = 0;
498
499#ifdef VIOIF_MPSAFE
500	req_flags |= VIRTIO_F_INTR_MPSAFE;
501#endif
502	req_flags |= VIRTIO_F_INTR_MSIX;
503
504	req_features =
505	    VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | VIRTIO_NET_F_CTRL_VQ |
506	    VIRTIO_NET_F_CTRL_RX | VIRTIO_F_NOTIFY_ON_EMPTY;
507	req_features |= VIRTIO_F_RING_EVENT_IDX;
508	req_features |= VIRTIO_NET_F_CTRL_MAC_ADDR;
509#ifdef VIOIF_MULTIQ
510	req_features |= VIRTIO_NET_F_MQ;
511#endif
512
513	virtio_child_attach_start(vsc, self, IPL_NET,
514	    req_features, VIRTIO_NET_FLAG_BITS);
515	features = virtio_features(vsc);
516
517	if (features == 0)
518		goto err;
519
520	if (features & VIRTIO_NET_F_MAC) {
521		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
522			sc->sc_mac[i] = virtio_read_device_config_1(vsc,
523			    VIRTIO_NET_CONFIG_MAC + i);
524		}
525	} else {
526		/* code stolen from sys/net/if_tap.c */
527		struct timeval tv;
528		uint32_t ui;
529		getmicrouptime(&tv);
530		ui = (tv.tv_sec ^ tv.tv_usec) & 0xffffff;
531		memcpy(sc->sc_mac+3, (uint8_t *)&ui, 3);
532		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
533			virtio_write_device_config_1(vsc,
534			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
535		}
536	}
537
538	/* Capitalized 'Ethernet' matches the attach output of other ethernet drivers */
539	aprint_normal_dev(self, "Ethernet address %s\n",
540	    ether_sprintf(sc->sc_mac));
541
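	/*
	 * With MRG_RXBUF or VIRTIO v1 the header carries the trailing
	 * num_buffers field; otherwise use the shorter legacy header.
	 */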
542	if (features & (VIRTIO_NET_F_MRG_RXBUF | VIRTIO_F_VERSION_1)) {
543		sc->sc_hdr_size = sizeof(struct virtio_net_hdr);
544	} else {
545		sc->sc_hdr_size = offsetof(struct virtio_net_hdr, num_buffers);
546	}
547
548	if ((features & VIRTIO_NET_F_CTRL_VQ) &&
549	    (features & VIRTIO_NET_F_CTRL_RX)) {
550		sc->sc_has_ctrl = true;
551
552		cv_init(&ctrlq->ctrlq_wait, "ctrl_vq");
553		mutex_init(&ctrlq->ctrlq_wait_lock, MUTEX_DEFAULT, IPL_NET);
554		ctrlq->ctrlq_inuse = FREE;
555	} else {
556		sc->sc_has_ctrl = false;
557	}
558
559	if (sc->sc_has_ctrl && (features & VIRTIO_NET_F_MQ)) {
560		sc->sc_max_nvq_pairs = virtio_read_device_config_2(vsc,
561		    VIRTIO_NET_CONFIG_MAX_VQ_PAIRS);
562
563		if (sc->sc_max_nvq_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
564			goto err;
565
566		/* Limit the number of queue pairs to use */
567		sc->sc_req_nvq_pairs = MIN(sc->sc_max_nvq_pairs, ncpu);
568
569		if (sc->sc_max_nvq_pairs > 1)
570			req_flags |= VIRTIO_F_INTR_PERVQ;
571	}
572
573	vioif_alloc_queues(sc);
574
575#ifdef VIOIF_MPSAFE
576	softint_flags = SOFTINT_NET | SOFTINT_MPSAFE;
577#else
578	softint_flags = SOFTINT_NET;
579#endif
580
581	/*
582	 * Initialize network queues
583	 */
584	nvqs = sc->sc_max_nvq_pairs * 2;
585	for (i = 0; i < nvqs; i++) {
586		r = vioif_netqueue_init(sc, vsc, i, softint_flags);
587		if (r != 0)
588			goto err;
589	}
590
591	if (sc->sc_has_ctrl) {
592		int ctrlq_idx = nvqs;
593
594		nvqs++;
595		/*
596		 * Allocate a virtqueue for the control channel
597		 */
598		sc->sc_ctrlq.ctrlq_vq = &sc->sc_vqs[ctrlq_idx];
599		virtio_init_vq(vsc, ctrlq->ctrlq_vq, ctrlq_idx,
600		    vioif_ctrl_intr, ctrlq);
601
602		r = virtio_alloc_vq(vsc, ctrlq->ctrlq_vq, NBPG, 1, "control");
603		if (r != 0) {
604			aprint_error_dev(self, "failed to allocate "
605			    "a virtqueue for control channel, error code %d\n",
606			    r);
607
608			sc->sc_has_ctrl = false;
609			cv_destroy(&ctrlq->ctrlq_wait);
610			mutex_destroy(&ctrlq->ctrlq_wait_lock);
611		}
612	}
613
614	sc->sc_cfg_softint = softint_establish(softint_flags,
615	    vioif_cfg_softint, sc);
616	if (sc->sc_cfg_softint == NULL) {
617		aprint_error_dev(self, "cannot establish ctl softint\n");
618		goto err;
619	}
620
621	if (vioif_alloc_mems(sc) < 0)
622		goto err;
623
624	r = virtio_child_attach_finish(vsc, sc->sc_vqs, nvqs,
625	    vioif_config_change, req_flags);
626	if (r != 0)
627		goto err;
628
629	if (vioif_setup_sysctl(sc) != 0) {
630		aprint_error_dev(self, "unable to create sysctl node\n");
631		/* continue */
632	}
633
634	vioif_setup_stats(sc);
635
636	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
637	ifp->if_softc = sc;
638	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
639#ifdef VIOIF_MPSAFE
640	ifp->if_extflags = IFEF_MPSAFE;
641#endif
642	ifp->if_start = vioif_start;
643	if (sc->sc_req_nvq_pairs > 1)
644		ifp->if_transmit = vioif_transmit;
645	ifp->if_ioctl = vioif_ioctl;
646	ifp->if_init = vioif_init;
647	ifp->if_stop = vioif_stop;
648	ifp->if_capabilities = 0;
649	ifp->if_watchdog = vioif_watchdog;
650	txq0 = &sc->sc_netqs[VIOIF_NETQ_TXQID(0)];
651	IFQ_SET_MAXLEN(&ifp->if_snd, MAX(txq0->netq_vq->vq_num, IFQ_MAXLEN));
652	IFQ_SET_READY(&ifp->if_snd);
653
654	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
655
656	if_attach(ifp);
657	if_deferred_start_init(ifp, NULL);
658	ether_ifattach(ifp, sc->sc_mac);
659	ether_set_ifflags_cb(&sc->sc_ethercom, vioif_ifflags_cb);
660
661	return;
662
663err:
664	nvqs = sc->sc_max_nvq_pairs * 2;
665	for (i = 0; i < nvqs; i++) {
666		vioif_netqueue_teardown(sc, vsc, i);
667	}
668
669	if (sc->sc_has_ctrl) {
670		cv_destroy(&ctrlq->ctrlq_wait);
671		mutex_destroy(&ctrlq->ctrlq_wait_lock);
672		virtio_free_vq(vsc, ctrlq->ctrlq_vq);
673		ctrlq->ctrlq_vq = NULL;
674	}
675
676	vioif_free_queues(sc);
677	mutex_destroy(&sc->sc_lock);
678	virtio_child_attach_failed(vsc);
679	config_finalize_register(self, vioif_finalize_teardown);
680
681	return;
682}
683
684static int
685vioif_finalize_teardown(device_t self)
686{
687	struct vioif_softc *sc = device_private(self);
688
689	if (sc->sc_txrx_workqueue != NULL) {
690		vioif_workq_destroy(sc->sc_txrx_workqueue);
691		sc->sc_txrx_workqueue = NULL;
692	}
693
694	return 0;
695}
696
697/*
698 * Interface functions for ifnet
699 */
700static int
701vioif_init(struct ifnet *ifp)
702{
703	struct vioif_softc *sc = ifp->if_softc;
704	struct virtio_softc *vsc = sc->sc_virtio;
705	struct vioif_netqueue *netq;
706	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
707	int r, i;
708
709	vioif_stop(ifp, 0);
710
711	r = virtio_reinit_start(vsc);
712	if (r != 0) {
713		log(LOG_ERR, "%s: reset failed\n", ifp->if_xname);
714		return EIO;
715	}
716
717	virtio_negotiate_features(vsc, virtio_features(vsc));
718
719	for (i = 0; i < sc->sc_req_nvq_pairs; i++) {
720		netq = &sc->sc_netqs[VIOIF_NETQ_RXQID(i)];
721
722		mutex_enter(&netq->netq_lock);
723		vioif_populate_rx_mbufs_locked(sc, netq);
724		mutex_exit(&netq->netq_lock);
725	}
726
727	virtio_reinit_end(vsc);
728
729	if (sc->sc_has_ctrl)
730		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
731
732	r = vioif_ctrl_mq_vq_pairs_set(sc, sc->sc_req_nvq_pairs);
733	if (r == 0)
734		sc->sc_act_nvq_pairs = sc->sc_req_nvq_pairs;
735	else
736		sc->sc_act_nvq_pairs = 1;
737
738	SET(ifp->if_flags, IFF_RUNNING);
739
740	vioif_net_intr_enable(sc, vsc);
741
742	vioif_update_link_status(sc);
743	r = vioif_rx_filter(sc);
744
745	return r;
746}
747
748static void
749vioif_stop(struct ifnet *ifp, int disable)
750{
751	struct vioif_softc *sc = ifp->if_softc;
752	struct virtio_softc *vsc = sc->sc_virtio;
753	struct vioif_netqueue *netq;
754	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
755	size_t i, act_qnum;
756
757	act_qnum = sc->sc_act_nvq_pairs * 2;
758
759	CLR(ifp->if_flags, IFF_RUNNING);
760	for (i = 0; i < act_qnum; i++) {
761		netq = &sc->sc_netqs[i];
762
763		mutex_enter(&netq->netq_lock);
764		netq->netq_stopping = true;
765		mutex_exit(&netq->netq_lock);
766	}
767
768	/* disable interrupts */
769	vioif_net_intr_disable(sc, vsc);
770	if (sc->sc_has_ctrl)
771		virtio_stop_vq_intr(vsc, ctrlq->ctrlq_vq);
772
773	/*
774	 * The only way to stop interrupts, I/O and DMA is to reset the device.
775	 *
776	 * NOTE: Devices based on the VirtIO draft specification cannot stop
777	 * interrupts completely even if virtio_stop_vq_intr() is called.
778	 */
779	virtio_reset(vsc);
780
781	vioif_intr_barrier();
782
783	for (i = 0; i < act_qnum; i++) {
784		netq = &sc->sc_netqs[i];
785		vioif_work_wait(sc->sc_txrx_workqueue, &netq->netq_work);
786	}
787
788	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
789		netq = &sc->sc_netqs[VIOIF_NETQ_RXQID(i)];
790		vioif_rx_queue_clear(sc, vsc, netq);
791
792		netq = &sc->sc_netqs[VIOIF_NETQ_TXQID(i)];
793		vioif_tx_queue_clear(sc, vsc, netq);
794	}
795
796	/* all packet processing is stopped */
797	for (i = 0; i < act_qnum; i++) {
798		netq = &sc->sc_netqs[i];
799
800		mutex_enter(&netq->netq_lock);
801		netq->netq_stopping = false;
802		mutex_exit(&netq->netq_lock);
803	}
804}
805
806static void
807vioif_start(struct ifnet *ifp)
808{
809	struct vioif_softc *sc = ifp->if_softc;
810	struct vioif_netqueue *txq0 = &sc->sc_netqs[VIOIF_NETQ_TXQID(0)];
811
812#ifdef VIOIF_MPSAFE
813	KASSERT(if_is_mpsafe(ifp));
814#endif
815
816	mutex_enter(&txq0->netq_lock);
817	vioif_start_locked(ifp, txq0);
818	mutex_exit(&txq0->netq_lock);
819}
820
821static inline int
822vioif_select_txqueue(struct ifnet *ifp, struct mbuf *m)
823{
824	struct vioif_softc *sc = ifp->if_softc;
825	u_int cpuid = cpu_index(curcpu());
826
827	return VIOIF_NETQ_TXQID(cpuid % sc->sc_act_nvq_pairs);
828}
829
830static int
831vioif_transmit(struct ifnet *ifp, struct mbuf *m)
832{
833	struct vioif_softc *sc = ifp->if_softc;
834	struct vioif_netqueue *netq;
835	struct vioif_tx_context *txc;
836	int qid;
837
838	qid = vioif_select_txqueue(ifp, m);
839	netq = &sc->sc_netqs[qid];
840	txc = netq->netq_ctx;
841
842	if (__predict_false(!pcq_put(txc->txc_intrq, m))) {
843		m_freem(m);
844		return ENOBUFS;
845	}
846
847	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
848	if_statadd_ref(nsr, if_obytes, m->m_pkthdr.len);
849	if (m->m_flags & M_MCAST)
850		if_statinc_ref(nsr, if_omcasts);
851	IF_STAT_PUTREF(ifp);
852
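	/*
	 * Do not wait for the queue lock here; if it is contended, the
	 * packet stays on txc_intrq and is drained by a later locked
	 * transmit pass.
	 */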
853	if (mutex_tryenter(&netq->netq_lock)) {
854		vioif_transmit_locked(ifp, netq);
855		mutex_exit(&netq->netq_lock);
856	}
857
858	return 0;
859}
860
861static void
862vioif_watchdog(struct ifnet *ifp)
863{
864	struct vioif_softc *sc = ifp->if_softc;
865	struct vioif_netqueue *netq;
866	int i;
867
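	/*
	 * The watchdog does not reset the device; it only kicks the tx
	 * handlers so that completed transmissions are reclaimed.
	 */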
868	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
869		if (ISSET(ifp->if_flags, IFF_DEBUG)) {
870			log(LOG_DEBUG, "%s: watchdog timed out\n",
871			    ifp->if_xname);
872		}
873
874		for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
875			netq = &sc->sc_netqs[VIOIF_NETQ_TXQID(i)];
876
877			mutex_enter(&netq->netq_lock);
878			if (!netq->netq_running_handle) {
879				netq->netq_running_handle = true;
880				vioif_net_sched_handle(sc, netq);
881			}
882			mutex_exit(&netq->netq_lock);
883		}
884	}
885}
886
887static int
888vioif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
889{
890	int s, r;
891
892	s = splnet();
893
894	r = ether_ioctl(ifp, cmd, data);
895	if (r == ENETRESET && (cmd == SIOCADDMULTI || cmd == SIOCDELMULTI)) {
896		if (ifp->if_flags & IFF_RUNNING) {
897			r = vioif_rx_filter(ifp->if_softc);
898		} else {
899			r = 0;
900		}
901	}
902
903	splx(s);
904
905	return r;
906}
907
908static int
909vioif_ifflags(struct vioif_softc *sc)
910{
911	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
912	bool onoff;
913	int r;
914
915	if (!sc->sc_has_ctrl) {
916		/* no ctrl vq; always promisc and allmulti */
917		ifp->if_flags |= (IFF_PROMISC | IFF_ALLMULTI);
918		return 0;
919	}
920
921	onoff = ifp->if_flags & IFF_ALLMULTI ? true : false;
922	r = vioif_set_allmulti(sc, onoff);
923	if (r != 0) {
924		log(LOG_WARNING,
925		    "%s: couldn't %sable ALLMULTI\n",
926		    ifp->if_xname, onoff ? "en" : "dis");
927		if (onoff) {
928			CLR(ifp->if_flags, IFF_ALLMULTI);
929		} else {
930			SET(ifp->if_flags, IFF_ALLMULTI);
931		}
932	}
933
934	onoff = ifp->if_flags & IFF_PROMISC ? true : false;
935	r = vioif_set_promisc(sc, onoff);
936	if (r != 0) {
937		log(LOG_WARNING,
938		    "%s: couldn't %sable PROMISC\n",
939		    ifp->if_xname, onoff ? "en" : "dis");
940		if (onoff) {
941			CLR(ifp->if_flags, IFF_PROMISC);
942		} else {
943			SET(ifp->if_flags, IFF_PROMISC);
944		}
945	}
946
947	return 0;
948}
949
950static int
951vioif_ifflags_cb(struct ethercom *ec)
952{
953	struct ifnet *ifp = &ec->ec_if;
954	struct vioif_softc *sc = ifp->if_softc;
955
956	return vioif_ifflags(sc);
957}
958
959static int
960vioif_setup_sysctl(struct vioif_softc *sc)
961{
962	const char *devname;
963	struct sysctllog **log;
964	const struct sysctlnode *rnode, *rxnode, *txnode;
965	int error;
966
967	log = &sc->sc_sysctllog;
968	devname = device_xname(sc->sc_dev);
969
970	error = sysctl_createv(log, 0, NULL, &rnode,
971	    0, CTLTYPE_NODE, devname,
972	    SYSCTL_DESCR("virtio-net information and settings"),
973	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);
974	if (error)
975		goto out;
976
977	error = sysctl_createv(log, 0, &rnode, NULL,
978	    CTLFLAG_READWRITE, CTLTYPE_BOOL, "txrx_workqueue",
979	    SYSCTL_DESCR("Use workqueue for packet processing"),
980	    NULL, 0, &sc->sc_txrx_workqueue_sysctl, 0, CTL_CREATE, CTL_EOL);
981	if (error)
982		goto out;
983
984	error = sysctl_createv(log, 0, &rnode, &rxnode,
985	    0, CTLTYPE_NODE, "rx",
986	    SYSCTL_DESCR("virtio-net information and settings for Rx"),
987	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
988	if (error)
989		goto out;
990
991	error = sysctl_createv(log, 0, &rxnode, NULL,
992	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
993	    SYSCTL_DESCR("max number of Rx packets to process for interrupt processing"),
994	    NULL, 0, &sc->sc_rx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
995	if (error)
996		goto out;
997
998	error = sysctl_createv(log, 0, &rxnode, NULL,
999	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
1000	    SYSCTL_DESCR("max number of Rx packets to process for deferred processing"),
1001	    NULL, 0, &sc->sc_rx_process_limit, 0, CTL_CREATE, CTL_EOL);
1002	if (error)
1003		goto out;
1004
1005	error = sysctl_createv(log, 0, &rnode, &txnode,
1006	    0, CTLTYPE_NODE, "tx",
1007	    SYSCTL_DESCR("virtio-net information and settings for Tx"),
1008	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
1009	if (error)
1010		goto out;
1011
1012	error = sysctl_createv(log, 0, &txnode, NULL,
1013	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
1014	    SYSCTL_DESCR("max number of Tx packets to process for interrupt processing"),
1015	    NULL, 0, &sc->sc_tx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
1016	if (error)
1017		goto out;
1018
1019	error = sysctl_createv(log, 0, &txnode, NULL,
1020	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
1021	    SYSCTL_DESCR("max number of Tx packets to process for deferred processing"),
1022	    NULL, 0, &sc->sc_tx_process_limit, 0, CTL_CREATE, CTL_EOL);
1023
1024out:
1025	if (error)
1026		sysctl_teardown(log);
1027
1028	return error;
1029}
1030
1031static void
1032vioif_setup_stats(struct vioif_softc *sc)
1033{
1034	struct vioif_netqueue *netq;
1035	struct vioif_tx_context *txc;
1036	struct vioif_rx_context *rxc;
1037	size_t i, netq_num;
1038
1039	netq_num = sc->sc_max_nvq_pairs * 2;
1040	for (i = 0; i < netq_num; i++) {
1041		netq = &sc->sc_netqs[i];
1042		evcnt_attach_dynamic(&netq->netq_mbuf_load_failed, EVCNT_TYPE_MISC,
1043		    NULL, netq->netq_evgroup, "failed to load mbuf to DMA");
1044		evcnt_attach_dynamic(&netq->netq_enqueue_failed,
1045		    EVCNT_TYPE_MISC, NULL, netq->netq_evgroup,
1046		    "virtqueue enqueue failed failed");
1047
1048		switch (VIOIF_NETQ_DIR(i)) {
1049		case VIOIF_NETQ_RX:
1050			rxc = netq->netq_ctx;
1051			evcnt_attach_dynamic(&rxc->rxc_mbuf_enobufs,
1052			    EVCNT_TYPE_MISC, NULL, netq->netq_evgroup,
1053			    "no receive buffer");
1054			break;
1055		case VIOIF_NETQ_TX:
1056			txc = netq->netq_ctx;
1057			evcnt_attach_dynamic(&txc->txc_defrag_failed,
1058			    EVCNT_TYPE_MISC, NULL, netq->netq_evgroup,
1059			    "m_defrag() failed");
1060			break;
1061		}
1062	}
1063
1064	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_load_failed, EVCNT_TYPE_MISC,
1065	    NULL, device_xname(sc->sc_dev), "control command dmamap load failed");
1066	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_failed, EVCNT_TYPE_MISC,
1067	    NULL, device_xname(sc->sc_dev), "control command failed");
1068}
1069
1070/*
1071 * allocate memory
1072 */
1073static int
1074vioif_dmamap_create(struct vioif_softc *sc, bus_dmamap_t *map,
1075    bus_size_t size, int nsegs, const char *usage)
1076{
1077	int r;
1078
1079	r = bus_dmamap_create(virtio_dmat(sc->sc_virtio), size,
1080	    nsegs, size, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, map);
1081
1082	if (r != 0) {
1083		aprint_error_dev(sc->sc_dev, "%s dmamap creation failed, "
1084		    "error code %d\n", usage, r);
1085	}
1086
1087	return r;
1088}
1089
1090static void
1091vioif_dmamap_destroy(struct vioif_softc *sc, bus_dmamap_t *map)
1092{
1093
1094	if (*map) {
1095		bus_dmamap_destroy(virtio_dmat(sc->sc_virtio), *map);
1096		*map = NULL;
1097	}
1098}
1099
1100static int
1101vioif_dmamap_create_load(struct vioif_softc *sc, bus_dmamap_t *map,
1102    void *buf, bus_size_t size, int nsegs, int rw, const char *usage)
1103{
1104	int r;
1105
1106	r = vioif_dmamap_create(sc, map, size, nsegs, usage);
1107	if (r != 0)
1108		return 1;
1109
1110	r = bus_dmamap_load(virtio_dmat(sc->sc_virtio), *map, buf,
1111	    size, NULL, rw | BUS_DMA_NOWAIT);
1112	if (r != 0) {
1113		vioif_dmamap_destroy(sc, map);
1114		aprint_error_dev(sc->sc_dev, "%s dmamap load failed. "
1115		    "error code %d\n", usage, r);
1116	}
1117
1118	return r;
1119}
1120
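/*
 * Simple bump allocator: return the current position in the pre-allocated
 * region at *p and advance it by size.
 */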
1121static void *
1122vioif_assign_mem(intptr_t *p, size_t size)
1123{
1124	intptr_t rv;
1125
1126	rv = *p;
1127	*p += size;
1128
1129	return (void *)rv;
1130}
1131
1132/*
1133 * dma memory is used for:
1134 *   netq_maps_kva:	 metadata array for received frames (READ) and
1135 *			 sent frames (WRITE)
1136 *   ctrlq_cmd:		 command to be sent via ctrl vq (WRITE)
1137 *   ctrlq_status:	 return value for a command via ctrl vq (READ)
1138 *   ctrlq_rx:		 parameter for a VIRTIO_NET_CTRL_RX class command
1139 *			 (WRITE)
1140 *   ctrlq_mac_tbl_uc:	 unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
1141 *			 class command (WRITE)
1142 *   ctrlq_mac_tbl_mc:	 multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
1143 *			 class command (WRITE)
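 *   ctrlq_mac_addr:	 MAC address to set, for a VIRTIO_NET_CTRL_MAC_ADDR_SET
 *			 command (WRITE)
 *   ctrlq_mq:		 parameter for a VIRTIO_NET_CTRL_MQ class command (WRITE)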
1144 * Only one instance of each ctrlq_* structure is allocated; they are
1145 * protected by the ctrlq_inuse variable and the ctrlq_wait condvar.
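 *
 *   The vioif_net_map arrays themselves live in ordinary kmem (sc_kmem);
 *   only the buffers listed above must be in DMA-safe memory.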
1146 */
1147static int
1148vioif_alloc_mems(struct vioif_softc *sc)
1149{
1150	struct virtio_softc *vsc = sc->sc_virtio;
1151	struct vioif_netqueue *netq;
1152	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1153	struct vioif_net_map *maps;
1154	unsigned int vq_num;
1155	int r, rsegs;
1156	bus_size_t dmamemsize;
1157	size_t qid, i, netq_num, kmemsize;
1158	void *vaddr;
1159	intptr_t p;
1160
1161	netq_num = sc->sc_max_nvq_pairs * 2;
1162
1163	/* allocate DMA memory */
1164	dmamemsize = 0;
1165
1166	for (qid = 0; qid < netq_num; qid++) {
1167		maps = sc->sc_netqs[qid].netq_maps;
1168		vq_num = sc->sc_netqs[qid].netq_vq->vq_num;
1169		dmamemsize += sizeof(*maps[0].vnm_hdr) * vq_num;
1170	}
1171
1172	if (sc->sc_has_ctrl) {
1173		dmamemsize += sizeof(struct virtio_net_ctrl_cmd);
1174		dmamemsize += sizeof(struct virtio_net_ctrl_status);
1175		dmamemsize += sizeof(struct virtio_net_ctrl_rx);
1176		dmamemsize += sizeof(struct virtio_net_ctrl_mac_tbl)
1177		    + ETHER_ADDR_LEN;
1178		dmamemsize += sizeof(struct virtio_net_ctrl_mac_tbl)
1179		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES;
1180		dmamemsize += sizeof(struct virtio_net_ctrl_mac_addr);
1181		dmamemsize += sizeof(struct virtio_net_ctrl_mq);
1182	}
1183
1184	r = bus_dmamem_alloc(virtio_dmat(vsc), dmamemsize, 0, 0,
1185	    &sc->sc_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
1186	if (r != 0) {
1187		aprint_error_dev(sc->sc_dev,
1188		    "DMA memory allocation failed, size %" PRIuBUSSIZE ", "
1189		    "error code %d\n", dmamemsize, r);
1190		goto err_none;
1191	}
1192	r = bus_dmamem_map(virtio_dmat(vsc), &sc->sc_segs[0], 1,
1193	    dmamemsize, &vaddr, BUS_DMA_NOWAIT);
1194	if (r != 0) {
1195		aprint_error_dev(sc->sc_dev,
1196		    "DMA memory map failed, error code %d\n", r);
1197		goto err_dmamem_alloc;
1198	}
1199
1200	/* assign DMA memory */
1201	memset(vaddr, 0, dmamemsize);
1202	sc->sc_dmamem = vaddr;
1203	p = (intptr_t) vaddr;
1204
1205	for (qid = 0; qid < netq_num; qid++) {
1206		netq = &sc->sc_netqs[qid];
1207		maps = netq->netq_maps;
1208		vq_num = netq->netq_vq->vq_num;
1209
1210		netq->netq_maps_kva = vioif_assign_mem(&p,
1211		    sizeof(*maps[0].vnm_hdr) * vq_num);
1212	}
1213
1214	if (sc->sc_has_ctrl) {
1215		ctrlq->ctrlq_cmd = vioif_assign_mem(&p,
1216		    sizeof(*ctrlq->ctrlq_cmd));
1217		ctrlq->ctrlq_status = vioif_assign_mem(&p,
1218		    sizeof(*ctrlq->ctrlq_status));
1219		ctrlq->ctrlq_rx = vioif_assign_mem(&p,
1220		    sizeof(*ctrlq->ctrlq_rx));
1221		ctrlq->ctrlq_mac_tbl_uc = vioif_assign_mem(&p,
1222		    sizeof(*ctrlq->ctrlq_mac_tbl_uc)
1223		    + ETHER_ADDR_LEN);
1224		ctrlq->ctrlq_mac_tbl_mc = vioif_assign_mem(&p,
1225		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
1226		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES);
1227		ctrlq->ctrlq_mac_addr = vioif_assign_mem(&p,
1228		    sizeof(*ctrlq->ctrlq_mac_addr));
1229		ctrlq->ctrlq_mq = vioif_assign_mem(&p, sizeof(*ctrlq->ctrlq_mq));
1230	}
1231
1232	/* allocate kmem */
1233	kmemsize = 0;
1234
1235	for (qid = 0; qid < netq_num; qid++) {
1236		netq = &sc->sc_netqs[qid];
1237		vq_num = netq->netq_vq->vq_num;
1238
1239		kmemsize += sizeof(netq->netq_maps[0]) * vq_num;
1240	}
1241
1242	vaddr = kmem_zalloc(kmemsize, KM_SLEEP);
1243	sc->sc_kmem = vaddr;
1244
1245	/* assign allocated kmem */
1246	p = (intptr_t) vaddr;
1247
1248	for (qid = 0; qid < netq_num; qid++) {
1249		netq = &sc->sc_netqs[qid];
1250		vq_num = netq->netq_vq->vq_num;
1251
1252		netq->netq_maps = vioif_assign_mem(&p,
1253		    sizeof(netq->netq_maps[0]) * vq_num);
1254	}
1255
1256	/* prepare dmamaps */
1257	for (qid = 0; qid < netq_num; qid++) {
1258		static const struct {
1259			const char	*msg_hdr;
1260			const char	*msg_payload;
1261			int		 dma_flag;
1262			bus_size_t	 dma_size;
1263			int		 dma_nsegs;
1264		} dmaparams[VIOIF_NETQ_IDX] = {
1265			[VIOIF_NETQ_RX] = {
1266				.msg_hdr	= "rx header",
1267				.msg_payload	= "rx payload",
1268				.dma_flag	= BUS_DMA_READ,
1269				.dma_size	= MCLBYTES - ETHER_ALIGN,
1270				.dma_nsegs	= 1,
1271			},
1272			[VIOIF_NETQ_TX] = {
1273				.msg_hdr	= "tx header",
1274				.msg_payload	= "tx payload",
1275				.dma_flag	= BUS_DMA_WRITE,
1276				.dma_size	= ETHER_MAX_LEN,
1277				.dma_nsegs	= VIRTIO_NET_TX_MAXNSEGS,
1278			}
1279		};
1280
1281		struct virtio_net_hdr *hdrs;
1282		int dir;
1283
1284		dir = VIOIF_NETQ_DIR(qid);
1285		netq = &sc->sc_netqs[qid];
1286		vq_num = netq->netq_vq->vq_num;
1287		maps = netq->netq_maps;
1288		hdrs = netq->netq_maps_kva;
1289
1290		for (i = 0; i < vq_num; i++) {
1291			maps[i].vnm_hdr = &hdrs[i];
1292
1293			r = vioif_dmamap_create_load(sc, &maps[i].vnm_hdr_map,
1294			    maps[i].vnm_hdr, sc->sc_hdr_size, 1,
1295			    dmaparams[dir].dma_flag, dmaparams[dir].msg_hdr);
1296			if (r != 0)
1297				goto err_reqs;
1298
1299			r = vioif_dmamap_create(sc, &maps[i].vnm_mbuf_map,
1300			    dmaparams[dir].dma_size, dmaparams[dir].dma_nsegs,
1301			    dmaparams[dir].msg_payload);
1302			if (r != 0)
1303				goto err_reqs;
1304		}
1305	}
1306
1307	if (sc->sc_has_ctrl) {
1308		/* control vq class & command */
1309		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_cmd_dmamap,
1310		    ctrlq->ctrlq_cmd, sizeof(*ctrlq->ctrlq_cmd), 1,
1311		    BUS_DMA_WRITE, "control command");
1312		if (r != 0)
1313			goto err_reqs;
1314
1315		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_status_dmamap,
1316		    ctrlq->ctrlq_status, sizeof(*ctrlq->ctrlq_status), 1,
1317		    BUS_DMA_READ, "control status");
1318		if (r != 0)
1319			goto err_reqs;
1320
1321		/* control vq rx mode command parameter */
1322		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_rx_dmamap,
1323		    ctrlq->ctrlq_rx, sizeof(*ctrlq->ctrlq_rx), 1,
1324		    BUS_DMA_WRITE, "rx mode control command");
1325		if (r != 0)
1326			goto err_reqs;
1327
1328		/* multiqueue set command */
1329		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_mq_dmamap,
1330		    ctrlq->ctrlq_mq, sizeof(*ctrlq->ctrlq_mq), 1,
1331		    BUS_DMA_WRITE, "multiqueue set command");
1332		if (r != 0)
1333			goto err_reqs;
1334
1335		/* control vq MAC filter table for unicast */
1336		/* do not load now since its length is variable */
1337		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_uc_dmamap,
1338		    sizeof(*ctrlq->ctrlq_mac_tbl_uc)
1339		    + ETHER_ADDR_LEN, 1,
1340		    "unicast MAC address filter command");
1341		if (r != 0)
1342			goto err_reqs;
1343
1344		/* control vq MAC filter table for multicast */
1345		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_mc_dmamap,
1346		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
1347		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES, 1,
1348		    "multicast MAC address filter command");
1349		if (r != 0)
1350			goto err_reqs;
1351
1352		/* control vq MAC address set command */
1353		r = vioif_dmamap_create_load(sc,
1354		    &ctrlq->ctrlq_mac_addr_dmamap,
1355		    ctrlq->ctrlq_mac_addr,
1356		    sizeof(*ctrlq->ctrlq_mac_addr), 1,
1357		    BUS_DMA_WRITE, "mac addr set command");
1358		if (r != 0)
1359			goto err_reqs;
1360	}
1361
1362	return 0;
1363
1364err_reqs:
1365	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_mc_dmamap);
1366	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_uc_dmamap);
1367	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_rx_dmamap);
1368	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_status_dmamap);
1369	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_cmd_dmamap);
1370	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_mac_addr_dmamap);
1371	for (qid = 0; qid < netq_num; qid++) {
1372		vq_num = sc->sc_netqs[qid].netq_vq->vq_num;
1373		maps = sc->sc_netqs[qid].netq_maps;
1374
1375		for (i = 0; i < vq_num; i++) {
1376			vioif_dmamap_destroy(sc, &maps[i].vnm_mbuf_map);
1377			vioif_dmamap_destroy(sc, &maps[i].vnm_hdr_map);
1378		}
1379	}
1380	if (sc->sc_kmem) {
1381		kmem_free(sc->sc_kmem, kmemsize);
1382		sc->sc_kmem = NULL;
1383	}
1384	bus_dmamem_unmap(virtio_dmat(vsc), sc->sc_dmamem, dmamemsize);
1385err_dmamem_alloc:
1386	bus_dmamem_free(virtio_dmat(vsc), &sc->sc_segs[0], 1);
1387err_none:
1388	return -1;
1389}
1390
1391static void
1392vioif_alloc_queues(struct vioif_softc *sc)
1393{
1394	int nvq_pairs = sc->sc_max_nvq_pairs;
1395	size_t nvqs, netq_num;
1396
1397	KASSERT(nvq_pairs <= VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX);
1398
1399	nvqs = netq_num = sc->sc_max_nvq_pairs * 2;
1400	if (sc->sc_has_ctrl)
1401		nvqs++;
1402
1403	sc->sc_vqs = kmem_zalloc(sizeof(sc->sc_vqs[0]) * nvqs, KM_SLEEP);
1404	sc->sc_netqs = kmem_zalloc(sizeof(sc->sc_netqs[0]) * netq_num,
1405	    KM_SLEEP);
1406}
1407
1408static void
1409vioif_free_queues(struct vioif_softc *sc)
1410{
1411	size_t nvqs, netq_num;
1412
1413	nvqs = netq_num = sc->sc_max_nvq_pairs * 2;
1414	if (sc->sc_ctrlq.ctrlq_vq)
1415		nvqs++;
1416
1417	kmem_free(sc->sc_netqs, sizeof(sc->sc_netqs[0]) * netq_num);
1418	kmem_free(sc->sc_vqs, sizeof(sc->sc_vqs[0]) * nvqs);
1419	sc->sc_netqs = NULL;
1420	sc->sc_vqs = NULL;
1421}
1422
1423/*
1424 * Network queues
1425 */
1426static int
1427vioif_netqueue_init(struct vioif_softc *sc, struct virtio_softc *vsc,
1428    size_t qid, u_int softint_flags)
1429{
1430	static const struct {
1431		const char	*dirname;
1432		int		 segsize;
1433		int		 nsegs;
1434		int 		(*intrhand)(void *);
1435		void		(*sihand)(void *);
1436	} params[VIOIF_NETQ_IDX] = {
1437		[VIOIF_NETQ_RX] = {
1438			.dirname	= "rx",
1439			.segsize	= MCLBYTES,
1440			.nsegs		= 2,
1441			.intrhand	= vioif_rx_intr,
1442			.sihand		= vioif_rx_handle,
1443		},
1444		[VIOIF_NETQ_TX] = {
1445			.dirname	= "tx",
1446			.segsize	= ETHER_MAX_LEN - ETHER_HDR_LEN,
1447			.nsegs		= 2,
1448			.intrhand	= vioif_tx_intr,
1449			.sihand		= vioif_tx_handle,
1450		}
1451	};
1452
1453	struct virtqueue *vq;
1454	struct vioif_netqueue *netq;
1455	struct vioif_tx_context *txc;
1456	struct vioif_rx_context *rxc;
1457	char qname[32];
1458	int r, dir;
1459
1460	txc = NULL;
1461	rxc = NULL;
1462	netq = &sc->sc_netqs[qid];
1463	vq = &sc->sc_vqs[qid];
1464	dir = VIOIF_NETQ_DIR(qid);
1465
1466	netq->netq_vq = &sc->sc_vqs[qid];
1467	netq->netq_stopping = false;
1468	netq->netq_running_handle = false;
1469
1470	snprintf(qname, sizeof(qname), "%s%zu",
1471	    params[dir].dirname, VIOIF_NETQ_PAIRIDX(qid));
1472	snprintf(netq->netq_evgroup, sizeof(netq->netq_evgroup),
1473	    "%s-%s", device_xname(sc->sc_dev), qname);
1474
1475	mutex_init(&netq->netq_lock, MUTEX_DEFAULT, IPL_NET);
1476	virtio_init_vq(vsc, vq, qid, params[dir].intrhand, netq);
1477
1478	r = virtio_alloc_vq(vsc, vq,
1479	    params[dir].segsize + sc->sc_hdr_size,
1480	    params[dir].nsegs, qname);
1481	if (r != 0)
1482		goto err;
1483	netq->netq_vq = vq;
1484
1485	netq->netq_softint = softint_establish(softint_flags,
1486	    params[dir].sihand, netq);
1487	if (netq->netq_softint == NULL) {
1488		aprint_error_dev(sc->sc_dev,
1489		    "couldn't establish %s softint\n",
1490		    params[dir].dirname);
1491		goto err;
1492	}
1493	vioif_work_set(&netq->netq_work, params[dir].sihand, netq);
1494
1495	switch (dir) {
1496	case VIOIF_NETQ_RX:
1497		rxc = kmem_zalloc(sizeof(*rxc), KM_SLEEP);
1498		netq->netq_ctx = rxc;
1499		/* nothing more to do for rx */
1500		break;
1501	case VIOIF_NETQ_TX:
1502		txc = kmem_zalloc(sizeof(*txc), KM_SLEEP);
1503		netq->netq_ctx = (void *)txc;
1504		txc->txc_deferred_transmit = softint_establish(softint_flags,
1505		    vioif_deferred_transmit, netq);
1506		if (txc->txc_deferred_transmit == NULL) {
1507			aprint_error_dev(sc->sc_dev,
1508			    "couldn't establish softint for "
1509			    "tx deferred transmit\n");
1510			goto err;
1511		}
1512		txc->txc_link_active = VIOIF_IS_LINK_ACTIVE(sc);
1513		txc->txc_no_free_slots = false;
1514		txc->txc_intrq = pcq_create(vq->vq_num, KM_SLEEP);
1515		break;
1516	}
1517
1518	return 0;
1519
1520err:
1521	netq->netq_ctx = NULL;
1522
1523	if (rxc != NULL) {
1524		kmem_free(rxc, sizeof(*rxc));
1525	}
1526
1527	if (txc != NULL) {
1528		if (txc->txc_deferred_transmit != NULL)
1529			softint_disestablish(txc->txc_deferred_transmit);
1530		if (txc->txc_intrq != NULL)
1531			pcq_destroy(txc->txc_intrq);
1532		kmem_free(txc, sizeof(*txc));
1533	}
1534
1535	vioif_work_set(&netq->netq_work, NULL, NULL);
1536	if (netq->netq_softint != NULL) {
1537		softint_disestablish(netq->netq_softint);
1538		netq->netq_softint = NULL;
1539	}
1540
1541	virtio_free_vq(vsc, vq);
1542	mutex_destroy(&netq->netq_lock);
1543	netq->netq_vq = NULL;
1544
1545	return -1;
1546}
1547
1548static void
1549vioif_netqueue_teardown(struct vioif_softc *sc, struct virtio_softc *vsc,
1550    size_t qid)
1551{
1552	struct vioif_netqueue *netq;
1553	struct vioif_rx_context *rxc;
1554	struct vioif_tx_context *txc;
1555	int dir;
1556
1557	netq = &sc->sc_netqs[qid];
1558
1559	if (netq->netq_vq == NULL)
1560		return;
1561
1562	netq = &sc->sc_netqs[qid];
1563	dir = VIOIF_NETQ_DIR(qid);
1564	switch (dir) {
1565	case VIOIF_NETQ_RX:
1566		rxc = netq->netq_ctx;
1567		netq->netq_ctx = NULL;
1568		kmem_free(rxc, sizeof(*rxc));
1569		break;
1570	case VIOIF_NETQ_TX:
1571		txc = netq->netq_ctx;
1572		netq->netq_ctx = NULL;
1573		softint_disestablish(txc->txc_deferred_transmit);
1574		pcq_destroy(txc->txc_intrq);
1575		kmem_free(txc, sizeof(*txc));
1576		break;
1577	}
1578
1579	softint_disestablish(netq->netq_softint);
1580	virtio_free_vq(vsc, netq->netq_vq);
1581	mutex_destroy(&netq->netq_lock);
1582	netq->netq_vq = NULL;
1583}
1584
1585static void
1586vioif_net_sched_handle(struct vioif_softc *sc, struct vioif_netqueue *netq)
1587{
1588
1589	KASSERT(mutex_owned(&netq->netq_lock));
1590	KASSERT(!netq->netq_stopping);
1591
1592	if (netq->netq_workqueue) {
1593		vioif_work_add(sc->sc_txrx_workqueue, &netq->netq_work);
1594	} else {
1595		softint_schedule(netq->netq_softint);
1596	}
1597}
1598
1599static int
1600vioif_net_load_mbuf(struct virtio_softc *vsc, struct vioif_net_map *map,
1601   struct mbuf *m, int dma_flags)
1602{
1603	int r;
1604
1605	KASSERT(map->vnm_mbuf == NULL);
1606
1607	r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1608	    map->vnm_mbuf_map, m, dma_flags | BUS_DMA_NOWAIT);
1609	if (r == 0) {
1610		map->vnm_mbuf = m;
1611	}
1612
1613	return r;
1614}
1615
1616static void
1617vioif_net_unload_mbuf(struct virtio_softc *vsc, struct vioif_net_map *map)
1618{
1619
1620	KASSERT(map->vnm_mbuf != NULL);
1621	bus_dmamap_unload(virtio_dmat(vsc), map->vnm_mbuf_map);
1622	map->vnm_mbuf = NULL;
1623}
1624
1625static int
1626vioif_net_enqueue(struct virtio_softc *vsc, struct virtqueue *vq,
1627    int slot, struct vioif_net_map *map, int dma_ops, bool is_write)
1628{
1629	int r;
1630
1631	KASSERT(map->vnm_mbuf != NULL);
1632
1633	/* This should actually never fail */
1634	r = virtio_enqueue_reserve(vsc, vq, slot,
1635	    map->vnm_mbuf_map->dm_nsegs + 1);
1636	if (r != 0) {
1637		/* slot already freed by virtio_enqueue_reserve */
1638		return r;
1639	}
1640
1641	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_mbuf_map,
1642	    0, map->vnm_mbuf_map->dm_mapsize, dma_ops);
1643	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_hdr_map,
1644	    0, map->vnm_hdr_map->dm_mapsize, dma_ops);
1645
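	/* descriptor chain: the virtio_net_hdr first, then the mbuf payload */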
1646	virtio_enqueue(vsc, vq, slot, map->vnm_hdr_map, is_write);
1647	virtio_enqueue(vsc, vq, slot, map->vnm_mbuf_map, is_write);
1648	virtio_enqueue_commit(vsc, vq, slot, false);
1649
1650	return 0;
1651}
1652
1653static int
1654vioif_net_enqueue_tx(struct virtio_softc *vsc, struct virtqueue *vq,
1655    int slot, struct vioif_net_map *map)
1656{
1657
1658	return vioif_net_enqueue(vsc, vq, slot, map,
1659	    BUS_DMASYNC_PREWRITE, true);
1660}
1661
1662static int
1663vioif_net_enqueue_rx(struct virtio_softc *vsc, struct virtqueue *vq,
1664    int slot, struct vioif_net_map *map)
1665{
1666
1667	return vioif_net_enqueue(vsc, vq, slot, map,
1668	    BUS_DMASYNC_PREREAD, false);
1669}
1670
1671static struct mbuf *
1672vioif_net_dequeue_commit(struct virtio_softc *vsc, struct virtqueue *vq,
1673   int slot, struct vioif_net_map *map, int dma_flags)
1674{
1675	struct mbuf *m;
1676
1677	m = map->vnm_mbuf;
1678	KASSERT(m != NULL);
1679	map->vnm_mbuf = NULL;
1680
1681	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_hdr_map,
1682	    0, map->vnm_hdr_map->dm_mapsize, dma_flags);
1683	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_mbuf_map,
1684	    0, map->vnm_mbuf_map->dm_mapsize, dma_flags);
1685
1686	bus_dmamap_unload(virtio_dmat(vsc), map->vnm_mbuf_map);
1687	virtio_dequeue_commit(vsc, vq, slot);
1688
1689	return m;
1690}
1691
1692static void
1693vioif_net_intr_enable(struct vioif_softc *sc, struct virtio_softc *vsc)
1694{
1695	struct vioif_netqueue *netq;
1696	size_t i, act_qnum;
1697	int enqueued;
1698
1699	act_qnum = sc->sc_act_nvq_pairs * 2;
1700	for (i = 0; i < act_qnum; i++) {
1701		netq = &sc->sc_netqs[i];
1702
1703		KASSERT(!netq->netq_stopping);
1704		KASSERT(!netq->netq_running_handle);
1705
1706		enqueued = virtio_start_vq_intr(vsc, netq->netq_vq);
1707		if (enqueued != 0) {
1708			virtio_stop_vq_intr(vsc, netq->netq_vq);
1709
1710			mutex_enter(&netq->netq_lock);
1711			netq->netq_running_handle = true;
1712			vioif_net_sched_handle(sc, netq);
1713			mutex_exit(&netq->netq_lock);
1714		}
1715	}
1716}
1717
1718static void
1719vioif_net_intr_disable(struct vioif_softc *sc, struct virtio_softc *vsc)
1720{
1721	struct vioif_netqueue *netq;
1722	size_t i, act_qnum;
1723
1724	act_qnum = sc->sc_act_nvq_pairs * 2;
1725	for (i = 0; i < act_qnum; i++) {
1726		netq = &sc->sc_netqs[i];
1727
1728		virtio_stop_vq_intr(vsc, netq->netq_vq);
1729	}
1730}
1731
1732/*
1733 * Receive implementation
1734 */
1735/* enqueue mbufs to receive slots */
1736static void
1737vioif_populate_rx_mbufs_locked(struct vioif_softc *sc, struct vioif_netqueue *netq)
1738{
1739	struct virtqueue *vq = netq->netq_vq;
1740	struct virtio_softc *vsc = vq->vq_owner;
1741	struct vioif_rx_context *rxc;
1742	struct vioif_net_map *map;
1743	struct mbuf *m;
1744	int i, r, ndone = 0;
1745
1746	KASSERT(mutex_owned(&netq->netq_lock));
1747
1748	rxc = netq->netq_ctx;
1749
1750	for (i = 0; i < vq->vq_num; i++) {
1751		int slot;
1752		r = virtio_enqueue_prep(vsc, vq, &slot);
1753		if (r == EAGAIN)
1754			break;
1755		if (__predict_false(r != 0))
1756			panic("enqueue_prep for rx buffers");
1757
1758		MGETHDR(m, M_DONTWAIT, MT_DATA);
1759		if (m == NULL) {
1760			virtio_enqueue_abort(vsc, vq, slot);
1761			rxc->rxc_mbuf_enobufs.ev_count++;
1762			break;
1763		}
1764		MCLGET(m, M_DONTWAIT);
1765		if ((m->m_flags & M_EXT) == 0) {
1766			virtio_enqueue_abort(vsc, vq, slot);
1767			m_freem(m);
1768			rxc->rxc_mbuf_enobufs.ev_count++;
1769			break;
1770		}
1771
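		/*
		 * Use the whole cluster, then trim ETHER_ALIGN bytes from the
		 * head so that the IP header ends up 4-byte aligned.
		 */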
1772		m->m_len = m->m_pkthdr.len = MCLBYTES;
1773		m_adj(m, ETHER_ALIGN);
1774
1775		map = &netq->netq_maps[slot];
1776		r = vioif_net_load_mbuf(vsc, map, m, BUS_DMA_READ);
1777		if (r != 0) {
1778			virtio_enqueue_abort(vsc, vq, slot);
1779			m_freem(m);
1780			netq->netq_mbuf_load_failed.ev_count++;
1781			break;
1782		}
1783
1784		r = vioif_net_enqueue_rx(vsc, vq, slot, map);
1785		if (r != 0) {
1786			vioif_net_unload_mbuf(vsc, map);
1787			netq->netq_enqueue_failed.ev_count++;
1788			m_freem(m);
1789			/* slot already freed by vioif_net_enqueue_rx */
1790			break;
1791		}
1792
1793		ndone++;
1794	}
1795
1796	if (ndone > 0)
1797		vioif_notify(vsc, vq);
1798}
1799
1800/* dequeue received packets */
1801static bool
1802vioif_rx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
1803    struct vioif_netqueue *netq, u_int limit, size_t *ndeqp)
1804{
1805	struct virtqueue *vq = netq->netq_vq;
1806	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1807	struct vioif_net_map *map;
1808	struct mbuf *m;
1809	int slot, len;
1810	bool more;
1811	size_t ndeq;
1812
1813	KASSERT(mutex_owned(&netq->netq_lock));
1814
1815	more = false;
1816	ndeq = 0;
1817
1818	if (virtio_vq_is_enqueued(vsc, vq) == false)
1819		goto done;
1820
1821	for (;;ndeq++) {
1822		if (ndeq >= limit) {
1823			more = true;
1824			break;
1825		}
1826
1827		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
1828			break;
1829
1830		map = &netq->netq_maps[slot];
1831		KASSERT(map->vnm_mbuf != NULL);
1832		m = vioif_net_dequeue_commit(vsc, vq, slot,
1833		    map, BUS_DMASYNC_POSTREAD);
1834		KASSERT(m != NULL);
1835
1836		m->m_len = m->m_pkthdr.len = len - sc->sc_hdr_size;
1837		m_set_rcvif(m, ifp);
1838		if_percpuq_enqueue(ifp->if_percpuq, m);
1839	}
1840
1841done:
1842	if (ndeqp != NULL)
1843		*ndeqp = ndeq;
1844
1845	return more;
1846}
1847
1848static void
1849vioif_rx_queue_clear(struct vioif_softc *sc, struct virtio_softc *vsc,
1850    struct vioif_netqueue *netq)
1851{
1852	struct vioif_net_map *map;
1853	struct mbuf *m;
1854	unsigned int i, vq_num;
1855	bool more;
1856
1857	mutex_enter(&netq->netq_lock);
1858
1859	vq_num = netq->netq_vq->vq_num;
1860	for (;;) {
1861		more = vioif_rx_deq_locked(sc, vsc, netq, vq_num, NULL);
1862		if (more == false)
1863			break;
1864	}
1865
1866	for (i = 0; i < vq_num; i++) {
1867		map = &netq->netq_maps[i];
1868
1869		m = map->vnm_mbuf;
1870		if (m == NULL)
1871			continue;
1872
1873		vioif_net_unload_mbuf(vsc, map);
1874		m_freem(m);
1875	}
1876	mutex_exit(&netq->netq_lock);
1877}
1878
1879static void
1880vioif_rx_handle_locked(void *xnetq, u_int limit)
1881{
1882	struct vioif_netqueue *netq = xnetq;
1883	struct virtqueue *vq = netq->netq_vq;
1884	struct virtio_softc *vsc = vq->vq_owner;
1885	struct vioif_softc *sc = device_private(virtio_child(vsc));
1886	bool more;
1887	int enqueued;
1888	size_t ndeq;
1889
1890	KASSERT(mutex_owned(&netq->netq_lock));
1891	KASSERT(!netq->netq_stopping);
1892
1893	more = vioif_rx_deq_locked(sc, vsc, netq, limit, &ndeq);
1894	if (ndeq > 0)
1895		vioif_populate_rx_mbufs_locked(sc, netq);
1896
1897	if (more) {
1898		vioif_net_sched_handle(sc, netq);
1899		return;
1900	}
1901
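	/*
	 * Re-enable the interrupt.  A non-zero return means used buffers are
	 * already pending, so keep the interrupt off and reschedule the
	 * handler instead of risking a missed completion.
	 */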
1902	enqueued = virtio_start_vq_intr(vsc, netq->netq_vq);
1903	if (enqueued != 0) {
1904		virtio_stop_vq_intr(vsc, netq->netq_vq);
1905		vioif_net_sched_handle(sc, netq);
1906		return;
1907	}
1908
1909	netq->netq_running_handle = false;
1910}
1911
1912static int
1913vioif_rx_intr(void *arg)
1914{
1915	struct vioif_netqueue *netq = arg;
1916	struct virtqueue *vq = netq->netq_vq;
1917	struct virtio_softc *vsc = vq->vq_owner;
1918	struct vioif_softc *sc = device_private(virtio_child(vsc));
1919	u_int limit;
1920
1921	mutex_enter(&netq->netq_lock);
1922
1923	/* handler is already running in softint/workqueue */
1924	if (netq->netq_running_handle)
1925		goto done;
1926
1927	if (netq->netq_stopping)
1928		goto done;
1929
1930	netq->netq_running_handle = true;
1931
1932	limit = sc->sc_rx_intr_process_limit;
1933	virtio_stop_vq_intr(vsc, vq);
1934	vioif_rx_handle_locked(netq, limit);
1935
1936done:
1937	mutex_exit(&netq->netq_lock);
1938	return 1;
1939}
1940
1941static void
1942vioif_rx_handle(void *xnetq)
1943{
1944	struct vioif_netqueue *netq = xnetq;
1945	struct virtqueue *vq = netq->netq_vq;
1946	struct virtio_softc *vsc = vq->vq_owner;
1947	struct vioif_softc *sc = device_private(virtio_child(vsc));
1948	u_int limit;
1949
1950	mutex_enter(&netq->netq_lock);
1951
1952	KASSERT(netq->netq_running_handle);
1953
1954	if (netq->netq_stopping) {
1955		netq->netq_running_handle = false;
1956		goto done;
1957	}
1958
1959	limit = sc->sc_rx_process_limit;
1960	vioif_rx_handle_locked(netq, limit);
1961
1962done:
1963	mutex_exit(&netq->netq_lock);
1964}
1965
1966/*
1967 * Transmission implementation
1968 */
1969/* enqueue mbufs to send */
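/*
 * Common body for vioif_start_locked() (is_transmit == false, pulling
 * from ifp->if_snd) and vioif_transmit_locked()/vioif_deferred_transmit()
 * (is_transmit == true, pulling from the per-queue pcq txc_intrq).
 * When virtio_enqueue_prep() returns EAGAIN the ring is full;
 * txc_no_free_slots is latched so that the tx completion handler can
 * schedule the deferred transmit once slots have been reclaimed.
 */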
1970static void
1971vioif_send_common_locked(struct ifnet *ifp, struct vioif_netqueue *netq,
1972    bool is_transmit)
1973{
1974	struct vioif_softc *sc = ifp->if_softc;
1975	struct virtio_softc *vsc = sc->sc_virtio;
1976	struct virtqueue *vq = netq->netq_vq;
1977	struct vioif_tx_context *txc;
1978	struct vioif_net_map *map;
1979	struct mbuf *m;
1980	int queued = 0;
1981
1982	KASSERT(mutex_owned(&netq->netq_lock));
1983
1984	if (netq->netq_stopping ||
1985	    !ISSET(ifp->if_flags, IFF_RUNNING))
1986		return;
1987
1988	txc = netq->netq_ctx;
1989
1990	if (!txc->txc_link_active ||
1991	    txc->txc_no_free_slots)
1992		return;
1993
1994	for (;;) {
1995		int slot, r;
1996		r = virtio_enqueue_prep(vsc, vq, &slot);
1997		if (r == EAGAIN) {
1998			txc->txc_no_free_slots = true;
1999			break;
2000		}
2001		if (__predict_false(r != 0))
2002			panic("enqueue_prep for tx buffers");
2003
2004		if (is_transmit)
2005			m = pcq_get(txc->txc_intrq);
2006		else
2007			IFQ_DEQUEUE(&ifp->if_snd, m);
2008
2009		if (m == NULL) {
2010			virtio_enqueue_abort(vsc, vq, slot);
2011			break;
2012		}
2013
2014		map = &netq->netq_maps[slot];
2015		KASSERT(map->vnm_mbuf == NULL);
2016
2017		r = vioif_net_load_mbuf(vsc, map, m, BUS_DMA_WRITE);
2018		if (r != 0) {
2019			/* maybe just too fragmented */
2020			struct mbuf *newm;
2021
2022			newm = m_defrag(m, M_NOWAIT);
2023			if (newm != NULL) {
2024				m = newm;
2025				r = vioif_net_load_mbuf(vsc, map, m,
2026				    BUS_DMA_WRITE);
2027			} else {
2028				txc->txc_defrag_failed.ev_count++;
2029				r = -1;
2030			}
2031
2032			if (r != 0) {
2033				netq->netq_mbuf_load_failed.ev_count++;
2034				m_freem(m);
2035				if_statinc(ifp, if_oerrors);
2036				virtio_enqueue_abort(vsc, vq, slot);
2037				continue;
2038			}
2039		}
2040
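		/*
		 * An all-zero virtio-net header: flags 0 and gso_type
		 * GSO_NONE, i.e. no checksum or segmentation offload is
		 * requested for this frame.
		 */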
2041		memset(map->vnm_hdr, 0, sc->sc_hdr_size);
2042
2043		r = vioif_net_enqueue_tx(vsc, vq, slot, map);
2044		if (r != 0) {
2045			netq->netq_enqueue_failed.ev_count++;
2046			vioif_net_unload_mbuf(vsc, map);
2047			m_freem(m);
2048			/* slot already freed by vioif_net_enqueue_tx */
2049
2050			if_statinc(ifp, if_oerrors);
2051			continue;
2052		}
2053
2054		queued++;
2055		bpf_mtap(ifp, m, BPF_D_OUT);
2056	}
2057
2058	if (queued > 0) {
2059		vioif_notify(vsc, vq);
2060		ifp->if_timer = 5;
2061	}
2062}
2063
2064/* dequeue sent mbufs */
2065static bool
2066vioif_tx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
2067    struct vioif_netqueue *netq, u_int limit, size_t *ndeqp)
2068{
2069	struct virtqueue *vq = netq->netq_vq;
2070	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2071	struct vioif_net_map *map;
2072	struct mbuf *m;
2073	int slot, len;
2074	bool more;
2075	size_t ndeq;
2076
2077	KASSERT(mutex_owned(&netq->netq_lock));
2078
2079	more = false;
2080	ndeq = 0;
2081
2082	if (virtio_vq_is_enqueued(vsc, vq) == false)
2083		goto done;
2084
2085	for (;;ndeq++) {
2086		if (limit-- == 0) {
2087			more = true;
2088			break;
2089		}
2090
2091		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
2092			break;
2093
2094		map = &netq->netq_maps[slot];
2095		KASSERT(map->vnm_mbuf != NULL);
2096		m = vioif_net_dequeue_commit(vsc, vq, slot,
2097		    map, BUS_DMASYNC_POSTWRITE);
2098		KASSERT(m != NULL);
2099
2100		if_statinc(ifp, if_opackets);
2101		m_freem(m);
2102	}
2103
2104done:
2105	if (ndeqp != NULL)
2106		*ndeqp = ndeq;
2107	return more;
2108}
2109
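/*
 * Tx counterpart of the rx clear routine: reclaim every completed
 * descriptor, free any mbuf still mapped, and clear txc_no_free_slots
 * since the ring is empty again.
 */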
2110static void
2111vioif_tx_queue_clear(struct vioif_softc *sc, struct virtio_softc *vsc,
2112    struct vioif_netqueue *netq)
2113{
2114	struct vioif_tx_context *txc;
2115	struct vioif_net_map *map;
2116	struct mbuf *m;
2117	unsigned int i, vq_num;
2118	bool more;
2119
2120	mutex_enter(&netq->netq_lock);
2121
2122	txc = netq->netq_ctx;
2123	vq_num = netq->netq_vq->vq_num;
2124
2125	for (;;) {
2126		more = vioif_tx_deq_locked(sc, vsc, netq, vq_num, NULL);
2127		if (more == false)
2128			break;
2129	}
2130
2131	for (i = 0; i < vq_num; i++) {
2132		map = &netq->netq_maps[i];
2133
2134		m = map->vnm_mbuf;
2135		if (m == NULL)
2136			continue;
2137
2138		vioif_net_unload_mbuf(vsc, map);
2139		m_freem(m);
2140	}
2141
2142	txc->txc_no_free_slots = false;
2143
2144	mutex_exit(&netq->netq_lock);
2145}
2146
2147static void
2148vioif_start_locked(struct ifnet *ifp, struct vioif_netqueue *netq)
2149{
2150
2151	/*
2152	 * ifp->if_obytes and ifp->if_omcasts are added in if_transmit()@if.c.
2153	 */
2154	vioif_send_common_locked(ifp, netq, false);
2155
2156}
2157
2158static void
2159vioif_transmit_locked(struct ifnet *ifp, struct vioif_netqueue *netq)
2160{
2161
2162	vioif_send_common_locked(ifp, netq, true);
2163}
2164
2165static void
2166vioif_deferred_transmit(void *arg)
2167{
2168	struct vioif_netqueue *netq = arg;
2169	struct virtio_softc *vsc = netq->netq_vq->vq_owner;
2170	struct vioif_softc *sc = device_private(virtio_child(vsc));
2171	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2172
2173	mutex_enter(&netq->netq_lock);
2174	vioif_send_common_locked(ifp, netq, true);
2175	mutex_exit(&netq->netq_lock);
2176}
2177
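/*
 * Tx completion processing under the queue lock: reclaim sent slots,
 * and if the ring had been marked full (txc_no_free_slots) and slots
 * were freed, clear the flag and kick the deferred transmit softint.
 * With VIRTIO_F_RING_EVENT_IDX the completion interrupt is postponed
 * via virtio_postpone_intr_smart() rather than simply re-enabled, so
 * that completions can be batched.
 */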
2178static void
2179vioif_tx_handle_locked(struct vioif_netqueue *netq, u_int limit)
2180{
2181	struct virtqueue *vq = netq->netq_vq;
2182	struct vioif_tx_context *txc = netq->netq_ctx;
2183	struct virtio_softc *vsc = vq->vq_owner;
2184	struct vioif_softc *sc = device_private(virtio_child(vsc));
2185	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2186	bool more;
2187	int enqueued;
2188	size_t ndeq;
2189
2190	KASSERT(mutex_owned(&netq->netq_lock));
2191	KASSERT(!netq->netq_stopping);
2192
2193	more = vioif_tx_deq_locked(sc, vsc, netq, limit, &ndeq);
2194	if (txc->txc_no_free_slots && ndeq > 0) {
2195		txc->txc_no_free_slots = false;
2196		softint_schedule(txc->txc_deferred_transmit);
2197	}
2198
2199	if (more) {
2200		vioif_net_sched_handle(sc, netq);
2201		return;
2202	}
2203
2204	enqueued = (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX) ?
2205	    virtio_postpone_intr_smart(vsc, vq):
2206	    virtio_start_vq_intr(vsc, vq);
2207	if (enqueued != 0) {
2208		virtio_stop_vq_intr(vsc, vq);
2209		vioif_net_sched_handle(sc, netq);
2210		return;
2211	}
2212
2213	netq->netq_running_handle = false;
2214
2215	/* for ALTQ */
2216	if (netq == &sc->sc_netqs[VIOIF_NETQ_TXQID(0)])
2217		if_schedule_deferred_start(ifp);
2218
2219	softint_schedule(txc->txc_deferred_transmit);
2220}
2221
2222static int
2223vioif_tx_intr(void *arg)
2224{
2225	struct vioif_netqueue *netq = arg;
2226	struct virtqueue *vq = netq->netq_vq;
2227	struct virtio_softc *vsc = vq->vq_owner;
2228	struct vioif_softc *sc = device_private(virtio_child(vsc));
2229	u_int limit;
2230
2231	mutex_enter(&netq->netq_lock);
2232
2233	/* tx handler is already running in softint/workqueue */
2234	if (netq->netq_running_handle)
2235		goto done;
2236
2237	if (netq->netq_stopping)
2238		goto done;
2239
2240	netq->netq_running_handle = true;
2241
2242	virtio_stop_vq_intr(vsc, vq);
2243	netq->netq_workqueue = sc->sc_txrx_workqueue_sysctl;
2244	limit = sc->sc_tx_intr_process_limit;
2245	vioif_tx_handle_locked(netq, limit);
2246
2247done:
2248	mutex_exit(&netq->netq_lock);
2249	return 1;
2250}
2251
2252static void
2253vioif_tx_handle(void *xnetq)
2254{
2255	struct vioif_netqueue *netq = xnetq;
2256	struct virtqueue *vq = netq->netq_vq;
2257	struct virtio_softc *vsc = vq->vq_owner;
2258	struct vioif_softc *sc = device_private(virtio_child(vsc));
2259	u_int limit;
2260
2261	mutex_enter(&netq->netq_lock);
2262
2263	KASSERT(netq->netq_running_handle);
2264
2265	if (netq->netq_stopping) {
2266		netq->netq_running_handle = false;
2267		goto done;
2268	}
2269
2270	limit = sc->sc_tx_process_limit;
2271	vioif_tx_handle_locked(netq, limit);
2272
2273done:
2274	mutex_exit(&netq->netq_lock);
2275}
2276
2277/*
2278 * Control vq
2279 */
2280/* helpers to issue a control virtqueue command and wait for its completion */
2281static void
2282vioif_ctrl_acquire(struct vioif_softc *sc)
2283{
2284	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2285
2286	mutex_enter(&ctrlq->ctrlq_wait_lock);
2287	while (ctrlq->ctrlq_inuse != FREE)
2288		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
2289	ctrlq->ctrlq_inuse = INUSE;
2290	ctrlq->ctrlq_owner = curlwp;
2291	mutex_exit(&ctrlq->ctrlq_wait_lock);
2292}
2293
2294static void
2295vioif_ctrl_release(struct vioif_softc *sc)
2296{
2297	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2298
2299	KASSERT(ctrlq->ctrlq_inuse != FREE);
2300	KASSERT(ctrlq->ctrlq_owner == curlwp);
2301
2302	mutex_enter(&ctrlq->ctrlq_wait_lock);
2303	ctrlq->ctrlq_inuse = FREE;
2304	ctrlq->ctrlq_owner = NULL;
2305	cv_signal(&ctrlq->ctrlq_wait);
2306	mutex_exit(&ctrlq->ctrlq_wait_lock);
2307}
2308
2309static int
2310vioif_ctrl_load_cmdspec(struct vioif_softc *sc,
2311    struct vioif_ctrl_cmdspec *specs, int nspecs)
2312{
2313	struct virtio_softc *vsc = sc->sc_virtio;
2314	int i, r, loaded;
2315
2316	loaded = 0;
2317	for (i = 0; i < nspecs; i++) {
2318		r = bus_dmamap_load(virtio_dmat(vsc),
2319		    specs[i].dmamap, specs[i].buf, specs[i].bufsize,
2320		    NULL, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
2321		if (r) {
2322			sc->sc_ctrlq.ctrlq_cmd_load_failed.ev_count++;
2323			goto err;
2324		}
2325		loaded++;
2326
2327	}
2328
2329	return 0;
2330
2331err:
2332	for (i = 0; i < loaded; i++) {
2333		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2334	}
2335
2336	return r;
2337}
2338
2339static void
2340vioif_ctrl_unload_cmdspec(struct vioif_softc *sc,
2341    struct vioif_ctrl_cmdspec *specs, int nspecs)
2342{
2343	struct virtio_softc *vsc = sc->sc_virtio;
2344	int i;
2345
2346	for (i = 0; i < nspecs; i++) {
2347		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2348	}
2349}
2350
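/*
 * Build and submit one control command: the pre-mapped command header
 * and the caller's cmdspec buffers are enqueued device-readable, the
 * status buffer device-writable, then the queue is kicked and the
 * caller sleeps on ctrlq_wait until vioif_ctrl_intr() marks the request
 * DONE.  The single status byte decides between 0 and EIO.
 */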
2351static int
2352vioif_ctrl_send_command(struct vioif_softc *sc, uint8_t class, uint8_t cmd,
2353    struct vioif_ctrl_cmdspec *specs, int nspecs)
2354{
2355	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2356	struct virtqueue *vq = ctrlq->ctrlq_vq;
2357	struct virtio_softc *vsc = sc->sc_virtio;
2358	int i, r, slot;
2359
2360	ctrlq->ctrlq_cmd->class = class;
2361	ctrlq->ctrlq_cmd->command = cmd;
2362
2363	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap,
2364	    0, sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_PREWRITE);
2365	for (i = 0; i < nspecs; i++) {
2366		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap,
2367		    0, specs[i].bufsize, BUS_DMASYNC_PREWRITE);
2368	}
2369	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap,
2370	    0, sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_PREREAD);
2371
2372	/* we need to explicitly (re)start vq intr when using RING EVENT IDX */
2373	if (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX)
2374		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
2375
2376	r = virtio_enqueue_prep(vsc, vq, &slot);
2377	if (r != 0)
2378		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2379	r = virtio_enqueue_reserve(vsc, vq, slot, nspecs + 2);
2380	if (r != 0)
2381		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2382	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_cmd_dmamap, true);
2383	for (i = 0; i < nspecs; i++) {
2384		virtio_enqueue(vsc, vq, slot, specs[i].dmamap, true);
2385	}
2386	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_status_dmamap, false);
2387	virtio_enqueue_commit(vsc, vq, slot, true);
2388
2389	/* wait for done */
2390	mutex_enter(&ctrlq->ctrlq_wait_lock);
2391	while (ctrlq->ctrlq_inuse != DONE)
2392		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
2393	mutex_exit(&ctrlq->ctrlq_wait_lock);
2394	/* already dequeued */
2395
2396	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap, 0,
2397	    sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
2398	for (i = 0; i < nspecs; i++) {
2399		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap, 0,
2400		    specs[i].bufsize, BUS_DMASYNC_POSTWRITE);
2401	}
2402	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap, 0,
2403	    sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_POSTREAD);
2404
2405	if (ctrlq->ctrlq_status->ack == VIRTIO_NET_OK)
2406		r = 0;
2407	else {
2408		device_printf(sc->sc_dev, "control command failed\n");
2409		sc->sc_ctrlq.ctrlq_cmd_failed.ev_count++;
2410		r = EIO;
2411	}
2412
2413	return r;
2414}
2415
2416/* ctrl vq interrupt; wake up the command issuer */
2417static int
2418vioif_ctrl_intr(void *arg)
2419{
2420	struct vioif_ctrlqueue *ctrlq = arg;
2421	struct virtqueue *vq = ctrlq->ctrlq_vq;
2422	struct virtio_softc *vsc = vq->vq_owner;
2423	int r, slot;
2424
2425	if (virtio_vq_is_enqueued(vsc, vq) == false)
2426		return 0;
2427
2428	r = virtio_dequeue(vsc, vq, &slot, NULL);
2429	if (r == ENOENT)
2430		return 0;
2431	virtio_dequeue_commit(vsc, vq, slot);
2432
2433	mutex_enter(&ctrlq->ctrlq_wait_lock);
2434	ctrlq->ctrlq_inuse = DONE;
2435	cv_signal(&ctrlq->ctrlq_wait);
2436	mutex_exit(&ctrlq->ctrlq_wait_lock);
2437
2438	return 1;
2439}
2440
2441static int
2442vioif_ctrl_rx(struct vioif_softc *sc, int cmd, bool onoff)
2443{
2444	struct virtio_net_ctrl_rx *rx = sc->sc_ctrlq.ctrlq_rx;
2445	struct vioif_ctrl_cmdspec specs[1];
2446	int r;
2447
2448	if (!sc->sc_has_ctrl)
2449		return ENOTSUP;
2450
2451	vioif_ctrl_acquire(sc);
2452
2453	rx->onoff = onoff;
2454	specs[0].dmamap = sc->sc_ctrlq.ctrlq_rx_dmamap;
2455	specs[0].buf = rx;
2456	specs[0].bufsize = sizeof(*rx);
2457
2458	r = vioif_ctrl_send_command(sc, VIRTIO_NET_CTRL_RX, cmd,
2459	    specs, __arraycount(specs));
2460
2461	vioif_ctrl_release(sc);
2462	return r;
2463}
2464
2465static int
2466vioif_set_promisc(struct vioif_softc *sc, bool onoff)
2467{
2468	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, onoff);
2469}
2470
2471static int
2472vioif_set_allmulti(struct vioif_softc *sc, bool onoff)
2473{
2474	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, onoff);
2475}
2476
2477static int
2478vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *sc, int nvq_pairs)
2479{
2480	struct virtio_net_ctrl_mq *mq = sc->sc_ctrlq.ctrlq_mq;
2481	struct vioif_ctrl_cmdspec specs[1];
2482	int r;
2483
2484	if (!sc->sc_has_ctrl)
2485		return ENOTSUP;
2486
2487	if (nvq_pairs <= 1)
2488		return EINVAL;
2489
2490	vioif_ctrl_acquire(sc);
2491
2492	mq->virtqueue_pairs = virtio_rw16(sc->sc_virtio, nvq_pairs);
2493	specs[0].dmamap = sc->sc_ctrlq.ctrlq_mq_dmamap;
2494	specs[0].buf = mq;
2495	specs[0].bufsize = sizeof(*mq);
2496
2497	r = vioif_ctrl_send_command(sc,
2498	    VIRTIO_NET_CTRL_MQ, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
2499	    specs, __arraycount(specs));
2500
2501	vioif_ctrl_release(sc);
2502
2503	return r;
2504}
2505
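/*
 * Push a newly configured MAC address to the device: via the
 * VIRTIO_NET_CTRL_MAC_ADDR_SET command when the device negotiated
 * VIRTIO_NET_F_CTRL_MAC_ADDR, otherwise by writing the bytes directly
 * into the MAC field of the device config space.
 */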
2506static int
2507vioif_set_mac_addr(struct vioif_softc *sc)
2508{
2509	struct virtio_net_ctrl_mac_addr *ma =
2510	    sc->sc_ctrlq.ctrlq_mac_addr;
2511	struct vioif_ctrl_cmdspec specs[1];
2512	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2513	int nspecs = __arraycount(specs);
2514	uint64_t features;
2515	int r;
2516	size_t i;
2517
2518	if (!sc->sc_has_ctrl)
2519		return ENOTSUP;
2520
2521	if (memcmp(CLLADDR(ifp->if_sadl), sc->sc_mac,
2522	    ETHER_ADDR_LEN) == 0) {
2523		return 0;
2524	}
2525
2526	memcpy(sc->sc_mac, CLLADDR(ifp->if_sadl), ETHER_ADDR_LEN);
2527
2528	features = virtio_features(sc->sc_virtio);
2529	if (features & VIRTIO_NET_F_CTRL_MAC_ADDR) {
2530		vioif_ctrl_acquire(sc);
2531
2532		memcpy(ma->mac, sc->sc_mac, ETHER_ADDR_LEN);
2533		specs[0].dmamap = sc->sc_ctrlq.ctrlq_mac_addr_dmamap;
2534		specs[0].buf = ma;
2535		specs[0].bufsize = sizeof(*ma);
2536
2537		r = vioif_ctrl_send_command(sc,
2538		    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_ADDR_SET,
2539		    specs, nspecs);
2540
2541		vioif_ctrl_release(sc);
2542	} else {
2543		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
2544			virtio_write_device_config_1(sc->sc_virtio,
2545			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
2546		}
2547		r = 0;
2548	}
2549
2550	return r;
2551}
2552
2553static int
2554vioif_set_rx_filter(struct vioif_softc *sc)
2555{
2556	/* filter already set in ctrlq->ctrlq_mac_tbl */
2557	struct virtio_softc *vsc = sc->sc_virtio;
2558	struct virtio_net_ctrl_mac_tbl *mac_tbl_uc, *mac_tbl_mc;
2559	struct vioif_ctrl_cmdspec specs[2];
2560	int nspecs = __arraycount(specs);
2561	int r;
2562
2563	mac_tbl_uc = sc->sc_ctrlq.ctrlq_mac_tbl_uc;
2564	mac_tbl_mc = sc->sc_ctrlq.ctrlq_mac_tbl_mc;
2565
2566	if (!sc->sc_has_ctrl)
2567		return ENOTSUP;
2568
2569	vioif_ctrl_acquire(sc);
2570
2571	specs[0].dmamap = sc->sc_ctrlq.ctrlq_tbl_uc_dmamap;
2572	specs[0].buf = mac_tbl_uc;
2573	specs[0].bufsize = sizeof(*mac_tbl_uc)
2574	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_uc->nentries));
2575
2576	specs[1].dmamap = sc->sc_ctrlq.ctrlq_tbl_mc_dmamap;
2577	specs[1].buf = mac_tbl_mc;
2578	specs[1].bufsize = sizeof(*mac_tbl_mc)
2579	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_mc->nentries));
2580
2581	r = vioif_ctrl_load_cmdspec(sc, specs, nspecs);
2582	if (r != 0)
2583		goto out;
2584
2585	r = vioif_ctrl_send_command(sc,
2586	    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET,
2587	    specs, nspecs);
2588
2589	vioif_ctrl_unload_cmdspec(sc, specs, nspecs);
2590
2591out:
2592	vioif_ctrl_release(sc);
2593
2594	return r;
2595}
2596
2597/*
2598 * If the multicast filter is small enough (<= MAXENTRIES), program the
2599 * rx filter with it.  If the filter is too large, or if programming it
2600 * fails, fall back to ALLMULTI.
2601 */
2602static int
2603vioif_rx_filter(struct vioif_softc *sc)
2604{
2605	struct virtio_softc *vsc = sc->sc_virtio;
2606	struct ethercom *ec = &sc->sc_ethercom;
2607	struct ifnet *ifp = &ec->ec_if;
2608	struct ether_multi *enm;
2609	struct ether_multistep step;
2610	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2611	int nentries;
2612	bool allmulti = false;
2613	int r;
2614
2615	if (!sc->sc_has_ctrl) {
2616		goto set_ifflags;
2617	}
2618
2619	memcpy(ctrlq->ctrlq_mac_tbl_uc->macs[0],
2620	    CLLADDR(ifp->if_sadl), ETHER_ADDR_LEN);
2621
2622	nentries = 0;
2623	allmulti = false;
2624
2625	ETHER_LOCK(ec);
2626	for (ETHER_FIRST_MULTI(step, ec, enm); enm != NULL;
2627	    ETHER_NEXT_MULTI(step, enm)) {
2628		if (nentries >= VIRTIO_NET_CTRL_MAC_MAXENTRIES) {
2629			allmulti = true;
2630			break;
2631		}
2632		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
2633			allmulti = true;
2634			break;
2635		}
2636
2637		memcpy(ctrlq->ctrlq_mac_tbl_mc->macs[nentries],
2638		    enm->enm_addrlo, ETHER_ADDR_LEN);
2639		nentries++;
2640	}
2641	ETHER_UNLOCK(ec);
2642
2643	r = vioif_set_mac_addr(sc);
2644	if (r != 0) {
2645		log(LOG_WARNING, "%s: couldn't set MAC address\n",
2646		    ifp->if_xname);
2647	}
2648
2649	if (!allmulti) {
2650		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 1);
2651		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, nentries);
2652		r = vioif_set_rx_filter(sc);
2653		if (r != 0) {
2654			allmulti = true; /* fallback */
2655		}
2656	}
2657
2658	if (allmulti) {
2659		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 0);
2660		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, 0);
2661		r = vioif_set_rx_filter(sc);
2662		if (r != 0) {
2663			log(LOG_DEBUG, "%s: couldn't clear RX filter\n",
2664			    ifp->if_xname);
2665			/* what to do on failure? */
2666		}
2667
2668		ifp->if_flags |= IFF_ALLMULTI;
2669	}
2670
2671set_ifflags:
2672	r = vioif_ifflags(sc);
2673
2674	return r;
2675}
2676
2677/*
2678 * VM configuration changes
2679 */
2680static int
2681vioif_config_change(struct virtio_softc *vsc)
2682{
2683	struct vioif_softc *sc = device_private(virtio_child(vsc));
2684
2685	softint_schedule(sc->sc_cfg_softint);
2686	return 0;
2687}
2688
2689static void
2690vioif_cfg_softint(void *arg)
2691{
2692	struct vioif_softc *sc = arg;
2693	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2694
2695	vioif_update_link_status(sc);
2696	vioif_start(ifp);
2697}
2698
2699static int
2700vioif_get_link_status(struct vioif_softc *sc)
2701{
2702	struct virtio_softc *vsc = sc->sc_virtio;
2703	uint16_t status;
2704
2705	if (virtio_features(vsc) & VIRTIO_NET_F_STATUS)
2706		status = virtio_read_device_config_2(vsc,
2707		    VIRTIO_NET_CONFIG_STATUS);
2708	else
2709		status = VIRTIO_NET_S_LINK_UP;
2710
2711	if ((status & VIRTIO_NET_S_LINK_UP) != 0)
2712		return LINK_STATE_UP;
2713
2714	return LINK_STATE_DOWN;
2715}
2716
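/*
 * Recompute the link state and, if it changed, propagate it to every
 * active tx queue's txc_link_active (under each queue's lock) before
 * notifying the network stack via if_link_state_change().
 */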
2717static void
2718vioif_update_link_status(struct vioif_softc *sc)
2719{
2720	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2721	struct vioif_netqueue *netq;
2722	struct vioif_tx_context *txc;
2723	bool active;
2724	int link, i;
2725
2726	mutex_enter(&sc->sc_lock);
2727
2728	link = vioif_get_link_status(sc);
2729
2730	if (link == sc->sc_link_state)
2731		goto done;
2732
2733	sc->sc_link_state = link;
2734
2735	active = VIOIF_IS_LINK_ACTIVE(sc);
2736	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
2737		netq = &sc->sc_netqs[VIOIF_NETQ_TXQID(i)];
2738
2739		mutex_enter(&netq->netq_lock);
2740		txc = netq->netq_ctx;
2741		txc->txc_link_active = active;
2742		mutex_exit(&netq->netq_lock);
2743	}
2744
2745	if_link_state_change(ifp, sc->sc_link_state);
2746
2747done:
2748	mutex_exit(&sc->sc_lock);
2749}
2750
2751static void
2752vioif_workq_work(struct work *wk, void *context)
2753{
2754	struct vioif_work *work;
2755
2756	work = container_of(wk, struct vioif_work, cookie);
2757
2758	atomic_store_relaxed(&work->added, 0);
2759	work->func(work->arg);
2760}
2761
2762static struct workqueue *
2763vioif_workq_create(const char *name, pri_t prio, int ipl, int flags)
2764{
2765	struct workqueue *wq;
2766	int error;
2767
2768	error = workqueue_create(&wq, name, vioif_workq_work, NULL,
2769	    prio, ipl, flags);
2770
2771	if (error)
2772		return NULL;
2773
2774	return wq;
2775}
2776
2777static void
2778vioif_workq_destroy(struct workqueue *wq)
2779{
2780
2781	workqueue_destroy(wq);
2782}
2783
2784static void
2785vioif_work_set(struct vioif_work *work, void (*func)(void *), void *arg)
2786{
2787
2788	memset(work, 0, sizeof(*work));
2789	work->func = func;
2790	work->arg = arg;
2791}
2792
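/*
 * The `added' flag makes enqueueing idempotent: a vioif_work already
 * sitting on the workqueue is not enqueued again until vioif_workq_work()
 * has cleared the flag and started running it.
 */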
2793static void
2794vioif_work_add(struct workqueue *wq, struct vioif_work *work)
2795{
2796
2797	if (atomic_load_relaxed(&work->added) != 0)
2798		return;
2799
2800	atomic_store_relaxed(&work->added, 1);
2801	kpreempt_disable();
2802	workqueue_enqueue(wq, &work->cookie, NULL);
2803	kpreempt_enable();
2804}
2805
2806static void
2807vioif_work_wait(struct workqueue *wq, struct vioif_work *work)
2808{
2809
2810	workqueue_wait(wq, &work->cookie);
2811}
2812
2813MODULE(MODULE_CLASS_DRIVER, if_vioif, "virtio");
2814
2815#ifdef _MODULE
2816#include "ioconf.c"
2817#endif
2818
2819static int
2820if_vioif_modcmd(modcmd_t cmd, void *opaque)
2821{
2822	int error = 0;
2823
2824#ifdef _MODULE
2825	switch (cmd) {
2826	case MODULE_CMD_INIT:
2827		error = config_init_component(cfdriver_ioconf_if_vioif,
2828		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2829		break;
2830	case MODULE_CMD_FINI:
2831		error = config_fini_component(cfdriver_ioconf_if_vioif,
2832		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2833		break;
2834	default:
2835		error = ENOTTY;
2836		break;
2837	}
2838#endif
2839
2840	return error;
2841}
2842