1/*	$NetBSD: if_vioif.c,v 1.106 2023/03/24 13:16:15 yamaguchi Exp $	*/
2
3/*
4 * Copyright (c) 2020 The NetBSD Foundation, Inc.
5 * Copyright (c) 2010 Minoura Makoto.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__KERNEL_RCSID(0, "$NetBSD: if_vioif.c,v 1.106 2023/03/24 13:16:15 yamaguchi Exp $");
31
32#ifdef _KERNEL_OPT
33#include "opt_net_mpsafe.h"
34#endif
35
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/kernel.h>
39#include <sys/atomic.h>
40#include <sys/bus.h>
41#include <sys/condvar.h>
42#include <sys/device.h>
43#include <sys/evcnt.h>
44#include <sys/intr.h>
45#include <sys/kmem.h>
46#include <sys/mbuf.h>
47#include <sys/mutex.h>
48#include <sys/sockio.h>
49#include <sys/syslog.h>
50#include <sys/cpu.h>
51#include <sys/module.h>
52#include <sys/pcq.h>
53#include <sys/workqueue.h>
54#include <sys/xcall.h>
55
56#include <dev/pci/virtioreg.h>
57#include <dev/pci/virtiovar.h>
58
59#include <net/if.h>
60#include <net/if_dl.h>
61#include <net/if_media.h>
62#include <net/if_ether.h>
63
64#include <net/bpf.h>
65
66#include "ioconf.h"
67
68#ifdef NET_MPSAFE
69#define VIOIF_MPSAFE	1
70#define VIOIF_MULTIQ	1
71#endif
72
73/*
74 * if_vioifreg.h:
75 */
76/* Configuration registers */
77#define VIRTIO_NET_CONFIG_MAC		 0 /* 8bit x 6byte */
78#define VIRTIO_NET_CONFIG_STATUS	 6 /* 16bit */
79#define VIRTIO_NET_CONFIG_MAX_VQ_PAIRS	 8 /* 16bit */
80#define VIRTIO_NET_CONFIG_MTU		10 /* 16bit */
81
82/* Feature bits */
83#define VIRTIO_NET_F_CSUM		__BIT(0)
84#define VIRTIO_NET_F_GUEST_CSUM		__BIT(1)
85#define VIRTIO_NET_F_MAC		__BIT(5)
86#define VIRTIO_NET_F_GSO		__BIT(6)
87#define VIRTIO_NET_F_GUEST_TSO4		__BIT(7)
88#define VIRTIO_NET_F_GUEST_TSO6		__BIT(8)
89#define VIRTIO_NET_F_GUEST_ECN		__BIT(9)
90#define VIRTIO_NET_F_GUEST_UFO		__BIT(10)
91#define VIRTIO_NET_F_HOST_TSO4		__BIT(11)
92#define VIRTIO_NET_F_HOST_TSO6		__BIT(12)
93#define VIRTIO_NET_F_HOST_ECN		__BIT(13)
94#define VIRTIO_NET_F_HOST_UFO		__BIT(14)
95#define VIRTIO_NET_F_MRG_RXBUF		__BIT(15)
96#define VIRTIO_NET_F_STATUS		__BIT(16)
97#define VIRTIO_NET_F_CTRL_VQ		__BIT(17)
98#define VIRTIO_NET_F_CTRL_RX		__BIT(18)
99#define VIRTIO_NET_F_CTRL_VLAN		__BIT(19)
100#define VIRTIO_NET_F_CTRL_RX_EXTRA	__BIT(20)
101#define VIRTIO_NET_F_GUEST_ANNOUNCE	__BIT(21)
102#define VIRTIO_NET_F_MQ			__BIT(22)
103#define VIRTIO_NET_F_CTRL_MAC_ADDR 	__BIT(23)
104
105#define VIRTIO_NET_FLAG_BITS			\
106	VIRTIO_COMMON_FLAG_BITS			\
107	"b\x17" "CTRL_MAC\0"			\
108	"b\x16" "MQ\0"				\
109	"b\x15" "GUEST_ANNOUNCE\0"		\
110	"b\x14" "CTRL_RX_EXTRA\0"		\
111	"b\x13" "CTRL_VLAN\0"			\
112	"b\x12" "CTRL_RX\0"			\
113	"b\x11" "CTRL_VQ\0"			\
114	"b\x10" "STATUS\0"			\
115	"b\x0f" "MRG_RXBUF\0"			\
116	"b\x0e" "HOST_UFO\0"			\
117	"b\x0d" "HOST_ECN\0"			\
118	"b\x0c" "HOST_TSO6\0"			\
119	"b\x0b" "HOST_TSO4\0"			\
120	"b\x0a" "GUEST_UFO\0"			\
121	"b\x09" "GUEST_ECN\0"			\
122	"b\x08" "GUEST_TSO6\0"			\
123	"b\x07" "GUEST_TSO4\0"			\
124	"b\x06" "GSO\0"				\
125	"b\x05" "MAC\0"				\
126	"b\x01" "GUEST_CSUM\0"			\
127	"b\x00" "CSUM\0"
128
129/* Status */
130#define VIRTIO_NET_S_LINK_UP	1
131
132/* Packet header structure */
133struct virtio_net_hdr {
134	uint8_t		flags;
135	uint8_t		gso_type;
136	uint16_t	hdr_len;
137	uint16_t	gso_size;
138	uint16_t	csum_start;
139	uint16_t	csum_offset;
140
141	uint16_t	num_buffers; /* VIRTIO_NET_F_MRG_RXBUF enabled or v1 */
142} __packed;
143
144#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
145#define VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
146#define VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
147#define VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
148#define VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
149#define VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */
150
151#define VIRTIO_NET_MAX_GSO_LEN		(65536+ETHER_HDR_LEN)
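
/*
 * Note: this driver requests none of the checksum/GSO offload features, so
 * on transmit the header above is simply zeroed (flags == 0, gso_type ==
 * VIRTIO_NET_HDR_GSO_NONE); see vioif_send_common_locked().  The trailing
 * num_buffers field is only part of the header when VIRTIO_NET_F_MRG_RXBUF
 * or VIRTIO_F_VERSION_1 has been negotiated; vioif_attach() sets
 * sc_hdr_size accordingly.
 */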
152
153/* Control virtqueue */
154struct virtio_net_ctrl_cmd {
155	uint8_t	class;
156	uint8_t	command;
157} __packed;
158#define VIRTIO_NET_CTRL_RX		0
159# define VIRTIO_NET_CTRL_RX_PROMISC	0
160# define VIRTIO_NET_CTRL_RX_ALLMULTI	1
161
162#define VIRTIO_NET_CTRL_MAC		1
163# define VIRTIO_NET_CTRL_MAC_TABLE_SET	0
164# define  VIRTIO_NET_CTRL_MAC_ADDR_SET	1
165
166#define VIRTIO_NET_CTRL_VLAN		2
167# define VIRTIO_NET_CTRL_VLAN_ADD	0
168# define VIRTIO_NET_CTRL_VLAN_DEL	1
169
170#define VIRTIO_NET_CTRL_MQ			4
171# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET	0
172# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN	1
173# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX	0x8000
174
175struct virtio_net_ctrl_status {
176	uint8_t	ack;
177} __packed;
178#define VIRTIO_NET_OK			0
179#define VIRTIO_NET_ERR			1
180
181struct virtio_net_ctrl_rx {
182	uint8_t	onoff;
183} __packed;
184
185struct virtio_net_ctrl_mac_tbl {
186	uint32_t nentries;
187	uint8_t macs[][ETHER_ADDR_LEN];
188} __packed;
189
190struct virtio_net_ctrl_mac_addr {
191	uint8_t mac[ETHER_ADDR_LEN];
192} __packed;
193
194struct virtio_net_ctrl_vlan {
195	uint16_t id;
196} __packed;
197
198struct virtio_net_ctrl_mq {
199	uint16_t virtqueue_pairs;
200} __packed;
201
202/*
203 * if_vioifvar.h:
204 */
205
/*
 * Locking notes:
 * + fields in vioif_netqueue are protected by netq_lock (a spin mutex)
 *      - no more than one netq_lock may be held at once
 * + fields in vioif_tx_context and vioif_rx_context are also protected
 *   by netq_lock.
 * + ctrlq_inuse is protected by ctrlq_wait_lock.
 *      - other fields in vioif_ctrlqueue are protected by ctrlq_inuse
 *      - netq_lock cannot be held together with ctrlq_wait_lock
 * + fields in vioif_softc other than the queues are protected by
 *   sc->sc_lock (an adaptive mutex)
 *      - sc_lock is acquired before any other lock
 */
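
/*
 * Illustrative acquisition order (a sketch, not a code path copied from
 * below): sc_lock is taken first, then at most one netq_lock, e.g.
 *
 *	mutex_enter(&sc->sc_lock);
 *	mutex_enter(&netq->netq_lock);
 *	... update per-queue state ...
 *	mutex_exit(&netq->netq_lock);
 *	mutex_exit(&sc->sc_lock);
 *
 * ctrlq_wait_lock must never be taken while a netq_lock is held.
 */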
219
220struct vioif_ctrl_cmdspec {
221	bus_dmamap_t	dmamap;
222	void		*buf;
223	bus_size_t	bufsize;
224};
225
226struct vioif_work {
227	struct work	 cookie;
228	void		(*func)(void *);
229	void		*arg;
230	unsigned int	 added;
231};
232
233struct vioif_net_map {
234	struct virtio_net_hdr	*vnm_hdr;
235	bus_dmamap_t		 vnm_hdr_map;
236	struct mbuf		*vnm_mbuf;
237	bus_dmamap_t		 vnm_mbuf_map;
238};
239
240#define VIOIF_NETQ_RX		0
241#define VIOIF_NETQ_TX		1
242#define VIOIF_NETQ_IDX		2
243#define VIOIF_NETQ_DIR(n)	((n) % VIOIF_NETQ_IDX)
244#define VIOIF_NETQ_PAIRIDX(n)	((n) / VIOIF_NETQ_IDX)
245#define VIOIF_NETQ_RXQID(n)	((n) * VIOIF_NETQ_IDX + VIOIF_NETQ_RX)
246#define VIOIF_NETQ_TXQID(n)	((n) * VIOIF_NETQ_IDX + VIOIF_NETQ_TX)
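
/*
 * Worked example of the mapping above: virtqueues alternate rx0, tx0, rx1,
 * tx1, ..., so VIOIF_NETQ_RXQID(1) == 2 and VIOIF_NETQ_TXQID(1) == 3, while
 * VIOIF_NETQ_PAIRIDX(3) == 1 and VIOIF_NETQ_DIR(3) == VIOIF_NETQ_TX.
 */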
247
248struct vioif_netqueue {
249	kmutex_t		 netq_lock;
250	struct virtqueue	*netq_vq;
251	bool			 netq_stopping;
252	bool			 netq_running_handle;
253	void			*netq_maps_kva;
254	struct vioif_net_map	*netq_maps;
255
256	void			*netq_softint;
257	struct vioif_work	 netq_work;
258	bool			 netq_workqueue;
259
260	char			 netq_evgroup[32];
261	struct evcnt		 netq_mbuf_load_failed;
262	struct evcnt		 netq_enqueue_failed;
263
264	void			*netq_ctx;
265};
266
267struct vioif_tx_context {
268	bool			 txc_link_active;
269	bool			 txc_no_free_slots;
270	pcq_t			*txc_intrq;
271	void			*txc_deferred_transmit;
272
273	struct evcnt		 txc_defrag_failed;
274};
275
276struct vioif_rx_context {
277	struct evcnt		 rxc_mbuf_enobufs;
278};
279struct vioif_ctrlqueue {
280	struct virtqueue		*ctrlq_vq;
281	enum {
282		FREE, INUSE, DONE
283	}				ctrlq_inuse;
284	kcondvar_t			ctrlq_wait;
285	kmutex_t			ctrlq_wait_lock;
286	struct lwp			*ctrlq_owner;
287
288	struct virtio_net_ctrl_cmd	*ctrlq_cmd;
289	struct virtio_net_ctrl_status	*ctrlq_status;
290	struct virtio_net_ctrl_rx	*ctrlq_rx;
291	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_uc;
292	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_mc;
293	struct virtio_net_ctrl_mac_addr	*ctrlq_mac_addr;
294	struct virtio_net_ctrl_mq	*ctrlq_mq;
295
296	bus_dmamap_t			ctrlq_cmd_dmamap;
297	bus_dmamap_t			ctrlq_status_dmamap;
298	bus_dmamap_t			ctrlq_rx_dmamap;
299	bus_dmamap_t			ctrlq_tbl_uc_dmamap;
300	bus_dmamap_t			ctrlq_tbl_mc_dmamap;
301	bus_dmamap_t			ctrlq_mac_addr_dmamap;
302	bus_dmamap_t			ctrlq_mq_dmamap;
303
304	struct evcnt			ctrlq_cmd_load_failed;
305	struct evcnt			ctrlq_cmd_failed;
306};
307
308struct vioif_softc {
309	device_t		sc_dev;
310	kmutex_t		sc_lock;
311	struct sysctllog	*sc_sysctllog;
312
313	struct virtio_softc	*sc_virtio;
314	struct virtqueue	*sc_vqs;
315	u_int			 sc_hdr_size;
316
317	int			sc_max_nvq_pairs;
318	int			sc_req_nvq_pairs;
319	int			sc_act_nvq_pairs;
320
321	uint8_t			sc_mac[ETHER_ADDR_LEN];
322	struct ethercom		sc_ethercom;
323	int			sc_link_state;
324
325	struct vioif_netqueue	*sc_netqs;
326
327	bool			sc_has_ctrl;
328	struct vioif_ctrlqueue	sc_ctrlq;
329
330	bus_dma_segment_t	 sc_segs[1];
331	void			*sc_dmamem;
332	void			*sc_kmem;
333
334	void			*sc_cfg_softint;
335
336	struct workqueue	*sc_txrx_workqueue;
337	bool			 sc_txrx_workqueue_sysctl;
338	u_int			 sc_tx_intr_process_limit;
339	u_int			 sc_tx_process_limit;
340	u_int			 sc_rx_intr_process_limit;
341	u_int			 sc_rx_process_limit;
342};
343#define VIRTIO_NET_TX_MAXNSEGS		(16) /* XXX */
344#define VIRTIO_NET_CTRL_MAC_MAXENTRIES	(64) /* XXX */
345
346#define VIOIF_TX_INTR_PROCESS_LIMIT	256
347#define VIOIF_TX_PROCESS_LIMIT		256
348#define VIOIF_RX_INTR_PROCESS_LIMIT	0U
349#define VIOIF_RX_PROCESS_LIMIT		256
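
/*
 * A limit of 0 (the Rx interrupt limit above) makes the interrupt handler
 * defer all dequeueing to the softint/workqueue handler: vioif_rx_deq_locked()
 * stops as soon as the limit is reached and reports that more work remains.
 */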
350
351#define VIOIF_WORKQUEUE_PRI		PRI_SOFTNET
352#define VIOIF_IS_LINK_ACTIVE(_sc)	((_sc)->sc_link_state == LINK_STATE_UP ? \
353					    true : false)
354
355/* cfattach interface functions */
356static int	vioif_match(device_t, cfdata_t, void *);
357static void	vioif_attach(device_t, device_t, void *);
358static int	vioif_finalize_teardown(device_t);
359
360/* ifnet interface functions */
361static int	vioif_init(struct ifnet *);
362static void	vioif_stop(struct ifnet *, int);
363static void	vioif_start(struct ifnet *);
364static int	vioif_transmit(struct ifnet *, struct mbuf *);
365static int	vioif_ioctl(struct ifnet *, u_long, void *);
366static void	vioif_watchdog(struct ifnet *);
367static int	vioif_ifflags(struct vioif_softc *);
368static int	vioif_ifflags_cb(struct ethercom *);
369
370/* tx & rx */
371static int	vioif_netqueue_init(struct vioif_softc *,
372		    struct virtio_softc *, size_t, u_int);
373static void	vioif_netqueue_teardown(struct vioif_softc *,
374		    struct virtio_softc *, size_t);
375static void	vioif_net_intr_enable(struct vioif_softc *,
376		    struct virtio_softc *);
377static void	vioif_net_intr_disable(struct vioif_softc *,
378		    struct virtio_softc *);
379static void	vioif_net_sched_handle(struct vioif_softc *,
380		    struct vioif_netqueue *);
381
382/* rx */
383static void	vioif_populate_rx_mbufs_locked(struct vioif_softc *,
384		    struct vioif_netqueue *);
385static int	vioif_rx_intr(void *);
386static void	vioif_rx_handle(void *);
387static void	vioif_rx_queue_clear(struct vioif_softc *,
388		    struct virtio_softc *, struct vioif_netqueue *);
389
390/* tx */
391static void	vioif_start_locked(struct ifnet *, struct vioif_netqueue *);
392static void	vioif_transmit_locked(struct ifnet *, struct vioif_netqueue *);
393static void	vioif_deferred_transmit(void *);
394static int	vioif_tx_intr(void *);
395static void	vioif_tx_handle(void *);
396static void	vioif_tx_queue_clear(struct vioif_softc *, struct virtio_softc *,
397		    struct vioif_netqueue *);
398
399/* controls */
400static int	vioif_ctrl_intr(void *);
401static int	vioif_ctrl_rx(struct vioif_softc *, int, bool);
402static int	vioif_set_promisc(struct vioif_softc *, bool);
403static int	vioif_set_allmulti(struct vioif_softc *, bool);
404static int	vioif_set_rx_filter(struct vioif_softc *);
405static int	vioif_rx_filter(struct vioif_softc *);
406static int	vioif_set_mac_addr(struct vioif_softc *);
407static int	vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *, int);
408
409/* config interrupt */
410static int	vioif_config_change(struct virtio_softc *);
411static void	vioif_cfg_softint(void *);
412static void	vioif_update_link_status(struct vioif_softc *);
413
414/* others */
415static void	vioif_alloc_queues(struct vioif_softc *);
416static void	vioif_free_queues(struct vioif_softc *);
417static int	vioif_alloc_mems(struct vioif_softc *);
418static struct workqueue*
419		vioif_workq_create(const char *, pri_t, int, int);
420static void	vioif_workq_destroy(struct workqueue *);
421static void	vioif_work_set(struct vioif_work *, void(*)(void *), void *);
422static void	vioif_work_add(struct workqueue *, struct vioif_work *);
423static void	vioif_work_wait(struct workqueue *, struct vioif_work *);
424static int	vioif_setup_sysctl(struct vioif_softc *);
425static void	vioif_setup_stats(struct vioif_softc *);
426
427CFATTACH_DECL_NEW(vioif, sizeof(struct vioif_softc),
428		  vioif_match, vioif_attach, NULL, NULL);
429
430static void
431vioif_intr_barrier(void)
432{
433
	/* wait for all interrupt handlers to finish */
435	xc_barrier(0);
436}
437
438static void
439vioif_notify(struct virtio_softc *vsc, struct virtqueue *vq)
440{
441
442	virtio_enqueue_commit(vsc, vq, -1, true);
443}
444
445static int
446vioif_match(device_t parent, cfdata_t match, void *aux)
447{
448	struct virtio_attach_args *va = aux;
449
450	if (va->sc_childdevid == VIRTIO_DEVICE_ID_NETWORK)
451		return 1;
452
453	return 0;
454}
455
456static void
457vioif_attach(device_t parent, device_t self, void *aux)
458{
459	struct vioif_softc *sc = device_private(self);
460	struct virtio_softc *vsc = device_private(parent);
461	struct vioif_netqueue *txq0;
462	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
463	uint64_t features, req_features;
464	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
465	u_int softint_flags;
466	int r, i, req_flags;
467	char xnamebuf[MAXCOMLEN];
468	size_t nvqs;
469
470	if (virtio_child(vsc) != NULL) {
471		aprint_normal(": child already attached for %s; "
472		    "something wrong...\n", device_xname(parent));
473		return;
474	}
475
476	sc->sc_dev = self;
477	sc->sc_virtio = vsc;
478	sc->sc_link_state = LINK_STATE_UNKNOWN;
479
480	sc->sc_max_nvq_pairs = 1;
481	sc->sc_req_nvq_pairs = 1;
482	sc->sc_act_nvq_pairs = 1;
483	sc->sc_txrx_workqueue_sysctl = true;
484	sc->sc_tx_intr_process_limit = VIOIF_TX_INTR_PROCESS_LIMIT;
485	sc->sc_tx_process_limit = VIOIF_TX_PROCESS_LIMIT;
486	sc->sc_rx_intr_process_limit = VIOIF_RX_INTR_PROCESS_LIMIT;
487	sc->sc_rx_process_limit = VIOIF_RX_PROCESS_LIMIT;
488
489	mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);
490
491	snprintf(xnamebuf, sizeof(xnamebuf), "%s_txrx", device_xname(self));
492	sc->sc_txrx_workqueue = vioif_workq_create(xnamebuf, VIOIF_WORKQUEUE_PRI,
493	    IPL_NET, WQ_PERCPU | WQ_MPSAFE);
494	if (sc->sc_txrx_workqueue == NULL)
495		goto err;
496
497	req_flags = 0;
498
499#ifdef VIOIF_MPSAFE
500	req_flags |= VIRTIO_F_INTR_MPSAFE;
501#endif
502	req_flags |= VIRTIO_F_INTR_MSIX;
503
504	req_features =
505	    VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | VIRTIO_NET_F_CTRL_VQ |
506	    VIRTIO_NET_F_CTRL_RX | VIRTIO_F_NOTIFY_ON_EMPTY;
507	req_features |= VIRTIO_F_RING_EVENT_IDX;
508	req_features |= VIRTIO_NET_F_CTRL_MAC_ADDR;
509#ifdef VIOIF_MULTIQ
510	req_features |= VIRTIO_NET_F_MQ;
511#endif
512
513	virtio_child_attach_start(vsc, self, IPL_NET,
514	    req_features, VIRTIO_NET_FLAG_BITS);
515	features = virtio_features(vsc);
516
517	if (features == 0)
518		goto err;
519
520	if (features & VIRTIO_NET_F_MAC) {
521		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
522			sc->sc_mac[i] = virtio_read_device_config_1(vsc,
523			    VIRTIO_NET_CONFIG_MAC + i);
524		}
525	} else {
526		/* code stolen from sys/net/if_tap.c */
527		struct timeval tv;
528		uint32_t ui;
529		getmicrouptime(&tv);
530		ui = (tv.tv_sec ^ tv.tv_usec) & 0xffffff;
531		memcpy(sc->sc_mac+3, (uint8_t *)&ui, 3);
532		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
533			virtio_write_device_config_1(vsc,
534			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
535		}
536	}
537
	/* 'Ethernet' with a capital 'E' matches other Ethernet drivers' attach output */
539	aprint_normal_dev(self, "Ethernet address %s\n",
540	    ether_sprintf(sc->sc_mac));
541
542	if (features & (VIRTIO_NET_F_MRG_RXBUF | VIRTIO_F_VERSION_1)) {
543		sc->sc_hdr_size = sizeof(struct virtio_net_hdr);
544	} else {
545		sc->sc_hdr_size = offsetof(struct virtio_net_hdr, num_buffers);
546	}
547
548	if ((features & VIRTIO_NET_F_CTRL_VQ) &&
549	    (features & VIRTIO_NET_F_CTRL_RX)) {
550		sc->sc_has_ctrl = true;
551
552		cv_init(&ctrlq->ctrlq_wait, "ctrl_vq");
553		mutex_init(&ctrlq->ctrlq_wait_lock, MUTEX_DEFAULT, IPL_NET);
554		ctrlq->ctrlq_inuse = FREE;
555	} else {
556		sc->sc_has_ctrl = false;
557	}
558
559	if (sc->sc_has_ctrl && (features & VIRTIO_NET_F_MQ)) {
560		sc->sc_max_nvq_pairs = virtio_read_device_config_2(vsc,
561		    VIRTIO_NET_CONFIG_MAX_VQ_PAIRS);
562
563		if (sc->sc_max_nvq_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
564			goto err;
565
566		/* Limit the number of queue pairs to use */
567		sc->sc_req_nvq_pairs = MIN(sc->sc_max_nvq_pairs, ncpu);
568
569		if (sc->sc_max_nvq_pairs > 1)
570			req_flags |= VIRTIO_F_INTR_PERVQ;
571	}
572
573	vioif_alloc_queues(sc);
574
575#ifdef VIOIF_MPSAFE
576	softint_flags = SOFTINT_NET | SOFTINT_MPSAFE;
577#else
578	softint_flags = SOFTINT_NET;
579#endif
580
581	/*
582	 * Initialize network queues
583	 */
584	nvqs = sc->sc_max_nvq_pairs * 2;
585	for (i = 0; i < nvqs; i++) {
586		r = vioif_netqueue_init(sc, vsc, i, softint_flags);
587		if (r != 0)
588			goto err;
589	}
590
591	if (sc->sc_has_ctrl) {
592		int ctrlq_idx = nvqs;
593
594		nvqs++;
		/*
		 * Allocate a virtqueue for the control channel
		 */
598		sc->sc_ctrlq.ctrlq_vq = &sc->sc_vqs[ctrlq_idx];
599		virtio_init_vq(vsc, ctrlq->ctrlq_vq, ctrlq_idx,
600		    vioif_ctrl_intr, ctrlq);
601
602		r = virtio_alloc_vq(vsc, ctrlq->ctrlq_vq, NBPG, 1, "control");
603		if (r != 0) {
604			aprint_error_dev(self, "failed to allocate "
605			    "a virtqueue for control channel, error code %d\n",
606			    r);
607
608			sc->sc_has_ctrl = false;
609			cv_destroy(&ctrlq->ctrlq_wait);
610			mutex_destroy(&ctrlq->ctrlq_wait_lock);
611		}
612	}
613
614	sc->sc_cfg_softint = softint_establish(softint_flags,
615	    vioif_cfg_softint, sc);
616	if (sc->sc_cfg_softint == NULL) {
617		aprint_error_dev(self, "cannot establish ctl softint\n");
618		goto err;
619	}
620
621	if (vioif_alloc_mems(sc) < 0)
622		goto err;
623
624	r = virtio_child_attach_finish(vsc, sc->sc_vqs, nvqs,
625	    vioif_config_change, req_flags);
626	if (r != 0)
627		goto err;
628
629	if (vioif_setup_sysctl(sc) != 0) {
630		aprint_error_dev(self, "unable to create sysctl node\n");
631		/* continue */
632	}
633
634	vioif_setup_stats(sc);
635
636	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
637	ifp->if_softc = sc;
638	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
639#ifdef VIOIF_MPSAFE
640	ifp->if_extflags = IFEF_MPSAFE;
641#endif
642	ifp->if_start = vioif_start;
643	if (sc->sc_req_nvq_pairs > 1)
644		ifp->if_transmit = vioif_transmit;
645	ifp->if_ioctl = vioif_ioctl;
646	ifp->if_init = vioif_init;
647	ifp->if_stop = vioif_stop;
648	ifp->if_capabilities = 0;
649	ifp->if_watchdog = vioif_watchdog;
650	txq0 = &sc->sc_netqs[VIOIF_NETQ_TXQID(0)];
651	IFQ_SET_MAXLEN(&ifp->if_snd, MAX(txq0->netq_vq->vq_num, IFQ_MAXLEN));
652	IFQ_SET_READY(&ifp->if_snd);
653
654	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
655
656	if_attach(ifp);
657	if_deferred_start_init(ifp, NULL);
658	ether_ifattach(ifp, sc->sc_mac);
659	ether_set_ifflags_cb(&sc->sc_ethercom, vioif_ifflags_cb);
660
661	return;
662
663err:
664	nvqs = sc->sc_max_nvq_pairs * 2;
665	for (i = 0; i < nvqs; i++) {
666		vioif_netqueue_teardown(sc, vsc, i);
667	}
668
669	if (sc->sc_has_ctrl) {
670		cv_destroy(&ctrlq->ctrlq_wait);
671		mutex_destroy(&ctrlq->ctrlq_wait_lock);
672		virtio_free_vq(vsc, ctrlq->ctrlq_vq);
673		ctrlq->ctrlq_vq = NULL;
674	}
675
676	vioif_free_queues(sc);
677	mutex_destroy(&sc->sc_lock);
678	virtio_child_attach_failed(vsc);
679	config_finalize_register(self, vioif_finalize_teardown);
680
681	return;
682}
683
684static int
685vioif_finalize_teardown(device_t self)
686{
687	struct vioif_softc *sc = device_private(self);
688
689	if (sc->sc_txrx_workqueue != NULL) {
690		vioif_workq_destroy(sc->sc_txrx_workqueue);
691		sc->sc_txrx_workqueue = NULL;
692	}
693
694	return 0;
695}
696
697/*
698 * Interface functions for ifnet
699 */
700static int
701vioif_init(struct ifnet *ifp)
702{
703	struct vioif_softc *sc = ifp->if_softc;
704	struct virtio_softc *vsc = sc->sc_virtio;
705	struct vioif_netqueue *netq;
706	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
707	int r, i;
708
709	vioif_stop(ifp, 0);
710
711	r = virtio_reinit_start(vsc);
712	if (r != 0) {
713		log(LOG_ERR, "%s: reset failed\n", ifp->if_xname);
714		return EIO;
715	}
716
717	virtio_negotiate_features(vsc, virtio_features(vsc));
718
719	for (i = 0; i < sc->sc_req_nvq_pairs; i++) {
720		netq = &sc->sc_netqs[VIOIF_NETQ_RXQID(i)];
721
722		mutex_enter(&netq->netq_lock);
723		vioif_populate_rx_mbufs_locked(sc, netq);
724		mutex_exit(&netq->netq_lock);
725	}
726
727	virtio_reinit_end(vsc);
728
729	if (sc->sc_has_ctrl)
730		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
731
732	r = vioif_ctrl_mq_vq_pairs_set(sc, sc->sc_req_nvq_pairs);
733	if (r == 0)
734		sc->sc_act_nvq_pairs = sc->sc_req_nvq_pairs;
735	else
736		sc->sc_act_nvq_pairs = 1;
737
738	SET(ifp->if_flags, IFF_RUNNING);
739
740	vioif_net_intr_enable(sc, vsc);
741
742	vioif_update_link_status(sc);
743	r = vioif_rx_filter(sc);
744
745	return r;
746}
747
748static void
749vioif_stop(struct ifnet *ifp, int disable)
750{
751	struct vioif_softc *sc = ifp->if_softc;
752	struct virtio_softc *vsc = sc->sc_virtio;
753	struct vioif_netqueue *netq;
754	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
755	size_t i, act_qnum;
756
757	act_qnum = sc->sc_act_nvq_pairs * 2;
758
759	CLR(ifp->if_flags, IFF_RUNNING);
760	for (i = 0; i < act_qnum; i++) {
761		netq = &sc->sc_netqs[i];
762
763		mutex_enter(&netq->netq_lock);
764		netq->netq_stopping = true;
765		mutex_exit(&netq->netq_lock);
766	}
767
768	/* disable interrupts */
769	vioif_net_intr_disable(sc, vsc);
770	if (sc->sc_has_ctrl)
771		virtio_stop_vq_intr(vsc, ctrlq->ctrlq_vq);
772
	/*
	 * The only way to stop interrupts, I/O and DMA is to reset the
	 * device.
	 *
	 * NOTE: Devices based on the VirtIO draft specification cannot
	 * stop interrupts completely even if virtio_stop_vq_intr() is called.
	 */
779	virtio_reset(vsc);
780
781	vioif_intr_barrier();
782
783	for (i = 0; i < act_qnum; i++) {
784		netq = &sc->sc_netqs[i];
785		vioif_work_wait(sc->sc_txrx_workqueue, &netq->netq_work);
786	}
787
788	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
789		netq = &sc->sc_netqs[VIOIF_NETQ_RXQID(i)];
790		vioif_rx_queue_clear(sc, vsc, netq);
791
792		netq = &sc->sc_netqs[VIOIF_NETQ_TXQID(i)];
793		vioif_tx_queue_clear(sc, vsc, netq);
794	}
795
796	/* all packet processing is stopped */
797	for (i = 0; i < act_qnum; i++) {
798		netq = &sc->sc_netqs[i];
799
800		mutex_enter(&netq->netq_lock);
801		netq->netq_stopping = false;
802		mutex_exit(&netq->netq_lock);
803	}
804}
805
806static void
807vioif_start(struct ifnet *ifp)
808{
809	struct vioif_softc *sc = ifp->if_softc;
810	struct vioif_netqueue *txq0 = &sc->sc_netqs[VIOIF_NETQ_TXQID(0)];
811
812#ifdef VIOIF_MPSAFE
813	KASSERT(if_is_mpsafe(ifp));
814#endif
815
816	mutex_enter(&txq0->netq_lock);
817	vioif_start_locked(ifp, txq0);
818	mutex_exit(&txq0->netq_lock);
819}
820
821static inline int
822vioif_select_txqueue(struct ifnet *ifp, struct mbuf *m)
823{
824	struct vioif_softc *sc = ifp->if_softc;
825	u_int cpuid = cpu_index(curcpu());
826
827	return VIOIF_NETQ_TXQID(cpuid % sc->sc_act_nvq_pairs);
828}
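
/*
 * For example, with sc_act_nvq_pairs == 2 the mapping above sends even cpu
 * indexes to queue id 1 (tx queue 0) and odd cpu indexes to queue id 3
 * (tx queue 1).
 */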
829
830static int
831vioif_transmit(struct ifnet *ifp, struct mbuf *m)
832{
833	struct vioif_softc *sc = ifp->if_softc;
834	struct vioif_netqueue *netq;
835	struct vioif_tx_context *txc;
836	int qid;
837
838	qid = vioif_select_txqueue(ifp, m);
839	netq = &sc->sc_netqs[qid];
840	txc = netq->netq_ctx;
841
842	if (__predict_false(!pcq_put(txc->txc_intrq, m))) {
843		m_freem(m);
844		return ENOBUFS;
845	}
846
847	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
848	if_statadd_ref(nsr, if_obytes, m->m_pkthdr.len);
849	if (m->m_flags & M_MCAST)
850		if_statinc_ref(nsr, if_omcasts);
851	IF_STAT_PUTREF(ifp);
852
853	if (mutex_tryenter(&netq->netq_lock)) {
854		vioif_transmit_locked(ifp, netq);
855		mutex_exit(&netq->netq_lock);
856	}
857
858	return 0;
859}
860
861void
862vioif_watchdog(struct ifnet *ifp)
863{
864	struct vioif_softc *sc = ifp->if_softc;
865	struct vioif_netqueue *netq;
866	int i;
867
868	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
869		if (ISSET(ifp->if_flags, IFF_DEBUG)) {
870			log(LOG_DEBUG, "%s: watchdog timed out\n",
871			    ifp->if_xname);
872		}
873
874		for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
875			netq = &sc->sc_netqs[VIOIF_NETQ_TXQID(i)];
876
877			mutex_enter(&netq->netq_lock);
878			if (!netq->netq_running_handle) {
879				netq->netq_running_handle = true;
880				vioif_net_sched_handle(sc, netq);
881			}
882			mutex_exit(&netq->netq_lock);
883		}
884	}
885}
886
887static int
888vioif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
889{
890	int s, r;
891
892	s = splnet();
893
894	r = ether_ioctl(ifp, cmd, data);
895	if (r == ENETRESET && (cmd == SIOCADDMULTI || cmd == SIOCDELMULTI)) {
896		if (ifp->if_flags & IFF_RUNNING) {
897			r = vioif_rx_filter(ifp->if_softc);
898		} else {
899			r = 0;
900		}
901	}
902
903	splx(s);
904
905	return r;
906}
907
908static int
909vioif_ifflags(struct vioif_softc *sc)
910{
911	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
912	bool onoff;
913	int r;
914
915	if (!sc->sc_has_ctrl) {
916		/* no ctrl vq; always promisc and allmulti */
917		ifp->if_flags |= (IFF_PROMISC | IFF_ALLMULTI);
918		return 0;
919	}
920
921	onoff = ifp->if_flags & IFF_ALLMULTI ? true : false;
922	r = vioif_set_allmulti(sc, onoff);
923	if (r != 0) {
924		log(LOG_WARNING,
925		    "%s: couldn't %sable ALLMULTI\n",
926		    ifp->if_xname, onoff ? "en" : "dis");
927		if (onoff) {
928			CLR(ifp->if_flags, IFF_ALLMULTI);
929		} else {
930			SET(ifp->if_flags, IFF_ALLMULTI);
931		}
932	}
933
934	onoff = ifp->if_flags & IFF_PROMISC ? true : false;
935	r = vioif_set_promisc(sc, onoff);
936	if (r != 0) {
937		log(LOG_WARNING,
938		    "%s: couldn't %sable PROMISC\n",
939		    ifp->if_xname, onoff ? "en" : "dis");
940		if (onoff) {
941			CLR(ifp->if_flags, IFF_PROMISC);
942		} else {
943			SET(ifp->if_flags, IFF_PROMISC);
944		}
945	}
946
947	return 0;
948}
949
950static int
951vioif_ifflags_cb(struct ethercom *ec)
952{
953	struct ifnet *ifp = &ec->ec_if;
954	struct vioif_softc *sc = ifp->if_softc;
955
956	return vioif_ifflags(sc);
957}
958
959static int
960vioif_setup_sysctl(struct vioif_softc *sc)
961{
962	const char *devname;
963	struct sysctllog **log;
964	const struct sysctlnode *rnode, *rxnode, *txnode;
965	int error;
966
967	log = &sc->sc_sysctllog;
968	devname = device_xname(sc->sc_dev);
969
970	error = sysctl_createv(log, 0, NULL, &rnode,
971	    0, CTLTYPE_NODE, devname,
972	    SYSCTL_DESCR("virtio-net information and settings"),
973	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);
974	if (error)
975		goto out;
976
977	error = sysctl_createv(log, 0, &rnode, NULL,
978	    CTLFLAG_READWRITE, CTLTYPE_BOOL, "txrx_workqueue",
979	    SYSCTL_DESCR("Use workqueue for packet processing"),
980	    NULL, 0, &sc->sc_txrx_workqueue_sysctl, 0, CTL_CREATE, CTL_EOL);
981	if (error)
982		goto out;
983
984	error = sysctl_createv(log, 0, &rnode, &rxnode,
985	    0, CTLTYPE_NODE, "rx",
986	    SYSCTL_DESCR("virtio-net information and settings for Rx"),
987	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
988	if (error)
989		goto out;
990
991	error = sysctl_createv(log, 0, &rxnode, NULL,
992	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
993	    SYSCTL_DESCR("max number of Rx packets to process for interrupt processing"),
994	    NULL, 0, &sc->sc_rx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
995	if (error)
996		goto out;
997
998	error = sysctl_createv(log, 0, &rxnode, NULL,
999	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
1000	    SYSCTL_DESCR("max number of Rx packets to process for deferred processing"),
1001	    NULL, 0, &sc->sc_rx_process_limit, 0, CTL_CREATE, CTL_EOL);
1002	if (error)
1003		goto out;
1004
1005	error = sysctl_createv(log, 0, &rnode, &txnode,
1006	    0, CTLTYPE_NODE, "tx",
1007	    SYSCTL_DESCR("virtio-net information and settings for Tx"),
1008	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
1009	if (error)
1010		goto out;
1011
1012	error = sysctl_createv(log, 0, &txnode, NULL,
1013	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
1014	    SYSCTL_DESCR("max number of Tx packets to process for interrupt processing"),
1015	    NULL, 0, &sc->sc_tx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
1016	if (error)
1017		goto out;
1018
1019	error = sysctl_createv(log, 0, &txnode, NULL,
1020	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
1021	    SYSCTL_DESCR("max number of Tx packets to process for deferred processing"),
1022	    NULL, 0, &sc->sc_tx_process_limit, 0, CTL_CREATE, CTL_EOL);
1023
1024out:
1025	if (error)
1026		sysctl_teardown(log);
1027
1028	return error;
1029}
1030
1031static void
1032vioif_setup_stats(struct vioif_softc *sc)
1033{
1034	struct vioif_netqueue *netq;
1035	struct vioif_tx_context *txc;
1036	struct vioif_rx_context *rxc;
1037	size_t i, netq_num;
1038
1039	netq_num = sc->sc_max_nvq_pairs * 2;
1040	for (i = 0; i < netq_num; i++) {
1041		netq = &sc->sc_netqs[i];
1042		evcnt_attach_dynamic(&netq->netq_mbuf_load_failed, EVCNT_TYPE_MISC,
1043		    NULL, netq->netq_evgroup, "failed to load mbuf to DMA");
1044		evcnt_attach_dynamic(&netq->netq_enqueue_failed,
1045		    EVCNT_TYPE_MISC, NULL, netq->netq_evgroup,
1046		    "virtqueue enqueue failed failed");
1047
1048		switch (VIOIF_NETQ_DIR(i)) {
1049		case VIOIF_NETQ_RX:
1050			rxc = netq->netq_ctx;
1051			evcnt_attach_dynamic(&rxc->rxc_mbuf_enobufs,
1052			    EVCNT_TYPE_MISC, NULL, netq->netq_evgroup,
1053			    "no receive buffer");
1054			break;
1055		case VIOIF_NETQ_TX:
1056			txc = netq->netq_ctx;
1057			evcnt_attach_dynamic(&txc->txc_defrag_failed,
1058			    EVCNT_TYPE_MISC, NULL, netq->netq_evgroup,
1059			    "m_defrag() failed");
1060			break;
1061		}
1062	}
1063
1064	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_load_failed, EVCNT_TYPE_MISC,
1065	    NULL, device_xname(sc->sc_dev), "control command dmamap load failed");
1066	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_failed, EVCNT_TYPE_MISC,
1067	    NULL, device_xname(sc->sc_dev), "control command failed");
1068}
1069
1070/*
1071 * allocate memory
1072 */
1073static int
1074vioif_dmamap_create(struct vioif_softc *sc, bus_dmamap_t *map,
1075    bus_size_t size, int nsegs, const char *usage)
1076{
1077	int r;
1078
1079	r = bus_dmamap_create(virtio_dmat(sc->sc_virtio), size,
1080	    nsegs, size, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, map);
1081
1082	if (r != 0) {
1083		aprint_error_dev(sc->sc_dev, "%s dmamap creation failed, "
1084		    "error code %d\n", usage, r);
1085	}
1086
1087	return r;
1088}
1089
1090static void
1091vioif_dmamap_destroy(struct vioif_softc *sc, bus_dmamap_t *map)
1092{
1093
1094	if (*map) {
1095		bus_dmamap_destroy(virtio_dmat(sc->sc_virtio), *map);
1096		*map = NULL;
1097	}
1098}
1099
1100static int
1101vioif_dmamap_create_load(struct vioif_softc *sc, bus_dmamap_t *map,
1102    void *buf, bus_size_t size, int nsegs, int rw, const char *usage)
1103{
1104	int r;
1105
1106	r = vioif_dmamap_create(sc, map, size, nsegs, usage);
1107	if (r != 0)
1108		return 1;
1109
1110	r = bus_dmamap_load(virtio_dmat(sc->sc_virtio), *map, buf,
1111	    size, NULL, rw | BUS_DMA_NOWAIT);
1112	if (r != 0) {
1113		vioif_dmamap_destroy(sc, map);
1114		aprint_error_dev(sc->sc_dev, "%s dmamap load failed. "
1115		    "error code %d\n", usage, r);
1116	}
1117
1118	return r;
1119}
1120
1121static void *
1122vioif_assign_mem(intptr_t *p, size_t size)
1123{
1124	intptr_t rv;
1125
1126	rv = *p;
1127	*p += size;
1128
1129	return (void *)rv;
1130}
1131
/*
 * DMA memory is used for:
 *   netq_maps_kva:	 metadata array for received frames (READ) and
 *			 sent frames (WRITE)
 *   ctrlq_cmd:		 command to be sent via ctrl vq (WRITE)
 *   ctrlq_status:	 return value for a command via ctrl vq (READ)
 *   ctrlq_rx:		 parameter for a VIRTIO_NET_CTRL_RX class command
 *			 (WRITE)
 *   ctrlq_mac_tbl_uc:	 unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
 *			 class command (WRITE)
 *   ctrlq_mac_tbl_mc:	 multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
 *			 class command (WRITE)
 *   ctrlq_mac_addr:	 MAC address to set via a VIRTIO_NET_CTRL_MAC_ADDR_SET
 *			 command (WRITE)
 *   ctrlq_mq:		 parameter for a VIRTIO_NET_CTRL_MQ class command
 *			 (WRITE)
 * Only one of each ctrlq_* structure is allocated; they are protected by
 * the ctrlq_inuse variable and the ctrlq_wait condvar.
 */
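
/*
 * Rough picture of the single bus_dmamem allocation carved up by
 * vioif_alloc_mems() below (a sketch; actual sizes depend on the virtqueue
 * sizes and on whether the control queue was negotiated):
 *
 *	| rx0 hdrs | tx0 hdrs | ... | rxN hdrs | txN hdrs |
 *	| cmd | status | rx | mac_tbl_uc | mac_tbl_mc | mac_addr | mq |
 */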
1147static int
1148vioif_alloc_mems(struct vioif_softc *sc)
1149{
1150	struct virtio_softc *vsc = sc->sc_virtio;
1151	struct vioif_netqueue *netq;
1152	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1153	struct vioif_net_map *maps;
1154	unsigned int vq_num;
1155	int r, rsegs;
1156	bus_size_t dmamemsize;
1157	size_t qid, i, netq_num, kmemsize;
1158	void *vaddr;
1159	intptr_t p;
1160
1161	netq_num = sc->sc_max_nvq_pairs * 2;
1162
1163	/* allocate DMA memory */
1164	dmamemsize = 0;
1165
1166	for (qid = 0; qid < netq_num; qid++) {
1167		maps = sc->sc_netqs[qid].netq_maps;
1168		vq_num = sc->sc_netqs[qid].netq_vq->vq_num;
1169		dmamemsize += sizeof(*maps[0].vnm_hdr) * vq_num;
1170	}
1171
1172	if (sc->sc_has_ctrl) {
1173		dmamemsize += sizeof(struct virtio_net_ctrl_cmd);
1174		dmamemsize += sizeof(struct virtio_net_ctrl_status);
1175		dmamemsize += sizeof(struct virtio_net_ctrl_rx);
1176		dmamemsize += sizeof(struct virtio_net_ctrl_mac_tbl)
1177		    + ETHER_ADDR_LEN;
1178		dmamemsize += sizeof(struct virtio_net_ctrl_mac_tbl)
1179		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES;
1180		dmamemsize += sizeof(struct virtio_net_ctrl_mac_addr);
1181		dmamemsize += sizeof(struct virtio_net_ctrl_mq);
1182	}
1183
1184	r = bus_dmamem_alloc(virtio_dmat(vsc), dmamemsize, 0, 0,
1185	    &sc->sc_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
1186	if (r != 0) {
1187		aprint_error_dev(sc->sc_dev,
1188		    "DMA memory allocation failed, size %zu, "
1189		    "error code %d\n", dmamemsize, r);
1190		goto err_none;
1191	}
1192	r = bus_dmamem_map(virtio_dmat(vsc), &sc->sc_segs[0], 1,
1193	    dmamemsize, &vaddr, BUS_DMA_NOWAIT);
1194	if (r != 0) {
1195		aprint_error_dev(sc->sc_dev,
1196		    "DMA memory map failed, error code %d\n", r);
1197		goto err_dmamem_alloc;
1198	}
1199
1200	/* assign DMA memory */
1201	memset(vaddr, 0, dmamemsize);
1202	sc->sc_dmamem = vaddr;
1203	p = (intptr_t) vaddr;
1204
1205	for (qid = 0; qid < netq_num; qid++) {
1206		netq = &sc->sc_netqs[qid];
1207		maps = netq->netq_maps;
1208		vq_num = netq->netq_vq->vq_num;
1209
1210		netq->netq_maps_kva = vioif_assign_mem(&p,
1211		    sizeof(*maps[0].vnm_hdr) * vq_num);
1212	}
1213
1214	if (sc->sc_has_ctrl) {
1215		ctrlq->ctrlq_cmd = vioif_assign_mem(&p,
1216		    sizeof(*ctrlq->ctrlq_cmd));
1217		ctrlq->ctrlq_status = vioif_assign_mem(&p,
1218		    sizeof(*ctrlq->ctrlq_status));
1219		ctrlq->ctrlq_rx = vioif_assign_mem(&p,
1220		    sizeof(*ctrlq->ctrlq_rx));
1221		ctrlq->ctrlq_mac_tbl_uc = vioif_assign_mem(&p,
1222		    sizeof(*ctrlq->ctrlq_mac_tbl_uc)
1223		    + ETHER_ADDR_LEN);
1224		ctrlq->ctrlq_mac_tbl_mc = vioif_assign_mem(&p,
1225		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
1226		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES);
1227		ctrlq->ctrlq_mac_addr = vioif_assign_mem(&p,
1228		    sizeof(*ctrlq->ctrlq_mac_addr));
1229		ctrlq->ctrlq_mq = vioif_assign_mem(&p, sizeof(*ctrlq->ctrlq_mq));
1230	}
1231
1232	/* allocate kmem */
1233	kmemsize = 0;
1234
1235	for (qid = 0; qid < netq_num; qid++) {
1236		netq = &sc->sc_netqs[qid];
1237		vq_num = netq->netq_vq->vq_num;
1238
1239		kmemsize += sizeof(netq->netq_maps[0]) * vq_num;
1240	}
1241
1242	vaddr = kmem_zalloc(kmemsize, KM_SLEEP);
1243	sc->sc_kmem = vaddr;
1244
1245	/* assign allocated kmem */
1246	p = (intptr_t) vaddr;
1247
1248	for (qid = 0; qid < netq_num; qid++) {
1249		netq = &sc->sc_netqs[qid];
1250		vq_num = netq->netq_vq->vq_num;
1251
1252		netq->netq_maps = vioif_assign_mem(&p,
1253		    sizeof(netq->netq_maps[0]) * vq_num);
1254	}
1255
1256	/* prepare dmamaps */
1257	for (qid = 0; qid < netq_num; qid++) {
1258		static const struct {
1259			const char	*msg_hdr;
1260			const char	*msg_payload;
1261			int		 dma_flag;
1262			bus_size_t	 dma_size;
1263			int		 dma_nsegs;
1264		} dmaparams[VIOIF_NETQ_IDX] = {
1265			[VIOIF_NETQ_RX] = {
1266				.msg_hdr	= "rx header",
1267				.msg_payload	= "rx payload",
1268				.dma_flag	= BUS_DMA_READ,
1269				.dma_size	= MCLBYTES - ETHER_ALIGN,
1270				.dma_nsegs	= 1,
1271			},
1272			[VIOIF_NETQ_TX] = {
1273				.msg_hdr	= "tx header",
1274				.msg_payload	= "tx payload",
1275				.dma_flag	= BUS_DMA_WRITE,
1276				.dma_size	= ETHER_MAX_LEN,
1277				.dma_nsegs	= VIRTIO_NET_TX_MAXNSEGS,
1278			}
1279		};
1280
1281		struct virtio_net_hdr *hdrs;
1282		int dir;
1283
1284		dir = VIOIF_NETQ_DIR(qid);
1285		netq = &sc->sc_netqs[qid];
1286		vq_num = netq->netq_vq->vq_num;
1287		maps = netq->netq_maps;
1288		hdrs = netq->netq_maps_kva;
1289
1290		for (i = 0; i < vq_num; i++) {
1291			maps[i].vnm_hdr = &hdrs[i];
1292
1293			r = vioif_dmamap_create_load(sc, &maps[i].vnm_hdr_map,
1294			    maps[i].vnm_hdr, sc->sc_hdr_size, 1,
1295			    dmaparams[dir].dma_flag, dmaparams[dir].msg_hdr);
1296			if (r != 0)
1297				goto err_reqs;
1298
1299			r = vioif_dmamap_create(sc, &maps[i].vnm_mbuf_map,
1300			    dmaparams[dir].dma_size, dmaparams[dir].dma_nsegs,
1301			    dmaparams[dir].msg_payload);
1302			if (r != 0)
1303				goto err_reqs;
1304		}
1305	}
1306
1307	if (sc->sc_has_ctrl) {
1308		/* control vq class & command */
1309		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_cmd_dmamap,
1310		    ctrlq->ctrlq_cmd, sizeof(*ctrlq->ctrlq_cmd), 1,
1311		    BUS_DMA_WRITE, "control command");
1312		if (r != 0)
1313			goto err_reqs;
1314
1315		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_status_dmamap,
1316		    ctrlq->ctrlq_status, sizeof(*ctrlq->ctrlq_status), 1,
1317		    BUS_DMA_READ, "control status");
1318		if (r != 0)
1319			goto err_reqs;
1320
1321		/* control vq rx mode command parameter */
1322		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_rx_dmamap,
1323		    ctrlq->ctrlq_rx, sizeof(*ctrlq->ctrlq_rx), 1,
1324		    BUS_DMA_WRITE, "rx mode control command");
1325		if (r != 0)
1326			goto err_reqs;
1327
1328		/* multiqueue set command */
1329		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_mq_dmamap,
1330		    ctrlq->ctrlq_mq, sizeof(*ctrlq->ctrlq_mq), 1,
1331		    BUS_DMA_WRITE, "multiqueue set command");
1332		if (r != 0)
1333			goto err_reqs;
1334
1335		/* control vq MAC filter table for unicast */
1336		/* do not load now since its length is variable */
1337		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_uc_dmamap,
1338		    sizeof(*ctrlq->ctrlq_mac_tbl_uc)
1339		    + ETHER_ADDR_LEN, 1,
1340		    "unicast MAC address filter command");
1341		if (r != 0)
1342			goto err_reqs;
1343
1344		/* control vq MAC filter table for multicast */
1345		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_mc_dmamap,
1346		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
1347		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES, 1,
1348		    "multicast MAC address filter command");
1349		if (r != 0)
1350			goto err_reqs;
1351
1352		/* control vq MAC address set command */
1353		r = vioif_dmamap_create_load(sc,
1354		    &ctrlq->ctrlq_mac_addr_dmamap,
1355		    ctrlq->ctrlq_mac_addr,
1356		    sizeof(*ctrlq->ctrlq_mac_addr), 1,
1357		    BUS_DMA_WRITE, "mac addr set command");
1358		if (r != 0)
1359			goto err_reqs;
1360	}
1361
1362	return 0;
1363
1364err_reqs:
1365	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_mc_dmamap);
1366	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_uc_dmamap);
1367	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_rx_dmamap);
1368	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_status_dmamap);
1369	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_cmd_dmamap);
1370	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_mac_addr_dmamap);
1371	for (qid = 0; qid < netq_num; qid++) {
1372		vq_num = sc->sc_netqs[qid].netq_vq->vq_num;
1373		maps = sc->sc_netqs[qid].netq_maps;
1374
1375		for (i = 0; i < vq_num; i++) {
1376			vioif_dmamap_destroy(sc, &maps[i].vnm_mbuf_map);
1377			vioif_dmamap_destroy(sc, &maps[i].vnm_hdr_map);
1378		}
1379	}
1380	if (sc->sc_kmem) {
1381		kmem_free(sc->sc_kmem, kmemsize);
1382		sc->sc_kmem = NULL;
1383	}
1384	bus_dmamem_unmap(virtio_dmat(vsc), sc->sc_dmamem, dmamemsize);
1385err_dmamem_alloc:
1386	bus_dmamem_free(virtio_dmat(vsc), &sc->sc_segs[0], 1);
1387err_none:
1388	return -1;
1389}
1390
1391static void
1392vioif_alloc_queues(struct vioif_softc *sc)
1393{
1394	int nvq_pairs = sc->sc_max_nvq_pairs;
1395	size_t nvqs, netq_num;
1396
1397	KASSERT(nvq_pairs <= VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX);
1398
1399	nvqs = netq_num = sc->sc_max_nvq_pairs * 2;
1400	if (sc->sc_has_ctrl)
1401		nvqs++;
1402
1403	sc->sc_vqs = kmem_zalloc(sizeof(sc->sc_vqs[0]) * nvqs, KM_SLEEP);
1404	sc->sc_netqs = kmem_zalloc(sizeof(sc->sc_netqs[0]) * netq_num,
1405	    KM_SLEEP);
1406}
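
/*
 * For example, with sc_max_nvq_pairs == 2 and a control virtqueue present,
 * the above allocates sc_vqs[5] (rx0, tx0, rx1, tx1, ctrl) and sc_netqs[4].
 */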
1407
1408static void
1409vioif_free_queues(struct vioif_softc *sc)
1410{
1411	size_t nvqs, netq_num;
1412
1413	nvqs = netq_num = sc->sc_max_nvq_pairs * 2;
1414	if (sc->sc_ctrlq.ctrlq_vq)
1415		nvqs++;
1416
1417	kmem_free(sc->sc_netqs, sizeof(sc->sc_netqs[0]) * netq_num);
1418	kmem_free(sc->sc_vqs, sizeof(sc->sc_vqs[0]) * nvqs);
1419	sc->sc_netqs = NULL;
1420	sc->sc_vqs = NULL;
1421}
1422
1423/*
1424 * Network queues
1425 */
1426static int
1427vioif_netqueue_init(struct vioif_softc *sc, struct virtio_softc *vsc,
1428    size_t qid, u_int softint_flags)
1429{
1430	static const struct {
1431		const char	*dirname;
1432		int		 segsize;
1433		int		 nsegs;
1434		int 		(*intrhand)(void *);
1435		void		(*sihand)(void *);
1436	} params[VIOIF_NETQ_IDX] = {
1437		[VIOIF_NETQ_RX] = {
1438			.dirname	= "rx",
1439			.segsize	= MCLBYTES,
1440			.nsegs		= 2,
1441			.intrhand	= vioif_rx_intr,
1442			.sihand		= vioif_rx_handle,
1443		},
1444		[VIOIF_NETQ_TX] = {
1445			.dirname	= "tx",
1446			.segsize	= ETHER_MAX_LEN - ETHER_HDR_LEN,
1447			.nsegs		= 2,
1448			.intrhand	= vioif_tx_intr,
1449			.sihand		= vioif_tx_handle,
1450		}
1451	};
1452
1453	struct virtqueue *vq;
1454	struct vioif_netqueue *netq;
1455	struct vioif_tx_context *txc;
1456	struct vioif_rx_context *rxc;
1457	char qname[32];
1458	int r, dir;
1459
1460	txc = NULL;
1461	rxc = NULL;
1462	netq = &sc->sc_netqs[qid];
1463	vq = &sc->sc_vqs[qid];
1464	dir = VIOIF_NETQ_DIR(qid);
1465
1466	netq->netq_vq = &sc->sc_vqs[qid];
1467	netq->netq_stopping = false;
1468	netq->netq_running_handle = false;
1469
1470	snprintf(qname, sizeof(qname), "%s%zu",
1471	    params[dir].dirname, VIOIF_NETQ_PAIRIDX(qid));
1472	snprintf(netq->netq_evgroup, sizeof(netq->netq_evgroup),
1473	    "%s-%s", device_xname(sc->sc_dev), qname);
1474
1475	mutex_init(&netq->netq_lock, MUTEX_DEFAULT, IPL_NET);
1476	virtio_init_vq(vsc, vq, qid, params[dir].intrhand, netq);
1477
1478	r = virtio_alloc_vq(vsc, vq,
1479	    params[dir].segsize + sc->sc_hdr_size,
1480	    params[dir].nsegs, qname);
1481	if (r != 0)
1482		goto err;
1483	netq->netq_vq = vq;
1484
1485	netq->netq_softint = softint_establish(softint_flags,
1486	    params[dir].sihand, netq);
1487	if (netq->netq_softint == NULL) {
1488		aprint_error_dev(sc->sc_dev,
1489		    "couldn't establish %s softint\n",
1490		    params[dir].dirname);
1491		goto err;
1492	}
1493	vioif_work_set(&netq->netq_work, params[dir].sihand, netq);
1494
1495	switch (dir) {
1496	case VIOIF_NETQ_RX:
1497		rxc = kmem_zalloc(sizeof(*rxc), KM_SLEEP);
1498		netq->netq_ctx = rxc;
1499		/* nothing to do */
1500		break;
1501	case VIOIF_NETQ_TX:
1502		txc = kmem_zalloc(sizeof(*txc), KM_SLEEP);
1503		netq->netq_ctx = (void *)txc;
1504		txc->txc_deferred_transmit = softint_establish(softint_flags,
1505		    vioif_deferred_transmit, netq);
1506		if (txc->txc_deferred_transmit == NULL) {
1507			aprint_error_dev(sc->sc_dev,
1508			    "couldn't establish softint for "
1509			    "tx deferred transmit\n");
1510			goto err;
1511		}
1512		txc->txc_link_active = VIOIF_IS_LINK_ACTIVE(sc);
1513		txc->txc_no_free_slots = false;
1514		txc->txc_intrq = pcq_create(vq->vq_num, KM_SLEEP);
1515		break;
1516	}
1517
1518	return 0;
1519
1520err:
1521	netq->netq_ctx = NULL;
1522
1523	if (rxc != NULL) {
1524		kmem_free(rxc, sizeof(*rxc));
1525	}
1526
1527	if (txc != NULL) {
1528		if (txc->txc_deferred_transmit != NULL)
1529			softint_disestablish(txc->txc_deferred_transmit);
1530		if (txc->txc_intrq != NULL)
1531			pcq_destroy(txc->txc_intrq);
		kmem_free(txc, sizeof(*txc));
1533	}
1534
1535	vioif_work_set(&netq->netq_work, NULL, NULL);
1536	if (netq->netq_softint != NULL) {
1537		softint_disestablish(netq->netq_softint);
1538		netq->netq_softint = NULL;
1539	}
1540
1541	virtio_free_vq(vsc, vq);
1542	mutex_destroy(&netq->netq_lock);
1543	netq->netq_vq = NULL;
1544
1545	return -1;
1546}
1547
1548static void
1549vioif_netqueue_teardown(struct vioif_softc *sc, struct virtio_softc *vsc,
1550    size_t qid)
1551{
1552	struct vioif_netqueue *netq;
1553	struct vioif_rx_context *rxc;
1554	struct vioif_tx_context *txc;
1555	int dir;
1556
1557	netq = &sc->sc_netqs[qid];
1558
1559	if (netq->netq_vq == NULL)
1560		return;
1561
1562	netq = &sc->sc_netqs[qid];
1563	dir = VIOIF_NETQ_DIR(qid);
1564	switch (dir) {
1565	case VIOIF_NETQ_RX:
1566		rxc = netq->netq_ctx;
1567		netq->netq_ctx = NULL;
1568		kmem_free(rxc, sizeof(*rxc));
1569		break;
1570	case VIOIF_NETQ_TX:
1571		txc = netq->netq_ctx;
1572		netq->netq_ctx = NULL;
1573		softint_disestablish(txc->txc_deferred_transmit);
1574		pcq_destroy(txc->txc_intrq);
1575		kmem_free(txc, sizeof(*txc));
1576		break;
1577	}
1578
1579	softint_disestablish(netq->netq_softint);
1580	virtio_free_vq(vsc, netq->netq_vq);
1581	mutex_destroy(&netq->netq_lock);
1582	netq->netq_vq = NULL;
1583}
1584
1585static void
1586vioif_net_sched_handle(struct vioif_softc *sc, struct vioif_netqueue *netq)
1587{
1588
1589	KASSERT(mutex_owned(&netq->netq_lock));
1590	KASSERT(!netq->netq_stopping);
1591
1592	if (netq->netq_workqueue) {
1593		vioif_work_add(sc->sc_txrx_workqueue, &netq->netq_work);
1594	} else {
1595		softint_schedule(netq->netq_softint);
1596	}
1597}
1598
1599static int
1600vioif_net_load_mbuf(struct virtio_softc *vsc, struct vioif_net_map *map,
1601   struct mbuf *m, int dma_flags)
1602{
1603	int r;
1604
1605	KASSERT(map->vnm_mbuf == NULL);
1606
1607	r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1608	    map->vnm_mbuf_map, m, dma_flags | BUS_DMA_NOWAIT);
1609	if (r == 0) {
1610		map->vnm_mbuf = m;
1611	}
1612
1613	return r;
1614}
1615
1616static void
1617vioif_net_unload_mbuf(struct virtio_softc *vsc, struct vioif_net_map *map)
1618{
1619
1620	KASSERT(map->vnm_mbuf != NULL);
1621	bus_dmamap_unload(virtio_dmat(vsc), map->vnm_mbuf_map);
1622	map->vnm_mbuf = NULL;
1623}
1624
1625static int
1626vioif_net_enqueue(struct virtio_softc *vsc, struct virtqueue *vq,
1627    int slot, struct vioif_net_map *map, int dma_ops, bool is_write)
1628{
1629	int r;
1630
1631	KASSERT(map->vnm_mbuf != NULL);
1632
1633	/* This should actually never fail */
1634	r = virtio_enqueue_reserve(vsc, vq, slot,
1635	    map->vnm_mbuf_map->dm_nsegs + 1);
1636	if (r != 0) {
1637		/* slot already freed by virtio_enqueue_reserve */
1638		return r;
1639	}
1640
1641	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_mbuf_map,
1642	    0, map->vnm_mbuf_map->dm_mapsize, dma_ops);
1643	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_hdr_map,
1644	    0, map->vnm_hdr_map->dm_mapsize, dma_ops);
1645
1646	virtio_enqueue(vsc, vq, slot, map->vnm_hdr_map, is_write);
1647	virtio_enqueue(vsc, vq, slot, map->vnm_mbuf_map, is_write);
1648	virtio_enqueue_commit(vsc, vq, slot, false);
1649
1650	return 0;
1651}
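
/*
 * Each packet thus occupies a single descriptor chain: the virtio_net_hdr
 * segment followed by the mbuf map's dm_nsegs data segments, which is why
 * dm_nsegs + 1 slots are reserved above.
 */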
1652
1653static int
1654vioif_net_enqueue_tx(struct virtio_softc *vsc, struct virtqueue *vq,
1655    int slot, struct vioif_net_map *map)
1656{
1657
1658	return vioif_net_enqueue(vsc, vq, slot, map,
1659	    BUS_DMASYNC_PREWRITE, true);
1660}
1661
1662static int
1663vioif_net_enqueue_rx(struct virtio_softc *vsc, struct virtqueue *vq,
1664    int slot, struct vioif_net_map *map)
1665{
1666
1667	return vioif_net_enqueue(vsc, vq, slot, map,
1668	    BUS_DMASYNC_PREREAD, false);
1669}
1670
1671static struct mbuf *
1672vioif_net_dequeue_commit(struct virtio_softc *vsc, struct virtqueue *vq,
1673   int slot, struct vioif_net_map *map, int dma_flags)
1674{
1675	struct mbuf *m;
1676
1677	m = map->vnm_mbuf;
1678	KASSERT(m != NULL);
1679	map->vnm_mbuf = NULL;
1680
1681	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_hdr_map,
1682	    0, map->vnm_hdr_map->dm_mapsize, dma_flags);
1683	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_mbuf_map,
1684	    0, map->vnm_mbuf_map->dm_mapsize, dma_flags);
1685
1686	bus_dmamap_unload(virtio_dmat(vsc), map->vnm_mbuf_map);
1687	virtio_dequeue_commit(vsc, vq, slot);
1688
1689	return m;
1690}
1691
1692static void
1693vioif_net_intr_enable(struct vioif_softc *sc, struct virtio_softc *vsc)
1694{
1695	struct vioif_netqueue *netq;
1696	size_t i, act_qnum;
1697	int enqueued;
1698
1699	act_qnum = sc->sc_act_nvq_pairs * 2;
1700	for (i = 0; i < act_qnum; i++) {
1701		netq = &sc->sc_netqs[i];
1702
1703		KASSERT(!netq->netq_stopping);
1704		KASSERT(!netq->netq_running_handle);
1705
1706		enqueued = virtio_start_vq_intr(vsc, netq->netq_vq);
1707		if (enqueued != 0) {
1708			virtio_stop_vq_intr(vsc, netq->netq_vq);
1709
1710			mutex_enter(&netq->netq_lock);
1711			netq->netq_running_handle = true;
1712			vioif_net_sched_handle(sc, netq);
1713			mutex_exit(&netq->netq_lock);
1714		}
1715	}
1716}
1717
1718static void
1719vioif_net_intr_disable(struct vioif_softc *sc, struct virtio_softc *vsc)
1720{
1721	struct vioif_netqueue *netq;
1722	size_t i, act_qnum;
1723
1724	act_qnum = sc->sc_act_nvq_pairs * 2;
1725	for (i = 0; i < act_qnum; i++) {
1726		netq = &sc->sc_netqs[i];
1727
1728		virtio_stop_vq_intr(vsc, netq->netq_vq);
1729	}
1730}
1731
1732/*
1733 * Receive implementation
1734 */
1735/* enqueue mbufs to receive slots */
1736static void
1737vioif_populate_rx_mbufs_locked(struct vioif_softc *sc, struct vioif_netqueue *netq)
1738{
1739	struct virtqueue *vq = netq->netq_vq;
1740	struct virtio_softc *vsc = vq->vq_owner;
1741	struct vioif_rx_context *rxc;
1742	struct vioif_net_map *map;
1743	struct mbuf *m;
1744	int i, r, ndone = 0;
1745
1746	KASSERT(mutex_owned(&netq->netq_lock));
1747
1748	rxc = netq->netq_ctx;
1749
1750	for (i = 0; i < vq->vq_num; i++) {
1751		int slot;
1752		r = virtio_enqueue_prep(vsc, vq, &slot);
1753		if (r == EAGAIN)
1754			break;
1755		if (__predict_false(r != 0))
1756			panic("enqueue_prep for rx buffers");
1757
1758		MGETHDR(m, M_DONTWAIT, MT_DATA);
1759		if (m == NULL) {
1760			virtio_enqueue_abort(vsc, vq, slot);
1761			rxc->rxc_mbuf_enobufs.ev_count++;
1762			break;
1763		}
1764		MCLGET(m, M_DONTWAIT);
1765		if ((m->m_flags & M_EXT) == 0) {
1766			virtio_enqueue_abort(vsc, vq, slot);
1767			m_freem(m);
1768			rxc->rxc_mbuf_enobufs.ev_count++;
1769			break;
1770		}
1771
1772		m->m_len = m->m_pkthdr.len = MCLBYTES;
1773		m_adj(m, ETHER_ALIGN);
1774
1775		map = &netq->netq_maps[slot];
1776		r = vioif_net_load_mbuf(vsc, map, m, BUS_DMA_READ);
1777		if (r != 0) {
1778			virtio_enqueue_abort(vsc, vq, slot);
1779			m_freem(m);
1780			netq->netq_mbuf_load_failed.ev_count++;
1781			break;
1782		}
1783
1784		r = vioif_net_enqueue_rx(vsc, vq, slot, map);
1785		if (r != 0) {
1786			vioif_net_unload_mbuf(vsc, map);
1787			netq->netq_enqueue_failed.ev_count++;
1788			m_freem(m);
1789			/* slot already freed by vioif_net_enqueue_rx */
1790			break;
1791		}
1792
1793		ndone++;
1794	}
1795
1796	if (ndone > 0)
1797		vioif_notify(vsc, vq);
1798}
1799
1800/* dequeue received packets */
1801static bool
1802vioif_rx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
1803    struct vioif_netqueue *netq, u_int limit, size_t *ndeqp)
1804{
1805	struct virtqueue *vq = netq->netq_vq;
1806	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1807	struct vioif_net_map *map;
1808	struct mbuf *m;
1809	int slot, len;
1810	bool more;
1811	size_t ndeq;
1812
1813	KASSERT(mutex_owned(&netq->netq_lock));
1814
1815	more = false;
1816	ndeq = 0;
1817
1818	if (virtio_vq_is_enqueued(vsc, vq) == false)
1819		goto done;
1820
1821	for (;;ndeq++) {
1822		if (ndeq >= limit) {
1823			more = true;
1824			break;
1825		}
1826
1827		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
1828			break;
1829
1830		map = &netq->netq_maps[slot];
1831		KASSERT(map->vnm_mbuf != NULL);
1832		m = vioif_net_dequeue_commit(vsc, vq, slot,
1833		    map, BUS_DMASYNC_POSTREAD);
1834		KASSERT(m != NULL);
1835
1836		m->m_len = m->m_pkthdr.len = len - sc->sc_hdr_size;
1837		m_set_rcvif(m, ifp);
1838		if_percpuq_enqueue(ifp->if_percpuq, m);
1839	}
1840
1841done:
1842	if (ndeqp != NULL)
1843		*ndeqp = ndeq;
1844
1845	return more;
1846}
1847
1848static void
1849vioif_rx_queue_clear(struct vioif_softc *sc, struct virtio_softc *vsc,
1850    struct vioif_netqueue *netq)
1851{
1852	struct vioif_net_map *map;
1853	struct mbuf *m;
1854	unsigned int i, vq_num;
1855	bool more;
1856
1857	mutex_enter(&netq->netq_lock);
1858
1859	vq_num = netq->netq_vq->vq_num;
1860	for (;;) {
1861		more = vioif_rx_deq_locked(sc, vsc, netq, vq_num, NULL);
1862		if (more == false)
1863			break;
1864	}
1865
1866	for (i = 0; i < vq_num; i++) {
1867		map = &netq->netq_maps[i];
1868
1869		m = map->vnm_mbuf;
1870		if (m == NULL)
1871			continue;
1872
1873		vioif_net_unload_mbuf(vsc, map);
1874		m_freem(m);
1875	}
1876	mutex_exit(&netq->netq_lock);
1877}
1878
1879static void
1880vioif_rx_handle_locked(void *xnetq, u_int limit)
1881{
1882	struct vioif_netqueue *netq = xnetq;
1883	struct virtqueue *vq = netq->netq_vq;
1884	struct virtio_softc *vsc = vq->vq_owner;
1885	struct vioif_softc *sc = device_private(virtio_child(vsc));
1886	bool more;
1887	int enqueued;
1888	size_t ndeq;
1889
1890	KASSERT(mutex_owned(&netq->netq_lock));
1891	KASSERT(!netq->netq_stopping);
1892
1893	more = vioif_rx_deq_locked(sc, vsc, netq, limit, &ndeq);
1894	if (ndeq > 0)
1895		vioif_populate_rx_mbufs_locked(sc, netq);
1896
1897	if (more) {
1898		vioif_net_sched_handle(sc, netq);
1899		return;
1900	}
1901
1902	enqueued = virtio_start_vq_intr(vsc, netq->netq_vq);
1903	if (enqueued != 0) {
1904		virtio_stop_vq_intr(vsc, netq->netq_vq);
1905		vioif_net_sched_handle(sc, netq);
1906		return;
1907	}
1908
1909	netq->netq_running_handle = false;
1910}
1911
1912static int
1913vioif_rx_intr(void *arg)
1914{
1915	struct vioif_netqueue *netq = arg;
1916	struct virtqueue *vq = netq->netq_vq;
1917	struct virtio_softc *vsc = vq->vq_owner;
1918	struct vioif_softc *sc = device_private(virtio_child(vsc));
1919	u_int limit;
1920
1921	mutex_enter(&netq->netq_lock);
1922
1923	/* handler is already running in softint/workqueue */
1924	if (netq->netq_running_handle)
1925		goto done;
1926
1927	netq->netq_running_handle = true;
1928
1929	limit = sc->sc_rx_intr_process_limit;
1930	virtio_stop_vq_intr(vsc, vq);
1931	vioif_rx_handle_locked(netq, limit);
1932
1933done:
1934	mutex_exit(&netq->netq_lock);
1935	return 1;
1936}
1937
1938static void
1939vioif_rx_handle(void *xnetq)
1940{
1941	struct vioif_netqueue *netq = xnetq;
1942	struct virtqueue *vq = netq->netq_vq;
1943	struct virtio_softc *vsc = vq->vq_owner;
1944	struct vioif_softc *sc = device_private(virtio_child(vsc));
1945	u_int limit;
1946
1947	mutex_enter(&netq->netq_lock);
1948
1949	KASSERT(netq->netq_running_handle);
1950
1951	if (netq->netq_stopping) {
1952		netq->netq_running_handle = false;
1953		goto done;
1954	}
1955
1956	limit = sc->sc_rx_process_limit;
1957	vioif_rx_handle_locked(netq, limit);
1958
1959done:
1960	mutex_exit(&netq->netq_lock);
1961}
1962
1963/*
1964 * Transmission implementation
1965 */
1966/* enqueue mbufs to send */
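/*
 * With is_transmit true, packets are taken from the per-queue pcq used by
 * the if_transmit() path; otherwise they are dequeued from ifp->if_snd
 * (the if_start()/ALTQ path).
 */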
1967static void
1968vioif_send_common_locked(struct ifnet *ifp, struct vioif_netqueue *netq,
1969    bool is_transmit)
1970{
1971	struct vioif_softc *sc = ifp->if_softc;
1972	struct virtio_softc *vsc = sc->sc_virtio;
1973	struct virtqueue *vq = netq->netq_vq;
1974	struct vioif_tx_context *txc;
1975	struct vioif_net_map *map;
1976	struct mbuf *m;
1977	int queued = 0;
1978
1979	KASSERT(mutex_owned(&netq->netq_lock));
1980
1981	if (netq->netq_stopping ||
1982	    !ISSET(ifp->if_flags, IFF_RUNNING))
1983		return;
1984
1985	txc = netq->netq_ctx;
1986
1987	if (!txc->txc_link_active ||
1988	    txc->txc_no_free_slots)
1989		return;
1990
1991	for (;;) {
1992		int slot, r;
1993		r = virtio_enqueue_prep(vsc, vq, &slot);
1994		if (r == EAGAIN) {
1995			txc->txc_no_free_slots = true;
1996			break;
1997		}
1998		if (__predict_false(r != 0))
1999			panic("enqueue_prep for tx buffers");
2000
2001		if (is_transmit)
2002			m = pcq_get(txc->txc_intrq);
2003		else
2004			IFQ_DEQUEUE(&ifp->if_snd, m);
2005
2006		if (m == NULL) {
2007			virtio_enqueue_abort(vsc, vq, slot);
2008			break;
2009		}
2010
2011		map = &netq->netq_maps[slot];
2012		KASSERT(map->vnm_mbuf == NULL);
2013
2014		r = vioif_net_load_mbuf(vsc, map, m, BUS_DMA_WRITE);
2015		if (r != 0) {
2016			/* maybe just too fragmented */
2017			struct mbuf *newm;
2018
2019			newm = m_defrag(m, M_NOWAIT);
2020			if (newm != NULL) {
2021				m = newm;
2022				r = vioif_net_load_mbuf(vsc, map, m,
2023				    BUS_DMA_WRITE);
2024			} else {
2025				txc->txc_defrag_failed.ev_count++;
2026				r = -1;
2027			}
2028
2029			if (r != 0) {
2030				netq->netq_mbuf_load_failed.ev_count++;
2031				m_freem(m);
2032				if_statinc(ifp, if_oerrors);
2033				virtio_enqueue_abort(vsc, vq, slot);
2034				continue;
2035			}
2036		}
2037
2038		memset(map->vnm_hdr, 0, sc->sc_hdr_size);
2039
2040		r = vioif_net_enqueue_tx(vsc, vq, slot, map);
2041		if (r != 0) {
2042			netq->netq_enqueue_failed.ev_count++;
2043			vioif_net_unload_mbuf(vsc, map);
2044			m_freem(m);
2045			/* slot already freed by vioif_net_enqueue_tx */
2046
2047			if_statinc(ifp, if_oerrors);
2048			continue;
2049		}
2050
2051		queued++;
2052		bpf_mtap(ifp, m, BPF_D_OUT);
2053	}
2054
2055	if (queued > 0) {
2056		vioif_notify(vsc, vq);
2057		ifp->if_timer = 5;
2058	}
2059}
2060
2061/* dequeue sent mbufs */
2062static bool
2063vioif_tx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
2064    struct vioif_netqueue *netq, u_int limit, size_t *ndeqp)
2065{
2066	struct virtqueue *vq = netq->netq_vq;
2067	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2068	struct vioif_net_map *map;
2069	struct mbuf *m;
2070	int slot, len;
2071	bool more;
2072	size_t ndeq;
2073
2074	KASSERT(mutex_owned(&netq->netq_lock));
2075
2076	more = false;
2077	ndeq = 0;
2078
2079	if (virtio_vq_is_enqueued(vsc, vq) == false)
2080		goto done;
2081
2082	for (;;ndeq++) {
2083		if (limit-- == 0) {
2084			more = true;
2085			break;
2086		}
2087
2088		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
2089			break;
2090
2091		map = &netq->netq_maps[slot];
2092		KASSERT(map->vnm_mbuf != NULL);
2093		m = vioif_net_dequeue_commit(vsc, vq, slot,
2094		    map, BUS_DMASYNC_POSTWRITE);
2095		KASSERT(m != NULL);
2096
2097		if_statinc(ifp, if_opackets);
2098		m_freem(m);
2099	}
2100
2101done:
2102	if (ndeqp != NULL)
2103		*ndeqp = ndeq;
2104	return more;
2105}
2106
2107static void
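/*
 * Drain the tx virtqueue, free any mbufs still loaded on the DMA maps and
 * clear the no-free-slots mark so transmission can resume.
 */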
2108vioif_tx_queue_clear(struct vioif_softc *sc, struct virtio_softc *vsc,
2109    struct vioif_netqueue *netq)
2110{
2111	struct vioif_tx_context *txc;
2112	struct vioif_net_map *map;
2113	struct mbuf *m;
2114	unsigned int i, vq_num;
2115	bool more;
2116
2117	mutex_enter(&netq->netq_lock);
2118
2119	txc = netq->netq_ctx;
2120	vq_num = netq->netq_vq->vq_num;
2121
2122	for (;;) {
2123		more = vioif_tx_deq_locked(sc, vsc, netq, vq_num, NULL);
2124		if (more == false)
2125			break;
2126	}
2127
2128	for (i = 0; i < vq_num; i++) {
2129		map = &netq->netq_maps[i];
2130
2131		m = map->vnm_mbuf;
2132		if (m == NULL)
2133			continue;
2134
2135		vioif_net_unload_mbuf(vsc, map);
2136		m_freem(m);
2137	}
2138
2139	txc->txc_no_free_slots = false;
2140
2141	mutex_exit(&netq->netq_lock);
2142}
2143
2144static void
2145vioif_start_locked(struct ifnet *ifp, struct vioif_netqueue *netq)
2146{
2147
2148	/*
2149	 * ifp->if_obytes and ifp->if_omcasts are added in if_transmit()@if.c.
2150	 */
2151	vioif_send_common_locked(ifp, netq, false);
2152
2153}
2154
2155static void
2156vioif_transmit_locked(struct ifnet *ifp, struct vioif_netqueue *netq)
2157{
2158
2159	vioif_send_common_locked(ifp, netq, true);
2160}
2161
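/* softint that retries the if_transmit() path for this queue */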
2162static void
2163vioif_deferred_transmit(void *arg)
2164{
2165	struct vioif_netqueue *netq = arg;
2166	struct virtio_softc *vsc = netq->netq_vq->vq_owner;
2167	struct vioif_softc *sc = device_private(virtio_child(vsc));
2168	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2169
2170	mutex_enter(&netq->netq_lock);
2171	vioif_send_common_locked(ifp, netq, true);
2172	mutex_exit(&netq->netq_lock);
2173}
2174
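/*
 * Reclaim up to "limit" transmitted buffers.  If slots were exhausted and
 * some are now free, kick the deferred transmit softint; reschedule the
 * handler if work remains, otherwise re-arm (or postpone) the tx vq
 * interrupt.
 */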
2175static void
2176vioif_tx_handle_locked(struct vioif_netqueue *netq, u_int limit)
2177{
2178	struct virtqueue *vq = netq->netq_vq;
2179	struct vioif_tx_context *txc = netq->netq_ctx;
2180	struct virtio_softc *vsc = vq->vq_owner;
2181	struct vioif_softc *sc = device_private(virtio_child(vsc));
2182	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2183	bool more;
2184	int enqueued;
2185	size_t ndeq;
2186
2187	KASSERT(mutex_owned(&netq->netq_lock));
2188	KASSERT(!netq->netq_stopping);
2189
2190	more = vioif_tx_deq_locked(sc, vsc, netq, limit, &ndeq);
2191	if (txc->txc_no_free_slots && ndeq > 0) {
2192		txc->txc_no_free_slots = false;
2193		softint_schedule(txc->txc_deferred_transmit);
2194	}
2195
2196	if (more) {
2197		vioif_net_sched_handle(sc, netq);
2198		return;
2199	}
2200
2201	enqueued = (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX) ?
2202	    virtio_postpone_intr_smart(vsc, vq) :
2203	    virtio_start_vq_intr(vsc, vq);
2204	if (enqueued != 0) {
2205		virtio_stop_vq_intr(vsc, vq);
2206		vioif_net_sched_handle(sc, netq);
2207		return;
2208	}
2209
2210	netq->netq_running_handle = false;
2211
2212	/* for ALTQ */
2213	if (netq == &sc->sc_netqs[VIOIF_NETQ_TXQID(0)])
2214		if_schedule_deferred_start(ifp);
2215
2216	softint_schedule(txc->txc_deferred_transmit);
2217}
2218
2219static int
2220vioif_tx_intr(void *arg)
2221{
2222	struct vioif_netqueue *netq = arg;
2223	struct virtqueue *vq = netq->netq_vq;
2224	struct virtio_softc *vsc = vq->vq_owner;
2225	struct vioif_softc *sc = device_private(virtio_child(vsc));
2226	u_int limit;
2227
2228	mutex_enter(&netq->netq_lock);
2229
2230	/* tx handler is already running in softint/workqueue */
2231	if (netq->netq_running_handle)
2232		goto done;
2233
2234	if (netq->netq_stopping)
2235		goto done;
2236
2237	netq->netq_running_handle = true;
2238
2239	virtio_stop_vq_intr(vsc, vq);
2240	netq->netq_workqueue = sc->sc_txrx_workqueue_sysctl;
2241	limit = sc->sc_tx_intr_process_limit;
2242	vioif_tx_handle_locked(netq, limit);
2243
2244done:
2245	mutex_exit(&netq->netq_lock);
2246	return 1;
2247}
2248
2249static void
2250vioif_tx_handle(void *xnetq)
2251{
2252	struct vioif_netqueue *netq = xnetq;
2253	struct virtqueue *vq = netq->netq_vq;
2254	struct virtio_softc *vsc = vq->vq_owner;
2255	struct vioif_softc *sc = device_private(virtio_child(vsc));
2256	u_int limit;
2257
2258	mutex_enter(&netq->netq_lock);
2259
2260	KASSERT(netq->netq_running_handle);
2261
2262	if (netq->netq_stopping) {
2263		netq->netq_running_handle = false;
2264		goto done;
2265	}
2266
2267	limit = sc->sc_tx_process_limit;
2268	vioif_tx_handle_locked(netq, limit);
2269
2270done:
2271	mutex_exit(&netq->netq_lock);
2272}
2273
2274/*
2275 * Control vq
2276 */
2277	/* serialize use of the control virtqueue; only one command may be in flight */
2278static void
2279vioif_ctrl_acquire(struct vioif_softc *sc)
2280{
2281	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2282
2283	mutex_enter(&ctrlq->ctrlq_wait_lock);
2284	while (ctrlq->ctrlq_inuse != FREE)
2285		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
2286	ctrlq->ctrlq_inuse = INUSE;
2287	ctrlq->ctrlq_owner = curlwp;
2288	mutex_exit(&ctrlq->ctrlq_wait_lock);
2289}
2290
2291static void
2292vioif_ctrl_release(struct vioif_softc *sc)
2293{
2294	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2295
2296	KASSERT(ctrlq->ctrlq_inuse != FREE);
2297	KASSERT(ctrlq->ctrlq_owner == curlwp);
2298
2299	mutex_enter(&ctrlq->ctrlq_wait_lock);
2300	ctrlq->ctrlq_inuse = FREE;
2301	ctrlq->ctrlq_owner = NULL;
2302	cv_signal(&ctrlq->ctrlq_wait);
2303	mutex_exit(&ctrlq->ctrlq_wait_lock);
2304}
2305
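/*
 * bus_dmamap_load() each command buffer; on failure, unload whatever was
 * already loaded.
 */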
2306static int
2307vioif_ctrl_load_cmdspec(struct vioif_softc *sc,
2308    struct vioif_ctrl_cmdspec *specs, int nspecs)
2309{
2310	struct virtio_softc *vsc = sc->sc_virtio;
2311	int i, r, loaded;
2312
2313	loaded = 0;
2314	for (i = 0; i < nspecs; i++) {
2315		r = bus_dmamap_load(virtio_dmat(vsc),
2316		    specs[i].dmamap, specs[i].buf, specs[i].bufsize,
2317		    NULL, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
2318		if (r) {
2319			sc->sc_ctrlq.ctrlq_cmd_load_failed.ev_count++;
2320			goto err;
2321		}
2322		loaded++;
2323
2324	}
2325
2326	return 0;
2327
2328err:
2329	for (i = 0; i < loaded; i++) {
2330		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2331	}
2332
2333	return r;
2334}
2335
2336static void
2337vioif_ctrl_unload_cmdspec(struct vioif_softc *sc,
2338    struct vioif_ctrl_cmdspec *specs, int nspecs)
2339{
2340	struct virtio_softc *vsc = sc->sc_virtio;
2341	int i;
2342
2343	for (i = 0; i < nspecs; i++) {
2344		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2345	}
2346}
2347
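/*
 * Enqueue the command header, "nspecs" payload buffers and the status
 * buffer on the control vq, notify the device and sleep until
 * vioif_ctrl_intr() marks the command done.
 */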
2348static int
2349vioif_ctrl_send_command(struct vioif_softc *sc, uint8_t class, uint8_t cmd,
2350    struct vioif_ctrl_cmdspec *specs, int nspecs)
2351{
2352	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2353	struct virtqueue *vq = ctrlq->ctrlq_vq;
2354	struct virtio_softc *vsc = sc->sc_virtio;
2355	int i, r, slot;
2356
2357	ctrlq->ctrlq_cmd->class = class;
2358	ctrlq->ctrlq_cmd->command = cmd;
2359
2360	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap,
2361	    0, sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_PREWRITE);
2362	for (i = 0; i < nspecs; i++) {
2363		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap,
2364		    0, specs[i].bufsize, BUS_DMASYNC_PREWRITE);
2365	}
2366	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap,
2367	    0, sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_PREREAD);
2368
2369	/* we need to explicitly (re)start vq intr when using RING EVENT IDX */
2370	if (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX)
2371		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
2372
2373	r = virtio_enqueue_prep(vsc, vq, &slot);
2374	if (r != 0)
2375		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2376	r = virtio_enqueue_reserve(vsc, vq, slot, nspecs + 2);
2377	if (r != 0)
2378		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2379	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_cmd_dmamap, true);
2380	for (i = 0; i < nspecs; i++) {
2381		virtio_enqueue(vsc, vq, slot, specs[i].dmamap, true);
2382	}
2383	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_status_dmamap, false);
2384	virtio_enqueue_commit(vsc, vq, slot, true);
2385
2386	/* wait for done */
2387	mutex_enter(&ctrlq->ctrlq_wait_lock);
2388	while (ctrlq->ctrlq_inuse != DONE)
2389		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
2390	mutex_exit(&ctrlq->ctrlq_wait_lock);
2391	/* already dequeued */
2392
2393	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap, 0,
2394	    sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
2395	for (i = 0; i < nspecs; i++) {
2396		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap, 0,
2397		    specs[i].bufsize, BUS_DMASYNC_POSTWRITE);
2398	}
2399	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap, 0,
2400	    sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_POSTREAD);
2401
2402	if (ctrlq->ctrlq_status->ack == VIRTIO_NET_OK)
2403		r = 0;
2404	else {
2405		device_printf(sc->sc_dev, "control virtqueue command failed\n");
2406		sc->sc_ctrlq.ctrlq_cmd_failed.ev_count++;
2407		r = EIO;
2408	}
2409
2410	return r;
2411}
2412
2413/* ctrl vq interrupt; wake up the command issuer */
2414static int
2415vioif_ctrl_intr(void *arg)
2416{
2417	struct vioif_ctrlqueue *ctrlq = arg;
2418	struct virtqueue *vq = ctrlq->ctrlq_vq;
2419	struct virtio_softc *vsc = vq->vq_owner;
2420	int r, slot;
2421
2422	if (virtio_vq_is_enqueued(vsc, vq) == false)
2423		return 0;
2424
2425	r = virtio_dequeue(vsc, vq, &slot, NULL);
2426	if (r == ENOENT)
2427		return 0;
2428	virtio_dequeue_commit(vsc, vq, slot);
2429
2430	mutex_enter(&ctrlq->ctrlq_wait_lock);
2431	ctrlq->ctrlq_inuse = DONE;
2432	cv_signal(&ctrlq->ctrlq_wait);
2433	mutex_exit(&ctrlq->ctrlq_wait_lock);
2434
2435	return 1;
2436}
2437
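/* toggle a VIRTIO_NET_CTRL_RX feature (promiscuous or all-multicast mode) */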
2438static int
2439vioif_ctrl_rx(struct vioif_softc *sc, int cmd, bool onoff)
2440{
2441	struct virtio_net_ctrl_rx *rx = sc->sc_ctrlq.ctrlq_rx;
2442	struct vioif_ctrl_cmdspec specs[1];
2443	int r;
2444
2445	if (!sc->sc_has_ctrl)
2446		return ENOTSUP;
2447
2448	vioif_ctrl_acquire(sc);
2449
2450	rx->onoff = onoff;
2451	specs[0].dmamap = sc->sc_ctrlq.ctrlq_rx_dmamap;
2452	specs[0].buf = rx;
2453	specs[0].bufsize = sizeof(*rx);
2454
2455	r = vioif_ctrl_send_command(sc, VIRTIO_NET_CTRL_RX, cmd,
2456	    specs, __arraycount(specs));
2457
2458	vioif_ctrl_release(sc);
2459	return r;
2460}
2461
2462static int
2463vioif_set_promisc(struct vioif_softc *sc, bool onoff)
2464{
2465	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, onoff);
2466}
2467
2468static int
2469vioif_set_allmulti(struct vioif_softc *sc, bool onoff)
2470{
2471	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, onoff);
2472}
2473
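/* tell the device how many virtqueue pairs to use (VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) */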
2474static int
2475vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *sc, int nvq_pairs)
2476{
2477	struct virtio_net_ctrl_mq *mq = sc->sc_ctrlq.ctrlq_mq;
2478	struct vioif_ctrl_cmdspec specs[1];
2479	int r;
2480
2481	if (!sc->sc_has_ctrl)
2482		return ENOTSUP;
2483
2484	if (nvq_pairs <= 1)
2485		return EINVAL;
2486
2487	vioif_ctrl_acquire(sc);
2488
2489	mq->virtqueue_pairs = virtio_rw16(sc->sc_virtio, nvq_pairs);
2490	specs[0].dmamap = sc->sc_ctrlq.ctrlq_mq_dmamap;
2491	specs[0].buf = mq;
2492	specs[0].bufsize = sizeof(*mq);
2493
2494	r = vioif_ctrl_send_command(sc,
2495	    VIRTIO_NET_CTRL_MQ, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
2496	    specs, __arraycount(specs));
2497
2498	vioif_ctrl_release(sc);
2499
2500	return r;
2501}
2502
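/*
 * Push the current MAC address to the device: via the control vq when
 * VIRTIO_NET_F_CTRL_MAC_ADDR was negotiated, otherwise by writing the
 * config space directly.
 */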
2503static int
2504vioif_set_mac_addr(struct vioif_softc *sc)
2505{
2506	struct virtio_net_ctrl_mac_addr *ma =
2507	    sc->sc_ctrlq.ctrlq_mac_addr;
2508	struct vioif_ctrl_cmdspec specs[1];
2509	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2510	int nspecs = __arraycount(specs);
2511	uint64_t features;
2512	int r;
2513	size_t i;
2514
2515	if (!sc->sc_has_ctrl)
2516		return ENOTSUP;
2517
2518	if (memcmp(CLLADDR(ifp->if_sadl), sc->sc_mac,
2519	    ETHER_ADDR_LEN) == 0) {
2520		return 0;
2521	}
2522
2523	memcpy(sc->sc_mac, CLLADDR(ifp->if_sadl), ETHER_ADDR_LEN);
2524
2525	features = virtio_features(sc->sc_virtio);
2526	if (features & VIRTIO_NET_F_CTRL_MAC_ADDR) {
2527		vioif_ctrl_acquire(sc);
2528
2529		memcpy(ma->mac, sc->sc_mac, ETHER_ADDR_LEN);
2530		specs[0].dmamap = sc->sc_ctrlq.ctrlq_mac_addr_dmamap;
2531		specs[0].buf = ma;
2532		specs[0].bufsize = sizeof(*ma);
2533
2534		r = vioif_ctrl_send_command(sc,
2535		    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_ADDR_SET,
2536		    specs, nspecs);
2537
2538		vioif_ctrl_release(sc);
2539	} else {
2540		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
2541			virtio_write_device_config_1(sc->sc_virtio,
2542			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
2543		}
2544		r = 0;
2545	}
2546
2547	return r;
2548}
2549
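/*
 * Program the device's unicast/multicast MAC tables with
 * VIRTIO_NET_CTRL_MAC_TABLE_SET; the tables are filled in by the caller.
 */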
2550static int
2551vioif_set_rx_filter(struct vioif_softc *sc)
2552{
2553	/* filter already set in ctrlq->ctrlq_mac_tbl */
2554	struct virtio_softc *vsc = sc->sc_virtio;
2555	struct virtio_net_ctrl_mac_tbl *mac_tbl_uc, *mac_tbl_mc;
2556	struct vioif_ctrl_cmdspec specs[2];
2557	int nspecs = __arraycount(specs);
2558	int r;
2559
2560	mac_tbl_uc = sc->sc_ctrlq.ctrlq_mac_tbl_uc;
2561	mac_tbl_mc = sc->sc_ctrlq.ctrlq_mac_tbl_mc;
2562
2563	if (!sc->sc_has_ctrl)
2564		return ENOTSUP;
2565
2566	vioif_ctrl_acquire(sc);
2567
2568	specs[0].dmamap = sc->sc_ctrlq.ctrlq_tbl_uc_dmamap;
2569	specs[0].buf = mac_tbl_uc;
2570	specs[0].bufsize = sizeof(*mac_tbl_uc)
2571	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_uc->nentries));
2572
2573	specs[1].dmamap = sc->sc_ctrlq.ctrlq_tbl_mc_dmamap;
2574	specs[1].buf = mac_tbl_mc;
2575	specs[1].bufsize = sizeof(*mac_tbl_mc)
2576	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_mc->nentries));
2577
2578	r = vioif_ctrl_load_cmdspec(sc, specs, nspecs);
2579	if (r != 0)
2580		goto out;
2581
2582	r = vioif_ctrl_send_command(sc,
2583	    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET,
2584	    specs, nspecs);
2585
2586	vioif_ctrl_unload_cmdspec(sc, specs, nspecs);
2587
2588out:
2589	vioif_ctrl_release(sc);
2590
2591	return r;
2592}
2593
2594/*
2595 * If the multicast filter is small enough (<= MAXENTRIES), program the rx filter.
2596 * If the filter is too large, or contains an address range, use ALLMULTI.
2597 * If programming the rx filter fails, fall back to ALLMULTI as well.
2598 */
2599static int
2600vioif_rx_filter(struct vioif_softc *sc)
2601{
2602	struct virtio_softc *vsc = sc->sc_virtio;
2603	struct ethercom *ec = &sc->sc_ethercom;
2604	struct ifnet *ifp = &ec->ec_if;
2605	struct ether_multi *enm;
2606	struct ether_multistep step;
2607	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2608	int nentries;
2609	bool allmulti = false;
2610	int r;
2611
2612	if (!sc->sc_has_ctrl) {
2613		goto set_ifflags;
2614	}
2615
2616	memcpy(ctrlq->ctrlq_mac_tbl_uc->macs[0],
2617	    CLLADDR(ifp->if_sadl), ETHER_ADDR_LEN);
2618
2619	nentries = 0;
2620	allmulti = false;
2621
2622	ETHER_LOCK(ec);
2623	for (ETHER_FIRST_MULTI(step, ec, enm); enm != NULL;
2624	    ETHER_NEXT_MULTI(step, enm)) {
2625		if (nentries >= VIRTIO_NET_CTRL_MAC_MAXENTRIES) {
2626			allmulti = true;
2627			break;
2628		}
2629		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
2630			allmulti = true;
2631			break;
2632		}
2633
2634		memcpy(ctrlq->ctrlq_mac_tbl_mc->macs[nentries],
2635		    enm->enm_addrlo, ETHER_ADDR_LEN);
2636		nentries++;
2637	}
2638	ETHER_UNLOCK(ec);
2639
2640	r = vioif_set_mac_addr(sc);
2641	if (r != 0) {
2642		log(LOG_WARNING, "%s: couldn't set MAC address\n",
2643		    ifp->if_xname);
2644	}
2645
2646	if (!allmulti) {
2647		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 1);
2648		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, nentries);
2649		r = vioif_set_rx_filter(sc);
2650		if (r != 0) {
2651			allmulti = true; /* fallback */
2652		}
2653	}
2654
2655	if (allmulti) {
2656		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 0);
2657		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, 0);
2658		r = vioif_set_rx_filter(sc);
2659		if (r != 0) {
2660			log(LOG_DEBUG, "%s: couldn't clear RX filter\n",
2661			    ifp->if_xname);
2662			/* what to do on failure? */
2663		}
2664
2665		ifp->if_flags |= IFF_ALLMULTI;
2666	}
2667
2668set_ifflags:
2669	r = vioif_ifflags(sc);
2670
2671	return r;
2672}
2673
2674/*
2675 * VM configuration changes
2676 */
2677static int
2678vioif_config_change(struct virtio_softc *vsc)
2679{
2680	struct vioif_softc *sc = device_private(virtio_child(vsc));
2681
2682	softint_schedule(sc->sc_cfg_softint);
2683	return 0;
2684}
2685
2686static void
2687vioif_cfg_softint(void *arg)
2688{
2689	struct vioif_softc *sc = arg;
2690	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2691
2692	vioif_update_link_status(sc);
2693	vioif_start(ifp);
2694}
2695
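/*
 * Read the link state from device config space; assume the link is up when
 * VIRTIO_NET_F_STATUS was not negotiated.
 */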
2696static int
2697vioif_get_link_status(struct vioif_softc *sc)
2698{
2699	struct virtio_softc *vsc = sc->sc_virtio;
2700	uint16_t status;
2701
2702	if (virtio_features(vsc) & VIRTIO_NET_F_STATUS)
2703		status = virtio_read_device_config_2(vsc,
2704		    VIRTIO_NET_CONFIG_STATUS);
2705	else
2706		status = VIRTIO_NET_S_LINK_UP;
2707
2708	if ((status & VIRTIO_NET_S_LINK_UP) != 0)
2709		return LINK_STATE_UP;
2710
2711	return LINK_STATE_DOWN;
2712}
2713
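/* propagate a link state change to every active tx context and to the stack */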
2714static void
2715vioif_update_link_status(struct vioif_softc *sc)
2716{
2717	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2718	struct vioif_netqueue *netq;
2719	struct vioif_tx_context *txc;
2720	bool active;
2721	int link, i;
2722
2723	mutex_enter(&sc->sc_lock);
2724
2725	link = vioif_get_link_status(sc);
2726
2727	if (link == sc->sc_link_state)
2728		goto done;
2729
2730	sc->sc_link_state = link;
2731
2732	active = VIOIF_IS_LINK_ACTIVE(sc);
2733	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
2734		netq = &sc->sc_netqs[VIOIF_NETQ_TXQID(i)];
2735
2736		mutex_enter(&netq->netq_lock);
2737		txc = netq->netq_ctx;
2738		txc->txc_link_active = active;
2739		mutex_exit(&netq->netq_lock);
2740	}
2741
2742	if_link_state_change(ifp, sc->sc_link_state);
2743
2744done:
2745	mutex_exit(&sc->sc_lock);
2746}
2747
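/*
 * workqueue(9) wrappers: the "added" flag ensures each vioif_work is
 * enqueued at most once until it has run.
 */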
2748static void
2749vioif_workq_work(struct work *wk, void *context)
2750{
2751	struct vioif_work *work;
2752
2753	work = container_of(wk, struct vioif_work, cookie);
2754
2755	atomic_store_relaxed(&work->added, 0);
2756	work->func(work->arg);
2757}
2758
2759static struct workqueue *
2760vioif_workq_create(const char *name, pri_t prio, int ipl, int flags)
2761{
2762	struct workqueue *wq;
2763	int error;
2764
2765	error = workqueue_create(&wq, name, vioif_workq_work, NULL,
2766	    prio, ipl, flags);
2767
2768	if (error)
2769		return NULL;
2770
2771	return wq;
2772}
2773
2774static void
2775vioif_workq_destroy(struct workqueue *wq)
2776{
2777
2778	workqueue_destroy(wq);
2779}
2780
2781static void
2782vioif_work_set(struct vioif_work *work, void (*func)(void *), void *arg)
2783{
2784
2785	memset(work, 0, sizeof(*work));
2786	work->func = func;
2787	work->arg = arg;
2788}
2789
2790static void
2791vioif_work_add(struct workqueue *wq, struct vioif_work *work)
2792{
2793
2794	if (atomic_load_relaxed(&work->added) != 0)
2795		return;
2796
2797	atomic_store_relaxed(&work->added, 1);
2798	kpreempt_disable();
2799	workqueue_enqueue(wq, &work->cookie, NULL);
2800	kpreempt_enable();
2801}
2802
2803static void
2804vioif_work_wait(struct workqueue *wq, struct vioif_work *work)
2805{
2806
2807	workqueue_wait(wq, &work->cookie);
2808}
2809
2810MODULE(MODULE_CLASS_DRIVER, if_vioif, "virtio");
2811
2812#ifdef _MODULE
2813#include "ioconf.c"
2814#endif
2815
2816static int
2817if_vioif_modcmd(modcmd_t cmd, void *opaque)
2818{
2819	int error = 0;
2820
2821#ifdef _MODULE
2822	switch (cmd) {
2823	case MODULE_CMD_INIT:
2824		error = config_init_component(cfdriver_ioconf_if_vioif,
2825		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2826		break;
2827	case MODULE_CMD_FINI:
2828		error = config_fini_component(cfdriver_ioconf_if_vioif,
2829		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2830		break;
2831	default:
2832		error = ENOTTY;
2833		break;
2834	}
2835#endif
2836
2837	return error;
2838}
2839