if_vioif.c revision 1.102
1/*	$NetBSD: if_vioif.c,v 1.102 2023/03/23 03:02:17 yamaguchi Exp $	*/
2
3/*
4 * Copyright (c) 2020 The NetBSD Foundation, Inc.
5 * Copyright (c) 2010 Minoura Makoto.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__KERNEL_RCSID(0, "$NetBSD: if_vioif.c,v 1.102 2023/03/23 03:02:17 yamaguchi Exp $");
31
32#ifdef _KERNEL_OPT
33#include "opt_net_mpsafe.h"
34#endif
35
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/kernel.h>
39#include <sys/atomic.h>
40#include <sys/bus.h>
41#include <sys/condvar.h>
42#include <sys/device.h>
43#include <sys/evcnt.h>
44#include <sys/intr.h>
45#include <sys/kmem.h>
46#include <sys/mbuf.h>
47#include <sys/mutex.h>
48#include <sys/sockio.h>
49#include <sys/syslog.h>
50#include <sys/cpu.h>
51#include <sys/module.h>
52#include <sys/pcq.h>
53#include <sys/workqueue.h>
54#include <sys/xcall.h>
55
56#include <dev/pci/virtioreg.h>
57#include <dev/pci/virtiovar.h>
58
59#include <net/if.h>
60#include <net/if_dl.h>
61#include <net/if_media.h>
62#include <net/if_ether.h>
63
64#include <net/bpf.h>
65
66#include "ioconf.h"
67
68#ifdef NET_MPSAFE
69#define VIOIF_MPSAFE	1
70#define VIOIF_MULTIQ	1
71#endif
72
73/*
74 * if_vioifreg.h:
75 */
76/* Configuration registers */
77#define VIRTIO_NET_CONFIG_MAC		 0 /* 8bit x 6byte */
78#define VIRTIO_NET_CONFIG_STATUS	 6 /* 16bit */
79#define VIRTIO_NET_CONFIG_MAX_VQ_PAIRS	 8 /* 16bit */
80#define VIRTIO_NET_CONFIG_MTU		10 /* 16bit */
81
82/* Feature bits */
83#define VIRTIO_NET_F_CSUM		__BIT(0)
84#define VIRTIO_NET_F_GUEST_CSUM		__BIT(1)
85#define VIRTIO_NET_F_MAC		__BIT(5)
86#define VIRTIO_NET_F_GSO		__BIT(6)
87#define VIRTIO_NET_F_GUEST_TSO4		__BIT(7)
88#define VIRTIO_NET_F_GUEST_TSO6		__BIT(8)
89#define VIRTIO_NET_F_GUEST_ECN		__BIT(9)
90#define VIRTIO_NET_F_GUEST_UFO		__BIT(10)
91#define VIRTIO_NET_F_HOST_TSO4		__BIT(11)
92#define VIRTIO_NET_F_HOST_TSO6		__BIT(12)
93#define VIRTIO_NET_F_HOST_ECN		__BIT(13)
94#define VIRTIO_NET_F_HOST_UFO		__BIT(14)
95#define VIRTIO_NET_F_MRG_RXBUF		__BIT(15)
96#define VIRTIO_NET_F_STATUS		__BIT(16)
97#define VIRTIO_NET_F_CTRL_VQ		__BIT(17)
98#define VIRTIO_NET_F_CTRL_RX		__BIT(18)
99#define VIRTIO_NET_F_CTRL_VLAN		__BIT(19)
100#define VIRTIO_NET_F_CTRL_RX_EXTRA	__BIT(20)
101#define VIRTIO_NET_F_GUEST_ANNOUNCE	__BIT(21)
102#define VIRTIO_NET_F_MQ			__BIT(22)
103#define VIRTIO_NET_F_CTRL_MAC_ADDR 	__BIT(23)
104
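/*
 * snprintb(3)-style description of the feature bits above; it is passed to
 * virtio_child_attach_start() so the negotiated features can be printed by
 * name ("b\x17" describes single bit 23, and so on down to bit 0).
 */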
105#define VIRTIO_NET_FLAG_BITS			\
106	VIRTIO_COMMON_FLAG_BITS			\
107	"b\x17" "CTRL_MAC\0"			\
108	"b\x16" "MQ\0"				\
109	"b\x15" "GUEST_ANNOUNCE\0"		\
110	"b\x14" "CTRL_RX_EXTRA\0"		\
111	"b\x13" "CTRL_VLAN\0"			\
112	"b\x12" "CTRL_RX\0"			\
113	"b\x11" "CTRL_VQ\0"			\
114	"b\x10" "STATUS\0"			\
115	"b\x0f" "MRG_RXBUF\0"			\
116	"b\x0e" "HOST_UFO\0"			\
117	"b\x0d" "HOST_ECN\0"			\
118	"b\x0c" "HOST_TSO6\0"			\
119	"b\x0b" "HOST_TSO4\0"			\
120	"b\x0a" "GUEST_UFO\0"			\
121	"b\x09" "GUEST_ECN\0"			\
122	"b\x08" "GUEST_TSO6\0"			\
123	"b\x07" "GUEST_TSO4\0"			\
124	"b\x06" "GSO\0"				\
125	"b\x05" "MAC\0"				\
126	"b\x01" "GUEST_CSUM\0"			\
127	"b\x00" "CSUM\0"
128
129/* Status */
130#define VIRTIO_NET_S_LINK_UP	1
131
132/* Packet header structure */
133struct virtio_net_hdr {
134	uint8_t		flags;
135	uint8_t		gso_type;
136	uint16_t	hdr_len;
137	uint16_t	gso_size;
138	uint16_t	csum_start;
139	uint16_t	csum_offset;
140
141	uint16_t	num_buffers; /* VIRTIO_NET_F_MRG_RXBUF enabled or v1 */
142} __packed;
143
144#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
145#define VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
146#define VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
147#define VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
148#define VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
149#define VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */
150
151#define VIRTIO_NET_MAX_GSO_LEN		(65536+ETHER_HDR_LEN)
152
153/* Control virtqueue */
154struct virtio_net_ctrl_cmd {
155	uint8_t	class;
156	uint8_t	command;
157} __packed;
158#define VIRTIO_NET_CTRL_RX		0
159# define VIRTIO_NET_CTRL_RX_PROMISC	0
160# define VIRTIO_NET_CTRL_RX_ALLMULTI	1
161
162#define VIRTIO_NET_CTRL_MAC		1
163# define VIRTIO_NET_CTRL_MAC_TABLE_SET	0
164# define  VIRTIO_NET_CTRL_MAC_ADDR_SET	1
165
166#define VIRTIO_NET_CTRL_VLAN		2
167# define VIRTIO_NET_CTRL_VLAN_ADD	0
168# define VIRTIO_NET_CTRL_VLAN_DEL	1
169
170#define VIRTIO_NET_CTRL_MQ			4
171# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET	0
172# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN	1
173# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX	0x8000
174
175struct virtio_net_ctrl_status {
176	uint8_t	ack;
177} __packed;
178#define VIRTIO_NET_OK			0
179#define VIRTIO_NET_ERR			1
180
181struct virtio_net_ctrl_rx {
182	uint8_t	onoff;
183} __packed;
184
185struct virtio_net_ctrl_mac_tbl {
186	uint32_t nentries;
187	uint8_t macs[][ETHER_ADDR_LEN];
188} __packed;
189
190struct virtio_net_ctrl_mac_addr {
191	uint8_t mac[ETHER_ADDR_LEN];
192} __packed;
193
194struct virtio_net_ctrl_vlan {
195	uint16_t id;
196} __packed;
197
198struct virtio_net_ctrl_mq {
199	uint16_t virtqueue_pairs;
200} __packed;
201
202/*
203 * if_vioifvar.h:
204 */
205
206/*
207 * Locking notes:
208 * + a field in vioif_netqueue is protected by netq_lock (a spin mutex)
209 *      - more than one netq_lock cannot be held at once
210 * + a field in vioif_tx_context and vioif_rx_context is also protected
211 *   by netq_lock.
212 * + ctrlq_inuse is protected by ctrlq_wait_lock.
213 *      - other fields in vioif_ctrlqueue are protected by ctrlq_inuse
214 *      - netq_lock cannot be held along with ctrlq_wait_lock
215 * + fields in vioif_softc except queues are protected by
216 *   sc->sc_lock (an adaptive mutex)
217 *      - the lock is held before acquisition of other locks
218 */
219
220struct vioif_ctrl_cmdspec {
221	bus_dmamap_t	dmamap;
222	void		*buf;
223	bus_size_t	bufsize;
224};
225
226struct vioif_work {
227	struct work	 cookie;
228	void		(*func)(void *);
229	void		*arg;
230	unsigned int	 added;
231};
232
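/*
 * Per-slot DMA state: every virtqueue slot owns a virtio_net_hdr carved out
 * of the shared DMA region (vnm_hdr, with vnm_hdr_map loaded once in
 * vioif_alloc_mems()) and a second dmamap (vnm_mbuf_map) that is loaded and
 * unloaded per packet as mbufs move through the ring.
 */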
233struct vioif_net_map {
234	struct virtio_net_hdr	*vnm_hdr;
235	bus_dmamap_t		 vnm_hdr_map;
236	struct mbuf		*vnm_mbuf;
237	bus_dmamap_t		 vnm_mbuf_map;
238};
239
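/*
 * Network virtqueues are laid out as rx0, tx0, rx1, tx1, ...: even flat
 * indexes are receive queues and odd ones are transmit queues.  The macros
 * below convert between a flat queue id and its (direction, pair index)
 * pair.
 */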
240#define VIOIF_NETQ_RX		0
241#define VIOIF_NETQ_TX		1
242#define VIOIF_NETQ_IDX		2
243#define VIOIF_NETQ_DIR(n)	((n) % VIOIF_NETQ_IDX)
244#define VIOIF_NETQ_PAIRIDX(n)	((n) / VIOIF_NETQ_IDX)
245#define VIOIF_NETQ_RXQID(n)	((n) * VIOIF_NETQ_IDX + VIOIF_NETQ_RX)
246#define VIOIF_NETQ_TXQID(n)	((n) * VIOIF_NETQ_IDX + VIOIF_NETQ_TX)
247
248struct vioif_netqueue {
249	kmutex_t		 netq_lock;
250	struct virtqueue	*netq_vq;
251	bool			 netq_stopping;
252	bool			 netq_running_handle;
253	void			*netq_maps_kva;
254	struct vioif_net_map	*netq_maps;
255
256	void			*netq_softint;
257	struct vioif_work	 netq_work;
258	bool			 netq_workqueue;
259
260	char			 netq_evgroup[32];
261	struct evcnt		 netq_mbuf_load_failed;
262	struct evcnt		 netq_enqueue_failed;
263
264	void			*netq_ctx;
265};
266
267struct vioif_tx_context {
268	bool			 txc_link_active;
269	bool			 txc_no_free_slots;
270	pcq_t			*txc_intrq;
271	void			*txc_deferred_transmit;
272
273	struct evcnt		 txc_defrag_failed;
274};
275
276struct vioif_rx_context {
277	struct evcnt		 rxc_mbuf_enobufs;
278};
279struct vioif_ctrlqueue {
280	struct virtqueue		*ctrlq_vq;
281	enum {
282		FREE, INUSE, DONE
283	}				ctrlq_inuse;
284	kcondvar_t			ctrlq_wait;
285	kmutex_t			ctrlq_wait_lock;
286	struct lwp			*ctrlq_owner;
287
288	struct virtio_net_ctrl_cmd	*ctrlq_cmd;
289	struct virtio_net_ctrl_status	*ctrlq_status;
290	struct virtio_net_ctrl_rx	*ctrlq_rx;
291	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_uc;
292	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_mc;
293	struct virtio_net_ctrl_mac_addr	*ctrlq_mac_addr;
294	struct virtio_net_ctrl_mq	*ctrlq_mq;
295
296	bus_dmamap_t			ctrlq_cmd_dmamap;
297	bus_dmamap_t			ctrlq_status_dmamap;
298	bus_dmamap_t			ctrlq_rx_dmamap;
299	bus_dmamap_t			ctrlq_tbl_uc_dmamap;
300	bus_dmamap_t			ctrlq_tbl_mc_dmamap;
301	bus_dmamap_t			ctrlq_mac_addr_dmamap;
302	bus_dmamap_t			ctrlq_mq_dmamap;
303
304	struct evcnt			ctrlq_cmd_load_failed;
305	struct evcnt			ctrlq_cmd_failed;
306};
307
308struct vioif_softc {
309	device_t		sc_dev;
310	kmutex_t		sc_lock;
311	struct sysctllog	*sc_sysctllog;
312
313	struct virtio_softc	*sc_virtio;
314	struct virtqueue	*sc_vqs;
315	u_int			 sc_hdr_size;
316
317	int			sc_max_nvq_pairs;
318	int			sc_req_nvq_pairs;
319	int			sc_act_nvq_pairs;
320
321	uint8_t			sc_mac[ETHER_ADDR_LEN];
322	struct ethercom		sc_ethercom;
323	int			sc_link_state;
324
325	struct vioif_netqueue	*sc_netqs;
326
327	bool			sc_has_ctrl;
328	struct vioif_ctrlqueue	sc_ctrlq;
329
330	bus_dma_segment_t	 sc_segs[1];
331	void			*sc_dmamem;
332	void			*sc_kmem;
333
334	void			*sc_cfg_softint;
335
336	struct workqueue	*sc_txrx_workqueue;
337	bool			 sc_txrx_workqueue_sysctl;
338	u_int			 sc_tx_intr_process_limit;
339	u_int			 sc_tx_process_limit;
340	u_int			 sc_rx_intr_process_limit;
341	u_int			 sc_rx_process_limit;
342};
343#define VIRTIO_NET_TX_MAXNSEGS		(16) /* XXX */
344#define VIRTIO_NET_CTRL_MAC_MAXENTRIES	(64) /* XXX */
345
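/*
 * Per-run packet budgets: the *_INTR_PROCESS_LIMIT values bound how much a
 * queue interrupt handler does directly, while the *_PROCESS_LIMIT values
 * bound each softint/workqueue pass.  The default rx interrupt limit of 0
 * makes rx interrupts defer all dequeueing to the softint/workqueue
 * handler.  All four are adjustable via the sysctl nodes created in
 * vioif_setup_sysctl().
 */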
346#define VIOIF_TX_INTR_PROCESS_LIMIT	256
347#define VIOIF_TX_PROCESS_LIMIT		256
348#define VIOIF_RX_INTR_PROCESS_LIMIT	0U
349#define VIOIF_RX_PROCESS_LIMIT		256
350
351#define VIOIF_WORKQUEUE_PRI		PRI_SOFTNET
352#define VIOIF_IS_LINK_ACTIVE(_sc)	((_sc)->sc_link_state == LINK_STATE_UP ? \
353					    true : false)
354
355/* cfattach interface functions */
356static int	vioif_match(device_t, cfdata_t, void *);
357static void	vioif_attach(device_t, device_t, void *);
358static int	vioif_finalize_teardown(device_t);
359
360/* ifnet interface functions */
361static int	vioif_init(struct ifnet *);
362static void	vioif_stop(struct ifnet *, int);
363static void	vioif_start(struct ifnet *);
364static int	vioif_transmit(struct ifnet *, struct mbuf *);
365static int	vioif_ioctl(struct ifnet *, u_long, void *);
366static void	vioif_watchdog(struct ifnet *);
367static int	vioif_ifflags(struct vioif_softc *);
368static int	vioif_ifflags_cb(struct ethercom *);
369
370/* tx & rx */
371static int	vioif_netqueue_init(struct vioif_softc *,
372		    struct virtio_softc *, size_t, u_int);
373static void	vioif_netqueue_teardown(struct vioif_softc *,
374		    struct virtio_softc *, size_t);
375static void	vioif_net_intr_enable(struct vioif_softc *,
376		    struct virtio_softc *);
377static void	vioif_net_intr_disable(struct vioif_softc *,
378		    struct virtio_softc *);
379static void	vioif_net_sched_handle(struct vioif_softc *,
380		    struct vioif_netqueue *);
381
382/* rx */
383static void	vioif_populate_rx_mbufs_locked(struct vioif_softc *,
384		    struct vioif_netqueue *);
385static int	vioif_rx_intr(void *);
386static void	vioif_rx_handle(void *);
387static void	vioif_rx_queue_clear(struct vioif_softc *,
388		    struct virtio_softc *, struct vioif_netqueue *);
389
390/* tx */
391static void	vioif_start_locked(struct ifnet *, struct vioif_netqueue *);
392static void	vioif_transmit_locked(struct ifnet *, struct vioif_netqueue *);
393static void	vioif_deferred_transmit(void *);
394static int	vioif_tx_intr(void *);
395static void	vioif_tx_handle(void *);
396static void	vioif_tx_queue_clear(struct vioif_softc *, struct virtio_softc *,
397		    struct vioif_netqueue *);
398
399/* controls */
400static int	vioif_ctrl_intr(void *);
401static int	vioif_ctrl_rx(struct vioif_softc *, int, bool);
402static int	vioif_set_promisc(struct vioif_softc *, bool);
403static int	vioif_set_allmulti(struct vioif_softc *, bool);
404static int	vioif_set_rx_filter(struct vioif_softc *);
405static int	vioif_rx_filter(struct vioif_softc *);
406static int	vioif_set_mac_addr(struct vioif_softc *);
407static int	vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *, int);
408
409/* config interrupt */
410static int	vioif_config_change(struct virtio_softc *);
411static void	vioif_cfg_softint(void *);
412static void	vioif_update_link_status(struct vioif_softc *);
413
414/* others */
415static void	vioif_alloc_queues(struct vioif_softc *);
416static void	vioif_free_queues(struct vioif_softc *);
417static int	vioif_alloc_mems(struct vioif_softc *);
418static struct workqueue*
419		vioif_workq_create(const char *, pri_t, int, int);
420static void	vioif_workq_destroy(struct workqueue *);
421static void	vioif_work_set(struct vioif_work *, void(*)(void *), void *);
422static void	vioif_work_add(struct workqueue *, struct vioif_work *);
423static void	vioif_work_wait(struct workqueue *, struct vioif_work *);
424static int	vioif_setup_sysctl(struct vioif_softc *);
425static void	vioif_setup_stats(struct vioif_softc *);
426
427CFATTACH_DECL_NEW(vioif, sizeof(struct vioif_softc),
428		  vioif_match, vioif_attach, NULL, NULL);
429
430static void
431vioif_intr_barrier(void)
432{
433
434	/* wait for all interrupt handlers to finish */
435	xc_barrier(0);
436}
437
438static void
439vioif_notify(struct virtio_softc *vsc, struct virtqueue *vq)
440{
441
442	virtio_enqueue_commit(vsc, vq, -1, true);
443}
444
445static int
446vioif_match(device_t parent, cfdata_t match, void *aux)
447{
448	struct virtio_attach_args *va = aux;
449
450	if (va->sc_childdevid == VIRTIO_DEVICE_ID_NETWORK)
451		return 1;
452
453	return 0;
454}
455
456static void
457vioif_attach(device_t parent, device_t self, void *aux)
458{
459	struct vioif_softc *sc = device_private(self);
460	struct virtio_softc *vsc = device_private(parent);
461	struct vioif_netqueue *txq0;
462	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
463	uint64_t features, req_features;
464	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
465	u_int softint_flags;
466	int r, i, req_flags;
467	char xnamebuf[MAXCOMLEN];
468	size_t netq_num;
469
470	if (virtio_child(vsc) != NULL) {
471		aprint_normal(": child already attached for %s; "
472		    "something wrong...\n", device_xname(parent));
473		return;
474	}
475
476	sc->sc_dev = self;
477	sc->sc_virtio = vsc;
478	sc->sc_link_state = LINK_STATE_UNKNOWN;
479
480	sc->sc_max_nvq_pairs = 1;
481	sc->sc_req_nvq_pairs = 1;
482	sc->sc_act_nvq_pairs = 1;
483	sc->sc_txrx_workqueue_sysctl = true;
484	sc->sc_tx_intr_process_limit = VIOIF_TX_INTR_PROCESS_LIMIT;
485	sc->sc_tx_process_limit = VIOIF_TX_PROCESS_LIMIT;
486	sc->sc_rx_intr_process_limit = VIOIF_RX_INTR_PROCESS_LIMIT;
487	sc->sc_rx_process_limit = VIOIF_RX_PROCESS_LIMIT;
488
489	mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);
490
491	snprintf(xnamebuf, sizeof(xnamebuf), "%s_txrx", device_xname(self));
492	sc->sc_txrx_workqueue = vioif_workq_create(xnamebuf, VIOIF_WORKQUEUE_PRI,
493	    IPL_NET, WQ_PERCPU | WQ_MPSAFE);
494	if (sc->sc_txrx_workqueue == NULL)
495		goto err;
496
497	req_flags = 0;
498
499#ifdef VIOIF_MPSAFE
500	req_flags |= VIRTIO_F_INTR_MPSAFE;
501#endif
502	req_flags |= VIRTIO_F_INTR_MSIX;
503
504	req_features =
505	    VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | VIRTIO_NET_F_CTRL_VQ |
506	    VIRTIO_NET_F_CTRL_RX | VIRTIO_F_NOTIFY_ON_EMPTY;
507	req_features |= VIRTIO_F_RING_EVENT_IDX;
508	req_features |= VIRTIO_NET_F_CTRL_MAC_ADDR;
509#ifdef VIOIF_MULTIQ
510	req_features |= VIRTIO_NET_F_MQ;
511#endif
512	virtio_child_attach_start(vsc, self, IPL_NET, NULL,
513	    vioif_config_change, virtio_vq_intrhand, req_flags,
514	    req_features, VIRTIO_NET_FLAG_BITS);
515
516	features = virtio_features(vsc);
517	if (features == 0)
518		goto err;
519
520	if (features & VIRTIO_NET_F_MAC) {
521		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
522			sc->sc_mac[i] = virtio_read_device_config_1(vsc,
523			    VIRTIO_NET_CONFIG_MAC + i);
524		}
525	} else {
526		/* code stolen from sys/net/if_tap.c */
527		struct timeval tv;
528		uint32_t ui;
529		getmicrouptime(&tv);
530		ui = (tv.tv_sec ^ tv.tv_usec) & 0xffffff;
531		memcpy(sc->sc_mac+3, (uint8_t *)&ui, 3);
532		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
533			virtio_write_device_config_1(vsc,
534			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
535		}
536	}
537
538	/* 'Ethernet' with a capital letter follows other Ethernet drivers' attach output */
539	aprint_normal_dev(self, "Ethernet address %s\n",
540	    ether_sprintf(sc->sc_mac));
541
542	if (features & (VIRTIO_NET_F_MRG_RXBUF | VIRTIO_F_VERSION_1)) {
543		sc->sc_hdr_size = sizeof(struct virtio_net_hdr);
544	} else {
545		sc->sc_hdr_size = offsetof(struct virtio_net_hdr, num_buffers);
546	}
547
548	if ((features & VIRTIO_NET_F_CTRL_VQ) &&
549	    (features & VIRTIO_NET_F_CTRL_RX)) {
550		sc->sc_has_ctrl = true;
551
552		cv_init(&ctrlq->ctrlq_wait, "ctrl_vq");
553		mutex_init(&ctrlq->ctrlq_wait_lock, MUTEX_DEFAULT, IPL_NET);
554		ctrlq->ctrlq_inuse = FREE;
555	} else {
556		sc->sc_has_ctrl = false;
557	}
558
559	if (sc->sc_has_ctrl && (features & VIRTIO_NET_F_MQ)) {
560		sc->sc_max_nvq_pairs = virtio_read_device_config_2(vsc,
561		    VIRTIO_NET_CONFIG_MAX_VQ_PAIRS);
562
563		if (sc->sc_max_nvq_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
564			goto err;
565
566		/* Limit the number of queue pairs to use */
567		sc->sc_req_nvq_pairs = MIN(sc->sc_max_nvq_pairs, ncpu);
568	}
569
570	vioif_alloc_queues(sc);
571	virtio_child_attach_set_vqs(vsc, sc->sc_vqs, sc->sc_req_nvq_pairs);
572
573#ifdef VIOIF_MPSAFE
574	softint_flags = SOFTINT_NET | SOFTINT_MPSAFE;
575#else
576	softint_flags = SOFTINT_NET;
577#endif
578
579	/*
580	 * Initialize network queues
581	 */
582	netq_num = sc->sc_max_nvq_pairs * 2;
583	for (i = 0; i < netq_num; i++) {
584		r = vioif_netqueue_init(sc, vsc, i, softint_flags);
585		if (r != 0)
586			goto err;
587	}
588
589	if (sc->sc_has_ctrl) {
590		int ctrlq_idx = sc->sc_max_nvq_pairs * 2;
591		/*
592		 * Allocating a virtqueue for control channel
593		 */
594		sc->sc_ctrlq.ctrlq_vq = &sc->sc_vqs[ctrlq_idx];
595		r = virtio_alloc_vq(vsc, ctrlq->ctrlq_vq, ctrlq_idx,
596		    NBPG, 1, "control");
597		if (r != 0) {
598			aprint_error_dev(self, "failed to allocate "
599			    "a virtqueue for control channel, error code %d\n",
600			    r);
601
602			sc->sc_has_ctrl = false;
603			cv_destroy(&ctrlq->ctrlq_wait);
604			mutex_destroy(&ctrlq->ctrlq_wait_lock);
605		} else {
606			ctrlq->ctrlq_vq->vq_intrhand = vioif_ctrl_intr;
607			ctrlq->ctrlq_vq->vq_intrhand_arg = (void *) ctrlq;
608		}
609	}
610
611	sc->sc_cfg_softint = softint_establish(softint_flags,
612	    vioif_cfg_softint, sc);
613	if (sc->sc_cfg_softint == NULL) {
614		aprint_error_dev(self, "cannot establish ctl softint\n");
615		goto err;
616	}
617
618	if (vioif_alloc_mems(sc) < 0)
619		goto err;
620
621	if (virtio_child_attach_finish(vsc) != 0)
622		goto err;
623
624	if (vioif_setup_sysctl(sc) != 0) {
625		aprint_error_dev(self, "unable to create sysctl node\n");
626		/* continue */
627	}
628
629	vioif_setup_stats(sc);
630
631	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
632	ifp->if_softc = sc;
633	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
634#ifdef VIOIF_MPSAFE
635	ifp->if_extflags = IFEF_MPSAFE;
636#endif
637	ifp->if_start = vioif_start;
638	if (sc->sc_req_nvq_pairs > 1)
639		ifp->if_transmit = vioif_transmit;
640	ifp->if_ioctl = vioif_ioctl;
641	ifp->if_init = vioif_init;
642	ifp->if_stop = vioif_stop;
643	ifp->if_capabilities = 0;
644	ifp->if_watchdog = vioif_watchdog;
645	txq0 = &sc->sc_netqs[VIOIF_NETQ_TXQID(0)];
646	IFQ_SET_MAXLEN(&ifp->if_snd, MAX(txq0->netq_vq->vq_num, IFQ_MAXLEN));
647	IFQ_SET_READY(&ifp->if_snd);
648
649	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
650
651	if_attach(ifp);
652	if_deferred_start_init(ifp, NULL);
653	ether_ifattach(ifp, sc->sc_mac);
654	ether_set_ifflags_cb(&sc->sc_ethercom, vioif_ifflags_cb);
655
656	return;
657
658err:
659	netq_num = sc->sc_max_nvq_pairs * 2;
660	for (i = 0; i < netq_num; i++) {
661		vioif_netqueue_teardown(sc, vsc, i);
662	}
663
664	if (sc->sc_has_ctrl) {
665		cv_destroy(&ctrlq->ctrlq_wait);
666		mutex_destroy(&ctrlq->ctrlq_wait_lock);
667		virtio_free_vq(vsc, ctrlq->ctrlq_vq);
668		ctrlq->ctrlq_vq = NULL;
669	}
670
671	vioif_free_queues(sc);
672	mutex_destroy(&sc->sc_lock);
673	virtio_child_attach_failed(vsc);
674	config_finalize_register(self, vioif_finalize_teardown);
675
676	return;
677}
678
679static int
680vioif_finalize_teardown(device_t self)
681{
682	struct vioif_softc *sc = device_private(self);
683
684	if (sc->sc_txrx_workqueue != NULL) {
685		vioif_workq_destroy(sc->sc_txrx_workqueue);
686		sc->sc_txrx_workqueue = NULL;
687	}
688
689	return 0;
690}
691
692/*
693 * Interface functions for ifnet
694 */
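/*
 * vioif_init() brings the interface up: it stops any running state, resets
 * and reinitializes the device, re-negotiates the previously accepted
 * features, refills the rx rings, requests the desired number of queue
 * pairs over the control virtqueue, then re-enables interrupts and
 * reprograms the rx filter.
 */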
695static int
696vioif_init(struct ifnet *ifp)
697{
698	struct vioif_softc *sc = ifp->if_softc;
699	struct virtio_softc *vsc = sc->sc_virtio;
700	struct vioif_netqueue *netq;
701	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
702	int r, i;
703
704	vioif_stop(ifp, 0);
705
706	r = virtio_reinit_start(vsc);
707	if (r != 0) {
708		log(LOG_ERR, "%s: reset failed\n", ifp->if_xname);
709		return EIO;
710	}
711
712	virtio_negotiate_features(vsc, virtio_features(vsc));
713
714	for (i = 0; i < sc->sc_req_nvq_pairs; i++) {
715		netq = &sc->sc_netqs[VIOIF_NETQ_RXQID(i)];
716
717		mutex_enter(&netq->netq_lock);
718		vioif_populate_rx_mbufs_locked(sc, netq);
719		mutex_exit(&netq->netq_lock);
720	}
721
722	virtio_reinit_end(vsc);
723
724	if (sc->sc_has_ctrl)
725		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
726
727	r = vioif_ctrl_mq_vq_pairs_set(sc, sc->sc_req_nvq_pairs);
728	if (r == 0)
729		sc->sc_act_nvq_pairs = sc->sc_req_nvq_pairs;
730	else
731		sc->sc_act_nvq_pairs = 1;
732
733	SET(ifp->if_flags, IFF_RUNNING);
734
735	vioif_net_intr_enable(sc, vsc);
736
737	vioif_update_link_status(sc);
738	r = vioif_rx_filter(sc);
739
740	return r;
741}
742
743static void
744vioif_stop(struct ifnet *ifp, int disable)
745{
746	struct vioif_softc *sc = ifp->if_softc;
747	struct virtio_softc *vsc = sc->sc_virtio;
748	struct vioif_netqueue *netq;
749	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
750	size_t i, act_qnum;
751
752	act_qnum = sc->sc_act_nvq_pairs * 2;
753
754	CLR(ifp->if_flags, IFF_RUNNING);
755	for (i = 0; i < act_qnum; i++) {
756		netq = &sc->sc_netqs[i];
757
758		mutex_enter(&netq->netq_lock);
759		netq->netq_stopping = true;
760		mutex_exit(&netq->netq_lock);
761	}
762
763	/* disable interrupts */
764	vioif_net_intr_disable(sc, vsc);
765	if (sc->sc_has_ctrl)
766		virtio_stop_vq_intr(vsc, ctrlq->ctrlq_vq);
767
768	/*
769	 * The only way to stop interrupts, I/O and DMA is to reset the device...
770	 *
771	 * NOTE: Devices based on the VirtIO draft specification cannot
772	 * stop interrupts completely even if virtio_stop_vq_intr() is called.
773	 */
774	virtio_reset(vsc);
775
776	vioif_intr_barrier();
777
778	for (i = 0; i < act_qnum; i++) {
779		netq = &sc->sc_netqs[i];
780		vioif_work_wait(sc->sc_txrx_workqueue, &netq->netq_work);
781	}
782
783	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
784		netq = &sc->sc_netqs[VIOIF_NETQ_RXQID(i)];
785		vioif_rx_queue_clear(sc, vsc, netq);
786
787		netq = &sc->sc_netqs[VIOIF_NETQ_TXQID(i)];
788		vioif_tx_queue_clear(sc, vsc, netq);
789	}
790
791	/* all packet processing is stopped */
792	for (i = 0; i < act_qnum; i++) {
793		netq = &sc->sc_netqs[i];
794
795		mutex_enter(&netq->netq_lock);
796		netq->netq_stopping = false;
797		mutex_exit(&netq->netq_lock);
798	}
799}
800
801static void
802vioif_start(struct ifnet *ifp)
803{
804	struct vioif_softc *sc = ifp->if_softc;
805	struct vioif_netqueue *txq0 = &sc->sc_netqs[VIOIF_NETQ_TXQID(0)];
806
807#ifdef VIOIF_MPSAFE
808	KASSERT(if_is_mpsafe(ifp));
809#endif
810
811	mutex_enter(&txq0->netq_lock);
812	vioif_start_locked(ifp, txq0);
813	mutex_exit(&txq0->netq_lock);
814}
815
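/*
 * Choose the tx queue for if_transmit: spread traffic over the active
 * queue pairs by the index of the CPU the sender is running on.
 */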
816static inline int
817vioif_select_txqueue(struct ifnet *ifp, struct mbuf *m)
818{
819	struct vioif_softc *sc = ifp->if_softc;
820	u_int cpuid = cpu_index(curcpu());
821
822	return VIOIF_NETQ_TXQID(cpuid % sc->sc_act_nvq_pairs);
823}
824
825static int
826vioif_transmit(struct ifnet *ifp, struct mbuf *m)
827{
828	struct vioif_softc *sc = ifp->if_softc;
829	struct vioif_netqueue *netq;
830	struct vioif_tx_context *txc;
831	int qid;
832
833	qid = vioif_select_txqueue(ifp, m);
834	netq = &sc->sc_netqs[qid];
835	txc = netq->netq_ctx;
836
837	if (__predict_false(!pcq_put(txc->txc_intrq, m))) {
838		m_freem(m);
839		return ENOBUFS;
840	}
841
842	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
843	if_statadd_ref(nsr, if_obytes, m->m_pkthdr.len);
844	if (m->m_flags & M_MCAST)
845		if_statinc_ref(nsr, if_omcasts);
846	IF_STAT_PUTREF(ifp);
847
848	if (mutex_tryenter(&netq->netq_lock)) {
849		vioif_transmit_locked(ifp, netq);
850		mutex_exit(&netq->netq_lock);
851	}
852
853	return 0;
854}
855
856void
857vioif_watchdog(struct ifnet *ifp)
858{
859	struct vioif_softc *sc = ifp->if_softc;
860	struct vioif_netqueue *netq;
861	int i;
862
863	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
864		if (ISSET(ifp->if_flags, IFF_DEBUG)) {
865			log(LOG_DEBUG, "%s: watchdog timed out\n",
866			    ifp->if_xname);
867		}
868
869		for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
870			netq = &sc->sc_netqs[VIOIF_NETQ_TXQID(i)];
871
872			mutex_enter(&netq->netq_lock);
873			if (!netq->netq_running_handle) {
874				netq->netq_running_handle = true;
875				vioif_net_sched_handle(sc, netq);
876			}
877			mutex_exit(&netq->netq_lock);
878		}
879	}
880}
881
882static int
883vioif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
884{
885	int s, r;
886
887	s = splnet();
888
889	r = ether_ioctl(ifp, cmd, data);
890	if (r == ENETRESET && (cmd == SIOCADDMULTI || cmd == SIOCDELMULTI)) {
891		if (ifp->if_flags & IFF_RUNNING) {
892			r = vioif_rx_filter(ifp->if_softc);
893		} else {
894			r = 0;
895		}
896	}
897
898	splx(s);
899
900	return r;
901}
902
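/*
 * Push the IFF_PROMISC and IFF_ALLMULTI interface flags to the device via
 * the control virtqueue.  When a mode cannot be switched off, the
 * corresponding flag is forced back on so the software state keeps
 * matching what the device is actually doing.
 */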
903static int
904vioif_ifflags(struct vioif_softc *sc)
905{
906	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
907	bool onoff;
908	int r;
909
910	if (!sc->sc_has_ctrl) {
911		/* no ctrl vq; always promisc and allmulti */
912		ifp->if_flags |= (IFF_PROMISC | IFF_ALLMULTI);
913		return 0;
914	}
915
916	onoff = ifp->if_flags & IFF_ALLMULTI ? true : false;
917	r = vioif_set_allmulti(sc, onoff);
918	if (r != 0) {
919		log(LOG_WARNING,
920		    "%s: couldn't %sable ALLMULTI\n",
921		    ifp->if_xname, onoff ? "en" : "dis");
922		if (onoff == false) {
923			ifp->if_flags |= IFF_ALLMULTI;
924		}
925	}
926
927	onoff = ifp->if_flags & IFF_PROMISC ? true : false;
928	r = vioif_set_promisc(sc, onoff);
929	if (r != 0) {
930		log(LOG_WARNING,
931		    "%s: couldn't %sable PROMISC\n",
932		    ifp->if_xname, onoff ? "en" : "dis");
933		if (onoff == false) {
934			ifp->if_flags |= IFF_PROMISC;
935		}
936	}
937
938	return 0;
939}
940
941static int
942vioif_ifflags_cb(struct ethercom *ec)
943{
944	struct ifnet *ifp = &ec->ec_if;
945	struct vioif_softc *sc = ifp->if_softc;
946
947	return vioif_ifflags(sc);
948}
949
950static int
951vioif_setup_sysctl(struct vioif_softc *sc)
952{
953	const char *devname;
954	struct sysctllog **log;
955	const struct sysctlnode *rnode, *rxnode, *txnode;
956	int error;
957
958	log = &sc->sc_sysctllog;
959	devname = device_xname(sc->sc_dev);
960
961	error = sysctl_createv(log, 0, NULL, &rnode,
962	    0, CTLTYPE_NODE, devname,
963	    SYSCTL_DESCR("virtio-net information and settings"),
964	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);
965	if (error)
966		goto out;
967
968	error = sysctl_createv(log, 0, &rnode, NULL,
969	    CTLFLAG_READWRITE, CTLTYPE_BOOL, "txrx_workqueue",
970	    SYSCTL_DESCR("Use workqueue for packet processing"),
971	    NULL, 0, &sc->sc_txrx_workqueue_sysctl, 0, CTL_CREATE, CTL_EOL);
972	if (error)
973		goto out;
974
975	error = sysctl_createv(log, 0, &rnode, &rxnode,
976	    0, CTLTYPE_NODE, "rx",
977	    SYSCTL_DESCR("virtio-net information and settings for Rx"),
978	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
979	if (error)
980		goto out;
981
982	error = sysctl_createv(log, 0, &rxnode, NULL,
983	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
984	    SYSCTL_DESCR("max number of Rx packets to process for interrupt processing"),
985	    NULL, 0, &sc->sc_rx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
986	if (error)
987		goto out;
988
989	error = sysctl_createv(log, 0, &rxnode, NULL,
990	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
991	    SYSCTL_DESCR("max number of Rx packets to process for deferred processing"),
992	    NULL, 0, &sc->sc_rx_process_limit, 0, CTL_CREATE, CTL_EOL);
993	if (error)
994		goto out;
995
996	error = sysctl_createv(log, 0, &rnode, &txnode,
997	    0, CTLTYPE_NODE, "tx",
998	    SYSCTL_DESCR("virtio-net information and settings for Tx"),
999	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
1000	if (error)
1001		goto out;
1002
1003	error = sysctl_createv(log, 0, &txnode, NULL,
1004	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
1005	    SYSCTL_DESCR("max number of Tx packets to process for interrupt processing"),
1006	    NULL, 0, &sc->sc_tx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
1007	if (error)
1008		goto out;
1009
1010	error = sysctl_createv(log, 0, &txnode, NULL,
1011	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
1012	    SYSCTL_DESCR("max number of Tx packets to process for deferred processing"),
1013	    NULL, 0, &sc->sc_tx_process_limit, 0, CTL_CREATE, CTL_EOL);
1014
1015out:
1016	if (error)
1017		sysctl_teardown(log);
1018
1019	return error;
1020}
1021
1022static void
1023vioif_setup_stats(struct vioif_softc *sc)
1024{
1025	struct vioif_netqueue *netq;
1026	struct vioif_tx_context *txc;
1027	struct vioif_rx_context *rxc;
1028	size_t i, netq_num;
1029
1030	netq_num = sc->sc_max_nvq_pairs * 2;
1031	for (i = 0; i < netq_num; i++) {
1032		netq = &sc->sc_netqs[i];
1033		evcnt_attach_dynamic(&netq->netq_mbuf_load_failed, EVCNT_TYPE_MISC,
1034		    NULL, netq->netq_evgroup, "failed to load mbuf to DMA");
1035		evcnt_attach_dynamic(&netq->netq_enqueue_failed,
1036		    EVCNT_TYPE_MISC, NULL, netq->netq_evgroup,
1037		    "virtqueue enqueue failed");
1038
1039		switch (VIOIF_NETQ_DIR(i)) {
1040		case VIOIF_NETQ_RX:
1041			rxc = netq->netq_ctx;
1042			evcnt_attach_dynamic(&rxc->rxc_mbuf_enobufs,
1043			    EVCNT_TYPE_MISC, NULL, netq->netq_evgroup,
1044			    "no receive buffer");
1045			break;
1046		case VIOIF_NETQ_TX:
1047			txc = netq->netq_ctx;
1048			evcnt_attach_dynamic(&txc->txc_defrag_failed,
1049			    EVCNT_TYPE_MISC, NULL, netq->netq_evgroup,
1050			    "m_defrag() failed");
1051			break;
1052		}
1053	}
1054
1055	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_load_failed, EVCNT_TYPE_MISC,
1056	    NULL, device_xname(sc->sc_dev), "control command dmamap load failed");
1057	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_failed, EVCNT_TYPE_MISC,
1058	    NULL, device_xname(sc->sc_dev), "control command failed");
1059}
1060
1061/*
1062 * allocate memory
1063 */
1064static int
1065vioif_dmamap_create(struct vioif_softc *sc, bus_dmamap_t *map,
1066    bus_size_t size, int nsegs, const char *usage)
1067{
1068	int r;
1069
1070	r = bus_dmamap_create(virtio_dmat(sc->sc_virtio), size,
1071	    nsegs, size, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, map);
1072
1073	if (r != 0) {
1074		aprint_error_dev(sc->sc_dev, "%s dmamap creation failed, "
1075		    "error code %d\n", usage, r);
1076	}
1077
1078	return r;
1079}
1080
1081static void
1082vioif_dmamap_destroy(struct vioif_softc *sc, bus_dmamap_t *map)
1083{
1084
1085	if (*map) {
1086		bus_dmamap_destroy(virtio_dmat(sc->sc_virtio), *map);
1087		*map = NULL;
1088	}
1089}
1090
1091static int
1092vioif_dmamap_create_load(struct vioif_softc *sc, bus_dmamap_t *map,
1093    void *buf, bus_size_t size, int nsegs, int rw, const char *usage)
1094{
1095	int r;
1096
1097	r = vioif_dmamap_create(sc, map, size, nsegs, usage);
1098	if (r != 0)
1099		return 1;
1100
1101	r = bus_dmamap_load(virtio_dmat(sc->sc_virtio), *map, buf,
1102	    size, NULL, rw | BUS_DMA_NOWAIT);
1103	if (r != 0) {
1104		vioif_dmamap_destroy(sc, map);
1105		aprint_error_dev(sc->sc_dev, "%s dmamap load failed, "
1106		    "error code %d\n", usage, r);
1107	}
1108
1109	return r;
1110}
1111
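/*
 * Simple bump allocator: return the current cursor *p and advance it by
 * size.  Used below to carve the single DMA allocation and the single
 * kmem allocation into per-queue and per-command pieces.
 */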
1112static void *
1113vioif_assign_mem(intptr_t *p, size_t size)
1114{
1115	intptr_t rv;
1116
1117	rv = *p;
1118	*p += size;
1119
1120	return (void *)rv;
1121}
1122
1123/*
1124 * dma memory is used for:
1125 *   netq_maps_kva:	 metadata array for received frames (READ) and
1126 *			 sent frames (WRITE)
1127 *   ctrlq_cmd:		 command to be sent via ctrl vq (WRITE)
1128 *   ctrlq_status:	 return value for a command via ctrl vq (READ)
1129 *   ctrlq_rx:		 parameter for a VIRTIO_NET_CTRL_RX class command
1130 *			 (WRITE)
1131 *   ctrlq_mac_tbl_uc:	 unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
1132 *			 class command (WRITE)
1133 *   ctrlq_mac_tbl_mc:	 multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
1134 *			 class command (WRITE)
1135 * Only one of each ctrlq_* structure is allocated; they are protected by
1136 * the ctrlq_inuse variable and the ctrlq_wait condvar.
1137 */
1138static int
1139vioif_alloc_mems(struct vioif_softc *sc)
1140{
1141	struct virtio_softc *vsc = sc->sc_virtio;
1142	struct vioif_netqueue *netq;
1143	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1144	struct vioif_net_map *maps;
1145	unsigned int vq_num;
1146	int r, rsegs;
1147	bus_size_t dmamemsize;
1148	size_t qid, i, netq_num, kmemsize;
1149	void *vaddr;
1150	intptr_t p;
1151
1152	netq_num = sc->sc_max_nvq_pairs * 2;
1153
1154	/* allocate DMA memory */
1155	dmamemsize = 0;
1156
1157	for (qid = 0; qid < netq_num; qid++) {
1158		maps = sc->sc_netqs[qid].netq_maps;
1159		vq_num = sc->sc_netqs[qid].netq_vq->vq_num;
1160		dmamemsize += sizeof(*maps[0].vnm_hdr) * vq_num;
1161	}
1162
1163	if (sc->sc_has_ctrl) {
1164		dmamemsize += sizeof(struct virtio_net_ctrl_cmd);
1165		dmamemsize += sizeof(struct virtio_net_ctrl_status);
1166		dmamemsize += sizeof(struct virtio_net_ctrl_rx);
1167		dmamemsize += sizeof(struct virtio_net_ctrl_mac_tbl)
1168		    + ETHER_ADDR_LEN;
1169		dmamemsize += sizeof(struct virtio_net_ctrl_mac_tbl)
1170		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES;
1171		dmamemsize += sizeof(struct virtio_net_ctrl_mac_addr);
1172		dmamemsize += sizeof(struct virtio_net_ctrl_mq);
1173	}
1174
1175	r = bus_dmamem_alloc(virtio_dmat(vsc), dmamemsize, 0, 0,
1176	    &sc->sc_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
1177	if (r != 0) {
1178		aprint_error_dev(sc->sc_dev,
1179		    "DMA memory allocation failed, size %zu, "
1180		    "error code %d\n", dmamemsize, r);
1181		goto err_none;
1182	}
1183	r = bus_dmamem_map(virtio_dmat(vsc), &sc->sc_segs[0], 1,
1184	    dmamemsize, &vaddr, BUS_DMA_NOWAIT);
1185	if (r != 0) {
1186		aprint_error_dev(sc->sc_dev,
1187		    "DMA memory map failed, error code %d\n", r);
1188		goto err_dmamem_alloc;
1189	}
1190
1191	/* assign DMA memory */
1192	memset(vaddr, 0, dmamemsize);
1193	sc->sc_dmamem = vaddr;
1194	p = (intptr_t) vaddr;
1195
1196	for (qid = 0; qid < netq_num; qid++) {
1197		netq = &sc->sc_netqs[qid];
1198		maps = netq->netq_maps;
1199		vq_num = netq->netq_vq->vq_num;
1200
1201		netq->netq_maps_kva = vioif_assign_mem(&p,
1202		    sizeof(*maps[0].vnm_hdr) * vq_num);
1203	}
1204
1205	if (sc->sc_has_ctrl) {
1206		ctrlq->ctrlq_cmd = vioif_assign_mem(&p,
1207		    sizeof(*ctrlq->ctrlq_cmd));
1208		ctrlq->ctrlq_status = vioif_assign_mem(&p,
1209		    sizeof(*ctrlq->ctrlq_status));
1210		ctrlq->ctrlq_rx = vioif_assign_mem(&p,
1211		    sizeof(*ctrlq->ctrlq_rx));
1212		ctrlq->ctrlq_mac_tbl_uc = vioif_assign_mem(&p,
1213		    sizeof(*ctrlq->ctrlq_mac_tbl_uc)
1214		    + ETHER_ADDR_LEN);
1215		ctrlq->ctrlq_mac_tbl_mc = vioif_assign_mem(&p,
1216		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
1217		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES);
1218		ctrlq->ctrlq_mac_addr = vioif_assign_mem(&p,
1219		    sizeof(*ctrlq->ctrlq_mac_addr));
1220		ctrlq->ctrlq_mq = vioif_assign_mem(&p, sizeof(*ctrlq->ctrlq_mq));
1221	}
1222
1223	/* allocate kmem */
1224	kmemsize = 0;
1225
1226	for (qid = 0; qid < netq_num; qid++) {
1227		netq = &sc->sc_netqs[qid];
1228		vq_num = netq->netq_vq->vq_num;
1229
1230		kmemsize += sizeof(netq->netq_maps[0]) * vq_num;
1231	}
1232
1233	vaddr = kmem_zalloc(kmemsize, KM_SLEEP);
1234	sc->sc_kmem = vaddr;
1235
1236	/* assign allocated kmem */
1237	p = (intptr_t) vaddr;
1238
1239	for (qid = 0; qid < netq_num; qid++) {
1240		netq = &sc->sc_netqs[qid];
1241		vq_num = netq->netq_vq->vq_num;
1242
1243		netq->netq_maps = vioif_assign_mem(&p,
1244		    sizeof(netq->netq_maps[0]) * vq_num);
1245	}
1246
1247	/* prepare dmamaps */
1248	for (qid = 0; qid < netq_num; qid++) {
1249		static const struct {
1250			const char	*msg_hdr;
1251			const char	*msg_payload;
1252			int		 dma_flag;
1253			bus_size_t	 dma_size;
1254			int		 dma_nsegs;
1255		} dmaparams[VIOIF_NETQ_IDX] = {
1256			[VIOIF_NETQ_RX] = {
1257				.msg_hdr	= "rx header",
1258				.msg_payload	= "rx payload",
1259				.dma_flag	= BUS_DMA_READ,
1260				.dma_size	= MCLBYTES - ETHER_ALIGN,
1261				.dma_nsegs	= 1,
1262			},
1263			[VIOIF_NETQ_TX] = {
1264				.msg_hdr	= "tx header",
1265				.msg_payload	= "tx payload",
1266				.dma_flag	= BUS_DMA_WRITE,
1267				.dma_size	= ETHER_MAX_LEN,
1268				.dma_nsegs	= VIRTIO_NET_TX_MAXNSEGS,
1269			}
1270		};
1271
1272		struct virtio_net_hdr *hdrs;
1273		int dir;
1274
1275		dir = VIOIF_NETQ_DIR(qid);
1276		netq = &sc->sc_netqs[qid];
1277		vq_num = netq->netq_vq->vq_num;
1278		maps = netq->netq_maps;
1279		hdrs = netq->netq_maps_kva;
1280
1281		for (i = 0; i < vq_num; i++) {
1282			maps[i].vnm_hdr = &hdrs[i];
1283
1284			r = vioif_dmamap_create_load(sc, &maps[i].vnm_hdr_map,
1285			    maps[i].vnm_hdr, sc->sc_hdr_size, 1,
1286			    dmaparams[dir].dma_flag, dmaparams[dir].msg_hdr);
1287			if (r != 0)
1288				goto err_reqs;
1289
1290			r = vioif_dmamap_create(sc, &maps[i].vnm_mbuf_map,
1291			    dmaparams[dir].dma_size, dmaparams[dir].dma_nsegs,
1292			    dmaparams[dir].msg_payload);
1293			if (r != 0)
1294				goto err_reqs;
1295		}
1296	}
1297
1298	if (sc->sc_has_ctrl) {
1299		/* control vq class & command */
1300		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_cmd_dmamap,
1301		    ctrlq->ctrlq_cmd, sizeof(*ctrlq->ctrlq_cmd), 1,
1302		    BUS_DMA_WRITE, "control command");
1303		if (r != 0)
1304			goto err_reqs;
1305
1306		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_status_dmamap,
1307		    ctrlq->ctrlq_status, sizeof(*ctrlq->ctrlq_status), 1,
1308		    BUS_DMA_READ, "control status");
1309		if (r != 0)
1310			goto err_reqs;
1311
1312		/* control vq rx mode command parameter */
1313		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_rx_dmamap,
1314		    ctrlq->ctrlq_rx, sizeof(*ctrlq->ctrlq_rx), 1,
1315		    BUS_DMA_WRITE, "rx mode control command");
1316		if (r != 0)
1317			goto err_reqs;
1318
1319		/* multiqueue set command */
1320		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_mq_dmamap,
1321		    ctrlq->ctrlq_mq, sizeof(*ctrlq->ctrlq_mq), 1,
1322		    BUS_DMA_WRITE, "multiqueue set command");
1323		if (r != 0)
1324			goto err_reqs;
1325
1326		/* control vq MAC filter table for unicast */
1327		/* do not load now since its length is variable */
1328		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_uc_dmamap,
1329		    sizeof(*ctrlq->ctrlq_mac_tbl_uc)
1330		    + ETHER_ADDR_LEN, 1,
1331		    "unicast MAC address filter command");
1332		if (r != 0)
1333			goto err_reqs;
1334
1335		/* control vq MAC filter table for multicast */
1336		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_mc_dmamap,
1337		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
1338		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES, 1,
1339		    "multicast MAC address filter command");
1340		if (r != 0)
1341			goto err_reqs;
1342
1343		/* control vq MAC address set command */
1344		r = vioif_dmamap_create_load(sc,
1345		    &ctrlq->ctrlq_mac_addr_dmamap,
1346		    ctrlq->ctrlq_mac_addr,
1347		    sizeof(*ctrlq->ctrlq_mac_addr), 1,
1348		    BUS_DMA_WRITE, "mac addr set command");
1349		if (r != 0)
1350			goto err_reqs;
1351	}
1352
1353	return 0;
1354
1355err_reqs:
1356	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_mc_dmamap);
1357	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_uc_dmamap);
1358	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_rx_dmamap);
1359	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_status_dmamap);
1360	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_cmd_dmamap);
1361	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_mac_addr_dmamap);
1362	for (qid = 0; qid < netq_num; qid++) {
1363		vq_num = sc->sc_netqs[qid].netq_vq->vq_num;
1364		maps = sc->sc_netqs[qid].netq_maps;
1365
1366		for (i = 0; i < vq_num; i++) {
1367			vioif_dmamap_destroy(sc, &maps[i].vnm_mbuf_map);
1368			vioif_dmamap_destroy(sc, &maps[i].vnm_hdr_map);
1369		}
1370	}
1371	if (sc->sc_kmem) {
1372		kmem_free(sc->sc_kmem, kmemsize);
1373		sc->sc_kmem = NULL;
1374	}
1375	bus_dmamem_unmap(virtio_dmat(vsc), sc->sc_dmamem, dmamemsize);
1376err_dmamem_alloc:
1377	bus_dmamem_free(virtio_dmat(vsc), &sc->sc_segs[0], 1);
1378err_none:
1379	return -1;
1380}
1381
1382static void
1383vioif_alloc_queues(struct vioif_softc *sc)
1384{
1385	int nvq_pairs = sc->sc_max_nvq_pairs;
1386	size_t nvqs, netq_num;
1387
1388	KASSERT(nvq_pairs <= VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX);
1389
1390	nvqs = netq_num = sc->sc_max_nvq_pairs * 2;
1391	if (sc->sc_has_ctrl)
1392		nvqs++;
1393
1394	sc->sc_vqs = kmem_zalloc(sizeof(sc->sc_vqs[0]) * nvqs, KM_SLEEP);
1395	sc->sc_netqs = kmem_zalloc(sizeof(sc->sc_netqs[0]) * netq_num,
1396	    KM_SLEEP);
1397}
1398
1399static void
1400vioif_free_queues(struct vioif_softc *sc)
1401{
1402	size_t nvqs, netq_num;
1403
1404	nvqs = netq_num = sc->sc_max_nvq_pairs * 2;
1405	if (sc->sc_ctrlq.ctrlq_vq)
1406		nvqs++;
1407
1408	kmem_free(sc->sc_netqs, sizeof(sc->sc_netqs[0]) * netq_num);
1409	kmem_free(sc->sc_vqs, sizeof(sc->sc_vqs[0]) * nvqs);
1410	sc->sc_netqs = NULL;
1411	sc->sc_vqs = NULL;
1412}
1413
1414/*
1415 * Network queues
1416 */
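/*
 * Set up one rx or tx queue: allocate its virtqueue, hook up the interrupt
 * handler plus the softint/workqueue handler, and create the per-direction
 * context (a pcq and a deferred-transmit softint for tx, a counter context
 * for rx).
 */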
1417static int
1418vioif_netqueue_init(struct vioif_softc *sc, struct virtio_softc *vsc,
1419    size_t qid, u_int softint_flags)
1420{
1421	static const struct {
1422		const char	*dirname;
1423		int		 segsize;
1424		int		 nsegs;
1425		int 		(*intrhand)(void *);
1426		void		(*sihand)(void *);
1427	} params[VIOIF_NETQ_IDX] = {
1428		[VIOIF_NETQ_RX] = {
1429			.dirname	= "rx",
1430			.segsize	= MCLBYTES,
1431			.nsegs		= 2,
1432			.intrhand	= vioif_rx_intr,
1433			.sihand		= vioif_rx_handle,
1434		},
1435		[VIOIF_NETQ_TX] = {
1436			.dirname	= "tx",
1437			.segsize	= ETHER_MAX_LEN - ETHER_HDR_LEN,
1438			.nsegs		= 2,
1439			.intrhand	= vioif_tx_intr,
1440			.sihand		= vioif_tx_handle,
1441		}
1442	};
1443
1444	struct virtqueue *vq;
1445	struct vioif_netqueue *netq;
1446	struct vioif_tx_context *txc;
1447	struct vioif_rx_context *rxc;
1448	char qname[32];
1449	int r, dir;
1450
1451	txc = NULL;
1452	rxc = NULL;
1453	netq = &sc->sc_netqs[qid];
1454	vq = &sc->sc_vqs[qid];
1455	dir = VIOIF_NETQ_DIR(qid);
1456
1457	netq->netq_vq = &sc->sc_vqs[qid];
1458	netq->netq_stopping = false;
1459	netq->netq_running_handle = false;
1460
1461	snprintf(qname, sizeof(qname), "%s%zu",
1462	    params[dir].dirname, VIOIF_NETQ_PAIRIDX(qid));
1463	snprintf(netq->netq_evgroup, sizeof(netq->netq_evgroup),
1464	    "%s-%s", device_xname(sc->sc_dev), qname);
1465
1466	mutex_init(&netq->netq_lock, MUTEX_DEFAULT, IPL_NET);
1467	r = virtio_alloc_vq(vsc, vq, qid,
1468	    params[dir].segsize + sc->sc_hdr_size,
1469	    params[dir].nsegs, qname);
1470	if (r != 0)
1471		goto err;
1472	netq->netq_vq = vq;
1473
1474	netq->netq_vq->vq_intrhand = params[dir].intrhand;
1475	netq->netq_vq->vq_intrhand_arg = netq;
1476	netq->netq_softint = softint_establish(softint_flags,
1477	    params[dir].sihand, netq);
1478	if (netq->netq_softint == NULL) {
1479		aprint_error_dev(sc->sc_dev,
1480		    "couldn't establish %s softint\n",
1481		    params[dir].dirname);
1482		goto err;
1483	}
1484	vioif_work_set(&netq->netq_work, params[dir].sihand, netq);
1485
1486	switch (dir) {
1487	case VIOIF_NETQ_RX:
1488		rxc = kmem_zalloc(sizeof(*rxc), KM_SLEEP);
1489		netq->netq_ctx = rxc;
1490		/* nothing to do */
1491		break;
1492	case VIOIF_NETQ_TX:
1493		txc = kmem_zalloc(sizeof(*txc), KM_SLEEP);
1494		netq->netq_ctx = (void *)txc;
1495		txc->txc_deferred_transmit = softint_establish(softint_flags,
1496		    vioif_deferred_transmit, netq);
1497		if (txc->txc_deferred_transmit == NULL) {
1498			aprint_error_dev(sc->sc_dev,
1499			    "couldn't establish softint for "
1500			    "tx deferred transmit\n");
1501			goto err;
1502		}
1503		txc->txc_link_active = VIOIF_IS_LINK_ACTIVE(sc);
1504		txc->txc_no_free_slots = false;
1505		txc->txc_intrq = pcq_create(vq->vq_num, KM_SLEEP);
1506		break;
1507	}
1508
1509	return 0;
1510
1511err:
1512	netq->netq_ctx = NULL;
1513
1514	if (rxc != NULL) {
1515		kmem_free(rxc, sizeof(*rxc));
1516	}
1517
1518	if (txc != NULL) {
1519		if (txc->txc_deferred_transmit != NULL)
1520			softint_disestablish(txc->txc_deferred_transmit);
1521		if (txc->txc_intrq != NULL)
1522			pcq_destroy(txc->txc_intrq);
1523		kmem_free(txc, sizeof(*txc));
1524	}
1525
1526	vioif_work_set(&netq->netq_work, NULL, NULL);
1527	if (netq->netq_softint != NULL) {
1528		softint_disestablish(netq->netq_softint);
1529		netq->netq_softint = NULL;
1530	}
1531	netq->netq_vq->vq_intrhand = NULL;
1532	netq->netq_vq->vq_intrhand_arg = NULL;
1533
1534	virtio_free_vq(vsc, vq);
1535	mutex_destroy(&netq->netq_lock);
1536	netq->netq_vq = NULL;
1537
1538	return -1;
1539}
1540
1541static void
1542vioif_netqueue_teardown(struct vioif_softc *sc, struct virtio_softc *vsc,
1543    size_t qid)
1544{
1545	struct vioif_netqueue *netq;
1546	struct vioif_rx_context *rxc;
1547	struct vioif_tx_context *txc;
1548	int dir;
1549
1550	netq = &sc->sc_netqs[qid];
1551
1552	if (netq->netq_vq == NULL)
1553		return;
1554
1556	dir = VIOIF_NETQ_DIR(qid);
1557	switch (dir) {
1558	case VIOIF_NETQ_RX:
1559		rxc = netq->netq_ctx;
1560		netq->netq_ctx = NULL;
1561		kmem_free(rxc, sizeof(*rxc));
1562		break;
1563	case VIOIF_NETQ_TX:
1564		txc = netq->netq_ctx;
1565		netq->netq_ctx = NULL;
1566		softint_disestablish(txc->txc_deferred_transmit);
1567		pcq_destroy(txc->txc_intrq);
1568		kmem_free(txc, sizeof(*txc));
1569		break;
1570	}
1571
1572	softint_disestablish(netq->netq_softint);
1573	virtio_free_vq(vsc, netq->netq_vq);
1574	mutex_destroy(&netq->netq_lock);
1575	netq->netq_vq = NULL;
1576}
1577
1578static void
1579vioif_net_sched_handle(struct vioif_softc *sc, struct vioif_netqueue *netq)
1580{
1581
1582	KASSERT(mutex_owned(&netq->netq_lock));
1583	KASSERT(!netq->netq_stopping);
1584
1585	if (netq->netq_workqueue) {
1586		vioif_work_add(sc->sc_txrx_workqueue, &netq->netq_work);
1587	} else {
1588		softint_schedule(netq->netq_softint);
1589	}
1590}
1591
1592static int
1593vioif_net_load_mbuf(struct virtio_softc *vsc, struct vioif_net_map *map,
1594   struct mbuf *m, int dma_flags)
1595{
1596	int r;
1597
1598	KASSERT(map->vnm_mbuf == NULL);
1599
1600	r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1601	    map->vnm_mbuf_map, m, dma_flags | BUS_DMA_NOWAIT);
1602	if (r == 0) {
1603		map->vnm_mbuf = m;
1604	}
1605
1606	return r;
1607}
1608
1609static void
1610vioif_net_unload_mbuf(struct virtio_softc *vsc, struct vioif_net_map *map)
1611{
1612
1613	KASSERT(map->vnm_mbuf != NULL);
1614	bus_dmamap_unload(virtio_dmat(vsc), map->vnm_mbuf_map);
1615	map->vnm_mbuf = NULL;
1616}
1617
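/*
 * Post a loaded header/mbuf pair to the device: reserve one descriptor for
 * the header plus one per payload DMA segment, sync both maps, and enqueue
 * the header followed by the payload into the given slot.
 */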
1618static int
1619vioif_net_enqueue(struct virtio_softc *vsc, struct virtqueue *vq,
1620    int slot, struct vioif_net_map *map, int dma_ops, bool is_write)
1621{
1622	int r;
1623
1624	KASSERT(map->vnm_mbuf != NULL);
1625
1626	/* This should actually never fail */
1627	r = virtio_enqueue_reserve(vsc, vq, slot,
1628	    map->vnm_mbuf_map->dm_nsegs + 1);
1629	if (r != 0) {
1630		/* slot already freed by virtio_enqueue_reserve */
1631		return r;
1632	}
1633
1634	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_mbuf_map,
1635	    0, map->vnm_mbuf_map->dm_mapsize, dma_ops);
1636	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_hdr_map,
1637	    0, map->vnm_hdr_map->dm_mapsize, dma_ops);
1638
1639	virtio_enqueue(vsc, vq, slot, map->vnm_hdr_map, is_write);
1640	virtio_enqueue(vsc, vq, slot, map->vnm_mbuf_map, is_write);
1641	virtio_enqueue_commit(vsc, vq, slot, false);
1642
1643	return 0;
1644}
1645
1646static int
1647vioif_net_enqueue_tx(struct virtio_softc *vsc, struct virtqueue *vq,
1648    int slot, struct vioif_net_map *map)
1649{
1650
1651	return vioif_net_enqueue(vsc, vq, slot, map,
1652	    BUS_DMASYNC_PREWRITE, true);
1653}
1654
1655static int
1656vioif_net_enqueue_rx(struct virtio_softc *vsc, struct virtqueue *vq,
1657    int slot, struct vioif_net_map *map)
1658{
1659
1660	return vioif_net_enqueue(vsc, vq, slot, map,
1661	    BUS_DMASYNC_PREREAD, false);
1662}
1663
1664static struct mbuf *
1665vioif_net_dequeue_commit(struct virtio_softc *vsc, struct virtqueue *vq,
1666   int slot, struct vioif_net_map *map, int dma_flags)
1667{
1668	struct mbuf *m;
1669
1670	m = map->vnm_mbuf;
1671	KASSERT(m != NULL);
1672	map->vnm_mbuf = NULL;
1673
1674	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_hdr_map,
1675	    0, map->vnm_hdr_map->dm_mapsize, dma_flags);
1676	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_mbuf_map,
1677	    0, map->vnm_mbuf_map->dm_mapsize, dma_flags);
1678
1679	bus_dmamap_unload(virtio_dmat(vsc), map->vnm_mbuf_map);
1680	virtio_dequeue_commit(vsc, vq, slot);
1681
1682	return m;
1683}
1684
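/*
 * Re-enable virtqueue interrupts on every active queue.  If entries were
 * already pending when the interrupt was re-armed, turn it back off and
 * schedule the handler directly so the completions are not missed.
 */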
1685static void
1686vioif_net_intr_enable(struct vioif_softc *sc, struct virtio_softc *vsc)
1687{
1688	struct vioif_netqueue *netq;
1689	size_t i, act_qnum;
1690	int enqueued;
1691
1692	act_qnum = sc->sc_act_nvq_pairs * 2;
1693	for (i = 0; i < act_qnum; i++) {
1694		netq = &sc->sc_netqs[i];
1695
1696		KASSERT(!netq->netq_stopping);
1697		KASSERT(!netq->netq_running_handle);
1698
1699		enqueued = virtio_start_vq_intr(vsc, netq->netq_vq);
1700		if (enqueued != 0) {
1701			virtio_stop_vq_intr(vsc, netq->netq_vq);
1702
1703			mutex_enter(&netq->netq_lock);
1704			netq->netq_running_handle = true;
1705			vioif_net_sched_handle(sc, netq);
1706			mutex_exit(&netq->netq_lock);
1707		}
1708	}
1709}
1710
1711static void
1712vioif_net_intr_disable(struct vioif_softc *sc, struct virtio_softc *vsc)
1713{
1714	struct vioif_netqueue *netq;
1715	size_t i, act_qnum;
1716
1717	act_qnum = sc->sc_act_nvq_pairs * 2;
1718	for (i = 0; i < act_qnum; i++) {
1719		netq = &sc->sc_netqs[i];
1720
1721		virtio_stop_vq_intr(vsc, netq->netq_vq);
1722	}
1723}
1724
1725/*
1726 * Receive implementation
1727 */
1728/* enqueue mbufs to receive slots */
1729static void
1730vioif_populate_rx_mbufs_locked(struct vioif_softc *sc, struct vioif_netqueue *netq)
1731{
1732	struct virtqueue *vq = netq->netq_vq;
1733	struct virtio_softc *vsc = vq->vq_owner;
1734	struct vioif_rx_context *rxc;
1735	struct vioif_net_map *map;
1736	struct mbuf *m;
1737	int i, r, ndone = 0;
1738
1739	KASSERT(mutex_owned(&netq->netq_lock));
1740
1741	rxc = netq->netq_ctx;
1742
1743	for (i = 0; i < vq->vq_num; i++) {
1744		int slot;
1745		r = virtio_enqueue_prep(vsc, vq, &slot);
1746		if (r == EAGAIN)
1747			break;
1748		if (__predict_false(r != 0))
1749			panic("enqueue_prep for rx buffers");
1750
1751		MGETHDR(m, M_DONTWAIT, MT_DATA);
1752		if (m == NULL) {
1753			virtio_enqueue_abort(vsc, vq, slot);
1754			rxc->rxc_mbuf_enobufs.ev_count++;
1755			break;
1756		}
1757		MCLGET(m, M_DONTWAIT);
1758		if ((m->m_flags & M_EXT) == 0) {
1759			virtio_enqueue_abort(vsc, vq, slot);
1760			m_freem(m);
1761			rxc->rxc_mbuf_enobufs.ev_count++;
1762			break;
1763		}
1764
1765		m->m_len = m->m_pkthdr.len = MCLBYTES;
1766		m_adj(m, ETHER_ALIGN);
1767
1768		map = &netq->netq_maps[slot];
1769		r = vioif_net_load_mbuf(vsc, map, m, BUS_DMA_READ);
1770		if (r != 0) {
1771			virtio_enqueue_abort(vsc, vq, slot);
1772			m_freem(m);
1773			netq->netq_mbuf_load_failed.ev_count++;
1774			break;
1775		}
1776
1777		r = vioif_net_enqueue_rx(vsc, vq, slot, map);
1778		if (r != 0) {
1779			vioif_net_unload_mbuf(vsc, map);
1780			netq->netq_enqueue_failed.ev_count++;
1781			m_freem(m);
1782			/* slot already freed by vioif_net_enqueue_rx */
1783			break;
1784		}
1785
1786		ndone++;
1787	}
1788
1789	if (ndone > 0)
1790		vioif_notify(vsc, vq);
1791}
1792
1793/* dequeue received packets */
1794static bool
1795vioif_rx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
1796    struct vioif_netqueue *netq, u_int limit, size_t *ndeqp)
1797{
1798	struct virtqueue *vq = netq->netq_vq;
1799	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1800	struct vioif_net_map *map;
1801	struct mbuf *m;
1802	int slot, len;
1803	bool more;
1804	size_t ndeq;
1805
1806	KASSERT(mutex_owned(&netq->netq_lock));
1807
1808	more = false;
1809	ndeq = 0;
1810
1811	if (virtio_vq_is_enqueued(vsc, vq) == false)
1812		goto done;
1813
1814	for (;;ndeq++) {
1815		if (ndeq >= limit) {
1816			more = true;
1817			break;
1818		}
1819
1820		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
1821			break;
1822
1823		map = &netq->netq_maps[slot];
1824		KASSERT(map->vnm_mbuf != NULL);
1825		m = vioif_net_dequeue_commit(vsc, vq, slot,
1826		    map, BUS_DMASYNC_POSTREAD);
1827		KASSERT(m != NULL);
1828
1829		m->m_len = m->m_pkthdr.len = len - sc->sc_hdr_size;
1830		m_set_rcvif(m, ifp);
1831		if_percpuq_enqueue(ifp->if_percpuq, m);
1832	}
1833
1834done:
1835	if (ndeqp != NULL)
1836		*ndeqp = ndeq;
1837
1838	return more;
1839}
1840
1841static void
1842vioif_rx_queue_clear(struct vioif_softc *sc, struct virtio_softc *vsc,
1843    struct vioif_netqueue *netq)
1844{
1845	struct vioif_net_map *map;
1846	struct mbuf *m;
1847	unsigned int i, vq_num;
1848	bool more;
1849
1850	mutex_enter(&netq->netq_lock);
1851
1852	vq_num = netq->netq_vq->vq_num;
1853	for (;;) {
1854		more = vioif_rx_deq_locked(sc, vsc, netq, vq_num, NULL);
1855		if (more == false)
1856			break;
1857	}
1858
1859	for (i = 0; i < vq_num; i++) {
1860		map = &netq->netq_maps[i];
1861
1862		m = map->vnm_mbuf;
1863		if (m == NULL)
1864			continue;
1865
1866		vioif_net_unload_mbuf(vsc, map);
1867		m_freem(m);
1868	}
1869	mutex_exit(&netq->netq_lock);
1870}
1871
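/*
 * Common body of the rx interrupt and deferred handlers: dequeue up to
 * `limit` received frames, refill the ring, and then either reschedule
 * itself (more work pending) or re-arm the virtqueue interrupt.
 */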
1872static void
1873vioif_rx_handle_locked(void *xnetq, u_int limit)
1874{
1875	struct vioif_netqueue *netq = xnetq;
1876	struct virtqueue *vq = netq->netq_vq;
1877	struct virtio_softc *vsc = vq->vq_owner;
1878	struct vioif_softc *sc = device_private(virtio_child(vsc));
1879	bool more;
1880	int enqueued;
1881	size_t ndeq;
1882
1883	KASSERT(mutex_owned(&netq->netq_lock));
1884	KASSERT(!netq->netq_stopping);
1885
1886	more = vioif_rx_deq_locked(sc, vsc, netq, limit, &ndeq);
1887	if (ndeq > 0)
1888		vioif_populate_rx_mbufs_locked(sc, netq);
1889
1890	if (more) {
1891		vioif_net_sched_handle(sc, netq);
1892		return;
1893	}
1894
1895	enqueued = virtio_start_vq_intr(vsc, netq->netq_vq);
1896	if (enqueued != 0) {
1897		virtio_stop_vq_intr(vsc, netq->netq_vq);
1898		vioif_net_sched_handle(sc, netq);
1899		return;
1900	}
1901
1902	netq->netq_running_handle = false;
1903}
1904
1905static int
1906vioif_rx_intr(void *arg)
1907{
1908	struct vioif_netqueue *netq = arg;
1909	struct virtqueue *vq = netq->netq_vq;
1910	struct virtio_softc *vsc = vq->vq_owner;
1911	struct vioif_softc *sc = device_private(virtio_child(vsc));
1912	u_int limit;
1913
1914	mutex_enter(&netq->netq_lock);
1915
1916	/* handler is already running in softint/workqueue */
1917	if (netq->netq_running_handle)
1918		goto done;
1919
1920	netq->netq_running_handle = true;
1921
1922	limit = sc->sc_rx_intr_process_limit;
1923	virtio_stop_vq_intr(vsc, vq);
1924	vioif_rx_handle_locked(netq, limit);
1925
1926done:
1927	mutex_exit(&netq->netq_lock);
1928	return 1;
1929}
1930
1931static void
1932vioif_rx_handle(void *xnetq)
1933{
1934	struct vioif_netqueue *netq = xnetq;
1935	struct virtqueue *vq = netq->netq_vq;
1936	struct virtio_softc *vsc = vq->vq_owner;
1937	struct vioif_softc *sc = device_private(virtio_child(vsc));
1938	u_int limit;
1939
1940	mutex_enter(&netq->netq_lock);
1941
1942	KASSERT(netq->netq_running_handle);
1943
1944	if (netq->netq_stopping) {
1945		netq->netq_running_handle = false;
1946		goto done;
1947	}
1948
1949	limit = sc->sc_rx_process_limit;
1950	vioif_rx_handle_locked(netq, limit);
1951
1952done:
1953	mutex_exit(&netq->netq_lock);
1954}
1955
1956/*
1957 * Transmission implementation
1958 */
1959/* enqueue mbufs to send */
1960static void
1961vioif_send_common_locked(struct ifnet *ifp, struct vioif_netqueue *netq,
1962    bool is_transmit)
1963{
1964	struct vioif_softc *sc = ifp->if_softc;
1965	struct virtio_softc *vsc = sc->sc_virtio;
1966	struct virtqueue *vq = netq->netq_vq;
1967	struct vioif_tx_context *txc;
1968	struct vioif_net_map *map;
1969	struct mbuf *m;
1970	int queued = 0;
1971
1972	KASSERT(mutex_owned(&netq->netq_lock));
1973
1974	if (netq->netq_stopping ||
1975	    !ISSET(ifp->if_flags, IFF_RUNNING))
1976		return;
1977
1978	txc = netq->netq_ctx;
1979
1980	if (!txc->txc_link_active ||
1981	    txc->txc_no_free_slots)
1982		return;
1983
1984	for (;;) {
1985		int slot, r;
1986		r = virtio_enqueue_prep(vsc, vq, &slot);
1987		if (r == EAGAIN) {
1988			txc->txc_no_free_slots = true;
1989			break;
1990		}
1991		if (__predict_false(r != 0))
1992			panic("enqueue_prep for tx buffers");
1993
1994		if (is_transmit)
1995			m = pcq_get(txc->txc_intrq);
1996		else
1997			IFQ_DEQUEUE(&ifp->if_snd, m);
1998
1999		if (m == NULL) {
2000			virtio_enqueue_abort(vsc, vq, slot);
2001			break;
2002		}
2003
2004		map = &netq->netq_maps[slot];
2005		KASSERT(map->vnm_mbuf == NULL);
2006
2007		r = vioif_net_load_mbuf(vsc, map, m, BUS_DMA_WRITE);
2008		if (r != 0) {
2009			/* maybe just too fragmented */
2010			struct mbuf *newm;
2011
2012			newm = m_defrag(m, M_NOWAIT);
2013			if (newm != NULL) {
2014				m = newm;
2015				r = vioif_net_load_mbuf(vsc, map, m,
2016				    BUS_DMA_WRITE);
2017			} else {
2018				txc->txc_defrag_failed.ev_count++;
2019				r = -1;
2020			}
2021
2022			if (r != 0) {
2023				netq->netq_mbuf_load_failed.ev_count++;
2024				m_freem(m);
2025				if_statinc(ifp, if_oerrors);
2026				virtio_enqueue_abort(vsc, vq, slot);
2027				continue;
2028			}
2029		}
2030
2031		memset(map->vnm_hdr, 0, sc->sc_hdr_size);
2032
2033		r = vioif_net_enqueue_tx(vsc, vq, slot, map);
2034		if (r != 0) {
2035			netq->netq_enqueue_failed.ev_count++;
2036			vioif_net_unload_mbuf(vsc, map);
2037			m_freem(m);
2038			/* slot already freed by vioif_net_enqueue_tx */
2039
2040			if_statinc(ifp, if_oerrors);
2041			continue;
2042		}
2043
2044		queued++;
2045		bpf_mtap(ifp, m, BPF_D_OUT);
2046	}
2047
2048	if (queued > 0) {
2049		vioif_notify(vsc, vq);
2050		ifp->if_timer = 5;
2051	}
2052}
2053
2054/* dequeue sent mbufs */
2055static bool
2056vioif_tx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
2057    struct vioif_netqueue *netq, u_int limit, size_t *ndeqp)
2058{
2059	struct virtqueue *vq = netq->netq_vq;
2060	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2061	struct vioif_net_map *map;
2062	struct mbuf *m;
2063	int slot, len;
2064	bool more;
2065	size_t ndeq;
2066
2067	KASSERT(mutex_owned(&netq->netq_lock));
2068
2069	more = false;
2070	ndeq = 0;
2071
2072	if (virtio_vq_is_enqueued(vsc, vq) == false)
2073		goto done;
2074
2075	for (;;ndeq++) {
2076		if (limit-- == 0) {
2077			more = true;
2078			break;
2079		}
2080
2081		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
2082			break;
2083
2084		map = &netq->netq_maps[slot];
2085		KASSERT(map->vnm_mbuf != NULL);
2086		m = vioif_net_dequeue_commit(vsc, vq, slot,
2087		    map, BUS_DMASYNC_POSTWRITE);
2088		KASSERT(m != NULL);
2089
2090		if_statinc(ifp, if_opackets);
2091		m_freem(m);
2092	}
2093
2094done:
2095	if (ndeqp != NULL)
2096		*ndeqp = ndeq;
2097	return more;
2098}
2099
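/*
 * Reclaim the tx virtqueue: dequeue all completed transmissions, free
 * any mbufs still loaded on the dma maps, and clear the no-free-slots
 * condition.
 */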
2100static void
2101vioif_tx_queue_clear(struct vioif_softc *sc, struct virtio_softc *vsc,
2102    struct vioif_netqueue *netq)
2103{
2104	struct vioif_tx_context *txc;
2105	struct vioif_net_map *map;
2106	struct mbuf *m;
2107	unsigned int i, vq_num;
2108	bool more;
2109
2110	mutex_enter(&netq->netq_lock);
2111
2112	txc = netq->netq_ctx;
2113	vq_num = netq->netq_vq->vq_num;
2114
2115	for (;;) {
2116		more = vioif_tx_deq_locked(sc, vsc, netq, vq_num, NULL);
2117		if (more == false)
2118			break;
2119	}
2120
2121	for (i = 0; i < vq_num; i++) {
2122		map = &netq->netq_maps[i];
2123
2124		m = map->vnm_mbuf;
2125		if (m == NULL)
2126			continue;
2127
2128		vioif_net_unload_mbuf(vsc, map);
2129		m_freem(m);
2130	}
2131
2132	txc->txc_no_free_slots = false;
2133
2134	mutex_exit(&netq->netq_lock);
2135}
2136
2137static void
2138vioif_start_locked(struct ifnet *ifp, struct vioif_netqueue *netq)
2139{
2140
2141	/*
2142	 * ifp->if_obytes and ifp->if_omcasts are added in if_transmit()@if.c.
2143	 */
2144	vioif_send_common_locked(ifp, netq, false);
2145
2146}
2147
2148static void
2149vioif_transmit_locked(struct ifnet *ifp, struct vioif_netqueue *netq)
2150{
2151
2152	vioif_send_common_locked(ifp, netq, true);
2153}
2154
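/*
 * Softint handler used to restart transmission, e.g. after completed
 * packets have freed descriptor slots (see vioif_tx_handle_locked).
 */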
2155static void
2156vioif_deferred_transmit(void *arg)
2157{
2158	struct vioif_netqueue *netq = arg;
2159	struct virtio_softc *vsc = netq->netq_vq->vq_owner;
2160	struct vioif_softc *sc = device_private(virtio_child(vsc));
2161	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2162
2163	mutex_enter(&netq->netq_lock);
2164	vioif_send_common_locked(ifp, netq, true);
2165	mutex_exit(&netq->netq_lock);
2166}
2167
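/*
 * Reclaim up to "limit" completed tx descriptors.  If slots were freed
 * while the queue was marked full, kick the deferred-transmit softint;
 * if the limit was hit, reschedule, otherwise re-enable (or, with
 * EVENT_IDX, postpone) the virtqueue interrupt.
 */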
2168static void
2169vioif_tx_handle_locked(struct vioif_netqueue *netq, u_int limit)
2170{
2171	struct virtqueue *vq = netq->netq_vq;
2172	struct vioif_tx_context *txc = netq->netq_ctx;
2173	struct virtio_softc *vsc = vq->vq_owner;
2174	struct vioif_softc *sc = device_private(virtio_child(vsc));
2175	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2176	bool more;
2177	int enqueued;
2178	size_t ndeq;
2179
2180	KASSERT(mutex_owned(&netq->netq_lock));
2181	KASSERT(!netq->netq_stopping);
2182
2183	more = vioif_tx_deq_locked(sc, vsc, netq, limit, &ndeq);
2184	if (txc->txc_no_free_slots && ndeq > 0) {
2185		txc->txc_no_free_slots = false;
2186		softint_schedule(txc->txc_deferred_transmit);
2187	}
2188
2189	if (more) {
2190		vioif_net_sched_handle(sc, netq);
2191		return;
2192	}
2193
2194	enqueued = (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX) ?
2195	    virtio_postpone_intr_smart(vsc, vq) :
2196	    virtio_start_vq_intr(vsc, vq);
2197	if (enqueued != 0) {
2198		virtio_stop_vq_intr(vsc, vq);
2199		vioif_net_sched_handle(sc, netq);
2200		return;
2201	}
2202
2203	netq->netq_running_handle = false;
2204
2205	/* for ALTQ */
2206	if (netq == &sc->sc_netqs[VIOIF_NETQ_TXQID(0)])
2207		if_schedule_deferred_start(ifp);
2208
2209	softint_schedule(txc->txc_deferred_transmit);
2210}
2211
2212static int
2213vioif_tx_intr(void *arg)
2214{
2215	struct vioif_netqueue *netq = arg;
2216	struct virtqueue *vq = netq->netq_vq;
2217	struct virtio_softc *vsc = vq->vq_owner;
2218	struct vioif_softc *sc = device_private(virtio_child(vsc));
2219	u_int limit;
2220
2221	mutex_enter(&netq->netq_lock);
2222
2223	/* tx handler is already running in softint/workqueue */
2224	if (netq->netq_running_handle)
2225		goto done;
2226
2227	if (netq->netq_stopping)
2228		goto done;
2229
2230	netq->netq_running_handle = true;
2231
2232	virtio_stop_vq_intr(vsc, vq);
2233	netq->netq_workqueue = sc->sc_txrx_workqueue_sysctl;
2234	limit = sc->sc_tx_intr_process_limit;
2235	vioif_tx_handle_locked(netq, limit);
2236
2237done:
2238	mutex_exit(&netq->netq_lock);
2239	return 1;
2240}
2241
2242static void
2243vioif_tx_handle(void *xnetq)
2244{
2245	struct vioif_netqueue *netq = xnetq;
2246	struct virtqueue *vq = netq->netq_vq;
2247	struct virtio_softc *vsc = vq->vq_owner;
2248	struct vioif_softc *sc = device_private(virtio_child(vsc));
2249	u_int limit;
2250
2251	mutex_enter(&netq->netq_lock);
2252
2253	KASSERT(netq->netq_running_handle);
2254
2255	if (netq->netq_stopping) {
2256		netq->netq_running_handle = false;
2257		goto done;
2258	}
2259
2260	limit = sc->sc_tx_process_limit;
2261	vioif_tx_handle_locked(netq, limit);
2262
2263done:
2264	mutex_exit(&netq->netq_lock);
2265}
2266
2267/*
2268 * Control vq
2269 */
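/*
 * A control command is issued by acquiring exclusive use of the control
 * virtqueue (vioif_ctrl_acquire), loading the command-specific buffers
 * (vioif_ctrl_load_cmdspec), enqueueing the command header, the buffers
 * and the status buffer (vioif_ctrl_send_command), and sleeping until
 * vioif_ctrl_intr marks the command DONE.
 */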
2270/* acquire exclusive use of the control virtqueue */
2271static void
2272vioif_ctrl_acquire(struct vioif_softc *sc)
2273{
2274	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2275
2276	mutex_enter(&ctrlq->ctrlq_wait_lock);
2277	while (ctrlq->ctrlq_inuse != FREE)
2278		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
2279	ctrlq->ctrlq_inuse = INUSE;
2280	ctrlq->ctrlq_owner = curlwp;
2281	mutex_exit(&ctrlq->ctrlq_wait_lock);
2282}
2283
2284static void
2285vioif_ctrl_release(struct vioif_softc *sc)
2286{
2287	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2288
2289	KASSERT(ctrlq->ctrlq_inuse != FREE);
2290	KASSERT(ctrlq->ctrlq_owner == curlwp);
2291
2292	mutex_enter(&ctrlq->ctrlq_wait_lock);
2293	ctrlq->ctrlq_inuse = FREE;
2294	ctrlq->ctrlq_owner = NULL;
2295	cv_signal(&ctrlq->ctrlq_wait);
2296	mutex_exit(&ctrlq->ctrlq_wait_lock);
2297}
2298
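/*
 * Load the caller-supplied command buffers into their dma maps; on
 * failure, unload whatever was already loaded.
 */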
2299static int
2300vioif_ctrl_load_cmdspec(struct vioif_softc *sc,
2301    struct vioif_ctrl_cmdspec *specs, int nspecs)
2302{
2303	struct virtio_softc *vsc = sc->sc_virtio;
2304	int i, r, loaded;
2305
2306	loaded = 0;
2307	for (i = 0; i < nspecs; i++) {
2308		r = bus_dmamap_load(virtio_dmat(vsc),
2309		    specs[i].dmamap, specs[i].buf, specs[i].bufsize,
2310		    NULL, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
2311		if (r) {
2312			sc->sc_ctrlq.ctrlq_cmd_load_failed.ev_count++;
2313			goto err;
2314		}
2315		loaded++;
2316	}
2317
2318	/* all cmdspec buffers loaded successfully */
2319	return 0;
2320
2321err:
2322	for (i = 0; i < loaded; i++) {
2323		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2324	}
2325
2326	return r;
2327}
2328
2329static void
2330vioif_ctrl_unload_cmdspec(struct vioif_softc *sc,
2331    struct vioif_ctrl_cmdspec *specs, int nspecs)
2332{
2333	struct virtio_softc *vsc = sc->sc_virtio;
2334	int i;
2335
2336	for (i = 0; i < nspecs; i++) {
2337		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2338	}
2339}
2340
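/*
 * Enqueue the command header, the per-command buffers and the status
 * buffer on the control virtqueue, notify the device, and sleep until
 * the completion interrupt fires.  Returns 0 if the device answered
 * VIRTIO_NET_OK, otherwise EIO.
 */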
2341static int
2342vioif_ctrl_send_command(struct vioif_softc *sc, uint8_t class, uint8_t cmd,
2343    struct vioif_ctrl_cmdspec *specs, int nspecs)
2344{
2345	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2346	struct virtqueue *vq = ctrlq->ctrlq_vq;
2347	struct virtio_softc *vsc = sc->sc_virtio;
2348	int i, r, slot;
2349
2350	ctrlq->ctrlq_cmd->class = class;
2351	ctrlq->ctrlq_cmd->command = cmd;
2352
2353	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap,
2354	    0, sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_PREWRITE);
2355	for (i = 0; i < nspecs; i++) {
2356		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap,
2357		    0, specs[i].bufsize, BUS_DMASYNC_PREWRITE);
2358	}
2359	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap,
2360	    0, sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_PREREAD);
2361
2362	/* we need to explicitly (re)start vq intr when using RING EVENT IDX */
2363	if (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX)
2364		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
2365
2366	r = virtio_enqueue_prep(vsc, vq, &slot);
2367	if (r != 0)
2368		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2369	r = virtio_enqueue_reserve(vsc, vq, slot, nspecs + 2);
2370	if (r != 0)
2371		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2372	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_cmd_dmamap, true);
2373	for (i = 0; i < nspecs; i++) {
2374		virtio_enqueue(vsc, vq, slot, specs[i].dmamap, true);
2375	}
2376	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_status_dmamap, false);
2377	virtio_enqueue_commit(vsc, vq, slot, true);
2378
2379	/* wait for done */
2380	mutex_enter(&ctrlq->ctrlq_wait_lock);
2381	while (ctrlq->ctrlq_inuse != DONE)
2382		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
2383	mutex_exit(&ctrlq->ctrlq_wait_lock);
2384	/* already dequeued */
2385
2386	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap, 0,
2387	    sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
2388	for (i = 0; i < nspecs; i++) {
2389		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap, 0,
2390		    specs[i].bufsize, BUS_DMASYNC_POSTWRITE);
2391	}
2392	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap, 0,
2393	    sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_POSTREAD);
2394
2395	if (ctrlq->ctrlq_status->ack == VIRTIO_NET_OK)
2396		r = 0;
2397	else {
2398		device_printf(sc->sc_dev, "control command %u/%u failed\n", class, cmd);
2399		sc->sc_ctrlq.ctrlq_cmd_failed.ev_count++;
2400		r = EIO;
2401	}
2402
2403	return r;
2404}
2405
2406/* ctrl vq interrupt; wake up the command issuer */
2407static int
2408vioif_ctrl_intr(void *arg)
2409{
2410	struct vioif_ctrlqueue *ctrlq = arg;
2411	struct virtqueue *vq = ctrlq->ctrlq_vq;
2412	struct virtio_softc *vsc = vq->vq_owner;
2413	int r, slot;
2414
2415	if (virtio_vq_is_enqueued(vsc, vq) == false)
2416		return 0;
2417
2418	r = virtio_dequeue(vsc, vq, &slot, NULL);
2419	if (r == ENOENT)
2420		return 0;
2421	virtio_dequeue_commit(vsc, vq, slot);
2422
2423	mutex_enter(&ctrlq->ctrlq_wait_lock);
2424	ctrlq->ctrlq_inuse = DONE;
2425	cv_signal(&ctrlq->ctrlq_wait);
2426	mutex_exit(&ctrlq->ctrlq_wait_lock);
2427
2428	return 1;
2429}
2430
2431static int
2432vioif_ctrl_rx(struct vioif_softc *sc, int cmd, bool onoff)
2433{
2434	struct virtio_net_ctrl_rx *rx = sc->sc_ctrlq.ctrlq_rx;
2435	struct vioif_ctrl_cmdspec specs[1];
2436	int r;
2437
2438	if (!sc->sc_has_ctrl)
2439		return ENOTSUP;
2440
2441	vioif_ctrl_acquire(sc);
2442
2443	rx->onoff = onoff;
2444	specs[0].dmamap = sc->sc_ctrlq.ctrlq_rx_dmamap;
2445	specs[0].buf = rx;
2446	specs[0].bufsize = sizeof(*rx);
2447
2448	r = vioif_ctrl_send_command(sc, VIRTIO_NET_CTRL_RX, cmd,
2449	    specs, __arraycount(specs));
2450
2451	vioif_ctrl_release(sc);
2452	return r;
2453}
2454
2455static int
2456vioif_set_promisc(struct vioif_softc *sc, bool onoff)
2457{
2458	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, onoff);
2459}
2460
2461static int
2462vioif_set_allmulti(struct vioif_softc *sc, bool onoff)
2463{
2464	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, onoff);
2465}
2466
2467static int
2468vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *sc, int nvq_pairs)
2469{
2470	struct virtio_net_ctrl_mq *mq = sc->sc_ctrlq.ctrlq_mq;
2471	struct vioif_ctrl_cmdspec specs[1];
2472	int r;
2473
2474	if (!sc->sc_has_ctrl)
2475		return ENOTSUP;
2476
2477	if (nvq_pairs <= 1)
2478		return EINVAL;
2479
2480	vioif_ctrl_acquire(sc);
2481
2482	mq->virtqueue_pairs = virtio_rw16(sc->sc_virtio, nvq_pairs);
2483	specs[0].dmamap = sc->sc_ctrlq.ctrlq_mq_dmamap;
2484	specs[0].buf = mq;
2485	specs[0].bufsize = sizeof(*mq);
2486
2487	r = vioif_ctrl_send_command(sc,
2488	    VIRTIO_NET_CTRL_MQ, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
2489	    specs, __arraycount(specs));
2490
2491	vioif_ctrl_release(sc);
2492
2493	return r;
2494}
2495
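/*
 * Push the MAC address configured on the interface to the device,
 * either with the VIRTIO_NET_CTRL_MAC_ADDR_SET command when that
 * feature was negotiated, or by writing the config space directly.
 */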
2496static int
2497vioif_set_mac_addr(struct vioif_softc *sc)
2498{
2499	struct virtio_net_ctrl_mac_addr *ma =
2500	    sc->sc_ctrlq.ctrlq_mac_addr;
2501	struct vioif_ctrl_cmdspec specs[1];
2502	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2503	int nspecs = __arraycount(specs);
2504	uint64_t features;
2505	int r;
2506	size_t i;
2507
2508	if (!sc->sc_has_ctrl)
2509		return ENOTSUP;
2510
2511	if (memcmp(CLLADDR(ifp->if_sadl), sc->sc_mac,
2512	    ETHER_ADDR_LEN) == 0) {
2513		return 0;
2514	}
2515
2516	memcpy(sc->sc_mac, CLLADDR(ifp->if_sadl), ETHER_ADDR_LEN);
2517
2518	features = virtio_features(sc->sc_virtio);
2519	if (features & VIRTIO_NET_F_CTRL_MAC_ADDR) {
2520		vioif_ctrl_acquire(sc);
2521
2522		memcpy(ma->mac, sc->sc_mac, ETHER_ADDR_LEN);
2523		specs[0].dmamap = sc->sc_ctrlq.ctrlq_mac_addr_dmamap;
2524		specs[0].buf = ma;
2525		specs[0].bufsize = sizeof(*ma);
2526
2527		r = vioif_ctrl_send_command(sc,
2528		    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_ADDR_SET,
2529		    specs, nspecs);
2530
2531		vioif_ctrl_release(sc);
2532	} else {
2533		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
2534			virtio_write_device_config_1(sc->sc_virtio,
2535			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
2536		}
2537		r = 0;
2538	}
2539
2540	return r;
2541}
2542
2543static int
2544vioif_set_rx_filter(struct vioif_softc *sc)
2545{
2546	/* filter already set in ctrlq->ctrlq_mac_tbl */
2547	struct virtio_softc *vsc = sc->sc_virtio;
2548	struct virtio_net_ctrl_mac_tbl *mac_tbl_uc, *mac_tbl_mc;
2549	struct vioif_ctrl_cmdspec specs[2];
2550	int nspecs = __arraycount(specs);
2551	int r;
2552
2553	mac_tbl_uc = sc->sc_ctrlq.ctrlq_mac_tbl_uc;
2554	mac_tbl_mc = sc->sc_ctrlq.ctrlq_mac_tbl_mc;
2555
2556	if (!sc->sc_has_ctrl)
2557		return ENOTSUP;
2558
2559	vioif_ctrl_acquire(sc);
2560
2561	specs[0].dmamap = sc->sc_ctrlq.ctrlq_tbl_uc_dmamap;
2562	specs[0].buf = mac_tbl_uc;
2563	specs[0].bufsize = sizeof(*mac_tbl_uc)
2564	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_uc->nentries));
2565
2566	specs[1].dmamap = sc->sc_ctrlq.ctrlq_tbl_mc_dmamap;
2567	specs[1].buf = mac_tbl_mc;
2568	specs[1].bufsize = sizeof(*mac_tbl_mc)
2569	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_mc->nentries));
2570
2571	r = vioif_ctrl_load_cmdspec(sc, specs, nspecs);
2572	if (r != 0)
2573		goto out;
2574
2575	r = vioif_ctrl_send_command(sc,
2576	    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET,
2577	    specs, nspecs);
2578
2579	vioif_ctrl_unload_cmdspec(sc, specs, nspecs);
2580
2581out:
2582	vioif_ctrl_release(sc);
2583
2584	return r;
2585}
2586
2587/*
2588 * If the multicast filter is small enough (<= MAXENTRIES), program the
2589 * rx filter with the exact list.  If the list is too large, or if
2590 * programming the rx filter fails, fall back to ALLMULTI.
2591 */
2592static int
2593vioif_rx_filter(struct vioif_softc *sc)
2594{
2595	struct virtio_softc *vsc = sc->sc_virtio;
2596	struct ethercom *ec = &sc->sc_ethercom;
2597	struct ifnet *ifp = &ec->ec_if;
2598	struct ether_multi *enm;
2599	struct ether_multistep step;
2600	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2601	int nentries;
2602	bool allmulti = false;
2603	int r;
2604
2605	if (!sc->sc_has_ctrl) {
2606		goto set_ifflags;
2607	}
2608
2609	memcpy(ctrlq->ctrlq_mac_tbl_uc->macs[0],
2610	    CLLADDR(ifp->if_sadl), ETHER_ADDR_LEN);
2611
2612	nentries = 0;
2613	allmulti = false;
2614
2615	ETHER_LOCK(ec);
2616	for (ETHER_FIRST_MULTI(step, ec, enm); enm != NULL;
2617	    ETHER_NEXT_MULTI(step, enm)) {
2618		if (nentries >= VIRTIO_NET_CTRL_MAC_MAXENTRIES) {
2619			allmulti = true;
2620			break;
2621		}
2622		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
2623			allmulti = true;
2624			break;
2625		}
2626
2627		memcpy(ctrlq->ctrlq_mac_tbl_mc->macs[nentries],
2628		    enm->enm_addrlo, ETHER_ADDR_LEN);
2629		nentries++;
2630	}
2631	ETHER_UNLOCK(ec);
2632
2633	r = vioif_set_mac_addr(sc);
2634	if (r != 0) {
2635		log(LOG_WARNING, "%s: couldn't set MAC address\n",
2636		    ifp->if_xname);
2637	}
2638
2639	if (!allmulti) {
2640		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 1);
2641		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, nentries);
2642		r = vioif_set_rx_filter(sc);
2643		if (r != 0) {
2644			allmulti = true; /* fallback */
2645		}
2646	}
2647
2648	if (allmulti) {
2649		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 0);
2650		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, 0);
2651		r = vioif_set_rx_filter(sc);
2652		if (r != 0) {
2653			log(LOG_DEBUG, "%s: couldn't clear RX filter\n",
2654			    ifp->if_xname);
2655			/* what to do on failure? */
2656		}
2657
2658		ifp->if_flags |= IFF_ALLMULTI;
2659	}
2660
2661set_ifflags:
2662	r = vioif_ifflags(sc);
2663
2664	return r;
2665}
2666
2667/*
2668 * VM configuration changes
2669 */
2670static int
2671vioif_config_change(struct virtio_softc *vsc)
2672{
2673	struct vioif_softc *sc = device_private(virtio_child(vsc));
2674
2675	softint_schedule(sc->sc_cfg_softint);
2676	return 0;
2677}
2678
2679static void
2680vioif_cfg_softint(void *arg)
2681{
2682	struct vioif_softc *sc = arg;
2683	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2684
2685	vioif_update_link_status(sc);
2686	vioif_start(ifp);
2687}
2688
2689static int
2690vioif_get_link_status(struct vioif_softc *sc)
2691{
2692	struct virtio_softc *vsc = sc->sc_virtio;
2693	uint16_t status;
2694
2695	if (virtio_features(vsc) & VIRTIO_NET_F_STATUS)
2696		status = virtio_read_device_config_2(vsc,
2697		    VIRTIO_NET_CONFIG_STATUS);
2698	else
2699		status = VIRTIO_NET_S_LINK_UP;
2700
2701	if ((status & VIRTIO_NET_S_LINK_UP) != 0)
2702		return LINK_STATE_UP;
2703
2704	return LINK_STATE_DOWN;
2705}
2706
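/*
 * Propagate a link state change to each tx queue's txc_link_active
 * flag and notify the network stack.
 */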
2707static void
2708vioif_update_link_status(struct vioif_softc *sc)
2709{
2710	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2711	struct vioif_netqueue *netq;
2712	struct vioif_tx_context *txc;
2713	bool active;
2714	int link, i;
2715
2716	mutex_enter(&sc->sc_lock);
2717
2718	link = vioif_get_link_status(sc);
2719
2720	if (link == sc->sc_link_state)
2721		goto done;
2722
2723	sc->sc_link_state = link;
2724
2725	active = VIOIF_IS_LINK_ACTIVE(sc);
2726	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
2727		netq = &sc->sc_netqs[VIOIF_NETQ_TXQID(i)];
2728
2729		mutex_enter(&netq->netq_lock);
2730		txc = netq->netq_ctx;
2731		txc->txc_link_active = active;
2732		mutex_exit(&netq->netq_lock);
2733	}
2734
2735	if_link_state_change(ifp, sc->sc_link_state);
2736
2737done:
2738	mutex_exit(&sc->sc_lock);
2739}
2740
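/*
 * Thin wrapper around workqueue(9): the "added" flag guarantees that a
 * vioif_work is enqueued at most once at a time.  It is cleared before
 * the callback runs, so the work can be re-armed from the handler.
 */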
2741static void
2742vioif_workq_work(struct work *wk, void *context)
2743{
2744	struct vioif_work *work;
2745
2746	work = container_of(wk, struct vioif_work, cookie);
2747
2748	atomic_store_relaxed(&work->added, 0);
2749	work->func(work->arg);
2750}
2751
2752static struct workqueue *
2753vioif_workq_create(const char *name, pri_t prio, int ipl, int flags)
2754{
2755	struct workqueue *wq;
2756	int error;
2757
2758	error = workqueue_create(&wq, name, vioif_workq_work, NULL,
2759	    prio, ipl, flags);
2760
2761	if (error)
2762		return NULL;
2763
2764	return wq;
2765}
2766
2767static void
2768vioif_workq_destroy(struct workqueue *wq)
2769{
2770
2771	workqueue_destroy(wq);
2772}
2773
2774static void
2775vioif_work_set(struct vioif_work *work, void (*func)(void *), void *arg)
2776{
2777
2778	memset(work, 0, sizeof(*work));
2779	work->func = func;
2780	work->arg = arg;
2781}
2782
2783static void
2784vioif_work_add(struct workqueue *wq, struct vioif_work *work)
2785{
2786
2787	if (atomic_load_relaxed(&work->added) != 0)
2788		return;
2789
2790	atomic_store_relaxed(&work->added, 1);
2791	kpreempt_disable();
2792	workqueue_enqueue(wq, &work->cookie, NULL);
2793	kpreempt_enable();
2794}
2795
2796static void
2797vioif_work_wait(struct workqueue *wq, struct vioif_work *work)
2798{
2799
2800	workqueue_wait(wq, &work->cookie);
2801}
2802
2803MODULE(MODULE_CLASS_DRIVER, if_vioif, "virtio");
2804
2805#ifdef _MODULE
2806#include "ioconf.c"
2807#endif
2808
2809static int
2810if_vioif_modcmd(modcmd_t cmd, void *opaque)
2811{
2812	int error = 0;
2813
2814#ifdef _MODULE
2815	switch (cmd) {
2816	case MODULE_CMD_INIT:
2817		error = config_init_component(cfdriver_ioconf_if_vioif,
2818		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2819		break;
2820	case MODULE_CMD_FINI:
2821		error = config_fini_component(cfdriver_ioconf_if_vioif,
2822		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2823		break;
2824	default:
2825		error = ENOTTY;
2826		break;
2827	}
2828#endif
2829
2830	return error;
2831}
2832