if_vioif.c revision 1.103
1/*	$NetBSD: if_vioif.c,v 1.103 2023/03/23 03:27:48 yamaguchi Exp $	*/
2
3/*
4 * Copyright (c) 2020 The NetBSD Foundation, Inc.
5 * Copyright (c) 2010 Minoura Makoto.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__KERNEL_RCSID(0, "$NetBSD: if_vioif.c,v 1.103 2023/03/23 03:27:48 yamaguchi Exp $");
31
32#ifdef _KERNEL_OPT
33#include "opt_net_mpsafe.h"
34#endif
35
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/kernel.h>
39#include <sys/atomic.h>
40#include <sys/bus.h>
41#include <sys/condvar.h>
42#include <sys/device.h>
43#include <sys/evcnt.h>
44#include <sys/intr.h>
45#include <sys/kmem.h>
46#include <sys/mbuf.h>
47#include <sys/mutex.h>
48#include <sys/sockio.h>
49#include <sys/syslog.h>
50#include <sys/cpu.h>
51#include <sys/module.h>
52#include <sys/pcq.h>
53#include <sys/workqueue.h>
54#include <sys/xcall.h>
55
56#include <dev/pci/virtioreg.h>
57#include <dev/pci/virtiovar.h>
58
59#include <net/if.h>
60#include <net/if_dl.h>
61#include <net/if_media.h>
62#include <net/if_ether.h>
63
64#include <net/bpf.h>
65
66#include "ioconf.h"
67
68#ifdef NET_MPSAFE
69#define VIOIF_MPSAFE	1
70#define VIOIF_MULTIQ	1
71#endif
72
73/*
74 * if_vioifreg.h:
75 */
76/* Configuration registers */
77#define VIRTIO_NET_CONFIG_MAC		 0 /* 8bit x 6byte */
78#define VIRTIO_NET_CONFIG_STATUS	 6 /* 16bit */
79#define VIRTIO_NET_CONFIG_MAX_VQ_PAIRS	 8 /* 16bit */
80#define VIRTIO_NET_CONFIG_MTU		10 /* 16bit */
81
82/* Feature bits */
83#define VIRTIO_NET_F_CSUM		__BIT(0)
84#define VIRTIO_NET_F_GUEST_CSUM		__BIT(1)
85#define VIRTIO_NET_F_MAC		__BIT(5)
86#define VIRTIO_NET_F_GSO		__BIT(6)
87#define VIRTIO_NET_F_GUEST_TSO4		__BIT(7)
88#define VIRTIO_NET_F_GUEST_TSO6		__BIT(8)
89#define VIRTIO_NET_F_GUEST_ECN		__BIT(9)
90#define VIRTIO_NET_F_GUEST_UFO		__BIT(10)
91#define VIRTIO_NET_F_HOST_TSO4		__BIT(11)
92#define VIRTIO_NET_F_HOST_TSO6		__BIT(12)
93#define VIRTIO_NET_F_HOST_ECN		__BIT(13)
94#define VIRTIO_NET_F_HOST_UFO		__BIT(14)
95#define VIRTIO_NET_F_MRG_RXBUF		__BIT(15)
96#define VIRTIO_NET_F_STATUS		__BIT(16)
97#define VIRTIO_NET_F_CTRL_VQ		__BIT(17)
98#define VIRTIO_NET_F_CTRL_RX		__BIT(18)
99#define VIRTIO_NET_F_CTRL_VLAN		__BIT(19)
100#define VIRTIO_NET_F_CTRL_RX_EXTRA	__BIT(20)
101#define VIRTIO_NET_F_GUEST_ANNOUNCE	__BIT(21)
102#define VIRTIO_NET_F_MQ			__BIT(22)
103#define VIRTIO_NET_F_CTRL_MAC_ADDR 	__BIT(23)
104
105#define VIRTIO_NET_FLAG_BITS			\
106	VIRTIO_COMMON_FLAG_BITS			\
107	"b\x17" "CTRL_MAC\0"			\
108	"b\x16" "MQ\0"				\
109	"b\x15" "GUEST_ANNOUNCE\0"		\
110	"b\x14" "CTRL_RX_EXTRA\0"		\
111	"b\x13" "CTRL_VLAN\0"			\
112	"b\x12" "CTRL_RX\0"			\
113	"b\x11" "CTRL_VQ\0"			\
114	"b\x10" "STATUS\0"			\
115	"b\x0f" "MRG_RXBUF\0"			\
116	"b\x0e" "HOST_UFO\0"			\
117	"b\x0d" "HOST_ECN\0"			\
118	"b\x0c" "HOST_TSO6\0"			\
119	"b\x0b" "HOST_TSO4\0"			\
120	"b\x0a" "GUEST_UFO\0"			\
121	"b\x09" "GUEST_ECN\0"			\
122	"b\x08" "GUEST_TSO6\0"			\
123	"b\x07" "GUEST_TSO4\0"			\
124	"b\x06" "GSO\0"				\
125	"b\x05" "MAC\0"				\
126	"b\x01" "GUEST_CSUM\0"			\
127	"b\x00" "CSUM\0"
128
129/* Status */
130#define VIRTIO_NET_S_LINK_UP	1
131
132/* Packet header structure */
/*
 * Packet header structure.
 *
 * The structure is packed; field order and widths must not change.
 * The trailing num_buffers member is only counted in the header size
 * when VIRTIO_NET_F_MRG_RXBUF or VIRTIO_F_VERSION_1 was negotiated
 * (see the sc_hdr_size computation in vioif_attach()).
 */
struct virtio_net_hdr {
	uint8_t		flags;		/* VIRTIO_NET_HDR_F_* */
	uint8_t		gso_type;	/* VIRTIO_NET_HDR_GSO_* */
	uint16_t	hdr_len;
	uint16_t	gso_size;
	uint16_t	csum_start;
	uint16_t	csum_offset;

	/* only with VIRTIO_NET_F_MRG_RXBUF enabled or v1 */
	uint16_t	num_buffers;
} __packed;
143
144#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
145#define VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
146#define VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
147#define VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
148#define VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
149#define VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */
150
151#define VIRTIO_NET_MAX_GSO_LEN		(65536+ETHER_HDR_LEN)
152
153/* Control virtqueue */
/*
 * Command header sent on the control virtqueue: a command class
 * (VIRTIO_NET_CTRL_RX, _MAC, _VLAN, _MQ) plus a command within that
 * class.  Packed; layout must not change.
 */
struct virtio_net_ctrl_cmd {
	uint8_t	class;		/* VIRTIO_NET_CTRL_* class */
	uint8_t	command;	/* class-specific command code */
} __packed;
158#define VIRTIO_NET_CTRL_RX		0
159# define VIRTIO_NET_CTRL_RX_PROMISC	0
160# define VIRTIO_NET_CTRL_RX_ALLMULTI	1
161
162#define VIRTIO_NET_CTRL_MAC		1
163# define VIRTIO_NET_CTRL_MAC_TABLE_SET	0
164# define  VIRTIO_NET_CTRL_MAC_ADDR_SET	1
165
166#define VIRTIO_NET_CTRL_VLAN		2
167# define VIRTIO_NET_CTRL_VLAN_ADD	0
168# define VIRTIO_NET_CTRL_VLAN_DEL	1
169
170#define VIRTIO_NET_CTRL_MQ			4
171# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET	0
172# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN	1
173# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX	0x8000
174
/*
 * Result byte for a control virtqueue command; holds VIRTIO_NET_OK or
 * VIRTIO_NET_ERR.  Packed; layout must not change.
 */
struct virtio_net_ctrl_status {
	uint8_t	ack;	/* VIRTIO_NET_OK / VIRTIO_NET_ERR */
} __packed;
178#define VIRTIO_NET_OK			0
179#define VIRTIO_NET_ERR			1
180
/*
 * Parameter for a VIRTIO_NET_CTRL_RX class command (promiscuous /
 * all-multicast): a single on/off byte.  Packed; layout must not change.
 */
struct virtio_net_ctrl_rx {
	uint8_t	onoff;	/* requested state of the selected RX mode */
} __packed;
184
185struct virtio_net_ctrl_mac_tbl {
186	uint32_t nentries;
187	uint8_t macs[][ETHER_ADDR_LEN];
188} __packed;
189
190struct virtio_net_ctrl_mac_addr {
191	uint8_t mac[ETHER_ADDR_LEN];
192} __packed;
193
/*
 * VLAN identifier payload; by its name this belongs to the
 * VIRTIO_NET_CTRL_VLAN class commands.  Packed; layout must not change.
 */
struct virtio_net_ctrl_vlan {
	uint16_t id;	/* VLAN id */
} __packed;
197
/*
 * Queue-pair count payload; by its name this belongs to the
 * VIRTIO_NET_CTRL_MQ class (VQ_PAIRS_SET).  Packed; layout must not
 * change.
 */
struct virtio_net_ctrl_mq {
	uint16_t virtqueue_pairs;	/* number of rx/tx queue pairs */
} __packed;
201
202/*
203 * if_vioifvar.h:
204 */
205
206/*
207 * Locking notes:
 * + a field in vioif_netqueue is protected by netq_lock (a spin mutex)
 *      - more than one lock cannot be held at once
210 * + a field in vioif_tx_context and vioif_rx_context is also protected
211 *   by netq_lock.
212 * + ctrlq_inuse is protected by ctrlq_wait_lock.
213 *      - other fields in vioif_ctrlqueue are protected by ctrlq_inuse
214 *      - netq_lock cannot be held along with ctrlq_wait_lock
215 * + fields in vioif_softc except queues are protected by
216 *   sc->sc_lock(an adaptive mutex)
217 *      - the lock is held before acquisition of other locks
218 */
219
220struct vioif_ctrl_cmdspec {
221	bus_dmamap_t	dmamap;
222	void		*buf;
223	bus_size_t	bufsize;
224};
225
226struct vioif_work {
227	struct work	 cookie;
228	void		(*func)(void *);
229	void		*arg;
230	unsigned int	 added;
231};
232
233struct vioif_net_map {
234	struct virtio_net_hdr	*vnm_hdr;
235	bus_dmamap_t		 vnm_hdr_map;
236	struct mbuf		*vnm_mbuf;
237	bus_dmamap_t		 vnm_mbuf_map;
238};
239
240#define VIOIF_NETQ_RX		0
241#define VIOIF_NETQ_TX		1
242#define VIOIF_NETQ_IDX		2
243#define VIOIF_NETQ_DIR(n)	((n) % VIOIF_NETQ_IDX)
244#define VIOIF_NETQ_PAIRIDX(n)	((n) / VIOIF_NETQ_IDX)
245#define VIOIF_NETQ_RXQID(n)	((n) * VIOIF_NETQ_IDX + VIOIF_NETQ_RX)
246#define VIOIF_NETQ_TXQID(n)	((n) * VIOIF_NETQ_IDX + VIOIF_NETQ_TX)
247
248struct vioif_netqueue {
249	kmutex_t		 netq_lock;
250	struct virtqueue	*netq_vq;
251	bool			 netq_stopping;
252	bool			 netq_running_handle;
253	void			*netq_maps_kva;
254	struct vioif_net_map	*netq_maps;
255
256	void			*netq_softint;
257	struct vioif_work	 netq_work;
258	bool			 netq_workqueue;
259
260	char			 netq_evgroup[32];
261	struct evcnt		 netq_mbuf_load_failed;
262	struct evcnt		 netq_enqueue_failed;
263
264	void			*netq_ctx;
265};
266
267struct vioif_tx_context {
268	bool			 txc_link_active;
269	bool			 txc_no_free_slots;
270	pcq_t			*txc_intrq;
271	void			*txc_deferred_transmit;
272
273	struct evcnt		 txc_defrag_failed;
274};
275
276struct vioif_rx_context {
277	struct evcnt		 rxc_mbuf_enobufs;
278};
279struct vioif_ctrlqueue {
280	struct virtqueue		*ctrlq_vq;
281	enum {
282		FREE, INUSE, DONE
283	}				ctrlq_inuse;
284	kcondvar_t			ctrlq_wait;
285	kmutex_t			ctrlq_wait_lock;
286	struct lwp			*ctrlq_owner;
287
288	struct virtio_net_ctrl_cmd	*ctrlq_cmd;
289	struct virtio_net_ctrl_status	*ctrlq_status;
290	struct virtio_net_ctrl_rx	*ctrlq_rx;
291	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_uc;
292	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_mc;
293	struct virtio_net_ctrl_mac_addr	*ctrlq_mac_addr;
294	struct virtio_net_ctrl_mq	*ctrlq_mq;
295
296	bus_dmamap_t			ctrlq_cmd_dmamap;
297	bus_dmamap_t			ctrlq_status_dmamap;
298	bus_dmamap_t			ctrlq_rx_dmamap;
299	bus_dmamap_t			ctrlq_tbl_uc_dmamap;
300	bus_dmamap_t			ctrlq_tbl_mc_dmamap;
301	bus_dmamap_t			ctrlq_mac_addr_dmamap;
302	bus_dmamap_t			ctrlq_mq_dmamap;
303
304	struct evcnt			ctrlq_cmd_load_failed;
305	struct evcnt			ctrlq_cmd_failed;
306};
307
308struct vioif_softc {
309	device_t		sc_dev;
310	kmutex_t		sc_lock;
311	struct sysctllog	*sc_sysctllog;
312
313	struct virtio_softc	*sc_virtio;
314	struct virtqueue	*sc_vqs;
315	u_int			 sc_hdr_size;
316
317	int			sc_max_nvq_pairs;
318	int			sc_req_nvq_pairs;
319	int			sc_act_nvq_pairs;
320
321	uint8_t			sc_mac[ETHER_ADDR_LEN];
322	struct ethercom		sc_ethercom;
323	int			sc_link_state;
324
325	struct vioif_netqueue	*sc_netqs;
326
327	bool			sc_has_ctrl;
328	struct vioif_ctrlqueue	sc_ctrlq;
329
330	bus_dma_segment_t	 sc_segs[1];
331	void			*sc_dmamem;
332	void			*sc_kmem;
333
334	void			*sc_cfg_softint;
335
336	struct workqueue	*sc_txrx_workqueue;
337	bool			 sc_txrx_workqueue_sysctl;
338	u_int			 sc_tx_intr_process_limit;
339	u_int			 sc_tx_process_limit;
340	u_int			 sc_rx_intr_process_limit;
341	u_int			 sc_rx_process_limit;
342};
343#define VIRTIO_NET_TX_MAXNSEGS		(16) /* XXX */
344#define VIRTIO_NET_CTRL_MAC_MAXENTRIES	(64) /* XXX */
345
346#define VIOIF_TX_INTR_PROCESS_LIMIT	256
347#define VIOIF_TX_PROCESS_LIMIT		256
348#define VIOIF_RX_INTR_PROCESS_LIMIT	0U
349#define VIOIF_RX_PROCESS_LIMIT		256
350
351#define VIOIF_WORKQUEUE_PRI		PRI_SOFTNET
352#define VIOIF_IS_LINK_ACTIVE(_sc)	((_sc)->sc_link_state == LINK_STATE_UP ? \
353					    true : false)
354
355/* cfattach interface functions */
356static int	vioif_match(device_t, cfdata_t, void *);
357static void	vioif_attach(device_t, device_t, void *);
358static int	vioif_finalize_teardown(device_t);
359
360/* ifnet interface functions */
361static int	vioif_init(struct ifnet *);
362static void	vioif_stop(struct ifnet *, int);
363static void	vioif_start(struct ifnet *);
364static int	vioif_transmit(struct ifnet *, struct mbuf *);
365static int	vioif_ioctl(struct ifnet *, u_long, void *);
366static void	vioif_watchdog(struct ifnet *);
367static int	vioif_ifflags(struct vioif_softc *);
368static int	vioif_ifflags_cb(struct ethercom *);
369
370/* tx & rx */
371static int	vioif_netqueue_init(struct vioif_softc *,
372		    struct virtio_softc *, size_t, u_int);
373static void	vioif_netqueue_teardown(struct vioif_softc *,
374		    struct virtio_softc *, size_t);
375static void	vioif_net_intr_enable(struct vioif_softc *,
376		    struct virtio_softc *);
377static void	vioif_net_intr_disable(struct vioif_softc *,
378		    struct virtio_softc *);
379static void	vioif_net_sched_handle(struct vioif_softc *,
380		    struct vioif_netqueue *);
381
382/* rx */
383static void	vioif_populate_rx_mbufs_locked(struct vioif_softc *,
384		    struct vioif_netqueue *);
385static int	vioif_rx_intr(void *);
386static void	vioif_rx_handle(void *);
387static void	vioif_rx_queue_clear(struct vioif_softc *,
388		    struct virtio_softc *, struct vioif_netqueue *);
389
390/* tx */
391static void	vioif_start_locked(struct ifnet *, struct vioif_netqueue *);
392static void	vioif_transmit_locked(struct ifnet *, struct vioif_netqueue *);
393static void	vioif_deferred_transmit(void *);
394static int	vioif_tx_intr(void *);
395static void	vioif_tx_handle(void *);
396static void	vioif_tx_queue_clear(struct vioif_softc *, struct virtio_softc *,
397		    struct vioif_netqueue *);
398
399/* controls */
400static int	vioif_ctrl_intr(void *);
401static int	vioif_ctrl_rx(struct vioif_softc *, int, bool);
402static int	vioif_set_promisc(struct vioif_softc *, bool);
403static int	vioif_set_allmulti(struct vioif_softc *, bool);
404static int	vioif_set_rx_filter(struct vioif_softc *);
405static int	vioif_rx_filter(struct vioif_softc *);
406static int	vioif_set_mac_addr(struct vioif_softc *);
407static int	vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *, int);
408
409/* config interrupt */
410static int	vioif_config_change(struct virtio_softc *);
411static void	vioif_cfg_softint(void *);
412static void	vioif_update_link_status(struct vioif_softc *);
413
414/* others */
415static void	vioif_alloc_queues(struct vioif_softc *);
416static void	vioif_free_queues(struct vioif_softc *);
417static int	vioif_alloc_mems(struct vioif_softc *);
418static struct workqueue*
419		vioif_workq_create(const char *, pri_t, int, int);
420static void	vioif_workq_destroy(struct workqueue *);
421static void	vioif_work_set(struct vioif_work *, void(*)(void *), void *);
422static void	vioif_work_add(struct workqueue *, struct vioif_work *);
423static void	vioif_work_wait(struct workqueue *, struct vioif_work *);
424static int	vioif_setup_sysctl(struct vioif_softc *);
425static void	vioif_setup_stats(struct vioif_softc *);
426
427CFATTACH_DECL_NEW(vioif, sizeof(struct vioif_softc),
428		  vioif_match, vioif_attach, NULL, NULL);
429
/*
 * Wait until all interrupt handlers have finished, using a cross-call
 * barrier.
 */
static void
vioif_intr_barrier(void)
{
	/* returns once every interrupt handler has run to completion */
	xc_barrier(0);
}
437
438static void
439vioif_notify(struct virtio_softc *vsc, struct virtqueue *vq)
440{
441
442	virtio_enqueue_commit(vsc, vq, -1, true);
443}
444
445static int
446vioif_match(device_t parent, cfdata_t match, void *aux)
447{
448	struct virtio_attach_args *va = aux;
449
450	if (va->sc_childdevid == VIRTIO_DEVICE_ID_NETWORK)
451		return 1;
452
453	return 0;
454}
455
456static void
457vioif_attach(device_t parent, device_t self, void *aux)
458{
459	struct vioif_softc *sc = device_private(self);
460	struct virtio_softc *vsc = device_private(parent);
461	struct vioif_netqueue *txq0;
462	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
463	uint64_t features, req_features;
464	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
465	u_int softint_flags;
466	int r, i, req_flags;
467	char xnamebuf[MAXCOMLEN];
468	size_t nvqs;
469
470	if (virtio_child(vsc) != NULL) {
471		aprint_normal(": child already attached for %s; "
472		    "something wrong...\n", device_xname(parent));
473		return;
474	}
475
476	sc->sc_dev = self;
477	sc->sc_virtio = vsc;
478	sc->sc_link_state = LINK_STATE_UNKNOWN;
479
480	sc->sc_max_nvq_pairs = 1;
481	sc->sc_req_nvq_pairs = 1;
482	sc->sc_act_nvq_pairs = 1;
483	sc->sc_txrx_workqueue_sysctl = true;
484	sc->sc_tx_intr_process_limit = VIOIF_TX_INTR_PROCESS_LIMIT;
485	sc->sc_tx_process_limit = VIOIF_TX_PROCESS_LIMIT;
486	sc->sc_rx_intr_process_limit = VIOIF_RX_INTR_PROCESS_LIMIT;
487	sc->sc_rx_process_limit = VIOIF_RX_PROCESS_LIMIT;
488
489	mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);
490
491	snprintf(xnamebuf, sizeof(xnamebuf), "%s_txrx", device_xname(self));
492	sc->sc_txrx_workqueue = vioif_workq_create(xnamebuf, VIOIF_WORKQUEUE_PRI,
493	    IPL_NET, WQ_PERCPU | WQ_MPSAFE);
494	if (sc->sc_txrx_workqueue == NULL)
495		goto err;
496
497	req_flags = 0;
498
499#ifdef VIOIF_MPSAFE
500	req_flags |= VIRTIO_F_INTR_MPSAFE;
501#endif
502	req_flags |= VIRTIO_F_INTR_MSIX;
503
504	req_features =
505	    VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | VIRTIO_NET_F_CTRL_VQ |
506	    VIRTIO_NET_F_CTRL_RX | VIRTIO_F_NOTIFY_ON_EMPTY;
507	req_features |= VIRTIO_F_RING_EVENT_IDX;
508	req_features |= VIRTIO_NET_F_CTRL_MAC_ADDR;
509#ifdef VIOIF_MULTIQ
510	req_features |= VIRTIO_NET_F_MQ;
511#endif
512
513	virtio_child_attach_start(vsc, self, IPL_NET,
514	    req_features, VIRTIO_NET_FLAG_BITS);
515	features = virtio_features(vsc);
516
517	if (features == 0)
518		goto err;
519
520	if (features & VIRTIO_NET_F_MAC) {
521		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
522			sc->sc_mac[i] = virtio_read_device_config_1(vsc,
523			    VIRTIO_NET_CONFIG_MAC + i);
524		}
525	} else {
526		/* code stolen from sys/net/if_tap.c */
527		struct timeval tv;
528		uint32_t ui;
529		getmicrouptime(&tv);
530		ui = (tv.tv_sec ^ tv.tv_usec) & 0xffffff;
531		memcpy(sc->sc_mac+3, (uint8_t *)&ui, 3);
532		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
533			virtio_write_device_config_1(vsc,
534			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
535		}
536	}
537
538	/* 'Ethernet' with capital follows other ethernet driver attachment */
539	aprint_normal_dev(self, "Ethernet address %s\n",
540	    ether_sprintf(sc->sc_mac));
541
542	if (features & (VIRTIO_NET_F_MRG_RXBUF | VIRTIO_F_VERSION_1)) {
543		sc->sc_hdr_size = sizeof(struct virtio_net_hdr);
544	} else {
545		sc->sc_hdr_size = offsetof(struct virtio_net_hdr, num_buffers);
546	}
547
548	if ((features & VIRTIO_NET_F_CTRL_VQ) &&
549	    (features & VIRTIO_NET_F_CTRL_RX)) {
550		sc->sc_has_ctrl = true;
551
552		cv_init(&ctrlq->ctrlq_wait, "ctrl_vq");
553		mutex_init(&ctrlq->ctrlq_wait_lock, MUTEX_DEFAULT, IPL_NET);
554		ctrlq->ctrlq_inuse = FREE;
555	} else {
556		sc->sc_has_ctrl = false;
557	}
558
559	if (sc->sc_has_ctrl && (features & VIRTIO_NET_F_MQ)) {
560		sc->sc_max_nvq_pairs = virtio_read_device_config_2(vsc,
561		    VIRTIO_NET_CONFIG_MAX_VQ_PAIRS);
562
563		if (sc->sc_max_nvq_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
564			goto err;
565
566		/* Limit the number of queue pairs to use */
567		sc->sc_req_nvq_pairs = MIN(sc->sc_max_nvq_pairs, ncpu);
568
569		if (sc->sc_max_nvq_pairs > 1)
570			req_flags |= VIRTIO_F_INTR_PERVQ;
571	}
572
573	vioif_alloc_queues(sc);
574
575#ifdef VIOIF_MPSAFE
576	softint_flags = SOFTINT_NET | SOFTINT_MPSAFE;
577#else
578	softint_flags = SOFTINT_NET;
579#endif
580
581	/*
582	 * Initialize network queues
583	 */
584	nvqs = sc->sc_max_nvq_pairs * 2;
585	for (i = 0; i < nvqs; i++) {
586		r = vioif_netqueue_init(sc, vsc, i, softint_flags);
587		if (r != 0)
588			goto err;
589	}
590
591	if (sc->sc_has_ctrl) {
592		int ctrlq_idx = nvqs;
593
594		nvqs++;
595		/*
596		 * Allocating a virtqueue for control channel
597		 */
598		sc->sc_ctrlq.ctrlq_vq = &sc->sc_vqs[ctrlq_idx];
599		r = virtio_alloc_vq(vsc, ctrlq->ctrlq_vq, ctrlq_idx,
600		    NBPG, 1, "control");
601		if (r != 0) {
602			aprint_error_dev(self, "failed to allocate "
603			    "a virtqueue for control channel, error code %d\n",
604			    r);
605
606			sc->sc_has_ctrl = false;
607			cv_destroy(&ctrlq->ctrlq_wait);
608			mutex_destroy(&ctrlq->ctrlq_wait_lock);
609		} else {
610			ctrlq->ctrlq_vq->vq_intrhand = vioif_ctrl_intr;
611			ctrlq->ctrlq_vq->vq_intrhand_arg = (void *) ctrlq;
612		}
613	}
614
615	sc->sc_cfg_softint = softint_establish(softint_flags,
616	    vioif_cfg_softint, sc);
617	if (sc->sc_cfg_softint == NULL) {
618		aprint_error_dev(self, "cannot establish ctl softint\n");
619		goto err;
620	}
621
622	if (vioif_alloc_mems(sc) < 0)
623		goto err;
624
625	r = virtio_child_attach_finish(vsc, sc->sc_vqs, nvqs,
626	    vioif_config_change, virtio_vq_intrhand, req_flags);
627	if (r != 0)
628		goto err;
629
630	if (vioif_setup_sysctl(sc) != 0) {
631		aprint_error_dev(self, "unable to create sysctl node\n");
632		/* continue */
633	}
634
635	vioif_setup_stats(sc);
636
637	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
638	ifp->if_softc = sc;
639	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
640#ifdef VIOIF_MPSAFE
641	ifp->if_extflags = IFEF_MPSAFE;
642#endif
643	ifp->if_start = vioif_start;
644	if (sc->sc_req_nvq_pairs > 1)
645		ifp->if_transmit = vioif_transmit;
646	ifp->if_ioctl = vioif_ioctl;
647	ifp->if_init = vioif_init;
648	ifp->if_stop = vioif_stop;
649	ifp->if_capabilities = 0;
650	ifp->if_watchdog = vioif_watchdog;
651	txq0 = &sc->sc_netqs[VIOIF_NETQ_TXQID(0)];
652	IFQ_SET_MAXLEN(&ifp->if_snd, MAX(txq0->netq_vq->vq_num, IFQ_MAXLEN));
653	IFQ_SET_READY(&ifp->if_snd);
654
655	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
656
657	if_attach(ifp);
658	if_deferred_start_init(ifp, NULL);
659	ether_ifattach(ifp, sc->sc_mac);
660	ether_set_ifflags_cb(&sc->sc_ethercom, vioif_ifflags_cb);
661
662	return;
663
664err:
665	nvqs = sc->sc_max_nvq_pairs * 2;
666	for (i = 0; i < nvqs; i++) {
667		vioif_netqueue_teardown(sc, vsc, i);
668	}
669
670	if (sc->sc_has_ctrl) {
671		cv_destroy(&ctrlq->ctrlq_wait);
672		mutex_destroy(&ctrlq->ctrlq_wait_lock);
673		virtio_free_vq(vsc, ctrlq->ctrlq_vq);
674		ctrlq->ctrlq_vq = NULL;
675	}
676
677	vioif_free_queues(sc);
678	mutex_destroy(&sc->sc_lock);
679	virtio_child_attach_failed(vsc);
680	config_finalize_register(self, vioif_finalize_teardown);
681
682	return;
683}
684
685static int
686vioif_finalize_teardown(device_t self)
687{
688	struct vioif_softc *sc = device_private(self);
689
690	if (sc->sc_txrx_workqueue != NULL) {
691		vioif_workq_destroy(sc->sc_txrx_workqueue);
692		sc->sc_txrx_workqueue = NULL;
693	}
694
695	return 0;
696}
697
698/*
699 * Interface functions for ifnet
700 */
701static int
702vioif_init(struct ifnet *ifp)
703{
704	struct vioif_softc *sc = ifp->if_softc;
705	struct virtio_softc *vsc = sc->sc_virtio;
706	struct vioif_netqueue *netq;
707	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
708	int r, i;
709
710	vioif_stop(ifp, 0);
711
712	r = virtio_reinit_start(vsc);
713	if (r != 0) {
714		log(LOG_ERR, "%s: reset failed\n", ifp->if_xname);
715		return EIO;
716	}
717
718	virtio_negotiate_features(vsc, virtio_features(vsc));
719
720	for (i = 0; i < sc->sc_req_nvq_pairs; i++) {
721		netq = &sc->sc_netqs[VIOIF_NETQ_RXQID(i)];
722
723		mutex_enter(&netq->netq_lock);
724		vioif_populate_rx_mbufs_locked(sc, netq);
725		mutex_exit(&netq->netq_lock);
726	}
727
728	virtio_reinit_end(vsc);
729
730	if (sc->sc_has_ctrl)
731		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
732
733	r = vioif_ctrl_mq_vq_pairs_set(sc, sc->sc_req_nvq_pairs);
734	if (r == 0)
735		sc->sc_act_nvq_pairs = sc->sc_req_nvq_pairs;
736	else
737		sc->sc_act_nvq_pairs = 1;
738
739	SET(ifp->if_flags, IFF_RUNNING);
740
741	vioif_net_intr_enable(sc, vsc);
742
743	vioif_update_link_status(sc);
744	r = vioif_rx_filter(sc);
745
746	return r;
747}
748
749static void
750vioif_stop(struct ifnet *ifp, int disable)
751{
752	struct vioif_softc *sc = ifp->if_softc;
753	struct virtio_softc *vsc = sc->sc_virtio;
754	struct vioif_netqueue *netq;
755	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
756	size_t i, act_qnum;
757
758	act_qnum = sc->sc_act_nvq_pairs * 2;
759
760	CLR(ifp->if_flags, IFF_RUNNING);
761	for (i = 0; i < act_qnum; i++) {
762		netq = &sc->sc_netqs[i];
763
764		mutex_enter(&netq->netq_lock);
765		netq->netq_stopping = true;
766		mutex_exit(&netq->netq_lock);
767	}
768
769	/* disable interrupts */
770	vioif_net_intr_disable(sc, vsc);
771	if (sc->sc_has_ctrl)
772		virtio_stop_vq_intr(vsc, ctrlq->ctrlq_vq);
773
774	/*
775	 * only way to stop interrupt, I/O and DMA is resetting...
776	 *
777	 * NOTE: Devices based on VirtIO draft specification can not
778	 * stop interrupt completely even if virtio_stop_vq_intr() is called.
779	 */
780	virtio_reset(vsc);
781
782	vioif_intr_barrier();
783
784	for (i = 0; i < act_qnum; i++) {
785		netq = &sc->sc_netqs[i];
786		vioif_work_wait(sc->sc_txrx_workqueue, &netq->netq_work);
787	}
788
789	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
790		netq = &sc->sc_netqs[VIOIF_NETQ_RXQID(i)];
791		vioif_rx_queue_clear(sc, vsc, netq);
792
793		netq = &sc->sc_netqs[VIOIF_NETQ_TXQID(i)];
794		vioif_tx_queue_clear(sc, vsc, netq);
795	}
796
797	/* all packet processing is stopped */
798	for (i = 0; i < act_qnum; i++) {
799		netq = &sc->sc_netqs[i];
800
801		mutex_enter(&netq->netq_lock);
802		netq->netq_stopping = false;
803		mutex_exit(&netq->netq_lock);
804	}
805}
806
807static void
808vioif_start(struct ifnet *ifp)
809{
810	struct vioif_softc *sc = ifp->if_softc;
811	struct vioif_netqueue *txq0 = &sc->sc_netqs[VIOIF_NETQ_TXQID(0)];
812
813#ifdef VIOIF_MPSAFE
814	KASSERT(if_is_mpsafe(ifp));
815#endif
816
817	mutex_enter(&txq0->netq_lock);
818	vioif_start_locked(ifp, txq0);
819	mutex_exit(&txq0->netq_lock);
820}
821
822static inline int
823vioif_select_txqueue(struct ifnet *ifp, struct mbuf *m)
824{
825	struct vioif_softc *sc = ifp->if_softc;
826	u_int cpuid = cpu_index(curcpu());
827
828	return VIOIF_NETQ_TXQID(cpuid % sc->sc_act_nvq_pairs);
829}
830
831static int
832vioif_transmit(struct ifnet *ifp, struct mbuf *m)
833{
834	struct vioif_softc *sc = ifp->if_softc;
835	struct vioif_netqueue *netq;
836	struct vioif_tx_context *txc;
837	int qid;
838
839	qid = vioif_select_txqueue(ifp, m);
840	netq = &sc->sc_netqs[qid];
841	txc = netq->netq_ctx;
842
843	if (__predict_false(!pcq_put(txc->txc_intrq, m))) {
844		m_freem(m);
845		return ENOBUFS;
846	}
847
848	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
849	if_statadd_ref(nsr, if_obytes, m->m_pkthdr.len);
850	if (m->m_flags & M_MCAST)
851		if_statinc_ref(nsr, if_omcasts);
852	IF_STAT_PUTREF(ifp);
853
854	if (mutex_tryenter(&netq->netq_lock)) {
855		vioif_transmit_locked(ifp, netq);
856		mutex_exit(&netq->netq_lock);
857	}
858
859	return 0;
860}
861
862void
863vioif_watchdog(struct ifnet *ifp)
864{
865	struct vioif_softc *sc = ifp->if_softc;
866	struct vioif_netqueue *netq;
867	int i;
868
869	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
870		if (ISSET(ifp->if_flags, IFF_DEBUG)) {
871			log(LOG_DEBUG, "%s: watchdog timed out\n",
872			    ifp->if_xname);
873		}
874
875		for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
876			netq = &sc->sc_netqs[VIOIF_NETQ_TXQID(i)];
877
878			mutex_enter(&netq->netq_lock);
879			if (!netq->netq_running_handle) {
880				netq->netq_running_handle = true;
881				vioif_net_sched_handle(sc, netq);
882			}
883			mutex_exit(&netq->netq_lock);
884		}
885	}
886}
887
888static int
889vioif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
890{
891	int s, r;
892
893	s = splnet();
894
895	r = ether_ioctl(ifp, cmd, data);
896	if (r == ENETRESET && (cmd == SIOCADDMULTI || cmd == SIOCDELMULTI)) {
897		if (ifp->if_flags & IFF_RUNNING) {
898			r = vioif_rx_filter(ifp->if_softc);
899		} else {
900			r = 0;
901		}
902	}
903
904	splx(s);
905
906	return r;
907}
908
909static int
910vioif_ifflags(struct vioif_softc *sc)
911{
912	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
913	bool onoff;
914	int r;
915
916	if (!sc->sc_has_ctrl) {
917		/* no ctrl vq; always promisc and allmulti */
918		ifp->if_flags |= (IFF_PROMISC | IFF_ALLMULTI);
919		return 0;
920	}
921
922	onoff = ifp->if_flags & IFF_ALLMULTI ? true : false;
923	r = vioif_set_allmulti(sc, onoff);
924	if (r != 0) {
925		log(LOG_WARNING,
926		    "%s: couldn't %sable ALLMULTI\n",
927		    ifp->if_xname, onoff ? "en" : "dis");
928		if (onoff == false) {
929			ifp->if_flags |= IFF_ALLMULTI;
930		}
931	}
932
933	onoff = ifp->if_flags & IFF_PROMISC ? true : false;
934	r = vioif_set_promisc(sc, onoff);
935	if (r != 0) {
936		log(LOG_WARNING,
937		    "%s: couldn't %sable PROMISC\n",
938		    ifp->if_xname, onoff ? "en" : "dis");
939		if (onoff == false) {
940			ifp->if_flags |= IFF_PROMISC;
941		}
942	}
943
944	return 0;
945}
946
947static int
948vioif_ifflags_cb(struct ethercom *ec)
949{
950	struct ifnet *ifp = &ec->ec_if;
951	struct vioif_softc *sc = ifp->if_softc;
952
953	return vioif_ifflags(sc);
954}
955
956static int
957vioif_setup_sysctl(struct vioif_softc *sc)
958{
959	const char *devname;
960	struct sysctllog **log;
961	const struct sysctlnode *rnode, *rxnode, *txnode;
962	int error;
963
964	log = &sc->sc_sysctllog;
965	devname = device_xname(sc->sc_dev);
966
967	error = sysctl_createv(log, 0, NULL, &rnode,
968	    0, CTLTYPE_NODE, devname,
969	    SYSCTL_DESCR("virtio-net information and settings"),
970	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);
971	if (error)
972		goto out;
973
974	error = sysctl_createv(log, 0, &rnode, NULL,
975	    CTLFLAG_READWRITE, CTLTYPE_BOOL, "txrx_workqueue",
976	    SYSCTL_DESCR("Use workqueue for packet processing"),
977	    NULL, 0, &sc->sc_txrx_workqueue_sysctl, 0, CTL_CREATE, CTL_EOL);
978	if (error)
979		goto out;
980
981	error = sysctl_createv(log, 0, &rnode, &rxnode,
982	    0, CTLTYPE_NODE, "rx",
983	    SYSCTL_DESCR("virtio-net information and settings for Rx"),
984	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
985	if (error)
986		goto out;
987
988	error = sysctl_createv(log, 0, &rxnode, NULL,
989	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
990	    SYSCTL_DESCR("max number of Rx packets to process for interrupt processing"),
991	    NULL, 0, &sc->sc_rx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
992	if (error)
993		goto out;
994
995	error = sysctl_createv(log, 0, &rxnode, NULL,
996	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
997	    SYSCTL_DESCR("max number of Rx packets to process for deferred processing"),
998	    NULL, 0, &sc->sc_rx_process_limit, 0, CTL_CREATE, CTL_EOL);
999	if (error)
1000		goto out;
1001
1002	error = sysctl_createv(log, 0, &rnode, &txnode,
1003	    0, CTLTYPE_NODE, "tx",
1004	    SYSCTL_DESCR("virtio-net information and settings for Tx"),
1005	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
1006	if (error)
1007		goto out;
1008
1009	error = sysctl_createv(log, 0, &txnode, NULL,
1010	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
1011	    SYSCTL_DESCR("max number of Tx packets to process for interrupt processing"),
1012	    NULL, 0, &sc->sc_tx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
1013	if (error)
1014		goto out;
1015
1016	error = sysctl_createv(log, 0, &txnode, NULL,
1017	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
1018	    SYSCTL_DESCR("max number of Tx packets to process for deferred processing"),
1019	    NULL, 0, &sc->sc_tx_process_limit, 0, CTL_CREATE, CTL_EOL);
1020
1021out:
1022	if (error)
1023		sysctl_teardown(log);
1024
1025	return error;
1026}
1027
1028static void
1029vioif_setup_stats(struct vioif_softc *sc)
1030{
1031	struct vioif_netqueue *netq;
1032	struct vioif_tx_context *txc;
1033	struct vioif_rx_context *rxc;
1034	size_t i, netq_num;
1035
1036	netq_num = sc->sc_max_nvq_pairs * 2;
1037	for (i = 0; i < netq_num; i++) {
1038		netq = &sc->sc_netqs[i];
1039		evcnt_attach_dynamic(&netq->netq_mbuf_load_failed, EVCNT_TYPE_MISC,
1040		    NULL, netq->netq_evgroup, "failed to load mbuf to DMA");
1041		evcnt_attach_dynamic(&netq->netq_enqueue_failed,
1042		    EVCNT_TYPE_MISC, NULL, netq->netq_evgroup,
1043		    "virtqueue enqueue failed failed");
1044
1045		switch (VIOIF_NETQ_DIR(i)) {
1046		case VIOIF_NETQ_RX:
1047			rxc = netq->netq_ctx;
1048			evcnt_attach_dynamic(&rxc->rxc_mbuf_enobufs,
1049			    EVCNT_TYPE_MISC, NULL, netq->netq_evgroup,
1050			    "no receive buffer");
1051			break;
1052		case VIOIF_NETQ_TX:
1053			txc = netq->netq_ctx;
1054			evcnt_attach_dynamic(&txc->txc_defrag_failed,
1055			    EVCNT_TYPE_MISC, NULL, netq->netq_evgroup,
1056			    "m_defrag() failed");
1057			break;
1058		}
1059	}
1060
1061	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_load_failed, EVCNT_TYPE_MISC,
1062	    NULL, device_xname(sc->sc_dev), "control command dmamap load failed");
1063	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_failed, EVCNT_TYPE_MISC,
1064	    NULL, device_xname(sc->sc_dev), "control command failed");
1065}
1066
1067/*
1068 * allocate memory
1069 */
1070static int
1071vioif_dmamap_create(struct vioif_softc *sc, bus_dmamap_t *map,
1072    bus_size_t size, int nsegs, const char *usage)
1073{
1074	int r;
1075
1076	r = bus_dmamap_create(virtio_dmat(sc->sc_virtio), size,
1077	    nsegs, size, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, map);
1078
1079	if (r != 0) {
1080		aprint_error_dev(sc->sc_dev, "%s dmamap creation failed, "
1081		    "error code %d\n", usage, r);
1082	}
1083
1084	return r;
1085}
1086
1087static void
1088vioif_dmamap_destroy(struct vioif_softc *sc, bus_dmamap_t *map)
1089{
1090
1091	if (*map) {
1092		bus_dmamap_destroy(virtio_dmat(sc->sc_virtio), *map);
1093		*map = NULL;
1094	}
1095}
1096
1097static int
1098vioif_dmamap_create_load(struct vioif_softc *sc, bus_dmamap_t *map,
1099    void *buf, bus_size_t size, int nsegs, int rw, const char *usage)
1100{
1101	int r;
1102
1103	r = vioif_dmamap_create(sc, map, size, nsegs, usage);
1104	if (r != 0)
1105		return 1;
1106
1107	r = bus_dmamap_load(virtio_dmat(sc->sc_virtio), *map, buf,
1108	    size, NULL, rw | BUS_DMA_NOWAIT);
1109	if (r != 0) {
1110		vioif_dmamap_destroy(sc, map);
1111		aprint_error_dev(sc->sc_dev, "%s dmamap load failed. "
1112		    "error code %d\n", usage, r);
1113	}
1114
1115	return r;
1116}
1117
/*
 * Carve `size' bytes off the region tracked by the cursor *p: return the
 * current cursor position as a pointer and advance the cursor by `size'.
 * No alignment or bounds checking is done here.
 */
static void *
vioif_assign_mem(intptr_t *p, size_t size)
{
	void *const chunk = (void *)*p;

	*p += size;

	return chunk;
}
1128
1129/*
1130 * dma memory is used for:
1131 *   netq_maps_kva:	 metadata array for received frames (READ) and
1132 *			 sent frames (WRITE)
1133 *   ctrlq_cmd:		 command to be sent via ctrl vq (WRITE)
1134 *   ctrlq_status:	 return value for a command via ctrl vq (READ)
1135 *   ctrlq_rx:		 parameter for a VIRTIO_NET_CTRL_RX class command
1136 *			 (WRITE)
1137 *   ctrlq_mac_tbl_uc:	 unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
1138 *			 class command (WRITE)
1139 *   ctrlq_mac_tbl_mc:	 multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
1140 *			 class command (WRITE)
1141 * ctrlq_* structures are allocated only one each; they are protected by
1142 * ctrlq_inuse variable and ctrlq_wait condvar.
1143 */
1144static int
1145vioif_alloc_mems(struct vioif_softc *sc)
1146{
1147	struct virtio_softc *vsc = sc->sc_virtio;
1148	struct vioif_netqueue *netq;
1149	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1150	struct vioif_net_map *maps;
1151	unsigned int vq_num;
1152	int r, rsegs;
1153	bus_size_t dmamemsize;
1154	size_t qid, i, netq_num, kmemsize;
1155	void *vaddr;
1156	intptr_t p;
1157
1158	netq_num = sc->sc_max_nvq_pairs * 2;
1159
1160	/* allocate DMA memory */
1161	dmamemsize = 0;
1162
1163	for (qid = 0; qid < netq_num; qid++) {
1164		maps = sc->sc_netqs[qid].netq_maps;
1165		vq_num = sc->sc_netqs[qid].netq_vq->vq_num;
1166		dmamemsize += sizeof(*maps[0].vnm_hdr) * vq_num;
1167	}
1168
1169	if (sc->sc_has_ctrl) {
1170		dmamemsize += sizeof(struct virtio_net_ctrl_cmd);
1171		dmamemsize += sizeof(struct virtio_net_ctrl_status);
1172		dmamemsize += sizeof(struct virtio_net_ctrl_rx);
1173		dmamemsize += sizeof(struct virtio_net_ctrl_mac_tbl)
1174		    + ETHER_ADDR_LEN;
1175		dmamemsize += sizeof(struct virtio_net_ctrl_mac_tbl)
1176		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES;
1177		dmamemsize += sizeof(struct virtio_net_ctrl_mac_addr);
1178		dmamemsize += sizeof(struct virtio_net_ctrl_mq);
1179	}
1180
1181	r = bus_dmamem_alloc(virtio_dmat(vsc), dmamemsize, 0, 0,
1182	    &sc->sc_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
1183	if (r != 0) {
1184		aprint_error_dev(sc->sc_dev,
1185		    "DMA memory allocation failed, size %zu, "
1186		    "error code %d\n", dmamemsize, r);
1187		goto err_none;
1188	}
1189	r = bus_dmamem_map(virtio_dmat(vsc), &sc->sc_segs[0], 1,
1190	    dmamemsize, &vaddr, BUS_DMA_NOWAIT);
1191	if (r != 0) {
1192		aprint_error_dev(sc->sc_dev,
1193		    "DMA memory map failed, error code %d\n", r);
1194		goto err_dmamem_alloc;
1195	}
1196
1197	/* assign DMA memory */
1198	memset(vaddr, 0, dmamemsize);
1199	sc->sc_dmamem = vaddr;
1200	p = (intptr_t) vaddr;
1201
1202	for (qid = 0; qid < netq_num; qid++) {
1203		netq = &sc->sc_netqs[qid];
1204		maps = netq->netq_maps;
1205		vq_num = netq->netq_vq->vq_num;
1206
1207		netq->netq_maps_kva = vioif_assign_mem(&p,
1208		    sizeof(*maps[0].vnm_hdr) * vq_num);
1209	}
1210
1211	if (sc->sc_has_ctrl) {
1212		ctrlq->ctrlq_cmd = vioif_assign_mem(&p,
1213		    sizeof(*ctrlq->ctrlq_cmd));
1214		ctrlq->ctrlq_status = vioif_assign_mem(&p,
1215		    sizeof(*ctrlq->ctrlq_status));
1216		ctrlq->ctrlq_rx = vioif_assign_mem(&p,
1217		    sizeof(*ctrlq->ctrlq_rx));
1218		ctrlq->ctrlq_mac_tbl_uc = vioif_assign_mem(&p,
1219		    sizeof(*ctrlq->ctrlq_mac_tbl_uc)
1220		    + ETHER_ADDR_LEN);
1221		ctrlq->ctrlq_mac_tbl_mc = vioif_assign_mem(&p,
1222		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
1223		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES);
1224		ctrlq->ctrlq_mac_addr = vioif_assign_mem(&p,
1225		    sizeof(*ctrlq->ctrlq_mac_addr));
1226		ctrlq->ctrlq_mq = vioif_assign_mem(&p, sizeof(*ctrlq->ctrlq_mq));
1227	}
1228
1229	/* allocate kmem */
1230	kmemsize = 0;
1231
1232	for (qid = 0; qid < netq_num; qid++) {
1233		netq = &sc->sc_netqs[qid];
1234		vq_num = netq->netq_vq->vq_num;
1235
1236		kmemsize += sizeof(netq->netq_maps[0]) * vq_num;
1237	}
1238
1239	vaddr = kmem_zalloc(kmemsize, KM_SLEEP);
1240	sc->sc_kmem = vaddr;
1241
1242	/* assign allocated kmem */
1243	p = (intptr_t) vaddr;
1244
1245	for (qid = 0; qid < netq_num; qid++) {
1246		netq = &sc->sc_netqs[qid];
1247		vq_num = netq->netq_vq->vq_num;
1248
1249		netq->netq_maps = vioif_assign_mem(&p,
1250		    sizeof(netq->netq_maps[0]) * vq_num);
1251	}
1252
1253	/* prepare dmamaps */
1254	for (qid = 0; qid < netq_num; qid++) {
1255		static const struct {
1256			const char	*msg_hdr;
1257			const char	*msg_payload;
1258			int		 dma_flag;
1259			bus_size_t	 dma_size;
1260			int		 dma_nsegs;
1261		} dmaparams[VIOIF_NETQ_IDX] = {
1262			[VIOIF_NETQ_RX] = {
1263				.msg_hdr	= "rx header",
1264				.msg_payload	= "rx payload",
1265				.dma_flag	= BUS_DMA_READ,
1266				.dma_size	= MCLBYTES - ETHER_ALIGN,
1267				.dma_nsegs	= 1,
1268			},
1269			[VIOIF_NETQ_TX] = {
1270				.msg_hdr	= "tx header",
1271				.msg_payload	= "tx payload",
1272				.dma_flag	= BUS_DMA_WRITE,
1273				.dma_size	= ETHER_MAX_LEN,
1274				.dma_nsegs	= VIRTIO_NET_TX_MAXNSEGS,
1275			}
1276		};
1277
1278		struct virtio_net_hdr *hdrs;
1279		int dir;
1280
1281		dir = VIOIF_NETQ_DIR(qid);
1282		netq = &sc->sc_netqs[qid];
1283		vq_num = netq->netq_vq->vq_num;
1284		maps = netq->netq_maps;
1285		hdrs = netq->netq_maps_kva;
1286
1287		for (i = 0; i < vq_num; i++) {
1288			maps[i].vnm_hdr = &hdrs[i];
1289
1290			r = vioif_dmamap_create_load(sc, &maps[i].vnm_hdr_map,
1291			    maps[i].vnm_hdr, sc->sc_hdr_size, 1,
1292			    dmaparams[dir].dma_flag, dmaparams[dir].msg_hdr);
1293			if (r != 0)
1294				goto err_reqs;
1295
1296			r = vioif_dmamap_create(sc, &maps[i].vnm_mbuf_map,
1297			    dmaparams[dir].dma_size, dmaparams[dir].dma_nsegs,
1298			    dmaparams[dir].msg_payload);
1299			if (r != 0)
1300				goto err_reqs;
1301		}
1302	}
1303
1304	if (sc->sc_has_ctrl) {
1305		/* control vq class & command */
1306		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_cmd_dmamap,
1307		    ctrlq->ctrlq_cmd, sizeof(*ctrlq->ctrlq_cmd), 1,
1308		    BUS_DMA_WRITE, "control command");
1309		if (r != 0)
1310			goto err_reqs;
1311
1312		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_status_dmamap,
1313		    ctrlq->ctrlq_status, sizeof(*ctrlq->ctrlq_status), 1,
1314		    BUS_DMA_READ, "control status");
1315		if (r != 0)
1316			goto err_reqs;
1317
1318		/* control vq rx mode command parameter */
1319		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_rx_dmamap,
1320		    ctrlq->ctrlq_rx, sizeof(*ctrlq->ctrlq_rx), 1,
1321		    BUS_DMA_WRITE, "rx mode control command");
1322		if (r != 0)
1323			goto err_reqs;
1324
1325		/* multiqueue set command */
1326		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_mq_dmamap,
1327		    ctrlq->ctrlq_mq, sizeof(*ctrlq->ctrlq_mq), 1,
1328		    BUS_DMA_WRITE, "multiqueue set command");
1329		if (r != 0)
1330			goto err_reqs;
1331
1332		/* control vq MAC filter table for unicast */
1333		/* do not load now since its length is variable */
1334		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_uc_dmamap,
1335		    sizeof(*ctrlq->ctrlq_mac_tbl_uc)
1336		    + ETHER_ADDR_LEN, 1,
1337		    "unicast MAC address filter command");
1338		if (r != 0)
1339			goto err_reqs;
1340
1341		/* control vq MAC filter table for multicast */
1342		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_mc_dmamap,
1343		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
1344		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES, 1,
1345		    "multicast MAC address filter command");
1346		if (r != 0)
1347			goto err_reqs;
1348
1349		/* control vq MAC address set command */
1350		r = vioif_dmamap_create_load(sc,
1351		    &ctrlq->ctrlq_mac_addr_dmamap,
1352		    ctrlq->ctrlq_mac_addr,
1353		    sizeof(*ctrlq->ctrlq_mac_addr), 1,
1354		    BUS_DMA_WRITE, "mac addr set command");
1355		if (r != 0)
1356			goto err_reqs;
1357	}
1358
1359	return 0;
1360
1361err_reqs:
1362	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_mc_dmamap);
1363	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_uc_dmamap);
1364	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_rx_dmamap);
1365	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_status_dmamap);
1366	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_cmd_dmamap);
1367	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_mac_addr_dmamap);
1368	for (qid = 0; qid < netq_num; qid++) {
1369		vq_num = sc->sc_netqs[qid].netq_vq->vq_num;
1370		maps = sc->sc_netqs[qid].netq_maps;
1371
1372		for (i = 0; i < vq_num; i++) {
1373			vioif_dmamap_destroy(sc, &maps[i].vnm_mbuf_map);
1374			vioif_dmamap_destroy(sc, &maps[i].vnm_hdr_map);
1375		}
1376	}
1377	if (sc->sc_kmem) {
1378		kmem_free(sc->sc_kmem, kmemsize);
1379		sc->sc_kmem = NULL;
1380	}
1381	bus_dmamem_unmap(virtio_dmat(vsc), sc->sc_dmamem, dmamemsize);
1382err_dmamem_alloc:
1383	bus_dmamem_free(virtio_dmat(vsc), &sc->sc_segs[0], 1);
1384err_none:
1385	return -1;
1386}
1387
1388static void
1389vioif_alloc_queues(struct vioif_softc *sc)
1390{
1391	int nvq_pairs = sc->sc_max_nvq_pairs;
1392	size_t nvqs, netq_num;
1393
1394	KASSERT(nvq_pairs <= VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX);
1395
1396	nvqs = netq_num = sc->sc_max_nvq_pairs * 2;
1397	if (sc->sc_has_ctrl)
1398		nvqs++;
1399
1400	sc->sc_vqs = kmem_zalloc(sizeof(sc->sc_vqs[0]) * nvqs, KM_SLEEP);
1401	sc->sc_netqs = kmem_zalloc(sizeof(sc->sc_vqs[0]) * netq_num,
1402	    KM_SLEEP);
1403}
1404
1405static void
1406vioif_free_queues(struct vioif_softc *sc)
1407{
1408	size_t nvqs, netq_num;
1409
1410	nvqs = netq_num = sc->sc_max_nvq_pairs * 2;
1411	if (sc->sc_ctrlq.ctrlq_vq)
1412		nvqs++;
1413
1414	kmem_free(sc->sc_netqs, sizeof(sc->sc_netqs[0]) * netq_num);
1415	kmem_free(sc->sc_vqs, sizeof(sc->sc_vqs[0]) * nvqs);
1416	sc->sc_netqs = NULL;
1417	sc->sc_vqs = NULL;
1418}
1419
1420/*
1421 * Network queues
1422 */
1423static int
1424vioif_netqueue_init(struct vioif_softc *sc, struct virtio_softc *vsc,
1425    size_t qid, u_int softint_flags)
1426{
1427	static const struct {
1428		const char	*dirname;
1429		int		 segsize;
1430		int		 nsegs;
1431		int 		(*intrhand)(void *);
1432		void		(*sihand)(void *);
1433	} params[VIOIF_NETQ_IDX] = {
1434		[VIOIF_NETQ_RX] = {
1435			.dirname	= "rx",
1436			.segsize	= MCLBYTES,
1437			.nsegs		= 2,
1438			.intrhand	= vioif_rx_intr,
1439			.sihand		= vioif_rx_handle,
1440		},
1441		[VIOIF_NETQ_TX] = {
1442			.dirname	= "tx",
1443			.segsize	= ETHER_MAX_LEN - ETHER_HDR_LEN,
1444			.nsegs		= 2,
1445			.intrhand	= vioif_tx_intr,
1446			.sihand		= vioif_tx_handle,
1447		}
1448	};
1449
1450	struct virtqueue *vq;
1451	struct vioif_netqueue *netq;
1452	struct vioif_tx_context *txc;
1453	struct vioif_rx_context *rxc;
1454	char qname[32];
1455	int r, dir;
1456
1457	txc = NULL;
1458	rxc = NULL;
1459	netq = &sc->sc_netqs[qid];
1460	vq = &sc->sc_vqs[qid];
1461	dir = VIOIF_NETQ_DIR(qid);
1462
1463	netq->netq_vq = &sc->sc_vqs[qid];
1464	netq->netq_stopping = false;
1465	netq->netq_running_handle = false;
1466
1467	snprintf(qname, sizeof(qname), "%s%zu",
1468	    params[dir].dirname, VIOIF_NETQ_PAIRIDX(qid));
1469	snprintf(netq->netq_evgroup, sizeof(netq->netq_evgroup),
1470	    "%s-%s", device_xname(sc->sc_dev), qname);
1471
1472	mutex_init(&netq->netq_lock, MUTEX_DEFAULT, IPL_NET);
1473	r = virtio_alloc_vq(vsc, vq, qid,
1474	    params[dir].segsize + sc->sc_hdr_size,
1475	    params[dir].nsegs, qname);
1476	if (r != 0)
1477		goto err;
1478	netq->netq_vq = vq;
1479
1480	netq->netq_vq->vq_intrhand = params[dir].intrhand;
1481	netq->netq_vq->vq_intrhand_arg = netq;
1482	netq->netq_softint = softint_establish(softint_flags,
1483	    params[dir].sihand, netq);
1484	if (netq->netq_softint == NULL) {
1485		aprint_error_dev(sc->sc_dev,
1486		    "couldn't establish %s softint\n",
1487		    params[dir].dirname);
1488		goto err;
1489	}
1490	vioif_work_set(&netq->netq_work, params[dir].sihand, netq);
1491
1492	switch (dir) {
1493	case VIOIF_NETQ_RX:
1494		rxc = kmem_zalloc(sizeof(*rxc), KM_SLEEP);
1495		netq->netq_ctx = rxc;
1496		/* nothing to do */
1497		break;
1498	case VIOIF_NETQ_TX:
1499		txc = kmem_zalloc(sizeof(*txc), KM_SLEEP);
1500		netq->netq_ctx = (void *)txc;
1501		txc->txc_deferred_transmit = softint_establish(softint_flags,
1502		    vioif_deferred_transmit, netq);
1503		if (txc->txc_deferred_transmit == NULL) {
1504			aprint_error_dev(sc->sc_dev,
1505			    "couldn't establish softint for "
1506			    "tx deferred transmit\n");
1507			goto err;
1508		}
1509		txc->txc_link_active = VIOIF_IS_LINK_ACTIVE(sc);
1510		txc->txc_no_free_slots = false;
1511		txc->txc_intrq = pcq_create(vq->vq_num, KM_SLEEP);
1512		break;
1513	}
1514
1515	return 0;
1516
1517err:
1518	netq->netq_ctx = NULL;
1519
1520	if (rxc != NULL) {
1521		kmem_free(rxc, sizeof(*rxc));
1522	}
1523
1524	if (txc != NULL) {
1525		if (txc->txc_deferred_transmit != NULL)
1526			softint_disestablish(txc->txc_deferred_transmit);
1527		if (txc->txc_intrq != NULL)
1528			pcq_destroy(txc->txc_intrq);
1529		kmem_free(txc, sizeof(txc));
1530	}
1531
1532	vioif_work_set(&netq->netq_work, NULL, NULL);
1533	if (netq->netq_softint != NULL) {
1534		softint_disestablish(netq->netq_softint);
1535		netq->netq_softint = NULL;
1536	}
1537	netq->netq_vq->vq_intrhand = NULL;
1538	netq->netq_vq->vq_intrhand_arg = NULL;
1539
1540	virtio_free_vq(vsc, vq);
1541	mutex_destroy(&netq->netq_lock);
1542	netq->netq_vq = NULL;
1543
1544	return -1;
1545}
1546
1547static void
1548vioif_netqueue_teardown(struct vioif_softc *sc, struct virtio_softc *vsc,
1549    size_t qid)
1550{
1551	struct vioif_netqueue *netq;
1552	struct vioif_rx_context *rxc;
1553	struct vioif_tx_context *txc;
1554	int dir;
1555
1556	netq = &sc->sc_netqs[qid];
1557
1558	if (netq->netq_vq == NULL)
1559		return;
1560
1561	netq = &sc->sc_netqs[qid];
1562	dir = VIOIF_NETQ_DIR(qid);
1563	switch (dir) {
1564	case VIOIF_NETQ_RX:
1565		rxc = netq->netq_ctx;
1566		netq->netq_ctx = NULL;
1567		kmem_free(rxc, sizeof(*rxc));
1568		break;
1569	case VIOIF_NETQ_TX:
1570		txc = netq->netq_ctx;
1571		netq->netq_ctx = NULL;
1572		softint_disestablish(txc->txc_deferred_transmit);
1573		pcq_destroy(txc->txc_intrq);
1574		kmem_free(txc, sizeof(*txc));
1575		break;
1576	}
1577
1578	softint_disestablish(netq->netq_softint);
1579	virtio_free_vq(vsc, netq->netq_vq);
1580	mutex_destroy(&netq->netq_lock);
1581	netq->netq_vq = NULL;
1582}
1583
1584static void
1585vioif_net_sched_handle(struct vioif_softc *sc, struct vioif_netqueue *netq)
1586{
1587
1588	KASSERT(mutex_owned(&netq->netq_lock));
1589	KASSERT(!netq->netq_stopping);
1590
1591	if (netq->netq_workqueue) {
1592		vioif_work_add(sc->sc_txrx_workqueue, &netq->netq_work);
1593	} else {
1594		softint_schedule(netq->netq_softint);
1595	}
1596}
1597
1598static int
1599vioif_net_load_mbuf(struct virtio_softc *vsc, struct vioif_net_map *map,
1600   struct mbuf *m, int dma_flags)
1601{
1602	int r;
1603
1604	KASSERT(map->vnm_mbuf == NULL);
1605
1606	r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1607	    map->vnm_mbuf_map, m, dma_flags | BUS_DMA_NOWAIT);
1608	if (r == 0) {
1609		map->vnm_mbuf = m;
1610	}
1611
1612	return r;
1613}
1614
1615static void
1616vioif_net_unload_mbuf(struct virtio_softc *vsc, struct vioif_net_map *map)
1617{
1618
1619	KASSERT(map->vnm_mbuf != NULL);
1620	bus_dmamap_unload(virtio_dmat(vsc), map->vnm_mbuf_map);
1621	map->vnm_mbuf = NULL;
1622}
1623
1624static int
1625vioif_net_enqueue(struct virtio_softc *vsc, struct virtqueue *vq,
1626    int slot, struct vioif_net_map *map, int dma_ops, bool is_write)
1627{
1628	int r;
1629
1630	KASSERT(map->vnm_mbuf != NULL);
1631
1632	/* This should actually never fail */
1633	r = virtio_enqueue_reserve(vsc, vq, slot,
1634	    map->vnm_mbuf_map->dm_nsegs + 1);
1635	if (r != 0) {
1636		/* slot already freed by virtio_enqueue_reserve */
1637		return r;
1638	}
1639
1640	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_mbuf_map,
1641	    0, map->vnm_mbuf_map->dm_mapsize, dma_ops);
1642	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_hdr_map,
1643	    0, map->vnm_hdr_map->dm_mapsize, dma_ops);
1644
1645	virtio_enqueue(vsc, vq, slot, map->vnm_hdr_map, is_write);
1646	virtio_enqueue(vsc, vq, slot, map->vnm_mbuf_map, is_write);
1647	virtio_enqueue_commit(vsc, vq, slot, false);
1648
1649	return 0;
1650}
1651
1652static int
1653vioif_net_enqueue_tx(struct virtio_softc *vsc, struct virtqueue *vq,
1654    int slot, struct vioif_net_map *map)
1655{
1656
1657	return vioif_net_enqueue(vsc, vq, slot, map,
1658	    BUS_DMASYNC_PREWRITE, true);
1659}
1660
1661static int
1662vioif_net_enqueue_rx(struct virtio_softc *vsc, struct virtqueue *vq,
1663    int slot, struct vioif_net_map *map)
1664{
1665
1666	return vioif_net_enqueue(vsc, vq, slot, map,
1667	    BUS_DMASYNC_PREREAD, false);
1668}
1669
1670static struct mbuf *
1671vioif_net_dequeue_commit(struct virtio_softc *vsc, struct virtqueue *vq,
1672   int slot, struct vioif_net_map *map, int dma_flags)
1673{
1674	struct mbuf *m;
1675
1676	m = map->vnm_mbuf;
1677	KASSERT(m != NULL);
1678	map->vnm_mbuf = NULL;
1679
1680	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_hdr_map,
1681	    0, map->vnm_hdr_map->dm_mapsize, dma_flags);
1682	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_mbuf_map,
1683	    0, map->vnm_mbuf_map->dm_mapsize, dma_flags);
1684
1685	bus_dmamap_unload(virtio_dmat(vsc), map->vnm_mbuf_map);
1686	virtio_dequeue_commit(vsc, vq, slot);
1687
1688	return m;
1689}
1690
1691static void
1692vioif_net_intr_enable(struct vioif_softc *sc, struct virtio_softc *vsc)
1693{
1694	struct vioif_netqueue *netq;
1695	size_t i, act_qnum;
1696	int enqueued;
1697
1698	act_qnum = sc->sc_act_nvq_pairs * 2;
1699	for (i = 0; i < act_qnum; i++) {
1700		netq = &sc->sc_netqs[i];
1701
1702		KASSERT(!netq->netq_stopping);
1703		KASSERT(!netq->netq_running_handle);
1704
1705		enqueued = virtio_start_vq_intr(vsc, netq->netq_vq);
1706		if (enqueued != 0) {
1707			virtio_stop_vq_intr(vsc, netq->netq_vq);
1708
1709			mutex_enter(&netq->netq_lock);
1710			netq->netq_running_handle = true;
1711			vioif_net_sched_handle(sc, netq);
1712			mutex_exit(&netq->netq_lock);
1713		}
1714	}
1715}
1716
1717static void
1718vioif_net_intr_disable(struct vioif_softc *sc, struct virtio_softc *vsc)
1719{
1720	struct vioif_netqueue *netq;
1721	size_t i, act_qnum;
1722
1723	act_qnum = sc->sc_act_nvq_pairs * 2;
1724	for (i = 0; i < act_qnum; i++) {
1725		netq = &sc->sc_netqs[i];
1726
1727		virtio_stop_vq_intr(vsc, netq->netq_vq);
1728	}
1729}
1730
1731/*
1732 * Receive implementation
1733 */
1734/* enqueue mbufs to receive slots */
1735static void
1736vioif_populate_rx_mbufs_locked(struct vioif_softc *sc, struct vioif_netqueue *netq)
1737{
1738	struct virtqueue *vq = netq->netq_vq;
1739	struct virtio_softc *vsc = vq->vq_owner;
1740	struct vioif_rx_context *rxc;
1741	struct vioif_net_map *map;
1742	struct mbuf *m;
1743	int i, r, ndone = 0;
1744
1745	KASSERT(mutex_owned(&netq->netq_lock));
1746
1747	rxc = netq->netq_ctx;
1748
1749	for (i = 0; i < vq->vq_num; i++) {
1750		int slot;
1751		r = virtio_enqueue_prep(vsc, vq, &slot);
1752		if (r == EAGAIN)
1753			break;
1754		if (__predict_false(r != 0))
1755			panic("enqueue_prep for rx buffers");
1756
1757		MGETHDR(m, M_DONTWAIT, MT_DATA);
1758		if (m == NULL) {
1759			virtio_enqueue_abort(vsc, vq, slot);
1760			rxc->rxc_mbuf_enobufs.ev_count++;
1761			break;
1762		}
1763		MCLGET(m, M_DONTWAIT);
1764		if ((m->m_flags & M_EXT) == 0) {
1765			virtio_enqueue_abort(vsc, vq, slot);
1766			m_freem(m);
1767			rxc->rxc_mbuf_enobufs.ev_count++;
1768			break;
1769		}
1770
1771		m->m_len = m->m_pkthdr.len = MCLBYTES;
1772		m_adj(m, ETHER_ALIGN);
1773
1774		map = &netq->netq_maps[slot];
1775		r = vioif_net_load_mbuf(vsc, map, m, BUS_DMA_READ);
1776		if (r != 0) {
1777			virtio_enqueue_abort(vsc, vq, slot);
1778			m_freem(m);
1779			netq->netq_mbuf_load_failed.ev_count++;
1780			break;
1781		}
1782
1783		r = vioif_net_enqueue_rx(vsc, vq, slot, map);
1784		if (r != 0) {
1785			vioif_net_unload_mbuf(vsc, map);
1786			netq->netq_enqueue_failed.ev_count++;
1787			m_freem(m);
1788			/* slot already freed by vioif_net_enqueue_rx */
1789			break;
1790		}
1791
1792		ndone++;
1793	}
1794
1795	if (ndone > 0)
1796		vioif_notify(vsc, vq);
1797}
1798
1799/* dequeue received packets */
1800static bool
1801vioif_rx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
1802    struct vioif_netqueue *netq, u_int limit, size_t *ndeqp)
1803{
1804	struct virtqueue *vq = netq->netq_vq;
1805	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1806	struct vioif_net_map *map;
1807	struct mbuf *m;
1808	int slot, len;
1809	bool more;
1810	size_t ndeq;
1811
1812	KASSERT(mutex_owned(&netq->netq_lock));
1813
1814	more = false;
1815	ndeq = 0;
1816
1817	if (virtio_vq_is_enqueued(vsc, vq) == false)
1818		goto done;
1819
1820	for (;;ndeq++) {
1821		if (ndeq >= limit) {
1822			more = true;
1823			break;
1824		}
1825
1826		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
1827			break;
1828
1829		map = &netq->netq_maps[slot];
1830		KASSERT(map->vnm_mbuf != NULL);
1831		m = vioif_net_dequeue_commit(vsc, vq, slot,
1832		    map, BUS_DMASYNC_POSTREAD);
1833		KASSERT(m != NULL);
1834
1835		m->m_len = m->m_pkthdr.len = len - sc->sc_hdr_size;
1836		m_set_rcvif(m, ifp);
1837		if_percpuq_enqueue(ifp->if_percpuq, m);
1838	}
1839
1840done:
1841	if (ndeqp != NULL)
1842		*ndeqp = ndeq;
1843
1844	return more;
1845}
1846
1847static void
1848vioif_rx_queue_clear(struct vioif_softc *sc, struct virtio_softc *vsc,
1849    struct vioif_netqueue *netq)
1850{
1851	struct vioif_net_map *map;
1852	struct mbuf *m;
1853	unsigned int i, vq_num;
1854	bool more;
1855
1856	mutex_enter(&netq->netq_lock);
1857
1858	vq_num = netq->netq_vq->vq_num;
1859	for (;;) {
1860		more = vioif_rx_deq_locked(sc, vsc, netq, vq_num, NULL);
1861		if (more == false)
1862			break;
1863	}
1864
1865	for (i = 0; i < vq_num; i++) {
1866		map = &netq->netq_maps[i];
1867
1868		m = map->vnm_mbuf;
1869		if (m == NULL)
1870			continue;
1871
1872		vioif_net_unload_mbuf(vsc, map);
1873		m_freem(m);
1874	}
1875	mutex_exit(&netq->netq_lock);
1876}
1877
1878static void
1879vioif_rx_handle_locked(void *xnetq, u_int limit)
1880{
1881	struct vioif_netqueue *netq = xnetq;
1882	struct virtqueue *vq = netq->netq_vq;
1883	struct virtio_softc *vsc = vq->vq_owner;
1884	struct vioif_softc *sc = device_private(virtio_child(vsc));
1885	bool more;
1886	int enqueued;
1887	size_t ndeq;
1888
1889	KASSERT(mutex_owned(&netq->netq_lock));
1890	KASSERT(!netq->netq_stopping);
1891
1892	more = vioif_rx_deq_locked(sc, vsc, netq, limit, &ndeq);
1893	if (ndeq > 0)
1894		vioif_populate_rx_mbufs_locked(sc, netq);
1895
1896	if (more) {
1897		vioif_net_sched_handle(sc, netq);
1898		return;
1899	}
1900
1901	enqueued = virtio_start_vq_intr(vsc, netq->netq_vq);
1902	if (enqueued != 0) {
1903		virtio_stop_vq_intr(vsc, netq->netq_vq);
1904		vioif_net_sched_handle(sc, netq);
1905		return;
1906	}
1907
1908	netq->netq_running_handle = false;
1909}
1910
1911static int
1912vioif_rx_intr(void *arg)
1913{
1914	struct vioif_netqueue *netq = arg;
1915	struct virtqueue *vq = netq->netq_vq;
1916	struct virtio_softc *vsc = vq->vq_owner;
1917	struct vioif_softc *sc = device_private(virtio_child(vsc));
1918	u_int limit;
1919
1920	mutex_enter(&netq->netq_lock);
1921
1922	/* handler is already running in softint/workqueue */
1923	if (netq->netq_running_handle)
1924		goto done;
1925
1926	netq->netq_running_handle = true;
1927
1928	limit = sc->sc_rx_intr_process_limit;
1929	virtio_stop_vq_intr(vsc, vq);
1930	vioif_rx_handle_locked(netq, limit);
1931
1932done:
1933	mutex_exit(&netq->netq_lock);
1934	return 1;
1935}
1936
1937static void
1938vioif_rx_handle(void *xnetq)
1939{
1940	struct vioif_netqueue *netq = xnetq;
1941	struct virtqueue *vq = netq->netq_vq;
1942	struct virtio_softc *vsc = vq->vq_owner;
1943	struct vioif_softc *sc = device_private(virtio_child(vsc));
1944	u_int limit;
1945
1946	mutex_enter(&netq->netq_lock);
1947
1948	KASSERT(netq->netq_running_handle);
1949
1950	if (netq->netq_stopping) {
1951		netq->netq_running_handle = false;
1952		goto done;
1953	}
1954
1955	limit = sc->sc_rx_process_limit;
1956	vioif_rx_handle_locked(netq, limit);
1957
1958done:
1959	mutex_exit(&netq->netq_lock);
1960}
1961
/*
 * Transmission implementation
 */
1965/* enqueue mbufs to send */
1966static void
1967vioif_send_common_locked(struct ifnet *ifp, struct vioif_netqueue *netq,
1968    bool is_transmit)
1969{
1970	struct vioif_softc *sc = ifp->if_softc;
1971	struct virtio_softc *vsc = sc->sc_virtio;
1972	struct virtqueue *vq = netq->netq_vq;
1973	struct vioif_tx_context *txc;
1974	struct vioif_net_map *map;
1975	struct mbuf *m;
1976	int queued = 0;
1977
1978	KASSERT(mutex_owned(&netq->netq_lock));
1979
1980	if (netq->netq_stopping ||
1981	    !ISSET(ifp->if_flags, IFF_RUNNING))
1982		return;
1983
1984	txc = netq->netq_ctx;
1985
1986	if (!txc->txc_link_active ||
1987	    txc->txc_no_free_slots)
1988		return;
1989
1990	for (;;) {
1991		int slot, r;
1992		r = virtio_enqueue_prep(vsc, vq, &slot);
1993		if (r == EAGAIN) {
1994			txc->txc_no_free_slots = true;
1995			break;
1996		}
1997		if (__predict_false(r != 0))
1998			panic("enqueue_prep for tx buffers");
1999
2000		if (is_transmit)
2001			m = pcq_get(txc->txc_intrq);
2002		else
2003			IFQ_DEQUEUE(&ifp->if_snd, m);
2004
2005		if (m == NULL) {
2006			virtio_enqueue_abort(vsc, vq, slot);
2007			break;
2008		}
2009
2010		map = &netq->netq_maps[slot];
2011		KASSERT(map->vnm_mbuf == NULL);
2012
2013		r = vioif_net_load_mbuf(vsc, map, m, BUS_DMA_WRITE);
2014		if (r != 0) {
2015			/* maybe just too fragmented */
2016			struct mbuf *newm;
2017
2018			newm = m_defrag(m, M_NOWAIT);
2019			if (newm != NULL) {
2020				m = newm;
2021				r = vioif_net_load_mbuf(vsc, map, m,
2022				    BUS_DMA_WRITE);
2023			} else {
2024				txc->txc_defrag_failed.ev_count++;
2025				r = -1;
2026			}
2027
2028			if (r != 0) {
2029				netq->netq_mbuf_load_failed.ev_count++;
2030				m_freem(m);
2031				if_statinc(ifp, if_oerrors);
2032				virtio_enqueue_abort(vsc, vq, slot);
2033				continue;
2034			}
2035		}
2036
2037		memset(map->vnm_hdr, 0, sc->sc_hdr_size);
2038
2039		r = vioif_net_enqueue_tx(vsc, vq, slot, map);
2040		if (r != 0) {
2041			netq->netq_enqueue_failed.ev_count++;
2042			vioif_net_unload_mbuf(vsc, map);
2043			m_freem(m);
2044			/* slot already freed by vioif_net_enqueue_tx */
2045
2046			if_statinc(ifp, if_oerrors);
2047			continue;
2048		}
2049
2050		queued++;
2051		bpf_mtap(ifp, m, BPF_D_OUT);
2052	}
2053
2054	if (queued > 0) {
2055		vioif_notify(vsc, vq);
2056		ifp->if_timer = 5;
2057	}
2058}
2059
2060/* dequeue sent mbufs */
2061static bool
2062vioif_tx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
2063    struct vioif_netqueue *netq, u_int limit, size_t *ndeqp)
2064{
2065	struct virtqueue *vq = netq->netq_vq;
2066	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2067	struct vioif_net_map *map;
2068	struct mbuf *m;
2069	int slot, len;
2070	bool more;
2071	size_t ndeq;
2072
2073	KASSERT(mutex_owned(&netq->netq_lock));
2074
2075	more = false;
2076	ndeq = 0;
2077
2078	if (virtio_vq_is_enqueued(vsc, vq) == false)
2079		goto done;
2080
2081	for (;;ndeq++) {
2082		if (limit-- == 0) {
2083			more = true;
2084			break;
2085		}
2086
2087		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
2088			break;
2089
2090		map = &netq->netq_maps[slot];
2091		KASSERT(map->vnm_mbuf != NULL);
2092		m = vioif_net_dequeue_commit(vsc, vq, slot,
2093		    map, BUS_DMASYNC_POSTWRITE);
2094		KASSERT(m != NULL);
2095
2096		if_statinc(ifp, if_opackets);
2097		m_freem(m);
2098	}
2099
2100done:
2101	if (ndeqp != NULL)
2102		*ndeqp = ndeq;
2103	return more;
2104}
2105
2106static void
2107vioif_tx_queue_clear(struct vioif_softc *sc, struct virtio_softc *vsc,
2108    struct vioif_netqueue *netq)
2109{
2110	struct vioif_tx_context *txc;
2111	struct vioif_net_map *map;
2112	struct mbuf *m;
2113	unsigned int i, vq_num;
2114	bool more;
2115
2116	mutex_enter(&netq->netq_lock);
2117
2118	txc = netq->netq_ctx;
2119	vq_num = netq->netq_vq->vq_num;
2120
2121	for (;;) {
2122		more = vioif_tx_deq_locked(sc, vsc, netq, vq_num, NULL);
2123		if (more == false)
2124			break;
2125	}
2126
2127	for (i = 0; i < vq_num; i++) {
2128		map = &netq->netq_maps[i];
2129
2130		m = map->vnm_mbuf;
2131		if (m == NULL)
2132			continue;
2133
2134		vioif_net_unload_mbuf(vsc, map);
2135		m_freem(m);
2136	}
2137
2138	txc->txc_no_free_slots = false;
2139
2140	mutex_exit(&netq->netq_lock);
2141}
2142
2143static void
2144vioif_start_locked(struct ifnet *ifp, struct vioif_netqueue *netq)
2145{
2146
2147	/*
2148	 * ifp->if_obytes and ifp->if_omcasts are added in if_transmit()@if.c.
2149	 */
2150	vioif_send_common_locked(ifp, netq, false);
2151
2152}
2153
2154static void
2155vioif_transmit_locked(struct ifnet *ifp, struct vioif_netqueue *netq)
2156{
2157
2158	vioif_send_common_locked(ifp, netq, true);
2159}
2160
2161static void
2162vioif_deferred_transmit(void *arg)
2163{
2164	struct vioif_netqueue *netq = arg;
2165	struct virtio_softc *vsc = netq->netq_vq->vq_owner;
2166	struct vioif_softc *sc = device_private(virtio_child(vsc));
2167	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2168
2169	mutex_enter(&netq->netq_lock);
2170	vioif_send_common_locked(ifp, netq, true);
2171	mutex_exit(&netq->netq_lock);
2172}
2173
2174static void
2175vioif_tx_handle_locked(struct vioif_netqueue *netq, u_int limit)
2176{
2177	struct virtqueue *vq = netq->netq_vq;
2178	struct vioif_tx_context *txc = netq->netq_ctx;
2179	struct virtio_softc *vsc = vq->vq_owner;
2180	struct vioif_softc *sc = device_private(virtio_child(vsc));
2181	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2182	bool more;
2183	int enqueued;
2184	size_t ndeq;
2185
2186	KASSERT(mutex_owned(&netq->netq_lock));
2187	KASSERT(!netq->netq_stopping);
2188
2189	more = vioif_tx_deq_locked(sc, vsc, netq, limit, &ndeq);
2190	if (txc->txc_no_free_slots && ndeq > 0) {
2191		txc->txc_no_free_slots = false;
2192		softint_schedule(txc->txc_deferred_transmit);
2193	}
2194
2195	if (more) {
2196		vioif_net_sched_handle(sc, netq);
2197		return;
2198	}
2199
2200	enqueued = (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX) ?
2201	    virtio_postpone_intr_smart(vsc, vq):
2202	    virtio_start_vq_intr(vsc, vq);
2203	if (enqueued != 0) {
2204		virtio_stop_vq_intr(vsc, vq);
2205		vioif_net_sched_handle(sc, netq);
2206		return;
2207	}
2208
2209	netq->netq_running_handle = false;
2210
2211	/* for ALTQ */
2212	if (netq == &sc->sc_netqs[VIOIF_NETQ_TXQID(0)])
2213		if_schedule_deferred_start(ifp);
2214
2215	softint_schedule(txc->txc_deferred_transmit);
2216}
2217
2218static int
2219vioif_tx_intr(void *arg)
2220{
2221	struct vioif_netqueue *netq = arg;
2222	struct virtqueue *vq = netq->netq_vq;
2223	struct virtio_softc *vsc = vq->vq_owner;
2224	struct vioif_softc *sc = device_private(virtio_child(vsc));
2225	u_int limit;
2226
2227	mutex_enter(&netq->netq_lock);
2228
2229	/* tx handler is already running in softint/workqueue */
2230	if (netq->netq_running_handle)
2231		goto done;
2232
2233	if (netq->netq_stopping)
2234		goto done;
2235
2236	netq->netq_running_handle = true;
2237
2238	virtio_stop_vq_intr(vsc, vq);
2239	netq->netq_workqueue = sc->sc_txrx_workqueue_sysctl;
2240	limit = sc->sc_tx_intr_process_limit;
2241	vioif_tx_handle_locked(netq, limit);
2242
2243done:
2244	mutex_exit(&netq->netq_lock);
2245	return 1;
2246}
2247
2248static void
2249vioif_tx_handle(void *xnetq)
2250{
2251	struct vioif_netqueue *netq = xnetq;
2252	struct virtqueue *vq = netq->netq_vq;
2253	struct virtio_softc *vsc = vq->vq_owner;
2254	struct vioif_softc *sc = device_private(virtio_child(vsc));
2255	u_int limit;
2256
2257	mutex_enter(&netq->netq_lock);
2258
2259	KASSERT(netq->netq_running_handle);
2260
2261	if (netq->netq_stopping) {
2262		netq->netq_running_handle = false;
2263		goto done;
2264	}
2265
2266	limit = sc->sc_tx_process_limit;
2267	vioif_tx_handle_locked(netq, limit);
2268
2269done:
2270	mutex_exit(&netq->netq_lock);
2271}
2272
2273/*
2274 * Control vq
2275 */
2276/* issue a VIRTIO_NET_CTRL_RX class command and wait for completion */
2277static void
2278vioif_ctrl_acquire(struct vioif_softc *sc)
2279{
2280	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2281
2282	mutex_enter(&ctrlq->ctrlq_wait_lock);
2283	while (ctrlq->ctrlq_inuse != FREE)
2284		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
2285	ctrlq->ctrlq_inuse = INUSE;
2286	ctrlq->ctrlq_owner = curlwp;
2287	mutex_exit(&ctrlq->ctrlq_wait_lock);
2288}
2289
2290static void
2291vioif_ctrl_release(struct vioif_softc *sc)
2292{
2293	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2294
2295	KASSERT(ctrlq->ctrlq_inuse != FREE);
2296	KASSERT(ctrlq->ctrlq_owner == curlwp);
2297
2298	mutex_enter(&ctrlq->ctrlq_wait_lock);
2299	ctrlq->ctrlq_inuse = FREE;
2300	ctrlq->ctrlq_owner = NULL;
2301	cv_signal(&ctrlq->ctrlq_wait);
2302	mutex_exit(&ctrlq->ctrlq_wait_lock);
2303}
2304
2305static int
2306vioif_ctrl_load_cmdspec(struct vioif_softc *sc,
2307    struct vioif_ctrl_cmdspec *specs, int nspecs)
2308{
2309	struct virtio_softc *vsc = sc->sc_virtio;
2310	int i, r, loaded;
2311
2312	loaded = 0;
2313	for (i = 0; i < nspecs; i++) {
2314		r = bus_dmamap_load(virtio_dmat(vsc),
2315		    specs[i].dmamap, specs[i].buf, specs[i].bufsize,
2316		    NULL, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
2317		if (r) {
2318			sc->sc_ctrlq.ctrlq_cmd_load_failed.ev_count++;
2319			goto err;
2320		}
2321		loaded++;
2322
2323	}
2324
2325	return r;
2326
2327err:
2328	for (i = 0; i < loaded; i++) {
2329		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2330	}
2331
2332	return r;
2333}
2334
2335static void
2336vioif_ctrl_unload_cmdspec(struct vioif_softc *sc,
2337    struct vioif_ctrl_cmdspec *specs, int nspecs)
2338{
2339	struct virtio_softc *vsc = sc->sc_virtio;
2340	int i;
2341
2342	for (i = 0; i < nspecs; i++) {
2343		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2344	}
2345}
2346
2347static int
2348vioif_ctrl_send_command(struct vioif_softc *sc, uint8_t class, uint8_t cmd,
2349    struct vioif_ctrl_cmdspec *specs, int nspecs)
2350{
2351	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2352	struct virtqueue *vq = ctrlq->ctrlq_vq;
2353	struct virtio_softc *vsc = sc->sc_virtio;
2354	int i, r, slot;
2355
2356	ctrlq->ctrlq_cmd->class = class;
2357	ctrlq->ctrlq_cmd->command = cmd;
2358
2359	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap,
2360	    0, sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_PREWRITE);
2361	for (i = 0; i < nspecs; i++) {
2362		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap,
2363		    0, specs[i].bufsize, BUS_DMASYNC_PREWRITE);
2364	}
2365	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap,
2366	    0, sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_PREREAD);
2367
2368	/* we need to explicitly (re)start vq intr when using RING EVENT IDX */
2369	if (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX)
2370		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
2371
2372	r = virtio_enqueue_prep(vsc, vq, &slot);
2373	if (r != 0)
2374		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2375	r = virtio_enqueue_reserve(vsc, vq, slot, nspecs + 2);
2376	if (r != 0)
2377		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2378	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_cmd_dmamap, true);
2379	for (i = 0; i < nspecs; i++) {
2380		virtio_enqueue(vsc, vq, slot, specs[i].dmamap, true);
2381	}
2382	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_status_dmamap, false);
2383	virtio_enqueue_commit(vsc, vq, slot, true);
2384
2385	/* wait for done */
2386	mutex_enter(&ctrlq->ctrlq_wait_lock);
2387	while (ctrlq->ctrlq_inuse != DONE)
2388		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
2389	mutex_exit(&ctrlq->ctrlq_wait_lock);
2390	/* already dequeueued */
2391
2392	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap, 0,
2393	    sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
2394	for (i = 0; i < nspecs; i++) {
2395		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap, 0,
2396		    specs[i].bufsize, BUS_DMASYNC_POSTWRITE);
2397	}
2398	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap, 0,
2399	    sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_POSTREAD);
2400
2401	if (ctrlq->ctrlq_status->ack == VIRTIO_NET_OK)
2402		r = 0;
2403	else {
2404		device_printf(sc->sc_dev, "failed setting rx mode\n");
2405		sc->sc_ctrlq.ctrlq_cmd_failed.ev_count++;
2406		r = EIO;
2407	}
2408
2409	return r;
2410}
2411
2412/* ctrl vq interrupt; wake up the command issuer */
2413static int
2414vioif_ctrl_intr(void *arg)
2415{
2416	struct vioif_ctrlqueue *ctrlq = arg;
2417	struct virtqueue *vq = ctrlq->ctrlq_vq;
2418	struct virtio_softc *vsc = vq->vq_owner;
2419	int r, slot;
2420
2421	if (virtio_vq_is_enqueued(vsc, vq) == false)
2422		return 0;
2423
2424	r = virtio_dequeue(vsc, vq, &slot, NULL);
2425	if (r == ENOENT)
2426		return 0;
2427	virtio_dequeue_commit(vsc, vq, slot);
2428
2429	mutex_enter(&ctrlq->ctrlq_wait_lock);
2430	ctrlq->ctrlq_inuse = DONE;
2431	cv_signal(&ctrlq->ctrlq_wait);
2432	mutex_exit(&ctrlq->ctrlq_wait_lock);
2433
2434	return 1;
2435}
2436
2437static int
2438vioif_ctrl_rx(struct vioif_softc *sc, int cmd, bool onoff)
2439{
2440	struct virtio_net_ctrl_rx *rx = sc->sc_ctrlq.ctrlq_rx;
2441	struct vioif_ctrl_cmdspec specs[1];
2442	int r;
2443
2444	if (!sc->sc_has_ctrl)
2445		return ENOTSUP;
2446
2447	vioif_ctrl_acquire(sc);
2448
2449	rx->onoff = onoff;
2450	specs[0].dmamap = sc->sc_ctrlq.ctrlq_rx_dmamap;
2451	specs[0].buf = rx;
2452	specs[0].bufsize = sizeof(*rx);
2453
2454	r = vioif_ctrl_send_command(sc, VIRTIO_NET_CTRL_RX, cmd,
2455	    specs, __arraycount(specs));
2456
2457	vioif_ctrl_release(sc);
2458	return r;
2459}
2460
2461static int
2462vioif_set_promisc(struct vioif_softc *sc, bool onoff)
2463{
2464	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, onoff);
2465}
2466
2467static int
2468vioif_set_allmulti(struct vioif_softc *sc, bool onoff)
2469{
2470	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, onoff);
2471}
2472
2473static int
2474vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *sc, int nvq_pairs)
2475{
2476	struct virtio_net_ctrl_mq *mq = sc->sc_ctrlq.ctrlq_mq;
2477	struct vioif_ctrl_cmdspec specs[1];
2478	int r;
2479
2480	if (!sc->sc_has_ctrl)
2481		return ENOTSUP;
2482
2483	if (nvq_pairs <= 1)
2484		return EINVAL;
2485
2486	vioif_ctrl_acquire(sc);
2487
2488	mq->virtqueue_pairs = virtio_rw16(sc->sc_virtio, nvq_pairs);
2489	specs[0].dmamap = sc->sc_ctrlq.ctrlq_mq_dmamap;
2490	specs[0].buf = mq;
2491	specs[0].bufsize = sizeof(*mq);
2492
2493	r = vioif_ctrl_send_command(sc,
2494	    VIRTIO_NET_CTRL_MQ, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
2495	    specs, __arraycount(specs));
2496
2497	vioif_ctrl_release(sc);
2498
2499	return r;
2500}
2501
2502static int
2503vioif_set_mac_addr(struct vioif_softc *sc)
2504{
2505	struct virtio_net_ctrl_mac_addr *ma =
2506	    sc->sc_ctrlq.ctrlq_mac_addr;
2507	struct vioif_ctrl_cmdspec specs[1];
2508	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2509	int nspecs = __arraycount(specs);
2510	uint64_t features;
2511	int r;
2512	size_t i;
2513
2514	if (!sc->sc_has_ctrl)
2515		return ENOTSUP;
2516
2517	if (memcmp(CLLADDR(ifp->if_sadl), sc->sc_mac,
2518	    ETHER_ADDR_LEN) == 0) {
2519		return 0;
2520	}
2521
2522	memcpy(sc->sc_mac, CLLADDR(ifp->if_sadl), ETHER_ADDR_LEN);
2523
2524	features = virtio_features(sc->sc_virtio);
2525	if (features & VIRTIO_NET_F_CTRL_MAC_ADDR) {
2526		vioif_ctrl_acquire(sc);
2527
2528		memcpy(ma->mac, sc->sc_mac, ETHER_ADDR_LEN);
2529		specs[0].dmamap = sc->sc_ctrlq.ctrlq_mac_addr_dmamap;
2530		specs[0].buf = ma;
2531		specs[0].bufsize = sizeof(*ma);
2532
2533		r = vioif_ctrl_send_command(sc,
2534		    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_ADDR_SET,
2535		    specs, nspecs);
2536
2537		vioif_ctrl_release(sc);
2538	} else {
2539		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
2540			virtio_write_device_config_1(sc->sc_virtio,
2541			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
2542		}
2543		r = 0;
2544	}
2545
2546	return r;
2547}
2548
2549static int
2550vioif_set_rx_filter(struct vioif_softc *sc)
2551{
2552	/* filter already set in ctrlq->ctrlq_mac_tbl */
2553	struct virtio_softc *vsc = sc->sc_virtio;
2554	struct virtio_net_ctrl_mac_tbl *mac_tbl_uc, *mac_tbl_mc;
2555	struct vioif_ctrl_cmdspec specs[2];
2556	int nspecs = __arraycount(specs);
2557	int r;
2558
2559	mac_tbl_uc = sc->sc_ctrlq.ctrlq_mac_tbl_uc;
2560	mac_tbl_mc = sc->sc_ctrlq.ctrlq_mac_tbl_mc;
2561
2562	if (!sc->sc_has_ctrl)
2563		return ENOTSUP;
2564
2565	vioif_ctrl_acquire(sc);
2566
2567	specs[0].dmamap = sc->sc_ctrlq.ctrlq_tbl_uc_dmamap;
2568	specs[0].buf = mac_tbl_uc;
2569	specs[0].bufsize = sizeof(*mac_tbl_uc)
2570	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_uc->nentries));
2571
2572	specs[1].dmamap = sc->sc_ctrlq.ctrlq_tbl_mc_dmamap;
2573	specs[1].buf = mac_tbl_mc;
2574	specs[1].bufsize = sizeof(*mac_tbl_mc)
2575	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_mc->nentries));
2576
2577	r = vioif_ctrl_load_cmdspec(sc, specs, nspecs);
2578	if (r != 0)
2579		goto out;
2580
2581	r = vioif_ctrl_send_command(sc,
2582	    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET,
2583	    specs, nspecs);
2584
2585	vioif_ctrl_unload_cmdspec(sc, specs, nspecs);
2586
2587out:
2588	vioif_ctrl_release(sc);
2589
2590	return r;
2591}
2592
2593/*
2594 * If multicast filter small enough (<=MAXENTRIES) set rx filter
2595 * If large multicast filter exist use ALLMULTI
2596 * If setting rx filter fails fall back to ALLMULTI
2597 */
2598static int
2599vioif_rx_filter(struct vioif_softc *sc)
2600{
2601	struct virtio_softc *vsc = sc->sc_virtio;
2602	struct ethercom *ec = &sc->sc_ethercom;
2603	struct ifnet *ifp = &ec->ec_if;
2604	struct ether_multi *enm;
2605	struct ether_multistep step;
2606	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2607	int nentries;
2608	bool allmulti = 0;
2609	int r;
2610
2611	if (!sc->sc_has_ctrl) {
2612		goto set_ifflags;
2613	}
2614
2615	memcpy(ctrlq->ctrlq_mac_tbl_uc->macs[0],
2616	    CLLADDR(ifp->if_sadl), ETHER_ADDR_LEN);
2617
2618	nentries = 0;
2619	allmulti = false;
2620
2621	ETHER_LOCK(ec);
2622	for (ETHER_FIRST_MULTI(step, ec, enm); enm != NULL;
2623	    ETHER_NEXT_MULTI(step, enm)) {
2624		if (nentries >= VIRTIO_NET_CTRL_MAC_MAXENTRIES) {
2625			allmulti = true;
2626			break;
2627		}
2628		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
2629			allmulti = true;
2630			break;
2631		}
2632
2633		memcpy(ctrlq->ctrlq_mac_tbl_mc->macs[nentries],
2634		    enm->enm_addrlo, ETHER_ADDR_LEN);
2635		nentries++;
2636	}
2637	ETHER_UNLOCK(ec);
2638
2639	r = vioif_set_mac_addr(sc);
2640	if (r != 0) {
2641		log(LOG_WARNING, "%s: couldn't set MAC address\n",
2642		    ifp->if_xname);
2643	}
2644
2645	if (!allmulti) {
2646		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 1);
2647		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, nentries);
2648		r = vioif_set_rx_filter(sc);
2649		if (r != 0) {
2650			allmulti = true; /* fallback */
2651		}
2652	}
2653
2654	if (allmulti) {
2655		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 0);
2656		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, 0);
2657		r = vioif_set_rx_filter(sc);
2658		if (r != 0) {
2659			log(LOG_DEBUG, "%s: couldn't clear RX filter\n",
2660			    ifp->if_xname);
2661			/* what to do on failure? */
2662		}
2663
2664		ifp->if_flags |= IFF_ALLMULTI;
2665	}
2666
2667set_ifflags:
2668	r = vioif_ifflags(sc);
2669
2670	return r;
2671}
2672
2673/*
2674 * VM configuration changes
2675 */
2676static int
2677vioif_config_change(struct virtio_softc *vsc)
2678{
2679	struct vioif_softc *sc = device_private(virtio_child(vsc));
2680
2681	softint_schedule(sc->sc_cfg_softint);
2682	return 0;
2683}
2684
2685static void
2686vioif_cfg_softint(void *arg)
2687{
2688	struct vioif_softc *sc = arg;
2689	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2690
2691	vioif_update_link_status(sc);
2692	vioif_start(ifp);
2693}
2694
2695static int
2696vioif_get_link_status(struct vioif_softc *sc)
2697{
2698	struct virtio_softc *vsc = sc->sc_virtio;
2699	uint16_t status;
2700
2701	if (virtio_features(vsc) & VIRTIO_NET_F_STATUS)
2702		status = virtio_read_device_config_2(vsc,
2703		    VIRTIO_NET_CONFIG_STATUS);
2704	else
2705		status = VIRTIO_NET_S_LINK_UP;
2706
2707	if ((status & VIRTIO_NET_S_LINK_UP) != 0)
2708		return LINK_STATE_UP;
2709
2710	return LINK_STATE_DOWN;
2711}
2712
2713static void
2714vioif_update_link_status(struct vioif_softc *sc)
2715{
2716	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2717	struct vioif_netqueue *netq;
2718	struct vioif_tx_context *txc;
2719	bool active;
2720	int link, i;
2721
2722	mutex_enter(&sc->sc_lock);
2723
2724	link = vioif_get_link_status(sc);
2725
2726	if (link == sc->sc_link_state)
2727		goto done;
2728
2729	sc->sc_link_state = link;
2730
2731	active = VIOIF_IS_LINK_ACTIVE(sc);
2732	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
2733		netq = &sc->sc_netqs[VIOIF_NETQ_TXQID(i)];
2734
2735		mutex_enter(&netq->netq_lock);
2736		txc = netq->netq_ctx;
2737		txc->txc_link_active = active;
2738		mutex_exit(&netq->netq_lock);
2739	}
2740
2741	if_link_state_change(ifp, sc->sc_link_state);
2742
2743done:
2744	mutex_exit(&sc->sc_lock);
2745}
2746
2747static void
2748vioif_workq_work(struct work *wk, void *context)
2749{
2750	struct vioif_work *work;
2751
2752	work = container_of(wk, struct vioif_work, cookie);
2753
2754	atomic_store_relaxed(&work->added, 0);
2755	work->func(work->arg);
2756}
2757
2758static struct workqueue *
2759vioif_workq_create(const char *name, pri_t prio, int ipl, int flags)
2760{
2761	struct workqueue *wq;
2762	int error;
2763
2764	error = workqueue_create(&wq, name, vioif_workq_work, NULL,
2765	    prio, ipl, flags);
2766
2767	if (error)
2768		return NULL;
2769
2770	return wq;
2771}
2772
/*
 * Destroy a workqueue obtained from vioif_workq_create().
 */
static void
vioif_workq_destroy(struct workqueue *wq)
{
	workqueue_destroy(wq);
}
2779
2780static void
2781vioif_work_set(struct vioif_work *work, void (*func)(void *), void *arg)
2782{
2783
2784	memset(work, 0, sizeof(*work));
2785	work->func = func;
2786	work->arg = arg;
2787}
2788
2789static void
2790vioif_work_add(struct workqueue *wq, struct vioif_work *work)
2791{
2792
2793	if (atomic_load_relaxed(&work->added) != 0)
2794		return;
2795
2796	atomic_store_relaxed(&work->added, 1);
2797	kpreempt_disable();
2798	workqueue_enqueue(wq, &work->cookie, NULL);
2799	kpreempt_enable();
2800}
2801
2802static void
2803vioif_work_wait(struct workqueue *wq, struct vioif_work *work)
2804{
2805
2806	workqueue_wait(wq, &work->cookie);
2807}
2808
2809MODULE(MODULE_CLASS_DRIVER, if_vioif, "virtio");
2810
2811#ifdef _MODULE
2812#include "ioconf.c"
2813#endif
2814
2815static int
2816if_vioif_modcmd(modcmd_t cmd, void *opaque)
2817{
2818	int error = 0;
2819
2820#ifdef _MODULE
2821	switch (cmd) {
2822	case MODULE_CMD_INIT:
2823		error = config_init_component(cfdriver_ioconf_if_vioif,
2824		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2825		break;
2826	case MODULE_CMD_FINI:
2827		error = config_fini_component(cfdriver_ioconf_if_vioif,
2828		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2829		break;
2830	default:
2831		error = ENOTTY;
2832		break;
2833	}
2834#endif
2835
2836	return error;
2837}
2838