1/*	$NetBSD: if_vioif.c,v 1.100 2023/03/23 02:52:29 yamaguchi Exp $	*/
2
3/*
4 * Copyright (c) 2020 The NetBSD Foundation, Inc.
5 * Copyright (c) 2010 Minoura Makoto.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__KERNEL_RCSID(0, "$NetBSD: if_vioif.c,v 1.100 2023/03/23 02:52:29 yamaguchi Exp $");
31
32#ifdef _KERNEL_OPT
33#include "opt_net_mpsafe.h"
34#endif
35
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/kernel.h>
39#include <sys/atomic.h>
40#include <sys/bus.h>
41#include <sys/condvar.h>
42#include <sys/device.h>
43#include <sys/evcnt.h>
44#include <sys/intr.h>
45#include <sys/kmem.h>
46#include <sys/mbuf.h>
47#include <sys/mutex.h>
48#include <sys/sockio.h>
49#include <sys/syslog.h>
50#include <sys/cpu.h>
51#include <sys/module.h>
52#include <sys/pcq.h>
53#include <sys/workqueue.h>
54#include <sys/xcall.h>
55
56#include <dev/pci/virtioreg.h>
57#include <dev/pci/virtiovar.h>
58
59#include <net/if.h>
60#include <net/if_dl.h>
61#include <net/if_media.h>
62#include <net/if_ether.h>
63
64#include <net/bpf.h>
65
66#include "ioconf.h"
67
68#ifdef NET_MPSAFE
69#define VIOIF_MPSAFE	1
70#define VIOIF_MULTIQ	1
71#endif
72
73/*
74 * if_vioifreg.h:
75 */
76/* Configuration registers */
77#define VIRTIO_NET_CONFIG_MAC		 0 /* 8bit x 6byte */
78#define VIRTIO_NET_CONFIG_STATUS	 6 /* 16bit */
79#define VIRTIO_NET_CONFIG_MAX_VQ_PAIRS	 8 /* 16bit */
80#define VIRTIO_NET_CONFIG_MTU		10 /* 16bit */
81
82/* Feature bits */
83#define VIRTIO_NET_F_CSUM		__BIT(0)
84#define VIRTIO_NET_F_GUEST_CSUM		__BIT(1)
85#define VIRTIO_NET_F_MAC		__BIT(5)
86#define VIRTIO_NET_F_GSO		__BIT(6)
87#define VIRTIO_NET_F_GUEST_TSO4		__BIT(7)
88#define VIRTIO_NET_F_GUEST_TSO6		__BIT(8)
89#define VIRTIO_NET_F_GUEST_ECN		__BIT(9)
90#define VIRTIO_NET_F_GUEST_UFO		__BIT(10)
91#define VIRTIO_NET_F_HOST_TSO4		__BIT(11)
92#define VIRTIO_NET_F_HOST_TSO6		__BIT(12)
93#define VIRTIO_NET_F_HOST_ECN		__BIT(13)
94#define VIRTIO_NET_F_HOST_UFO		__BIT(14)
95#define VIRTIO_NET_F_MRG_RXBUF		__BIT(15)
96#define VIRTIO_NET_F_STATUS		__BIT(16)
97#define VIRTIO_NET_F_CTRL_VQ		__BIT(17)
98#define VIRTIO_NET_F_CTRL_RX		__BIT(18)
99#define VIRTIO_NET_F_CTRL_VLAN		__BIT(19)
100#define VIRTIO_NET_F_CTRL_RX_EXTRA	__BIT(20)
101#define VIRTIO_NET_F_GUEST_ANNOUNCE	__BIT(21)
102#define VIRTIO_NET_F_MQ			__BIT(22)
103#define VIRTIO_NET_F_CTRL_MAC_ADDR 	__BIT(23)
104
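/*
 * snprintb(3)-style format string describing the feature bits above; each
 * entry is "b", the bit position as a single byte, and a NUL-terminated
 * name, so e.g. "b\x11" "CTRL_VQ\0" labels __BIT(17) (VIRTIO_NET_F_CTRL_VQ).
 * It is handed to virtio_child_attach_start() below for the attach-time
 * feature printout.
 */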
105#define VIRTIO_NET_FLAG_BITS			\
106	VIRTIO_COMMON_FLAG_BITS			\
107	"b\x17" "CTRL_MAC\0"			\
108	"b\x16" "MQ\0"				\
109	"b\x15" "GUEST_ANNOUNCE\0"		\
110	"b\x14" "CTRL_RX_EXTRA\0"		\
111	"b\x13" "CTRL_VLAN\0"			\
112	"b\x12" "CTRL_RX\0"			\
113	"b\x11" "CTRL_VQ\0"			\
114	"b\x10" "STATUS\0"			\
115	"b\x0f" "MRG_RXBUF\0"			\
116	"b\x0e" "HOST_UFO\0"			\
117	"b\x0d" "HOST_ECN\0"			\
118	"b\x0c" "HOST_TSO6\0"			\
119	"b\x0b" "HOST_TSO4\0"			\
120	"b\x0a" "GUEST_UFO\0"			\
121	"b\x09" "GUEST_ECN\0"			\
122	"b\x08" "GUEST_TSO6\0"			\
123	"b\x07" "GUEST_TSO4\0"			\
124	"b\x06" "GSO\0"				\
125	"b\x05" "MAC\0"				\
126	"b\x01" "GUEST_CSUM\0"			\
127	"b\x00" "CSUM\0"
128
129/* Status */
130#define VIRTIO_NET_S_LINK_UP	1
131
132/* Packet header structure */
133struct virtio_net_hdr {
134	uint8_t		flags;
135	uint8_t		gso_type;
136	uint16_t	hdr_len;
137	uint16_t	gso_size;
138	uint16_t	csum_start;
139	uint16_t	csum_offset;
140
141	uint16_t	num_buffers; /* VIRTIO_NET_F_MRG_RXBUF enabled or v1 */
142} __packed;
143
144#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
145#define VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
146#define VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
147#define VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
148#define VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
149#define VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */
150
151#define VIRTIO_NET_MAX_GSO_LEN		(65536+ETHER_HDR_LEN)
152
153/* Control virtqueue */
154struct virtio_net_ctrl_cmd {
155	uint8_t	class;
156	uint8_t	command;
157} __packed;
158#define VIRTIO_NET_CTRL_RX		0
159# define VIRTIO_NET_CTRL_RX_PROMISC	0
160# define VIRTIO_NET_CTRL_RX_ALLMULTI	1
161
162#define VIRTIO_NET_CTRL_MAC		1
163# define VIRTIO_NET_CTRL_MAC_TABLE_SET	0
164# define  VIRTIO_NET_CTRL_MAC_ADDR_SET	1
165
166#define VIRTIO_NET_CTRL_VLAN		2
167# define VIRTIO_NET_CTRL_VLAN_ADD	0
168# define VIRTIO_NET_CTRL_VLAN_DEL	1
169
170#define VIRTIO_NET_CTRL_MQ			4
171# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET	0
172# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN	1
173# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX	0x8000
174
175struct virtio_net_ctrl_status {
176	uint8_t	ack;
177} __packed;
178#define VIRTIO_NET_OK			0
179#define VIRTIO_NET_ERR			1
180
181struct virtio_net_ctrl_rx {
182	uint8_t	onoff;
183} __packed;
184
185struct virtio_net_ctrl_mac_tbl {
186	uint32_t nentries;
187	uint8_t macs[][ETHER_ADDR_LEN];
188} __packed;
189
190struct virtio_net_ctrl_mac_addr {
191	uint8_t mac[ETHER_ADDR_LEN];
192} __packed;
193
194struct virtio_net_ctrl_vlan {
195	uint16_t id;
196} __packed;
197
198struct virtio_net_ctrl_mq {
199	uint16_t virtqueue_pairs;
200} __packed;
201
202/*
203 * if_vioifvar.h:
204 */
205
206/*
207 * Locking notes:
208 * + a field in vioif_netqueue is protected by netq_lock (a spin mutex)
209 *      - more than one netq_lock cannot be held at once
210 * + a field in vioif_tx_context and vioif_rx_context is also protected
211 *   by netq_lock.
212 * + ctrlq_inuse is protected by ctrlq_wait_lock.
213 *      - other fields in vioif_ctrlqueue are protected by ctrlq_inuse
214 *      - netq_lock cannot be held along with ctrlq_wait_lock
215 * + fields in vioif_softc except queues are protected by
216 *   sc->sc_lock (an adaptive mutex)
217 *      - the lock is held before acquisition of other locks
218 */
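/*
 * The resulting lock order, as implied by the notes above:
 *	sc_lock -> netq_lock
 *	sc_lock -> ctrlq_wait_lock
 * netq_lock and ctrlq_wait_lock are never held at the same time, and no
 * two netq_locks are nested.
 */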
219
220struct vioif_ctrl_cmdspec {
221	bus_dmamap_t	dmamap;
222	void		*buf;
223	bus_size_t	bufsize;
224};
225
226struct vioif_work {
227	struct work	 cookie;
228	void		(*func)(void *);
229	void		*arg;
230	unsigned int	 added;
231};
232
233struct vioif_net_map {
234	struct virtio_net_hdr	*vnm_hdr;
235	bus_dmamap_t		 vnm_hdr_map;
236	struct mbuf		*vnm_mbuf;
237	bus_dmamap_t		 vnm_mbuf_map;
238};
239
240#define VIOIF_NETQ_RX		0
241#define VIOIF_NETQ_TX		1
242#define VIOIF_NETQ_IDX		2
243#define VIOIF_NETQ_DIR(n)	((n) % VIOIF_NETQ_IDX)
244#define VIOIF_NETQ_PAIRIDX(n)	((n) / VIOIF_NETQ_IDX)
245#define VIOIF_NETQ_RXQID(n)	((n) * VIOIF_NETQ_IDX + VIOIF_NETQ_RX)
246#define VIOIF_NETQ_TXQID(n)	((n) * VIOIF_NETQ_IDX + VIOIF_NETQ_TX)
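/*
 * Virtqueue index layout implied by the macros above: the RX and TX queues
 * of pair N live at indexes 2N and 2N + 1, i.e.
 *	qid 0 -> rx0, qid 1 -> tx0, qid 2 -> rx1, qid 3 -> tx1, ...
 * VIOIF_NETQ_DIR() recovers the direction and VIOIF_NETQ_PAIRIDX() the
 * pair number from a queue index.
 */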
247
248struct vioif_netqueue {
249	kmutex_t		 netq_lock;
250	struct virtqueue	*netq_vq;
251	bool			 netq_stopping;
252	bool			 netq_running_handle;
253	void			*netq_maps_kva;
254	struct vioif_net_map	*netq_maps;
255
256	void			*netq_softint;
257	struct vioif_work	 netq_work;
258	bool			 netq_workqueue;
259
260	char			 netq_evgroup[32];
261	struct evcnt		 netq_mbuf_load_failed;
262	struct evcnt		 netq_enqueue_failed;
263
264	void			*netq_ctx;
265};
266
267struct vioif_tx_context {
268	bool			 txc_link_active;
269	pcq_t			*txc_intrq;
270	void			*txc_deferred_transmit;
271
272	struct evcnt		 txc_defrag_failed;
273};
274
275struct vioif_rx_context {
276	struct evcnt		 rxc_mbuf_enobufs;
277};
278struct vioif_ctrlqueue {
279	struct virtqueue		*ctrlq_vq;
280	enum {
281		FREE, INUSE, DONE
282	}				ctrlq_inuse;
283	kcondvar_t			ctrlq_wait;
284	kmutex_t			ctrlq_wait_lock;
285	struct lwp			*ctrlq_owner;
286
287	struct virtio_net_ctrl_cmd	*ctrlq_cmd;
288	struct virtio_net_ctrl_status	*ctrlq_status;
289	struct virtio_net_ctrl_rx	*ctrlq_rx;
290	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_uc;
291	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_mc;
292	struct virtio_net_ctrl_mac_addr	*ctrlq_mac_addr;
293	struct virtio_net_ctrl_mq	*ctrlq_mq;
294
295	bus_dmamap_t			ctrlq_cmd_dmamap;
296	bus_dmamap_t			ctrlq_status_dmamap;
297	bus_dmamap_t			ctrlq_rx_dmamap;
298	bus_dmamap_t			ctrlq_tbl_uc_dmamap;
299	bus_dmamap_t			ctrlq_tbl_mc_dmamap;
300	bus_dmamap_t			ctrlq_mac_addr_dmamap;
301	bus_dmamap_t			ctrlq_mq_dmamap;
302
303	struct evcnt			ctrlq_cmd_load_failed;
304	struct evcnt			ctrlq_cmd_failed;
305};
306
307struct vioif_softc {
308	device_t		sc_dev;
309	kmutex_t		sc_lock;
310	struct sysctllog	*sc_sysctllog;
311
312	struct virtio_softc	*sc_virtio;
313	struct virtqueue	*sc_vqs;
314	u_int			 sc_hdr_size;
315
316	int			sc_max_nvq_pairs;
317	int			sc_req_nvq_pairs;
318	int			sc_act_nvq_pairs;
319
320	uint8_t			sc_mac[ETHER_ADDR_LEN];
321	struct ethercom		sc_ethercom;
322	int			sc_link_state;
323
324	struct vioif_netqueue	*sc_netqs;
325
326	bool			sc_has_ctrl;
327	struct vioif_ctrlqueue	sc_ctrlq;
328
329	bus_dma_segment_t	 sc_segs[1];
330	void			*sc_dmamem;
331	void			*sc_kmem;
332
333	void			*sc_ctl_softint;
334
335	struct workqueue	*sc_txrx_workqueue;
336	bool			 sc_txrx_workqueue_sysctl;
337	u_int			 sc_tx_intr_process_limit;
338	u_int			 sc_tx_process_limit;
339	u_int			 sc_rx_intr_process_limit;
340	u_int			 sc_rx_process_limit;
341};
342#define VIRTIO_NET_TX_MAXNSEGS		(16) /* XXX */
343#define VIRTIO_NET_CTRL_MAC_MAXENTRIES	(64) /* XXX */
344
345#define VIOIF_TX_INTR_PROCESS_LIMIT	256
346#define VIOIF_TX_PROCESS_LIMIT		256
347#define VIOIF_RX_INTR_PROCESS_LIMIT	0U
348#define VIOIF_RX_PROCESS_LIMIT		256
349
350#define VIOIF_WORKQUEUE_PRI		PRI_SOFTNET
351#define VIOIF_IS_LINK_ACTIVE(_sc)	((_sc)->sc_link_state == LINK_STATE_UP ? \
352					    true : false)
353
354/* cfattach interface functions */
355static int	vioif_match(device_t, cfdata_t, void *);
356static void	vioif_attach(device_t, device_t, void *);
357static int	vioif_finalize_teardown(device_t);
358
359/* ifnet interface functions */
360static int	vioif_init(struct ifnet *);
361static void	vioif_stop(struct ifnet *, int);
362static void	vioif_start(struct ifnet *);
363static void	vioif_start_locked(struct ifnet *, struct vioif_netqueue *);
364static int	vioif_transmit(struct ifnet *, struct mbuf *);
365static void	vioif_transmit_locked(struct ifnet *, struct vioif_netqueue *);
366static int	vioif_ioctl(struct ifnet *, u_long, void *);
367static void	vioif_watchdog(struct ifnet *);
368static int	vioif_ifflags_cb(struct ethercom *);
369
370/* tx & rx */
371static void	vioif_net_sched_handle(struct vioif_softc *,
372		    struct vioif_netqueue *);
373static int	vioif_net_load_mbuf(struct virtio_softc *,
374		    struct vioif_net_map *, struct mbuf *, int);
375static void	vioif_net_unload_mbuf(struct virtio_softc *,
376		    struct vioif_net_map *);
377static int	vioif_net_enqueue_tx(struct virtio_softc *, struct virtqueue *,
378		    int, struct vioif_net_map *);
379static int	vioif_net_enqueue_rx(struct virtio_softc *, struct virtqueue *,
380		    int, struct vioif_net_map *);
381static struct mbuf *
382		vioif_net_dequeue_commit(struct virtio_softc *,
383		    struct virtqueue *, int, struct vioif_net_map *, int);
384static void	vioif_net_intr_enable(struct vioif_softc *,
385		    struct virtio_softc *);
386static void	vioif_net_intr_disable(struct vioif_softc *,
387		    struct virtio_softc *);
388
389/* rx */
390static void	vioif_populate_rx_mbufs_locked(struct vioif_softc *,
391		    struct vioif_netqueue *);
392static void	vioif_rx_queue_clear(struct vioif_softc *, struct virtio_softc *,
393		    struct vioif_netqueue *);
394static bool	vioif_rx_deq_locked(struct vioif_softc *, struct virtio_softc *,
395		    struct vioif_netqueue *, u_int, size_t *);
396static int	vioif_rx_intr(void *);
397static void	vioif_rx_handle(void *);
398
399/* tx */
400static int	vioif_tx_intr(void *);
401static void	vioif_tx_handle(void *);
402static void	vioif_tx_queue_clear(struct vioif_softc *, struct virtio_softc *,
403		    struct vioif_netqueue *);
404static bool	vioif_tx_deq_locked(struct vioif_softc *, struct virtio_softc *,
405		    struct vioif_netqueue *, u_int);
406static void	vioif_deferred_transmit(void *);
407
408/* workqueue */
409static struct workqueue*
410		vioif_workq_create(const char *, pri_t, int, int);
411static void	vioif_workq_destroy(struct workqueue *);
412static void	vioif_workq_work(struct work *, void *);
413static void	vioif_work_set(struct vioif_work *, void(*)(void *), void *);
414static void	vioif_work_add(struct workqueue *, struct vioif_work *);
415static void	vioif_work_wait(struct workqueue *, struct vioif_work *);
416
417/* other control */
418static int	vioif_get_link_status(struct vioif_softc *);
419static void	vioif_update_link_status(struct vioif_softc *);
420static int	vioif_ctrl_rx(struct vioif_softc *, int, bool);
421static int	vioif_set_promisc(struct vioif_softc *, bool);
422static int	vioif_set_allmulti(struct vioif_softc *, bool);
423static int	vioif_set_rx_filter(struct vioif_softc *);
424static int	vioif_rx_filter(struct vioif_softc *);
425static int	vioif_set_mac_addr(struct vioif_softc *);
426static int	vioif_ctrl_intr(void *);
427static int	vioif_config_change(struct virtio_softc *);
428static void	vioif_ctl_softint(void *);
429static int	vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *, int);
430static int	vioif_setup_sysctl(struct vioif_softc *);
431static void	vioif_setup_stats(struct vioif_softc *);
432static int	vioif_ifflags(struct vioif_softc *);
433static void	vioif_intr_barrier(void);
434static void	vioif_notify(struct virtio_softc *, struct virtqueue *);
435
436CFATTACH_DECL_NEW(vioif, sizeof(struct vioif_softc),
437		  vioif_match, vioif_attach, NULL, NULL);
438
439static int
440vioif_match(device_t parent, cfdata_t match, void *aux)
441{
442	struct virtio_attach_args *va = aux;
443
444	if (va->sc_childdevid == VIRTIO_DEVICE_ID_NETWORK)
445		return 1;
446
447	return 0;
448}
449
450static int
451vioif_dmamap_create(struct vioif_softc *sc, bus_dmamap_t *map,
452    bus_size_t size, int nsegs, const char *usage)
453{
454	int r;
455
456	r = bus_dmamap_create(virtio_dmat(sc->sc_virtio), size,
457	    nsegs, size, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, map);
458
459	if (r != 0) {
460		aprint_error_dev(sc->sc_dev, "%s dmamap creation failed, "
461		    "error code %d\n", usage, r);
462	}
463
464	return r;
465}
466
467static void
468vioif_dmamap_destroy(struct vioif_softc *sc, bus_dmamap_t *map)
469{
470
471	if (*map) {
472		bus_dmamap_destroy(virtio_dmat(sc->sc_virtio), *map);
473		*map = NULL;
474	}
475}
476
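/*
 * Create a dmamap and immediately load the given buffer into it.  On load
 * failure the map is destroyed again, so a non-zero return leaves nothing
 * for the caller to clean up; note that a create failure is reported as 1
 * rather than as the bus_dma error code.
 */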
477static int
478vioif_dmamap_create_load(struct vioif_softc *sc, bus_dmamap_t *map,
479    void *buf, bus_size_t size, int nsegs, int rw, const char *usage)
480{
481	int r;
482
483	r = vioif_dmamap_create(sc, map, size, nsegs, usage);
484	if (r != 0)
485		return 1;
486
487	r = bus_dmamap_load(virtio_dmat(sc->sc_virtio), *map, buf,
488	    size, NULL, rw | BUS_DMA_NOWAIT);
489	if (r != 0) {
490		vioif_dmamap_destroy(sc, map);
491		aprint_error_dev(sc->sc_dev, "%s dmamap load failed. "
492		    "error code %d\n", usage, r);
493	}
494
495	return r;
496}
497
498static void *
499vioif_assign_mem(intptr_t *p, size_t size)
500{
501	intptr_t rv;
502
503	rv = *p;
504	*p += size;
505
506	return (void *)rv;
507}
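/*
 * vioif_assign_mem() carves consecutive regions out of one large
 * allocation: it returns the current cursor and advances it by size.
 * For example (names purely illustrative):
 *
 *	p = (intptr_t)vaddr;
 *	a = vioif_assign_mem(&p, sizeof(*a));	a == vaddr
 *	b = vioif_assign_mem(&p, sizeof(*b));	b == vaddr + sizeof(*a)
 *
 * No alignment beyond what the accumulated sizes provide is applied.
 */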
508
509static void
510vioif_alloc_queues(struct vioif_softc *sc)
511{
512	int nvq_pairs = sc->sc_max_nvq_pairs;
513	size_t nvqs, netq_num;
514
515	KASSERT(nvq_pairs <= VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX);
516
517	nvqs = netq_num = sc->sc_max_nvq_pairs * 2;
518	if (sc->sc_has_ctrl)
519		nvqs++;
520
521	sc->sc_vqs = kmem_zalloc(sizeof(sc->sc_vqs[0]) * nvqs, KM_SLEEP);
522	sc->sc_netqs = kmem_zalloc(sizeof(sc->sc_netqs[0]) * netq_num,
523	    KM_SLEEP);
524}
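/*
 * sc_vqs[] holds sc_max_nvq_pairs RX/TX virtqueue pairs and, when the
 * control queue is available, one extra control virtqueue at index
 * sc_max_nvq_pairs * 2.  sc_netqs[] has one entry per RX/TX virtqueue and
 * none for the control queue.
 */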
525
526static void
527vioif_free_queues(struct vioif_softc *sc)
528{
529	size_t nvqs, netq_num;
530
531	nvqs = netq_num = sc->sc_max_nvq_pairs * 2;
532	if (sc->sc_ctrlq.ctrlq_vq)
533		nvqs++;
534
535	kmem_free(sc->sc_netqs, sizeof(sc->sc_netqs[0]) * netq_num);
536	kmem_free(sc->sc_vqs, sizeof(sc->sc_vqs[0]) * nvqs);
537	sc->sc_netqs = NULL;
538	sc->sc_vqs = NULL;
539}
540
541static int
542vioif_netqueue_init(struct vioif_softc *sc, struct virtio_softc *vsc,
543    size_t qid, u_int softint_flags)
544{
545	static const struct {
546		const char	*dirname;
547		int		 segsize;
548		int		 nsegs;
549		int 		(*intrhand)(void *);
550		void		(*sihand)(void *);
551	} params[VIOIF_NETQ_IDX] = {
552		[VIOIF_NETQ_RX] = {
553			.dirname	= "rx",
554			.segsize	= MCLBYTES,
555			.nsegs		= 2,
556			.intrhand	= vioif_rx_intr,
557			.sihand		= vioif_rx_handle,
558		},
559		[VIOIF_NETQ_TX] = {
560			.dirname	= "tx",
561			.segsize	= ETHER_MAX_LEN - ETHER_HDR_LEN,
562			.nsegs		= 2,
563			.intrhand	= vioif_tx_intr,
564			.sihand		= vioif_tx_handle,
565		}
566	};
567
568	struct virtqueue *vq;
569	struct vioif_netqueue *netq;
570	struct vioif_tx_context *txc;
571	struct vioif_rx_context *rxc;
572	char qname[32];
573	int r, dir;
574
575	txc = NULL;
576	rxc = NULL;
577	netq = &sc->sc_netqs[qid];
578	vq = &sc->sc_vqs[qid];
579	dir = VIOIF_NETQ_DIR(qid);
580
581	netq->netq_vq = &sc->sc_vqs[qid];
582	netq->netq_stopping = false;
583	netq->netq_running_handle = false;
584
585	snprintf(qname, sizeof(qname), "%s%zu",
586	    params[dir].dirname, VIOIF_NETQ_PAIRIDX(qid));
587	snprintf(netq->netq_evgroup, sizeof(netq->netq_evgroup),
588	    "%s-%s", device_xname(sc->sc_dev), qname);
589
590	mutex_init(&netq->netq_lock, MUTEX_DEFAULT, IPL_NET);
591	r = virtio_alloc_vq(vsc, vq, qid,
592	    params[dir].segsize + sc->sc_hdr_size,
593	    params[dir].nsegs, qname);
594	if (r != 0)
595		goto err;
596	netq->netq_vq = vq;
597
598	netq->netq_vq->vq_intrhand = params[dir].intrhand;
599	netq->netq_vq->vq_intrhand_arg = netq;
600	netq->netq_softint = softint_establish(softint_flags,
601	    params[dir].sihand, netq);
602	if (netq->netq_softint == NULL) {
603		aprint_error_dev(sc->sc_dev,
604		    "couldn't establish %s softint\n",
605		    params[dir].dirname);
606		goto err;
607	}
608	vioif_work_set(&netq->netq_work, params[dir].sihand, netq);
609
610	switch (dir) {
611	case VIOIF_NETQ_RX:
612		rxc = kmem_zalloc(sizeof(*rxc), KM_SLEEP);
613		netq->netq_ctx = rxc;
614		/* nothing to do */
615		break;
616	case VIOIF_NETQ_TX:
617		txc = kmem_zalloc(sizeof(*txc), KM_SLEEP);
618		netq->netq_ctx = (void *)txc;
619		txc->txc_deferred_transmit = softint_establish(softint_flags,
620		    vioif_deferred_transmit, netq);
621		if (txc->txc_deferred_transmit == NULL) {
622			aprint_error_dev(sc->sc_dev,
623			    "couldn't establish softint for "
624			    "tx deferred transmit\n");
625			goto err;
626		}
627		txc->txc_link_active = VIOIF_IS_LINK_ACTIVE(sc);
628		txc->txc_intrq = pcq_create(vq->vq_num, KM_SLEEP);
629		break;
630	}
631
632	return 0;
633
634err:
635	netq->netq_ctx = NULL;
636
637	if (rxc != NULL) {
638		kmem_free(rxc, sizeof(*rxc));
639	}
640
641	if (txc != NULL) {
642		if (txc->txc_deferred_transmit != NULL)
643			softint_disestablish(txc->txc_deferred_transmit);
644		if (txc->txc_intrq != NULL)
645			pcq_destroy(txc->txc_intrq);
646		kmem_free(txc, sizeof(*txc));
647	}
648
649	vioif_work_set(&netq->netq_work, NULL, NULL);
650	if (netq->netq_softint != NULL) {
651		softint_disestablish(netq->netq_softint);
652		netq->netq_softint = NULL;
653	}
654	netq->netq_vq->vq_intrhand = NULL;
655	netq->netq_vq->vq_intrhand_arg = NULL;
656
657	virtio_free_vq(vsc, vq);
658	mutex_destroy(&netq->netq_lock);
659	netq->netq_vq = NULL;
660
661	return -1;
662}
663
664static void
665vioif_netqueue_teardown(struct vioif_softc *sc, struct virtio_softc *vsc,
666    size_t qid)
667{
668	struct vioif_netqueue *netq;
669	struct vioif_rx_context *rxc;
670	struct vioif_tx_context *txc;
671	int dir;
672
673	netq = &sc->sc_netqs[qid];
674
675	if (netq->netq_vq == NULL)
676		return;
677
678	netq = &sc->sc_netqs[qid];
679	dir = VIOIF_NETQ_DIR(qid);
680	switch (dir) {
681	case VIOIF_NETQ_RX:
682		rxc = netq->netq_ctx;
683		netq->netq_ctx = NULL;
684		kmem_free(rxc, sizeof(*rxc));
685		break;
686	case VIOIF_NETQ_TX:
687		txc = netq->netq_ctx;
688		netq->netq_ctx = NULL;
689		softint_disestablish(txc->txc_deferred_transmit);
690		pcq_destroy(txc->txc_intrq);
691		kmem_free(txc, sizeof(*txc));
692		break;
693	}
694
695	softint_disestablish(netq->netq_softint);
696	virtio_free_vq(vsc, netq->netq_vq);
697	mutex_destroy(&netq->netq_lock);
698	netq->netq_vq = NULL;
699}
700
701/* allocate memory */
702/*
703 * dma memory is used for:
704 *   netq_maps_kva:	 metadata array for received frames (READ) and
705 *			 sent frames (WRITE)
706 *   ctrlq_cmd:		 command to be sent via ctrl vq (WRITE)
707 *   ctrlq_status:	 return value for a command via ctrl vq (READ)
708 *   ctrlq_rx:		 parameter for a VIRTIO_NET_CTRL_RX class command
709 *			 (WRITE)
710 *   ctrlq_mac_tbl_uc:	 unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
711 *			 class command (WRITE)
712 *   ctrlq_mac_tbl_mc:	 multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
713 *			 class command (WRITE)
714 * Only one instance of each ctrlq_* structure is allocated; they are
715 * protected by the ctrlq_inuse variable and the ctrlq_wait condvar.
716 */
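/*
 * Two allocations are made below: the device-visible data listed above
 * (per-queue virtio_net_hdr arrays and the ctrlq_* buffers) comes from
 * bus_dma memory, while the host-only vioif_net_map bookkeeping arrays
 * are plain kmem.
 */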
717static int
718vioif_alloc_mems(struct vioif_softc *sc)
719{
720	struct virtio_softc *vsc = sc->sc_virtio;
721	struct vioif_netqueue *netq;
722	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
723	struct vioif_net_map *maps;
724	unsigned int vq_num;
725	int r, rsegs;
726	bus_size_t dmamemsize;
727	size_t qid, i, netq_num, kmemsize;
728	void *vaddr;
729	intptr_t p;
730
731	netq_num = sc->sc_max_nvq_pairs * 2;
732
733	/* allocate DMA memory */
734	dmamemsize = 0;
735
736	for (qid = 0; qid < netq_num; qid++) {
737		maps = sc->sc_netqs[qid].netq_maps;
738		vq_num = sc->sc_netqs[qid].netq_vq->vq_num;
739		dmamemsize += sizeof(*maps[0].vnm_hdr) * vq_num;
740	}
741
742	if (sc->sc_has_ctrl) {
743		dmamemsize += sizeof(struct virtio_net_ctrl_cmd);
744		dmamemsize += sizeof(struct virtio_net_ctrl_status);
745		dmamemsize += sizeof(struct virtio_net_ctrl_rx);
746		dmamemsize += sizeof(struct virtio_net_ctrl_mac_tbl)
747		    + ETHER_ADDR_LEN;
748		dmamemsize += sizeof(struct virtio_net_ctrl_mac_tbl)
749		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES;
750		dmamemsize += sizeof(struct virtio_net_ctrl_mac_addr);
751		dmamemsize += sizeof(struct virtio_net_ctrl_mq);
752	}
753
754	r = bus_dmamem_alloc(virtio_dmat(vsc), dmamemsize, 0, 0,
755	    &sc->sc_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
756	if (r != 0) {
757		aprint_error_dev(sc->sc_dev,
758		    "DMA memory allocation failed, size %zu, "
759		    "error code %d\n", dmamemsize, r);
760		goto err_none;
761	}
762	r = bus_dmamem_map(virtio_dmat(vsc), &sc->sc_segs[0], 1,
763	    dmamemsize, &vaddr, BUS_DMA_NOWAIT);
764	if (r != 0) {
765		aprint_error_dev(sc->sc_dev,
766		    "DMA memory map failed, error code %d\n", r);
767		goto err_dmamem_alloc;
768	}
769
770	/* assign DMA memory */
771	memset(vaddr, 0, dmamemsize);
772	sc->sc_dmamem = vaddr;
773	p = (intptr_t) vaddr;
774
775	for (qid = 0; qid < netq_num; qid++) {
776		netq = &sc->sc_netqs[qid];
777		maps = netq->netq_maps;
778		vq_num = netq->netq_vq->vq_num;
779
780		netq->netq_maps_kva = vioif_assign_mem(&p,
781		    sizeof(*maps[0].vnm_hdr) * vq_num);
782	}
783
784	if (sc->sc_has_ctrl) {
785		ctrlq->ctrlq_cmd = vioif_assign_mem(&p,
786		    sizeof(*ctrlq->ctrlq_cmd));
787		ctrlq->ctrlq_status = vioif_assign_mem(&p,
788		    sizeof(*ctrlq->ctrlq_status));
789		ctrlq->ctrlq_rx = vioif_assign_mem(&p,
790		    sizeof(*ctrlq->ctrlq_rx));
791		ctrlq->ctrlq_mac_tbl_uc = vioif_assign_mem(&p,
792		    sizeof(*ctrlq->ctrlq_mac_tbl_uc)
793		    + ETHER_ADDR_LEN);
794		ctrlq->ctrlq_mac_tbl_mc = vioif_assign_mem(&p,
795		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
796		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES);
797		ctrlq->ctrlq_mac_addr = vioif_assign_mem(&p,
798		    sizeof(*ctrlq->ctrlq_mac_addr));
799		ctrlq->ctrlq_mq = vioif_assign_mem(&p, sizeof(*ctrlq->ctrlq_mq));
800	}
801
802	/* allocate kmem */
803	kmemsize = 0;
804
805	for (qid = 0; qid < netq_num; qid++) {
806		netq = &sc->sc_netqs[qid];
807		vq_num = netq->netq_vq->vq_num;
808
809		kmemsize += sizeof(netq->netq_maps[0]) * vq_num;
810	}
811
812	vaddr = kmem_zalloc(kmemsize, KM_SLEEP);
813	sc->sc_kmem = vaddr;
814
815	/* assign allocated kmem */
816	p = (intptr_t) vaddr;
817
818	for (qid = 0; qid < netq_num; qid++) {
819		netq = &sc->sc_netqs[qid];
820		vq_num = netq->netq_vq->vq_num;
821
822		netq->netq_maps = vioif_assign_mem(&p,
823		    sizeof(netq->netq_maps[0]) * vq_num);
824	}
825
826	/* prepare dmamaps */
827	for (qid = 0; qid < netq_num; qid++) {
828		static const struct {
829			const char	*msg_hdr;
830			const char	*msg_payload;
831			int		 dma_flag;
832			bus_size_t	 dma_size;
833			int		 dma_nsegs;
834		} dmaparams[VIOIF_NETQ_IDX] = {
835			[VIOIF_NETQ_RX] = {
836				.msg_hdr	= "rx header",
837				.msg_payload	= "rx payload",
838				.dma_flag	= BUS_DMA_READ,
839				.dma_size	= MCLBYTES - ETHER_ALIGN,
840				.dma_nsegs	= 1,
841			},
842			[VIOIF_NETQ_TX] = {
843				.msg_hdr	= "tx header",
844				.msg_payload	= "tx payload",
845				.dma_flag	= BUS_DMA_WRITE,
846				.dma_size	= ETHER_MAX_LEN,
847				.dma_nsegs	= VIRTIO_NET_TX_MAXNSEGS,
848			}
849		};
850
851		struct virtio_net_hdr *hdrs;
852		int dir;
853
854		dir = VIOIF_NETQ_DIR(qid);
855		netq = &sc->sc_netqs[qid];
856		vq_num = netq->netq_vq->vq_num;
857		maps = netq->netq_maps;
858		hdrs = netq->netq_maps_kva;
859
860		for (i = 0; i < vq_num; i++) {
861			maps[i].vnm_hdr = &hdrs[i];
862
863			r = vioif_dmamap_create_load(sc, &maps[i].vnm_hdr_map,
864			    maps[i].vnm_hdr, sc->sc_hdr_size, 1,
865			    dmaparams[dir].dma_flag, dmaparams[dir].msg_hdr);
866			if (r != 0)
867				goto err_reqs;
868
869			r = vioif_dmamap_create(sc, &maps[i].vnm_mbuf_map,
870			    dmaparams[dir].dma_size, dmaparams[dir].dma_nsegs,
871			    dmaparams[dir].msg_payload);
872			if (r != 0)
873				goto err_reqs;
874		}
875	}
876
877	if (sc->sc_has_ctrl) {
878		/* control vq class & command */
879		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_cmd_dmamap,
880		    ctrlq->ctrlq_cmd, sizeof(*ctrlq->ctrlq_cmd), 1,
881		    BUS_DMA_WRITE, "control command");
882		if (r != 0)
883			goto err_reqs;
884
885		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_status_dmamap,
886		    ctrlq->ctrlq_status, sizeof(*ctrlq->ctrlq_status), 1,
887		    BUS_DMA_READ, "control status");
888		if (r != 0)
889			goto err_reqs;
890
891		/* control vq rx mode command parameter */
892		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_rx_dmamap,
893		    ctrlq->ctrlq_rx, sizeof(*ctrlq->ctrlq_rx), 1,
894		    BUS_DMA_WRITE, "rx mode control command");
895		if (r != 0)
896			goto err_reqs;
897
898		/* multiqueue set command */
899		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_mq_dmamap,
900		    ctrlq->ctrlq_mq, sizeof(*ctrlq->ctrlq_mq), 1,
901		    BUS_DMA_WRITE, "multiqueue set command");
902		if (r != 0)
903			goto err_reqs;
904
905		/* control vq MAC filter table for unicast */
906		/* do not load now since its length is variable */
907		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_uc_dmamap,
908		    sizeof(*ctrlq->ctrlq_mac_tbl_uc)
909		    + ETHER_ADDR_LEN, 1,
910		    "unicast MAC address filter command");
911		if (r != 0)
912			goto err_reqs;
913
914		/* control vq MAC filter table for multicast */
915		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_mc_dmamap,
916		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
917		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES, 1,
918		    "multicast MAC address filter command");
919		if (r != 0)
920			goto err_reqs;
921
922		/* control vq MAC address set command */
923		r = vioif_dmamap_create_load(sc,
924		    &ctrlq->ctrlq_mac_addr_dmamap,
925		    ctrlq->ctrlq_mac_addr,
926		    sizeof(*ctrlq->ctrlq_mac_addr), 1,
927		    BUS_DMA_WRITE, "mac addr set command");
928		if (r != 0)
929			goto err_reqs;
930	}
931
932	return 0;
933
934err_reqs:
935	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_mc_dmamap);
936	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_uc_dmamap);
937	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_rx_dmamap);
938	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_status_dmamap);
939	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_cmd_dmamap);
940	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_mac_addr_dmamap);
941	for (qid = 0; qid < netq_num; qid++) {
942		vq_num = sc->sc_netqs[qid].netq_vq->vq_num;
943		maps = sc->sc_netqs[qid].netq_maps;
944
945		for (i = 0; i < vq_num; i++) {
946			vioif_dmamap_destroy(sc, &maps[i].vnm_mbuf_map);
947			vioif_dmamap_destroy(sc, &maps[i].vnm_hdr_map);
948		}
949	}
950	if (sc->sc_kmem) {
951		kmem_free(sc->sc_kmem, kmemsize);
952		sc->sc_kmem = NULL;
953	}
954	bus_dmamem_unmap(virtio_dmat(vsc), sc->sc_dmamem, dmamemsize);
955err_dmamem_alloc:
956	bus_dmamem_free(virtio_dmat(vsc), &sc->sc_segs[0], 1);
957err_none:
958	return -1;
959}
960
961static void
962vioif_attach(device_t parent, device_t self, void *aux)
963{
964	struct vioif_softc *sc = device_private(self);
965	struct virtio_softc *vsc = device_private(parent);
966	struct vioif_netqueue *txq0;
967	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
968	uint64_t features, req_features;
969	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
970	u_int softint_flags;
971	int r, i, req_flags;
972	char xnamebuf[MAXCOMLEN];
973	size_t netq_num;
974
975	if (virtio_child(vsc) != NULL) {
976		aprint_normal(": child already attached for %s; "
977		    "something wrong...\n", device_xname(parent));
978		return;
979	}
980
981	sc->sc_dev = self;
982	sc->sc_virtio = vsc;
983	sc->sc_link_state = LINK_STATE_UNKNOWN;
984
985	sc->sc_max_nvq_pairs = 1;
986	sc->sc_req_nvq_pairs = 1;
987	sc->sc_act_nvq_pairs = 1;
988	sc->sc_txrx_workqueue_sysctl = true;
989	sc->sc_tx_intr_process_limit = VIOIF_TX_INTR_PROCESS_LIMIT;
990	sc->sc_tx_process_limit = VIOIF_TX_PROCESS_LIMIT;
991	sc->sc_rx_intr_process_limit = VIOIF_RX_INTR_PROCESS_LIMIT;
992	sc->sc_rx_process_limit = VIOIF_RX_PROCESS_LIMIT;
993
994	mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);
995
996	snprintf(xnamebuf, sizeof(xnamebuf), "%s_txrx", device_xname(self));
997	sc->sc_txrx_workqueue = vioif_workq_create(xnamebuf, VIOIF_WORKQUEUE_PRI,
998	    IPL_NET, WQ_PERCPU | WQ_MPSAFE);
999	if (sc->sc_txrx_workqueue == NULL)
1000		goto err;
1001
1002	req_flags = 0;
1003
1004#ifdef VIOIF_MPSAFE
1005	req_flags |= VIRTIO_F_INTR_MPSAFE;
1006#endif
1007	req_flags |= VIRTIO_F_INTR_MSIX;
1008
1009	req_features =
1010	    VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | VIRTIO_NET_F_CTRL_VQ |
1011	    VIRTIO_NET_F_CTRL_RX | VIRTIO_F_NOTIFY_ON_EMPTY;
1012	req_features |= VIRTIO_F_RING_EVENT_IDX;
1013	req_features |= VIRTIO_NET_F_CTRL_MAC_ADDR;
1014#ifdef VIOIF_MULTIQ
1015	req_features |= VIRTIO_NET_F_MQ;
1016#endif
1017	virtio_child_attach_start(vsc, self, IPL_NET, NULL,
1018	    vioif_config_change, virtio_vq_intrhand, req_flags,
1019	    req_features, VIRTIO_NET_FLAG_BITS);
1020
1021	features = virtio_features(vsc);
1022	if (features == 0)
1023		goto err;
1024
1025	if (features & VIRTIO_NET_F_MAC) {
1026		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
1027			sc->sc_mac[i] = virtio_read_device_config_1(vsc,
1028			    VIRTIO_NET_CONFIG_MAC + i);
1029		}
1030	} else {
1031		/* code stolen from sys/net/if_tap.c */
1032		struct timeval tv;
1033		uint32_t ui;
1034		getmicrouptime(&tv);
1035		ui = (tv.tv_sec ^ tv.tv_usec) & 0xffffff;
1036		memcpy(sc->sc_mac+3, (uint8_t *)&ui, 3);
1037		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
1038			virtio_write_device_config_1(vsc,
1039			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
1040		}
1041	}
1042
1043	/* 'Ethernet' is capitalized to match other Ethernet drivers' attach messages */
1044	aprint_normal_dev(self, "Ethernet address %s\n",
1045	    ether_sprintf(sc->sc_mac));
1046
1047	if (features & (VIRTIO_NET_F_MRG_RXBUF | VIRTIO_F_VERSION_1)) {
1048		sc->sc_hdr_size = sizeof(struct virtio_net_hdr);
1049	} else {
1050		sc->sc_hdr_size = offsetof(struct virtio_net_hdr, num_buffers);
1051	}
1052
1053	if ((features & VIRTIO_NET_F_CTRL_VQ) &&
1054	    (features & VIRTIO_NET_F_CTRL_RX)) {
1055		sc->sc_has_ctrl = true;
1056
1057		cv_init(&ctrlq->ctrlq_wait, "ctrl_vq");
1058		mutex_init(&ctrlq->ctrlq_wait_lock, MUTEX_DEFAULT, IPL_NET);
1059		ctrlq->ctrlq_inuse = FREE;
1060	} else {
1061		sc->sc_has_ctrl = false;
1062	}
1063
1064	if (sc->sc_has_ctrl && (features & VIRTIO_NET_F_MQ)) {
1065		sc->sc_max_nvq_pairs = virtio_read_device_config_2(vsc,
1066		    VIRTIO_NET_CONFIG_MAX_VQ_PAIRS);
1067
1068		if (sc->sc_max_nvq_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
1069			goto err;
1070
1071		/* Limit the number of queue pairs to use */
1072		sc->sc_req_nvq_pairs = MIN(sc->sc_max_nvq_pairs, ncpu);
1073	}
1074
1075	vioif_alloc_queues(sc);
1076	virtio_child_attach_set_vqs(vsc, sc->sc_vqs, sc->sc_req_nvq_pairs);
1077
1078#ifdef VIOIF_MPSAFE
1079	softint_flags = SOFTINT_NET | SOFTINT_MPSAFE;
1080#else
1081	softint_flags = SOFTINT_NET;
1082#endif
1083
1084	/*
1085	 * Initialize network queues
1086	 */
1087	netq_num = sc->sc_max_nvq_pairs * 2;
1088	for (i = 0; i < netq_num; i++) {
1089		r = vioif_netqueue_init(sc, vsc, i, softint_flags);
1090		if (r != 0)
1091			goto err;
1092	}
1093
1094	if (sc->sc_has_ctrl) {
1095		int ctrlq_idx = sc->sc_max_nvq_pairs * 2;
1096		/*
1097		 * Allocating a virtqueue for control channel
1098		 */
1099		sc->sc_ctrlq.ctrlq_vq = &sc->sc_vqs[ctrlq_idx];
1100		r = virtio_alloc_vq(vsc, ctrlq->ctrlq_vq, ctrlq_idx,
1101		    NBPG, 1, "control");
1102		if (r != 0) {
1103			aprint_error_dev(self, "failed to allocate "
1104			    "a virtqueue for control channel, error code %d\n",
1105			    r);
1106
1107			sc->sc_has_ctrl = false;
1108			cv_destroy(&ctrlq->ctrlq_wait);
1109			mutex_destroy(&ctrlq->ctrlq_wait_lock);
1110		} else {
1111			ctrlq->ctrlq_vq->vq_intrhand = vioif_ctrl_intr;
1112			ctrlq->ctrlq_vq->vq_intrhand_arg = (void *) ctrlq;
1113		}
1114	}
1115
1116	sc->sc_ctl_softint = softint_establish(softint_flags,
1117	    vioif_ctl_softint, sc);
1118	if (sc->sc_ctl_softint == NULL) {
1119		aprint_error_dev(self, "cannot establish ctl softint\n");
1120		goto err;
1121	}
1122
1123	if (vioif_alloc_mems(sc) < 0)
1124		goto err;
1125
1126	if (virtio_child_attach_finish(vsc) != 0)
1127		goto err;
1128
1129	if (vioif_setup_sysctl(sc) != 0) {
1130		aprint_error_dev(self, "unable to create sysctl node\n");
1131		/* continue */
1132	}
1133
1134	vioif_setup_stats(sc);
1135
1136	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
1137	ifp->if_softc = sc;
1138	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1139#ifdef VIOIF_MPSAFE
1140	ifp->if_extflags = IFEF_MPSAFE;
1141#endif
1142	ifp->if_start = vioif_start;
1143	if (sc->sc_req_nvq_pairs > 1)
1144		ifp->if_transmit = vioif_transmit;
1145	ifp->if_ioctl = vioif_ioctl;
1146	ifp->if_init = vioif_init;
1147	ifp->if_stop = vioif_stop;
1148	ifp->if_capabilities = 0;
1149	ifp->if_watchdog = vioif_watchdog;
1150	txq0 = &sc->sc_netqs[VIOIF_NETQ_TXQID(0)];
1151	IFQ_SET_MAXLEN(&ifp->if_snd, MAX(txq0->netq_vq->vq_num, IFQ_MAXLEN));
1152	IFQ_SET_READY(&ifp->if_snd);
1153
1154	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
1155
1156	if_attach(ifp);
1157	if_deferred_start_init(ifp, NULL);
1158	ether_ifattach(ifp, sc->sc_mac);
1159	ether_set_ifflags_cb(&sc->sc_ethercom, vioif_ifflags_cb);
1160
1161	return;
1162
1163err:
1164	netq_num = sc->sc_max_nvq_pairs * 2;
1165	for (i = 0; i < netq_num; i++) {
1166		vioif_netqueue_teardown(sc, vsc, i);
1167	}
1168
1169	if (sc->sc_has_ctrl) {
1170		cv_destroy(&ctrlq->ctrlq_wait);
1171		mutex_destroy(&ctrlq->ctrlq_wait_lock);
1172		virtio_free_vq(vsc, ctrlq->ctrlq_vq);
1173		ctrlq->ctrlq_vq = NULL;
1174	}
1175
1176	vioif_free_queues(sc);
1177	mutex_destroy(&sc->sc_lock);
1178	virtio_child_attach_failed(vsc);
1179	config_finalize_register(self, vioif_finalize_teardown);
1180
1181	return;
1182}
1183
1184static int
1185vioif_finalize_teardown(device_t self)
1186{
1187	struct vioif_softc *sc = device_private(self);
1188
1189	if (sc->sc_txrx_workqueue != NULL) {
1190		vioif_workq_destroy(sc->sc_txrx_workqueue);
1191		sc->sc_txrx_workqueue = NULL;
1192	}
1193
1194	return 0;
1195}
1196
1197/*
1198 * Interface functions for ifnet
1199 */
1200static int
1201vioif_init(struct ifnet *ifp)
1202{
1203	struct vioif_softc *sc = ifp->if_softc;
1204	struct virtio_softc *vsc = sc->sc_virtio;
1205	struct vioif_netqueue *netq;
1206	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1207	int r, i;
1208
1209	vioif_stop(ifp, 0);
1210
1211	r = virtio_reinit_start(vsc);
1212	if (r != 0) {
1213		log(LOG_ERR, "%s: reset failed\n", ifp->if_xname);
1214		return EIO;
1215	}
1216
1217	virtio_negotiate_features(vsc, virtio_features(vsc));
1218
1219	for (i = 0; i < sc->sc_req_nvq_pairs; i++) {
1220		netq = &sc->sc_netqs[VIOIF_NETQ_RXQID(i)];
1221
1222		mutex_enter(&netq->netq_lock);
1223		vioif_populate_rx_mbufs_locked(sc, netq);
1224		mutex_exit(&netq->netq_lock);
1225	}
1226
1227	virtio_reinit_end(vsc);
1228
1229	if (sc->sc_has_ctrl)
1230		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
1231
1232	r = vioif_ctrl_mq_vq_pairs_set(sc, sc->sc_req_nvq_pairs);
1233	if (r == 0)
1234		sc->sc_act_nvq_pairs = sc->sc_req_nvq_pairs;
1235	else
1236		sc->sc_act_nvq_pairs = 1;
1237
1238	SET(ifp->if_flags, IFF_RUNNING);
1239	CLR(ifp->if_flags, IFF_OACTIVE);
1240
1241	vioif_net_intr_enable(sc, vsc);
1242
1243	vioif_update_link_status(sc);
1244	r = vioif_rx_filter(sc);
1245
1246	return r;
1247}
1248
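/*
 * Stop sequence: mark every active queue as stopping, disable virtqueue
 * interrupts, reset the device (the only reliable way to stop its DMA),
 * wait for handlers and queued work to drain, then release the mbufs
 * still held by the RX/TX rings.
 */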
1249static void
1250vioif_stop(struct ifnet *ifp, int disable)
1251{
1252	struct vioif_softc *sc = ifp->if_softc;
1253	struct virtio_softc *vsc = sc->sc_virtio;
1254	struct vioif_netqueue *netq;
1255	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1256	size_t i, act_qnum;
1257
1258	act_qnum = sc->sc_act_nvq_pairs * 2;
1259
1260	CLR(ifp->if_flags, IFF_RUNNING);
1261	for (i = 0; i < act_qnum; i++) {
1262		netq = &sc->sc_netqs[i];
1263
1264		mutex_enter(&netq->netq_lock);
1265		netq->netq_stopping = true;
1266		mutex_exit(&netq->netq_lock);
1267	}
1268
1269	/* disable interrupts */
1270	vioif_net_intr_disable(sc, vsc);
1271	if (sc->sc_has_ctrl)
1272		virtio_stop_vq_intr(vsc, ctrlq->ctrlq_vq);
1273
1274	/*
1275	 * Resetting the device is the only way to stop interrupts, I/O and DMA.
1276	 *
1277	 * NOTE: Devices based on the VirtIO draft specification cannot stop
1278	 * interrupts completely even if virtio_stop_vq_intr() is called.
1279	 */
1280	virtio_reset(vsc);
1281
1282	vioif_intr_barrier();
1283
1284	for (i = 0; i < act_qnum; i++) {
1285		netq = &sc->sc_netqs[i];
1286		vioif_work_wait(sc->sc_txrx_workqueue, &netq->netq_work);
1287	}
1288
1289	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1290		netq = &sc->sc_netqs[VIOIF_NETQ_RXQID(i)];
1291		vioif_rx_queue_clear(sc, vsc, netq);
1292
1293		netq = &sc->sc_netqs[VIOIF_NETQ_TXQID(i)];
1294		vioif_tx_queue_clear(sc, vsc, netq);
1295	}
1296
1297	/* all packet processing is stopped */
1298	for (i = 0; i < act_qnum; i++) {
1299		netq = &sc->sc_netqs[i];
1300
1301		mutex_enter(&netq->netq_lock);
1302		netq->netq_stopping = false;
1303		mutex_exit(&netq->netq_lock);
1304	}
1305}
1306
1307static void
1308vioif_send_common_locked(struct ifnet *ifp, struct vioif_netqueue *netq,
1309    bool is_transmit)
1310{
1311	struct vioif_softc *sc = ifp->if_softc;
1312	struct virtio_softc *vsc = sc->sc_virtio;
1313	struct virtqueue *vq = netq->netq_vq;
1314	struct vioif_tx_context *txc;
1315	struct vioif_net_map *map;
1316	struct mbuf *m;
1317	int queued = 0;
1318
1319	KASSERT(mutex_owned(&netq->netq_lock));
1320
1321	if (netq->netq_stopping ||
1322	    !ISSET(ifp->if_flags, IFF_RUNNING))
1323		return;
1324
1325	txc = netq->netq_ctx;
1326
1327	if (!txc->txc_link_active)
1328		return;
1329
1330	if (!is_transmit &&
1331	    ISSET(ifp->if_flags, IFF_OACTIVE))
1332		return;
1333
1334	for (;;) {
1335		int slot, r;
1336		r = virtio_enqueue_prep(vsc, vq, &slot);
1337		if (r == EAGAIN)
1338			break;
1339		if (__predict_false(r != 0))
1340			panic("enqueue_prep for tx buffers");
1341
1342		if (is_transmit)
1343			m = pcq_get(txc->txc_intrq);
1344		else
1345			IFQ_DEQUEUE(&ifp->if_snd, m);
1346
1347		if (m == NULL) {
1348			virtio_enqueue_abort(vsc, vq, slot);
1349			break;
1350		}
1351
1352		map = &netq->netq_maps[slot];
1353		KASSERT(map->vnm_mbuf == NULL);
1354
1355		r = vioif_net_load_mbuf(vsc, map, m, BUS_DMA_WRITE);
1356		if (r != 0) {
1357			/* maybe just too fragmented */
1358			struct mbuf *newm;
1359
1360			newm = m_defrag(m, M_NOWAIT);
1361			if (newm != NULL) {
1362				m = newm;
1363				r = vioif_net_load_mbuf(vsc, map, m,
1364				    BUS_DMA_WRITE);
1365			} else {
1366				txc->txc_defrag_failed.ev_count++;
1367				r = -1;
1368			}
1369
1370			if (r != 0) {
1371				netq->netq_mbuf_load_failed.ev_count++;
1372				m_freem(m);
1373				if_statinc(ifp, if_oerrors);
1374				virtio_enqueue_abort(vsc, vq, slot);
1375				continue;
1376			}
1377		}
1378
1379		memset(map->vnm_hdr, 0, sc->sc_hdr_size);
1380
1381		r = vioif_net_enqueue_tx(vsc, vq, slot, map);
1382		if (r != 0) {
1383			netq->netq_enqueue_failed.ev_count++;
1384			vioif_net_unload_mbuf(vsc, map);
1385			m_freem(m);
1386			/* slot already freed by vioif_net_enqueue_tx */
1387
1388			if_statinc(ifp, if_oerrors);
1389			continue;
1390		}
1391
1392		queued++;
1393		bpf_mtap(ifp, m, BPF_D_OUT);
1394	}
1395
1396	if (queued > 0) {
1397		vioif_notify(vsc, vq);
1398		ifp->if_timer = 5;
1399	}
1400}
1401
1402static void
1403vioif_start_locked(struct ifnet *ifp, struct vioif_netqueue *netq)
1404{
1405
1406	/*
1407	 * ifp->if_obytes and ifp->if_omcasts are added in if_transmit()@if.c.
1408	 */
1409	vioif_send_common_locked(ifp, netq, false);
1410
1411}
1412
1413static void
1414vioif_start(struct ifnet *ifp)
1415{
1416	struct vioif_softc *sc = ifp->if_softc;
1417	struct vioif_netqueue *txq0 = &sc->sc_netqs[VIOIF_NETQ_TXQID(0)];
1418
1419#ifdef VIOIF_MPSAFE
1420	KASSERT(if_is_mpsafe(ifp));
1421#endif
1422
1423	mutex_enter(&txq0->netq_lock);
1424	vioif_start_locked(ifp, txq0);
1425	mutex_exit(&txq0->netq_lock);
1426}
1427
1428static inline int
1429vioif_select_txqueue(struct ifnet *ifp, struct mbuf *m)
1430{
1431	struct vioif_softc *sc = ifp->if_softc;
1432	u_int cpuid = cpu_index(curcpu());
1433
1434	return VIOIF_NETQ_TXQID(cpuid % sc->sc_act_nvq_pairs);
1435}
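/*
 * TX queue selection hashes on the sending CPU, so with fewer active queue
 * pairs than CPUs several CPUs share a TX queue; e.g. with
 * sc_act_nvq_pairs == 2, CPUs 0, 2, 4, ... use tx0 and CPUs 1, 3, 5, ...
 * use tx1.
 */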
1436
1437static void
1438vioif_transmit_locked(struct ifnet *ifp, struct vioif_netqueue *netq)
1439{
1440
1441	vioif_send_common_locked(ifp, netq, true);
1442}
1443
1444static int
1445vioif_transmit(struct ifnet *ifp, struct mbuf *m)
1446{
1447	struct vioif_softc *sc = ifp->if_softc;
1448	struct vioif_netqueue *netq;
1449	struct vioif_tx_context *txc;
1450	int qid;
1451
1452	qid = vioif_select_txqueue(ifp, m);
1453	netq = &sc->sc_netqs[qid];
1454	txc = netq->netq_ctx;
1455
1456	if (__predict_false(!pcq_put(txc->txc_intrq, m))) {
1457		m_freem(m);
1458		return ENOBUFS;
1459	}
1460
1461	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
1462	if_statadd_ref(nsr, if_obytes, m->m_pkthdr.len);
1463	if (m->m_flags & M_MCAST)
1464		if_statinc_ref(nsr, if_omcasts);
1465	IF_STAT_PUTREF(ifp);
1466
1467	if (mutex_tryenter(&netq->netq_lock)) {
1468		vioif_transmit_locked(ifp, netq);
1469		mutex_exit(&netq->netq_lock);
1470	}
1471
1472	return 0;
1473}
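/*
 * If the queue lock is contended above, the packet stays on txc_intrq and
 * is sent later, either by a subsequent vioif_transmit() call or by the
 * deferred-transmit softint scheduled from TX completion handling.
 */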
1474
1475static void
1476vioif_deferred_transmit(void *arg)
1477{
1478	struct vioif_netqueue *netq = arg;
1479	struct virtio_softc *vsc = netq->netq_vq->vq_owner;
1480	struct vioif_softc *sc = device_private(virtio_child(vsc));
1481	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1482
1483	mutex_enter(&netq->netq_lock);
1484	vioif_send_common_locked(ifp, netq, true);
1485	mutex_exit(&netq->netq_lock);
1486}
1487
1488static int
1489vioif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1490{
1491	int s, r;
1492
1493	s = splnet();
1494
1495	r = ether_ioctl(ifp, cmd, data);
1496	if (r == ENETRESET && (cmd == SIOCADDMULTI || cmd == SIOCDELMULTI)) {
1497		if (ifp->if_flags & IFF_RUNNING) {
1498			r = vioif_rx_filter(ifp->if_softc);
1499		} else {
1500			r = 0;
1501		}
1502	}
1503
1504	splx(s);
1505
1506	return r;
1507}
1508
1509static void
1510vioif_watchdog(struct ifnet *ifp)
1511{
1512	struct vioif_softc *sc = ifp->if_softc;
1513	struct vioif_netqueue *netq;
1514	int i;
1515
1516	if (ifp->if_flags & IFF_RUNNING) {
1517		for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1518			netq = &sc->sc_netqs[VIOIF_NETQ_TXQID(i)];
1519
1520			mutex_enter(&netq->netq_lock);
1521			if (!netq->netq_running_handle) {
1522				netq->netq_running_handle = true;
1523				vioif_net_sched_handle(sc, netq);
1524			}
1525			mutex_exit(&netq->netq_lock);
1526		}
1527	}
1528}
1529
1530static void
1531vioif_net_sched_handle(struct vioif_softc *sc, struct vioif_netqueue *netq)
1532{
1533
1534	KASSERT(mutex_owned(&netq->netq_lock));
1535	KASSERT(!netq->netq_stopping);
1536
1537	if (netq->netq_workqueue) {
1538		vioif_work_add(sc->sc_txrx_workqueue, &netq->netq_work);
1539	} else {
1540		softint_schedule(netq->netq_softint);
1541	}
1542}
1543
1544static int
1545vioif_net_load_mbuf(struct virtio_softc *vsc, struct vioif_net_map *map,
1546   struct mbuf *m, int dma_flags)
1547{
1548	int r;
1549
1550	KASSERT(map->vnm_mbuf == NULL);
1551
1552	r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1553	    map->vnm_mbuf_map, m, dma_flags | BUS_DMA_NOWAIT);
1554	if (r == 0) {
1555		map->vnm_mbuf = m;
1556	}
1557
1558	return r;
1559}
1560
1561static void
1562vioif_net_unload_mbuf(struct virtio_softc *vsc, struct vioif_net_map *map)
1563{
1564
1565	KASSERT(map->vnm_mbuf != NULL);
1566	bus_dmamap_unload(virtio_dmat(vsc), map->vnm_mbuf_map);
1567	map->vnm_mbuf = NULL;
1568}
1569
1570static int
1571vioif_net_enqueue(struct virtio_softc *vsc, struct virtqueue *vq,
1572    int slot, struct vioif_net_map *map, int dma_ops, bool is_write)
1573{
1574	int r;
1575
1576	KASSERT(map->vnm_mbuf != NULL);
1577
1578	/* This should actually never fail */
1579	r = virtio_enqueue_reserve(vsc, vq, slot,
1580	    map->vnm_mbuf_map->dm_nsegs + 1);
1581	if (r != 0) {
1582		/* slot already freed by virtio_enqueue_reserve */
1583		return r;
1584	}
1585
1586	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_mbuf_map,
1587	    0, map->vnm_mbuf_map->dm_mapsize, dma_ops);
1588	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_hdr_map,
1589	    0, map->vnm_hdr_map->dm_mapsize, dma_ops);
1590
1591	virtio_enqueue(vsc, vq, slot, map->vnm_hdr_map, is_write);
1592	virtio_enqueue(vsc, vq, slot, map->vnm_mbuf_map, is_write);
1593	virtio_enqueue_commit(vsc, vq, slot, false);
1594
1595	return 0;
1596}
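/*
 * Each packet thus occupies dm_nsegs + 1 descriptors: one for the
 * virtio_net_hdr and one per mbuf DMA segment, enqueued in that order.
 */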
1597
1598static int
1599vioif_net_enqueue_tx(struct virtio_softc *vsc, struct virtqueue *vq,
1600    int slot, struct vioif_net_map *map)
1601{
1602
1603	return vioif_net_enqueue(vsc, vq, slot, map,
1604	    BUS_DMASYNC_PREWRITE, true);
1605}
1606
1607static int
1608vioif_net_enqueue_rx(struct virtio_softc *vsc, struct virtqueue *vq,
1609    int slot, struct vioif_net_map *map)
1610{
1611
1612	return vioif_net_enqueue(vsc, vq, slot, map,
1613	    BUS_DMASYNC_PREREAD, false);
1614}
1615
1616static void
1617vioif_notify(struct virtio_softc *vsc, struct virtqueue *vq)
1618{
1619
1620	virtio_enqueue_commit(vsc, vq, -1, true);
1621}
1622
1623static struct mbuf *
1624vioif_net_dequeue_commit(struct virtio_softc *vsc, struct virtqueue *vq,
1625   int slot, struct vioif_net_map *map, int dma_flags)
1626{
1627	struct mbuf *m;
1628
1629	m = map->vnm_mbuf;
1630	KASSERT(m != NULL);
1631	map->vnm_mbuf = NULL;
1632
1633	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_hdr_map,
1634	    0, map->vnm_hdr_map->dm_mapsize, dma_flags);
1635	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_mbuf_map,
1636	    0, map->vnm_mbuf_map->dm_mapsize, dma_flags);
1637
1638	bus_dmamap_unload(virtio_dmat(vsc), map->vnm_mbuf_map);
1639	virtio_dequeue_commit(vsc, vq, slot);
1640
1641	return m;
1642}
1643
1644static void
1645vioif_net_intr_enable(struct vioif_softc *sc, struct virtio_softc *vsc)
1646{
1647	struct vioif_netqueue *netq;
1648	size_t i, act_qnum;
1649	int enqueued;
1650
1651	act_qnum = sc->sc_act_nvq_pairs * 2;
1652	for (i = 0; i < act_qnum; i++) {
1653		netq = &sc->sc_netqs[i];
1654
1655		KASSERT(!netq->netq_stopping);
1656		KASSERT(!netq->netq_running_handle);
1657
1658		enqueued = virtio_start_vq_intr(vsc, netq->netq_vq);
1659		if (enqueued != 0) {
1660			virtio_stop_vq_intr(vsc, netq->netq_vq);
1661
1662			mutex_enter(&netq->netq_lock);
1663			netq->netq_running_handle = true;
1664			vioif_net_sched_handle(sc, netq);
1665			mutex_exit(&netq->netq_lock);
1666		}
1667	}
1668}
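/*
 * In the loop above (and in the RX/TX handlers below) a non-zero return
 * from virtio_start_vq_intr() is taken to mean that entries were queued
 * while interrupts were off; the interrupt is turned back off and the
 * handler is scheduled so that window is not lost.
 */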
1669
1670static void
1671vioif_net_intr_disable(struct vioif_softc *sc, struct virtio_softc *vsc)
1672{
1673	struct vioif_netqueue *netq;
1674	size_t i, act_qnum;
1675
1676	act_qnum = sc->sc_act_nvq_pairs * 2;
1677	for (i = 0; i < act_qnum; i++) {
1678		netq = &sc->sc_netqs[i];
1679
1680		virtio_stop_vq_intr(vsc, netq->netq_vq);
1681	}
1682
1683}
1684
1685/*
1686 * Receive implementation
1687 */
1688/* add mbufs for all the empty receive slots */
1689static void
1690vioif_populate_rx_mbufs_locked(struct vioif_softc *sc, struct vioif_netqueue *netq)
1691{
1692	struct virtqueue *vq = netq->netq_vq;
1693	struct virtio_softc *vsc = vq->vq_owner;
1694	struct vioif_rx_context *rxc;
1695	struct vioif_net_map *map;
1696	struct mbuf *m;
1697	int i, r, ndone = 0;
1698
1699	KASSERT(mutex_owned(&netq->netq_lock));
1700
1701	rxc = netq->netq_ctx;
1702
1703	for (i = 0; i < vq->vq_num; i++) {
1704		int slot;
1705		r = virtio_enqueue_prep(vsc, vq, &slot);
1706		if (r == EAGAIN)
1707			break;
1708		if (__predict_false(r != 0))
1709			panic("enqueue_prep for rx buffers");
1710
1711		MGETHDR(m, M_DONTWAIT, MT_DATA);
1712		if (m == NULL) {
1713			virtio_enqueue_abort(vsc, vq, slot);
1714			rxc->rxc_mbuf_enobufs.ev_count++;
1715			break;
1716		}
1717		MCLGET(m, M_DONTWAIT);
1718		if ((m->m_flags & M_EXT) == 0) {
1719			virtio_enqueue_abort(vsc, vq, slot);
1720			m_freem(m);
1721			rxc->rxc_mbuf_enobufs.ev_count++;
1722			break;
1723		}
1724
1725		m->m_len = m->m_pkthdr.len = MCLBYTES;
1726		m_adj(m, ETHER_ALIGN);
1727
1728		map = &netq->netq_maps[slot];
1729		r = vioif_net_load_mbuf(vsc, map, m, BUS_DMA_READ);
1730		if (r != 0) {
1731			virtio_enqueue_abort(vsc, vq, slot);
1732			m_freem(m);
1733			netq->netq_mbuf_load_failed.ev_count++;
1734			break;
1735		}
1736
1737		r = vioif_net_enqueue_rx(vsc, vq, slot, map);
1738		if (r != 0) {
1739			vioif_net_unload_mbuf(vsc, map);
1740			netq->netq_enqueue_failed.ev_count++;
1741			m_freem(m);
1742			/* slot already freed by vioif_net_enqueue_rx */
1743			break;
1744		}
1745
1746		ndone++;
1747	}
1748
1749	if (ndone > 0)
1750		vioif_notify(vsc, vq);
1751}
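/*
 * Each RX buffer above is a single mbuf cluster of MCLBYTES, trimmed by
 * ETHER_ALIGN so the IP header ends up aligned; the virtio_net_hdr travels
 * in its own descriptor (see vioif_net_enqueue()).
 */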
1752
1753static void
1754vioif_rx_queue_clear(struct vioif_softc *sc, struct virtio_softc *vsc,
1755    struct vioif_netqueue *netq)
1756{
1757	struct vioif_net_map *map;
1758	struct mbuf *m;
1759	unsigned int i, vq_num;
1760	bool more;
1761
1762	mutex_enter(&netq->netq_lock);
1763	vq_num = netq->netq_vq->vq_num;
1764
1765	for (;;) {
1766		more = vioif_rx_deq_locked(sc, vsc, netq, vq_num, NULL);
1767		if (more == false)
1768			break;
1769	}
1770
1771	for (i = 0; i < vq_num; i++) {
1772		map = &netq->netq_maps[i];
1773
1774		m = map->vnm_mbuf;
1775		if (m == NULL)
1776			continue;
1777
1778		vioif_net_unload_mbuf(vsc, map);
1779		m_freem(m);
1780	}
1781	mutex_exit(&netq->netq_lock);
1782}
1783
1784/* dequeue received packets */
1785static bool
1786vioif_rx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
1787    struct vioif_netqueue *netq, u_int limit, size_t *ndeqp)
1788{
1789	struct virtqueue *vq = netq->netq_vq;
1790	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1791	struct vioif_net_map *map;
1792	struct mbuf *m;
1793	int slot, len;
1794	bool more;
1795	size_t ndeq;
1796
1797	KASSERT(mutex_owned(&netq->netq_lock));
1798
1799	more = false;
1800	ndeq = 0;
1801
1802	if (virtio_vq_is_enqueued(vsc, vq) == false)
1803		goto done;
1804
1805	for (;;ndeq++) {
1806		if (ndeq >= limit) {
1807			more = true;
1808			break;
1809		}
1810
1811		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
1812			break;
1813
1814		map = &netq->netq_maps[slot];
1815		KASSERT(map->vnm_mbuf != NULL);
1816		m = vioif_net_dequeue_commit(vsc, vq, slot,
1817		    map, BUS_DMASYNC_POSTREAD);
1818		KASSERT(m != NULL);
1819
1820		m->m_len = m->m_pkthdr.len = len - sc->sc_hdr_size;
1821		m_set_rcvif(m, ifp);
1822		if_percpuq_enqueue(ifp->if_percpuq, m);
1823	}
1824
1825done:
1826	if (ndeqp != NULL)
1827		*ndeqp = ndeq;
1828
1829	return more;
1830}
1831
1832/* rx interrupt; call _dequeue above and schedule a softint */
1833
1834static void
1835vioif_rx_handle_locked(void *xnetq, u_int limit)
1836{
1837	struct vioif_netqueue *netq = xnetq;
1838	struct virtqueue *vq = netq->netq_vq;
1839	struct virtio_softc *vsc = vq->vq_owner;
1840	struct vioif_softc *sc = device_private(virtio_child(vsc));
1841	bool more;
1842	int enqueued;
1843	size_t ndeq;
1844
1845	KASSERT(mutex_owned(&netq->netq_lock));
1846	KASSERT(!netq->netq_stopping);
1847
1848	more = vioif_rx_deq_locked(sc, vsc, netq, limit, &ndeq);
1849	if (ndeq > 0)
1850		vioif_populate_rx_mbufs_locked(sc, netq);
1851
1852	if (more) {
1853		vioif_net_sched_handle(sc, netq);
1854		return;
1855	}
1856
1857	enqueued = virtio_start_vq_intr(vsc, netq->netq_vq);
1858	if (enqueued != 0) {
1859		virtio_stop_vq_intr(vsc, netq->netq_vq);
1860		vioif_net_sched_handle(sc, netq);
1861		return;
1862	}
1863
1864	netq->netq_running_handle = false;
1865}
1866
1867static int
1868vioif_rx_intr(void *arg)
1869{
1870	struct vioif_netqueue *netq = arg;
1871	struct virtqueue *vq = netq->netq_vq;
1872	struct virtio_softc *vsc = vq->vq_owner;
1873	struct vioif_softc *sc = device_private(virtio_child(vsc));
1874	u_int limit;
1875
1876	mutex_enter(&netq->netq_lock);
1877
1878	/* handler is already running in softint/workqueue */
1879	if (netq->netq_running_handle)
1880		goto done;
1881
1882	netq->netq_running_handle = true;
1883
1884	limit = sc->sc_rx_intr_process_limit;
1885	virtio_stop_vq_intr(vsc, vq);
1886	vioif_rx_handle_locked(netq, limit);
1887
1888done:
1889	mutex_exit(&netq->netq_lock);
1890	return 1;
1891}
1892
1893static void
1894vioif_rx_handle(void *xnetq)
1895{
1896	struct vioif_netqueue *netq = xnetq;
1897	struct virtqueue *vq = netq->netq_vq;
1898	struct virtio_softc *vsc = vq->vq_owner;
1899	struct vioif_softc *sc = device_private(virtio_child(vsc));
1900	u_int limit;
1901
1902	mutex_enter(&netq->netq_lock);
1903
1904	KASSERT(netq->netq_running_handle);
1905
1906	if (netq->netq_stopping) {
1907		netq->netq_running_handle = false;
1908		goto done;
1909	}
1910
1911	limit = sc->sc_rx_process_limit;
1912	vioif_rx_handle_locked(netq, limit);
1913
1914done:
1915	mutex_exit(&netq->netq_lock);
1916}
1917
1918/*
1919 * Transmission implementation
1920 */
1921/* actual transmission is done in if_start */
1922/* tx interrupt; dequeue and free mbufs */
1923/*
1924 * tx interrupt is actually disabled; this should be called upon
1925 * tx vq full and watchdog
1926 */
1927
1928static void
1929vioif_tx_handle_locked(struct vioif_netqueue *netq, u_int limit)
1930{
1931	struct virtqueue *vq = netq->netq_vq;
1932	struct vioif_tx_context *txc = netq->netq_ctx;
1933	struct virtio_softc *vsc = vq->vq_owner;
1934	struct vioif_softc *sc = device_private(virtio_child(vsc));
1935	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1936	bool more;
1937	int enqueued;
1938
1939	KASSERT(mutex_owned(&netq->netq_lock));
1940	KASSERT(!netq->netq_stopping);
1941
1942	more = vioif_tx_deq_locked(sc, vsc, netq, limit);
1943	if (more) {
1944		vioif_net_sched_handle(sc, netq);
1945		return;
1946	}
1947
1948	enqueued = (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX) ?
1949	    virtio_postpone_intr_smart(vsc, vq):
1950	    virtio_start_vq_intr(vsc, vq);
1951	if (enqueued != 0) {
1952		virtio_stop_vq_intr(vsc, vq);
1953		vioif_net_sched_handle(sc, netq);
1954		return;
1955	}
1956
1957	netq->netq_running_handle = false;
1958
1959	/* for ALTQ */
1960	if (netq == &sc->sc_netqs[VIOIF_NETQ_TXQID(0)]) {
1961		if_schedule_deferred_start(ifp);
1962		ifp->if_flags &= ~IFF_OACTIVE;
1963	}
1964	softint_schedule(txc->txc_deferred_transmit);
1965}
1966
1967static int
1968vioif_tx_intr(void *arg)
1969{
1970	struct vioif_netqueue *netq = arg;
1971	struct virtqueue *vq = netq->netq_vq;
1972	struct virtio_softc *vsc = vq->vq_owner;
1973	struct vioif_softc *sc = device_private(virtio_child(vsc));
1974	u_int limit;
1975
1976	mutex_enter(&netq->netq_lock);
1977
1978	/* tx handler is already running in softint/workqueue */
1979	if (netq->netq_running_handle)
1980		goto done;
1981
1982	if (netq->netq_stopping)
1983		goto done;
1984
1985	netq->netq_running_handle = true;
1986
1987	virtio_stop_vq_intr(vsc, vq);
1988	netq->netq_workqueue = sc->sc_txrx_workqueue_sysctl;
1989	limit = sc->sc_tx_intr_process_limit;
1990	vioif_tx_handle_locked(netq, limit);
1991
1992done:
1993	mutex_exit(&netq->netq_lock);
1994	return 1;
1995}
1996
1997static void
1998vioif_tx_handle(void *xnetq)
1999{
2000	struct vioif_netqueue *netq = xnetq;
2001	struct virtqueue *vq = netq->netq_vq;
2002	struct virtio_softc *vsc = vq->vq_owner;
2003	struct vioif_softc *sc = device_private(virtio_child(vsc));
2004	u_int limit;
2005
2006	mutex_enter(&netq->netq_lock);
2007
2008	KASSERT(netq->netq_running_handle);
2009
2010	if (netq->netq_stopping) {
2011		netq->netq_running_handle = false;
2012		goto done;
2013	}
2014
2015	limit = sc->sc_tx_process_limit;
2016	vioif_tx_handle_locked(netq, limit);
2017
2018done:
2019	mutex_exit(&netq->netq_lock);
2020}
2021
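/*
 * drain a tx queue: dequeue everything the device has completed, then
 * unload and free any mbufs still attached to the queue's DMA maps.
 */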
2022static void
2023vioif_tx_queue_clear(struct vioif_softc *sc, struct virtio_softc *vsc,
2024    struct vioif_netqueue *netq)
2025{
2026	struct vioif_net_map *map;
2027	struct mbuf *m;
2028	unsigned int i, vq_num;
2029	bool more;
2030
2031	mutex_enter(&netq->netq_lock);
2032
2033	vq_num = netq->netq_vq->vq_num;
2034	for (;;) {
2035		more = vioif_tx_deq_locked(sc, vsc, netq, vq_num);
2036		if (more == false)
2037			break;
2038	}
2039
2040	for (i = 0; i < vq_num; i++) {
2041		map = &netq->netq_maps[i];
2042
2043		m = map->vnm_mbuf;
2044		if (m == NULL)
2045			continue;
2046
2047		vioif_net_unload_mbuf(vsc, map);
2048		m_freem(m);
2049	}
2050	mutex_exit(&netq->netq_lock);
2051}
2052
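/*
 * dequeue up to `limit' transmitted packets, counting them in
 * if_opackets and freeing the mbufs; returns true if the limit was
 * reached before the queue ran dry.
 */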
2053static bool
2054vioif_tx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
2055    struct vioif_netqueue *netq, u_int limit)
2056{
2057	struct virtqueue *vq = netq->netq_vq;
2058	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2059	struct vioif_net_map *map;
2060	struct mbuf *m;
2061	int slot, len;
2062	bool more = false;
2063
2064	KASSERT(mutex_owned(&netq->netq_lock));
2065
2066	if (virtio_vq_is_enqueued(vsc, vq) == false)
2067		return false;
2068
2069	for (;;) {
2070		if (limit-- == 0) {
2071			more = true;
2072			break;
2073		}
2074
2075		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
2076			break;
2077
2078		map = &netq->netq_maps[slot];
2079		KASSERT(map->vnm_mbuf != NULL);
2080		m = vioif_net_dequeue_commit(vsc, vq, slot,
2081		    map, BUS_DMASYNC_POSTWRITE);
2082		KASSERT(m != NULL);
2083
2084		if_statinc(ifp, if_opackets);
2085		m_freem(m);
2086	}
2087
2088	return more;
2089}
2090
2091/*
2092 * Control vq
2093 */
2094/* issue a control virtqueue command and wait for completion */
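/*
 * the control virtqueue carries one command at a time; acquire/release
 * serialize callers on ctrlq_wait_lock using the ctrlq_inuse state
 * (FREE -> INUSE -> DONE, the last set by vioif_ctrl_intr()).
 */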
2095static void
2096vioif_ctrl_acquire(struct vioif_softc *sc)
2097{
2098	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2099
2100	mutex_enter(&ctrlq->ctrlq_wait_lock);
2101	while (ctrlq->ctrlq_inuse != FREE)
2102		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
2103	ctrlq->ctrlq_inuse = INUSE;
2104	ctrlq->ctrlq_owner = curlwp;
2105	mutex_exit(&ctrlq->ctrlq_wait_lock);
2106}
2107
2108static void
2109vioif_ctrl_release(struct vioif_softc *sc)
2110{
2111	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2112
2113	KASSERT(ctrlq->ctrlq_inuse != FREE);
2114	KASSERT(ctrlq->ctrlq_owner == curlwp);
2115
2116	mutex_enter(&ctrlq->ctrlq_wait_lock);
2117	ctrlq->ctrlq_inuse = FREE;
2118	ctrlq->ctrlq_owner = NULL;
2119	cv_signal(&ctrlq->ctrlq_wait);
2120	mutex_exit(&ctrlq->ctrlq_wait_lock);
2121}
2122
2123static int
2124vioif_ctrl_load_cmdspec(struct vioif_softc *sc,
2125    struct vioif_ctrl_cmdspec *specs, int nspecs)
2126{
2127	struct virtio_softc *vsc = sc->sc_virtio;
2128	int i, r, loaded;
2129
2130	loaded = 0;
2131	for (i = 0; i < nspecs; i++) {
2132		r = bus_dmamap_load(virtio_dmat(vsc),
2133		    specs[i].dmamap, specs[i].buf, specs[i].bufsize,
2134		    NULL, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
2135		if (r) {
2136			sc->sc_ctrlq.ctrlq_cmd_load_failed.ev_count++;
2137			goto err;
2138		}
2139		loaded++;
2140
2141	}
2142
2143	return r;
2144
2145err:
2146	for (i = 0; i < loaded; i++) {
2147		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2148	}
2149
2150	return r;
2151}
2152
2153static void
2154vioif_ctrl_unload_cmdspec(struct vioif_softc *sc,
2155    struct vioif_ctrl_cmdspec *specs, int nspecs)
2156{
2157	struct virtio_softc *vsc = sc->sc_virtio;
2158	int i;
2159
2160	for (i = 0; i < nspecs; i++) {
2161		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2162	}
2163}
2164
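/*
 * enqueue a control command: the command header and the caller's
 * payload buffers are device-readable descriptors, the status buffer
 * is the single device-writable one.  The caller sleeps on ctrlq_wait
 * until vioif_ctrl_intr() marks the command DONE, then the returned
 * status is checked against VIRTIO_NET_OK.
 */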
2165static int
2166vioif_ctrl_send_command(struct vioif_softc *sc, uint8_t class, uint8_t cmd,
2167    struct vioif_ctrl_cmdspec *specs, int nspecs)
2168{
2169	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2170	struct virtqueue *vq = ctrlq->ctrlq_vq;
2171	struct virtio_softc *vsc = sc->sc_virtio;
2172	int i, r, slot;
2173
2174	ctrlq->ctrlq_cmd->class = class;
2175	ctrlq->ctrlq_cmd->command = cmd;
2176
2177	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap,
2178	    0, sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_PREWRITE);
2179	for (i = 0; i < nspecs; i++) {
2180		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap,
2181		    0, specs[i].bufsize, BUS_DMASYNC_PREWRITE);
2182	}
2183	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap,
2184	    0, sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_PREREAD);
2185
2186	/* we need to explicitly (re)start vq intr when using RING EVENT IDX */
2187	if (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX)
2188		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
2189
2190	r = virtio_enqueue_prep(vsc, vq, &slot);
2191	if (r != 0)
2192		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2193	r = virtio_enqueue_reserve(vsc, vq, slot, nspecs + 2);
2194	if (r != 0)
2195		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2196	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_cmd_dmamap, true);
2197	for (i = 0; i < nspecs; i++) {
2198		virtio_enqueue(vsc, vq, slot, specs[i].dmamap, true);
2199	}
2200	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_status_dmamap, false);
2201	virtio_enqueue_commit(vsc, vq, slot, true);
2202
2203	/* wait for done */
2204	mutex_enter(&ctrlq->ctrlq_wait_lock);
2205	while (ctrlq->ctrlq_inuse != DONE)
2206		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
2207	mutex_exit(&ctrlq->ctrlq_wait_lock);
2208	/* already dequeued */
2209
2210	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap, 0,
2211	    sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
2212	for (i = 0; i < nspecs; i++) {
2213		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap, 0,
2214		    specs[i].bufsize, BUS_DMASYNC_POSTWRITE);
2215	}
2216	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap, 0,
2217	    sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_POSTREAD);
2218
2219	if (ctrlq->ctrlq_status->ack == VIRTIO_NET_OK)
2220		r = 0;
2221	else {
2222		device_printf(sc->sc_dev, "control command failed\n");
2223		sc->sc_ctrlq.ctrlq_cmd_failed.ev_count++;
2224		r = EIO;
2225	}
2226
2227	return r;
2228}
2229
2230static int
2231vioif_ctrl_rx(struct vioif_softc *sc, int cmd, bool onoff)
2232{
2233	struct virtio_net_ctrl_rx *rx = sc->sc_ctrlq.ctrlq_rx;
2234	struct vioif_ctrl_cmdspec specs[1];
2235	int r;
2236
2237	if (!sc->sc_has_ctrl)
2238		return ENOTSUP;
2239
2240	vioif_ctrl_acquire(sc);
2241
2242	rx->onoff = onoff;
2243	specs[0].dmamap = sc->sc_ctrlq.ctrlq_rx_dmamap;
2244	specs[0].buf = rx;
2245	specs[0].bufsize = sizeof(*rx);
2246
2247	r = vioif_ctrl_send_command(sc, VIRTIO_NET_CTRL_RX, cmd,
2248	    specs, __arraycount(specs));
2249
2250	vioif_ctrl_release(sc);
2251	return r;
2252}
2253
2254static int
2255vioif_set_promisc(struct vioif_softc *sc, bool onoff)
2256{
2257	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, onoff);
2258}
2259
2260static int
2261vioif_set_allmulti(struct vioif_softc *sc, bool onoff)
2262{
2263	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, onoff);
2264}
2265
2266/* issue VIRTIO_NET_CTRL_MAC_TABLE_SET command and wait for completion */
2267static int
2268vioif_set_rx_filter(struct vioif_softc *sc)
2269{
2270	/* filter already set in ctrlq->ctrlq_mac_tbl */
2271	struct virtio_softc *vsc = sc->sc_virtio;
2272	struct virtio_net_ctrl_mac_tbl *mac_tbl_uc, *mac_tbl_mc;
2273	struct vioif_ctrl_cmdspec specs[2];
2274	int nspecs = __arraycount(specs);
2275	int r;
2276
2277	mac_tbl_uc = sc->sc_ctrlq.ctrlq_mac_tbl_uc;
2278	mac_tbl_mc = sc->sc_ctrlq.ctrlq_mac_tbl_mc;
2279
2280	if (!sc->sc_has_ctrl)
2281		return ENOTSUP;
2282
2283	vioif_ctrl_acquire(sc);
2284
2285	specs[0].dmamap = sc->sc_ctrlq.ctrlq_tbl_uc_dmamap;
2286	specs[0].buf = mac_tbl_uc;
2287	specs[0].bufsize = sizeof(*mac_tbl_uc)
2288	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_uc->nentries));
2289
2290	specs[1].dmamap = sc->sc_ctrlq.ctrlq_tbl_mc_dmamap;
2291	specs[1].buf = mac_tbl_mc;
2292	specs[1].bufsize = sizeof(*mac_tbl_mc)
2293	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_mc->nentries));
2294
2295	r = vioif_ctrl_load_cmdspec(sc, specs, nspecs);
2296	if (r != 0)
2297		goto out;
2298
2299	r = vioif_ctrl_send_command(sc,
2300	    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET,
2301	    specs, nspecs);
2302
2303	vioif_ctrl_unload_cmdspec(sc, specs, nspecs);
2304
2305out:
2306	vioif_ctrl_release(sc);
2307
2308	return r;
2309}
2310
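/*
 * program the device MAC address: via the CTRL_MAC_ADDR_SET control
 * command when VIRTIO_NET_F_CTRL_MAC_ADDR was negotiated, otherwise by
 * writing the config space MAC bytes one at a time.
 */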
2311static int
2312vioif_set_mac_addr(struct vioif_softc *sc)
2313{
2314	struct virtio_net_ctrl_mac_addr *ma =
2315	    sc->sc_ctrlq.ctrlq_mac_addr;
2316	struct vioif_ctrl_cmdspec specs[1];
2317	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2318	int nspecs = __arraycount(specs);
2319	uint64_t features;
2320	int r;
2321	size_t i;
2322
2323	if (!sc->sc_has_ctrl)
2324		return ENOTSUP;
2325
2326	if (memcmp(CLLADDR(ifp->if_sadl), sc->sc_mac,
2327	    ETHER_ADDR_LEN) == 0) {
2328		return 0;
2329	}
2330
2331	memcpy(sc->sc_mac, CLLADDR(ifp->if_sadl), ETHER_ADDR_LEN);
2332
2333	features = virtio_features(sc->sc_virtio);
2334	if (features & VIRTIO_NET_F_CTRL_MAC_ADDR) {
2335		vioif_ctrl_acquire(sc);
2336
2337		memcpy(ma->mac, sc->sc_mac, ETHER_ADDR_LEN);
2338		specs[0].dmamap = sc->sc_ctrlq.ctrlq_mac_addr_dmamap;
2339		specs[0].buf = ma;
2340		specs[0].bufsize = sizeof(*ma);
2341
2342		r = vioif_ctrl_send_command(sc,
2343		    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_ADDR_SET,
2344		    specs, nspecs);
2345
2346		vioif_ctrl_release(sc);
2347	} else {
2348		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
2349			virtio_write_device_config_1(sc->sc_virtio,
2350			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
2351		}
2352		r = 0;
2353	}
2354
2355	return r;
2356}
2357
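/*
 * tell the device how many virtqueue pairs to use
 * (VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET); only meaningful when more than
 * one pair is requested.
 */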
2358static int
2359vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *sc, int nvq_pairs)
2360{
2361	struct virtio_net_ctrl_mq *mq = sc->sc_ctrlq.ctrlq_mq;
2362	struct vioif_ctrl_cmdspec specs[1];
2363	int r;
2364
2365	if (!sc->sc_has_ctrl)
2366		return ENOTSUP;
2367
2368	if (nvq_pairs <= 1)
2369		return EINVAL;
2370
2371	vioif_ctrl_acquire(sc);
2372
2373	mq->virtqueue_pairs = virtio_rw16(sc->sc_virtio, nvq_pairs);
2374	specs[0].dmamap = sc->sc_ctrlq.ctrlq_mq_dmamap;
2375	specs[0].buf = mq;
2376	specs[0].bufsize = sizeof(*mq);
2377
2378	r = vioif_ctrl_send_command(sc,
2379	    VIRTIO_NET_CTRL_MQ, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
2380	    specs, __arraycount(specs));
2381
2382	vioif_ctrl_release(sc);
2383
2384	return r;
2385}
2386
2387/* ctrl vq interrupt; wake up the command issuer */
2388static int
2389vioif_ctrl_intr(void *arg)
2390{
2391	struct vioif_ctrlqueue *ctrlq = arg;
2392	struct virtqueue *vq = ctrlq->ctrlq_vq;
2393	struct virtio_softc *vsc = vq->vq_owner;
2394	int r, slot;
2395
2396	if (virtio_vq_is_enqueued(vsc, vq) == false)
2397		return 0;
2398
2399	r = virtio_dequeue(vsc, vq, &slot, NULL);
2400	if (r == ENOENT)
2401		return 0;
2402	virtio_dequeue_commit(vsc, vq, slot);
2403
2404	mutex_enter(&ctrlq->ctrlq_wait_lock);
2405	ctrlq->ctrlq_inuse = DONE;
2406	cv_signal(&ctrlq->ctrlq_wait);
2407	mutex_exit(&ctrlq->ctrlq_wait_lock);
2408
2409	return 1;
2410}
2411
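/*
 * push IFF_PROMISC and IFF_ALLMULTI down to the device via the control
 * vq.  If a mode cannot be disabled, the corresponding interface flag
 * is forced back on so that it reflects the device state.
 */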
2412static int
2413vioif_ifflags(struct vioif_softc *sc)
2414{
2415	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2416	bool onoff;
2417	int r;
2418
2419	if (!sc->sc_has_ctrl) {
2420		/* no ctrl vq; always promisc and allmulti */
2421		ifp->if_flags |= (IFF_PROMISC | IFF_ALLMULTI);
2422		return 0;
2423	}
2424
2425	onoff = ifp->if_flags & IFF_ALLMULTI ? true : false;
2426	r = vioif_set_allmulti(sc, onoff);
2427	if (r != 0) {
2428		log(LOG_WARNING,
2429		    "%s: couldn't %sable ALLMULTI\n",
2430		    ifp->if_xname, onoff ? "en" : "dis");
2431		if (onoff == false) {
2432			ifp->if_flags |= IFF_ALLMULTI;
2433		}
2434	}
2435
2436	onoff = ifp->if_flags & IFF_PROMISC ? true : false;
2437	r = vioif_set_promisc(sc, onoff);
2438	if (r != 0) {
2439		log(LOG_WARNING,
2440		    "%s: couldn't %sable PROMISC\n",
2441		    ifp->if_xname, onoff ? "en" : "dis");
2442		if (onoff == false) {
2443			ifp->if_flags |= IFF_PROMISC;
2444		}
2445	}
2446
2447	return 0;
2448}
2449
2450static int
2451vioif_ifflags_cb(struct ethercom *ec)
2452{
2453	struct ifnet *ifp = &ec->ec_if;
2454	struct vioif_softc *sc = ifp->if_softc;
2455
2456	return vioif_ifflags(sc);
2457}
2458
2459/*
2460 * If the multicast filter is small enough (<= MAXENTRIES), set the rx filter
2461 * If a larger multicast filter exists, use ALLMULTI
2462 * If setting the rx filter fails, fall back to ALLMULTI
2463 */
2464static int
2465vioif_rx_filter(struct vioif_softc *sc)
2466{
2467	struct virtio_softc *vsc = sc->sc_virtio;
2468	struct ethercom *ec = &sc->sc_ethercom;
2469	struct ifnet *ifp = &ec->ec_if;
2470	struct ether_multi *enm;
2471	struct ether_multistep step;
2472	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2473	int nentries;
2474	bool allmulti = false;
2475	int r;
2476
2477	if (!sc->sc_has_ctrl) {
2478		goto set_ifflags;
2479	}
2480
2481	memcpy(ctrlq->ctrlq_mac_tbl_uc->macs[0],
2482	    CLLADDR(ifp->if_sadl), ETHER_ADDR_LEN);
2483
2484	nentries = 0;
2485	allmulti = false;
2486
2487	ETHER_LOCK(ec);
2488	for (ETHER_FIRST_MULTI(step, ec, enm); enm != NULL;
2489	    ETHER_NEXT_MULTI(step, enm)) {
2490		if (nentries >= VIRTIO_NET_CTRL_MAC_MAXENTRIES) {
2491			allmulti = true;
2492			break;
2493		}
2494		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
2495			allmulti = true;
2496			break;
2497		}
2498
2499		memcpy(ctrlq->ctrlq_mac_tbl_mc->macs[nentries],
2500		    enm->enm_addrlo, ETHER_ADDR_LEN);
2501		nentries++;
2502	}
2503	ETHER_UNLOCK(ec);
2504
2505	r = vioif_set_mac_addr(sc);
2506	if (r != 0) {
2507		log(LOG_WARNING, "%s: couldn't set MAC address\n",
2508		    ifp->if_xname);
2509	}
2510
2511	if (!allmulti) {
2512		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 1);
2513		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, nentries);
2514		r = vioif_set_rx_filter(sc);
2515		if (r != 0) {
2516			allmulti = true; /* fallback */
2517		}
2518	}
2519
2520	if (allmulti) {
2521		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 0);
2522		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, 0);
2523		r = vioif_set_rx_filter(sc);
2524		if (r != 0) {
2525			log(LOG_DEBUG, "%s: couldn't clear RX filter\n",
2526			    ifp->if_xname);
2527			/* what to do on failure? */
2528		}
2529
2530		ifp->if_flags |= IFF_ALLMULTI;
2531	}
2532
2533set_ifflags:
2534	r = vioif_ifflags(sc);
2535
2536	return r;
2537}
2538
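/*
 * read the link state from the STATUS config field when the device
 * offers VIRTIO_NET_F_STATUS; otherwise report the link as always up.
 */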
2539static int
2540vioif_get_link_status(struct vioif_softc *sc)
2541{
2542	struct virtio_softc *vsc = sc->sc_virtio;
2543	uint16_t status;
2544
2545	if (virtio_features(vsc) & VIRTIO_NET_F_STATUS)
2546		status = virtio_read_device_config_2(vsc,
2547		    VIRTIO_NET_CONFIG_STATUS);
2548	else
2549		status = VIRTIO_NET_S_LINK_UP;
2550
2551	if ((status & VIRTIO_NET_S_LINK_UP) != 0)
2552		return LINK_STATE_UP;
2553
2554	return LINK_STATE_DOWN;
2555}
2556
2557/* change link status */
2558static void
2559vioif_update_link_status(struct vioif_softc *sc)
2560{
2561	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2562	struct vioif_netqueue *netq;
2563	struct vioif_tx_context *txc;
2564	bool active;
2565	int link, i;
2566
2567	mutex_enter(&sc->sc_lock);
2568
2569	link = vioif_get_link_status(sc);
2570
2571	if (link == sc->sc_link_state)
2572		goto done;
2573
2574	sc->sc_link_state = link;
2575
2576	active = VIOIF_IS_LINK_ACTIVE(sc);
2577	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
2578		netq = &sc->sc_netqs[VIOIF_NETQ_TXQID(i)];
2579
2580		mutex_enter(&netq->netq_lock);
2581		txc = netq->netq_ctx;
2582		txc->txc_link_active = active;
2583		mutex_exit(&netq->netq_lock);
2584	}
2585
2586	if_link_state_change(ifp, sc->sc_link_state);
2587
2588done:
2589	mutex_exit(&sc->sc_lock);
2590}
2591
2592static int
2593vioif_config_change(struct virtio_softc *vsc)
2594{
2595	struct vioif_softc *sc = device_private(virtio_child(vsc));
2596
2597	softint_schedule(sc->sc_ctl_softint);
2598	return 0;
2599}
2600
2601static void
2602vioif_ctl_softint(void *arg)
2603{
2604	struct vioif_softc *sc = arg;
2605	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2606
2607	vioif_update_link_status(sc);
2608	vioif_start(ifp);
2609}
2610
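/*
 * thin wrappers around workqueue(9).  vioif_work_add() enqueues a work
 * item at most once until it actually runs: the `added' flag is set in
 * vioif_work_add() and cleared in vioif_workq_work() just before the
 * work function is called.
 */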
2611static struct workqueue *
2612vioif_workq_create(const char *name, pri_t prio, int ipl, int flags)
2613{
2614	struct workqueue *wq;
2615	int error;
2616
2617	error = workqueue_create(&wq, name, vioif_workq_work, NULL,
2618	    prio, ipl, flags);
2619
2620	if (error)
2621		return NULL;
2622
2623	return wq;
2624}
2625
2626static void
2627vioif_workq_destroy(struct workqueue *wq)
2628{
2629
2630	workqueue_destroy(wq);
2631}
2632
2633static void
2634vioif_workq_work(struct work *wk, void *context)
2635{
2636	struct vioif_work *work;
2637
2638	work = container_of(wk, struct vioif_work, cookie);
2639
2640	atomic_store_relaxed(&work->added, 0);
2641	work->func(work->arg);
2642}
2643
2644static void
2645vioif_work_set(struct vioif_work *work, void (*func)(void *), void *arg)
2646{
2647
2648	memset(work, 0, sizeof(*work));
2649	work->func = func;
2650	work->arg = arg;
2651}
2652
2653static void
2654vioif_work_add(struct workqueue *wq, struct vioif_work *work)
2655{
2656
2657	if (atomic_load_relaxed(&work->added) != 0)
2658		return;
2659
2660	atomic_store_relaxed(&work->added, 1);
2661	kpreempt_disable();
2662	workqueue_enqueue(wq, &work->cookie, NULL);
2663	kpreempt_enable();
2664}
2665
2666static void
2667vioif_work_wait(struct workqueue *wq, struct vioif_work *work)
2668{
2669
2670	workqueue_wait(wq, &work->cookie);
2671}
2672
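/*
 * per-device sysctl tree created under hw.<devname>:
 *
 *   hw.vioifN.txrx_workqueue          use a workqueue instead of softint
 *   hw.vioifN.rx.intr_process_limit   packets handled per rx interrupt
 *   hw.vioifN.rx.process_limit        packets handled per deferred rx run
 *   hw.vioifN.tx.intr_process_limit   packets handled per tx interrupt
 *   hw.vioifN.tx.process_limit        packets handled per deferred tx run
 *
 * e.g. (assuming the device attached as vioif0):
 *   sysctl -w hw.vioif0.txrx_workqueue=1
 */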
2673static int
2674vioif_setup_sysctl(struct vioif_softc *sc)
2675{
2676	const char *devname;
2677	struct sysctllog **log;
2678	const struct sysctlnode *rnode, *rxnode, *txnode;
2679	int error;
2680
2681	log = &sc->sc_sysctllog;
2682	devname = device_xname(sc->sc_dev);
2683
2684	error = sysctl_createv(log, 0, NULL, &rnode,
2685	    0, CTLTYPE_NODE, devname,
2686	    SYSCTL_DESCR("virtio-net information and settings"),
2687	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);
2688	if (error)
2689		goto out;
2690
2691	error = sysctl_createv(log, 0, &rnode, NULL,
2692	    CTLFLAG_READWRITE, CTLTYPE_BOOL, "txrx_workqueue",
2693	    SYSCTL_DESCR("Use workqueue for packet processing"),
2694	    NULL, 0, &sc->sc_txrx_workqueue_sysctl, 0, CTL_CREATE, CTL_EOL);
2695	if (error)
2696		goto out;
2697
2698	error = sysctl_createv(log, 0, &rnode, &rxnode,
2699	    0, CTLTYPE_NODE, "rx",
2700	    SYSCTL_DESCR("virtio-net information and settings for Rx"),
2701	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
2702	if (error)
2703		goto out;
2704
2705	error = sysctl_createv(log, 0, &rxnode, NULL,
2706	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
2707	    SYSCTL_DESCR("max number of Rx packets to process for interrupt processing"),
2708	    NULL, 0, &sc->sc_rx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
2709	if (error)
2710		goto out;
2711
2712	error = sysctl_createv(log, 0, &rxnode, NULL,
2713	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
2714	    SYSCTL_DESCR("max number of Rx packets to process for deferred processing"),
2715	    NULL, 0, &sc->sc_rx_process_limit, 0, CTL_CREATE, CTL_EOL);
2716	if (error)
2717		goto out;
2718
2719	error = sysctl_createv(log, 0, &rnode, &txnode,
2720	    0, CTLTYPE_NODE, "tx",
2721	    SYSCTL_DESCR("virtio-net information and settings for Tx"),
2722	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
2723	if (error)
2724		goto out;
2725
2726	error = sysctl_createv(log, 0, &txnode, NULL,
2727	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
2728	    SYSCTL_DESCR("max number of Tx packets to process for interrupt processing"),
2729	    NULL, 0, &sc->sc_tx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
2730	if (error)
2731		goto out;
2732
2733	error = sysctl_createv(log, 0, &txnode, NULL,
2734	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
2735	    SYSCTL_DESCR("max number of Tx packets to process for deferred processing"),
2736	    NULL, 0, &sc->sc_tx_process_limit, 0, CTL_CREATE, CTL_EOL);
2737
2738out:
2739	if (error)
2740		sysctl_teardown(log);
2741
2742	return error;
2743}
2744
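/*
 * attach the per-queue and control-queue event counters; they can be
 * inspected with vmstat -e.
 */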
2745static void
2746vioif_setup_stats(struct vioif_softc *sc)
2747{
2748	struct vioif_netqueue *netq;
2749	struct vioif_tx_context *txc;
2750	struct vioif_rx_context *rxc;
2751	size_t i, netq_num;
2752
2753	netq_num = sc->sc_max_nvq_pairs * 2;
2754	for (i = 0; i < netq_num; i++) {
2755		netq = &sc->sc_netqs[i];
2756		evcnt_attach_dynamic(&netq->netq_mbuf_load_failed, EVCNT_TYPE_MISC,
2757		    NULL, netq->netq_evgroup, "failed to load mbuf to DMA");
2758		evcnt_attach_dynamic(&netq->netq_enqueue_failed,
2759		    EVCNT_TYPE_MISC, NULL, netq->netq_evgroup,
2760		    "virtqueue enqueue failed");
2761
2762		switch (VIOIF_NETQ_DIR(i)) {
2763		case VIOIF_NETQ_RX:
2764			rxc = netq->netq_ctx;
2765			evcnt_attach_dynamic(&rxc->rxc_mbuf_enobufs,
2766			    EVCNT_TYPE_MISC, NULL, netq->netq_evgroup,
2767			    "no receive buffer");
2768			break;
2769		case VIOIF_NETQ_TX:
2770			txc = netq->netq_ctx;
2771			evcnt_attach_dynamic(&txc->txc_defrag_failed,
2772			    EVCNT_TYPE_MISC, NULL, netq->netq_evgroup,
2773			    "m_defrag() failed");
2774			break;
2775		}
2776	}
2777
2778	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_load_failed, EVCNT_TYPE_MISC,
2779	    NULL, device_xname(sc->sc_dev), "control command dmamap load failed");
2780	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_failed, EVCNT_TYPE_MISC,
2781	    NULL, device_xname(sc->sc_dev), "control command failed");
2782}
2783
2784static void
2785vioif_intr_barrier(void)
2786{
2787
2788	/* wait for all interrupt handlers to finish */
2789	xc_barrier(0);
2790}
2791
2792MODULE(MODULE_CLASS_DRIVER, if_vioif, "virtio");
2793
2794#ifdef _MODULE
2795#include "ioconf.c"
2796#endif
2797
2798static int
2799if_vioif_modcmd(modcmd_t cmd, void *opaque)
2800{
2801	int error = 0;
2802
2803#ifdef _MODULE
2804	switch (cmd) {
2805	case MODULE_CMD_INIT:
2806		error = config_init_component(cfdriver_ioconf_if_vioif,
2807		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2808		break;
2809	case MODULE_CMD_FINI:
2810		error = config_fini_component(cfdriver_ioconf_if_vioif,
2811		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2812		break;
2813	default:
2814		error = ENOTTY;
2815		break;
2816	}
2817#endif
2818
2819	return error;
2820}
2821