1/*	$NetBSD: if_vioif.c,v 1.81 2022/05/04 02:38:27 simonb Exp $	*/
2
3/*
4 * Copyright (c) 2020 The NetBSD Foundation, Inc.
5 * Copyright (c) 2010 Minoura Makoto.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__KERNEL_RCSID(0, "$NetBSD: if_vioif.c,v 1.81 2022/05/04 02:38:27 simonb Exp $");
31
32#ifdef _KERNEL_OPT
33#include "opt_net_mpsafe.h"
34#endif
35
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/kernel.h>
39#include <sys/atomic.h>
40#include <sys/bus.h>
41#include <sys/condvar.h>
42#include <sys/device.h>
43#include <sys/evcnt.h>
44#include <sys/intr.h>
45#include <sys/kmem.h>
46#include <sys/mbuf.h>
47#include <sys/mutex.h>
48#include <sys/sockio.h>
49#include <sys/syslog.h>
50#include <sys/cpu.h>
51#include <sys/module.h>
52#include <sys/pcq.h>
53#include <sys/workqueue.h>
54
55#include <dev/pci/virtioreg.h>
56#include <dev/pci/virtiovar.h>
57
58#include <net/if.h>
59#include <net/if_dl.h>
60#include <net/if_media.h>
61#include <net/if_ether.h>
62
63#include <net/bpf.h>
64
65#include "ioconf.h"
66
67#ifdef NET_MPSAFE
68#define VIOIF_MPSAFE	1
69#define VIOIF_MULTIQ	1
70#endif
71
72/*
73 * if_vioifreg.h:
74 */
75/* Configuration registers */
76#define VIRTIO_NET_CONFIG_MAC		 0 /* 8bit x 6byte */
77#define VIRTIO_NET_CONFIG_STATUS	 6 /* 16bit */
78#define VIRTIO_NET_CONFIG_MAX_VQ_PAIRS	 8 /* 16bit */
79#define VIRTIO_NET_CONFIG_MTU		10 /* 16bit */
80
81/* Feature bits */
82#define VIRTIO_NET_F_CSUM		__BIT(0)
83#define VIRTIO_NET_F_GUEST_CSUM		__BIT(1)
84#define VIRTIO_NET_F_MAC		__BIT(5)
85#define VIRTIO_NET_F_GSO		__BIT(6)
86#define VIRTIO_NET_F_GUEST_TSO4		__BIT(7)
87#define VIRTIO_NET_F_GUEST_TSO6		__BIT(8)
88#define VIRTIO_NET_F_GUEST_ECN		__BIT(9)
89#define VIRTIO_NET_F_GUEST_UFO		__BIT(10)
90#define VIRTIO_NET_F_HOST_TSO4		__BIT(11)
91#define VIRTIO_NET_F_HOST_TSO6		__BIT(12)
92#define VIRTIO_NET_F_HOST_ECN		__BIT(13)
93#define VIRTIO_NET_F_HOST_UFO		__BIT(14)
94#define VIRTIO_NET_F_MRG_RXBUF		__BIT(15)
95#define VIRTIO_NET_F_STATUS		__BIT(16)
96#define VIRTIO_NET_F_CTRL_VQ		__BIT(17)
97#define VIRTIO_NET_F_CTRL_RX		__BIT(18)
98#define VIRTIO_NET_F_CTRL_VLAN		__BIT(19)
99#define VIRTIO_NET_F_CTRL_RX_EXTRA	__BIT(20)
100#define VIRTIO_NET_F_GUEST_ANNOUNCE	__BIT(21)
101#define VIRTIO_NET_F_MQ			__BIT(22)
102#define VIRTIO_NET_F_CTRL_MAC_ADDR 	__BIT(23)
103
104#define VIRTIO_NET_FLAG_BITS			\
105	VIRTIO_COMMON_FLAG_BITS			\
106	"b\x17" "CTRL_MAC\0"			\
107	"b\x16" "MQ\0"				\
108	"b\x15" "GUEST_ANNOUNCE\0"		\
109	"b\x14" "CTRL_RX_EXTRA\0"		\
110	"b\x13" "CTRL_VLAN\0"			\
111	"b\x12" "CTRL_RX\0"			\
112	"b\x11" "CTRL_VQ\0"			\
113	"b\x10" "STATUS\0"			\
114	"b\x0f" "MRG_RXBUF\0"			\
115	"b\x0e" "HOST_UFO\0"			\
116	"b\x0d" "HOST_ECN\0"			\
117	"b\x0c" "HOST_TSO6\0"			\
118	"b\x0b" "HOST_TSO4\0"			\
119	"b\x0a" "GUEST_UFO\0"			\
120	"b\x09" "GUEST_ECN\0"			\
121	"b\x08" "GUEST_TSO6\0"			\
122	"b\x07" "GUEST_TSO4\0"			\
123	"b\x06" "GSO\0"				\
124	"b\x05" "MAC\0"				\
125	"b\x01" "GUEST_CSUM\0"			\
126	"b\x00" "CSUM\0"
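/*
 * The strings above are in the snprintb(3) "new style" format: each
 * "b\xNN" entry names the single feature bit at position NN, and
 * VIRTIO_COMMON_FLAG_BITS is expected to supply the leading "\177\020"
 * prefix.  Illustrative use (a sketch, not code from this driver):
 *
 *	char buf[256];
 *	snprintb(buf, sizeof(buf), VIRTIO_NET_FLAG_BITS, features);
 */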
127
128/* Status */
129#define VIRTIO_NET_S_LINK_UP	1
130
131/* Packet header structure */
132struct virtio_net_hdr {
133	uint8_t		flags;
134	uint8_t		gso_type;
135	uint16_t	hdr_len;
136	uint16_t	gso_size;
137	uint16_t	csum_start;
138	uint16_t	csum_offset;
139
140	uint16_t	num_buffers; /* VIRTIO_NET_F_MRG_RXBUF enabled or v1 */
141} __packed;
142
143#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
144#define VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
145#define VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
146#define VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
147#define VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
148#define VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */
149
150#define VIRTIO_NET_MAX_GSO_LEN		(65536+ETHER_HDR_LEN)
151
152/* Control virtqueue */
153struct virtio_net_ctrl_cmd {
154	uint8_t	class;
155	uint8_t	command;
156} __packed;
157#define VIRTIO_NET_CTRL_RX		0
158# define VIRTIO_NET_CTRL_RX_PROMISC	0
159# define VIRTIO_NET_CTRL_RX_ALLMULTI	1
160
161#define VIRTIO_NET_CTRL_MAC		1
162# define VIRTIO_NET_CTRL_MAC_TABLE_SET	0
163# define  VIRTIO_NET_CTRL_MAC_ADDR_SET	1
164
165#define VIRTIO_NET_CTRL_VLAN		2
166# define VIRTIO_NET_CTRL_VLAN_ADD	0
167# define VIRTIO_NET_CTRL_VLAN_DEL	1
168
169#define VIRTIO_NET_CTRL_MQ			4
170# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET	0
171# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN	1
172# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX	0x8000
173
174struct virtio_net_ctrl_status {
175	uint8_t	ack;
176} __packed;
177#define VIRTIO_NET_OK			0
178#define VIRTIO_NET_ERR			1
179
180struct virtio_net_ctrl_rx {
181	uint8_t	onoff;
182} __packed;
183
184struct virtio_net_ctrl_mac_tbl {
185	uint32_t nentries;
186	uint8_t macs[][ETHER_ADDR_LEN];
187} __packed;
188
189struct virtio_net_ctrl_mac_addr {
190	uint8_t mac[ETHER_ADDR_LEN];
191} __packed;
192
193struct virtio_net_ctrl_vlan {
194	uint16_t id;
195} __packed;
196
197struct virtio_net_ctrl_mq {
198	uint16_t virtqueue_pairs;
199} __packed;
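/*
 * A control request places the command, its class-specific payload
 * buffer(s) and the status byte on the control vq: ctrlq_cmd is read by
 * the device, ctrlq_status is written back by it.  See
 * vioif_ctrl_send_command().
 */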
200
201/*
202 * if_vioifvar.h:
203 */
204
205/*
206 * Locking notes:
207 * + fields in vioif_txqueue are protected by txq_lock (a spin mutex), and
208 *   fields in vioif_rxqueue are protected by rxq_lock (a spin mutex).
209 *      - no more than one of these locks may be held at once
210 * + ctrlq_inuse is protected by ctrlq_wait_lock.
211 *      - other fields in vioif_ctrlqueue are protected by ctrlq_inuse
212 *      - neither txq_lock nor rxq_lock may be held with ctrlq_wait_lock
213 * + fields in vioif_softc except the queues are protected by
214 *   sc->sc_lock (an adaptive mutex)
215 *      - sc_lock is acquired before any of the other locks
216 */
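/*
 * Illustrative lock ordering (a sketch, not a literal code path in this
 * driver):
 *
 *	mutex_enter(&sc->sc_lock);	sc_lock first
 *	mutex_enter(txq->txq_lock);	then at most one queue lock
 *	...
 *	mutex_exit(txq->txq_lock);
 *	mutex_exit(&sc->sc_lock);
 */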
217
218struct vioif_ctrl_cmdspec {
219	bus_dmamap_t	dmamap;
220	void		*buf;
221	bus_size_t	bufsize;
222};
223
224struct vioif_work {
225	struct work	 cookie;
226	void		(*func)(void *);
227	void		*arg;
228	unsigned int	 added;
229};
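/*
 * Deferred work wrapper: `added' is used by vioif_work_add() and
 * vioif_work_wait() so that a work item is enqueued at most once and can
 * be waited on.
 */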
230
231struct vioif_txqueue {
232	kmutex_t		*txq_lock;	/* lock for tx operations */
233
234	struct virtqueue	*txq_vq;
235	bool			txq_stopping;
236	bool			txq_link_active;
237	pcq_t			*txq_intrq;
238
239	struct virtio_net_hdr	*txq_hdrs;
240	bus_dmamap_t		*txq_hdr_dmamaps;
241
242	struct mbuf		**txq_mbufs;
243	bus_dmamap_t		*txq_dmamaps;
244
245	void			*txq_deferred_transmit;
246	void			*txq_handle_si;
247	struct vioif_work	 txq_work;
248	bool			 txq_workqueue;
249	bool			 txq_active;
250
251	char			 txq_evgroup[16];
252	struct evcnt		 txq_defrag_failed;
253	struct evcnt		 txq_mbuf_load_failed;
254	struct evcnt		 txq_enqueue_reserve_failed;
255};
256
257struct vioif_rxqueue {
258	kmutex_t		*rxq_lock;	/* lock for rx operations */
259
260	struct virtqueue	*rxq_vq;
261	bool			rxq_stopping;
262
263	struct virtio_net_hdr	*rxq_hdrs;
264	bus_dmamap_t		*rxq_hdr_dmamaps;
265
266	struct mbuf		**rxq_mbufs;
267	bus_dmamap_t		*rxq_dmamaps;
268
269	void			*rxq_handle_si;
270	struct vioif_work	 rxq_work;
271	bool			 rxq_workqueue;
272	bool			 rxq_active;
273
274	char			 rxq_evgroup[16];
275	struct evcnt		 rxq_mbuf_add_failed;
276};
277
278struct vioif_ctrlqueue {
279	struct virtqueue		*ctrlq_vq;
280	enum {
281		FREE, INUSE, DONE
282	}				ctrlq_inuse;
283	kcondvar_t			ctrlq_wait;
284	kmutex_t			ctrlq_wait_lock;
285	struct lwp			*ctrlq_owner;
286
287	struct virtio_net_ctrl_cmd	*ctrlq_cmd;
288	struct virtio_net_ctrl_status	*ctrlq_status;
289	struct virtio_net_ctrl_rx	*ctrlq_rx;
290	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_uc;
291	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_mc;
292	struct virtio_net_ctrl_mac_addr	*ctrlq_mac_addr;
293	struct virtio_net_ctrl_mq	*ctrlq_mq;
294
295	bus_dmamap_t			ctrlq_cmd_dmamap;
296	bus_dmamap_t			ctrlq_status_dmamap;
297	bus_dmamap_t			ctrlq_rx_dmamap;
298	bus_dmamap_t			ctrlq_tbl_uc_dmamap;
299	bus_dmamap_t			ctrlq_tbl_mc_dmamap;
300	bus_dmamap_t			ctrlq_mac_addr_dmamap;
301	bus_dmamap_t			ctrlq_mq_dmamap;
302
303	struct evcnt			ctrlq_cmd_load_failed;
304	struct evcnt			ctrlq_cmd_failed;
305};
306
307struct vioif_softc {
308	device_t		sc_dev;
309	kmutex_t		sc_lock;
310	struct sysctllog	*sc_sysctllog;
311
312	struct virtio_softc	*sc_virtio;
313	struct virtqueue	*sc_vqs;
314	u_int			 sc_hdr_size;
315
316	int			sc_max_nvq_pairs;
317	int			sc_req_nvq_pairs;
318	int			sc_act_nvq_pairs;
319
320	uint8_t			sc_mac[ETHER_ADDR_LEN];
321	struct ethercom		sc_ethercom;
322	bool			sc_link_active;
323
324	struct vioif_txqueue	*sc_txq;
325	struct vioif_rxqueue	*sc_rxq;
326
327	bool			sc_has_ctrl;
328	struct vioif_ctrlqueue	sc_ctrlq;
329
330	bus_dma_segment_t	sc_hdr_segs[1];
331	void			*sc_dmamem;
332	void			*sc_kmem;
333
334	void			*sc_ctl_softint;
335
336	struct workqueue	*sc_txrx_workqueue;
337	bool			 sc_txrx_workqueue_sysctl;
338	u_int			 sc_tx_intr_process_limit;
339	u_int			 sc_tx_process_limit;
340	u_int			 sc_rx_intr_process_limit;
341	u_int			 sc_rx_process_limit;
342};
343#define VIRTIO_NET_TX_MAXNSEGS		(16) /* XXX */
344#define VIRTIO_NET_CTRL_MAC_MAXENTRIES	(64) /* XXX */
345
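/*
 * Per-pass processing limits: the *_INTR_* values bound the work done in
 * the interrupt handlers, the others bound the softint/workqueue
 * handlers.  A limit of 0 (the rx interrupt default) defers all
 * dequeueing out of interrupt context.
 */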
346#define VIOIF_TX_INTR_PROCESS_LIMIT	256
347#define VIOIF_TX_PROCESS_LIMIT		256
348#define VIOIF_RX_INTR_PROCESS_LIMIT	0U
349#define VIOIF_RX_PROCESS_LIMIT		256
350
351#define VIOIF_WORKQUEUE_PRI		PRI_SOFTNET
352
353/* cfattach interface functions */
354static int	vioif_match(device_t, cfdata_t, void *);
355static void	vioif_attach(device_t, device_t, void *);
356static int	vioif_finalize_teardown(device_t);
357
358/* ifnet interface functions */
359static int	vioif_init(struct ifnet *);
360static void	vioif_stop(struct ifnet *, int);
361static void	vioif_start(struct ifnet *);
362static void	vioif_start_locked(struct ifnet *, struct vioif_txqueue *);
363static int	vioif_transmit(struct ifnet *, struct mbuf *);
364static void	vioif_transmit_locked(struct ifnet *, struct vioif_txqueue *);
365static int	vioif_ioctl(struct ifnet *, u_long, void *);
366static void	vioif_watchdog(struct ifnet *);
367static int	vioif_ifflags_cb(struct ethercom *);
368
369/* rx */
370static int	vioif_add_rx_mbuf(struct vioif_rxqueue *, int);
371static void	vioif_free_rx_mbuf(struct vioif_rxqueue *, int);
372static void	vioif_populate_rx_mbufs_locked(struct vioif_softc *,
373		    struct vioif_rxqueue *);
374static void	vioif_rx_queue_clear(struct vioif_rxqueue *);
375static bool	vioif_rx_deq_locked(struct vioif_softc *, struct virtio_softc *,
376		    struct vioif_rxqueue *, u_int);
377static int	vioif_rx_intr(void *);
378static void	vioif_rx_handle(void *);
379static void	vioif_rx_sched_handle(struct vioif_softc *,
380		    struct vioif_rxqueue *);
381static void	vioif_rx_drain(struct vioif_rxqueue *);
382
383/* tx */
384static int	vioif_tx_intr(void *);
385static void	vioif_tx_handle(void *);
386static void	vioif_tx_sched_handle(struct vioif_softc *,
387		    struct vioif_txqueue *);
388static void	vioif_tx_queue_clear(struct vioif_txqueue *);
389static bool	vioif_tx_deq_locked(struct vioif_softc *, struct virtio_softc *,
390		    struct vioif_txqueue *, u_int);
391static void	vioif_tx_drain(struct vioif_txqueue *);
392static void	vioif_deferred_transmit(void *);
393
394/* workqueue */
395static struct workqueue*
396		vioif_workq_create(const char *, pri_t, int, int);
397static void	vioif_workq_destroy(struct workqueue *);
398static void	vioif_workq_work(struct work *, void *);
399static void	vioif_work_set(struct vioif_work *, void(*)(void *), void *);
400static void	vioif_work_add(struct workqueue *, struct vioif_work *);
401static void	vioif_work_wait(struct workqueue *, struct vioif_work *);
402
403/* other control */
404static bool	vioif_is_link_up(struct vioif_softc *);
405static void	vioif_update_link_status(struct vioif_softc *);
406static int	vioif_ctrl_rx(struct vioif_softc *, int, bool);
407static int	vioif_set_promisc(struct vioif_softc *, bool);
408static int	vioif_set_allmulti(struct vioif_softc *, bool);
409static int	vioif_set_rx_filter(struct vioif_softc *);
410static int	vioif_rx_filter(struct vioif_softc *);
411static int	vioif_set_mac_addr(struct vioif_softc *);
412static int	vioif_ctrl_intr(void *);
413static int	vioif_config_change(struct virtio_softc *);
414static void	vioif_ctl_softint(void *);
415static int	vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *, int);
416static void	vioif_enable_interrupt_vqpairs(struct vioif_softc *);
417static void	vioif_disable_interrupt_vqpairs(struct vioif_softc *);
418static int	vioif_setup_sysctl(struct vioif_softc *);
419static void	vioif_setup_stats(struct vioif_softc *);
420static int	vioif_ifflags(struct vioif_softc *);
421
422CFATTACH_DECL_NEW(vioif, sizeof(struct vioif_softc),
423		  vioif_match, vioif_attach, NULL, NULL);
424
425static int
426vioif_match(device_t parent, cfdata_t match, void *aux)
427{
428	struct virtio_attach_args *va = aux;
429
430	if (va->sc_childdevid == VIRTIO_DEVICE_ID_NETWORK)
431		return 1;
432
433	return 0;
434}
435
436static int
437vioif_dmamap_create(struct vioif_softc *sc, bus_dmamap_t *map,
438    bus_size_t size, int nsegs, const char *usage)
439{
440	int r;
441
442	r = bus_dmamap_create(virtio_dmat(sc->sc_virtio), size,
443	    nsegs, size, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, map);
444
445	if (r != 0) {
446		aprint_error_dev(sc->sc_dev, "%s dmamap creation failed, "
447		    "error code %d\n", usage, r);
448	}
449
450	return r;
451}
452
453static void
454vioif_dmamap_destroy(struct vioif_softc *sc, bus_dmamap_t *map)
455{
456
457	if (*map) {
458		bus_dmamap_destroy(virtio_dmat(sc->sc_virtio), *map);
459		*map = NULL;
460	}
461}
462
463static int
464vioif_dmamap_create_load(struct vioif_softc *sc, bus_dmamap_t *map,
465    void *buf, bus_size_t size, int nsegs, int rw, const char *usage)
466{
467	int r;
468
469	r = vioif_dmamap_create(sc, map, size, nsegs, usage);
470	if (r != 0)
471		return 1;
472
473	r = bus_dmamap_load(virtio_dmat(sc->sc_virtio), *map, buf,
474	    size, NULL, rw | BUS_DMA_NOWAIT);
475	if (r != 0) {
476		vioif_dmamap_destroy(sc, map);
477		aprint_error_dev(sc->sc_dev, "%s dmamap load failed, "
478		    "error code %d\n", usage, r);
479	}
480
481	return r;
482}
483
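/*
 * Carve `size' bytes off the region whose cursor is *p and advance the
 * cursor; used to hand out pieces of the single DMA and kmem allocations
 * made in vioif_alloc_mems().
 */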
484static void *
485vioif_assign_mem(intptr_t *p, size_t size)
486{
487	intptr_t rv;
488
489	rv = *p;
490	*p += size;
491
492	return (void *)rv;
493}
494
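/*
 * sc_vqs[] is laid out as rx0, tx0, rx1, tx1, ... with the control vq,
 * if any, last; the virtqueue indices used at attach time follow the
 * same order.
 */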
495static void
496vioif_alloc_queues(struct vioif_softc *sc)
497{
498	int nvq_pairs = sc->sc_max_nvq_pairs;
499	int nvqs = nvq_pairs * 2;
500	int i;
501
502	KASSERT(nvq_pairs <= VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX);
503
504	sc->sc_rxq = kmem_zalloc(sizeof(sc->sc_rxq[0]) * nvq_pairs,
505	    KM_SLEEP);
506	sc->sc_txq = kmem_zalloc(sizeof(sc->sc_txq[0]) * nvq_pairs,
507	    KM_SLEEP);
508
509	if (sc->sc_has_ctrl)
510		nvqs++;
511
512	sc->sc_vqs = kmem_zalloc(sizeof(sc->sc_vqs[0]) * nvqs, KM_SLEEP);
513	nvqs = 0;
514	for (i = 0; i < nvq_pairs; i++) {
515		sc->sc_rxq[i].rxq_vq = &sc->sc_vqs[nvqs++];
516		sc->sc_txq[i].txq_vq = &sc->sc_vqs[nvqs++];
517	}
518
519	if (sc->sc_has_ctrl)
520		sc->sc_ctrlq.ctrlq_vq = &sc->sc_vqs[nvqs++];
521}
522
523static void
524vioif_free_queues(struct vioif_softc *sc)
525{
526	int nvq_pairs = sc->sc_max_nvq_pairs;
527	int nvqs = nvq_pairs * 2;
528
529	if (sc->sc_ctrlq.ctrlq_vq)
530		nvqs++;
531
532	if (sc->sc_txq) {
533		kmem_free(sc->sc_txq, sizeof(sc->sc_txq[0]) * nvq_pairs);
534		sc->sc_txq = NULL;
535	}
536
537	if (sc->sc_rxq) {
538		kmem_free(sc->sc_rxq, sizeof(sc->sc_rxq[0]) * nvq_pairs);
539		sc->sc_rxq = NULL;
540	}
541
542	if (sc->sc_vqs) {
543		kmem_free(sc->sc_vqs, sizeof(sc->sc_vqs[0]) * nvqs);
544		sc->sc_vqs = NULL;
545	}
546}
547
548/* allocate memory */
549/*
550 * dma memory is used for:
551 *   rxq_hdrs[slot]:	 metadata array for received frames (READ)
552 *   txq_hdrs[slot]:	 metadata array for frames to be sent (WRITE)
553 *   ctrlq_cmd:		 command to be sent via ctrl vq (WRITE)
554 *   ctrlq_status:	 return value for a command via ctrl vq (READ)
555 *   ctrlq_rx:		 parameter for a VIRTIO_NET_CTRL_RX class command
556 *			 (WRITE)
557 *   ctrlq_mac_tbl_uc:	 unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
558 *			 class command (WRITE)
559 *   ctrlq_mac_tbl_mc:	 multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
560 *			 class command (WRITE)
561 * Only one instance of each ctrlq_* structure is allocated; access to them
562 * is serialized by the ctrlq_inuse variable and the ctrlq_wait condvar.
563 */
564/*
565 * dynamically allocated memory is used for:
566 *   rxq_hdr_dmamaps[slot]:	bus_dmamap_t array for rxq_hdrs[slot]
567 *   txq_hdr_dmamaps[slot]:	bus_dmamap_t array for txq_hdrs[slot]
568 *   rxq_dmamaps[slot]:		bus_dmamap_t array for received payload
569 *   txq_dmamaps[slot]:		bus_dmamap_t array for sent payload
570 *   rxq_mbufs[slot]:		mbuf pointer array for received frames
571 *   txq_mbufs[slot]:		mbuf pointer array for sent frames
572 */
573static int
574vioif_alloc_mems(struct vioif_softc *sc)
575{
576	struct virtio_softc *vsc = sc->sc_virtio;
577	struct vioif_txqueue *txq;
578	struct vioif_rxqueue *rxq;
579	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
580	int allocsize, allocsize2, r, rsegs, i, qid;
581	void *vaddr;
582	intptr_t p;
583
584	allocsize = 0;
585	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
586		rxq = &sc->sc_rxq[qid];
587		txq = &sc->sc_txq[qid];
588
589		allocsize += sizeof(struct virtio_net_hdr) *
590			(rxq->rxq_vq->vq_num + txq->txq_vq->vq_num);
591	}
592	if (sc->sc_has_ctrl) {
593		allocsize += sizeof(struct virtio_net_ctrl_cmd);
594		allocsize += sizeof(struct virtio_net_ctrl_status);
595		allocsize += sizeof(struct virtio_net_ctrl_rx);
596		allocsize += sizeof(struct virtio_net_ctrl_mac_tbl)
597		    + ETHER_ADDR_LEN;
598		allocsize += sizeof(struct virtio_net_ctrl_mac_tbl)
599		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES;
600		allocsize += sizeof(struct virtio_net_ctrl_mac_addr);
601		allocsize += sizeof(struct virtio_net_ctrl_mq);
602	}
603	r = bus_dmamem_alloc(virtio_dmat(vsc), allocsize, 0, 0,
604	    &sc->sc_hdr_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
605	if (r != 0) {
606		aprint_error_dev(sc->sc_dev,
607		    "DMA memory allocation failed, size %d, "
608		    "error code %d\n", allocsize, r);
609		goto err_none;
610	}
611	r = bus_dmamem_map(virtio_dmat(vsc),
612	    &sc->sc_hdr_segs[0], 1, allocsize, &vaddr, BUS_DMA_NOWAIT);
613	if (r != 0) {
614		aprint_error_dev(sc->sc_dev,
615		    "DMA memory map failed, error code %d\n", r);
616		goto err_dmamem_alloc;
617	}
618
619	memset(vaddr, 0, allocsize);
620	sc->sc_dmamem = vaddr;
621	p = (intptr_t) vaddr;
622
623	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
624		rxq = &sc->sc_rxq[qid];
625		txq = &sc->sc_txq[qid];
626
627		rxq->rxq_hdrs = vioif_assign_mem(&p,
628		    sizeof(struct virtio_net_hdr) * rxq->rxq_vq->vq_num);
629		txq->txq_hdrs = vioif_assign_mem(&p,
630		    sizeof(struct virtio_net_hdr) * txq->txq_vq->vq_num);
631	}
632	if (sc->sc_has_ctrl) {
633		ctrlq->ctrlq_cmd = vioif_assign_mem(&p,
634		    sizeof(*ctrlq->ctrlq_cmd));
635		ctrlq->ctrlq_status = vioif_assign_mem(&p,
636		    sizeof(*ctrlq->ctrlq_status));
637		ctrlq->ctrlq_rx = vioif_assign_mem(&p,
638		    sizeof(*ctrlq->ctrlq_rx));
639		ctrlq->ctrlq_mac_tbl_uc = vioif_assign_mem(&p,
640		    sizeof(*ctrlq->ctrlq_mac_tbl_uc)
641		    + ETHER_ADDR_LEN);
642		ctrlq->ctrlq_mac_tbl_mc = vioif_assign_mem(&p,
643		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
644		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES);
645		ctrlq->ctrlq_mac_addr = vioif_assign_mem(&p,
646		    sizeof(*ctrlq->ctrlq_mac_addr));
647		ctrlq->ctrlq_mq = vioif_assign_mem(&p, sizeof(*ctrlq->ctrlq_mq));
648	}
649
650	allocsize2 = 0;
651	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
652		int rxqsize, txqsize;
653
654		rxq = &sc->sc_rxq[qid];
655		txq = &sc->sc_txq[qid];
656		rxqsize = rxq->rxq_vq->vq_num;
657		txqsize = txq->txq_vq->vq_num;
658
659		allocsize2 += sizeof(rxq->rxq_dmamaps[0]) * rxqsize;
660		allocsize2 += sizeof(rxq->rxq_hdr_dmamaps[0]) * rxqsize;
661		allocsize2 += sizeof(rxq->rxq_mbufs[0]) * rxqsize;
662
663		allocsize2 += sizeof(txq->txq_dmamaps[0]) * txqsize;
664		allocsize2 += sizeof(txq->txq_hdr_dmamaps[0]) * txqsize;
665		allocsize2 += sizeof(txq->txq_mbufs[0]) * txqsize;
666	}
667	vaddr = kmem_zalloc(allocsize2, KM_SLEEP);
668	sc->sc_kmem = vaddr;
669	p = (intptr_t) vaddr;
670
671	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
672		int rxqsize, txqsize;
673		rxq = &sc->sc_rxq[qid];
674		txq = &sc->sc_txq[qid];
675		rxqsize = rxq->rxq_vq->vq_num;
676		txqsize = txq->txq_vq->vq_num;
677
678		rxq->rxq_hdr_dmamaps = vioif_assign_mem(&p,
679		    sizeof(rxq->rxq_hdr_dmamaps[0]) * rxqsize);
680		txq->txq_hdr_dmamaps = vioif_assign_mem(&p,
681		    sizeof(txq->txq_hdr_dmamaps[0]) * txqsize);
682		rxq->rxq_dmamaps = vioif_assign_mem(&p,
683		    sizeof(rxq->rxq_dmamaps[0]) * rxqsize);
684		txq->txq_dmamaps = vioif_assign_mem(&p,
685		    sizeof(txq->txq_dmamaps[0]) * txqsize);
686		rxq->rxq_mbufs = vioif_assign_mem(&p,
687		    sizeof(rxq->rxq_mbufs[0]) * rxqsize);
688		txq->txq_mbufs = vioif_assign_mem(&p,
689		    sizeof(txq->txq_mbufs[0]) * txqsize);
690	}
691
692	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
693		rxq = &sc->sc_rxq[qid];
694		txq = &sc->sc_txq[qid];
695
696		for (i = 0; i < rxq->rxq_vq->vq_num; i++) {
697			r = vioif_dmamap_create_load(sc, &rxq->rxq_hdr_dmamaps[i],
698			    &rxq->rxq_hdrs[i], sc->sc_hdr_size, 1,
699			    BUS_DMA_READ, "rx header");
700			if (r != 0)
701				goto err_reqs;
702
703			r = vioif_dmamap_create(sc, &rxq->rxq_dmamaps[i],
704			    MCLBYTES, 1, "rx payload");
705			if (r != 0)
706				goto err_reqs;
707		}
708
709		for (i = 0; i < txq->txq_vq->vq_num; i++) {
710			r = vioif_dmamap_create_load(sc, &txq->txq_hdr_dmamaps[i],
711			    &txq->txq_hdrs[i], sc->sc_hdr_size, 1,
712			    BUS_DMA_READ, "tx header");
713			if (r != 0)
714				goto err_reqs;
715
716			r = vioif_dmamap_create(sc, &txq->txq_dmamaps[i], ETHER_MAX_LEN,
717			    VIRTIO_NET_TX_MAXNSEGS, "tx payload");
718			if (r != 0)
719				goto err_reqs;
720		}
721	}
722
723	if (sc->sc_has_ctrl) {
724		/* control vq class & command */
725		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_cmd_dmamap,
726		    ctrlq->ctrlq_cmd, sizeof(*ctrlq->ctrlq_cmd), 1,
727		    BUS_DMA_WRITE, "control command");
728		if (r != 0)
729			goto err_reqs;
730
731		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_status_dmamap,
732		    ctrlq->ctrlq_status, sizeof(*ctrlq->ctrlq_status), 1,
733		    BUS_DMA_READ, "control status");
734		if (r != 0)
735			goto err_reqs;
736
737		/* control vq rx mode command parameter */
738		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_rx_dmamap,
739		    ctrlq->ctrlq_rx, sizeof(*ctrlq->ctrlq_rx), 1,
740		    BUS_DMA_WRITE, "rx mode control command");
741		if (r != 0)
742			goto err_reqs;
743
744		/* multiqueue set command */
745		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_mq_dmamap,
746		    ctrlq->ctrlq_mq, sizeof(*ctrlq->ctrlq_mq), 1,
747		    BUS_DMA_WRITE, "multiqueue set command");
748		if (r != 0)
749			goto err_reqs;
750
751		/* control vq MAC filter table for unicast */
752		/* do not load now since its length is variable */
753		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_uc_dmamap,
754		    sizeof(*ctrlq->ctrlq_mac_tbl_uc)
755		    + ETHER_ADDR_LEN, 1,
756		    "unicast MAC address filter command");
757		if (r != 0)
758			goto err_reqs;
759
760		/* control vq MAC filter table for multicast */
761		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_mc_dmamap,
762		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
763		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES, 1,
764		    "multicast MAC address filter command");
765		if (r != 0)
766			goto err_reqs;
767
768		/* control vq MAC address set command */
769		r = vioif_dmamap_create_load(sc,
770		    &ctrlq->ctrlq_mac_addr_dmamap,
771		    ctrlq->ctrlq_mac_addr,
772		    sizeof(*ctrlq->ctrlq_mac_addr), 1,
773		    BUS_DMA_WRITE, "MAC address set command");
774		if (r != 0)
775			goto err_reqs;
776	}
777
778	return 0;
779
780err_reqs:
781	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_mc_dmamap);
782	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_uc_dmamap);
783	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_rx_dmamap);
784	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_status_dmamap);
785	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_cmd_dmamap);
786	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_mac_addr_dmamap);
787	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
788		rxq = &sc->sc_rxq[qid];
789		txq = &sc->sc_txq[qid];
790
791		for (i = 0; i < txq->txq_vq->vq_num; i++) {
792			vioif_dmamap_destroy(sc, &txq->txq_dmamaps[i]);
793			vioif_dmamap_destroy(sc, &txq->txq_hdr_dmamaps[i]);
794		}
795		for (i = 0; i < rxq->rxq_vq->vq_num; i++) {
796			vioif_dmamap_destroy(sc, &rxq->rxq_dmamaps[i]);
797			vioif_dmamap_destroy(sc, &rxq->rxq_hdr_dmamaps[i]);
798		}
799	}
800	if (sc->sc_kmem) {
801		kmem_free(sc->sc_kmem, allocsize2);
802		sc->sc_kmem = NULL;
803	}
804	bus_dmamem_unmap(virtio_dmat(vsc), sc->sc_dmamem, allocsize);
805err_dmamem_alloc:
806	bus_dmamem_free(virtio_dmat(vsc), &sc->sc_hdr_segs[0], 1);
807err_none:
808	return -1;
809}
810
811static void
812vioif_attach(device_t parent, device_t self, void *aux)
813{
814	struct vioif_softc *sc = device_private(self);
815	struct virtio_softc *vsc = device_private(parent);
816	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
817	struct vioif_txqueue *txq;
818	struct vioif_rxqueue *rxq;
819	uint64_t features, req_features;
820	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
821	u_int softint_flags;
822	int r, i, nvqs = 0, req_flags;
823	char xnamebuf[MAXCOMLEN];
824
825	if (virtio_child(vsc) != NULL) {
826		aprint_normal(": child already attached for %s; "
827		    "something wrong...\n", device_xname(parent));
828		return;
829	}
830
831	sc->sc_dev = self;
832	sc->sc_virtio = vsc;
833	sc->sc_link_active = false;
834
835	sc->sc_max_nvq_pairs = 1;
836	sc->sc_req_nvq_pairs = 1;
837	sc->sc_act_nvq_pairs = 1;
838	sc->sc_txrx_workqueue_sysctl = true;
839	sc->sc_tx_intr_process_limit = VIOIF_TX_INTR_PROCESS_LIMIT;
840	sc->sc_tx_process_limit = VIOIF_TX_PROCESS_LIMIT;
841	sc->sc_rx_intr_process_limit = VIOIF_RX_INTR_PROCESS_LIMIT;
842	sc->sc_rx_process_limit = VIOIF_RX_PROCESS_LIMIT;
843
844	mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);
845
846	snprintf(xnamebuf, sizeof(xnamebuf), "%s_txrx", device_xname(self));
847	sc->sc_txrx_workqueue = vioif_workq_create(xnamebuf, VIOIF_WORKQUEUE_PRI,
848	    IPL_NET, WQ_PERCPU | WQ_MPSAFE);
849	if (sc->sc_txrx_workqueue == NULL)
850		goto err;
851
852	req_flags = 0;
853
854#ifdef VIOIF_MPSAFE
855	req_flags |= VIRTIO_F_INTR_MPSAFE;
856#endif
857	req_flags |= VIRTIO_F_INTR_MSIX;
858
859	req_features =
860	    VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | VIRTIO_NET_F_CTRL_VQ |
861	    VIRTIO_NET_F_CTRL_RX | VIRTIO_F_NOTIFY_ON_EMPTY;
862	req_features |= VIRTIO_F_RING_EVENT_IDX;
863	req_features |= VIRTIO_NET_F_CTRL_MAC_ADDR;
864#ifdef VIOIF_MULTIQ
865	req_features |= VIRTIO_NET_F_MQ;
866#endif
867	virtio_child_attach_start(vsc, self, IPL_NET, NULL,
868	    vioif_config_change, virtio_vq_intrhand, req_flags,
869	    req_features, VIRTIO_NET_FLAG_BITS);
870
871	features = virtio_features(vsc);
872	if (features == 0)
873		goto err;
874
875	if (features & VIRTIO_NET_F_MAC) {
876		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
877			sc->sc_mac[i] = virtio_read_device_config_1(vsc,
878			    VIRTIO_NET_CONFIG_MAC + i);
879		}
880	} else {
881		/* code stolen from sys/net/if_tap.c */
882		struct timeval tv;
883		uint32_t ui;
884		getmicrouptime(&tv);
885		ui = (tv.tv_sec ^ tv.tv_usec) & 0xffffff;
886		memcpy(sc->sc_mac+3, (uint8_t *)&ui, 3);
887		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
888			virtio_write_device_config_1(vsc,
889			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
890		}
891	}
892
893	/* the capitalized 'Ethernet' matches other Ethernet drivers' attach output */
894	aprint_normal_dev(self, "Ethernet address %s\n",
895	    ether_sprintf(sc->sc_mac));
896
897	if (features & (VIRTIO_NET_F_MRG_RXBUF | VIRTIO_F_VERSION_1)) {
898		sc->sc_hdr_size = sizeof(struct virtio_net_hdr);
899	} else {
900		sc->sc_hdr_size = offsetof(struct virtio_net_hdr, num_buffers);
901	}
902
903	if ((features & VIRTIO_NET_F_CTRL_VQ) &&
904	    (features & VIRTIO_NET_F_CTRL_RX)) {
905		sc->sc_has_ctrl = true;
906
907		cv_init(&ctrlq->ctrlq_wait, "ctrl_vq");
908		mutex_init(&ctrlq->ctrlq_wait_lock, MUTEX_DEFAULT, IPL_NET);
909		ctrlq->ctrlq_inuse = FREE;
910	} else {
911		sc->sc_has_ctrl = false;
912	}
913
914	if (sc->sc_has_ctrl && (features & VIRTIO_NET_F_MQ)) {
915		sc->sc_max_nvq_pairs = virtio_read_device_config_2(vsc,
916		    VIRTIO_NET_CONFIG_MAX_VQ_PAIRS);
917
918		if (sc->sc_max_nvq_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
919			goto err;
920
921		/* Limit the number of queue pairs to use */
922		sc->sc_req_nvq_pairs = MIN(sc->sc_max_nvq_pairs, ncpu);
923	}
924
925	vioif_alloc_queues(sc);
926	virtio_child_attach_set_vqs(vsc, sc->sc_vqs, sc->sc_req_nvq_pairs);
927
928#ifdef VIOIF_MPSAFE
929	softint_flags = SOFTINT_NET | SOFTINT_MPSAFE;
930#else
931	softint_flags = SOFTINT_NET;
932#endif
933
934	/*
935	 * Allocating virtqueues
936	 */
937	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
938		rxq = &sc->sc_rxq[i];
939		txq = &sc->sc_txq[i];
940		char qname[32];
941
942		rxq->rxq_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
943
944		rxq->rxq_handle_si = softint_establish(softint_flags,
945		    vioif_rx_handle, rxq);
946		if (rxq->rxq_handle_si == NULL) {
947			aprint_error_dev(self, "cannot establish rx softint\n");
948			goto err;
949		}
950
951		snprintf(qname, sizeof(qname), "rx%d", i);
952		r = virtio_alloc_vq(vsc, rxq->rxq_vq, nvqs,
953		    MCLBYTES + sc->sc_hdr_size, 2, qname);
954		if (r != 0)
955			goto err;
956		nvqs++;
957		rxq->rxq_vq->vq_intrhand = vioif_rx_intr;
958		rxq->rxq_vq->vq_intrhand_arg = (void *)rxq;
959		rxq->rxq_stopping = true;
960		vioif_work_set(&rxq->rxq_work, vioif_rx_handle, rxq);
961
962		txq->txq_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
963
964		txq->txq_deferred_transmit = softint_establish(softint_flags,
965		    vioif_deferred_transmit, txq);
966		if (txq->txq_deferred_transmit == NULL) {
967			aprint_error_dev(self, "cannot establish tx softint\n");
968			goto err;
969		}
970		txq->txq_handle_si = softint_establish(softint_flags,
971		    vioif_tx_handle, txq);
972		if (txq->txq_handle_si == NULL) {
973			aprint_error_dev(self, "cannot establish tx softint\n");
974			goto err;
975		}
976
977		snprintf(qname, sizeof(qname), "tx%d", i);
978		r = virtio_alloc_vq(vsc, txq->txq_vq, nvqs,
979		    sc->sc_hdr_size + (ETHER_MAX_LEN - ETHER_HDR_LEN),
980		    VIRTIO_NET_TX_MAXNSEGS + 1, qname);
981		if (r != 0)
982			goto err;
983		nvqs++;
984		txq->txq_vq->vq_intrhand = vioif_tx_intr;
985		txq->txq_vq->vq_intrhand_arg = (void *)txq;
986		txq->txq_link_active = sc->sc_link_active;
987		txq->txq_stopping = false;
988		txq->txq_intrq = pcq_create(txq->txq_vq->vq_num, KM_SLEEP);
989		vioif_work_set(&txq->txq_work, vioif_tx_handle, txq);
990	}
991
992	if (sc->sc_has_ctrl) {
993		/*
994		 * Allocating a virtqueue for control channel
995		 */
996		r = virtio_alloc_vq(vsc, ctrlq->ctrlq_vq, nvqs,
997		    NBPG, 1, "control");
998		if (r != 0) {
999			aprint_error_dev(self, "failed to allocate "
1000			    "a virtqueue for control channel, error code %d\n",
1001			    r);
1002
1003			sc->sc_has_ctrl = false;
1004			cv_destroy(&ctrlq->ctrlq_wait);
1005			mutex_destroy(&ctrlq->ctrlq_wait_lock);
1006		} else {
1007			nvqs++;
1008			ctrlq->ctrlq_vq->vq_intrhand = vioif_ctrl_intr;
1009			ctrlq->ctrlq_vq->vq_intrhand_arg = (void *) ctrlq;
1010		}
1011	}
1012
1013	sc->sc_ctl_softint = softint_establish(softint_flags,
1014	    vioif_ctl_softint, sc);
1015	if (sc->sc_ctl_softint == NULL) {
1016		aprint_error_dev(self, "cannot establish ctl softint\n");
1017		goto err;
1018	}
1019
1020	if (vioif_alloc_mems(sc) < 0)
1021		goto err;
1022
1023	if (virtio_child_attach_finish(vsc) != 0)
1024		goto err;
1025
1026	if (vioif_setup_sysctl(sc) != 0) {
1027		aprint_error_dev(self, "unable to create sysctl node\n");
1028		/* continue */
1029	}
1030
1031	vioif_setup_stats(sc);
1032
1033	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
1034	ifp->if_softc = sc;
1035	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1036#ifdef VIOIF_MPSAFE
1037	ifp->if_extflags = IFEF_MPSAFE;
1038#endif
1039	ifp->if_start = vioif_start;
1040	if (sc->sc_req_nvq_pairs > 1)
1041		ifp->if_transmit = vioif_transmit;
1042	ifp->if_ioctl = vioif_ioctl;
1043	ifp->if_init = vioif_init;
1044	ifp->if_stop = vioif_stop;
1045	ifp->if_capabilities = 0;
1046	ifp->if_watchdog = vioif_watchdog;
1047	txq = &sc->sc_txq[0];
1048	IFQ_SET_MAXLEN(&ifp->if_snd, MAX(txq->txq_vq->vq_num, IFQ_MAXLEN));
1049	IFQ_SET_READY(&ifp->if_snd);
1050
1051	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
1052
1053	if_attach(ifp);
1054	if_deferred_start_init(ifp, NULL);
1055	ether_ifattach(ifp, sc->sc_mac);
1056	ether_set_ifflags_cb(&sc->sc_ethercom, vioif_ifflags_cb);
1057
1058	return;
1059
1060err:
1061	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
1062		rxq = &sc->sc_rxq[i];
1063		txq = &sc->sc_txq[i];
1064
1065		if (rxq->rxq_lock) {
1066			mutex_obj_free(rxq->rxq_lock);
1067			rxq->rxq_lock = NULL;
1068		}
1069
1070		if (rxq->rxq_handle_si) {
1071			softint_disestablish(rxq->rxq_handle_si);
1072			rxq->rxq_handle_si = NULL;
1073		}
1074
1075		if (txq->txq_lock) {
1076			mutex_obj_free(txq->txq_lock);
1077			txq->txq_lock = NULL;
1078		}
1079
1080		if (txq->txq_handle_si) {
1081			softint_disestablish(txq->txq_handle_si);
1082			txq->txq_handle_si = NULL;
1083		}
1084
1085		if (txq->txq_deferred_transmit) {
1086			softint_disestablish(txq->txq_deferred_transmit);
1087			txq->txq_deferred_transmit = NULL;
1088		}
1089
1090		if (txq->txq_intrq) {
1091			pcq_destroy(txq->txq_intrq);
1092			txq->txq_intrq = NULL;
1093		}
1094	}
1095
1096	if (sc->sc_has_ctrl) {
1097		cv_destroy(&ctrlq->ctrlq_wait);
1098		mutex_destroy(&ctrlq->ctrlq_wait_lock);
1099	}
1100
1101	while (nvqs > 0)
1102		virtio_free_vq(vsc, &sc->sc_vqs[--nvqs]);
1103
1104	vioif_free_queues(sc);
1105	mutex_destroy(&sc->sc_lock);
1106	virtio_child_attach_failed(vsc);
1107	config_finalize_register(self, vioif_finalize_teardown);
1108
1109	return;
1110}
1111
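/*
 * Registered via config_finalize_register() on the attach error path so
 * that the txrx workqueue is destroyed once autoconfiguration has
 * finished.
 */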
1112static int
1113vioif_finalize_teardown(device_t self)
1114{
1115	struct vioif_softc *sc = device_private(self);
1116
1117	if (sc->sc_txrx_workqueue != NULL) {
1118		vioif_workq_destroy(sc->sc_txrx_workqueue);
1119		sc->sc_txrx_workqueue = NULL;
1120	}
1121
1122	return 0;
1123}
1124
1125static void
1126vioif_enable_interrupt_vqpairs(struct vioif_softc *sc)
1127{
1128	struct virtio_softc *vsc = sc->sc_virtio;
1129	struct vioif_txqueue *txq;
1130	struct vioif_rxqueue *rxq;
1131	int i;
1132
1133	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1134		txq = &sc->sc_txq[i];
1135		rxq = &sc->sc_rxq[i];
1136
1137		virtio_start_vq_intr(vsc, txq->txq_vq);
1138		virtio_start_vq_intr(vsc, rxq->rxq_vq);
1139	}
1140}
1141
1142static void
1143vioif_disable_interrupt_vqpairs(struct vioif_softc *sc)
1144{
1145	struct virtio_softc *vsc = sc->sc_virtio;
1146	struct vioif_txqueue *txq;
1147	struct vioif_rxqueue *rxq;
1148	int i;
1149
1150	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1151		rxq = &sc->sc_rxq[i];
1152		txq = &sc->sc_txq[i];
1153
1154		virtio_stop_vq_intr(vsc, rxq->rxq_vq);
1155		virtio_stop_vq_intr(vsc, txq->txq_vq);
1156	}
1157}
1158
1159/*
1160 * Interface functions for ifnet
1161 */
1162static int
1163vioif_init(struct ifnet *ifp)
1164{
1165	struct vioif_softc *sc = ifp->if_softc;
1166	struct virtio_softc *vsc = sc->sc_virtio;
1167	struct vioif_rxqueue *rxq;
1168	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1169	int r, i;
1170
1171	vioif_stop(ifp, 0);
1172
1173	r = virtio_reinit_start(vsc);
1174	if (r != 0) {
1175		log(LOG_ERR, "%s: reset failed\n", ifp->if_xname);
1176		return EIO;
1177	}
1178
1179	virtio_negotiate_features(vsc, virtio_features(vsc));
1180
1181	for (i = 0; i < sc->sc_req_nvq_pairs; i++) {
1182		rxq = &sc->sc_rxq[i];
1183
1184		/* rxq_stopping must be cleared before vioif_populate_rx_mbufs_locked() */
1185		mutex_enter(rxq->rxq_lock);
1186		rxq->rxq_stopping = false;
1187		vioif_populate_rx_mbufs_locked(sc, rxq);
1188		mutex_exit(rxq->rxq_lock);
1189
1190	}
1191
1192	virtio_reinit_end(vsc);
1193
1194	if (sc->sc_has_ctrl)
1195		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
1196
1197	r = vioif_ctrl_mq_vq_pairs_set(sc, sc->sc_req_nvq_pairs);
1198	if (r == 0)
1199		sc->sc_act_nvq_pairs = sc->sc_req_nvq_pairs;
1200	else
1201		sc->sc_act_nvq_pairs = 1;
1202
1203	for (i = 0; i < sc->sc_act_nvq_pairs; i++)
1204		sc->sc_txq[i].txq_stopping = false;
1205
1206	vioif_enable_interrupt_vqpairs(sc);
1207
1208	vioif_update_link_status(sc);
1209	ifp->if_flags |= IFF_RUNNING;
1210	ifp->if_flags &= ~IFF_OACTIVE;
1211	r = vioif_rx_filter(sc);
1212
1213	return r;
1214}
1215
1216static void
1217vioif_stop(struct ifnet *ifp, int disable)
1218{
1219	struct vioif_softc *sc = ifp->if_softc;
1220	struct virtio_softc *vsc = sc->sc_virtio;
1221	struct vioif_txqueue *txq;
1222	struct vioif_rxqueue *rxq;
1223	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1224	int i;
1225
1226	/* disable interrupts */
1227	vioif_disable_interrupt_vqpairs(sc);
1228	if (sc->sc_has_ctrl)
1229		virtio_stop_vq_intr(vsc, ctrlq->ctrlq_vq);
1230
1231	/*
1232	 * stop all packet processing:
1233	 * 1. stop interrupt handlers by rxq_stopping and txq_stopping
1234	 * 2. wait for stopping workqueue for packet processing
1235	 */
1236	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1237		txq = &sc->sc_txq[i];
1238		rxq = &sc->sc_rxq[i];
1239
1240		mutex_enter(rxq->rxq_lock);
1241		rxq->rxq_stopping = true;
1242		mutex_exit(rxq->rxq_lock);
1243		vioif_work_wait(sc->sc_txrx_workqueue, &rxq->rxq_work);
1244
1245		mutex_enter(txq->txq_lock);
1246		txq->txq_stopping = true;
1247		mutex_exit(txq->txq_lock);
1248		vioif_work_wait(sc->sc_txrx_workqueue, &txq->txq_work);
1249	}
1250
1251	/* resetting the device is the only way to stop I/O and DMA */
1252	virtio_reset(vsc);
1253
1254	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1255		vioif_rx_queue_clear(&sc->sc_rxq[i]);
1256		vioif_tx_queue_clear(&sc->sc_txq[i]);
1257	}
1258
1259	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1260	sc->sc_link_active = false;
1261
1262	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1263		txq = &sc->sc_txq[i];
1264		rxq = &sc->sc_rxq[i];
1265
1266		txq->txq_link_active = false;
1267
1268		if (disable)
1269			vioif_rx_drain(rxq);
1270
1271		vioif_tx_drain(txq);
1272	}
1273}
1274
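/*
 * Enqueue pending frames onto the tx virtqueue, taking them from
 * txq_intrq for the if_transmit path or from if_snd for the if_start
 * path.  Called with txq_lock held.
 */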
1275static void
1276vioif_send_common_locked(struct ifnet *ifp, struct vioif_txqueue *txq,
1277    bool is_transmit)
1278{
1279	struct vioif_softc *sc = ifp->if_softc;
1280	struct virtio_softc *vsc = sc->sc_virtio;
1281	struct virtqueue *vq = txq->txq_vq;
1282	struct virtio_net_hdr *hdr;
1283	struct mbuf *m;
1284	int queued = 0;
1285
1286	KASSERT(mutex_owned(txq->txq_lock));
1287
1288	if ((ifp->if_flags & IFF_RUNNING) == 0)
1289		return;
1290
1291	if (!txq->txq_link_active || txq->txq_stopping)
1292		return;
1293
1294	if ((ifp->if_flags & IFF_OACTIVE) != 0 && !is_transmit)
1295		return;
1296
1297	for (;;) {
1298		int slot, r;
1299
1300		if (is_transmit)
1301			m = pcq_get(txq->txq_intrq);
1302		else
1303			IFQ_DEQUEUE(&ifp->if_snd, m);
1304
1305		if (m == NULL)
1306			break;
1307
1308		r = virtio_enqueue_prep(vsc, vq, &slot);
1309		if (r == EAGAIN) {
1310			ifp->if_flags |= IFF_OACTIVE;
1311			m_freem(m);
1312			break;
1313		}
1314		if (r != 0)
1315			panic("enqueue_prep for a tx buffer");
1316
1317		r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1318		    txq->txq_dmamaps[slot], m, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1319		if (r != 0) {
1320			/* maybe just too fragmented */
1321			struct mbuf *newm;
1322
1323			newm = m_defrag(m, M_NOWAIT);
1324			if (newm == NULL) {
1325				txq->txq_defrag_failed.ev_count++;
1326				goto skip;
1327			}
1328
1329			m = newm;
1330			r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1331			    txq->txq_dmamaps[slot], m,
1332			    BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1333			if (r != 0) {
1334				txq->txq_mbuf_load_failed.ev_count++;
1335skip:
1336				m_freem(m);
1337				virtio_enqueue_abort(vsc, vq, slot);
1338				continue;
1339			}
1340		}
1341
1342		/* This should actually never fail */
1343		r = virtio_enqueue_reserve(vsc, vq, slot,
1344		    txq->txq_dmamaps[slot]->dm_nsegs + 1);
1345		if (r != 0) {
1346			txq->txq_enqueue_reserve_failed.ev_count++;
1347			bus_dmamap_unload(virtio_dmat(vsc),
1348			     txq->txq_dmamaps[slot]);
1349			/* slot already freed by virtio_enqueue_reserve */
1350			m_freem(m);
1351			continue;
1352		}
1353
1354		txq->txq_mbufs[slot] = m;
1355
1356		hdr = &txq->txq_hdrs[slot];
1357		memset(hdr, 0, sc->sc_hdr_size);
1358		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_dmamaps[slot],
1359		    0, txq->txq_dmamaps[slot]->dm_mapsize,
1360		    BUS_DMASYNC_PREWRITE);
1361		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_hdr_dmamaps[slot],
1362		    0, txq->txq_hdr_dmamaps[slot]->dm_mapsize,
1363		    BUS_DMASYNC_PREWRITE);
1364		virtio_enqueue(vsc, vq, slot, txq->txq_hdr_dmamaps[slot], true);
1365		virtio_enqueue(vsc, vq, slot, txq->txq_dmamaps[slot], true);
1366		virtio_enqueue_commit(vsc, vq, slot, false);
1367
1368		queued++;
1369		bpf_mtap(ifp, m, BPF_D_OUT);
1370	}
1371
1372	if (queued > 0) {
1373		virtio_enqueue_commit(vsc, vq, -1, true);
1374		ifp->if_timer = 5;
1375	}
1376}
1377
1378static void
1379vioif_start_locked(struct ifnet *ifp, struct vioif_txqueue *txq)
1380{
1381
1382	/*
1383	 * ifp->if_obytes and ifp->if_omcasts are added in if_transmit()@if.c.
1384	 */
1385	vioif_send_common_locked(ifp, txq, false);
1386
1387}
1388
1389static void
1390vioif_start(struct ifnet *ifp)
1391{
1392	struct vioif_softc *sc = ifp->if_softc;
1393	struct vioif_txqueue *txq = &sc->sc_txq[0];
1394
1395#ifdef VIOIF_MPSAFE
1396	KASSERT(if_is_mpsafe(ifp));
1397#endif
1398
1399	mutex_enter(txq->txq_lock);
1400	vioif_start_locked(ifp, txq);
1401	mutex_exit(txq->txq_lock);
1402}
1403
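/*
 * Map the sending CPU to a tx queue (cpu_index modulo the number of
 * active queue pairs); note that this gives per-CPU, not per-flow,
 * affinity.
 */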
1404static inline int
1405vioif_select_txqueue(struct ifnet *ifp, struct mbuf *m)
1406{
1407	struct vioif_softc *sc = ifp->if_softc;
1408	u_int cpuid = cpu_index(curcpu());
1409
1410	return cpuid % sc->sc_act_nvq_pairs;
1411}
1412
1413static void
1414vioif_transmit_locked(struct ifnet *ifp, struct vioif_txqueue *txq)
1415{
1416
1417	vioif_send_common_locked(ifp, txq, true);
1418}
1419
1420static int
1421vioif_transmit(struct ifnet *ifp, struct mbuf *m)
1422{
1423	struct vioif_softc *sc = ifp->if_softc;
1424	struct vioif_txqueue *txq;
1425	int qid;
1426
1427	qid = vioif_select_txqueue(ifp, m);
1428	txq = &sc->sc_txq[qid];
1429
1430	if (__predict_false(!pcq_put(txq->txq_intrq, m))) {
1431		m_freem(m);
1432		return ENOBUFS;
1433	}
1434
1435	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
1436	if_statadd_ref(nsr, if_obytes, m->m_pkthdr.len);
1437	if (m->m_flags & M_MCAST)
1438		if_statinc_ref(nsr, if_omcasts);
1439	IF_STAT_PUTREF(ifp);
1440
1441	if (mutex_tryenter(txq->txq_lock)) {
1442		vioif_transmit_locked(ifp, txq);
1443		mutex_exit(txq->txq_lock);
1444	}
1445
1446	return 0;
1447}
1448
1449static void
1450vioif_deferred_transmit(void *arg)
1451{
1452	struct vioif_txqueue *txq = arg;
1453	struct virtio_softc *vsc = txq->txq_vq->vq_owner;
1454	struct vioif_softc *sc = device_private(virtio_child(vsc));
1455	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1456
1457	mutex_enter(txq->txq_lock);
1458	vioif_send_common_locked(ifp, txq, true);
1459	mutex_exit(txq->txq_lock);
1460}
1461
1462static int
1463vioif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1464{
1465	int s, r;
1466
1467	s = splnet();
1468
1469	r = ether_ioctl(ifp, cmd, data);
1470	if (r == ENETRESET && (cmd == SIOCADDMULTI || cmd == SIOCDELMULTI)) {
1471		if (ifp->if_flags & IFF_RUNNING) {
1472			r = vioif_rx_filter(ifp->if_softc);
1473		} else {
1474			r = 0;
1475		}
1476	}
1477
1478	splx(s);
1479
1480	return r;
1481}
1482
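/*
 * Watchdog: the tx interrupt is normally suppressed, so reclaim any
 * completed tx slots here for each active queue pair.
 */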
1483void
1484vioif_watchdog(struct ifnet *ifp)
1485{
1486	struct vioif_softc *sc = ifp->if_softc;
1487	int i;
1488
1489	if (ifp->if_flags & IFF_RUNNING) {
1490		for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1491			vioif_tx_queue_clear(&sc->sc_txq[i]);
1492		}
1493	}
1494}
1495
1496/*
1497 * Receive implementation
1498 */
1499/* allocate and initialize a mbuf for receive */
1500static int
1501vioif_add_rx_mbuf(struct vioif_rxqueue *rxq, int i)
1502{
1503	struct virtio_softc *vsc = rxq->rxq_vq->vq_owner;
1504	struct mbuf *m;
1505	int r;
1506
1507	MGETHDR(m, M_DONTWAIT, MT_DATA);
1508	if (m == NULL)
1509		return ENOBUFS;
1510	MCLGET(m, M_DONTWAIT);
1511	if ((m->m_flags & M_EXT) == 0) {
1512		m_freem(m);
1513		return ENOBUFS;
1514	}
1515	rxq->rxq_mbufs[i] = m;
1516	m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
1517	r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1518	    rxq->rxq_dmamaps[i], m, BUS_DMA_READ | BUS_DMA_NOWAIT);
1519	if (r) {
1520		m_freem(m);
1521		rxq->rxq_mbufs[i] = NULL;
1522		return r;
1523	}
1524
1525	return 0;
1526}
1527
1528/* free a mbuf for receive */
1529static void
1530vioif_free_rx_mbuf(struct vioif_rxqueue *rxq, int i)
1531{
1532	struct virtio_softc *vsc = rxq->rxq_vq->vq_owner;
1533
1534	bus_dmamap_unload(virtio_dmat(vsc), rxq->rxq_dmamaps[i]);
1535	m_freem(rxq->rxq_mbufs[i]);
1536	rxq->rxq_mbufs[i] = NULL;
1537}
1538
1539/* add mbufs for all the empty receive slots */
1540static void
1541vioif_populate_rx_mbufs_locked(struct vioif_softc *sc, struct vioif_rxqueue *rxq)
1542{
1543	struct virtqueue *vq = rxq->rxq_vq;
1544	struct virtio_softc *vsc = vq->vq_owner;
1545	int i, r, ndone = 0;
1546
1547	KASSERT(mutex_owned(rxq->rxq_lock));
1548
1549	if (rxq->rxq_stopping)
1550		return;
1551
1552	for (i = 0; i < vq->vq_num; i++) {
1553		int slot;
1554		r = virtio_enqueue_prep(vsc, vq, &slot);
1555		if (r == EAGAIN)
1556			break;
1557		if (r != 0)
1558			panic("enqueue_prep for rx buffers");
1559		if (rxq->rxq_mbufs[slot] == NULL) {
1560			r = vioif_add_rx_mbuf(rxq, slot);
1561			if (r != 0) {
1562				rxq->rxq_mbuf_add_failed.ev_count++;
1563				break;
1564			}
1565		}
1566		r = virtio_enqueue_reserve(vsc, vq, slot,
1567		    rxq->rxq_dmamaps[slot]->dm_nsegs + 1);
1568		if (r != 0) {
1569			vioif_free_rx_mbuf(rxq, slot);
1570			break;
1571		}
1572		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_hdr_dmamaps[slot],
1573		    0, sc->sc_hdr_size, BUS_DMASYNC_PREREAD);
1574		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_dmamaps[slot],
1575		    0, MCLBYTES, BUS_DMASYNC_PREREAD);
1576		virtio_enqueue(vsc, vq, slot, rxq->rxq_hdr_dmamaps[slot],
1577		    false);
1578		virtio_enqueue(vsc, vq, slot, rxq->rxq_dmamaps[slot], false);
1579		virtio_enqueue_commit(vsc, vq, slot, false);
1580		ndone++;
1581	}
1582	if (ndone > 0)
1583		virtio_enqueue_commit(vsc, vq, -1, true);
1584}
1585
1586static void
1587vioif_rx_queue_clear(struct vioif_rxqueue *rxq)
1588{
1589	struct virtqueue *vq = rxq->rxq_vq;
1590	struct virtio_softc *vsc = vq->vq_owner;
1591	struct vioif_softc *sc = device_private(virtio_child(vsc));
1592	u_int limit = UINT_MAX;
1593	bool more;
1594
1595	KASSERT(rxq->rxq_stopping);
1596
1597	mutex_enter(rxq->rxq_lock);
1598	for (;;) {
1599		more = vioif_rx_deq_locked(sc, vsc, rxq, limit);
1600		if (more == false)
1601			break;
1602	}
1603	mutex_exit(rxq->rxq_lock);
1604}
1605
1606/* dequeue received packets */
1607static bool
1608vioif_rx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
1609    struct vioif_rxqueue *rxq, u_int limit)
1610{
1611	struct virtqueue *vq = rxq->rxq_vq;
1612	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1613	struct mbuf *m;
1614	int slot, len;
1615	bool more = false, dequeued = false;
1616
1617	KASSERT(mutex_owned(rxq->rxq_lock));
1618
1619	if (virtio_vq_is_enqueued(vsc, vq) == false)
1620		return false;
1621
1622	for (;;) {
1623		if (limit-- == 0) {
1624			more = true;
1625			break;
1626		}
1627
1628		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
1629			break;
1630
1631		dequeued = true;
1632
1633		len -= sc->sc_hdr_size;
1634		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_hdr_dmamaps[slot],
1635		    0, sc->sc_hdr_size, BUS_DMASYNC_POSTREAD);
1636		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_dmamaps[slot],
1637		    0, MCLBYTES, BUS_DMASYNC_POSTREAD);
1638		m = rxq->rxq_mbufs[slot];
1639		KASSERT(m != NULL);
1640		bus_dmamap_unload(virtio_dmat(vsc), rxq->rxq_dmamaps[slot]);
1641		rxq->rxq_mbufs[slot] = NULL;
1642		virtio_dequeue_commit(vsc, vq, slot);
1643		m_set_rcvif(m, ifp);
1644		m->m_len = m->m_pkthdr.len = len;
1645
1646		mutex_exit(rxq->rxq_lock);
1647		if_percpuq_enqueue(ifp->if_percpuq, m);
1648		mutex_enter(rxq->rxq_lock);
1649
1650		if (rxq->rxq_stopping)
1651			break;
1652	}
1653
1654	if (dequeued)
1655		vioif_populate_rx_mbufs_locked(sc, rxq);
1656
1657	return more;
1658}
1659
1660/* rx interrupt: dequeue packets; defer remaining work to a softint or workqueue */
1661
1662static void
1663vioif_rx_handle_locked(void *xrxq, u_int limit)
1664{
1665	struct vioif_rxqueue *rxq = xrxq;
1666	struct virtqueue *vq = rxq->rxq_vq;
1667	struct virtio_softc *vsc = vq->vq_owner;
1668	struct vioif_softc *sc = device_private(virtio_child(vsc));
1669	bool more;
1670
1671	KASSERT(!rxq->rxq_stopping);
1672
1673	more = vioif_rx_deq_locked(sc, vsc, rxq, limit);
1674	if (more) {
1675		vioif_rx_sched_handle(sc, rxq);
1676		return;
1677	}
1678	more = virtio_start_vq_intr(vsc, rxq->rxq_vq);
1679	if (more) {
1680		vioif_rx_sched_handle(sc, rxq);
1681		return;
1682	}
1683	atomic_store_relaxed(&rxq->rxq_active, false);
1684}
1685
1686static int
1687vioif_rx_intr(void *arg)
1688{
1689	struct vioif_rxqueue *rxq = arg;
1690	struct virtqueue *vq = rxq->rxq_vq;
1691	struct virtio_softc *vsc = vq->vq_owner;
1692	struct vioif_softc *sc = device_private(virtio_child(vsc));
1693	u_int limit;
1694
1695	limit = sc->sc_rx_intr_process_limit;
1696
1697	if (atomic_load_relaxed(&rxq->rxq_active) == true)
1698		return 1;
1699
1700	mutex_enter(rxq->rxq_lock);
1701
1702	if (!rxq->rxq_stopping) {
1703		rxq->rxq_workqueue = sc->sc_txrx_workqueue_sysctl;
1704
1705		virtio_stop_vq_intr(vsc, vq);
1706		atomic_store_relaxed(&rxq->rxq_active, true);
1707
1708		vioif_rx_handle_locked(rxq, limit);
1709	}
1710
1711	mutex_exit(rxq->rxq_lock);
1712	return 1;
1713}
1714
1715static void
1716vioif_rx_handle(void *xrxq)
1717{
1718	struct vioif_rxqueue *rxq = xrxq;
1719	struct virtqueue *vq = rxq->rxq_vq;
1720	struct virtio_softc *vsc = vq->vq_owner;
1721	struct vioif_softc *sc = device_private(virtio_child(vsc));
1722	u_int limit;
1723
1724	limit = sc->sc_rx_process_limit;
1725
1726	mutex_enter(rxq->rxq_lock);
1727
1728	if (!rxq->rxq_stopping)
1729		vioif_rx_handle_locked(rxq, limit);
1730
1731	mutex_exit(rxq->rxq_lock);
1732}
1733
1734static void
1735vioif_rx_sched_handle(struct vioif_softc *sc, struct vioif_rxqueue *rxq)
1736{
1737
1738	KASSERT(mutex_owned(rxq->rxq_lock));
1739
1740	if (rxq->rxq_stopping)
1741		return;
1742
1743	if (rxq->rxq_workqueue)
1744		vioif_work_add(sc->sc_txrx_workqueue, &rxq->rxq_work);
1745	else
1746		softint_schedule(rxq->rxq_handle_si);
1747}
1748
1749/* free all the mbufs; called from if_stop(disable) */
1750static void
1751vioif_rx_drain(struct vioif_rxqueue *rxq)
1752{
1753	struct virtqueue *vq = rxq->rxq_vq;
1754	int i;
1755
1756	for (i = 0; i < vq->vq_num; i++) {
1757		if (rxq->rxq_mbufs[i] == NULL)
1758			continue;
1759		vioif_free_rx_mbuf(rxq, i);
1760	}
1761}
1762
1763/*
1764 * Transmission implementation
1765 */
1766/*
1767 * Actual transmission is done from if_start/if_transmit; the tx
1768 * interrupt dequeues and frees sent mbufs.  The interrupt is normally
1769 * kept disabled, so completions are reaped when the tx vq fills up and
1770 * from the watchdog.
1771 */
1772
1773static void
1774vioif_tx_handle_locked(struct vioif_txqueue *txq, u_int limit)
1775{
1776	struct virtqueue *vq = txq->txq_vq;
1777	struct virtio_softc *vsc = vq->vq_owner;
1778	struct vioif_softc *sc = device_private(virtio_child(vsc));
1779	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1780	bool more;
1781
1782	KASSERT(!txq->txq_stopping);
1783
1784	more = vioif_tx_deq_locked(sc, vsc, txq, limit);
1785	if (more) {
1786		vioif_tx_sched_handle(sc, txq);
1787		return;
1788	}
1789
1790	if (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX)
1791		more = virtio_postpone_intr_smart(vsc, vq);
1792	else
1793		more = virtio_start_vq_intr(vsc, vq);
1794	if (more) {
1795		vioif_tx_sched_handle(sc, txq);
1796		return;
1797	}
1798
1799	atomic_store_relaxed(&txq->txq_active, false);
1800	/* for ALTQ */
1801	if (txq == &sc->sc_txq[0]) {
1802		if_schedule_deferred_start(ifp);
1803		ifp->if_flags &= ~IFF_OACTIVE;
1804	}
1805	softint_schedule(txq->txq_deferred_transmit);
1806}
1807
1808
1809static int
1810vioif_tx_intr(void *arg)
1811{
1812	struct vioif_txqueue *txq = arg;
1813	struct virtqueue *vq = txq->txq_vq;
1814	struct virtio_softc *vsc = vq->vq_owner;
1815	struct vioif_softc *sc = device_private(virtio_child(vsc));
1816	u_int limit;
1817
1818	limit = sc->sc_tx_intr_process_limit;
1819
1820	if (atomic_load_relaxed(&txq->txq_active) == true)
1821		return 1;
1822
1823	mutex_enter(txq->txq_lock);
1824
1825	if (!txq->txq_stopping) {
1826		txq->txq_workqueue = sc->sc_txrx_workqueue_sysctl;
1827
1828		virtio_stop_vq_intr(vsc, vq);
1829		atomic_store_relaxed(&txq->txq_active, true);
1830
1831		vioif_tx_handle_locked(txq, limit);
1832	}
1833
1834	mutex_exit(txq->txq_lock);
1835
1836	return 1;
1837}
1838
1839static void
1840vioif_tx_handle(void *xtxq)
1841{
1842	struct vioif_txqueue *txq = xtxq;
1843	struct virtqueue *vq = txq->txq_vq;
1844	struct virtio_softc *vsc = vq->vq_owner;
1845	struct vioif_softc *sc = device_private(virtio_child(vsc));
1846	u_int limit;
1847
1848	limit = sc->sc_tx_process_limit;
1849
1850	mutex_enter(txq->txq_lock);
1851	if (!txq->txq_stopping)
1852		vioif_tx_handle_locked(txq, limit);
1853	mutex_exit(txq->txq_lock);
1854}
1855
1856static void
1857vioif_tx_sched_handle(struct vioif_softc *sc, struct vioif_txqueue *txq)
1858{
1859
1860	KASSERT(mutex_owned(txq->txq_lock));
1861
1862	if (txq->txq_stopping)
1863		return;
1864
1865	if (txq->txq_workqueue)
1866		vioif_work_add(sc->sc_txrx_workqueue, &txq->txq_work);
1867	else
1868		softint_schedule(txq->txq_handle_si);
1869}
1870
1871static void
1872vioif_tx_queue_clear(struct vioif_txqueue *txq)
1873{
1874	struct virtqueue *vq = txq->txq_vq;
1875	struct virtio_softc *vsc = vq->vq_owner;
1876	struct vioif_softc *sc = device_private(virtio_child(vsc));
1877	u_int limit = UINT_MAX;
1878	bool more;
1879
1880	mutex_enter(txq->txq_lock);
1881	for (;;) {
1882		more = vioif_tx_deq_locked(sc, vsc, txq, limit);
1883		if (more == false)
1884			break;
1885	}
1886	mutex_exit(txq->txq_lock);
1887}
1888
1889static bool
1890vioif_tx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
1891    struct vioif_txqueue *txq, u_int limit)
1892{
1893	struct virtqueue *vq = txq->txq_vq;
1894	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1895	struct mbuf *m;
1896	int slot, len;
1897	bool more = false;
1898
1899	KASSERT(mutex_owned(txq->txq_lock));
1900
1901	if (virtio_vq_is_enqueued(vsc, vq) == false)
1902		return false;
1903
1904	for (;;) {
1905		if (limit-- == 0) {
1906			more = true;
1907			break;
1908		}
1909
1910		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
1911			break;
1912
1913		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_hdr_dmamaps[slot],
1914		    0, sc->sc_hdr_size, BUS_DMASYNC_POSTWRITE);
1915		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_dmamaps[slot],
1916		    0, txq->txq_dmamaps[slot]->dm_mapsize,
1917		    BUS_DMASYNC_POSTWRITE);
1918		m = txq->txq_mbufs[slot];
1919		bus_dmamap_unload(virtio_dmat(vsc), txq->txq_dmamaps[slot]);
1920		txq->txq_mbufs[slot] = NULL;
1921		virtio_dequeue_commit(vsc, vq, slot);
1922		if_statinc(ifp, if_opackets);
1923		m_freem(m);
1924	}
1925
1926	return more;
1927}
1928
1929/* free all the mbufs already put on vq; called from if_stop(disable) */
1930static void
1931vioif_tx_drain(struct vioif_txqueue *txq)
1932{
1933	struct virtqueue *vq = txq->txq_vq;
1934	struct virtio_softc *vsc = vq->vq_owner;
1935	int i;
1936
1937	KASSERT(txq->txq_stopping);
1938
1939	for (i = 0; i < vq->vq_num; i++) {
1940		if (txq->txq_mbufs[i] == NULL)
1941			continue;
1942		bus_dmamap_unload(virtio_dmat(vsc), txq->txq_dmamaps[i]);
1943		m_freem(txq->txq_mbufs[i]);
1944		txq->txq_mbufs[i] = NULL;
1945	}
1946}
1947
1948/*
1949 * Control vq
1950 */
1951/* grab the control virtqueue for exclusive use; released by vioif_ctrl_release() */
1952static void
1953vioif_ctrl_acquire(struct vioif_softc *sc)
1954{
1955	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1956
1957	mutex_enter(&ctrlq->ctrlq_wait_lock);
1958	while (ctrlq->ctrlq_inuse != FREE)
1959		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
1960	ctrlq->ctrlq_inuse = INUSE;
1961	ctrlq->ctrlq_owner = curlwp;
1962	mutex_exit(&ctrlq->ctrlq_wait_lock);
1963}
1964
1965static void
1966vioif_ctrl_release(struct vioif_softc *sc)
1967{
1968	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1969
1970	KASSERT(ctrlq->ctrlq_inuse != FREE);
1971	KASSERT(ctrlq->ctrlq_owner == curlwp);
1972
1973	mutex_enter(&ctrlq->ctrlq_wait_lock);
1974	ctrlq->ctrlq_inuse = FREE;
1975	ctrlq->ctrlq_owner = NULL;
1976	cv_signal(&ctrlq->ctrlq_wait);
1977	mutex_exit(&ctrlq->ctrlq_wait_lock);
1978}
1979
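/* load the dmamaps for a set of command buffers; on failure, unload the maps loaded so far */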
1980static int
1981vioif_ctrl_load_cmdspec(struct vioif_softc *sc,
1982    struct vioif_ctrl_cmdspec *specs, int nspecs)
1983{
1984	struct virtio_softc *vsc = sc->sc_virtio;
1985	int i, r, loaded;
1986
1987	loaded = 0;
1988	for (i = 0; i < nspecs; i++) {
1989		r = bus_dmamap_load(virtio_dmat(vsc),
1990		    specs[i].dmamap, specs[i].buf, specs[i].bufsize,
1991		    NULL, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1992		if (r) {
1993			sc->sc_ctrlq.ctrlq_cmd_load_failed.ev_count++;
1994			goto err;
1995		}
1996		loaded++;
1997
1998	}
1999
2000	return r;
2001
2002err:
2003	for (i = 0; i < loaded; i++) {
2004		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2005	}
2006
2007	return r;
2008}
2009
2010static void
2011vioif_ctrl_unload_cmdspec(struct vioif_softc *sc,
2012    struct vioif_ctrl_cmdspec *specs, int nspecs)
2013{
2014	struct virtio_softc *vsc = sc->sc_virtio;
2015	int i;
2016
2017	for (i = 0; i < nspecs; i++) {
2018		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2019	}
2020}
2021
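/*
 * enqueue the command header, payload buffers and status buffer on the
 * control vq and sleep until the interrupt handler marks the command DONE;
 * returns 0 if the device answered VIRTIO_NET_OK, otherwise EIO.
 */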
2022static int
2023vioif_ctrl_send_command(struct vioif_softc *sc, uint8_t class, uint8_t cmd,
2024    struct vioif_ctrl_cmdspec *specs, int nspecs)
2025{
2026	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2027	struct virtqueue *vq = ctrlq->ctrlq_vq;
2028	struct virtio_softc *vsc = sc->sc_virtio;
2029	int i, r, slot;
2030
2031	ctrlq->ctrlq_cmd->class = class;
2032	ctrlq->ctrlq_cmd->command = cmd;
2033
2034	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap,
2035	    0, sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_PREWRITE);
2036	for (i = 0; i < nspecs; i++) {
2037		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap,
2038		    0, specs[i].bufsize, BUS_DMASYNC_PREWRITE);
2039	}
2040	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap,
2041	    0, sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_PREREAD);
2042
2043	/* we need to explicitly (re)start vq intr when using RING EVENT IDX */
2044	if (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX)
2045		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
2046
2047	r = virtio_enqueue_prep(vsc, vq, &slot);
2048	if (r != 0)
2049		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2050	r = virtio_enqueue_reserve(vsc, vq, slot, nspecs + 2);
2051	if (r != 0)
2052		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2053	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_cmd_dmamap, true);
2054	for (i = 0; i < nspecs; i++) {
2055		virtio_enqueue(vsc, vq, slot, specs[i].dmamap, true);
2056	}
2057	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_status_dmamap, false);
2058	virtio_enqueue_commit(vsc, vq, slot, true);
2059
2060	/* wait for done */
2061	mutex_enter(&ctrlq->ctrlq_wait_lock);
2062	while (ctrlq->ctrlq_inuse != DONE)
2063		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
2064	mutex_exit(&ctrlq->ctrlq_wait_lock);
2065	/* already dequeued */
2066
2067	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap, 0,
2068	    sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
2069	for (i = 0; i < nspecs; i++) {
2070		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap, 0,
2071		    specs[i].bufsize, BUS_DMASYNC_POSTWRITE);
2072	}
2073	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap, 0,
2074	    sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_POSTREAD);
2075
2076	if (ctrlq->ctrlq_status->ack == VIRTIO_NET_OK)
2077		r = 0;
2078	else {
2079		device_printf(sc->sc_dev, "control command failed\n");
2080		sc->sc_ctrlq.ctrlq_cmd_failed.ev_count++;
2081		r = EIO;
2082	}
2083
2084	return r;
2085}
2086
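/* issue a VIRTIO_NET_CTRL_RX class command and wait for completion */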
2087static int
2088vioif_ctrl_rx(struct vioif_softc *sc, int cmd, bool onoff)
2089{
2090	struct virtio_net_ctrl_rx *rx = sc->sc_ctrlq.ctrlq_rx;
2091	struct vioif_ctrl_cmdspec specs[1];
2092	int r;
2093
2094	if (!sc->sc_has_ctrl)
2095		return ENOTSUP;
2096
2097	vioif_ctrl_acquire(sc);
2098
2099	rx->onoff = onoff;
2100	specs[0].dmamap = sc->sc_ctrlq.ctrlq_rx_dmamap;
2101	specs[0].buf = rx;
2102	specs[0].bufsize = sizeof(*rx);
2103
2104	r = vioif_ctrl_send_command(sc, VIRTIO_NET_CTRL_RX, cmd,
2105	    specs, __arraycount(specs));
2106
2107	vioif_ctrl_release(sc);
2108	return r;
2109}
2110
2111static int
2112vioif_set_promisc(struct vioif_softc *sc, bool onoff)
2113{
2114	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, onoff);
2115}
2116
2117static int
2118vioif_set_allmulti(struct vioif_softc *sc, bool onoff)
2119{
2120	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, onoff);
2121}
2122
2123/* issue VIRTIO_NET_CTRL_MAC_TABLE_SET command and wait for completion */
2124static int
2125vioif_set_rx_filter(struct vioif_softc *sc)
2126{
2127	/* filter already set in ctrlq->ctrlq_mac_tbl */
2128	struct virtio_softc *vsc = sc->sc_virtio;
2129	struct virtio_net_ctrl_mac_tbl *mac_tbl_uc, *mac_tbl_mc;
2130	struct vioif_ctrl_cmdspec specs[2];
2131	int nspecs = __arraycount(specs);
2132	int r;
2133
2134	mac_tbl_uc = sc->sc_ctrlq.ctrlq_mac_tbl_uc;
2135	mac_tbl_mc = sc->sc_ctrlq.ctrlq_mac_tbl_mc;
2136
2137	if (!sc->sc_has_ctrl)
2138		return ENOTSUP;
2139
2140	vioif_ctrl_acquire(sc);
2141
2142	specs[0].dmamap = sc->sc_ctrlq.ctrlq_tbl_uc_dmamap;
2143	specs[0].buf = mac_tbl_uc;
2144	specs[0].bufsize = sizeof(*mac_tbl_uc)
2145	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_uc->nentries));
2146
2147	specs[1].dmamap = sc->sc_ctrlq.ctrlq_tbl_mc_dmamap;
2148	specs[1].buf = mac_tbl_mc;
2149	specs[1].bufsize = sizeof(*mac_tbl_mc)
2150	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_mc->nentries));
2151
2152	r = vioif_ctrl_load_cmdspec(sc, specs, nspecs);
2153	if (r != 0)
2154		goto out;
2155
2156	r = vioif_ctrl_send_command(sc,
2157	    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET,
2158	    specs, nspecs);
2159
2160	vioif_ctrl_unload_cmdspec(sc, specs, nspecs);
2161
2162out:
2163	vioif_ctrl_release(sc);
2164
2165	return r;
2166}
2167
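/*
 * program the MAC address: use VIRTIO_NET_CTRL_MAC_ADDR_SET when the
 * feature was negotiated, otherwise write the address into config space.
 */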
2168static int
2169vioif_set_mac_addr(struct vioif_softc *sc)
2170{
2171	struct virtio_net_ctrl_mac_addr *ma =
2172	    sc->sc_ctrlq.ctrlq_mac_addr;
2173	struct vioif_ctrl_cmdspec specs[1];
2174	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2175	int nspecs = __arraycount(specs);
2176	uint64_t features;
2177	int r;
2178	size_t i;
2179
2180	if (!sc->sc_has_ctrl)
2181		return ENOTSUP;
2182
2183	if (memcmp(CLLADDR(ifp->if_sadl), sc->sc_mac,
2184	    ETHER_ADDR_LEN) == 0) {
2185		return 0;
2186	}
2187
2188	memcpy(sc->sc_mac, CLLADDR(ifp->if_sadl), ETHER_ADDR_LEN);
2189
2190	features = virtio_features(sc->sc_virtio);
2191	if (features & VIRTIO_NET_F_CTRL_MAC_ADDR) {
2192		vioif_ctrl_acquire(sc);
2193
2194		memcpy(ma->mac, sc->sc_mac, ETHER_ADDR_LEN);
2195		specs[0].dmamap = sc->sc_ctrlq.ctrlq_mac_addr_dmamap;
2196		specs[0].buf = ma;
2197		specs[0].bufsize = sizeof(*ma);
2198
2199		r = vioif_ctrl_send_command(sc,
2200		    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_ADDR_SET,
2201		    specs, nspecs);
2202
2203		vioif_ctrl_release(sc);
2204	} else {
2205		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
2206			virtio_write_device_config_1(sc->sc_virtio,
2207			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
2208		}
2209		r = 0;
2210	}
2211
2212	return r;
2213}
2214
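/* tell the device how many virtqueue pairs to use (VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) */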
2215static int
2216vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *sc, int nvq_pairs)
2217{
2218	struct virtio_net_ctrl_mq *mq = sc->sc_ctrlq.ctrlq_mq;
2219	struct vioif_ctrl_cmdspec specs[1];
2220	int r;
2221
2222	if (!sc->sc_has_ctrl)
2223		return ENOTSUP;
2224
2225	if (nvq_pairs <= 1)
2226		return EINVAL;
2227
2228	vioif_ctrl_acquire(sc);
2229
2230	mq->virtqueue_pairs = virtio_rw16(sc->sc_virtio, nvq_pairs);
2231	specs[0].dmamap = sc->sc_ctrlq.ctrlq_mq_dmamap;
2232	specs[0].buf = mq;
2233	specs[0].bufsize = sizeof(*mq);
2234
2235	r = vioif_ctrl_send_command(sc,
2236	    VIRTIO_NET_CTRL_MQ, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
2237	    specs, __arraycount(specs));
2238
2239	vioif_ctrl_release(sc);
2240
2241	return r;
2242}
2243
2244/* ctrl vq interrupt; wake up the command issuer */
2245static int
2246vioif_ctrl_intr(void *arg)
2247{
2248	struct vioif_ctrlqueue *ctrlq = arg;
2249	struct virtqueue *vq = ctrlq->ctrlq_vq;
2250	struct virtio_softc *vsc = vq->vq_owner;
2251	int r, slot;
2252
2253	if (virtio_vq_is_enqueued(vsc, vq) == false)
2254		return 0;
2255
2256	r = virtio_dequeue(vsc, vq, &slot, NULL);
2257	if (r == ENOENT)
2258		return 0;
2259	virtio_dequeue_commit(vsc, vq, slot);
2260
2261	mutex_enter(&ctrlq->ctrlq_wait_lock);
2262	ctrlq->ctrlq_inuse = DONE;
2263	cv_signal(&ctrlq->ctrlq_wait);
2264	mutex_exit(&ctrlq->ctrlq_wait_lock);
2265
2266	return 1;
2267}
2268
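/*
 * apply IFF_PROMISC/IFF_ALLMULTI through the control vq; without a control
 * vq the device is always promiscuous/allmulti, so just reflect that in the
 * interface flags.
 */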
2269static int
2270vioif_ifflags(struct vioif_softc *sc)
2271{
2272	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2273	bool onoff;
2274	int r;
2275
2276	if (!sc->sc_has_ctrl) {
2277		/* no ctrl vq; always promisc and allmulti */
2278		ifp->if_flags |= (IFF_PROMISC | IFF_ALLMULTI);
2279		return 0;
2280	}
2281
2282	onoff = ifp->if_flags & IFF_ALLMULTI ? true : false;
2283	r = vioif_set_allmulti(sc, onoff);
2284	if (r != 0) {
2285		log(LOG_WARNING,
2286		    "%s: couldn't %sable ALLMULTI\n",
2287		    ifp->if_xname, onoff ? "en" : "dis");
2288		if (onoff == false) {
2289			ifp->if_flags |= IFF_ALLMULTI;
2290		}
2291	}
2292
2293	onoff = ifp->if_flags & IFF_PROMISC ? true : false;
2294	r = vioif_set_promisc(sc, onoff);
2295	if (r != 0) {
2296		log(LOG_WARNING,
2297		    "%s: couldn't %sable PROMISC\n",
2298		    ifp->if_xname, onoff ? "en" : "dis");
2299		if (onoff == false) {
2300			ifp->if_flags |= IFF_PROMISC;
2301		}
2302	}
2303
2304	return 0;
2305}
2306
2307static int
2308vioif_ifflags_cb(struct ethercom *ec)
2309{
2310	struct ifnet *ifp = &ec->ec_if;
2311	struct vioif_softc *sc = ifp->if_softc;
2312
2313	return vioif_ifflags(sc);
2314}
2315
2316/*
2317 * If the multicast filter is small enough (<= MAXENTRIES), program the rx filter
2318 * If the multicast filter is too large, use ALLMULTI instead
2319 * If programming the rx filter fails, fall back to ALLMULTI
2320 */
2321static int
2322vioif_rx_filter(struct vioif_softc *sc)
2323{
2324	struct virtio_softc *vsc = sc->sc_virtio;
2325	struct ethercom *ec = &sc->sc_ethercom;
2326	struct ifnet *ifp = &ec->ec_if;
2327	struct ether_multi *enm;
2328	struct ether_multistep step;
2329	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2330	int nentries;
2331	bool allmulti = false;
2332	int r;
2333
2334	if (!sc->sc_has_ctrl) {
2335		goto set_ifflags;
2336	}
2337
2338	memcpy(ctrlq->ctrlq_mac_tbl_uc->macs[0],
2339	    CLLADDR(ifp->if_sadl), ETHER_ADDR_LEN);
2340
2341	nentries = 0;
2342	allmulti = false;
2343
2344	ETHER_LOCK(ec);
2345	for (ETHER_FIRST_MULTI(step, ec, enm); enm != NULL;
2346	    ETHER_NEXT_MULTI(step, enm)) {
2347		if (nentries >= VIRTIO_NET_CTRL_MAC_MAXENTRIES) {
2348			allmulti = true;
2349			break;
2350		}
2351		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
2352			allmulti = true;
2353			break;
2354		}
2355
2356		memcpy(ctrlq->ctrlq_mac_tbl_mc->macs[nentries],
2357		    enm->enm_addrlo, ETHER_ADDR_LEN);
2358		nentries++;
2359	}
2360	ETHER_UNLOCK(ec);
2361
2362	r = vioif_set_mac_addr(sc);
2363	if (r != 0) {
2364		log(LOG_WARNING, "%s: couldn't set MAC address\n",
2365		    ifp->if_xname);
2366	}
2367
2368	if (!allmulti) {
2369		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 1);
2370		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, nentries);
2371		r = vioif_set_rx_filter(sc);
2372		if (r != 0) {
2373			allmulti = true; /* fallback */
2374		}
2375	}
2376
2377	if (allmulti) {
2378		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 0);
2379		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, 0);
2380		r = vioif_set_rx_filter(sc);
2381		if (r != 0) {
2382			log(LOG_DEBUG, "%s: couldn't clear RX filter\n",
2383			    ifp->if_xname);
2384			/* what to do on failure? */
2385		}
2386
2387		ifp->if_flags |= IFF_ALLMULTI;
2388	}
2389
2390set_ifflags:
2391	r = vioif_ifflags(sc);
2392
2393	return r;
2394}
2395
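/*
 * read the link state from config space if VIRTIO_NET_F_STATUS was
 * negotiated, otherwise assume the link is up.
 */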
2396static bool
2397vioif_is_link_up(struct vioif_softc *sc)
2398{
2399	struct virtio_softc *vsc = sc->sc_virtio;
2400	uint16_t status;
2401
2402	if (virtio_features(vsc) & VIRTIO_NET_F_STATUS)
2403		status = virtio_read_device_config_2(vsc,
2404		    VIRTIO_NET_CONFIG_STATUS);
2405	else
2406		status = VIRTIO_NET_S_LINK_UP;
2407
2408	return ((status & VIRTIO_NET_S_LINK_UP) != 0);
2409}
2410
2411/* update the cached link state and notify the stack when it changes */
2412static void
2413vioif_update_link_status(struct vioif_softc *sc)
2414{
2415	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2416	struct vioif_txqueue *txq;
2417	bool active, changed;
2418	int link, i;
2419
2420	mutex_enter(&sc->sc_lock);
2421
2422	active = vioif_is_link_up(sc);
2423	changed = false;
2424
2425	if (active) {
2426		if (!sc->sc_link_active)
2427			changed = true;
2428
2429		link = LINK_STATE_UP;
2430		sc->sc_link_active = true;
2431	} else {
2432		if (sc->sc_link_active)
2433			changed = true;
2434
2435		link = LINK_STATE_DOWN;
2436		sc->sc_link_active = false;
2437	}
2438
2439	if (changed) {
2440		for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
2441			txq = &sc->sc_txq[i];
2442
2443			mutex_enter(txq->txq_lock);
2444			txq->txq_link_active = sc->sc_link_active;
2445			mutex_exit(txq->txq_lock);
2446		}
2447
2448		if_link_state_change(ifp, link);
2449	}
2450
2451	mutex_exit(&sc->sc_lock);
2452}
2453
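/* config change interrupt from the virtio layer; defer the work to a softint */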
2454static int
2455vioif_config_change(struct virtio_softc *vsc)
2456{
2457	struct vioif_softc *sc = device_private(virtio_child(vsc));
2458
2459	softint_schedule(sc->sc_ctl_softint);
2460	return 0;
2461}
2462
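/* softint scheduled on a config change: refresh the link status and restart tx */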
2463static void
2464vioif_ctl_softint(void *arg)
2465{
2466	struct vioif_softc *sc = arg;
2467	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2468
2469	vioif_update_link_status(sc);
2470	vioif_start(ifp);
2471}
2472
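/*
 * thin wrappers around workqueue(9): each vioif_work carries an "added"
 * flag so that it is enqueued at most once until it has run.
 */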
2473static struct workqueue *
2474vioif_workq_create(const char *name, pri_t prio, int ipl, int flags)
2475{
2476	struct workqueue *wq;
2477	int error;
2478
2479	error = workqueue_create(&wq, name, vioif_workq_work, NULL,
2480	    prio, ipl, flags);
2481
2482	if (error)
2483		return NULL;
2484
2485	return wq;
2486}
2487
2488static void
2489vioif_workq_destroy(struct workqueue *wq)
2490{
2491
2492	workqueue_destroy(wq);
2493}
2494
2495static void
2496vioif_workq_work(struct work *wk, void *context)
2497{
2498	struct vioif_work *work;
2499
2500	work = container_of(wk, struct vioif_work, cookie);
2501
2502	atomic_store_relaxed(&work->added, 0);
2503	work->func(work->arg);
2504}
2505
2506static void
2507vioif_work_set(struct vioif_work *work, void (*func)(void *), void *arg)
2508{
2509
2510	memset(work, 0, sizeof(*work));
2511	work->func = func;
2512	work->arg = arg;
2513}
2514
2515static void
2516vioif_work_add(struct workqueue *wq, struct vioif_work *work)
2517{
2518
2519	if (atomic_load_relaxed(&work->added) != 0)
2520		return;
2521
2522	atomic_store_relaxed(&work->added, 1);
2523	kpreempt_disable();
2524	workqueue_enqueue(wq, &work->cookie, NULL);
2525	kpreempt_enable();
2526}
2527
2528static void
2529vioif_work_wait(struct workqueue *wq, struct vioif_work *work)
2530{
2531
2532	workqueue_wait(wq, &work->cookie);
2533}
2534
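/*
 * create the per-device sysctl subtree under hw: txrx_workqueue and the
 * rx/tx interrupt/deferred process limits.
 */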
2535static int
2536vioif_setup_sysctl(struct vioif_softc *sc)
2537{
2538	const char *devname;
2539	struct sysctllog **log;
2540	const struct sysctlnode *rnode, *rxnode, *txnode;
2541	int error;
2542
2543	log = &sc->sc_sysctllog;
2544	devname = device_xname(sc->sc_dev);
2545
2546	error = sysctl_createv(log, 0, NULL, &rnode,
2547	    0, CTLTYPE_NODE, devname,
2548	    SYSCTL_DESCR("virtio-net information and settings"),
2549	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);
2550	if (error)
2551		goto out;
2552
2553	error = sysctl_createv(log, 0, &rnode, NULL,
2554	    CTLFLAG_READWRITE, CTLTYPE_BOOL, "txrx_workqueue",
2555	    SYSCTL_DESCR("Use workqueue for packet processing"),
2556	    NULL, 0, &sc->sc_txrx_workqueue_sysctl, 0, CTL_CREATE, CTL_EOL);
2557	if (error)
2558		goto out;
2559
2560	error = sysctl_createv(log, 0, &rnode, &rxnode,
2561	    0, CTLTYPE_NODE, "rx",
2562	    SYSCTL_DESCR("virtio-net information and settings for Rx"),
2563	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
2564	if (error)
2565		goto out;
2566
2567	error = sysctl_createv(log, 0, &rxnode, NULL,
2568	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
2569	    SYSCTL_DESCR("max number of Rx packets to process per interrupt"),
2570	    NULL, 0, &sc->sc_rx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
2571	if (error)
2572		goto out;
2573
2574	error = sysctl_createv(log, 0, &rxnode, NULL,
2575	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
2576	    SYSCTL_DESCR("max number of Rx packets to process for deferred processing"),
2577	    NULL, 0, &sc->sc_rx_process_limit, 0, CTL_CREATE, CTL_EOL);
2578	if (error)
2579		goto out;
2580
2581	error = sysctl_createv(log, 0, &rnode, &txnode,
2582	    0, CTLTYPE_NODE, "tx",
2583	    SYSCTL_DESCR("virtio-net information and settings for Tx"),
2584	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
2585	if (error)
2586		goto out;
2587
2588	error = sysctl_createv(log, 0, &txnode, NULL,
2589	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
2590	    SYSCTL_DESCR("max number of Tx packets to process per interrupt"),
2591	    NULL, 0, &sc->sc_tx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
2592	if (error)
2593		goto out;
2594
2595	error = sysctl_createv(log, 0, &txnode, NULL,
2596	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
2597	    SYSCTL_DESCR("max number of Tx packets to process for deferred processing"),
2598	    NULL, 0, &sc->sc_tx_process_limit, 0, CTL_CREATE, CTL_EOL);
2599
2600out:
2601	if (error)
2602		sysctl_teardown(log);
2603
2604	return error;
2605}
2606
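/* attach event counters for per-queue tx/rx failures and control command errors */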
2607static void
2608vioif_setup_stats(struct vioif_softc *sc)
2609{
2610	struct vioif_rxqueue *rxq;
2611	struct vioif_txqueue *txq;
2612	int i;
2613
2614	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
2615		rxq = &sc->sc_rxq[i];
2616		txq = &sc->sc_txq[i];
2617
2618		snprintf(txq->txq_evgroup, sizeof(txq->txq_evgroup), "%s-TX%d",
2619		    device_xname(sc->sc_dev), i);
2620		evcnt_attach_dynamic(&txq->txq_defrag_failed, EVCNT_TYPE_MISC,
2621		    NULL, txq->txq_evgroup, "tx m_defrag() failed");
2622		evcnt_attach_dynamic(&txq->txq_mbuf_load_failed, EVCNT_TYPE_MISC,
2623		    NULL, txq->txq_evgroup, "tx dmamap load failed");
2624		evcnt_attach_dynamic(&txq->txq_enqueue_reserve_failed, EVCNT_TYPE_MISC,
2625		    NULL, txq->txq_evgroup, "virtio_enqueue_reserve failed");
2626
2627		snprintf(rxq->rxq_evgroup, sizeof(rxq->rxq_evgroup), "%s-RX%d",
2628		    device_xname(sc->sc_dev), i);
2629		evcnt_attach_dynamic(&rxq->rxq_mbuf_add_failed, EVCNT_TYPE_MISC,
2630		    NULL, rxq->rxq_evgroup, "rx mbuf allocation failed");
2631	}
2632
2633	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_load_failed, EVCNT_TYPE_MISC,
2634	    NULL, device_xname(sc->sc_dev), "control command dmamap load failed");
2635	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_failed, EVCNT_TYPE_MISC,
2636	    NULL, device_xname(sc->sc_dev), "control command failed");
2637}
2638
2639MODULE(MODULE_CLASS_DRIVER, if_vioif, "virtio");
2640
2641#ifdef _MODULE
2642#include "ioconf.c"
2643#endif
2644
2645static int
2646if_vioif_modcmd(modcmd_t cmd, void *opaque)
2647{
2648	int error = 0;
2649
2650#ifdef _MODULE
2651	switch (cmd) {
2652	case MODULE_CMD_INIT:
2653		error = config_init_component(cfdriver_ioconf_if_vioif,
2654		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2655		break;
2656	case MODULE_CMD_FINI:
2657		error = config_fini_component(cfdriver_ioconf_if_vioif,
2658		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2659		break;
2660	default:
2661		error = ENOTTY;
2662		break;
2663	}
2664#endif
2665
2666	return error;
2667}
2668