1/*	$NetBSD: if_vioif.c,v 1.71 2021/10/28 01:36:43 yamaguchi Exp $	*/
2
3/*
4 * Copyright (c) 2020 The NetBSD Foundation, Inc.
5 * Copyright (c) 2010 Minoura Makoto.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__KERNEL_RCSID(0, "$NetBSD: if_vioif.c,v 1.71 2021/10/28 01:36:43 yamaguchi Exp $");
31
32#ifdef _KERNEL_OPT
33#include "opt_net_mpsafe.h"
34#endif
35
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/kernel.h>
39#include <sys/atomic.h>
40#include <sys/bus.h>
41#include <sys/condvar.h>
42#include <sys/device.h>
43#include <sys/evcnt.h>
44#include <sys/intr.h>
45#include <sys/kmem.h>
46#include <sys/mbuf.h>
47#include <sys/mutex.h>
48#include <sys/sockio.h>
49#include <sys/syslog.h>
50#include <sys/cpu.h>
51#include <sys/module.h>
52#include <sys/pcq.h>
53#include <sys/workqueue.h>
54
55#include <dev/pci/virtioreg.h>
56#include <dev/pci/virtiovar.h>
57
58#include <net/if.h>
59#include <net/if_media.h>
60#include <net/if_ether.h>
61
62#include <net/bpf.h>
63
64#include "ioconf.h"
65
66#ifdef NET_MPSAFE
67#define VIOIF_MPSAFE	1
68#define VIOIF_MULTIQ	1
69#endif
70
71/*
72 * if_vioifreg.h:
73 */
74/* Configuration registers */
75#define VIRTIO_NET_CONFIG_MAC		 0 /* 8bit x 6byte */
76#define VIRTIO_NET_CONFIG_STATUS	 6 /* 16bit */
77#define VIRTIO_NET_CONFIG_MAX_VQ_PAIRS	 8 /* 16bit */
78#define VIRTIO_NET_CONFIG_MTU		10 /* 16bit */
79
80/* Feature bits */
81#define VIRTIO_NET_F_CSUM		__BIT(0)
82#define VIRTIO_NET_F_GUEST_CSUM		__BIT(1)
83#define VIRTIO_NET_F_MAC		__BIT(5)
84#define VIRTIO_NET_F_GSO		__BIT(6)
85#define VIRTIO_NET_F_GUEST_TSO4		__BIT(7)
86#define VIRTIO_NET_F_GUEST_TSO6		__BIT(8)
87#define VIRTIO_NET_F_GUEST_ECN		__BIT(9)
88#define VIRTIO_NET_F_GUEST_UFO		__BIT(10)
89#define VIRTIO_NET_F_HOST_TSO4		__BIT(11)
90#define VIRTIO_NET_F_HOST_TSO6		__BIT(12)
91#define VIRTIO_NET_F_HOST_ECN		__BIT(13)
92#define VIRTIO_NET_F_HOST_UFO		__BIT(14)
93#define VIRTIO_NET_F_MRG_RXBUF		__BIT(15)
94#define VIRTIO_NET_F_STATUS		__BIT(16)
95#define VIRTIO_NET_F_CTRL_VQ		__BIT(17)
96#define VIRTIO_NET_F_CTRL_RX		__BIT(18)
97#define VIRTIO_NET_F_CTRL_VLAN		__BIT(19)
98#define VIRTIO_NET_F_CTRL_RX_EXTRA	__BIT(20)
99#define VIRTIO_NET_F_GUEST_ANNOUNCE	__BIT(21)
100#define VIRTIO_NET_F_MQ			__BIT(22)
101
102#define VIRTIO_NET_FLAG_BITS \
103	VIRTIO_COMMON_FLAG_BITS \
104	"\x17""MQ" \
105	"\x16""GUEST_ANNOUNCE" \
106	"\x15""CTRL_RX_EXTRA" \
107	"\x14""CTRL_VLAN" \
108	"\x13""CTRL_RX" \
109	"\x12""CTRL_VQ" \
110	"\x11""STATUS" \
111	"\x10""MRG_RXBUF" \
112	"\x0f""HOST_UFO" \
113	"\x0e""HOST_ECN" \
114	"\x0d""HOST_TSO6" \
115	"\x0c""HOST_TSO4" \
116	"\x0b""GUEST_UFO" \
117	"\x0a""GUEST_ECN" \
118	"\x09""GUEST_TSO6" \
119	"\x08""GUEST_TSO4" \
120	"\x07""GSO" \
121	"\x06""MAC" \
122	"\x02""GUEST_CSUM" \
123	"\x01""CSUM"
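/*
 * The "\xNN" prefixes above are 1-origin bit positions in snprintb(3)
 * format: "\x01" labels __BIT(0) (CSUM) and "\x17" (23) labels __BIT(22)
 * (MQ).  The transport-level feature bits come from
 * VIRTIO_COMMON_FLAG_BITS prepended above.
 */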
124
125/* Status */
126#define VIRTIO_NET_S_LINK_UP	1
127
128/* Packet header structure */
129struct virtio_net_hdr {
130	uint8_t		flags;
131	uint8_t		gso_type;
132	uint16_t	hdr_len;
133	uint16_t	gso_size;
134	uint16_t	csum_start;
135	uint16_t	csum_offset;
136
137	uint16_t	num_buffers; /* VIRTIO_NET_F_MRG_RXBUF enabled or v1 */
138} __packed;
139
140#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
141#define VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
142#define VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
143#define VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
144#define VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
145#define VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */
146
147#define VIRTIO_NET_MAX_GSO_LEN		(65536+ETHER_HDR_LEN)
148
149/* Control virtqueue */
150struct virtio_net_ctrl_cmd {
151	uint8_t	class;
152	uint8_t	command;
153} __packed;
154#define VIRTIO_NET_CTRL_RX		0
155# define VIRTIO_NET_CTRL_RX_PROMISC	0
156# define VIRTIO_NET_CTRL_RX_ALLMULTI	1
157
158#define VIRTIO_NET_CTRL_MAC		1
159# define VIRTIO_NET_CTRL_MAC_TABLE_SET	0
160
161#define VIRTIO_NET_CTRL_VLAN		2
162# define VIRTIO_NET_CTRL_VLAN_ADD	0
163# define VIRTIO_NET_CTRL_VLAN_DEL	1
164
165#define VIRTIO_NET_CTRL_MQ			4
166# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET	0
167# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN	1
168# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX	0x8000
169
170struct virtio_net_ctrl_status {
171	uint8_t	ack;
172} __packed;
173#define VIRTIO_NET_OK			0
174#define VIRTIO_NET_ERR			1
175
176struct virtio_net_ctrl_rx {
177	uint8_t	onoff;
178} __packed;
179
180struct virtio_net_ctrl_mac_tbl {
181	uint32_t nentries;
182	uint8_t macs[][ETHER_ADDR_LEN];
183} __packed;
184
185struct virtio_net_ctrl_vlan {
186	uint16_t id;
187} __packed;
188
189struct virtio_net_ctrl_mq {
190	uint16_t virtqueue_pairs;
191} __packed;
192
193/*
194 * if_vioifvar.h:
195 */
196
197/*
198 * Locking notes:
199 * + a field in vioif_txqueue is protected by txq_lock (a spin mutex), and
200 *   a field in vioif_rxqueue is protected by rxq_lock (a spin mutex).
201 *      - more than one lock cannot be held at once
202 * + ctrlq_inuse is protected by ctrlq_wait_lock.
203 *      - other fields in vioif_ctrlqueue are protected by ctrlq_inuse
204 *      - txq_lock or rxq_lock cannot be held along with ctrlq_wait_lock
205 * + fields in vioif_softc except queues are protected by
206 *   sc->sc_lock(an adaptive mutex)
207 *      - the lock is held before acquisition of other locks
208 */
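/*
 * Illustrative sketch of the lock order described above (not actual
 * driver code): sc_lock, when needed, is taken first, and at most one of
 * txq_lock/rxq_lock is held at a time; ctrlq_wait_lock is never held
 * together with them.
 *
 *	mutex_enter(&sc->sc_lock);
 *	mutex_enter(txq->txq_lock);
 *	... update per-queue state ...
 *	mutex_exit(txq->txq_lock);
 *	mutex_exit(&sc->sc_lock);
 */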
209
210struct vioif_ctrl_cmdspec {
211	bus_dmamap_t	dmamap;
212	void		*buf;
213	bus_size_t	bufsize;
214};
215
216struct vioif_work {
217	struct work	 cookie;
218	void		(*func)(void *);
219	void		*arg;
220	unsigned int	 added;
221};
222
223struct vioif_txqueue {
224	kmutex_t		*txq_lock;	/* lock for tx operations */
225
226	struct virtqueue	*txq_vq;
227	bool			txq_stopping;
228	bool			txq_link_active;
229	pcq_t			*txq_intrq;
230
231	struct virtio_net_hdr	*txq_hdrs;
232	bus_dmamap_t		*txq_hdr_dmamaps;
233
234	struct mbuf		**txq_mbufs;
235	bus_dmamap_t		*txq_dmamaps;
236
237	void			*txq_deferred_transmit;
238	void			*txq_handle_si;
239	struct vioif_work	 txq_work;
240	bool			 txq_workqueue;
241	bool			 txq_active;
242
243	char			 txq_evgroup[16];
244	struct evcnt		 txq_defrag_failed;
245	struct evcnt		 txq_mbuf_load_failed;
246	struct evcnt		 txq_enqueue_reserve_failed;
247};
248
249struct vioif_rxqueue {
250	kmutex_t		*rxq_lock;	/* lock for rx operations */
251
252	struct virtqueue	*rxq_vq;
253	bool			rxq_stopping;
254
255	struct virtio_net_hdr	*rxq_hdrs;
256	bus_dmamap_t		*rxq_hdr_dmamaps;
257
258	struct mbuf		**rxq_mbufs;
259	bus_dmamap_t		*rxq_dmamaps;
260
261	void			*rxq_handle_si;
262	struct vioif_work	 rxq_work;
263	bool			 rxq_workqueue;
264	bool			 rxq_active;
265
266	char			 rxq_evgroup[16];
267	struct evcnt		 rxq_mbuf_add_failed;
268};
269
270struct vioif_ctrlqueue {
271	struct virtqueue		*ctrlq_vq;
272	enum {
273		FREE, INUSE, DONE
274	}				ctrlq_inuse;
275	kcondvar_t			ctrlq_wait;
276	kmutex_t			ctrlq_wait_lock;
277	struct lwp			*ctrlq_owner;
278
279	struct virtio_net_ctrl_cmd	*ctrlq_cmd;
280	struct virtio_net_ctrl_status	*ctrlq_status;
281	struct virtio_net_ctrl_rx	*ctrlq_rx;
282	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_uc;
283	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_mc;
284	struct virtio_net_ctrl_mq	*ctrlq_mq;
285
286	bus_dmamap_t			ctrlq_cmd_dmamap;
287	bus_dmamap_t			ctrlq_status_dmamap;
288	bus_dmamap_t			ctrlq_rx_dmamap;
289	bus_dmamap_t			ctrlq_tbl_uc_dmamap;
290	bus_dmamap_t			ctrlq_tbl_mc_dmamap;
291	bus_dmamap_t			ctrlq_mq_dmamap;
292
293	struct evcnt			ctrlq_cmd_load_failed;
294	struct evcnt			ctrlq_cmd_failed;
295};
296
297struct vioif_softc {
298	device_t		sc_dev;
299	kmutex_t		sc_lock;
300	struct sysctllog	*sc_sysctllog;
301
302	struct virtio_softc	*sc_virtio;
303	struct virtqueue	*sc_vqs;
304	u_int			 sc_hdr_size;
305
306	int			sc_max_nvq_pairs;
307	int			sc_req_nvq_pairs;
308	int			sc_act_nvq_pairs;
309
310	uint8_t			sc_mac[ETHER_ADDR_LEN];
311	struct ethercom		sc_ethercom;
312	short			sc_deferred_init_done;
313	bool			sc_link_active;
314
315	struct vioif_txqueue	*sc_txq;
316	struct vioif_rxqueue	*sc_rxq;
317
318	bool			sc_has_ctrl;
319	struct vioif_ctrlqueue	sc_ctrlq;
320
321	bus_dma_segment_t	sc_hdr_segs[1];
322	void			*sc_dmamem;
323	void			*sc_kmem;
324
325	void			*sc_ctl_softint;
326
327	struct workqueue	*sc_txrx_workqueue;
328	bool			 sc_txrx_workqueue_sysctl;
329	u_int			 sc_tx_intr_process_limit;
330	u_int			 sc_tx_process_limit;
331	u_int			 sc_rx_intr_process_limit;
332	u_int			 sc_rx_process_limit;
333};
334#define VIRTIO_NET_TX_MAXNSEGS		(16) /* XXX */
335#define VIRTIO_NET_CTRL_MAC_MAXENTRIES	(64) /* XXX */
336
337#define VIOIF_TX_INTR_PROCESS_LIMIT	256
338#define VIOIF_TX_PROCESS_LIMIT		256
339#define VIOIF_RX_INTR_PROCESS_LIMIT	0U
340#define VIOIF_RX_PROCESS_LIMIT		256
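/*
 * The *_INTR_PROCESS_LIMIT values bound how many slots are dequeued in
 * hard interrupt context; remaining work is deferred to softint or
 * workqueue context, where the *_PROCESS_LIMIT values apply per pass.
 * An rx interrupt limit of 0 defers all rx processing out of the
 * interrupt handler.
 */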
341
342#define VIOIF_WORKQUEUE_PRI		PRI_SOFTNET
343
344/* cfattach interface functions */
345static int	vioif_match(device_t, cfdata_t, void *);
346static void	vioif_attach(device_t, device_t, void *);
347static void	vioif_deferred_init(device_t);
348static int	vioif_finalize_teardown(device_t);
349
350/* ifnet interface functions */
351static int	vioif_init(struct ifnet *);
352static void	vioif_stop(struct ifnet *, int);
353static void	vioif_start(struct ifnet *);
354static void	vioif_start_locked(struct ifnet *, struct vioif_txqueue *);
355static int	vioif_transmit(struct ifnet *, struct mbuf *);
356static void	vioif_transmit_locked(struct ifnet *, struct vioif_txqueue *);
357static int	vioif_ioctl(struct ifnet *, u_long, void *);
358static void	vioif_watchdog(struct ifnet *);
359
360/* rx */
361static int	vioif_add_rx_mbuf(struct vioif_rxqueue *, int);
362static void	vioif_free_rx_mbuf(struct vioif_rxqueue *, int);
363static void	vioif_populate_rx_mbufs_locked(struct vioif_softc *,
364		    struct vioif_rxqueue *);
365static void	vioif_rx_queue_clear(struct vioif_rxqueue *);
366static bool	vioif_rx_deq_locked(struct vioif_softc *, struct virtio_softc *,
367		    struct vioif_rxqueue *, u_int);
368static int	vioif_rx_intr(void *);
369static void	vioif_rx_handle(void *);
370static void	vioif_rx_sched_handle(struct vioif_softc *,
371		    struct vioif_rxqueue *);
372static void	vioif_rx_drain(struct vioif_rxqueue *);
373
374/* tx */
375static int	vioif_tx_intr(void *);
376static void	vioif_tx_handle(void *);
377static void	vioif_tx_sched_handle(struct vioif_softc *,
378		    struct vioif_txqueue *);
379static void	vioif_tx_queue_clear(struct vioif_txqueue *);
380static bool	vioif_tx_deq_locked(struct vioif_softc *, struct virtio_softc *,
381		    struct vioif_txqueue *, u_int);
382static void	vioif_tx_drain(struct vioif_txqueue *);
383static void	vioif_deferred_transmit(void *);
384
385/* workqueue */
386static struct workqueue*
387		vioif_workq_create(const char *, pri_t, int, int);
388static void	vioif_workq_destroy(struct workqueue *);
389static void	vioif_workq_work(struct work *, void *);
390static void	vioif_work_set(struct vioif_work *, void(*)(void *), void *);
391static void	vioif_work_add(struct workqueue *, struct vioif_work *);
392static void	vioif_work_wait(struct workqueue *, struct vioif_work *);
393
394/* other control */
395static bool	vioif_is_link_up(struct vioif_softc *);
396static void	vioif_update_link_status(struct vioif_softc *);
397static int	vioif_ctrl_rx(struct vioif_softc *, int, bool);
398static int	vioif_set_promisc(struct vioif_softc *, bool);
399static int	vioif_set_allmulti(struct vioif_softc *, bool);
400static int	vioif_set_rx_filter(struct vioif_softc *);
401static int	vioif_rx_filter(struct vioif_softc *);
402static int	vioif_ctrl_intr(void *);
403static int	vioif_config_change(struct virtio_softc *);
404static void	vioif_ctl_softint(void *);
405static int	vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *, int);
406static void	vioif_enable_interrupt_vqpairs(struct vioif_softc *);
407static void	vioif_disable_interrupt_vqpairs(struct vioif_softc *);
408static int	vioif_setup_sysctl(struct vioif_softc *);
409static void	vioif_setup_stats(struct vioif_softc *);
410
411CFATTACH_DECL_NEW(vioif, sizeof(struct vioif_softc),
412		  vioif_match, vioif_attach, NULL, NULL);
413
414static int
415vioif_match(device_t parent, cfdata_t match, void *aux)
416{
417	struct virtio_attach_args *va = aux;
418
419	if (va->sc_childdevid == VIRTIO_DEVICE_ID_NETWORK)
420		return 1;
421
422	return 0;
423}
424
425static int
426vioif_dmamap_create(struct vioif_softc *sc, bus_dmamap_t *map,
427    bus_size_t size, int nsegs, const char *usage)
428{
429	int r;
430
431	r = bus_dmamap_create(virtio_dmat(sc->sc_virtio), size,
432	    nsegs, size, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, map);
433
434	if (r != 0) {
435		aprint_error_dev(sc->sc_dev, "%s dmamap creation failed, "
436		    "error code %d\n", usage, r);
437	}
438
439	return r;
440}
441
442static void
443vioif_dmamap_destroy(struct vioif_softc *sc, bus_dmamap_t *map)
444{
445
446	if (*map) {
447		bus_dmamap_destroy(virtio_dmat(sc->sc_virtio), *map);
448		*map = NULL;
449	}
450}
451
452static int
453vioif_dmamap_create_load(struct vioif_softc *sc, bus_dmamap_t *map,
454    void *buf, bus_size_t size, int nsegs, int rw, const char *usage)
455{
456	int r;
457
458	r = vioif_dmamap_create(sc, map, size, nsegs, usage);
459	if (r != 0)
460		return 1;
461
462	r = bus_dmamap_load(virtio_dmat(sc->sc_virtio), *map, buf,
463	    size, NULL, rw | BUS_DMA_NOWAIT);
464	if (r != 0) {
465		vioif_dmamap_destroy(sc, map);
466		aprint_error_dev(sc->sc_dev, "%s dmamap load failed. "
467		    "error code %d\n", usage, r);
468	}
469
470	return r;
471}
472
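/*
 * vioif_assign_mem: hand out "size" bytes from a pre-allocated region and
 * advance the cursor *p past them.  vioif_alloc_mems() uses this to slice
 * its single DMA allocation and single kmem allocation into per-queue
 * arrays.
 */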
473static void *
474vioif_assign_mem(intptr_t *p, size_t size)
475{
476	intptr_t rv;
477
478	rv = *p;
479	*p += size;
480
481	return (void *)rv;
482}
483
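/*
 * The virtqueue array is laid out as rx0, tx0, rx1, tx1, ..., optionally
 * followed by the control vq; vioif_attach() allocates the vqs with
 * virtio_alloc_vq() in the same order.
 */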
484static void
485vioif_alloc_queues(struct vioif_softc *sc)
486{
487	int nvq_pairs = sc->sc_max_nvq_pairs;
488	int nvqs = nvq_pairs * 2;
489	int i;
490
491	KASSERT(nvq_pairs <= VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX);
492
493	sc->sc_rxq = kmem_zalloc(sizeof(sc->sc_rxq[0]) * nvq_pairs,
494	    KM_SLEEP);
495	sc->sc_txq = kmem_zalloc(sizeof(sc->sc_txq[0]) * nvq_pairs,
496	    KM_SLEEP);
497
498	if (sc->sc_has_ctrl)
499		nvqs++;
500
501	sc->sc_vqs = kmem_zalloc(sizeof(sc->sc_vqs[0]) * nvqs, KM_SLEEP);
502	nvqs = 0;
503	for (i = 0; i < nvq_pairs; i++) {
504		sc->sc_rxq[i].rxq_vq = &sc->sc_vqs[nvqs++];
505		sc->sc_txq[i].txq_vq = &sc->sc_vqs[nvqs++];
506	}
507
508	if (sc->sc_has_ctrl)
509		sc->sc_ctrlq.ctrlq_vq = &sc->sc_vqs[nvqs++];
510}
511
512static void
513vioif_free_queues(struct vioif_softc *sc)
514{
515	int nvq_pairs = sc->sc_max_nvq_pairs;
516	int nvqs = nvq_pairs * 2;
517
518	if (sc->sc_ctrlq.ctrlq_vq)
519		nvqs++;
520
521	if (sc->sc_txq) {
522		kmem_free(sc->sc_txq, sizeof(sc->sc_txq[0]) * nvq_pairs);
523		sc->sc_txq = NULL;
524	}
525
526	if (sc->sc_rxq) {
527		kmem_free(sc->sc_rxq, sizeof(sc->sc_rxq[0]) * nvq_pairs);
528		sc->sc_rxq = NULL;
529	}
530
531	if (sc->sc_vqs) {
532		kmem_free(sc->sc_vqs, sizeof(sc->sc_vqs[0]) * nvqs);
533		sc->sc_vqs = NULL;
534	}
535}
536
537/* allocate memory */
538/*
539 * dma memory is used for:
540 *   rxq_hdrs[slot]:	 metadata array for received frames (READ)
541 *   txq_hdrs[slot]:	 metadata array for frames to be sent (WRITE)
542 *   ctrlq_cmd:		 command to be sent via ctrl vq (WRITE)
543 *   ctrlq_status:	 return value for a command via ctrl vq (READ)
544 *   ctrlq_rx:		 parameter for a VIRTIO_NET_CTRL_RX class command
545 *			 (WRITE)
546 *   ctrlq_mac_tbl_uc:	 unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
547 *			 class command (WRITE)
548 *   ctrlq_mac_tbl_mc:	 multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
549 *			 class command (WRITE)
550 * Only one instance of each ctrlq_* structure is allocated; they are
551 * protected by the ctrlq_inuse variable and the ctrlq_wait condvar.
552 */
553/*
554 * dynamically allocated memory is used for:
555 *   rxq_hdr_dmamaps[slot]:	bus_dmamap_t array for rxq_hdrs[slot]
556 *   txq_hdr_dmamaps[slot]:	bus_dmamap_t array for txq_hdrs[slot]
557 *   rxq_dmamaps[slot]:		bus_dmamap_t array for received payload
558 *   txq_dmamaps[slot]:		bus_dmamap_t array for sent payload
559 *   rxq_mbufs[slot]:		mbuf pointer array for received frames
560 *   txq_mbufs[slot]:		mbuf pointer array for sent frames
561 */
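/*
 * Worked example (hypothetical ring sizes): with one queue pair, 256-entry
 * rx and tx rings and the 12-byte virtio_net_hdr, the DMA region allocated
 * by vioif_alloc_mems() is 12 * (256 + 256) = 6144 bytes, plus the ctrlq_*
 * structures when the control vq is available.
 */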
562static int
563vioif_alloc_mems(struct vioif_softc *sc)
564{
565	struct virtio_softc *vsc = sc->sc_virtio;
566	struct vioif_txqueue *txq;
567	struct vioif_rxqueue *rxq;
568	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
569	int allocsize, allocsize2, r, rsegs, i, qid;
570	void *vaddr;
571	intptr_t p;
572
573	allocsize = 0;
574	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
575		rxq = &sc->sc_rxq[qid];
576		txq = &sc->sc_txq[qid];
577
578		allocsize += sizeof(struct virtio_net_hdr) *
579			(rxq->rxq_vq->vq_num + txq->txq_vq->vq_num);
580	}
581	if (sc->sc_has_ctrl) {
582		allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1;
583		allocsize += sizeof(struct virtio_net_ctrl_status) * 1;
584		allocsize += sizeof(struct virtio_net_ctrl_rx) * 1;
585		allocsize += sizeof(struct virtio_net_ctrl_mac_tbl)
586		    + sizeof(struct virtio_net_ctrl_mac_tbl)
587		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES;
588		allocsize += sizeof(struct virtio_net_ctrl_mq) * 1;
589	}
590	r = bus_dmamem_alloc(virtio_dmat(vsc), allocsize, 0, 0,
591	    &sc->sc_hdr_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
592	if (r != 0) {
593		aprint_error_dev(sc->sc_dev,
594		    "DMA memory allocation failed, size %d, "
595		    "error code %d\n", allocsize, r);
596		goto err_none;
597	}
598	r = bus_dmamem_map(virtio_dmat(vsc),
599	    &sc->sc_hdr_segs[0], 1, allocsize, &vaddr, BUS_DMA_NOWAIT);
600	if (r != 0) {
601		aprint_error_dev(sc->sc_dev,
602		    "DMA memory map failed, error code %d\n", r);
603		goto err_dmamem_alloc;
604	}
605
606	memset(vaddr, 0, allocsize);
607	sc->sc_dmamem = vaddr;
608	p = (intptr_t) vaddr;
609
610	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
611		rxq = &sc->sc_rxq[qid];
612		txq = &sc->sc_txq[qid];
613
614		rxq->rxq_hdrs = vioif_assign_mem(&p,
615		    sizeof(struct virtio_net_hdr) * rxq->rxq_vq->vq_num);
616		txq->txq_hdrs = vioif_assign_mem(&p,
617		    sizeof(struct virtio_net_hdr) * txq->txq_vq->vq_num);
618	}
619	if (sc->sc_has_ctrl) {
620		ctrlq->ctrlq_cmd = vioif_assign_mem(&p,
621		    sizeof(*ctrlq->ctrlq_cmd));
622		ctrlq->ctrlq_status = vioif_assign_mem(&p,
623		    sizeof(*ctrlq->ctrlq_status));
624		ctrlq->ctrlq_rx = vioif_assign_mem(&p,
625		    sizeof(*ctrlq->ctrlq_rx));
626		ctrlq->ctrlq_mac_tbl_uc = vioif_assign_mem(&p,
627		    sizeof(*ctrlq->ctrlq_mac_tbl_uc));
628		ctrlq->ctrlq_mac_tbl_mc = vioif_assign_mem(&p,
629		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
630		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES);
631		ctrlq->ctrlq_mq = vioif_assign_mem(&p, sizeof(*ctrlq->ctrlq_mq));
632	}
633
634	allocsize2 = 0;
635	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
636		int rxqsize, txqsize;
637
638		rxq = &sc->sc_rxq[qid];
639		txq = &sc->sc_txq[qid];
640		rxqsize = rxq->rxq_vq->vq_num;
641		txqsize = txq->txq_vq->vq_num;
642
643		allocsize2 += sizeof(rxq->rxq_dmamaps[0]) * rxqsize;
644		allocsize2 += sizeof(rxq->rxq_hdr_dmamaps[0]) * rxqsize;
645		allocsize2 += sizeof(rxq->rxq_mbufs[0]) * rxqsize;
646
647		allocsize2 += sizeof(txq->txq_dmamaps[0]) * txqsize;
648		allocsize2 += sizeof(txq->txq_hdr_dmamaps[0]) * txqsize;
649		allocsize2 += sizeof(txq->txq_mbufs[0]) * txqsize;
650	}
651	vaddr = kmem_zalloc(allocsize2, KM_SLEEP);
652	sc->sc_kmem = vaddr;
653	p = (intptr_t) vaddr;
654
655	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
656		int rxqsize, txqsize;
657		rxq = &sc->sc_rxq[qid];
658		txq = &sc->sc_txq[qid];
659		rxqsize = rxq->rxq_vq->vq_num;
660		txqsize = txq->txq_vq->vq_num;
661
662		rxq->rxq_hdr_dmamaps = vioif_assign_mem(&p,
663		    sizeof(rxq->rxq_hdr_dmamaps[0]) * rxqsize);
664		txq->txq_hdr_dmamaps = vioif_assign_mem(&p,
665		    sizeof(txq->txq_hdr_dmamaps[0]) * txqsize);
666		rxq->rxq_dmamaps = vioif_assign_mem(&p,
667		    sizeof(rxq->rxq_dmamaps[0]) * rxqsize);
668		txq->txq_dmamaps = vioif_assign_mem(&p,
669		    sizeof(txq->txq_dmamaps[0]) * txqsize);
670		rxq->rxq_mbufs = vioif_assign_mem(&p,
671		    sizeof(rxq->rxq_mbufs[0]) * rxqsize);
672		txq->txq_mbufs = vioif_assign_mem(&p,
673		    sizeof(txq->txq_mbufs[0]) * txqsize);
674	}
675
676	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
677		rxq = &sc->sc_rxq[qid];
678		txq = &sc->sc_txq[qid];
679
680		for (i = 0; i < rxq->rxq_vq->vq_num; i++) {
681			r = vioif_dmamap_create_load(sc, &rxq->rxq_hdr_dmamaps[i],
682			    &rxq->rxq_hdrs[i], sc->sc_hdr_size, 1,
683			    BUS_DMA_READ, "rx header");
684			if (r != 0)
685				goto err_reqs;
686
687			r = vioif_dmamap_create(sc, &rxq->rxq_dmamaps[i],
688			    MCLBYTES, 1, "rx payload");
689			if (r != 0)
690				goto err_reqs;
691		}
692
693		for (i = 0; i < txq->txq_vq->vq_num; i++) {
694			r = vioif_dmamap_create_load(sc, &txq->txq_hdr_dmamaps[i],
695			    &txq->txq_hdrs[i], sc->sc_hdr_size, 1,
696			    BUS_DMA_READ, "tx header");
697			if (r != 0)
698				goto err_reqs;
699
700			r = vioif_dmamap_create(sc, &txq->txq_dmamaps[i], ETHER_MAX_LEN,
701			    VIRTIO_NET_TX_MAXNSEGS, "tx payload");
702			if (r != 0)
703				goto err_reqs;
704		}
705	}
706
707	if (sc->sc_has_ctrl) {
708		/* control vq class & command */
709		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_cmd_dmamap,
710		    ctrlq->ctrlq_cmd, sizeof(*ctrlq->ctrlq_cmd), 1,
711		    BUS_DMA_WRITE, "control command");
712		if (r != 0)
713			goto err_reqs;
714
715		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_status_dmamap,
716		    ctrlq->ctrlq_status, sizeof(*ctrlq->ctrlq_status), 1,
717		    BUS_DMA_READ, "control status");
718		if (r != 0)
719			goto err_reqs;
720
721		/* control vq rx mode command parameter */
722		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_rx_dmamap,
723		    ctrlq->ctrlq_rx, sizeof(*ctrlq->ctrlq_rx), 1,
724		    BUS_DMA_WRITE, "rx mode control command");
725		if (r != 0)
726			goto err_reqs;
727
728		/* multiqueue set command */
729		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_mq_dmamap,
730		    ctrlq->ctrlq_mq, sizeof(*ctrlq->ctrlq_mq), 1,
731		    BUS_DMA_WRITE, "multiqueue set command");
732		if (r != 0)
733			goto err_reqs;
734
735		/* control vq MAC filter table for unicast */
736		/* do not load now since its length is variable */
737		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_uc_dmamap,
738		    sizeof(*ctrlq->ctrlq_mac_tbl_uc) + 0, 1,
739		    "unicast MAC address filter command");
740		if (r != 0)
741			goto err_reqs;
742
743		/* control vq MAC filter table for multicast */
744		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_mc_dmamap,
745		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
746		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES, 1,
747		    "multicast MAC address filter command");
748	}
749
750	return 0;
751
752err_reqs:
753	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_mc_dmamap);
754	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_uc_dmamap);
755	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_rx_dmamap);
756	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_status_dmamap);
757	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_cmd_dmamap);
758	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
759		rxq = &sc->sc_rxq[qid];
760		txq = &sc->sc_txq[qid];
761
762		for (i = 0; i < txq->txq_vq->vq_num; i++) {
763			vioif_dmamap_destroy(sc, &txq->txq_dmamaps[i]);
764			vioif_dmamap_destroy(sc, &txq->txq_hdr_dmamaps[i]);
765		}
766		for (i = 0; i < rxq->rxq_vq->vq_num; i++) {
767			vioif_dmamap_destroy(sc, &rxq->rxq_dmamaps[i]);
768			vioif_dmamap_destroy(sc, &rxq->rxq_hdr_dmamaps[i]);
769		}
770	}
771	if (sc->sc_kmem) {
772		kmem_free(sc->sc_kmem, allocsize2);
773		sc->sc_kmem = NULL;
774	}
775	bus_dmamem_unmap(virtio_dmat(vsc), sc->sc_dmamem, allocsize);
776err_dmamem_alloc:
777	bus_dmamem_free(virtio_dmat(vsc), &sc->sc_hdr_segs[0], 1);
778err_none:
779	return -1;
780}
781
782static void
783vioif_attach(device_t parent, device_t self, void *aux)
784{
785	struct vioif_softc *sc = device_private(self);
786	struct virtio_softc *vsc = device_private(parent);
787	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
788	struct vioif_txqueue *txq;
789	struct vioif_rxqueue *rxq;
790	uint64_t features, req_features;
791	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
792	u_int softint_flags;
793	int r, i, nvqs=0, req_flags;
794	char xnamebuf[MAXCOMLEN];
795
796	if (virtio_child(vsc) != NULL) {
797		aprint_normal(": child already attached for %s; "
798		    "something wrong...\n", device_xname(parent));
799		return;
800	}
801
802	sc->sc_dev = self;
803	sc->sc_virtio = vsc;
804	sc->sc_link_active = false;
805
806	sc->sc_max_nvq_pairs = 1;
807	sc->sc_req_nvq_pairs = 1;
808	sc->sc_act_nvq_pairs = 1;
809	sc->sc_txrx_workqueue_sysctl = true;
810	sc->sc_tx_intr_process_limit = VIOIF_TX_INTR_PROCESS_LIMIT;
811	sc->sc_tx_process_limit = VIOIF_TX_PROCESS_LIMIT;
812	sc->sc_rx_intr_process_limit = VIOIF_RX_INTR_PROCESS_LIMIT;
813	sc->sc_rx_process_limit = VIOIF_RX_PROCESS_LIMIT;
814
815	mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);
816
817	snprintf(xnamebuf, sizeof(xnamebuf), "%s_txrx", device_xname(self));
818	sc->sc_txrx_workqueue = vioif_workq_create(xnamebuf, VIOIF_WORKQUEUE_PRI,
819	    IPL_NET, WQ_PERCPU | WQ_MPSAFE);
820	if (sc->sc_txrx_workqueue == NULL)
821		goto err;
822
823	req_flags = 0;
824
825#ifdef VIOIF_MPSAFE
826	req_flags |= VIRTIO_F_INTR_MPSAFE;
827#endif
828	req_flags |= VIRTIO_F_INTR_MSIX;
829
830	req_features =
831	    VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | VIRTIO_NET_F_CTRL_VQ |
832	    VIRTIO_NET_F_CTRL_RX | VIRTIO_F_NOTIFY_ON_EMPTY;
833	req_features |= VIRTIO_F_RING_EVENT_IDX;
834#ifdef VIOIF_MULTIQ
835	req_features |= VIRTIO_NET_F_MQ;
836#endif
837	virtio_child_attach_start(vsc, self, IPL_NET, NULL,
838	    vioif_config_change, virtio_vq_intrhand, req_flags,
839	    req_features, VIRTIO_NET_FLAG_BITS);
840
841	features = virtio_features(vsc);
842	if (features == 0)
843		goto err;
844
845	if (features & VIRTIO_NET_F_MAC) {
846		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
847			sc->sc_mac[i] = virtio_read_device_config_1(vsc,
848			    VIRTIO_NET_CONFIG_MAC + i);
849		}
850	} else {
851		/* code stolen from sys/net/if_tap.c */
852		struct timeval tv;
853		uint32_t ui;
854		getmicrouptime(&tv);
855		ui = (tv.tv_sec ^ tv.tv_usec) & 0xffffff;
856		memcpy(sc->sc_mac+3, (uint8_t *)&ui, 3);
857		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
858			virtio_write_device_config_1(vsc,
859			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
860		}
861	}
862
863	/* 'Ethernet' with a capital follows other Ethernet drivers' attachment output */
864	aprint_normal_dev(self, "Ethernet address %s\n",
865	    ether_sprintf(sc->sc_mac));
866
867	if (features & (VIRTIO_NET_F_MRG_RXBUF | VIRTIO_F_VERSION_1)) {
868		sc->sc_hdr_size = sizeof(struct virtio_net_hdr);
869	} else {
870		sc->sc_hdr_size = offsetof(struct virtio_net_hdr, num_buffers);
871	}
872
873	if ((features & VIRTIO_NET_F_CTRL_VQ) &&
874	    (features & VIRTIO_NET_F_CTRL_RX)) {
875		sc->sc_has_ctrl = true;
876
877		cv_init(&ctrlq->ctrlq_wait, "ctrl_vq");
878		mutex_init(&ctrlq->ctrlq_wait_lock, MUTEX_DEFAULT, IPL_NET);
879		ctrlq->ctrlq_inuse = FREE;
880	} else {
881		sc->sc_has_ctrl = false;
882	}
883
884	if (sc->sc_has_ctrl && (features & VIRTIO_NET_F_MQ)) {
885		sc->sc_max_nvq_pairs = virtio_read_device_config_2(vsc,
886		    VIRTIO_NET_CONFIG_MAX_VQ_PAIRS);
887
888		if (sc->sc_max_nvq_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
889			goto err;
890
891		/* Limit the number of queue pairs to use */
892		sc->sc_req_nvq_pairs = MIN(sc->sc_max_nvq_pairs, ncpu);
893	}
894
895	vioif_alloc_queues(sc);
896	virtio_child_attach_set_vqs(vsc, sc->sc_vqs, sc->sc_req_nvq_pairs);
897
898#ifdef VIOIF_MPSAFE
899	softint_flags = SOFTINT_NET | SOFTINT_MPSAFE;
900#else
901	softint_flags = SOFTINT_NET;
902#endif
903
904	/*
905	 * Allocating virtqueues
906	 */
907	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
908		rxq = &sc->sc_rxq[i];
909		txq = &sc->sc_txq[i];
910		char qname[32];
911
912		rxq->rxq_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
913
914		rxq->rxq_handle_si = softint_establish(softint_flags,
915		    vioif_rx_handle, rxq);
916		if (rxq->rxq_handle_si == NULL) {
917			aprint_error_dev(self, "cannot establish rx softint\n");
918			goto err;
919		}
920
921		snprintf(qname, sizeof(qname), "rx%d", i);
922		r = virtio_alloc_vq(vsc, rxq->rxq_vq, nvqs,
923		    MCLBYTES + sc->sc_hdr_size, 2, qname);
924		if (r != 0)
925			goto err;
926		nvqs++;
927		rxq->rxq_vq->vq_intrhand = vioif_rx_intr;
928		rxq->rxq_vq->vq_intrhand_arg = (void *)rxq;
929		rxq->rxq_stopping = true;
930		vioif_work_set(&rxq->rxq_work, vioif_rx_handle, rxq);
931
932		txq->txq_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
933
934		txq->txq_deferred_transmit = softint_establish(softint_flags,
935		    vioif_deferred_transmit, txq);
936		if (txq->txq_deferred_transmit == NULL) {
937			aprint_error_dev(self, "cannot establish tx softint\n");
938			goto err;
939		}
940		txq->txq_handle_si = softint_establish(softint_flags,
941		    vioif_tx_handle, txq);
942		if (txq->txq_handle_si == NULL) {
943			aprint_error_dev(self, "cannot establish tx softint\n");
944			goto err;
945		}
946
947		snprintf(qname, sizeof(qname), "tx%d", i);
948		r = virtio_alloc_vq(vsc, txq->txq_vq, nvqs,
949		    sc->sc_hdr_size + (ETHER_MAX_LEN - ETHER_HDR_LEN),
950		    VIRTIO_NET_TX_MAXNSEGS + 1, qname);
951		if (r != 0)
952			goto err;
953		nvqs++;
954		txq->txq_vq->vq_intrhand = vioif_tx_intr;
955		txq->txq_vq->vq_intrhand_arg = (void *)txq;
956		txq->txq_link_active = sc->sc_link_active;
957		txq->txq_stopping = false;
958		txq->txq_intrq = pcq_create(txq->txq_vq->vq_num, KM_SLEEP);
959		vioif_work_set(&txq->txq_work, vioif_tx_handle, txq);
960	}
961
962	if (sc->sc_has_ctrl) {
963		/*
964		 * Allocating a virtqueue for control channel
965		 */
966		r = virtio_alloc_vq(vsc, ctrlq->ctrlq_vq, nvqs,
967		    NBPG, 1, "control");
968		if (r != 0) {
969			aprint_error_dev(self, "failed to allocate "
970			    "a virtqueue for control channel, error code %d\n",
971			    r);
972
973			sc->sc_has_ctrl = false;
974			cv_destroy(&ctrlq->ctrlq_wait);
975			mutex_destroy(&ctrlq->ctrlq_wait_lock);
976		} else {
977			nvqs++;
978			ctrlq->ctrlq_vq->vq_intrhand = vioif_ctrl_intr;
979			ctrlq->ctrlq_vq->vq_intrhand_arg = (void *) ctrlq;
980		}
981	}
982
983	sc->sc_ctl_softint = softint_establish(softint_flags,
984	    vioif_ctl_softint, sc);
985	if (sc->sc_ctl_softint == NULL) {
986		aprint_error_dev(self, "cannot establish ctl softint\n");
987		goto err;
988	}
989
990	if (vioif_alloc_mems(sc) < 0)
991		goto err;
992
993	if (virtio_child_attach_finish(vsc) != 0)
994		goto err;
995
996	if (vioif_setup_sysctl(sc) != 0) {
997		aprint_error_dev(self, "unable to create sysctl node\n");
998		/* continue */
999	}
1000
1001	vioif_setup_stats(sc);
1002
1003	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
1004	ifp->if_softc = sc;
1005	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1006#ifdef VIOIF_MPSAFE
1007	ifp->if_extflags = IFEF_MPSAFE;
1008#endif
1009	ifp->if_start = vioif_start;
1010	if (sc->sc_req_nvq_pairs > 1)
1011		ifp->if_transmit = vioif_transmit;
1012	ifp->if_ioctl = vioif_ioctl;
1013	ifp->if_init = vioif_init;
1014	ifp->if_stop = vioif_stop;
1015	ifp->if_capabilities = 0;
1016	ifp->if_watchdog = vioif_watchdog;
1017	txq = &sc->sc_txq[0];
1018	IFQ_SET_MAXLEN(&ifp->if_snd, MAX(txq->txq_vq->vq_num, IFQ_MAXLEN));
1019	IFQ_SET_READY(&ifp->if_snd);
1020
1021	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
1022
1023	if_attach(ifp);
1024	if_deferred_start_init(ifp, NULL);
1025	ether_ifattach(ifp, sc->sc_mac);
1026
1027	return;
1028
1029err:
1030	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
1031		rxq = &sc->sc_rxq[i];
1032		txq = &sc->sc_txq[i];
1033
1034		if (rxq->rxq_lock) {
1035			mutex_obj_free(rxq->rxq_lock);
1036			rxq->rxq_lock = NULL;
1037		}
1038
1039		if (rxq->rxq_handle_si) {
1040			softint_disestablish(rxq->rxq_handle_si);
1041			rxq->rxq_handle_si = NULL;
1042		}
1043
1044		if (txq->txq_lock) {
1045			mutex_obj_free(txq->txq_lock);
1046			txq->txq_lock = NULL;
1047		}
1048
1049		if (txq->txq_handle_si) {
1050			softint_disestablish(txq->txq_handle_si);
1051			txq->txq_handle_si = NULL;
1052		}
1053
1054		if (txq->txq_deferred_transmit) {
1055			softint_disestablish(txq->txq_deferred_transmit);
1056			txq->txq_deferred_transmit = NULL;
1057		}
1058
1059		if (txq->txq_intrq) {
1060			pcq_destroy(txq->txq_intrq);
1061			txq->txq_intrq = NULL;
1062		}
1063	}
1064
1065	if (sc->sc_has_ctrl) {
1066		cv_destroy(&ctrlq->ctrlq_wait);
1067		mutex_destroy(&ctrlq->ctrlq_wait_lock);
1068	}
1069
1070	while (nvqs > 0)
1071		virtio_free_vq(vsc, &sc->sc_vqs[--nvqs]);
1072
1073	vioif_free_queues(sc);
1074	mutex_destroy(&sc->sc_lock);
1075	virtio_child_attach_failed(vsc);
1076	config_finalize_register(self, vioif_finalize_teardown);
1077
1078	return;
1079}
1080
1081static int
1082vioif_finalize_teardown(device_t self)
1083{
1084	struct vioif_softc *sc = device_private(self);
1085
1086	if (sc->sc_txrx_workqueue != NULL) {
1087		vioif_workq_destroy(sc->sc_txrx_workqueue);
1088		sc->sc_txrx_workqueue = NULL;
1089	}
1090
1091	return 0;
1092}
1093
1094/* we need interrupts in order to turn promiscuous mode off */
1095static void
1096vioif_deferred_init(device_t self)
1097{
1098	struct vioif_softc *sc = device_private(self);
1099	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1100	int r;
1101
1102	if (ifp->if_flags & IFF_PROMISC)
1103		return;
1104
1105	r =  vioif_set_promisc(sc, false);
1106	if (r != 0)
1107		aprint_error_dev(self, "resetting promisc mode failed, "
1108		    "error code %d\n", r);
1109}
1110
1111static void
1112vioif_enable_interrupt_vqpairs(struct vioif_softc *sc)
1113{
1114	struct virtio_softc *vsc = sc->sc_virtio;
1115	struct vioif_txqueue *txq;
1116	struct vioif_rxqueue *rxq;
1117	int i;
1118
1119	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1120		txq = &sc->sc_txq[i];
1121		rxq = &sc->sc_rxq[i];
1122
1123		virtio_start_vq_intr(vsc, txq->txq_vq);
1124		virtio_start_vq_intr(vsc, rxq->rxq_vq);
1125	}
1126}
1127
1128static void
1129vioif_disable_interrupt_vqpairs(struct vioif_softc *sc)
1130{
1131	struct virtio_softc *vsc = sc->sc_virtio;
1132	struct vioif_txqueue *txq;
1133	struct vioif_rxqueue *rxq;
1134	int i;
1135
1136	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1137		rxq = &sc->sc_rxq[i];
1138		txq = &sc->sc_txq[i];
1139
1140		virtio_stop_vq_intr(vsc, rxq->rxq_vq);
1141		virtio_stop_vq_intr(vsc, txq->txq_vq);
1142	}
1143}
1144
1145/*
1146 * Interface functions for ifnet
1147 */
1148static int
1149vioif_init(struct ifnet *ifp)
1150{
1151	struct vioif_softc *sc = ifp->if_softc;
1152	struct virtio_softc *vsc = sc->sc_virtio;
1153	struct vioif_rxqueue *rxq;
1154	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1155	int r, i;
1156
1157	vioif_stop(ifp, 0);
1158
1159	r = virtio_reinit_start(vsc);
1160	if (r != 0) {
1161		log(LOG_ERR, "%s: reset failed\n", ifp->if_xname);
1162		return EIO;
1163	}
1164
1165	virtio_negotiate_features(vsc, virtio_features(vsc));
1166
1167	for (i = 0; i < sc->sc_req_nvq_pairs; i++) {
1168		rxq = &sc->sc_rxq[i];
1169
1170		/* Have to set false before vioif_populate_rx_mbufs */
1171		mutex_enter(rxq->rxq_lock);
1172		rxq->rxq_stopping = false;
1173		vioif_populate_rx_mbufs_locked(sc, rxq);
1174		mutex_exit(rxq->rxq_lock);
1175
1176	}
1177
1178	virtio_reinit_end(vsc);
1179
1180	if (sc->sc_has_ctrl)
1181		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
1182
1183	r = vioif_ctrl_mq_vq_pairs_set(sc, sc->sc_req_nvq_pairs);
1184	if (r == 0)
1185		sc->sc_act_nvq_pairs = sc->sc_req_nvq_pairs;
1186	else
1187		sc->sc_act_nvq_pairs = 1;
1188
1189	for (i = 0; i < sc->sc_act_nvq_pairs; i++)
1190		sc->sc_txq[i].txq_stopping = false;
1191
1192	vioif_enable_interrupt_vqpairs(sc);
1193
1194	if (!sc->sc_deferred_init_done) {
1195		sc->sc_deferred_init_done = 1;
1196		if (sc->sc_has_ctrl)
1197			vioif_deferred_init(sc->sc_dev);
1198	}
1199
1200	vioif_update_link_status(sc);
1201	ifp->if_flags |= IFF_RUNNING;
1202	ifp->if_flags &= ~IFF_OACTIVE;
1203	vioif_rx_filter(sc);
1204
1205	return 0;
1206}
1207
1208static void
1209vioif_stop(struct ifnet *ifp, int disable)
1210{
1211	struct vioif_softc *sc = ifp->if_softc;
1212	struct virtio_softc *vsc = sc->sc_virtio;
1213	struct vioif_txqueue *txq;
1214	struct vioif_rxqueue *rxq;
1215	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1216	int i;
1217
1218	/* Take the locks to ensure that ongoing TX/RX finish */
1219	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1220		txq = &sc->sc_txq[i];
1221		rxq = &sc->sc_rxq[i];
1222
1223		mutex_enter(rxq->rxq_lock);
1224		rxq->rxq_stopping = true;
1225		mutex_exit(rxq->rxq_lock);
1226
1227		mutex_enter(txq->txq_lock);
1228		txq->txq_stopping = true;
1229		mutex_exit(txq->txq_lock);
1230	}
1231
1232	/* disable interrupts */
1233	vioif_disable_interrupt_vqpairs(sc);
1234
1235	if (sc->sc_has_ctrl)
1236		virtio_stop_vq_intr(vsc, ctrlq->ctrlq_vq);
1237
1238	/* only way to stop I/O and DMA is resetting... */
1239	virtio_reset(vsc);
1240
1241	/* rendezvous for finish of handlers */
1242	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1243		txq = &sc->sc_txq[i];
1244		rxq = &sc->sc_rxq[i];
1245
1246		mutex_enter(rxq->rxq_lock);
1247		mutex_exit(rxq->rxq_lock);
1248		vioif_work_wait(sc->sc_txrx_workqueue, &rxq->rxq_work);
1249
1250		mutex_enter(txq->txq_lock);
1251		mutex_exit(txq->txq_lock);
1252		vioif_work_wait(sc->sc_txrx_workqueue, &txq->txq_work);
1253	}
1254
1255	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1256		vioif_rx_queue_clear(&sc->sc_rxq[i]);
1257		vioif_tx_queue_clear(&sc->sc_txq[i]);
1258	}
1259
1260	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1261	sc->sc_link_active = false;
1262
1263	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1264		txq = &sc->sc_txq[i];
1265		rxq = &sc->sc_rxq[i];
1266
1267		txq->txq_link_active = false;
1268
1269		if (disable)
1270			vioif_rx_drain(rxq);
1271
1272		vioif_tx_drain(txq);
1273	}
1274}
1275
1276static void
1277vioif_send_common_locked(struct ifnet *ifp, struct vioif_txqueue *txq,
1278    bool is_transmit)
1279{
1280	struct vioif_softc *sc = ifp->if_softc;
1281	struct virtio_softc *vsc = sc->sc_virtio;
1282	struct virtqueue *vq = txq->txq_vq;
1283	struct virtio_net_hdr *hdr;
1284	struct mbuf *m;
1285	int queued = 0;
1286
1287	KASSERT(mutex_owned(txq->txq_lock));
1288
1289	if ((ifp->if_flags & IFF_RUNNING) == 0)
1290		return;
1291
1292	if (!txq->txq_link_active || txq->txq_stopping)
1293		return;
1294
1295	if ((ifp->if_flags & IFF_OACTIVE) != 0 && !is_transmit)
1296		return;
1297
1298	for (;;) {
1299		int slot, r;
1300
1301		if (is_transmit)
1302			m = pcq_get(txq->txq_intrq);
1303		else
1304			IFQ_DEQUEUE(&ifp->if_snd, m);
1305
1306		if (m == NULL)
1307			break;
1308
1309		r = virtio_enqueue_prep(vsc, vq, &slot);
1310		if (r == EAGAIN) {
1311			ifp->if_flags |= IFF_OACTIVE;
1312			m_freem(m);
1313			break;
1314		}
1315		if (r != 0)
1316			panic("enqueue_prep for a tx buffer");
1317
1318		r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1319		    txq->txq_dmamaps[slot], m, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1320		if (r != 0) {
1321			/* maybe just too fragmented */
1322			struct mbuf *newm;
1323
1324			newm = m_defrag(m, M_NOWAIT);
1325			if (newm == NULL) {
1326				txq->txq_defrag_failed.ev_count++;
1327				goto skip;
1328			}
1329
1330			m = newm;
1331			r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1332			    txq->txq_dmamaps[slot], m,
1333			    BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1334			if (r != 0) {
1335				txq->txq_mbuf_load_failed.ev_count++;
1336skip:
1337				m_freem(m);
1338				virtio_enqueue_abort(vsc, vq, slot);
1339				continue;
1340			}
1341		}
1342
1343		/* This should actually never fail */
1344		r = virtio_enqueue_reserve(vsc, vq, slot,
1345		    txq->txq_dmamaps[slot]->dm_nsegs + 1);
1346		if (r != 0) {
1347			txq->txq_enqueue_reserve_failed.ev_count++;
1348			bus_dmamap_unload(virtio_dmat(vsc),
1349			     txq->txq_dmamaps[slot]);
1350			/* slot already freed by virtio_enqueue_reserve */
1351			m_freem(m);
1352			continue;
1353		}
1354
1355		txq->txq_mbufs[slot] = m;
1356
1357		hdr = &txq->txq_hdrs[slot];
1358		memset(hdr, 0, sc->sc_hdr_size);
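		/*
		 * No checksum/GSO offload is negotiated (if_capabilities is
		 * left at 0), so the virtio-net header is sent all-zero.
		 */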
1359		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_dmamaps[slot],
1360		    0, txq->txq_dmamaps[slot]->dm_mapsize,
1361		    BUS_DMASYNC_PREWRITE);
1362		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_hdr_dmamaps[slot],
1363		    0, txq->txq_hdr_dmamaps[slot]->dm_mapsize,
1364		    BUS_DMASYNC_PREWRITE);
1365		virtio_enqueue(vsc, vq, slot, txq->txq_hdr_dmamaps[slot], true);
1366		virtio_enqueue(vsc, vq, slot, txq->txq_dmamaps[slot], true);
1367		virtio_enqueue_commit(vsc, vq, slot, false);
1368
1369		queued++;
1370		bpf_mtap(ifp, m, BPF_D_OUT);
1371	}
1372
1373	if (queued > 0) {
1374		virtio_enqueue_commit(vsc, vq, -1, true);
1375		ifp->if_timer = 5;
1376	}
1377}
1378
1379static void
1380vioif_start_locked(struct ifnet *ifp, struct vioif_txqueue *txq)
1381{
1382
1383	/*
1384	 * ifp->if_obytes and ifp->if_omcasts are added in if_transmit()@if.c.
1385	 */
1386	vioif_send_common_locked(ifp, txq, false);
1387
1388}
1389
1390static void
1391vioif_start(struct ifnet *ifp)
1392{
1393	struct vioif_softc *sc = ifp->if_softc;
1394	struct vioif_txqueue *txq = &sc->sc_txq[0];
1395
1396#ifdef VIOIF_MPSAFE
1397	KASSERT(if_is_mpsafe(ifp));
1398#endif
1399
1400	mutex_enter(txq->txq_lock);
1401	vioif_start_locked(ifp, txq);
1402	mutex_exit(txq->txq_lock);
1403}
1404
1405static inline int
1406vioif_select_txqueue(struct ifnet *ifp, struct mbuf *m)
1407{
1408	struct vioif_softc *sc = ifp->if_softc;
1409	u_int cpuid = cpu_index(curcpu());
1410
1411	return cpuid % sc->sc_act_nvq_pairs;
1412}
1413
1414static void
1415vioif_transmit_locked(struct ifnet *ifp, struct vioif_txqueue *txq)
1416{
1417
1418	vioif_send_common_locked(ifp, txq, true);
1419}
1420
1421static int
1422vioif_transmit(struct ifnet *ifp, struct mbuf *m)
1423{
1424	struct vioif_softc *sc = ifp->if_softc;
1425	struct vioif_txqueue *txq;
1426	int qid;
1427
1428	qid = vioif_select_txqueue(ifp, m);
1429	txq = &sc->sc_txq[qid];
1430
1431	if (__predict_false(!pcq_put(txq->txq_intrq, m))) {
1432		m_freem(m);
1433		return ENOBUFS;
1434	}
1435
1436	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
1437	if_statadd_ref(nsr, if_obytes, m->m_pkthdr.len);
1438	if (m->m_flags & M_MCAST)
1439		if_statinc_ref(nsr, if_omcasts);
1440	IF_STAT_PUTREF(ifp);
1441
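	/*
	 * Send immediately only if the tx lock is uncontended; otherwise the
	 * packet stays in txq_intrq and is picked up later, either by the
	 * deferred-transmit softint scheduled from the tx completion path or
	 * by a subsequent call that takes txq_lock.
	 */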
1442	if (mutex_tryenter(txq->txq_lock)) {
1443		vioif_transmit_locked(ifp, txq);
1444		mutex_exit(txq->txq_lock);
1445	}
1446
1447	return 0;
1448}
1449
1450static void
1451vioif_deferred_transmit(void *arg)
1452{
1453	struct vioif_txqueue *txq = arg;
1454	struct virtio_softc *vsc = txq->txq_vq->vq_owner;
1455	struct vioif_softc *sc = device_private(virtio_child(vsc));
1456	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1457
1458	mutex_enter(txq->txq_lock);
1459	vioif_send_common_locked(ifp, txq, true);
1460	mutex_exit(txq->txq_lock);
1461}
1462
1463static int
1464vioif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1465{
1466	int s, r;
1467
1468	s = splnet();
1469
1470	r = ether_ioctl(ifp, cmd, data);
1471	if ((r == 0 && cmd == SIOCSIFFLAGS) ||
1472	    (r == ENETRESET && (cmd == SIOCADDMULTI || cmd == SIOCDELMULTI))) {
1473		if (ifp->if_flags & IFF_RUNNING)
1474			r = vioif_rx_filter(ifp->if_softc);
1475		else
1476			r = 0;
1477	}
1478
1479	splx(s);
1480
1481	return r;
1482}
1483
1484void
1485vioif_watchdog(struct ifnet *ifp)
1486{
1487	struct vioif_softc *sc = ifp->if_softc;
1488	int i;
1489
1490	if (ifp->if_flags & IFF_RUNNING) {
1491		for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1492			vioif_tx_queue_clear(&sc->sc_txq[i]);
1493		}
1494	}
1495}
1496
1497/*
1498 * Receive implementation
1499 */
1500/* allocate and initialize a mbuf for receive */
1501static int
1502vioif_add_rx_mbuf(struct vioif_rxqueue *rxq, int i)
1503{
1504	struct virtio_softc *vsc = rxq->rxq_vq->vq_owner;
1505	struct mbuf *m;
1506	int r;
1507
1508	MGETHDR(m, M_DONTWAIT, MT_DATA);
1509	if (m == NULL)
1510		return ENOBUFS;
1511	MCLGET(m, M_DONTWAIT);
1512	if ((m->m_flags & M_EXT) == 0) {
1513		m_freem(m);
1514		return ENOBUFS;
1515	}
1516	rxq->rxq_mbufs[i] = m;
1517	m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
1518	r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1519	    rxq->rxq_dmamaps[i], m, BUS_DMA_READ | BUS_DMA_NOWAIT);
1520	if (r) {
1521		m_freem(m);
1522		rxq->rxq_mbufs[i] = NULL;
1523		return r;
1524	}
1525
1526	return 0;
1527}
1528
1529/* free a mbuf for receive */
1530static void
1531vioif_free_rx_mbuf(struct vioif_rxqueue *rxq, int i)
1532{
1533	struct virtio_softc *vsc = rxq->rxq_vq->vq_owner;
1534
1535	bus_dmamap_unload(virtio_dmat(vsc), rxq->rxq_dmamaps[i]);
1536	m_freem(rxq->rxq_mbufs[i]);
1537	rxq->rxq_mbufs[i] = NULL;
1538}
1539
1540/* add mbufs for all the empty receive slots */
1541static void
1542vioif_populate_rx_mbufs_locked(struct vioif_softc *sc, struct vioif_rxqueue *rxq)
1543{
1544	struct virtqueue *vq = rxq->rxq_vq;
1545	struct virtio_softc *vsc = vq->vq_owner;
1546	int i, r, ndone = 0;
1547
1548	KASSERT(mutex_owned(rxq->rxq_lock));
1549
1550	if (rxq->rxq_stopping)
1551		return;
1552
1553	for (i = 0; i < vq->vq_num; i++) {
1554		int slot;
1555		r = virtio_enqueue_prep(vsc, vq, &slot);
1556		if (r == EAGAIN)
1557			break;
1558		if (r != 0)
1559			panic("enqueue_prep for rx buffers");
1560		if (rxq->rxq_mbufs[slot] == NULL) {
1561			r = vioif_add_rx_mbuf(rxq, slot);
1562			if (r != 0) {
1563				rxq->rxq_mbuf_add_failed.ev_count++;
1564				break;
1565			}
1566		}
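		/* reserve the payload segments plus one slot for the rx header */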
1567		r = virtio_enqueue_reserve(vsc, vq, slot,
1568		    rxq->rxq_dmamaps[slot]->dm_nsegs + 1);
1569		if (r != 0) {
1570			vioif_free_rx_mbuf(rxq, slot);
1571			break;
1572		}
1573		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_hdr_dmamaps[slot],
1574		    0, sc->sc_hdr_size, BUS_DMASYNC_PREREAD);
1575		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_dmamaps[slot],
1576		    0, MCLBYTES, BUS_DMASYNC_PREREAD);
1577		virtio_enqueue(vsc, vq, slot, rxq->rxq_hdr_dmamaps[slot],
1578		    false);
1579		virtio_enqueue(vsc, vq, slot, rxq->rxq_dmamaps[slot], false);
1580		virtio_enqueue_commit(vsc, vq, slot, false);
1581		ndone++;
1582	}
1583	if (ndone > 0)
1584		virtio_enqueue_commit(vsc, vq, -1, true);
1585}
1586
1587static void
1588vioif_rx_queue_clear(struct vioif_rxqueue *rxq)
1589{
1590	struct virtqueue *vq = rxq->rxq_vq;
1591	struct virtio_softc *vsc = vq->vq_owner;
1592	struct vioif_softc *sc = device_private(virtio_child(vsc));
1593	u_int limit = UINT_MAX;
1594	bool more;
1595
1596	KASSERT(rxq->rxq_stopping);
1597
1598	mutex_enter(rxq->rxq_lock);
1599	for (;;) {
1600		more = vioif_rx_deq_locked(sc, vsc, rxq, limit);
1601		if (more == false)
1602			break;
1603	}
1604	mutex_exit(rxq->rxq_lock);
1605}
1606
1607/* dequeue received packets */
1608static bool
1609vioif_rx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
1610    struct vioif_rxqueue *rxq, u_int limit)
1611{
1612	struct virtqueue *vq = rxq->rxq_vq;
1613	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1614	struct mbuf *m;
1615	int slot, len;
1616	bool more = false, dequeued = false;
1617
1618	KASSERT(mutex_owned(rxq->rxq_lock));
1619
1620	if (virtio_vq_is_enqueued(vsc, vq) == false)
1621		return false;
1622
1623	for (;;) {
1624		if (limit-- == 0) {
1625			more = true;
1626			break;
1627		}
1628
1629		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
1630			break;
1631
1632		dequeued = true;
1633
1634		len -= sc->sc_hdr_size;
1635		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_hdr_dmamaps[slot],
1636		    0, sc->sc_hdr_size, BUS_DMASYNC_POSTREAD);
1637		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_dmamaps[slot],
1638		    0, MCLBYTES, BUS_DMASYNC_POSTREAD);
1639		m = rxq->rxq_mbufs[slot];
1640		KASSERT(m != NULL);
1641		bus_dmamap_unload(virtio_dmat(vsc), rxq->rxq_dmamaps[slot]);
1642		rxq->rxq_mbufs[slot] = NULL;
1643		virtio_dequeue_commit(vsc, vq, slot);
1644		m_set_rcvif(m, ifp);
1645		m->m_len = m->m_pkthdr.len = len;
1646
1647		mutex_exit(rxq->rxq_lock);
1648		if_percpuq_enqueue(ifp->if_percpuq, m);
1649		mutex_enter(rxq->rxq_lock);
1650
1651		if (rxq->rxq_stopping)
1652			break;
1653	}
1654
1655	if (dequeued)
1656		vioif_populate_rx_mbufs_locked(sc, rxq);
1657
1658	return more;
1659}
1660
1661/* rx interrupt; call _dequeue above and schedule a softint */
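/*
 * vioif_rx_intr() masks the vq interrupt and dequeues at most
 * sc_rx_intr_process_limit slots; if more work remains,
 * vioif_rx_sched_handle() defers it to a softint or to the per-CPU txrx
 * workqueue (chosen via sc_txrx_workqueue_sysctl), where vioif_rx_handle()
 * continues with sc_rx_process_limit per pass and re-enables the vq
 * interrupt once the ring is drained.
 */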
1662
1663static void
1664vioif_rx_handle_locked(void *xrxq, u_int limit)
1665{
1666	struct vioif_rxqueue *rxq = xrxq;
1667	struct virtqueue *vq = rxq->rxq_vq;
1668	struct virtio_softc *vsc = vq->vq_owner;
1669	struct vioif_softc *sc = device_private(virtio_child(vsc));
1670	bool more;
1671
1672	KASSERT(!rxq->rxq_stopping);
1673
1674	more = vioif_rx_deq_locked(sc, vsc, rxq, limit);
1675	if (more) {
1676		vioif_rx_sched_handle(sc, rxq);
1677		return;
1678	}
1679	more = virtio_start_vq_intr(vsc, rxq->rxq_vq);
1680	if (more) {
1681		vioif_rx_sched_handle(sc, rxq);
1682		return;
1683	}
1684	atomic_store_relaxed(&rxq->rxq_active, false);
1685}
1686
1687static int
1688vioif_rx_intr(void *arg)
1689{
1690	struct vioif_rxqueue *rxq = arg;
1691	struct virtqueue *vq = rxq->rxq_vq;
1692	struct virtio_softc *vsc = vq->vq_owner;
1693	struct vioif_softc *sc = device_private(virtio_child(vsc));
1694	u_int limit;
1695
1696	limit = sc->sc_rx_intr_process_limit;
1697
1698	if (atomic_load_relaxed(&rxq->rxq_active) == true)
1699		return 1;
1700
1701	mutex_enter(rxq->rxq_lock);
1702
1703	if (!rxq->rxq_stopping) {
1704		rxq->rxq_workqueue = sc->sc_txrx_workqueue_sysctl;
1705
1706		virtio_stop_vq_intr(vsc, vq);
1707		atomic_store_relaxed(&rxq->rxq_active, true);
1708
1709		vioif_rx_handle_locked(rxq, limit);
1710	}
1711
1712	mutex_exit(rxq->rxq_lock);
1713	return 1;
1714}
1715
1716static void
1717vioif_rx_handle(void *xrxq)
1718{
1719	struct vioif_rxqueue *rxq = xrxq;
1720	struct virtqueue *vq = rxq->rxq_vq;
1721	struct virtio_softc *vsc = vq->vq_owner;
1722	struct vioif_softc *sc = device_private(virtio_child(vsc));
1723	u_int limit;
1724
1725	limit = sc->sc_rx_process_limit;
1726
1727	mutex_enter(rxq->rxq_lock);
1728
1729	if (!rxq->rxq_stopping)
1730		vioif_rx_handle_locked(rxq, limit);
1731
1732	mutex_exit(rxq->rxq_lock);
1733}
1734
1735static void
1736vioif_rx_sched_handle(struct vioif_softc *sc, struct vioif_rxqueue *rxq)
1737{
1738
1739	if (rxq->rxq_workqueue)
1740		vioif_work_add(sc->sc_txrx_workqueue, &rxq->rxq_work);
1741	else
1742		softint_schedule(rxq->rxq_handle_si);
1743}
1744
1745/* free all the mbufs; called from if_stop(disable) */
1746static void
1747vioif_rx_drain(struct vioif_rxqueue *rxq)
1748{
1749	struct virtqueue *vq = rxq->rxq_vq;
1750	int i;
1751
1752	for (i = 0; i < vq->vq_num; i++) {
1753		if (rxq->rxq_mbufs[i] == NULL)
1754			continue;
1755		vioif_free_rx_mbuf(rxq, i);
1756	}
1757}
1758
1759/*
1760 * Transmission implementation
1761 */
1762/* actual transmission is done in if_start */
1763/* tx interrupt; dequeue and free mbufs */
1764/*
1765 * tx interrupt is actually disabled; this should be called upon
1766 * tx vq full and watchdog
1767 */
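/*
 * The tx completion path mirrors rx: vioif_tx_intr() masks the vq
 * interrupt and processes up to sc_tx_intr_process_limit slots, deferring
 * the rest to softint/workqueue context via vioif_tx_sched_handle(); with
 * VIRTIO_F_RING_EVENT_IDX the interrupt is re-armed lazily through
 * virtio_postpone_intr_smart().
 */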
1768
1769static void
1770vioif_tx_handle_locked(struct vioif_txqueue *txq, u_int limit)
1771{
1772	struct virtqueue *vq = txq->txq_vq;
1773	struct virtio_softc *vsc = vq->vq_owner;
1774	struct vioif_softc *sc = device_private(virtio_child(vsc));
1775	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1776	bool more;
1777
1778	KASSERT(!txq->txq_stopping);
1779
1780	more = vioif_tx_deq_locked(sc, vsc, txq, limit);
1781	if (more) {
1782		vioif_tx_sched_handle(sc, txq);
1783		return;
1784	}
1785
1786	if (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX)
1787		more = virtio_postpone_intr_smart(vsc, vq);
1788	else
1789		more = virtio_start_vq_intr(vsc, vq);
1790	if (more) {
1791		vioif_tx_sched_handle(sc, txq);
1792		return;
1793	}
1794
1795	atomic_store_relaxed(&txq->txq_active, false);
1796	/* for ALTQ */
1797	if (txq == &sc->sc_txq[0]) {
1798		if_schedule_deferred_start(ifp);
1799		ifp->if_flags &= ~IFF_OACTIVE;
1800	}
1801	softint_schedule(txq->txq_deferred_transmit);
1802}
1803
1804
1805static int
1806vioif_tx_intr(void *arg)
1807{
1808	struct vioif_txqueue *txq = arg;
1809	struct virtqueue *vq = txq->txq_vq;
1810	struct virtio_softc *vsc = vq->vq_owner;
1811	struct vioif_softc *sc = device_private(virtio_child(vsc));
1812	u_int limit;
1813
1814	limit = sc->sc_tx_intr_process_limit;
1815
1816	if (atomic_load_relaxed(&txq->txq_active) == true)
1817		return 1;
1818
1819	mutex_enter(txq->txq_lock);
1820
1821	if (!txq->txq_stopping) {
1822		txq->txq_workqueue = sc->sc_txrx_workqueue_sysctl;
1823
1824		virtio_stop_vq_intr(vsc, vq);
1825		atomic_store_relaxed(&txq->txq_active, true);
1826
1827		vioif_tx_handle_locked(txq, limit);
1828	}
1829
1830	mutex_exit(txq->txq_lock);
1831
1832	return 1;
1833}
1834
1835static void
1836vioif_tx_handle(void *xtxq)
1837{
1838	struct vioif_txqueue *txq = xtxq;
1839	struct virtqueue *vq = txq->txq_vq;
1840	struct virtio_softc *vsc = vq->vq_owner;
1841	struct vioif_softc *sc = device_private(virtio_child(vsc));
1842	u_int limit;
1843
1844	limit = sc->sc_tx_process_limit;
1845
1846	mutex_enter(txq->txq_lock);
1847	if (!txq->txq_stopping)
1848		vioif_tx_handle_locked(txq, limit);
1849	mutex_exit(txq->txq_lock);
1850}
1851
1852static void
1853vioif_tx_sched_handle(struct vioif_softc *sc, struct vioif_txqueue *txq)
1854{
1855
1856	if (txq->txq_workqueue)
1857		vioif_work_add(sc->sc_txrx_workqueue, &txq->txq_work);
1858	else
1859		softint_schedule(txq->txq_handle_si);
1860}
1861
1862static void
1863vioif_tx_queue_clear(struct vioif_txqueue *txq)
1864{
1865	struct virtqueue *vq = txq->txq_vq;
1866	struct virtio_softc *vsc = vq->vq_owner;
1867	struct vioif_softc *sc = device_private(virtio_child(vsc));
1868	u_int limit = UINT_MAX;
1869	bool more;
1870
1871	mutex_enter(txq->txq_lock);
1872	for (;;) {
1873		more = vioif_tx_deq_locked(sc, vsc, txq, limit);
1874		if (more == false)
1875			break;
1876	}
1877	mutex_exit(txq->txq_lock);
1878}
1879
1880static bool
1881vioif_tx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
1882    struct vioif_txqueue *txq, u_int limit)
1883{
1884	struct virtqueue *vq = txq->txq_vq;
1885	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1886	struct mbuf *m;
1887	int slot, len;
1888	bool more = false;
1889
1890	KASSERT(mutex_owned(txq->txq_lock));
1891
1892	if (virtio_vq_is_enqueued(vsc, vq) == false)
1893		return false;
1894
1895	for (;;) {
1896		if (limit-- == 0) {
1897			more = true;
1898			break;
1899		}
1900
1901		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
1902			break;
1903
1904		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_hdr_dmamaps[slot],
1905		    0, sc->sc_hdr_size, BUS_DMASYNC_POSTWRITE);
1906		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_dmamaps[slot],
1907		    0, txq->txq_dmamaps[slot]->dm_mapsize,
1908		    BUS_DMASYNC_POSTWRITE);
1909		m = txq->txq_mbufs[slot];
1910		bus_dmamap_unload(virtio_dmat(vsc), txq->txq_dmamaps[slot]);
1911		txq->txq_mbufs[slot] = NULL;
1912		virtio_dequeue_commit(vsc, vq, slot);
1913		if_statinc(ifp, if_opackets);
1914		m_freem(m);
1915	}
1916
1917	return more;
1918}
1919
1920/* free all the mbufs already put on vq; called from if_stop(disable) */
1921static void
1922vioif_tx_drain(struct vioif_txqueue *txq)
1923{
1924	struct virtqueue *vq = txq->txq_vq;
1925	struct virtio_softc *vsc = vq->vq_owner;
1926	int i;
1927
1928	KASSERT(txq->txq_stopping);
1929
1930	for (i = 0; i < vq->vq_num; i++) {
1931		if (txq->txq_mbufs[i] == NULL)
1932			continue;
1933		bus_dmamap_unload(virtio_dmat(vsc), txq->txq_dmamaps[i]);
1934		m_freem(txq->txq_mbufs[i]);
1935		txq->txq_mbufs[i] = NULL;
1936	}
1937}
1938
1939/*
1940 * Control vq
1941 */
1942/* issue a VIRTIO_NET_CTRL_RX class command and wait for completion */
1943static void
1944vioif_ctrl_acquire(struct vioif_softc *sc)
1945{
1946	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1947
1948	mutex_enter(&ctrlq->ctrlq_wait_lock);
1949	while (ctrlq->ctrlq_inuse != FREE)
1950		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
1951	ctrlq->ctrlq_inuse = INUSE;
1952	ctrlq->ctrlq_owner = curlwp;
1953	mutex_exit(&ctrlq->ctrlq_wait_lock);
1954}
1955
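/* drop ownership of the control virtqueue and wake up the next waiter */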
1956static void
1957vioif_ctrl_release(struct vioif_softc *sc)
1958{
1959	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1960
1961	KASSERT(ctrlq->ctrlq_inuse != FREE);
1962	KASSERT(ctrlq->ctrlq_owner == curlwp);
1963
1964	mutex_enter(&ctrlq->ctrlq_wait_lock);
1965	ctrlq->ctrlq_inuse = FREE;
1966	ctrlq->ctrlq_owner = NULL;
1967	cv_signal(&ctrlq->ctrlq_wait);
1968	mutex_exit(&ctrlq->ctrlq_wait_lock);
1969}
1970
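/* load the DMA map of each cmdspec buffer; on failure, unload those already loaded */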
1971static int
1972vioif_ctrl_load_cmdspec(struct vioif_softc *sc,
1973    struct vioif_ctrl_cmdspec *specs, int nspecs)
1974{
1975	struct virtio_softc *vsc = sc->sc_virtio;
1976	int i, r, loaded;
1977
1978	loaded = 0;
1979	for (i = 0; i < nspecs; i++) {
1980		r = bus_dmamap_load(virtio_dmat(vsc),
1981		    specs[i].dmamap, specs[i].buf, specs[i].bufsize,
1982		    NULL, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1983		if (r) {
1984			sc->sc_ctrlq.ctrlq_cmd_load_failed.ev_count++;
1985			goto err;
1986		}
1987		loaded++;
1989	}
1990
1991	return 0;
1992
1993err:
1994	for (i = 0; i < loaded; i++) {
1995		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
1996	}
1997
1998	return r;
1999}
2000
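/* unload the DMA maps loaded by vioif_ctrl_load_cmdspec() */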
2001static void
2002vioif_ctrl_unload_cmdspec(struct vioif_softc *sc,
2003    struct vioif_ctrl_cmdspec *specs, int nspecs)
2004{
2005	struct virtio_softc *vsc = sc->sc_virtio;
2006	int i;
2007
2008	for (i = 0; i < nspecs; i++) {
2009		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2010	}
2011}
2012
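/*
 * Send a single control command: sync the command, payload and status
 * buffers, enqueue them as one chain (command and payload are read by the
 * device, the status byte is written back), then sleep until
 * vioif_ctrl_intr() marks the request DONE.  Returns 0 if the device
 * acknowledged with VIRTIO_NET_OK, otherwise EIO.
 */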
2013static int
2014vioif_ctrl_send_command(struct vioif_softc *sc, uint8_t class, uint8_t cmd,
2015    struct vioif_ctrl_cmdspec *specs, int nspecs)
2016{
2017	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2018	struct virtqueue *vq = ctrlq->ctrlq_vq;
2019	struct virtio_softc *vsc = sc->sc_virtio;
2020	int i, r, slot;
2021
2022	ctrlq->ctrlq_cmd->class = class;
2023	ctrlq->ctrlq_cmd->command = cmd;
2024
2025	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap,
2026	    0, sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_PREWRITE);
2027	for (i = 0; i < nspecs; i++) {
2028		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap,
2029		    0, specs[i].bufsize, BUS_DMASYNC_PREWRITE);
2030	}
2031	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap,
2032	    0, sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_PREREAD);
2033
2034	/* we need to explicitly (re)start vq intr when using RING EVENT IDX */
2035	if (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX)
2036		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
2037
2038	r = virtio_enqueue_prep(vsc, vq, &slot);
2039	if (r != 0)
2040		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2041	r = virtio_enqueue_reserve(vsc, vq, slot, nspecs + 2);
2042	if (r != 0)
2043		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2044	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_cmd_dmamap, true);
2045	for (i = 0; i < nspecs; i++) {
2046		virtio_enqueue(vsc, vq, slot, specs[i].dmamap, true);
2047	}
2048	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_status_dmamap, false);
2049	virtio_enqueue_commit(vsc, vq, slot, true);
2050
2051	/* wait for done */
2052	mutex_enter(&ctrlq->ctrlq_wait_lock);
2053	while (ctrlq->ctrlq_inuse != DONE)
2054		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
2055	mutex_exit(&ctrlq->ctrlq_wait_lock);
2056	/* already dequeued by vioif_ctrl_intr() */
2057
2058	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap, 0,
2059	    sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
2060	for (i = 0; i < nspecs; i++) {
2061		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap, 0,
2062		    specs[i].bufsize, BUS_DMASYNC_POSTWRITE);
2063	}
2064	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap, 0,
2065	    sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_POSTREAD);
2066
2067	if (ctrlq->ctrlq_status->ack == VIRTIO_NET_OK)
2068		r = 0;
2069	else {
2070		device_printf(sc->sc_dev, "control command failed\n");
2071		sc->sc_ctrlq.ctrlq_cmd_failed.ev_count++;
2072		r = EIO;
2073	}
2074
2075	return r;
2076}
2077
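/* issue a VIRTIO_NET_CTRL_RX class command (promisc or allmulti on/off) and wait for completion */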
2078static int
2079vioif_ctrl_rx(struct vioif_softc *sc, int cmd, bool onoff)
2080{
2081	struct virtio_net_ctrl_rx *rx = sc->sc_ctrlq.ctrlq_rx;
2082	struct vioif_ctrl_cmdspec specs[1];
2083	int r;
2084
2085	if (!sc->sc_has_ctrl)
2086		return ENOTSUP;
2087
2088	vioif_ctrl_acquire(sc);
2089
2090	rx->onoff = onoff;
2091	specs[0].dmamap = sc->sc_ctrlq.ctrlq_rx_dmamap;
2092	specs[0].buf = rx;
2093	specs[0].bufsize = sizeof(*rx);
2094
2095	r = vioif_ctrl_send_command(sc, VIRTIO_NET_CTRL_RX, cmd,
2096	    specs, __arraycount(specs));
2097
2098	vioif_ctrl_release(sc);
2099	return r;
2100}
2101
2102static int
2103vioif_set_promisc(struct vioif_softc *sc, bool onoff)
2104{
2105	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, onoff);
2106}
2107
2108static int
2109vioif_set_allmulti(struct vioif_softc *sc, bool onoff)
2110{
2111	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, onoff);
2112}
2113
2114/* issue VIRTIO_NET_CTRL_MAC_TABLE_SET command and wait for completion */
2115static int
2116vioif_set_rx_filter(struct vioif_softc *sc)
2117{
2118	/* filter already set in ctrlq->ctrlq_mac_tbl */
2119	struct virtio_softc *vsc = sc->sc_virtio;
2120	struct virtio_net_ctrl_mac_tbl *mac_tbl_uc, *mac_tbl_mc;
2121	struct vioif_ctrl_cmdspec specs[2];
2122	int nspecs = __arraycount(specs);
2123	int r;
2124
2125	mac_tbl_uc = sc->sc_ctrlq.ctrlq_mac_tbl_uc;
2126	mac_tbl_mc = sc->sc_ctrlq.ctrlq_mac_tbl_mc;
2127
2128	if (!sc->sc_has_ctrl)
2129		return ENOTSUP;
2130
2131	vioif_ctrl_acquire(sc);
2132
2133	specs[0].dmamap = sc->sc_ctrlq.ctrlq_tbl_uc_dmamap;
2134	specs[0].buf = mac_tbl_uc;
2135	specs[0].bufsize = sizeof(*mac_tbl_uc)
2136	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_uc->nentries));
2137
2138	specs[1].dmamap = sc->sc_ctrlq.ctrlq_tbl_mc_dmamap;
2139	specs[1].buf = mac_tbl_mc;
2140	specs[1].bufsize = sizeof(*mac_tbl_mc)
2141	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_mc->nentries));
2142
2143	r = vioif_ctrl_load_cmdspec(sc, specs, nspecs);
2144	if (r != 0)
2145		goto out;
2146
2147	r = vioif_ctrl_send_command(sc,
2148	    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET,
2149	    specs, nspecs);
2150
2151	vioif_ctrl_unload_cmdspec(sc, specs, nspecs);
2152
2153out:
2154	vioif_ctrl_release(sc);
2155
2156	return r;
2157}
2158
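/* issue VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET so the device uses nvq_pairs queue pairs */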
2159static int
2160vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *sc, int nvq_pairs)
2161{
2162	struct virtio_net_ctrl_mq *mq = sc->sc_ctrlq.ctrlq_mq;
2163	struct vioif_ctrl_cmdspec specs[1];
2164	int r;
2165
2166	if (!sc->sc_has_ctrl)
2167		return ENOTSUP;
2168
2169	if (nvq_pairs <= 1)
2170		return EINVAL;
2171
2172	vioif_ctrl_acquire(sc);
2173
2174	mq->virtqueue_pairs = virtio_rw16(sc->sc_virtio, nvq_pairs);
2175	specs[0].dmamap = sc->sc_ctrlq.ctrlq_mq_dmamap;
2176	specs[0].buf = mq;
2177	specs[0].bufsize = sizeof(*mq);
2178
2179	r = vioif_ctrl_send_command(sc,
2180	    VIRTIO_NET_CTRL_MQ, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
2181	    specs, __arraycount(specs));
2182
2183	vioif_ctrl_release(sc);
2184
2185	return r;
2186}
2187
2188/* ctrl vq interrupt; wake up the command issuer */
2189static int
2190vioif_ctrl_intr(void *arg)
2191{
2192	struct vioif_ctrlqueue *ctrlq = arg;
2193	struct virtqueue *vq = ctrlq->ctrlq_vq;
2194	struct virtio_softc *vsc = vq->vq_owner;
2195	int r, slot;
2196
2197	if (virtio_vq_is_enqueued(vsc, vq) == false)
2198		return 0;
2199
2200	r = virtio_dequeue(vsc, vq, &slot, NULL);
2201	if (r == ENOENT)
2202		return 0;
2203	virtio_dequeue_commit(vsc, vq, slot);
2204
2205	mutex_enter(&ctrlq->ctrlq_wait_lock);
2206	ctrlq->ctrlq_inuse = DONE;
2207	cv_signal(&ctrlq->ctrlq_wait);
2208	mutex_exit(&ctrlq->ctrlq_wait_lock);
2209
2210	return 1;
2211}
2212
2213/*
2214 * Rx filter policy:
2215 * If IFF_PROMISC is requested, set promiscuous mode.
2216 * If the multicast filter is small enough (<= MAXENTRIES), set the rx filter.
2217 * If a large multicast filter exists, use ALLMULTI.
2218 *
2219 * If setting the rx filter fails, fall back to ALLMULTI.
2220 * If ALLMULTI fails, fall back to PROMISC.
2221 */
2222static int
2223vioif_rx_filter(struct vioif_softc *sc)
2224{
2225	struct virtio_softc *vsc = sc->sc_virtio;
2226	struct ethercom *ec = &sc->sc_ethercom;
2227	struct ifnet *ifp = &ec->ec_if;
2228	struct ether_multi *enm;
2229	struct ether_multistep step;
2230	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2231	int nentries;
2232	int promisc = 0, allmulti = 0, rxfilter = 0;
2233	int r;
2234
2235	if (!sc->sc_has_ctrl) {	/* no ctrl vq; always promisc */
2236		ifp->if_flags |= IFF_PROMISC;
2237		return 0;
2238	}
2239
2240	if (ifp->if_flags & IFF_PROMISC) {
2241		promisc = 1;
2242		goto set;
2243	}
2244
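	/*
	 * Walk the multicast list and copy each address into the mc MAC table.
	 * nentries starts at -1 so the comma expression in the while()
	 * condition counts entries as the list is walked.
	 */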
2245	nentries = -1;
2246	ETHER_LOCK(ec);
2247	ETHER_FIRST_MULTI(step, ec, enm);
2248	while (nentries++, enm != NULL) {
2249		if (nentries >= VIRTIO_NET_CTRL_MAC_MAXENTRIES) {
2250			allmulti = 1;
2251			goto set_unlock;
2252		}
2253		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
2254			allmulti = 1;
2255			goto set_unlock;
2256		}
2257		memcpy(ctrlq->ctrlq_mac_tbl_mc->macs[nentries],
2258		    enm->enm_addrlo, ETHER_ADDR_LEN);
2259		ETHER_NEXT_MULTI(step, enm);
2260	}
2261	rxfilter = 1;
2262
2263set_unlock:
2264	ETHER_UNLOCK(ec);
2265
2266set:
2267	if (rxfilter) {
2268		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 0);
2269		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, nentries);
2270		r = vioif_set_rx_filter(sc);
2271		if (r != 0) {
2272			rxfilter = 0;
2273			allmulti = 1; /* fallback */
2274		}
2275	} else {
2276		/* remove rx filter */
2277		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 0);
2278		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, 0);
2279		r = vioif_set_rx_filter(sc);
2280		/* what to do on failure? */
2281	}
2282	if (allmulti) {
2283		r = vioif_set_allmulti(sc, true);
2284		if (r != 0) {
2285			allmulti = 0;
2286			promisc = 1; /* fallback */
2287		}
2288	} else {
2289		r = vioif_set_allmulti(sc, false);
2290		/* what to do on failure? */
2291	}
2292	if (promisc) {
2293		r = vioif_set_promisc(sc, true);
2294	} else {
2295		r = vioif_set_promisc(sc, false);
2296	}
2297
2298	return r;
2299}
2300
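/*
 * Report the link state from the device config; when VIRTIO_NET_F_STATUS
 * was not negotiated, assume the link is up.
 */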
2301static bool
2302vioif_is_link_up(struct vioif_softc *sc)
2303{
2304	struct virtio_softc *vsc = sc->sc_virtio;
2305	uint16_t status;
2306
2307	if (virtio_features(vsc) & VIRTIO_NET_F_STATUS)
2308		status = virtio_read_device_config_2(vsc,
2309		    VIRTIO_NET_CONFIG_STATUS);
2310	else
2311		status = VIRTIO_NET_S_LINK_UP;
2312
2313	return ((status & VIRTIO_NET_S_LINK_UP) != 0);
2314}
2315
2316/* detect link status changes and propagate them to the tx queues and the ifnet */
2317static void
2318vioif_update_link_status(struct vioif_softc *sc)
2319{
2320	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2321	struct vioif_txqueue *txq;
2322	bool active, changed;
2323	int link, i;
2324
2325	mutex_enter(&sc->sc_lock);
2326
2327	active = vioif_is_link_up(sc);
2328	changed = false;
2329
2330	if (active) {
2331		if (!sc->sc_link_active)
2332			changed = true;
2333
2334		link = LINK_STATE_UP;
2335		sc->sc_link_active = true;
2336	} else {
2337		if (sc->sc_link_active)
2338			changed = true;
2339
2340		link = LINK_STATE_DOWN;
2341		sc->sc_link_active = false;
2342	}
2343
2344	if (changed) {
2345		for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
2346			txq = &sc->sc_txq[i];
2347
2348			mutex_enter(txq->txq_lock);
2349			txq->txq_link_active = sc->sc_link_active;
2350			mutex_exit(txq->txq_lock);
2351		}
2352
2353		if_link_state_change(ifp, link);
2354	}
2355
2356	mutex_exit(&sc->sc_lock);
2357}
2358
2359static int
2360vioif_config_change(struct virtio_softc *vsc)
2361{
2362	struct vioif_softc *sc = device_private(virtio_child(vsc));
2363
2364	softint_schedule(sc->sc_ctl_softint);
2365	return 0;
2366}
2367
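/* config change softint: refresh the link state and restart transmission */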
2368static void
2369vioif_ctl_softint(void *arg)
2370{
2371	struct vioif_softc *sc = arg;
2372	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2373
2374	vioif_update_link_status(sc);
2375	vioif_start(ifp);
2376}
2377
2378static struct workqueue *
2379vioif_workq_create(const char *name, pri_t prio, int ipl, int flags)
2380{
2381	struct workqueue *wq;
2382	int error;
2383
2384	error = workqueue_create(&wq, name, vioif_workq_work, NULL,
2385	    prio, ipl, flags);
2386
2387	if (error)
2388		return NULL;
2389
2390	return wq;
2391}
2392
2393static void
2394vioif_workq_destroy(struct workqueue *wq)
2395{
2396
2397	workqueue_destroy(wq);
2398}
2399
2400static void
2401vioif_workq_work(struct work *wk, void *context)
2402{
2403	struct vioif_work *work;
2404
2405	work = container_of(wk, struct vioif_work, cookie);
2406
2407	atomic_store_relaxed(&work->added, 0);
2408	work->func(work->arg);
2409}
2410
2411static void
2412vioif_work_set(struct vioif_work *work, void (*func)(void *), void *arg)
2413{
2414
2415	memset(work, 0, sizeof(*work));
2416	work->func = func;
2417	work->arg = arg;
2418}
2419
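/* enqueue the work at most once; the "added" flag stays set until vioif_workq_work() runs it */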
2420static void
2421vioif_work_add(struct workqueue *wq, struct vioif_work *work)
2422{
2423
2424	if (atomic_load_relaxed(&work->added) != 0)
2425		return;
2426
2427	atomic_store_relaxed(&work->added, 1);
2428	kpreempt_disable();
2429	workqueue_enqueue(wq, &work->cookie, NULL);
2430	kpreempt_enable();
2431}
2432
2433static void
2434vioif_work_wait(struct workqueue *wq, struct vioif_work *work)
2435{
2436
2437	workqueue_wait(wq, &work->cookie);
2438}
2439
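/*
 * Create the per-device sysctl tree.  Assuming the interface attaches as
 * vioif0, the resulting nodes are:
 *   hw.vioif0.txrx_workqueue
 *   hw.vioif0.rx.intr_process_limit, hw.vioif0.rx.process_limit
 *   hw.vioif0.tx.intr_process_limit, hw.vioif0.tx.process_limit
 */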
2440static int
2441vioif_setup_sysctl(struct vioif_softc *sc)
2442{
2443	const char *devname;
2444	struct sysctllog **log;
2445	const struct sysctlnode *rnode, *rxnode, *txnode;
2446	int error;
2447
2448	log = &sc->sc_sysctllog;
2449	devname = device_xname(sc->sc_dev);
2450
2451	error = sysctl_createv(log, 0, NULL, &rnode,
2452	    0, CTLTYPE_NODE, devname,
2453	    SYSCTL_DESCR("virtio-net information and settings"),
2454	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);
2455	if (error)
2456		goto out;
2457
2458	error = sysctl_createv(log, 0, &rnode, NULL,
2459	    CTLFLAG_READWRITE, CTLTYPE_BOOL, "txrx_workqueue",
2460	    SYSCTL_DESCR("Use workqueue for packet processing"),
2461	    NULL, 0, &sc->sc_txrx_workqueue_sysctl, 0, CTL_CREATE, CTL_EOL);
2462	if (error)
2463		goto out;
2464
2465	error = sysctl_createv(log, 0, &rnode, &rxnode,
2466	    0, CTLTYPE_NODE, "rx",
2467	    SYSCTL_DESCR("virtio-net information and settings for Rx"),
2468	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
2469	if (error)
2470		goto out;
2471
2472	error = sysctl_createv(log, 0, &rxnode, NULL,
2473	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
2474	    SYSCTL_DESCR("max number of Rx packets to process for interrupt processing"),
2475	    NULL, 0, &sc->sc_rx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
2476	if (error)
2477		goto out;
2478
2479	error = sysctl_createv(log, 0, &rxnode, NULL,
2480	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
2481	    SYSCTL_DESCR("max number of Rx packets to process for deferred processing"),
2482	    NULL, 0, &sc->sc_rx_process_limit, 0, CTL_CREATE, CTL_EOL);
2483	if (error)
2484		goto out;
2485
2486	error = sysctl_createv(log, 0, &rnode, &txnode,
2487	    0, CTLTYPE_NODE, "tx",
2488	    SYSCTL_DESCR("virtio-net information and settings for Tx"),
2489	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
2490	if (error)
2491		goto out;
2492
2493	error = sysctl_createv(log, 0, &txnode, NULL,
2494	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
2495	    SYSCTL_DESCR("max number of Tx packets to process for interrupt processing"),
2496	    NULL, 0, &sc->sc_tx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
2497	if (error)
2498		goto out;
2499
2500	error = sysctl_createv(log, 0, &txnode, NULL,
2501	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
2502	    SYSCTL_DESCR("max number of Tx packets to process for deferred processing"),
2503	    NULL, 0, &sc->sc_tx_process_limit, 0, CTL_CREATE, CTL_EOL);
2504
2505out:
2506	if (error)
2507		sysctl_teardown(log);
2508
2509	return error;
2510}
2511
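/* attach event counters for per-queue tx/rx failures and for control command failures */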
2512static void
2513vioif_setup_stats(struct vioif_softc *sc)
2514{
2515	struct vioif_rxqueue *rxq;
2516	struct vioif_txqueue *txq;
2517	int i;
2518
2519	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
2520		rxq = &sc->sc_rxq[i];
2521		txq = &sc->sc_txq[i];
2522
2523		snprintf(txq->txq_evgroup, sizeof(txq->txq_evgroup), "%s-TX%d",
2524		    device_xname(sc->sc_dev), i);
2525		evcnt_attach_dynamic(&txq->txq_defrag_failed, EVCNT_TYPE_MISC,
2526		    NULL, txq->txq_evgroup, "tx m_defrag() failed");
2527		evcnt_attach_dynamic(&txq->txq_mbuf_load_failed, EVCNT_TYPE_MISC,
2528		    NULL, txq->txq_evgroup, "tx dmamap load failed");
2529		evcnt_attach_dynamic(&txq->txq_enqueue_reserve_failed, EVCNT_TYPE_MISC,
2530		    NULL, txq->txq_evgroup, "virtio_enqueue_reserve failed");
2531
2532		snprintf(rxq->rxq_evgroup, sizeof(rxq->rxq_evgroup), "%s-RX%d",
2533		    device_xname(sc->sc_dev), i);
2534		evcnt_attach_dynamic(&rxq->rxq_mbuf_add_failed, EVCNT_TYPE_MISC,
2535		    NULL, rxq->rxq_evgroup, "rx mbuf allocation failed");
2536	}
2537
2538	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_load_failed, EVCNT_TYPE_MISC,
2539	    NULL, device_xname(sc->sc_dev), "control command dmamap load failed");
2540	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_failed, EVCNT_TYPE_MISC,
2541	    NULL, device_xname(sc->sc_dev), "control command failed");
2542}
2543
2544MODULE(MODULE_CLASS_DRIVER, if_vioif, "virtio");
2545
2546#ifdef _MODULE
2547#include "ioconf.c"
2548#endif
2549
2550static int
2551if_vioif_modcmd(modcmd_t cmd, void *opaque)
2552{
2553	int error = 0;
2554
2555#ifdef _MODULE
2556	switch (cmd) {
2557	case MODULE_CMD_INIT:
2558		error = config_init_component(cfdriver_ioconf_if_vioif,
2559		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2560		break;
2561	case MODULE_CMD_FINI:
2562		error = config_fini_component(cfdriver_ioconf_if_vioif,
2563		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2564		break;
2565	default:
2566		error = ENOTTY;
2567		break;
2568	}
2569#endif
2570
2571	return error;
2572}
2573