1/*	$NetBSD: if_vioif.c,v 1.68 2021/02/03 20:27:59 reinoud Exp $	*/
2
3/*
4 * Copyright (c) 2020 The NetBSD Foundation, Inc.
5 * Copyright (c) 2010 Minoura Makoto.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__KERNEL_RCSID(0, "$NetBSD: if_vioif.c,v 1.68 2021/02/03 20:27:59 reinoud Exp $");
31
32#ifdef _KERNEL_OPT
33#include "opt_net_mpsafe.h"
34#endif
35
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/kernel.h>
39#include <sys/atomic.h>
40#include <sys/bus.h>
41#include <sys/condvar.h>
42#include <sys/device.h>
43#include <sys/evcnt.h>
44#include <sys/intr.h>
45#include <sys/kmem.h>
46#include <sys/mbuf.h>
47#include <sys/mutex.h>
48#include <sys/sockio.h>
49#include <sys/cpu.h>
50#include <sys/module.h>
51#include <sys/pcq.h>
52#include <sys/workqueue.h>
53
54#include <dev/pci/virtioreg.h>
55#include <dev/pci/virtiovar.h>
56
57#include <net/if.h>
58#include <net/if_media.h>
59#include <net/if_ether.h>
60
61#include <net/bpf.h>
62
63#include "ioconf.h"
64
65#ifdef NET_MPSAFE
66#define VIOIF_MPSAFE	1
67#define VIOIF_MULTIQ	1
68#endif
69
70/*
71 * if_vioifreg.h:
72 */
73/* Configuration registers */
74#define VIRTIO_NET_CONFIG_MAC		 0 /* 8bit x 6byte */
75#define VIRTIO_NET_CONFIG_STATUS	 6 /* 16bit */
76#define VIRTIO_NET_CONFIG_MAX_VQ_PAIRS	 8 /* 16bit */
77#define VIRTIO_NET_CONFIG_MTU		10 /* 16bit */
78
79/* Feature bits */
80#define VIRTIO_NET_F_CSUM		__BIT(0)
81#define VIRTIO_NET_F_GUEST_CSUM		__BIT(1)
82#define VIRTIO_NET_F_MAC		__BIT(5)
83#define VIRTIO_NET_F_GSO		__BIT(6)
84#define VIRTIO_NET_F_GUEST_TSO4		__BIT(7)
85#define VIRTIO_NET_F_GUEST_TSO6		__BIT(8)
86#define VIRTIO_NET_F_GUEST_ECN		__BIT(9)
87#define VIRTIO_NET_F_GUEST_UFO		__BIT(10)
88#define VIRTIO_NET_F_HOST_TSO4		__BIT(11)
89#define VIRTIO_NET_F_HOST_TSO6		__BIT(12)
90#define VIRTIO_NET_F_HOST_ECN		__BIT(13)
91#define VIRTIO_NET_F_HOST_UFO		__BIT(14)
92#define VIRTIO_NET_F_MRG_RXBUF		__BIT(15)
93#define VIRTIO_NET_F_STATUS		__BIT(16)
94#define VIRTIO_NET_F_CTRL_VQ		__BIT(17)
95#define VIRTIO_NET_F_CTRL_RX		__BIT(18)
96#define VIRTIO_NET_F_CTRL_VLAN		__BIT(19)
97#define VIRTIO_NET_F_CTRL_RX_EXTRA	__BIT(20)
98#define VIRTIO_NET_F_GUEST_ANNOUNCE	__BIT(21)
99#define VIRTIO_NET_F_MQ			__BIT(22)
100
101#define VIRTIO_NET_FLAG_BITS \
102	VIRTIO_COMMON_FLAG_BITS \
103	"\x17""MQ" \
104	"\x16""GUEST_ANNOUNCE" \
105	"\x15""CTRL_RX_EXTRA" \
106	"\x14""CTRL_VLAN" \
107	"\x13""CTRL_RX" \
108	"\x12""CTRL_VQ" \
109	"\x11""STATUS" \
110	"\x10""MRG_RXBUF" \
111	"\x0f""HOST_UFO" \
112	"\x0e""HOST_ECN" \
113	"\x0d""HOST_TSO6" \
114	"\x0c""HOST_TSO4" \
115	"\x0b""GUEST_UFO" \
116	"\x0a""GUEST_ECN" \
117	"\x09""GUEST_TSO6" \
118	"\x08""GUEST_TSO4" \
119	"\x07""GSO" \
120	"\x06""MAC" \
121	"\x02""GUEST_CSUM" \
122	"\x01""CSUM"
123
124/* Status */
125#define VIRTIO_NET_S_LINK_UP	1
126
127/* Packet header structure */
128struct virtio_net_hdr {
129	uint8_t		flags;
130	uint8_t		gso_type;
131	uint16_t	hdr_len;
132	uint16_t	gso_size;
133	uint16_t	csum_start;
134	uint16_t	csum_offset;
135
136	uint16_t	num_buffers; /* VIRTIO_NET_F_MRG_RXBUF enabled or v1 */
137} __packed;
138
139#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
140#define VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
141#define VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
142#define VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
143#define VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
144#define VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */
145
146#define VIRTIO_NET_MAX_GSO_LEN		(65536+ETHER_HDR_LEN)
147
148/* Control virtqueue */
149struct virtio_net_ctrl_cmd {
150	uint8_t	class;
151	uint8_t	command;
152} __packed;
153#define VIRTIO_NET_CTRL_RX		0
154# define VIRTIO_NET_CTRL_RX_PROMISC	0
155# define VIRTIO_NET_CTRL_RX_ALLMULTI	1
156
157#define VIRTIO_NET_CTRL_MAC		1
158# define VIRTIO_NET_CTRL_MAC_TABLE_SET	0
159
160#define VIRTIO_NET_CTRL_VLAN		2
161# define VIRTIO_NET_CTRL_VLAN_ADD	0
162# define VIRTIO_NET_CTRL_VLAN_DEL	1
163
164#define VIRTIO_NET_CTRL_MQ			4
165# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET	0
166# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN	1
167# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX	0x8000
168
169struct virtio_net_ctrl_status {
170	uint8_t	ack;
171} __packed;
172#define VIRTIO_NET_OK			0
173#define VIRTIO_NET_ERR			1
174
175struct virtio_net_ctrl_rx {
176	uint8_t	onoff;
177} __packed;
178
179struct virtio_net_ctrl_mac_tbl {
180	uint32_t nentries;
181	uint8_t macs[][ETHER_ADDR_LEN];
182} __packed;
183
184struct virtio_net_ctrl_vlan {
185	uint16_t id;
186} __packed;
187
188struct virtio_net_ctrl_mq {
189	uint16_t virtqueue_pairs;
190} __packed;
191
192/*
193 * if_vioifvar.h:
194 */
195
196/*
197 * Locking notes:
198 * + a field in vioif_txqueue is protected by txq_lock (a spin mutex), and
199 *   a field in vioif_rxqueue is protected by rxq_lock (a spin mutex).
200 *      - more than one lock cannot be held at once
201 * + ctrlq_inuse is protected by ctrlq_wait_lock.
202 *      - other fields in vioif_ctrlqueue are protected by ctrlq_inuse
203 *      - txq_lock or rxq_lock cannot be held along with ctrlq_wait_lock
204 * + fields in vioif_softc except queues are protected by
205 *   sc->sc_lock (an adaptive mutex)
206 *      - this lock is acquired before any of the other locks
207 */
208
209struct vioif_ctrl_cmdspec {
210	bus_dmamap_t	dmamap;
211	void		*buf;
212	bus_size_t	bufsize;
213};
214
215struct vioif_work {
216	struct work	 cookie;
217	void		(*func)(void *);
218	void		*arg;
219	unsigned int	 added;
220};
221
222struct vioif_txqueue {
223	kmutex_t		*txq_lock;	/* lock for tx operations */
224
225	struct virtqueue	*txq_vq;
226	bool			txq_stopping;
227	bool			txq_link_active;
228	pcq_t			*txq_intrq;
229
230	struct virtio_net_hdr	*txq_hdrs;
231	bus_dmamap_t		*txq_hdr_dmamaps;
232
233	struct mbuf		**txq_mbufs;
234	bus_dmamap_t		*txq_dmamaps;
235
236	void			*txq_deferred_transmit;
237	void			*txq_handle_si;
238	struct vioif_work	 txq_work;
239	bool			 txq_workqueue;
240	bool			 txq_active;
241
242	char			 txq_evgroup[16];
243	struct evcnt		 txq_defrag_failed;
244	struct evcnt		 txq_mbuf_load_failed;
245	struct evcnt		 txq_enqueue_reserve_failed;
246};
247
248struct vioif_rxqueue {
249	kmutex_t		*rxq_lock;	/* lock for rx operations */
250
251	struct virtqueue	*rxq_vq;
252	bool			rxq_stopping;
253
254	struct virtio_net_hdr	*rxq_hdrs;
255	bus_dmamap_t		*rxq_hdr_dmamaps;
256
257	struct mbuf		**rxq_mbufs;
258	bus_dmamap_t		*rxq_dmamaps;
259
260	void			*rxq_handle_si;
261	struct vioif_work	 rxq_work;
262	bool			 rxq_workqueue;
263	bool			 rxq_active;
264
265	char			 rxq_evgroup[16];
266	struct evcnt		 rxq_mbuf_add_failed;
267};
268
269struct vioif_ctrlqueue {
270	struct virtqueue		*ctrlq_vq;
271	enum {
272		FREE, INUSE, DONE
273	}				ctrlq_inuse;
274	kcondvar_t			ctrlq_wait;
275	kmutex_t			ctrlq_wait_lock;
276	struct lwp			*ctrlq_owner;
277
278	struct virtio_net_ctrl_cmd	*ctrlq_cmd;
279	struct virtio_net_ctrl_status	*ctrlq_status;
280	struct virtio_net_ctrl_rx	*ctrlq_rx;
281	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_uc;
282	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_mc;
283	struct virtio_net_ctrl_mq	*ctrlq_mq;
284
285	bus_dmamap_t			ctrlq_cmd_dmamap;
286	bus_dmamap_t			ctrlq_status_dmamap;
287	bus_dmamap_t			ctrlq_rx_dmamap;
288	bus_dmamap_t			ctrlq_tbl_uc_dmamap;
289	bus_dmamap_t			ctrlq_tbl_mc_dmamap;
290	bus_dmamap_t			ctrlq_mq_dmamap;
291
292	struct evcnt			ctrlq_cmd_load_failed;
293	struct evcnt			ctrlq_cmd_failed;
294};
295
296struct vioif_softc {
297	device_t		sc_dev;
298	kmutex_t		sc_lock;
299	struct sysctllog	*sc_sysctllog;
300
301	struct virtio_softc	*sc_virtio;
302	struct virtqueue	*sc_vqs;
303	u_int			 sc_hdr_size;
304
305	int			sc_max_nvq_pairs;
306	int			sc_req_nvq_pairs;
307	int			sc_act_nvq_pairs;
308
309	uint8_t			sc_mac[ETHER_ADDR_LEN];
310	struct ethercom		sc_ethercom;
311	short			sc_deferred_init_done;
312	bool			sc_link_active;
313
314	struct vioif_txqueue	*sc_txq;
315	struct vioif_rxqueue	*sc_rxq;
316
317	bool			sc_has_ctrl;
318	struct vioif_ctrlqueue	sc_ctrlq;
319
320	bus_dma_segment_t	sc_hdr_segs[1];
321	void			*sc_dmamem;
322	void			*sc_kmem;
323
324	void			*sc_ctl_softint;
325
326	struct workqueue	*sc_txrx_workqueue;
327	bool			 sc_txrx_workqueue_sysctl;
328	u_int			 sc_tx_intr_process_limit;
329	u_int			 sc_tx_process_limit;
330	u_int			 sc_rx_intr_process_limit;
331	u_int			 sc_rx_process_limit;
332};
333#define VIRTIO_NET_TX_MAXNSEGS		(16) /* XXX */
334#define VIRTIO_NET_CTRL_MAC_MAXENTRIES	(64) /* XXX */
335
336#define VIOIF_TX_INTR_PROCESS_LIMIT	256
337#define VIOIF_TX_PROCESS_LIMIT		256
338#define VIOIF_RX_INTR_PROCESS_LIMIT	0U
339#define VIOIF_RX_PROCESS_LIMIT		256
340
341#define VIOIF_WORKQUEUE_PRI		PRI_SOFTNET
342
343/* cfattach interface functions */
344static int	vioif_match(device_t, cfdata_t, void *);
345static void	vioif_attach(device_t, device_t, void *);
346static void	vioif_deferred_init(device_t);
347static int	vioif_finalize_teardown(device_t);
348
349/* ifnet interface functions */
350static int	vioif_init(struct ifnet *);
351static void	vioif_stop(struct ifnet *, int);
352static void	vioif_start(struct ifnet *);
353static void	vioif_start_locked(struct ifnet *, struct vioif_txqueue *);
354static int	vioif_transmit(struct ifnet *, struct mbuf *);
355static void	vioif_transmit_locked(struct ifnet *, struct vioif_txqueue *);
356static int	vioif_ioctl(struct ifnet *, u_long, void *);
357static void	vioif_watchdog(struct ifnet *);
358
359/* rx */
360static int	vioif_add_rx_mbuf(struct vioif_rxqueue *, int);
361static void	vioif_free_rx_mbuf(struct vioif_rxqueue *, int);
362static void	vioif_populate_rx_mbufs_locked(struct vioif_softc *,
363		    struct vioif_rxqueue *);
364static void	vioif_rx_queue_clear(struct vioif_rxqueue *);
365static bool	vioif_rx_deq_locked(struct vioif_softc *, struct virtio_softc *,
366		    struct vioif_rxqueue *, u_int);
367static int	vioif_rx_intr(void *);
368static void	vioif_rx_handle(void *);
369static void	vioif_rx_sched_handle(struct vioif_softc *,
370		    struct vioif_rxqueue *);
371static void	vioif_rx_drain(struct vioif_rxqueue *);
372
373/* tx */
374static int	vioif_tx_intr(void *);
375static void	vioif_tx_handle(void *);
376static void	vioif_tx_sched_handle(struct vioif_softc *,
377		    struct vioif_txqueue *);
378static void	vioif_tx_queue_clear(struct vioif_txqueue *);
379static bool	vioif_tx_deq_locked(struct vioif_softc *, struct virtio_softc *,
380		    struct vioif_txqueue *, u_int);
381static void	vioif_tx_drain(struct vioif_txqueue *);
382static void	vioif_deferred_transmit(void *);
383
384/* workqueue */
385static struct workqueue*
386		vioif_workq_create(const char *, pri_t, int, int);
387static void	vioif_workq_destroy(struct workqueue *);
388static void	vioif_workq_work(struct work *, void *);
389static void	vioif_work_set(struct vioif_work *, void(*)(void *), void *);
390static void	vioif_work_add(struct workqueue *, struct vioif_work *);
391static void	vioif_work_wait(struct workqueue *, struct vioif_work *);
392
393/* other control */
394static bool	vioif_is_link_up(struct vioif_softc *);
395static void	vioif_update_link_status(struct vioif_softc *);
396static int	vioif_ctrl_rx(struct vioif_softc *, int, bool);
397static int	vioif_set_promisc(struct vioif_softc *, bool);
398static int	vioif_set_allmulti(struct vioif_softc *, bool);
399static int	vioif_set_rx_filter(struct vioif_softc *);
400static int	vioif_rx_filter(struct vioif_softc *);
401static int	vioif_ctrl_intr(void *);
402static int	vioif_config_change(struct virtio_softc *);
403static void	vioif_ctl_softint(void *);
404static int	vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *, int);
405static void	vioif_enable_interrupt_vqpairs(struct vioif_softc *);
406static void	vioif_disable_interrupt_vqpairs(struct vioif_softc *);
407static int	vioif_setup_sysctl(struct vioif_softc *);
408static void	vioif_setup_stats(struct vioif_softc *);
409
410CFATTACH_DECL_NEW(vioif, sizeof(struct vioif_softc),
411		  vioif_match, vioif_attach, NULL, NULL);
412
413static int
414vioif_match(device_t parent, cfdata_t match, void *aux)
415{
416	struct virtio_attach_args *va = aux;
417
418	if (va->sc_childdevid == VIRTIO_DEVICE_ID_NETWORK)
419		return 1;
420
421	return 0;
422}
423
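/*
 * Create a DMA map for a buffer of the given size and segment count;
 * report an error mentioning "usage" if creation fails.
 */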
424static int
425vioif_dmamap_create(struct vioif_softc *sc, bus_dmamap_t *map,
426    bus_size_t size, int nsegs, const char *usage)
427{
428	int r;
429
430	r = bus_dmamap_create(virtio_dmat(sc->sc_virtio), size,
431	    nsegs, size, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, map);
432
433	if (r != 0) {
434		aprint_error_dev(sc->sc_dev, "%s dmamap creation failed, "
435		    "error code %d\n", usage, r);
436	}
437
438	return r;
439}
440
441static void
442vioif_dmamap_destroy(struct vioif_softc *sc, bus_dmamap_t *map)
443{
444
445	if (*map) {
446		bus_dmamap_destroy(virtio_dmat(sc->sc_virtio), *map);
447		*map = NULL;
448	}
449}
450
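/*
 * Create a DMA map and load it with the given buffer; on load failure
 * the map is destroyed and an error is reported.
 */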
451static int
452vioif_dmamap_create_load(struct vioif_softc *sc, bus_dmamap_t *map,
453    void *buf, bus_size_t size, int nsegs, int rw, const char *usage)
454{
455	int r;
456
457	r = vioif_dmamap_create(sc, map, size, nsegs, usage);
458	if (r != 0)
459		return 1;
460
461	r = bus_dmamap_load(virtio_dmat(sc->sc_virtio), *map, buf,
462	    size, NULL, rw | BUS_DMA_NOWAIT);
463	if (r != 0) {
464		vioif_dmamap_destroy(sc, map);
465		aprint_error_dev(sc->sc_dev, "%s dmamap load failed, "
466		    "error code %d\n", usage, r);
467	}
468
469	return r;
470}
471
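/* Carve the next "size" bytes out of the region pointed to by *p. */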
472static void *
473vioif_assign_mem(intptr_t *p, size_t size)
474{
475	intptr_t rv;
476
477	rv = *p;
478	*p += size;
479
480	return (void *)rv;
481}
482
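/*
 * Allocate the rx/tx queue arrays and the virtqueue array: two
 * virtqueues per queue pair, plus one for the control queue if present.
 */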
483static void
484vioif_alloc_queues(struct vioif_softc *sc)
485{
486	int nvq_pairs = sc->sc_max_nvq_pairs;
487	int nvqs = nvq_pairs * 2;
488	int i;
489
490	KASSERT(nvq_pairs <= VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX);
491
492	sc->sc_rxq = kmem_zalloc(sizeof(sc->sc_rxq[0]) * nvq_pairs,
493	    KM_SLEEP);
494	sc->sc_txq = kmem_zalloc(sizeof(sc->sc_txq[0]) * nvq_pairs,
495	    KM_SLEEP);
496
497	if (sc->sc_has_ctrl)
498		nvqs++;
499
500	sc->sc_vqs = kmem_zalloc(sizeof(sc->sc_vqs[0]) * nvqs, KM_SLEEP);
501	nvqs = 0;
502	for (i = 0; i < nvq_pairs; i++) {
503		sc->sc_rxq[i].rxq_vq = &sc->sc_vqs[nvqs++];
504		sc->sc_txq[i].txq_vq = &sc->sc_vqs[nvqs++];
505	}
506
507	if (sc->sc_has_ctrl)
508		sc->sc_ctrlq.ctrlq_vq = &sc->sc_vqs[nvqs++];
509}
510
511static void
512vioif_free_queues(struct vioif_softc *sc)
513{
514	int nvq_pairs = sc->sc_max_nvq_pairs;
515	int nvqs = nvq_pairs * 2;
516
517	if (sc->sc_ctrlq.ctrlq_vq)
518		nvqs++;
519
520	if (sc->sc_txq) {
521		kmem_free(sc->sc_txq, sizeof(sc->sc_txq[0]) * nvq_pairs);
522		sc->sc_txq = NULL;
523	}
524
525	if (sc->sc_rxq) {
526		kmem_free(sc->sc_rxq, sizeof(sc->sc_rxq[0]) * nvq_pairs);
527		sc->sc_rxq = NULL;
528	}
529
530	if (sc->sc_vqs) {
531		kmem_free(sc->sc_vqs, sizeof(sc->sc_vqs[0]) * nvqs);
532		sc->sc_vqs = NULL;
533	}
534}
535
536/* allocate memory */
537/*
538 * dma memory is used for:
539 *   rxq_hdrs[slot]:	 metadata array for received frames (READ)
540 *   txq_hdrs[slot]:	 metadata array for frames to be sent (WRITE)
541 *   ctrlq_cmd:		 command to be sent via ctrl vq (WRITE)
542 *   ctrlq_status:	 return value for a command via ctrl vq (READ)
543 *   ctrlq_rx:		 parameter for a VIRTIO_NET_CTRL_RX class command
544 *			 (WRITE)
545 *   ctrlq_mac_tbl_uc:	 unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
546 *			 class command (WRITE)
547 *   ctrlq_mac_tbl_mc:	 multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
548 *			 class command (WRITE)
549 * Only one instance of each ctrlq_* structure is allocated; they are
550 * protected by the ctrlq_inuse variable and the ctrlq_wait condvar.
551 */
552/*
553 * dynamically allocated memory is used for:
554 *   rxq_hdr_dmamaps[slot]:	bus_dmamap_t array for sc_rx_hdrs[slot]
555 *   txq_hdr_dmamaps[slot]:	bus_dmamap_t array for sc_tx_hdrs[slot]
556 *   rxq_dmamaps[slot]:		bus_dmamap_t array for received payload
557 *   txq_dmamaps[slot]:		bus_dmamap_t array for sent payload
558 *   rxq_mbufs[slot]:		mbuf pointer array for received frames
559 *   txq_mbufs[slot]:		mbuf pointer array for sent frames
560 */
561static int
562vioif_alloc_mems(struct vioif_softc *sc)
563{
564	struct virtio_softc *vsc = sc->sc_virtio;
565	struct vioif_txqueue *txq;
566	struct vioif_rxqueue *rxq;
567	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
568	int allocsize, allocsize2, r, rsegs, i, qid;
569	void *vaddr;
570	intptr_t p;
571
572	allocsize = 0;
573	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
574		rxq = &sc->sc_rxq[qid];
575		txq = &sc->sc_txq[qid];
576
577		allocsize += sizeof(struct virtio_net_hdr *) *
578			(rxq->rxq_vq->vq_num + txq->txq_vq->vq_num);
579	}
580	if (sc->sc_has_ctrl) {
581		allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1;
582		allocsize += sizeof(struct virtio_net_ctrl_status) * 1;
583		allocsize += sizeof(struct virtio_net_ctrl_rx) * 1;
584		allocsize += sizeof(struct virtio_net_ctrl_mac_tbl)
585		    + sizeof(struct virtio_net_ctrl_mac_tbl)
586		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES;
587		allocsize += sizeof(struct virtio_net_ctrl_mq) * 1;
588	}
589	r = bus_dmamem_alloc(virtio_dmat(vsc), allocsize, 0, 0,
590	    &sc->sc_hdr_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
591	if (r != 0) {
592		aprint_error_dev(sc->sc_dev,
593		    "DMA memory allocation failed, size %d, "
594		    "error code %d\n", allocsize, r);
595		goto err_none;
596	}
597	r = bus_dmamem_map(virtio_dmat(vsc),
598	    &sc->sc_hdr_segs[0], 1, allocsize, &vaddr, BUS_DMA_NOWAIT);
599	if (r != 0) {
600		aprint_error_dev(sc->sc_dev,
601		    "DMA memory map failed, error code %d\n", r);
602		goto err_dmamem_alloc;
603	}
604
605	memset(vaddr, 0, allocsize);
606	sc->sc_dmamem = vaddr;
607	p = (intptr_t) vaddr;
608
609	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
610		rxq = &sc->sc_rxq[qid];
611		txq = &sc->sc_txq[qid];
612
613		rxq->rxq_hdrs = vioif_assign_mem(&p,
614		    sizeof(struct virtio_net_hdr) * rxq->rxq_vq->vq_num);
615		txq->txq_hdrs = vioif_assign_mem(&p,
616		    sizeof(struct virtio_net_hdr) * txq->txq_vq->vq_num);
617	}
618	if (sc->sc_has_ctrl) {
619		ctrlq->ctrlq_cmd = vioif_assign_mem(&p,
620		    sizeof(*ctrlq->ctrlq_cmd));
621		ctrlq->ctrlq_status = vioif_assign_mem(&p,
622		    sizeof(*ctrlq->ctrlq_status));
623		ctrlq->ctrlq_rx = vioif_assign_mem(&p,
624		    sizeof(*ctrlq->ctrlq_rx));
625		ctrlq->ctrlq_mac_tbl_uc = vioif_assign_mem(&p,
626		    sizeof(*ctrlq->ctrlq_mac_tbl_uc));
627		ctrlq->ctrlq_mac_tbl_mc = vioif_assign_mem(&p,
628		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
629		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES);
630		ctrlq->ctrlq_mq = vioif_assign_mem(&p, sizeof(*ctrlq->ctrlq_mq));
631	}
632
633	allocsize2 = 0;
634	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
635		int rxqsize, txqsize;
636
637		rxq = &sc->sc_rxq[qid];
638		txq = &sc->sc_txq[qid];
639		rxqsize = rxq->rxq_vq->vq_num;
640		txqsize = txq->txq_vq->vq_num;
641
642		allocsize2 += sizeof(rxq->rxq_dmamaps[0]) * rxqsize;
643		allocsize2 += sizeof(rxq->rxq_hdr_dmamaps[0]) * rxqsize;
644		allocsize2 += sizeof(rxq->rxq_mbufs[0]) * rxqsize;
645
646		allocsize2 += sizeof(txq->txq_dmamaps[0]) * txqsize;
647		allocsize2 += sizeof(txq->txq_hdr_dmamaps[0]) * txqsize;
648		allocsize2 += sizeof(txq->txq_mbufs[0]) * txqsize;
649	}
650	vaddr = kmem_zalloc(allocsize2, KM_SLEEP);
651	sc->sc_kmem = vaddr;
652	p = (intptr_t) vaddr;
653
654	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
655		int rxqsize, txqsize;
656		rxq = &sc->sc_rxq[qid];
657		txq = &sc->sc_txq[qid];
658		rxqsize = rxq->rxq_vq->vq_num;
659		txqsize = txq->txq_vq->vq_num;
660
661		rxq->rxq_hdr_dmamaps = vioif_assign_mem(&p,
662		    sizeof(rxq->rxq_hdr_dmamaps[0]) * rxqsize);
663		txq->txq_hdr_dmamaps = vioif_assign_mem(&p,
664		    sizeof(txq->txq_hdr_dmamaps[0]) * txqsize);
665		rxq->rxq_dmamaps = vioif_assign_mem(&p,
666		    sizeof(rxq->rxq_dmamaps[0]) * rxqsize);
667		txq->txq_dmamaps = vioif_assign_mem(&p,
668		    sizeof(txq->txq_dmamaps[0]) * txqsize);
669		rxq->rxq_mbufs = vioif_assign_mem(&p,
670		    sizeof(rxq->rxq_mbufs[0]) * rxqsize);
671		txq->txq_mbufs = vioif_assign_mem(&p,
672		    sizeof(txq->txq_mbufs[0]) * txqsize);
673	}
674
675	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
676		rxq = &sc->sc_rxq[qid];
677		txq = &sc->sc_txq[qid];
678
679		for (i = 0; i < rxq->rxq_vq->vq_num; i++) {
680			r = vioif_dmamap_create_load(sc, &rxq->rxq_hdr_dmamaps[i],
681			    &rxq->rxq_hdrs[i], sc->sc_hdr_size, 1,
682			    BUS_DMA_READ, "rx header");
683			if (r != 0)
684				goto err_reqs;
685
686			r = vioif_dmamap_create(sc, &rxq->rxq_dmamaps[i],
687			    MCLBYTES, 1, "rx payload");
688			if (r != 0)
689				goto err_reqs;
690		}
691
692		for (i = 0; i < txq->txq_vq->vq_num; i++) {
693			r = vioif_dmamap_create_load(sc, &txq->txq_hdr_dmamaps[i],
694			    &txq->txq_hdrs[i], sc->sc_hdr_size, 1,
695			    BUS_DMA_READ, "tx header");
696			if (r != 0)
697				goto err_reqs;
698
699			r = vioif_dmamap_create(sc, &txq->txq_dmamaps[i], ETHER_MAX_LEN,
700			    VIRTIO_NET_TX_MAXNSEGS, "tx payload");
701			if (r != 0)
702				goto err_reqs;
703		}
704	}
705
706	if (sc->sc_has_ctrl) {
707		/* control vq class & command */
708		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_cmd_dmamap,
709		    ctrlq->ctrlq_cmd, sizeof(*ctrlq->ctrlq_cmd), 1,
710		    BUS_DMA_WRITE, "control command");
711		if (r != 0)
712			goto err_reqs;
713
714		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_status_dmamap,
715		    ctrlq->ctrlq_status, sizeof(*ctrlq->ctrlq_status), 1,
716		    BUS_DMA_READ, "control status");
717		if (r != 0)
718			goto err_reqs;
719
720		/* control vq rx mode command parameter */
721		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_rx_dmamap,
722		    ctrlq->ctrlq_rx, sizeof(*ctrlq->ctrlq_rx), 1,
723		    BUS_DMA_WRITE, "rx mode control command");
724		if (r != 0)
725			goto err_reqs;
726
727		/* multiqueue set command */
728		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_mq_dmamap,
729		    ctrlq->ctrlq_mq, sizeof(*ctrlq->ctrlq_mq), 1,
730		    BUS_DMA_WRITE, "multiqueue set command");
731		if (r != 0)
732			goto err_reqs;
733
734		/* control vq MAC filter table for unicast */
735		/* do not load now since its length is variable */
736		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_uc_dmamap,
737		    sizeof(*ctrlq->ctrlq_mac_tbl_uc) + 0, 1,
738		    "unicast MAC address filter command");
739		if (r != 0)
740			goto err_reqs;
741
742		/* control vq MAC filter table for multicast */
743		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_mc_dmamap,
744		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
745		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES, 1,
746		    "multicast MAC address filter command");
747	}
748
749	return 0;
750
751err_reqs:
752	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_mc_dmamap);
753	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_uc_dmamap);
754	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_rx_dmamap);
755	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_status_dmamap);
756	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_cmd_dmamap);
757	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
758		rxq = &sc->sc_rxq[qid];
759		txq = &sc->sc_txq[qid];
760
761		for (i = 0; i < txq->txq_vq->vq_num; i++) {
762			vioif_dmamap_destroy(sc, &txq->txq_dmamaps[i]);
763			vioif_dmamap_destroy(sc, &txq->txq_hdr_dmamaps[i]);
764		}
765		for (i = 0; i < rxq->rxq_vq->vq_num; i++) {
766			vioif_dmamap_destroy(sc, &rxq->rxq_dmamaps[i]);
767			vioif_dmamap_destroy(sc, &rxq->rxq_hdr_dmamaps[i]);
768		}
769	}
770	if (sc->sc_kmem) {
771		kmem_free(sc->sc_kmem, allocsize2);
772		sc->sc_kmem = NULL;
773	}
774	bus_dmamem_unmap(virtio_dmat(vsc), sc->sc_dmamem, allocsize);
775err_dmamem_alloc:
776	bus_dmamem_free(virtio_dmat(vsc), &sc->sc_hdr_segs[0], 1);
777err_none:
778	return -1;
779}
780
781static void
782vioif_attach(device_t parent, device_t self, void *aux)
783{
784	struct vioif_softc *sc = device_private(self);
785	struct virtio_softc *vsc = device_private(parent);
786	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
787	struct vioif_txqueue *txq;
788	struct vioif_rxqueue *rxq;
789	uint64_t features, req_features;
790	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
791	u_int softint_flags;
792	int r, i, nvqs=0, req_flags;
793	char xnamebuf[MAXCOMLEN];
794
795	if (virtio_child(vsc) != NULL) {
796		aprint_normal(": child already attached for %s; "
797		    "something wrong...\n", device_xname(parent));
798		return;
799	}
800
801	sc->sc_dev = self;
802	sc->sc_virtio = vsc;
803	sc->sc_link_active = false;
804
805	sc->sc_max_nvq_pairs = 1;
806	sc->sc_req_nvq_pairs = 1;
807	sc->sc_act_nvq_pairs = 1;
808	sc->sc_txrx_workqueue_sysctl = true;
809	sc->sc_tx_intr_process_limit = VIOIF_TX_INTR_PROCESS_LIMIT;
810	sc->sc_tx_process_limit = VIOIF_TX_PROCESS_LIMIT;
811	sc->sc_rx_intr_process_limit = VIOIF_RX_INTR_PROCESS_LIMIT;
812	sc->sc_rx_process_limit = VIOIF_RX_PROCESS_LIMIT;
813
814	mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);
815
816	snprintf(xnamebuf, sizeof(xnamebuf), "%s_txrx", device_xname(self));
817	sc->sc_txrx_workqueue = vioif_workq_create(xnamebuf, VIOIF_WORKQUEUE_PRI,
818	    IPL_NET, WQ_PERCPU | WQ_MPSAFE);
819	if (sc->sc_txrx_workqueue == NULL)
820		goto err;
821
822	req_flags = 0;
823
824#ifdef VIOIF_MPSAFE
825	req_flags |= VIRTIO_F_INTR_MPSAFE;
826#endif
827	req_flags |= VIRTIO_F_INTR_MSIX;
828
829	req_features =
830	    VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | VIRTIO_NET_F_CTRL_VQ |
831	    VIRTIO_NET_F_CTRL_RX | VIRTIO_F_NOTIFY_ON_EMPTY;
832	req_features |= VIRTIO_F_RING_EVENT_IDX;
833#ifdef VIOIF_MULTIQ
834	req_features |= VIRTIO_NET_F_MQ;
835#endif
836	virtio_child_attach_start(vsc, self, IPL_NET, NULL,
837	    vioif_config_change, virtio_vq_intrhand, req_flags,
838	    req_features, VIRTIO_NET_FLAG_BITS);
839
840	features = virtio_features(vsc);
841	if (features == 0)
842		goto err;
843
844	if (features & VIRTIO_NET_F_MAC) {
845		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
846			sc->sc_mac[i] = virtio_read_device_config_1(vsc,
847			    VIRTIO_NET_CONFIG_MAC + i);
848		}
849	} else {
850		/* code stolen from sys/net/if_tap.c */
851		struct timeval tv;
852		uint32_t ui;
853		getmicrouptime(&tv);
854		ui = (tv.tv_sec ^ tv.tv_usec) & 0xffffff;
855		memcpy(sc->sc_mac+3, (uint8_t *)&ui, 3);
856		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
857			virtio_write_device_config_1(vsc,
858			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
859		}
860	}
861
862	/* 'Ethernet' is capitalized to match other Ethernet drivers' output */
863	aprint_normal_dev(self, "Ethernet address %s\n",
864	    ether_sprintf(sc->sc_mac));
865
866	if (features & (VIRTIO_NET_F_MRG_RXBUF | VIRTIO_F_VERSION_1)) {
867		sc->sc_hdr_size = sizeof(struct virtio_net_hdr);
868	} else {
869		sc->sc_hdr_size = offsetof(struct virtio_net_hdr, num_buffers);
870	}
871
872	if ((features & VIRTIO_NET_F_CTRL_VQ) &&
873	    (features & VIRTIO_NET_F_CTRL_RX)) {
874		sc->sc_has_ctrl = true;
875
876		cv_init(&ctrlq->ctrlq_wait, "ctrl_vq");
877		mutex_init(&ctrlq->ctrlq_wait_lock, MUTEX_DEFAULT, IPL_NET);
878		ctrlq->ctrlq_inuse = FREE;
879	} else {
880		sc->sc_has_ctrl = false;
881	}
882
883	if (sc->sc_has_ctrl && (features & VIRTIO_NET_F_MQ)) {
884		sc->sc_max_nvq_pairs = virtio_read_device_config_2(vsc,
885		    VIRTIO_NET_CONFIG_MAX_VQ_PAIRS);
886
887		if (sc->sc_max_nvq_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
888			goto err;
889
890		/* Limit the number of queue pairs to use */
891		sc->sc_req_nvq_pairs = MIN(sc->sc_max_nvq_pairs, ncpu);
892	}
893
894	vioif_alloc_queues(sc);
895	virtio_child_attach_set_vqs(vsc, sc->sc_vqs, sc->sc_req_nvq_pairs);
896
897#ifdef VIOIF_MPSAFE
898	softint_flags = SOFTINT_NET | SOFTINT_MPSAFE;
899#else
900	softint_flags = SOFTINT_NET;
901#endif
902
903	/*
904	 * Allocating virtqueues
905	 */
906	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
907		rxq = &sc->sc_rxq[i];
908		txq = &sc->sc_txq[i];
909		char qname[32];
910
911		rxq->rxq_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
912
913		rxq->rxq_handle_si = softint_establish(softint_flags,
914		    vioif_rx_handle, rxq);
915		if (rxq->rxq_handle_si == NULL) {
916			aprint_error_dev(self, "cannot establish rx softint\n");
917			goto err;
918		}
919
920		snprintf(qname, sizeof(qname), "rx%d", i);
921		r = virtio_alloc_vq(vsc, rxq->rxq_vq, nvqs,
922		    MCLBYTES + sc->sc_hdr_size, 2, qname);
923		if (r != 0)
924			goto err;
925		nvqs++;
926		rxq->rxq_vq->vq_intrhand = vioif_rx_intr;
927		rxq->rxq_vq->vq_intrhand_arg = (void *)rxq;
928		rxq->rxq_stopping = true;
929		vioif_work_set(&rxq->rxq_work, vioif_rx_handle, rxq);
930
931		txq->txq_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
932
933		txq->txq_deferred_transmit = softint_establish(softint_flags,
934		    vioif_deferred_transmit, txq);
935		if (txq->txq_deferred_transmit == NULL) {
936			aprint_error_dev(self, "cannot establish tx softint\n");
937			goto err;
938		}
939		txq->txq_handle_si = softint_establish(softint_flags,
940		    vioif_tx_handle, txq);
941		if (txq->txq_handle_si == NULL) {
942			aprint_error_dev(self, "cannot establish tx softint\n");
943			goto err;
944		}
945
946		snprintf(qname, sizeof(qname), "tx%d", i);
947		r = virtio_alloc_vq(vsc, txq->txq_vq, nvqs,
948		    sc->sc_hdr_size + (ETHER_MAX_LEN - ETHER_HDR_LEN),
949		    VIRTIO_NET_TX_MAXNSEGS + 1, qname);
950		if (r != 0)
951			goto err;
952		nvqs++;
953		txq->txq_vq->vq_intrhand = vioif_tx_intr;
954		txq->txq_vq->vq_intrhand_arg = (void *)txq;
955		txq->txq_link_active = sc->sc_link_active;
956		txq->txq_stopping = false;
957		txq->txq_intrq = pcq_create(txq->txq_vq->vq_num, KM_SLEEP);
958		vioif_work_set(&txq->txq_work, vioif_tx_handle, txq);
959	}
960
961	if (sc->sc_has_ctrl) {
962		/*
963		 * Allocating a virtqueue for control channel
964		 */
965		r = virtio_alloc_vq(vsc, ctrlq->ctrlq_vq, nvqs,
966		    NBPG, 1, "control");
967		if (r != 0) {
968			aprint_error_dev(self, "failed to allocate "
969			    "a virtqueue for control channel, error code %d\n",
970			    r);
971
972			sc->sc_has_ctrl = false;
973			cv_destroy(&ctrlq->ctrlq_wait);
974			mutex_destroy(&ctrlq->ctrlq_wait_lock);
975		} else {
976			nvqs++;
977			ctrlq->ctrlq_vq->vq_intrhand = vioif_ctrl_intr;
978			ctrlq->ctrlq_vq->vq_intrhand_arg = (void *) ctrlq;
979		}
980	}
981
982	sc->sc_ctl_softint = softint_establish(softint_flags,
983	    vioif_ctl_softint, sc);
984	if (sc->sc_ctl_softint == NULL) {
985		aprint_error_dev(self, "cannot establish ctl softint\n");
986		goto err;
987	}
988
989	if (vioif_alloc_mems(sc) < 0)
990		goto err;
991
992	if (virtio_child_attach_finish(vsc) != 0)
993		goto err;
994
995	if (vioif_setup_sysctl(sc) != 0) {
996		aprint_error_dev(self, "unable to create sysctl node\n");
997		/* continue */
998	}
999
1000	vioif_setup_stats(sc);
1001
1002	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
1003	ifp->if_softc = sc;
1004	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1005#ifdef VIOIF_MPSAFE
1006	ifp->if_extflags = IFEF_MPSAFE;
1007#endif
1008	ifp->if_start = vioif_start;
1009	if (sc->sc_req_nvq_pairs > 1)
1010		ifp->if_transmit = vioif_transmit;
1011	ifp->if_ioctl = vioif_ioctl;
1012	ifp->if_init = vioif_init;
1013	ifp->if_stop = vioif_stop;
1014	ifp->if_capabilities = 0;
1015	ifp->if_watchdog = vioif_watchdog;
1016	txq = &sc->sc_txq[0];
1017	IFQ_SET_MAXLEN(&ifp->if_snd, MAX(txq->txq_vq->vq_num, IFQ_MAXLEN));
1018	IFQ_SET_READY(&ifp->if_snd);
1019
1020	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
1021
1022	if_attach(ifp);
1023	if_deferred_start_init(ifp, NULL);
1024	ether_ifattach(ifp, sc->sc_mac);
1025
1026	return;
1027
1028err:
1029	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
1030		rxq = &sc->sc_rxq[i];
1031		txq = &sc->sc_txq[i];
1032
1033		if (rxq->rxq_lock) {
1034			mutex_obj_free(rxq->rxq_lock);
1035			rxq->rxq_lock = NULL;
1036		}
1037
1038		if (rxq->rxq_handle_si) {
1039			softint_disestablish(rxq->rxq_handle_si);
1040			rxq->rxq_handle_si = NULL;
1041		}
1042
1043		if (txq->txq_lock) {
1044			mutex_obj_free(txq->txq_lock);
1045			txq->txq_lock = NULL;
1046		}
1047
1048		if (txq->txq_handle_si) {
1049			softint_disestablish(txq->txq_handle_si);
1050			txq->txq_handle_si = NULL;
1051		}
1052
1053		if (txq->txq_deferred_transmit) {
1054			softint_disestablish(txq->txq_deferred_transmit);
1055			txq->txq_deferred_transmit = NULL;
1056		}
1057
1058		if (txq->txq_intrq) {
1059			pcq_destroy(txq->txq_intrq);
1060			txq->txq_intrq = NULL;
1061		}
1062	}
1063
1064	if (sc->sc_has_ctrl) {
1065		cv_destroy(&ctrlq->ctrlq_wait);
1066		mutex_destroy(&ctrlq->ctrlq_wait_lock);
1067	}
1068
1069	while (nvqs > 0)
1070		virtio_free_vq(vsc, &sc->sc_vqs[--nvqs]);
1071
1072	vioif_free_queues(sc);
1073	mutex_destroy(&sc->sc_lock);
1074	virtio_child_attach_failed(vsc);
1075	config_finalize_register(self, vioif_finalize_teardown);
1076
1077	return;
1078}
1079
1080static int
1081vioif_finalize_teardown(device_t self)
1082{
1083	struct vioif_softc *sc = device_private(self);
1084
1085	if (sc->sc_txrx_workqueue != NULL) {
1086		vioif_workq_destroy(sc->sc_txrx_workqueue);
1087		sc->sc_txrx_workqueue = NULL;
1088	}
1089
1090	return 0;
1091}
1092
1093/* we need interrupts to turn promiscuous mode off */
1094static void
1095vioif_deferred_init(device_t self)
1096{
1097	struct vioif_softc *sc = device_private(self);
1098	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1099	int r;
1100
1101	if (ifp->if_flags & IFF_PROMISC)
1102		return;
1103
1104	r =  vioif_set_promisc(sc, false);
1105	if (r != 0)
1106		aprint_error_dev(self, "resetting promisc mode failed, "
1107		    "error code %d\n", r);
1108}
1109
1110static void
1111vioif_enable_interrupt_vqpairs(struct vioif_softc *sc)
1112{
1113	struct virtio_softc *vsc = sc->sc_virtio;
1114	struct vioif_txqueue *txq;
1115	struct vioif_rxqueue *rxq;
1116	int i;
1117
1118	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1119		txq = &sc->sc_txq[i];
1120		rxq = &sc->sc_rxq[i];
1121
1122		virtio_start_vq_intr(vsc, txq->txq_vq);
1123		virtio_start_vq_intr(vsc, rxq->rxq_vq);
1124	}
1125}
1126
1127static void
1128vioif_disable_interrupt_vqpairs(struct vioif_softc *sc)
1129{
1130	struct virtio_softc *vsc = sc->sc_virtio;
1131	struct vioif_txqueue *txq;
1132	struct vioif_rxqueue *rxq;
1133	int i;
1134
1135	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1136		rxq = &sc->sc_rxq[i];
1137		txq = &sc->sc_txq[i];
1138
1139		virtio_stop_vq_intr(vsc, rxq->rxq_vq);
1140		virtio_stop_vq_intr(vsc, txq->txq_vq);
1141	}
1142}
1143
1144/*
1145 * Interface functions for ifnet
1146 */
1147static int
1148vioif_init(struct ifnet *ifp)
1149{
1150	struct vioif_softc *sc = ifp->if_softc;
1151	struct virtio_softc *vsc = sc->sc_virtio;
1152	struct vioif_rxqueue *rxq;
1153	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1154	int r, i;
1155
1156	vioif_stop(ifp, 0);
1157
1158	virtio_reinit_start(vsc);
1159	virtio_negotiate_features(vsc, virtio_features(vsc));
1160
1161	for (i = 0; i < sc->sc_req_nvq_pairs; i++) {
1162		rxq = &sc->sc_rxq[i];
1163
1164		/* rxq_stopping must be false before vioif_populate_rx_mbufs_locked */
1165		mutex_enter(rxq->rxq_lock);
1166		rxq->rxq_stopping = false;
1167		vioif_populate_rx_mbufs_locked(sc, rxq);
1168		mutex_exit(rxq->rxq_lock);
1169
1170	}
1171
1172	virtio_reinit_end(vsc);
1173
1174	if (sc->sc_has_ctrl)
1175		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
1176
1177	r = vioif_ctrl_mq_vq_pairs_set(sc, sc->sc_req_nvq_pairs);
1178	if (r == 0)
1179		sc->sc_act_nvq_pairs = sc->sc_req_nvq_pairs;
1180	else
1181		sc->sc_act_nvq_pairs = 1;
1182
1183	for (i = 0; i < sc->sc_act_nvq_pairs; i++)
1184		sc->sc_txq[i].txq_stopping = false;
1185
1186	vioif_enable_interrupt_vqpairs(sc);
1187
1188	if (!sc->sc_deferred_init_done) {
1189		sc->sc_deferred_init_done = 1;
1190		if (sc->sc_has_ctrl)
1191			vioif_deferred_init(sc->sc_dev);
1192	}
1193
1194	vioif_update_link_status(sc);
1195	ifp->if_flags |= IFF_RUNNING;
1196	ifp->if_flags &= ~IFF_OACTIVE;
1197	vioif_rx_filter(sc);
1198
1199	return 0;
1200}
1201
1202static void
1203vioif_stop(struct ifnet *ifp, int disable)
1204{
1205	struct vioif_softc *sc = ifp->if_softc;
1206	struct virtio_softc *vsc = sc->sc_virtio;
1207	struct vioif_txqueue *txq;
1208	struct vioif_rxqueue *rxq;
1209	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1210	int i;
1211
1212	/* Take the locks to ensure that ongoing TX/RX finish */
1213	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1214		txq = &sc->sc_txq[i];
1215		rxq = &sc->sc_rxq[i];
1216
1217		mutex_enter(rxq->rxq_lock);
1218		rxq->rxq_stopping = true;
1219		mutex_exit(rxq->rxq_lock);
1220
1221		mutex_enter(txq->txq_lock);
1222		txq->txq_stopping = true;
1223		mutex_exit(txq->txq_lock);
1224	}
1225
1226	/* disable interrupts */
1227	vioif_disable_interrupt_vqpairs(sc);
1228
1229	if (sc->sc_has_ctrl)
1230		virtio_stop_vq_intr(vsc, ctrlq->ctrlq_vq);
1231
1232	/* only way to stop I/O and DMA is resetting... */
1233	virtio_reset(vsc);
1234
1235	/* rendezvous for finish of handlers */
1236	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1237		txq = &sc->sc_txq[i];
1238		rxq = &sc->sc_rxq[i];
1239
1240		mutex_enter(rxq->rxq_lock);
1241		mutex_exit(rxq->rxq_lock);
1242		vioif_work_wait(sc->sc_txrx_workqueue, &rxq->rxq_work);
1243
1244		mutex_enter(txq->txq_lock);
1245		mutex_exit(txq->txq_lock);
1246		vioif_work_wait(sc->sc_txrx_workqueue, &txq->txq_work);
1247	}
1248
1249	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1250		vioif_rx_queue_clear(&sc->sc_rxq[i]);
1251		vioif_tx_queue_clear(&sc->sc_txq[i]);
1252	}
1253
1254	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1255	sc->sc_link_active = false;
1256
1257	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1258		txq = &sc->sc_txq[i];
1259		rxq = &sc->sc_rxq[i];
1260
1261		txq->txq_link_active = false;
1262
1263		if (disable)
1264			vioif_rx_drain(rxq);
1265
1266		vioif_tx_drain(txq);
1267	}
1268}
1269
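/*
 * Common transmit path shared by if_start and if_transmit: pull mbufs
 * from if_snd or from the per-queue pcq and enqueue them on the tx
 * virtqueue.  Called with txq_lock held.
 */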
1270static void
1271vioif_send_common_locked(struct ifnet *ifp, struct vioif_txqueue *txq,
1272    bool is_transmit)
1273{
1274	struct vioif_softc *sc = ifp->if_softc;
1275	struct virtio_softc *vsc = sc->sc_virtio;
1276	struct virtqueue *vq = txq->txq_vq;
1277	struct virtio_net_hdr *hdr;
1278	struct mbuf *m;
1279	int queued = 0;
1280
1281	KASSERT(mutex_owned(txq->txq_lock));
1282
1283	if ((ifp->if_flags & IFF_RUNNING) == 0)
1284		return;
1285
1286	if (!txq->txq_link_active || txq->txq_stopping)
1287		return;
1288
1289	if ((ifp->if_flags & IFF_OACTIVE) != 0 && !is_transmit)
1290		return;
1291
1292	for (;;) {
1293		int slot, r;
1294
1295		if (is_transmit)
1296			m = pcq_get(txq->txq_intrq);
1297		else
1298			IFQ_DEQUEUE(&ifp->if_snd, m);
1299
1300		if (m == NULL)
1301			break;
1302
1303		r = virtio_enqueue_prep(vsc, vq, &slot);
1304		if (r == EAGAIN) {
1305			ifp->if_flags |= IFF_OACTIVE;
1306			m_freem(m);
1307			break;
1308		}
1309		if (r != 0)
1310			panic("enqueue_prep for a tx buffer");
1311
1312		r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1313		    txq->txq_dmamaps[slot], m, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1314		if (r != 0) {
1315			/* maybe just too fragmented */
1316			struct mbuf *newm;
1317
1318			newm = m_defrag(m, M_NOWAIT);
1319			if (newm == NULL) {
1320				txq->txq_defrag_failed.ev_count++;
1321				goto skip;
1322			}
1323
1324			m = newm;
1325			r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1326			    txq->txq_dmamaps[slot], m,
1327			    BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1328			if (r != 0) {
1329				txq->txq_mbuf_load_failed.ev_count++;
1330skip:
1331				m_freem(m);
1332				virtio_enqueue_abort(vsc, vq, slot);
1333				continue;
1334			}
1335		}
1336
1337		/* This should actually never fail */
1338		r = virtio_enqueue_reserve(vsc, vq, slot,
1339		    txq->txq_dmamaps[slot]->dm_nsegs + 1);
1340		if (r != 0) {
1341			txq->txq_enqueue_reserve_failed.ev_count++;
1342			bus_dmamap_unload(virtio_dmat(vsc),
1343			     txq->txq_dmamaps[slot]);
1344			/* slot already freed by virtio_enqueue_reserve */
1345			m_freem(m);
1346			continue;
1347		}
1348
1349		txq->txq_mbufs[slot] = m;
1350
1351		hdr = &txq->txq_hdrs[slot];
1352		memset(hdr, 0, sc->sc_hdr_size);
1353		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_dmamaps[slot],
1354		    0, txq->txq_dmamaps[slot]->dm_mapsize,
1355		    BUS_DMASYNC_PREWRITE);
1356		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_hdr_dmamaps[slot],
1357		    0, txq->txq_hdr_dmamaps[slot]->dm_mapsize,
1358		    BUS_DMASYNC_PREWRITE);
1359		virtio_enqueue(vsc, vq, slot, txq->txq_hdr_dmamaps[slot], true);
1360		virtio_enqueue(vsc, vq, slot, txq->txq_dmamaps[slot], true);
1361		virtio_enqueue_commit(vsc, vq, slot, false);
1362
1363		queued++;
1364		bpf_mtap(ifp, m, BPF_D_OUT);
1365	}
1366
1367	if (queued > 0) {
1368		virtio_enqueue_commit(vsc, vq, -1, true);
1369		ifp->if_timer = 5;
1370	}
1371}
1372
1373static void
1374vioif_start_locked(struct ifnet *ifp, struct vioif_txqueue *txq)
1375{
1376
1377	/*
1378	 * ifp->if_obytes and ifp->if_omcasts are added in if_transmit()@if.c.
1379	 */
1380	vioif_send_common_locked(ifp, txq, false);
1381
1382}
1383
1384static void
1385vioif_start(struct ifnet *ifp)
1386{
1387	struct vioif_softc *sc = ifp->if_softc;
1388	struct vioif_txqueue *txq = &sc->sc_txq[0];
1389
1390#ifdef VIOIF_MPSAFE
1391	KASSERT(if_is_mpsafe(ifp));
1392#endif
1393
1394	mutex_enter(txq->txq_lock);
1395	vioif_start_locked(ifp, txq);
1396	mutex_exit(txq->txq_lock);
1397}
1398
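/* Map the current CPU onto one of the active tx queues. */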
1399static inline int
1400vioif_select_txqueue(struct ifnet *ifp, struct mbuf *m)
1401{
1402	struct vioif_softc *sc = ifp->if_softc;
1403	u_int cpuid = cpu_index(curcpu());
1404
1405	return cpuid % sc->sc_act_nvq_pairs;
1406}
1407
1408static void
1409vioif_transmit_locked(struct ifnet *ifp, struct vioif_txqueue *txq)
1410{
1411
1412	vioif_send_common_locked(ifp, txq, true);
1413}
1414
1415static int
1416vioif_transmit(struct ifnet *ifp, struct mbuf *m)
1417{
1418	struct vioif_softc *sc = ifp->if_softc;
1419	struct vioif_txqueue *txq;
1420	int qid;
1421
1422	qid = vioif_select_txqueue(ifp, m);
1423	txq = &sc->sc_txq[qid];
1424
1425	if (__predict_false(!pcq_put(txq->txq_intrq, m))) {
1426		m_freem(m);
1427		return ENOBUFS;
1428	}
1429
1430	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
1431	if_statadd_ref(nsr, if_obytes, m->m_pkthdr.len);
1432	if (m->m_flags & M_MCAST)
1433		if_statinc_ref(nsr, if_omcasts);
1434	IF_STAT_PUTREF(ifp);
1435
1436	if (mutex_tryenter(txq->txq_lock)) {
1437		vioif_transmit_locked(ifp, txq);
1438		mutex_exit(txq->txq_lock);
1439	}
1440
1441	return 0;
1442}
1443
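/* Softint handler: retry transmission after tx slots are reclaimed. */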
1444static void
1445vioif_deferred_transmit(void *arg)
1446{
1447	struct vioif_txqueue *txq = arg;
1448	struct virtio_softc *vsc = txq->txq_vq->vq_owner;
1449	struct vioif_softc *sc = device_private(virtio_child(vsc));
1450	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1451
1452	mutex_enter(txq->txq_lock);
1453	vioif_send_common_locked(ifp, txq, true);
1454	mutex_exit(txq->txq_lock);
1455}
1456
1457static int
1458vioif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1459{
1460	int s, r;
1461
1462	s = splnet();
1463
1464	r = ether_ioctl(ifp, cmd, data);
1465	if ((r == 0 && cmd == SIOCSIFFLAGS) ||
1466	    (r == ENETRESET && (cmd == SIOCADDMULTI || cmd == SIOCDELMULTI))) {
1467		if (ifp->if_flags & IFF_RUNNING)
1468			r = vioif_rx_filter(ifp->if_softc);
1469		else
1470			r = 0;
1471	}
1472
1473	splx(s);
1474
1475	return r;
1476}
1477
1478void
1479vioif_watchdog(struct ifnet *ifp)
1480{
1481	struct vioif_softc *sc = ifp->if_softc;
1482	int i;
1483
1484	if (ifp->if_flags & IFF_RUNNING) {
1485		for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1486			vioif_tx_queue_clear(&sc->sc_txq[i]);
1487		}
1488	}
1489}
1490
1491/*
1492 * Receive implementation
1493 */
1494/* allocate and initialize a mbuf for receive */
1495static int
1496vioif_add_rx_mbuf(struct vioif_rxqueue *rxq, int i)
1497{
1498	struct virtio_softc *vsc = rxq->rxq_vq->vq_owner;
1499	struct mbuf *m;
1500	int r;
1501
1502	MGETHDR(m, M_DONTWAIT, MT_DATA);
1503	if (m == NULL)
1504		return ENOBUFS;
1505	MCLGET(m, M_DONTWAIT);
1506	if ((m->m_flags & M_EXT) == 0) {
1507		m_freem(m);
1508		return ENOBUFS;
1509	}
1510	rxq->rxq_mbufs[i] = m;
1511	m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
1512	r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1513	    rxq->rxq_dmamaps[i], m, BUS_DMA_READ | BUS_DMA_NOWAIT);
1514	if (r) {
1515		m_freem(m);
1516		rxq->rxq_mbufs[i] = NULL;
1517		return r;
1518	}
1519
1520	return 0;
1521}
1522
1523/* free a mbuf for receive */
1524static void
1525vioif_free_rx_mbuf(struct vioif_rxqueue *rxq, int i)
1526{
1527	struct virtio_softc *vsc = rxq->rxq_vq->vq_owner;
1528
1529	bus_dmamap_unload(virtio_dmat(vsc), rxq->rxq_dmamaps[i]);
1530	m_freem(rxq->rxq_mbufs[i]);
1531	rxq->rxq_mbufs[i] = NULL;
1532}
1533
1534/* add mbufs for all the empty receive slots */
1535static void
1536vioif_populate_rx_mbufs_locked(struct vioif_softc *sc, struct vioif_rxqueue *rxq)
1537{
1538	struct virtqueue *vq = rxq->rxq_vq;
1539	struct virtio_softc *vsc = vq->vq_owner;
1540	int i, r, ndone = 0;
1541
1542	KASSERT(mutex_owned(rxq->rxq_lock));
1543
1544	if (rxq->rxq_stopping)
1545		return;
1546
1547	for (i = 0; i < vq->vq_num; i++) {
1548		int slot;
1549		r = virtio_enqueue_prep(vsc, vq, &slot);
1550		if (r == EAGAIN)
1551			break;
1552		if (r != 0)
1553			panic("enqueue_prep for rx buffers");
1554		if (rxq->rxq_mbufs[slot] == NULL) {
1555			r = vioif_add_rx_mbuf(rxq, slot);
1556			if (r != 0) {
1557				rxq->rxq_mbuf_add_failed.ev_count++;
1558				break;
1559			}
1560		}
1561		r = virtio_enqueue_reserve(vsc, vq, slot,
1562		    rxq->rxq_dmamaps[slot]->dm_nsegs + 1);
1563		if (r != 0) {
1564			vioif_free_rx_mbuf(rxq, slot);
1565			break;
1566		}
1567		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_hdr_dmamaps[slot],
1568		    0, sc->sc_hdr_size, BUS_DMASYNC_PREREAD);
1569		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_dmamaps[slot],
1570		    0, MCLBYTES, BUS_DMASYNC_PREREAD);
1571		virtio_enqueue(vsc, vq, slot, rxq->rxq_hdr_dmamaps[slot],
1572		    false);
1573		virtio_enqueue(vsc, vq, slot, rxq->rxq_dmamaps[slot], false);
1574		virtio_enqueue_commit(vsc, vq, slot, false);
1575		ndone++;
1576	}
1577	if (ndone > 0)
1578		virtio_enqueue_commit(vsc, vq, -1, true);
1579}
1580
1581static void
1582vioif_rx_queue_clear(struct vioif_rxqueue *rxq)
1583{
1584	struct virtqueue *vq = rxq->rxq_vq;
1585	struct virtio_softc *vsc = vq->vq_owner;
1586	struct vioif_softc *sc = device_private(virtio_child(vsc));
1587	u_int limit = UINT_MAX;
1588	bool more;
1589
1590	KASSERT(rxq->rxq_stopping);
1591
1592	mutex_enter(rxq->rxq_lock);
1593	for (;;) {
1594		more = vioif_rx_deq_locked(sc, vsc, rxq, limit);
1595		if (more == false)
1596			break;
1597	}
1598	mutex_exit(rxq->rxq_lock);
1599}
1600
1601/* dequeue received packets */
1602static bool
1603vioif_rx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
1604    struct vioif_rxqueue *rxq, u_int limit)
1605{
1606	struct virtqueue *vq = rxq->rxq_vq;
1607	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1608	struct mbuf *m;
1609	int slot, len;
1610	bool more = false, dequeued = false;
1611
1612	KASSERT(mutex_owned(rxq->rxq_lock));
1613
1614	if (virtio_vq_is_enqueued(vsc, vq) == false)
1615		return false;
1616
1617	for (;;) {
1618		if (limit-- == 0) {
1619			more = true;
1620			break;
1621		}
1622
1623		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
1624			break;
1625
1626		dequeued = true;
1627
1628		len -= sc->sc_hdr_size;
1629		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_hdr_dmamaps[slot],
1630		    0, sc->sc_hdr_size, BUS_DMASYNC_POSTREAD);
1631		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_dmamaps[slot],
1632		    0, MCLBYTES, BUS_DMASYNC_POSTREAD);
1633		m = rxq->rxq_mbufs[slot];
1634		KASSERT(m != NULL);
1635		bus_dmamap_unload(virtio_dmat(vsc), rxq->rxq_dmamaps[slot]);
1636		rxq->rxq_mbufs[slot] = NULL;
1637		virtio_dequeue_commit(vsc, vq, slot);
1638		m_set_rcvif(m, ifp);
1639		m->m_len = m->m_pkthdr.len = len;
1640
1641		mutex_exit(rxq->rxq_lock);
1642		if_percpuq_enqueue(ifp->if_percpuq, m);
1643		mutex_enter(rxq->rxq_lock);
1644
1645		if (rxq->rxq_stopping)
1646			break;
1647	}
1648
1649	if (dequeued)
1650		vioif_populate_rx_mbufs_locked(sc, rxq);
1651
1652	return more;
1653}
1654
1655/* rx interrupt; call _dequeue above and schedule a softint */
1656
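/*
 * Dequeue up to "limit" received packets; reschedule the handler if
 * more work remains or new packets arrive after the interrupt is
 * re-enabled, otherwise mark the queue inactive.
 */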
1657static void
1658vioif_rx_handle_locked(void *xrxq, u_int limit)
1659{
1660	struct vioif_rxqueue *rxq = xrxq;
1661	struct virtqueue *vq = rxq->rxq_vq;
1662	struct virtio_softc *vsc = vq->vq_owner;
1663	struct vioif_softc *sc = device_private(virtio_child(vsc));
1664	bool more;
1665
1666	KASSERT(!rxq->rxq_stopping);
1667
1668	more = vioif_rx_deq_locked(sc, vsc, rxq, limit);
1669	if (more) {
1670		vioif_rx_sched_handle(sc, rxq);
1671		return;
1672	}
1673	more = virtio_start_vq_intr(vsc, rxq->rxq_vq);
1674	if (more) {
1675		vioif_rx_sched_handle(sc, rxq);
1676		return;
1677	}
1678	atomic_store_relaxed(&rxq->rxq_active, false);
1679}
1680
1681static int
1682vioif_rx_intr(void *arg)
1683{
1684	struct vioif_rxqueue *rxq = arg;
1685	struct virtqueue *vq = rxq->rxq_vq;
1686	struct virtio_softc *vsc = vq->vq_owner;
1687	struct vioif_softc *sc = device_private(virtio_child(vsc));
1688	u_int limit;
1689
1690	limit = sc->sc_rx_intr_process_limit;
1691
1692	if (atomic_load_relaxed(&rxq->rxq_active) == true)
1693		return 1;
1694
1695	mutex_enter(rxq->rxq_lock);
1696
1697	if (!rxq->rxq_stopping) {
1698		rxq->rxq_workqueue = sc->sc_txrx_workqueue_sysctl;
1699
1700		virtio_stop_vq_intr(vsc, vq);
1701		atomic_store_relaxed(&rxq->rxq_active, true);
1702
1703		vioif_rx_handle_locked(rxq, limit);
1704	}
1705
1706	mutex_exit(rxq->rxq_lock);
1707	return 1;
1708}
1709
1710static void
1711vioif_rx_handle(void *xrxq)
1712{
1713	struct vioif_rxqueue *rxq = xrxq;
1714	struct virtqueue *vq = rxq->rxq_vq;
1715	struct virtio_softc *vsc = vq->vq_owner;
1716	struct vioif_softc *sc = device_private(virtio_child(vsc));
1717	u_int limit;
1718
1719	limit = sc->sc_rx_process_limit;
1720
1721	mutex_enter(rxq->rxq_lock);
1722
1723	if (!rxq->rxq_stopping)
1724		vioif_rx_handle_locked(rxq, limit);
1725
1726	mutex_exit(rxq->rxq_lock);
1727}
1728
1729static void
1730vioif_rx_sched_handle(struct vioif_softc *sc, struct vioif_rxqueue *rxq)
1731{
1732
1733	if (rxq->rxq_workqueue)
1734		vioif_work_add(sc->sc_txrx_workqueue, &rxq->rxq_work);
1735	else
1736		softint_schedule(rxq->rxq_handle_si);
1737}
1738
1739/* free all the mbufs; called from if_stop(disable) */
1740static void
1741vioif_rx_drain(struct vioif_rxqueue *rxq)
1742{
1743	struct virtqueue *vq = rxq->rxq_vq;
1744	int i;
1745
1746	for (i = 0; i < vq->vq_num; i++) {
1747		if (rxq->rxq_mbufs[i] == NULL)
1748			continue;
1749		vioif_free_rx_mbuf(rxq, i);
1750	}
1751}
1752
1753/*
1754 * Transmission implementation
1755 */
1756/* actual transmission is done in if_start */
1757/* tx interrupt; dequeue and free mbufs */
1758/*
1759 * tx interrupt is actually disabled; this should be called upon
1760 * tx vq full and watchdog
1761 */
1762
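/*
 * Reclaim up to "limit" completed tx slots; reschedule if more work
 * remains, otherwise re-enable (or postpone) the tx interrupt and kick
 * the transmit paths again.
 */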
1763static void
1764vioif_tx_handle_locked(struct vioif_txqueue *txq, u_int limit)
1765{
1766	struct virtqueue *vq = txq->txq_vq;
1767	struct virtio_softc *vsc = vq->vq_owner;
1768	struct vioif_softc *sc = device_private(virtio_child(vsc));
1769	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1770	bool more;
1771
1772	KASSERT(!txq->txq_stopping);
1773
1774	more = vioif_tx_deq_locked(sc, vsc, txq, limit);
1775	if (more) {
1776		vioif_tx_sched_handle(sc, txq);
1777		return;
1778	}
1779
1780	if (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX)
1781		more = virtio_postpone_intr_smart(vsc, vq);
1782	else
1783		more = virtio_start_vq_intr(vsc, vq);
1784	if (more) {
1785		vioif_tx_sched_handle(sc, txq);
1786		return;
1787	}
1788
1789	atomic_store_relaxed(&txq->txq_active, false);
1790	/* for ALTQ */
1791	if (txq == &sc->sc_txq[0]) {
1792		if_schedule_deferred_start(ifp);
1793		ifp->if_flags &= ~IFF_OACTIVE;
1794	}
1795	softint_schedule(txq->txq_deferred_transmit);
1796}
1797
1798
1799static int
1800vioif_tx_intr(void *arg)
1801{
1802	struct vioif_txqueue *txq = arg;
1803	struct virtqueue *vq = txq->txq_vq;
1804	struct virtio_softc *vsc = vq->vq_owner;
1805	struct vioif_softc *sc = device_private(virtio_child(vsc));
1806	u_int limit;
1807
1808	limit = sc->sc_tx_intr_process_limit;
1809
1810	if (atomic_load_relaxed(&txq->txq_active) == true)
1811		return 1;
1812
1813	mutex_enter(txq->txq_lock);
1814
1815	if (!txq->txq_stopping) {
1816		txq->txq_workqueue = sc->sc_txrx_workqueue_sysctl;
1817
1818		virtio_stop_vq_intr(vsc, vq);
1819		atomic_store_relaxed(&txq->txq_active, true);
1820
1821		vioif_tx_handle_locked(txq, limit);
1822	}
1823
1824	mutex_exit(txq->txq_lock);
1825
1826	return 1;
1827}
1828
1829static void
1830vioif_tx_handle(void *xtxq)
1831{
1832	struct vioif_txqueue *txq = xtxq;
1833	struct virtqueue *vq = txq->txq_vq;
1834	struct virtio_softc *vsc = vq->vq_owner;
1835	struct vioif_softc *sc = device_private(virtio_child(vsc));
1836	u_int limit;
1837
1838	limit = sc->sc_tx_process_limit;
1839
1840	mutex_enter(txq->txq_lock);
1841	if (!txq->txq_stopping)
1842		vioif_tx_handle_locked(txq, limit);
1843	mutex_exit(txq->txq_lock);
1844}
1845
1846static void
1847vioif_tx_sched_handle(struct vioif_softc *sc, struct vioif_txqueue *txq)
1848{
1849
1850	if (txq->txq_workqueue)
1851		vioif_work_add(sc->sc_txrx_workqueue, &txq->txq_work);
1852	else
1853		softint_schedule(txq->txq_handle_si);
1854}
1855
1856static void
1857vioif_tx_queue_clear(struct vioif_txqueue *txq)
1858{
1859	struct virtqueue *vq = txq->txq_vq;
1860	struct virtio_softc *vsc = vq->vq_owner;
1861	struct vioif_softc *sc = device_private(virtio_child(vsc));
1862	u_int limit = UINT_MAX;
1863	bool more;
1864
1865	mutex_enter(txq->txq_lock);
1866	for (;;) {
1867		more = vioif_tx_deq_locked(sc, vsc, txq, limit);
1868		if (more == false)
1869			break;
1870	}
1871	mutex_exit(txq->txq_lock);
1872}
1873
1874static bool
1875vioif_tx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
1876    struct vioif_txqueue *txq, u_int limit)
1877{
1878	struct virtqueue *vq = txq->txq_vq;
1879	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1880	struct mbuf *m;
1881	int slot, len;
1882	bool more = false;
1883
1884	KASSERT(mutex_owned(txq->txq_lock));
1885
1886	if (virtio_vq_is_enqueued(vsc, vq) == false)
1887		return false;
1888
1889	for (;;) {
1890		if (limit-- == 0) {
1891			more = true;
1892			break;
1893		}
1894
1895		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
1896			break;
1897
1898		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_hdr_dmamaps[slot],
1899		    0, sc->sc_hdr_size, BUS_DMASYNC_POSTWRITE);
1900		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_dmamaps[slot],
1901		    0, txq->txq_dmamaps[slot]->dm_mapsize,
1902		    BUS_DMASYNC_POSTWRITE);
1903		m = txq->txq_mbufs[slot];
1904		bus_dmamap_unload(virtio_dmat(vsc), txq->txq_dmamaps[slot]);
1905		txq->txq_mbufs[slot] = NULL;
1906		virtio_dequeue_commit(vsc, vq, slot);
1907		if_statinc(ifp, if_opackets);
1908		m_freem(m);
1909	}
1910
1911	return more;
1912}
1913
1914/* free all the mbufs already put on vq; called from if_stop(disable) */
1915static void
1916vioif_tx_drain(struct vioif_txqueue *txq)
1917{
1918	struct virtqueue *vq = txq->txq_vq;
1919	struct virtio_softc *vsc = vq->vq_owner;
1920	int i;
1921
1922	KASSERT(txq->txq_stopping);
1923
1924	for (i = 0; i < vq->vq_num; i++) {
1925		if (txq->txq_mbufs[i] == NULL)
1926			continue;
1927		bus_dmamap_unload(virtio_dmat(vsc), txq->txq_dmamaps[i]);
1928		m_freem(txq->txq_mbufs[i]);
1929		txq->txq_mbufs[i] = NULL;
1930	}
1931}
1932
1933/*
1934 * Control vq
1935 */
1936/* issue a VIRTIO_NET_CTRL_RX class command and wait for completion */
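/* Gain exclusive use of the control virtqueue, sleeping while it is busy. */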
1937static void
1938vioif_ctrl_acquire(struct vioif_softc *sc)
1939{
1940	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1941
1942	mutex_enter(&ctrlq->ctrlq_wait_lock);
1943	while (ctrlq->ctrlq_inuse != FREE)
1944		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
1945	ctrlq->ctrlq_inuse = INUSE;
1946	ctrlq->ctrlq_owner = curlwp;
1947	mutex_exit(&ctrlq->ctrlq_wait_lock);
1948}
1949
1950static void
1951vioif_ctrl_release(struct vioif_softc *sc)
1952{
1953	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1954
1955	KASSERT(ctrlq->ctrlq_inuse != FREE);
1956	KASSERT(ctrlq->ctrlq_owner == curlwp);
1957
1958	mutex_enter(&ctrlq->ctrlq_wait_lock);
1959	ctrlq->ctrlq_inuse = FREE;
1960	ctrlq->ctrlq_owner = NULL;
1961	cv_signal(&ctrlq->ctrlq_wait);
1962	mutex_exit(&ctrlq->ctrlq_wait_lock);
1963}
1964
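/*
 * Load the DMA maps for a set of control-command buffers.  On failure
 * the maps loaded so far are unloaded again before the error is
 * returned.
 */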
1965static int
1966vioif_ctrl_load_cmdspec(struct vioif_softc *sc,
1967    struct vioif_ctrl_cmdspec *specs, int nspecs)
1968{
1969	struct virtio_softc *vsc = sc->sc_virtio;
1970	int i, r, loaded;
1971
1972	loaded = 0;
1973	for (i = 0; i < nspecs; i++) {
1974		r = bus_dmamap_load(virtio_dmat(vsc),
1975		    specs[i].dmamap, specs[i].buf, specs[i].bufsize,
1976		    NULL, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1977		if (r) {
1978			sc->sc_ctrlq.ctrlq_cmd_load_failed.ev_count++;
1979			goto err;
1980		}
1981		loaded++;
1982
1983	}
1984
1985	return 0;
1986
1987err:
1988	for (i = 0; i < loaded; i++) {
1989		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
1990	}
1991
1992	return r;
1993}
1994
1995static void
1996vioif_ctrl_unload_cmdspec(struct vioif_softc *sc,
1997    struct vioif_ctrl_cmdspec *specs, int nspecs)
1998{
1999	struct virtio_softc *vsc = sc->sc_virtio;
2000	int i;
2001
2002	for (i = 0; i < nspecs; i++) {
2003		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2004	}
2005}
2006
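/*
 * Issue one command on the control virtqueue and sleep until the
 * device acknowledges it.  The descriptor chain is: command header,
 * the caller-supplied buffers in `specs`, then the writable status
 * byte.  The ctrl vq interrupt handler marks the request DONE and
 * wakes this thread; the command fails with EIO unless the device
 * returns VIRTIO_NET_OK.
 *
 * A caller is expected to bracket this with vioif_ctrl_acquire() /
 * vioif_ctrl_release(), roughly as follows (a sketch mirroring
 * vioif_ctrl_rx() below):
 *
 *	vioif_ctrl_acquire(sc);
 *	specs[0].dmamap  = ...;
 *	specs[0].buf     = ...;
 *	specs[0].bufsize = ...;
 *	r = vioif_ctrl_send_command(sc, class, cmd, specs, nspecs);
 *	vioif_ctrl_release(sc);
 */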
2007static int
2008vioif_ctrl_send_command(struct vioif_softc *sc, uint8_t class, uint8_t cmd,
2009    struct vioif_ctrl_cmdspec *specs, int nspecs)
2010{
2011	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2012	struct virtqueue *vq = ctrlq->ctrlq_vq;
2013	struct virtio_softc *vsc = sc->sc_virtio;
2014	int i, r, slot;
2015
2016	ctrlq->ctrlq_cmd->class = class;
2017	ctrlq->ctrlq_cmd->command = cmd;
2018
2019	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap,
2020	    0, sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_PREWRITE);
2021	for (i = 0; i < nspecs; i++) {
2022		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap,
2023		    0, specs[i].bufsize, BUS_DMASYNC_PREWRITE);
2024	}
2025	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap,
2026	    0, sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_PREREAD);
2027
2028	/* we need to explicitly (re)start vq intr when using RING EVENT IDX */
2029	if (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX)
2030		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
2031
2032	r = virtio_enqueue_prep(vsc, vq, &slot);
2033	if (r != 0)
2034		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2035	r = virtio_enqueue_reserve(vsc, vq, slot, nspecs + 2);
2036	if (r != 0)
2037		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2038	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_cmd_dmamap, true);
2039	for (i = 0; i < nspecs; i++) {
2040		virtio_enqueue(vsc, vq, slot, specs[i].dmamap, true);
2041	}
2042	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_status_dmamap, false);
2043	virtio_enqueue_commit(vsc, vq, slot, true);
2044
2045	/* wait for done */
2046	mutex_enter(&ctrlq->ctrlq_wait_lock);
2047	while (ctrlq->ctrlq_inuse != DONE)
2048		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
2049	mutex_exit(&ctrlq->ctrlq_wait_lock);
2050	/* already dequeued */
2051
2052	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap, 0,
2053	    sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
2054	for (i = 0; i < nspecs; i++) {
2055		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap, 0,
2056		    specs[i].bufsize, BUS_DMASYNC_POSTWRITE);
2057	}
2058	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap, 0,
2059	    sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_POSTREAD);
2060
2061	if (ctrlq->ctrlq_status->ack == VIRTIO_NET_OK)
2062		r = 0;
2063	else {
2064		device_printf(sc->sc_dev, "control command failed\n");
2065		sc->sc_ctrlq.ctrlq_cmd_failed.ev_count++;
2066		r = EIO;
2067	}
2068
2069	return r;
2070}
2071
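/* issue a VIRTIO_NET_CTRL_RX class command and wait for completion */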
2072static int
2073vioif_ctrl_rx(struct vioif_softc *sc, int cmd, bool onoff)
2074{
2075	struct virtio_net_ctrl_rx *rx = sc->sc_ctrlq.ctrlq_rx;
2076	struct vioif_ctrl_cmdspec specs[1];
2077	int r;
2078
2079	if (!sc->sc_has_ctrl)
2080		return ENOTSUP;
2081
2082	vioif_ctrl_acquire(sc);
2083
2084	rx->onoff = onoff;
2085	specs[0].dmamap = sc->sc_ctrlq.ctrlq_rx_dmamap;
2086	specs[0].buf = rx;
2087	specs[0].bufsize = sizeof(*rx);
2088
2089	r = vioif_ctrl_send_command(sc, VIRTIO_NET_CTRL_RX, cmd,
2090	    specs, __arraycount(specs));
2091
2092	vioif_ctrl_release(sc);
2093	return r;
2094}
2095
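/* convenience wrappers around VIRTIO_NET_CTRL_RX_{PROMISC,ALLMULTI} */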
2096static int
2097vioif_set_promisc(struct vioif_softc *sc, bool onoff)
2098{
2099	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, onoff);
2100}
2101
2102static int
2103vioif_set_allmulti(struct vioif_softc *sc, bool onoff)
2104{
2105	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, onoff);
2106}
2107
2108/* issue VIRTIO_NET_CTRL_MAC_TABLE_SET command and wait for completion */
2109static int
2110vioif_set_rx_filter(struct vioif_softc *sc)
2111{
2112	/* filter already set in ctrlq->ctrlq_mac_tbl */
2113	struct virtio_softc *vsc = sc->sc_virtio;
2114	struct virtio_net_ctrl_mac_tbl *mac_tbl_uc, *mac_tbl_mc;
2115	struct vioif_ctrl_cmdspec specs[2];
2116	int nspecs = __arraycount(specs);
2117	int r;
2118
2119	mac_tbl_uc = sc->sc_ctrlq.ctrlq_mac_tbl_uc;
2120	mac_tbl_mc = sc->sc_ctrlq.ctrlq_mac_tbl_mc;
2121
2122	if (!sc->sc_has_ctrl)
2123		return ENOTSUP;
2124
2125	vioif_ctrl_acquire(sc);
2126
2127	specs[0].dmamap = sc->sc_ctrlq.ctrlq_tbl_uc_dmamap;
2128	specs[0].buf = mac_tbl_uc;
2129	specs[0].bufsize = sizeof(*mac_tbl_uc)
2130	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_uc->nentries));
2131
2132	specs[1].dmamap = sc->sc_ctrlq.ctrlq_tbl_mc_dmamap;
2133	specs[1].buf = mac_tbl_mc;
2134	specs[1].bufsize = sizeof(*mac_tbl_mc)
2135	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_mc->nentries));
2136
2137	r = vioif_ctrl_load_cmdspec(sc, specs, nspecs);
2138	if (r != 0)
2139		goto out;
2140
2141	r = vioif_ctrl_send_command(sc,
2142	    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET,
2143	    specs, nspecs);
2144
2145	vioif_ctrl_unload_cmdspec(sc, specs, nspecs);
2146
2147out:
2148	vioif_ctrl_release(sc);
2149
2150	return r;
2151}
2152
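/*
 * Tell the device how many tx/rx virtqueue pairs to use
 * (VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET); only issued for multiqueue
 * operation, hence the nvq_pairs <= 1 check.
 */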
2153static int
2154vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *sc, int nvq_pairs)
2155{
2156	struct virtio_net_ctrl_mq *mq = sc->sc_ctrlq.ctrlq_mq;
2157	struct vioif_ctrl_cmdspec specs[1];
2158	int r;
2159
2160	if (!sc->sc_has_ctrl)
2161		return ENOTSUP;
2162
2163	if (nvq_pairs <= 1)
2164		return EINVAL;
2165
2166	vioif_ctrl_acquire(sc);
2167
2168	mq->virtqueue_pairs = virtio_rw16(sc->sc_virtio, nvq_pairs);
2169	specs[0].dmamap = sc->sc_ctrlq.ctrlq_mq_dmamap;
2170	specs[0].buf = mq;
2171	specs[0].bufsize = sizeof(*mq);
2172
2173	r = vioif_ctrl_send_command(sc,
2174	    VIRTIO_NET_CTRL_MQ, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
2175	    specs, __arraycount(specs));
2176
2177	vioif_ctrl_release(sc);
2178
2179	return r;
2180}
2181
2182/* ctrl vq interrupt; wake up the command issuer */
2183static int
2184vioif_ctrl_intr(void *arg)
2185{
2186	struct vioif_ctrlqueue *ctrlq = arg;
2187	struct virtqueue *vq = ctrlq->ctrlq_vq;
2188	struct virtio_softc *vsc = vq->vq_owner;
2189	int r, slot;
2190
2191	if (virtio_vq_is_enqueued(vsc, vq) == false)
2192		return 0;
2193
2194	r = virtio_dequeue(vsc, vq, &slot, NULL);
2195	if (r == ENOENT)
2196		return 0;
2197	virtio_dequeue_commit(vsc, vq, slot);
2198
2199	mutex_enter(&ctrlq->ctrlq_wait_lock);
2200	ctrlq->ctrlq_inuse = DONE;
2201	cv_signal(&ctrlq->ctrlq_wait);
2202	mutex_exit(&ctrlq->ctrlq_wait_lock);
2203
2204	return 1;
2205}
2206
2207/*
2208 * If IFF_PROMISC is requested, set promiscuous mode.
2209 * If the multicast filter is small enough (<= MAXENTRIES), program the
2210 * rx filter.
2211 * If the multicast filter is too large, use ALLMULTI.
2212 *
2213 * If programming the rx filter fails, fall back to ALLMULTI.
2214 * If ALLMULTI fails, fall back to PROMISC.
2215 */
2216static int
2217vioif_rx_filter(struct vioif_softc *sc)
2218{
2219	struct virtio_softc *vsc = sc->sc_virtio;
2220	struct ethercom *ec = &sc->sc_ethercom;
2221	struct ifnet *ifp = &ec->ec_if;
2222	struct ether_multi *enm;
2223	struct ether_multistep step;
2224	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2225	int nentries;
2226	int promisc = 0, allmulti = 0, rxfilter = 0;
2227	int r;
2228
2229	if (!sc->sc_has_ctrl) {	/* no ctrl vq; always promisc */
2230		ifp->if_flags |= IFF_PROMISC;
2231		return 0;
2232	}
2233
2234	if (ifp->if_flags & IFF_PROMISC) {
2235		promisc = 1;
2236		goto set;
2237	}
2238
2239	nentries = -1;
2240	ETHER_LOCK(ec);
2241	ETHER_FIRST_MULTI(step, ec, enm);
2242	while (nentries++, enm != NULL) {
2243		if (nentries >= VIRTIO_NET_CTRL_MAC_MAXENTRIES) {
2244			allmulti = 1;
2245			goto set_unlock;
2246		}
2247		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
2248			allmulti = 1;
2249			goto set_unlock;
2250		}
2251		memcpy(ctrlq->ctrlq_mac_tbl_mc->macs[nentries],
2252		    enm->enm_addrlo, ETHER_ADDR_LEN);
2253		ETHER_NEXT_MULTI(step, enm);
2254	}
2255	rxfilter = 1;
2256
2257set_unlock:
2258	ETHER_UNLOCK(ec);
2259
2260set:
2261	if (rxfilter) {
2262		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 0);
2263		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, nentries);
2264		r = vioif_set_rx_filter(sc);
2265		if (r != 0) {
2266			rxfilter = 0;
2267			allmulti = 1; /* fallback */
2268		}
2269	} else {
2270		/* remove rx filter */
2271		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 0);
2272		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, 0);
2273		r = vioif_set_rx_filter(sc);
2274		/* what to do on failure? */
2275	}
2276	if (allmulti) {
2277		r = vioif_set_allmulti(sc, true);
2278		if (r != 0) {
2279			allmulti = 0;
2280			promisc = 1; /* fallback */
2281		}
2282	} else {
2283		r = vioif_set_allmulti(sc, false);
2284		/* what to do on failure? */
2285	}
2286	if (promisc) {
2287		r = vioif_set_promisc(sc, true);
2288	} else {
2289		r = vioif_set_promisc(sc, false);
2290	}
2291
2292	return r;
2293}
2294
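/*
 * Read the link state from the device config space when
 * VIRTIO_NET_F_STATUS was negotiated; otherwise assume the link is
 * always up.
 */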
2295static bool
2296vioif_is_link_up(struct vioif_softc *sc)
2297{
2298	struct virtio_softc *vsc = sc->sc_virtio;
2299	uint16_t status;
2300
2301	if (virtio_features(vsc) & VIRTIO_NET_F_STATUS)
2302		status = virtio_read_device_config_2(vsc,
2303		    VIRTIO_NET_CONFIG_STATUS);
2304	else
2305		status = VIRTIO_NET_S_LINK_UP;
2306
2307	return ((status & VIRTIO_NET_S_LINK_UP) != 0);
2308}
2309
2310/* change link status */
2311static void
2312vioif_update_link_status(struct vioif_softc *sc)
2313{
2314	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2315	struct vioif_txqueue *txq;
2316	bool active, changed;
2317	int link, i;
2318
2319	mutex_enter(&sc->sc_lock);
2320
2321	active = vioif_is_link_up(sc);
2322	changed = false;
2323
2324	if (active) {
2325		if (!sc->sc_link_active)
2326			changed = true;
2327
2328		link = LINK_STATE_UP;
2329		sc->sc_link_active = true;
2330	} else {
2331		if (sc->sc_link_active)
2332			changed = true;
2333
2334		link = LINK_STATE_DOWN;
2335		sc->sc_link_active = false;
2336	}
2337
2338	if (changed) {
2339		for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
2340			txq = &sc->sc_txq[i];
2341
2342			mutex_enter(txq->txq_lock);
2343			txq->txq_link_active = sc->sc_link_active;
2344			mutex_exit(txq->txq_lock);
2345		}
2346
2347		if_link_state_change(ifp, link);
2348	}
2349
2350	mutex_exit(&sc->sc_lock);
2351}
2352
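/*
 * Config change callback from the virtio layer; defer the link-state
 * update and queue restart to a softint rather than doing them in the
 * callback itself.
 */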
2353static int
2354vioif_config_change(struct virtio_softc *vsc)
2355{
2356	struct vioif_softc *sc = device_private(virtio_child(vsc));
2357
2358	softint_schedule(sc->sc_ctl_softint);
2359	return 0;
2360}
2361
2362static void
2363vioif_ctl_softint(void *arg)
2364{
2365	struct vioif_softc *sc = arg;
2366	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2367
2368	vioif_update_link_status(sc);
2369	vioif_start(ifp);
2370}
2371
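/*
 * Small wrappers around workqueue(9).  vioif_work_add() uses the
 * `added` flag so a work item is enqueued at most once until it has
 * actually started running (the flag is cleared in vioif_workq_work()
 * before the callback is invoked).
 */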
2372static struct workqueue *
2373vioif_workq_create(const char *name, pri_t prio, int ipl, int flags)
2374{
2375	struct workqueue *wq;
2376	int error;
2377
2378	error = workqueue_create(&wq, name, vioif_workq_work, NULL,
2379	    prio, ipl, flags);
2380
2381	if (error)
2382		return NULL;
2383
2384	return wq;
2385}
2386
2387static void
2388vioif_workq_destroy(struct workqueue *wq)
2389{
2390
2391	workqueue_destroy(wq);
2392}
2393
2394static void
2395vioif_workq_work(struct work *wk, void *context)
2396{
2397	struct vioif_work *work;
2398
2399	work = container_of(wk, struct vioif_work, cookie);
2400
2401	atomic_store_relaxed(&work->added, 0);
2402	work->func(work->arg);
2403}
2404
2405static void
2406vioif_work_set(struct vioif_work *work, void (*func)(void *), void *arg)
2407{
2408
2409	memset(work, 0, sizeof(*work));
2410	work->func = func;
2411	work->arg = arg;
2412}
2413
2414static void
2415vioif_work_add(struct workqueue *wq, struct vioif_work *work)
2416{
2417
2418	if (atomic_load_relaxed(&work->added) != 0)
2419		return;
2420
2421	atomic_store_relaxed(&work->added, 1);
2422	kpreempt_disable();
2423	workqueue_enqueue(wq, &work->cookie, NULL);
2424	kpreempt_enable();
2425}
2426
2427static void
2428vioif_work_wait(struct workqueue *wq, struct vioif_work *work)
2429{
2430
2431	workqueue_wait(wq, &work->cookie);
2432}
2433
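/*
 * Create the per-device sysctl subtree under hw.<devname>:
 * txrx_workqueue plus rx/tx intr_process_limit and process_limit.
 * For example (the device name and value are only illustrative,
 * assuming the device attached as vioif0):
 *
 *	sysctl -w hw.vioif0.txrx_workqueue=1
 *	sysctl -w hw.vioif0.tx.process_limit=512
 */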
2434static int
2435vioif_setup_sysctl(struct vioif_softc *sc)
2436{
2437	const char *devname;
2438	struct sysctllog **log;
2439	const struct sysctlnode *rnode, *rxnode, *txnode;
2440	int error;
2441
2442	log = &sc->sc_sysctllog;
2443	devname = device_xname(sc->sc_dev);
2444
2445	error = sysctl_createv(log, 0, NULL, &rnode,
2446	    0, CTLTYPE_NODE, devname,
2447	    SYSCTL_DESCR("virtio-net information and settings"),
2448	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);
2449	if (error)
2450		goto out;
2451
2452	error = sysctl_createv(log, 0, &rnode, NULL,
2453	    CTLFLAG_READWRITE, CTLTYPE_BOOL, "txrx_workqueue",
2454	    SYSCTL_DESCR("Use workqueue for packet processing"),
2455	    NULL, 0, &sc->sc_txrx_workqueue_sysctl, 0, CTL_CREATE, CTL_EOL);
2456	if (error)
2457		goto out;
2458
2459	error = sysctl_createv(log, 0, &rnode, &rxnode,
2460	    0, CTLTYPE_NODE, "rx",
2461	    SYSCTL_DESCR("virtio-net information and settings for Rx"),
2462	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
2463	if (error)
2464		goto out;
2465
2466	error = sysctl_createv(log, 0, &rxnode, NULL,
2467	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
2468	    SYSCTL_DESCR("max number of Rx packets to process for interrupt processing"),
2469	    NULL, 0, &sc->sc_rx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
2470	if (error)
2471		goto out;
2472
2473	error = sysctl_createv(log, 0, &rxnode, NULL,
2474	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
2475	    SYSCTL_DESCR("max number of Rx packets to process for deferred processing"),
2476	    NULL, 0, &sc->sc_rx_process_limit, 0, CTL_CREATE, CTL_EOL);
2477	if (error)
2478		goto out;
2479
2480	error = sysctl_createv(log, 0, &rnode, &txnode,
2481	    0, CTLTYPE_NODE, "tx",
2482	    SYSCTL_DESCR("virtio-net information and settings for Tx"),
2483	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
2484	if (error)
2485		goto out;
2486
2487	error = sysctl_createv(log, 0, &txnode, NULL,
2488	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
2489	    SYSCTL_DESCR("max number of Tx packets to process for interrupt processing"),
2490	    NULL, 0, &sc->sc_tx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
2491	if (error)
2492		goto out;
2493
2494	error = sysctl_createv(log, 0, &txnode, NULL,
2495	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
2496	    SYSCTL_DESCR("max number of Tx packets to process for deferred processing"),
2497	    NULL, 0, &sc->sc_tx_process_limit, 0, CTL_CREATE, CTL_EOL);
2498
2499out:
2500	if (error)
2501		sysctl_teardown(log);
2502
2503	return error;
2504}
2505
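/*
 * Attach the driver's event counters: per-queue tx/rx failure counters
 * grouped as "<devname>-TX<n>" / "<devname>-RX<n>", plus the control
 * queue counters (visible with e.g. vmstat -e).
 */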
2506static void
2507vioif_setup_stats(struct vioif_softc *sc)
2508{
2509	struct vioif_rxqueue *rxq;
2510	struct vioif_txqueue *txq;
2511	int i;
2512
2513	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
2514		rxq = &sc->sc_rxq[i];
2515		txq = &sc->sc_txq[i];
2516
2517		snprintf(txq->txq_evgroup, sizeof(txq->txq_evgroup), "%s-TX%d",
2518		    device_xname(sc->sc_dev), i);
2519		evcnt_attach_dynamic(&txq->txq_defrag_failed, EVCNT_TYPE_MISC,
2520		    NULL, txq->txq_evgroup, "tx m_defrag() failed");
2521		evcnt_attach_dynamic(&txq->txq_mbuf_load_failed, EVCNT_TYPE_MISC,
2522		    NULL, txq->txq_evgroup, "tx dmamap load failed");
2523		evcnt_attach_dynamic(&txq->txq_enqueue_reserve_failed, EVCNT_TYPE_MISC,
2524		    NULL, txq->txq_evgroup, "virtio_enqueue_reserve failed");
2525
2526		snprintf(rxq->rxq_evgroup, sizeof(rxq->rxq_evgroup), "%s-RX%d",
2527		    device_xname(sc->sc_dev), i);
2528		evcnt_attach_dynamic(&rxq->rxq_mbuf_add_failed, EVCNT_TYPE_MISC,
2529		    NULL, rxq->rxq_evgroup, "rx mbuf allocation failed");
2530	}
2531
2532	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_load_failed, EVCNT_TYPE_MISC,
2533	    NULL, device_xname(sc->sc_dev), "control command dmamap load failed");
2534	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_failed, EVCNT_TYPE_MISC,
2535	    NULL, device_xname(sc->sc_dev), "control command failed");
2536}
2537
2538MODULE(MODULE_CLASS_DRIVER, if_vioif, "virtio");
2539
2540#ifdef _MODULE
2541#include "ioconf.c"
2542#endif
2543
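/*
 * Module control entry point; config_{init,fini}_component() is only
 * invoked for the modular build, so this is effectively a no-op when
 * the driver is compiled into the kernel.
 */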
2544static int
2545if_vioif_modcmd(modcmd_t cmd, void *opaque)
2546{
2547	int error = 0;
2548
2549#ifdef _MODULE
2550	switch (cmd) {
2551	case MODULE_CMD_INIT:
2552		error = config_init_component(cfdriver_ioconf_if_vioif,
2553		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2554		break;
2555	case MODULE_CMD_FINI:
2556		error = config_fini_component(cfdriver_ioconf_if_vioif,
2557		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2558		break;
2559	default:
2560		error = ENOTTY;
2561		break;
2562	}
2563#endif
2564
2565	return error;
2566}
2567