if_vioif.c revision 1.46
1/*	$NetBSD: if_vioif.c,v 1.46 2019/01/14 15:00:27 yamaguchi Exp $	*/
2
3/*
4 * Copyright (c) 2010 Minoura Makoto.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__KERNEL_RCSID(0, "$NetBSD: if_vioif.c,v 1.46 2019/01/14 15:00:27 yamaguchi Exp $");
30
31#ifdef _KERNEL_OPT
32#include "opt_net_mpsafe.h"
33#endif
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/kernel.h>
38#include <sys/bus.h>
39#include <sys/condvar.h>
40#include <sys/device.h>
41#include <sys/intr.h>
42#include <sys/kmem.h>
43#include <sys/mbuf.h>
44#include <sys/mutex.h>
45#include <sys/sockio.h>
46#include <sys/cpu.h>
47#include <sys/module.h>
48#include <sys/pcq.h>
49
50#include <dev/pci/virtioreg.h>
51#include <dev/pci/virtiovar.h>
52
53#include <net/if.h>
54#include <net/if_media.h>
55#include <net/if_ether.h>
56
57#include <net/bpf.h>
58
59#include "ioconf.h"
60
61#ifdef NET_MPSAFE
62#define VIOIF_MPSAFE	1
63#define VIOIF_MULTIQ	1
64#endif
65
66#ifdef SOFTINT_INTR
67#define VIOIF_SOFTINT_INTR	1
68#endif
69
70/*
71 * if_vioifreg.h:
72 */
73/* Configuration registers */
74#define VIRTIO_NET_CONFIG_MAC		0 /* 8bit x 6byte */
75#define VIRTIO_NET_CONFIG_STATUS	6 /* 16bit */
76#define VIRTIO_NET_CONFIG_MAX_VQ_PAIRS	8 /* 16bit */
77
78/* Feature bits */
79#define VIRTIO_NET_F_CSUM		__BIT(0)
80#define VIRTIO_NET_F_GUEST_CSUM		__BIT(1)
81#define VIRTIO_NET_F_MAC		__BIT(5)
82#define VIRTIO_NET_F_GSO		__BIT(6)
83#define VIRTIO_NET_F_GUEST_TSO4		__BIT(7)
84#define VIRTIO_NET_F_GUEST_TSO6		__BIT(8)
85#define VIRTIO_NET_F_GUEST_ECN		__BIT(9)
86#define VIRTIO_NET_F_GUEST_UFO		__BIT(10)
87#define VIRTIO_NET_F_HOST_TSO4		__BIT(11)
88#define VIRTIO_NET_F_HOST_TSO6		__BIT(12)
89#define VIRTIO_NET_F_HOST_ECN		__BIT(13)
90#define VIRTIO_NET_F_HOST_UFO		__BIT(14)
91#define VIRTIO_NET_F_MRG_RXBUF		__BIT(15)
92#define VIRTIO_NET_F_STATUS		__BIT(16)
93#define VIRTIO_NET_F_CTRL_VQ		__BIT(17)
94#define VIRTIO_NET_F_CTRL_RX		__BIT(18)
95#define VIRTIO_NET_F_CTRL_VLAN		__BIT(19)
96#define VIRTIO_NET_F_CTRL_RX_EXTRA	__BIT(20)
97#define VIRTIO_NET_F_GUEST_ANNOUNCE	__BIT(21)
98#define VIRTIO_NET_F_MQ			__BIT(22)
99
100#define VIRTIO_NET_FLAG_BITS \
101	VIRTIO_COMMON_FLAG_BITS \
102	"\x17""MQ" \
103	"\x16""GUEST_ANNOUNCE" \
104	"\x15""CTRL_RX_EXTRA" \
105	"\x14""CTRL_VLAN" \
106	"\x13""CTRL_RX" \
107	"\x12""CTRL_VQ" \
108	"\x11""STATUS" \
109	"\x10""MRG_RXBUF" \
110	"\x0f""HOST_UFO" \
111	"\x0e""HOST_ECN" \
112	"\x0d""HOST_TSO6" \
113	"\x0c""HOST_TSO4" \
114	"\x0b""GUEST_UFO" \
115	"\x0a""GUEST_ECN" \
116	"\x09""GUEST_TSO6" \
117	"\x08""GUEST_TSO4" \
118	"\x07""GSO" \
119	"\x06""MAC" \
120	"\x02""GUEST_CSUM" \
121	"\x01""CSUM"
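/*
 * The string above is an old-style snprintb(9) bitmask format: each
 * "\xNN" byte gives the 1-origin position of the bit whose name follows
 * it, so "\x17""MQ" labels __BIT(22) and "\x01""CSUM" labels __BIT(0);
 * VIRTIO_COMMON_FLAG_BITS supplies the format base and the
 * transport-level feature bits.
 */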
122
123/* Status */
124#define VIRTIO_NET_S_LINK_UP	1
125
126/* Packet header structure */
127struct virtio_net_hdr {
128	uint8_t		flags;
129	uint8_t		gso_type;
130	uint16_t	hdr_len;
131	uint16_t	gso_size;
132	uint16_t	csum_start;
133	uint16_t	csum_offset;
134#if 0
135	uint16_t	num_buffers; /* if VIRTIO_NET_F_MRG_RXBUF enabled */
136#endif
137} __packed;
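/*
 * One such header accompanies every frame on both rings.  The driver
 * keeps it in a separate DMA area (rxq_hdrs[]/txq_hdrs[]) rather than
 * in the mbuf, so each packet is enqueued as two descriptors: one for
 * the header and one for the payload.
 */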
138
139#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
140#define VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
141#define VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
142#define VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
143#define VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
144#define VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */
145
146#define VIRTIO_NET_MAX_GSO_LEN		(65536+ETHER_HDR_LEN)
147
148/* Control virtqueue */
149struct virtio_net_ctrl_cmd {
150	uint8_t	class;
151	uint8_t	command;
152} __packed;
153#define VIRTIO_NET_CTRL_RX		0
154# define VIRTIO_NET_CTRL_RX_PROMISC	0
155# define VIRTIO_NET_CTRL_RX_ALLMULTI	1
156
157#define VIRTIO_NET_CTRL_MAC		1
158# define VIRTIO_NET_CTRL_MAC_TABLE_SET	0
159
160#define VIRTIO_NET_CTRL_VLAN		2
161# define VIRTIO_NET_CTRL_VLAN_ADD	0
162# define VIRTIO_NET_CTRL_VLAN_DEL	1
163
164#define VIRTIO_NET_CTRL_MQ			4
165# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET	0
166# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN	1
167# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX	0x8000
168
169struct virtio_net_ctrl_status {
170	uint8_t	ack;
171} __packed;
172#define VIRTIO_NET_OK			0
173#define VIRTIO_NET_ERR			1
174
175struct virtio_net_ctrl_rx {
176	uint8_t	onoff;
177} __packed;
178
179struct virtio_net_ctrl_mac_tbl {
180	uint32_t nentries;
181	uint8_t macs[][ETHER_ADDR_LEN];
182} __packed;
183
184struct virtio_net_ctrl_vlan {
185	uint16_t id;
186} __packed;
187
188struct virtio_net_ctrl_mq {
189	uint16_t virtqueue_pairs;
190} __packed;
191
192struct vioif_ctrl_cmdspec {
193	bus_dmamap_t	dmamap;
194	void		*buf;
195	bus_size_t	bufsize;
196};
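/*
 * A control request is queued as a chain of device-readable buffers
 * followed by one device-writable buffer: the class/command pair, the
 * command-specific payload(s) described by vioif_ctrl_cmdspec, and the
 * one-byte status that the device fills in with VIRTIO_NET_OK or
 * VIRTIO_NET_ERR.  For example, enabling promiscuous mode sends
 * { VIRTIO_NET_CTRL_RX, VIRTIO_NET_CTRL_RX_PROMISC } plus a
 * virtio_net_ctrl_rx with onoff = 1, then reads back the status.
 */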
197
198/*
199 * if_vioifvar.h:
200 */
201
202/*
203 * Locking notes:
204 * + fields in vioif_txqueue are protected by txq_lock (a spin mutex), and
205 *   fields in vioif_rxqueue are protected by rxq_lock (a spin mutex).
206 *      - more than one lock cannot be held at once
207 * + ctrlq_inuse is protected by ctrlq_wait_lock.
208 *      - other fields in vioif_ctrlqueue are protected by ctrlq_inuse
209 *      - txq_lock or rxq_lock cannot be held along with ctrlq_wait_lock
210 */
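/*
 * A minimal sketch of that discipline (an illustration only, not part
 * of the driver; vioif_lock_example is a hypothetical helper):
 */
#if 0
static void
vioif_lock_example(struct vioif_txqueue *txq, struct vioif_ctrlqueue *ctrlq)
{
	/* Touch per-queue state only under that queue's own lock. */
	mutex_enter(txq->txq_lock);
	txq->txq_stopping = true;
	mutex_exit(txq->txq_lock);	/* drop before taking any other lock */

	/*
	 * ctrlq_wait_lock guards only ctrlq_inuse; take it with no
	 * txq_lock/rxq_lock held.
	 */
	mutex_enter(&ctrlq->ctrlq_wait_lock);
	while (ctrlq->ctrlq_inuse != FREE)
		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
	ctrlq->ctrlq_inuse = INUSE;
	mutex_exit(&ctrlq->ctrlq_wait_lock);
}
#endif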
211
212struct vioif_txqueue {
213	kmutex_t		*txq_lock;	/* lock for tx operations */
214
215	struct virtqueue	*txq_vq;
216	bool			txq_stopping;
217	bool			txq_link_active;
218	pcq_t			*txq_intrq;
219
220	struct virtio_net_hdr	*txq_hdrs;
221	bus_dmamap_t		*txq_hdr_dmamaps;
222
223	struct mbuf		**txq_mbufs;
224	bus_dmamap_t		*txq_dmamaps;
225
226	void			*txq_deferred_transmit;
227};
228
229struct vioif_rxqueue {
230	kmutex_t		*rxq_lock;	/* lock for rx operations */
231
232	struct virtqueue	*rxq_vq;
233	bool			rxq_stopping;
234
235	struct virtio_net_hdr	*rxq_hdrs;
236	bus_dmamap_t		*rxq_hdr_dmamaps;
237
238	struct mbuf		**rxq_mbufs;
239	bus_dmamap_t		*rxq_dmamaps;
240
241	void			*rxq_softint;
242};
243
244struct vioif_ctrlqueue {
245	struct virtqueue		*ctrlq_vq;
246	enum {
247		FREE, INUSE, DONE
248	}				ctrlq_inuse;
249	kcondvar_t			ctrlq_wait;
250	kmutex_t			ctrlq_wait_lock;
251	struct lwp			*ctrlq_owner;
252
253	struct virtio_net_ctrl_cmd	*ctrlq_cmd;
254	struct virtio_net_ctrl_status	*ctrlq_status;
255	struct virtio_net_ctrl_rx	*ctrlq_rx;
256	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_uc;
257	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_mc;
258	struct virtio_net_ctrl_mq	*ctrlq_mq;
259
260	bus_dmamap_t			ctrlq_cmd_dmamap;
261	bus_dmamap_t			ctrlq_status_dmamap;
262	bus_dmamap_t			ctrlq_rx_dmamap;
263	bus_dmamap_t			ctrlq_tbl_uc_dmamap;
264	bus_dmamap_t			ctrlq_tbl_mc_dmamap;
265	bus_dmamap_t			ctrlq_mq_dmamap;
266};
267
268struct vioif_softc {
269	device_t		sc_dev;
270
271	struct virtio_softc	*sc_virtio;
272	struct virtqueue	*sc_vqs;
273
274	int			sc_max_nvq_pairs;
275	int			sc_req_nvq_pairs;
276	int			sc_act_nvq_pairs;
277
278	uint8_t			sc_mac[ETHER_ADDR_LEN];
279	struct ethercom		sc_ethercom;
280	short			sc_deferred_init_done;
281	bool			sc_link_active;
282
283	struct vioif_txqueue	*sc_txq;
284	struct vioif_rxqueue	*sc_rxq;
285
286	bool			sc_has_ctrl;
287	struct vioif_ctrlqueue	sc_ctrlq;
288
289	bus_dma_segment_t	sc_hdr_segs[1];
290	void			*sc_dmamem;
291	void			*sc_kmem;
292
293	void			*sc_ctl_softint;
294};
295#define VIRTIO_NET_TX_MAXNSEGS		(16) /* XXX */
296#define VIRTIO_NET_CTRL_MAC_MAXENTRIES	(64) /* XXX */
297
298/* cfattach interface functions */
299static int	vioif_match(device_t, cfdata_t, void *);
300static void	vioif_attach(device_t, device_t, void *);
301static void	vioif_deferred_init(device_t);
302
303/* ifnet interface functions */
304static int	vioif_init(struct ifnet *);
305static void	vioif_stop(struct ifnet *, int);
306static void	vioif_start(struct ifnet *);
307static void	vioif_start_locked(struct ifnet *, struct vioif_txqueue *);
308static int	vioif_transmit(struct ifnet *, struct mbuf *);
309static void	vioif_transmit_locked(struct ifnet *, struct vioif_txqueue *);
310static int	vioif_ioctl(struct ifnet *, u_long, void *);
311static void	vioif_watchdog(struct ifnet *);
312
313/* rx */
314static int	vioif_add_rx_mbuf(struct vioif_rxqueue *, int);
315static void	vioif_free_rx_mbuf(struct vioif_rxqueue *, int);
316static void	vioif_populate_rx_mbufs(struct vioif_rxqueue *);
317static void	vioif_populate_rx_mbufs_locked(struct vioif_rxqueue *);
318static int	vioif_rx_deq(struct vioif_rxqueue *);
319static int	vioif_rx_deq_locked(struct vioif_rxqueue *);
320static int	vioif_rx_vq_done(struct virtqueue *);
321static void	vioif_rx_softint(void *);
322static void	vioif_rx_drain(struct vioif_rxqueue *);
323
324/* tx */
325static int	vioif_tx_vq_done(struct virtqueue *);
326static int	vioif_tx_vq_done_locked(struct virtqueue *);
327static void	vioif_tx_drain(struct vioif_txqueue *);
328static void	vioif_deferred_transmit(void *);
329
330/* other control */
331static bool	vioif_is_link_up(struct vioif_softc *);
332static void	vioif_update_link_status(struct vioif_softc *);
333static int	vioif_ctrl_rx(struct vioif_softc *, int, bool);
334static int	vioif_set_promisc(struct vioif_softc *, bool);
335static int	vioif_set_allmulti(struct vioif_softc *, bool);
336static int	vioif_set_rx_filter(struct vioif_softc *);
337static int	vioif_rx_filter(struct vioif_softc *);
338static int	vioif_ctrl_vq_done(struct virtqueue *);
339static int	vioif_config_change(struct virtio_softc *);
340static void	vioif_ctl_softint(void *);
341static int	vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *, int);
342static void	vioif_enable_interrupt_vqpairs(struct vioif_softc *);
343static void	vioif_disable_interrupt_vqpairs(struct vioif_softc *);
344
345CFATTACH_DECL_NEW(vioif, sizeof(struct vioif_softc),
346		  vioif_match, vioif_attach, NULL, NULL);
347
348static int
349vioif_match(device_t parent, cfdata_t match, void *aux)
350{
351	struct virtio_attach_args *va = aux;
352
353	if (va->sc_childdevid == PCI_PRODUCT_VIRTIO_NETWORK)
354		return 1;
355
356	return 0;
357}
358
359static int
360vioif_alloc_queues(struct vioif_softc *sc)
361{
362	int nvq_pairs = sc->sc_max_nvq_pairs;
363	int nvqs = nvq_pairs * 2;
364	int i;
365
366	KASSERT(nvq_pairs <= VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX);
367
368	sc->sc_rxq = kmem_zalloc(sizeof(sc->sc_rxq[0]) * nvq_pairs,
369	    KM_NOSLEEP);
370	if (sc->sc_rxq == NULL)
371		return -1;
372
373	sc->sc_txq = kmem_zalloc(sizeof(sc->sc_txq[0]) * nvq_pairs,
374	    KM_NOSLEEP);
375	if (sc->sc_txq == NULL)
376		return -1;
377
378	if (sc->sc_has_ctrl)
379		nvqs++;
380
381	sc->sc_vqs = kmem_zalloc(sizeof(sc->sc_vqs[0]) * nvqs, KM_NOSLEEP);
382	if (sc->sc_vqs == NULL)
383		return -1;
384
385	nvqs = 0;
386	for (i = 0; i < nvq_pairs; i++) {
387		sc->sc_rxq[i].rxq_vq = &sc->sc_vqs[nvqs++];
388		sc->sc_txq[i].txq_vq = &sc->sc_vqs[nvqs++];
389	}
390
391	if (sc->sc_has_ctrl)
392		sc->sc_ctrlq.ctrlq_vq = &sc->sc_vqs[nvqs++];
393
394	return 0;
395}
396
397static void
398vioif_free_queues(struct vioif_softc *sc)
399{
400	int nvq_pairs = sc->sc_max_nvq_pairs;
401	int nvqs = nvq_pairs * 2;
402
403	if (sc->sc_ctrlq.ctrlq_vq)
404		nvqs++;
405
406	if (sc->sc_txq) {
407		kmem_free(sc->sc_txq, sizeof(sc->sc_txq[0]) * nvq_pairs);
408		sc->sc_txq = NULL;
409	}
410
411	if (sc->sc_rxq) {
412		kmem_free(sc->sc_rxq, sizeof(sc->sc_rxq[0]) * nvq_pairs);
413		sc->sc_rxq = NULL;
414	}
415
416	if (sc->sc_vqs) {
417		kmem_free(sc->sc_vqs, sizeof(sc->sc_vqs[0]) * nvqs);
418		sc->sc_vqs = NULL;
419	}
420}
421
422/* allocate memory */
423/*
424 * dma memory is used for:
425 *   rxq_hdrs[slot]:	 metadata array for received frames (READ)
426 *   txq_hdrs[slot]:	 metadata array for frames to be sent (WRITE)
427 *   ctrlq_cmd:		 command to be sent via ctrl vq (WRITE)
428 *   ctrlq_status:	 return value for a command via ctrl vq (READ)
429 *   ctrlq_rx:		 parameter for a VIRTIO_NET_CTRL_RX class command
430 *			 (WRITE)
431 *   ctrlq_mac_tbl_uc:	 unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
432 *			 class command (WRITE)
433 *   ctrlq_mac_tbl_mc:	 multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
434 *			 class command (WRITE)
435 * Each ctrlq_* structure is allocated only once; they are protected by
436 * the ctrlq_inuse variable and the ctrlq_wait condvar.
437 */
438/*
439 * dynamically allocated memory is used for:
440 *   rxq_hdr_dmamaps[slot]:	bus_dmamap_t array for rxq_hdrs[slot]
441 *   txq_hdr_dmamaps[slot]:	bus_dmamap_t array for txq_hdrs[slot]
442 *   rxq_dmamaps[slot]:		bus_dmamap_t array for received payload
443 *   txq_dmamaps[slot]:		bus_dmamap_t array for sent payload
444 *   rxq_mbufs[slot]:		mbuf pointer array for received frames
445 *   txq_mbufs[slot]:		mbuf pointer array for sent frames
446 */
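/*
 * Rough size illustration (assuming 256-entry virtqueues; the actual
 * sizes come from the host): one queue pair needs 2 * 256 *
 * sizeof(struct virtio_net_hdr) = 5120 bytes of header space, plus the
 * small fixed ctrlq_* buffers when the control vq is present.
 */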
447static int
448vioif_alloc_mems(struct vioif_softc *sc)
449{
450	struct virtio_softc *vsc = sc->sc_virtio;
451	struct vioif_txqueue *txq;
452	struct vioif_rxqueue *rxq;
453	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
454	int allocsize, allocsize2, r, rsegs, i, qid;
455	void *vaddr;
456	intptr_t p;
457
458	allocsize = 0;
459	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
460		rxq = &sc->sc_rxq[qid];
461		txq = &sc->sc_txq[qid];
462
463		allocsize +=
464		    sizeof(struct virtio_net_hdr) * rxq->rxq_vq->vq_num;
465		allocsize +=
466		    sizeof(struct virtio_net_hdr) * txq->txq_vq->vq_num;
467	}
468	if (sc->sc_has_ctrl) {
469		allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1;
470		allocsize += sizeof(struct virtio_net_ctrl_status) * 1;
471		allocsize += sizeof(struct virtio_net_ctrl_rx) * 1;
472		allocsize += sizeof(struct virtio_net_ctrl_mac_tbl)
473			+ sizeof(struct virtio_net_ctrl_mac_tbl)
474			+ ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES;
475		allocsize += sizeof(struct virtio_net_ctrl_mq) * 1;
476	}
477	r = bus_dmamem_alloc(virtio_dmat(vsc), allocsize, 0, 0,
478			     &sc->sc_hdr_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
479	if (r != 0) {
480		aprint_error_dev(sc->sc_dev,
481				 "DMA memory allocation failed, size %d, "
482				 "error code %d\n", allocsize, r);
483		goto err_none;
484	}
485	r = bus_dmamem_map(virtio_dmat(vsc),
486			   &sc->sc_hdr_segs[0], 1, allocsize,
487			   &vaddr, BUS_DMA_NOWAIT);
488	if (r != 0) {
489		aprint_error_dev(sc->sc_dev,
490				 "DMA memory map failed, "
491				 "error code %d\n", r);
492		goto err_dmamem_alloc;
493	}
494
495#define P(p, p0, p0size)	do { p0 = (void *) p;		\
496				     p += p0size; } while (0)
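/*
 * P() carves a p0size-byte sub-buffer for p0 out of the linear
 * allocation and advances the cursor p past it.
 */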
497	memset(vaddr, 0, allocsize);
498	sc->sc_dmamem = vaddr;
499	p = (intptr_t) vaddr;
500
501	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
502		rxq = &sc->sc_rxq[qid];
503		txq = &sc->sc_txq[qid];
504
505		P(p, rxq->rxq_hdrs,
506		    sizeof(rxq->rxq_hdrs[0]) * rxq->rxq_vq->vq_num);
507		P(p, txq->txq_hdrs,
508		    sizeof(txq->txq_hdrs[0]) * txq->txq_vq->vq_num);
509	}
510	if (sc->sc_has_ctrl) {
511		P(p, ctrlq->ctrlq_cmd, sizeof(*ctrlq->ctrlq_cmd));
512		P(p, ctrlq->ctrlq_status, sizeof(*ctrlq->ctrlq_status));
513		P(p, ctrlq->ctrlq_rx, sizeof(*ctrlq->ctrlq_rx));
514		P(p, ctrlq->ctrlq_mac_tbl_uc, sizeof(*ctrlq->ctrlq_mac_tbl_uc) + 0);
515		P(p, ctrlq->ctrlq_mac_tbl_mc,
516		    (sizeof(*ctrlq->ctrlq_mac_tbl_mc)
517		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES));
518		P(p, ctrlq->ctrlq_mq, sizeof(*ctrlq->ctrlq_mq));
519	}
520
521	allocsize2 = 0;
522	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
523		int rxqsize, txqsize;
524
525		rxq = &sc->sc_rxq[qid];
526		txq = &sc->sc_txq[qid];
527		rxqsize = rxq->rxq_vq->vq_num;
528		txqsize = txq->txq_vq->vq_num;
529
530		allocsize2 += sizeof(rxq->rxq_dmamaps[0]) * rxqsize;
531		allocsize2 += sizeof(rxq->rxq_hdr_dmamaps[0]) * rxqsize;
532		allocsize2 += sizeof(rxq->rxq_mbufs[0]) * rxqsize;
533
534		allocsize2 += sizeof(txq->txq_dmamaps[0]) * txqsize;
535		allocsize2 += sizeof(txq->txq_hdr_dmamaps[0]) * txqsize;
536		allocsize2 += sizeof(txq->txq_mbufs[0]) * txqsize;
537	}
538	vaddr = kmem_zalloc(allocsize2, KM_SLEEP);
539	sc->sc_kmem = vaddr;
540	p = (intptr_t) vaddr;
541
542	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
543		int rxqsize, txqsize;
544		rxq = &sc->sc_rxq[qid];
545		txq = &sc->sc_txq[qid];
546		rxqsize = rxq->rxq_vq->vq_num;
547		txqsize = txq->txq_vq->vq_num;
548
549		P(p, rxq->rxq_hdr_dmamaps, sizeof(rxq->rxq_hdr_dmamaps[0]) * rxqsize);
550		P(p, txq->txq_hdr_dmamaps, sizeof(txq->txq_hdr_dmamaps[0]) * txqsize);
551		P(p, rxq->rxq_dmamaps, sizeof(rxq->rxq_dmamaps[0]) * rxqsize);
552		P(p, txq->txq_dmamaps, sizeof(txq->txq_dmamaps[0]) * txqsize);
553		P(p, rxq->rxq_mbufs, sizeof(rxq->rxq_mbufs[0]) * rxqsize);
554		P(p, txq->txq_mbufs, sizeof(txq->txq_mbufs[0]) * txqsize);
555	}
556#undef P
557
558#define C(map, size, nsegs, usage)						\
559	do {									\
560		r = bus_dmamap_create(virtio_dmat(vsc), size, nsegs, size, 0,	\
561				      BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW,		\
562				      &map);					\
563		if (r != 0) {							\
564			aprint_error_dev(sc->sc_dev,				\
565			    "%s dmamap creation failed, "			\
566			    "error code %d\n", usage, r);			\
567			goto err_reqs;						\
568		}								\
569	} while (0)
570#define C_L(map, buf, size, nsegs, rw, usage)				\
571	C(map, size, nsegs, usage);					\
572	do {								\
573		r = bus_dmamap_load(virtio_dmat(vsc), map,		\
574				    buf, size, NULL,			\
575				    rw | BUS_DMA_NOWAIT);		\
576		if (r != 0) {						\
577			aprint_error_dev(sc->sc_dev,			\
578					 usage " dmamap load failed, "	\
579					 "error code %d\n", r);		\
580			goto err_reqs;					\
581		}							\
582	} while (0)
583
584	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
585		rxq = &sc->sc_rxq[qid];
586		txq = &sc->sc_txq[qid];
587
588		for (i = 0; i < rxq->rxq_vq->vq_num; i++) {
589			C_L(rxq->rxq_hdr_dmamaps[i], &rxq->rxq_hdrs[i],
590			    sizeof(rxq->rxq_hdrs[0]), 1,
591			    BUS_DMA_READ, "rx header");
592			C(rxq->rxq_dmamaps[i], MCLBYTES, 1, "rx payload");
593		}
594
595		for (i = 0; i < txq->txq_vq->vq_num; i++) {
596			C_L(txq->txq_hdr_dmamaps[i], &txq->txq_hdrs[i],
597			    sizeof(txq->txq_hdrs[0]), 1,
598			    BUS_DMA_READ, "tx header");
599			C(txq->txq_dmamaps[i], ETHER_MAX_LEN,
600			    VIRTIO_NET_TX_MAXNSEGS, "tx payload");
601		}
602	}
603
604	if (sc->sc_has_ctrl) {
605		/* control vq class & command */
606		C_L(ctrlq->ctrlq_cmd_dmamap,
607		    ctrlq->ctrlq_cmd, sizeof(*ctrlq->ctrlq_cmd), 1,
608		    BUS_DMA_WRITE, "control command");
609		C_L(ctrlq->ctrlq_status_dmamap,
610		    ctrlq->ctrlq_status, sizeof(*ctrlq->ctrlq_status), 1,
611		    BUS_DMA_READ, "control status");
612
613		/* control vq rx mode command parameter */
614		C_L(ctrlq->ctrlq_rx_dmamap,
615		    ctrlq->ctrlq_rx, sizeof(*ctrlq->ctrlq_rx), 1,
616		    BUS_DMA_WRITE, "rx mode control command");
617
618		/* multiqueue set command */
619		C_L(ctrlq->ctrlq_mq_dmamap,
620		    ctrlq->ctrlq_mq, sizeof(*ctrlq->ctrlq_mq), 1,
621		    BUS_DMA_WRITE, "multiqueue set command");
622
623		/* control vq MAC filter table for unicast */
624		/* do not load now since its length is variable */
625		C(ctrlq->ctrlq_tbl_uc_dmamap,
626		    sizeof(*ctrlq->ctrlq_mac_tbl_uc) + 0, 1,
627		    "unicast MAC address filter command");
628
629		/* control vq MAC filter table for multicast */
630		C(ctrlq->ctrlq_tbl_mc_dmamap,
631		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
632		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES, 1,
633		    "multicast MAC address filter command");
634	}
635#undef C_L
636#undef C
637
638	return 0;
639
640err_reqs:
641#define D(map)								\
642	do {								\
643		if (map) {						\
644			bus_dmamap_destroy(virtio_dmat(vsc), map);	\
645			map = NULL;					\
646		}							\
647	} while (0)
648	D(ctrlq->ctrlq_tbl_mc_dmamap);
649	D(ctrlq->ctrlq_tbl_uc_dmamap);
650	D(ctrlq->ctrlq_rx_dmamap);
651	D(ctrlq->ctrlq_status_dmamap);
652	D(ctrlq->ctrlq_cmd_dmamap);
653	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
654		rxq = &sc->sc_rxq[qid];
655		txq = &sc->sc_txq[qid];
656
657		for (i = 0; i < txq->txq_vq->vq_num; i++) {
658			D(txq->txq_dmamaps[i]);
659			D(txq->txq_hdr_dmamaps[i]);
660		}
661		for (i = 0; i < rxq->rxq_vq->vq_num; i++) {
662			D(rxq->rxq_dmamaps[i]);
663			D(rxq->rxq_hdr_dmamaps[i]);
664		}
665	}
666#undef D
667	if (sc->sc_kmem) {
668		kmem_free(sc->sc_kmem, allocsize2);
669		sc->sc_kmem = NULL;
670	}
671	bus_dmamem_unmap(virtio_dmat(vsc), sc->sc_dmamem, allocsize);
672err_dmamem_alloc:
673	bus_dmamem_free(virtio_dmat(vsc), &sc->sc_hdr_segs[0], 1);
674err_none:
675	return -1;
676}
677
678static void
679vioif_attach(device_t parent, device_t self, void *aux)
680{
681	struct vioif_softc *sc = device_private(self);
682	struct virtio_softc *vsc = device_private(parent);
683	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
684	struct vioif_txqueue *txq;
685	struct vioif_rxqueue *rxq;
686	uint32_t features, req_features;
687	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
688	u_int softint_flags;
689	int r, i, nvqs=0, req_flags;
690
691	if (virtio_child(vsc) != NULL) {
692		aprint_normal(": child already attached for %s; "
693			      "something wrong...\n",
694			      device_xname(parent));
695		return;
696	}
697
698	sc->sc_dev = self;
699	sc->sc_virtio = vsc;
700	sc->sc_link_active = false;
701
702	sc->sc_max_nvq_pairs = 1;
703	sc->sc_req_nvq_pairs = 1;
704	sc->sc_act_nvq_pairs = 1;
705
706	req_flags = 0;
707
708#ifdef VIOIF_MPSAFE
709	req_flags |= VIRTIO_F_PCI_INTR_MPSAFE;
710#endif
711#ifdef VIOIF_SOFTINT_INTR
712	req_flags |= VIRTIO_F_PCI_INTR_SOFTINT;
713#endif
714	req_flags |= VIRTIO_F_PCI_INTR_MSIX;
715
716	req_features =
717	    VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | VIRTIO_NET_F_CTRL_VQ |
718	    VIRTIO_NET_F_CTRL_RX | VIRTIO_F_NOTIFY_ON_EMPTY;
719#ifdef VIOIF_MULTIQ
720	req_features |= VIRTIO_NET_F_MQ;
721#endif
722	virtio_child_attach_start(vsc, self, IPL_NET, NULL,
723	    vioif_config_change, virtio_vq_intr, req_flags,
724	    req_features, VIRTIO_NET_FLAG_BITS);
725
726	features = virtio_features(vsc);
727
728	if (features & VIRTIO_NET_F_MAC) {
729		sc->sc_mac[0] = virtio_read_device_config_1(vsc,
730						    VIRTIO_NET_CONFIG_MAC+0);
731		sc->sc_mac[1] = virtio_read_device_config_1(vsc,
732						    VIRTIO_NET_CONFIG_MAC+1);
733		sc->sc_mac[2] = virtio_read_device_config_1(vsc,
734						    VIRTIO_NET_CONFIG_MAC+2);
735		sc->sc_mac[3] = virtio_read_device_config_1(vsc,
736						    VIRTIO_NET_CONFIG_MAC+3);
737		sc->sc_mac[4] = virtio_read_device_config_1(vsc,
738						    VIRTIO_NET_CONFIG_MAC+4);
739		sc->sc_mac[5] = virtio_read_device_config_1(vsc,
740						    VIRTIO_NET_CONFIG_MAC+5);
741	} else {
742		/* code stolen from sys/net/if_tap.c */
743		struct timeval tv;
744		uint32_t ui;
745		getmicrouptime(&tv);
746		ui = (tv.tv_sec ^ tv.tv_usec) & 0xffffff;
747		memcpy(sc->sc_mac+3, (uint8_t *)&ui, 3);
748		virtio_write_device_config_1(vsc,
749					     VIRTIO_NET_CONFIG_MAC+0,
750					     sc->sc_mac[0]);
751		virtio_write_device_config_1(vsc,
752					     VIRTIO_NET_CONFIG_MAC+1,
753					     sc->sc_mac[1]);
754		virtio_write_device_config_1(vsc,
755					     VIRTIO_NET_CONFIG_MAC+2,
756					     sc->sc_mac[2]);
757		virtio_write_device_config_1(vsc,
758					     VIRTIO_NET_CONFIG_MAC+3,
759					     sc->sc_mac[3]);
760		virtio_write_device_config_1(vsc,
761					     VIRTIO_NET_CONFIG_MAC+4,
762					     sc->sc_mac[4]);
763		virtio_write_device_config_1(vsc,
764					     VIRTIO_NET_CONFIG_MAC+5,
765					     sc->sc_mac[5]);
766	}
767
768	aprint_normal_dev(self, "Ethernet address %s\n", ether_sprintf(sc->sc_mac));
769
770	if ((features & VIRTIO_NET_F_CTRL_VQ) &&
771	    (features & VIRTIO_NET_F_CTRL_RX)) {
772		sc->sc_has_ctrl = true;
773
774		cv_init(&ctrlq->ctrlq_wait, "ctrl_vq");
775		mutex_init(&ctrlq->ctrlq_wait_lock, MUTEX_DEFAULT, IPL_NET);
776		ctrlq->ctrlq_inuse = FREE;
777	} else {
778		sc->sc_has_ctrl = false;
779	}
780
781	if (sc->sc_has_ctrl && (features & VIRTIO_NET_F_MQ)) {
782		sc->sc_max_nvq_pairs = virtio_read_device_config_2(vsc,
783		    VIRTIO_NET_CONFIG_MAX_VQ_PAIRS);
784
785		if (sc->sc_max_nvq_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
786			goto err;
787
788		/* Limit the number of queue pairs to use */
789		sc->sc_req_nvq_pairs = MIN(sc->sc_max_nvq_pairs, ncpu);
790	}
791
792	r = vioif_alloc_queues(sc);
793	if (r != 0)
794		goto err;
795
796	virtio_child_attach_set_vqs(vsc, sc->sc_vqs, sc->sc_req_nvq_pairs);
797
798#ifdef VIOIF_MPSAFE
799	softint_flags = SOFTINT_NET | SOFTINT_MPSAFE;
800#else
801	softint_flags = SOFTINT_NET;
802#endif
803
804	/*
805	 * Allocate the virtqueues for each rx/tx queue pair
806	 */
807	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
808		rxq = &sc->sc_rxq[i];
809		txq = &sc->sc_txq[i];
810		char qname[32];
811
812		rxq->rxq_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
813
814		rxq->rxq_softint = softint_establish(softint_flags, vioif_rx_softint, rxq);
815		if (rxq->rxq_softint == NULL) {
816			aprint_error_dev(self, "cannot establish rx softint\n");
817			goto err;
818		}
819		snprintf(qname, sizeof(qname), "rx%d", i);
820		r = virtio_alloc_vq(vsc, rxq->rxq_vq, nvqs,
821		    MCLBYTES+sizeof(struct virtio_net_hdr), 2, qname);
822		if (r != 0)
823			goto err;
824		nvqs++;
825		rxq->rxq_vq->vq_done = vioif_rx_vq_done;
826		rxq->rxq_vq->vq_done_ctx = (void *)rxq;
827		rxq->rxq_stopping = true;
828
829		txq->txq_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
830		txq->txq_deferred_transmit = softint_establish(softint_flags,
831		    vioif_deferred_transmit, txq);
832		if (txq->txq_deferred_transmit == NULL) {
833			aprint_error_dev(self, "cannot establish tx softint\n");
834			goto err;
835		}
836		snprintf(qname, sizeof(qname), "tx%d", i);
837		r = virtio_alloc_vq(vsc, txq->txq_vq, nvqs,
838		    (sizeof(struct virtio_net_hdr) + (ETHER_MAX_LEN - ETHER_HDR_LEN)),
839		    VIRTIO_NET_TX_MAXNSEGS + 1, qname);
840		if (r != 0)
841			goto err;
842		nvqs++;
843		txq->txq_vq->vq_done = vioif_tx_vq_done;
844		txq->txq_vq->vq_done_ctx = (void *)txq;
845		txq->txq_link_active = sc->sc_link_active;
846		txq->txq_stopping = false;
847		txq->txq_intrq = pcq_create(txq->txq_vq->vq_num, KM_NOSLEEP);
848		if (txq->txq_intrq == NULL)
849			goto err;
850	}
851
852	if (sc->sc_has_ctrl) {
853		/*
854		 * Allocate a virtqueue for the control channel
855		 */
856		r = virtio_alloc_vq(vsc, ctrlq->ctrlq_vq, nvqs,
857		    NBPG, 1, "control");
858		if (r != 0) {
859			aprint_error_dev(self, "failed to allocate "
860			    "a virtqueue for control channel\n");
861
862			sc->sc_has_ctrl = false;
863			cv_destroy(&ctrlq->ctrlq_wait);
864			mutex_destroy(&ctrlq->ctrlq_wait_lock);
865		} else {
866			nvqs++;
867			ctrlq->ctrlq_vq->vq_done = vioif_ctrl_vq_done;
868			ctrlq->ctrlq_vq->vq_done_ctx = (void *) ctrlq;
869		}
870	}
871
872	sc->sc_ctl_softint = softint_establish(softint_flags, vioif_ctl_softint, sc);
873	if (sc->sc_ctl_softint == NULL) {
874		aprint_error_dev(self, "cannot establish ctl softint\n");
875		goto err;
876	}
877
878	if (vioif_alloc_mems(sc) < 0)
879		goto err;
880
881	if (virtio_child_attach_finish(vsc) != 0)
882		goto err;
883
884	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
885	ifp->if_softc = sc;
886	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
887#ifdef VIOIF_MPSAFE
888	ifp->if_extflags = IFEF_MPSAFE;
889#endif
890	ifp->if_start = vioif_start;
891	if (sc->sc_req_nvq_pairs > 1)
892		ifp->if_transmit = vioif_transmit;
893	ifp->if_ioctl = vioif_ioctl;
894	ifp->if_init = vioif_init;
895	ifp->if_stop = vioif_stop;
896	ifp->if_capabilities = 0;
897	ifp->if_watchdog = vioif_watchdog;
898	txq = &sc->sc_txq[0];
899	IFQ_SET_MAXLEN(&ifp->if_snd, MAX(txq->txq_vq->vq_num, IFQ_MAXLEN));
900	IFQ_SET_READY(&ifp->if_snd);
901
902	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
903
904	if_attach(ifp);
905	if_deferred_start_init(ifp, NULL);
906	ether_ifattach(ifp, sc->sc_mac);
907
908	return;
909
910err:
911	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
912		rxq = &sc->sc_rxq[i];
913		txq = &sc->sc_txq[i];
914
915		if (rxq->rxq_lock) {
916			mutex_obj_free(rxq->rxq_lock);
917			rxq->rxq_lock = NULL;
918		}
919
920		if (rxq->rxq_softint) {
921			softint_disestablish(rxq->rxq_softint);
922			rxq->rxq_softint = NULL;
923		}
924
925		if (txq->txq_lock) {
926			mutex_obj_free(txq->txq_lock);
927			txq->txq_lock = NULL;
928		}
929
930		if (txq->txq_deferred_transmit) {
931			softint_disestablish(txq->txq_deferred_transmit);
932			txq->txq_deferred_transmit = NULL;
933		}
934
935		if (txq->txq_intrq) {
936			pcq_destroy(txq->txq_intrq);
937			txq->txq_intrq = NULL;
938		}
939	}
940
941	if (sc->sc_has_ctrl) {
942		cv_destroy(&ctrlq->ctrlq_wait);
943		mutex_destroy(&ctrlq->ctrlq_wait_lock);
944	}
945
946	while (nvqs > 0)
947		virtio_free_vq(vsc, &sc->sc_vqs[--nvqs]);
948
949	vioif_free_queues(sc);
950
951	virtio_child_attach_failed(vsc);
952	return;
953}
954
955/* we need interrupts working before we can turn promiscuous mode off */
956static void
957vioif_deferred_init(device_t self)
958{
959	struct vioif_softc *sc = device_private(self);
960	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
961	int r;
962
963	if (ifp->if_flags & IFF_PROMISC)
964		return;
965
966	r = vioif_set_promisc(sc, false);
967	if (r != 0)
968		aprint_error_dev(self, "resetting promisc mode failed, "
969				 "error code %d\n", r);
970}
971
972static void
973vioif_enable_interrupt_vqpairs(struct vioif_softc *sc)
974{
975	struct virtio_softc *vsc = sc->sc_virtio;
976	struct vioif_txqueue *txq;
977	struct vioif_rxqueue *rxq;
978	int i;
979
980	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
981		txq = &sc->sc_txq[i];
982		rxq = &sc->sc_rxq[i];
983
984		virtio_start_vq_intr(vsc, txq->txq_vq);
985		virtio_start_vq_intr(vsc, rxq->rxq_vq);
986	}
987}
988
989static void
990vioif_disable_interrupt_vqpairs(struct vioif_softc *sc)
991{
992	struct virtio_softc *vsc = sc->sc_virtio;
993	struct vioif_txqueue *txq;
994	struct vioif_rxqueue *rxq;
995	int i;
996
997	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
998		txq = &sc->sc_txq[i];
999		rxq = &sc->sc_rxq[i];
1000
1001		virtio_stop_vq_intr(vsc, txq->txq_vq);
1002		virtio_stop_vq_intr(vsc, rxq->rxq_vq);
1003	}
1004}
1005
1006/*
1007 * Interface functions for ifnet
1008 */
1009static int
1010vioif_init(struct ifnet *ifp)
1011{
1012	struct vioif_softc *sc = ifp->if_softc;
1013	struct virtio_softc *vsc = sc->sc_virtio;
1014	struct vioif_rxqueue *rxq;
1015	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1016	int r, i;
1017
1018	vioif_stop(ifp, 0);
1019
1020	virtio_reinit_start(vsc);
1021	virtio_negotiate_features(vsc, virtio_features(vsc));
1022
1023	for (i = 0; i < sc->sc_req_nvq_pairs; i++) {
1024		rxq = &sc->sc_rxq[i];
1025
1026		/* Have to set false before vioif_populate_rx_mbufs */
1027		rxq->rxq_stopping = false;
1028		vioif_populate_rx_mbufs(rxq);
1029	}
1030
1031	virtio_reinit_end(vsc);
1032	if (sc->sc_has_ctrl)
1033		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
1033
1034	r = vioif_ctrl_mq_vq_pairs_set(sc, sc->sc_req_nvq_pairs);
1035	if (r == 0)
1036		sc->sc_act_nvq_pairs = sc->sc_req_nvq_pairs;
1037	else
1038		sc->sc_act_nvq_pairs = 1;
1039
1040	for (i = 0; i < sc->sc_act_nvq_pairs; i++)
1041		sc->sc_txq[i].txq_stopping = false;
1042
1043	vioif_enable_interrupt_vqpairs(sc);
1044
1045	if (!sc->sc_deferred_init_done) {
1046		sc->sc_deferred_init_done = 1;
1047		if (sc->sc_has_ctrl)
1048			vioif_deferred_init(sc->sc_dev);
1049	}
1050
1051	vioif_update_link_status(sc);
1052	ifp->if_flags |= IFF_RUNNING;
1053	ifp->if_flags &= ~IFF_OACTIVE;
1054	vioif_rx_filter(sc);
1055
1056	return 0;
1057}
1058
1059static void
1060vioif_stop(struct ifnet *ifp, int disable)
1061{
1062	struct vioif_softc *sc = ifp->if_softc;
1063	struct virtio_softc *vsc = sc->sc_virtio;
1064	struct vioif_txqueue *txq;
1065	struct vioif_rxqueue *rxq;
1066	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1067	int i;
1068
1069	/* Take the locks to ensure that ongoing TX/RX finish */
1070	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1071		txq = &sc->sc_txq[i];
1072		rxq = &sc->sc_rxq[i];
1073
1074		mutex_enter(txq->txq_lock);
1075		txq->txq_stopping = true;
1076		mutex_exit(txq->txq_lock);
1077
1078		mutex_enter(rxq->rxq_lock);
1079		rxq->rxq_stopping = true;
1080		mutex_exit(rxq->rxq_lock);
1081	}
1082
1083	/* disable interrupts */
1084	vioif_disable_interrupt_vqpairs(sc);
1085
1086	if (sc->sc_has_ctrl)
1087		virtio_stop_vq_intr(vsc, ctrlq->ctrlq_vq);
1088
1089	/* only way to stop I/O and DMA is resetting... */
1090	virtio_reset(vsc);
1091	for (i = 0; i < sc->sc_act_nvq_pairs; i++)
1092		vioif_rx_deq(&sc->sc_rxq[i]);
1093
1094	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1095	sc->sc_link_active = false;
1096
1097	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1098		txq = &sc->sc_txq[i];
1099		rxq = &sc->sc_rxq[i];
1100
1101		txq->txq_link_active = false;
1102
1103		if (disable)
1104			vioif_rx_drain(rxq);
1105
1106		vioif_tx_drain(txq);
1107	}
1108}
1109
1110static void
1111vioif_send_common_locked(struct ifnet *ifp, struct vioif_txqueue *txq, bool is_transmit)
1112{
1113	struct vioif_softc *sc = ifp->if_softc;
1114	struct virtio_softc *vsc = sc->sc_virtio;
1115	struct virtqueue *vq = txq->txq_vq;
1116	struct mbuf *m;
1117	int queued = 0;
1118
1119	KASSERT(mutex_owned(txq->txq_lock));
1120
1121	if ((ifp->if_flags & IFF_RUNNING) == 0)
1122		return;
1123
1124	if (!txq->txq_link_active || txq->txq_stopping)
1125		return;
1126
1127	if ((ifp->if_flags & IFF_OACTIVE) != 0 && !is_transmit)
1128		return;
1129
1130	for (;;) {
1131		int slot, r;
1132
1133		if (is_transmit)
1134			m = pcq_get(txq->txq_intrq);
1135		else
1136			IFQ_DEQUEUE(&ifp->if_snd, m);
1137
1138		if (m == NULL)
1139			break;
1140
1141		r = virtio_enqueue_prep(vsc, vq, &slot);
1142		if (r == EAGAIN) {
1143			ifp->if_flags |= IFF_OACTIVE;
1144			m_freem(m);
1145			break;
1146		}
1147		if (r != 0)
1148			panic("enqueue_prep for a tx buffer");
1149
1150		r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1151					 txq->txq_dmamaps[slot],
1152					 m, BUS_DMA_WRITE|BUS_DMA_NOWAIT);
1153		if (r != 0) {
1154			/* maybe just too fragmented */
1155			struct mbuf *newm;
1156
1157			newm = m_defrag(m, M_NOWAIT);
1158			if (newm == NULL) {
1159				aprint_error_dev(sc->sc_dev,
1160				    "m_defrag() failed\n");
1161				goto skip;
1162			}
1163
1164			m = newm;
1165			r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1166					 txq->txq_dmamaps[slot],
1167					 m, BUS_DMA_WRITE|BUS_DMA_NOWAIT);
1168			if (r != 0) {
1169				aprint_error_dev(sc->sc_dev,
1170				    "tx dmamap load failed, error code %d\n",
1171				    r);
1172skip:
1173				m_freem(m);
1174				virtio_enqueue_abort(vsc, vq, slot);
1175				continue;
1176			}
1177		}
1178
1179		/* should never fail: dm_nsegs payload segs + 1 for the header */
1180		r = virtio_enqueue_reserve(vsc, vq, slot,
1181					txq->txq_dmamaps[slot]->dm_nsegs + 1);
1182		if (r != 0) {
1183			aprint_error_dev(sc->sc_dev,
1184			    "virtio_enqueue_reserve failed, error code %d\n",
1185			    r);
1186			bus_dmamap_unload(virtio_dmat(vsc),
1187					  txq->txq_dmamaps[slot]);
1188			/* slot already freed by virtio_enqueue_reserve */
1189			m_freem(m);
1190			continue;
1191		}
1192
1193		txq->txq_mbufs[slot] = m;
1194
1195		memset(&txq->txq_hdrs[slot], 0, sizeof(struct virtio_net_hdr));
1196		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_dmamaps[slot],
1197				0, txq->txq_dmamaps[slot]->dm_mapsize,
1198				BUS_DMASYNC_PREWRITE);
1199		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_hdr_dmamaps[slot],
1200				0, txq->txq_hdr_dmamaps[slot]->dm_mapsize,
1201				BUS_DMASYNC_PREWRITE);
1202		virtio_enqueue(vsc, vq, slot, txq->txq_hdr_dmamaps[slot], true);
1203		virtio_enqueue(vsc, vq, slot, txq->txq_dmamaps[slot], true);
1204		virtio_enqueue_commit(vsc, vq, slot, false);
1205
1206		queued++;
1207		bpf_mtap(ifp, m, BPF_D_OUT);
1208	}
1209
1210	if (queued > 0) {
1211		virtio_enqueue_commit(vsc, vq, -1, true);
1212		ifp->if_timer = 5;
1213	}
1214}
1215
1216static void
1217vioif_start_locked(struct ifnet *ifp, struct vioif_txqueue *txq)
1218{
1219
1220	/*
1221	 * ifp->if_obytes and ifp->if_omcasts are added in if_transmit()@if.c.
1222	 */
1223	vioif_send_common_locked(ifp, txq, false);
1224
1225}
1226
1227static void
1228vioif_start(struct ifnet *ifp)
1229{
1230	struct vioif_softc *sc = ifp->if_softc;
1231	struct vioif_txqueue *txq = &sc->sc_txq[0];
1232
1233#ifdef VIOIF_MPSAFE
1234	KASSERT(if_is_mpsafe(ifp));
1235#endif
1236
1237	mutex_enter(txq->txq_lock);
1238	if (!txq->txq_stopping)
1239		vioif_start_locked(ifp, txq);
1240	mutex_exit(txq->txq_lock);
1241}
1242
1243static inline int
1244vioif_select_txqueue(struct ifnet *ifp, struct mbuf *m)
1245{
1246	struct vioif_softc *sc = ifp->if_softc;
1247	u_int cpuid = cpu_index(curcpu());
1248
1249	return cpuid % sc->sc_act_nvq_pairs;
1250}
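/*
 * Note: this is a simple static cpu -> queue mapping; with more CPUs
 * than active queue pairs several CPUs share a tx queue, which is why
 * vioif_transmit() below only tries the queue lock and leaves contended
 * packets in txq_intrq for the deferred-transmit softint.
 */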
1251
1252static void
1253vioif_transmit_locked(struct ifnet *ifp, struct vioif_txqueue *txq)
1254{
1255
1256	vioif_send_common_locked(ifp, txq, true);
1257}
1258
1259static int
1260vioif_transmit(struct ifnet *ifp, struct mbuf *m)
1261{
1262	struct vioif_softc *sc = ifp->if_softc;
1263	struct vioif_txqueue *txq;
1264	int qid;
1265
1266	qid = vioif_select_txqueue(ifp, m);
1267	txq = &sc->sc_txq[qid];
1268
1269	if (__predict_false(!pcq_put(txq->txq_intrq, m))) {
1270		m_freem(m);
1271		return ENOBUFS;
1272	}
1273
1274	ifp->if_obytes += m->m_pkthdr.len;
1275	if (m->m_flags & M_MCAST)
1276		ifp->if_omcasts++;
1277
1278	if (mutex_tryenter(txq->txq_lock)) {
1279		if (!txq->txq_stopping)
1280			vioif_transmit_locked(ifp, txq);
1281		mutex_exit(txq->txq_lock);
1282	}
1283
1284	return 0;
1285}
1286
1287static void
1288vioif_deferred_transmit(void *arg)
1289{
1290	struct vioif_txqueue *txq = arg;
1291	struct virtio_softc *vsc = txq->txq_vq->vq_owner;
1292	struct vioif_softc *sc = device_private(virtio_child(vsc));
1293	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1294
1295	if (mutex_tryenter(txq->txq_lock)) {
1296		vioif_send_common_locked(ifp, txq, true);
1297		mutex_exit(txq->txq_lock);
1298	}
1299}
1300
1301static int
1302vioif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1303{
1304	int s, r;
1305
1306	s = splnet();
1307
1308	r = ether_ioctl(ifp, cmd, data);
1309	if ((r == 0 && cmd == SIOCSIFFLAGS) ||
1310	    (r == ENETRESET && (cmd == SIOCADDMULTI || cmd == SIOCDELMULTI))) {
1311		if (ifp->if_flags & IFF_RUNNING)
1312			r = vioif_rx_filter(ifp->if_softc);
1313		else
1314			r = 0;
1315	}
1316
1317	splx(s);
1318
1319	return r;
1320}
1321
1322static void
1323vioif_watchdog(struct ifnet *ifp)
1324{
1325	struct vioif_softc *sc = ifp->if_softc;
1326	int i;
1327
1328	if (ifp->if_flags & IFF_RUNNING) {
1329		for (i = 0; i < sc->sc_act_nvq_pairs; i++)
1330			vioif_tx_vq_done(sc->sc_txq[i].txq_vq);
1331	}
1332}
1333
1334
1335/*
1336 * Receive implementation
1337 */
1338/* allocate and initialize a mbuf for receive */
1339static int
1340vioif_add_rx_mbuf(struct vioif_rxqueue *rxq, int i)
1341{
1342	struct virtio_softc *vsc = rxq->rxq_vq->vq_owner;
1343	struct mbuf *m;
1344	int r;
1345
1346	MGETHDR(m, M_DONTWAIT, MT_DATA);
1347	if (m == NULL)
1348		return ENOBUFS;
1349	MCLGET(m, M_DONTWAIT);
1350	if ((m->m_flags & M_EXT) == 0) {
1351		m_freem(m);
1352		return ENOBUFS;
1353	}
1354	rxq->rxq_mbufs[i] = m;
1355	m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
1356	r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1357				 rxq->rxq_dmamaps[i],
1358				 m, BUS_DMA_READ|BUS_DMA_NOWAIT);
1359	if (r) {
1360		m_freem(m);
1361		rxq->rxq_mbufs[i] = NULL;
1362		return r;
1363	}
1364
1365	return 0;
1366}
1367
1368/* free a mbuf for receive */
1369static void
1370vioif_free_rx_mbuf(struct vioif_rxqueue *rxq, int i)
1371{
1372	struct virtio_softc *vsc = rxq->rxq_vq->vq_owner;
1373
1374	bus_dmamap_unload(virtio_dmat(vsc), rxq->rxq_dmamaps[i]);
1375	m_freem(rxq->rxq_mbufs[i]);
1376	rxq->rxq_mbufs[i] = NULL;
1377}
1378
1379/* add mbufs for all the empty receive slots */
1380static void
1381vioif_populate_rx_mbufs(struct vioif_rxqueue *rxq)
1382{
1383
1384	mutex_enter(rxq->rxq_lock);
1385	vioif_populate_rx_mbufs_locked(rxq);
1386	mutex_exit(rxq->rxq_lock);
1387}
1388
1389static void
1390vioif_populate_rx_mbufs_locked(struct vioif_rxqueue *rxq)
1391{
1392	struct virtqueue *vq = rxq->rxq_vq;
1393	struct virtio_softc *vsc = vq->vq_owner;
1394	struct vioif_softc *sc = device_private(virtio_child(vsc));
1395	int i, r, ndone = 0;
1396
1397	KASSERT(mutex_owned(rxq->rxq_lock));
1398
1399	if (rxq->rxq_stopping)
1400		return;
1401
1402	for (i = 0; i < vq->vq_num; i++) {
1403		int slot;
1404		r = virtio_enqueue_prep(vsc, vq, &slot);
1405		if (r == EAGAIN)
1406			break;
1407		if (r != 0)
1408			panic("enqueue_prep for rx buffers");
1409		if (rxq->rxq_mbufs[slot] == NULL) {
1410			r = vioif_add_rx_mbuf(rxq, slot);
1411			if (r != 0) {
1412				printf("%s: rx mbuf allocation failed, "
1413				       "error code %d\n",
1414				       device_xname(sc->sc_dev), r);
1415				break;
1416			}
1417		}
1418		r = virtio_enqueue_reserve(vsc, vq, slot,
1419					rxq->rxq_dmamaps[slot]->dm_nsegs + 1);
1420		if (r != 0) {
1421			vioif_free_rx_mbuf(rxq, slot);
1422			break;
1423		}
1424		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_hdr_dmamaps[slot],
1425			0, sizeof(struct virtio_net_hdr), BUS_DMASYNC_PREREAD);
1426		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_dmamaps[slot],
1427			0, MCLBYTES, BUS_DMASYNC_PREREAD);
1428		virtio_enqueue(vsc, vq, slot, rxq->rxq_hdr_dmamaps[slot], false);
1429		virtio_enqueue(vsc, vq, slot, rxq->rxq_dmamaps[slot], false);
1430		virtio_enqueue_commit(vsc, vq, slot, false);
1431		ndone++;
1432	}
1433	if (ndone > 0)
1434		virtio_enqueue_commit(vsc, vq, -1, true);
1435}
1436
1437/* dequeue received packets */
1438static int
1439vioif_rx_deq(struct vioif_rxqueue *rxq)
1440{
1441	int r;
1442
1443	KASSERT(rxq->rxq_stopping);
1444
1445	mutex_enter(rxq->rxq_lock);
1446	r = vioif_rx_deq_locked(rxq);
1447	mutex_exit(rxq->rxq_lock);
1448
1449	return r;
1450}
1451
1452/* dequeue received packets */
1453static int
1454vioif_rx_deq_locked(struct vioif_rxqueue *rxq)
1455{
1456	struct virtqueue *vq = rxq->rxq_vq;
1457	struct virtio_softc *vsc = vq->vq_owner;
1458	struct vioif_softc *sc = device_private(virtio_child(vsc));
1459	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1460	struct mbuf *m;
1461	int r = 0;
1462	int slot, len;
1463
1464	KASSERT(mutex_owned(rxq->rxq_lock));
1465
1466	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
1467		len -= sizeof(struct virtio_net_hdr);
1468		r = 1;
1469		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_hdr_dmamaps[slot],
1470				0, sizeof(struct virtio_net_hdr),
1471				BUS_DMASYNC_POSTREAD);
1472		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_dmamaps[slot],
1473				0, MCLBYTES,
1474				BUS_DMASYNC_POSTREAD);
1475		m = rxq->rxq_mbufs[slot];
1476		KASSERT(m != NULL);
1477		bus_dmamap_unload(virtio_dmat(vsc), rxq->rxq_dmamaps[slot]);
1478		rxq->rxq_mbufs[slot] = NULL;
1479		virtio_dequeue_commit(vsc, vq, slot);
1480		m_set_rcvif(m, ifp);
1481		m->m_len = m->m_pkthdr.len = len;
1482
1483		mutex_exit(rxq->rxq_lock);
1484		if_percpuq_enqueue(ifp->if_percpuq, m);
1485		mutex_enter(rxq->rxq_lock);
1486
1487		if (rxq->rxq_stopping)
1488			break;
1489	}
1490
1491	return r;
1492}
1493
1494/* rx interrupt; call _dequeue above and schedule a softint */
1495static int
1496vioif_rx_vq_done(struct virtqueue *vq)
1497{
1498	struct vioif_rxqueue *rxq = vq->vq_done_ctx;
1499	int r = 0;
1500
1501#ifdef VIOIF_SOFTINT_INTR
1502	KASSERT(!cpu_intr_p());
1503#endif
1504
1505	mutex_enter(rxq->rxq_lock);
1506
1507	if (rxq->rxq_stopping)
1508		goto out;
1509
1510	r = vioif_rx_deq_locked(rxq);
1511	if (r)
1512#ifdef VIOIF_SOFTINT_INTR
1513		vioif_populate_rx_mbufs_locked(rxq);
1514#else
1515		softint_schedule(rxq->rxq_softint);
1516#endif
1517
1518out:
1519	mutex_exit(rxq->rxq_lock);
1520	return r;
1521}
1522
1523/* softint: enqueue receive requests for new incoming packets */
1524static void
1525vioif_rx_softint(void *arg)
1526{
1527	struct vioif_rxqueue *rxq = arg;
1528
1529	vioif_populate_rx_mbufs(rxq);
1530}
1531
1532/* free all the mbufs; called from if_stop(disable) */
1533static void
1534vioif_rx_drain(struct vioif_rxqueue *rxq)
1535{
1536	struct virtqueue *vq = rxq->rxq_vq;
1537	int i;
1538
1539	for (i = 0; i < vq->vq_num; i++) {
1540		if (rxq->rxq_mbufs[i] == NULL)
1541			continue;
1542		vioif_free_rx_mbuf(rxq, i);
1543	}
1544}
1545
1546
1547/*
1548 * Transmission implementation
1549 */
1550/* actual transmission is done in if_start */
1551/* tx interrupt; dequeue and free mbufs */
1552/*
1553 * the tx interrupt is actually disabled; this should be called when
1554 * the tx vq is full and from the watchdog
1555 */
1556static int
1557vioif_tx_vq_done(struct virtqueue *vq)
1558{
1559	struct virtio_softc *vsc = vq->vq_owner;
1560	struct vioif_softc *sc = device_private(virtio_child(vsc));
1561	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1562	struct vioif_txqueue *txq = vq->vq_done_ctx;
1563	int r = 0;
1564
1565	mutex_enter(txq->txq_lock);
1566
1567	if (txq->txq_stopping)
1568		goto out;
1569
1570	r = vioif_tx_vq_done_locked(vq);
1571
1572out:
1573	mutex_exit(txq->txq_lock);
1574	if (r) {
1575		if_schedule_deferred_start(ifp);
1576
1577		KASSERT(txq->txq_deferred_transmit != NULL);
1578		softint_schedule(txq->txq_deferred_transmit);
1579	}
1580	return r;
1581}
1582
1583static int
1584vioif_tx_vq_done_locked(struct virtqueue *vq)
1585{
1586	struct virtio_softc *vsc = vq->vq_owner;
1587	struct vioif_softc *sc = device_private(virtio_child(vsc));
1588	struct vioif_txqueue *txq = vq->vq_done_ctx;
1589	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1590	struct mbuf *m;
1591	int r = 0;
1592	int slot, len;
1593
1594	KASSERT(mutex_owned(txq->txq_lock));
1595
1596	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
1597		r++;
1598		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_hdr_dmamaps[slot],
1599				0, sizeof(struct virtio_net_hdr),
1600				BUS_DMASYNC_POSTWRITE);
1601		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_dmamaps[slot],
1602				0, txq->txq_dmamaps[slot]->dm_mapsize,
1603				BUS_DMASYNC_POSTWRITE);
1604		m = txq->txq_mbufs[slot];
1605		bus_dmamap_unload(virtio_dmat(vsc), txq->txq_dmamaps[slot]);
1606		txq->txq_mbufs[slot] = NULL;
1607		virtio_dequeue_commit(vsc, vq, slot);
1608		ifp->if_opackets++;
1609		m_freem(m);
1610	}
1611
1612	if (r)
1613		ifp->if_flags &= ~IFF_OACTIVE;
1614	return r;
1615}
1616
1617/* free all the mbufs already put on vq; called from if_stop(disable) */
1618static void
1619vioif_tx_drain(struct vioif_txqueue *txq)
1620{
1621	struct virtqueue *vq = txq->txq_vq;
1622	struct virtio_softc *vsc = vq->vq_owner;
1623	int i;
1624
1625	KASSERT(txq->txq_stopping);
1626
1627	for (i = 0; i < vq->vq_num; i++) {
1628		if (txq->txq_mbufs[i] == NULL)
1629			continue;
1630		bus_dmamap_unload(virtio_dmat(vsc), txq->txq_dmamaps[i]);
1631		m_freem(txq->txq_mbufs[i]);
1632		txq->txq_mbufs[i] = NULL;
1633	}
1634}
1635
1636/*
1637 * Control vq
1638 */
1639/* issue a VIRTIO_NET_CTRL_RX class command and wait for completion */
1640static void
1641vioif_ctrl_acquire(struct vioif_softc *sc)
1642{
1643	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1644
1645	mutex_enter(&ctrlq->ctrlq_wait_lock);
1646	while (ctrlq->ctrlq_inuse != FREE)
1647		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
1648	ctrlq->ctrlq_inuse = INUSE;
1649	ctrlq->ctrlq_owner = curlwp;
1650	mutex_exit(&ctrlq->ctrlq_wait_lock);
1651}
1652
1653static void
1654vioif_ctrl_release(struct vioif_softc *sc)
1655{
1656	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1657
1658	KASSERT(ctrlq->ctrlq_inuse != FREE);
1659	KASSERT(ctrlq->ctrlq_owner == curlwp);
1660
1661	mutex_enter(&ctrlq->ctrlq_wait_lock);
1662	ctrlq->ctrlq_inuse = FREE;
1663	ctrlq->ctrlq_owner = NULL;
1664	cv_signal(&ctrlq->ctrlq_wait);
1665	mutex_exit(&ctrlq->ctrlq_wait_lock);
1666}
1667
1668static int
1669vioif_ctrl_load_cmdspec(struct vioif_softc *sc,
1670    struct vioif_ctrl_cmdspec *specs, int nspecs)
1671{
1672	struct virtio_softc *vsc = sc->sc_virtio;
1673	int i, r, loaded;
1674
1675	loaded = 0;
1676	for (i = 0; i < nspecs; i++) {
1677		r = bus_dmamap_load(virtio_dmat(vsc),
1678		    specs[i].dmamap, specs[i].buf, specs[i].bufsize,
1679		    NULL, BUS_DMA_WRITE|BUS_DMA_NOWAIT);
1680		if (r) {
1681			printf("%s: control command dmamap load failed, "
1682			       "error code %d\n", device_xname(sc->sc_dev), r);
1683			goto err;
1684		}
1685		loaded++;
1686
1687	}
1688
1689	return r;
1690
1691err:
1692	for (i = 0; i < loaded; i++) {
1693		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
1694	}
1695
1696	return r;
1697}
1698
1699static void
1700vioif_ctrl_unload_cmdspec(struct vioif_softc *sc,
1701    struct vioif_ctrl_cmdspec *specs, int nspecs)
1702{
1703	struct virtio_softc *vsc = sc->sc_virtio;
1704	int i;
1705
1706	for (i = 0; i < nspecs; i++) {
1707		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
1708	}
1709}
1710
1711static int
1712vioif_ctrl_send_command(struct vioif_softc *sc, uint8_t class, uint8_t cmd,
1713    struct vioif_ctrl_cmdspec *specs, int nspecs)
1714{
1715	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1716	struct virtqueue *vq = ctrlq->ctrlq_vq;
1717	struct virtio_softc *vsc = sc->sc_virtio;
1718	int i, r, slot;
1719
1720	ctrlq->ctrlq_cmd->class = class;
1721	ctrlq->ctrlq_cmd->command = cmd;
1722
1723	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap,
1724			0, sizeof(struct virtio_net_ctrl_cmd),
1725			BUS_DMASYNC_PREWRITE);
1726	for (i = 0; i < nspecs; i++) {
1727		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap,
1728				0, specs[i].bufsize,
1729				BUS_DMASYNC_PREWRITE);
1730	}
1731	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap,
1732			0, sizeof(struct virtio_net_ctrl_status),
1733			BUS_DMASYNC_PREREAD);
1734
1735	r = virtio_enqueue_prep(vsc, vq, &slot);
1736	if (r != 0)
1737		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
1738	r = virtio_enqueue_reserve(vsc, vq, slot, nspecs + 2);
1739	if (r != 0)
1740		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
1741	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_cmd_dmamap, true);
1742	for (i = 0; i < nspecs; i++) {
1743		virtio_enqueue(vsc, vq, slot, specs[i].dmamap, true);
1744	}
1745	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_status_dmamap, false);
1746	virtio_enqueue_commit(vsc, vq, slot, true);
1747
1748	/* wait for done */
1749	mutex_enter(&ctrlq->ctrlq_wait_lock);
1750	while (ctrlq->ctrlq_inuse != DONE)
1751		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
1752	mutex_exit(&ctrlq->ctrlq_wait_lock);
1753	/* already dequeued by vioif_ctrl_vq_done() */
1754
1755	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap, 0,
1756			sizeof(struct virtio_net_ctrl_cmd),
1757			BUS_DMASYNC_POSTWRITE);
1758	for (i = 0; i < nspecs; i++) {
1759		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap, 0,
1760				specs[i].bufsize,
1761				BUS_DMASYNC_POSTWRITE);
1762	}
1763	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap, 0,
1764			sizeof(struct virtio_net_ctrl_status),
1765			BUS_DMASYNC_POSTREAD);
1766
1767	if (ctrlq->ctrlq_status->ack == VIRTIO_NET_OK)
1768		r = 0;
1769	else {
1770		printf("%s: failed setting rx mode\n",
1771		       device_xname(sc->sc_dev));
1772		r = EIO;
1773	}
1774
1775	return r;
1776}
1777
1778static int
1779vioif_ctrl_rx(struct vioif_softc *sc, int cmd, bool onoff)
1780{
1781	struct virtio_net_ctrl_rx *rx = sc->sc_ctrlq.ctrlq_rx;
1782	struct vioif_ctrl_cmdspec specs[1];
1783	int r;
1784
1785	if (!sc->sc_has_ctrl)
1786		return ENOTSUP;
1787
1788	vioif_ctrl_acquire(sc);
1789
1790	rx->onoff = onoff;
1791	specs[0].dmamap = sc->sc_ctrlq.ctrlq_rx_dmamap;
1792	specs[0].buf = rx;
1793	specs[0].bufsize = sizeof(*rx);
1794
1795	r = vioif_ctrl_send_command(sc, VIRTIO_NET_CTRL_RX, cmd,
1796	    specs, __arraycount(specs));
1797
1798	vioif_ctrl_release(sc);
1799	return r;
1800}
1801
1802static int
1803vioif_set_promisc(struct vioif_softc *sc, bool onoff)
1804{
1805	int r;
1806
1807	r = vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, onoff);
1808
1809	return r;
1810}
1811
1812static int
1813vioif_set_allmulti(struct vioif_softc *sc, bool onoff)
1814{
1815	int r;
1816
1817	r = vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, onoff);
1818
1819	return r;
1820}
1821
1822/* issue VIRTIO_NET_CTRL_MAC_TABLE_SET command and wait for completion */
1823static int
1824vioif_set_rx_filter(struct vioif_softc *sc)
1825{
1826	/* filter already set in ctrlq->ctrlq_mac_tbl */
1827	struct virtio_net_ctrl_mac_tbl *mac_tbl_uc, *mac_tbl_mc;
1828	struct vioif_ctrl_cmdspec specs[2];
1829	int nspecs = __arraycount(specs);
1830	int r;
1831
1832	mac_tbl_uc = sc->sc_ctrlq.ctrlq_mac_tbl_uc;
1833	mac_tbl_mc = sc->sc_ctrlq.ctrlq_mac_tbl_mc;
1834
1835	if (!sc->sc_has_ctrl)
1836		return ENOTSUP;
1837
1838	vioif_ctrl_acquire(sc);
1839
1840	specs[0].dmamap = sc->sc_ctrlq.ctrlq_tbl_uc_dmamap;
1841	specs[0].buf = mac_tbl_uc;
1842	specs[0].bufsize = sizeof(*mac_tbl_uc)
1843	    + (ETHER_ADDR_LEN * mac_tbl_uc->nentries);
1844
1845	specs[1].dmamap = sc->sc_ctrlq.ctrlq_tbl_mc_dmamap;
1846	specs[1].buf = mac_tbl_mc;
1847	specs[1].bufsize = sizeof(*mac_tbl_mc)
1848	    + (ETHER_ADDR_LEN * mac_tbl_mc->nentries);
1849
1850	r = vioif_ctrl_load_cmdspec(sc, specs, nspecs);
1851	if (r != 0)
1852		goto out;
1853
1854	r = vioif_ctrl_send_command(sc,
1855	    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET,
1856	    specs, nspecs);
1857
1858	vioif_ctrl_unload_cmdspec(sc, specs, nspecs);
1859
1860out:
1861	vioif_ctrl_release(sc);
1862
1863	return r;
1864}
1865
1866static int
1867vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *sc, int nvq_pairs)
1868{
1869	struct virtio_net_ctrl_mq *mq = sc->sc_ctrlq.ctrlq_mq;
1870	struct vioif_ctrl_cmdspec specs[1];
1871	int r;
1872
1873	if (!sc->sc_has_ctrl)
1874		return ENOTSUP;
1875
1876	if (nvq_pairs <= 1)
1877		return EINVAL;
1878
1879	vioif_ctrl_acquire(sc);
1880
1881	mq->virtqueue_pairs = nvq_pairs;
1882	specs[0].dmamap = sc->sc_ctrlq.ctrlq_mq_dmamap;
1883	specs[0].buf = mq;
1884	specs[0].bufsize = sizeof(*mq);
1885
1886	r = vioif_ctrl_send_command(sc,
1887	    VIRTIO_NET_CTRL_MQ, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
1888	    specs, __arraycount(specs));
1889
1890	vioif_ctrl_release(sc);
1891
1892	return r;
1893}
1894
1895/* ctrl vq interrupt; wake up the command issuer */
1896static int
1897vioif_ctrl_vq_done(struct virtqueue *vq)
1898{
1899	struct virtio_softc *vsc = vq->vq_owner;
1900	struct vioif_ctrlqueue *ctrlq = vq->vq_done_ctx;
1901	int r, slot;
1902
1903	r = virtio_dequeue(vsc, vq, &slot, NULL);
1904	if (r == ENOENT)
1905		return 0;
1906	virtio_dequeue_commit(vsc, vq, slot);
1907
1908	mutex_enter(&ctrlq->ctrlq_wait_lock);
1909	ctrlq->ctrlq_inuse = DONE;
1910	cv_signal(&ctrlq->ctrlq_wait);
1911	mutex_exit(&ctrlq->ctrlq_wait_lock);
1912
1913	return 1;
1914}
1915
1916/*
1917 * If IFF_PROMISC is requested, enable promiscuous mode.
1918 * If the multicast filter is small enough (<= MAXENTRIES), program the
1919 * rx filter; if larger multicast filters exist, use ALLMULTI.
1920 *
1921 * If programming the rx filter fails, fall back to ALLMULTI;
1922 * if ALLMULTI fails, fall back to PROMISC.
1923 */
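/*
 * Worked example (illustrative): with 3 multicast groups joined the MAC
 * table command carries 3 entries; with 65 groups (> MAXENTRIES, i.e. 64)
 * the driver requests ALLMULTI instead; and if even that fails it ends
 * up in PROMISC, mirroring the fallbacks coded below.
 */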
1925static int
1926vioif_rx_filter(struct vioif_softc *sc)
1927{
1928	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1929	struct ether_multi *enm;
1930	struct ether_multistep step;
1931	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1932	int nentries;
1933	int promisc = 0, allmulti = 0, rxfilter = 0;
1934	int r;
1935
1936	if (!sc->sc_has_ctrl) {	/* no ctrl vq; always promisc */
1937		ifp->if_flags |= IFF_PROMISC;
1938		return 0;
1939	}
1940
1941	if (ifp->if_flags & IFF_PROMISC) {
1942		promisc = 1;
1943		goto set;
1944	}
1945
1946	nentries = -1;
1947	ETHER_LOCK(&sc->sc_ethercom);
1948	ETHER_FIRST_MULTI(step, &sc->sc_ethercom, enm);
1949	while (nentries++, enm != NULL) {
1950		if (nentries >= VIRTIO_NET_CTRL_MAC_MAXENTRIES) {
1951			allmulti = 1;
1952			goto set_unlock;
1953		}
1954		if (memcmp(enm->enm_addrlo, enm->enm_addrhi,
1955			   ETHER_ADDR_LEN)) {
1956			allmulti = 1;
1957			goto set_unlock;
1958		}
1959		memcpy(ctrlq->ctrlq_mac_tbl_mc->macs[nentries],
1960		       enm->enm_addrlo, ETHER_ADDR_LEN);
1961		ETHER_NEXT_MULTI(step, enm);
1962	}
1963	rxfilter = 1;
1964
1965set_unlock:
1966	ETHER_UNLOCK(&sc->sc_ethercom);
1967
1968set:
1969	if (rxfilter) {
1970		ctrlq->ctrlq_mac_tbl_uc->nentries = 0;
1971		ctrlq->ctrlq_mac_tbl_mc->nentries = nentries;
1972		r = vioif_set_rx_filter(sc);
1973		if (r != 0) {
1974			rxfilter = 0;
1975			allmulti = 1; /* fallback */
1976		}
1977	} else {
1978		/* remove rx filter */
1979		ctrlq->ctrlq_mac_tbl_uc->nentries = 0;
1980		ctrlq->ctrlq_mac_tbl_mc->nentries = 0;
1981		r = vioif_set_rx_filter(sc);
1982		/* what to do on failure? */
1983	}
1984	if (allmulti) {
1985		r = vioif_set_allmulti(sc, true);
1986		if (r != 0) {
1987			allmulti = 0;
1988			promisc = 1; /* fallback */
1989		}
1990	} else {
1991		r = vioif_set_allmulti(sc, false);
1992		/* what to do on failure? */
1993	}
1994	if (promisc) {
1995		r = vioif_set_promisc(sc, true);
1996	} else {
1997		r = vioif_set_promisc(sc, false);
1998	}
1999
2000	return r;
2001}
2002
2003static bool
2004vioif_is_link_up(struct vioif_softc *sc)
2005{
2006	struct virtio_softc *vsc = sc->sc_virtio;
2007	uint16_t status;
2008
2009	if (virtio_features(vsc) & VIRTIO_NET_F_STATUS)
2010		status = virtio_read_device_config_2(vsc,
2011		    VIRTIO_NET_CONFIG_STATUS);
2012	else
2013		status = VIRTIO_NET_S_LINK_UP;
2014
2015	return ((status & VIRTIO_NET_S_LINK_UP) != 0);
2016}
2017
2018/* change link status */
2019static void
2020vioif_update_link_status(struct vioif_softc *sc)
2021{
2022	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2023	struct vioif_txqueue *txq;
2024	bool active, changed;
2025	int link, i;
2026
2027	active = vioif_is_link_up(sc);
2028	changed = false;
2029
2030	if (active) {
2031		if (!sc->sc_link_active)
2032			changed = true;
2033
2034		link = LINK_STATE_UP;
2035		sc->sc_link_active = true;
2036	} else {
2037		if (sc->sc_link_active)
2038			changed = true;
2039
2040		link = LINK_STATE_DOWN;
2041		sc->sc_link_active = false;
2042	}
2043
2044	if (changed) {
2045		for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
2046			txq = &sc->sc_txq[i];
2047
2048			mutex_enter(txq->txq_lock);
2049			txq->txq_link_active = sc->sc_link_active;
2050			mutex_exit(txq->txq_lock);
2051		}
2052
2053		if_link_state_change(ifp, link);
2054	}
2055}
2056
2057static int
2058vioif_config_change(struct virtio_softc *vsc)
2059{
2060	struct vioif_softc *sc = device_private(virtio_child(vsc));
2061
2062#ifdef VIOIF_SOFTINT_INTR
2063	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2064#endif
2065
2066#ifdef VIOIF_SOFTINT_INTR
2067	KASSERT(!cpu_intr_p());
2068	vioif_update_link_status(sc);
2069	vioif_start(ifp);
2070#else
2071	softint_schedule(sc->sc_ctl_softint);
2072#endif
2073
2074	return 0;
2075}
2076
2077static void
2078vioif_ctl_softint(void *arg)
2079{
2080	struct vioif_softc *sc = arg;
2081	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2082
2083	vioif_update_link_status(sc);
2084	vioif_start(ifp);
2085}
2086
2087MODULE(MODULE_CLASS_DRIVER, if_vioif, "virtio");
2088
2089#ifdef _MODULE
2090#include "ioconf.c"
2091#endif
2092
2093static int
2094if_vioif_modcmd(modcmd_t cmd, void *opaque)
2095{
2096	int error = 0;
2097
2098#ifdef _MODULE
2099	switch (cmd) {
2100	case MODULE_CMD_INIT:
2101		error = config_init_component(cfdriver_ioconf_if_vioif,
2102		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2103		break;
2104	case MODULE_CMD_FINI:
2105		error = config_fini_component(cfdriver_ioconf_if_vioif,
2106		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2107		break;
2108	default:
2109		error = ENOTTY;
2110		break;
2111	}
2112#endif
2113
2114	return error;
2115}
2116