1/*	$NetBSD: if_vioif.c,v 1.51 2019/10/01 18:00:08 chs Exp $	*/
2
3/*
4 * Copyright (c) 2010 Minoura Makoto.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__KERNEL_RCSID(0, "$NetBSD: if_vioif.c,v 1.51 2019/10/01 18:00:08 chs Exp $");
30
31#ifdef _KERNEL_OPT
32#include "opt_net_mpsafe.h"
33#endif
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/kernel.h>
38#include <sys/bus.h>
39#include <sys/condvar.h>
40#include <sys/device.h>
41#include <sys/intr.h>
42#include <sys/kmem.h>
43#include <sys/mbuf.h>
44#include <sys/mutex.h>
45#include <sys/sockio.h>
46#include <sys/cpu.h>
47#include <sys/module.h>
48#include <sys/pcq.h>
49
50#include <dev/pci/virtioreg.h>
51#include <dev/pci/virtiovar.h>
52
53#include <net/if.h>
54#include <net/if_media.h>
55#include <net/if_ether.h>
56
57#include <net/bpf.h>
58
59#include "ioconf.h"
60
61#ifdef NET_MPSAFE
62#define VIOIF_MPSAFE	1
63#define VIOIF_MULTIQ	1
64#endif
65
66#ifdef SOFTINT_INTR
67#define VIOIF_SOFTINT_INTR	1
68#endif
69
70/*
71 * if_vioifreg.h:
72 */
73/* Configuration registers */
74#define VIRTIO_NET_CONFIG_MAC		0 /* 8bit x 6byte */
75#define VIRTIO_NET_CONFIG_STATUS	6 /* 16bit */
76#define VIRTIO_NET_CONFIG_MAX_VQ_PAIRS	8 /* 16bit */
77
78/* Feature bits */
79#define VIRTIO_NET_F_CSUM		__BIT(0)
80#define VIRTIO_NET_F_GUEST_CSUM		__BIT(1)
81#define VIRTIO_NET_F_MAC		__BIT(5)
82#define VIRTIO_NET_F_GSO		__BIT(6)
83#define VIRTIO_NET_F_GUEST_TSO4		__BIT(7)
84#define VIRTIO_NET_F_GUEST_TSO6		__BIT(8)
85#define VIRTIO_NET_F_GUEST_ECN		__BIT(9)
86#define VIRTIO_NET_F_GUEST_UFO		__BIT(10)
87#define VIRTIO_NET_F_HOST_TSO4		__BIT(11)
88#define VIRTIO_NET_F_HOST_TSO6		__BIT(12)
89#define VIRTIO_NET_F_HOST_ECN		__BIT(13)
90#define VIRTIO_NET_F_HOST_UFO		__BIT(14)
91#define VIRTIO_NET_F_MRG_RXBUF		__BIT(15)
92#define VIRTIO_NET_F_STATUS		__BIT(16)
93#define VIRTIO_NET_F_CTRL_VQ		__BIT(17)
94#define VIRTIO_NET_F_CTRL_RX		__BIT(18)
95#define VIRTIO_NET_F_CTRL_VLAN		__BIT(19)
96#define VIRTIO_NET_F_CTRL_RX_EXTRA	__BIT(20)
97#define VIRTIO_NET_F_GUEST_ANNOUNCE	__BIT(21)
98#define VIRTIO_NET_F_MQ			__BIT(22)
99
100#define VIRTIO_NET_FLAG_BITS \
101	VIRTIO_COMMON_FLAG_BITS \
102	"\x17""MQ" \
103	"\x16""GUEST_ANNOUNCE" \
104	"\x15""CTRL_RX_EXTRA" \
105	"\x14""CTRL_VLAN" \
106	"\x13""CTRL_RX" \
107	"\x12""CTRL_VQ" \
108	"\x11""STATUS" \
109	"\x10""MRG_RXBUF" \
110	"\x0f""HOST_UFO" \
111	"\x0e""HOST_ECN" \
112	"\x0d""HOST_TSO6" \
113	"\x0c""HOST_TSO4" \
114	"\x0b""GUEST_UFO" \
115	"\x0a""GUEST_ECN" \
116	"\x09""GUEST_TSO6" \
117	"\x08""GUEST_TSO4" \
118	"\x07""GSO" \
119	"\x06""MAC" \
120	"\x02""GUEST_CSUM" \
121	"\x01""CSUM"
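
/*
 * Note: VIRTIO_NET_FLAG_BITS is a bit-description string in the format
 * understood by snprintb(): each entry is a one-byte, 1-origin bit number
 * (so "\x17", i.e. 23, names bit 22, MQ) followed by the name printed when
 * that bit is set in the feature word.  A minimal, illustrative sketch of
 * how such a string is consumed (this exact call does not appear in this
 * driver):
 *
 *	char fbuf[256];
 *	snprintb(fbuf, sizeof(fbuf), VIRTIO_NET_FLAG_BITS, features);
 *
 * The string is handed to virtio_child_attach_start() in vioif_attach(),
 * which is expected to use it when reporting the negotiated features.
 */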
122
123/* Status */
124#define VIRTIO_NET_S_LINK_UP	1
125
126/* Packet header structure */
127struct virtio_net_hdr {
128	uint8_t		flags;
129	uint8_t		gso_type;
130	uint16_t	hdr_len;
131	uint16_t	gso_size;
132	uint16_t	csum_start;
133	uint16_t	csum_offset;
134#if 0
135	uint16_t	num_buffers; /* if VIRTIO_NET_F_MRG_RXBUF enabled */
136#endif
137} __packed;
138
139#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
140#define VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
141#define VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
142#define VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
143#define VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
144#define VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */
145
146#define VIRTIO_NET_MAX_GSO_LEN		(65536+ETHER_HDR_LEN)
147
148/* Control virtqueue */
149struct virtio_net_ctrl_cmd {
150	uint8_t	class;
151	uint8_t	command;
152} __packed;
153#define VIRTIO_NET_CTRL_RX		0
154# define VIRTIO_NET_CTRL_RX_PROMISC	0
155# define VIRTIO_NET_CTRL_RX_ALLMULTI	1
156
157#define VIRTIO_NET_CTRL_MAC		1
158# define VIRTIO_NET_CTRL_MAC_TABLE_SET	0
159
160#define VIRTIO_NET_CTRL_VLAN		2
161# define VIRTIO_NET_CTRL_VLAN_ADD	0
162# define VIRTIO_NET_CTRL_VLAN_DEL	1
163
164#define VIRTIO_NET_CTRL_MQ			4
165# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET	0
166# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN	1
167# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX	0x8000
168
169struct virtio_net_ctrl_status {
170	uint8_t	ack;
171} __packed;
172#define VIRTIO_NET_OK			0
173#define VIRTIO_NET_ERR			1
174
175struct virtio_net_ctrl_rx {
176	uint8_t	onoff;
177} __packed;
178
179struct virtio_net_ctrl_mac_tbl {
180	uint32_t nentries;
181	uint8_t macs[][ETHER_ADDR_LEN];
182} __packed;
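
/*
 * Note: the MAC filter table is variable-length; macs[] is a flexible array
 * member, so the number of bytes to allocate and DMA-map for a table with
 * nentries addresses is
 *
 *	sizeof(struct virtio_net_ctrl_mac_tbl) + nentries * ETHER_ADDR_LEN
 *
 * which is how vioif_alloc_mems() and vioif_set_rx_filter() size the
 * unicast and multicast filter buffers.
 */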
183
184struct virtio_net_ctrl_vlan {
185	uint16_t id;
186} __packed;
187
188struct virtio_net_ctrl_mq {
189	uint16_t virtqueue_pairs;
190} __packed;
191
192struct vioif_ctrl_cmdspec {
193	bus_dmamap_t	dmamap;
194	void		*buf;
195	bus_size_t	bufsize;
196};
197
198/*
199 * if_vioifvar.h:
200 */
201
202/*
203 * Locking notes:
204 * + fields in vioif_txqueue are protected by txq_lock (a spin mutex), and
205 *   fields in vioif_rxqueue are protected by rxq_lock (a spin mutex).
206 *      - no more than one of these locks may be held at a time
207 * + ctrlq_inuse is protected by ctrlq_wait_lock.
208 *      - other fields in vioif_ctrlqueue are protected by ctrlq_inuse
209 *      - txq_lock or rxq_lock cannot be held along with ctrlq_wait_lock
210 */
211
212struct vioif_txqueue {
213	kmutex_t		*txq_lock;	/* lock for tx operations */
214
215	struct virtqueue	*txq_vq;
216	bool			txq_stopping;
217	bool			txq_link_active;
218	pcq_t			*txq_intrq;
219
220	struct virtio_net_hdr	*txq_hdrs;
221	bus_dmamap_t		*txq_hdr_dmamaps;
222
223	struct mbuf		**txq_mbufs;
224	bus_dmamap_t		*txq_dmamaps;
225
226	void			*txq_deferred_transmit;
227};
228
229struct vioif_rxqueue {
230	kmutex_t		*rxq_lock;	/* lock for rx operations */
231
232	struct virtqueue	*rxq_vq;
233	bool			rxq_stopping;
234
235	struct virtio_net_hdr	*rxq_hdrs;
236	bus_dmamap_t		*rxq_hdr_dmamaps;
237
238	struct mbuf		**rxq_mbufs;
239	bus_dmamap_t		*rxq_dmamaps;
240
241	void			*rxq_softint;
242};
243
244struct vioif_ctrlqueue {
245	struct virtqueue		*ctrlq_vq;
246	enum {
247		FREE, INUSE, DONE
248	}				ctrlq_inuse;
249	kcondvar_t			ctrlq_wait;
250	kmutex_t			ctrlq_wait_lock;
251	struct lwp			*ctrlq_owner;
252
253	struct virtio_net_ctrl_cmd	*ctrlq_cmd;
254	struct virtio_net_ctrl_status	*ctrlq_status;
255	struct virtio_net_ctrl_rx	*ctrlq_rx;
256	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_uc;
257	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_mc;
258	struct virtio_net_ctrl_mq	*ctrlq_mq;
259
260	bus_dmamap_t			ctrlq_cmd_dmamap;
261	bus_dmamap_t			ctrlq_status_dmamap;
262	bus_dmamap_t			ctrlq_rx_dmamap;
263	bus_dmamap_t			ctrlq_tbl_uc_dmamap;
264	bus_dmamap_t			ctrlq_tbl_mc_dmamap;
265	bus_dmamap_t			ctrlq_mq_dmamap;
266};
267
268struct vioif_softc {
269	device_t		sc_dev;
270
271	struct virtio_softc	*sc_virtio;
272	struct virtqueue	*sc_vqs;
273
274	int			sc_max_nvq_pairs;
275	int			sc_req_nvq_pairs;
276	int			sc_act_nvq_pairs;
277
278	uint8_t			sc_mac[ETHER_ADDR_LEN];
279	struct ethercom		sc_ethercom;
280	short			sc_deferred_init_done;
281	bool			sc_link_active;
282
283	struct vioif_txqueue	*sc_txq;
284	struct vioif_rxqueue	*sc_rxq;
285
286	bool			sc_has_ctrl;
287	struct vioif_ctrlqueue	sc_ctrlq;
288
289	bus_dma_segment_t	sc_hdr_segs[1];
290	void			*sc_dmamem;
291	void			*sc_kmem;
292
293	void			*sc_ctl_softint;
294};
295#define VIRTIO_NET_TX_MAXNSEGS		(16) /* XXX */
296#define VIRTIO_NET_CTRL_MAC_MAXENTRIES	(64) /* XXX */
297
298/* cfattach interface functions */
299static int	vioif_match(device_t, cfdata_t, void *);
300static void	vioif_attach(device_t, device_t, void *);
301static void	vioif_deferred_init(device_t);
302
303/* ifnet interface functions */
304static int	vioif_init(struct ifnet *);
305static void	vioif_stop(struct ifnet *, int);
306static void	vioif_start(struct ifnet *);
307static void	vioif_start_locked(struct ifnet *, struct vioif_txqueue *);
308static int	vioif_transmit(struct ifnet *, struct mbuf *);
309static void	vioif_transmit_locked(struct ifnet *, struct vioif_txqueue *);
310static int	vioif_ioctl(struct ifnet *, u_long, void *);
311static void	vioif_watchdog(struct ifnet *);
312
313/* rx */
314static int	vioif_add_rx_mbuf(struct vioif_rxqueue *, int);
315static void	vioif_free_rx_mbuf(struct vioif_rxqueue *, int);
316static void	vioif_populate_rx_mbufs(struct vioif_rxqueue *);
317static void	vioif_populate_rx_mbufs_locked(struct vioif_rxqueue *);
318static int	vioif_rx_deq(struct vioif_rxqueue *);
319static int	vioif_rx_deq_locked(struct vioif_rxqueue *);
320static int	vioif_rx_vq_done(struct virtqueue *);
321static void	vioif_rx_softint(void *);
322static void	vioif_rx_drain(struct vioif_rxqueue *);
323
324/* tx */
325static int	vioif_tx_vq_done(struct virtqueue *);
326static int	vioif_tx_vq_done_locked(struct virtqueue *);
327static void	vioif_tx_drain(struct vioif_txqueue *);
328static void	vioif_deferred_transmit(void *);
329
330/* other control */
331static bool	vioif_is_link_up(struct vioif_softc *);
332static void	vioif_update_link_status(struct vioif_softc *);
333static int	vioif_ctrl_rx(struct vioif_softc *, int, bool);
334static int	vioif_set_promisc(struct vioif_softc *, bool);
335static int	vioif_set_allmulti(struct vioif_softc *, bool);
336static int	vioif_set_rx_filter(struct vioif_softc *);
337static int	vioif_rx_filter(struct vioif_softc *);
338static int	vioif_ctrl_vq_done(struct virtqueue *);
339static int	vioif_config_change(struct virtio_softc *);
340static void	vioif_ctl_softint(void *);
341static int	vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *, int);
342static void	vioif_enable_interrupt_vqpairs(struct vioif_softc *);
343static void	vioif_disable_interrupt_vqpairs(struct vioif_softc *);
344
345CFATTACH_DECL_NEW(vioif, sizeof(struct vioif_softc),
346		  vioif_match, vioif_attach, NULL, NULL);
347
348static int
349vioif_match(device_t parent, cfdata_t match, void *aux)
350{
351	struct virtio_attach_args *va = aux;
352
353	if (va->sc_childdevid == PCI_PRODUCT_VIRTIO_NETWORK)
354		return 1;
355
356	return 0;
357}
358
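/*
 * The virtqueues are laid out in sc_vqs[] as rx0, tx0, rx1, tx1, ...,
 * followed by the control virtqueue when one is used, matching the order
 * in which vioif_attach() later calls virtio_alloc_vq().
 */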
359static void
360vioif_alloc_queues(struct vioif_softc *sc)
361{
362	int nvq_pairs = sc->sc_max_nvq_pairs;
363	int nvqs = nvq_pairs * 2;
364	int i;
365
366	KASSERT(nvq_pairs <= VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX);
367
368	sc->sc_rxq = kmem_zalloc(sizeof(sc->sc_rxq[0]) * nvq_pairs,
369	    KM_SLEEP);
370	sc->sc_txq = kmem_zalloc(sizeof(sc->sc_txq[0]) * nvq_pairs,
371	    KM_SLEEP);
372
373	if (sc->sc_has_ctrl)
374		nvqs++;
375
376	sc->sc_vqs = kmem_zalloc(sizeof(sc->sc_vqs[0]) * nvqs, KM_SLEEP);
377	nvqs = 0;
378	for (i = 0; i < nvq_pairs; i++) {
379		sc->sc_rxq[i].rxq_vq = &sc->sc_vqs[nvqs++];
380		sc->sc_txq[i].txq_vq = &sc->sc_vqs[nvqs++];
381	}
382
383	if (sc->sc_has_ctrl)
384		sc->sc_ctrlq.ctrlq_vq = &sc->sc_vqs[nvqs++];
385}
386
387static void
388vioif_free_queues(struct vioif_softc *sc)
389{
390	int nvq_pairs = sc->sc_max_nvq_pairs;
391	int nvqs = nvq_pairs * 2;
392
393	if (sc->sc_ctrlq.ctrlq_vq)
394		nvqs++;
395
396	if (sc->sc_txq) {
397		kmem_free(sc->sc_txq, sizeof(sc->sc_txq[0]) * nvq_pairs);
398		sc->sc_txq = NULL;
399	}
400
401	if (sc->sc_rxq) {
402		kmem_free(sc->sc_rxq, sizeof(sc->sc_rxq[0]) * nvq_pairs);
403		sc->sc_rxq = NULL;
404	}
405
406	if (sc->sc_vqs) {
407		kmem_free(sc->sc_vqs, sizeof(sc->sc_vqs[0]) * nvqs);
408		sc->sc_vqs = NULL;
409	}
410}
411
412/* allocate memory */
413/*
414 * dma memory is used for:
415 *   rxq_hdrs[slot]:	 metadata array for received frames (READ)
416 *   txq_hdrs[slot]:	 metadata array for frames to be sent (WRITE)
417 *   ctrlq_cmd:		 command to be sent via ctrl vq (WRITE)
418 *   ctrlq_status:	 return value for a command via ctrl vq (READ)
419 *   ctrlq_rx:		 parameter for a VIRTIO_NET_CTRL_RX class command
420 *			 (WRITE)
421 *   ctrlq_mac_tbl_uc:	 unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
422 *			 class command (WRITE)
423 *   ctrlq_mac_tbl_mc:	 multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
424 *			 class command (WRITE)
425 * Only one instance of each ctrlq_* structure is allocated; they are
426 * protected by the ctrlq_inuse variable and the ctrlq_wait condvar.
427 */
428/*
429 * dynamically allocated memory is used for:
430 *   rxq_hdr_dmamaps[slot]:	bus_dmamap_t array for rxq_hdrs[slot]
431 *   txq_hdr_dmamaps[slot]:	bus_dmamap_t array for txq_hdrs[slot]
432 *   rxq_dmamaps[slot]:		bus_dmamap_t array for received payload
433 *   txq_dmamaps[slot]:		bus_dmamap_t array for sent payload
434 *   rxq_mbufs[slot]:		mbuf pointer array for received frames
435 *   txq_mbufs[slot]:		mbuf pointer array for sent frames
436 */
437static int
438vioif_alloc_mems(struct vioif_softc *sc)
439{
440	struct virtio_softc *vsc = sc->sc_virtio;
441	struct vioif_txqueue *txq;
442	struct vioif_rxqueue *rxq;
443	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
444	int allocsize, allocsize2, r, rsegs, i, qid;
445	void *vaddr;
446	intptr_t p;
447
448	allocsize = 0;
449	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
450		rxq = &sc->sc_rxq[qid];
451		txq = &sc->sc_txq[qid];
452
453		allocsize +=
454		    sizeof(struct virtio_net_hdr) * rxq->rxq_vq->vq_num;
455		allocsize +=
456		    sizeof(struct virtio_net_hdr) * txq->txq_vq->vq_num;
457	}
458	if (sc->sc_has_ctrl) {
459		allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1;
460		allocsize += sizeof(struct virtio_net_ctrl_status) * 1;
461		allocsize += sizeof(struct virtio_net_ctrl_rx) * 1;
462		allocsize += sizeof(struct virtio_net_ctrl_mac_tbl)
463		    + sizeof(struct virtio_net_ctrl_mac_tbl)
464		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES;
465		allocsize += sizeof(struct virtio_net_ctrl_mq) * 1;
466	}
467	r = bus_dmamem_alloc(virtio_dmat(vsc), allocsize, 0, 0,
468	    &sc->sc_hdr_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
469	if (r != 0) {
470		aprint_error_dev(sc->sc_dev,
471		    "DMA memory allocation failed, size %d, "
472		    "error code %d\n", allocsize, r);
473		goto err_none;
474	}
475	r = bus_dmamem_map(virtio_dmat(vsc),
476	    &sc->sc_hdr_segs[0], 1, allocsize, &vaddr, BUS_DMA_NOWAIT);
477	if (r != 0) {
478		aprint_error_dev(sc->sc_dev,
479		    "DMA memory map failed, error code %d\n", r);
480		goto err_dmamem_alloc;
481	}
482
483#define P(p, p0, p0size)	do { p0 = (void *) p;		\
484				     p += p0size; } while (0)
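	/*
	 * P() carves a sub-region out of the single DMA allocation: it points
	 * p0 at the current cursor and advances the cursor by p0size bytes.
	 * With two queue pairs, the resulting layout is roughly (a sketch,
	 * sizes not to scale):
	 *
	 *	| rx0 hdrs | tx0 hdrs | rx1 hdrs | tx1 hdrs | ctrlq_cmd | ... |
	 */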
485	memset(vaddr, 0, allocsize);
486	sc->sc_dmamem = vaddr;
487	p = (intptr_t) vaddr;
488
489	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
490		rxq = &sc->sc_rxq[qid];
491		txq = &sc->sc_txq[qid];
492
493		P(p, rxq->rxq_hdrs,
494		    sizeof(rxq->rxq_hdrs[0]) * rxq->rxq_vq->vq_num);
495		P(p, txq->txq_hdrs,
496		    sizeof(txq->txq_hdrs[0]) * txq->txq_vq->vq_num);
497	}
498	if (sc->sc_has_ctrl) {
499		P(p, ctrlq->ctrlq_cmd, sizeof(*ctrlq->ctrlq_cmd));
500		P(p, ctrlq->ctrlq_status, sizeof(*ctrlq->ctrlq_status));
501		P(p, ctrlq->ctrlq_rx, sizeof(*ctrlq->ctrlq_rx));
502		P(p, ctrlq->ctrlq_mac_tbl_uc, sizeof(*ctrlq->ctrlq_mac_tbl_uc));
503		P(p, ctrlq->ctrlq_mac_tbl_mc, sizeof(*ctrlq->ctrlq_mac_tbl_mc)
504		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES);
505		P(p, ctrlq->ctrlq_mq, sizeof(*ctrlq->ctrlq_mq));
506	}
507
508	allocsize2 = 0;
509	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
510		int rxqsize, txqsize;
511
512		rxq = &sc->sc_rxq[qid];
513		txq = &sc->sc_txq[qid];
514		rxqsize = rxq->rxq_vq->vq_num;
515		txqsize = txq->txq_vq->vq_num;
516
517		allocsize2 += sizeof(rxq->rxq_dmamaps[0]) * rxqsize;
518		allocsize2 += sizeof(rxq->rxq_hdr_dmamaps[0]) * rxqsize;
519		allocsize2 += sizeof(rxq->rxq_mbufs[0]) * rxqsize;
520
521		allocsize2 += sizeof(txq->txq_dmamaps[0]) * txqsize;
522		allocsize2 += sizeof(txq->txq_hdr_dmamaps[0]) * txqsize;
523		allocsize2 += sizeof(txq->txq_mbufs[0]) * txqsize;
524	}
525	vaddr = kmem_zalloc(allocsize2, KM_SLEEP);
526	sc->sc_kmem = vaddr;
527	p = (intptr_t) vaddr;
528
529	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
530		int rxqsize, txqsize;
531		rxq = &sc->sc_rxq[qid];
532		txq = &sc->sc_txq[qid];
533		rxqsize = rxq->rxq_vq->vq_num;
534		txqsize = txq->txq_vq->vq_num;
535
536		P(p, rxq->rxq_hdr_dmamaps,
537		    sizeof(rxq->rxq_hdr_dmamaps[0]) * rxqsize);
538		P(p, txq->txq_hdr_dmamaps,
539		    sizeof(txq->txq_hdr_dmamaps[0]) * txqsize);
540		P(p, rxq->rxq_dmamaps, sizeof(rxq->rxq_dmamaps[0]) * rxqsize);
541		P(p, txq->txq_dmamaps, sizeof(txq->txq_dmamaps[0]) * txqsize);
542		P(p, rxq->rxq_mbufs, sizeof(rxq->rxq_mbufs[0]) * rxqsize);
543		P(p, txq->txq_mbufs, sizeof(txq->txq_mbufs[0]) * txqsize);
544	}
545#undef P
546
547#define C(map, size, nsegs, usage)					      \
548	do {								      \
549		r = bus_dmamap_create(virtio_dmat(vsc), size, nsegs, size, 0, \
550		    BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW,			      \
551		    &map);						      \
552		if (r != 0) {						      \
553			aprint_error_dev(sc->sc_dev,			      \
554			    usage " dmamap creation failed, "		      \
555			    "error code %d\n", r);			      \
556			goto err_reqs;					      \
557		}							      \
558	} while (0)
559#define C_L(map, buf, size, nsegs, rw, usage)				\
560	C(map, size, nsegs, usage);					\
561	do {								\
562		r = bus_dmamap_load(virtio_dmat(vsc), map,		\
563				    buf, size, NULL,			\
564				    rw | BUS_DMA_NOWAIT);		\
565		if (r != 0) {						\
566			aprint_error_dev(sc->sc_dev,			\
567			    usage " dmamap load failed, "		\
568			    "error code %d\n", r);			\
569			goto err_reqs;					\
570		}							\
571	} while (0)
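	/*
	 * C() creates a dmamap that can hold "size" bytes in at most "nsegs"
	 * segments; C_L() additionally loads the map with a fixed kernel
	 * buffer so it stays mapped for the lifetime of the driver.  Maps
	 * whose buffer changes per request (payload mbufs, the variable-length
	 * MAC filter tables) are created with C() only and loaded on demand.
	 */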
572
573	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
574		rxq = &sc->sc_rxq[qid];
575		txq = &sc->sc_txq[qid];
576
577		for (i = 0; i < rxq->rxq_vq->vq_num; i++) {
578			C_L(rxq->rxq_hdr_dmamaps[i], &rxq->rxq_hdrs[i],
579			    sizeof(rxq->rxq_hdrs[0]), 1,
580			    BUS_DMA_READ, "rx header");
581			C(rxq->rxq_dmamaps[i], MCLBYTES, 1, "rx payload");
582		}
583
584		for (i = 0; i < txq->txq_vq->vq_num; i++) {
585			C_L(txq->txq_hdr_dmamaps[i], &txq->txq_hdrs[i],
586			    sizeof(txq->txq_hdrs[0]), 1,
587			    BUS_DMA_WRITE, "tx header");
588			C(txq->txq_dmamaps[i], ETHER_MAX_LEN,
589			    VIRTIO_NET_TX_MAXNSEGS, "tx payload");
590		}
591	}
592
593	if (sc->sc_has_ctrl) {
594		/* control vq class & command */
595		C_L(ctrlq->ctrlq_cmd_dmamap,
596		    ctrlq->ctrlq_cmd, sizeof(*ctrlq->ctrlq_cmd), 1,
597		    BUS_DMA_WRITE, "control command");
598		C_L(ctrlq->ctrlq_status_dmamap,
599		    ctrlq->ctrlq_status, sizeof(*ctrlq->ctrlq_status), 1,
600		    BUS_DMA_READ, "control status");
601
602		/* control vq rx mode command parameter */
603		C_L(ctrlq->ctrlq_rx_dmamap,
604		    ctrlq->ctrlq_rx, sizeof(*ctrlq->ctrlq_rx), 1,
605		    BUS_DMA_WRITE, "rx mode control command");
606
607		/* multiqueue set command */
608		C_L(ctrlq->ctrlq_mq_dmamap,
609		    ctrlq->ctrlq_mq, sizeof(*ctrlq->ctrlq_mq), 1,
610		    BUS_DMA_WRITE, "multiqueue set command");
611
612		/* control vq MAC filter table for unicast */
613		/* do not load now since its length is variable */
614		C(ctrlq->ctrlq_tbl_uc_dmamap,
615		    sizeof(*ctrlq->ctrlq_mac_tbl_uc) + 0, 1,
616		    "unicast MAC address filter command");
617
618		/* control vq MAC filter table for multicast */
619		C(ctrlq->ctrlq_tbl_mc_dmamap,
620		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
621		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES, 1,
622		    "multicast MAC address filter command");
623	}
624#undef C_L
625#undef C
626
627	return 0;
628
629err_reqs:
630#define D(map)								\
631	do {								\
632		if (map) {						\
633			bus_dmamap_destroy(virtio_dmat(vsc), map);	\
634			map = NULL;					\
635		}							\
636	} while (0)
637	D(ctrlq->ctrlq_tbl_mc_dmamap);
638	D(ctrlq->ctrlq_tbl_uc_dmamap);
639	D(ctrlq->ctrlq_rx_dmamap);
640	D(ctrlq->ctrlq_status_dmamap);
641	D(ctrlq->ctrlq_cmd_dmamap);
642	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
643		rxq = &sc->sc_rxq[qid];
644		txq = &sc->sc_txq[qid];
645
646		for (i = 0; i < txq->txq_vq->vq_num; i++) {
647			D(txq->txq_dmamaps[i]);
648			D(txq->txq_hdr_dmamaps[i]);
649		}
650		for (i = 0; i < rxq->rxq_vq->vq_num; i++) {
651			D(rxq->rxq_dmamaps[i]);
652			D(rxq->rxq_hdr_dmamaps[i]);
653		}
654	}
655#undef D
656	if (sc->sc_kmem) {
657		kmem_free(sc->sc_kmem, allocsize2);
658		sc->sc_kmem = NULL;
659	}
660	bus_dmamem_unmap(virtio_dmat(vsc), sc->sc_dmamem, allocsize);
661err_dmamem_alloc:
662	bus_dmamem_free(virtio_dmat(vsc), &sc->sc_hdr_segs[0], 1);
663err_none:
664	return -1;
665}
666
667static void
668vioif_attach(device_t parent, device_t self, void *aux)
669{
670	struct vioif_softc *sc = device_private(self);
671	struct virtio_softc *vsc = device_private(parent);
672	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
673	struct vioif_txqueue *txq;
674	struct vioif_rxqueue *rxq;
675	uint32_t features, req_features;
676	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
677	u_int softint_flags;
678	int r, i, nvqs=0, req_flags;
679
680	if (virtio_child(vsc) != NULL) {
681		aprint_normal(": child already attached for %s; "
682		    "something wrong...\n", device_xname(parent));
683		return;
684	}
685
686	sc->sc_dev = self;
687	sc->sc_virtio = vsc;
688	sc->sc_link_active = false;
689
690	sc->sc_max_nvq_pairs = 1;
691	sc->sc_req_nvq_pairs = 1;
692	sc->sc_act_nvq_pairs = 1;
693
694	req_flags = 0;
695
696#ifdef VIOIF_MPSAFE
697	req_flags |= VIRTIO_F_PCI_INTR_MPSAFE;
698#endif
699#ifdef VIOIF_SOFTINT_INTR
700	req_flags |= VIRTIO_F_PCI_INTR_SOFTINT;
701#endif
702	req_flags |= VIRTIO_F_PCI_INTR_MSIX;
703
704	req_features =
705	    VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | VIRTIO_NET_F_CTRL_VQ |
706	    VIRTIO_NET_F_CTRL_RX | VIRTIO_F_NOTIFY_ON_EMPTY;
707#ifdef VIOIF_MULTIQ
708	req_features |= VIRTIO_NET_F_MQ;
709#endif
710	virtio_child_attach_start(vsc, self, IPL_NET, NULL,
711	    vioif_config_change, virtio_vq_intr, req_flags,
712	    req_features, VIRTIO_NET_FLAG_BITS);
713
714	features = virtio_features(vsc);
715
716	if (features & VIRTIO_NET_F_MAC) {
717		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
718			sc->sc_mac[i] = virtio_read_device_config_1(vsc,
719			    VIRTIO_NET_CONFIG_MAC + i);
720		}
721	} else {
722		/* code stolen from sys/net/if_tap.c */
723		struct timeval tv;
724		uint32_t ui;
725		getmicrouptime(&tv);
726		ui = (tv.tv_sec ^ tv.tv_usec) & 0xffffff;
727		memcpy(sc->sc_mac+3, (uint8_t *)&ui, 3);
728		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
729			virtio_write_device_config_1(vsc,
730			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
731		}
732	}
733
734	aprint_normal_dev(self, "Ethernet address %s\n",
735	    ether_sprintf(sc->sc_mac));
736
737	if ((features & VIRTIO_NET_F_CTRL_VQ) &&
738	    (features & VIRTIO_NET_F_CTRL_RX)) {
739		sc->sc_has_ctrl = true;
740
741		cv_init(&ctrlq->ctrlq_wait, "ctrl_vq");
742		mutex_init(&ctrlq->ctrlq_wait_lock, MUTEX_DEFAULT, IPL_NET);
743		ctrlq->ctrlq_inuse = FREE;
744	} else {
745		sc->sc_has_ctrl = false;
746	}
747
748	if (sc->sc_has_ctrl && (features & VIRTIO_NET_F_MQ)) {
749		sc->sc_max_nvq_pairs = virtio_read_device_config_2(vsc,
750		    VIRTIO_NET_CONFIG_MAX_VQ_PAIRS);
751
752		if (sc->sc_max_nvq_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
753			goto err;
754
755		/* Limit the number of queue pairs to use */
756		sc->sc_req_nvq_pairs = MIN(sc->sc_max_nvq_pairs, ncpu);
757	}
758
759	vioif_alloc_queues(sc);
760	virtio_child_attach_set_vqs(vsc, sc->sc_vqs, sc->sc_req_nvq_pairs);
761
762#ifdef VIOIF_MPSAFE
763	softint_flags = SOFTINT_NET | SOFTINT_MPSAFE;
764#else
765	softint_flags = SOFTINT_NET;
766#endif
767
768	/*
769	 * Allocate the virtqueues
770	 */
771	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
772		rxq = &sc->sc_rxq[i];
773		txq = &sc->sc_txq[i];
774		char qname[32];
775
776		rxq->rxq_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
777
778		rxq->rxq_softint = softint_establish(softint_flags,
779		    vioif_rx_softint, rxq);
780		if (rxq->rxq_softint == NULL) {
781			aprint_error_dev(self, "cannot establish rx softint\n");
782			goto err;
783		}
784		snprintf(qname, sizeof(qname), "rx%d", i);
785		r = virtio_alloc_vq(vsc, rxq->rxq_vq, nvqs,
786		    MCLBYTES+sizeof(struct virtio_net_hdr), 2, qname);
787		if (r != 0)
788			goto err;
789		nvqs++;
790		rxq->rxq_vq->vq_done = vioif_rx_vq_done;
791		rxq->rxq_vq->vq_done_ctx = (void *)rxq;
792		rxq->rxq_stopping = true;
793
794		txq->txq_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
795		txq->txq_deferred_transmit = softint_establish(softint_flags,
796		    vioif_deferred_transmit, txq);
797		if (txq->txq_deferred_transmit == NULL) {
798			aprint_error_dev(self, "cannot establish tx softint\n");
799			goto err;
800		}
801		snprintf(qname, sizeof(qname), "tx%d", i);
802		r = virtio_alloc_vq(vsc, txq->txq_vq, nvqs,
803		    sizeof(struct virtio_net_hdr)
804		    + (ETHER_MAX_LEN - ETHER_HDR_LEN),
805		    VIRTIO_NET_TX_MAXNSEGS + 1, qname);
806		if (r != 0)
807			goto err;
808		nvqs++;
809		txq->txq_vq->vq_done = vioif_tx_vq_done;
810		txq->txq_vq->vq_done_ctx = (void *)txq;
811		txq->txq_link_active = sc->sc_link_active;
812		txq->txq_stopping = false;
813		txq->txq_intrq = pcq_create(txq->txq_vq->vq_num, KM_SLEEP);
814	}
815
816	if (sc->sc_has_ctrl) {
817		/*
818		 * Allocate a virtqueue for the control channel
819		 */
820		r = virtio_alloc_vq(vsc, ctrlq->ctrlq_vq, nvqs,
821		    NBPG, 1, "control");
822		if (r != 0) {
823			aprint_error_dev(self, "failed to allocate "
824			    "a virtqueue for control channel, error code %d\n",
825			    r);
826
827			sc->sc_has_ctrl = false;
828			cv_destroy(&ctrlq->ctrlq_wait);
829			mutex_destroy(&ctrlq->ctrlq_wait_lock);
830		} else {
831			nvqs++;
832			ctrlq->ctrlq_vq->vq_done = vioif_ctrl_vq_done;
833			ctrlq->ctrlq_vq->vq_done_ctx = (void *) ctrlq;
834		}
835	}
836
837	sc->sc_ctl_softint = softint_establish(softint_flags,
838	    vioif_ctl_softint, sc);
839	if (sc->sc_ctl_softint == NULL) {
840		aprint_error_dev(self, "cannot establish ctl softint\n");
841		goto err;
842	}
843
844	if (vioif_alloc_mems(sc) < 0)
845		goto err;
846
847	if (virtio_child_attach_finish(vsc) != 0)
848		goto err;
849
850	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
851	ifp->if_softc = sc;
852	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
853#ifdef VIOIF_MPSAFE
854	ifp->if_extflags = IFEF_MPSAFE;
855#endif
856	ifp->if_start = vioif_start;
857	if (sc->sc_req_nvq_pairs > 1)
858		ifp->if_transmit = vioif_transmit;
859	ifp->if_ioctl = vioif_ioctl;
860	ifp->if_init = vioif_init;
861	ifp->if_stop = vioif_stop;
862	ifp->if_capabilities = 0;
863	ifp->if_watchdog = vioif_watchdog;
864	txq = &sc->sc_txq[0];
865	IFQ_SET_MAXLEN(&ifp->if_snd, MAX(txq->txq_vq->vq_num, IFQ_MAXLEN));
866	IFQ_SET_READY(&ifp->if_snd);
867
868	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
869
870	if_attach(ifp);
871	if_deferred_start_init(ifp, NULL);
872	ether_ifattach(ifp, sc->sc_mac);
873
874	return;
875
876err:
877	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
878		rxq = &sc->sc_rxq[i];
879		txq = &sc->sc_txq[i];
880
881		if (rxq->rxq_lock) {
882			mutex_obj_free(rxq->rxq_lock);
883			rxq->rxq_lock = NULL;
884		}
885
886		if (rxq->rxq_softint) {
887			softint_disestablish(rxq->rxq_softint);
888			rxq->rxq_softint = NULL;
889		}
890
891		if (txq->txq_lock) {
892			mutex_obj_free(txq->txq_lock);
893			txq->txq_lock = NULL;
894		}
895
896		if (txq->txq_deferred_transmit) {
897			softint_disestablish(txq->txq_deferred_transmit);
898			txq->txq_deferred_transmit = NULL;
899		}
900
901		if (txq->txq_intrq) {
902			pcq_destroy(txq->txq_intrq);
903			txq->txq_intrq = NULL;
904		}
905	}
906
907	if (sc->sc_has_ctrl) {
908		cv_destroy(&ctrlq->ctrlq_wait);
909		mutex_destroy(&ctrlq->ctrlq_wait_lock);
910	}
911
912	while (nvqs > 0)
913		virtio_free_vq(vsc, &sc->sc_vqs[--nvqs]);
914
915	vioif_free_queues(sc);
916
917	virtio_child_attach_failed(vsc);
918	return;
919}
920
921/* interrupts must be working before we can turn promiscuous mode off */
922static void
923vioif_deferred_init(device_t self)
924{
925	struct vioif_softc *sc = device_private(self);
926	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
927	int r;
928
929	if (ifp->if_flags & IFF_PROMISC)
930		return;
931
932	r = vioif_set_promisc(sc, false);
933	if (r != 0)
934		aprint_error_dev(self, "resetting promisc mode failed, "
935		    "error code %d\n", r);
936}
937
938static void
939vioif_enable_interrupt_vqpairs(struct vioif_softc *sc)
940{
941	struct virtio_softc *vsc = sc->sc_virtio;
942	struct vioif_txqueue *txq;
943	struct vioif_rxqueue *rxq;
944	int i;
945
946	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
947		txq = &sc->sc_txq[i];
948		rxq = &sc->sc_rxq[i];
949
950		virtio_start_vq_intr(vsc, txq->txq_vq);
951		virtio_start_vq_intr(vsc, rxq->rxq_vq);
952	}
953}
954
955static void
956vioif_disable_interrupt_vqpairs(struct vioif_softc *sc)
957{
958	struct virtio_softc *vsc = sc->sc_virtio;
959	struct vioif_txqueue *txq;
960	struct vioif_rxqueue *rxq;
961	int i;
962
963	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
964		txq = &sc->sc_txq[i];
965		rxq = &sc->sc_rxq[i];
966
967		virtio_stop_vq_intr(vsc, txq->txq_vq);
968		virtio_stop_vq_intr(vsc, rxq->rxq_vq);
969	}
970}
971
972/*
973 * Interface functions for ifnet
974 */
975static int
976vioif_init(struct ifnet *ifp)
977{
978	struct vioif_softc *sc = ifp->if_softc;
979	struct virtio_softc *vsc = sc->sc_virtio;
980	struct vioif_rxqueue *rxq;
981	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
982	int r, i;
983
984	vioif_stop(ifp, 0);
985
986	virtio_reinit_start(vsc);
987	virtio_negotiate_features(vsc, virtio_features(vsc));
988
989	for (i = 0; i < sc->sc_req_nvq_pairs; i++) {
990		rxq = &sc->sc_rxq[i];
991
992		/* Must be set to false before calling vioif_populate_rx_mbufs() */
993		rxq->rxq_stopping = false;
994		vioif_populate_rx_mbufs(rxq);
995	}
996
997	virtio_reinit_end(vsc);
998
999	if (sc->sc_has_ctrl)
1000		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
1001
1002	r = vioif_ctrl_mq_vq_pairs_set(sc, sc->sc_req_nvq_pairs);
1003	if (r == 0)
1004		sc->sc_act_nvq_pairs = sc->sc_req_nvq_pairs;
1005	else
1006		sc->sc_act_nvq_pairs = 1;
1007
1008	for (i = 0; i < sc->sc_act_nvq_pairs; i++)
1009		sc->sc_txq[i].txq_stopping = false;
1010
1011	vioif_enable_interrupt_vqpairs(sc);
1012
1013	if (!sc->sc_deferred_init_done) {
1014		sc->sc_deferred_init_done = 1;
1015		if (sc->sc_has_ctrl)
1016			vioif_deferred_init(sc->sc_dev);
1017	}
1018
1019	vioif_update_link_status(sc);
1020	ifp->if_flags |= IFF_RUNNING;
1021	ifp->if_flags &= ~IFF_OACTIVE;
1022	vioif_rx_filter(sc);
1023
1024	return 0;
1025}
1026
1027static void
1028vioif_stop(struct ifnet *ifp, int disable)
1029{
1030	struct vioif_softc *sc = ifp->if_softc;
1031	struct virtio_softc *vsc = sc->sc_virtio;
1032	struct vioif_txqueue *txq;
1033	struct vioif_rxqueue *rxq;
1034	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1035	int i;
1036
1037	/* Take the locks to ensure that ongoing TX/RX finish */
1038	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1039		txq = &sc->sc_txq[i];
1040		rxq = &sc->sc_rxq[i];
1041
1042		mutex_enter(txq->txq_lock);
1043		txq->txq_stopping = true;
1044		mutex_exit(txq->txq_lock);
1045
1046		mutex_enter(rxq->rxq_lock);
1047		rxq->rxq_stopping = true;
1048		mutex_exit(rxq->rxq_lock);
1049	}
1050
1051	/* disable interrupts */
1052	vioif_disable_interrupt_vqpairs(sc);
1053
1054	if (sc->sc_has_ctrl)
1055		virtio_stop_vq_intr(vsc, ctrlq->ctrlq_vq);
1056
1057	/* only way to stop I/O and DMA is resetting... */
1058	virtio_reset(vsc);
1059	for (i = 0; i < sc->sc_act_nvq_pairs; i++)
1060		vioif_rx_deq(&sc->sc_rxq[i]);
1061
1062	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1063	sc->sc_link_active = false;
1064
1065	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1066		txq = &sc->sc_txq[i];
1067		rxq = &sc->sc_rxq[i];
1068
1069		txq->txq_link_active = false;
1070
1071		if (disable)
1072			vioif_rx_drain(rxq);
1073
1074		vioif_tx_drain(txq);
1075	}
1076}
1077
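/*
 * Common send path shared by if_start (is_transmit == false, draining
 * if_snd) and if_transmit/deferred transmit (is_transmit == true, draining
 * txq_intrq).  Each packet is enqueued as two device-readable buffers, the
 * per-slot virtio_net_hdr followed by the payload dmamap, and the virtqueue
 * is notified once at the end via virtio_enqueue_commit(..., -1, true).
 */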
1078static void
1079vioif_send_common_locked(struct ifnet *ifp, struct vioif_txqueue *txq,
1080    bool is_transmit)
1081{
1082	struct vioif_softc *sc = ifp->if_softc;
1083	struct virtio_softc *vsc = sc->sc_virtio;
1084	struct virtqueue *vq = txq->txq_vq;
1085	struct mbuf *m;
1086	int queued = 0;
1087
1088	KASSERT(mutex_owned(txq->txq_lock));
1089
1090	if ((ifp->if_flags & IFF_RUNNING) == 0)
1091		return;
1092
1093	if (!txq->txq_link_active || txq->txq_stopping)
1094		return;
1095
1096	if ((ifp->if_flags & IFF_OACTIVE) != 0 && !is_transmit)
1097		return;
1098
1099	for (;;) {
1100		int slot, r;
1101
1102		if (is_transmit)
1103			m = pcq_get(txq->txq_intrq);
1104		else
1105			IFQ_DEQUEUE(&ifp->if_snd, m);
1106
1107		if (m == NULL)
1108			break;
1109
1110		r = virtio_enqueue_prep(vsc, vq, &slot);
1111		if (r == EAGAIN) {
1112			ifp->if_flags |= IFF_OACTIVE;
1113			m_freem(m);
1114			break;
1115		}
1116		if (r != 0)
1117			panic("enqueue_prep for a tx buffer");
1118
1119		r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1120		    txq->txq_dmamaps[slot], m, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1121		if (r != 0) {
1122			/* maybe just too fragmented */
1123			struct mbuf *newm;
1124
1125			newm = m_defrag(m, M_NOWAIT);
1126			if (newm == NULL) {
1127				aprint_error_dev(sc->sc_dev,
1128				    "m_defrag() failed\n");
1129				goto skip;
1130			}
1131
1132			m = newm;
1133			r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1134			    txq->txq_dmamaps[slot], m,
1135			    BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1136			if (r != 0) {
1137				aprint_error_dev(sc->sc_dev,
1138				    "tx dmamap load failed, error code %d\n",
1139				    r);
1140skip:
1141				m_freem(m);
1142				virtio_enqueue_abort(vsc, vq, slot);
1143				continue;
1144			}
1145		}
1146
1147		/* This should actually never fail */
1148		r = virtio_enqueue_reserve(vsc, vq, slot,
1149					txq->txq_dmamaps[slot]->dm_nsegs + 1);
1150		if (r != 0) {
1151			aprint_error_dev(sc->sc_dev,
1152			    "virtio_enqueue_reserve failed, error code %d\n",
1153			    r);
1154			bus_dmamap_unload(virtio_dmat(vsc),
1155					  txq->txq_dmamaps[slot]);
1156			/* slot already freed by virtio_enqueue_reserve */
1157			m_freem(m);
1158			continue;
1159		}
1160
1161		txq->txq_mbufs[slot] = m;
1162
1163		memset(&txq->txq_hdrs[slot], 0, sizeof(struct virtio_net_hdr));
1164		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_dmamaps[slot],
1165		    0, txq->txq_dmamaps[slot]->dm_mapsize,
1166		    BUS_DMASYNC_PREWRITE);
1167		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_hdr_dmamaps[slot],
1168		    0, txq->txq_hdr_dmamaps[slot]->dm_mapsize,
1169		    BUS_DMASYNC_PREWRITE);
1170		virtio_enqueue(vsc, vq, slot, txq->txq_hdr_dmamaps[slot], true);
1171		virtio_enqueue(vsc, vq, slot, txq->txq_dmamaps[slot], true);
1172		virtio_enqueue_commit(vsc, vq, slot, false);
1173
1174		queued++;
1175		bpf_mtap(ifp, m, BPF_D_OUT);
1176	}
1177
1178	if (queued > 0) {
1179		virtio_enqueue_commit(vsc, vq, -1, true);
1180		ifp->if_timer = 5;
1181	}
1182}
1183
1184static void
1185vioif_start_locked(struct ifnet *ifp, struct vioif_txqueue *txq)
1186{
1187
1188	/*
1189	 * ifp->if_obytes and ifp->if_omcasts are added in if_transmit()@if.c.
1190	 */
1191	vioif_send_common_locked(ifp, txq, false);
1192
1193}
1194
1195static void
1196vioif_start(struct ifnet *ifp)
1197{
1198	struct vioif_softc *sc = ifp->if_softc;
1199	struct vioif_txqueue *txq = &sc->sc_txq[0];
1200
1201#ifdef VIOIF_MPSAFE
1202	KASSERT(if_is_mpsafe(ifp));
1203#endif
1204
1205	mutex_enter(txq->txq_lock);
1206	if (!txq->txq_stopping)
1207		vioif_start_locked(ifp, txq);
1208	mutex_exit(txq->txq_lock);
1209}
1210
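/*
 * Pick the tx queue for an outgoing packet: simply the sending CPU's index
 * modulo the number of active queue pairs, so packets transmitted from one
 * CPU always land on the same tx virtqueue.
 */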
1211static inline int
1212vioif_select_txqueue(struct ifnet *ifp, struct mbuf *m)
1213{
1214	struct vioif_softc *sc = ifp->if_softc;
1215	u_int cpuid = cpu_index(curcpu());
1216
1217	return cpuid % sc->sc_act_nvq_pairs;
1218}
1219
1220static void
1221vioif_transmit_locked(struct ifnet *ifp, struct vioif_txqueue *txq)
1222{
1223
1224	vioif_send_common_locked(ifp, txq, true);
1225}
1226
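/*
 * if_transmit entry point, installed only when more than one queue pair is
 * in use.  The packet is stashed in the per-queue pcq and sent immediately
 * only if txq_lock can be taken without blocking; otherwise it stays in the
 * pcq and is drained later by vioif_deferred_transmit(), which is scheduled
 * from the tx completion path.
 */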
1227static int
1228vioif_transmit(struct ifnet *ifp, struct mbuf *m)
1229{
1230	struct vioif_softc *sc = ifp->if_softc;
1231	struct vioif_txqueue *txq;
1232	int qid;
1233
1234	qid = vioif_select_txqueue(ifp, m);
1235	txq = &sc->sc_txq[qid];
1236
1237	if (__predict_false(!pcq_put(txq->txq_intrq, m))) {
1238		m_freem(m);
1239		return ENOBUFS;
1240	}
1241
1242	ifp->if_obytes += m->m_pkthdr.len;
1243	if (m->m_flags & M_MCAST)
1244		ifp->if_omcasts++;
1245
1246	if (mutex_tryenter(txq->txq_lock)) {
1247		if (!txq->txq_stopping)
1248			vioif_transmit_locked(ifp, txq);
1249		mutex_exit(txq->txq_lock);
1250	}
1251
1252	return 0;
1253}
1254
1255static void
1256vioif_deferred_transmit(void *arg)
1257{
1258	struct vioif_txqueue *txq = arg;
1259	struct virtio_softc *vsc = txq->txq_vq->vq_owner;
1260	struct vioif_softc *sc = device_private(virtio_child(vsc));
1261	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1262
1263	if (mutex_tryenter(txq->txq_lock)) {
1264		vioif_send_common_locked(ifp, txq, true);
1265		mutex_exit(txq->txq_lock);
1266	}
1267}
1268
1269static int
1270vioif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1271{
1272	int s, r;
1273
1274	s = splnet();
1275
1276	r = ether_ioctl(ifp, cmd, data);
1277	if ((r == 0 && cmd == SIOCSIFFLAGS) ||
1278	    (r == ENETRESET && (cmd == SIOCADDMULTI || cmd == SIOCDELMULTI))) {
1279		if (ifp->if_flags & IFF_RUNNING)
1280			r = vioif_rx_filter(ifp->if_softc);
1281		else
1282			r = 0;
1283	}
1284
1285	splx(s);
1286
1287	return r;
1288}
1289
1290void
1291vioif_watchdog(struct ifnet *ifp)
1292{
1293	struct vioif_softc *sc = ifp->if_softc;
1294	int i;
1295
1296	if (ifp->if_flags & IFF_RUNNING) {
1297		for (i = 0; i < sc->sc_act_nvq_pairs; i++)
1298			vioif_tx_vq_done(sc->sc_txq[i].txq_vq);
1299	}
1300}
1301
1302
1303/*
1304 * Receive implementation
1305 */
1306/* allocate and initialize a mbuf for receive */
1307static int
1308vioif_add_rx_mbuf(struct vioif_rxqueue *rxq, int i)
1309{
1310	struct virtio_softc *vsc = rxq->rxq_vq->vq_owner;
1311	struct mbuf *m;
1312	int r;
1313
1314	MGETHDR(m, M_DONTWAIT, MT_DATA);
1315	if (m == NULL)
1316		return ENOBUFS;
1317	MCLGET(m, M_DONTWAIT);
1318	if ((m->m_flags & M_EXT) == 0) {
1319		m_freem(m);
1320		return ENOBUFS;
1321	}
1322	rxq->rxq_mbufs[i] = m;
1323	m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
1324	r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1325	    rxq->rxq_dmamaps[i], m, BUS_DMA_READ | BUS_DMA_NOWAIT);
1326	if (r) {
1327		m_freem(m);
1328		rxq->rxq_mbufs[i] = NULL;
1329		return r;
1330	}
1331
1332	return 0;
1333}
1334
1335/* free a mbuf for receive */
1336static void
1337vioif_free_rx_mbuf(struct vioif_rxqueue *rxq, int i)
1338{
1339	struct virtio_softc *vsc = rxq->rxq_vq->vq_owner;
1340
1341	bus_dmamap_unload(virtio_dmat(vsc), rxq->rxq_dmamaps[i]);
1342	m_freem(rxq->rxq_mbufs[i]);
1343	rxq->rxq_mbufs[i] = NULL;
1344}
1345
1346/* add mbufs for all the empty receive slots */
1347static void
1348vioif_populate_rx_mbufs(struct vioif_rxqueue *rxq)
1349{
1350
1351	mutex_enter(rxq->rxq_lock);
1352	vioif_populate_rx_mbufs_locked(rxq);
1353	mutex_exit(rxq->rxq_lock);
1354}
1355
1356static void
1357vioif_populate_rx_mbufs_locked(struct vioif_rxqueue *rxq)
1358{
1359	struct virtqueue *vq = rxq->rxq_vq;
1360	struct virtio_softc *vsc = vq->vq_owner;
1361	struct vioif_softc *sc = device_private(virtio_child(vsc));
1362	int i, r, ndone = 0;
1363
1364	KASSERT(mutex_owned(rxq->rxq_lock));
1365
1366	if (rxq->rxq_stopping)
1367		return;
1368
1369	for (i = 0; i < vq->vq_num; i++) {
1370		int slot;
1371		r = virtio_enqueue_prep(vsc, vq, &slot);
1372		if (r == EAGAIN)
1373			break;
1374		if (r != 0)
1375			panic("enqueue_prep for rx buffers");
1376		if (rxq->rxq_mbufs[slot] == NULL) {
1377			r = vioif_add_rx_mbuf(rxq, slot);
1378			if (r != 0) {
1379				aprint_error_dev(sc->sc_dev,
1380				    "rx mbuf allocation failed, "
1381				    "error code %d\n", r);
1382				break;
1383			}
1384		}
1385		r = virtio_enqueue_reserve(vsc, vq, slot,
1386		    rxq->rxq_dmamaps[slot]->dm_nsegs + 1);
1387		if (r != 0) {
1388			vioif_free_rx_mbuf(rxq, slot);
1389			break;
1390		}
1391		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_hdr_dmamaps[slot],
1392		    0, sizeof(struct virtio_net_hdr), BUS_DMASYNC_PREREAD);
1393		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_dmamaps[slot],
1394		    0, MCLBYTES, BUS_DMASYNC_PREREAD);
1395		virtio_enqueue(vsc, vq, slot, rxq->rxq_hdr_dmamaps[slot],
1396		    false);
1397		virtio_enqueue(vsc, vq, slot, rxq->rxq_dmamaps[slot], false);
1398		virtio_enqueue_commit(vsc, vq, slot, false);
1399		ndone++;
1400	}
1401	if (ndone > 0)
1402		virtio_enqueue_commit(vsc, vq, -1, true);
1403}
1404
1405/* dequeue received packets */
1406static int
1407vioif_rx_deq(struct vioif_rxqueue *rxq)
1408{
1409	int r;
1410
1411	KASSERT(rxq->rxq_stopping);
1412
1413	mutex_enter(rxq->rxq_lock);
1414	r = vioif_rx_deq_locked(rxq);
1415	mutex_exit(rxq->rxq_lock);
1416
1417	return r;
1418}
1419
1420/* dequeue received packets */
1421static int
1422vioif_rx_deq_locked(struct vioif_rxqueue *rxq)
1423{
1424	struct virtqueue *vq = rxq->rxq_vq;
1425	struct virtio_softc *vsc = vq->vq_owner;
1426	struct vioif_softc *sc = device_private(virtio_child(vsc));
1427	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1428	struct mbuf *m;
1429	int r = 0;
1430	int slot, len;
1431
1432	KASSERT(mutex_owned(rxq->rxq_lock));
1433
1434	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
1435		len -= sizeof(struct virtio_net_hdr);
1436		r = 1;
1437		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_hdr_dmamaps[slot],
1438		    0, sizeof(struct virtio_net_hdr), BUS_DMASYNC_POSTREAD);
1439		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_dmamaps[slot],
1440		    0, MCLBYTES, BUS_DMASYNC_POSTREAD);
1441		m = rxq->rxq_mbufs[slot];
1442		KASSERT(m != NULL);
1443		bus_dmamap_unload(virtio_dmat(vsc), rxq->rxq_dmamaps[slot]);
1444		rxq->rxq_mbufs[slot] = NULL;
1445		virtio_dequeue_commit(vsc, vq, slot);
1446		m_set_rcvif(m, ifp);
1447		m->m_len = m->m_pkthdr.len = len;
1448
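		/*
		 * Drop rxq_lock (a spin mutex) while handing the packet to
		 * the network stack, then re-take it to continue dequeueing.
		 */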
1449		mutex_exit(rxq->rxq_lock);
1450		if_percpuq_enqueue(ifp->if_percpuq, m);
1451		mutex_enter(rxq->rxq_lock);
1452
1453		if (rxq->rxq_stopping)
1454			break;
1455	}
1456
1457	return r;
1458}
1459
1460/* rx interrupt; call _dequeue above and schedule a softint */
1461static int
1462vioif_rx_vq_done(struct virtqueue *vq)
1463{
1464	struct vioif_rxqueue *rxq = vq->vq_done_ctx;
1465	int r = 0;
1466
1467#ifdef VIOIF_SOFTINT_INTR
1468	KASSERT(!cpu_intr_p());
1469#endif
1470
1471	mutex_enter(rxq->rxq_lock);
1472
1473	if (rxq->rxq_stopping)
1474		goto out;
1475
1476	r = vioif_rx_deq_locked(rxq);
1477	if (r)
1478#ifdef VIOIF_SOFTINT_INTR
1479		vioif_populate_rx_mbufs_locked(rxq);
1480#else
1481		softint_schedule(rxq->rxq_softint);
1482#endif
1483
1484out:
1485	mutex_exit(rxq->rxq_lock);
1486	return r;
1487}
1488
1489/* softint: enqueue receive requests for new incoming packets */
1490static void
1491vioif_rx_softint(void *arg)
1492{
1493	struct vioif_rxqueue *rxq = arg;
1494
1495	vioif_populate_rx_mbufs(rxq);
1496}
1497
1498/* free all the mbufs; called from if_stop(disable) */
1499static void
1500vioif_rx_drain(struct vioif_rxqueue *rxq)
1501{
1502	struct virtqueue *vq = rxq->rxq_vq;
1503	int i;
1504
1505	for (i = 0; i < vq->vq_num; i++) {
1506		if (rxq->rxq_mbufs[i] == NULL)
1507			continue;
1508		vioif_free_rx_mbuf(rxq, i);
1509	}
1510}
1511
1512
1513/*
1514 * Transmission implementation
1515 */
1516/* actual transmission is done in if_start */
1517/* tx interrupt; dequeue and free mbufs */
1518/*
1519 * the tx interrupt is actually disabled; this is expected to be called
1520 * when the tx vq becomes full and from the watchdog
1521 */
1522static int
1523vioif_tx_vq_done(struct virtqueue *vq)
1524{
1525	struct virtio_softc *vsc = vq->vq_owner;
1526	struct vioif_softc *sc = device_private(virtio_child(vsc));
1527	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1528	struct vioif_txqueue *txq = vq->vq_done_ctx;
1529	int r = 0;
1530
1531	mutex_enter(txq->txq_lock);
1532
1533	if (txq->txq_stopping)
1534		goto out;
1535
1536	r = vioif_tx_vq_done_locked(vq);
1537
1538out:
1539	mutex_exit(txq->txq_lock);
1540	if (r) {
1541		if_schedule_deferred_start(ifp);
1542
1543		KASSERT(txq->txq_deferred_transmit != NULL);
1544		softint_schedule(txq->txq_deferred_transmit);
1545	}
1546	return r;
1547}
1548
1549static int
1550vioif_tx_vq_done_locked(struct virtqueue *vq)
1551{
1552	struct virtio_softc *vsc = vq->vq_owner;
1553	struct vioif_softc *sc = device_private(virtio_child(vsc));
1554	struct vioif_txqueue *txq = vq->vq_done_ctx;
1555	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1556	struct mbuf *m;
1557	int r = 0;
1558	int slot, len;
1559
1560	KASSERT(mutex_owned(txq->txq_lock));
1561
1562	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
1563		r++;
1564		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_hdr_dmamaps[slot],
1565		    0, sizeof(struct virtio_net_hdr), BUS_DMASYNC_POSTWRITE);
1566		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_dmamaps[slot],
1567		    0, txq->txq_dmamaps[slot]->dm_mapsize,
1568		    BUS_DMASYNC_POSTWRITE);
1569		m = txq->txq_mbufs[slot];
1570		bus_dmamap_unload(virtio_dmat(vsc), txq->txq_dmamaps[slot]);
1571		txq->txq_mbufs[slot] = NULL;
1572		virtio_dequeue_commit(vsc, vq, slot);
1573		ifp->if_opackets++;
1574		m_freem(m);
1575	}
1576
1577	if (r)
1578		ifp->if_flags &= ~IFF_OACTIVE;
1579	return r;
1580}
1581
1582/* free all the mbufs already put on vq; called from if_stop(disable) */
1583static void
1584vioif_tx_drain(struct vioif_txqueue *txq)
1585{
1586	struct virtqueue *vq = txq->txq_vq;
1587	struct virtio_softc *vsc = vq->vq_owner;
1588	int i;
1589
1590	KASSERT(txq->txq_stopping);
1591
1592	for (i = 0; i < vq->vq_num; i++) {
1593		if (txq->txq_mbufs[i] == NULL)
1594			continue;
1595		bus_dmamap_unload(virtio_dmat(vsc), txq->txq_dmamaps[i]);
1596		m_freem(txq->txq_mbufs[i]);
1597		txq->txq_mbufs[i] = NULL;
1598	}
1599}
1600
1601/*
1602 * Control vq
1603 */
1604/* issue a VIRTIO_NET_CTRL_RX class command and wait for completion */
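/*
 * The control virtqueue handles one command at a time.  ctrlq_inuse tracks
 * the handshake: vioif_ctrl_acquire() waits for FREE and marks the queue
 * INUSE, vioif_ctrl_vq_done() (the completion interrupt) marks it DONE, and
 * vioif_ctrl_release() returns it to FREE and wakes the next waiter.
 */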
1605static void
1606vioif_ctrl_acquire(struct vioif_softc *sc)
1607{
1608	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1609
1610	mutex_enter(&ctrlq->ctrlq_wait_lock);
1611	while (ctrlq->ctrlq_inuse != FREE)
1612		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
1613	ctrlq->ctrlq_inuse = INUSE;
1614	ctrlq->ctrlq_owner = curlwp;
1615	mutex_exit(&ctrlq->ctrlq_wait_lock);
1616}
1617
1618static void
1619vioif_ctrl_release(struct vioif_softc *sc)
1620{
1621	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1622
1623	KASSERT(ctrlq->ctrlq_inuse != FREE);
1624	KASSERT(ctrlq->ctrlq_owner == curlwp);
1625
1626	mutex_enter(&ctrlq->ctrlq_wait_lock);
1627	ctrlq->ctrlq_inuse = FREE;
1628	ctrlq->ctrlq_owner = NULL;
1629	cv_signal(&ctrlq->ctrlq_wait);
1630	mutex_exit(&ctrlq->ctrlq_wait_lock);
1631}
1632
1633static int
1634vioif_ctrl_load_cmdspec(struct vioif_softc *sc,
1635    struct vioif_ctrl_cmdspec *specs, int nspecs)
1636{
1637	struct virtio_softc *vsc = sc->sc_virtio;
1638	int i, r, loaded;
1639
1640	loaded = 0;
1641	for (i = 0; i < nspecs; i++) {
1642		r = bus_dmamap_load(virtio_dmat(vsc),
1643		    specs[i].dmamap, specs[i].buf, specs[i].bufsize,
1644		    NULL, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1645		if (r) {
1646			aprint_error_dev(sc->sc_dev, "control command dmamap"
1647			    " load failed, error code %d\n", r);
1648			goto err;
1649		}
1650		loaded++;
1651
1652	}
1653
1654	return r;
1655
1656err:
1657	for (i = 0; i < loaded; i++) {
1658		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
1659	}
1660
1661	return r;
1662}
1663
1664static void
1665vioif_ctrl_unload_cmdspec(struct vioif_softc *sc,
1666    struct vioif_ctrl_cmdspec *specs, int nspecs)
1667{
1668	struct virtio_softc *vsc = sc->sc_virtio;
1669	int i;
1670
1671	for (i = 0; i < nspecs; i++) {
1672		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
1673	}
1674}
1675
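/*
 * Build and submit one control command.  The descriptor chain is the command
 * header (device-readable), the caller's parameter buffers from specs[]
 * (device-readable), and the one-byte status (device-writable).  The caller
 * must own the queue via vioif_ctrl_acquire(); this function sleeps on
 * ctrlq_wait until vioif_ctrl_vq_done() signals completion.
 */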
1676static int
1677vioif_ctrl_send_command(struct vioif_softc *sc, uint8_t class, uint8_t cmd,
1678    struct vioif_ctrl_cmdspec *specs, int nspecs)
1679{
1680	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1681	struct virtqueue *vq = ctrlq->ctrlq_vq;
1682	struct virtio_softc *vsc = sc->sc_virtio;
1683	int i, r, slot;
1684
1685	ctrlq->ctrlq_cmd->class = class;
1686	ctrlq->ctrlq_cmd->command = cmd;
1687
1688	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap,
1689	    0, sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_PREWRITE);
1690	for (i = 0; i < nspecs; i++) {
1691		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap,
1692		    0, specs[i].bufsize, BUS_DMASYNC_PREWRITE);
1693	}
1694	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap,
1695	    0, sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_PREREAD);
1696
1697	r = virtio_enqueue_prep(vsc, vq, &slot);
1698	if (r != 0)
1699		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
1700	r = virtio_enqueue_reserve(vsc, vq, slot, nspecs + 2);
1701	if (r != 0)
1702		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
1703	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_cmd_dmamap, true);
1704	for (i = 0; i < nspecs; i++) {
1705		virtio_enqueue(vsc, vq, slot, specs[i].dmamap, true);
1706	}
1707	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_status_dmamap, false);
1708	virtio_enqueue_commit(vsc, vq, slot, true);
1709
1710	/* wait for done */
1711	mutex_enter(&ctrlq->ctrlq_wait_lock);
1712	while (ctrlq->ctrlq_inuse != DONE)
1713		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
1714	mutex_exit(&ctrlq->ctrlq_wait_lock);
1715	/* already dequeued */
1716
1717	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap, 0,
1718	    sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
1719	for (i = 0; i < nspecs; i++) {
1720		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap, 0,
1721		    specs[i].bufsize, BUS_DMASYNC_POSTWRITE);
1722	}
1723	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap, 0,
1724	    sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_POSTREAD);
1725
1726	if (ctrlq->ctrlq_status->ack == VIRTIO_NET_OK)
1727		r = 0;
1728	else {
1729		aprint_error_dev(sc->sc_dev, "failed setting rx mode\n");
1730		r = EIO;
1731	}
1732
1733	return r;
1734}
1735
1736static int
1737vioif_ctrl_rx(struct vioif_softc *sc, int cmd, bool onoff)
1738{
1739	struct virtio_net_ctrl_rx *rx = sc->sc_ctrlq.ctrlq_rx;
1740	struct vioif_ctrl_cmdspec specs[1];
1741	int r;
1742
1743	if (!sc->sc_has_ctrl)
1744		return ENOTSUP;
1745
1746	vioif_ctrl_acquire(sc);
1747
1748	rx->onoff = onoff;
1749	specs[0].dmamap = sc->sc_ctrlq.ctrlq_rx_dmamap;
1750	specs[0].buf = rx;
1751	specs[0].bufsize = sizeof(*rx);
1752
1753	r = vioif_ctrl_send_command(sc, VIRTIO_NET_CTRL_RX, cmd,
1754	    specs, __arraycount(specs));
1755
1756	vioif_ctrl_release(sc);
1757	return r;
1758}
1759
1760static int
1761vioif_set_promisc(struct vioif_softc *sc, bool onoff)
1762{
1763	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, onoff);
1764}
1765
1766static int
1767vioif_set_allmulti(struct vioif_softc *sc, bool onoff)
1768{
1769	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, onoff);
1770}
1771
1772/* issue VIRTIO_NET_CTRL_MAC_TABLE_SET command and wait for completion */
1773static int
1774vioif_set_rx_filter(struct vioif_softc *sc)
1775{
1776	/* filter already set in ctrlq->ctrlq_mac_tbl */
1777	struct virtio_net_ctrl_mac_tbl *mac_tbl_uc, *mac_tbl_mc;
1778	struct vioif_ctrl_cmdspec specs[2];
1779	int nspecs = __arraycount(specs);
1780	int r;
1781
1782	mac_tbl_uc = sc->sc_ctrlq.ctrlq_mac_tbl_uc;
1783	mac_tbl_mc = sc->sc_ctrlq.ctrlq_mac_tbl_mc;
1784
1785	if (!sc->sc_has_ctrl)
1786		return ENOTSUP;
1787
1788	vioif_ctrl_acquire(sc);
1789
1790	specs[0].dmamap = sc->sc_ctrlq.ctrlq_tbl_uc_dmamap;
1791	specs[0].buf = mac_tbl_uc;
1792	specs[0].bufsize = sizeof(*mac_tbl_uc)
1793	    + (ETHER_ADDR_LEN * mac_tbl_uc->nentries);
1794
1795	specs[1].dmamap = sc->sc_ctrlq.ctrlq_tbl_mc_dmamap;
1796	specs[1].buf = mac_tbl_mc;
1797	specs[1].bufsize = sizeof(*mac_tbl_mc)
1798	    + (ETHER_ADDR_LEN * mac_tbl_mc->nentries);
1799
1800	r = vioif_ctrl_load_cmdspec(sc, specs, nspecs);
1801	if (r != 0)
1802		goto out;
1803
1804	r = vioif_ctrl_send_command(sc,
1805	    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET,
1806	    specs, nspecs);
1807
1808	vioif_ctrl_unload_cmdspec(sc, specs, nspecs);
1809
1810out:
1811	vioif_ctrl_release(sc);
1812
1813	return r;
1814}
1815
1816static int
1817vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *sc, int nvq_pairs)
1818{
1819	struct virtio_net_ctrl_mq *mq = sc->sc_ctrlq.ctrlq_mq;
1820	struct vioif_ctrl_cmdspec specs[1];
1821	int r;
1822
1823	if (!sc->sc_has_ctrl)
1824		return ENOTSUP;
1825
1826	if (nvq_pairs <= 1)
1827		return EINVAL;
1828
1829	vioif_ctrl_acquire(sc);
1830
1831	mq->virtqueue_pairs = nvq_pairs;
1832	specs[0].dmamap = sc->sc_ctrlq.ctrlq_mq_dmamap;
1833	specs[0].buf = mq;
1834	specs[0].bufsize = sizeof(*mq);
1835
1836	r = vioif_ctrl_send_command(sc,
1837	    VIRTIO_NET_CTRL_MQ, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
1838	    specs, __arraycount(specs));
1839
1840	vioif_ctrl_release(sc);
1841
1842	return r;
1843}
1844
1845/* ctrl vq interrupt; wake up the command issuer */
1846static int
1847vioif_ctrl_vq_done(struct virtqueue *vq)
1848{
1849	struct virtio_softc *vsc = vq->vq_owner;
1850	struct vioif_ctrlqueue *ctrlq = vq->vq_done_ctx;
1851	int r, slot;
1852
1853	r = virtio_dequeue(vsc, vq, &slot, NULL);
1854	if (r == ENOENT)
1855		return 0;
1856	virtio_dequeue_commit(vsc, vq, slot);
1857
1858	mutex_enter(&ctrlq->ctrlq_wait_lock);
1859	ctrlq->ctrlq_inuse = DONE;
1860	cv_signal(&ctrlq->ctrlq_wait);
1861	mutex_exit(&ctrlq->ctrlq_wait_lock);
1862
1863	return 1;
1864}
1865
1866/*
1867 * If IFF_PROMISC is requested, set promiscuous mode.
1868 * If the multicast filter is small enough (<= MAXENTRIES), set the rx filter.
1869 * If the multicast list is too large, use ALLMULTI instead.
1870 */
1871/*
1872 * If setting the rx filter fails, fall back to ALLMULTI.
1873 * If ALLMULTI fails, fall back to PROMISC.
1874 */
1875static int
1876vioif_rx_filter(struct vioif_softc *sc)
1877{
1878	struct ethercom *ec = &sc->sc_ethercom;
1879	struct ifnet *ifp = &ec->ec_if;
1880	struct ether_multi *enm;
1881	struct ether_multistep step;
1882	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1883	int nentries;
1884	int promisc = 0, allmulti = 0, rxfilter = 0;
1885	int r;
1886
1887	if (!sc->sc_has_ctrl) {	/* no ctrl vq; always promisc */
1888		ifp->if_flags |= IFF_PROMISC;
1889		return 0;
1890	}
1891
1892	if (ifp->if_flags & IFF_PROMISC) {
1893		promisc = 1;
1894		goto set;
1895	}
1896
1897	nentries = -1;
1898	ETHER_LOCK(ec);
1899	ETHER_FIRST_MULTI(step, ec, enm);
1900	while (nentries++, enm != NULL) {
1901		if (nentries >= VIRTIO_NET_CTRL_MAC_MAXENTRIES) {
1902			allmulti = 1;
1903			goto set_unlock;
1904		}
1905		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
1906			allmulti = 1;
1907			goto set_unlock;
1908		}
1909		memcpy(ctrlq->ctrlq_mac_tbl_mc->macs[nentries],
1910		    enm->enm_addrlo, ETHER_ADDR_LEN);
1911		ETHER_NEXT_MULTI(step, enm);
1912	}
1913	rxfilter = 1;
1914
1915set_unlock:
1916	ETHER_UNLOCK(ec);
1917
1918set:
1919	if (rxfilter) {
1920		ctrlq->ctrlq_mac_tbl_uc->nentries = 0;
1921		ctrlq->ctrlq_mac_tbl_mc->nentries = nentries;
1922		r = vioif_set_rx_filter(sc);
1923		if (r != 0) {
1924			rxfilter = 0;
1925			allmulti = 1; /* fallback */
1926		}
1927	} else {
1928		/* remove rx filter */
1929		ctrlq->ctrlq_mac_tbl_uc->nentries = 0;
1930		ctrlq->ctrlq_mac_tbl_mc->nentries = 0;
1931		r = vioif_set_rx_filter(sc);
1932		/* what to do on failure? */
1933	}
1934	if (allmulti) {
1935		r = vioif_set_allmulti(sc, true);
1936		if (r != 0) {
1937			allmulti = 0;
1938			promisc = 1; /* fallback */
1939		}
1940	} else {
1941		r = vioif_set_allmulti(sc, false);
1942		/* what to do on failure? */
1943	}
1944	if (promisc) {
1945		r = vioif_set_promisc(sc, true);
1946	} else {
1947		r = vioif_set_promisc(sc, false);
1948	}
1949
1950	return r;
1951}
1952
1953static bool
1954vioif_is_link_up(struct vioif_softc *sc)
1955{
1956	struct virtio_softc *vsc = sc->sc_virtio;
1957	uint16_t status;
1958
1959	if (virtio_features(vsc) & VIRTIO_NET_F_STATUS)
1960		status = virtio_read_device_config_2(vsc,
1961		    VIRTIO_NET_CONFIG_STATUS);
1962	else
1963		status = VIRTIO_NET_S_LINK_UP;
1964
1965	return ((status & VIRTIO_NET_S_LINK_UP) != 0);
1966}
1967
1968/* change link status */
1969static void
1970vioif_update_link_status(struct vioif_softc *sc)
1971{
1972	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1973	struct vioif_txqueue *txq;
1974	bool active, changed;
1975	int link, i;
1976
1977	active = vioif_is_link_up(sc);
1978	changed = false;
1979
1980	if (active) {
1981		if (!sc->sc_link_active)
1982			changed = true;
1983
1984		link = LINK_STATE_UP;
1985		sc->sc_link_active = true;
1986	} else {
1987		if (sc->sc_link_active)
1988			changed = true;
1989
1990		link = LINK_STATE_DOWN;
1991		sc->sc_link_active = false;
1992	}
1993
1994	if (changed) {
1995		for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1996			txq = &sc->sc_txq[i];
1997
1998			mutex_enter(txq->txq_lock);
1999			txq->txq_link_active = sc->sc_link_active;
2000			mutex_exit(txq->txq_lock);
2001		}
2002
2003		if_link_state_change(ifp, link);
2004	}
2005}
2006
2007static int
2008vioif_config_change(struct virtio_softc *vsc)
2009{
2010	struct vioif_softc *sc = device_private(virtio_child(vsc));
2011
2012#ifdef VIOIF_SOFTINT_INTR
2013	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2014#endif
2015
2016#ifdef VIOIF_SOFTINT_INTR
2017	KASSERT(!cpu_intr_p());
2018	vioif_update_link_status(sc);
2019	vioif_start(ifp);
2020#else
2021	softint_schedule(sc->sc_ctl_softint);
2022#endif
2023
2024	return 0;
2025}
2026
2027static void
2028vioif_ctl_softint(void *arg)
2029{
2030	struct vioif_softc *sc = arg;
2031	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2032
2033	vioif_update_link_status(sc);
2034	vioif_start(ifp);
2035}
2036
2037MODULE(MODULE_CLASS_DRIVER, if_vioif, "virtio");
2038
2039#ifdef _MODULE
2040#include "ioconf.c"
2041#endif
2042
2043static int
2044if_vioif_modcmd(modcmd_t cmd, void *opaque)
2045{
2046	int error = 0;
2047
2048#ifdef _MODULE
2049	switch (cmd) {
2050	case MODULE_CMD_INIT:
2051		error = config_init_component(cfdriver_ioconf_if_vioif,
2052		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2053		break;
2054	case MODULE_CMD_FINI:
2055		error = config_fini_component(cfdriver_ioconf_if_vioif,
2056		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2057		break;
2058	default:
2059		error = ENOTTY;
2060		break;
2061	}
2062#endif
2063
2064	return error;
2065}
2066