1/*	$NetBSD: if_vioif.c,v 1.54 2020/05/25 07:52:16 yamaguchi Exp $	*/
2
3/*
4 * Copyright (c) 2010 Minoura Makoto.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__KERNEL_RCSID(0, "$NetBSD: if_vioif.c,v 1.54 2020/05/25 07:52:16 yamaguchi Exp $");
30
31#ifdef _KERNEL_OPT
32#include "opt_net_mpsafe.h"
33#endif
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/kernel.h>
38#include <sys/bus.h>
39#include <sys/condvar.h>
40#include <sys/device.h>
41#include <sys/intr.h>
42#include <sys/kmem.h>
43#include <sys/mbuf.h>
44#include <sys/mutex.h>
45#include <sys/sockio.h>
46#include <sys/cpu.h>
47#include <sys/module.h>
48#include <sys/pcq.h>
49
50#include <dev/pci/virtioreg.h>
51#include <dev/pci/virtiovar.h>
52
53#include <net/if.h>
54#include <net/if_media.h>
55#include <net/if_ether.h>
56
57#include <net/bpf.h>
58
59#include "ioconf.h"
60
61#ifdef NET_MPSAFE
62#define VIOIF_MPSAFE	1
63#define VIOIF_MULTIQ	1
64#endif
65
66/*
67 * if_vioifreg.h:
68 */
69/* Configuration registers */
70#define VIRTIO_NET_CONFIG_MAC		0 /* 8bit x 6byte */
71#define VIRTIO_NET_CONFIG_STATUS	6 /* 16bit */
72#define VIRTIO_NET_CONFIG_MAX_VQ_PAIRS	8 /* 16bit */
73
74/* Feature bits */
75#define VIRTIO_NET_F_CSUM		__BIT(0)
76#define VIRTIO_NET_F_GUEST_CSUM		__BIT(1)
77#define VIRTIO_NET_F_MAC		__BIT(5)
78#define VIRTIO_NET_F_GSO		__BIT(6)
79#define VIRTIO_NET_F_GUEST_TSO4		__BIT(7)
80#define VIRTIO_NET_F_GUEST_TSO6		__BIT(8)
81#define VIRTIO_NET_F_GUEST_ECN		__BIT(9)
82#define VIRTIO_NET_F_GUEST_UFO		__BIT(10)
83#define VIRTIO_NET_F_HOST_TSO4		__BIT(11)
84#define VIRTIO_NET_F_HOST_TSO6		__BIT(12)
85#define VIRTIO_NET_F_HOST_ECN		__BIT(13)
86#define VIRTIO_NET_F_HOST_UFO		__BIT(14)
87#define VIRTIO_NET_F_MRG_RXBUF		__BIT(15)
88#define VIRTIO_NET_F_STATUS		__BIT(16)
89#define VIRTIO_NET_F_CTRL_VQ		__BIT(17)
90#define VIRTIO_NET_F_CTRL_RX		__BIT(18)
91#define VIRTIO_NET_F_CTRL_VLAN		__BIT(19)
92#define VIRTIO_NET_F_CTRL_RX_EXTRA	__BIT(20)
93#define VIRTIO_NET_F_GUEST_ANNOUNCE	__BIT(21)
94#define VIRTIO_NET_F_MQ			__BIT(22)
95
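/*
 * snprintb-style bit description string for the feature bits above, handed
 * to virtio_child_attach_start() so the negotiated features can be printed
 * symbolically.  Note that the bit numbers are 1-origin: "\x17" (23) labels
 * VIRTIO_NET_F_MQ, i.e. __BIT(22).
 */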
96#define VIRTIO_NET_FLAG_BITS \
97	VIRTIO_COMMON_FLAG_BITS \
98	"\x17""MQ" \
99	"\x16""GUEST_ANNOUNCE" \
100	"\x15""CTRL_RX_EXTRA" \
101	"\x14""CTRL_VLAN" \
102	"\x13""CTRL_RX" \
103	"\x12""CTRL_VQ" \
104	"\x11""STATUS" \
105	"\x10""MRG_RXBUF" \
106	"\x0f""HOST_UFO" \
107	"\x0e""HOST_ECN" \
108	"\x0d""HOST_TSO6" \
109	"\x0c""HOST_TSO4" \
110	"\x0b""GUEST_UFO" \
111	"\x0a""GUEST_ECN" \
112	"\x09""GUEST_TSO6" \
113	"\x08""GUEST_TSO4" \
114	"\x07""GSO" \
115	"\x06""MAC" \
116	"\x02""GUEST_CSUM" \
117	"\x01""CSUM"
118
119/* Status */
120#define VIRTIO_NET_S_LINK_UP	1
121
122/* Packet header structure */
123struct virtio_net_hdr {
124	uint8_t		flags;
125	uint8_t		gso_type;
126	uint16_t	hdr_len;
127	uint16_t	gso_size;
128	uint16_t	csum_start;
129	uint16_t	csum_offset;
130#if 0
131	uint16_t	num_buffers; /* if VIRTIO_NET_F_MRG_RXBUF enabled */
132#endif
133} __packed;
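/*
 * One such header travels in front of every frame, in its own descriptor,
 * on both the rx and the tx rings.  The num_buffers field is only present
 * when VIRTIO_NET_F_MRG_RXBUF has been negotiated; this driver never
 * requests that feature, hence the #if 0.
 */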
134
135#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
136#define VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
137#define VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
138#define VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
139#define VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
140#define VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */
141
142#define VIRTIO_NET_MAX_GSO_LEN		(65536+ETHER_HDR_LEN)
143
144/* Control virtqueue */
145struct virtio_net_ctrl_cmd {
146	uint8_t	class;
147	uint8_t	command;
148} __packed;
149#define VIRTIO_NET_CTRL_RX		0
150# define VIRTIO_NET_CTRL_RX_PROMISC	0
151# define VIRTIO_NET_CTRL_RX_ALLMULTI	1
152
153#define VIRTIO_NET_CTRL_MAC		1
154# define VIRTIO_NET_CTRL_MAC_TABLE_SET	0
155
156#define VIRTIO_NET_CTRL_VLAN		2
157# define VIRTIO_NET_CTRL_VLAN_ADD	0
158# define VIRTIO_NET_CTRL_VLAN_DEL	1
159
160#define VIRTIO_NET_CTRL_MQ			4
161# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET	0
162# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN	1
163# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX	0x8000
164
165struct virtio_net_ctrl_status {
166	uint8_t	ack;
167} __packed;
168#define VIRTIO_NET_OK			0
169#define VIRTIO_NET_ERR			1
170
171struct virtio_net_ctrl_rx {
172	uint8_t	onoff;
173} __packed;
174
175struct virtio_net_ctrl_mac_tbl {
176	uint32_t nentries;
177	uint8_t macs[][ETHER_ADDR_LEN];
178} __packed;
179
180struct virtio_net_ctrl_vlan {
181	uint16_t id;
182} __packed;
183
184struct virtio_net_ctrl_mq {
185	uint16_t virtqueue_pairs;
186} __packed;
187
188struct vioif_ctrl_cmdspec {
189	bus_dmamap_t	dmamap;
190	void		*buf;
191	bus_size_t	bufsize;
192};
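/*
 * Describes one buffer attached to a control virtqueue command.
 * vioif_ctrl_send_command() syncs and enqueues each spec between the
 * command header and the status byte.
 */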
193
194/*
195 * if_vioifvar.h:
196 */
197
198/*
199 * Locking notes:
200 * + fields in vioif_txqueue are protected by txq_lock (a spin mutex), and
201 *   fields in vioif_rxqueue are protected by rxq_lock (a spin mutex).
202 *      - no more than one of these locks may be held at once
203 * + ctrlq_inuse is protected by ctrlq_wait_lock.
204 *      - other fields in vioif_ctrlqueue are protected by ctrlq_inuse
205 *      - txq_lock or rxq_lock cannot be held along with ctrlq_wait_lock
206 */
207
208struct vioif_txqueue {
209	kmutex_t		*txq_lock;	/* lock for tx operations */
210
211	struct virtqueue	*txq_vq;
212	bool			txq_stopping;
213	bool			txq_link_active;
214	pcq_t			*txq_intrq;
215
216	struct virtio_net_hdr	*txq_hdrs;
217	bus_dmamap_t		*txq_hdr_dmamaps;
218
219	struct mbuf		**txq_mbufs;
220	bus_dmamap_t		*txq_dmamaps;
221
222	void			*txq_deferred_transmit;
223};
224
225struct vioif_rxqueue {
226	kmutex_t		*rxq_lock;	/* lock for rx operations */
227
228	struct virtqueue	*rxq_vq;
229	bool			rxq_stopping;
230
231	struct virtio_net_hdr	*rxq_hdrs;
232	bus_dmamap_t		*rxq_hdr_dmamaps;
233
234	struct mbuf		**rxq_mbufs;
235	bus_dmamap_t		*rxq_dmamaps;
236
237	void			*rxq_softint;
238};
239
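/*
 * ctrlq_inuse is a small state machine serializing control commands:
 * vioif_ctrl_acquire() moves it FREE -> INUSE, vioif_ctrl_intr() moves it
 * INUSE -> DONE when the device has answered, and vioif_ctrl_release()
 * returns it to FREE and wakes up any waiter.
 */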
240struct vioif_ctrlqueue {
241	struct virtqueue		*ctrlq_vq;
242	enum {
243		FREE, INUSE, DONE
244	}				ctrlq_inuse;
245	kcondvar_t			ctrlq_wait;
246	kmutex_t			ctrlq_wait_lock;
247	struct lwp			*ctrlq_owner;
248
249	struct virtio_net_ctrl_cmd	*ctrlq_cmd;
250	struct virtio_net_ctrl_status	*ctrlq_status;
251	struct virtio_net_ctrl_rx	*ctrlq_rx;
252	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_uc;
253	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_mc;
254	struct virtio_net_ctrl_mq	*ctrlq_mq;
255
256	bus_dmamap_t			ctrlq_cmd_dmamap;
257	bus_dmamap_t			ctrlq_status_dmamap;
258	bus_dmamap_t			ctrlq_rx_dmamap;
259	bus_dmamap_t			ctrlq_tbl_uc_dmamap;
260	bus_dmamap_t			ctrlq_tbl_mc_dmamap;
261	bus_dmamap_t			ctrlq_mq_dmamap;
262};
263
264struct vioif_softc {
265	device_t		sc_dev;
266
267	struct virtio_softc	*sc_virtio;
268	struct virtqueue	*sc_vqs;
269
270	int			sc_max_nvq_pairs;
271	int			sc_req_nvq_pairs;
272	int			sc_act_nvq_pairs;
273
274	uint8_t			sc_mac[ETHER_ADDR_LEN];
275	struct ethercom		sc_ethercom;
276	short			sc_deferred_init_done;
277	bool			sc_link_active;
278
279	struct vioif_txqueue	*sc_txq;
280	struct vioif_rxqueue	*sc_rxq;
281
282	bool			sc_has_ctrl;
283	struct vioif_ctrlqueue	sc_ctrlq;
284
285	bus_dma_segment_t	sc_hdr_segs[1];
286	void			*sc_dmamem;
287	void			*sc_kmem;
288
289	void			*sc_ctl_softint;
290};
291#define VIRTIO_NET_TX_MAXNSEGS		(16) /* XXX */
292#define VIRTIO_NET_CTRL_MAC_MAXENTRIES	(64) /* XXX */
293
294/* cfattach interface functions */
295static int	vioif_match(device_t, cfdata_t, void *);
296static void	vioif_attach(device_t, device_t, void *);
297static void	vioif_deferred_init(device_t);
298
299/* ifnet interface functions */
300static int	vioif_init(struct ifnet *);
301static void	vioif_stop(struct ifnet *, int);
302static void	vioif_start(struct ifnet *);
303static void	vioif_start_locked(struct ifnet *, struct vioif_txqueue *);
304static int	vioif_transmit(struct ifnet *, struct mbuf *);
305static void	vioif_transmit_locked(struct ifnet *, struct vioif_txqueue *);
306static int	vioif_ioctl(struct ifnet *, u_long, void *);
307static void	vioif_watchdog(struct ifnet *);
308
309/* rx */
310static int	vioif_add_rx_mbuf(struct vioif_rxqueue *, int);
311static void	vioif_free_rx_mbuf(struct vioif_rxqueue *, int);
312static void	vioif_populate_rx_mbufs(struct vioif_rxqueue *);
313static void	vioif_populate_rx_mbufs_locked(struct vioif_rxqueue *);
314static int	vioif_rx_deq(struct vioif_rxqueue *);
315static int	vioif_rx_deq_locked(struct vioif_rxqueue *);
316static int	vioif_rx_intr(void *);
317static void	vioif_rx_softint(void *);
318static void	vioif_rx_drain(struct vioif_rxqueue *);
319
320/* tx */
321static int	vioif_tx_intr(void *);
322static int	vioif_tx_deq_locked(struct virtqueue *);
323static void	vioif_tx_drain(struct vioif_txqueue *);
324static void	vioif_deferred_transmit(void *);
325
326/* other control */
327static bool	vioif_is_link_up(struct vioif_softc *);
328static void	vioif_update_link_status(struct vioif_softc *);
329static int	vioif_ctrl_rx(struct vioif_softc *, int, bool);
330static int	vioif_set_promisc(struct vioif_softc *, bool);
331static int	vioif_set_allmulti(struct vioif_softc *, bool);
332static int	vioif_set_rx_filter(struct vioif_softc *);
333static int	vioif_rx_filter(struct vioif_softc *);
334static int	vioif_ctrl_intr(void *);
335static int	vioif_config_change(struct virtio_softc *);
336static void	vioif_ctl_softint(void *);
337static int	vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *, int);
338static void	vioif_enable_interrupt_vqpairs(struct vioif_softc *);
339static void	vioif_disable_interrupt_vqpairs(struct vioif_softc *);
340
341CFATTACH_DECL_NEW(vioif, sizeof(struct vioif_softc),
342		  vioif_match, vioif_attach, NULL, NULL);
343
344static int
345vioif_match(device_t parent, cfdata_t match, void *aux)
346{
347	struct virtio_attach_args *va = aux;
348
349	if (va->sc_childdevid == PCI_PRODUCT_VIRTIO_NETWORK)
350		return 1;
351
352	return 0;
353}
354
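/*
 * sc_vqs[] is laid out as rx0, tx0, rx1, tx1, ... with the control
 * virtqueue, if present, placed last.
 */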
355static void
356vioif_alloc_queues(struct vioif_softc *sc)
357{
358	int nvq_pairs = sc->sc_max_nvq_pairs;
359	int nvqs = nvq_pairs * 2;
360	int i;
361
362	KASSERT(nvq_pairs <= VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX);
363
364	sc->sc_rxq = kmem_zalloc(sizeof(sc->sc_rxq[0]) * nvq_pairs,
365	    KM_SLEEP);
366	sc->sc_txq = kmem_zalloc(sizeof(sc->sc_txq[0]) * nvq_pairs,
367	    KM_SLEEP);
368
369	if (sc->sc_has_ctrl)
370		nvqs++;
371
372	sc->sc_vqs = kmem_zalloc(sizeof(sc->sc_vqs[0]) * nvqs, KM_SLEEP);
373	nvqs = 0;
374	for (i = 0; i < nvq_pairs; i++) {
375		sc->sc_rxq[i].rxq_vq = &sc->sc_vqs[nvqs++];
376		sc->sc_txq[i].txq_vq = &sc->sc_vqs[nvqs++];
377	}
378
379	if (sc->sc_has_ctrl)
380		sc->sc_ctrlq.ctrlq_vq = &sc->sc_vqs[nvqs++];
381}
382
383static void
384vioif_free_queues(struct vioif_softc *sc)
385{
386	int nvq_pairs = sc->sc_max_nvq_pairs;
387	int nvqs = nvq_pairs * 2;
388
389	if (sc->sc_ctrlq.ctrlq_vq)
390		nvqs++;
391
392	if (sc->sc_txq) {
393		kmem_free(sc->sc_txq, sizeof(sc->sc_txq[0]) * nvq_pairs);
394		sc->sc_txq = NULL;
395	}
396
397	if (sc->sc_rxq) {
398		kmem_free(sc->sc_rxq, sizeof(sc->sc_rxq[0]) * nvq_pairs);
399		sc->sc_rxq = NULL;
400	}
401
402	if (sc->sc_vqs) {
403		kmem_free(sc->sc_vqs, sizeof(sc->sc_vqs[0]) * nvqs);
404		sc->sc_vqs = NULL;
405	}
406}
407
408/* allocate memory */
409/*
410 * dma memory is used for:
411 *   rxq_hdrs[slot]:	 metadata array for received frames (READ)
412 *   txq_hdrs[slot]:	 metadata array for frames to be sent (WRITE)
413 *   ctrlq_cmd:		 command to be sent via ctrl vq (WRITE)
414 *   ctrlq_status:	 return value for a command via ctrl vq (READ)
415 *   ctrlq_rx:		 parameter for a VIRTIO_NET_CTRL_RX class command
416 *			 (WRITE)
417 *   ctrlq_mac_tbl_uc:	 unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
418 *			 class command (WRITE)
419 *   ctrlq_mac_tbl_mc:	 multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
420 *			 class command (WRITE)
421 * Only one instance of each ctrlq_* structure is allocated; they are
422 * protected by the ctrlq_inuse variable and the ctrlq_wait condvar.
423 */
424/*
425 * dynamically allocated memory is used for:
426 *   rxq_hdr_dmamaps[slot]:	bus_dmamap_t array for rxq_hdrs[slot]
427 *   txq_hdr_dmamaps[slot]:	bus_dmamap_t array for txq_hdrs[slot]
428 *   rxq_dmamaps[slot]:		bus_dmamap_t array for received payload
429 *   txq_dmamaps[slot]:		bus_dmamap_t array for sent payload
430 *   rxq_mbufs[slot]:		mbuf pointer array for received frames
431 *   txq_mbufs[slot]:		mbuf pointer array for sent frames
432 */
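/*
 * All of the headers and ctrlq_* buffers above are carved out of one
 * contiguous bus_dmamem allocation (sc_dmamem); the dmamap and mbuf
 * pointer arrays are carved out of one kmem allocation (sc_kmem).
 * The P() macro below advances a cursor through each region.
 */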
433static int
434vioif_alloc_mems(struct vioif_softc *sc)
435{
436	struct virtio_softc *vsc = sc->sc_virtio;
437	struct vioif_txqueue *txq;
438	struct vioif_rxqueue *rxq;
439	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
440	int allocsize, allocsize2, r, rsegs, i, qid;
441	void *vaddr;
442	intptr_t p;
443
444	allocsize = 0;
445	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
446		rxq = &sc->sc_rxq[qid];
447		txq = &sc->sc_txq[qid];
448
449		allocsize +=
450		    sizeof(struct virtio_net_hdr) * rxq->rxq_vq->vq_num;
451		allocsize +=
452		    sizeof(struct virtio_net_hdr) * txq->txq_vq->vq_num;
453	}
454	if (sc->sc_has_ctrl) {
455		allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1;
456		allocsize += sizeof(struct virtio_net_ctrl_status) * 1;
457		allocsize += sizeof(struct virtio_net_ctrl_rx) * 1;
458		allocsize += sizeof(struct virtio_net_ctrl_mac_tbl)
459		    + sizeof(struct virtio_net_ctrl_mac_tbl)
460		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES;
461		allocsize += sizeof(struct virtio_net_ctrl_mq) * 1;
462	}
463	r = bus_dmamem_alloc(virtio_dmat(vsc), allocsize, 0, 0,
464	    &sc->sc_hdr_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
465	if (r != 0) {
466		aprint_error_dev(sc->sc_dev,
467		    "DMA memory allocation failed, size %d, "
468		    "error code %d\n", allocsize, r);
469		goto err_none;
470	}
471	r = bus_dmamem_map(virtio_dmat(vsc),
472	    &sc->sc_hdr_segs[0], 1, allocsize, &vaddr, BUS_DMA_NOWAIT);
473	if (r != 0) {
474		aprint_error_dev(sc->sc_dev,
475		    "DMA memory map failed, error code %d\n", r);
476		goto err_dmamem_alloc;
477	}
478
479#define P(p, p0, p0size)	do { p0 = (void *) p;		\
480				     p += p0size; } while (0)
481	memset(vaddr, 0, allocsize);
482	sc->sc_dmamem = vaddr;
483	p = (intptr_t) vaddr;
484
485	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
486		rxq = &sc->sc_rxq[qid];
487		txq = &sc->sc_txq[qid];
488
489		P(p, rxq->rxq_hdrs,
490		    sizeof(rxq->rxq_hdrs[0]) * rxq->rxq_vq->vq_num);
491		P(p, txq->txq_hdrs,
492		    sizeof(txq->txq_hdrs[0]) * txq->txq_vq->vq_num);
493	}
494	if (sc->sc_has_ctrl) {
495		P(p, ctrlq->ctrlq_cmd, sizeof(*ctrlq->ctrlq_cmd));
496		P(p, ctrlq->ctrlq_status, sizeof(*ctrlq->ctrlq_status));
497		P(p, ctrlq->ctrlq_rx, sizeof(*ctrlq->ctrlq_rx));
498		P(p, ctrlq->ctrlq_mac_tbl_uc, sizeof(*ctrlq->ctrlq_mac_tbl_uc));
499		P(p, ctrlq->ctrlq_mac_tbl_mc, sizeof(*ctrlq->ctrlq_mac_tbl_mc)
500		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES);
501		P(p, ctrlq->ctrlq_mq, sizeof(*ctrlq->ctrlq_mq));
502	}
503
504	allocsize2 = 0;
505	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
506		int rxqsize, txqsize;
507
508		rxq = &sc->sc_rxq[qid];
509		txq = &sc->sc_txq[qid];
510		rxqsize = rxq->rxq_vq->vq_num;
511		txqsize = txq->txq_vq->vq_num;
512
513		allocsize2 += sizeof(rxq->rxq_dmamaps[0]) * rxqsize;
514		allocsize2 += sizeof(rxq->rxq_hdr_dmamaps[0]) * rxqsize;
515		allocsize2 += sizeof(rxq->rxq_mbufs[0]) * rxqsize;
516
517		allocsize2 += sizeof(txq->txq_dmamaps[0]) * txqsize;
518		allocsize2 += sizeof(txq->txq_hdr_dmamaps[0]) * txqsize;
519		allocsize2 += sizeof(txq->txq_mbufs[0]) * txqsize;
520	}
521	vaddr = kmem_zalloc(allocsize2, KM_SLEEP);
522	sc->sc_kmem = vaddr;
523	p = (intptr_t) vaddr;
524
525	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
526		int rxqsize, txqsize;
527		rxq = &sc->sc_rxq[qid];
528		txq = &sc->sc_txq[qid];
529		rxqsize = rxq->rxq_vq->vq_num;
530		txqsize = txq->txq_vq->vq_num;
531
532		P(p, rxq->rxq_hdr_dmamaps,
533		    sizeof(rxq->rxq_hdr_dmamaps[0]) * rxqsize);
534		P(p, txq->txq_hdr_dmamaps,
535		    sizeof(txq->txq_hdr_dmamaps[0]) * txqsize);
536		P(p, rxq->rxq_dmamaps, sizeof(rxq->rxq_dmamaps[0]) * rxqsize);
537		P(p, txq->txq_dmamaps, sizeof(txq->txq_dmamaps[0]) * txqsize);
538		P(p, rxq->rxq_mbufs, sizeof(rxq->rxq_mbufs[0]) * rxqsize);
539		P(p, txq->txq_mbufs, sizeof(txq->txq_mbufs[0]) * txqsize);
540	}
541#undef P
542
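/*
 * C() creates an unloaded dmamap, C_L() creates one and immediately loads
 * it with a fixed kernel buffer; both bail out to err_reqs on failure.
 */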
543#define C(map, size, nsegs, usage)					      \
544	do {								      \
545		r = bus_dmamap_create(virtio_dmat(vsc), size, nsegs, size, 0, \
546		    BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW,			      \
547		    &map);						      \
548		if (r != 0) {						      \
549			aprint_error_dev(sc->sc_dev,			      \
550			    usage " dmamap creation failed, "		      \
551			    "error code %d\n", r);			      \
552			goto err_reqs;					      \
553		}							      \
554	} while (0)
555#define C_L(map, buf, size, nsegs, rw, usage)				\
556	C(map, size, nsegs, usage);					\
557	do {								\
558		r = bus_dmamap_load(virtio_dmat(vsc), map,		\
559				    buf, size, NULL,			\
560				    rw | BUS_DMA_NOWAIT);		\
561		if (r != 0) {						\
562			aprint_error_dev(sc->sc_dev,			\
563			    usage " dmamap load failed, "		\
564			    "error code %d\n", r);			\
565			goto err_reqs;					\
566		}							\
567	} while (0)
568
569	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
570		rxq = &sc->sc_rxq[qid];
571		txq = &sc->sc_txq[qid];
572
573		for (i = 0; i < rxq->rxq_vq->vq_num; i++) {
574			C_L(rxq->rxq_hdr_dmamaps[i], &rxq->rxq_hdrs[i],
575			    sizeof(rxq->rxq_hdrs[0]), 1,
576			    BUS_DMA_READ, "rx header");
577			C(rxq->rxq_dmamaps[i], MCLBYTES, 1, "rx payload");
578		}
579
580		for (i = 0; i < txq->txq_vq->vq_num; i++) {
581			C_L(txq->txq_hdr_dmamaps[i], &txq->txq_hdrs[i],
582			    sizeof(txq->txq_hdrs[0]), 1,
583			    BUS_DMA_READ, "tx header");
584			C(txq->txq_dmamaps[i], ETHER_MAX_LEN,
585			    VIRTIO_NET_TX_MAXNSEGS, "tx payload");
586		}
587	}
588
589	if (sc->sc_has_ctrl) {
590		/* control vq class & command */
591		C_L(ctrlq->ctrlq_cmd_dmamap,
592		    ctrlq->ctrlq_cmd, sizeof(*ctrlq->ctrlq_cmd), 1,
593		    BUS_DMA_WRITE, "control command");
594		C_L(ctrlq->ctrlq_status_dmamap,
595		    ctrlq->ctrlq_status, sizeof(*ctrlq->ctrlq_status), 1,
596		    BUS_DMA_READ, "control status");
597
598		/* control vq rx mode command parameter */
599		C_L(ctrlq->ctrlq_rx_dmamap,
600		    ctrlq->ctrlq_rx, sizeof(*ctrlq->ctrlq_rx), 1,
601		    BUS_DMA_WRITE, "rx mode control command");
602
603		/* multiqueue set command */
604		C_L(ctrlq->ctrlq_mq_dmamap,
605		    ctrlq->ctrlq_mq, sizeof(*ctrlq->ctrlq_mq), 1,
606		    BUS_DMA_WRITE, "multiqueue set command");
607
608		/* control vq MAC filter table for unicast */
609		/* do not load now since its length is variable */
610		C(ctrlq->ctrlq_tbl_uc_dmamap,
611		    sizeof(*ctrlq->ctrlq_mac_tbl_uc) + 0, 1,
612		    "unicast MAC address filter command");
613
614		/* control vq MAC filter table for multicast */
615		C(ctrlq->ctrlq_tbl_mc_dmamap,
616		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
617		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES, 1,
618		    "multicast MAC address filter command");
619	}
620#undef C_L
621#undef C
622
623	return 0;
624
625err_reqs:
626#define D(map)								\
627	do {								\
628		if (map) {						\
629			bus_dmamap_destroy(virtio_dmat(vsc), map);	\
630			map = NULL;					\
631		}							\
632	} while (0)
633	D(ctrlq->ctrlq_tbl_mc_dmamap);
634	D(ctrlq->ctrlq_tbl_uc_dmamap);
635	D(ctrlq->ctrlq_rx_dmamap);
636	D(ctrlq->ctrlq_status_dmamap);
637	D(ctrlq->ctrlq_cmd_dmamap);
638	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
639		rxq = &sc->sc_rxq[qid];
640		txq = &sc->sc_txq[qid];
641
642		for (i = 0; i < txq->txq_vq->vq_num; i++) {
643			D(txq->txq_dmamaps[i]);
644			D(txq->txq_hdr_dmamaps[i]);
645		}
646		for (i = 0; i < rxq->rxq_vq->vq_num; i++) {
647			D(rxq->rxq_dmamaps[i]);
648			D(rxq->rxq_hdr_dmamaps[i]);
649		}
650	}
651#undef D
652	if (sc->sc_kmem) {
653		kmem_free(sc->sc_kmem, allocsize2);
654		sc->sc_kmem = NULL;
655	}
656	bus_dmamem_unmap(virtio_dmat(vsc), sc->sc_dmamem, allocsize);
657err_dmamem_alloc:
658	bus_dmamem_free(virtio_dmat(vsc), &sc->sc_hdr_segs[0], 1);
659err_none:
660	return -1;
661}
662
663static void
664vioif_attach(device_t parent, device_t self, void *aux)
665{
666	struct vioif_softc *sc = device_private(self);
667	struct virtio_softc *vsc = device_private(parent);
668	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
669	struct vioif_txqueue *txq;
670	struct vioif_rxqueue *rxq;
671	uint32_t features, req_features;
672	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
673	u_int softint_flags;
674	int r, i, nvqs=0, req_flags;
675
676	if (virtio_child(vsc) != NULL) {
677		aprint_normal(": child already attached for %s; "
678		    "something wrong...\n", device_xname(parent));
679		return;
680	}
681
682	sc->sc_dev = self;
683	sc->sc_virtio = vsc;
684	sc->sc_link_active = false;
685
686	sc->sc_max_nvq_pairs = 1;
687	sc->sc_req_nvq_pairs = 1;
688	sc->sc_act_nvq_pairs = 1;
689
690	req_flags = 0;
691
692#ifdef VIOIF_MPSAFE
693	req_flags |= VIRTIO_F_PCI_INTR_MPSAFE;
694#endif
695	req_flags |= VIRTIO_F_PCI_INTR_MSIX;
696
697	req_features =
698	    VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | VIRTIO_NET_F_CTRL_VQ |
699	    VIRTIO_NET_F_CTRL_RX | VIRTIO_F_NOTIFY_ON_EMPTY;
700#ifdef VIOIF_MULTIQ
701	req_features |= VIRTIO_NET_F_MQ;
702#endif
703	virtio_child_attach_start(vsc, self, IPL_NET, NULL,
704	    vioif_config_change, virtio_vq_intrhand, req_flags,
705	    req_features, VIRTIO_NET_FLAG_BITS);
706
707	features = virtio_features(vsc);
708
709	if (features & VIRTIO_NET_F_MAC) {
710		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
711			sc->sc_mac[i] = virtio_read_device_config_1(vsc,
712			    VIRTIO_NET_CONFIG_MAC + i);
713		}
714	} else {
715		/* code stolen from sys/net/if_tap.c */
716		struct timeval tv;
717		uint32_t ui;
718		getmicrouptime(&tv);
719		ui = (tv.tv_sec ^ tv.tv_usec) & 0xffffff;
720		memcpy(sc->sc_mac+3, (uint8_t *)&ui, 3);
721		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
722			virtio_write_device_config_1(vsc,
723			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
724		}
725	}
726
727	aprint_normal_dev(self, "Ethernet address %s\n",
728	    ether_sprintf(sc->sc_mac));
729
730	if ((features & VIRTIO_NET_F_CTRL_VQ) &&
731	    (features & VIRTIO_NET_F_CTRL_RX)) {
732		sc->sc_has_ctrl = true;
733
734		cv_init(&ctrlq->ctrlq_wait, "ctrl_vq");
735		mutex_init(&ctrlq->ctrlq_wait_lock, MUTEX_DEFAULT, IPL_NET);
736		ctrlq->ctrlq_inuse = FREE;
737	} else {
738		sc->sc_has_ctrl = false;
739	}
740
741	if (sc->sc_has_ctrl && (features & VIRTIO_NET_F_MQ)) {
742		sc->sc_max_nvq_pairs = virtio_read_device_config_2(vsc,
743		    VIRTIO_NET_CONFIG_MAX_VQ_PAIRS);
744
745		if (sc->sc_max_nvq_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
746			goto err;
747
748		/* Limit the number of queue pairs to use */
749		sc->sc_req_nvq_pairs = MIN(sc->sc_max_nvq_pairs, ncpu);
750	}
751
752	vioif_alloc_queues(sc);
753	virtio_child_attach_set_vqs(vsc, sc->sc_vqs, sc->sc_req_nvq_pairs);
754
755#ifdef VIOIF_MPSAFE
756	softint_flags = SOFTINT_NET | SOFTINT_MPSAFE;
757#else
758	softint_flags = SOFTINT_NET;
759#endif
760
761	/*
762	 * Allocate the virtqueues
763	 */
764	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
765		rxq = &sc->sc_rxq[i];
766		txq = &sc->sc_txq[i];
767		char qname[32];
768
769		rxq->rxq_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
770
771		rxq->rxq_softint = softint_establish(softint_flags,
772		    vioif_rx_softint, rxq);
773		if (rxq->rxq_softint == NULL) {
774			aprint_error_dev(self, "cannot establish rx softint\n");
775			goto err;
776		}
777		snprintf(qname, sizeof(qname), "rx%d", i);
778		r = virtio_alloc_vq(vsc, rxq->rxq_vq, nvqs,
779		    MCLBYTES+sizeof(struct virtio_net_hdr), nvqs, qname);
780		if (r != 0)
781			goto err;
782		nvqs++;
783		rxq->rxq_vq->vq_intrhand = vioif_rx_intr;
784		rxq->rxq_vq->vq_intrhand_arg = (void *)rxq;
785		rxq->rxq_stopping = true;
786
787		txq->txq_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
788		txq->txq_deferred_transmit = softint_establish(softint_flags,
789		    vioif_deferred_transmit, txq);
790		if (txq->txq_deferred_transmit == NULL) {
791			aprint_error_dev(self, "cannot establish tx softint\n");
792			goto err;
793		}
794		snprintf(qname, sizeof(qname), "tx%d", i);
795		r = virtio_alloc_vq(vsc, txq->txq_vq, nvqs,
796		    sizeof(struct virtio_net_hdr)
797		    + (ETHER_MAX_LEN - ETHER_HDR_LEN),
798		    VIRTIO_NET_TX_MAXNSEGS + 1, qname);
799		if (r != 0)
800			goto err;
801		nvqs++;
802		txq->txq_vq->vq_intrhand = vioif_tx_intr;
803		txq->txq_vq->vq_intrhand_arg = (void *)txq;
804		txq->txq_link_active = sc->sc_link_active;
805		txq->txq_stopping = false;
806		txq->txq_intrq = pcq_create(txq->txq_vq->vq_num, KM_SLEEP);
807	}
808
809	if (sc->sc_has_ctrl) {
810		/*
811		 * Allocate a virtqueue for the control channel
812		 */
813		r = virtio_alloc_vq(vsc, ctrlq->ctrlq_vq, nvqs,
814		    NBPG, 1, "control");
815		if (r != 0) {
816			aprint_error_dev(self, "failed to allocate "
817			    "a virtqueue for control channel, error code %d\n",
818			    r);
819
820			sc->sc_has_ctrl = false;
821			cv_destroy(&ctrlq->ctrlq_wait);
822			mutex_destroy(&ctrlq->ctrlq_wait_lock);
823		} else {
824			nvqs++;
825			ctrlq->ctrlq_vq->vq_intrhand = vioif_ctrl_intr;
826			ctrlq->ctrlq_vq->vq_intrhand_arg = (void *) ctrlq;
827		}
828	}
829
830	sc->sc_ctl_softint = softint_establish(softint_flags,
831	    vioif_ctl_softint, sc);
832	if (sc->sc_ctl_softint == NULL) {
833		aprint_error_dev(self, "cannot establish ctl softint\n");
834		goto err;
835	}
836
837	if (vioif_alloc_mems(sc) < 0)
838		goto err;
839
840	if (virtio_child_attach_finish(vsc) != 0)
841		goto err;
842
843	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
844	ifp->if_softc = sc;
845	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
846#ifdef VIOIF_MPSAFE
847	ifp->if_extflags = IFEF_MPSAFE;
848#endif
849	ifp->if_start = vioif_start;
850	if (sc->sc_req_nvq_pairs > 1)
851		ifp->if_transmit = vioif_transmit;
852	ifp->if_ioctl = vioif_ioctl;
853	ifp->if_init = vioif_init;
854	ifp->if_stop = vioif_stop;
855	ifp->if_capabilities = 0;
856	ifp->if_watchdog = vioif_watchdog;
857	txq = &sc->sc_txq[0];
858	IFQ_SET_MAXLEN(&ifp->if_snd, MAX(txq->txq_vq->vq_num, IFQ_MAXLEN));
859	IFQ_SET_READY(&ifp->if_snd);
860
861	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
862
863	if_attach(ifp);
864	if_deferred_start_init(ifp, NULL);
865	ether_ifattach(ifp, sc->sc_mac);
866
867	return;
868
869err:
870	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
871		rxq = &sc->sc_rxq[i];
872		txq = &sc->sc_txq[i];
873
874		if (rxq->rxq_lock) {
875			mutex_obj_free(rxq->rxq_lock);
876			rxq->rxq_lock = NULL;
877		}
878
879		if (rxq->rxq_softint) {
880			softint_disestablish(rxq->rxq_softint);
881			rxq->rxq_softint = NULL;
882		}
883
884		if (txq->txq_lock) {
885			mutex_obj_free(txq->txq_lock);
886			txq->txq_lock = NULL;
887		}
888
889		if (txq->txq_deferred_transmit) {
890			softint_disestablish(txq->txq_deferred_transmit);
891			txq->txq_deferred_transmit = NULL;
892		}
893
894		if (txq->txq_intrq) {
895			pcq_destroy(txq->txq_intrq);
896			txq->txq_intrq = NULL;
897		}
898	}
899
900	if (sc->sc_has_ctrl) {
901		cv_destroy(&ctrlq->ctrlq_wait);
902		mutex_destroy(&ctrlq->ctrlq_wait_lock);
903	}
904
905	while (nvqs > 0)
906		virtio_free_vq(vsc, &sc->sc_vqs[--nvqs]);
907
908	vioif_free_queues(sc);
909
910	virtio_child_attach_failed(vsc);
911	return;
912}
913
914/* we need interrupts to turn promiscuous mode off */
915static void
916vioif_deferred_init(device_t self)
917{
918	struct vioif_softc *sc = device_private(self);
919	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
920	int r;
921
922	if (ifp->if_flags & IFF_PROMISC)
923		return;
924
925	r =  vioif_set_promisc(sc, false);
926	if (r != 0)
927		aprint_error_dev(self, "resetting promisc mode failed, "
928		    "error code %d\n", r);
929}
930
931static void
932vioif_enable_interrupt_vqpairs(struct vioif_softc *sc)
933{
934	struct virtio_softc *vsc = sc->sc_virtio;
935	struct vioif_txqueue *txq;
936	struct vioif_rxqueue *rxq;
937	int i;
938
939	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
940		txq = &sc->sc_txq[i];
941		rxq = &sc->sc_rxq[i];
942
943		virtio_start_vq_intr(vsc, txq->txq_vq);
944		virtio_start_vq_intr(vsc, rxq->rxq_vq);
945	}
946}
947
948static void
949vioif_disable_interrupt_vqpairs(struct vioif_softc *sc)
950{
951	struct virtio_softc *vsc = sc->sc_virtio;
952	struct vioif_txqueue *txq;
953	struct vioif_rxqueue *rxq;
954	int i;
955
956	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
957		txq = &sc->sc_txq[i];
958		rxq = &sc->sc_rxq[i];
959
960		virtio_stop_vq_intr(vsc, txq->txq_vq);
961		virtio_stop_vq_intr(vsc, rxq->rxq_vq);
962	}
963}
964
965/*
966 * Interface functions for ifnet
967 */
968static int
969vioif_init(struct ifnet *ifp)
970{
971	struct vioif_softc *sc = ifp->if_softc;
972	struct virtio_softc *vsc = sc->sc_virtio;
973	struct vioif_rxqueue *rxq;
974	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
975	int r, i;
976
977	vioif_stop(ifp, 0);
978
979	virtio_reinit_start(vsc);
980	virtio_negotiate_features(vsc, virtio_features(vsc));
981
982	for (i = 0; i < sc->sc_req_nvq_pairs; i++) {
983		rxq = &sc->sc_rxq[i];
984
985		/* rxq_stopping must be false before vioif_populate_rx_mbufs */
986		rxq->rxq_stopping = false;
987		vioif_populate_rx_mbufs(rxq);
988	}
989
990	virtio_reinit_end(vsc);
991
992	if (sc->sc_has_ctrl)
993		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
994
995	r = vioif_ctrl_mq_vq_pairs_set(sc, sc->sc_req_nvq_pairs);
996	if (r == 0)
997		sc->sc_act_nvq_pairs = sc->sc_req_nvq_pairs;
998	else
999		sc->sc_act_nvq_pairs = 1;
1000
1001	for (i = 0; i < sc->sc_act_nvq_pairs; i++)
1002		sc->sc_txq[i].txq_stopping = false;
1003
1004	vioif_enable_interrupt_vqpairs(sc);
1005
1006	if (!sc->sc_deferred_init_done) {
1007		sc->sc_deferred_init_done = 1;
1008		if (sc->sc_has_ctrl)
1009			vioif_deferred_init(sc->sc_dev);
1010	}
1011
1012	vioif_update_link_status(sc);
1013	ifp->if_flags |= IFF_RUNNING;
1014	ifp->if_flags &= ~IFF_OACTIVE;
1015	vioif_rx_filter(sc);
1016
1017	return 0;
1018}
1019
1020static void
1021vioif_stop(struct ifnet *ifp, int disable)
1022{
1023	struct vioif_softc *sc = ifp->if_softc;
1024	struct virtio_softc *vsc = sc->sc_virtio;
1025	struct vioif_txqueue *txq;
1026	struct vioif_rxqueue *rxq;
1027	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1028	int i;
1029
1030	/* Take the locks to ensure that ongoing TX/RX finish */
1031	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1032		txq = &sc->sc_txq[i];
1033		rxq = &sc->sc_rxq[i];
1034
1035		mutex_enter(txq->txq_lock);
1036		txq->txq_stopping = true;
1037		mutex_exit(txq->txq_lock);
1038
1039		mutex_enter(rxq->rxq_lock);
1040		rxq->rxq_stopping = true;
1041		mutex_exit(rxq->rxq_lock);
1042	}
1043
1044	/* disable interrupts */
1045	vioif_disable_interrupt_vqpairs(sc);
1046
1047	if (sc->sc_has_ctrl)
1048		virtio_stop_vq_intr(vsc, ctrlq->ctrlq_vq);
1049
1050	/* the only way to stop I/O and DMA is to reset the device */
1051	virtio_reset(vsc);
1052	for (i = 0; i < sc->sc_act_nvq_pairs; i++)
1053		vioif_rx_deq(&sc->sc_rxq[i]);
1054
1055	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1056	sc->sc_link_active = false;
1057
1058	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1059		txq = &sc->sc_txq[i];
1060		rxq = &sc->sc_rxq[i];
1061
1062		txq->txq_link_active = false;
1063
1064		if (disable)
1065			vioif_rx_drain(rxq);
1066
1067		vioif_tx_drain(txq);
1068	}
1069}
1070
1071static void
1072vioif_send_common_locked(struct ifnet *ifp, struct vioif_txqueue *txq,
1073    bool is_transmit)
1074{
1075	struct vioif_softc *sc = ifp->if_softc;
1076	struct virtio_softc *vsc = sc->sc_virtio;
1077	struct virtqueue *vq = txq->txq_vq;
1078	struct mbuf *m;
1079	int queued = 0;
1080
1081	KASSERT(mutex_owned(txq->txq_lock));
1082
1083	if ((ifp->if_flags & IFF_RUNNING) == 0)
1084		return;
1085
1086	if (!txq->txq_link_active || txq->txq_stopping)
1087		return;
1088
1089	if ((ifp->if_flags & IFF_OACTIVE) != 0 && !is_transmit)
1090		return;
1091
1092	for (;;) {
1093		int slot, r;
1094
1095		if (is_transmit)
1096			m = pcq_get(txq->txq_intrq);
1097		else
1098			IFQ_DEQUEUE(&ifp->if_snd, m);
1099
1100		if (m == NULL)
1101			break;
1102
1103		r = virtio_enqueue_prep(vsc, vq, &slot);
1104		if (r == EAGAIN) {
1105			ifp->if_flags |= IFF_OACTIVE;
1106			m_freem(m);
1107			break;
1108		}
1109		if (r != 0)
1110			panic("enqueue_prep for a tx buffer");
1111
1112		r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1113		    txq->txq_dmamaps[slot], m, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1114		if (r != 0) {
1115			/* maybe just too fragmented */
1116			struct mbuf *newm;
1117
1118			newm = m_defrag(m, M_NOWAIT);
1119			if (newm == NULL) {
1120				aprint_error_dev(sc->sc_dev,
1121				    "m_defrag() failed\n");
1122				goto skip;
1123			}
1124
1125			m = newm;
1126			r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1127			    txq->txq_dmamaps[slot], m,
1128			    BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1129			if (r != 0) {
1130				aprint_error_dev(sc->sc_dev,
1131				    "tx dmamap load failed, error code %d\n",
1132				    r);
1133skip:
1134				m_freem(m);
1135				virtio_enqueue_abort(vsc, vq, slot);
1136				continue;
1137			}
1138		}
1139
1140		/* This should actually never fail */
1141		r = virtio_enqueue_reserve(vsc, vq, slot,
1142					txq->txq_dmamaps[slot]->dm_nsegs + 1);
1143		if (r != 0) {
1144			aprint_error_dev(sc->sc_dev,
1145			    "virtio_enqueue_reserve failed, error code %d\n",
1146			    r);
1147			bus_dmamap_unload(virtio_dmat(vsc),
1148					  txq->txq_dmamaps[slot]);
1149			/* slot already freed by virtio_enqueue_reserve */
1150			m_freem(m);
1151			continue;
1152		}
1153
1154		txq->txq_mbufs[slot] = m;
1155
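		/*
		 * The net header (all zero: no checksum offload, no GSO) and
		 * the payload map are enqueued as separate device-readable
		 * descriptors, so a packet consumes dm_nsegs + 1 descriptors,
		 * matching the reservation above.
		 */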
1156		memset(&txq->txq_hdrs[slot], 0, sizeof(struct virtio_net_hdr));
1157		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_dmamaps[slot],
1158		    0, txq->txq_dmamaps[slot]->dm_mapsize,
1159		    BUS_DMASYNC_PREWRITE);
1160		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_hdr_dmamaps[slot],
1161		    0, txq->txq_hdr_dmamaps[slot]->dm_mapsize,
1162		    BUS_DMASYNC_PREWRITE);
1163		virtio_enqueue(vsc, vq, slot, txq->txq_hdr_dmamaps[slot], true);
1164		virtio_enqueue(vsc, vq, slot, txq->txq_dmamaps[slot], true);
1165		virtio_enqueue_commit(vsc, vq, slot, false);
1166
1167		queued++;
1168		bpf_mtap(ifp, m, BPF_D_OUT);
1169	}
1170
1171	if (queued > 0) {
1172		virtio_enqueue_commit(vsc, vq, -1, true);
1173		ifp->if_timer = 5;
1174	}
1175}
1176
1177static void
1178vioif_start_locked(struct ifnet *ifp, struct vioif_txqueue *txq)
1179{
1180
1181	/*
1182	 * ifp->if_obytes and ifp->if_omcasts are added in if_transmit()@if.c.
1183	 */
1184	vioif_send_common_locked(ifp, txq, false);
1185
1186}
1187
1188static void
1189vioif_start(struct ifnet *ifp)
1190{
1191	struct vioif_softc *sc = ifp->if_softc;
1192	struct vioif_txqueue *txq = &sc->sc_txq[0];
1193
1194#ifdef VIOIF_MPSAFE
1195	KASSERT(if_is_mpsafe(ifp));
1196#endif
1197
1198	mutex_enter(txq->txq_lock);
1199	if (!txq->txq_stopping)
1200		vioif_start_locked(ifp, txq);
1201	mutex_exit(txq->txq_lock);
1202}
1203
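/*
 * Map the sending CPU to a tx queue; with fewer active queue pairs than
 * CPUs, several CPUs simply share a queue.
 */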
1204static inline int
1205vioif_select_txqueue(struct ifnet *ifp, struct mbuf *m)
1206{
1207	struct vioif_softc *sc = ifp->if_softc;
1208	u_int cpuid = cpu_index(curcpu());
1209
1210	return cpuid % sc->sc_act_nvq_pairs;
1211}
1212
1213static void
1214vioif_transmit_locked(struct ifnet *ifp, struct vioif_txqueue *txq)
1215{
1216
1217	vioif_send_common_locked(ifp, txq, true);
1218}
1219
1220static int
1221vioif_transmit(struct ifnet *ifp, struct mbuf *m)
1222{
1223	struct vioif_softc *sc = ifp->if_softc;
1224	struct vioif_txqueue *txq;
1225	int qid;
1226
1227	qid = vioif_select_txqueue(ifp, m);
1228	txq = &sc->sc_txq[qid];
1229
1230	if (__predict_false(!pcq_put(txq->txq_intrq, m))) {
1231		m_freem(m);
1232		return ENOBUFS;
1233	}
1234
1235	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
1236	if_statadd_ref(nsr, if_obytes, m->m_pkthdr.len);
1237	if (m->m_flags & M_MCAST)
1238		if_statinc_ref(nsr, if_omcasts);
1239	IF_STAT_PUTREF(ifp);
1240
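	/*
	 * If the queue lock is contended, leave the packet on the pcq;
	 * it will be picked up by a later vioif_transmit() call or by the
	 * deferred-transmit softint scheduled from the tx interrupt.
	 */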
1241	if (mutex_tryenter(txq->txq_lock)) {
1242		if (!txq->txq_stopping)
1243			vioif_transmit_locked(ifp, txq);
1244		mutex_exit(txq->txq_lock);
1245	}
1246
1247	return 0;
1248}
1249
1250static void
1251vioif_deferred_transmit(void *arg)
1252{
1253	struct vioif_txqueue *txq = arg;
1254	struct virtio_softc *vsc = txq->txq_vq->vq_owner;
1255	struct vioif_softc *sc = device_private(virtio_child(vsc));
1256	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1257
1258	if (mutex_tryenter(txq->txq_lock)) {
1259		vioif_send_common_locked(ifp, txq, true);
1260		mutex_exit(txq->txq_lock);
1261	}
1262}
1263
1264static int
1265vioif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1266{
1267	int s, r;
1268
1269	s = splnet();
1270
1271	r = ether_ioctl(ifp, cmd, data);
1272	if ((r == 0 && cmd == SIOCSIFFLAGS) ||
1273	    (r == ENETRESET && (cmd == SIOCADDMULTI || cmd == SIOCDELMULTI))) {
1274		if (ifp->if_flags & IFF_RUNNING)
1275			r = vioif_rx_filter(ifp->if_softc);
1276		else
1277			r = 0;
1278	}
1279
1280	splx(s);
1281
1282	return r;
1283}
1284
1285static void
1286vioif_watchdog(struct ifnet *ifp)
1287{
1288	struct vioif_softc *sc = ifp->if_softc;
1289	int i;
1290
1291	if (ifp->if_flags & IFF_RUNNING) {
1292		for (i = 0; i < sc->sc_act_nvq_pairs; i++)
1293			vioif_tx_intr(sc->sc_txq[i].txq_vq);
1294	}
1295}
1296
1297/*
1298 * Receive implementation
1299 */
1300/* allocate and initialize a mbuf for receive */
1301static int
1302vioif_add_rx_mbuf(struct vioif_rxqueue *rxq, int i)
1303{
1304	struct virtio_softc *vsc = rxq->rxq_vq->vq_owner;
1305	struct mbuf *m;
1306	int r;
1307
1308	MGETHDR(m, M_DONTWAIT, MT_DATA);
1309	if (m == NULL)
1310		return ENOBUFS;
1311	MCLGET(m, M_DONTWAIT);
1312	if ((m->m_flags & M_EXT) == 0) {
1313		m_freem(m);
1314		return ENOBUFS;
1315	}
1316	rxq->rxq_mbufs[i] = m;
1317	m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
1318	r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1319	    rxq->rxq_dmamaps[i], m, BUS_DMA_READ | BUS_DMA_NOWAIT);
1320	if (r) {
1321		m_freem(m);
1322		rxq->rxq_mbufs[i] = NULL;
1323		return r;
1324	}
1325
1326	return 0;
1327}
1328
1329/* free a mbuf for receive */
1330static void
1331vioif_free_rx_mbuf(struct vioif_rxqueue *rxq, int i)
1332{
1333	struct virtio_softc *vsc = rxq->rxq_vq->vq_owner;
1334
1335	bus_dmamap_unload(virtio_dmat(vsc), rxq->rxq_dmamaps[i]);
1336	m_freem(rxq->rxq_mbufs[i]);
1337	rxq->rxq_mbufs[i] = NULL;
1338}
1339
1340/* add mbufs for all the empty receive slots */
1341static void
1342vioif_populate_rx_mbufs(struct vioif_rxqueue *rxq)
1343{
1344
1345	mutex_enter(rxq->rxq_lock);
1346	vioif_populate_rx_mbufs_locked(rxq);
1347	mutex_exit(rxq->rxq_lock);
1348}
1349
1350static void
1351vioif_populate_rx_mbufs_locked(struct vioif_rxqueue *rxq)
1352{
1353	struct virtqueue *vq = rxq->rxq_vq;
1354	struct virtio_softc *vsc = vq->vq_owner;
1355	struct vioif_softc *sc = device_private(virtio_child(vsc));
1356	int i, r, ndone = 0;
1357
1358	KASSERT(mutex_owned(rxq->rxq_lock));
1359
1360	if (rxq->rxq_stopping)
1361		return;
1362
1363	for (i = 0; i < vq->vq_num; i++) {
1364		int slot;
1365		r = virtio_enqueue_prep(vsc, vq, &slot);
1366		if (r == EAGAIN)
1367			break;
1368		if (r != 0)
1369			panic("enqueue_prep for rx buffers");
1370		if (rxq->rxq_mbufs[slot] == NULL) {
1371			r = vioif_add_rx_mbuf(rxq, slot);
1372			if (r != 0) {
1373				aprint_error_dev(sc->sc_dev,
1374				    "rx mbuf allocation failed, "
1375				    "error code %d\n", r);
1376				break;
1377			}
1378		}
1379		r = virtio_enqueue_reserve(vsc, vq, slot,
1380		    rxq->rxq_dmamaps[slot]->dm_nsegs + 1);
1381		if (r != 0) {
1382			vioif_free_rx_mbuf(rxq, slot);
1383			break;
1384		}
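		/*
		 * Post the header and the mbuf as two device-writable
		 * descriptors; vioif_rx_deq_locked() collects them once the
		 * device has filled them in.
		 */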
1385		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_hdr_dmamaps[slot],
1386		    0, sizeof(struct virtio_net_hdr), BUS_DMASYNC_PREREAD);
1387		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_dmamaps[slot],
1388		    0, MCLBYTES, BUS_DMASYNC_PREREAD);
1389		virtio_enqueue(vsc, vq, slot, rxq->rxq_hdr_dmamaps[slot],
1390		    false);
1391		virtio_enqueue(vsc, vq, slot, rxq->rxq_dmamaps[slot], false);
1392		virtio_enqueue_commit(vsc, vq, slot, false);
1393		ndone++;
1394	}
1395	if (ndone > 0)
1396		virtio_enqueue_commit(vsc, vq, -1, true);
1397}
1398
1399/* dequeue received packets */
1400static int
1401vioif_rx_deq(struct vioif_rxqueue *rxq)
1402{
1403	int r;
1404
1405	KASSERT(rxq->rxq_stopping);
1406
1407	mutex_enter(rxq->rxq_lock);
1408	r = vioif_rx_deq_locked(rxq);
1409	mutex_exit(rxq->rxq_lock);
1410
1411	return r;
1412}
1413
1414/* dequeue received packets */
1415static int
1416vioif_rx_deq_locked(struct vioif_rxqueue *rxq)
1417{
1418	struct virtqueue *vq = rxq->rxq_vq;
1419	struct virtio_softc *vsc = vq->vq_owner;
1420	struct vioif_softc *sc = device_private(virtio_child(vsc));
1421	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1422	struct mbuf *m;
1423	int r = 0;
1424	int slot, len;
1425
1426	KASSERT(mutex_owned(rxq->rxq_lock));
1427
1428	if (virtio_vq_is_enqueued(vsc, vq) == false)
1429		return r;
1430
1431	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
1432		len -= sizeof(struct virtio_net_hdr);
1433		r = 1;
1434		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_hdr_dmamaps[slot],
1435		    0, sizeof(struct virtio_net_hdr), BUS_DMASYNC_POSTREAD);
1436		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_dmamaps[slot],
1437		    0, MCLBYTES, BUS_DMASYNC_POSTREAD);
1438		m = rxq->rxq_mbufs[slot];
1439		KASSERT(m != NULL);
1440		bus_dmamap_unload(virtio_dmat(vsc), rxq->rxq_dmamaps[slot]);
1441		rxq->rxq_mbufs[slot] = NULL;
1442		virtio_dequeue_commit(vsc, vq, slot);
1443		m_set_rcvif(m, ifp);
1444		m->m_len = m->m_pkthdr.len = len;
1445
1446		mutex_exit(rxq->rxq_lock);
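		/*
		 * Drop the spin mutex while handing the packet to the stack;
		 * rxq_stopping is re-checked below in case vioif_stop() ran
		 * in the meantime.
		 */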
1447		if_percpuq_enqueue(ifp->if_percpuq, m);
1448		mutex_enter(rxq->rxq_lock);
1449
1450		if (rxq->rxq_stopping)
1451			break;
1452	}
1453
1454	return r;
1455}
1456
1457/* rx interrupt; call _dequeue above and schedule a softint */
1458static int
1459vioif_rx_intr(void *arg)
1460{
1461	struct vioif_rxqueue *rxq = arg;
1462	int r = 0;
1463
1464	mutex_enter(rxq->rxq_lock);
1465
1466	if (rxq->rxq_stopping)
1467		goto out;
1468
1469	r = vioif_rx_deq_locked(rxq);
1470	if (r)
1471		softint_schedule(rxq->rxq_softint);
1472
1473out:
1474	mutex_exit(rxq->rxq_lock);
1475	return r;
1476}
1477
1478/* softint: enqueue receive requests for new incoming packets */
1479static void
1480vioif_rx_softint(void *arg)
1481{
1482	struct vioif_rxqueue *rxq = arg;
1483
1484	vioif_populate_rx_mbufs(rxq);
1485}
1486
1487/* free all the mbufs; called from if_stop(disable) */
1488static void
1489vioif_rx_drain(struct vioif_rxqueue *rxq)
1490{
1491	struct virtqueue *vq = rxq->rxq_vq;
1492	int i;
1493
1494	for (i = 0; i < vq->vq_num; i++) {
1495		if (rxq->rxq_mbufs[i] == NULL)
1496			continue;
1497		vioif_free_rx_mbuf(rxq, i);
1498	}
1499}
1500
1501/*
1502 * Transmission implementation
1503 */
1504/* actual transmission is done in if_start */
1505/* tx interrupt; dequeue and free mbufs */
1506/*
1507 * this runs both as the tx vq interrupt handler and directly from
1508 * vioif_watchdog(); it reclaims completed tx buffers
1509 */
1510static int
1511vioif_tx_intr(void *arg)
1512{
1513	struct vioif_txqueue *txq = arg;
1514	struct virtqueue *vq = txq->txq_vq;
1515	struct virtio_softc *vsc = vq->vq_owner;
1516	struct vioif_softc *sc = device_private(virtio_child(vsc));
1517	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1518	int r = 0;
1519
1520	mutex_enter(txq->txq_lock);
1521
1522	if (txq->txq_stopping)
1523		goto out;
1524
1525	r = vioif_tx_deq_locked(vq);
1526
1527out:
1528	mutex_exit(txq->txq_lock);
1529	if (r) {
1530		if_schedule_deferred_start(ifp);
1531
1532		KASSERT(txq->txq_deferred_transmit != NULL);
1533		softint_schedule(txq->txq_deferred_transmit);
1534	}
1535	return r;
1536}
1537
1538static int
1539vioif_tx_deq_locked(struct virtqueue *vq)
1540{
1541	struct virtio_softc *vsc = vq->vq_owner;
1542	struct vioif_softc *sc = device_private(virtio_child(vsc));
1543	struct vioif_txqueue *txq = vq->vq_intrhand_arg;
1544	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1545	struct mbuf *m;
1546	int r = 0;
1547	int slot, len;
1548
1549	KASSERT(mutex_owned(txq->txq_lock));
1550
1551	if (virtio_vq_is_enqueued(vsc, vq) == false)
1552		return r;
1553
1554	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
1555		r++;
1556		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_hdr_dmamaps[slot],
1557		    0, sizeof(struct virtio_net_hdr), BUS_DMASYNC_POSTWRITE);
1558		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_dmamaps[slot],
1559		    0, txq->txq_dmamaps[slot]->dm_mapsize,
1560		    BUS_DMASYNC_POSTWRITE);
1561		m = txq->txq_mbufs[slot];
1562		bus_dmamap_unload(virtio_dmat(vsc), txq->txq_dmamaps[slot]);
1563		txq->txq_mbufs[slot] = NULL;
1564		virtio_dequeue_commit(vsc, vq, slot);
1565		if_statinc(ifp, if_opackets);
1566		m_freem(m);
1567	}
1568
1569	if (r)
1570		ifp->if_flags &= ~IFF_OACTIVE;
1571	return r;
1572}
1573
1574/* free all the mbufs already put on vq; called from if_stop(disable) */
1575static void
1576vioif_tx_drain(struct vioif_txqueue *txq)
1577{
1578	struct virtqueue *vq = txq->txq_vq;
1579	struct virtio_softc *vsc = vq->vq_owner;
1580	int i;
1581
1582	KASSERT(txq->txq_stopping);
1583
1584	for (i = 0; i < vq->vq_num; i++) {
1585		if (txq->txq_mbufs[i] == NULL)
1586			continue;
1587		bus_dmamap_unload(virtio_dmat(vsc), txq->txq_dmamaps[i]);
1588		m_freem(txq->txq_mbufs[i]);
1589		txq->txq_mbufs[i] = NULL;
1590	}
1591}
1592
1593/*
1594 * Control vq
1595 */
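/*
 * A command is issued by acquiring the shared ctrlq buffers
 * (vioif_ctrl_acquire), filling them in, calling vioif_ctrl_send_command()
 * and finally releasing the buffers (vioif_ctrl_release); see
 * vioif_ctrl_rx() for the typical sequence.
 */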
1596/* acquire exclusive use of the control virtqueue buffers */
1597static void
1598vioif_ctrl_acquire(struct vioif_softc *sc)
1599{
1600	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1601
1602	mutex_enter(&ctrlq->ctrlq_wait_lock);
1603	while (ctrlq->ctrlq_inuse != FREE)
1604		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
1605	ctrlq->ctrlq_inuse = INUSE;
1606	ctrlq->ctrlq_owner = curlwp;
1607	mutex_exit(&ctrlq->ctrlq_wait_lock);
1608}
1609
1610static void
1611vioif_ctrl_release(struct vioif_softc *sc)
1612{
1613	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1614
1615	KASSERT(ctrlq->ctrlq_inuse != FREE);
1616	KASSERT(ctrlq->ctrlq_owner == curlwp);
1617
1618	mutex_enter(&ctrlq->ctrlq_wait_lock);
1619	ctrlq->ctrlq_inuse = FREE;
1620	ctrlq->ctrlq_owner = NULL;
1621	cv_signal(&ctrlq->ctrlq_wait);
1622	mutex_exit(&ctrlq->ctrlq_wait_lock);
1623}
1624
1625static int
1626vioif_ctrl_load_cmdspec(struct vioif_softc *sc,
1627    struct vioif_ctrl_cmdspec *specs, int nspecs)
1628{
1629	struct virtio_softc *vsc = sc->sc_virtio;
1630	int i, r, loaded;
1631
1632	loaded = 0;
1633	for (i = 0; i < nspecs; i++) {
1634		r = bus_dmamap_load(virtio_dmat(vsc),
1635		    specs[i].dmamap, specs[i].buf, specs[i].bufsize,
1636		    NULL, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1637		if (r) {
1638			aprint_error_dev(sc->sc_dev, "control command dmamap"
1639			    " load failed, error code %d\n", r);
1640			goto err;
1641		}
1642		loaded++;
1643
1644	}
1645
1646	return r;
1647
1648err:
1649	for (i = 0; i < loaded; i++) {
1650		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
1651	}
1652
1653	return r;
1654}
1655
1656static void
1657vioif_ctrl_unload_cmdspec(struct vioif_softc *sc,
1658    struct vioif_ctrl_cmdspec *specs, int nspecs)
1659{
1660	struct virtio_softc *vsc = sc->sc_virtio;
1661	int i;
1662
1663	for (i = 0; i < nspecs; i++) {
1664		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
1665	}
1666}
1667
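/*
 * Enqueue the command header (device-readable), the caller-supplied
 * buffers in specs[] (device-readable) and the status byte
 * (device-writable) as a single descriptor chain, notify the device and
 * sleep on ctrlq_wait until vioif_ctrl_intr() marks the command DONE.
 */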
1668static int
1669vioif_ctrl_send_command(struct vioif_softc *sc, uint8_t class, uint8_t cmd,
1670    struct vioif_ctrl_cmdspec *specs, int nspecs)
1671{
1672	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1673	struct virtqueue *vq = ctrlq->ctrlq_vq;
1674	struct virtio_softc *vsc = sc->sc_virtio;
1675	int i, r, slot;
1676
1677	ctrlq->ctrlq_cmd->class = class;
1678	ctrlq->ctrlq_cmd->command = cmd;
1679
1680	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap,
1681	    0, sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_PREWRITE);
1682	for (i = 0; i < nspecs; i++) {
1683		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap,
1684		    0, specs[i].bufsize, BUS_DMASYNC_PREWRITE);
1685	}
1686	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap,
1687	    0, sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_PREREAD);
1688
1689	r = virtio_enqueue_prep(vsc, vq, &slot);
1690	if (r != 0)
1691		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
1692	r = virtio_enqueue_reserve(vsc, vq, slot, nspecs + 2);
1693	if (r != 0)
1694		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
1695	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_cmd_dmamap, true);
1696	for (i = 0; i < nspecs; i++) {
1697		virtio_enqueue(vsc, vq, slot, specs[i].dmamap, true);
1698	}
1699	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_status_dmamap, false);
1700	virtio_enqueue_commit(vsc, vq, slot, true);
1701
1702	/* wait for done */
1703	mutex_enter(&ctrlq->ctrlq_wait_lock);
1704	while (ctrlq->ctrlq_inuse != DONE)
1705		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
1706	mutex_exit(&ctrlq->ctrlq_wait_lock);
1707	/* already dequeued */
1708
1709	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap, 0,
1710	    sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
1711	for (i = 0; i < nspecs; i++) {
1712		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap, 0,
1713		    specs[i].bufsize, BUS_DMASYNC_POSTWRITE);
1714	}
1715	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap, 0,
1716	    sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_POSTREAD);
1717
1718	if (ctrlq->ctrlq_status->ack == VIRTIO_NET_OK)
1719		r = 0;
1720	else {
1721		aprint_error_dev(sc->sc_dev, "control command failed\n");
1722		r = EIO;
1723	}
1724
1725	return r;
1726}
1727
1728static int
1729vioif_ctrl_rx(struct vioif_softc *sc, int cmd, bool onoff)
1730{
1731	struct virtio_net_ctrl_rx *rx = sc->sc_ctrlq.ctrlq_rx;
1732	struct vioif_ctrl_cmdspec specs[1];
1733	int r;
1734
1735	if (!sc->sc_has_ctrl)
1736		return ENOTSUP;
1737
1738	vioif_ctrl_acquire(sc);
1739
1740	rx->onoff = onoff;
1741	specs[0].dmamap = sc->sc_ctrlq.ctrlq_rx_dmamap;
1742	specs[0].buf = rx;
1743	specs[0].bufsize = sizeof(*rx);
1744
1745	r = vioif_ctrl_send_command(sc, VIRTIO_NET_CTRL_RX, cmd,
1746	    specs, __arraycount(specs));
1747
1748	vioif_ctrl_release(sc);
1749	return r;
1750}
1751
1752static int
1753vioif_set_promisc(struct vioif_softc *sc, bool onoff)
1754{
1755	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, onoff);
1756}
1757
1758static int
1759vioif_set_allmulti(struct vioif_softc *sc, bool onoff)
1760{
1761	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, onoff);
1762}
1763
1764/* issue VIRTIO_NET_CTRL_MAC_TABLE_SET command and wait for completion */
1765static int
1766vioif_set_rx_filter(struct vioif_softc *sc)
1767{
1768	/* filters are already stored in ctrlq_mac_tbl_uc and ctrlq_mac_tbl_mc */
1769	struct virtio_net_ctrl_mac_tbl *mac_tbl_uc, *mac_tbl_mc;
1770	struct vioif_ctrl_cmdspec specs[2];
1771	int nspecs = __arraycount(specs);
1772	int r;
1773
1774	mac_tbl_uc = sc->sc_ctrlq.ctrlq_mac_tbl_uc;
1775	mac_tbl_mc = sc->sc_ctrlq.ctrlq_mac_tbl_mc;
1776
1777	if (!sc->sc_has_ctrl)
1778		return ENOTSUP;
1779
1780	vioif_ctrl_acquire(sc);
1781
1782	specs[0].dmamap = sc->sc_ctrlq.ctrlq_tbl_uc_dmamap;
1783	specs[0].buf = mac_tbl_uc;
1784	specs[0].bufsize = sizeof(*mac_tbl_uc)
1785	    + (ETHER_ADDR_LEN * mac_tbl_uc->nentries);
1786
1787	specs[1].dmamap = sc->sc_ctrlq.ctrlq_tbl_mc_dmamap;
1788	specs[1].buf = mac_tbl_mc;
1789	specs[1].bufsize = sizeof(*mac_tbl_mc)
1790	    + (ETHER_ADDR_LEN * mac_tbl_mc->nentries);
1791
1792	r = vioif_ctrl_load_cmdspec(sc, specs, nspecs);
1793	if (r != 0)
1794		goto out;
1795
1796	r = vioif_ctrl_send_command(sc,
1797	    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET,
1798	    specs, nspecs);
1799
1800	vioif_ctrl_unload_cmdspec(sc, specs, nspecs);
1801
1802out:
1803	vioif_ctrl_release(sc);
1804
1805	return r;
1806}
1807
1808static int
1809vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *sc, int nvq_pairs)
1810{
1811	struct virtio_net_ctrl_mq *mq = sc->sc_ctrlq.ctrlq_mq;
1812	struct vioif_ctrl_cmdspec specs[1];
1813	int r;
1814
1815	if (!sc->sc_has_ctrl)
1816		return ENOTSUP;
1817
1818	if (nvq_pairs <= 1)
1819		return EINVAL;
1820
1821	vioif_ctrl_acquire(sc);
1822
1823	mq->virtqueue_pairs = nvq_pairs;
1824	specs[0].dmamap = sc->sc_ctrlq.ctrlq_mq_dmamap;
1825	specs[0].buf = mq;
1826	specs[0].bufsize = sizeof(*mq);
1827
1828	r = vioif_ctrl_send_command(sc,
1829	    VIRTIO_NET_CTRL_MQ, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
1830	    specs, __arraycount(specs));
1831
1832	vioif_ctrl_release(sc);
1833
1834	return r;
1835}
1836
1837/* ctrl vq interrupt; wake up the command issuer */
1838static int
1839vioif_ctrl_intr(void *arg)
1840{
1841	struct vioif_ctrlqueue *ctrlq = arg;
1842	struct virtqueue *vq = ctrlq->ctrlq_vq;
1843	struct virtio_softc *vsc = vq->vq_owner;
1844	int r, slot;
1845
1846	if (virtio_vq_is_enqueued(vsc, vq) == false)
1847		return 0;
1848
1849	r = virtio_dequeue(vsc, vq, &slot, NULL);
1850	if (r == ENOENT)
1851		return 0;
1852	virtio_dequeue_commit(vsc, vq, slot);
1853
1854	mutex_enter(&ctrlq->ctrlq_wait_lock);
1855	ctrlq->ctrlq_inuse = DONE;
1856	cv_signal(&ctrlq->ctrlq_wait);
1857	mutex_exit(&ctrlq->ctrlq_wait_lock);
1858
1859	return 1;
1860}
1861
1862/*
1863 * If IFF_PROMISC is requested, enable promiscuous mode.
1864 * If the multicast filter is small enough (<= MAXENTRIES), program the rx filter.
1865 * If the multicast filter is too large, use ALLMULTI instead.
1866 */
1867/*
1868 * If setting the rx filter fails, fall back to ALLMULTI.
1869 * If ALLMULTI fails, fall back to PROMISC.
1870 */
1871static int
1872vioif_rx_filter(struct vioif_softc *sc)
1873{
1874	struct ethercom *ec = &sc->sc_ethercom;
1875	struct ifnet *ifp = &ec->ec_if;
1876	struct ether_multi *enm;
1877	struct ether_multistep step;
1878	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1879	int nentries;
1880	int promisc = 0, allmulti = 0, rxfilter = 0;
1881	int r;
1882
1883	if (!sc->sc_has_ctrl) {	/* no ctrl vq; always promisc */
1884		ifp->if_flags |= IFF_PROMISC;
1885		return 0;
1886	}
1887
1888	if (ifp->if_flags & IFF_PROMISC) {
1889		promisc = 1;
1890		goto set;
1891	}
1892
1893	nentries = -1;
1894	ETHER_LOCK(ec);
1895	ETHER_FIRST_MULTI(step, ec, enm);
1896	while (nentries++, enm != NULL) {
1897		if (nentries >= VIRTIO_NET_CTRL_MAC_MAXENTRIES) {
1898			allmulti = 1;
1899			goto set_unlock;
1900		}
1901		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
1902			allmulti = 1;
1903			goto set_unlock;
1904		}
1905		memcpy(ctrlq->ctrlq_mac_tbl_mc->macs[nentries],
1906		    enm->enm_addrlo, ETHER_ADDR_LEN);
1907		ETHER_NEXT_MULTI(step, enm);
1908	}
1909	rxfilter = 1;
1910
1911set_unlock:
1912	ETHER_UNLOCK(ec);
1913
1914set:
1915	if (rxfilter) {
1916		ctrlq->ctrlq_mac_tbl_uc->nentries = 0;
1917		ctrlq->ctrlq_mac_tbl_mc->nentries = nentries;
1918		r = vioif_set_rx_filter(sc);
1919		if (r != 0) {
1920			rxfilter = 0;
1921			allmulti = 1; /* fallback */
1922		}
1923	} else {
1924		/* remove rx filter */
1925		ctrlq->ctrlq_mac_tbl_uc->nentries = 0;
1926		ctrlq->ctrlq_mac_tbl_mc->nentries = 0;
1927		r = vioif_set_rx_filter(sc);
1928		/* what to do on failure? */
1929	}
1930	if (allmulti) {
1931		r = vioif_set_allmulti(sc, true);
1932		if (r != 0) {
1933			allmulti = 0;
1934			promisc = 1; /* fallback */
1935		}
1936	} else {
1937		r = vioif_set_allmulti(sc, false);
1938		/* what to do on failure? */
1939	}
1940	if (promisc) {
1941		r = vioif_set_promisc(sc, true);
1942	} else {
1943		r = vioif_set_promisc(sc, false);
1944	}
1945
1946	return r;
1947}
1948
1949static bool
1950vioif_is_link_up(struct vioif_softc *sc)
1951{
1952	struct virtio_softc *vsc = sc->sc_virtio;
1953	uint16_t status;
1954
1955	if (virtio_features(vsc) & VIRTIO_NET_F_STATUS)
1956		status = virtio_read_device_config_2(vsc,
1957		    VIRTIO_NET_CONFIG_STATUS);
1958	else
1959		status = VIRTIO_NET_S_LINK_UP;
1960
1961	return ((status & VIRTIO_NET_S_LINK_UP) != 0);
1962}
1963
1964/* change link status */
1965static void
1966vioif_update_link_status(struct vioif_softc *sc)
1967{
1968	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1969	struct vioif_txqueue *txq;
1970	bool active, changed;
1971	int link, i;
1972
1973	active = vioif_is_link_up(sc);
1974	changed = false;
1975
1976	if (active) {
1977		if (!sc->sc_link_active)
1978			changed = true;
1979
1980		link = LINK_STATE_UP;
1981		sc->sc_link_active = true;
1982	} else {
1983		if (sc->sc_link_active)
1984			changed = true;
1985
1986		link = LINK_STATE_DOWN;
1987		sc->sc_link_active = false;
1988	}
1989
1990	if (changed) {
1991		for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1992			txq = &sc->sc_txq[i];
1993
1994			mutex_enter(txq->txq_lock);
1995			txq->txq_link_active = sc->sc_link_active;
1996			mutex_exit(txq->txq_lock);
1997		}
1998
1999		if_link_state_change(ifp, link);
2000	}
2001}
2002
2003static int
2004vioif_config_change(struct virtio_softc *vsc)
2005{
2006	struct vioif_softc *sc = device_private(virtio_child(vsc));
2007
2008	softint_schedule(sc->sc_ctl_softint);
2009	return 0;
2010}
2011
2012static void
2013vioif_ctl_softint(void *arg)
2014{
2015	struct vioif_softc *sc = arg;
2016	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2017
2018	vioif_update_link_status(sc);
2019	vioif_start(ifp);
2020}
2021
2022MODULE(MODULE_CLASS_DRIVER, if_vioif, "virtio");
2023
2024#ifdef _MODULE
2025#include "ioconf.c"
2026#endif
2027
2028static int
2029if_vioif_modcmd(modcmd_t cmd, void *opaque)
2030{
2031	int error = 0;
2032
2033#ifdef _MODULE
2034	switch (cmd) {
2035	case MODULE_CMD_INIT:
2036		error = config_init_component(cfdriver_ioconf_if_vioif,
2037		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2038		break;
2039	case MODULE_CMD_FINI:
2040		error = config_fini_component(cfdriver_ioconf_if_vioif,
2041		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2042		break;
2043	default:
2044		error = ENOTTY;
2045		break;
2046	}
2047#endif
2048
2049	return error;
2050}
2051