/*	$NetBSD: if_vioif.c,v 1.44 2019/01/14 14:52:57 yamaguchi Exp $	*/

/*
 * Copyright (c) 2010 Minoura Makoto.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: if_vioif.c,v 1.44 2019/01/14 14:52:57 yamaguchi Exp $");

#ifdef _KERNEL_OPT
#include "opt_net_mpsafe.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/condvar.h>
#include <sys/device.h>
#include <sys/intr.h>
#include <sys/kmem.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/sockio.h>
#include <sys/cpu.h>
#include <sys/module.h>

#include <dev/pci/virtioreg.h>
#include <dev/pci/virtiovar.h>

#include <net/if.h>
#include <net/if_media.h>
#include <net/if_ether.h>

#include <net/bpf.h>

#include "ioconf.h"

#ifdef NET_MPSAFE
#define VIOIF_MPSAFE	1
#endif

#ifdef SOFTINT_INTR
#define VIOIF_SOFTINT_INTR	1
#endif

/*
 * if_vioifreg.h:
 */
/* Configuration registers */
#define VIRTIO_NET_CONFIG_MAC		0 /* 8bit x 6byte */
#define VIRTIO_NET_CONFIG_STATUS	6 /* 16bit */

/* Feature bits */
#define VIRTIO_NET_F_CSUM	(1<<0)
#define VIRTIO_NET_F_GUEST_CSUM	(1<<1)
#define VIRTIO_NET_F_MAC	(1<<5)
#define VIRTIO_NET_F_GSO	(1<<6)
#define VIRTIO_NET_F_GUEST_TSO4	(1<<7)
#define VIRTIO_NET_F_GUEST_TSO6	(1<<8)
#define VIRTIO_NET_F_GUEST_ECN	(1<<9)
#define VIRTIO_NET_F_GUEST_UFO	(1<<10)
#define VIRTIO_NET_F_HOST_TSO4	(1<<11)
#define VIRTIO_NET_F_HOST_TSO6	(1<<12)
#define VIRTIO_NET_F_HOST_ECN	(1<<13)
#define VIRTIO_NET_F_HOST_UFO	(1<<14)
#define VIRTIO_NET_F_MRG_RXBUF	(1<<15)
#define VIRTIO_NET_F_STATUS	(1<<16)
#define VIRTIO_NET_F_CTRL_VQ	(1<<17)
#define VIRTIO_NET_F_CTRL_RX	(1<<18)
#define VIRTIO_NET_F_CTRL_VLAN	(1<<19)

#define VIRTIO_NET_FLAG_BITS \
	VIRTIO_COMMON_FLAG_BITS \
	"\x14""CTRL_VLAN" \
	"\x13""CTRL_RX" \
	"\x12""CTRL_VQ" \
	"\x11""STATUS" \
	"\x10""MRG_RXBUF" \
	"\x0f""HOST_UFO" \
	"\x0e""HOST_ECN" \
	"\x0d""HOST_TSO6" \
	"\x0c""HOST_TSO4" \
	"\x0b""GUEST_UFO" \
	"\x0a""GUEST_ECN" \
	"\x09""GUEST_TSO6" \
	"\x08""GUEST_TSO4" \
	"\x07""GSO" \
	"\x06""MAC" \
	"\x02""GUEST_CSUM" \
	"\x01""CSUM"
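/*
 * Note: the entries above use the old-style snprintb(3) bit-description
 * format, where each position byte is the bit number plus one; e.g.
 * "\x14" (20) names bit 19, matching VIRTIO_NET_F_CTRL_VLAN = (1<<19).
 */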

/* Status */
#define VIRTIO_NET_S_LINK_UP	1

/* Packet header structure */
struct virtio_net_hdr {
	uint8_t		flags;
	uint8_t		gso_type;
	uint16_t	hdr_len;
	uint16_t	gso_size;
	uint16_t	csum_start;
	uint16_t	csum_offset;
#if 0
	uint16_t	num_buffers; /* if VIRTIO_NET_F_MRG_RXBUF enabled */
#endif
} __packed;

#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
#define VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
#define VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
#define VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
#define VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
#define VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */

#define VIRTIO_NET_MAX_GSO_LEN		(65536+ETHER_HDR_LEN)

/* Control virtqueue */
struct virtio_net_ctrl_cmd {
	uint8_t	class;
	uint8_t	command;
} __packed;
#define VIRTIO_NET_CTRL_RX		0
# define VIRTIO_NET_CTRL_RX_PROMISC	0
# define VIRTIO_NET_CTRL_RX_ALLMULTI	1

#define VIRTIO_NET_CTRL_MAC		1
# define VIRTIO_NET_CTRL_MAC_TABLE_SET	0

#define VIRTIO_NET_CTRL_VLAN		2
# define VIRTIO_NET_CTRL_VLAN_ADD	0
# define VIRTIO_NET_CTRL_VLAN_DEL	1

struct virtio_net_ctrl_status {
	uint8_t	ack;
} __packed;
#define VIRTIO_NET_OK			0
#define VIRTIO_NET_ERR			1

struct virtio_net_ctrl_rx {
	uint8_t	onoff;
} __packed;

struct virtio_net_ctrl_mac_tbl {
	uint32_t nentries;
	uint8_t macs[][ETHER_ADDR_LEN];
} __packed;

struct virtio_net_ctrl_vlan {
	uint16_t id;
} __packed;

struct vioif_ctrl_cmdspec {
	bus_dmamap_t	dmamap;
	void		*buf;
	bus_size_t	bufsize;
};

/*
 * if_vioifvar.h:
 */

/*
 * Locking notes:
 * + fields in vioif_txqueue are protected by txq_lock (a spin mutex), and
 *   fields in vioif_rxqueue are protected by rxq_lock (a spin mutex).
 *      - more than one of these locks cannot be held at once
 * + ctrlq_inuse is protected by ctrlq_wait_lock.
 *      - other fields in vioif_ctrlqueue are protected by ctrlq_inuse
 *      - txq_lock or rxq_lock cannot be held along with ctrlq_wait_lock
 */

struct vioif_txqueue {
	kmutex_t		*txq_lock;	/* lock for tx operations */

	struct virtqueue	*txq_vq;
	bool			txq_stopping;
	bool			txq_link_active;

	struct virtio_net_hdr	*txq_hdrs;
	bus_dmamap_t		*txq_hdr_dmamaps;

	struct mbuf		**txq_mbufs;
	bus_dmamap_t		*txq_dmamaps;
};

struct vioif_rxqueue {
	kmutex_t		*rxq_lock;	/* lock for rx operations */

	struct virtqueue	*rxq_vq;
	bool			rxq_stopping;

	struct virtio_net_hdr	*rxq_hdrs;
	bus_dmamap_t		*rxq_hdr_dmamaps;

	struct mbuf		**rxq_mbufs;
	bus_dmamap_t		*rxq_dmamaps;

	void			*rxq_softint;
};

struct vioif_ctrlqueue {
	struct virtqueue		*ctrlq_vq;
	enum {
		FREE, INUSE, DONE
	}				ctrlq_inuse;
	kcondvar_t			ctrlq_wait;
	kmutex_t			ctrlq_wait_lock;
	struct lwp			*ctrlq_owner;

	struct virtio_net_ctrl_cmd	*ctrlq_cmd;
	struct virtio_net_ctrl_status	*ctrlq_status;
	struct virtio_net_ctrl_rx	*ctrlq_rx;
	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_uc;
	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_mc;

	bus_dmamap_t			ctrlq_cmd_dmamap;
	bus_dmamap_t			ctrlq_status_dmamap;
	bus_dmamap_t			ctrlq_rx_dmamap;
	bus_dmamap_t			ctrlq_tbl_uc_dmamap;
	bus_dmamap_t			ctrlq_tbl_mc_dmamap;
};
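/*
 * ctrlq_inuse cycles FREE -> INUSE (a command has been issued) -> DONE
 * (the completion interrupt has fired) -> FREE (the issuer has consumed
 * the status); see vioif_ctrl_acquire/release and vioif_ctrl_vq_done.
 */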

struct vioif_softc {
	device_t		sc_dev;

	struct virtio_softc	*sc_virtio;
	struct virtqueue	sc_vq[3];
#define VQ_RX	0
#define VQ_TX	1
#define VQ_CTRL	2

	uint8_t			sc_mac[ETHER_ADDR_LEN];
	struct ethercom		sc_ethercom;
	short			sc_deferred_init_done;
	bool			sc_link_active;

	struct vioif_txqueue	sc_txq;
	struct vioif_rxqueue	sc_rxq;

	bool			sc_has_ctrl;
	struct vioif_ctrlqueue	sc_ctrlq;

	bus_dma_segment_t	sc_hdr_segs[1];
	void			*sc_dmamem;
	void			*sc_kmem;

	void			*sc_ctl_softint;
};
#define VIRTIO_NET_TX_MAXNSEGS		(16) /* XXX */
#define VIRTIO_NET_CTRL_MAC_MAXENTRIES	(64) /* XXX */

#define VIOIF_TXQ_LOCK(_q)	mutex_enter((_q)->txq_lock)
#define VIOIF_TXQ_UNLOCK(_q)	mutex_exit((_q)->txq_lock)
#define VIOIF_TXQ_LOCKED(_q)	mutex_owned((_q)->txq_lock)

#define VIOIF_RXQ_LOCK(_q)	mutex_enter((_q)->rxq_lock)
#define VIOIF_RXQ_UNLOCK(_q)	mutex_exit((_q)->rxq_lock)
#define VIOIF_RXQ_LOCKED(_q)	mutex_owned((_q)->rxq_lock)

/* cfattach interface functions */
static int	vioif_match(device_t, cfdata_t, void *);
static void	vioif_attach(device_t, device_t, void *);
static void	vioif_deferred_init(device_t);

/* ifnet interface functions */
static int	vioif_init(struct ifnet *);
static void	vioif_stop(struct ifnet *, int);
static void	vioif_start(struct ifnet *);
static int	vioif_ioctl(struct ifnet *, u_long, void *);
static void	vioif_watchdog(struct ifnet *);

/* rx */
static int	vioif_add_rx_mbuf(struct vioif_softc *, int);
static void	vioif_free_rx_mbuf(struct vioif_softc *, int);
static void	vioif_populate_rx_mbufs(struct vioif_softc *);
static void	vioif_populate_rx_mbufs_locked(struct vioif_softc *);
static int	vioif_rx_deq(struct vioif_softc *);
static int	vioif_rx_deq_locked(struct vioif_softc *);
static int	vioif_rx_vq_done(struct virtqueue *);
static void	vioif_rx_softint(void *);
static void	vioif_rx_drain(struct vioif_softc *);

/* tx */
static int	vioif_tx_vq_done(struct virtqueue *);
static int	vioif_tx_vq_done_locked(struct virtqueue *);
static void	vioif_tx_drain(struct vioif_softc *);

/* other control */
static bool	vioif_is_link_up(struct vioif_softc *);
static void	vioif_update_link_status(struct vioif_softc *);
static int	vioif_ctrl_rx(struct vioif_softc *, int, bool);
static int	vioif_set_promisc(struct vioif_softc *, bool);
static int	vioif_set_allmulti(struct vioif_softc *, bool);
static int	vioif_set_rx_filter(struct vioif_softc *);
static int	vioif_rx_filter(struct vioif_softc *);
static int	vioif_ctrl_vq_done(struct virtqueue *);
static int	vioif_config_change(struct virtio_softc *);
static void	vioif_ctl_softint(void *);

CFATTACH_DECL_NEW(vioif, sizeof(struct vioif_softc),
		  vioif_match, vioif_attach, NULL, NULL);

static int
vioif_match(device_t parent, cfdata_t match, void *aux)
{
	struct virtio_attach_args *va = aux;

	if (va->sc_childdevid == PCI_PRODUCT_VIRTIO_NETWORK)
		return 1;

	return 0;
}

/* allocate memory */
/*
 * dma memory is used for:
 *   rxq_hdrs[slot]:	 metadata array for received frames (READ)
 *   txq_hdrs[slot]:	 metadata array for frames to be sent (WRITE)
 *   ctrlq_cmd:		 command to be sent via ctrl vq (WRITE)
 *   ctrlq_status:	 return value for a command via ctrl vq (READ)
 *   ctrlq_rx:		 parameter for a VIRTIO_NET_CTRL_RX class command
 *			 (WRITE)
 *   ctrlq_mac_tbl_uc:	 unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
 *			 class command (WRITE)
 *   ctrlq_mac_tbl_mc:	 multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
 *			 class command (WRITE)
 * Only one instance of each ctrlq_* structure is allocated; they are
 * protected by the ctrlq_inuse variable and the ctrlq_wait condvar.
 */
/*
 * dynamically allocated memory is used for:
 *   rxq_hdr_dmamaps[slot]:	bus_dmamap_t array for sc_rx_hdrs[slot]
 *   txq_hdr_dmamaps[slot]:	bus_dmamap_t array for sc_tx_hdrs[slot]
 *   rxq_dmamaps[slot]:		bus_dmamap_t array for received payload
 *   txq_dmamaps[slot]:		bus_dmamap_t array for sent payload
 *   rxq_mbufs[slot]:		mbuf pointer array for received frames
 *   txq_mbufs[slot]:		mbuf pointer array for sent frames
 */
static int
vioif_alloc_mems(struct vioif_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	struct vioif_txqueue *txq = &sc->sc_txq;
	struct vioif_rxqueue *rxq = &sc->sc_rxq;
	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
	int allocsize, allocsize2, r, rsegs, i;
	void *vaddr;
	intptr_t p;
	int rxqsize, txqsize;

	rxqsize = rxq->rxq_vq->vq_num;
	txqsize = txq->txq_vq->vq_num;

	allocsize = sizeof(struct virtio_net_hdr) * rxqsize;
	allocsize += sizeof(struct virtio_net_hdr) * txqsize;
	if (sc->sc_has_ctrl) {
		allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1;
		allocsize += sizeof(struct virtio_net_ctrl_status) * 1;
		allocsize += sizeof(struct virtio_net_ctrl_rx) * 1;
		allocsize += sizeof(struct virtio_net_ctrl_mac_tbl)
			+ sizeof(struct virtio_net_ctrl_mac_tbl)
			+ ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES;
	}
	r = bus_dmamem_alloc(virtio_dmat(vsc), allocsize, 0, 0,
			     &sc->sc_hdr_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
	if (r != 0) {
		aprint_error_dev(sc->sc_dev,
				 "DMA memory allocation failed, size %d, "
				 "error code %d\n", allocsize, r);
		goto err_none;
	}
	r = bus_dmamem_map(virtio_dmat(vsc),
			   &sc->sc_hdr_segs[0], 1, allocsize,
			   &vaddr, BUS_DMA_NOWAIT);
	if (r != 0) {
		aprint_error_dev(sc->sc_dev,
				 "DMA memory map failed, "
				 "error code %d\n", r);
		goto err_dmamem_alloc;
	}

#define P(p, p0, p0size)	do { p0 = (void *) p;		\
				     p += p0size; } while (0)
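/*
 * P() carves the next p0size bytes off the region at p: it points p0 at
 * the current position and advances p past it.
 */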
	memset(vaddr, 0, allocsize);
	sc->sc_dmamem = vaddr;
	p = (intptr_t) vaddr;

	P(p, rxq->rxq_hdrs, sizeof(rxq->rxq_hdrs[0]) * rxqsize);
	P(p, txq->txq_hdrs, sizeof(txq->txq_hdrs[0]) * txqsize);
	if (sc->sc_has_ctrl) {
		P(p, ctrlq->ctrlq_cmd, sizeof(*ctrlq->ctrlq_cmd));
		P(p, ctrlq->ctrlq_status, sizeof(*ctrlq->ctrlq_status));
		P(p, ctrlq->ctrlq_rx, sizeof(*ctrlq->ctrlq_rx));
		P(p, ctrlq->ctrlq_mac_tbl_uc, sizeof(*ctrlq->ctrlq_mac_tbl_uc) + 0);
		P(p, ctrlq->ctrlq_mac_tbl_mc,
		    (sizeof(*ctrlq->ctrlq_mac_tbl_mc)
		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES));
	}

	allocsize2 = sizeof(bus_dmamap_t) * (rxqsize + txqsize);
	allocsize2 += sizeof(bus_dmamap_t) * (rxqsize + txqsize);
	allocsize2 += sizeof(struct mbuf*) * (rxqsize + txqsize);
	vaddr = kmem_zalloc(allocsize2, KM_SLEEP);
	sc->sc_kmem = vaddr;
	p = (intptr_t) vaddr;

	P(p, rxq->rxq_hdr_dmamaps, sizeof(rxq->rxq_hdr_dmamaps[0]) * rxqsize);
	P(p, txq->txq_hdr_dmamaps, sizeof(txq->txq_hdr_dmamaps[0]) * txqsize);
	P(p, rxq->rxq_dmamaps, sizeof(rxq->rxq_dmamaps[0]) * rxqsize);
	P(p, txq->txq_dmamaps, sizeof(txq->txq_dmamaps[0]) * txqsize);
	P(p, rxq->rxq_mbufs, sizeof(rxq->rxq_mbufs[0]) * rxqsize);
	P(p, txq->txq_mbufs, sizeof(txq->txq_mbufs[0]) * txqsize);
#undef P

#define C(map, size, nsegs, usage)						\
	do {									\
		r = bus_dmamap_create(virtio_dmat(vsc), size, nsegs, size, 0,	\
				      BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW,		\
				      &map);					\
		if (r != 0) {							\
			aprint_error_dev(sc->sc_dev,				\
			    "%s dmamap creation failed, "			\
			    "error code %d\n", usage, r);			\
			goto err_reqs;						\
		}								\
	} while (0)
#define C_L(map, buf, size, nsegs, rw, usage)				\
	C(map, size, nsegs, usage);					\
	do {								\
		r = bus_dmamap_load(virtio_dmat(vsc), map,		\
				    buf, size, NULL,			\
				    rw | BUS_DMA_NOWAIT);		\
		if (r != 0) {						\
			aprint_error_dev(sc->sc_dev,			\
					 usage " dmamap load failed, "	\
					 "error code %d\n", r);		\
			goto err_reqs;					\
		}							\
	} while (0)
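/*
 * C() creates a dmamap sized for the buffer; C_L() additionally loads it
 * with a fixed kernel buffer in the given transfer direction.
 */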
	for (i = 0; i < rxqsize; i++) {
		C_L(rxq->rxq_hdr_dmamaps[i], &rxq->rxq_hdrs[i],
		    sizeof(rxq->rxq_hdrs[0]), 1,
		    BUS_DMA_READ, "rx header");
		C(rxq->rxq_dmamaps[i], MCLBYTES, 1, "rx payload");
	}

	for (i = 0; i < txqsize; i++) {
		/*
		 * tx headers flow from memory to the device, so load
		 * with BUS_DMA_WRITE (the syncs below use PREWRITE too)
		 */
		C_L(txq->txq_hdr_dmamaps[i], &txq->txq_hdrs[i],
		    sizeof(txq->txq_hdrs[0]), 1,
		    BUS_DMA_WRITE, "tx header");
		C(txq->txq_dmamaps[i], ETHER_MAX_LEN,
		    VIRTIO_NET_TX_MAXNSEGS, "tx payload");
	}

	if (sc->sc_has_ctrl) {
		/* control vq class & command */
		C_L(ctrlq->ctrlq_cmd_dmamap,
		    ctrlq->ctrlq_cmd, sizeof(*ctrlq->ctrlq_cmd), 1,
		    BUS_DMA_WRITE, "control command");
		C_L(ctrlq->ctrlq_status_dmamap,
		    ctrlq->ctrlq_status, sizeof(*ctrlq->ctrlq_status), 1,
		    BUS_DMA_READ, "control status");

		/* control vq rx mode command parameter */
		C_L(ctrlq->ctrlq_rx_dmamap,
		    ctrlq->ctrlq_rx, sizeof(*ctrlq->ctrlq_rx), 1,
		    BUS_DMA_WRITE, "rx mode control command");

		/* control vq MAC filter table for unicast */
		/* do not load now since its length is variable */
		C(ctrlq->ctrlq_tbl_uc_dmamap,
		    sizeof(*ctrlq->ctrlq_mac_tbl_uc) + 0, 1,
		    "unicast MAC address filter command");

		/* control vq MAC filter table for multicast */
		C(ctrlq->ctrlq_tbl_mc_dmamap,
		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES, 1,
		    "multicast MAC address filter command");
	}
#undef C_L
#undef C

	return 0;

err_reqs:
#define D(map)								\
	do {								\
		if (map) {						\
			bus_dmamap_destroy(virtio_dmat(vsc), map);	\
			map = NULL;					\
		}							\
	} while (0)
	D(ctrlq->ctrlq_tbl_mc_dmamap);
	D(ctrlq->ctrlq_tbl_uc_dmamap);
	D(ctrlq->ctrlq_rx_dmamap);
	D(ctrlq->ctrlq_status_dmamap);
	D(ctrlq->ctrlq_cmd_dmamap);
	for (i = 0; i < txqsize; i++) {
		D(txq->txq_dmamaps[i]);
		D(txq->txq_hdr_dmamaps[i]);
	}
	for (i = 0; i < rxqsize; i++) {
		D(rxq->rxq_dmamaps[i]);
		D(rxq->rxq_hdr_dmamaps[i]);
	}
#undef D
	if (sc->sc_kmem) {
		kmem_free(sc->sc_kmem, allocsize2);
		sc->sc_kmem = NULL;
	}
	bus_dmamem_unmap(virtio_dmat(vsc), sc->sc_dmamem, allocsize);
err_dmamem_alloc:
	bus_dmamem_free(virtio_dmat(vsc), &sc->sc_hdr_segs[0], 1);
err_none:
	return -1;
}

static void
vioif_attach(device_t parent, device_t self, void *aux)
{
	struct vioif_softc *sc = device_private(self);
	struct virtio_softc *vsc = device_private(parent);
	struct vioif_txqueue *txq = &sc->sc_txq;
	struct vioif_rxqueue *rxq = &sc->sc_rxq;
	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
	uint32_t features;
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	u_int softint_flags;
	int r, nvqs=0, req_flags;

	if (virtio_child(vsc) != NULL) {
		aprint_normal(": child already attached for %s; "
			      "something wrong...\n",
			      device_xname(parent));
		return;
	}

	sc->sc_dev = self;
	sc->sc_virtio = vsc;
	sc->sc_link_active = false;

	req_flags = 0;

#ifdef VIOIF_MPSAFE
	req_flags |= VIRTIO_F_PCI_INTR_MPSAFE;
#endif
#ifdef VIOIF_SOFTINT_INTR
	req_flags |= VIRTIO_F_PCI_INTR_SOFTINT;
#endif
	req_flags |= VIRTIO_F_PCI_INTR_MSIX;

	virtio_child_attach_start(vsc, self, IPL_NET, sc->sc_vq,
	    vioif_config_change, virtio_vq_intr, req_flags,
	    (VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | VIRTIO_NET_F_CTRL_VQ |
	     VIRTIO_NET_F_CTRL_RX | VIRTIO_F_NOTIFY_ON_EMPTY),
	    VIRTIO_NET_FLAG_BITS);

	features = virtio_features(vsc);

	if (features & VIRTIO_NET_F_MAC) {
		sc->sc_mac[0] = virtio_read_device_config_1(vsc,
						    VIRTIO_NET_CONFIG_MAC+0);
		sc->sc_mac[1] = virtio_read_device_config_1(vsc,
						    VIRTIO_NET_CONFIG_MAC+1);
		sc->sc_mac[2] = virtio_read_device_config_1(vsc,
						    VIRTIO_NET_CONFIG_MAC+2);
		sc->sc_mac[3] = virtio_read_device_config_1(vsc,
						    VIRTIO_NET_CONFIG_MAC+3);
		sc->sc_mac[4] = virtio_read_device_config_1(vsc,
						    VIRTIO_NET_CONFIG_MAC+4);
		sc->sc_mac[5] = virtio_read_device_config_1(vsc,
						    VIRTIO_NET_CONFIG_MAC+5);
	} else {
		/* code stolen from sys/net/if_tap.c */
		struct timeval tv;
		uint32_t ui;
		getmicrouptime(&tv);
		ui = (tv.tv_sec ^ tv.tv_usec) & 0xffffff;
		memcpy(sc->sc_mac+3, (uint8_t *)&ui, 3);
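		/*
		 * Note: only the low three bytes are randomized here;
		 * sc_mac[0..2] stay zero, since the softc is zero-filled
		 * when the device is allocated.
		 */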
		virtio_write_device_config_1(vsc,
					     VIRTIO_NET_CONFIG_MAC+0,
					     sc->sc_mac[0]);
		virtio_write_device_config_1(vsc,
					     VIRTIO_NET_CONFIG_MAC+1,
					     sc->sc_mac[1]);
		virtio_write_device_config_1(vsc,
					     VIRTIO_NET_CONFIG_MAC+2,
					     sc->sc_mac[2]);
		virtio_write_device_config_1(vsc,
					     VIRTIO_NET_CONFIG_MAC+3,
					     sc->sc_mac[3]);
		virtio_write_device_config_1(vsc,
					     VIRTIO_NET_CONFIG_MAC+4,
					     sc->sc_mac[4]);
		virtio_write_device_config_1(vsc,
					     VIRTIO_NET_CONFIG_MAC+5,
					     sc->sc_mac[5]);
	}

	aprint_normal_dev(self, "Ethernet address %s\n", ether_sprintf(sc->sc_mac));

#ifdef VIOIF_MPSAFE
	softint_flags = SOFTINT_NET | SOFTINT_MPSAFE;
#else
	softint_flags = SOFTINT_NET;
#endif

	/*
	 * Allocating a virtqueue for Rx
	 */
	rxq->rxq_vq = &sc->sc_vq[VQ_RX];
	rxq->rxq_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);

	rxq->rxq_softint = softint_establish(softint_flags, vioif_rx_softint, sc);
	if (rxq->rxq_softint == NULL) {
		aprint_error_dev(self, "cannot establish rx softint\n");
		goto err;
	}
	r = virtio_alloc_vq(vsc, rxq->rxq_vq, VQ_RX,
	    MCLBYTES+sizeof(struct virtio_net_hdr), 2, "rx");
	if (r != 0)
		goto err;
	nvqs = 1;
	rxq->rxq_vq->vq_done = vioif_rx_vq_done;
	rxq->rxq_stopping = true;

	/*
	 * Allocating a virtqueue for Tx
	 */
	txq->txq_vq = &sc->sc_vq[VQ_TX];
	txq->txq_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
	r = virtio_alloc_vq(vsc, txq->txq_vq, VQ_TX,
	    (sizeof(struct virtio_net_hdr) + (ETHER_MAX_LEN - ETHER_HDR_LEN)),
	    VIRTIO_NET_TX_MAXNSEGS + 1, "tx");
	if (r != 0)
		goto err;
	nvqs = 2;
	txq->txq_vq->vq_done = vioif_tx_vq_done;
	txq->txq_link_active = sc->sc_link_active;
	txq->txq_stopping = false;

	virtio_start_vq_intr(vsc, rxq->rxq_vq);
	virtio_stop_vq_intr(vsc, txq->txq_vq); /* not urgent; do it later */

	ctrlq->ctrlq_vq = &sc->sc_vq[VQ_CTRL];
	if ((features & VIRTIO_NET_F_CTRL_VQ) &&
	    (features & VIRTIO_NET_F_CTRL_RX)) {
		/*
		 * Allocating a virtqueue for the control channel
		 */
		r = virtio_alloc_vq(vsc, ctrlq->ctrlq_vq, VQ_CTRL,
		    NBPG, 1, "control");
		if (r != 0) {
			aprint_error_dev(self, "failed to allocate "
			    "a virtqueue for the control channel\n");
			goto skip;
		}

		ctrlq->ctrlq_vq->vq_done = vioif_ctrl_vq_done;
		cv_init(&ctrlq->ctrlq_wait, "ctrl_vq");
		mutex_init(&ctrlq->ctrlq_wait_lock, MUTEX_DEFAULT, IPL_NET);
		ctrlq->ctrlq_inuse = FREE;
		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
		sc->sc_has_ctrl = true;
		nvqs = 3;
	}
skip:

	sc->sc_ctl_softint = softint_establish(softint_flags, vioif_ctl_softint, sc);
	if (sc->sc_ctl_softint == NULL) {
		aprint_error_dev(self, "cannot establish ctl softint\n");
		goto err;
	}

	if (vioif_alloc_mems(sc) < 0)
		goto err;

	if (virtio_child_attach_finish(vsc) != 0)
		goto err;

	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_start = vioif_start;
	ifp->if_ioctl = vioif_ioctl;
	ifp->if_init = vioif_init;
	ifp->if_stop = vioif_stop;
	ifp->if_capabilities = 0;
	ifp->if_watchdog = vioif_watchdog;
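	/* make the software send queue at least as deep as the tx ring */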
	IFQ_SET_MAXLEN(&ifp->if_snd, MAX(txq->txq_vq->vq_num, IFQ_MAXLEN));
	IFQ_SET_READY(&ifp->if_snd);

	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;

	if_attach(ifp);
	if_deferred_start_init(ifp, NULL);
	ether_ifattach(ifp, sc->sc_mac);

	return;

err:
	if (rxq->rxq_lock) {
		mutex_obj_free(rxq->rxq_lock);
		rxq->rxq_lock = NULL;
	}

	if (rxq->rxq_softint) {
		softint_disestablish(rxq->rxq_softint);
		rxq->rxq_softint = NULL;
	}

	if (txq->txq_lock) {
		mutex_obj_free(txq->txq_lock);
		txq->txq_lock = NULL;
	}

	if (sc->sc_has_ctrl) {
		cv_destroy(&ctrlq->ctrlq_wait);
		mutex_destroy(&ctrlq->ctrlq_wait_lock);
	}

	while (nvqs > 0)
		virtio_free_vq(vsc, &sc->sc_vq[--nvqs]);

	virtio_child_attach_failed(vsc);
	return;
}

/* we need interrupts to turn promiscuous mode off */
static void
vioif_deferred_init(device_t self)
{
	struct vioif_softc *sc = device_private(self);
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	int r;

	if (ifp->if_flags & IFF_PROMISC)
		return;

	r = vioif_set_promisc(sc, false);
	if (r != 0)
		aprint_error_dev(self, "resetting promisc mode failed, "
				 "error code %d\n", r);
}

/*
 * Interface functions for ifnet
 */
static int
vioif_init(struct ifnet *ifp)
{
	struct vioif_softc *sc = ifp->if_softc;
	struct virtio_softc *vsc = sc->sc_virtio;
	struct vioif_txqueue *txq = &sc->sc_txq;
	struct vioif_rxqueue *rxq = &sc->sc_rxq;
	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;

	vioif_stop(ifp, 0);

	virtio_reinit_start(vsc);
	virtio_negotiate_features(vsc, virtio_features(vsc));
	virtio_start_vq_intr(vsc, rxq->rxq_vq);
	virtio_stop_vq_intr(vsc, txq->txq_vq);
	if (sc->sc_has_ctrl)
		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
	virtio_reinit_end(vsc);

	if (!sc->sc_deferred_init_done) {
		sc->sc_deferred_init_done = 1;
		if (sc->sc_has_ctrl)
			vioif_deferred_init(sc->sc_dev);
	}

	/* these must be cleared before vioif_populate_rx_mbufs() runs */
	rxq->rxq_stopping = false;
	txq->txq_stopping = false;

	vioif_populate_rx_mbufs(sc);

	vioif_update_link_status(sc);
	ifp->if_flags |= IFF_RUNNING;
	ifp->if_flags &= ~IFF_OACTIVE;
	vioif_rx_filter(sc);

	return 0;
}

static void
vioif_stop(struct ifnet *ifp, int disable)
{
	struct vioif_softc *sc = ifp->if_softc;
	struct virtio_softc *vsc = sc->sc_virtio;
	struct vioif_txqueue *txq = &sc->sc_txq;
	struct vioif_rxqueue *rxq = &sc->sc_rxq;
	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;

	/* take the locks to ensure that ongoing TX/RX have finished */
	VIOIF_TXQ_LOCK(txq);
	txq->txq_stopping = true;
	VIOIF_TXQ_UNLOCK(txq);

	VIOIF_RXQ_LOCK(rxq);
	rxq->rxq_stopping = true;
	VIOIF_RXQ_UNLOCK(rxq);

	/* disable interrupts */
	virtio_stop_vq_intr(vsc, rxq->rxq_vq);
	virtio_stop_vq_intr(vsc, txq->txq_vq);
	if (sc->sc_has_ctrl)
		virtio_stop_vq_intr(vsc, ctrlq->ctrlq_vq);

	/* the only way to stop I/O and DMA is to reset the device */
	virtio_reset(vsc);
	vioif_rx_deq(sc);
	vioif_tx_drain(sc);
	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
	sc->sc_link_active = false;
	txq->txq_link_active = false;

	if (disable)
		vioif_rx_drain(sc);
}

static void
vioif_start(struct ifnet *ifp)
{
	struct vioif_softc *sc = ifp->if_softc;
	struct virtio_softc *vsc = sc->sc_virtio;
	struct vioif_txqueue *txq = &sc->sc_txq;
	struct virtqueue *vq = txq->txq_vq;
	struct mbuf *m;
	int queued = 0;

	VIOIF_TXQ_LOCK(txq);

	if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING ||
	    !txq->txq_link_active)
		goto out;

	if (txq->txq_stopping)
		goto out;

	for (;;) {
		int slot, r;

		IFQ_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL)
			break;

		r = virtio_enqueue_prep(vsc, vq, &slot);
		if (r == EAGAIN) {
			ifp->if_flags |= IFF_OACTIVE;
			m_freem(m);
			break;
		}
		if (r != 0)
			panic("enqueue_prep for a tx buffer");

		r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
					 txq->txq_dmamaps[slot],
					 m, BUS_DMA_WRITE|BUS_DMA_NOWAIT);
		if (r != 0) {
			/* maybe just too fragmented */
			struct mbuf *newm;

			newm = m_defrag(m, M_NOWAIT);
			if (newm == NULL) {
				aprint_error_dev(sc->sc_dev,
				    "m_defrag() failed\n");
				goto skip;
			}

			m = newm;
			r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
					 txq->txq_dmamaps[slot],
					 m, BUS_DMA_WRITE|BUS_DMA_NOWAIT);
			if (r != 0) {
				aprint_error_dev(sc->sc_dev,
				    "tx dmamap load failed, error code %d\n",
				    r);
skip:
				m_freem(m);
				virtio_enqueue_abort(vsc, vq, slot);
				continue;
			}
		}

		/* This should actually never fail */
		r = virtio_enqueue_reserve(vsc, vq, slot,
					txq->txq_dmamaps[slot]->dm_nsegs + 1);
		if (r != 0) {
			aprint_error_dev(sc->sc_dev,
			    "virtio_enqueue_reserve failed, error code %d\n",
			    r);
			bus_dmamap_unload(virtio_dmat(vsc),
					  txq->txq_dmamaps[slot]);
			/* slot already freed by virtio_enqueue_reserve */
			m_freem(m);
			continue;
		}

		txq->txq_mbufs[slot] = m;

		memset(&txq->txq_hdrs[slot], 0, sizeof(struct virtio_net_hdr));
		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_dmamaps[slot],
				0, txq->txq_dmamaps[slot]->dm_mapsize,
				BUS_DMASYNC_PREWRITE);
		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_hdr_dmamaps[slot],
				0, txq->txq_hdr_dmamaps[slot]->dm_mapsize,
				BUS_DMASYNC_PREWRITE);
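		/* the virtio-net header goes first, then the payload */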
		virtio_enqueue(vsc, vq, slot, txq->txq_hdr_dmamaps[slot], true);
		virtio_enqueue(vsc, vq, slot, txq->txq_dmamaps[slot], true);
		virtio_enqueue_commit(vsc, vq, slot, false);

		queued++;
		bpf_mtap(ifp, m, BPF_D_OUT);
	}

	if (queued > 0) {
		virtio_enqueue_commit(vsc, vq, -1, true);
		ifp->if_timer = 5;
	}

out:
	VIOIF_TXQ_UNLOCK(txq);
}

static int
vioif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
{
	int s, r;

	s = splnet();

	r = ether_ioctl(ifp, cmd, data);
	if ((r == 0 && cmd == SIOCSIFFLAGS) ||
	    (r == ENETRESET && (cmd == SIOCADDMULTI || cmd == SIOCDELMULTI))) {
		if (ifp->if_flags & IFF_RUNNING)
			r = vioif_rx_filter(ifp->if_softc);
		else
			r = 0;
	}

	splx(s);

	return r;
}

static void
vioif_watchdog(struct ifnet *ifp)
{
	struct vioif_softc *sc = ifp->if_softc;
	struct vioif_txqueue *txq = &sc->sc_txq;

	if (ifp->if_flags & IFF_RUNNING)
		vioif_tx_vq_done(txq->txq_vq);
}


/*
 * Receive implementation
 */
/* allocate and initialize a mbuf for receive */
static int
vioif_add_rx_mbuf(struct vioif_softc *sc, int i)
{
	struct vioif_rxqueue *rxq = &sc->sc_rxq;
	struct mbuf *m;
	int r;

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return ENOBUFS;
	MCLGET(m, M_DONTWAIT);
	if ((m->m_flags & M_EXT) == 0) {
		m_freem(m);
		return ENOBUFS;
	}
	rxq->rxq_mbufs[i] = m;
	m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
	r = bus_dmamap_load_mbuf(virtio_dmat(sc->sc_virtio),
				 rxq->rxq_dmamaps[i],
				 m, BUS_DMA_READ|BUS_DMA_NOWAIT);
	if (r) {
		m_freem(m);
		rxq->rxq_mbufs[i] = NULL;
		return r;
	}

	return 0;
}

/* free a mbuf for receive */
static void
vioif_free_rx_mbuf(struct vioif_softc *sc, int i)
{
	struct vioif_rxqueue *rxq = &sc->sc_rxq;

	bus_dmamap_unload(virtio_dmat(sc->sc_virtio), rxq->rxq_dmamaps[i]);
	m_freem(rxq->rxq_mbufs[i]);
	rxq->rxq_mbufs[i] = NULL;
}

/* add mbufs for all the empty receive slots */
static void
vioif_populate_rx_mbufs(struct vioif_softc *sc)
{
	struct vioif_rxqueue *rxq = &sc->sc_rxq;

	VIOIF_RXQ_LOCK(rxq);
	vioif_populate_rx_mbufs_locked(sc);
	VIOIF_RXQ_UNLOCK(rxq);
}

static void
vioif_populate_rx_mbufs_locked(struct vioif_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	struct vioif_rxqueue *rxq = &sc->sc_rxq;
	int i, r, ndone = 0;
	struct virtqueue *vq = rxq->rxq_vq;

	KASSERT(VIOIF_RXQ_LOCKED(rxq));

	if (rxq->rxq_stopping)
		return;

	for (i = 0; i < vq->vq_num; i++) {
		int slot;
		r = virtio_enqueue_prep(vsc, vq, &slot);
		if (r == EAGAIN)
			break;
		if (r != 0)
			panic("enqueue_prep for rx buffers");
		if (rxq->rxq_mbufs[slot] == NULL) {
			r = vioif_add_rx_mbuf(sc, slot);
			if (r != 0) {
				printf("%s: rx mbuf allocation failed, "
				       "error code %d\n",
				       device_xname(sc->sc_dev), r);
				break;
			}
		}
		r = virtio_enqueue_reserve(vsc, vq, slot,
					rxq->rxq_dmamaps[slot]->dm_nsegs + 1);
		if (r != 0) {
			vioif_free_rx_mbuf(sc, slot);
			break;
		}
		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_hdr_dmamaps[slot],
			0, sizeof(struct virtio_net_hdr), BUS_DMASYNC_PREREAD);
		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_dmamaps[slot],
			0, MCLBYTES, BUS_DMASYNC_PREREAD);
		virtio_enqueue(vsc, vq, slot, rxq->rxq_hdr_dmamaps[slot], false);
		virtio_enqueue(vsc, vq, slot, rxq->rxq_dmamaps[slot], false);
		virtio_enqueue_commit(vsc, vq, slot, false);
		ndone++;
	}
	if (ndone > 0)
		virtio_enqueue_commit(vsc, vq, -1, true);
}

/* dequeue received packets */
static int
vioif_rx_deq(struct vioif_softc *sc)
{
	struct vioif_rxqueue *rxq = &sc->sc_rxq;
	int r;

	KASSERT(rxq->rxq_stopping);

	VIOIF_RXQ_LOCK(rxq);
	r = vioif_rx_deq_locked(sc);
	VIOIF_RXQ_UNLOCK(rxq);

	return r;
}

/* dequeue received packets */
static int
vioif_rx_deq_locked(struct vioif_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	struct vioif_rxqueue *rxq = &sc->sc_rxq;
	struct virtqueue *vq = rxq->rxq_vq;
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	struct mbuf *m;
	int r = 0;
	int slot, len;

	KASSERT(VIOIF_RXQ_LOCKED(rxq));

	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
		len -= sizeof(struct virtio_net_hdr);
		r = 1;
		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_hdr_dmamaps[slot],
				0, sizeof(struct virtio_net_hdr),
				BUS_DMASYNC_POSTREAD);
		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_dmamaps[slot],
				0, MCLBYTES,
				BUS_DMASYNC_POSTREAD);
		m = rxq->rxq_mbufs[slot];
		KASSERT(m != NULL);
		bus_dmamap_unload(virtio_dmat(vsc), rxq->rxq_dmamaps[slot]);
		rxq->rxq_mbufs[slot] = NULL;
		virtio_dequeue_commit(vsc, vq, slot);
		m_set_rcvif(m, ifp);
		m->m_len = m->m_pkthdr.len = len;
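		/*
		 * drop rxq_lock across the hand-off so the network stack
		 * does not run under the rx queue's spin mutex
		 */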

		VIOIF_RXQ_UNLOCK(rxq);
		if_percpuq_enqueue(ifp->if_percpuq, m);
		VIOIF_RXQ_LOCK(rxq);

		if (rxq->rxq_stopping)
			break;
	}

	return r;
}

/* rx interrupt; call _dequeue above and schedule a softint */
static int
vioif_rx_vq_done(struct virtqueue *vq)
{
	struct virtio_softc *vsc = vq->vq_owner;
	struct vioif_softc *sc = device_private(virtio_child(vsc));
	struct vioif_rxqueue *rxq = &sc->sc_rxq;
	int r = 0;

#ifdef VIOIF_SOFTINT_INTR
	KASSERT(!cpu_intr_p());
#endif

	VIOIF_RXQ_LOCK(rxq);

	if (rxq->rxq_stopping)
		goto out;

	r = vioif_rx_deq_locked(sc);
	if (r)
#ifdef VIOIF_SOFTINT_INTR
		vioif_populate_rx_mbufs_locked(sc);
#else
		softint_schedule(rxq->rxq_softint);
#endif

out:
	VIOIF_RXQ_UNLOCK(rxq);
	return r;
}

/* softint: enqueue receive requests for new incoming packets */
static void
vioif_rx_softint(void *arg)
{
	struct vioif_softc *sc = arg;

	vioif_populate_rx_mbufs(sc);
}

/* free all the mbufs; called from if_stop(disable) */
static void
vioif_rx_drain(struct vioif_softc *sc)
{
	struct vioif_rxqueue *rxq = &sc->sc_rxq;
	struct virtqueue *vq = rxq->rxq_vq;
	int i;

	for (i = 0; i < vq->vq_num; i++) {
		if (rxq->rxq_mbufs[i] == NULL)
			continue;
		vioif_free_rx_mbuf(sc, i);
	}
}


/*
 * Transmission implementation
 */
/* actual transmission is done in if_start */
/* tx interrupt; dequeue and free mbufs */
/*
 * the tx interrupt is normally disabled; this is called when the tx vq
 * fills up and from the watchdog timer
 */
static int
vioif_tx_vq_done(struct virtqueue *vq)
{
	struct virtio_softc *vsc = vq->vq_owner;
	struct vioif_softc *sc = device_private(virtio_child(vsc));
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	struct vioif_txqueue *txq = &sc->sc_txq;
	int r = 0;

	VIOIF_TXQ_LOCK(txq);

	if (txq->txq_stopping)
		goto out;

	r = vioif_tx_vq_done_locked(vq);

out:
	VIOIF_TXQ_UNLOCK(txq);
	if (r)
		if_schedule_deferred_start(ifp);
	return r;
}

static int
vioif_tx_vq_done_locked(struct virtqueue *vq)
{
	struct virtio_softc *vsc = vq->vq_owner;
	struct vioif_softc *sc = device_private(virtio_child(vsc));
	struct vioif_txqueue *txq = &sc->sc_txq;
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	struct mbuf *m;
	int r = 0;
	int slot, len;

	KASSERT(VIOIF_TXQ_LOCKED(txq));

	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
		r++;
		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_hdr_dmamaps[slot],
				0, sizeof(struct virtio_net_hdr),
				BUS_DMASYNC_POSTWRITE);
		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_dmamaps[slot],
				0, txq->txq_dmamaps[slot]->dm_mapsize,
				BUS_DMASYNC_POSTWRITE);
		m = txq->txq_mbufs[slot];
		bus_dmamap_unload(virtio_dmat(vsc), txq->txq_dmamaps[slot]);
		txq->txq_mbufs[slot] = NULL;
		virtio_dequeue_commit(vsc, vq, slot);
		ifp->if_opackets++;
		m_freem(m);
	}

	if (r)
		ifp->if_flags &= ~IFF_OACTIVE;
	return r;
}

/* free all the mbufs already put on vq; called from if_stop(disable) */
static void
vioif_tx_drain(struct vioif_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	struct vioif_txqueue *txq = &sc->sc_txq;
	struct virtqueue *vq = txq->txq_vq;
	int i;

	KASSERT(txq->txq_stopping);

	for (i = 0; i < vq->vq_num; i++) {
		if (txq->txq_mbufs[i] == NULL)
			continue;
		bus_dmamap_unload(virtio_dmat(vsc), txq->txq_dmamaps[i]);
		m_freem(txq->txq_mbufs[i]);
		txq->txq_mbufs[i] = NULL;
	}
}

/*
 * Control vq
 */
/* serialize control vq use; only one command may be in flight at a time */
static void
vioif_ctrl_acquire(struct vioif_softc *sc)
{
	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;

	mutex_enter(&ctrlq->ctrlq_wait_lock);
	while (ctrlq->ctrlq_inuse != FREE)
		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
	ctrlq->ctrlq_inuse = INUSE;
	ctrlq->ctrlq_owner = curlwp;
	mutex_exit(&ctrlq->ctrlq_wait_lock);
}

static void
vioif_ctrl_release(struct vioif_softc *sc)
{
	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;

	KASSERT(ctrlq->ctrlq_inuse != FREE);
	KASSERT(ctrlq->ctrlq_owner == curlwp);

	mutex_enter(&ctrlq->ctrlq_wait_lock);
	ctrlq->ctrlq_inuse = FREE;
	cv_signal(&ctrlq->ctrlq_wait);
	mutex_exit(&ctrlq->ctrlq_wait_lock);
}

static int
vioif_ctrl_load_cmdspec(struct vioif_softc *sc,
    struct vioif_ctrl_cmdspec *specs, int nspecs)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	int i, r, loaded;

	loaded = 0;
	for (i = 0; i < nspecs; i++) {
		r = bus_dmamap_load(virtio_dmat(vsc),
		    specs[i].dmamap, specs[i].buf, specs[i].bufsize,
		    NULL, BUS_DMA_WRITE|BUS_DMA_NOWAIT);
		if (r) {
			printf("%s: control command dmamap load failed, "
			       "error code %d\n", device_xname(sc->sc_dev), r);
			goto err;
		}
		loaded++;
	}

	return r;

err:
	for (i = 0; i < loaded; i++) {
		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
	}

	return r;
}

static void
vioif_ctrl_unload_cmdspec(struct vioif_softc *sc,
    struct vioif_ctrl_cmdspec *specs, int nspecs)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	int i;

	for (i = 0; i < nspecs; i++) {
		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
	}
}

static int
vioif_ctrl_send_command(struct vioif_softc *sc, uint8_t class, uint8_t cmd,
    struct vioif_ctrl_cmdspec *specs, int nspecs)
{
	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
	struct virtqueue *vq = ctrlq->ctrlq_vq;
	struct virtio_softc *vsc = sc->sc_virtio;
	int i, r, slot;

	ctrlq->ctrlq_cmd->class = class;
	ctrlq->ctrlq_cmd->command = cmd;

	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap,
			0, sizeof(struct virtio_net_ctrl_cmd),
			BUS_DMASYNC_PREWRITE);
	for (i = 0; i < nspecs; i++) {
		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap,
				0, specs[i].bufsize,
				BUS_DMASYNC_PREWRITE);
	}
	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap,
			0, sizeof(struct virtio_net_ctrl_status),
			BUS_DMASYNC_PREREAD);

	r = virtio_enqueue_prep(vsc, vq, &slot);
	if (r != 0)
		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
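	/*
	 * nspecs + 2: one descriptor for the command header, nspecs for
	 * the parameters, and one for the returned status byte
	 */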
	r = virtio_enqueue_reserve(vsc, vq, slot, nspecs + 2);
	if (r != 0)
		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_cmd_dmamap, true);
	for (i = 0; i < nspecs; i++) {
		virtio_enqueue(vsc, vq, slot, specs[i].dmamap, true);
	}
	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_status_dmamap, false);
	virtio_enqueue_commit(vsc, vq, slot, true);

	/* wait for done */
	mutex_enter(&ctrlq->ctrlq_wait_lock);
	while (ctrlq->ctrlq_inuse != DONE)
		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
	mutex_exit(&ctrlq->ctrlq_wait_lock);
	/* already dequeued */

	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap, 0,
			sizeof(struct virtio_net_ctrl_cmd),
			BUS_DMASYNC_POSTWRITE);
	for (i = 0; i < nspecs; i++) {
		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap, 0,
				specs[i].bufsize,
				BUS_DMASYNC_POSTWRITE);
	}
	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap, 0,
			sizeof(struct virtio_net_ctrl_status),
			BUS_DMASYNC_POSTREAD);

	if (ctrlq->ctrlq_status->ack == VIRTIO_NET_OK)
		r = 0;
	else {
		printf("%s: failed setting rx mode\n",
		       device_xname(sc->sc_dev));
		r = EIO;
	}

	return r;
}

/* issue a VIRTIO_NET_CTRL_RX class command and wait for completion */
static int
vioif_ctrl_rx(struct vioif_softc *sc, int cmd, bool onoff)
{
	struct virtio_net_ctrl_rx *rx = sc->sc_ctrlq.ctrlq_rx;
	struct vioif_ctrl_cmdspec specs[1];
	int r;

	if (!sc->sc_has_ctrl)
		return ENOTSUP;

	vioif_ctrl_acquire(sc);

	rx->onoff = onoff;
	specs[0].dmamap = sc->sc_ctrlq.ctrlq_rx_dmamap;
	specs[0].buf = rx;
	specs[0].bufsize = sizeof(*rx);

	r = vioif_ctrl_send_command(sc, VIRTIO_NET_CTRL_RX, cmd,
	    specs, __arraycount(specs));

	vioif_ctrl_release(sc);
	return r;
}

static int
vioif_set_promisc(struct vioif_softc *sc, bool onoff)
{
	int r;

	r = vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, onoff);

	return r;
}

static int
vioif_set_allmulti(struct vioif_softc *sc, bool onoff)
{
	int r;

	r = vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, onoff);

	return r;
}

/* issue VIRTIO_NET_CTRL_MAC_TABLE_SET command and wait for completion */
static int
vioif_set_rx_filter(struct vioif_softc *sc)
{
	/* filter already set in ctrlq->ctrlq_mac_tbl */
	struct virtio_net_ctrl_mac_tbl *mac_tbl_uc, *mac_tbl_mc;
	struct vioif_ctrl_cmdspec specs[2];
	int nspecs = __arraycount(specs);
	int r;

	mac_tbl_uc = sc->sc_ctrlq.ctrlq_mac_tbl_uc;
	mac_tbl_mc = sc->sc_ctrlq.ctrlq_mac_tbl_mc;

	if (!sc->sc_has_ctrl)
		return ENOTSUP;

	vioif_ctrl_acquire(sc);

	specs[0].dmamap = sc->sc_ctrlq.ctrlq_tbl_uc_dmamap;
	specs[0].buf = mac_tbl_uc;
	specs[0].bufsize = sizeof(*mac_tbl_uc)
	    + (ETHER_ADDR_LEN * mac_tbl_uc->nentries);

	specs[1].dmamap = sc->sc_ctrlq.ctrlq_tbl_mc_dmamap;
	specs[1].buf = mac_tbl_mc;
	specs[1].bufsize = sizeof(*mac_tbl_mc)
	    + (ETHER_ADDR_LEN * mac_tbl_mc->nentries);

	r = vioif_ctrl_load_cmdspec(sc, specs, nspecs);
	if (r != 0)
		goto out;

	r = vioif_ctrl_send_command(sc,
	    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET,
	    specs, nspecs);

	vioif_ctrl_unload_cmdspec(sc, specs, nspecs);

out:
	vioif_ctrl_release(sc);

	return r;
}

/* ctrl vq interrupt; wake up the command issuer */
static int
vioif_ctrl_vq_done(struct virtqueue *vq)
{
	struct virtio_softc *vsc = vq->vq_owner;
	struct vioif_softc *sc = device_private(virtio_child(vsc));
	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
	int r, slot;

	r = virtio_dequeue(vsc, vq, &slot, NULL);
	if (r == ENOENT)
		return 0;
	virtio_dequeue_commit(vsc, vq, slot);

	mutex_enter(&ctrlq->ctrlq_wait_lock);
	ctrlq->ctrlq_inuse = DONE;
	cv_signal(&ctrlq->ctrlq_wait);
	mutex_exit(&ctrlq->ctrlq_wait_lock);

	return 1;
}

/*
 * If IFF_PROMISC is requested, enable promiscuous mode.
 * If the multicast filter is small enough (<= MAXENTRIES), program the
 * rx filter; if it is too large, use ALLMULTI instead.
 */
/*
 * If setting the rx filter fails, fall back to ALLMULTI;
 * if ALLMULTI fails, fall back to PROMISC.
 */
static int
vioif_rx_filter(struct vioif_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	struct ether_multi *enm;
	struct ether_multistep step;
	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
	int nentries;
	int promisc = 0, allmulti = 0, rxfilter = 0;
	int r;

	if (!sc->sc_has_ctrl) {	/* no ctrl vq; always promisc */
		ifp->if_flags |= IFF_PROMISC;
		return 0;
	}

	if (ifp->if_flags & IFF_PROMISC) {
		promisc = 1;
		goto set;
	}

	nentries = -1;
	ETHER_LOCK(&sc->sc_ethercom);
	ETHER_FIRST_MULTI(step, &sc->sc_ethercom, enm);
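	/*
	 * nentries starts at -1 and the comma expression increments it
	 * before each test, so it indexes the current entry inside the
	 * loop body and holds the total count after the loop ends.
	 */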
	while (nentries++, enm != NULL) {
		if (nentries >= VIRTIO_NET_CTRL_MAC_MAXENTRIES) {
			allmulti = 1;
			goto set_unlock;
		}
		if (memcmp(enm->enm_addrlo, enm->enm_addrhi,
			   ETHER_ADDR_LEN)) {
			allmulti = 1;
			goto set_unlock;
		}
		memcpy(ctrlq->ctrlq_mac_tbl_mc->macs[nentries],
		       enm->enm_addrlo, ETHER_ADDR_LEN);
		ETHER_NEXT_MULTI(step, enm);
	}
	rxfilter = 1;

set_unlock:
	ETHER_UNLOCK(&sc->sc_ethercom);

set:
	if (rxfilter) {
		ctrlq->ctrlq_mac_tbl_uc->nentries = 0;
		ctrlq->ctrlq_mac_tbl_mc->nentries = nentries;
		r = vioif_set_rx_filter(sc);
		if (r != 0) {
			rxfilter = 0;
			allmulti = 1; /* fallback */
		}
	} else {
		/* remove rx filter */
		ctrlq->ctrlq_mac_tbl_uc->nentries = 0;
		ctrlq->ctrlq_mac_tbl_mc->nentries = 0;
		r = vioif_set_rx_filter(sc);
		/* what to do on failure? */
	}
	if (allmulti) {
		r = vioif_set_allmulti(sc, true);
		if (r != 0) {
			allmulti = 0;
			promisc = 1; /* fallback */
		}
	} else {
		r = vioif_set_allmulti(sc, false);
		/* what to do on failure? */
	}
	if (promisc) {
		r = vioif_set_promisc(sc, true);
	} else {
		r = vioif_set_promisc(sc, false);
	}

	return r;
}

static bool
vioif_is_link_up(struct vioif_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	uint16_t status;

	if (virtio_features(vsc) & VIRTIO_NET_F_STATUS)
		status = virtio_read_device_config_2(vsc,
		    VIRTIO_NET_CONFIG_STATUS);
	else
		status = VIRTIO_NET_S_LINK_UP;

	return ((status & VIRTIO_NET_S_LINK_UP) != 0);
}

/* change link status */
static void
vioif_update_link_status(struct vioif_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	struct vioif_txqueue *txq = &sc->sc_txq;
	bool active, changed;
	int link;

	active = vioif_is_link_up(sc);
	changed = false;

	if (active) {
		if (!sc->sc_link_active)
			changed = true;

		link = LINK_STATE_UP;
		sc->sc_link_active = true;
	} else {
		if (sc->sc_link_active)
			changed = true;

		link = LINK_STATE_DOWN;
		sc->sc_link_active = false;
	}

	if (changed) {
		VIOIF_TXQ_LOCK(txq);
		txq->txq_link_active = sc->sc_link_active;
		VIOIF_TXQ_UNLOCK(txq);

		if_link_state_change(ifp, link);
	}
}

static int
vioif_config_change(struct virtio_softc *vsc)
{
	struct vioif_softc *sc = device_private(virtio_child(vsc));

#ifdef VIOIF_SOFTINT_INTR
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
#endif

#ifdef VIOIF_SOFTINT_INTR
	KASSERT(!cpu_intr_p());
	vioif_update_link_status(sc);
	vioif_start(ifp);
#else
	softint_schedule(sc->sc_ctl_softint);
#endif

	return 0;
}

static void
vioif_ctl_softint(void *arg)
{
	struct vioif_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;

	vioif_update_link_status(sc);
	vioif_start(ifp);
}

MODULE(MODULE_CLASS_DRIVER, if_vioif, "virtio");

#ifdef _MODULE
#include "ioconf.c"
#endif

static int
if_vioif_modcmd(modcmd_t cmd, void *opaque)
{
	int error = 0;

#ifdef _MODULE
	switch (cmd) {
	case MODULE_CMD_INIT:
		error = config_init_component(cfdriver_ioconf_if_vioif,
		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
		break;
	case MODULE_CMD_FINI:
		error = config_fini_component(cfdriver_ioconf_if_vioif,
		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
		break;
	default:
		error = ENOTTY;
		break;
	}
#endif

	return error;
}