if_vioif.c revision 1.25
/*	$NetBSD: if_vioif.c,v 1.25 2016/08/29 04:21:25 ozaki-r Exp $	*/

/*
 * Copyright (c) 2010 Minoura Makoto.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: if_vioif.c,v 1.25 2016/08/29 04:21:25 ozaki-r Exp $");

#ifdef _KERNEL_OPT
#include "opt_net_mpsafe.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/condvar.h>
#include <sys/device.h>
#include <sys/intr.h>
#include <sys/kmem.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/sockio.h>
#include <sys/cpu.h>

#include <dev/pci/pcidevs.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/virtioreg.h>
#include <dev/pci/virtiovar.h>

#include <net/if.h>
#include <net/if_media.h>
#include <net/if_ether.h>

#include <net/bpf.h>


#ifdef NET_MPSAFE
#define VIOIF_MPSAFE	1
#endif

#ifdef SOFTINT_INTR
#define VIOIF_SOFTINT_INTR	1
#endif

/*
 * if_vioifreg.h:
 */
/* Configuration registers */
#define VIRTIO_NET_CONFIG_MAC		0 /* 8bit x 6byte */
#define VIRTIO_NET_CONFIG_STATUS	6 /* 16bit */

/* Feature bits */
#define VIRTIO_NET_F_CSUM	(1<<0)
#define VIRTIO_NET_F_GUEST_CSUM	(1<<1)
#define VIRTIO_NET_F_MAC	(1<<5)
#define VIRTIO_NET_F_GSO	(1<<6)
#define VIRTIO_NET_F_GUEST_TSO4	(1<<7)
#define VIRTIO_NET_F_GUEST_TSO6	(1<<8)
#define VIRTIO_NET_F_GUEST_ECN	(1<<9)
#define VIRTIO_NET_F_GUEST_UFO	(1<<10)
#define VIRTIO_NET_F_HOST_TSO4	(1<<11)
#define VIRTIO_NET_F_HOST_TSO6	(1<<12)
#define VIRTIO_NET_F_HOST_ECN	(1<<13)
#define VIRTIO_NET_F_HOST_UFO	(1<<14)
#define VIRTIO_NET_F_MRG_RXBUF	(1<<15)
#define VIRTIO_NET_F_STATUS	(1<<16)
#define VIRTIO_NET_F_CTRL_VQ	(1<<17)
#define VIRTIO_NET_F_CTRL_RX	(1<<18)
#define VIRTIO_NET_F_CTRL_VLAN	(1<<19)

#define VIRTIO_NET_FLAG_BITS \
	VIRTIO_COMMON_FLAG_BITS \
	"\x14""CTRL_VLAN" \
	"\x13""CTRL_RX" \
	"\x12""CTRL_VQ" \
	"\x11""STATUS" \
	"\x10""MRG_RXBUF" \
	"\x0f""HOST_UFO" \
	"\x0e""HOST_ECN" \
	"\x0d""HOST_TSO6" \
	"\x0c""HOST_TSO4" \
	"\x0b""GUEST_UFO" \
	"\x0a""GUEST_ECN" \
	"\x09""GUEST_TSO6" \
	"\x08""GUEST_TSO4" \
	"\x07""GSO" \
	"\x06""MAC" \
	"\x02""GUEST_CSUM" \
	"\x01""CSUM"

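/*
 * The flag-bits string above is in snprintb(9) format: each "\xNN"
 * byte is the 1-origin bit position of the feature name that follows,
 * so "\x01" is bit 0 (CSUM) and "\x14" is bit 19 (CTRL_VLAN).  It is
 * used to pretty-print the negotiated features at attach time.
 */
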
/* Status */
#define VIRTIO_NET_S_LINK_UP	1

/* Packet header structure */
struct virtio_net_hdr {
	uint8_t		flags;
	uint8_t		gso_type;
	uint16_t	hdr_len;
	uint16_t	gso_size;
	uint16_t	csum_start;
	uint16_t	csum_offset;
#if 0
	uint16_t	num_buffers; /* if VIRTIO_NET_F_MRG_RXBUF enabled */
#endif
} __packed;

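/*
 * Note: this driver never negotiates VIRTIO_NET_F_MRG_RXBUF (see the
 * feature mask in vioif_attach()), so the header is always used in its
 * fixed-size form without the num_buffers field.
 */
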
#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
#define VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
#define VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
#define VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
#define VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
#define VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */

#define VIRTIO_NET_MAX_GSO_LEN		(65536+ETHER_HDR_LEN)

/* Control virtqueue */
struct virtio_net_ctrl_cmd {
	uint8_t	class;
	uint8_t	command;
} __packed;
#define VIRTIO_NET_CTRL_RX		0
# define VIRTIO_NET_CTRL_RX_PROMISC	0
# define VIRTIO_NET_CTRL_RX_ALLMULTI	1

#define VIRTIO_NET_CTRL_MAC		1
# define VIRTIO_NET_CTRL_MAC_TABLE_SET	0

#define VIRTIO_NET_CTRL_VLAN		2
# define VIRTIO_NET_CTRL_VLAN_ADD	0
# define VIRTIO_NET_CTRL_VLAN_DEL	1

struct virtio_net_ctrl_status {
	uint8_t	ack;
} __packed;
#define VIRTIO_NET_OK			0
#define VIRTIO_NET_ERR			1

struct virtio_net_ctrl_rx {
	uint8_t	onoff;
} __packed;

struct virtio_net_ctrl_mac_tbl {
	uint32_t nentries;
	uint8_t macs[][ETHER_ADDR_LEN];
} __packed;

struct virtio_net_ctrl_vlan {
	uint16_t id;
} __packed;


/*
 * if_vioifvar.h:
 */
struct vioif_softc {
	device_t		sc_dev;

	struct virtio_softc	*sc_virtio;
	struct virtqueue	sc_vq[3];
#define VQ_RX	0
#define VQ_TX	1
#define VQ_CTRL	2

	uint8_t			sc_mac[ETHER_ADDR_LEN];
	struct ethercom		sc_ethercom;
	short			sc_deferred_init_done;

	/* bus_dmamem */
	bus_dma_segment_t	sc_hdr_segs[1];
	struct virtio_net_hdr	*sc_hdrs;
#define sc_rx_hdrs	sc_hdrs
	struct virtio_net_hdr	*sc_tx_hdrs;
	struct virtio_net_ctrl_cmd *sc_ctrl_cmd;
	struct virtio_net_ctrl_status *sc_ctrl_status;
	struct virtio_net_ctrl_rx *sc_ctrl_rx;
	struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_uc;
	struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_mc;

	/* kmem */
	bus_dmamap_t		*sc_arrays;
#define sc_rxhdr_dmamaps sc_arrays
	bus_dmamap_t		*sc_txhdr_dmamaps;
	bus_dmamap_t		*sc_rx_dmamaps;
	bus_dmamap_t		*sc_tx_dmamaps;
	struct mbuf		**sc_rx_mbufs;
	struct mbuf		**sc_tx_mbufs;

	bus_dmamap_t		sc_ctrl_cmd_dmamap;
	bus_dmamap_t		sc_ctrl_status_dmamap;
	bus_dmamap_t		sc_ctrl_rx_dmamap;
	bus_dmamap_t		sc_ctrl_tbl_uc_dmamap;
	bus_dmamap_t		sc_ctrl_tbl_mc_dmamap;

	void			*sc_rx_softint;

	enum {
		FREE, INUSE, DONE
	}			sc_ctrl_inuse;
	kcondvar_t		sc_ctrl_wait;
	kmutex_t		sc_ctrl_wait_lock;
	kmutex_t		*sc_tx_lock;
	kmutex_t		*sc_rx_lock;
	bool			sc_stopping;
};
#define VIRTIO_NET_TX_MAXNSEGS		(16) /* XXX */
#define VIRTIO_NET_CTRL_MAC_MAXENTRIES	(64) /* XXX */

#define VIOIF_TX_LOCK(_sc)	if ((_sc)->sc_tx_lock) mutex_enter((_sc)->sc_tx_lock)
#define VIOIF_TX_UNLOCK(_sc)	if ((_sc)->sc_tx_lock) mutex_exit((_sc)->sc_tx_lock)
#define VIOIF_TX_LOCKED(_sc)	(!(_sc)->sc_tx_lock || mutex_owned((_sc)->sc_tx_lock))
#define VIOIF_RX_LOCK(_sc)	if ((_sc)->sc_rx_lock) mutex_enter((_sc)->sc_rx_lock)
#define VIOIF_RX_UNLOCK(_sc)	if ((_sc)->sc_rx_lock) mutex_exit((_sc)->sc_rx_lock)
#define VIOIF_RX_LOCKED(_sc)	(!(_sc)->sc_rx_lock || mutex_owned((_sc)->sc_rx_lock))
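/*
 * When VIOIF_MPSAFE is not defined, sc_tx_lock and sc_rx_lock are left
 * NULL in vioif_attach() and the macros above become no-ops; the driver
 * then relies on the traditional splnet()/kernel-lock serialization
 * (see vioif_ioctl()).
 */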

/* cfattach interface functions */
static int	vioif_match(device_t, cfdata_t, void *);
static void	vioif_attach(device_t, device_t, void *);
static void	vioif_deferred_init(device_t);

/* ifnet interface functions */
static int	vioif_init(struct ifnet *);
static void	vioif_stop(struct ifnet *, int);
static void	vioif_start(struct ifnet *);
static int	vioif_ioctl(struct ifnet *, u_long, void *);
static void	vioif_watchdog(struct ifnet *);

/* rx */
static int	vioif_add_rx_mbuf(struct vioif_softc *, int);
static void	vioif_free_rx_mbuf(struct vioif_softc *, int);
static void	vioif_populate_rx_mbufs(struct vioif_softc *);
static void	vioif_populate_rx_mbufs_locked(struct vioif_softc *);
static int	vioif_rx_deq(struct vioif_softc *);
static int	vioif_rx_deq_locked(struct vioif_softc *);
static int	vioif_rx_vq_done(struct virtqueue *);
static void	vioif_rx_softint(void *);
static void	vioif_rx_drain(struct vioif_softc *);

/* tx */
static int	vioif_tx_vq_done(struct virtqueue *);
static int	vioif_tx_vq_done_locked(struct virtqueue *);
static void	vioif_tx_drain(struct vioif_softc *);

/* other control */
static int	vioif_updown(struct vioif_softc *, bool);
static int	vioif_ctrl_rx(struct vioif_softc *, int, bool);
static int	vioif_set_promisc(struct vioif_softc *, bool);
static int	vioif_set_allmulti(struct vioif_softc *, bool);
static int	vioif_set_rx_filter(struct vioif_softc *);
static int	vioif_rx_filter(struct vioif_softc *);
static int	vioif_ctrl_vq_done(struct virtqueue *);

CFATTACH_DECL_NEW(vioif, sizeof(struct vioif_softc),
		  vioif_match, vioif_attach, NULL, NULL);

static int
vioif_match(device_t parent, cfdata_t match, void *aux)
{
	struct virtio_softc *va = aux;

	if (va->sc_childdevid == PCI_PRODUCT_VIRTIO_NETWORK)
		return 1;

	return 0;
}

/* allocate memory */
/*
 * dma memory is used for:
 *   sc_rx_hdrs[slot]:	 metadata array for received frames (READ)
 *   sc_tx_hdrs[slot]:	 metadata array for frames to be sent (WRITE)
 *   sc_ctrl_cmd:	 command to be sent via ctrl vq (WRITE)
 *   sc_ctrl_status:	 return value for a command via ctrl vq (READ)
 *   sc_ctrl_rx:	 parameter for a VIRTIO_NET_CTRL_RX class command
 *			 (WRITE)
 *   sc_ctrl_mac_tbl_uc: unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
 *			 class command (WRITE)
 *   sc_ctrl_mac_tbl_mc: multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
 *			 class command (WRITE)
 * sc_ctrl_* structures are allocated only once each; they are protected by
 * the sc_ctrl_inuse variable and the sc_ctrl_wait condvar.
 */
/*
 * dynamically allocated memory is used for:
 *   sc_rxhdr_dmamaps[slot]:	bus_dmamap_t array for sc_rx_hdrs[slot]
 *   sc_txhdr_dmamaps[slot]:	bus_dmamap_t array for sc_tx_hdrs[slot]
 *   sc_rx_dmamaps[slot]:	bus_dmamap_t array for received payload
 *   sc_tx_dmamaps[slot]:	bus_dmamap_t array for sent payload
 *   sc_rx_mbufs[slot]:		mbuf pointer array for received frames
 *   sc_tx_mbufs[slot]:		mbuf pointer array for sent frames
 */
static int
vioif_alloc_mems(struct vioif_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	int allocsize, allocsize2, r, rsegs, i;
	void *vaddr;
	intptr_t p;
	int rxqsize, txqsize;

	rxqsize = vsc->sc_vqs[VQ_RX].vq_num;
	txqsize = vsc->sc_vqs[VQ_TX].vq_num;

	allocsize = sizeof(struct virtio_net_hdr) * rxqsize;
	allocsize += sizeof(struct virtio_net_hdr) * txqsize;
	if (vsc->sc_nvqs == 3) {
		allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1;
		allocsize += sizeof(struct virtio_net_ctrl_status) * 1;
		allocsize += sizeof(struct virtio_net_ctrl_rx) * 1;
		allocsize += sizeof(struct virtio_net_ctrl_mac_tbl)
			+ sizeof(struct virtio_net_ctrl_mac_tbl)
			+ ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES;
	}
	r = bus_dmamem_alloc(vsc->sc_dmat, allocsize, 0, 0,
			     &sc->sc_hdr_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
	if (r != 0) {
		aprint_error_dev(sc->sc_dev,
				 "DMA memory allocation failed, size %d, "
				 "error code %d\n", allocsize, r);
		goto err_none;
	}
	r = bus_dmamem_map(vsc->sc_dmat,
			   &sc->sc_hdr_segs[0], 1, allocsize,
			   &vaddr, BUS_DMA_NOWAIT);
	if (r != 0) {
		aprint_error_dev(sc->sc_dev,
				 "DMA memory map failed, "
				 "error code %d\n", r);
		goto err_dmamem_alloc;
	}
	sc->sc_hdrs = vaddr;
	memset(vaddr, 0, allocsize);
	p = (intptr_t) vaddr;
	p += sizeof(struct virtio_net_hdr) * rxqsize;
#define P(name,size)	do { sc->sc_ ##name = (void*) p;	\
			     p += size; } while (0)
	P(tx_hdrs, sizeof(struct virtio_net_hdr) * txqsize);
	if (vsc->sc_nvqs == 3) {
		P(ctrl_cmd, sizeof(struct virtio_net_ctrl_cmd));
		P(ctrl_status, sizeof(struct virtio_net_ctrl_status));
		P(ctrl_rx, sizeof(struct virtio_net_ctrl_rx));
		P(ctrl_mac_tbl_uc, sizeof(struct virtio_net_ctrl_mac_tbl));
		P(ctrl_mac_tbl_mc,
		  (sizeof(struct virtio_net_ctrl_mac_tbl)
		   + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES));
	}
#undef P
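
	/*
	 * The single bus_dmamem allocation is now carved up as:
	 *   [rx hdrs x rxqsize][tx hdrs x txqsize]
	 *   [ctrl cmd][ctrl status][ctrl rx][uc mac tbl][mc mac tbl+entries]
	 * with the control structures present only when the control vq
	 * was allocated (vsc->sc_nvqs == 3).
	 */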

	allocsize2 = sizeof(bus_dmamap_t) * (rxqsize + txqsize);
	allocsize2 += sizeof(bus_dmamap_t) * (rxqsize + txqsize);
	allocsize2 += sizeof(struct mbuf*) * (rxqsize + txqsize);
	sc->sc_arrays = kmem_zalloc(allocsize2, KM_SLEEP);
	if (sc->sc_arrays == NULL)
		goto err_dmamem_map;
	sc->sc_txhdr_dmamaps = sc->sc_arrays + rxqsize;
	sc->sc_rx_dmamaps = sc->sc_txhdr_dmamaps + txqsize;
	sc->sc_tx_dmamaps = sc->sc_rx_dmamaps + rxqsize;
	sc->sc_rx_mbufs = (void*) (sc->sc_tx_dmamaps + txqsize);
	sc->sc_tx_mbufs = sc->sc_rx_mbufs + rxqsize;

#define C(map, buf, size, nsegs, rw, usage)				\
	do {								\
		r = bus_dmamap_create(vsc->sc_dmat, size, nsegs, size, 0, \
				      BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW,	\
				      &sc->sc_ ##map);			\
		if (r != 0) {						\
			aprint_error_dev(sc->sc_dev,			\
					 usage " dmamap creation failed, " \
					 "error code %d\n", r);		\
					 goto err_reqs;			\
		}							\
	} while (0)
#define C_L1(map, buf, size, nsegs, rw, usage)				\
	C(map, buf, size, nsegs, rw, usage);				\
	do {								\
		r = bus_dmamap_load(vsc->sc_dmat, sc->sc_ ##map,	\
				    &sc->sc_ ##buf, size, NULL,		\
				    BUS_DMA_ ##rw | BUS_DMA_NOWAIT);	\
		if (r != 0) {						\
			aprint_error_dev(sc->sc_dev,			\
					 usage " dmamap load failed, "	\
					 "error code %d\n", r);		\
			goto err_reqs;					\
		}							\
	} while (0)
#define C_L2(map, buf, size, nsegs, rw, usage)				\
	C(map, buf, size, nsegs, rw, usage);				\
	do {								\
		r = bus_dmamap_load(vsc->sc_dmat, sc->sc_ ##map,	\
				    sc->sc_ ##buf, size, NULL,		\
				    BUS_DMA_ ##rw | BUS_DMA_NOWAIT);	\
		if (r != 0) {						\
			aprint_error_dev(sc->sc_dev,			\
					 usage " dmamap load failed, "	\
					 "error code %d\n", r);		\
			goto err_reqs;					\
		}							\
	} while (0)
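	/*
	 * C() creates a dmamap only; C_L1() also loads it from an array
	 * element (&sc->sc_<buf>), while C_L2() loads it from a pointer
	 * member (sc->sc_<buf>).  The rw argument selects BUS_DMA_READ
	 * or BUS_DMA_WRITE for the load.
	 */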
	for (i = 0; i < rxqsize; i++) {
		C_L1(rxhdr_dmamaps[i], rx_hdrs[i],
		    sizeof(struct virtio_net_hdr), 1,
		    READ, "rx header");
		C(rx_dmamaps[i], NULL, MCLBYTES, 1, 0, "rx payload");
	}

	for (i = 0; i < txqsize; i++) {
		C_L1(txhdr_dmamaps[i], tx_hdrs[i],
		    sizeof(struct virtio_net_hdr), 1,
		    WRITE, "tx header");
		C(tx_dmamaps[i], NULL, ETHER_MAX_LEN, 16 /* XXX */, 0,
		  "tx payload");
	}

	if (vsc->sc_nvqs == 3) {
		/* control vq class & command */
		C_L2(ctrl_cmd_dmamap, ctrl_cmd,
		    sizeof(struct virtio_net_ctrl_cmd), 1, WRITE,
		    "control command");

		/* control vq status */
		C_L2(ctrl_status_dmamap, ctrl_status,
		    sizeof(struct virtio_net_ctrl_status), 1, READ,
		    "control status");

		/* control vq rx mode command parameter */
		C_L2(ctrl_rx_dmamap, ctrl_rx,
		    sizeof(struct virtio_net_ctrl_rx), 1, WRITE,
		    "rx mode control command");

		/* control vq MAC filter table for unicast */
		/* do not load now since its length is variable */
		C(ctrl_tbl_uc_dmamap, NULL,
		  sizeof(struct virtio_net_ctrl_mac_tbl) + 0, 1, WRITE,
		  "unicast MAC address filter command");

		/* control vq MAC filter table for multicast */
		C(ctrl_tbl_mc_dmamap, NULL,
		  (sizeof(struct virtio_net_ctrl_mac_tbl)
		   + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES),
		  1, WRITE, "multicast MAC address filter command");
	}
#undef C_L2
#undef C_L1
#undef C

	return 0;

err_reqs:
#define D(map)								\
	do {								\
		if (sc->sc_ ##map) {					\
			bus_dmamap_destroy(vsc->sc_dmat, sc->sc_ ##map); \
			sc->sc_ ##map = NULL;				\
		}							\
	} while (0)
	D(ctrl_tbl_mc_dmamap);
	D(ctrl_tbl_uc_dmamap);
	D(ctrl_rx_dmamap);
	D(ctrl_status_dmamap);
	D(ctrl_cmd_dmamap);
	for (i = 0; i < txqsize; i++) {
		D(tx_dmamaps[i]);
		D(txhdr_dmamaps[i]);
	}
	for (i = 0; i < rxqsize; i++) {
		D(rx_dmamaps[i]);
		D(rxhdr_dmamaps[i]);
	}
#undef D
	if (sc->sc_arrays) {
		kmem_free(sc->sc_arrays, allocsize2);
		sc->sc_arrays = 0;
	}
err_dmamem_map:
	bus_dmamem_unmap(vsc->sc_dmat, sc->sc_hdrs, allocsize);
err_dmamem_alloc:
	bus_dmamem_free(vsc->sc_dmat, &sc->sc_hdr_segs[0], 1);
err_none:
	return -1;
}

static void
vioif_attach(device_t parent, device_t self, void *aux)
{
	struct vioif_softc *sc = device_private(self);
	struct virtio_softc *vsc = device_private(parent);
	uint32_t features;
	char buf[256];
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	u_int flags;
	int r;

	if (vsc->sc_child != NULL) {
		aprint_normal(": child already attached for %s; "
			      "something wrong...\n",
			      device_xname(parent));
		return;
	}

	sc->sc_dev = self;
	sc->sc_virtio = vsc;

	vsc->sc_child = self;
	vsc->sc_ipl = IPL_NET;
	vsc->sc_vqs = &sc->sc_vq[0];
	vsc->sc_config_change = NULL;
	vsc->sc_intrhand = virtio_vq_intr;
	vsc->sc_flags = 0;

#ifdef VIOIF_MPSAFE
	vsc->sc_flags |= VIRTIO_F_PCI_INTR_MPSAFE;
#endif
#ifdef VIOIF_SOFTINT_INTR
	vsc->sc_flags |= VIRTIO_F_PCI_INTR_SOFTINT;
#endif
	vsc->sc_flags |= VIRTIO_F_PCI_INTR_MSIX;

	features = virtio_negotiate_features(vsc,
					     (VIRTIO_NET_F_MAC |
					      VIRTIO_NET_F_STATUS |
					      VIRTIO_NET_F_CTRL_VQ |
					      VIRTIO_NET_F_CTRL_RX |
					      VIRTIO_F_NOTIFY_ON_EMPTY));
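	/*
	 * No checksum or segmentation offload features (VIRTIO_NET_F_CSUM,
	 * _GSO, _TSO*, _UFO) are requested above, so ifp->if_capabilities
	 * is left at 0 further down.
	 */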
	if (features & VIRTIO_NET_F_MAC) {
		sc->sc_mac[0] = virtio_read_device_config_1(vsc,
						    VIRTIO_NET_CONFIG_MAC+0);
		sc->sc_mac[1] = virtio_read_device_config_1(vsc,
						    VIRTIO_NET_CONFIG_MAC+1);
		sc->sc_mac[2] = virtio_read_device_config_1(vsc,
						    VIRTIO_NET_CONFIG_MAC+2);
		sc->sc_mac[3] = virtio_read_device_config_1(vsc,
						    VIRTIO_NET_CONFIG_MAC+3);
		sc->sc_mac[4] = virtio_read_device_config_1(vsc,
						    VIRTIO_NET_CONFIG_MAC+4);
		sc->sc_mac[5] = virtio_read_device_config_1(vsc,
						    VIRTIO_NET_CONFIG_MAC+5);
	} else {
		/* code stolen from sys/net/if_tap.c */
		struct timeval tv;
		uint32_t ui;
		getmicrouptime(&tv);
		ui = (tv.tv_sec ^ tv.tv_usec) & 0xffffff;
		memcpy(sc->sc_mac+3, (uint8_t *)&ui, 3);
		virtio_write_device_config_1(vsc,
					     VIRTIO_NET_CONFIG_MAC+0,
					     sc->sc_mac[0]);
		virtio_write_device_config_1(vsc,
					     VIRTIO_NET_CONFIG_MAC+1,
					     sc->sc_mac[1]);
		virtio_write_device_config_1(vsc,
					     VIRTIO_NET_CONFIG_MAC+2,
					     sc->sc_mac[2]);
		virtio_write_device_config_1(vsc,
					     VIRTIO_NET_CONFIG_MAC+3,
					     sc->sc_mac[3]);
		virtio_write_device_config_1(vsc,
					     VIRTIO_NET_CONFIG_MAC+4,
					     sc->sc_mac[4]);
		virtio_write_device_config_1(vsc,
					     VIRTIO_NET_CONFIG_MAC+5,
					     sc->sc_mac[5]);
	}
	aprint_normal(": Ethernet address %s\n", ether_sprintf(sc->sc_mac));
	snprintb(buf, sizeof(buf), VIRTIO_NET_FLAG_BITS, features);
	aprint_normal_dev(self, "Features: %s\n", buf);
	aprint_naive("\n");

#ifdef VIOIF_MPSAFE
	sc->sc_tx_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
	sc->sc_rx_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
#else
	sc->sc_tx_lock = NULL;
	sc->sc_rx_lock = NULL;
#endif
	sc->sc_stopping = false;

	/*
	 * Allocating a virtqueue for Rx
	 */
	r = virtio_alloc_vq(vsc, &sc->sc_vq[VQ_RX], 0,
	    MCLBYTES+sizeof(struct virtio_net_hdr), 2, "rx");
	if (r != 0)
		goto err;
	vsc->sc_nvqs = 1;
	sc->sc_vq[VQ_RX].vq_done = vioif_rx_vq_done;

	/*
	 * Allocating a virtqueue for Tx
	 */
	r = virtio_alloc_vq(vsc, &sc->sc_vq[VQ_TX], 1,
	    (sizeof(struct virtio_net_hdr) + (ETHER_MAX_LEN - ETHER_HDR_LEN)),
	    VIRTIO_NET_TX_MAXNSEGS + 1, "tx");
	if (r != 0)
		goto err;
	vsc->sc_nvqs = 2;
	sc->sc_vq[VQ_TX].vq_done = vioif_tx_vq_done;

	virtio_start_vq_intr(vsc, &sc->sc_vq[VQ_RX]);
	virtio_stop_vq_intr(vsc, &sc->sc_vq[VQ_TX]); /* not urgent; do it later */

	if ((features & VIRTIO_NET_F_CTRL_VQ) &&
	    (features & VIRTIO_NET_F_CTRL_RX)) {
		/*
		 * Allocating a virtqueue for control channel
		 */
		r = virtio_alloc_vq(vsc, &sc->sc_vq[VQ_CTRL], 2,
		    NBPG, 1, "control");
		if (r != 0) {
			aprint_error_dev(self, "failed to allocate "
			    "a virtqueue for control channel\n");
			goto skip;
		}

		sc->sc_vq[VQ_CTRL].vq_done = vioif_ctrl_vq_done;
		cv_init(&sc->sc_ctrl_wait, "ctrl_vq");
		mutex_init(&sc->sc_ctrl_wait_lock, MUTEX_DEFAULT, IPL_NET);
		sc->sc_ctrl_inuse = FREE;
		virtio_start_vq_intr(vsc, &sc->sc_vq[VQ_CTRL]);
		vsc->sc_nvqs = 3;
	}
skip:

#ifdef VIOIF_MPSAFE
	flags = SOFTINT_NET | SOFTINT_MPSAFE;
#else
	flags = SOFTINT_NET;
#endif
	sc->sc_rx_softint = softint_establish(flags, vioif_rx_softint, sc);
	if (sc->sc_rx_softint == NULL) {
		aprint_error_dev(self, "cannot establish softint\n");
		goto err;
	}

	if (vioif_alloc_mems(sc) < 0)
		goto err;

	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_start = vioif_start;
	ifp->if_ioctl = vioif_ioctl;
	ifp->if_init = vioif_init;
	ifp->if_stop = vioif_stop;
	ifp->if_capabilities = 0;
	ifp->if_watchdog = vioif_watchdog;

	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;

	if_attach(ifp);
	ether_ifattach(ifp, sc->sc_mac);

	return;

err:
	if (sc->sc_tx_lock)
		mutex_obj_free(sc->sc_tx_lock);
	if (sc->sc_rx_lock)
		mutex_obj_free(sc->sc_rx_lock);

	if (vsc->sc_nvqs == 3) {
		cv_destroy(&sc->sc_ctrl_wait);
		mutex_destroy(&sc->sc_ctrl_wait_lock);
	}

	while (vsc->sc_nvqs > 0)
		virtio_free_vq(vsc, &sc->sc_vq[--vsc->sc_nvqs]);

	vsc->sc_child = (void*)1;
	return;
}

/*
 * Turning promiscuous mode off requires a control-virtqueue command,
 * which needs interrupts to complete; hence it is deferred until after
 * attach.
 */
static void
vioif_deferred_init(device_t self)
{
	struct vioif_softc *sc = device_private(self);
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	int r;

	if (ifp->if_flags & IFF_PROMISC)
		return;

	r = vioif_set_promisc(sc, false);
	if (r != 0)
		aprint_error_dev(self, "resetting promisc mode failed, "
				 "error code %d\n", r);
}

/*
 * Interface functions for ifnet
 */
static int
vioif_init(struct ifnet *ifp)
{
	struct vioif_softc *sc = ifp->if_softc;

	vioif_stop(ifp, 0);

	if (!sc->sc_deferred_init_done) {
		struct virtio_softc *vsc = sc->sc_virtio;

		sc->sc_deferred_init_done = 1;
		if (vsc->sc_nvqs == 3)
			vioif_deferred_init(sc->sc_dev);
	}

	/* Have to set false before vioif_populate_rx_mbufs */
	sc->sc_stopping = false;

	vioif_populate_rx_mbufs(sc);

	vioif_updown(sc, true);
	ifp->if_flags |= IFF_RUNNING;
	ifp->if_flags &= ~IFF_OACTIVE;
	vioif_rx_filter(sc);

	return 0;
}

static void
vioif_stop(struct ifnet *ifp, int disable)
{
	struct vioif_softc *sc = ifp->if_softc;
	struct virtio_softc *vsc = sc->sc_virtio;

	/* Take the locks to ensure that ongoing TX/RX finish */
	VIOIF_TX_LOCK(sc);
	VIOIF_RX_LOCK(sc);
	sc->sc_stopping = true;
	VIOIF_RX_UNLOCK(sc);
	VIOIF_TX_UNLOCK(sc);

	/* only way to stop I/O and DMA is resetting... */
	virtio_reset(vsc);
	vioif_rx_deq(sc);
	vioif_tx_drain(sc);
	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);

	if (disable)
		vioif_rx_drain(sc);

	virtio_reinit_start(vsc);
	virtio_negotiate_features(vsc, vsc->sc_features);
	virtio_start_vq_intr(vsc, &sc->sc_vq[VQ_RX]);
	virtio_stop_vq_intr(vsc, &sc->sc_vq[VQ_TX]);
	if (vsc->sc_nvqs >= 3)
		virtio_start_vq_intr(vsc, &sc->sc_vq[VQ_CTRL]);
	virtio_reinit_end(vsc);
	vioif_updown(sc, false);
}

static void
vioif_start(struct ifnet *ifp)
{
	struct vioif_softc *sc = ifp->if_softc;
	struct virtio_softc *vsc = sc->sc_virtio;
	struct virtqueue *vq = &sc->sc_vq[VQ_TX];
	struct mbuf *m;
	int queued = 0, retry = 0;

	VIOIF_TX_LOCK(sc);

	if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)
		goto out;

	if (sc->sc_stopping)
		goto out;

	for (;;) {
		int slot, r;

		IFQ_DEQUEUE(&ifp->if_snd, m);

		if (m == NULL)
			break;

retry:
		r = virtio_enqueue_prep(vsc, vq, &slot);
		if (r == EAGAIN) {
			ifp->if_flags |= IFF_OACTIVE;
			vioif_tx_vq_done_locked(vq);
			if (retry++ == 0)
				goto retry;
			else
				break;
		}
		if (r != 0)
			panic("enqueue_prep for a tx buffer");
		r = bus_dmamap_load_mbuf(vsc->sc_dmat,
					 sc->sc_tx_dmamaps[slot],
					 m, BUS_DMA_WRITE|BUS_DMA_NOWAIT);
		if (r != 0) {
			virtio_enqueue_abort(vsc, vq, slot);
			aprint_error_dev(sc->sc_dev,
			    "tx dmamap load failed, error code %d\n", r);
			break;
		}
		r = virtio_enqueue_reserve(vsc, vq, slot,
					sc->sc_tx_dmamaps[slot]->dm_nsegs + 1);
		if (r != 0) {
			virtio_enqueue_abort(vsc, vq, slot);
			bus_dmamap_unload(vsc->sc_dmat,
					  sc->sc_tx_dmamaps[slot]);
			ifp->if_flags |= IFF_OACTIVE;
			vioif_tx_vq_done_locked(vq);
			if (retry++ == 0)
				goto retry;
			else
				break;
		}

		sc->sc_tx_mbufs[slot] = m;

		memset(&sc->sc_tx_hdrs[slot], 0, sizeof(struct virtio_net_hdr));
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_tx_dmamaps[slot],
				0, sc->sc_tx_dmamaps[slot]->dm_mapsize,
				BUS_DMASYNC_PREWRITE);
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_txhdr_dmamaps[slot],
				0, sc->sc_txhdr_dmamaps[slot]->dm_mapsize,
				BUS_DMASYNC_PREWRITE);
		virtio_enqueue(vsc, vq, slot, sc->sc_txhdr_dmamaps[slot], true);
		virtio_enqueue(vsc, vq, slot, sc->sc_tx_dmamaps[slot], true);
		virtio_enqueue_commit(vsc, vq, slot, false);
		queued++;
		bpf_mtap(ifp, m);
	}

	if (m != NULL) {
		ifp->if_flags |= IFF_OACTIVE;
		m_freem(m);
	}

	if (queued > 0) {
		virtio_enqueue_commit(vsc, vq, -1, true);
		ifp->if_timer = 5;
	}

out:
	VIOIF_TX_UNLOCK(sc);
}
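
/*
 * Each packet sent by vioif_start() occupies one descriptor chain: the
 * virtio_net_hdr dmamap followed by the payload dmamap.  That is why
 * the tx vq is sized for VIRTIO_NET_TX_MAXNSEGS + 1 segments per
 * request and virtio_enqueue_reserve() asks for dm_nsegs + 1 slots.
 */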

static int
vioif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
{
	int s, r;

	s = splnet();

	r = ether_ioctl(ifp, cmd, data);
	if ((r == 0 && cmd == SIOCSIFFLAGS) ||
	    (r == ENETRESET && (cmd == SIOCADDMULTI || cmd == SIOCDELMULTI))) {
		if (ifp->if_flags & IFF_RUNNING)
			r = vioif_rx_filter(ifp->if_softc);
		else
			r = 0;
	}

	splx(s);

	return r;
}

void
vioif_watchdog(struct ifnet *ifp)
{
	struct vioif_softc *sc = ifp->if_softc;

	if (ifp->if_flags & IFF_RUNNING)
		vioif_tx_vq_done(&sc->sc_vq[VQ_TX]);
}


/*
 * Receive implementation
 */
/* allocate and initialize an mbuf for receive */
static int
vioif_add_rx_mbuf(struct vioif_softc *sc, int i)
{
	struct mbuf *m;
	int r;

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return ENOBUFS;
	MCLGET(m, M_DONTWAIT);
	if ((m->m_flags & M_EXT) == 0) {
		m_freem(m);
		return ENOBUFS;
	}
	sc->sc_rx_mbufs[i] = m;
	m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
	r = bus_dmamap_load_mbuf(sc->sc_virtio->sc_dmat,
				 sc->sc_rx_dmamaps[i],
				 m, BUS_DMA_READ|BUS_DMA_NOWAIT);
	if (r) {
		m_freem(m);
		sc->sc_rx_mbufs[i] = 0;
		return r;
	}

	return 0;
}
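
/*
 * The mbuf is loaded at its full cluster size so the device may write
 * up to MCLBYTES; the length is trimmed to the actual packet size in
 * vioif_rx_deq_locked().
 */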

/* free an mbuf used for receive */
static void
vioif_free_rx_mbuf(struct vioif_softc *sc, int i)
{
	bus_dmamap_unload(sc->sc_virtio->sc_dmat, sc->sc_rx_dmamaps[i]);
	m_freem(sc->sc_rx_mbufs[i]);
	sc->sc_rx_mbufs[i] = NULL;
}

/* add mbufs for all the empty receive slots */
static void
vioif_populate_rx_mbufs(struct vioif_softc *sc)
{
	VIOIF_RX_LOCK(sc);
	vioif_populate_rx_mbufs_locked(sc);
	VIOIF_RX_UNLOCK(sc);
}

static void
vioif_populate_rx_mbufs_locked(struct vioif_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	int i, r, ndone = 0;
	struct virtqueue *vq = &sc->sc_vq[VQ_RX];

	KASSERT(VIOIF_RX_LOCKED(sc));

	if (sc->sc_stopping)
		return;

	for (i = 0; i < vq->vq_num; i++) {
		int slot;
		r = virtio_enqueue_prep(vsc, vq, &slot);
		if (r == EAGAIN)
			break;
		if (r != 0)
			panic("enqueue_prep for rx buffers");
		if (sc->sc_rx_mbufs[slot] == NULL) {
			r = vioif_add_rx_mbuf(sc, slot);
			if (r != 0) {
				printf("%s: rx mbuf allocation failed, "
				       "error code %d\n",
				       device_xname(sc->sc_dev), r);
				break;
			}
		}
		r = virtio_enqueue_reserve(vsc, vq, slot,
					sc->sc_rx_dmamaps[slot]->dm_nsegs + 1);
		if (r != 0) {
			vioif_free_rx_mbuf(sc, slot);
			break;
		}
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_rxhdr_dmamaps[slot],
			0, sizeof(struct virtio_net_hdr), BUS_DMASYNC_PREREAD);
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_rx_dmamaps[slot],
			0, MCLBYTES, BUS_DMASYNC_PREREAD);
		virtio_enqueue(vsc, vq, slot, sc->sc_rxhdr_dmamaps[slot], false);
		virtio_enqueue(vsc, vq, slot, sc->sc_rx_dmamaps[slot], false);
		virtio_enqueue_commit(vsc, vq, slot, false);
		ndone++;
	}
	if (ndone > 0)
		virtio_enqueue_commit(vsc, vq, -1, true);
}

/* dequeue received packets */
static int
vioif_rx_deq(struct vioif_softc *sc)
{
	int r;

	KASSERT(sc->sc_stopping);

	VIOIF_RX_LOCK(sc);
	r = vioif_rx_deq_locked(sc);
	VIOIF_RX_UNLOCK(sc);

	return r;
}

/* dequeue received packets */
static int
vioif_rx_deq_locked(struct vioif_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	struct virtqueue *vq = &sc->sc_vq[VQ_RX];
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	struct mbuf *m;
	int r = 0;
	int slot, len;

	KASSERT(VIOIF_RX_LOCKED(sc));

	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
		len -= sizeof(struct virtio_net_hdr);
		r = 1;
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_rxhdr_dmamaps[slot],
				0, sizeof(struct virtio_net_hdr),
				BUS_DMASYNC_POSTREAD);
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_rx_dmamaps[slot],
				0, MCLBYTES,
				BUS_DMASYNC_POSTREAD);
		m = sc->sc_rx_mbufs[slot];
		KASSERT(m != NULL);
		bus_dmamap_unload(vsc->sc_dmat, sc->sc_rx_dmamaps[slot]);
		sc->sc_rx_mbufs[slot] = 0;
		virtio_dequeue_commit(vsc, vq, slot);
		m_set_rcvif(m, ifp);
		m->m_len = m->m_pkthdr.len = len;
		ifp->if_ipackets++;
		bpf_mtap(ifp, m);

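		/*
		 * Drop the rx lock while handing the packet to the
		 * stack so the network stack is never entered with the
		 * driver lock held; sc_stopping is re-checked after
		 * re-acquiring it.
		 */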
		VIOIF_RX_UNLOCK(sc);
		if_percpuq_enqueue(ifp->if_percpuq, m);
		VIOIF_RX_LOCK(sc);

		if (sc->sc_stopping)
			break;
	}

	return r;
}

/* rx interrupt; call _dequeue above and schedule a softint */
static int
vioif_rx_vq_done(struct virtqueue *vq)
{
	struct virtio_softc *vsc = vq->vq_owner;
	struct vioif_softc *sc = device_private(vsc->sc_child);
	int r = 0;

#ifdef VIOIF_SOFTINT_INTR
	KASSERT(!cpu_intr_p());
#endif

	VIOIF_RX_LOCK(sc);

	if (sc->sc_stopping)
		goto out;

	r = vioif_rx_deq_locked(sc);
	if (r)
#ifdef VIOIF_SOFTINT_INTR
		vioif_populate_rx_mbufs_locked(sc);
#else
		softint_schedule(sc->sc_rx_softint);
#endif

out:
	VIOIF_RX_UNLOCK(sc);
	return r;
}

/* softint: enqueue receive requests for new incoming packets */
static void
vioif_rx_softint(void *arg)
{
	struct vioif_softc *sc = arg;

	vioif_populate_rx_mbufs(sc);
}

/* free all the mbufs; called from if_stop(disable) */
static void
vioif_rx_drain(struct vioif_softc *sc)
{
	struct virtqueue *vq = &sc->sc_vq[VQ_RX];
	int i;

	for (i = 0; i < vq->vq_num; i++) {
		if (sc->sc_rx_mbufs[i] == NULL)
			continue;
		vioif_free_rx_mbuf(sc, i);
	}
}


/*
 * Transmission implementation
 */
/* actual transmission is done in if_start */
/* tx interrupt; dequeue and free mbufs */
/*
 * tx interrupt is actually disabled; this should be called upon
 * tx vq full and watchdog
 */
static int
vioif_tx_vq_done(struct virtqueue *vq)
{
	struct virtio_softc *vsc = vq->vq_owner;
	struct vioif_softc *sc = device_private(vsc->sc_child);
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	int r = 0;

	VIOIF_TX_LOCK(sc);

	if (sc->sc_stopping)
		goto out;

	r = vioif_tx_vq_done_locked(vq);

out:
	VIOIF_TX_UNLOCK(sc);
	if (r)
		vioif_start(ifp);
	return r;
}

static int
vioif_tx_vq_done_locked(struct virtqueue *vq)
{
	struct virtio_softc *vsc = vq->vq_owner;
	struct vioif_softc *sc = device_private(vsc->sc_child);
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	struct mbuf *m;
	int r = 0;
	int slot, len;

	KASSERT(VIOIF_TX_LOCKED(sc));

	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
		r++;
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_txhdr_dmamaps[slot],
				0, sizeof(struct virtio_net_hdr),
				BUS_DMASYNC_POSTWRITE);
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_tx_dmamaps[slot],
				0, sc->sc_tx_dmamaps[slot]->dm_mapsize,
				BUS_DMASYNC_POSTWRITE);
		m = sc->sc_tx_mbufs[slot];
		bus_dmamap_unload(vsc->sc_dmat, sc->sc_tx_dmamaps[slot]);
		sc->sc_tx_mbufs[slot] = 0;
		virtio_dequeue_commit(vsc, vq, slot);
		ifp->if_opackets++;
		m_freem(m);
	}

	if (r)
		ifp->if_flags &= ~IFF_OACTIVE;
	return r;
}

/* free all the mbufs already put on vq; called from if_stop(disable) */
static void
vioif_tx_drain(struct vioif_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	struct virtqueue *vq = &sc->sc_vq[VQ_TX];
	int i;

	KASSERT(sc->sc_stopping);

	for (i = 0; i < vq->vq_num; i++) {
		if (sc->sc_tx_mbufs[i] == NULL)
			continue;
		bus_dmamap_unload(vsc->sc_dmat, sc->sc_tx_dmamaps[i]);
		m_freem(sc->sc_tx_mbufs[i]);
		sc->sc_tx_mbufs[i] = NULL;
	}
}

/*
 * Control vq
 */
/* issue a VIRTIO_NET_CTRL_RX class command and wait for completion */
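/*
 * The command goes out as a three-descriptor chain: the class/command
 * pair and the on/off parameter (both device-readable), followed by
 * the one-byte status that the device writes back.  The caller sleeps
 * on sc_ctrl_wait until vioif_ctrl_vq_done() signals completion.
 */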
static int
vioif_ctrl_rx(struct vioif_softc *sc, int cmd, bool onoff)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	struct virtqueue *vq = &sc->sc_vq[VQ_CTRL];
	int r, slot;

	if (vsc->sc_nvqs < 3)
		return ENOTSUP;

	mutex_enter(&sc->sc_ctrl_wait_lock);
	while (sc->sc_ctrl_inuse != FREE)
		cv_wait(&sc->sc_ctrl_wait, &sc->sc_ctrl_wait_lock);
	sc->sc_ctrl_inuse = INUSE;
	mutex_exit(&sc->sc_ctrl_wait_lock);

	sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_RX;
	sc->sc_ctrl_cmd->command = cmd;
	sc->sc_ctrl_rx->onoff = onoff;

	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_cmd_dmamap,
			0, sizeof(struct virtio_net_ctrl_cmd),
			BUS_DMASYNC_PREWRITE);
	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_rx_dmamap,
			0, sizeof(struct virtio_net_ctrl_rx),
			BUS_DMASYNC_PREWRITE);
	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_status_dmamap,
			0, sizeof(struct virtio_net_ctrl_status),
			BUS_DMASYNC_PREREAD);

	r = virtio_enqueue_prep(vsc, vq, &slot);
	if (r != 0)
		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
	r = virtio_enqueue_reserve(vsc, vq, slot, 3);
	if (r != 0)
		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
	virtio_enqueue(vsc, vq, slot, sc->sc_ctrl_cmd_dmamap, true);
	virtio_enqueue(vsc, vq, slot, sc->sc_ctrl_rx_dmamap, true);
	virtio_enqueue(vsc, vq, slot, sc->sc_ctrl_status_dmamap, false);
	virtio_enqueue_commit(vsc, vq, slot, true);

	/* wait for done */
	mutex_enter(&sc->sc_ctrl_wait_lock);
	while (sc->sc_ctrl_inuse != DONE)
		cv_wait(&sc->sc_ctrl_wait, &sc->sc_ctrl_wait_lock);
	mutex_exit(&sc->sc_ctrl_wait_lock);
	/* already dequeued */

	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_cmd_dmamap, 0,
			sizeof(struct virtio_net_ctrl_cmd),
			BUS_DMASYNC_POSTWRITE);
	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_rx_dmamap, 0,
			sizeof(struct virtio_net_ctrl_rx),
			BUS_DMASYNC_POSTWRITE);
	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_status_dmamap, 0,
			sizeof(struct virtio_net_ctrl_status),
			BUS_DMASYNC_POSTREAD);

	if (sc->sc_ctrl_status->ack == VIRTIO_NET_OK)
		r = 0;
	else {
		printf("%s: failed setting rx mode\n",
		       device_xname(sc->sc_dev));
		r = EIO;
	}

	mutex_enter(&sc->sc_ctrl_wait_lock);
	sc->sc_ctrl_inuse = FREE;
	cv_signal(&sc->sc_ctrl_wait);
	mutex_exit(&sc->sc_ctrl_wait_lock);

	return r;
}

static int
vioif_set_promisc(struct vioif_softc *sc, bool onoff)
{
	int r;

	r = vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, onoff);

	return r;
}

static int
vioif_set_allmulti(struct vioif_softc *sc, bool onoff)
{
	int r;

	r = vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, onoff);

	return r;
}

/* issue VIRTIO_NET_CTRL_MAC_TABLE_SET command and wait for completion */
static int
vioif_set_rx_filter(struct vioif_softc *sc)
{
	/* filter already set in sc_ctrl_mac_tbl */
	struct virtio_softc *vsc = sc->sc_virtio;
	struct virtqueue *vq = &sc->sc_vq[VQ_CTRL];
	int r, slot;

	if (vsc->sc_nvqs < 3)
		return ENOTSUP;

	mutex_enter(&sc->sc_ctrl_wait_lock);
	while (sc->sc_ctrl_inuse != FREE)
		cv_wait(&sc->sc_ctrl_wait, &sc->sc_ctrl_wait_lock);
	sc->sc_ctrl_inuse = INUSE;
	mutex_exit(&sc->sc_ctrl_wait_lock);

	sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_MAC;
	sc->sc_ctrl_cmd->command = VIRTIO_NET_CTRL_MAC_TABLE_SET;

	r = bus_dmamap_load(vsc->sc_dmat, sc->sc_ctrl_tbl_uc_dmamap,
			    sc->sc_ctrl_mac_tbl_uc,
			    (sizeof(struct virtio_net_ctrl_mac_tbl)
			  + ETHER_ADDR_LEN * sc->sc_ctrl_mac_tbl_uc->nentries),
			    NULL, BUS_DMA_WRITE|BUS_DMA_NOWAIT);
	if (r) {
		printf("%s: control command dmamap load failed, "
		       "error code %d\n", device_xname(sc->sc_dev), r);
		goto out;
	}
	r = bus_dmamap_load(vsc->sc_dmat, sc->sc_ctrl_tbl_mc_dmamap,
			    sc->sc_ctrl_mac_tbl_mc,
			    (sizeof(struct virtio_net_ctrl_mac_tbl)
			  + ETHER_ADDR_LEN * sc->sc_ctrl_mac_tbl_mc->nentries),
			    NULL, BUS_DMA_WRITE|BUS_DMA_NOWAIT);
	if (r) {
		printf("%s: control command dmamap load failed, "
		       "error code %d\n", device_xname(sc->sc_dev), r);
		bus_dmamap_unload(vsc->sc_dmat, sc->sc_ctrl_tbl_uc_dmamap);
		goto out;
	}

	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_cmd_dmamap,
			0, sizeof(struct virtio_net_ctrl_cmd),
			BUS_DMASYNC_PREWRITE);
	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_tbl_uc_dmamap, 0,
			(sizeof(struct virtio_net_ctrl_mac_tbl)
			 + ETHER_ADDR_LEN * sc->sc_ctrl_mac_tbl_uc->nentries),
			BUS_DMASYNC_PREWRITE);
	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_tbl_mc_dmamap, 0,
			(sizeof(struct virtio_net_ctrl_mac_tbl)
			 + ETHER_ADDR_LEN * sc->sc_ctrl_mac_tbl_mc->nentries),
			BUS_DMASYNC_PREWRITE);
	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_status_dmamap,
			0, sizeof(struct virtio_net_ctrl_status),
			BUS_DMASYNC_PREREAD);

	r = virtio_enqueue_prep(vsc, vq, &slot);
	if (r != 0)
		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
	r = virtio_enqueue_reserve(vsc, vq, slot, 4);
	if (r != 0)
		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
	virtio_enqueue(vsc, vq, slot, sc->sc_ctrl_cmd_dmamap, true);
	virtio_enqueue(vsc, vq, slot, sc->sc_ctrl_tbl_uc_dmamap, true);
	virtio_enqueue(vsc, vq, slot, sc->sc_ctrl_tbl_mc_dmamap, true);
	virtio_enqueue(vsc, vq, slot, sc->sc_ctrl_status_dmamap, false);
	virtio_enqueue_commit(vsc, vq, slot, true);

	/* wait for done */
	mutex_enter(&sc->sc_ctrl_wait_lock);
	while (sc->sc_ctrl_inuse != DONE)
		cv_wait(&sc->sc_ctrl_wait, &sc->sc_ctrl_wait_lock);
	mutex_exit(&sc->sc_ctrl_wait_lock);
	/* already dequeued */

	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_cmd_dmamap, 0,
			sizeof(struct virtio_net_ctrl_cmd),
			BUS_DMASYNC_POSTWRITE);
	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_tbl_uc_dmamap, 0,
			(sizeof(struct virtio_net_ctrl_mac_tbl)
			 + ETHER_ADDR_LEN * sc->sc_ctrl_mac_tbl_uc->nentries),
			BUS_DMASYNC_POSTWRITE);
	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_tbl_mc_dmamap, 0,
			(sizeof(struct virtio_net_ctrl_mac_tbl)
			 + ETHER_ADDR_LEN * sc->sc_ctrl_mac_tbl_mc->nentries),
			BUS_DMASYNC_POSTWRITE);
	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_status_dmamap, 0,
			sizeof(struct virtio_net_ctrl_status),
			BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(vsc->sc_dmat, sc->sc_ctrl_tbl_uc_dmamap);
	bus_dmamap_unload(vsc->sc_dmat, sc->sc_ctrl_tbl_mc_dmamap);

	if (sc->sc_ctrl_status->ack == VIRTIO_NET_OK)
		r = 0;
	else {
		printf("%s: failed setting rx filter\n",
		       device_xname(sc->sc_dev));
		r = EIO;
	}

out:
	mutex_enter(&sc->sc_ctrl_wait_lock);
	sc->sc_ctrl_inuse = FREE;
	cv_signal(&sc->sc_ctrl_wait);
	mutex_exit(&sc->sc_ctrl_wait_lock);

	return r;
}

/* ctrl vq interrupt; wake up the command issuer */
static int
vioif_ctrl_vq_done(struct virtqueue *vq)
{
	struct virtio_softc *vsc = vq->vq_owner;
	struct vioif_softc *sc = device_private(vsc->sc_child);
	int r, slot;

	r = virtio_dequeue(vsc, vq, &slot, NULL);
	if (r == ENOENT)
		return 0;
	virtio_dequeue_commit(vsc, vq, slot);

	mutex_enter(&sc->sc_ctrl_wait_lock);
	sc->sc_ctrl_inuse = DONE;
	cv_signal(&sc->sc_ctrl_wait);
	mutex_exit(&sc->sc_ctrl_wait_lock);

	return 1;
}

/*
 * If IFF_PROMISC is requested, enable promiscuous mode.
 * If the multicast filter is small enough (<= MAXENTRIES), program the
 * rx filter; if a larger multicast filter exists, use ALLMULTI instead.
 *
 * If programming the rx filter fails, fall back to ALLMULTI.
 * If ALLMULTI fails, fall back to PROMISC.
 */
static int
vioif_rx_filter(struct vioif_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	struct ether_multi *enm;
	struct ether_multistep step;
	int nentries;
	int promisc = 0, allmulti = 0, rxfilter = 0;
	int r;

	if (vsc->sc_nvqs < 3) {	/* no ctrl vq; always promisc */
		ifp->if_flags |= IFF_PROMISC;
		return 0;
	}

	if (ifp->if_flags & IFF_PROMISC) {
		promisc = 1;
		goto set;
	}

	nentries = -1;
	ETHER_FIRST_MULTI(step, &sc->sc_ethercom, enm);
	while (nentries++, enm != NULL) {
		if (nentries >= VIRTIO_NET_CTRL_MAC_MAXENTRIES) {
			allmulti = 1;
			goto set;
		}
		if (memcmp(enm->enm_addrlo, enm->enm_addrhi,
			   ETHER_ADDR_LEN)) {
			allmulti = 1;
			goto set;
		}
		memcpy(sc->sc_ctrl_mac_tbl_mc->macs[nentries],
		       enm->enm_addrlo, ETHER_ADDR_LEN);
		ETHER_NEXT_MULTI(step, enm);
	}
	rxfilter = 1;

set:
	if (rxfilter) {
		sc->sc_ctrl_mac_tbl_uc->nentries = 0;
		sc->sc_ctrl_mac_tbl_mc->nentries = nentries;
		r = vioif_set_rx_filter(sc);
		if (r != 0) {
			rxfilter = 0;
			allmulti = 1; /* fallback */
		}
	} else {
		/* remove rx filter */
		sc->sc_ctrl_mac_tbl_uc->nentries = 0;
		sc->sc_ctrl_mac_tbl_mc->nentries = 0;
		r = vioif_set_rx_filter(sc);
		/* what to do on failure? */
	}
	if (allmulti) {
		r = vioif_set_allmulti(sc, true);
		if (r != 0) {
			allmulti = 0;
			promisc = 1; /* fallback */
		}
	} else {
		r = vioif_set_allmulti(sc, false);
		/* what to do on failure? */
	}
	if (promisc) {
		r = vioif_set_promisc(sc, true);
	} else {
		r = vioif_set_promisc(sc, false);
	}

	return r;
}

/* change link status */
static int
vioif_updown(struct vioif_softc *sc, bool isup)
{
	struct virtio_softc *vsc = sc->sc_virtio;

	if (!(vsc->sc_features & VIRTIO_NET_F_STATUS))
		return ENODEV;
	virtio_write_device_config_1(vsc,
				     VIRTIO_NET_CONFIG_STATUS,
				     isup?VIRTIO_NET_S_LINK_UP:0);
	return 0;
}