/*	$NetBSD: if_vioif.c,v 1.13 2014/12/24 02:48:24 ozaki-r Exp $	*/

/*
 * Copyright (c) 2010 Minoura Makoto.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: if_vioif.c,v 1.13 2014/12/24 02:48:24 ozaki-r Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/condvar.h>
#include <sys/device.h>
#include <sys/intr.h>
#include <sys/kmem.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/sockio.h>
#include <sys/cpu.h>

#include <dev/pci/pcidevs.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/virtioreg.h>
#include <dev/pci/virtiovar.h>

#include <net/if.h>
#include <net/if_media.h>
#include <net/if_ether.h>

#include <net/bpf.h>


#ifdef NET_MPSAFE
#define VIOIF_MPSAFE	1
#endif

#ifdef SOFTINT_INTR
#define VIOIF_SOFTINT_INTR	1
#endif

/*
 * if_vioifreg.h:
 */
/* Configuration registers */
#define VIRTIO_NET_CONFIG_MAC		0 /* 8bit x 6byte */
#define VIRTIO_NET_CONFIG_STATUS	6 /* 16bit */

/* Feature bits */
#define VIRTIO_NET_F_CSUM	(1<<0)
#define VIRTIO_NET_F_GUEST_CSUM	(1<<1)
#define VIRTIO_NET_F_MAC	(1<<5)
#define VIRTIO_NET_F_GSO	(1<<6)
#define VIRTIO_NET_F_GUEST_TSO4	(1<<7)
#define VIRTIO_NET_F_GUEST_TSO6	(1<<8)
#define VIRTIO_NET_F_GUEST_ECN	(1<<9)
#define VIRTIO_NET_F_GUEST_UFO	(1<<10)
#define VIRTIO_NET_F_HOST_TSO4	(1<<11)
#define VIRTIO_NET_F_HOST_TSO6	(1<<12)
#define VIRTIO_NET_F_HOST_ECN	(1<<13)
#define VIRTIO_NET_F_HOST_UFO	(1<<14)
#define VIRTIO_NET_F_MRG_RXBUF	(1<<15)
#define VIRTIO_NET_F_STATUS	(1<<16)
#define VIRTIO_NET_F_CTRL_VQ	(1<<17)
#define VIRTIO_NET_F_CTRL_RX	(1<<18)
#define VIRTIO_NET_F_CTRL_VLAN	(1<<19)

/* Status */
#define VIRTIO_NET_S_LINK_UP	1

/* Packet header structure */
struct virtio_net_hdr {
	uint8_t		flags;
	uint8_t		gso_type;
	uint16_t	hdr_len;
	uint16_t	gso_size;
	uint16_t	csum_start;
	uint16_t	csum_offset;
#if 0
	uint16_t	num_buffers; /* if VIRTIO_NET_F_MRG_RXBUF enabled */
#endif
} __packed;
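
/*
 * Note: this fixed header precedes every frame on both the rx and tx
 * virtqueues.  num_buffers is compiled out above because this driver
 * never negotiates VIRTIO_NET_F_MRG_RXBUF (see the feature mask passed
 * to virtio_negotiate_features() in vioif_attach() below).
 */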

#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
#define VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
#define VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
#define VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
#define VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
#define VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */

#define VIRTIO_NET_MAX_GSO_LEN		(65536+ETHER_HDR_LEN)

/* Control virtqueue */
struct virtio_net_ctrl_cmd {
	uint8_t	class;
	uint8_t	command;
} __packed;
#define VIRTIO_NET_CTRL_RX		0
# define VIRTIO_NET_CTRL_RX_PROMISC	0
# define VIRTIO_NET_CTRL_RX_ALLMULTI	1

#define VIRTIO_NET_CTRL_MAC		1
# define VIRTIO_NET_CTRL_MAC_TABLE_SET	0

#define VIRTIO_NET_CTRL_VLAN		2
# define VIRTIO_NET_CTRL_VLAN_ADD	0
# define VIRTIO_NET_CTRL_VLAN_DEL	1

struct virtio_net_ctrl_status {
	uint8_t	ack;
} __packed;
#define VIRTIO_NET_OK			0
#define VIRTIO_NET_ERR			1

struct virtio_net_ctrl_rx {
	uint8_t	onoff;
} __packed;

struct virtio_net_ctrl_mac_tbl {
	uint32_t nentries;
	uint8_t macs[][ETHER_ADDR_LEN];
} __packed;

struct virtio_net_ctrl_vlan {
	uint16_t id;
} __packed;

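/*
 * Of the control classes above, the driver only issues VIRTIO_NET_CTRL_RX
 * and VIRTIO_NET_CTRL_MAC commands; the VLAN definitions are kept for
 * completeness of the register description.
 */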

/*
 * if_vioifvar.h:
 */
struct vioif_softc {
	device_t		sc_dev;

	struct virtio_softc	*sc_virtio;
	struct virtqueue	sc_vq[3];

	uint8_t			sc_mac[ETHER_ADDR_LEN];
	struct ethercom		sc_ethercom;
	short			sc_deferred_init_done;

	/* bus_dmamem */
	bus_dma_segment_t	sc_hdr_segs[1];
	struct virtio_net_hdr	*sc_hdrs;
#define sc_rx_hdrs	sc_hdrs
	struct virtio_net_hdr	*sc_tx_hdrs;
	struct virtio_net_ctrl_cmd *sc_ctrl_cmd;
	struct virtio_net_ctrl_status *sc_ctrl_status;
	struct virtio_net_ctrl_rx *sc_ctrl_rx;
	struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_uc;
	struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_mc;

	/* kmem */
	bus_dmamap_t		*sc_arrays;
#define sc_rxhdr_dmamaps sc_arrays
	bus_dmamap_t		*sc_txhdr_dmamaps;
	bus_dmamap_t		*sc_rx_dmamaps;
	bus_dmamap_t		*sc_tx_dmamaps;
	struct mbuf		**sc_rx_mbufs;
	struct mbuf		**sc_tx_mbufs;

	bus_dmamap_t		sc_ctrl_cmd_dmamap;
	bus_dmamap_t		sc_ctrl_status_dmamap;
	bus_dmamap_t		sc_ctrl_rx_dmamap;
	bus_dmamap_t		sc_ctrl_tbl_uc_dmamap;
	bus_dmamap_t		sc_ctrl_tbl_mc_dmamap;

	void			*sc_rx_softint;

	enum {
		FREE, INUSE, DONE
	}			sc_ctrl_inuse;
	kcondvar_t		sc_ctrl_wait;
	kmutex_t		sc_ctrl_wait_lock;
	kmutex_t		*sc_tx_lock;
	kmutex_t		*sc_rx_lock;
	bool			sc_stopping;
};
#define VIRTIO_NET_TX_MAXNSEGS		(16) /* XXX */
#define VIRTIO_NET_CTRL_MAC_MAXENTRIES	(64) /* XXX */

#define VIOIF_TX_LOCK(_sc)	if ((_sc)->sc_tx_lock) mutex_enter((_sc)->sc_tx_lock)
#define VIOIF_TX_UNLOCK(_sc)	if ((_sc)->sc_tx_lock) mutex_exit((_sc)->sc_tx_lock)
#define VIOIF_TX_LOCKED(_sc)	(!(_sc)->sc_tx_lock || mutex_owned((_sc)->sc_tx_lock))
#define VIOIF_RX_LOCK(_sc)	if ((_sc)->sc_rx_lock) mutex_enter((_sc)->sc_rx_lock)
#define VIOIF_RX_UNLOCK(_sc)	if ((_sc)->sc_rx_lock) mutex_exit((_sc)->sc_rx_lock)
#define VIOIF_RX_LOCKED(_sc)	(!(_sc)->sc_rx_lock || mutex_owned((_sc)->sc_rx_lock))
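
/*
 * Without VIOIF_MPSAFE the lock pointers stay NULL (see vioif_attach()),
 * so the macros above compile to no-ops and the driver is serialized by
 * splnet()/the kernel lock instead.
 */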

/* cfattach interface functions */
static int	vioif_match(device_t, cfdata_t, void *);
static void	vioif_attach(device_t, device_t, void *);
static void	vioif_deferred_init(device_t);

/* ifnet interface functions */
static int	vioif_init(struct ifnet *);
static void	vioif_stop(struct ifnet *, int);
static void	vioif_start(struct ifnet *);
static int	vioif_ioctl(struct ifnet *, u_long, void *);
static void	vioif_watchdog(struct ifnet *);

/* rx */
static int	vioif_add_rx_mbuf(struct vioif_softc *, int);
static void	vioif_free_rx_mbuf(struct vioif_softc *, int);
static void	vioif_populate_rx_mbufs(struct vioif_softc *);
static void	vioif_populate_rx_mbufs_locked(struct vioif_softc *);
static int	vioif_rx_deq(struct vioif_softc *);
static int	vioif_rx_deq_locked(struct vioif_softc *);
static int	vioif_rx_vq_done(struct virtqueue *);
static void	vioif_rx_softint(void *);
static void	vioif_rx_drain(struct vioif_softc *);

/* tx */
static int	vioif_tx_vq_done(struct virtqueue *);
static int	vioif_tx_vq_done_locked(struct virtqueue *);
static void	vioif_tx_drain(struct vioif_softc *);

/* other control */
static int	vioif_updown(struct vioif_softc *, bool);
static int	vioif_ctrl_rx(struct vioif_softc *, int, bool);
static int	vioif_set_promisc(struct vioif_softc *, bool);
static int	vioif_set_allmulti(struct vioif_softc *, bool);
static int	vioif_set_rx_filter(struct vioif_softc *);
static int	vioif_rx_filter(struct vioif_softc *);
static int	vioif_ctrl_vq_done(struct virtqueue *);

CFATTACH_DECL_NEW(vioif, sizeof(struct vioif_softc),
		  vioif_match, vioif_attach, NULL, NULL);
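
/*
 * The device attaches as a child of the virtio bus.  A typical kernel
 * configuration line (cf. GENERIC) would be:
 *
 *	vioif*	at virtio?
 */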

static int
vioif_match(device_t parent, cfdata_t match, void *aux)
{
	struct virtio_softc *va = aux;

	if (va->sc_childdevid == PCI_PRODUCT_VIRTIO_NETWORK)
		return 1;

	return 0;
}

/* allocate memory */
/*
 * dma memory is used for:
 *   sc_rx_hdrs[slot]:	 metadata array for received frames (READ)
 *   sc_tx_hdrs[slot]:	 metadata array for frames to be sent (WRITE)
 *   sc_ctrl_cmd:	 command to be sent via ctrl vq (WRITE)
 *   sc_ctrl_status:	 return value for a command via ctrl vq (READ)
 *   sc_ctrl_rx:	 parameter for a VIRTIO_NET_CTRL_RX class command
 *			 (WRITE)
 *   sc_ctrl_mac_tbl_uc: unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
 *			 class command (WRITE)
 *   sc_ctrl_mac_tbl_mc: multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
 *			 class command (WRITE)
 * sc_ctrl_* structures are allocated only once each; they are protected by
 * the sc_ctrl_inuse variable and the sc_ctrl_wait condvar.
 */
/*
 * dynamically allocated memory is used for:
 *   sc_rxhdr_dmamaps[slot]:	bus_dmamap_t array for sc_rx_hdrs[slot]
 *   sc_txhdr_dmamaps[slot]:	bus_dmamap_t array for sc_tx_hdrs[slot]
 *   sc_rx_dmamaps[slot]:	bus_dmamap_t array for received payload
 *   sc_tx_dmamaps[slot]:	bus_dmamap_t array for sent payload
 *   sc_rx_mbufs[slot]:		mbuf pointer array for received frames
 *   sc_tx_mbufs[slot]:		mbuf pointer array for sent frames
 */
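/*
 * Rough layout of the single DMA region carved up below (a sketch; the
 * exact offsets are computed by the P() macro in vioif_alloc_mems()):
 *
 *	| rx hdrs (rxqsize) | tx hdrs (txqsize) |
 *	| ctrl_cmd | ctrl_status | ctrl_rx |
 *	| mac_tbl_uc (0 entries) | mac_tbl_mc (up to 64 entries) |
 */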
static int
vioif_alloc_mems(struct vioif_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	int allocsize, allocsize2, r, rsegs, i;
	void *vaddr;
	intptr_t p;
	int rxqsize, txqsize;

	rxqsize = vsc->sc_vqs[0].vq_num;
	txqsize = vsc->sc_vqs[1].vq_num;

	allocsize = sizeof(struct virtio_net_hdr) * rxqsize;
	allocsize += sizeof(struct virtio_net_hdr) * txqsize;
	if (vsc->sc_nvqs == 3) {
		allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1;
		allocsize += sizeof(struct virtio_net_ctrl_status) * 1;
		allocsize += sizeof(struct virtio_net_ctrl_rx) * 1;
		allocsize += sizeof(struct virtio_net_ctrl_mac_tbl)
			+ sizeof(struct virtio_net_ctrl_mac_tbl)
			+ ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES;
	}
	r = bus_dmamem_alloc(vsc->sc_dmat, allocsize, 0, 0,
			     &sc->sc_hdr_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
	if (r != 0) {
		aprint_error_dev(sc->sc_dev,
				 "DMA memory allocation failed, size %d, "
				 "error code %d\n", allocsize, r);
		goto err_none;
	}
	r = bus_dmamem_map(vsc->sc_dmat,
			   &sc->sc_hdr_segs[0], 1, allocsize,
			   &vaddr, BUS_DMA_NOWAIT);
	if (r != 0) {
		aprint_error_dev(sc->sc_dev,
				 "DMA memory map failed, "
				 "error code %d\n", r);
		goto err_dmamem_alloc;
	}
	sc->sc_hdrs = vaddr;
	memset(vaddr, 0, allocsize);
	p = (intptr_t) vaddr;
	p += sizeof(struct virtio_net_hdr) * rxqsize;
#define P(name,size)	do { sc->sc_ ##name = (void*) p;	\
			     p += size; } while (0)
	P(tx_hdrs, sizeof(struct virtio_net_hdr) * txqsize);
	if (vsc->sc_nvqs == 3) {
		P(ctrl_cmd, sizeof(struct virtio_net_ctrl_cmd));
		P(ctrl_status, sizeof(struct virtio_net_ctrl_status));
		P(ctrl_rx, sizeof(struct virtio_net_ctrl_rx));
		P(ctrl_mac_tbl_uc, sizeof(struct virtio_net_ctrl_mac_tbl));
		P(ctrl_mac_tbl_mc,
		  (sizeof(struct virtio_net_ctrl_mac_tbl)
		   + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES));
	}
#undef P

	allocsize2 = sizeof(bus_dmamap_t) * (rxqsize + txqsize);
	allocsize2 += sizeof(bus_dmamap_t) * (rxqsize + txqsize);
	allocsize2 += sizeof(struct mbuf*) * (rxqsize + txqsize);
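	/*
	 * sc_arrays is a single kmem block sliced into six consecutive
	 * arrays: rx header maps, tx header maps, rx payload maps, tx
	 * payload maps, then the rx and tx mbuf pointer arrays.
	 */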
	sc->sc_arrays = kmem_zalloc(allocsize2, KM_SLEEP);
	if (sc->sc_arrays == NULL)
		goto err_dmamem_map;
	sc->sc_txhdr_dmamaps = sc->sc_arrays + rxqsize;
	sc->sc_rx_dmamaps = sc->sc_txhdr_dmamaps + txqsize;
	sc->sc_tx_dmamaps = sc->sc_rx_dmamaps + rxqsize;
	sc->sc_rx_mbufs = (void*) (sc->sc_tx_dmamaps + txqsize);
	sc->sc_tx_mbufs = sc->sc_rx_mbufs + rxqsize;

#define C(map, buf, size, nsegs, rw, usage)				\
	do {								\
		r = bus_dmamap_create(vsc->sc_dmat, size, nsegs, size, 0, \
				      BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW,	\
				      &sc->sc_ ##map);			\
		if (r != 0) {						\
			aprint_error_dev(sc->sc_dev,			\
					 usage " dmamap creation failed, " \
					 "error code %d\n", r);		\
			goto err_reqs;					\
		}							\
	} while (0)
#define C_L1(map, buf, size, nsegs, rw, usage)				\
	C(map, buf, size, nsegs, rw, usage);				\
	do {								\
		r = bus_dmamap_load(vsc->sc_dmat, sc->sc_ ##map,	\
				    &sc->sc_ ##buf, size, NULL,		\
				    BUS_DMA_ ##rw | BUS_DMA_NOWAIT);	\
		if (r != 0) {						\
			aprint_error_dev(sc->sc_dev,			\
					 usage " dmamap load failed, "	\
					 "error code %d\n", r);		\
			goto err_reqs;					\
		}							\
	} while (0)
#define C_L2(map, buf, size, nsegs, rw, usage)				\
	C(map, buf, size, nsegs, rw, usage);				\
	do {								\
		r = bus_dmamap_load(vsc->sc_dmat, sc->sc_ ##map,	\
				    sc->sc_ ##buf, size, NULL,		\
				    BUS_DMA_ ##rw | BUS_DMA_NOWAIT);	\
		if (r != 0) {						\
			aprint_error_dev(sc->sc_dev,			\
					 usage " dmamap load failed, "	\
					 "error code %d\n", r);		\
			goto err_reqs;					\
		}							\
	} while (0)
	for (i = 0; i < rxqsize; i++) {
		C_L1(rxhdr_dmamaps[i], rx_hdrs[i],
		    sizeof(struct virtio_net_hdr), 1,
		    READ, "rx header");
		C(rx_dmamaps[i], NULL, MCLBYTES, 1, 0, "rx payload");
	}

	for (i = 0; i < txqsize; i++) {
		C_L1(txhdr_dmamaps[i], tx_hdrs[i],
		    sizeof(struct virtio_net_hdr), 1,
		    WRITE, "tx header");
		C(tx_dmamaps[i], NULL, ETHER_MAX_LEN, 256 /* XXX */, 0,
		  "tx payload");
	}

	if (vsc->sc_nvqs == 3) {
		/* control vq class & command */
		C_L2(ctrl_cmd_dmamap, ctrl_cmd,
		    sizeof(struct virtio_net_ctrl_cmd), 1, WRITE,
		    "control command");

		/* control vq status */
		C_L2(ctrl_status_dmamap, ctrl_status,
		    sizeof(struct virtio_net_ctrl_status), 1, READ,
		    "control status");

		/* control vq rx mode command parameter */
		C_L2(ctrl_rx_dmamap, ctrl_rx,
		    sizeof(struct virtio_net_ctrl_rx), 1, WRITE,
		    "rx mode control command");

		/* control vq MAC filter table for unicast */
		/* do not load now since its length is variable */
		C(ctrl_tbl_uc_dmamap, NULL,
		  sizeof(struct virtio_net_ctrl_mac_tbl) + 0, 1, WRITE,
		  "unicast MAC address filter command");

		/* control vq MAC filter table for multicast */
		C(ctrl_tbl_mc_dmamap, NULL,
		  (sizeof(struct virtio_net_ctrl_mac_tbl)
		   + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES),
		  1, WRITE, "multicast MAC address filter command");
	}
#undef C_L2
#undef C_L1
#undef C

	return 0;

err_reqs:
#define D(map)								\
	do {								\
		if (sc->sc_ ##map) {					\
			bus_dmamap_destroy(vsc->sc_dmat, sc->sc_ ##map); \
			sc->sc_ ##map = NULL;				\
		}							\
	} while (0)
	D(ctrl_tbl_mc_dmamap);
	D(ctrl_tbl_uc_dmamap);
	D(ctrl_rx_dmamap);
	D(ctrl_status_dmamap);
	D(ctrl_cmd_dmamap);
	for (i = 0; i < txqsize; i++) {
		D(tx_dmamaps[i]);
		D(txhdr_dmamaps[i]);
	}
	for (i = 0; i < rxqsize; i++) {
		D(rx_dmamaps[i]);
		D(rxhdr_dmamaps[i]);
	}
#undef D
	if (sc->sc_arrays) {
		kmem_free(sc->sc_arrays, allocsize2);
		sc->sc_arrays = NULL;
	}
err_dmamem_map:
	bus_dmamem_unmap(vsc->sc_dmat, sc->sc_hdrs, allocsize);
err_dmamem_alloc:
	bus_dmamem_free(vsc->sc_dmat, &sc->sc_hdr_segs[0], 1);
err_none:
	return -1;
}

static void
vioif_attach(device_t parent, device_t self, void *aux)
{
	struct vioif_softc *sc = device_private(self);
	struct virtio_softc *vsc = device_private(parent);
	uint32_t features;
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	u_int flags;

	if (vsc->sc_child != NULL) {
		aprint_normal(": child already attached for %s; "
			      "something wrong...\n",
			      device_xname(parent));
		return;
	}

	sc->sc_dev = self;
	sc->sc_virtio = vsc;

	vsc->sc_child = self;
	vsc->sc_ipl = IPL_NET;
	vsc->sc_vqs = &sc->sc_vq[0];
	vsc->sc_config_change = 0;
	vsc->sc_intrhand = virtio_vq_intr;
	vsc->sc_flags = 0;

#ifdef VIOIF_MPSAFE
	vsc->sc_flags |= VIRTIO_F_PCI_INTR_MPSAFE;
#endif
#ifdef VIOIF_SOFTINT_INTR
	vsc->sc_flags |= VIRTIO_F_PCI_INTR_SOFTINT;
#endif

	features = virtio_negotiate_features(vsc,
					     (VIRTIO_NET_F_MAC |
					      VIRTIO_NET_F_STATUS |
					      VIRTIO_NET_F_CTRL_VQ |
					      VIRTIO_NET_F_CTRL_RX |
					      VIRTIO_F_NOTIFY_ON_EMPTY));
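	/*
	 * Note that no checksum/TSO offload feature bits are requested in
	 * the mask above; correspondingly, ifp->if_capabilities is left at
	 * zero further down.
	 */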
	if (features & VIRTIO_NET_F_MAC) {
		sc->sc_mac[0] = virtio_read_device_config_1(vsc,
						    VIRTIO_NET_CONFIG_MAC+0);
		sc->sc_mac[1] = virtio_read_device_config_1(vsc,
						    VIRTIO_NET_CONFIG_MAC+1);
		sc->sc_mac[2] = virtio_read_device_config_1(vsc,
						    VIRTIO_NET_CONFIG_MAC+2);
		sc->sc_mac[3] = virtio_read_device_config_1(vsc,
						    VIRTIO_NET_CONFIG_MAC+3);
		sc->sc_mac[4] = virtio_read_device_config_1(vsc,
						    VIRTIO_NET_CONFIG_MAC+4);
		sc->sc_mac[5] = virtio_read_device_config_1(vsc,
						    VIRTIO_NET_CONFIG_MAC+5);
	} else {
		/* code stolen from sys/net/if_tap.c */
		struct timeval tv;
		uint32_t ui;
		getmicrouptime(&tv);
		ui = (tv.tv_sec ^ tv.tv_usec) & 0xffffff;
		memcpy(sc->sc_mac+3, (uint8_t *)&ui, 3);
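		/*
		 * Only the low three octets are randomized; the first three
		 * remain zero, as the softc comes zero-filled from
		 * device_private().
		 */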
		virtio_write_device_config_1(vsc,
					     VIRTIO_NET_CONFIG_MAC+0,
					     sc->sc_mac[0]);
		virtio_write_device_config_1(vsc,
					     VIRTIO_NET_CONFIG_MAC+1,
					     sc->sc_mac[1]);
		virtio_write_device_config_1(vsc,
					     VIRTIO_NET_CONFIG_MAC+2,
					     sc->sc_mac[2]);
		virtio_write_device_config_1(vsc,
					     VIRTIO_NET_CONFIG_MAC+3,
					     sc->sc_mac[3]);
		virtio_write_device_config_1(vsc,
					     VIRTIO_NET_CONFIG_MAC+4,
					     sc->sc_mac[4]);
		virtio_write_device_config_1(vsc,
					     VIRTIO_NET_CONFIG_MAC+5,
					     sc->sc_mac[5]);
	}
	aprint_normal(": Ethernet address %s\n", ether_sprintf(sc->sc_mac));
	aprint_naive("\n");

	if (virtio_alloc_vq(vsc, &sc->sc_vq[0], 0,
			    MCLBYTES+sizeof(struct virtio_net_hdr), 2,
			    "rx") != 0) {
		goto err;
	}
	vsc->sc_nvqs = 1;
	sc->sc_vq[0].vq_done = vioif_rx_vq_done;
	if (virtio_alloc_vq(vsc, &sc->sc_vq[1], 1,
			    (sizeof(struct virtio_net_hdr)
			     + (ETHER_MAX_LEN - ETHER_HDR_LEN)),
			    VIRTIO_NET_TX_MAXNSEGS + 1,
			    "tx") != 0) {
		goto err;
	}

#ifdef VIOIF_MPSAFE
	sc->sc_tx_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
	sc->sc_rx_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
#else
	sc->sc_tx_lock = NULL;
	sc->sc_rx_lock = NULL;
#endif
	sc->sc_stopping = false;

	vsc->sc_nvqs = 2;
	sc->sc_vq[1].vq_done = vioif_tx_vq_done;
	virtio_start_vq_intr(vsc, &sc->sc_vq[0]);
	virtio_stop_vq_intr(vsc, &sc->sc_vq[1]); /* not urgent; do it later */
	if ((features & VIRTIO_NET_F_CTRL_VQ)
	    && (features & VIRTIO_NET_F_CTRL_RX)) {
		if (virtio_alloc_vq(vsc, &sc->sc_vq[2], 2,
				    NBPG, 1, "control") == 0) {
			sc->sc_vq[2].vq_done = vioif_ctrl_vq_done;
			cv_init(&sc->sc_ctrl_wait, "ctrl_vq");
			mutex_init(&sc->sc_ctrl_wait_lock,
				   MUTEX_DEFAULT, IPL_NET);
			sc->sc_ctrl_inuse = FREE;
			virtio_start_vq_intr(vsc, &sc->sc_vq[2]);
			vsc->sc_nvqs = 3;
		}
	}

#ifdef VIOIF_MPSAFE
	flags = SOFTINT_NET | SOFTINT_MPSAFE;
#else
	flags = SOFTINT_NET;
#endif
	sc->sc_rx_softint = softint_establish(flags, vioif_rx_softint, sc);
	if (sc->sc_rx_softint == NULL) {
		aprint_error_dev(self, "cannot establish softint\n");
		goto err;
	}

	if (vioif_alloc_mems(sc) < 0)
		goto err;

	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_start = vioif_start;
	ifp->if_ioctl = vioif_ioctl;
	ifp->if_init = vioif_init;
	ifp->if_stop = vioif_stop;
	ifp->if_capabilities = 0;
	ifp->if_watchdog = vioif_watchdog;

	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;

	if_attach(ifp);
	ether_ifattach(ifp, sc->sc_mac);

	return;

err:
	if (sc->sc_tx_lock)
		mutex_obj_free(sc->sc_tx_lock);
	if (sc->sc_rx_lock)
		mutex_obj_free(sc->sc_rx_lock);

	if (vsc->sc_nvqs == 3) {
		virtio_free_vq(vsc, &sc->sc_vq[2]);
		cv_destroy(&sc->sc_ctrl_wait);
		mutex_destroy(&sc->sc_ctrl_wait_lock);
		vsc->sc_nvqs = 2;
	}
	if (vsc->sc_nvqs == 2) {
		virtio_free_vq(vsc, &sc->sc_vq[1]);
		vsc->sc_nvqs = 1;
	}
	if (vsc->sc_nvqs == 1) {
		virtio_free_vq(vsc, &sc->sc_vq[0]);
		vsc->sc_nvqs = 0;
	}
	vsc->sc_child = (void*)1;
	return;
}

/* we need interrupts enabled to turn promiscuous mode off */
static void
vioif_deferred_init(device_t self)
{
	struct vioif_softc *sc = device_private(self);
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	int r;

	if (ifp->if_flags & IFF_PROMISC)
		return;

	r = vioif_set_promisc(sc, false);
	if (r != 0)
		aprint_error_dev(self, "resetting promisc mode failed, "
				 "error code %d\n", r);
}

/*
 * Interface functions for ifnet
 */
static int
vioif_init(struct ifnet *ifp)
{
	struct vioif_softc *sc = ifp->if_softc;

	vioif_stop(ifp, 0);

	if (!sc->sc_deferred_init_done) {
		struct virtio_softc *vsc = sc->sc_virtio;

		sc->sc_deferred_init_done = 1;
		if (vsc->sc_nvqs == 3)
			vioif_deferred_init(sc->sc_dev);
	}

	/* sc_stopping must be false before vioif_populate_rx_mbufs() runs */
	sc->sc_stopping = false;

	vioif_populate_rx_mbufs(sc);

	vioif_updown(sc, true);
	ifp->if_flags |= IFF_RUNNING;
	ifp->if_flags &= ~IFF_OACTIVE;
	vioif_rx_filter(sc);

	return 0;
}

static void
vioif_stop(struct ifnet *ifp, int disable)
{
	struct vioif_softc *sc = ifp->if_softc;
	struct virtio_softc *vsc = sc->sc_virtio;

	/* Take the locks to ensure that ongoing TX/RX finish */
	VIOIF_TX_LOCK(sc);
	VIOIF_RX_LOCK(sc);
	sc->sc_stopping = true;
	VIOIF_RX_UNLOCK(sc);
	VIOIF_TX_UNLOCK(sc);

	/* only way to stop I/O and DMA is resetting... */
	virtio_reset(vsc);
	vioif_rx_deq(sc);
	vioif_tx_drain(sc);
	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);

	if (disable)
		vioif_rx_drain(sc);

	virtio_reinit_start(vsc);
	virtio_negotiate_features(vsc, vsc->sc_features);
	virtio_start_vq_intr(vsc, &sc->sc_vq[0]);
	virtio_stop_vq_intr(vsc, &sc->sc_vq[1]);
	if (vsc->sc_nvqs >= 3)
		virtio_start_vq_intr(vsc, &sc->sc_vq[2]);
	virtio_reinit_end(vsc);
	vioif_updown(sc, false);
}

static void
vioif_start(struct ifnet *ifp)
{
	struct vioif_softc *sc = ifp->if_softc;
	struct virtio_softc *vsc = sc->sc_virtio;
	struct virtqueue *vq = &sc->sc_vq[1]; /* tx vq */
	struct mbuf *m;
	int queued = 0, retry = 0;

	VIOIF_TX_LOCK(sc);

	if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)
		goto out;

	if (sc->sc_stopping)
		goto out;

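	/*
	 * Dequeue one packet at a time.  When the tx vq is full (EAGAIN
	 * from virtio_enqueue_prep() or a failed reserve), set IFF_OACTIVE,
	 * reclaim completed slots once via vioif_tx_vq_done_locked() and
	 * retry; a second failure stops dequeueing for now.
	 */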
	for (;;) {
		int slot, r;

		IFQ_DEQUEUE(&ifp->if_snd, m);

		if (m == NULL)
			break;

		r = virtio_enqueue_prep(vsc, vq, &slot);
		if (r == EAGAIN) {
			ifp->if_flags |= IFF_OACTIVE;
			vioif_tx_vq_done_locked(vq);
			if (retry++ == 0)
				continue;
			else
				break;
		}
		if (r != 0)
			panic("enqueue_prep for a tx buffer");
		r = bus_dmamap_load_mbuf(vsc->sc_dmat,
					 sc->sc_tx_dmamaps[slot],
					 m, BUS_DMA_WRITE|BUS_DMA_NOWAIT);
		if (r != 0) {
			virtio_enqueue_abort(vsc, vq, slot);
			printf("%s: tx dmamap load failed, error code %d\n",
			       device_xname(sc->sc_dev), r);
			break;
		}
		r = virtio_enqueue_reserve(vsc, vq, slot,
					sc->sc_tx_dmamaps[slot]->dm_nsegs + 1);
		if (r != 0) {
			bus_dmamap_unload(vsc->sc_dmat,
					  sc->sc_tx_dmamaps[slot]);
			ifp->if_flags |= IFF_OACTIVE;
			vioif_tx_vq_done_locked(vq);
			if (retry++ == 0)
				continue;
			else
				break;
		}

		sc->sc_tx_mbufs[slot] = m;

		memset(&sc->sc_tx_hdrs[slot], 0, sizeof(struct virtio_net_hdr));
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_tx_dmamaps[slot],
				0, sc->sc_tx_dmamaps[slot]->dm_mapsize,
				BUS_DMASYNC_PREWRITE);
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_txhdr_dmamaps[slot],
				0, sc->sc_txhdr_dmamaps[slot]->dm_mapsize,
				BUS_DMASYNC_PREWRITE);
		virtio_enqueue(vsc, vq, slot, sc->sc_txhdr_dmamaps[slot], true);
		virtio_enqueue(vsc, vq, slot, sc->sc_tx_dmamaps[slot], true);
		virtio_enqueue_commit(vsc, vq, slot, false);
		queued++;
		bpf_mtap(ifp, m);
	}

	if (m != NULL) {
		ifp->if_flags |= IFF_OACTIVE;
		m_freem(m);
	}

	if (queued > 0) {
		virtio_enqueue_commit(vsc, vq, -1, true);
		ifp->if_timer = 5;
	}

out:
	VIOIF_TX_UNLOCK(sc);
}

static int
vioif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
{
	int s, r;

	s = splnet();

	r = ether_ioctl(ifp, cmd, data);
	if ((r == 0 && cmd == SIOCSIFFLAGS) ||
	    (r == ENETRESET && (cmd == SIOCADDMULTI || cmd == SIOCDELMULTI))) {
		if (ifp->if_flags & IFF_RUNNING)
			r = vioif_rx_filter(ifp->if_softc);
		else
			r = 0;
	}

	splx(s);

	return r;
}

static void
vioif_watchdog(struct ifnet *ifp)
{
	struct vioif_softc *sc = ifp->if_softc;

	if (ifp->if_flags & IFF_RUNNING)
		vioif_tx_vq_done(&sc->sc_vq[1]);
}


/*
 * Receive implementation
 */
/* allocate and initialize a mbuf for receive */
static int
vioif_add_rx_mbuf(struct vioif_softc *sc, int i)
{
	struct mbuf *m;
	int r;

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return ENOBUFS;
	MCLGET(m, M_DONTWAIT);
	if ((m->m_flags & M_EXT) == 0) {
		m_freem(m);
		return ENOBUFS;
	}
	sc->sc_rx_mbufs[i] = m;
	m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
	r = bus_dmamap_load_mbuf(sc->sc_virtio->sc_dmat,
				 sc->sc_rx_dmamaps[i],
				 m, BUS_DMA_READ|BUS_DMA_NOWAIT);
	if (r) {
		m_freem(m);
		sc->sc_rx_mbufs[i] = NULL;
		return r;
	}

	return 0;
}

/* free a mbuf for receive */
static void
vioif_free_rx_mbuf(struct vioif_softc *sc, int i)
{
	bus_dmamap_unload(sc->sc_virtio->sc_dmat, sc->sc_rx_dmamaps[i]);
	m_freem(sc->sc_rx_mbufs[i]);
	sc->sc_rx_mbufs[i] = NULL;
}

/* add mbufs for all the empty receive slots */
static void
vioif_populate_rx_mbufs(struct vioif_softc *sc)
{
	VIOIF_RX_LOCK(sc);
	vioif_populate_rx_mbufs_locked(sc);
	VIOIF_RX_UNLOCK(sc);
}

static void
vioif_populate_rx_mbufs_locked(struct vioif_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	int i, r, ndone = 0;
	struct virtqueue *vq = &sc->sc_vq[0]; /* rx vq */

	KASSERT(VIOIF_RX_LOCKED(sc));

	if (sc->sc_stopping)
		return;

	for (i = 0; i < vq->vq_num; i++) {
		int slot;
		r = virtio_enqueue_prep(vsc, vq, &slot);
		if (r == EAGAIN)
			break;
		if (r != 0)
			panic("enqueue_prep for rx buffers");
		if (sc->sc_rx_mbufs[slot] == NULL) {
			r = vioif_add_rx_mbuf(sc, slot);
			if (r != 0) {
				printf("%s: rx mbuf allocation failed, "
				       "error code %d\n",
				       device_xname(sc->sc_dev), r);
				break;
			}
		}
		r = virtio_enqueue_reserve(vsc, vq, slot,
					sc->sc_rx_dmamaps[slot]->dm_nsegs + 1);
		if (r != 0) {
			vioif_free_rx_mbuf(sc, slot);
			break;
		}
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_rxhdr_dmamaps[slot],
			0, sizeof(struct virtio_net_hdr), BUS_DMASYNC_PREREAD);
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_rx_dmamaps[slot],
			0, MCLBYTES, BUS_DMASYNC_PREREAD);
		virtio_enqueue(vsc, vq, slot, sc->sc_rxhdr_dmamaps[slot], false);
		virtio_enqueue(vsc, vq, slot, sc->sc_rx_dmamaps[slot], false);
		virtio_enqueue_commit(vsc, vq, slot, false);
		ndone++;
	}
	if (ndone > 0)
		virtio_enqueue_commit(vsc, vq, -1, true);
}

/* dequeue received packets */
static int
vioif_rx_deq(struct vioif_softc *sc)
{
	int r;

	KASSERT(sc->sc_stopping);

	VIOIF_RX_LOCK(sc);
	r = vioif_rx_deq_locked(sc);
	VIOIF_RX_UNLOCK(sc);

	return r;
}

/* dequeue received packets */
static int
vioif_rx_deq_locked(struct vioif_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	struct virtqueue *vq = &sc->sc_vq[0];
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	struct mbuf *m;
	int r = 0;
	int slot, len;

	KASSERT(VIOIF_RX_LOCKED(sc));

	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
		len -= sizeof(struct virtio_net_hdr);
		r = 1;
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_rxhdr_dmamaps[slot],
				0, sizeof(struct virtio_net_hdr),
				BUS_DMASYNC_POSTREAD);
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_rx_dmamaps[slot],
				0, MCLBYTES,
				BUS_DMASYNC_POSTREAD);
		m = sc->sc_rx_mbufs[slot];
		KASSERT(m != NULL);
		bus_dmamap_unload(vsc->sc_dmat, sc->sc_rx_dmamaps[slot]);
		sc->sc_rx_mbufs[slot] = NULL;
		virtio_dequeue_commit(vsc, vq, slot);
		m->m_pkthdr.rcvif = ifp;
		m->m_len = m->m_pkthdr.len = len;
		ifp->if_ipackets++;
		bpf_mtap(ifp, m);

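		/*
		 * Drop the rx lock while the packet is handed to the stack;
		 * if_input() may re-enter the driver (e.g. to transmit a
		 * response) and must not run under sc_rx_lock.
		 */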
		VIOIF_RX_UNLOCK(sc);
		(*ifp->if_input)(ifp, m);
		VIOIF_RX_LOCK(sc);

		if (sc->sc_stopping)
			break;
	}

	return r;
}

/* rx interrupt; call _dequeue above and schedule a softint */
static int
vioif_rx_vq_done(struct virtqueue *vq)
{
	struct virtio_softc *vsc = vq->vq_owner;
	struct vioif_softc *sc = device_private(vsc->sc_child);
	int r = 0;

#ifdef VIOIF_SOFTINT_INTR
	KASSERT(!cpu_intr_p());
#endif

	VIOIF_RX_LOCK(sc);

	if (sc->sc_stopping)
		goto out;

	r = vioif_rx_deq_locked(sc);
	if (r)
#ifdef VIOIF_SOFTINT_INTR
		vioif_populate_rx_mbufs_locked(sc);
#else
		softint_schedule(sc->sc_rx_softint);
#endif

out:
	VIOIF_RX_UNLOCK(sc);
	return r;
}

/* softint: enqueue receive requests for new incoming packets */
static void
vioif_rx_softint(void *arg)
{
	struct vioif_softc *sc = arg;

	vioif_populate_rx_mbufs(sc);
}

/* free all the mbufs; called from if_stop(disable) */
static void
vioif_rx_drain(struct vioif_softc *sc)
{
	struct virtqueue *vq = &sc->sc_vq[0];
	int i;

	for (i = 0; i < vq->vq_num; i++) {
		if (sc->sc_rx_mbufs[i] == NULL)
			continue;
		vioif_free_rx_mbuf(sc, i);
	}
}


/*
 * Transmission implementation
 */
/* actual transmission is done in if_start */
/* tx interrupt; dequeue and free mbufs */
/*
 * the tx interrupt is actually disabled; this is only called when the
 * tx vq fills up and from the watchdog
 */
static int
vioif_tx_vq_done(struct virtqueue *vq)
{
	struct virtio_softc *vsc = vq->vq_owner;
	struct vioif_softc *sc = device_private(vsc->sc_child);
	int r = 0;

	VIOIF_TX_LOCK(sc);

	if (sc->sc_stopping)
		goto out;

	r = vioif_tx_vq_done_locked(vq);

out:
	VIOIF_TX_UNLOCK(sc);
	return r;
}

static int
vioif_tx_vq_done_locked(struct virtqueue *vq)
{
	struct virtio_softc *vsc = vq->vq_owner;
	struct vioif_softc *sc = device_private(vsc->sc_child);
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	struct mbuf *m;
	int r = 0;
	int slot, len;

	KASSERT(VIOIF_TX_LOCKED(sc));

	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
		r++;
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_txhdr_dmamaps[slot],
				0, sizeof(struct virtio_net_hdr),
				BUS_DMASYNC_POSTWRITE);
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_tx_dmamaps[slot],
				0, sc->sc_tx_dmamaps[slot]->dm_mapsize,
				BUS_DMASYNC_POSTWRITE);
		m = sc->sc_tx_mbufs[slot];
		bus_dmamap_unload(vsc->sc_dmat, sc->sc_tx_dmamaps[slot]);
		sc->sc_tx_mbufs[slot] = NULL;
		virtio_dequeue_commit(vsc, vq, slot);
		ifp->if_opackets++;
		m_freem(m);
	}

	if (r)
		ifp->if_flags &= ~IFF_OACTIVE;
	return r;
}

/* free all the mbufs already put on vq; called from if_stop(disable) */
static void
vioif_tx_drain(struct vioif_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	struct virtqueue *vq = &sc->sc_vq[1];
	int i;

	KASSERT(sc->sc_stopping);

	for (i = 0; i < vq->vq_num; i++) {
		if (sc->sc_tx_mbufs[i] == NULL)
			continue;
		bus_dmamap_unload(vsc->sc_dmat, sc->sc_tx_dmamaps[i]);
		m_freem(sc->sc_tx_mbufs[i]);
		sc->sc_tx_mbufs[i] = NULL;
	}
}

/*
 * Control vq
 */
/* issue a VIRTIO_NET_CTRL_RX class command and wait for completion */
static int
vioif_ctrl_rx(struct vioif_softc *sc, int cmd, bool onoff)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	struct virtqueue *vq = &sc->sc_vq[2];
	int r, slot;

	if (vsc->sc_nvqs < 3)
		return ENOTSUP;

	mutex_enter(&sc->sc_ctrl_wait_lock);
	while (sc->sc_ctrl_inuse != FREE)
		cv_wait(&sc->sc_ctrl_wait, &sc->sc_ctrl_wait_lock);
	sc->sc_ctrl_inuse = INUSE;
	mutex_exit(&sc->sc_ctrl_wait_lock);

	sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_RX;
	sc->sc_ctrl_cmd->command = cmd;
	sc->sc_ctrl_rx->onoff = onoff;

	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_cmd_dmamap,
			0, sizeof(struct virtio_net_ctrl_cmd),
			BUS_DMASYNC_PREWRITE);
	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_rx_dmamap,
			0, sizeof(struct virtio_net_ctrl_rx),
			BUS_DMASYNC_PREWRITE);
	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_status_dmamap,
			0, sizeof(struct virtio_net_ctrl_status),
			BUS_DMASYNC_PREREAD);

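	/*
	 * The chain is three descriptors: the command and the rx-mode
	 * parameter are device-readable, the status byte is device-writable,
	 * hence the reserve of 3 below.
	 */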
	r = virtio_enqueue_prep(vsc, vq, &slot);
	if (r != 0)
		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
	r = virtio_enqueue_reserve(vsc, vq, slot, 3);
	if (r != 0)
		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
	virtio_enqueue(vsc, vq, slot, sc->sc_ctrl_cmd_dmamap, true);
	virtio_enqueue(vsc, vq, slot, sc->sc_ctrl_rx_dmamap, true);
	virtio_enqueue(vsc, vq, slot, sc->sc_ctrl_status_dmamap, false);
	virtio_enqueue_commit(vsc, vq, slot, true);

	/* wait for done */
	mutex_enter(&sc->sc_ctrl_wait_lock);
	while (sc->sc_ctrl_inuse != DONE)
		cv_wait(&sc->sc_ctrl_wait, &sc->sc_ctrl_wait_lock);
	mutex_exit(&sc->sc_ctrl_wait_lock);
	/* already dequeued */

	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_cmd_dmamap, 0,
			sizeof(struct virtio_net_ctrl_cmd),
			BUS_DMASYNC_POSTWRITE);
	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_rx_dmamap, 0,
			sizeof(struct virtio_net_ctrl_rx),
			BUS_DMASYNC_POSTWRITE);
	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_status_dmamap, 0,
			sizeof(struct virtio_net_ctrl_status),
			BUS_DMASYNC_POSTREAD);

	if (sc->sc_ctrl_status->ack == VIRTIO_NET_OK)
		r = 0;
	else {
		printf("%s: failed setting rx mode\n",
		       device_xname(sc->sc_dev));
		r = EIO;
	}

	mutex_enter(&sc->sc_ctrl_wait_lock);
	sc->sc_ctrl_inuse = FREE;
	cv_signal(&sc->sc_ctrl_wait);
	mutex_exit(&sc->sc_ctrl_wait_lock);

	return r;
}

static int
vioif_set_promisc(struct vioif_softc *sc, bool onoff)
{
	int r;

	r = vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, onoff);

	return r;
}

static int
vioif_set_allmulti(struct vioif_softc *sc, bool onoff)
{
	int r;

	r = vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, onoff);

	return r;
}

/* issue VIRTIO_NET_CTRL_MAC_TABLE_SET command and wait for completion */
static int
vioif_set_rx_filter(struct vioif_softc *sc)
{
	/* filter already set in sc_ctrl_mac_tbl */
	struct virtio_softc *vsc = sc->sc_virtio;
	struct virtqueue *vq = &sc->sc_vq[2];
	int r, slot;

	if (vsc->sc_nvqs < 3)
		return ENOTSUP;

	mutex_enter(&sc->sc_ctrl_wait_lock);
	while (sc->sc_ctrl_inuse != FREE)
		cv_wait(&sc->sc_ctrl_wait, &sc->sc_ctrl_wait_lock);
	sc->sc_ctrl_inuse = INUSE;
	mutex_exit(&sc->sc_ctrl_wait_lock);

	sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_MAC;
	sc->sc_ctrl_cmd->command = VIRTIO_NET_CTRL_MAC_TABLE_SET;

	r = bus_dmamap_load(vsc->sc_dmat, sc->sc_ctrl_tbl_uc_dmamap,
			    sc->sc_ctrl_mac_tbl_uc,
			    (sizeof(struct virtio_net_ctrl_mac_tbl)
			  + ETHER_ADDR_LEN * sc->sc_ctrl_mac_tbl_uc->nentries),
			    NULL, BUS_DMA_WRITE|BUS_DMA_NOWAIT);
	if (r) {
		printf("%s: control command dmamap load failed, "
		       "error code %d\n", device_xname(sc->sc_dev), r);
		goto out;
	}
	r = bus_dmamap_load(vsc->sc_dmat, sc->sc_ctrl_tbl_mc_dmamap,
			    sc->sc_ctrl_mac_tbl_mc,
			    (sizeof(struct virtio_net_ctrl_mac_tbl)
			  + ETHER_ADDR_LEN * sc->sc_ctrl_mac_tbl_mc->nentries),
			    NULL, BUS_DMA_WRITE|BUS_DMA_NOWAIT);
	if (r) {
		printf("%s: control command dmamap load failed, "
		       "error code %d\n", device_xname(sc->sc_dev), r);
		bus_dmamap_unload(vsc->sc_dmat, sc->sc_ctrl_tbl_uc_dmamap);
		goto out;
	}

	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_cmd_dmamap,
			0, sizeof(struct virtio_net_ctrl_cmd),
			BUS_DMASYNC_PREWRITE);
	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_tbl_uc_dmamap, 0,
			(sizeof(struct virtio_net_ctrl_mac_tbl)
			 + ETHER_ADDR_LEN * sc->sc_ctrl_mac_tbl_uc->nentries),
			BUS_DMASYNC_PREWRITE);
	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_tbl_mc_dmamap, 0,
			(sizeof(struct virtio_net_ctrl_mac_tbl)
			 + ETHER_ADDR_LEN * sc->sc_ctrl_mac_tbl_mc->nentries),
			BUS_DMASYNC_PREWRITE);
	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_status_dmamap,
			0, sizeof(struct virtio_net_ctrl_status),
			BUS_DMASYNC_PREREAD);

	r = virtio_enqueue_prep(vsc, vq, &slot);
	if (r != 0)
		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
	r = virtio_enqueue_reserve(vsc, vq, slot, 4);
	if (r != 0)
		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
	virtio_enqueue(vsc, vq, slot, sc->sc_ctrl_cmd_dmamap, true);
	virtio_enqueue(vsc, vq, slot, sc->sc_ctrl_tbl_uc_dmamap, true);
	virtio_enqueue(vsc, vq, slot, sc->sc_ctrl_tbl_mc_dmamap, true);
	virtio_enqueue(vsc, vq, slot, sc->sc_ctrl_status_dmamap, false);
	virtio_enqueue_commit(vsc, vq, slot, true);

	/* wait for done */
	mutex_enter(&sc->sc_ctrl_wait_lock);
	while (sc->sc_ctrl_inuse != DONE)
		cv_wait(&sc->sc_ctrl_wait, &sc->sc_ctrl_wait_lock);
	mutex_exit(&sc->sc_ctrl_wait_lock);
	/* already dequeued */

	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_cmd_dmamap, 0,
			sizeof(struct virtio_net_ctrl_cmd),
			BUS_DMASYNC_POSTWRITE);
	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_tbl_uc_dmamap, 0,
			(sizeof(struct virtio_net_ctrl_mac_tbl)
			 + ETHER_ADDR_LEN * sc->sc_ctrl_mac_tbl_uc->nentries),
			BUS_DMASYNC_POSTWRITE);
	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_tbl_mc_dmamap, 0,
			(sizeof(struct virtio_net_ctrl_mac_tbl)
			 + ETHER_ADDR_LEN * sc->sc_ctrl_mac_tbl_mc->nentries),
			BUS_DMASYNC_POSTWRITE);
	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_status_dmamap, 0,
			sizeof(struct virtio_net_ctrl_status),
			BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(vsc->sc_dmat, sc->sc_ctrl_tbl_uc_dmamap);
	bus_dmamap_unload(vsc->sc_dmat, sc->sc_ctrl_tbl_mc_dmamap);

	if (sc->sc_ctrl_status->ack == VIRTIO_NET_OK)
		r = 0;
	else {
		printf("%s: failed setting rx filter\n",
		       device_xname(sc->sc_dev));
		r = EIO;
	}

out:
	mutex_enter(&sc->sc_ctrl_wait_lock);
	sc->sc_ctrl_inuse = FREE;
	cv_signal(&sc->sc_ctrl_wait);
	mutex_exit(&sc->sc_ctrl_wait_lock);

	return r;
}

/* ctrl vq interrupt; wake up the command issuer */
static int
vioif_ctrl_vq_done(struct virtqueue *vq)
{
	struct virtio_softc *vsc = vq->vq_owner;
	struct vioif_softc *sc = device_private(vsc->sc_child);
	int r, slot;

	r = virtio_dequeue(vsc, vq, &slot, NULL);
	if (r == ENOENT)
		return 0;
	virtio_dequeue_commit(vsc, vq, slot);

	mutex_enter(&sc->sc_ctrl_wait_lock);
	sc->sc_ctrl_inuse = DONE;
	cv_signal(&sc->sc_ctrl_wait);
	mutex_exit(&sc->sc_ctrl_wait_lock);

	return 1;
}

/*
 * If IFF_PROMISC is requested, enable promiscuous mode.
 * If the multicast filter is small enough (<= MAXENTRIES), program the
 * rx filter; otherwise fall back to ALLMULTI.
 * If programming the rx filter fails, fall back to ALLMULTI;
 * if ALLMULTI fails, fall back to PROMISC.
 */
static int
vioif_rx_filter(struct vioif_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	struct ether_multi *enm;
	struct ether_multistep step;
	int nentries;
	int promisc = 0, allmulti = 0, rxfilter = 0;
	int r;

	if (vsc->sc_nvqs < 3) {	/* no ctrl vq; always promisc */
		ifp->if_flags |= IFF_PROMISC;
		return 0;
	}

	if (ifp->if_flags & IFF_PROMISC) {
		promisc = 1;
		goto set;
	}

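	/*
	 * nentries starts at -1 and is bumped by the comma expression in
	 * the loop condition, so inside the body it indexes the current
	 * entry and, once the loop ends, holds the total entry count.
	 */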
	nentries = -1;
	ETHER_FIRST_MULTI(step, &sc->sc_ethercom, enm);
	while (nentries++, enm != NULL) {
		if (nentries >= VIRTIO_NET_CTRL_MAC_MAXENTRIES) {
			allmulti = 1;
			goto set;
		}
		if (memcmp(enm->enm_addrlo, enm->enm_addrhi,
			   ETHER_ADDR_LEN)) {
			allmulti = 1;
			goto set;
		}
		memcpy(sc->sc_ctrl_mac_tbl_mc->macs[nentries],
		       enm->enm_addrlo, ETHER_ADDR_LEN);
		ETHER_NEXT_MULTI(step, enm);
	}
	rxfilter = 1;

set:
	if (rxfilter) {
		sc->sc_ctrl_mac_tbl_uc->nentries = 0;
		sc->sc_ctrl_mac_tbl_mc->nentries = nentries;
		r = vioif_set_rx_filter(sc);
		if (r != 0) {
			rxfilter = 0;
			allmulti = 1; /* fallback */
		}
	} else {
		/* remove rx filter */
		sc->sc_ctrl_mac_tbl_uc->nentries = 0;
		sc->sc_ctrl_mac_tbl_mc->nentries = 0;
		r = vioif_set_rx_filter(sc);
		/* what to do on failure? */
	}
	if (allmulti) {
		r = vioif_set_allmulti(sc, true);
		if (r != 0) {
			allmulti = 0;
			promisc = 1; /* fallback */
		}
	} else {
		r = vioif_set_allmulti(sc, false);
		/* what to do on failure? */
	}
	if (promisc) {
		r = vioif_set_promisc(sc, true);
	} else {
		r = vioif_set_promisc(sc, false);
	}

	return r;
}

/* change link status */
static int
vioif_updown(struct vioif_softc *sc, bool isup)
{
	struct virtio_softc *vsc = sc->sc_virtio;

	if (!(vsc->sc_features & VIRTIO_NET_F_STATUS))
		return ENODEV;
	virtio_write_device_config_1(vsc,
				     VIRTIO_NET_CONFIG_STATUS,
				     isup ? VIRTIO_NET_S_LINK_UP : 0);
	return 0;
}