/*	$NetBSD: if_vioif.c,v 1.18 2015/10/27 15:56:21 christos Exp $	*/

/*
 * Copyright (c) 2010 Minoura Makoto.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: if_vioif.c,v 1.18 2015/10/27 15:56:21 christos Exp $");

#ifdef _KERNEL_OPT
#include "opt_net_mpsafe.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/condvar.h>
#include <sys/device.h>
#include <sys/intr.h>
#include <sys/kmem.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/sockio.h>
#include <sys/cpu.h>

#include <dev/pci/pcidevs.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/virtioreg.h>
#include <dev/pci/virtiovar.h>

#include <net/if.h>
#include <net/if_media.h>
#include <net/if_ether.h>

#include <net/bpf.h>


#ifdef NET_MPSAFE
#define VIOIF_MPSAFE	1
#endif

#ifdef SOFTINT_INTR
#define VIOIF_SOFTINT_INTR	1
#endif
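
/*
 * VIOIF_MPSAFE makes the rx/tx paths take their own per-queue mutexes
 * instead of relying on the kernel lock; VIOIF_SOFTINT_INTR asks
 * virtio(4) to deliver virtqueue interrupts from softint context.
 */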

/*
 * if_vioifreg.h:
 */
/* Configuration registers */
#define VIRTIO_NET_CONFIG_MAC		0 /* 8bit x 6byte */
#define VIRTIO_NET_CONFIG_STATUS	6 /* 16bit */

/* Feature bits */
#define VIRTIO_NET_F_CSUM	(1<<0)
#define VIRTIO_NET_F_GUEST_CSUM	(1<<1)
#define VIRTIO_NET_F_MAC	(1<<5)
#define VIRTIO_NET_F_GSO	(1<<6)
#define VIRTIO_NET_F_GUEST_TSO4	(1<<7)
#define VIRTIO_NET_F_GUEST_TSO6	(1<<8)
#define VIRTIO_NET_F_GUEST_ECN	(1<<9)
#define VIRTIO_NET_F_GUEST_UFO	(1<<10)
#define VIRTIO_NET_F_HOST_TSO4	(1<<11)
#define VIRTIO_NET_F_HOST_TSO6	(1<<12)
#define VIRTIO_NET_F_HOST_ECN	(1<<13)
#define VIRTIO_NET_F_HOST_UFO	(1<<14)
#define VIRTIO_NET_F_MRG_RXBUF	(1<<15)
#define VIRTIO_NET_F_STATUS	(1<<16)
#define VIRTIO_NET_F_CTRL_VQ	(1<<17)
#define VIRTIO_NET_F_CTRL_RX	(1<<18)
#define VIRTIO_NET_F_CTRL_VLAN	(1<<19)

#define VIRTIO_NET_FLAG_BITS \
	VIRTIO_COMMON_FLAG_BITS \
	"\x14""CTRL_VLAN" \
	"\x13""CTRL_RX" \
	"\x12""CTRL_VQ" \
	"\x11""STATUS" \
	"\x10""MRG_RXBUF" \
	"\x0f""HOST_UFO" \
	"\x0e""HOST_ECN" \
	"\x0d""HOST_TSO6" \
	"\x0c""HOST_TSO4" \
	"\x0b""GUEST_UFO" \
	"\x0a""GUEST_ECN" \
	"\x09""GUEST_TSO6" \
	"\x08""GUEST_TSO4" \
	"\x07""GSO" \
	"\x06""MAC" \
	"\x02""GUEST_CSUM" \
	"\x01""CSUM"
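
/*
 * In the snprintb(3) format above, "\xNN" is the 1-origin bit position,
 * so e.g. "\x10""MRG_RXBUF" labels bit 15 (1<<15).  A hypothetical
 * negotiated feature word of 0x30020 would render roughly as
 * "30020<CTRL_VQ,STATUS,MAC>" (illustrative output only).
 */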

/* Status */
#define VIRTIO_NET_S_LINK_UP	1

/* Packet header structure */
struct virtio_net_hdr {
	uint8_t		flags;
	uint8_t		gso_type;
	uint16_t	hdr_len;
	uint16_t	gso_size;
	uint16_t	csum_start;
	uint16_t	csum_offset;
#if 0
	uint16_t	num_buffers; /* if VIRTIO_NET_F_MRG_RXBUF enabled */
#endif
} __packed;

#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
#define VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
#define VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
#define VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
#define VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
#define VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */

#define VIRTIO_NET_MAX_GSO_LEN		(65536+ETHER_HDR_LEN)
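
/*
 * Note that this driver negotiates neither the checksum-offload nor the
 * GSO feature bits, so every header it sends is all zeroes (see the
 * memset in vioif_start()) and the header contents are ignored on rx.
 */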

/* Control virtqueue */
struct virtio_net_ctrl_cmd {
	uint8_t	class;
	uint8_t	command;
} __packed;
#define VIRTIO_NET_CTRL_RX		0
# define VIRTIO_NET_CTRL_RX_PROMISC	0
# define VIRTIO_NET_CTRL_RX_ALLMULTI	1

#define VIRTIO_NET_CTRL_MAC		1
# define VIRTIO_NET_CTRL_MAC_TABLE_SET	0

#define VIRTIO_NET_CTRL_VLAN		2
# define VIRTIO_NET_CTRL_VLAN_ADD	0
# define VIRTIO_NET_CTRL_VLAN_DEL	1

struct virtio_net_ctrl_status {
	uint8_t	ack;
} __packed;
#define VIRTIO_NET_OK			0
#define VIRTIO_NET_ERR			1

struct virtio_net_ctrl_rx {
	uint8_t	onoff;
} __packed;

struct virtio_net_ctrl_mac_tbl {
	uint32_t nentries;
	uint8_t macs[][ETHER_ADDR_LEN];
} __packed;

struct virtio_net_ctrl_vlan {
	uint16_t id;
} __packed;

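/*
 * Every control-queue request issued below is a descriptor chain of
 * the form
 *
 *   virtio_net_ctrl_cmd    (device-readable)
 *   command payload        (device-readable; e.g. virtio_net_ctrl_rx)
 *   virtio_net_ctrl_status (device-writable)
 *
 * and the device writes VIRTIO_NET_OK/VIRTIO_NET_ERR into the status.
 */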

/*
 * if_vioifvar.h:
 */
struct vioif_softc {
	device_t		sc_dev;

	struct virtio_softc	*sc_virtio;
	struct virtqueue	sc_vq[3];

	uint8_t			sc_mac[ETHER_ADDR_LEN];
	struct ethercom		sc_ethercom;
	short			sc_deferred_init_done;

	/* bus_dmamem */
	bus_dma_segment_t	sc_hdr_segs[1];
	struct virtio_net_hdr	*sc_hdrs;
#define sc_rx_hdrs	sc_hdrs
	struct virtio_net_hdr	*sc_tx_hdrs;
	struct virtio_net_ctrl_cmd *sc_ctrl_cmd;
	struct virtio_net_ctrl_status *sc_ctrl_status;
	struct virtio_net_ctrl_rx *sc_ctrl_rx;
	struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_uc;
	struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_mc;

	/* kmem */
	bus_dmamap_t		*sc_arrays;
#define sc_rxhdr_dmamaps sc_arrays
	bus_dmamap_t		*sc_txhdr_dmamaps;
	bus_dmamap_t		*sc_rx_dmamaps;
	bus_dmamap_t		*sc_tx_dmamaps;
	struct mbuf		**sc_rx_mbufs;
	struct mbuf		**sc_tx_mbufs;

	bus_dmamap_t		sc_ctrl_cmd_dmamap;
	bus_dmamap_t		sc_ctrl_status_dmamap;
	bus_dmamap_t		sc_ctrl_rx_dmamap;
	bus_dmamap_t		sc_ctrl_tbl_uc_dmamap;
	bus_dmamap_t		sc_ctrl_tbl_mc_dmamap;

	void			*sc_rx_softint;

	enum {
		FREE, INUSE, DONE
	}			sc_ctrl_inuse;
	kcondvar_t		sc_ctrl_wait;
	kmutex_t		sc_ctrl_wait_lock;
	kmutex_t		*sc_tx_lock;
	kmutex_t		*sc_rx_lock;
	bool			sc_stopping;
};
#define VIRTIO_NET_TX_MAXNSEGS		(16) /* XXX */
#define VIRTIO_NET_CTRL_MAC_MAXENTRIES	(64) /* XXX */

#define VIOIF_TX_LOCK(_sc)	if ((_sc)->sc_tx_lock) mutex_enter((_sc)->sc_tx_lock)
#define VIOIF_TX_UNLOCK(_sc)	if ((_sc)->sc_tx_lock) mutex_exit((_sc)->sc_tx_lock)
#define VIOIF_TX_LOCKED(_sc)	(!(_sc)->sc_tx_lock || mutex_owned((_sc)->sc_tx_lock))
#define VIOIF_RX_LOCK(_sc)	if ((_sc)->sc_rx_lock) mutex_enter((_sc)->sc_rx_lock)
#define VIOIF_RX_UNLOCK(_sc)	if ((_sc)->sc_rx_lock) mutex_exit((_sc)->sc_rx_lock)
#define VIOIF_RX_LOCKED(_sc)	(!(_sc)->sc_rx_lock || mutex_owned((_sc)->sc_rx_lock))
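
/*
 * Without VIOIF_MPSAFE the lock pointers stay NULL, so the LOCK/UNLOCK
 * macros above become no-ops and the LOCKED() predicates are always
 * true, keeping the KASSERTs below valid in both configurations.
 */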

/* cfattach interface functions */
static int	vioif_match(device_t, cfdata_t, void *);
static void	vioif_attach(device_t, device_t, void *);
static void	vioif_deferred_init(device_t);

/* ifnet interface functions */
static int	vioif_init(struct ifnet *);
static void	vioif_stop(struct ifnet *, int);
static void	vioif_start(struct ifnet *);
static int	vioif_ioctl(struct ifnet *, u_long, void *);
static void	vioif_watchdog(struct ifnet *);

/* rx */
static int	vioif_add_rx_mbuf(struct vioif_softc *, int);
static void	vioif_free_rx_mbuf(struct vioif_softc *, int);
static void	vioif_populate_rx_mbufs(struct vioif_softc *);
static void	vioif_populate_rx_mbufs_locked(struct vioif_softc *);
static int	vioif_rx_deq(struct vioif_softc *);
static int	vioif_rx_deq_locked(struct vioif_softc *);
static int	vioif_rx_vq_done(struct virtqueue *);
static void	vioif_rx_softint(void *);
static void	vioif_rx_drain(struct vioif_softc *);

/* tx */
static int	vioif_tx_vq_done(struct virtqueue *);
static int	vioif_tx_vq_done_locked(struct virtqueue *);
static void	vioif_tx_drain(struct vioif_softc *);

/* other control */
static int	vioif_updown(struct vioif_softc *, bool);
static int	vioif_ctrl_rx(struct vioif_softc *, int, bool);
static int	vioif_set_promisc(struct vioif_softc *, bool);
static int	vioif_set_allmulti(struct vioif_softc *, bool);
static int	vioif_set_rx_filter(struct vioif_softc *);
static int	vioif_rx_filter(struct vioif_softc *);
static int	vioif_ctrl_vq_done(struct virtqueue *);

CFATTACH_DECL_NEW(vioif, sizeof(struct vioif_softc),
		  vioif_match, vioif_attach, NULL, NULL);

static int
vioif_match(device_t parent, cfdata_t match, void *aux)
{
	struct virtio_softc *va = aux;

	if (va->sc_childdevid == PCI_PRODUCT_VIRTIO_NETWORK)
		return 1;

	return 0;
}

/* allocate memory */
/*
 * dma memory is used for:
 *   sc_rx_hdrs[slot]:	 metadata array for received frames (READ)
 *   sc_tx_hdrs[slot]:	 metadata array for frames to be sent (WRITE)
 *   sc_ctrl_cmd:	 command to be sent via ctrl vq (WRITE)
 *   sc_ctrl_status:	 return value for a command via ctrl vq (READ)
 *   sc_ctrl_rx:	 parameter for a VIRTIO_NET_CTRL_RX class command
 *			 (WRITE)
 *   sc_ctrl_mac_tbl_uc: unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
 *			 class command (WRITE)
 *   sc_ctrl_mac_tbl_mc: multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
 *			 class command (WRITE)
 * Only one instance of each sc_ctrl_* structure is allocated; they are
 * protected by the sc_ctrl_inuse variable and the sc_ctrl_wait condvar.
 */
/*
 * dynamically allocated memory is used for:
 *   sc_rxhdr_dmamaps[slot]:	bus_dmamap_t array for sc_rx_hdrs[slot]
 *   sc_txhdr_dmamaps[slot]:	bus_dmamap_t array for sc_tx_hdrs[slot]
 *   sc_rx_dmamaps[slot]:	bus_dmamap_t array for received payload
 *   sc_tx_dmamaps[slot]:	bus_dmamap_t array for sent payload
 *   sc_rx_mbufs[slot]:		mbuf pointer array for received frames
 *   sc_tx_mbufs[slot]:		mbuf pointer array for sent frames
 */
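/*
 * All of the above live in one contiguous bus_dmamem allocation,
 * carved up in vioif_alloc_mems() in this order (the ctrl area exists
 * only when the control vq was allocated, i.e. sc_nvqs == 3):
 *
 *   sc_hdrs             rxqsize * sizeof(struct virtio_net_hdr)
 *   sc_tx_hdrs          txqsize * sizeof(struct virtio_net_hdr)
 *   sc_ctrl_cmd         sizeof(struct virtio_net_ctrl_cmd)
 *   sc_ctrl_status      sizeof(struct virtio_net_ctrl_status)
 *   sc_ctrl_rx          sizeof(struct virtio_net_ctrl_rx)
 *   sc_ctrl_mac_tbl_uc  table header only (0 MAC entries)
 *   sc_ctrl_mac_tbl_mc  header + VIRTIO_NET_CTRL_MAC_MAXENTRIES entries
 */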
static int
vioif_alloc_mems(struct vioif_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	int allocsize, allocsize2, r, rsegs, i;
	void *vaddr;
	intptr_t p;
	int rxqsize, txqsize;

	rxqsize = vsc->sc_vqs[0].vq_num;
	txqsize = vsc->sc_vqs[1].vq_num;

	allocsize = sizeof(struct virtio_net_hdr) * rxqsize;
	allocsize += sizeof(struct virtio_net_hdr) * txqsize;
	if (vsc->sc_nvqs == 3) {
		allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1;
		allocsize += sizeof(struct virtio_net_ctrl_status) * 1;
		allocsize += sizeof(struct virtio_net_ctrl_rx) * 1;
		allocsize += sizeof(struct virtio_net_ctrl_mac_tbl)
			+ sizeof(struct virtio_net_ctrl_mac_tbl)
			+ ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES;
	}
	r = bus_dmamem_alloc(vsc->sc_dmat, allocsize, 0, 0,
			     &sc->sc_hdr_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
	if (r != 0) {
		aprint_error_dev(sc->sc_dev,
				 "DMA memory allocation failed, size %d, "
				 "error code %d\n", allocsize, r);
		goto err_none;
	}
	r = bus_dmamem_map(vsc->sc_dmat,
			   &sc->sc_hdr_segs[0], 1, allocsize,
			   &vaddr, BUS_DMA_NOWAIT);
	if (r != 0) {
		aprint_error_dev(sc->sc_dev,
				 "DMA memory map failed, "
				 "error code %d\n", r);
		goto err_dmamem_alloc;
	}
	sc->sc_hdrs = vaddr;
	memset(vaddr, 0, allocsize);
	p = (intptr_t) vaddr;
	p += sizeof(struct virtio_net_hdr) * rxqsize;
#define P(name,size)	do { sc->sc_ ##name = (void*) p;	\
			     p += size; } while (0)
	P(tx_hdrs, sizeof(struct virtio_net_hdr) * txqsize);
	if (vsc->sc_nvqs == 3) {
		P(ctrl_cmd, sizeof(struct virtio_net_ctrl_cmd));
		P(ctrl_status, sizeof(struct virtio_net_ctrl_status));
		P(ctrl_rx, sizeof(struct virtio_net_ctrl_rx));
		P(ctrl_mac_tbl_uc, sizeof(struct virtio_net_ctrl_mac_tbl));
		P(ctrl_mac_tbl_mc,
		  (sizeof(struct virtio_net_ctrl_mac_tbl)
		   + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES));
	}
#undef P

	allocsize2 = sizeof(bus_dmamap_t) * (rxqsize + txqsize);
	allocsize2 += sizeof(bus_dmamap_t) * (rxqsize + txqsize);
	allocsize2 += sizeof(struct mbuf*) * (rxqsize + txqsize);
	sc->sc_arrays = kmem_zalloc(allocsize2, KM_SLEEP);
	if (sc->sc_arrays == NULL)
		goto err_dmamem_map;
	sc->sc_txhdr_dmamaps = sc->sc_arrays + rxqsize;
	sc->sc_rx_dmamaps = sc->sc_txhdr_dmamaps + txqsize;
	sc->sc_tx_dmamaps = sc->sc_rx_dmamaps + rxqsize;
	sc->sc_rx_mbufs = (void*) (sc->sc_tx_dmamaps + txqsize);
	sc->sc_tx_mbufs = sc->sc_rx_mbufs + rxqsize;

#define C(map, buf, size, nsegs, rw, usage)				\
	do {								\
		r = bus_dmamap_create(vsc->sc_dmat, size, nsegs, size, 0, \
				      BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW,	\
				      &sc->sc_ ##map);			\
		if (r != 0) {						\
			aprint_error_dev(sc->sc_dev,			\
					 usage " dmamap creation failed, " \
					 "error code %d\n", r);		\
			goto err_reqs;					\
		}							\
	} while (0)
#define C_L1(map, buf, size, nsegs, rw, usage)				\
	C(map, buf, size, nsegs, rw, usage);				\
	do {								\
		r = bus_dmamap_load(vsc->sc_dmat, sc->sc_ ##map,	\
				    &sc->sc_ ##buf, size, NULL,		\
				    BUS_DMA_ ##rw | BUS_DMA_NOWAIT);	\
		if (r != 0) {						\
			aprint_error_dev(sc->sc_dev,			\
					 usage " dmamap load failed, "	\
					 "error code %d\n", r);		\
			goto err_reqs;					\
		}							\
	} while (0)
#define C_L2(map, buf, size, nsegs, rw, usage)				\
	C(map, buf, size, nsegs, rw, usage);				\
	do {								\
		r = bus_dmamap_load(vsc->sc_dmat, sc->sc_ ##map,	\
				    sc->sc_ ##buf, size, NULL,		\
				    BUS_DMA_ ##rw | BUS_DMA_NOWAIT);	\
		if (r != 0) {						\
			aprint_error_dev(sc->sc_dev,			\
					 usage " dmamap load failed, "	\
					 "error code %d\n", r);		\
			goto err_reqs;					\
		}							\
	} while (0)
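	/*
	 * C() only creates a dmamap; C_L1() additionally loads it from an
	 * array element (&sc->sc_<buf>) and C_L2() from a pointer
	 * (sc->sc_<buf>).  Payload maps are created unloaded and loaded
	 * per packet with bus_dmamap_load_mbuf().
	 */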
	for (i = 0; i < rxqsize; i++) {
		C_L1(rxhdr_dmamaps[i], rx_hdrs[i],
		    sizeof(struct virtio_net_hdr), 1,
		    READ, "rx header");
		C(rx_dmamaps[i], NULL, MCLBYTES, 1, 0, "rx payload");
	}

	for (i = 0; i < txqsize; i++) {
		C_L1(txhdr_dmamaps[i], tx_hdrs[i],
		    sizeof(struct virtio_net_hdr), 1,
		    WRITE, "tx header");
		C(tx_dmamaps[i], NULL, ETHER_MAX_LEN, 256 /* XXX */, 0,
		  "tx payload");
	}

	if (vsc->sc_nvqs == 3) {
		/* control vq class & command */
		C_L2(ctrl_cmd_dmamap, ctrl_cmd,
		    sizeof(struct virtio_net_ctrl_cmd), 1, WRITE,
		    "control command");

		/* control vq status */
		C_L2(ctrl_status_dmamap, ctrl_status,
		    sizeof(struct virtio_net_ctrl_status), 1, READ,
		    "control status");

		/* control vq rx mode command parameter */
		C_L2(ctrl_rx_dmamap, ctrl_rx,
		    sizeof(struct virtio_net_ctrl_rx), 1, WRITE,
		    "rx mode control command");

		/* control vq MAC filter table for unicast */
		/* do not load now since its length is variable */
		C(ctrl_tbl_uc_dmamap, NULL,
		  sizeof(struct virtio_net_ctrl_mac_tbl) + 0, 1, WRITE,
		  "unicast MAC address filter command");

		/* control vq MAC filter table for multicast */
		C(ctrl_tbl_mc_dmamap, NULL,
		  (sizeof(struct virtio_net_ctrl_mac_tbl)
		   + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES),
		  1, WRITE, "multicast MAC address filter command");
	}
#undef C_L2
#undef C_L1
#undef C

	return 0;

err_reqs:
#define D(map)								\
	do {								\
		if (sc->sc_ ##map) {					\
			bus_dmamap_destroy(vsc->sc_dmat, sc->sc_ ##map); \
			sc->sc_ ##map = NULL;				\
		}							\
	} while (0)
	D(ctrl_tbl_mc_dmamap);
	D(ctrl_tbl_uc_dmamap);
	D(ctrl_rx_dmamap);
	D(ctrl_status_dmamap);
	D(ctrl_cmd_dmamap);
	for (i = 0; i < txqsize; i++) {
		D(tx_dmamaps[i]);
		D(txhdr_dmamaps[i]);
	}
	for (i = 0; i < rxqsize; i++) {
		D(rx_dmamaps[i]);
		D(rxhdr_dmamaps[i]);
	}
#undef D
	if (sc->sc_arrays) {
		kmem_free(sc->sc_arrays, allocsize2);
		sc->sc_arrays = 0;
	}
err_dmamem_map:
	bus_dmamem_unmap(vsc->sc_dmat, sc->sc_hdrs, allocsize);
err_dmamem_alloc:
	bus_dmamem_free(vsc->sc_dmat, &sc->sc_hdr_segs[0], 1);
err_none:
	return -1;
}

static void
vioif_attach(device_t parent, device_t self, void *aux)
{
	struct vioif_softc *sc = device_private(self);
	struct virtio_softc *vsc = device_private(parent);
	uint32_t features;
	char buf[256];
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	u_int flags;
	int r;

	if (vsc->sc_child != NULL) {
		aprint_normal(": child already attached for %s; "
			      "something wrong...\n",
			      device_xname(parent));
		return;
	}

	sc->sc_dev = self;
	sc->sc_virtio = vsc;

	vsc->sc_child = self;
	vsc->sc_ipl = IPL_NET;
	vsc->sc_vqs = &sc->sc_vq[0];
	vsc->sc_config_change = NULL;
	vsc->sc_intrhand = virtio_vq_intr;
	vsc->sc_flags = 0;

#ifdef VIOIF_MPSAFE
	vsc->sc_flags |= VIRTIO_F_PCI_INTR_MPSAFE;
#endif
#ifdef VIOIF_SOFTINT_INTR
	vsc->sc_flags |= VIRTIO_F_PCI_INTR_SOFTINT;
#endif
	vsc->sc_flags |= VIRTIO_F_PCI_INTR_MSIX;

	features = virtio_negotiate_features(vsc,
					     (VIRTIO_NET_F_MAC |
					      VIRTIO_NET_F_STATUS |
					      VIRTIO_NET_F_CTRL_VQ |
					      VIRTIO_NET_F_CTRL_RX |
					      VIRTIO_F_NOTIFY_ON_EMPTY));
	if (features & VIRTIO_NET_F_MAC) {
		sc->sc_mac[0] = virtio_read_device_config_1(vsc,
						    VIRTIO_NET_CONFIG_MAC+0);
		sc->sc_mac[1] = virtio_read_device_config_1(vsc,
						    VIRTIO_NET_CONFIG_MAC+1);
		sc->sc_mac[2] = virtio_read_device_config_1(vsc,
						    VIRTIO_NET_CONFIG_MAC+2);
		sc->sc_mac[3] = virtio_read_device_config_1(vsc,
						    VIRTIO_NET_CONFIG_MAC+3);
		sc->sc_mac[4] = virtio_read_device_config_1(vsc,
						    VIRTIO_NET_CONFIG_MAC+4);
		sc->sc_mac[5] = virtio_read_device_config_1(vsc,
						    VIRTIO_NET_CONFIG_MAC+5);
	} else {
		/* code stolen from sys/net/if_tap.c */
		struct timeval tv;
		uint32_t ui;
		getmicrouptime(&tv);
		ui = (tv.tv_sec ^ tv.tv_usec) & 0xffffff;
		memcpy(sc->sc_mac+3, (uint8_t *)&ui, 3);
		virtio_write_device_config_1(vsc,
					     VIRTIO_NET_CONFIG_MAC+0,
					     sc->sc_mac[0]);
		virtio_write_device_config_1(vsc,
					     VIRTIO_NET_CONFIG_MAC+1,
					     sc->sc_mac[1]);
		virtio_write_device_config_1(vsc,
					     VIRTIO_NET_CONFIG_MAC+2,
					     sc->sc_mac[2]);
		virtio_write_device_config_1(vsc,
					     VIRTIO_NET_CONFIG_MAC+3,
					     sc->sc_mac[3]);
		virtio_write_device_config_1(vsc,
					     VIRTIO_NET_CONFIG_MAC+4,
					     sc->sc_mac[4]);
		virtio_write_device_config_1(vsc,
					     VIRTIO_NET_CONFIG_MAC+5,
					     sc->sc_mac[5]);
	}
	aprint_normal(": Ethernet address %s\n", ether_sprintf(sc->sc_mac));
	snprintb(buf, sizeof(buf), VIRTIO_NET_FLAG_BITS, features);
	aprint_normal_dev(self, "Features: %s\n", buf);
	aprint_naive("\n");

#ifdef VIOIF_MPSAFE
	sc->sc_tx_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
	sc->sc_rx_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
#else
	sc->sc_tx_lock = NULL;
	sc->sc_rx_lock = NULL;
#endif
	sc->sc_stopping = false;

	/*
	 * Allocating a virtqueue for Rx
	 */
	r = virtio_alloc_vq(vsc, &sc->sc_vq[0], 0,
	    MCLBYTES+sizeof(struct virtio_net_hdr), 2, "rx");
	if (r != 0)
		goto err;
	vsc->sc_nvqs = 1;
	sc->sc_vq[0].vq_done = vioif_rx_vq_done;

	/*
	 * Allocating a virtqueue for Tx
	 */
	r = virtio_alloc_vq(vsc, &sc->sc_vq[1], 1,
	    (sizeof(struct virtio_net_hdr) + (ETHER_MAX_LEN - ETHER_HDR_LEN)),
	    VIRTIO_NET_TX_MAXNSEGS + 1, "tx");
	if (r != 0)
		goto err;
	vsc->sc_nvqs = 2;
	sc->sc_vq[1].vq_done = vioif_tx_vq_done;

	virtio_start_vq_intr(vsc, &sc->sc_vq[0]);
	virtio_stop_vq_intr(vsc, &sc->sc_vq[1]); /* not urgent; do it later */

	if ((features & VIRTIO_NET_F_CTRL_VQ) &&
	    (features & VIRTIO_NET_F_CTRL_RX)) {
		/*
		 * Allocating a virtqueue for control channel
		 */
		r = virtio_alloc_vq(vsc, &sc->sc_vq[2], 2,
		    NBPG, 1, "control");
		if (r != 0) {
			aprint_error_dev(self, "failed to allocate "
			    "a virtqueue for control channel\n");
			goto skip;
		}

		sc->sc_vq[2].vq_done = vioif_ctrl_vq_done;
		cv_init(&sc->sc_ctrl_wait, "ctrl_vq");
		mutex_init(&sc->sc_ctrl_wait_lock, MUTEX_DEFAULT, IPL_NET);
		sc->sc_ctrl_inuse = FREE;
		virtio_start_vq_intr(vsc, &sc->sc_vq[2]);
		vsc->sc_nvqs = 3;
	}
skip:

#ifdef VIOIF_MPSAFE
	flags = SOFTINT_NET | SOFTINT_MPSAFE;
#else
	flags = SOFTINT_NET;
#endif
	sc->sc_rx_softint = softint_establish(flags, vioif_rx_softint, sc);
	if (sc->sc_rx_softint == NULL) {
		aprint_error_dev(self, "cannot establish softint\n");
		goto err;
	}

	if (vioif_alloc_mems(sc) < 0)
		goto err;

	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_start = vioif_start;
	ifp->if_ioctl = vioif_ioctl;
	ifp->if_init = vioif_init;
	ifp->if_stop = vioif_stop;
	ifp->if_capabilities = 0;
	ifp->if_watchdog = vioif_watchdog;

	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;

	if_attach(ifp);
	ether_ifattach(ifp, sc->sc_mac);

	return;

err:
	if (sc->sc_tx_lock)
		mutex_obj_free(sc->sc_tx_lock);
	if (sc->sc_rx_lock)
		mutex_obj_free(sc->sc_rx_lock);

	if (vsc->sc_nvqs == 3) {
		virtio_free_vq(vsc, &sc->sc_vq[2]);
		cv_destroy(&sc->sc_ctrl_wait);
		mutex_destroy(&sc->sc_ctrl_wait_lock);
		vsc->sc_nvqs = 2;
	}
	if (vsc->sc_nvqs == 2) {
		virtio_free_vq(vsc, &sc->sc_vq[1]);
		vsc->sc_nvqs = 1;
	}
	if (vsc->sc_nvqs == 1) {
		virtio_free_vq(vsc, &sc->sc_vq[0]);
		vsc->sc_nvqs = 0;
	}
	vsc->sc_child = (void*)1;
	return;
}

/* we need interrupts to turn promiscuous mode off */
static void
vioif_deferred_init(device_t self)
{
	struct vioif_softc *sc = device_private(self);
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	int r;

	if (ifp->if_flags & IFF_PROMISC)
		return;

	r = vioif_set_promisc(sc, false);
	if (r != 0)
		aprint_error_dev(self, "resetting promisc mode failed, "
				 "error code %d\n", r);
}

/*
 * Interface functions for ifnet
 */
static int
vioif_init(struct ifnet *ifp)
{
	struct vioif_softc *sc = ifp->if_softc;

	vioif_stop(ifp, 0);

	if (!sc->sc_deferred_init_done) {
		struct virtio_softc *vsc = sc->sc_virtio;

		sc->sc_deferred_init_done = 1;
		if (vsc->sc_nvqs == 3)
			vioif_deferred_init(sc->sc_dev);
	}

	/* sc_stopping must be false before calling vioif_populate_rx_mbufs */
	sc->sc_stopping = false;

	vioif_populate_rx_mbufs(sc);

	vioif_updown(sc, true);
	ifp->if_flags |= IFF_RUNNING;
	ifp->if_flags &= ~IFF_OACTIVE;
	vioif_rx_filter(sc);

	return 0;
}

static void
vioif_stop(struct ifnet *ifp, int disable)
{
	struct vioif_softc *sc = ifp->if_softc;
	struct virtio_softc *vsc = sc->sc_virtio;

	/* Take the locks to ensure that ongoing TX/RX finish */
	VIOIF_TX_LOCK(sc);
	VIOIF_RX_LOCK(sc);
	sc->sc_stopping = true;
	VIOIF_RX_UNLOCK(sc);
	VIOIF_TX_UNLOCK(sc);

	/* only way to stop I/O and DMA is resetting... */
	virtio_reset(vsc);
	vioif_rx_deq(sc);
	vioif_tx_drain(sc);
	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);

	if (disable)
		vioif_rx_drain(sc);

	virtio_reinit_start(vsc);
	virtio_negotiate_features(vsc, vsc->sc_features);
	virtio_start_vq_intr(vsc, &sc->sc_vq[0]);
	virtio_stop_vq_intr(vsc, &sc->sc_vq[1]);
	if (vsc->sc_nvqs >= 3)
		virtio_start_vq_intr(vsc, &sc->sc_vq[2]);
	virtio_reinit_end(vsc);
	vioif_updown(sc, false);
}

static void
vioif_start(struct ifnet *ifp)
{
	struct vioif_softc *sc = ifp->if_softc;
	struct virtio_softc *vsc = sc->sc_virtio;
	struct virtqueue *vq = &sc->sc_vq[1]; /* tx vq */
	struct mbuf *m;
	int queued = 0, retry = 0;

	VIOIF_TX_LOCK(sc);

	if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)
		goto out;

	if (sc->sc_stopping)
		goto out;

	for (;;) {
		int slot, r;

		IFQ_DEQUEUE(&ifp->if_snd, m);

		if (m == NULL)
			break;

retry:
		r = virtio_enqueue_prep(vsc, vq, &slot);
		if (r == EAGAIN) {
			ifp->if_flags |= IFF_OACTIVE;
			vioif_tx_vq_done_locked(vq);
			if (retry++ == 0)
				goto retry;
			else
				break;
		}
		if (r != 0)
			panic("enqueue_prep for a tx buffer");
		r = bus_dmamap_load_mbuf(vsc->sc_dmat,
					 sc->sc_tx_dmamaps[slot],
					 m, BUS_DMA_WRITE|BUS_DMA_NOWAIT);
		if (r != 0) {
			virtio_enqueue_abort(vsc, vq, slot);
			aprint_error_dev(sc->sc_dev,
			    "tx dmamap load failed, error code %d\n", r);
			break;
		}
		r = virtio_enqueue_reserve(vsc, vq, slot,
					sc->sc_tx_dmamaps[slot]->dm_nsegs + 1);
		if (r != 0) {
			virtio_enqueue_abort(vsc, vq, slot);
			bus_dmamap_unload(vsc->sc_dmat,
					  sc->sc_tx_dmamaps[slot]);
			ifp->if_flags |= IFF_OACTIVE;
			vioif_tx_vq_done_locked(vq);
			if (retry++ == 0)
				goto retry;
			else
				break;
		}

		sc->sc_tx_mbufs[slot] = m;

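		/*
		 * Each tx request is a two-part descriptor chain: the
		 * (all-zero) net header followed by the payload segments;
		 * virtio_enqueue_reserve() above asked for dm_nsegs + 1
		 * descriptors for exactly this reason.
		 */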
		memset(&sc->sc_tx_hdrs[slot], 0, sizeof(struct virtio_net_hdr));
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_tx_dmamaps[slot],
				0, sc->sc_tx_dmamaps[slot]->dm_mapsize,
				BUS_DMASYNC_PREWRITE);
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_txhdr_dmamaps[slot],
				0, sc->sc_txhdr_dmamaps[slot]->dm_mapsize,
				BUS_DMASYNC_PREWRITE);
		virtio_enqueue(vsc, vq, slot, sc->sc_txhdr_dmamaps[slot], true);
		virtio_enqueue(vsc, vq, slot, sc->sc_tx_dmamaps[slot], true);
		virtio_enqueue_commit(vsc, vq, slot, false);
		queued++;
		bpf_mtap(ifp, m);
	}

	if (m != NULL) {
		ifp->if_flags |= IFF_OACTIVE;
		m_freem(m);
	}

	if (queued > 0) {
		virtio_enqueue_commit(vsc, vq, -1, true);
		ifp->if_timer = 5;
	}

out:
	VIOIF_TX_UNLOCK(sc);
}

static int
vioif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
{
	int s, r;

	s = splnet();

	r = ether_ioctl(ifp, cmd, data);
	if ((r == 0 && cmd == SIOCSIFFLAGS) ||
	    (r == ENETRESET && (cmd == SIOCADDMULTI || cmd == SIOCDELMULTI))) {
		if (ifp->if_flags & IFF_RUNNING)
			r = vioif_rx_filter(ifp->if_softc);
		else
			r = 0;
	}

	splx(s);

	return r;
}

static void
vioif_watchdog(struct ifnet *ifp)
{
	struct vioif_softc *sc = ifp->if_softc;

	if (ifp->if_flags & IFF_RUNNING)
		vioif_tx_vq_done(&sc->sc_vq[1]);
}


/*
 * Receive implementation
 */
/* allocate and initialize an mbuf for receive */
static int
vioif_add_rx_mbuf(struct vioif_softc *sc, int i)
{
	struct mbuf *m;
	int r;

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return ENOBUFS;
	MCLGET(m, M_DONTWAIT);
	if ((m->m_flags & M_EXT) == 0) {
		m_freem(m);
		return ENOBUFS;
	}
	sc->sc_rx_mbufs[i] = m;
	m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
	r = bus_dmamap_load_mbuf(sc->sc_virtio->sc_dmat,
				 sc->sc_rx_dmamaps[i],
				 m, BUS_DMA_READ|BUS_DMA_NOWAIT);
	if (r) {
		m_freem(m);
		sc->sc_rx_mbufs[i] = 0;
		return r;
	}

	return 0;
}

/* free an mbuf for receive */
static void
vioif_free_rx_mbuf(struct vioif_softc *sc, int i)
{
	bus_dmamap_unload(sc->sc_virtio->sc_dmat, sc->sc_rx_dmamaps[i]);
	m_freem(sc->sc_rx_mbufs[i]);
	sc->sc_rx_mbufs[i] = NULL;
}

/* add mbufs for all the empty receive slots */
static void
vioif_populate_rx_mbufs(struct vioif_softc *sc)
{
	VIOIF_RX_LOCK(sc);
	vioif_populate_rx_mbufs_locked(sc);
	VIOIF_RX_UNLOCK(sc);
}

static void
vioif_populate_rx_mbufs_locked(struct vioif_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	int i, r, ndone = 0;
	struct virtqueue *vq = &sc->sc_vq[0]; /* rx vq */

	KASSERT(VIOIF_RX_LOCKED(sc));

	if (sc->sc_stopping)
		return;

	for (i = 0; i < vq->vq_num; i++) {
		int slot;
		r = virtio_enqueue_prep(vsc, vq, &slot);
		if (r == EAGAIN)
			break;
		if (r != 0)
			panic("enqueue_prep for rx buffers");
		if (sc->sc_rx_mbufs[slot] == NULL) {
			r = vioif_add_rx_mbuf(sc, slot);
			if (r != 0) {
				printf("%s: rx mbuf allocation failed, "
				       "error code %d\n",
				       device_xname(sc->sc_dev), r);
				break;
			}
		}
		r = virtio_enqueue_reserve(vsc, vq, slot,
					sc->sc_rx_dmamaps[slot]->dm_nsegs + 1);
		if (r != 0) {
			vioif_free_rx_mbuf(sc, slot);
			break;
		}
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_rxhdr_dmamaps[slot],
			0, sizeof(struct virtio_net_hdr), BUS_DMASYNC_PREREAD);
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_rx_dmamaps[slot],
			0, MCLBYTES, BUS_DMASYNC_PREREAD);
		virtio_enqueue(vsc, vq, slot, sc->sc_rxhdr_dmamaps[slot], false);
		virtio_enqueue(vsc, vq, slot, sc->sc_rx_dmamaps[slot], false);
		virtio_enqueue_commit(vsc, vq, slot, false);
		ndone++;
	}
	if (ndone > 0)
		virtio_enqueue_commit(vsc, vq, -1, true);
}

/* dequeue received packets */
static int
vioif_rx_deq(struct vioif_softc *sc)
{
	int r;

	KASSERT(sc->sc_stopping);

	VIOIF_RX_LOCK(sc);
	r = vioif_rx_deq_locked(sc);
	VIOIF_RX_UNLOCK(sc);

	return r;
}

/* dequeue received packets */
static int
vioif_rx_deq_locked(struct vioif_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	struct virtqueue *vq = &sc->sc_vq[0];
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	struct mbuf *m;
	int r = 0;
	int slot, len;

	KASSERT(VIOIF_RX_LOCKED(sc));

	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
		len -= sizeof(struct virtio_net_hdr);
		r = 1;
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_rxhdr_dmamaps[slot],
				0, sizeof(struct virtio_net_hdr),
				BUS_DMASYNC_POSTREAD);
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_rx_dmamaps[slot],
				0, MCLBYTES,
				BUS_DMASYNC_POSTREAD);
		m = sc->sc_rx_mbufs[slot];
		KASSERT(m != NULL);
		bus_dmamap_unload(vsc->sc_dmat, sc->sc_rx_dmamaps[slot]);
		sc->sc_rx_mbufs[slot] = 0;
		virtio_dequeue_commit(vsc, vq, slot);
		m->m_pkthdr.rcvif = ifp;
		m->m_len = m->m_pkthdr.len = len;
		ifp->if_ipackets++;
		bpf_mtap(ifp, m);

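		/*
		 * Drop the rx lock across if_input() so the stack can
		 * re-enter the driver (e.g. to transmit a reply) without
		 * recursing on our lock.
		 */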
		VIOIF_RX_UNLOCK(sc);
		(*ifp->if_input)(ifp, m);
		VIOIF_RX_LOCK(sc);

		if (sc->sc_stopping)
			break;
	}

	return r;
}

/* rx interrupt; call _dequeue above and schedule a softint */
static int
vioif_rx_vq_done(struct virtqueue *vq)
{
	struct virtio_softc *vsc = vq->vq_owner;
	struct vioif_softc *sc = device_private(vsc->sc_child);
	int r = 0;

#ifdef VIOIF_SOFTINT_INTR
	KASSERT(!cpu_intr_p());
#endif

	VIOIF_RX_LOCK(sc);

	if (sc->sc_stopping)
		goto out;

	r = vioif_rx_deq_locked(sc);
	if (r)
#ifdef VIOIF_SOFTINT_INTR
		vioif_populate_rx_mbufs_locked(sc);
#else
		softint_schedule(sc->sc_rx_softint);
#endif

out:
	VIOIF_RX_UNLOCK(sc);
	return r;
}

/* softint: enqueue receive requests for new incoming packets */
static void
vioif_rx_softint(void *arg)
{
	struct vioif_softc *sc = arg;

	vioif_populate_rx_mbufs(sc);
}

/* free all the mbufs; called from if_stop(disable) */
static void
vioif_rx_drain(struct vioif_softc *sc)
{
	struct virtqueue *vq = &sc->sc_vq[0];
	int i;

	for (i = 0; i < vq->vq_num; i++) {
		if (sc->sc_rx_mbufs[i] == NULL)
			continue;
		vioif_free_rx_mbuf(sc, i);
	}
}


/*
 * Transmission implementation
 */
/* actual transmission is done in if_start */
/* tx interrupt; dequeue and free mbufs */
/*
 * The tx interrupt is normally disabled; this is called when the tx vq
 * fills up and from the watchdog timer.
 */
static int
vioif_tx_vq_done(struct virtqueue *vq)
{
	struct virtio_softc *vsc = vq->vq_owner;
	struct vioif_softc *sc = device_private(vsc->sc_child);
	int r = 0;

	VIOIF_TX_LOCK(sc);

	if (sc->sc_stopping)
		goto out;

	r = vioif_tx_vq_done_locked(vq);

out:
	VIOIF_TX_UNLOCK(sc);
	return r;
}

static int
vioif_tx_vq_done_locked(struct virtqueue *vq)
{
	struct virtio_softc *vsc = vq->vq_owner;
	struct vioif_softc *sc = device_private(vsc->sc_child);
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	struct mbuf *m;
	int r = 0;
	int slot, len;

	KASSERT(VIOIF_TX_LOCKED(sc));

	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
		r++;
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_txhdr_dmamaps[slot],
				0, sizeof(struct virtio_net_hdr),
				BUS_DMASYNC_POSTWRITE);
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_tx_dmamaps[slot],
				0, sc->sc_tx_dmamaps[slot]->dm_mapsize,
				BUS_DMASYNC_POSTWRITE);
		m = sc->sc_tx_mbufs[slot];
		bus_dmamap_unload(vsc->sc_dmat, sc->sc_tx_dmamaps[slot]);
		sc->sc_tx_mbufs[slot] = 0;
		virtio_dequeue_commit(vsc, vq, slot);
		ifp->if_opackets++;
		m_freem(m);
	}

	if (r)
		ifp->if_flags &= ~IFF_OACTIVE;
	return r;
}

/* free all the mbufs already put on vq; called from if_stop(disable) */
static void
vioif_tx_drain(struct vioif_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	struct virtqueue *vq = &sc->sc_vq[1];
	int i;

	KASSERT(sc->sc_stopping);

	for (i = 0; i < vq->vq_num; i++) {
		if (sc->sc_tx_mbufs[i] == NULL)
			continue;
		bus_dmamap_unload(vsc->sc_dmat, sc->sc_tx_dmamaps[i]);
		m_freem(sc->sc_tx_mbufs[i]);
		sc->sc_tx_mbufs[i] = NULL;
	}
}

/*
 * Control vq
 */
/* issue a VIRTIO_NET_CTRL_RX class command and wait for completion */
static int
vioif_ctrl_rx(struct vioif_softc *sc, int cmd, bool onoff)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	struct virtqueue *vq = &sc->sc_vq[2];
	int r, slot;

	if (vsc->sc_nvqs < 3)
		return ENOTSUP;

	mutex_enter(&sc->sc_ctrl_wait_lock);
	while (sc->sc_ctrl_inuse != FREE)
		cv_wait(&sc->sc_ctrl_wait, &sc->sc_ctrl_wait_lock);
	sc->sc_ctrl_inuse = INUSE;
	mutex_exit(&sc->sc_ctrl_wait_lock);

	sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_RX;
	sc->sc_ctrl_cmd->command = cmd;
	sc->sc_ctrl_rx->onoff = onoff;

	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_cmd_dmamap,
			0, sizeof(struct virtio_net_ctrl_cmd),
			BUS_DMASYNC_PREWRITE);
	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_rx_dmamap,
			0, sizeof(struct virtio_net_ctrl_rx),
			BUS_DMASYNC_PREWRITE);
	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_status_dmamap,
			0, sizeof(struct virtio_net_ctrl_status),
			BUS_DMASYNC_PREREAD);

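	/* chain: cmd and rx parameter (device-readable), status (writable) */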
	r = virtio_enqueue_prep(vsc, vq, &slot);
	if (r != 0)
		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
	r = virtio_enqueue_reserve(vsc, vq, slot, 3);
	if (r != 0)
		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
	virtio_enqueue(vsc, vq, slot, sc->sc_ctrl_cmd_dmamap, true);
	virtio_enqueue(vsc, vq, slot, sc->sc_ctrl_rx_dmamap, true);
	virtio_enqueue(vsc, vq, slot, sc->sc_ctrl_status_dmamap, false);
	virtio_enqueue_commit(vsc, vq, slot, true);

	/* wait for done */
	mutex_enter(&sc->sc_ctrl_wait_lock);
	while (sc->sc_ctrl_inuse != DONE)
		cv_wait(&sc->sc_ctrl_wait, &sc->sc_ctrl_wait_lock);
	mutex_exit(&sc->sc_ctrl_wait_lock);
	/* already dequeued */

	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_cmd_dmamap, 0,
			sizeof(struct virtio_net_ctrl_cmd),
			BUS_DMASYNC_POSTWRITE);
	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_rx_dmamap, 0,
			sizeof(struct virtio_net_ctrl_rx),
			BUS_DMASYNC_POSTWRITE);
	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_status_dmamap, 0,
			sizeof(struct virtio_net_ctrl_status),
			BUS_DMASYNC_POSTREAD);

	if (sc->sc_ctrl_status->ack == VIRTIO_NET_OK)
		r = 0;
	else {
		printf("%s: failed setting rx mode\n",
		       device_xname(sc->sc_dev));
		r = EIO;
	}

	mutex_enter(&sc->sc_ctrl_wait_lock);
	sc->sc_ctrl_inuse = FREE;
	cv_signal(&sc->sc_ctrl_wait);
	mutex_exit(&sc->sc_ctrl_wait_lock);

	return r;
}

static int
vioif_set_promisc(struct vioif_softc *sc, bool onoff)
{
	int r;

	r = vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, onoff);

	return r;
}

static int
vioif_set_allmulti(struct vioif_softc *sc, bool onoff)
{
	int r;

	r = vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, onoff);

	return r;
}

/* issue VIRTIO_NET_CTRL_MAC_TABLE_SET command and wait for completion */
static int
vioif_set_rx_filter(struct vioif_softc *sc)
{
	/* filter already set in sc_ctrl_mac_tbl */
	struct virtio_softc *vsc = sc->sc_virtio;
	struct virtqueue *vq = &sc->sc_vq[2];
	int r, slot;

	if (vsc->sc_nvqs < 3)
		return ENOTSUP;

	mutex_enter(&sc->sc_ctrl_wait_lock);
	while (sc->sc_ctrl_inuse != FREE)
		cv_wait(&sc->sc_ctrl_wait, &sc->sc_ctrl_wait_lock);
	sc->sc_ctrl_inuse = INUSE;
	mutex_exit(&sc->sc_ctrl_wait_lock);

	sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_MAC;
	sc->sc_ctrl_cmd->command = VIRTIO_NET_CTRL_MAC_TABLE_SET;

	r = bus_dmamap_load(vsc->sc_dmat, sc->sc_ctrl_tbl_uc_dmamap,
			    sc->sc_ctrl_mac_tbl_uc,
			    (sizeof(struct virtio_net_ctrl_mac_tbl)
			  + ETHER_ADDR_LEN * sc->sc_ctrl_mac_tbl_uc->nentries),
			    NULL, BUS_DMA_WRITE|BUS_DMA_NOWAIT);
	if (r) {
		printf("%s: control command dmamap load failed, "
		       "error code %d\n", device_xname(sc->sc_dev), r);
		goto out;
	}
	r = bus_dmamap_load(vsc->sc_dmat, sc->sc_ctrl_tbl_mc_dmamap,
			    sc->sc_ctrl_mac_tbl_mc,
			    (sizeof(struct virtio_net_ctrl_mac_tbl)
			  + ETHER_ADDR_LEN * sc->sc_ctrl_mac_tbl_mc->nentries),
			    NULL, BUS_DMA_WRITE|BUS_DMA_NOWAIT);
	if (r) {
		printf("%s: control command dmamap load failed, "
		       "error code %d\n", device_xname(sc->sc_dev), r);
		bus_dmamap_unload(vsc->sc_dmat, sc->sc_ctrl_tbl_uc_dmamap);
		goto out;
	}

	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_cmd_dmamap,
			0, sizeof(struct virtio_net_ctrl_cmd),
			BUS_DMASYNC_PREWRITE);
	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_tbl_uc_dmamap, 0,
			(sizeof(struct virtio_net_ctrl_mac_tbl)
			 + ETHER_ADDR_LEN * sc->sc_ctrl_mac_tbl_uc->nentries),
			BUS_DMASYNC_PREWRITE);
	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_tbl_mc_dmamap, 0,
			(sizeof(struct virtio_net_ctrl_mac_tbl)
			 + ETHER_ADDR_LEN * sc->sc_ctrl_mac_tbl_mc->nentries),
			BUS_DMASYNC_PREWRITE);
	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_status_dmamap,
			0, sizeof(struct virtio_net_ctrl_status),
			BUS_DMASYNC_PREREAD);

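	/* chain: cmd, unicast table, multicast table (readable), status (writable) */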
	r = virtio_enqueue_prep(vsc, vq, &slot);
	if (r != 0)
		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
	r = virtio_enqueue_reserve(vsc, vq, slot, 4);
	if (r != 0)
		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
	virtio_enqueue(vsc, vq, slot, sc->sc_ctrl_cmd_dmamap, true);
	virtio_enqueue(vsc, vq, slot, sc->sc_ctrl_tbl_uc_dmamap, true);
	virtio_enqueue(vsc, vq, slot, sc->sc_ctrl_tbl_mc_dmamap, true);
	virtio_enqueue(vsc, vq, slot, sc->sc_ctrl_status_dmamap, false);
	virtio_enqueue_commit(vsc, vq, slot, true);

	/* wait for done */
	mutex_enter(&sc->sc_ctrl_wait_lock);
	while (sc->sc_ctrl_inuse != DONE)
		cv_wait(&sc->sc_ctrl_wait, &sc->sc_ctrl_wait_lock);
	mutex_exit(&sc->sc_ctrl_wait_lock);
	/* already dequeued */

	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_cmd_dmamap, 0,
			sizeof(struct virtio_net_ctrl_cmd),
			BUS_DMASYNC_POSTWRITE);
	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_tbl_uc_dmamap, 0,
			(sizeof(struct virtio_net_ctrl_mac_tbl)
			 + ETHER_ADDR_LEN * sc->sc_ctrl_mac_tbl_uc->nentries),
			BUS_DMASYNC_POSTWRITE);
	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_tbl_mc_dmamap, 0,
			(sizeof(struct virtio_net_ctrl_mac_tbl)
			 + ETHER_ADDR_LEN * sc->sc_ctrl_mac_tbl_mc->nentries),
			BUS_DMASYNC_POSTWRITE);
	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_status_dmamap, 0,
			sizeof(struct virtio_net_ctrl_status),
			BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(vsc->sc_dmat, sc->sc_ctrl_tbl_uc_dmamap);
	bus_dmamap_unload(vsc->sc_dmat, sc->sc_ctrl_tbl_mc_dmamap);

	if (sc->sc_ctrl_status->ack == VIRTIO_NET_OK)
		r = 0;
	else {
		printf("%s: failed setting rx filter\n",
		       device_xname(sc->sc_dev));
		r = EIO;
	}

out:
	mutex_enter(&sc->sc_ctrl_wait_lock);
	sc->sc_ctrl_inuse = FREE;
	cv_signal(&sc->sc_ctrl_wait);
	mutex_exit(&sc->sc_ctrl_wait_lock);

	return r;
}

/* ctrl vq interrupt; wake up the command issuer */
static int
vioif_ctrl_vq_done(struct virtqueue *vq)
{
	struct virtio_softc *vsc = vq->vq_owner;
	struct vioif_softc *sc = device_private(vsc->sc_child);
	int r, slot;

	r = virtio_dequeue(vsc, vq, &slot, NULL);
	if (r == ENOENT)
		return 0;
	virtio_dequeue_commit(vsc, vq, slot);

	mutex_enter(&sc->sc_ctrl_wait_lock);
	sc->sc_ctrl_inuse = DONE;
	cv_signal(&sc->sc_ctrl_wait);
	mutex_exit(&sc->sc_ctrl_wait_lock);

	return 1;
}

/*
 * If IFF_PROMISC is requested, set promiscuous mode.
 * If the multicast filter is small enough (<= MAXENTRIES), set the rx filter.
 * If the multicast filter is too large, use ALLMULTI instead.
 */
/*
 * If setting the rx filter fails, fall back to ALLMULTI.
 * If ALLMULTI fails, fall back to PROMISC.
 */
static int
vioif_rx_filter(struct vioif_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	struct ether_multi *enm;
	struct ether_multistep step;
	int nentries;
	int promisc = 0, allmulti = 0, rxfilter = 0;
	int r;

	if (vsc->sc_nvqs < 3) {	/* no ctrl vq; always promisc */
		ifp->if_flags |= IFF_PROMISC;
		return 0;
	}

	if (ifp->if_flags & IFF_PROMISC) {
		promisc = 1;
		goto set;
	}

	nentries = -1;
	ETHER_FIRST_MULTI(step, &sc->sc_ethercom, enm);
	while (nentries++, enm != NULL) {
		if (nentries >= VIRTIO_NET_CTRL_MAC_MAXENTRIES) {
			allmulti = 1;
			goto set;
		}
		if (memcmp(enm->enm_addrlo, enm->enm_addrhi,
			   ETHER_ADDR_LEN)) {
			allmulti = 1;
			goto set;
		}
		memcpy(sc->sc_ctrl_mac_tbl_mc->macs[nentries],
		       enm->enm_addrlo, ETHER_ADDR_LEN);
		ETHER_NEXT_MULTI(step, enm);
	}
	rxfilter = 1;

set:
	if (rxfilter) {
		sc->sc_ctrl_mac_tbl_uc->nentries = 0;
		sc->sc_ctrl_mac_tbl_mc->nentries = nentries;
		r = vioif_set_rx_filter(sc);
		if (r != 0) {
			rxfilter = 0;
			allmulti = 1; /* fallback */
		}
	} else {
		/* remove rx filter */
		sc->sc_ctrl_mac_tbl_uc->nentries = 0;
		sc->sc_ctrl_mac_tbl_mc->nentries = 0;
		r = vioif_set_rx_filter(sc);
		/* what to do on failure? */
	}
	if (allmulti) {
		r = vioif_set_allmulti(sc, true);
		if (r != 0) {
			allmulti = 0;
			promisc = 1; /* fallback */
		}
	} else {
		r = vioif_set_allmulti(sc, false);
		/* what to do on failure? */
	}
	if (promisc) {
		r = vioif_set_promisc(sc, true);
	} else {
		r = vioif_set_promisc(sc, false);
	}

	return r;
}

/* change link status */
static int
vioif_updown(struct vioif_softc *sc, bool isup)
{
	struct virtio_softc *vsc = sc->sc_virtio;

	if (!(vsc->sc_features & VIRTIO_NET_F_STATUS))
		return ENODEV;
	virtio_write_device_config_1(vsc,
				     VIRTIO_NET_CONFIG_STATUS,
				     isup?VIRTIO_NET_S_LINK_UP:0);
	return 0;
}
1514