if_vioif.c revision 1.28
1/*	$NetBSD: if_vioif.c,v 1.28 2016/12/08 01:12:01 ozaki-r Exp $	*/
2
3/*
4 * Copyright (c) 2010 Minoura Makoto.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__KERNEL_RCSID(0, "$NetBSD: if_vioif.c,v 1.28 2016/12/08 01:12:01 ozaki-r Exp $");
30
31#ifdef _KERNEL_OPT
32#include "opt_net_mpsafe.h"
33#endif
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/kernel.h>
38#include <sys/bus.h>
39#include <sys/condvar.h>
40#include <sys/device.h>
41#include <sys/intr.h>
42#include <sys/kmem.h>
43#include <sys/mbuf.h>
44#include <sys/mutex.h>
45#include <sys/sockio.h>
46#include <sys/cpu.h>
47#include <sys/module.h>
48
49#include <dev/pci/pcidevs.h>
50#include <dev/pci/pcireg.h>
51#include <dev/pci/pcivar.h>
52#include <dev/pci/virtioreg.h>
53#include <dev/pci/virtiovar.h>
54
55#include <net/if.h>
56#include <net/if_media.h>
57#include <net/if_ether.h>
58
59#include <net/bpf.h>
60
61#include "ioconf.h"
62
63#ifdef NET_MPSAFE
64#define VIOIF_MPSAFE	1
65#endif
66
67#ifdef SOFTINT_INTR
68#define VIOIF_SOFTINT_INTR	1
69#endif
70
71/*
72 * if_vioifreg.h:
73 */
74/* Configuration registers */
75#define VIRTIO_NET_CONFIG_MAC		0 /* 8bit x 6byte */
76#define VIRTIO_NET_CONFIG_STATUS	6 /* 16bit */
77
78/* Feature bits */
79#define VIRTIO_NET_F_CSUM	(1<<0)
80#define VIRTIO_NET_F_GUEST_CSUM	(1<<1)
81#define VIRTIO_NET_F_MAC	(1<<5)
82#define VIRTIO_NET_F_GSO	(1<<6)
83#define VIRTIO_NET_F_GUEST_TSO4	(1<<7)
84#define VIRTIO_NET_F_GUEST_TSO6	(1<<8)
85#define VIRTIO_NET_F_GUEST_ECN	(1<<9)
86#define VIRTIO_NET_F_GUEST_UFO	(1<<10)
87#define VIRTIO_NET_F_HOST_TSO4	(1<<11)
88#define VIRTIO_NET_F_HOST_TSO6	(1<<12)
89#define VIRTIO_NET_F_HOST_ECN	(1<<13)
90#define VIRTIO_NET_F_HOST_UFO	(1<<14)
91#define VIRTIO_NET_F_MRG_RXBUF	(1<<15)
92#define VIRTIO_NET_F_STATUS	(1<<16)
93#define VIRTIO_NET_F_CTRL_VQ	(1<<17)
94#define VIRTIO_NET_F_CTRL_RX	(1<<18)
95#define VIRTIO_NET_F_CTRL_VLAN	(1<<19)
96
97#define VIRTIO_NET_FLAG_BITS \
98	VIRTIO_COMMON_FLAG_BITS \
99	"\x14""CTRL_VLAN" \
100	"\x13""CTRL_RX" \
101	"\x12""CTRL_VQ" \
102	"\x11""STATUS" \
103	"\x10""MRG_RXBUF" \
104	"\x0f""HOST_UFO" \
105	"\x0e""HOST_ECN" \
106	"\x0d""HOST_TSO6" \
107	"\x0c""HOST_TSO4" \
108	"\x0b""GUEST_UFO" \
109	"\x0a""GUEST_ECN" \
110	"\x09""GUEST_TSO6" \
111	"\x08""GUEST_TSO4" \
112	"\x07""GSO" \
113	"\x06""MAC" \
114	"\x02""GUEST_CSUM" \
115	"\x01""CSUM"
116
117/* Status */
118#define VIRTIO_NET_S_LINK_UP	1
119
120/* Packet header structure */
/*
 * Header that accompanies every frame on the rx and tx virtqueues.
 * One instance per ring slot is carved out of the DMA area set up by
 * vioif_alloc_mems().  The transmit path in this file always sends it
 * zero-filled.
 */
struct virtio_net_hdr {
	uint8_t		flags;		/* VIRTIO_NET_HDR_F_* */
	uint8_t		gso_type;	/* VIRTIO_NET_HDR_GSO_* */
	/*
	 * The remaining fields are not interpreted by the visible code.
	 * NOTE(review): meanings presumably follow the virtio network
	 * device specification -- confirm there before using them.
	 */
	uint16_t	hdr_len;
	uint16_t	gso_size;
	uint16_t	csum_start;
	uint16_t	csum_offset;
#if 0
	/* compiled out: this driver does not negotiate MRG_RXBUF */
	uint16_t	num_buffers; /* if VIRTIO_NET_F_MRG_RXBUF enabled */
#endif
} __packed;
132
133#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
134#define VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
135#define VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
136#define VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
137#define VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
138#define VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */
139
140#define VIRTIO_NET_MAX_GSO_LEN		(65536+ETHER_HDR_LEN)
141
142/* Control virtqueue */
/*
 * Command header for a control virtqueue request.  The constants below
 * are laid out as a class value followed by the indented command values
 * that belong to that class.
 */
struct virtio_net_ctrl_cmd {
	uint8_t	class;		/* VIRTIO_NET_CTRL_{RX,MAC,VLAN} */
	uint8_t	command;	/* command within the class */
} __packed;
/* class: receive mode */
#define VIRTIO_NET_CTRL_RX		0
# define VIRTIO_NET_CTRL_RX_PROMISC	0
# define VIRTIO_NET_CTRL_RX_ALLMULTI	1

/* class: MAC address filter tables */
#define VIRTIO_NET_CTRL_MAC		1
# define VIRTIO_NET_CTRL_MAC_TABLE_SET	0

/* class: VLAN filter */
#define VIRTIO_NET_CTRL_VLAN		2
# define VIRTIO_NET_CTRL_VLAN_ADD	0
# define VIRTIO_NET_CTRL_VLAN_DEL	1

/* Result of a control command (mapped for device-to-driver transfer). */
struct virtio_net_ctrl_status {
	uint8_t	ack;		/* VIRTIO_NET_OK or VIRTIO_NET_ERR */
} __packed;
#define VIRTIO_NET_OK			0
#define VIRTIO_NET_ERR			1

/* Parameter of a VIRTIO_NET_CTRL_RX class command. */
struct virtio_net_ctrl_rx {
	uint8_t	onoff;
} __packed;

/*
 * Parameter of a VIRTIO_NET_CTRL_MAC class command: an entry count
 * followed by that many Ethernet addresses.
 */
struct virtio_net_ctrl_mac_tbl {
	uint32_t nentries;
	uint8_t macs[][ETHER_ADDR_LEN];	/* flexible array, nentries long */
} __packed;

/*
 * Parameter of a VIRTIO_NET_CTRL_VLAN class command.
 * NOTE(review): not referenced by the visible part of this file.
 */
struct virtio_net_ctrl_vlan {
	uint16_t id;
} __packed;
176
177
178/*
179 * if_vioifvar.h:
180 */
/*
 * Per-instance driver state.
 */
struct vioif_softc {
	device_t		sc_dev;		/* our own device handle */

	struct virtio_softc	*sc_virtio;	/* parent virtio device */
	struct virtqueue	sc_vq[3];	/* indexed by VQ_* below */
#define VQ_RX	0
#define VQ_TX	1
#define VQ_CTRL	2

	uint8_t			sc_mac[ETHER_ADDR_LEN];
	struct ethercom		sc_ethercom;	/* embeds the struct ifnet */
	/* set to 1 by the first vioif_init() call */
	short			sc_deferred_init_done;

	/* bus_dmamem */
	/*
	 * A single DMA segment holds every buffer below; vioif_alloc_mems()
	 * carves it up in declaration order.  sc_hdrs is the base of the
	 * mapping and doubles as the rx header array.
	 */
	bus_dma_segment_t	sc_hdr_segs[1];
	struct virtio_net_hdr	*sc_hdrs;
#define sc_rx_hdrs	sc_hdrs
	struct virtio_net_hdr	*sc_tx_hdrs;
	/* the sc_ctrl_* pointers are only set when the control vq exists */
	struct virtio_net_ctrl_cmd *sc_ctrl_cmd;
	struct virtio_net_ctrl_status *sc_ctrl_status;
	struct virtio_net_ctrl_rx *sc_ctrl_rx;
	struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_uc;
	struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_mc;

	/* kmem */
	/*
	 * A single kmem allocation holds all six per-slot arrays below;
	 * sc_arrays is its base and doubles as the rx header dmamap array.
	 */
	bus_dmamap_t		*sc_arrays;
#define sc_rxhdr_dmamaps sc_arrays
	bus_dmamap_t		*sc_txhdr_dmamaps;
	bus_dmamap_t		*sc_rx_dmamaps;
	bus_dmamap_t		*sc_tx_dmamaps;
	struct mbuf		**sc_rx_mbufs;	/* NULL when the slot is empty */
	struct mbuf		**sc_tx_mbufs;

	/* dmamaps for the control vq buffers above */
	bus_dmamap_t		sc_ctrl_cmd_dmamap;
	bus_dmamap_t		sc_ctrl_status_dmamap;
	bus_dmamap_t		sc_ctrl_rx_dmamap;
	bus_dmamap_t		sc_ctrl_tbl_uc_dmamap;
	bus_dmamap_t		sc_ctrl_tbl_mc_dmamap;

	/* softint that refills the rx ring (vioif_rx_softint) */
	void			*sc_rx_softint;

	/* ownership state of the single set of sc_ctrl_* buffers */
	enum {
		FREE, INUSE, DONE
	}			sc_ctrl_inuse;
	kcondvar_t		sc_ctrl_wait;
	kmutex_t		sc_ctrl_wait_lock;
	/* both lock pointers are NULL unless built with VIOIF_MPSAFE */
	kmutex_t		*sc_tx_lock;
	kmutex_t		*sc_rx_lock;
	/* true from vioif_stop() until the next vioif_init() */
	bool			sc_stopping;
};
231#define VIRTIO_NET_TX_MAXNSEGS		(16) /* XXX */
232#define VIRTIO_NET_CTRL_MAC_MAXENTRIES	(64) /* XXX */
233
/*
 * Tx/rx lock helpers.  A NULL lock pointer means "no locking": the
 * lock/unlock forms do nothing and the *_LOCKED() predicates are true.
 *
 * The lock/unlock forms are wrapped in do { } while (0) so that each one
 * expands to exactly one statement and cannot capture a following "else"
 * when it is used as the body of an unbraced if.
 */
#define VIOIF_TX_LOCK(_sc)						\
	do {								\
		if ((_sc)->sc_tx_lock)					\
			mutex_enter((_sc)->sc_tx_lock);			\
	} while (0)
#define VIOIF_TX_UNLOCK(_sc)						\
	do {								\
		if ((_sc)->sc_tx_lock)					\
			mutex_exit((_sc)->sc_tx_lock);			\
	} while (0)
#define VIOIF_TX_LOCKED(_sc)	(!(_sc)->sc_tx_lock || mutex_owned((_sc)->sc_tx_lock))
#define VIOIF_RX_LOCK(_sc)						\
	do {								\
		if ((_sc)->sc_rx_lock)					\
			mutex_enter((_sc)->sc_rx_lock);			\
	} while (0)
#define VIOIF_RX_UNLOCK(_sc)						\
	do {								\
		if ((_sc)->sc_rx_lock)					\
			mutex_exit((_sc)->sc_rx_lock);			\
	} while (0)
#define VIOIF_RX_LOCKED(_sc)	(!(_sc)->sc_rx_lock || mutex_owned((_sc)->sc_rx_lock))
240
241/* cfattach interface functions */
242static int	vioif_match(device_t, cfdata_t, void *);
243static void	vioif_attach(device_t, device_t, void *);
244static void	vioif_deferred_init(device_t);
245
246/* ifnet interface functions */
247static int	vioif_init(struct ifnet *);
248static void	vioif_stop(struct ifnet *, int);
249static void	vioif_start(struct ifnet *);
250static int	vioif_ioctl(struct ifnet *, u_long, void *);
251static void	vioif_watchdog(struct ifnet *);
252
253/* rx */
254static int	vioif_add_rx_mbuf(struct vioif_softc *, int);
255static void	vioif_free_rx_mbuf(struct vioif_softc *, int);
256static void	vioif_populate_rx_mbufs(struct vioif_softc *);
257static void	vioif_populate_rx_mbufs_locked(struct vioif_softc *);
258static int	vioif_rx_deq(struct vioif_softc *);
259static int	vioif_rx_deq_locked(struct vioif_softc *);
260static int	vioif_rx_vq_done(struct virtqueue *);
261static void	vioif_rx_softint(void *);
262static void	vioif_rx_drain(struct vioif_softc *);
263
264/* tx */
265static int	vioif_tx_vq_done(struct virtqueue *);
266static int	vioif_tx_vq_done_locked(struct virtqueue *);
267static void	vioif_tx_drain(struct vioif_softc *);
268
269/* other control */
270static int	vioif_updown(struct vioif_softc *, bool);
271static int	vioif_ctrl_rx(struct vioif_softc *, int, bool);
272static int	vioif_set_promisc(struct vioif_softc *, bool);
273static int	vioif_set_allmulti(struct vioif_softc *, bool);
274static int	vioif_set_rx_filter(struct vioif_softc *);
275static int	vioif_rx_filter(struct vioif_softc *);
276static int	vioif_ctrl_vq_done(struct virtqueue *);
277
278CFATTACH_DECL_NEW(vioif, sizeof(struct vioif_softc),
279		  vioif_match, vioif_attach, NULL, NULL);
280
281static int
282vioif_match(device_t parent, cfdata_t match, void *aux)
283{
284	struct virtio_softc *va = aux;
285
286	if (va->sc_childdevid == PCI_PRODUCT_VIRTIO_NETWORK)
287		return 1;
288
289	return 0;
290}
291
292/* allocate memory */
293/*
294 * dma memory is used for:
 *   sc_rx_hdrs[slot]:	 metadata array for received frames (READ)
296 *   sc_tx_hdrs[slot]:	 metadata array for frames to be sent (WRITE)
297 *   sc_ctrl_cmd:	 command to be sent via ctrl vq (WRITE)
298 *   sc_ctrl_status:	 return value for a command via ctrl vq (READ)
299 *   sc_ctrl_rx:	 parameter for a VIRTIO_NET_CTRL_RX class command
300 *			 (WRITE)
301 *   sc_ctrl_mac_tbl_uc: unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
302 *			 class command (WRITE)
303 *   sc_ctrl_mac_tbl_mc: multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
304 *			 class command (WRITE)
305 * sc_ctrl_* structures are allocated only one each; they are protected by
306 * sc_ctrl_inuse variable and sc_ctrl_wait condvar.
307 */
308/*
309 * dynamically allocated memory is used for:
310 *   sc_rxhdr_dmamaps[slot]:	bus_dmamap_t array for sc_rx_hdrs[slot]
311 *   sc_txhdr_dmamaps[slot]:	bus_dmamap_t array for sc_tx_hdrs[slot]
 *   sc_rx_dmamaps[slot]:	bus_dmamap_t array for received payload
 *   sc_tx_dmamaps[slot]:	bus_dmamap_t array for sent payload
 *   sc_rx_mbufs[slot]:		mbuf pointer array for received frames
315 *   sc_tx_mbufs[slot]:		mbuf pointer array for sent frames
316 */
317static int
318vioif_alloc_mems(struct vioif_softc *sc)
319{
320	struct virtio_softc *vsc = sc->sc_virtio;
321	int allocsize, allocsize2, r, rsegs, i;
322	void *vaddr;
323	intptr_t p;
324	int rxqsize, txqsize;
325
326	rxqsize = vsc->sc_vqs[VQ_RX].vq_num;
327	txqsize = vsc->sc_vqs[VQ_TX].vq_num;
328
329	allocsize = sizeof(struct virtio_net_hdr) * rxqsize;
330	allocsize += sizeof(struct virtio_net_hdr) * txqsize;
331	if (vsc->sc_nvqs == 3) {
332		allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1;
333		allocsize += sizeof(struct virtio_net_ctrl_status) * 1;
334		allocsize += sizeof(struct virtio_net_ctrl_rx) * 1;
335		allocsize += sizeof(struct virtio_net_ctrl_mac_tbl)
336			+ sizeof(struct virtio_net_ctrl_mac_tbl)
337			+ ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES;
338	}
339	r = bus_dmamem_alloc(vsc->sc_dmat, allocsize, 0, 0,
340			     &sc->sc_hdr_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
341	if (r != 0) {
342		aprint_error_dev(sc->sc_dev,
343				 "DMA memory allocation failed, size %d, "
344				 "error code %d\n", allocsize, r);
345		goto err_none;
346	}
347	r = bus_dmamem_map(vsc->sc_dmat,
348			   &sc->sc_hdr_segs[0], 1, allocsize,
349			   &vaddr, BUS_DMA_NOWAIT);
350	if (r != 0) {
351		aprint_error_dev(sc->sc_dev,
352				 "DMA memory map failed, "
353				 "error code %d\n", r);
354		goto err_dmamem_alloc;
355	}
356	sc->sc_hdrs = vaddr;
357	memset(vaddr, 0, allocsize);
358	p = (intptr_t) vaddr;
359	p += sizeof(struct virtio_net_hdr) * rxqsize;
360#define P(name,size)	do { sc->sc_ ##name = (void*) p;	\
361			     p += size; } while (0)
362	P(tx_hdrs, sizeof(struct virtio_net_hdr) * txqsize);
363	if (vsc->sc_nvqs == 3) {
364		P(ctrl_cmd, sizeof(struct virtio_net_ctrl_cmd));
365		P(ctrl_status, sizeof(struct virtio_net_ctrl_status));
366		P(ctrl_rx, sizeof(struct virtio_net_ctrl_rx));
367		P(ctrl_mac_tbl_uc, sizeof(struct virtio_net_ctrl_mac_tbl));
368		P(ctrl_mac_tbl_mc,
369		  (sizeof(struct virtio_net_ctrl_mac_tbl)
370		   + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES));
371	}
372#undef P
373
374	allocsize2 = sizeof(bus_dmamap_t) * (rxqsize + txqsize);
375	allocsize2 += sizeof(bus_dmamap_t) * (rxqsize + txqsize);
376	allocsize2 += sizeof(struct mbuf*) * (rxqsize + txqsize);
377	sc->sc_arrays = kmem_zalloc(allocsize2, KM_SLEEP);
378	if (sc->sc_arrays == NULL)
379		goto err_dmamem_map;
380	sc->sc_txhdr_dmamaps = sc->sc_arrays + rxqsize;
381	sc->sc_rx_dmamaps = sc->sc_txhdr_dmamaps + txqsize;
382	sc->sc_tx_dmamaps = sc->sc_rx_dmamaps + rxqsize;
383	sc->sc_rx_mbufs = (void*) (sc->sc_tx_dmamaps + txqsize);
384	sc->sc_tx_mbufs = sc->sc_rx_mbufs + rxqsize;
385
386#define C(map, buf, size, nsegs, rw, usage)				\
387	do {								\
388		r = bus_dmamap_create(vsc->sc_dmat, size, nsegs, size, 0, \
389				      BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW,	\
390				      &sc->sc_ ##map);			\
391		if (r != 0) {						\
392			aprint_error_dev(sc->sc_dev,			\
393					 usage " dmamap creation failed, " \
394					 "error code %d\n", r);		\
395					 goto err_reqs;			\
396		}							\
397	} while (0)
398#define C_L1(map, buf, size, nsegs, rw, usage)				\
399	C(map, buf, size, nsegs, rw, usage);				\
400	do {								\
401		r = bus_dmamap_load(vsc->sc_dmat, sc->sc_ ##map,	\
402				    &sc->sc_ ##buf, size, NULL,		\
403				    BUS_DMA_ ##rw | BUS_DMA_NOWAIT);	\
404		if (r != 0) {						\
405			aprint_error_dev(sc->sc_dev,			\
406					 usage " dmamap load failed, "	\
407					 "error code %d\n", r);		\
408			goto err_reqs;					\
409		}							\
410	} while (0)
411#define C_L2(map, buf, size, nsegs, rw, usage)				\
412	C(map, buf, size, nsegs, rw, usage);				\
413	do {								\
414		r = bus_dmamap_load(vsc->sc_dmat, sc->sc_ ##map,	\
415				    sc->sc_ ##buf, size, NULL,		\
416				    BUS_DMA_ ##rw | BUS_DMA_NOWAIT);	\
417		if (r != 0) {						\
418			aprint_error_dev(sc->sc_dev,			\
419					 usage " dmamap load failed, "	\
420					 "error code %d\n", r);		\
421			goto err_reqs;					\
422		}							\
423	} while (0)
424	for (i = 0; i < rxqsize; i++) {
425		C_L1(rxhdr_dmamaps[i], rx_hdrs[i],
426		    sizeof(struct virtio_net_hdr), 1,
427		    READ, "rx header");
428		C(rx_dmamaps[i], NULL, MCLBYTES, 1, 0, "rx payload");
429	}
430
431	for (i = 0; i < txqsize; i++) {
432		C_L1(txhdr_dmamaps[i], tx_hdrs[i],
433		    sizeof(struct virtio_net_hdr), 1,
434		    WRITE, "tx header");
435		C(tx_dmamaps[i], NULL, ETHER_MAX_LEN, 16 /* XXX */, 0,
436		  "tx payload");
437	}
438
439	if (vsc->sc_nvqs == 3) {
440		/* control vq class & command */
441		C_L2(ctrl_cmd_dmamap, ctrl_cmd,
442		    sizeof(struct virtio_net_ctrl_cmd), 1, WRITE,
443		    "control command");
444
445		/* control vq status */
446		C_L2(ctrl_status_dmamap, ctrl_status,
447		    sizeof(struct virtio_net_ctrl_status), 1, READ,
448		    "control status");
449
450		/* control vq rx mode command parameter */
451		C_L2(ctrl_rx_dmamap, ctrl_rx,
452		    sizeof(struct virtio_net_ctrl_rx), 1, WRITE,
453		    "rx mode control command");
454
455		/* control vq MAC filter table for unicast */
456		/* do not load now since its length is variable */
457		C(ctrl_tbl_uc_dmamap, NULL,
458		  sizeof(struct virtio_net_ctrl_mac_tbl) + 0, 1, WRITE,
459		  "unicast MAC address filter command");
460
461		/* control vq MAC filter table for multicast */
462		C(ctrl_tbl_mc_dmamap, NULL,
463		  (sizeof(struct virtio_net_ctrl_mac_tbl)
464		   + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES),
465		  1, WRITE, "multicast MAC address filter command");
466	}
467#undef C_L2
468#undef C_L1
469#undef C
470
471	return 0;
472
473err_reqs:
474#define D(map)								\
475	do {								\
476		if (sc->sc_ ##map) {					\
477			bus_dmamap_destroy(vsc->sc_dmat, sc->sc_ ##map); \
478			sc->sc_ ##map = NULL;				\
479		}							\
480	} while (0)
481	D(ctrl_tbl_mc_dmamap);
482	D(ctrl_tbl_uc_dmamap);
483	D(ctrl_rx_dmamap);
484	D(ctrl_status_dmamap);
485	D(ctrl_cmd_dmamap);
486	for (i = 0; i < txqsize; i++) {
487		D(tx_dmamaps[i]);
488		D(txhdr_dmamaps[i]);
489	}
490	for (i = 0; i < rxqsize; i++) {
491		D(rx_dmamaps[i]);
492		D(rxhdr_dmamaps[i]);
493	}
494#undef D
495	if (sc->sc_arrays) {
496		kmem_free(sc->sc_arrays, allocsize2);
497		sc->sc_arrays = 0;
498	}
499err_dmamem_map:
500	bus_dmamem_unmap(vsc->sc_dmat, sc->sc_hdrs, allocsize);
501err_dmamem_alloc:
502	bus_dmamem_free(vsc->sc_dmat, &sc->sc_hdr_segs[0], 1);
503err_none:
504	return -1;
505}
506
507static void
508vioif_attach(device_t parent, device_t self, void *aux)
509{
510	struct vioif_softc *sc = device_private(self);
511	struct virtio_softc *vsc = device_private(parent);
512	uint32_t features;
513	char buf[256];
514	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
515	u_int flags;
516	int r;
517
518	if (vsc->sc_child != NULL) {
519		aprint_normal(": child already attached for %s; "
520			      "something wrong...\n",
521			      device_xname(parent));
522		return;
523	}
524
525	sc->sc_dev = self;
526	sc->sc_virtio = vsc;
527
528	vsc->sc_child = self;
529	vsc->sc_ipl = IPL_NET;
530	vsc->sc_vqs = &sc->sc_vq[0];
531	vsc->sc_config_change = NULL;
532	vsc->sc_intrhand = virtio_vq_intr;
533	vsc->sc_flags = 0;
534
535#ifdef VIOIF_MPSAFE
536	vsc->sc_flags |= VIRTIO_F_PCI_INTR_MPSAFE;
537#endif
538#ifdef VIOIF_SOFTINT_INTR
539	vsc->sc_flags |= VIRTIO_F_PCI_INTR_SOFTINT;
540#endif
541	vsc->sc_flags |= VIRTIO_F_PCI_INTR_MSIX;
542
543	features = virtio_negotiate_features(vsc,
544					     (VIRTIO_NET_F_MAC |
545					      VIRTIO_NET_F_STATUS |
546					      VIRTIO_NET_F_CTRL_VQ |
547					      VIRTIO_NET_F_CTRL_RX |
548					      VIRTIO_F_NOTIFY_ON_EMPTY));
549	if (features & VIRTIO_NET_F_MAC) {
550		sc->sc_mac[0] = virtio_read_device_config_1(vsc,
551						    VIRTIO_NET_CONFIG_MAC+0);
552		sc->sc_mac[1] = virtio_read_device_config_1(vsc,
553						    VIRTIO_NET_CONFIG_MAC+1);
554		sc->sc_mac[2] = virtio_read_device_config_1(vsc,
555						    VIRTIO_NET_CONFIG_MAC+2);
556		sc->sc_mac[3] = virtio_read_device_config_1(vsc,
557						    VIRTIO_NET_CONFIG_MAC+3);
558		sc->sc_mac[4] = virtio_read_device_config_1(vsc,
559						    VIRTIO_NET_CONFIG_MAC+4);
560		sc->sc_mac[5] = virtio_read_device_config_1(vsc,
561						    VIRTIO_NET_CONFIG_MAC+5);
562	} else {
563		/* code stolen from sys/net/if_tap.c */
564		struct timeval tv;
565		uint32_t ui;
566		getmicrouptime(&tv);
567		ui = (tv.tv_sec ^ tv.tv_usec) & 0xffffff;
568		memcpy(sc->sc_mac+3, (uint8_t *)&ui, 3);
569		virtio_write_device_config_1(vsc,
570					     VIRTIO_NET_CONFIG_MAC+0,
571					     sc->sc_mac[0]);
572		virtio_write_device_config_1(vsc,
573					     VIRTIO_NET_CONFIG_MAC+1,
574					     sc->sc_mac[1]);
575		virtio_write_device_config_1(vsc,
576					     VIRTIO_NET_CONFIG_MAC+2,
577					     sc->sc_mac[2]);
578		virtio_write_device_config_1(vsc,
579					     VIRTIO_NET_CONFIG_MAC+3,
580					     sc->sc_mac[3]);
581		virtio_write_device_config_1(vsc,
582					     VIRTIO_NET_CONFIG_MAC+4,
583					     sc->sc_mac[4]);
584		virtio_write_device_config_1(vsc,
585					     VIRTIO_NET_CONFIG_MAC+5,
586					     sc->sc_mac[5]);
587	}
588	aprint_normal(": Ethernet address %s\n", ether_sprintf(sc->sc_mac));
589	snprintb(buf, sizeof(buf), VIRTIO_NET_FLAG_BITS, features);
590	aprint_normal_dev(self, "Features: %s\n", buf);
591	aprint_naive("\n");
592
593#ifdef VIOIF_MPSAFE
594	sc->sc_tx_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
595	sc->sc_rx_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
596#else
597	sc->sc_tx_lock = NULL;
598	sc->sc_rx_lock = NULL;
599#endif
600	sc->sc_stopping = false;
601
602	/*
603	 * Allocating a virtqueue for Rx
604	 */
605	r = virtio_alloc_vq(vsc, &sc->sc_vq[VQ_RX], 0,
606	    MCLBYTES+sizeof(struct virtio_net_hdr), 2, "rx");
607	if (r != 0)
608		goto err;
609	vsc->sc_nvqs = 1;
610	sc->sc_vq[VQ_RX].vq_done = vioif_rx_vq_done;
611
612	/*
613	 * Allocating a virtqueue for Tx
614	 */
615	r = virtio_alloc_vq(vsc, &sc->sc_vq[VQ_TX], 1,
616	    (sizeof(struct virtio_net_hdr) + (ETHER_MAX_LEN - ETHER_HDR_LEN)),
617	    VIRTIO_NET_TX_MAXNSEGS + 1, "tx");
618	if (r != 0)
619		goto err;
620	vsc->sc_nvqs = 2;
621	sc->sc_vq[VQ_TX].vq_done = vioif_tx_vq_done;
622
623	virtio_start_vq_intr(vsc, &sc->sc_vq[VQ_RX]);
624	virtio_stop_vq_intr(vsc, &sc->sc_vq[VQ_TX]); /* not urgent; do it later */
625
626	if ((features & VIRTIO_NET_F_CTRL_VQ) &&
627	    (features & VIRTIO_NET_F_CTRL_RX)) {
628		/*
629		 * Allocating a virtqueue for control channel
630		 */
631		r = virtio_alloc_vq(vsc, &sc->sc_vq[VQ_CTRL], 2,
632		    NBPG, 1, "control");
633		if (r != 0) {
634			aprint_error_dev(self, "failed to allocate "
635			    "a virtqueue for control channel\n");
636			goto skip;
637		}
638
639		sc->sc_vq[VQ_CTRL].vq_done = vioif_ctrl_vq_done;
640		cv_init(&sc->sc_ctrl_wait, "ctrl_vq");
641		mutex_init(&sc->sc_ctrl_wait_lock, MUTEX_DEFAULT, IPL_NET);
642		sc->sc_ctrl_inuse = FREE;
643		virtio_start_vq_intr(vsc, &sc->sc_vq[VQ_CTRL]);
644		vsc->sc_nvqs = 3;
645	}
646skip:
647
648#ifdef VIOIF_MPSAFE
649	flags = SOFTINT_NET | SOFTINT_MPSAFE;
650#else
651	flags = SOFTINT_NET;
652#endif
653	sc->sc_rx_softint = softint_establish(flags, vioif_rx_softint, sc);
654	if (sc->sc_rx_softint == NULL) {
655		aprint_error_dev(self, "cannot establish softint\n");
656		goto err;
657	}
658
659	if (vioif_alloc_mems(sc) < 0)
660		goto err;
661
662	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
663	ifp->if_softc = sc;
664	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
665	ifp->if_start = vioif_start;
666	ifp->if_ioctl = vioif_ioctl;
667	ifp->if_init = vioif_init;
668	ifp->if_stop = vioif_stop;
669	ifp->if_capabilities = 0;
670	ifp->if_watchdog = vioif_watchdog;
671
672	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
673
674	if_attach(ifp);
675	if_deferred_start_init(ifp, NULL);
676	ether_ifattach(ifp, sc->sc_mac);
677
678	return;
679
680err:
681	if (sc->sc_tx_lock)
682		mutex_obj_free(sc->sc_tx_lock);
683	if (sc->sc_rx_lock)
684		mutex_obj_free(sc->sc_rx_lock);
685
686	if (vsc->sc_nvqs == 3) {
687		cv_destroy(&sc->sc_ctrl_wait);
688		mutex_destroy(&sc->sc_ctrl_wait_lock);
689	}
690
691	while (vsc->sc_nvqs > 0)
692		virtio_free_vq(vsc, &sc->sc_vq[--vsc->sc_nvqs]);
693
694	vsc->sc_child = (void*)1;
695	return;
696}
697
698/* we need interrupts to make promiscuous mode off */
699static void
700vioif_deferred_init(device_t self)
701{
702	struct vioif_softc *sc = device_private(self);
703	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
704	int r;
705
706	if (ifp->if_flags & IFF_PROMISC)
707		return;
708
709	r =  vioif_set_promisc(sc, false);
710	if (r != 0)
711		aprint_error_dev(self, "resetting promisc mode failed, "
712				 "errror code %d\n", r);
713}
714
715/*
716 * Interface functions for ifnet
717 */
718static int
719vioif_init(struct ifnet *ifp)
720{
721	struct vioif_softc *sc = ifp->if_softc;
722
723	vioif_stop(ifp, 0);
724
725	if (!sc->sc_deferred_init_done) {
726		struct virtio_softc *vsc = sc->sc_virtio;
727
728		sc->sc_deferred_init_done = 1;
729		if (vsc->sc_nvqs == 3)
730			vioif_deferred_init(sc->sc_dev);
731	}
732
733	/* Have to set false before vioif_populate_rx_mbufs */
734	sc->sc_stopping = false;
735
736	vioif_populate_rx_mbufs(sc);
737
738	vioif_updown(sc, true);
739	ifp->if_flags |= IFF_RUNNING;
740	ifp->if_flags &= ~IFF_OACTIVE;
741	vioif_rx_filter(sc);
742
743	return 0;
744}
745
/*
 * if_stop handler: quiesce the device and leave it reset and
 * re-initialized but idle.
 *
 * ifp:     interface to stop.
 * disable: when non-zero, also free every mbuf still sitting in an rx
 *          slot.
 *
 * The order of the steps below matters: sc_stopping must be visible
 * before the reset, and the rings are only drained once the device can
 * no longer touch them.
 */
static void
vioif_stop(struct ifnet *ifp, int disable)
{
	struct vioif_softc *sc = ifp->if_softc;
	struct virtio_softc *vsc = sc->sc_virtio;

	/* Take the locks to ensure that ongoing TX/RX finish */
	VIOIF_TX_LOCK(sc);
	VIOIF_RX_LOCK(sc);
	sc->sc_stopping = true;
	VIOIF_RX_UNLOCK(sc);
	VIOIF_TX_UNLOCK(sc);

	/* only way to stop I/O and DMA is resetting... */
	virtio_reset(vsc);
	/* collect whatever is still dequeueable from the rx ring */
	vioif_rx_deq(sc);
	vioif_tx_drain(sc);
	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);

	if (disable)
		vioif_rx_drain(sc);

	/*
	 * Bring the device back up after the reset: offer the feature set
	 * recorded in vsc->sc_features again and restore the per-queue
	 * interrupt settings used at attach time (rx on, tx off, control
	 * on when that queue exists).
	 */
	virtio_reinit_start(vsc);
	virtio_negotiate_features(vsc, vsc->sc_features);
	virtio_start_vq_intr(vsc, &sc->sc_vq[VQ_RX]);
	virtio_stop_vq_intr(vsc, &sc->sc_vq[VQ_TX]);
	if (vsc->sc_nvqs >= 3)
		virtio_start_vq_intr(vsc, &sc->sc_vq[VQ_CTRL]);
	virtio_reinit_end(vsc);
	vioif_updown(sc, false);
}
777
/*
 * if_start handler: move packets from the interface send queue onto the
 * tx virtqueue.
 *
 * Runs under the tx lock.  Does nothing unless the interface is
 * IFF_RUNNING without IFF_OACTIVE and the driver is not stopping.  Each
 * packet occupies one ring slot and is enqueued as two dmamaps: the
 * (zeroed) virtio_net_hdr followed by the payload.
 *
 * When the ring is full, completed transmissions are reaped once and the
 * enqueue is retried.  The retry counter is never reset, so only a single
 * retry happens per call.  A packet that still cannot be queued is
 * dropped and IFF_OACTIVE is set.
 */
static void
vioif_start(struct ifnet *ifp)
{
	struct vioif_softc *sc = ifp->if_softc;
	struct virtio_softc *vsc = sc->sc_virtio;
	struct virtqueue *vq = &sc->sc_vq[VQ_TX];
	struct mbuf *m;
	int queued = 0, retry = 0;

	VIOIF_TX_LOCK(sc);

	if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)
		goto out;

	if (sc->sc_stopping)
		goto out;

	for (;;) {
		int slot, r;

		IFQ_DEQUEUE(&ifp->if_snd, m);

		if (m == NULL)
			break;

retry:
		r = virtio_enqueue_prep(vsc, vq, &slot);
		if (r == EAGAIN) {
			/* no free slot: reap finished transmissions */
			ifp->if_flags |= IFF_OACTIVE;
			vioif_tx_vq_done_locked(vq);
			if (retry++ == 0)
				goto retry;
			else
				break;
		}
		if (r != 0)
			panic("enqueue_prep for a tx buffer");
		r = bus_dmamap_load_mbuf(vsc->sc_dmat,
					 sc->sc_tx_dmamaps[slot],
					 m, BUS_DMA_WRITE|BUS_DMA_NOWAIT);
		if (r != 0) {
			/* give the prepared slot back before bailing out */
			virtio_enqueue_abort(vsc, vq, slot);
			aprint_error_dev(sc->sc_dev,
			    "tx dmamap load failed, error code %d\n", r);
			break;
		}
		/* one descriptor per payload segment plus one for the header */
		r = virtio_enqueue_reserve(vsc, vq, slot,
					sc->sc_tx_dmamaps[slot]->dm_nsegs + 1);
		if (r != 0) {
			/*
			 * NOTE(review): no virtio_enqueue_abort() here;
			 * presumably virtio_enqueue_reserve() releases the
			 * slot itself on failure -- confirm in virtio.c.
			 */
			bus_dmamap_unload(vsc->sc_dmat,
					  sc->sc_tx_dmamaps[slot]);
			ifp->if_flags |= IFF_OACTIVE;
			vioif_tx_vq_done_locked(vq);
			if (retry++ == 0)
				goto retry;
			else
				break;
		}

		/* remember the mbuf so it can be freed on completion */
		sc->sc_tx_mbufs[slot] = m;

		memset(&sc->sc_tx_hdrs[slot], 0, sizeof(struct virtio_net_hdr));
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_tx_dmamaps[slot],
				0, sc->sc_tx_dmamaps[slot]->dm_mapsize,
				BUS_DMASYNC_PREWRITE);
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_txhdr_dmamaps[slot],
				0, sc->sc_txhdr_dmamaps[slot]->dm_mapsize,
				BUS_DMASYNC_PREWRITE);
		/* header first, then payload */
		virtio_enqueue(vsc, vq, slot, sc->sc_txhdr_dmamaps[slot], true);
		virtio_enqueue(vsc, vq, slot, sc->sc_tx_dmamaps[slot], true);
		virtio_enqueue_commit(vsc, vq, slot, false);
		queued++;
		bpf_mtap(ifp, m);
	}

	/* the loop was left with a packet in hand: drop it */
	if (m != NULL) {
		ifp->if_flags |= IFF_OACTIVE;
		m_freem(m);
	}

	if (queued > 0) {
		/*
		 * NOTE(review): slot -1 with the last argument true is
		 * presumably a single device notification for the whole
		 * batch -- confirm in virtio.c.
		 */
		virtio_enqueue_commit(vsc, vq, -1, true);
		/* arm the watchdog, which reaps tx completions */
		ifp->if_timer = 5;
	}

out:
	VIOIF_TX_UNLOCK(sc);
}
866
867static int
868vioif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
869{
870	int s, r;
871
872	s = splnet();
873
874	r = ether_ioctl(ifp, cmd, data);
875	if ((r == 0 && cmd == SIOCSIFFLAGS) ||
876	    (r == ENETRESET && (cmd == SIOCADDMULTI || cmd == SIOCDELMULTI))) {
877		if (ifp->if_flags & IFF_RUNNING)
878			r = vioif_rx_filter(ifp->if_softc);
879		else
880			r = 0;
881	}
882
883	splx(s);
884
885	return r;
886}
887
888void
889vioif_watchdog(struct ifnet *ifp)
890{
891	struct vioif_softc *sc = ifp->if_softc;
892
893	if (ifp->if_flags & IFF_RUNNING)
894		vioif_tx_vq_done(&sc->sc_vq[VQ_TX]);
895}
896
897
898/*
 * Receive implementation
900 */
/* allocate and initialize a mbuf for receive */
902static int
903vioif_add_rx_mbuf(struct vioif_softc *sc, int i)
904{
905	struct mbuf *m;
906	int r;
907
908	MGETHDR(m, M_DONTWAIT, MT_DATA);
909	if (m == NULL)
910		return ENOBUFS;
911	MCLGET(m, M_DONTWAIT);
912	if ((m->m_flags & M_EXT) == 0) {
913		m_freem(m);
914		return ENOBUFS;
915	}
916	sc->sc_rx_mbufs[i] = m;
917	m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
918	r = bus_dmamap_load_mbuf(sc->sc_virtio->sc_dmat,
919				 sc->sc_rx_dmamaps[i],
920				 m, BUS_DMA_READ|BUS_DMA_NOWAIT);
921	if (r) {
922		m_freem(m);
923		sc->sc_rx_mbufs[i] = 0;
924		return r;
925	}
926
927	return 0;
928}
929
/* free a mbuf for receive */
931static void
932vioif_free_rx_mbuf(struct vioif_softc *sc, int i)
933{
934	bus_dmamap_unload(sc->sc_virtio->sc_dmat, sc->sc_rx_dmamaps[i]);
935	m_freem(sc->sc_rx_mbufs[i]);
936	sc->sc_rx_mbufs[i] = NULL;
937}
938
/* add mbufs for all the empty receive slots */
/*
 * Locking wrapper: take the rx lock, refill the rx ring via
 * vioif_populate_rx_mbufs_locked(), and release the lock again.
 */
static void
vioif_populate_rx_mbufs(struct vioif_softc *sc)
{
	VIOIF_RX_LOCK(sc);
	vioif_populate_rx_mbufs_locked(sc);
	VIOIF_RX_UNLOCK(sc);
}
947
948static void
949vioif_populate_rx_mbufs_locked(struct vioif_softc *sc)
950{
951	struct virtio_softc *vsc = sc->sc_virtio;
952	int i, r, ndone = 0;
953	struct virtqueue *vq = &sc->sc_vq[VQ_RX];
954
955	KASSERT(VIOIF_RX_LOCKED(sc));
956
957	if (sc->sc_stopping)
958		return;
959
960	for (i = 0; i < vq->vq_num; i++) {
961		int slot;
962		r = virtio_enqueue_prep(vsc, vq, &slot);
963		if (r == EAGAIN)
964			break;
965		if (r != 0)
966			panic("enqueue_prep for rx buffers");
967		if (sc->sc_rx_mbufs[slot] == NULL) {
968			r = vioif_add_rx_mbuf(sc, slot);
969			if (r != 0) {
970				printf("%s: rx mbuf allocation failed, "
971				       "error code %d\n",
972				       device_xname(sc->sc_dev), r);
973				break;
974			}
975		}
976		r = virtio_enqueue_reserve(vsc, vq, slot,
977					sc->sc_rx_dmamaps[slot]->dm_nsegs + 1);
978		if (r != 0) {
979			vioif_free_rx_mbuf(sc, slot);
980			break;
981		}
982		bus_dmamap_sync(vsc->sc_dmat, sc->sc_rxhdr_dmamaps[slot],
983			0, sizeof(struct virtio_net_hdr), BUS_DMASYNC_PREREAD);
984		bus_dmamap_sync(vsc->sc_dmat, sc->sc_rx_dmamaps[slot],
985			0, MCLBYTES, BUS_DMASYNC_PREREAD);
986		virtio_enqueue(vsc, vq, slot, sc->sc_rxhdr_dmamaps[slot], false);
987		virtio_enqueue(vsc, vq, slot, sc->sc_rx_dmamaps[slot], false);
988		virtio_enqueue_commit(vsc, vq, slot, false);
989		ndone++;
990	}
991	if (ndone > 0)
992		virtio_enqueue_commit(vsc, vq, -1, true);
993}
994
/* dequeue received packets */
/*
 * Locking wrapper around vioif_rx_deq_locked() for the stop path; it
 * asserts that sc_stopping is already set.
 *
 * Returns the value of vioif_rx_deq_locked(): 1 if at least one frame
 * was dequeued, 0 otherwise.
 */
static int
vioif_rx_deq(struct vioif_softc *sc)
{
	int r;

	KASSERT(sc->sc_stopping);

	VIOIF_RX_LOCK(sc);
	r = vioif_rx_deq_locked(sc);
	VIOIF_RX_UNLOCK(sc);

	return r;
}
1009
/* dequeue received packets */
/*
 * Pull completed buffers off the rx virtqueue and pass each frame to the
 * network stack.
 *
 * Must be called with the rx lock held.  The lock is dropped around the
 * hand-off to the stack and taken again afterwards; if the driver
 * started stopping in the meantime the loop ends early.
 *
 * Returns 1 if at least one frame was dequeued, 0 otherwise.
 */
static int
vioif_rx_deq_locked(struct vioif_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	struct virtqueue *vq = &sc->sc_vq[VQ_RX];
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	struct mbuf *m;
	int r = 0;
	int slot, len;

	KASSERT(VIOIF_RX_LOCKED(sc));

	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
		/*
		 * The reported length includes the virtio_net_hdr.
		 * NOTE(review): a reported length shorter than the header
		 * would make len negative; it is not checked here.
		 */
		len -= sizeof(struct virtio_net_hdr);
		r = 1;
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_rxhdr_dmamaps[slot],
				0, sizeof(struct virtio_net_hdr),
				BUS_DMASYNC_POSTREAD);
		bus_dmamap_sync(vsc->sc_dmat, sc->sc_rx_dmamaps[slot],
				0, MCLBYTES,
				BUS_DMASYNC_POSTREAD);
		m = sc->sc_rx_mbufs[slot];
		KASSERT(m != NULL);
		/* detach the mbuf from the slot; the stack owns it from here */
		bus_dmamap_unload(vsc->sc_dmat, sc->sc_rx_dmamaps[slot]);
		sc->sc_rx_mbufs[slot] = 0;
		virtio_dequeue_commit(vsc, vq, slot);
		m_set_rcvif(m, ifp);
		m->m_len = m->m_pkthdr.len = len;
		ifp->if_ipackets++;
		bpf_mtap(ifp, m);

		/* do not hold the rx lock while calling into the stack */
		VIOIF_RX_UNLOCK(sc);
		if_percpuq_enqueue(ifp->if_percpuq, m);
		VIOIF_RX_LOCK(sc);

		if (sc->sc_stopping)
			break;
	}

	return r;
}
1052
1053/* rx interrupt; call _dequeue above and schedule a softint */
/*
 * Completion handler for the rx virtqueue (installed as vq_done).
 *
 * Under the rx lock, and unless the driver is stopping, dequeues the
 * received frames.  If anything was dequeued the ring is refilled:
 * directly when built with VIOIF_SOFTINT_INTR, otherwise by scheduling
 * the rx softint.
 *
 * Returns the result of vioif_rx_deq_locked(), or 0 when stopping.
 */
static int
vioif_rx_vq_done(struct virtqueue *vq)
{
	struct virtio_softc *vsc = vq->vq_owner;
	struct vioif_softc *sc = device_private(vsc->sc_child);
	int r = 0;

#ifdef VIOIF_SOFTINT_INTR
	/* in this configuration we must not be in hard interrupt context */
	KASSERT(!cpu_intr_p());
#endif

	VIOIF_RX_LOCK(sc);

	if (sc->sc_stopping)
		goto out;

	r = vioif_rx_deq_locked(sc);
	/* the body of this if is selected by the preprocessor */
	if (r)
#ifdef VIOIF_SOFTINT_INTR
		vioif_populate_rx_mbufs_locked(sc);
#else
		softint_schedule(sc->sc_rx_softint);
#endif

out:
	VIOIF_RX_UNLOCK(sc);
	return r;
}
1082
/*
 * softint: enqueue receive requests for new incoming packets.
 * arg is the struct vioif_softc of the interface.
 */
static void
vioif_rx_softint(void *arg)
{
	vioif_populate_rx_mbufs(arg);
}
1091
1092/* free all the mbufs; called from if_stop(disable) */
1093static void
1094vioif_rx_drain(struct vioif_softc *sc)
1095{
1096	struct virtqueue *vq = &sc->sc_vq[VQ_RX];
1097	int i;
1098
1099	for (i = 0; i < vq->vq_num; i++) {
1100		if (sc->sc_rx_mbufs[i] == NULL)
1101			continue;
1102		vioif_free_rx_mbuf(sc, i);
1103	}
1104}
1105
1106
/*
 * Transmission implementation
 */
1110/* actual transmission is done in if_start */
1111/* tx interrupt; dequeue and free mbufs */
1112/*
1113 * tx interrupt is actually disabled; this should be called upon
1114 * tx vq full and watchdog
1115 */
1116static int
1117vioif_tx_vq_done(struct virtqueue *vq)
1118{
1119	struct virtio_softc *vsc = vq->vq_owner;
1120	struct vioif_softc *sc = device_private(vsc->sc_child);
1121	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1122	int r = 0;
1123
1124	VIOIF_TX_LOCK(sc);
1125
1126	if (sc->sc_stopping)
1127		goto out;
1128
1129	r = vioif_tx_vq_done_locked(vq);
1130
1131out:
1132	VIOIF_TX_UNLOCK(sc);
1133	if (r)
1134		if_schedule_deferred_start(ifp);
1135	return r;
1136}
1137
1138static int
1139vioif_tx_vq_done_locked(struct virtqueue *vq)
1140{
1141	struct virtio_softc *vsc = vq->vq_owner;
1142	struct vioif_softc *sc = device_private(vsc->sc_child);
1143	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1144	struct mbuf *m;
1145	int r = 0;
1146	int slot, len;
1147
1148	KASSERT(VIOIF_TX_LOCKED(sc));
1149
1150	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
1151		r++;
1152		bus_dmamap_sync(vsc->sc_dmat, sc->sc_txhdr_dmamaps[slot],
1153				0, sizeof(struct virtio_net_hdr),
1154				BUS_DMASYNC_POSTWRITE);
1155		bus_dmamap_sync(vsc->sc_dmat, sc->sc_tx_dmamaps[slot],
1156				0, sc->sc_tx_dmamaps[slot]->dm_mapsize,
1157				BUS_DMASYNC_POSTWRITE);
1158		m = sc->sc_tx_mbufs[slot];
1159		bus_dmamap_unload(vsc->sc_dmat, sc->sc_tx_dmamaps[slot]);
1160		sc->sc_tx_mbufs[slot] = 0;
1161		virtio_dequeue_commit(vsc, vq, slot);
1162		ifp->if_opackets++;
1163		m_freem(m);
1164	}
1165
1166	if (r)
1167		ifp->if_flags &= ~IFF_OACTIVE;
1168	return r;
1169}
1170
1171/* free all the mbufs already put on vq; called from if_stop(disable) */
1172static void
1173vioif_tx_drain(struct vioif_softc *sc)
1174{
1175	struct virtio_softc *vsc = sc->sc_virtio;
1176	struct virtqueue *vq = &sc->sc_vq[VQ_TX];
1177	int i;
1178
1179	KASSERT(sc->sc_stopping);
1180
1181	for (i = 0; i < vq->vq_num; i++) {
1182		if (sc->sc_tx_mbufs[i] == NULL)
1183			continue;
1184		bus_dmamap_unload(vsc->sc_dmat, sc->sc_tx_dmamaps[i]);
1185		m_freem(sc->sc_tx_mbufs[i]);
1186		sc->sc_tx_mbufs[i] = NULL;
1187	}
1188}
1189
1190/*
1191 * Control vq
1192 */
1193/* issue a VIRTIO_NET_CTRL_RX class command and wait for completion */
1194static int
1195vioif_ctrl_rx(struct vioif_softc *sc, int cmd, bool onoff)
1196{
1197	struct virtio_softc *vsc = sc->sc_virtio;
1198	struct virtqueue *vq = &sc->sc_vq[VQ_CTRL];
1199	int r, slot;
1200
1201	if (vsc->sc_nvqs < 3)
1202		return ENOTSUP;
1203
1204	mutex_enter(&sc->sc_ctrl_wait_lock);
1205	while (sc->sc_ctrl_inuse != FREE)
1206		cv_wait(&sc->sc_ctrl_wait, &sc->sc_ctrl_wait_lock);
1207	sc->sc_ctrl_inuse = INUSE;
1208	mutex_exit(&sc->sc_ctrl_wait_lock);
1209
1210	sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_RX;
1211	sc->sc_ctrl_cmd->command = cmd;
1212	sc->sc_ctrl_rx->onoff = onoff;
1213
1214	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_cmd_dmamap,
1215			0, sizeof(struct virtio_net_ctrl_cmd),
1216			BUS_DMASYNC_PREWRITE);
1217	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_rx_dmamap,
1218			0, sizeof(struct virtio_net_ctrl_rx),
1219			BUS_DMASYNC_PREWRITE);
1220	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_status_dmamap,
1221			0, sizeof(struct virtio_net_ctrl_status),
1222			BUS_DMASYNC_PREREAD);
1223
1224	r = virtio_enqueue_prep(vsc, vq, &slot);
1225	if (r != 0)
1226		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
1227	r = virtio_enqueue_reserve(vsc, vq, slot, 3);
1228	if (r != 0)
1229		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
1230	virtio_enqueue(vsc, vq, slot, sc->sc_ctrl_cmd_dmamap, true);
1231	virtio_enqueue(vsc, vq, slot, sc->sc_ctrl_rx_dmamap, true);
1232	virtio_enqueue(vsc, vq, slot, sc->sc_ctrl_status_dmamap, false);
1233	virtio_enqueue_commit(vsc, vq, slot, true);
1234
1235	/* wait for done */
1236	mutex_enter(&sc->sc_ctrl_wait_lock);
1237	while (sc->sc_ctrl_inuse != DONE)
1238		cv_wait(&sc->sc_ctrl_wait, &sc->sc_ctrl_wait_lock);
1239	mutex_exit(&sc->sc_ctrl_wait_lock);
1240	/* already dequeueued */
1241
1242	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_cmd_dmamap, 0,
1243			sizeof(struct virtio_net_ctrl_cmd),
1244			BUS_DMASYNC_POSTWRITE);
1245	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_rx_dmamap, 0,
1246			sizeof(struct virtio_net_ctrl_rx),
1247			BUS_DMASYNC_POSTWRITE);
1248	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_status_dmamap, 0,
1249			sizeof(struct virtio_net_ctrl_status),
1250			BUS_DMASYNC_POSTREAD);
1251
1252	if (sc->sc_ctrl_status->ack == VIRTIO_NET_OK)
1253		r = 0;
1254	else {
1255		printf("%s: failed setting rx mode\n",
1256		       device_xname(sc->sc_dev));
1257		r = EIO;
1258	}
1259
1260	mutex_enter(&sc->sc_ctrl_wait_lock);
1261	sc->sc_ctrl_inuse = FREE;
1262	cv_signal(&sc->sc_ctrl_wait);
1263	mutex_exit(&sc->sc_ctrl_wait_lock);
1264
1265	return r;
1266}
1267
1268static int
1269vioif_set_promisc(struct vioif_softc *sc, bool onoff)
1270{
1271	int r;
1272
1273	r = vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, onoff);
1274
1275	return r;
1276}
1277
1278static int
1279vioif_set_allmulti(struct vioif_softc *sc, bool onoff)
1280{
1281	int r;
1282
1283	r = vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, onoff);
1284
1285	return r;
1286}
1287
1288/* issue VIRTIO_NET_CTRL_MAC_TABLE_SET command and wait for completion */
1289static int
1290vioif_set_rx_filter(struct vioif_softc *sc)
1291{
1292	/* filter already set in sc_ctrl_mac_tbl */
1293	struct virtio_softc *vsc = sc->sc_virtio;
1294	struct virtqueue *vq = &sc->sc_vq[VQ_CTRL];
1295	int r, slot;
1296
1297	if (vsc->sc_nvqs < 3)
1298		return ENOTSUP;
1299
1300	mutex_enter(&sc->sc_ctrl_wait_lock);
1301	while (sc->sc_ctrl_inuse != FREE)
1302		cv_wait(&sc->sc_ctrl_wait, &sc->sc_ctrl_wait_lock);
1303	sc->sc_ctrl_inuse = INUSE;
1304	mutex_exit(&sc->sc_ctrl_wait_lock);
1305
1306	sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_MAC;
1307	sc->sc_ctrl_cmd->command = VIRTIO_NET_CTRL_MAC_TABLE_SET;
1308
1309	r = bus_dmamap_load(vsc->sc_dmat, sc->sc_ctrl_tbl_uc_dmamap,
1310			    sc->sc_ctrl_mac_tbl_uc,
1311			    (sizeof(struct virtio_net_ctrl_mac_tbl)
1312			  + ETHER_ADDR_LEN * sc->sc_ctrl_mac_tbl_uc->nentries),
1313			    NULL, BUS_DMA_WRITE|BUS_DMA_NOWAIT);
1314	if (r) {
1315		printf("%s: control command dmamap load failed, "
1316		       "error code %d\n", device_xname(sc->sc_dev), r);
1317		goto out;
1318	}
1319	r = bus_dmamap_load(vsc->sc_dmat, sc->sc_ctrl_tbl_mc_dmamap,
1320			    sc->sc_ctrl_mac_tbl_mc,
1321			    (sizeof(struct virtio_net_ctrl_mac_tbl)
1322			  + ETHER_ADDR_LEN * sc->sc_ctrl_mac_tbl_mc->nentries),
1323			    NULL, BUS_DMA_WRITE|BUS_DMA_NOWAIT);
1324	if (r) {
1325		printf("%s: control command dmamap load failed, "
1326		       "error code %d\n", device_xname(sc->sc_dev), r);
1327		bus_dmamap_unload(vsc->sc_dmat, sc->sc_ctrl_tbl_uc_dmamap);
1328		goto out;
1329	}
1330
1331	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_cmd_dmamap,
1332			0, sizeof(struct virtio_net_ctrl_cmd),
1333			BUS_DMASYNC_PREWRITE);
1334	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_tbl_uc_dmamap, 0,
1335			(sizeof(struct virtio_net_ctrl_mac_tbl)
1336			 + ETHER_ADDR_LEN * sc->sc_ctrl_mac_tbl_uc->nentries),
1337			BUS_DMASYNC_PREWRITE);
1338	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_tbl_mc_dmamap, 0,
1339			(sizeof(struct virtio_net_ctrl_mac_tbl)
1340			 + ETHER_ADDR_LEN * sc->sc_ctrl_mac_tbl_mc->nentries),
1341			BUS_DMASYNC_PREWRITE);
1342	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_status_dmamap,
1343			0, sizeof(struct virtio_net_ctrl_status),
1344			BUS_DMASYNC_PREREAD);
1345
1346	r = virtio_enqueue_prep(vsc, vq, &slot);
1347	if (r != 0)
1348		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
1349	r = virtio_enqueue_reserve(vsc, vq, slot, 4);
1350	if (r != 0)
1351		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
1352	virtio_enqueue(vsc, vq, slot, sc->sc_ctrl_cmd_dmamap, true);
1353	virtio_enqueue(vsc, vq, slot, sc->sc_ctrl_tbl_uc_dmamap, true);
1354	virtio_enqueue(vsc, vq, slot, sc->sc_ctrl_tbl_mc_dmamap, true);
1355	virtio_enqueue(vsc, vq, slot, sc->sc_ctrl_status_dmamap, false);
1356	virtio_enqueue_commit(vsc, vq, slot, true);
1357
1358	/* wait for done */
1359	mutex_enter(&sc->sc_ctrl_wait_lock);
1360	while (sc->sc_ctrl_inuse != DONE)
1361		cv_wait(&sc->sc_ctrl_wait, &sc->sc_ctrl_wait_lock);
1362	mutex_exit(&sc->sc_ctrl_wait_lock);
1363	/* already dequeueued */
1364
1365	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_cmd_dmamap, 0,
1366			sizeof(struct virtio_net_ctrl_cmd),
1367			BUS_DMASYNC_POSTWRITE);
1368	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_tbl_uc_dmamap, 0,
1369			(sizeof(struct virtio_net_ctrl_mac_tbl)
1370			 + ETHER_ADDR_LEN * sc->sc_ctrl_mac_tbl_uc->nentries),
1371			BUS_DMASYNC_POSTWRITE);
1372	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_tbl_mc_dmamap, 0,
1373			(sizeof(struct virtio_net_ctrl_mac_tbl)
1374			 + ETHER_ADDR_LEN * sc->sc_ctrl_mac_tbl_mc->nentries),
1375			BUS_DMASYNC_POSTWRITE);
1376	bus_dmamap_sync(vsc->sc_dmat, sc->sc_ctrl_status_dmamap, 0,
1377			sizeof(struct virtio_net_ctrl_status),
1378			BUS_DMASYNC_POSTREAD);
1379	bus_dmamap_unload(vsc->sc_dmat, sc->sc_ctrl_tbl_uc_dmamap);
1380	bus_dmamap_unload(vsc->sc_dmat, sc->sc_ctrl_tbl_mc_dmamap);
1381
1382	if (sc->sc_ctrl_status->ack == VIRTIO_NET_OK)
1383		r = 0;
1384	else {
1385		printf("%s: failed setting rx filter\n",
1386		       device_xname(sc->sc_dev));
1387		r = EIO;
1388	}
1389
1390out:
1391	mutex_enter(&sc->sc_ctrl_wait_lock);
1392	sc->sc_ctrl_inuse = FREE;
1393	cv_signal(&sc->sc_ctrl_wait);
1394	mutex_exit(&sc->sc_ctrl_wait_lock);
1395
1396	return r;
1397}
1398
1399/* ctrl vq interrupt; wake up the command issuer */
1400static int
1401vioif_ctrl_vq_done(struct virtqueue *vq)
1402{
1403	struct virtio_softc *vsc = vq->vq_owner;
1404	struct vioif_softc *sc = device_private(vsc->sc_child);
1405	int r, slot;
1406
1407	r = virtio_dequeue(vsc, vq, &slot, NULL);
1408	if (r == ENOENT)
1409		return 0;
1410	virtio_dequeue_commit(vsc, vq, slot);
1411
1412	mutex_enter(&sc->sc_ctrl_wait_lock);
1413	sc->sc_ctrl_inuse = DONE;
1414	cv_signal(&sc->sc_ctrl_wait);
1415	mutex_exit(&sc->sc_ctrl_wait_lock);
1416
1417	return 1;
1418}
1419
1420/*
1421 * If IFF_PROMISC requested,  set promiscuous
1422 * If multicast filter small enough (<=MAXENTRIES) set rx filter
1423 * If large multicast filter exist use ALLMULTI
1424 */
1425/*
1426 * If setting rx filter fails fall back to ALLMULTI
1427 * If ALLMULTI fails fall back to PROMISC
1428 */
1429static int
1430vioif_rx_filter(struct vioif_softc *sc)
1431{
1432	struct virtio_softc *vsc = sc->sc_virtio;
1433	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1434	struct ether_multi *enm;
1435	struct ether_multistep step;
1436	int nentries;
1437	int promisc = 0, allmulti = 0, rxfilter = 0;
1438	int r;
1439
1440	if (vsc->sc_nvqs < 3) {	/* no ctrl vq; always promisc */
1441		ifp->if_flags |= IFF_PROMISC;
1442		return 0;
1443	}
1444
1445	if (ifp->if_flags & IFF_PROMISC) {
1446		promisc = 1;
1447		goto set;
1448	}
1449
1450	nentries = -1;
1451	ETHER_FIRST_MULTI(step, &sc->sc_ethercom, enm);
1452	while (nentries++, enm != NULL) {
1453		if (nentries >= VIRTIO_NET_CTRL_MAC_MAXENTRIES) {
1454			allmulti = 1;
1455			goto set;
1456		}
1457		if (memcmp(enm->enm_addrlo, enm->enm_addrhi,
1458			   ETHER_ADDR_LEN)) {
1459			allmulti = 1;
1460			goto set;
1461		}
1462		memcpy(sc->sc_ctrl_mac_tbl_mc->macs[nentries],
1463		       enm->enm_addrlo, ETHER_ADDR_LEN);
1464		ETHER_NEXT_MULTI(step, enm);
1465	}
1466	rxfilter = 1;
1467
1468set:
1469	if (rxfilter) {
1470		sc->sc_ctrl_mac_tbl_uc->nentries = 0;
1471		sc->sc_ctrl_mac_tbl_mc->nentries = nentries;
1472		r = vioif_set_rx_filter(sc);
1473		if (r != 0) {
1474			rxfilter = 0;
1475			allmulti = 1; /* fallback */
1476		}
1477	} else {
1478		/* remove rx filter */
1479		sc->sc_ctrl_mac_tbl_uc->nentries = 0;
1480		sc->sc_ctrl_mac_tbl_mc->nentries = 0;
1481		r = vioif_set_rx_filter(sc);
1482		/* what to do on failure? */
1483	}
1484	if (allmulti) {
1485		r = vioif_set_allmulti(sc, true);
1486		if (r != 0) {
1487			allmulti = 0;
1488			promisc = 1; /* fallback */
1489		}
1490	} else {
1491		r = vioif_set_allmulti(sc, false);
1492		/* what to do on failure? */
1493	}
1494	if (promisc) {
1495		r = vioif_set_promisc(sc, true);
1496	} else {
1497		r = vioif_set_promisc(sc, false);
1498	}
1499
1500	return r;
1501}
1502
1503/* change link status */
1504static int
1505vioif_updown(struct vioif_softc *sc, bool isup)
1506{
1507	struct virtio_softc *vsc = sc->sc_virtio;
1508
1509	if (!(vsc->sc_features & VIRTIO_NET_F_STATUS))
1510		return ENODEV;
1511	virtio_write_device_config_1(vsc,
1512				     VIRTIO_NET_CONFIG_STATUS,
1513				     isup?VIRTIO_NET_S_LINK_UP:0);
1514	return 0;
1515}
1516
1517MODULE(MODULE_CLASS_DRIVER, if_vioif, "virtio");
1518
1519#ifdef _MODULE
1520#include "ioconf.c"
1521#endif
1522
1523static int
1524if_vioif_modcmd(modcmd_t cmd, void *opaque)
1525{
1526	int error = 0;
1527
1528#ifdef _MODULE
1529	switch (cmd) {
1530	case MODULE_CMD_INIT:
1531		error = config_init_component(cfdriver_ioconf_if_vioif,
1532		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
1533		break;
1534	case MODULE_CMD_FINI:
1535		error = config_fini_component(cfdriver_ioconf_if_vioif,
1536		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
1537		break;
1538	default:
1539		error = ENOTTY;
1540		break;
1541	}
1542#endif
1543
1544	return error;
1545}
1546