/*	$NetBSD: if_vioif.c,v 1.45 2019/01/14 14:57:25 yamaguchi Exp $	*/

/*
 * Copyright (c) 2010 Minoura Makoto.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: if_vioif.c,v 1.45 2019/01/14 14:57:25 yamaguchi Exp $");

#ifdef _KERNEL_OPT
#include "opt_net_mpsafe.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/condvar.h>
#include <sys/device.h>
#include <sys/intr.h>
#include <sys/kmem.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/sockio.h>
#include <sys/cpu.h>
#include <sys/module.h>

#include <dev/pci/virtioreg.h>
#include <dev/pci/virtiovar.h>

#include <net/if.h>
#include <net/if_media.h>
#include <net/if_ether.h>

#include <net/bpf.h>

#include "ioconf.h"

#ifdef NET_MPSAFE
#define VIOIF_MPSAFE	1
#endif

#ifdef SOFTINT_INTR
#define VIOIF_SOFTINT_INTR	1
#endif

/*
 * if_vioifreg.h:
 */
/* Configuration registers */
#define VIRTIO_NET_CONFIG_MAC		0 /* 8bit x 6byte */
#define VIRTIO_NET_CONFIG_STATUS	6 /* 16bit */

/* Feature bits */
#define VIRTIO_NET_F_CSUM	(1<<0)
#define VIRTIO_NET_F_GUEST_CSUM	(1<<1)
#define VIRTIO_NET_F_MAC	(1<<5)
#define VIRTIO_NET_F_GSO	(1<<6)
#define VIRTIO_NET_F_GUEST_TSO4	(1<<7)
#define VIRTIO_NET_F_GUEST_TSO6	(1<<8)
#define VIRTIO_NET_F_GUEST_ECN	(1<<9)
#define VIRTIO_NET_F_GUEST_UFO	(1<<10)
#define VIRTIO_NET_F_HOST_TSO4	(1<<11)
#define VIRTIO_NET_F_HOST_TSO6	(1<<12)
#define VIRTIO_NET_F_HOST_ECN	(1<<13)
#define VIRTIO_NET_F_HOST_UFO	(1<<14)
#define VIRTIO_NET_F_MRG_RXBUF	(1<<15)
#define VIRTIO_NET_F_STATUS	(1<<16)
#define VIRTIO_NET_F_CTRL_VQ	(1<<17)
#define VIRTIO_NET_F_CTRL_RX	(1<<18)
#define VIRTIO_NET_F_CTRL_VLAN	(1<<19)

#define VIRTIO_NET_FLAG_BITS \
	VIRTIO_COMMON_FLAG_BITS \
	"\x14""CTRL_VLAN" \
	"\x13""CTRL_RX" \
	"\x12""CTRL_VQ" \
	"\x11""STATUS" \
	"\x10""MRG_RXBUF" \
	"\x0f""HOST_UFO" \
	"\x0e""HOST_ECN" \
	"\x0d""HOST_TSO6" \
	"\x0c""HOST_TSO4" \
	"\x0b""GUEST_UFO" \
	"\x0a""GUEST_ECN" \
	"\x09""GUEST_TSO6" \
	"\x08""GUEST_TSO4" \
	"\x07""GSO" \
	"\x06""MAC" \
	"\x02""GUEST_CSUM" \
	"\x01""CSUM"
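
/*
 * The flag-bit string above is in the old-style snprintb(9) format:
 * each "\xNN" byte is a 1-origin bit number (so "\x01" names bit 0,
 * VIRTIO_NET_F_CSUM) followed by the name to print when that bit is
 * set; the leading numeral-base byte comes from VIRTIO_COMMON_FLAG_BITS.
 * A minimal usage sketch (buffer size illustrative only):
 *
 *	char buf[256];
 *	snprintb(buf, sizeof(buf), VIRTIO_NET_FLAG_BITS, features);
 */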

/* Status */
#define VIRTIO_NET_S_LINK_UP	1

/* Packet header structure */
struct virtio_net_hdr {
	uint8_t		flags;
	uint8_t		gso_type;
	uint16_t	hdr_len;
	uint16_t	gso_size;
	uint16_t	csum_start;
	uint16_t	csum_offset;
#if 0
	uint16_t	num_buffers; /* if VIRTIO_NET_F_MRG_RXBUF enabled */
#endif
} __packed;

#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
#define VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
#define VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
#define VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
#define VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
#define VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */

#define VIRTIO_NET_MAX_GSO_LEN		(65536+ETHER_HDR_LEN)

/* Control virtqueue */
struct virtio_net_ctrl_cmd {
	uint8_t	class;
	uint8_t	command;
} __packed;
#define VIRTIO_NET_CTRL_RX		0
# define VIRTIO_NET_CTRL_RX_PROMISC	0
# define VIRTIO_NET_CTRL_RX_ALLMULTI	1

#define VIRTIO_NET_CTRL_MAC		1
# define VIRTIO_NET_CTRL_MAC_TABLE_SET	0

#define VIRTIO_NET_CTRL_VLAN		2
# define VIRTIO_NET_CTRL_VLAN_ADD	0
# define VIRTIO_NET_CTRL_VLAN_DEL	1

struct virtio_net_ctrl_status {
	uint8_t	ack;
} __packed;
#define VIRTIO_NET_OK			0
#define VIRTIO_NET_ERR			1

struct virtio_net_ctrl_rx {
	uint8_t	onoff;
} __packed;

struct virtio_net_ctrl_mac_tbl {
	uint32_t nentries;
	uint8_t macs[][ETHER_ADDR_LEN];
} __packed;

struct virtio_net_ctrl_vlan {
	uint16_t id;
} __packed;

struct vioif_ctrl_cmdspec {
	bus_dmamap_t	dmamap;
	void		*buf;
	bus_size_t	bufsize;
};

/*
 * if_vioifvar.h:
 */

/*
 * Locking notes:
 * + fields in vioif_txqueue are protected by txq_lock (a spin mutex), and
 *   fields in vioif_rxqueue are protected by rxq_lock (a spin mutex).
 *      - a thread must not hold both locks at once
 * + ctrlq_inuse is protected by ctrlq_wait_lock.
 *      - other fields in vioif_ctrlqueue are protected by ctrlq_inuse
 *      - txq_lock or rxq_lock cannot be held along with ctrlq_wait_lock
 */
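
/*
 * As an illustration of the ctrlq protocol described above, every user
 * of the control queue goes through the sequence below (this is a
 * summary of vioif_ctrl_acquire()/vioif_ctrl_release() further down,
 * not an additional API):
 *
 *	mutex_enter(&ctrlq->ctrlq_wait_lock);
 *	while (ctrlq->ctrlq_inuse != FREE)
 *		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
 *	ctrlq->ctrlq_inuse = INUSE;
 *	mutex_exit(&ctrlq->ctrlq_wait_lock);
 *	... use the other ctrlq_* fields without ctrlq_wait_lock held ...
 */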

struct vioif_txqueue {
	kmutex_t		*txq_lock;	/* lock for tx operations */

	struct virtqueue	*txq_vq;
	bool			txq_stopping;
	bool			txq_link_active;

	struct virtio_net_hdr	*txq_hdrs;
	bus_dmamap_t		*txq_hdr_dmamaps;

	struct mbuf		**txq_mbufs;
	bus_dmamap_t		*txq_dmamaps;
};

struct vioif_rxqueue {
	kmutex_t		*rxq_lock;	/* lock for rx operations */

	struct virtqueue	*rxq_vq;
	bool			rxq_stopping;

	struct virtio_net_hdr	*rxq_hdrs;
	bus_dmamap_t		*rxq_hdr_dmamaps;

	struct mbuf		**rxq_mbufs;
	bus_dmamap_t		*rxq_dmamaps;

	void			*rxq_softint;
};

struct vioif_ctrlqueue {
	struct virtqueue		*ctrlq_vq;
	enum {
		FREE, INUSE, DONE
	}				ctrlq_inuse;
	kcondvar_t			ctrlq_wait;
	kmutex_t			ctrlq_wait_lock;
	struct lwp			*ctrlq_owner;

	struct virtio_net_ctrl_cmd	*ctrlq_cmd;
	struct virtio_net_ctrl_status	*ctrlq_status;
	struct virtio_net_ctrl_rx	*ctrlq_rx;
	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_uc;
	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_mc;

	bus_dmamap_t			ctrlq_cmd_dmamap;
	bus_dmamap_t			ctrlq_status_dmamap;
	bus_dmamap_t			ctrlq_rx_dmamap;
	bus_dmamap_t			ctrlq_tbl_uc_dmamap;
	bus_dmamap_t			ctrlq_tbl_mc_dmamap;
};

struct vioif_softc {
	device_t		sc_dev;

	struct virtio_softc	*sc_virtio;
	struct virtqueue	sc_vq[3];
#define VQ_RX	0
#define VQ_TX	1
#define VQ_CTRL	2

	uint8_t			sc_mac[ETHER_ADDR_LEN];
	struct ethercom		sc_ethercom;
	short			sc_deferred_init_done;
	bool			sc_link_active;

	struct vioif_txqueue	sc_txq;
	struct vioif_rxqueue	sc_rxq;

	bool			sc_has_ctrl;
	struct vioif_ctrlqueue	sc_ctrlq;

	bus_dma_segment_t	sc_hdr_segs[1];
	void			*sc_dmamem;
	void			*sc_kmem;

	void			*sc_ctl_softint;
};
#define VIRTIO_NET_TX_MAXNSEGS		(16) /* XXX */
#define VIRTIO_NET_CTRL_MAC_MAXENTRIES	(64) /* XXX */

#define VIOIF_TXQ_LOCK(_q)	mutex_enter((_q)->txq_lock)
#define VIOIF_TXQ_UNLOCK(_q)	mutex_exit((_q)->txq_lock)
#define VIOIF_TXQ_LOCKED(_q)	mutex_owned((_q)->txq_lock)

#define VIOIF_RXQ_LOCK(_q)	mutex_enter((_q)->rxq_lock)
#define VIOIF_RXQ_UNLOCK(_q)	mutex_exit((_q)->rxq_lock)
#define VIOIF_RXQ_LOCKED(_q)	mutex_owned((_q)->rxq_lock)

/* cfattach interface functions */
static int	vioif_match(device_t, cfdata_t, void *);
static void	vioif_attach(device_t, device_t, void *);
static void	vioif_deferred_init(device_t);

/* ifnet interface functions */
static int	vioif_init(struct ifnet *);
static void	vioif_stop(struct ifnet *, int);
static void	vioif_start(struct ifnet *);
static int	vioif_ioctl(struct ifnet *, u_long, void *);
static void	vioif_watchdog(struct ifnet *);

/* rx */
static int	vioif_add_rx_mbuf(struct vioif_softc *, int);
static void	vioif_free_rx_mbuf(struct vioif_softc *, int);
static void	vioif_populate_rx_mbufs(struct vioif_softc *);
static void	vioif_populate_rx_mbufs_locked(struct vioif_softc *);
static int	vioif_rx_deq(struct vioif_softc *);
static int	vioif_rx_deq_locked(struct vioif_softc *);
static int	vioif_rx_vq_done(struct virtqueue *);
static void	vioif_rx_softint(void *);
static void	vioif_rx_drain(struct vioif_softc *);

/* tx */
static int	vioif_tx_vq_done(struct virtqueue *);
static int	vioif_tx_vq_done_locked(struct virtqueue *);
static void	vioif_tx_drain(struct vioif_softc *);

/* other control */
static bool	vioif_is_link_up(struct vioif_softc *);
static void	vioif_update_link_status(struct vioif_softc *);
static int	vioif_ctrl_rx(struct vioif_softc *, int, bool);
static int	vioif_set_promisc(struct vioif_softc *, bool);
static int	vioif_set_allmulti(struct vioif_softc *, bool);
static int	vioif_set_rx_filter(struct vioif_softc *);
static int	vioif_rx_filter(struct vioif_softc *);
static int	vioif_ctrl_vq_done(struct virtqueue *);
static int	vioif_config_change(struct virtio_softc *);
static void	vioif_ctl_softint(void *);

CFATTACH_DECL_NEW(vioif, sizeof(struct vioif_softc),
		  vioif_match, vioif_attach, NULL, NULL);

static int
vioif_match(device_t parent, cfdata_t match, void *aux)
{
	struct virtio_attach_args *va = aux;

	if (va->sc_childdevid == PCI_PRODUCT_VIRTIO_NETWORK)
		return 1;

	return 0;
}

/* allocate memory */
/*
 * dma memory is used for:
 *   rxq_hdrs[slot]:	 metadata array for received frames (READ)
 *   txq_hdrs[slot]:	 metadata array for frames to be sent (WRITE)
 *   ctrlq_cmd:		 command to be sent via ctrl vq (WRITE)
 *   ctrlq_status:	 return value for a command via ctrl vq (READ)
 *   ctrlq_rx:		 parameter for a VIRTIO_NET_CTRL_RX class command
 *			 (WRITE)
 *   ctrlq_mac_tbl_uc:	 unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
 *			 class command (WRITE)
 *   ctrlq_mac_tbl_mc:	 multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
 *			 class command (WRITE)
 * Only one instance of each ctrlq_* structure is allocated; they are
 * protected by the ctrlq_inuse variable and the ctrlq_wait condvar.
 */
/*
 * dynamically allocated memory is used for:
 *   rxq_hdr_dmamaps[slot]:	bus_dmamap_t array for sc_rx_hdrs[slot]
 *   txq_hdr_dmamaps[slot]:	bus_dmamap_t array for sc_tx_hdrs[slot]
 *   rxq_dmamaps[slot]:		bus_dmamap_t array for received payload
 *   txq_dmamaps[slot]:		bus_dmamap_t array for sent payload
 *   rxq_mbufs[slot]:		mbuf pointer array for received frames
 *   txq_mbufs[slot]:		mbuf pointer array for sent frames
 */
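/*
 * Sketch of the resulting layout of the single bus_dmamem allocation,
 * carved out in the order listed above (ctrlq_* parts exist only when
 * sc_has_ctrl):
 *
 *	+---------------------------------------------+
 *	| rxq_hdrs[0 .. rxqsize-1]                     |
 *	| txq_hdrs[0 .. txqsize-1]                     |
 *	| ctrlq_cmd                                    |
 *	| ctrlq_status                                 |
 *	| ctrlq_rx                                     |
 *	| ctrlq_mac_tbl_uc (header only, 0 entries)    |
 *	| ctrlq_mac_tbl_mc (header + MAXENTRIES MACs)  |
 *	+---------------------------------------------+
 */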
static int
vioif_alloc_mems(struct vioif_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	struct vioif_txqueue *txq = &sc->sc_txq;
	struct vioif_rxqueue *rxq = &sc->sc_rxq;
	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
	int allocsize, allocsize2, r, rsegs, i;
	void *vaddr;
	intptr_t p;
	int rxqsize, txqsize;

	rxqsize = rxq->rxq_vq->vq_num;
	txqsize = txq->txq_vq->vq_num;

	allocsize = sizeof(struct virtio_net_hdr) * rxqsize;
	allocsize += sizeof(struct virtio_net_hdr) * txqsize;
	if (sc->sc_has_ctrl) {
		allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1;
		allocsize += sizeof(struct virtio_net_ctrl_status) * 1;
		allocsize += sizeof(struct virtio_net_ctrl_rx) * 1;
		allocsize += sizeof(struct virtio_net_ctrl_mac_tbl)
			+ sizeof(struct virtio_net_ctrl_mac_tbl)
			+ ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES;
	}
	r = bus_dmamem_alloc(virtio_dmat(vsc), allocsize, 0, 0,
			     &sc->sc_hdr_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
	if (r != 0) {
		aprint_error_dev(sc->sc_dev,
				 "DMA memory allocation failed, size %d, "
				 "error code %d\n", allocsize, r);
		goto err_none;
	}
	r = bus_dmamem_map(virtio_dmat(vsc),
			   &sc->sc_hdr_segs[0], 1, allocsize,
			   &vaddr, BUS_DMA_NOWAIT);
	if (r != 0) {
		aprint_error_dev(sc->sc_dev,
				 "DMA memory map failed, "
				 "error code %d\n", r);
		goto err_dmamem_alloc;
	}

#define P(p, p0, p0size)	do { p0 = (void *) p;		\
				     p += p0size; } while (0)
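/*
 * P() carves a sub-buffer of p0size bytes out of the region at p:
 * it points p0 at the current position and advances p past it, so
 * successive calls hand out consecutive, non-overlapping chunks.
 */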
	memset(vaddr, 0, allocsize);
	sc->sc_dmamem = vaddr;
	p = (intptr_t) vaddr;

	P(p, rxq->rxq_hdrs, sizeof(rxq->rxq_hdrs[0]) * rxqsize);
	P(p, txq->txq_hdrs, sizeof(txq->txq_hdrs[0]) * txqsize);
	if (sc->sc_has_ctrl) {
		P(p, ctrlq->ctrlq_cmd, sizeof(*ctrlq->ctrlq_cmd));
		P(p, ctrlq->ctrlq_status, sizeof(*ctrlq->ctrlq_status));
		P(p, ctrlq->ctrlq_rx, sizeof(*ctrlq->ctrlq_rx));
		P(p, ctrlq->ctrlq_mac_tbl_uc, sizeof(*ctrlq->ctrlq_mac_tbl_uc) + 0);
		P(p, ctrlq->ctrlq_mac_tbl_mc,
		    (sizeof(*ctrlq->ctrlq_mac_tbl_mc)
		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES));
	}

	allocsize2 = sizeof(bus_dmamap_t) * (rxqsize + txqsize);
	allocsize2 += sizeof(bus_dmamap_t) * (rxqsize + txqsize);
	allocsize2 += sizeof(struct mbuf*) * (rxqsize + txqsize);
	vaddr = kmem_zalloc(allocsize2, KM_SLEEP);
	sc->sc_kmem = vaddr;
	p = (intptr_t) vaddr;

	P(p, rxq->rxq_hdr_dmamaps, sizeof(rxq->rxq_hdr_dmamaps[0]) * rxqsize);
	P(p, txq->txq_hdr_dmamaps, sizeof(txq->txq_hdr_dmamaps[0]) * txqsize);
	P(p, rxq->rxq_dmamaps, sizeof(rxq->rxq_dmamaps[0]) * rxqsize);
	P(p, txq->txq_dmamaps, sizeof(txq->txq_dmamaps[0]) * txqsize);
	P(p, rxq->rxq_mbufs, sizeof(rxq->rxq_mbufs[0]) * rxqsize);
	P(p, txq->txq_mbufs, sizeof(txq->txq_mbufs[0]) * txqsize);
#undef P

#define C(map, size, nsegs, usage)						\
	do {									\
		r = bus_dmamap_create(virtio_dmat(vsc), size, nsegs, size, 0,	\
				      BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW,		\
				      &map);					\
		if (r != 0) {							\
			aprint_error_dev(sc->sc_dev,				\
			    "%s dmamap creation failed, "			\
			    "error code %d\n", usage, r);			\
			goto err_reqs;						\
		}								\
	} while (0)
#define C_L(map, buf, size, nsegs, rw, usage)				\
	C(map, size, nsegs, usage);					\
	do {								\
		r = bus_dmamap_load(virtio_dmat(vsc), map,		\
				    buf, size, NULL,			\
				    rw | BUS_DMA_NOWAIT);		\
		if (r != 0) {						\
			aprint_error_dev(sc->sc_dev,			\
					 usage " dmamap load failed, "	\
					 "error code %d\n", r);		\
			goto err_reqs;					\
		}							\
	} while (0)
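	/*
	 * C() only creates a dmamap; C_L() additionally loads it with a
	 * fixed buffer up front, which is possible for the header arrays
	 * and ctrl structures because their addresses and sizes never
	 * change after allocation.
	 */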
	for (i = 0; i < rxqsize; i++) {
		C_L(rxq->rxq_hdr_dmamaps[i], &rxq->rxq_hdrs[i], sizeof(rxq->rxq_hdrs[0]), 1,
		    BUS_DMA_READ, "rx header");
		C(rxq->rxq_dmamaps[i], MCLBYTES, 1, "rx payload");
	}

	for (i = 0; i < txqsize; i++) {
		C_L(txq->txq_hdr_dmamaps[i], &txq->txq_hdrs[i], sizeof(txq->txq_hdrs[0]), 1,
		    BUS_DMA_READ, "tx header");
		C(txq->txq_dmamaps[i], ETHER_MAX_LEN, VIRTIO_NET_TX_MAXNSEGS, "tx payload");
	}

	if (sc->sc_has_ctrl) {
		/* control vq class & command */
		C_L(ctrlq->ctrlq_cmd_dmamap,
		    ctrlq->ctrlq_cmd, sizeof(*ctrlq->ctrlq_cmd), 1,
		    BUS_DMA_WRITE, "control command");
		C_L(ctrlq->ctrlq_status_dmamap,
		    ctrlq->ctrlq_status, sizeof(*ctrlq->ctrlq_status), 1,
		    BUS_DMA_READ, "control status");

		/* control vq rx mode command parameter */
		C_L(ctrlq->ctrlq_rx_dmamap,
		    ctrlq->ctrlq_rx, sizeof(*ctrlq->ctrlq_rx), 1,
		    BUS_DMA_WRITE, "rx mode control command");

		/* control vq MAC filter table for unicast */
		/* do not load now since its length is variable */
		C(ctrlq->ctrlq_tbl_uc_dmamap,
		    sizeof(*ctrlq->ctrlq_mac_tbl_uc) + 0, 1,
		    "unicast MAC address filter command");

		/* control vq MAC filter table for multicast */
		C(ctrlq->ctrlq_tbl_mc_dmamap,
		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES, 1,
		    "multicast MAC address filter command");
	}
#undef C_L
#undef C

	return 0;

err_reqs:
#define D(map)								\
	do {								\
		if (map) {						\
			bus_dmamap_destroy(virtio_dmat(vsc), map);	\
			map = NULL;					\
		}							\
	} while (0)
	D(ctrlq->ctrlq_tbl_mc_dmamap);
	D(ctrlq->ctrlq_tbl_uc_dmamap);
	D(ctrlq->ctrlq_rx_dmamap);
	D(ctrlq->ctrlq_status_dmamap);
	D(ctrlq->ctrlq_cmd_dmamap);
	for (i = 0; i < txqsize; i++) {
		D(txq->txq_dmamaps[i]);
		D(txq->txq_hdr_dmamaps[i]);
	}
	for (i = 0; i < rxqsize; i++) {
		D(rxq->rxq_dmamaps[i]);
		D(rxq->rxq_hdr_dmamaps[i]);
	}
#undef D
	if (sc->sc_kmem) {
		kmem_free(sc->sc_kmem, allocsize2);
		sc->sc_kmem = NULL;
	}
	bus_dmamem_unmap(virtio_dmat(vsc), sc->sc_dmamem, allocsize);
err_dmamem_alloc:
	bus_dmamem_free(virtio_dmat(vsc), &sc->sc_hdr_segs[0], 1);
err_none:
	return -1;
}

static void
vioif_attach(device_t parent, device_t self, void *aux)
{
	struct vioif_softc *sc = device_private(self);
	struct virtio_softc *vsc = device_private(parent);
	struct vioif_txqueue *txq = &sc->sc_txq;
	struct vioif_rxqueue *rxq = &sc->sc_rxq;
	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
	uint32_t features;
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	u_int softint_flags;
	int r, nvqs=0, req_flags;

	if (virtio_child(vsc) != NULL) {
		aprint_normal(": child already attached for %s; "
			      "something wrong...\n",
			      device_xname(parent));
		return;
	}

	sc->sc_dev = self;
	sc->sc_virtio = vsc;
	sc->sc_link_active = false;

	req_flags = 0;

#ifdef VIOIF_MPSAFE
	req_flags |= VIRTIO_F_PCI_INTR_MPSAFE;
#endif
#ifdef VIOIF_SOFTINT_INTR
	req_flags |= VIRTIO_F_PCI_INTR_SOFTINT;
#endif
	req_flags |= VIRTIO_F_PCI_INTR_MSIX;

	virtio_child_attach_start(vsc, self, IPL_NET, sc->sc_vq,
	    vioif_config_change, virtio_vq_intr, req_flags,
	    (VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | VIRTIO_NET_F_CTRL_VQ |
	     VIRTIO_NET_F_CTRL_RX | VIRTIO_F_NOTIFY_ON_EMPTY),
	    VIRTIO_NET_FLAG_BITS);

	features = virtio_features(vsc);

	if (features & VIRTIO_NET_F_MAC) {
		sc->sc_mac[0] = virtio_read_device_config_1(vsc,
						    VIRTIO_NET_CONFIG_MAC+0);
		sc->sc_mac[1] = virtio_read_device_config_1(vsc,
						    VIRTIO_NET_CONFIG_MAC+1);
		sc->sc_mac[2] = virtio_read_device_config_1(vsc,
						    VIRTIO_NET_CONFIG_MAC+2);
		sc->sc_mac[3] = virtio_read_device_config_1(vsc,
						    VIRTIO_NET_CONFIG_MAC+3);
		sc->sc_mac[4] = virtio_read_device_config_1(vsc,
						    VIRTIO_NET_CONFIG_MAC+4);
		sc->sc_mac[5] = virtio_read_device_config_1(vsc,
						    VIRTIO_NET_CONFIG_MAC+5);
	} else {
		/* code stolen from sys/net/if_tap.c */
		struct timeval tv;
		uint32_t ui;
		getmicrouptime(&tv);
		ui = (tv.tv_sec ^ tv.tv_usec) & 0xffffff;
		memcpy(sc->sc_mac+3, (uint8_t *)&ui, 3);
		virtio_write_device_config_1(vsc,
					     VIRTIO_NET_CONFIG_MAC+0,
					     sc->sc_mac[0]);
		virtio_write_device_config_1(vsc,
					     VIRTIO_NET_CONFIG_MAC+1,
					     sc->sc_mac[1]);
		virtio_write_device_config_1(vsc,
					     VIRTIO_NET_CONFIG_MAC+2,
					     sc->sc_mac[2]);
		virtio_write_device_config_1(vsc,
					     VIRTIO_NET_CONFIG_MAC+3,
					     sc->sc_mac[3]);
		virtio_write_device_config_1(vsc,
					     VIRTIO_NET_CONFIG_MAC+4,
					     sc->sc_mac[4]);
		virtio_write_device_config_1(vsc,
					     VIRTIO_NET_CONFIG_MAC+5,
					     sc->sc_mac[5]);
	}

	aprint_normal_dev(self, "Ethernet address %s\n", ether_sprintf(sc->sc_mac));

#ifdef VIOIF_MPSAFE
	softint_flags = SOFTINT_NET | SOFTINT_MPSAFE;
#else
	softint_flags = SOFTINT_NET;
#endif

	/*
	 * Allocating a virtqueue for Rx
	 */
	rxq->rxq_vq = &sc->sc_vq[VQ_RX];
	rxq->rxq_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);

	rxq->rxq_softint = softint_establish(softint_flags, vioif_rx_softint, sc);
	if (rxq->rxq_softint == NULL) {
		aprint_error_dev(self, "cannot establish rx softint\n");
		goto err;
	}
	r = virtio_alloc_vq(vsc, rxq->rxq_vq, VQ_RX,
	    MCLBYTES+sizeof(struct virtio_net_hdr), 2, "rx");
	if (r != 0)
		goto err;
	nvqs = 1;
	rxq->rxq_vq->vq_done = vioif_rx_vq_done;
	rxq->rxq_stopping = true;

	/*
	 * Allocating a virtqueue for Tx
	 */
	txq->txq_vq = &sc->sc_vq[VQ_TX];
	txq->txq_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
	r = virtio_alloc_vq(vsc, txq->txq_vq, VQ_TX,
	    (sizeof(struct virtio_net_hdr) + (ETHER_MAX_LEN - ETHER_HDR_LEN)),
	    VIRTIO_NET_TX_MAXNSEGS + 1, "tx");
	if (r != 0)
		goto err;
	nvqs = 2;
	txq->txq_vq->vq_done = vioif_tx_vq_done;
	txq->txq_link_active = sc->sc_link_active;
	txq->txq_stopping = false;

	virtio_start_vq_intr(vsc, rxq->rxq_vq);
	virtio_stop_vq_intr(vsc, txq->txq_vq); /* not urgent; do it later */

	ctrlq->ctrlq_vq = &sc->sc_vq[VQ_CTRL];
	if ((features & VIRTIO_NET_F_CTRL_VQ) &&
	    (features & VIRTIO_NET_F_CTRL_RX)) {
		/*
		 * Allocating a virtqueue for control channel
		 */
		r = virtio_alloc_vq(vsc, ctrlq->ctrlq_vq, VQ_CTRL,
		    NBPG, 1, "control");
		if (r != 0) {
			aprint_error_dev(self, "failed to allocate "
			    "a virtqueue for control channel\n");
			goto skip;
		}

		ctrlq->ctrlq_vq->vq_done = vioif_ctrl_vq_done;
		cv_init(&ctrlq->ctrlq_wait, "ctrl_vq");
		mutex_init(&ctrlq->ctrlq_wait_lock, MUTEX_DEFAULT, IPL_NET);
		ctrlq->ctrlq_inuse = FREE;
		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
		sc->sc_has_ctrl = true;
		nvqs = 3;
	}
skip:

	sc->sc_ctl_softint = softint_establish(softint_flags, vioif_ctl_softint, sc);
	if (sc->sc_ctl_softint == NULL) {
		aprint_error_dev(self, "cannot establish ctl softint\n");
		goto err;
	}

	if (vioif_alloc_mems(sc) < 0)
		goto err;

	if (virtio_child_attach_finish(vsc) != 0)
		goto err;

	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
#ifdef VIOIF_MPSAFE
	ifp->if_extflags = IFEF_MPSAFE;
#endif
	ifp->if_start = vioif_start;
	ifp->if_ioctl = vioif_ioctl;
	ifp->if_init = vioif_init;
	ifp->if_stop = vioif_stop;
	ifp->if_capabilities = 0;
	ifp->if_watchdog = vioif_watchdog;
	IFQ_SET_MAXLEN(&ifp->if_snd, MAX(txq->txq_vq->vq_num, IFQ_MAXLEN));
	IFQ_SET_READY(&ifp->if_snd);

	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;

	if_attach(ifp);
	if_deferred_start_init(ifp, NULL);
	ether_ifattach(ifp, sc->sc_mac);

	return;

err:
	if (rxq->rxq_lock) {
		mutex_obj_free(rxq->rxq_lock);
		rxq->rxq_lock = NULL;
	}

	if (rxq->rxq_softint) {
		softint_disestablish(rxq->rxq_softint);
		rxq->rxq_softint = NULL;
	}

	if (txq->txq_lock) {
		mutex_obj_free(txq->txq_lock);
		txq->txq_lock = NULL;
	}

	if (sc->sc_has_ctrl) {
		cv_destroy(&ctrlq->ctrlq_wait);
		mutex_destroy(&ctrlq->ctrlq_wait_lock);
	}

	while (nvqs > 0)
		virtio_free_vq(vsc, &sc->sc_vq[--nvqs]);

	virtio_child_attach_failed(vsc);
	return;
}

/* we need interrupts to turn promiscuous mode off */
static void
vioif_deferred_init(device_t self)
{
	struct vioif_softc *sc = device_private(self);
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	int r;

	if (ifp->if_flags & IFF_PROMISC)
		return;

	r = vioif_set_promisc(sc, false);
	if (r != 0)
		aprint_error_dev(self, "resetting promisc mode failed, "
				 "error code %d\n", r);
}

/*
 * Interface functions for ifnet
 */
static int
vioif_init(struct ifnet *ifp)
{
	struct vioif_softc *sc = ifp->if_softc;
	struct virtio_softc *vsc = sc->sc_virtio;
	struct vioif_txqueue *txq = &sc->sc_txq;
	struct vioif_rxqueue *rxq = &sc->sc_rxq;
	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;

	vioif_stop(ifp, 0);

	virtio_reinit_start(vsc);
	virtio_negotiate_features(vsc, virtio_features(vsc));
	virtio_start_vq_intr(vsc, rxq->rxq_vq);
	virtio_stop_vq_intr(vsc, txq->txq_vq);
	if (sc->sc_has_ctrl)
		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
	virtio_reinit_end(vsc);

	if (!sc->sc_deferred_init_done) {
		sc->sc_deferred_init_done = 1;
		if (sc->sc_has_ctrl)
			vioif_deferred_init(sc->sc_dev);
	}

	/* Have to set false before vioif_populate_rx_mbufs */
	rxq->rxq_stopping = false;
	txq->txq_stopping = false;

	vioif_populate_rx_mbufs(sc);

	vioif_update_link_status(sc);
	ifp->if_flags |= IFF_RUNNING;
	ifp->if_flags &= ~IFF_OACTIVE;
	vioif_rx_filter(sc);

	return 0;
}

static void
vioif_stop(struct ifnet *ifp, int disable)
{
	struct vioif_softc *sc = ifp->if_softc;
	struct virtio_softc *vsc = sc->sc_virtio;
	struct vioif_txqueue *txq = &sc->sc_txq;
	struct vioif_rxqueue *rxq = &sc->sc_rxq;
	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;

	/* Take the locks to ensure that ongoing TX/RX finish */
	VIOIF_TXQ_LOCK(txq);
	txq->txq_stopping = true;
	VIOIF_TXQ_UNLOCK(txq);

	VIOIF_RXQ_LOCK(rxq);
	rxq->rxq_stopping = true;
	VIOIF_RXQ_UNLOCK(rxq);

	/* disable interrupts */
	virtio_stop_vq_intr(vsc, rxq->rxq_vq);
	virtio_stop_vq_intr(vsc, txq->txq_vq);
	if (sc->sc_has_ctrl)
		virtio_stop_vq_intr(vsc, ctrlq->ctrlq_vq);

	/* only way to stop I/O and DMA is resetting... */
	virtio_reset(vsc);
	vioif_rx_deq(sc);
	vioif_tx_drain(sc);
	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
	sc->sc_link_active = false;
	txq->txq_link_active = false;

	if (disable)
		vioif_rx_drain(sc);
}

static void
vioif_start(struct ifnet *ifp)
{
	struct vioif_softc *sc = ifp->if_softc;
	struct virtio_softc *vsc = sc->sc_virtio;
	struct vioif_txqueue *txq = &sc->sc_txq;
	struct virtqueue *vq = txq->txq_vq;
	struct mbuf *m;
	int queued = 0;

	VIOIF_TXQ_LOCK(txq);

	if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING ||
	    !txq->txq_link_active)
		goto out;

	if (txq->txq_stopping)
		goto out;

	for (;;) {
		int slot, r;

		IFQ_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL)
			break;

		r = virtio_enqueue_prep(vsc, vq, &slot);
		if (r == EAGAIN) {
			ifp->if_flags |= IFF_OACTIVE;
			m_freem(m);
			break;
		}
		if (r != 0)
			panic("enqueue_prep for a tx buffer");

		r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
					 txq->txq_dmamaps[slot],
					 m, BUS_DMA_WRITE|BUS_DMA_NOWAIT);
		if (r != 0) {
			/* maybe just too fragmented */
			struct mbuf *newm;

			newm = m_defrag(m, M_NOWAIT);
			if (newm == NULL) {
				aprint_error_dev(sc->sc_dev,
				    "m_defrag() failed\n");
				goto skip;
			}

			m = newm;
			r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
					 txq->txq_dmamaps[slot],
					 m, BUS_DMA_WRITE|BUS_DMA_NOWAIT);
			if (r != 0) {
				aprint_error_dev(sc->sc_dev,
				    "tx dmamap load failed, error code %d\n",
				    r);
skip:
				m_freem(m);
				virtio_enqueue_abort(vsc, vq, slot);
				continue;
			}
		}

		/* This should actually never fail */
		r = virtio_enqueue_reserve(vsc, vq, slot,
					txq->txq_dmamaps[slot]->dm_nsegs + 1);
		if (r != 0) {
			aprint_error_dev(sc->sc_dev,
			    "virtio_enqueue_reserve failed, error code %d\n",
			    r);
			bus_dmamap_unload(virtio_dmat(vsc),
					  txq->txq_dmamaps[slot]);
			/* slot already freed by virtio_enqueue_reserve */
			m_freem(m);
			continue;
		}

		txq->txq_mbufs[slot] = m;

		memset(&txq->txq_hdrs[slot], 0, sizeof(struct virtio_net_hdr));
		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_dmamaps[slot],
				0, txq->txq_dmamaps[slot]->dm_mapsize,
				BUS_DMASYNC_PREWRITE);
		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_hdr_dmamaps[slot],
				0, txq->txq_hdr_dmamaps[slot]->dm_mapsize,
				BUS_DMASYNC_PREWRITE);
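		/*
		 * Each packet is posted as a two-descriptor chain: the
		 * virtio_net_hdr first, then the payload dmamap.
		 */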
		virtio_enqueue(vsc, vq, slot, txq->txq_hdr_dmamaps[slot], true);
		virtio_enqueue(vsc, vq, slot, txq->txq_dmamaps[slot], true);
		virtio_enqueue_commit(vsc, vq, slot, false);

		queued++;
		bpf_mtap(ifp, m, BPF_D_OUT);
	}

	if (queued > 0) {
		virtio_enqueue_commit(vsc, vq, -1, true);
		ifp->if_timer = 5;
	}

out:
	VIOIF_TXQ_UNLOCK(txq);
}

static int
vioif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
{
	int s, r;

	s = splnet();

	r = ether_ioctl(ifp, cmd, data);
	if ((r == 0 && cmd == SIOCSIFFLAGS) ||
	    (r == ENETRESET && (cmd == SIOCADDMULTI || cmd == SIOCDELMULTI))) {
		if (ifp->if_flags & IFF_RUNNING)
			r = vioif_rx_filter(ifp->if_softc);
		else
			r = 0;
	}

	splx(s);

	return r;
}

void
vioif_watchdog(struct ifnet *ifp)
{
	struct vioif_softc *sc = ifp->if_softc;
	struct vioif_txqueue *txq = &sc->sc_txq;

	if (ifp->if_flags & IFF_RUNNING)
		vioif_tx_vq_done(txq->txq_vq);
}


/*
 * Receive implementation
 */
/* allocate and initialize a mbuf for receive */
static int
vioif_add_rx_mbuf(struct vioif_softc *sc, int i)
{
	struct vioif_rxqueue *rxq = &sc->sc_rxq;
	struct mbuf *m;
	int r;

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return ENOBUFS;
	MCLGET(m, M_DONTWAIT);
	if ((m->m_flags & M_EXT) == 0) {
		m_freem(m);
		return ENOBUFS;
	}
	rxq->rxq_mbufs[i] = m;
	m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
	r = bus_dmamap_load_mbuf(virtio_dmat(sc->sc_virtio),
				 rxq->rxq_dmamaps[i],
				 m, BUS_DMA_READ|BUS_DMA_NOWAIT);
	if (r) {
		m_freem(m);
		rxq->rxq_mbufs[i] = 0;
		return r;
	}

	return 0;
}

/* free a mbuf for receive */
static void
vioif_free_rx_mbuf(struct vioif_softc *sc, int i)
{
	struct vioif_rxqueue *rxq = &sc->sc_rxq;

	bus_dmamap_unload(virtio_dmat(sc->sc_virtio), rxq->rxq_dmamaps[i]);
	m_freem(rxq->rxq_mbufs[i]);
	rxq->rxq_mbufs[i] = NULL;
}

/* add mbufs for all the empty receive slots */
static void
vioif_populate_rx_mbufs(struct vioif_softc *sc)
{
	struct vioif_rxqueue *rxq = &sc->sc_rxq;

	VIOIF_RXQ_LOCK(rxq);
	vioif_populate_rx_mbufs_locked(sc);
	VIOIF_RXQ_UNLOCK(rxq);
}

static void
vioif_populate_rx_mbufs_locked(struct vioif_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	struct vioif_rxqueue *rxq = &sc->sc_rxq;
	int i, r, ndone = 0;
	struct virtqueue *vq = rxq->rxq_vq;

	KASSERT(VIOIF_RXQ_LOCKED(rxq));

	if (rxq->rxq_stopping)
		return;

	for (i = 0; i < vq->vq_num; i++) {
		int slot;
		r = virtio_enqueue_prep(vsc, vq, &slot);
		if (r == EAGAIN)
			break;
		if (r != 0)
			panic("enqueue_prep for rx buffers");
		if (rxq->rxq_mbufs[slot] == NULL) {
			r = vioif_add_rx_mbuf(sc, slot);
			if (r != 0) {
				printf("%s: rx mbuf allocation failed, "
				       "error code %d\n",
				       device_xname(sc->sc_dev), r);
				break;
			}
		}
		r = virtio_enqueue_reserve(vsc, vq, slot,
					rxq->rxq_dmamaps[slot]->dm_nsegs + 1);
		if (r != 0) {
			vioif_free_rx_mbuf(sc, slot);
			break;
		}
		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_hdr_dmamaps[slot],
			0, sizeof(struct virtio_net_hdr), BUS_DMASYNC_PREREAD);
		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_dmamaps[slot],
			0, MCLBYTES, BUS_DMASYNC_PREREAD);
		virtio_enqueue(vsc, vq, slot, rxq->rxq_hdr_dmamaps[slot], false);
		virtio_enqueue(vsc, vq, slot, rxq->rxq_dmamaps[slot], false);
		virtio_enqueue_commit(vsc, vq, slot, false);
		ndone++;
	}
	if (ndone > 0)
		virtio_enqueue_commit(vsc, vq, -1, true);
}

/* dequeue received packets */
static int
vioif_rx_deq(struct vioif_softc *sc)
{
	struct vioif_rxqueue *rxq = &sc->sc_rxq;
	int r;

	KASSERT(rxq->rxq_stopping);

	VIOIF_RXQ_LOCK(rxq);
	r = vioif_rx_deq_locked(sc);
	VIOIF_RXQ_UNLOCK(rxq);

	return r;
}

/* dequeue received packets */
static int
vioif_rx_deq_locked(struct vioif_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	struct vioif_rxqueue *rxq = &sc->sc_rxq;
	struct virtqueue *vq = rxq->rxq_vq;
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	struct mbuf *m;
	int r = 0;
	int slot, len;

	KASSERT(VIOIF_RXQ_LOCKED(rxq));

	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
		len -= sizeof(struct virtio_net_hdr);
		r = 1;
		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_hdr_dmamaps[slot],
				0, sizeof(struct virtio_net_hdr),
				BUS_DMASYNC_POSTREAD);
		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_dmamaps[slot],
				0, MCLBYTES,
				BUS_DMASYNC_POSTREAD);
		m = rxq->rxq_mbufs[slot];
		KASSERT(m != NULL);
		bus_dmamap_unload(virtio_dmat(vsc), rxq->rxq_dmamaps[slot]);
		rxq->rxq_mbufs[slot] = 0;
		virtio_dequeue_commit(vsc, vq, slot);
		m_set_rcvif(m, ifp);
		m->m_len = m->m_pkthdr.len = len;
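
		/*
		 * rxq_lock is dropped across the handoff to the network
		 * stack below, since the input path may take further
		 * locks (cf. the locking notes above).
		 */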
		VIOIF_RXQ_UNLOCK(rxq);
		if_percpuq_enqueue(ifp->if_percpuq, m);
		VIOIF_RXQ_LOCK(rxq);

		if (rxq->rxq_stopping)
			break;
	}

	return r;
}

/* rx interrupt; call _dequeue above and schedule a softint */
static int
vioif_rx_vq_done(struct virtqueue *vq)
{
	struct virtio_softc *vsc = vq->vq_owner;
	struct vioif_softc *sc = device_private(virtio_child(vsc));
	struct vioif_rxqueue *rxq = &sc->sc_rxq;
	int r = 0;

#ifdef VIOIF_SOFTINT_INTR
	KASSERT(!cpu_intr_p());
#endif

	VIOIF_RXQ_LOCK(rxq);

	if (rxq->rxq_stopping)
		goto out;

	r = vioif_rx_deq_locked(sc);
	if (r)
#ifdef VIOIF_SOFTINT_INTR
		vioif_populate_rx_mbufs_locked(sc);
#else
		softint_schedule(rxq->rxq_softint);
#endif

out:
	VIOIF_RXQ_UNLOCK(rxq);
	return r;
}

/* softint: enqueue receive requests for new incoming packets */
static void
vioif_rx_softint(void *arg)
{
	struct vioif_softc *sc = arg;

	vioif_populate_rx_mbufs(sc);
}

/* free all the mbufs; called from if_stop(disable) */
static void
vioif_rx_drain(struct vioif_softc *sc)
{
	struct vioif_rxqueue *rxq = &sc->sc_rxq;
	struct virtqueue *vq = rxq->rxq_vq;
	int i;

	for (i = 0; i < vq->vq_num; i++) {
		if (rxq->rxq_mbufs[i] == NULL)
			continue;
		vioif_free_rx_mbuf(sc, i);
	}
}


/*
 * Transmission implementation
 */
/* actual transmission is done in if_start */
/* tx interrupt; dequeue and free mbufs */
/*
 * tx interrupt is actually disabled; this should be called upon
 * tx vq full and watchdog
 */
static int
vioif_tx_vq_done(struct virtqueue *vq)
{
	struct virtio_softc *vsc = vq->vq_owner;
	struct vioif_softc *sc = device_private(virtio_child(vsc));
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	struct vioif_txqueue *txq = &sc->sc_txq;
	int r = 0;

	VIOIF_TXQ_LOCK(txq);

	if (txq->txq_stopping)
		goto out;

	r = vioif_tx_vq_done_locked(vq);

out:
	VIOIF_TXQ_UNLOCK(txq);
	if (r)
		if_schedule_deferred_start(ifp);
	return r;
}

static int
vioif_tx_vq_done_locked(struct virtqueue *vq)
{
	struct virtio_softc *vsc = vq->vq_owner;
	struct vioif_softc *sc = device_private(virtio_child(vsc));
	struct vioif_txqueue *txq = &sc->sc_txq;
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	struct mbuf *m;
	int r = 0;
	int slot, len;

	KASSERT(VIOIF_TXQ_LOCKED(txq));

	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
		r++;
		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_hdr_dmamaps[slot],
				0, sizeof(struct virtio_net_hdr),
				BUS_DMASYNC_POSTWRITE);
		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_dmamaps[slot],
				0, txq->txq_dmamaps[slot]->dm_mapsize,
				BUS_DMASYNC_POSTWRITE);
		m = txq->txq_mbufs[slot];
		bus_dmamap_unload(virtio_dmat(vsc), txq->txq_dmamaps[slot]);
		txq->txq_mbufs[slot] = 0;
		virtio_dequeue_commit(vsc, vq, slot);
		ifp->if_opackets++;
		m_freem(m);
	}

	if (r)
		ifp->if_flags &= ~IFF_OACTIVE;
	return r;
}

/* free all the mbufs already put on vq; called from if_stop(disable) */
static void
vioif_tx_drain(struct vioif_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	struct vioif_txqueue *txq = &sc->sc_txq;
	struct virtqueue *vq = txq->txq_vq;
	int i;

	KASSERT(txq->txq_stopping);

	for (i = 0; i < vq->vq_num; i++) {
		if (txq->txq_mbufs[i] == NULL)
			continue;
		bus_dmamap_unload(virtio_dmat(vsc), txq->txq_dmamaps[i]);
		m_freem(txq->txq_mbufs[i]);
		txq->txq_mbufs[i] = NULL;
	}
}

/*
 * Control vq
 */
/* issue a VIRTIO_NET_CTRL_RX class command and wait for completion */
static void
vioif_ctrl_acquire(struct vioif_softc *sc)
{
	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;

	mutex_enter(&ctrlq->ctrlq_wait_lock);
	while (ctrlq->ctrlq_inuse != FREE)
		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
	ctrlq->ctrlq_inuse = INUSE;
	ctrlq->ctrlq_owner = curlwp;
	mutex_exit(&ctrlq->ctrlq_wait_lock);
}

static void
vioif_ctrl_release(struct vioif_softc *sc)
{
	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;

	KASSERT(ctrlq->ctrlq_inuse != FREE);
	KASSERT(ctrlq->ctrlq_owner == curlwp);

	mutex_enter(&ctrlq->ctrlq_wait_lock);
	ctrlq->ctrlq_inuse = FREE;
	cv_signal(&ctrlq->ctrlq_wait);
	mutex_exit(&ctrlq->ctrlq_wait_lock);
}

static int
vioif_ctrl_load_cmdspec(struct vioif_softc *sc,
    struct vioif_ctrl_cmdspec *specs, int nspecs)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	int i, r, loaded;

	loaded = 0;
	for (i = 0; i < nspecs; i++) {
		r = bus_dmamap_load(virtio_dmat(vsc),
		    specs[i].dmamap, specs[i].buf, specs[i].bufsize,
		    NULL, BUS_DMA_WRITE|BUS_DMA_NOWAIT);
		if (r) {
			printf("%s: control command dmamap load failed, "
			       "error code %d\n", device_xname(sc->sc_dev), r);
			goto err;
		}
		loaded++;
	}

	return 0;

err:
	for (i = 0; i < loaded; i++) {
		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
	}

	return r;
}

static void
vioif_ctrl_unload_cmdspec(struct vioif_softc *sc,
    struct vioif_ctrl_cmdspec *specs, int nspecs)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	int i;

	for (i = 0; i < nspecs; i++) {
		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
	}
}

static int
vioif_ctrl_send_command(struct vioif_softc *sc, uint8_t class, uint8_t cmd,
    struct vioif_ctrl_cmdspec *specs, int nspecs)
{
	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
	struct virtqueue *vq = ctrlq->ctrlq_vq;
	struct virtio_softc *vsc = sc->sc_virtio;
	int i, r, slot;

	ctrlq->ctrlq_cmd->class = class;
	ctrlq->ctrlq_cmd->command = cmd;

	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap,
			0, sizeof(struct virtio_net_ctrl_cmd),
			BUS_DMASYNC_PREWRITE);
	for (i = 0; i < nspecs; i++) {
		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap,
				0, specs[i].bufsize,
				BUS_DMASYNC_PREWRITE);
	}
	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap,
			0, sizeof(struct virtio_net_ctrl_status),
			BUS_DMASYNC_PREREAD);

	r = virtio_enqueue_prep(vsc, vq, &slot);
	if (r != 0)
		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
	r = virtio_enqueue_reserve(vsc, vq, slot, nspecs + 2);
	if (r != 0)
		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_cmd_dmamap, true);
	for (i = 0; i < nspecs; i++) {
		virtio_enqueue(vsc, vq, slot, specs[i].dmamap, true);
	}
	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_status_dmamap, false);
	virtio_enqueue_commit(vsc, vq, slot, true);

	/* wait for done */
	mutex_enter(&ctrlq->ctrlq_wait_lock);
	while (ctrlq->ctrlq_inuse != DONE)
		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
	mutex_exit(&ctrlq->ctrlq_wait_lock);
	/* already dequeued */

	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap, 0,
			sizeof(struct virtio_net_ctrl_cmd),
			BUS_DMASYNC_POSTWRITE);
	for (i = 0; i < nspecs; i++) {
		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap, 0,
				specs[i].bufsize,
				BUS_DMASYNC_POSTWRITE);
	}
	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap, 0,
			sizeof(struct virtio_net_ctrl_status),
			BUS_DMASYNC_POSTREAD);

	if (ctrlq->ctrlq_status->ack == VIRTIO_NET_OK)
		r = 0;
	else {
		printf("%s: failed setting rx mode\n",
		       device_xname(sc->sc_dev));
		r = EIO;
	}

	return r;
}

static int
vioif_ctrl_rx(struct vioif_softc *sc, int cmd, bool onoff)
{
	struct virtio_net_ctrl_rx *rx = sc->sc_ctrlq.ctrlq_rx;
	struct vioif_ctrl_cmdspec specs[1];
	int r;

	if (!sc->sc_has_ctrl)
		return ENOTSUP;

	vioif_ctrl_acquire(sc);

	rx->onoff = onoff;
	specs[0].dmamap = sc->sc_ctrlq.ctrlq_rx_dmamap;
	specs[0].buf = rx;
	specs[0].bufsize = sizeof(*rx);

	r = vioif_ctrl_send_command(sc, VIRTIO_NET_CTRL_RX, cmd,
	    specs, __arraycount(specs));

	vioif_ctrl_release(sc);
	return r;
}

static int
vioif_set_promisc(struct vioif_softc *sc, bool onoff)
{
	int r;

	r = vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, onoff);

	return r;
}

static int
vioif_set_allmulti(struct vioif_softc *sc, bool onoff)
{
	int r;

	r = vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, onoff);

	return r;
}

/* issue VIRTIO_NET_CTRL_MAC_TABLE_SET command and wait for completion */
static int
vioif_set_rx_filter(struct vioif_softc *sc)
{
	/* filter already set in ctrlq->ctrlq_mac_tbl */
	struct virtio_net_ctrl_mac_tbl *mac_tbl_uc, *mac_tbl_mc;
	struct vioif_ctrl_cmdspec specs[2];
	int nspecs = __arraycount(specs);
	int r;

	mac_tbl_uc = sc->sc_ctrlq.ctrlq_mac_tbl_uc;
	mac_tbl_mc = sc->sc_ctrlq.ctrlq_mac_tbl_mc;

	if (!sc->sc_has_ctrl)
		return ENOTSUP;

	vioif_ctrl_acquire(sc);

	specs[0].dmamap = sc->sc_ctrlq.ctrlq_tbl_uc_dmamap;
	specs[0].buf = mac_tbl_uc;
	specs[0].bufsize = sizeof(*mac_tbl_uc)
	    + (ETHER_ADDR_LEN * mac_tbl_uc->nentries);

	specs[1].dmamap = sc->sc_ctrlq.ctrlq_tbl_mc_dmamap;
	specs[1].buf = mac_tbl_mc;
	specs[1].bufsize = sizeof(*mac_tbl_mc)
	    + (ETHER_ADDR_LEN * mac_tbl_mc->nentries);

	r = vioif_ctrl_load_cmdspec(sc, specs, nspecs);
	if (r != 0)
		goto out;

	r = vioif_ctrl_send_command(sc,
	    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET,
	    specs, nspecs);

	vioif_ctrl_unload_cmdspec(sc, specs, nspecs);

out:
	vioif_ctrl_release(sc);

	return r;
}

/* ctrl vq interrupt; wake up the command issuer */
static int
vioif_ctrl_vq_done(struct virtqueue *vq)
{
	struct virtio_softc *vsc = vq->vq_owner;
	struct vioif_softc *sc = device_private(virtio_child(vsc));
	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
	int r, slot;

	r = virtio_dequeue(vsc, vq, &slot, NULL);
	if (r == ENOENT)
		return 0;
	virtio_dequeue_commit(vsc, vq, slot);

	mutex_enter(&ctrlq->ctrlq_wait_lock);
	ctrlq->ctrlq_inuse = DONE;
	cv_signal(&ctrlq->ctrlq_wait);
	mutex_exit(&ctrlq->ctrlq_wait_lock);

	return 1;
}

/*
 * If IFF_PROMISC is requested, set promiscuous mode.
 * If the multicast filter is small enough (<= MAXENTRIES), set the rx filter.
 * If the multicast filter is too large, use ALLMULTI.
 */
/*
 * If setting the rx filter fails, fall back to ALLMULTI.
 * If ALLMULTI fails, fall back to PROMISC.
 */
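/*
 * Concretely: joining up to VIRTIO_NET_CTRL_MAC_MAXENTRIES (64) plain
 * multicast addresses programs the MAC filter table; a 65th group, or
 * any range entry (enm_addrlo != enm_addrhi), switches to ALLMULTI,
 * and a failed ALLMULTI command degrades further to promiscuous mode.
 */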
static int
vioif_rx_filter(struct vioif_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	struct ether_multi *enm;
	struct ether_multistep step;
	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
	int nentries;
	int promisc = 0, allmulti = 0, rxfilter = 0;
	int r;

	if (!sc->sc_has_ctrl) {	/* no ctrl vq; always promisc */
		ifp->if_flags |= IFF_PROMISC;
		return 0;
	}

	if (ifp->if_flags & IFF_PROMISC) {
		promisc = 1;
		goto set;
	}

	nentries = -1;
	ETHER_LOCK(&sc->sc_ethercom);
	ETHER_FIRST_MULTI(step, &sc->sc_ethercom, enm);
	while (nentries++, enm != NULL) {
		if (nentries >= VIRTIO_NET_CTRL_MAC_MAXENTRIES) {
			allmulti = 1;
			goto set_unlock;
		}
		if (memcmp(enm->enm_addrlo, enm->enm_addrhi,
			   ETHER_ADDR_LEN)) {
			allmulti = 1;
			goto set_unlock;
		}
		memcpy(ctrlq->ctrlq_mac_tbl_mc->macs[nentries],
		       enm->enm_addrlo, ETHER_ADDR_LEN);
		ETHER_NEXT_MULTI(step, enm);
	}
	rxfilter = 1;

set_unlock:
	ETHER_UNLOCK(&sc->sc_ethercom);

set:
	if (rxfilter) {
		ctrlq->ctrlq_mac_tbl_uc->nentries = 0;
		ctrlq->ctrlq_mac_tbl_mc->nentries = nentries;
		r = vioif_set_rx_filter(sc);
		if (r != 0) {
			rxfilter = 0;
			allmulti = 1; /* fallback */
		}
	} else {
		/* remove rx filter */
		ctrlq->ctrlq_mac_tbl_uc->nentries = 0;
		ctrlq->ctrlq_mac_tbl_mc->nentries = 0;
		r = vioif_set_rx_filter(sc);
		/* what to do on failure? */
	}
	if (allmulti) {
		r = vioif_set_allmulti(sc, true);
		if (r != 0) {
			allmulti = 0;
			promisc = 1; /* fallback */
		}
	} else {
		r = vioif_set_allmulti(sc, false);
		/* what to do on failure? */
	}
	if (promisc) {
		r = vioif_set_promisc(sc, true);
	} else {
		r = vioif_set_promisc(sc, false);
	}

	return r;
}

static bool
vioif_is_link_up(struct vioif_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	uint16_t status;

	if (virtio_features(vsc) & VIRTIO_NET_F_STATUS)
		status = virtio_read_device_config_2(vsc,
		    VIRTIO_NET_CONFIG_STATUS);
	else
		status = VIRTIO_NET_S_LINK_UP;

	return ((status & VIRTIO_NET_S_LINK_UP) != 0);
}

/* change link status */
static void
vioif_update_link_status(struct vioif_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	struct vioif_txqueue *txq = &sc->sc_txq;
	bool active, changed;
	int link;

	active = vioif_is_link_up(sc);
	changed = false;

	if (active) {
		if (!sc->sc_link_active)
			changed = true;

		link = LINK_STATE_UP;
		sc->sc_link_active = true;
	} else {
		if (sc->sc_link_active)
			changed = true;

		link = LINK_STATE_DOWN;
		sc->sc_link_active = false;
	}

	if (changed) {
		VIOIF_TXQ_LOCK(txq);
		txq->txq_link_active = sc->sc_link_active;
		VIOIF_TXQ_UNLOCK(txq);

		if_link_state_change(ifp, link);
	}
}

static int
vioif_config_change(struct virtio_softc *vsc)
{
	struct vioif_softc *sc = device_private(virtio_child(vsc));

#ifdef VIOIF_SOFTINT_INTR
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
#endif

#ifdef VIOIF_SOFTINT_INTR
	KASSERT(!cpu_intr_p());
	vioif_update_link_status(sc);
	vioif_start(ifp);
#else
	softint_schedule(sc->sc_ctl_softint);
#endif

	return 0;
}

static void
vioif_ctl_softint(void *arg)
{
	struct vioif_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;

	vioif_update_link_status(sc);
	vioif_start(ifp);
}

MODULE(MODULE_CLASS_DRIVER, if_vioif, "virtio");

#ifdef _MODULE
#include "ioconf.c"
#endif

static int
if_vioif_modcmd(modcmd_t cmd, void *opaque)
{
	int error = 0;

#ifdef _MODULE
	switch (cmd) {
	case MODULE_CMD_INIT:
		error = config_init_component(cfdriver_ioconf_if_vioif,
		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
		break;
	case MODULE_CMD_FINI:
		error = config_fini_component(cfdriver_ioconf_if_vioif,
		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
		break;
	default:
		error = ENOTTY;
		break;
	}
#endif

	return error;
}