if_vioif.c revision 1.73
1/*	$NetBSD: if_vioif.c,v 1.73 2022/03/24 07:51:14 yamaguchi Exp $	*/
2
3/*
4 * Copyright (c) 2020 The NetBSD Foundation, Inc.
5 * Copyright (c) 2010 Minoura Makoto.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__KERNEL_RCSID(0, "$NetBSD: if_vioif.c,v 1.73 2022/03/24 07:51:14 yamaguchi Exp $");
31
32#ifdef _KERNEL_OPT
33#include "opt_net_mpsafe.h"
34#endif
35
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/kernel.h>
39#include <sys/atomic.h>
40#include <sys/bus.h>
41#include <sys/condvar.h>
42#include <sys/device.h>
43#include <sys/evcnt.h>
44#include <sys/intr.h>
45#include <sys/kmem.h>
46#include <sys/mbuf.h>
47#include <sys/mutex.h>
48#include <sys/sockio.h>
49#include <sys/syslog.h>
50#include <sys/cpu.h>
51#include <sys/module.h>
52#include <sys/pcq.h>
53#include <sys/workqueue.h>
54
55#include <dev/pci/virtioreg.h>
56#include <dev/pci/virtiovar.h>
57
58#include <net/if.h>
59#include <net/if_media.h>
60#include <net/if_ether.h>
61
62#include <net/bpf.h>
63
64#include "ioconf.h"
65
/*
 * On a NET_MPSAFE kernel, turn on the driver's MP-safe paths
 * (VIOIF_MPSAFE) and the multiqueue feature request (VIOIF_MULTIQ).
 */
#if defined(NET_MPSAFE)
#define VIOIF_MPSAFE	1
#define VIOIF_MULTIQ	1
#endif /* NET_MPSAFE */
70
71/*
72 * if_vioifreg.h:
73 */
/*
 * Byte offsets of the fields in the device configuration area.
 */
#define VIRTIO_NET_CONFIG_MAC		 0	/* 6 x 8bit: MAC address */
#define VIRTIO_NET_CONFIG_STATUS	 6	/* 16bit: status word */
#define VIRTIO_NET_CONFIG_MAX_VQ_PAIRS	 8	/* 16bit: max rx/tx pairs */
#define VIRTIO_NET_CONFIG_MTU		10	/* 16bit */
79
/*
 * Device-specific feature bits.  Keep VIRTIO_NET_FLAG_BITS in sync when
 * adding a bit here.
 */
#define VIRTIO_NET_F_CSUM		__BIT(0)
#define VIRTIO_NET_F_GUEST_CSUM		__BIT(1)
#define VIRTIO_NET_F_MAC		__BIT(5)
#define VIRTIO_NET_F_GSO		__BIT(6)
#define VIRTIO_NET_F_GUEST_TSO4		__BIT(7)
#define VIRTIO_NET_F_GUEST_TSO6		__BIT(8)
#define VIRTIO_NET_F_GUEST_ECN		__BIT(9)
#define VIRTIO_NET_F_GUEST_UFO		__BIT(10)
#define VIRTIO_NET_F_HOST_TSO4		__BIT(11)
#define VIRTIO_NET_F_HOST_TSO6		__BIT(12)
#define VIRTIO_NET_F_HOST_ECN		__BIT(13)
#define VIRTIO_NET_F_HOST_UFO		__BIT(14)
#define VIRTIO_NET_F_MRG_RXBUF		__BIT(15)
#define VIRTIO_NET_F_STATUS		__BIT(16)
#define VIRTIO_NET_F_CTRL_VQ		__BIT(17)
#define VIRTIO_NET_F_CTRL_RX		__BIT(18)
#define VIRTIO_NET_F_CTRL_VLAN		__BIT(19)
#define VIRTIO_NET_F_CTRL_RX_EXTRA	__BIT(20)
#define VIRTIO_NET_F_GUEST_ANNOUNCE	__BIT(21)
#define VIRTIO_NET_F_MQ			__BIT(22)
101
/*
 * Name table for the feature bits above, appended to the common virtio
 * table.  Each entry is one byte holding (bit position + 1) followed by
 * the feature name; entries are listed from the highest bit down.
 */
#define VIRTIO_NET_FLAG_BITS			\
	VIRTIO_COMMON_FLAG_BITS			\
	"\x17""MQ"				\
	"\x16""GUEST_ANNOUNCE"			\
	"\x15""CTRL_RX_EXTRA"			\
	"\x14""CTRL_VLAN"			\
	"\x13""CTRL_RX"				\
	"\x12""CTRL_VQ"				\
	"\x11""STATUS"				\
	"\x10""MRG_RXBUF"			\
	"\x0f""HOST_UFO"			\
	"\x0e""HOST_ECN"			\
	"\x0d""HOST_TSO6"			\
	"\x0c""HOST_TSO4"			\
	"\x0b""GUEST_UFO"			\
	"\x0a""GUEST_ECN"			\
	"\x09""GUEST_TSO6"			\
	"\x08""GUEST_TSO4"			\
	"\x07""GSO"				\
	"\x06""MAC"				\
	"\x02""GUEST_CSUM"			\
	"\x01""CSUM"
124
/* Bits of the 16bit status word at VIRTIO_NET_CONFIG_STATUS */
#define VIRTIO_NET_S_LINK_UP		1
127
/*
 * Per-packet header exchanged with the device.  The trailing num_buffers
 * member is only part of the header when VIRTIO_NET_F_MRG_RXBUF or
 * VIRTIO_F_VERSION_1 was negotiated; otherwise the header ends right
 * before it (see how sc_hdr_size is computed in vioif_attach()).
 */
struct virtio_net_hdr {
	uint8_t		flags;		/* VIRTIO_NET_HDR_F_* */
	uint8_t		gso_type;	/* VIRTIO_NET_HDR_GSO_* */
	uint16_t	hdr_len;
	uint16_t	gso_size;
	uint16_t	csum_start;
	uint16_t	csum_offset;

	uint16_t	num_buffers;	/* MRG_RXBUF enabled or v1 only */
} __packed;
139
/* values for virtio_net_hdr.flags */
#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1

/* values for virtio_net_hdr.gso_type */
#define VIRTIO_NET_HDR_GSO_NONE		0
#define VIRTIO_NET_HDR_GSO_TCPV4	1
#define VIRTIO_NET_HDR_GSO_UDP		3
#define VIRTIO_NET_HDR_GSO_TCPV6	4
#define VIRTIO_NET_HDR_GSO_ECN		0x80	/* |'ed into gso_type */

#define VIRTIO_NET_MAX_GSO_LEN		(65536+ETHER_HDR_LEN)
148
/*
 * Control virtqueue: every request starts with a (class, command) pair.
 * The classes are listed below, each followed by its commands.
 */
struct virtio_net_ctrl_cmd {
	uint8_t	class;		/* VIRTIO_NET_CTRL_{RX,MAC,VLAN,MQ} */
	uint8_t	command;	/* command within that class */
} __packed;

/* class: receive mode */
#define VIRTIO_NET_CTRL_RX		0
# define VIRTIO_NET_CTRL_RX_PROMISC	0
# define VIRTIO_NET_CTRL_RX_ALLMULTI	1

/* class: MAC address filter */
#define VIRTIO_NET_CTRL_MAC		1
# define VIRTIO_NET_CTRL_MAC_TABLE_SET	0

/* class: VLAN filter */
#define VIRTIO_NET_CTRL_VLAN		2
# define VIRTIO_NET_CTRL_VLAN_ADD	0
# define VIRTIO_NET_CTRL_VLAN_DEL	1

/* class: multiqueue; _MIN/_MAX bound the number of queue pairs */
#define VIRTIO_NET_CTRL_MQ			4
# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET	0
# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN	1
# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX	0x8000
169
/* Result of a control command: a single acknowledgement byte */
struct virtio_net_ctrl_status {
	uint8_t	ack;		/* VIRTIO_NET_OK or VIRTIO_NET_ERR */
} __packed;
#define VIRTIO_NET_OK			0
#define VIRTIO_NET_ERR			1
175
/* Parameter of a VIRTIO_NET_CTRL_RX class command */
struct virtio_net_ctrl_rx {
	uint8_t	onoff;
} __packed;
179
180struct virtio_net_ctrl_mac_tbl {
181	uint32_t nentries;
182	uint8_t macs[][ETHER_ADDR_LEN];
183} __packed;
184
/* Parameter of a VIRTIO_NET_CTRL_VLAN class command */
struct virtio_net_ctrl_vlan {
	uint16_t id;
} __packed;
188
/* Parameter of a VIRTIO_NET_CTRL_MQ class command */
struct virtio_net_ctrl_mq {
	uint16_t virtqueue_pairs;
} __packed;
192
193/*
194 * if_vioifvar.h:
195 */
196
/*
 * Locking notes:
 * + a field in vioif_txqueue is protected by txq_lock (a spin mutex), and
 *   a field in vioif_rxqueue is protected by rxq_lock (a spin mutex).
 *      - more than one lock cannot be held at once
 * + ctrlq_inuse is protected by ctrlq_wait_lock.
 *      - other fields in vioif_ctrlqueue are protected by ctrlq_inuse
 *      - txq_lock or rxq_lock cannot be held along with ctrlq_wait_lock
 * + fields in vioif_softc except queues are protected by
 *   sc->sc_lock (an adaptive mutex)
 *      - the lock is held before acquisition of other locks
 */
209
210struct vioif_ctrl_cmdspec {
211	bus_dmamap_t	dmamap;
212	void		*buf;
213	bus_size_t	bufsize;
214};
215
216struct vioif_work {
217	struct work	 cookie;
218	void		(*func)(void *);
219	void		*arg;
220	unsigned int	 added;
221};
222
223struct vioif_txqueue {
224	kmutex_t		*txq_lock;	/* lock for tx operations */
225
226	struct virtqueue	*txq_vq;
227	bool			txq_stopping;
228	bool			txq_link_active;
229	pcq_t			*txq_intrq;
230
231	struct virtio_net_hdr	*txq_hdrs;
232	bus_dmamap_t		*txq_hdr_dmamaps;
233
234	struct mbuf		**txq_mbufs;
235	bus_dmamap_t		*txq_dmamaps;
236
237	void			*txq_deferred_transmit;
238	void			*txq_handle_si;
239	struct vioif_work	 txq_work;
240	bool			 txq_workqueue;
241	bool			 txq_active;
242
243	char			 txq_evgroup[16];
244	struct evcnt		 txq_defrag_failed;
245	struct evcnt		 txq_mbuf_load_failed;
246	struct evcnt		 txq_enqueue_reserve_failed;
247};
248
249struct vioif_rxqueue {
250	kmutex_t		*rxq_lock;	/* lock for rx operations */
251
252	struct virtqueue	*rxq_vq;
253	bool			rxq_stopping;
254
255	struct virtio_net_hdr	*rxq_hdrs;
256	bus_dmamap_t		*rxq_hdr_dmamaps;
257
258	struct mbuf		**rxq_mbufs;
259	bus_dmamap_t		*rxq_dmamaps;
260
261	void			*rxq_handle_si;
262	struct vioif_work	 rxq_work;
263	bool			 rxq_workqueue;
264	bool			 rxq_active;
265
266	char			 rxq_evgroup[16];
267	struct evcnt		 rxq_mbuf_add_failed;
268};
269
270struct vioif_ctrlqueue {
271	struct virtqueue		*ctrlq_vq;
272	enum {
273		FREE, INUSE, DONE
274	}				ctrlq_inuse;
275	kcondvar_t			ctrlq_wait;
276	kmutex_t			ctrlq_wait_lock;
277	struct lwp			*ctrlq_owner;
278
279	struct virtio_net_ctrl_cmd	*ctrlq_cmd;
280	struct virtio_net_ctrl_status	*ctrlq_status;
281	struct virtio_net_ctrl_rx	*ctrlq_rx;
282	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_uc;
283	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_mc;
284	struct virtio_net_ctrl_mq	*ctrlq_mq;
285
286	bus_dmamap_t			ctrlq_cmd_dmamap;
287	bus_dmamap_t			ctrlq_status_dmamap;
288	bus_dmamap_t			ctrlq_rx_dmamap;
289	bus_dmamap_t			ctrlq_tbl_uc_dmamap;
290	bus_dmamap_t			ctrlq_tbl_mc_dmamap;
291	bus_dmamap_t			ctrlq_mq_dmamap;
292
293	struct evcnt			ctrlq_cmd_load_failed;
294	struct evcnt			ctrlq_cmd_failed;
295};
296
297struct vioif_softc {
298	device_t		sc_dev;
299	kmutex_t		sc_lock;
300	struct sysctllog	*sc_sysctllog;
301
302	struct virtio_softc	*sc_virtio;
303	struct virtqueue	*sc_vqs;
304	u_int			 sc_hdr_size;
305
306	int			sc_max_nvq_pairs;
307	int			sc_req_nvq_pairs;
308	int			sc_act_nvq_pairs;
309
310	uint8_t			sc_mac[ETHER_ADDR_LEN];
311	struct ethercom		sc_ethercom;
312	short			sc_deferred_init_done;
313	bool			sc_link_active;
314
315	struct vioif_txqueue	*sc_txq;
316	struct vioif_rxqueue	*sc_rxq;
317
318	bool			sc_has_ctrl;
319	struct vioif_ctrlqueue	sc_ctrlq;
320
321	bus_dma_segment_t	sc_hdr_segs[1];
322	void			*sc_dmamem;
323	void			*sc_kmem;
324
325	void			*sc_ctl_softint;
326
327	struct workqueue	*sc_txrx_workqueue;
328	bool			 sc_txrx_workqueue_sysctl;
329	u_int			 sc_tx_intr_process_limit;
330	u_int			 sc_tx_process_limit;
331	u_int			 sc_rx_intr_process_limit;
332	u_int			 sc_rx_process_limit;
333};
/* DMA segments per tx packet / entries in the multicast filter table */
#define VIRTIO_NET_TX_MAXNSEGS		(16) /* XXX */
#define VIRTIO_NET_CTRL_MAC_MAXENTRIES	(64) /* XXX */

/* initial values of the sc_{tx,rx}[_intr]_process_limit fields */
#define VIOIF_TX_INTR_PROCESS_LIMIT	256
#define VIOIF_TX_PROCESS_LIMIT		256
#define VIOIF_RX_INTR_PROCESS_LIMIT	0U
#define VIOIF_RX_PROCESS_LIMIT		256

/* priority of the tx/rx workqueue */
#define VIOIF_WORKQUEUE_PRI		PRI_SOFTNET
343
344/* cfattach interface functions */
345static int	vioif_match(device_t, cfdata_t, void *);
346static void	vioif_attach(device_t, device_t, void *);
347static void	vioif_deferred_init(device_t);
348static int	vioif_finalize_teardown(device_t);
349
350/* ifnet interface functions */
351static int	vioif_init(struct ifnet *);
352static void	vioif_stop(struct ifnet *, int);
353static void	vioif_start(struct ifnet *);
354static void	vioif_start_locked(struct ifnet *, struct vioif_txqueue *);
355static int	vioif_transmit(struct ifnet *, struct mbuf *);
356static void	vioif_transmit_locked(struct ifnet *, struct vioif_txqueue *);
357static int	vioif_ioctl(struct ifnet *, u_long, void *);
358static void	vioif_watchdog(struct ifnet *);
359
360/* rx */
361static int	vioif_add_rx_mbuf(struct vioif_rxqueue *, int);
362static void	vioif_free_rx_mbuf(struct vioif_rxqueue *, int);
363static void	vioif_populate_rx_mbufs_locked(struct vioif_softc *,
364		    struct vioif_rxqueue *);
365static void	vioif_rx_queue_clear(struct vioif_rxqueue *);
366static bool	vioif_rx_deq_locked(struct vioif_softc *, struct virtio_softc *,
367		    struct vioif_rxqueue *, u_int);
368static int	vioif_rx_intr(void *);
369static void	vioif_rx_handle(void *);
370static void	vioif_rx_sched_handle(struct vioif_softc *,
371		    struct vioif_rxqueue *);
372static void	vioif_rx_drain(struct vioif_rxqueue *);
373
374/* tx */
375static int	vioif_tx_intr(void *);
376static void	vioif_tx_handle(void *);
377static void	vioif_tx_sched_handle(struct vioif_softc *,
378		    struct vioif_txqueue *);
379static void	vioif_tx_queue_clear(struct vioif_txqueue *);
380static bool	vioif_tx_deq_locked(struct vioif_softc *, struct virtio_softc *,
381		    struct vioif_txqueue *, u_int);
382static void	vioif_tx_drain(struct vioif_txqueue *);
383static void	vioif_deferred_transmit(void *);
384
385/* workqueue */
386static struct workqueue*
387		vioif_workq_create(const char *, pri_t, int, int);
388static void	vioif_workq_destroy(struct workqueue *);
389static void	vioif_workq_work(struct work *, void *);
390static void	vioif_work_set(struct vioif_work *, void(*)(void *), void *);
391static void	vioif_work_add(struct workqueue *, struct vioif_work *);
392static void	vioif_work_wait(struct workqueue *, struct vioif_work *);
393
394/* other control */
395static bool	vioif_is_link_up(struct vioif_softc *);
396static void	vioif_update_link_status(struct vioif_softc *);
397static int	vioif_ctrl_rx(struct vioif_softc *, int, bool);
398static int	vioif_set_promisc(struct vioif_softc *, bool);
399static int	vioif_set_allmulti(struct vioif_softc *, bool);
400static int	vioif_set_rx_filter(struct vioif_softc *);
401static int	vioif_rx_filter(struct vioif_softc *);
402static int	vioif_ctrl_intr(void *);
403static int	vioif_config_change(struct virtio_softc *);
404static void	vioif_ctl_softint(void *);
405static int	vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *, int);
406static void	vioif_enable_interrupt_vqpairs(struct vioif_softc *);
407static void	vioif_disable_interrupt_vqpairs(struct vioif_softc *);
408static int	vioif_setup_sysctl(struct vioif_softc *);
409static void	vioif_setup_stats(struct vioif_softc *);
410
411CFATTACH_DECL_NEW(vioif, sizeof(struct vioif_softc),
412		  vioif_match, vioif_attach, NULL, NULL);
413
414static int
415vioif_match(device_t parent, cfdata_t match, void *aux)
416{
417	struct virtio_attach_args *va = aux;
418
419	if (va->sc_childdevid == VIRTIO_DEVICE_ID_NETWORK)
420		return 1;
421
422	return 0;
423}
424
425static int
426vioif_dmamap_create(struct vioif_softc *sc, bus_dmamap_t *map,
427    bus_size_t size, int nsegs, const char *usage)
428{
429	int r;
430
431	r = bus_dmamap_create(virtio_dmat(sc->sc_virtio), size,
432	    nsegs, size, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, map);
433
434	if (r != 0) {
435		aprint_error_dev(sc->sc_dev, "%s dmamap creation failed, "
436		    "error code %d\n", usage, r);
437	}
438
439	return r;
440}
441
442static void
443vioif_dmamap_destroy(struct vioif_softc *sc, bus_dmamap_t *map)
444{
445
446	if (*map) {
447		bus_dmamap_destroy(virtio_dmat(sc->sc_virtio), *map);
448		*map = NULL;
449	}
450}
451
452static int
453vioif_dmamap_create_load(struct vioif_softc *sc, bus_dmamap_t *map,
454    void *buf, bus_size_t size, int nsegs, int rw, const char *usage)
455{
456	int r;
457
458	r = vioif_dmamap_create(sc, map, size, nsegs, usage);
459	if (r != 0)
460		return 1;
461
462	r = bus_dmamap_load(virtio_dmat(sc->sc_virtio), *map, buf,
463	    size, NULL, rw | BUS_DMA_NOWAIT);
464	if (r != 0) {
465		vioif_dmamap_destroy(sc, map);
466		aprint_error_dev(sc->sc_dev, "%s dmamap load failed. "
467		    "error code %d\n", usage, r);
468	}
469
470	return r;
471}
472
/*
 * Carve `size' bytes off the front of a pre-allocated region.
 *
 * *p is the cursor into the region: the value it holds on entry is
 * returned as a pointer, and the cursor is advanced by `size'.
 * No alignment is applied.
 */
static void *
vioif_assign_mem(intptr_t *p, size_t size)
{
	void *chunk = (void *)*p;

	*p += size;

	return chunk;
}
483
484static void
485vioif_alloc_queues(struct vioif_softc *sc)
486{
487	int nvq_pairs = sc->sc_max_nvq_pairs;
488	int nvqs = nvq_pairs * 2;
489	int i;
490
491	KASSERT(nvq_pairs <= VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX);
492
493	sc->sc_rxq = kmem_zalloc(sizeof(sc->sc_rxq[0]) * nvq_pairs,
494	    KM_SLEEP);
495	sc->sc_txq = kmem_zalloc(sizeof(sc->sc_txq[0]) * nvq_pairs,
496	    KM_SLEEP);
497
498	if (sc->sc_has_ctrl)
499		nvqs++;
500
501	sc->sc_vqs = kmem_zalloc(sizeof(sc->sc_vqs[0]) * nvqs, KM_SLEEP);
502	nvqs = 0;
503	for (i = 0; i < nvq_pairs; i++) {
504		sc->sc_rxq[i].rxq_vq = &sc->sc_vqs[nvqs++];
505		sc->sc_txq[i].txq_vq = &sc->sc_vqs[nvqs++];
506	}
507
508	if (sc->sc_has_ctrl)
509		sc->sc_ctrlq.ctrlq_vq = &sc->sc_vqs[nvqs++];
510}
511
512static void
513vioif_free_queues(struct vioif_softc *sc)
514{
515	int nvq_pairs = sc->sc_max_nvq_pairs;
516	int nvqs = nvq_pairs * 2;
517
518	if (sc->sc_ctrlq.ctrlq_vq)
519		nvqs++;
520
521	if (sc->sc_txq) {
522		kmem_free(sc->sc_txq, sizeof(sc->sc_txq[0]) * nvq_pairs);
523		sc->sc_txq = NULL;
524	}
525
526	if (sc->sc_rxq) {
527		kmem_free(sc->sc_rxq, sizeof(sc->sc_rxq[0]) * nvq_pairs);
528		sc->sc_rxq = NULL;
529	}
530
531	if (sc->sc_vqs) {
532		kmem_free(sc->sc_vqs, sizeof(sc->sc_vqs[0]) * nvqs);
533		sc->sc_vqs = NULL;
534	}
535}
536
537/* allocate memory */
538/*
539 * dma memory is used for:
540 *   rxq_hdrs[slot]:	 metadata array for received frames (READ)
541 *   txq_hdrs[slot]:	 metadata array for frames to be sent (WRITE)
542 *   ctrlq_cmd:		 command to be sent via ctrl vq (WRITE)
543 *   ctrlq_status:	 return value for a command via ctrl vq (READ)
544 *   ctrlq_rx:		 parameter for a VIRTIO_NET_CTRL_RX class command
545 *			 (WRITE)
546 *   ctrlq_mac_tbl_uc:	 unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
547 *			 class command (WRITE)
548 *   ctrlq_mac_tbl_mc:	 multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
549 *			 class command (WRITE)
550 * ctrlq_* structures are allocated only one each; they are protected by
551 * ctrlq_inuse variable and ctrlq_wait condvar.
552 */
553/*
554 * dynamically allocated memory is used for:
555 *   rxq_hdr_dmamaps[slot]:	bus_dmamap_t array for sc_rx_hdrs[slot]
556 *   txq_hdr_dmamaps[slot]:	bus_dmamap_t array for sc_tx_hdrs[slot]
557 *   rxq_dmamaps[slot]:		bus_dmamap_t array for received payload
558 *   txq_dmamaps[slot]:		bus_dmamap_t array for sent payload
559 *   rxq_mbufs[slot]:		mbuf pointer array for received frames
560 *   txq_mbufs[slot]:		mbuf pointer array for sent frames
561 */
562static int
563vioif_alloc_mems(struct vioif_softc *sc)
564{
565	struct virtio_softc *vsc = sc->sc_virtio;
566	struct vioif_txqueue *txq;
567	struct vioif_rxqueue *rxq;
568	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
569	int allocsize, allocsize2, r, rsegs, i, qid;
570	void *vaddr;
571	intptr_t p;
572
573	allocsize = 0;
574	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
575		rxq = &sc->sc_rxq[qid];
576		txq = &sc->sc_txq[qid];
577
578		allocsize += sizeof(struct virtio_net_hdr) *
579			(rxq->rxq_vq->vq_num + txq->txq_vq->vq_num);
580	}
581	if (sc->sc_has_ctrl) {
582		allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1;
583		allocsize += sizeof(struct virtio_net_ctrl_status) * 1;
584		allocsize += sizeof(struct virtio_net_ctrl_rx) * 1;
585		allocsize += sizeof(struct virtio_net_ctrl_mac_tbl)
586		    + sizeof(struct virtio_net_ctrl_mac_tbl)
587		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES;
588		allocsize += sizeof(struct virtio_net_ctrl_mq) * 1;
589	}
590	r = bus_dmamem_alloc(virtio_dmat(vsc), allocsize, 0, 0,
591	    &sc->sc_hdr_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
592	if (r != 0) {
593		aprint_error_dev(sc->sc_dev,
594		    "DMA memory allocation failed, size %d, "
595		    "error code %d\n", allocsize, r);
596		goto err_none;
597	}
598	r = bus_dmamem_map(virtio_dmat(vsc),
599	    &sc->sc_hdr_segs[0], 1, allocsize, &vaddr, BUS_DMA_NOWAIT);
600	if (r != 0) {
601		aprint_error_dev(sc->sc_dev,
602		    "DMA memory map failed, error code %d\n", r);
603		goto err_dmamem_alloc;
604	}
605
606	memset(vaddr, 0, allocsize);
607	sc->sc_dmamem = vaddr;
608	p = (intptr_t) vaddr;
609
610	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
611		rxq = &sc->sc_rxq[qid];
612		txq = &sc->sc_txq[qid];
613
614		rxq->rxq_hdrs = vioif_assign_mem(&p,
615		    sizeof(struct virtio_net_hdr) * rxq->rxq_vq->vq_num);
616		txq->txq_hdrs = vioif_assign_mem(&p,
617		    sizeof(struct virtio_net_hdr) * txq->txq_vq->vq_num);
618	}
619	if (sc->sc_has_ctrl) {
620		ctrlq->ctrlq_cmd = vioif_assign_mem(&p,
621		    sizeof(*ctrlq->ctrlq_cmd));
622		ctrlq->ctrlq_status = vioif_assign_mem(&p,
623		    sizeof(*ctrlq->ctrlq_status));
624		ctrlq->ctrlq_rx = vioif_assign_mem(&p,
625		    sizeof(*ctrlq->ctrlq_rx));
626		ctrlq->ctrlq_mac_tbl_uc = vioif_assign_mem(&p,
627		    sizeof(*ctrlq->ctrlq_mac_tbl_uc));
628		ctrlq->ctrlq_mac_tbl_mc = vioif_assign_mem(&p,
629		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
630		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES);
631		ctrlq->ctrlq_mq = vioif_assign_mem(&p, sizeof(*ctrlq->ctrlq_mq));
632	}
633
634	allocsize2 = 0;
635	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
636		int rxqsize, txqsize;
637
638		rxq = &sc->sc_rxq[qid];
639		txq = &sc->sc_txq[qid];
640		rxqsize = rxq->rxq_vq->vq_num;
641		txqsize = txq->txq_vq->vq_num;
642
643		allocsize2 += sizeof(rxq->rxq_dmamaps[0]) * rxqsize;
644		allocsize2 += sizeof(rxq->rxq_hdr_dmamaps[0]) * rxqsize;
645		allocsize2 += sizeof(rxq->rxq_mbufs[0]) * rxqsize;
646
647		allocsize2 += sizeof(txq->txq_dmamaps[0]) * txqsize;
648		allocsize2 += sizeof(txq->txq_hdr_dmamaps[0]) * txqsize;
649		allocsize2 += sizeof(txq->txq_mbufs[0]) * txqsize;
650	}
651	vaddr = kmem_zalloc(allocsize2, KM_SLEEP);
652	sc->sc_kmem = vaddr;
653	p = (intptr_t) vaddr;
654
655	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
656		int rxqsize, txqsize;
657		rxq = &sc->sc_rxq[qid];
658		txq = &sc->sc_txq[qid];
659		rxqsize = rxq->rxq_vq->vq_num;
660		txqsize = txq->txq_vq->vq_num;
661
662		rxq->rxq_hdr_dmamaps = vioif_assign_mem(&p,
663		    sizeof(rxq->rxq_hdr_dmamaps[0]) * rxqsize);
664		txq->txq_hdr_dmamaps = vioif_assign_mem(&p,
665		    sizeof(txq->txq_hdr_dmamaps[0]) * txqsize);
666		rxq->rxq_dmamaps = vioif_assign_mem(&p,
667		    sizeof(rxq->rxq_dmamaps[0]) * rxqsize);
668		txq->txq_dmamaps = vioif_assign_mem(&p,
669		    sizeof(txq->txq_dmamaps[0]) * txqsize);
670		rxq->rxq_mbufs = vioif_assign_mem(&p,
671		    sizeof(rxq->rxq_mbufs[0]) * rxqsize);
672		txq->txq_mbufs = vioif_assign_mem(&p,
673		    sizeof(txq->txq_mbufs[0]) * txqsize);
674	}
675
676	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
677		rxq = &sc->sc_rxq[qid];
678		txq = &sc->sc_txq[qid];
679
680		for (i = 0; i < rxq->rxq_vq->vq_num; i++) {
681			r = vioif_dmamap_create_load(sc, &rxq->rxq_hdr_dmamaps[i],
682			    &rxq->rxq_hdrs[i], sc->sc_hdr_size, 1,
683			    BUS_DMA_READ, "rx header");
684			if (r != 0)
685				goto err_reqs;
686
687			r = vioif_dmamap_create(sc, &rxq->rxq_dmamaps[i],
688			    MCLBYTES, 1, "rx payload");
689			if (r != 0)
690				goto err_reqs;
691		}
692
693		for (i = 0; i < txq->txq_vq->vq_num; i++) {
694			r = vioif_dmamap_create_load(sc, &txq->txq_hdr_dmamaps[i],
695			    &txq->txq_hdrs[i], sc->sc_hdr_size, 1,
696			    BUS_DMA_READ, "tx header");
697			if (r != 0)
698				goto err_reqs;
699
700			r = vioif_dmamap_create(sc, &txq->txq_dmamaps[i], ETHER_MAX_LEN,
701			    VIRTIO_NET_TX_MAXNSEGS, "tx payload");
702			if (r != 0)
703				goto err_reqs;
704		}
705	}
706
707	if (sc->sc_has_ctrl) {
708		/* control vq class & command */
709		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_cmd_dmamap,
710		    ctrlq->ctrlq_cmd, sizeof(*ctrlq->ctrlq_cmd), 1,
711		    BUS_DMA_WRITE, "control command");
712		if (r != 0)
713			goto err_reqs;
714
715		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_status_dmamap,
716		    ctrlq->ctrlq_status, sizeof(*ctrlq->ctrlq_status), 1,
717		    BUS_DMA_READ, "control status");
718		if (r != 0)
719			goto err_reqs;
720
721		/* control vq rx mode command parameter */
722		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_rx_dmamap,
723		    ctrlq->ctrlq_rx, sizeof(*ctrlq->ctrlq_rx), 1,
724		    BUS_DMA_WRITE, "rx mode control command");
725		if (r != 0)
726			goto err_reqs;
727
728		/* multiqueue set command */
729		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_mq_dmamap,
730		    ctrlq->ctrlq_mq, sizeof(*ctrlq->ctrlq_mq), 1,
731		    BUS_DMA_WRITE, "multiqueue set command");
732		if (r != 0)
733			goto err_reqs;
734
735		/* control vq MAC filter table for unicast */
736		/* do not load now since its length is variable */
737		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_uc_dmamap,
738		    sizeof(*ctrlq->ctrlq_mac_tbl_uc) + 0, 1,
739		    "unicast MAC address filter command");
740		if (r != 0)
741			goto err_reqs;
742
743		/* control vq MAC filter table for multicast */
744		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_mc_dmamap,
745		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
746		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES, 1,
747		    "multicast MAC address filter command");
748		if (r != 0)
749			goto err_reqs;
750	}
751
752	return 0;
753
754err_reqs:
755	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_mc_dmamap);
756	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_uc_dmamap);
757	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_rx_dmamap);
758	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_status_dmamap);
759	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_cmd_dmamap);
760	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
761		rxq = &sc->sc_rxq[qid];
762		txq = &sc->sc_txq[qid];
763
764		for (i = 0; i < txq->txq_vq->vq_num; i++) {
765			vioif_dmamap_destroy(sc, &txq->txq_dmamaps[i]);
766			vioif_dmamap_destroy(sc, &txq->txq_hdr_dmamaps[i]);
767		}
768		for (i = 0; i < rxq->rxq_vq->vq_num; i++) {
769			vioif_dmamap_destroy(sc, &rxq->rxq_dmamaps[i]);
770			vioif_dmamap_destroy(sc, &rxq->rxq_hdr_dmamaps[i]);
771		}
772	}
773	if (sc->sc_kmem) {
774		kmem_free(sc->sc_kmem, allocsize2);
775		sc->sc_kmem = NULL;
776	}
777	bus_dmamem_unmap(virtio_dmat(vsc), sc->sc_dmamem, allocsize);
778err_dmamem_alloc:
779	bus_dmamem_free(virtio_dmat(vsc), &sc->sc_hdr_segs[0], 1);
780err_none:
781	return -1;
782}
783
784static void
785vioif_attach(device_t parent, device_t self, void *aux)
786{
787	struct vioif_softc *sc = device_private(self);
788	struct virtio_softc *vsc = device_private(parent);
789	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
790	struct vioif_txqueue *txq;
791	struct vioif_rxqueue *rxq;
792	uint64_t features, req_features;
793	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
794	u_int softint_flags;
795	int r, i, nvqs=0, req_flags;
796	char xnamebuf[MAXCOMLEN];
797
798	if (virtio_child(vsc) != NULL) {
799		aprint_normal(": child already attached for %s; "
800		    "something wrong...\n", device_xname(parent));
801		return;
802	}
803
804	sc->sc_dev = self;
805	sc->sc_virtio = vsc;
806	sc->sc_link_active = false;
807
808	sc->sc_max_nvq_pairs = 1;
809	sc->sc_req_nvq_pairs = 1;
810	sc->sc_act_nvq_pairs = 1;
811	sc->sc_txrx_workqueue_sysctl = true;
812	sc->sc_tx_intr_process_limit = VIOIF_TX_INTR_PROCESS_LIMIT;
813	sc->sc_tx_process_limit = VIOIF_TX_PROCESS_LIMIT;
814	sc->sc_rx_intr_process_limit = VIOIF_RX_INTR_PROCESS_LIMIT;
815	sc->sc_rx_process_limit = VIOIF_RX_PROCESS_LIMIT;
816
817	mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);
818
819	snprintf(xnamebuf, sizeof(xnamebuf), "%s_txrx", device_xname(self));
820	sc->sc_txrx_workqueue = vioif_workq_create(xnamebuf, VIOIF_WORKQUEUE_PRI,
821	    IPL_NET, WQ_PERCPU | WQ_MPSAFE);
822	if (sc->sc_txrx_workqueue == NULL)
823		goto err;
824
825	req_flags = 0;
826
827#ifdef VIOIF_MPSAFE
828	req_flags |= VIRTIO_F_INTR_MPSAFE;
829#endif
830	req_flags |= VIRTIO_F_INTR_MSIX;
831
832	req_features =
833	    VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | VIRTIO_NET_F_CTRL_VQ |
834	    VIRTIO_NET_F_CTRL_RX | VIRTIO_F_NOTIFY_ON_EMPTY;
835	req_features |= VIRTIO_F_RING_EVENT_IDX;
836#ifdef VIOIF_MULTIQ
837	req_features |= VIRTIO_NET_F_MQ;
838#endif
839	virtio_child_attach_start(vsc, self, IPL_NET, NULL,
840	    vioif_config_change, virtio_vq_intrhand, req_flags,
841	    req_features, VIRTIO_NET_FLAG_BITS);
842
843	features = virtio_features(vsc);
844	if (features == 0)
845		goto err;
846
847	if (features & VIRTIO_NET_F_MAC) {
848		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
849			sc->sc_mac[i] = virtio_read_device_config_1(vsc,
850			    VIRTIO_NET_CONFIG_MAC + i);
851		}
852	} else {
853		/* code stolen from sys/net/if_tap.c */
854		struct timeval tv;
855		uint32_t ui;
856		getmicrouptime(&tv);
857		ui = (tv.tv_sec ^ tv.tv_usec) & 0xffffff;
858		memcpy(sc->sc_mac+3, (uint8_t *)&ui, 3);
859		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
860			virtio_write_device_config_1(vsc,
861			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
862		}
863	}
864
865	/* 'Ethernet' with capital follows other ethernet driver attachment */
866	aprint_normal_dev(self, "Ethernet address %s\n",
867	    ether_sprintf(sc->sc_mac));
868
869	if (features & (VIRTIO_NET_F_MRG_RXBUF | VIRTIO_F_VERSION_1)) {
870		sc->sc_hdr_size = sizeof(struct virtio_net_hdr);
871	} else {
872		sc->sc_hdr_size = offsetof(struct virtio_net_hdr, num_buffers);
873	}
874
875	if ((features & VIRTIO_NET_F_CTRL_VQ) &&
876	    (features & VIRTIO_NET_F_CTRL_RX)) {
877		sc->sc_has_ctrl = true;
878
879		cv_init(&ctrlq->ctrlq_wait, "ctrl_vq");
880		mutex_init(&ctrlq->ctrlq_wait_lock, MUTEX_DEFAULT, IPL_NET);
881		ctrlq->ctrlq_inuse = FREE;
882	} else {
883		sc->sc_has_ctrl = false;
884	}
885
886	if (sc->sc_has_ctrl && (features & VIRTIO_NET_F_MQ)) {
887		sc->sc_max_nvq_pairs = virtio_read_device_config_2(vsc,
888		    VIRTIO_NET_CONFIG_MAX_VQ_PAIRS);
889
890		if (sc->sc_max_nvq_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
891			goto err;
892
893		/* Limit the number of queue pairs to use */
894		sc->sc_req_nvq_pairs = MIN(sc->sc_max_nvq_pairs, ncpu);
895	}
896
897	vioif_alloc_queues(sc);
898	virtio_child_attach_set_vqs(vsc, sc->sc_vqs, sc->sc_req_nvq_pairs);
899
900#ifdef VIOIF_MPSAFE
901	softint_flags = SOFTINT_NET | SOFTINT_MPSAFE;
902#else
903	softint_flags = SOFTINT_NET;
904#endif
905
906	/*
907	 * Allocating virtqueues
908	 */
909	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
910		rxq = &sc->sc_rxq[i];
911		txq = &sc->sc_txq[i];
912		char qname[32];
913
914		rxq->rxq_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
915
916		rxq->rxq_handle_si = softint_establish(softint_flags,
917		    vioif_rx_handle, rxq);
918		if (rxq->rxq_handle_si == NULL) {
919			aprint_error_dev(self, "cannot establish rx softint\n");
920			goto err;
921		}
922
923		snprintf(qname, sizeof(qname), "rx%d", i);
924		r = virtio_alloc_vq(vsc, rxq->rxq_vq, nvqs,
925		    MCLBYTES + sc->sc_hdr_size, 2, qname);
926		if (r != 0)
927			goto err;
928		nvqs++;
929		rxq->rxq_vq->vq_intrhand = vioif_rx_intr;
930		rxq->rxq_vq->vq_intrhand_arg = (void *)rxq;
931		rxq->rxq_stopping = true;
932		vioif_work_set(&rxq->rxq_work, vioif_rx_handle, rxq);
933
934		txq->txq_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
935
936		txq->txq_deferred_transmit = softint_establish(softint_flags,
937		    vioif_deferred_transmit, txq);
938		if (txq->txq_deferred_transmit == NULL) {
939			aprint_error_dev(self, "cannot establish tx softint\n");
940			goto err;
941		}
942		txq->txq_handle_si = softint_establish(softint_flags,
943		    vioif_tx_handle, txq);
944		if (txq->txq_handle_si == NULL) {
945			aprint_error_dev(self, "cannot establish tx softint\n");
946			goto err;
947		}
948
949		snprintf(qname, sizeof(qname), "tx%d", i);
950		r = virtio_alloc_vq(vsc, txq->txq_vq, nvqs,
951		    sc->sc_hdr_size + (ETHER_MAX_LEN - ETHER_HDR_LEN),
952		    VIRTIO_NET_TX_MAXNSEGS + 1, qname);
953		if (r != 0)
954			goto err;
955		nvqs++;
956		txq->txq_vq->vq_intrhand = vioif_tx_intr;
957		txq->txq_vq->vq_intrhand_arg = (void *)txq;
958		txq->txq_link_active = sc->sc_link_active;
959		txq->txq_stopping = false;
960		txq->txq_intrq = pcq_create(txq->txq_vq->vq_num, KM_SLEEP);
961		vioif_work_set(&txq->txq_work, vioif_tx_handle, txq);
962	}
963
964	if (sc->sc_has_ctrl) {
965		/*
966		 * Allocating a virtqueue for control channel
967		 */
968		r = virtio_alloc_vq(vsc, ctrlq->ctrlq_vq, nvqs,
969		    NBPG, 1, "control");
970		if (r != 0) {
971			aprint_error_dev(self, "failed to allocate "
972			    "a virtqueue for control channel, error code %d\n",
973			    r);
974
975			sc->sc_has_ctrl = false;
976			cv_destroy(&ctrlq->ctrlq_wait);
977			mutex_destroy(&ctrlq->ctrlq_wait_lock);
978		} else {
979			nvqs++;
980			ctrlq->ctrlq_vq->vq_intrhand = vioif_ctrl_intr;
981			ctrlq->ctrlq_vq->vq_intrhand_arg = (void *) ctrlq;
982		}
983	}
984
985	sc->sc_ctl_softint = softint_establish(softint_flags,
986	    vioif_ctl_softint, sc);
987	if (sc->sc_ctl_softint == NULL) {
988		aprint_error_dev(self, "cannot establish ctl softint\n");
989		goto err;
990	}
991
992	if (vioif_alloc_mems(sc) < 0)
993		goto err;
994
995	if (virtio_child_attach_finish(vsc) != 0)
996		goto err;
997
998	if (vioif_setup_sysctl(sc) != 0) {
999		aprint_error_dev(self, "unable to create sysctl node\n");
1000		/* continue */
1001	}
1002
1003	vioif_setup_stats(sc);
1004
1005	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
1006	ifp->if_softc = sc;
1007	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1008#ifdef VIOIF_MPSAFE
1009	ifp->if_extflags = IFEF_MPSAFE;
1010#endif
1011	ifp->if_start = vioif_start;
1012	if (sc->sc_req_nvq_pairs > 1)
1013		ifp->if_transmit = vioif_transmit;
1014	ifp->if_ioctl = vioif_ioctl;
1015	ifp->if_init = vioif_init;
1016	ifp->if_stop = vioif_stop;
1017	ifp->if_capabilities = 0;
1018	ifp->if_watchdog = vioif_watchdog;
1019	txq = &sc->sc_txq[0];
1020	IFQ_SET_MAXLEN(&ifp->if_snd, MAX(txq->txq_vq->vq_num, IFQ_MAXLEN));
1021	IFQ_SET_READY(&ifp->if_snd);
1022
1023	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
1024
1025	if_attach(ifp);
1026	if_deferred_start_init(ifp, NULL);
1027	ether_ifattach(ifp, sc->sc_mac);
1028
1029	return;
1030
1031err:
1032	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
1033		rxq = &sc->sc_rxq[i];
1034		txq = &sc->sc_txq[i];
1035
1036		if (rxq->rxq_lock) {
1037			mutex_obj_free(rxq->rxq_lock);
1038			rxq->rxq_lock = NULL;
1039		}
1040
1041		if (rxq->rxq_handle_si) {
1042			softint_disestablish(rxq->rxq_handle_si);
1043			rxq->rxq_handle_si = NULL;
1044		}
1045
1046		if (txq->txq_lock) {
1047			mutex_obj_free(txq->txq_lock);
1048			txq->txq_lock = NULL;
1049		}
1050
1051		if (txq->txq_handle_si) {
1052			softint_disestablish(txq->txq_handle_si);
1053			txq->txq_handle_si = NULL;
1054		}
1055
1056		if (txq->txq_deferred_transmit) {
1057			softint_disestablish(txq->txq_deferred_transmit);
1058			txq->txq_deferred_transmit = NULL;
1059		}
1060
1061		if (txq->txq_intrq) {
1062			pcq_destroy(txq->txq_intrq);
1063			txq->txq_intrq = NULL;
1064		}
1065	}
1066
1067	if (sc->sc_has_ctrl) {
1068		cv_destroy(&ctrlq->ctrlq_wait);
1069		mutex_destroy(&ctrlq->ctrlq_wait_lock);
1070	}
1071
1072	while (nvqs > 0)
1073		virtio_free_vq(vsc, &sc->sc_vqs[--nvqs]);
1074
1075	vioif_free_queues(sc);
1076	mutex_destroy(&sc->sc_lock);
1077	virtio_child_attach_failed(vsc);
1078	config_finalize_register(self, vioif_finalize_teardown);
1079
1080	return;
1081}
1082
1083static int
1084vioif_finalize_teardown(device_t self)
1085{
1086	struct vioif_softc *sc = device_private(self);
1087
1088	if (sc->sc_txrx_workqueue != NULL) {
1089		vioif_workq_destroy(sc->sc_txrx_workqueue);
1090		sc->sc_txrx_workqueue = NULL;
1091	}
1092
1093	return 0;
1094}
1095
1096/* we need interrupts to make promiscuous mode off */
1097static void
1098vioif_deferred_init(device_t self)
1099{
1100	struct vioif_softc *sc = device_private(self);
1101	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1102	int r;
1103
1104	if (ifp->if_flags & IFF_PROMISC)
1105		return;
1106
1107	r =  vioif_set_promisc(sc, false);
1108	if (r != 0)
1109		aprint_error_dev(self, "resetting promisc mode failed, "
1110		    "error code %d\n", r);
1111}
1112
1113static void
1114vioif_enable_interrupt_vqpairs(struct vioif_softc *sc)
1115{
1116	struct virtio_softc *vsc = sc->sc_virtio;
1117	struct vioif_txqueue *txq;
1118	struct vioif_rxqueue *rxq;
1119	int i;
1120
1121	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1122		txq = &sc->sc_txq[i];
1123		rxq = &sc->sc_rxq[i];
1124
1125		virtio_start_vq_intr(vsc, txq->txq_vq);
1126		virtio_start_vq_intr(vsc, rxq->rxq_vq);
1127	}
1128}
1129
1130static void
1131vioif_disable_interrupt_vqpairs(struct vioif_softc *sc)
1132{
1133	struct virtio_softc *vsc = sc->sc_virtio;
1134	struct vioif_txqueue *txq;
1135	struct vioif_rxqueue *rxq;
1136	int i;
1137
1138	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1139		rxq = &sc->sc_rxq[i];
1140		txq = &sc->sc_txq[i];
1141
1142		virtio_stop_vq_intr(vsc, rxq->rxq_vq);
1143		virtio_stop_vq_intr(vsc, txq->txq_vq);
1144	}
1145}
1146
1147/*
1148 * Interface functions for ifnet
1149 */
1150static int
1151vioif_init(struct ifnet *ifp)
1152{
1153	struct vioif_softc *sc = ifp->if_softc;
1154	struct virtio_softc *vsc = sc->sc_virtio;
1155	struct vioif_rxqueue *rxq;
1156	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1157	int r, i;
1158
1159	vioif_stop(ifp, 0);
1160
1161	r = virtio_reinit_start(vsc);
1162	if (r != 0) {
1163		log(LOG_ERR, "%s: reset failed\n", ifp->if_xname);
1164		return EIO;
1165	}
1166
1167	virtio_negotiate_features(vsc, virtio_features(vsc));
1168
1169	for (i = 0; i < sc->sc_req_nvq_pairs; i++) {
1170		rxq = &sc->sc_rxq[i];
1171
1172		/* Have to set false before vioif_populate_rx_mbufs */
1173		mutex_enter(rxq->rxq_lock);
1174		rxq->rxq_stopping = false;
1175		vioif_populate_rx_mbufs_locked(sc, rxq);
1176		mutex_exit(rxq->rxq_lock);
1177
1178	}
1179
1180	virtio_reinit_end(vsc);
1181
1182	if (sc->sc_has_ctrl)
1183		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
1184
1185	r = vioif_ctrl_mq_vq_pairs_set(sc, sc->sc_req_nvq_pairs);
1186	if (r == 0)
1187		sc->sc_act_nvq_pairs = sc->sc_req_nvq_pairs;
1188	else
1189		sc->sc_act_nvq_pairs = 1;
1190
1191	for (i = 0; i < sc->sc_act_nvq_pairs; i++)
1192		sc->sc_txq[i].txq_stopping = false;
1193
1194	vioif_enable_interrupt_vqpairs(sc);
1195
1196	if (!sc->sc_deferred_init_done) {
1197		sc->sc_deferred_init_done = 1;
1198		if (sc->sc_has_ctrl)
1199			vioif_deferred_init(sc->sc_dev);
1200	}
1201
1202	vioif_update_link_status(sc);
1203	ifp->if_flags |= IFF_RUNNING;
1204	ifp->if_flags &= ~IFF_OACTIVE;
1205	vioif_rx_filter(sc);
1206
1207	return 0;
1208}
1209
1210static void
1211vioif_stop(struct ifnet *ifp, int disable)
1212{
1213	struct vioif_softc *sc = ifp->if_softc;
1214	struct virtio_softc *vsc = sc->sc_virtio;
1215	struct vioif_txqueue *txq;
1216	struct vioif_rxqueue *rxq;
1217	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1218	int i;
1219
1220	/* Take the locks to ensure that ongoing TX/RX finish */
1221	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1222		txq = &sc->sc_txq[i];
1223		rxq = &sc->sc_rxq[i];
1224
1225		mutex_enter(rxq->rxq_lock);
1226		rxq->rxq_stopping = true;
1227		mutex_exit(rxq->rxq_lock);
1228
1229		mutex_enter(txq->txq_lock);
1230		txq->txq_stopping = true;
1231		mutex_exit(txq->txq_lock);
1232	}
1233
1234	/* disable interrupts */
1235	vioif_disable_interrupt_vqpairs(sc);
1236
1237	if (sc->sc_has_ctrl)
1238		virtio_stop_vq_intr(vsc, ctrlq->ctrlq_vq);
1239
1240	/* only way to stop I/O and DMA is resetting... */
1241	virtio_reset(vsc);
1242
1243	/* rendezvous for finish of handlers */
1244	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1245		txq = &sc->sc_txq[i];
1246		rxq = &sc->sc_rxq[i];
1247
1248		mutex_enter(rxq->rxq_lock);
1249		mutex_exit(rxq->rxq_lock);
1250		vioif_work_wait(sc->sc_txrx_workqueue, &rxq->rxq_work);
1251
1252		mutex_enter(txq->txq_lock);
1253		mutex_exit(txq->txq_lock);
1254		vioif_work_wait(sc->sc_txrx_workqueue, &txq->txq_work);
1255	}
1256
1257	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1258		vioif_rx_queue_clear(&sc->sc_rxq[i]);
1259		vioif_tx_queue_clear(&sc->sc_txq[i]);
1260	}
1261
1262	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1263	sc->sc_link_active = false;
1264
1265	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1266		txq = &sc->sc_txq[i];
1267		rxq = &sc->sc_rxq[i];
1268
1269		txq->txq_link_active = false;
1270
1271		if (disable)
1272			vioif_rx_drain(rxq);
1273
1274		vioif_tx_drain(txq);
1275	}
1276}
1277
1278static void
1279vioif_send_common_locked(struct ifnet *ifp, struct vioif_txqueue *txq,
1280    bool is_transmit)
1281{
1282	struct vioif_softc *sc = ifp->if_softc;
1283	struct virtio_softc *vsc = sc->sc_virtio;
1284	struct virtqueue *vq = txq->txq_vq;
1285	struct virtio_net_hdr *hdr;
1286	struct mbuf *m;
1287	int queued = 0;
1288
1289	KASSERT(mutex_owned(txq->txq_lock));
1290
1291	if ((ifp->if_flags & IFF_RUNNING) == 0)
1292		return;
1293
1294	if (!txq->txq_link_active || txq->txq_stopping)
1295		return;
1296
1297	if ((ifp->if_flags & IFF_OACTIVE) != 0 && !is_transmit)
1298		return;
1299
1300	for (;;) {
1301		int slot, r;
1302
1303		if (is_transmit)
1304			m = pcq_get(txq->txq_intrq);
1305		else
1306			IFQ_DEQUEUE(&ifp->if_snd, m);
1307
1308		if (m == NULL)
1309			break;
1310
1311		r = virtio_enqueue_prep(vsc, vq, &slot);
1312		if (r == EAGAIN) {
1313			ifp->if_flags |= IFF_OACTIVE;
1314			m_freem(m);
1315			break;
1316		}
1317		if (r != 0)
1318			panic("enqueue_prep for a tx buffer");
1319
1320		r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1321		    txq->txq_dmamaps[slot], m, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1322		if (r != 0) {
1323			/* maybe just too fragmented */
1324			struct mbuf *newm;
1325
1326			newm = m_defrag(m, M_NOWAIT);
1327			if (newm == NULL) {
1328				txq->txq_defrag_failed.ev_count++;
1329				goto skip;
1330			}
1331
1332			m = newm;
1333			r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1334			    txq->txq_dmamaps[slot], m,
1335			    BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1336			if (r != 0) {
1337				txq->txq_mbuf_load_failed.ev_count++;
1338skip:
1339				m_freem(m);
1340				virtio_enqueue_abort(vsc, vq, slot);
1341				continue;
1342			}
1343		}
1344
1345		/* This should actually never fail */
1346		r = virtio_enqueue_reserve(vsc, vq, slot,
1347		    txq->txq_dmamaps[slot]->dm_nsegs + 1);
1348		if (r != 0) {
1349			txq->txq_enqueue_reserve_failed.ev_count++;
1350			bus_dmamap_unload(virtio_dmat(vsc),
1351			     txq->txq_dmamaps[slot]);
1352			/* slot already freed by virtio_enqueue_reserve */
1353			m_freem(m);
1354			continue;
1355		}
1356
1357		txq->txq_mbufs[slot] = m;
1358
1359		hdr = &txq->txq_hdrs[slot];
1360		memset(hdr, 0, sc->sc_hdr_size);
1361		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_dmamaps[slot],
1362		    0, txq->txq_dmamaps[slot]->dm_mapsize,
1363		    BUS_DMASYNC_PREWRITE);
1364		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_hdr_dmamaps[slot],
1365		    0, txq->txq_hdr_dmamaps[slot]->dm_mapsize,
1366		    BUS_DMASYNC_PREWRITE);
1367		virtio_enqueue(vsc, vq, slot, txq->txq_hdr_dmamaps[slot], true);
1368		virtio_enqueue(vsc, vq, slot, txq->txq_dmamaps[slot], true);
1369		virtio_enqueue_commit(vsc, vq, slot, false);
1370
1371		queued++;
1372		bpf_mtap(ifp, m, BPF_D_OUT);
1373	}
1374
1375	if (queued > 0) {
1376		virtio_enqueue_commit(vsc, vq, -1, true);
1377		ifp->if_timer = 5;
1378	}
1379}
1380
1381static void
1382vioif_start_locked(struct ifnet *ifp, struct vioif_txqueue *txq)
1383{
1384
1385	/*
1386	 * ifp->if_obytes and ifp->if_omcasts are added in if_transmit()@if.c.
1387	 */
1388	vioif_send_common_locked(ifp, txq, false);
1389
1390}
1391
1392static void
1393vioif_start(struct ifnet *ifp)
1394{
1395	struct vioif_softc *sc = ifp->if_softc;
1396	struct vioif_txqueue *txq = &sc->sc_txq[0];
1397
1398#ifdef VIOIF_MPSAFE
1399	KASSERT(if_is_mpsafe(ifp));
1400#endif
1401
1402	mutex_enter(txq->txq_lock);
1403	vioif_start_locked(ifp, txq);
1404	mutex_exit(txq->txq_lock);
1405}
1406
1407static inline int
1408vioif_select_txqueue(struct ifnet *ifp, struct mbuf *m)
1409{
1410	struct vioif_softc *sc = ifp->if_softc;
1411	u_int cpuid = cpu_index(curcpu());
1412
1413	return cpuid % sc->sc_act_nvq_pairs;
1414}
1415
1416static void
1417vioif_transmit_locked(struct ifnet *ifp, struct vioif_txqueue *txq)
1418{
1419
1420	vioif_send_common_locked(ifp, txq, true);
1421}
1422
1423static int
1424vioif_transmit(struct ifnet *ifp, struct mbuf *m)
1425{
1426	struct vioif_softc *sc = ifp->if_softc;
1427	struct vioif_txqueue *txq;
1428	int qid;
1429
1430	qid = vioif_select_txqueue(ifp, m);
1431	txq = &sc->sc_txq[qid];
1432
1433	if (__predict_false(!pcq_put(txq->txq_intrq, m))) {
1434		m_freem(m);
1435		return ENOBUFS;
1436	}
1437
1438	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
1439	if_statadd_ref(nsr, if_obytes, m->m_pkthdr.len);
1440	if (m->m_flags & M_MCAST)
1441		if_statinc_ref(nsr, if_omcasts);
1442	IF_STAT_PUTREF(ifp);
1443
1444	if (mutex_tryenter(txq->txq_lock)) {
1445		vioif_transmit_locked(ifp, txq);
1446		mutex_exit(txq->txq_lock);
1447	}
1448
1449	return 0;
1450}
1451
1452static void
1453vioif_deferred_transmit(void *arg)
1454{
1455	struct vioif_txqueue *txq = arg;
1456	struct virtio_softc *vsc = txq->txq_vq->vq_owner;
1457	struct vioif_softc *sc = device_private(virtio_child(vsc));
1458	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1459
1460	mutex_enter(txq->txq_lock);
1461	vioif_send_common_locked(ifp, txq, true);
1462	mutex_exit(txq->txq_lock);
1463}
1464
1465static int
1466vioif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1467{
1468	int s, r;
1469
1470	s = splnet();
1471
1472	r = ether_ioctl(ifp, cmd, data);
1473	if ((r == 0 && cmd == SIOCSIFFLAGS) ||
1474	    (r == ENETRESET && (cmd == SIOCADDMULTI || cmd == SIOCDELMULTI))) {
1475		if (ifp->if_flags & IFF_RUNNING)
1476			r = vioif_rx_filter(ifp->if_softc);
1477		else
1478			r = 0;
1479	}
1480
1481	splx(s);
1482
1483	return r;
1484}
1485
1486void
1487vioif_watchdog(struct ifnet *ifp)
1488{
1489	struct vioif_softc *sc = ifp->if_softc;
1490	int i;
1491
1492	if (ifp->if_flags & IFF_RUNNING) {
1493		for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1494			vioif_tx_queue_clear(&sc->sc_txq[i]);
1495		}
1496	}
1497}
1498
1499/*
1500 * Receive implementation
1501 */
1502/* allocate and initialize a mbuf for receive */
1503static int
1504vioif_add_rx_mbuf(struct vioif_rxqueue *rxq, int i)
1505{
1506	struct virtio_softc *vsc = rxq->rxq_vq->vq_owner;
1507	struct mbuf *m;
1508	int r;
1509
1510	MGETHDR(m, M_DONTWAIT, MT_DATA);
1511	if (m == NULL)
1512		return ENOBUFS;
1513	MCLGET(m, M_DONTWAIT);
1514	if ((m->m_flags & M_EXT) == 0) {
1515		m_freem(m);
1516		return ENOBUFS;
1517	}
1518	rxq->rxq_mbufs[i] = m;
1519	m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
1520	r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1521	    rxq->rxq_dmamaps[i], m, BUS_DMA_READ | BUS_DMA_NOWAIT);
1522	if (r) {
1523		m_freem(m);
1524		rxq->rxq_mbufs[i] = NULL;
1525		return r;
1526	}
1527
1528	return 0;
1529}
1530
1531/* free a mbuf for receive */
1532static void
1533vioif_free_rx_mbuf(struct vioif_rxqueue *rxq, int i)
1534{
1535	struct virtio_softc *vsc = rxq->rxq_vq->vq_owner;
1536
1537	bus_dmamap_unload(virtio_dmat(vsc), rxq->rxq_dmamaps[i]);
1538	m_freem(rxq->rxq_mbufs[i]);
1539	rxq->rxq_mbufs[i] = NULL;
1540}
1541
1542/* add mbufs for all the empty receive slots */
1543static void
1544vioif_populate_rx_mbufs_locked(struct vioif_softc *sc, struct vioif_rxqueue *rxq)
1545{
1546	struct virtqueue *vq = rxq->rxq_vq;
1547	struct virtio_softc *vsc = vq->vq_owner;
1548	int i, r, ndone = 0;
1549
1550	KASSERT(mutex_owned(rxq->rxq_lock));
1551
1552	if (rxq->rxq_stopping)
1553		return;
1554
1555	for (i = 0; i < vq->vq_num; i++) {
1556		int slot;
1557		r = virtio_enqueue_prep(vsc, vq, &slot);
1558		if (r == EAGAIN)
1559			break;
1560		if (r != 0)
1561			panic("enqueue_prep for rx buffers");
1562		if (rxq->rxq_mbufs[slot] == NULL) {
1563			r = vioif_add_rx_mbuf(rxq, slot);
1564			if (r != 0) {
1565				rxq->rxq_mbuf_add_failed.ev_count++;
1566				break;
1567			}
1568		}
1569		r = virtio_enqueue_reserve(vsc, vq, slot,
1570		    rxq->rxq_dmamaps[slot]->dm_nsegs + 1);
1571		if (r != 0) {
1572			vioif_free_rx_mbuf(rxq, slot);
1573			break;
1574		}
1575		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_hdr_dmamaps[slot],
1576		    0, sc->sc_hdr_size, BUS_DMASYNC_PREREAD);
1577		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_dmamaps[slot],
1578		    0, MCLBYTES, BUS_DMASYNC_PREREAD);
1579		virtio_enqueue(vsc, vq, slot, rxq->rxq_hdr_dmamaps[slot],
1580		    false);
1581		virtio_enqueue(vsc, vq, slot, rxq->rxq_dmamaps[slot], false);
1582		virtio_enqueue_commit(vsc, vq, slot, false);
1583		ndone++;
1584	}
1585	if (ndone > 0)
1586		virtio_enqueue_commit(vsc, vq, -1, true);
1587}
1588
1589static void
1590vioif_rx_queue_clear(struct vioif_rxqueue *rxq)
1591{
1592	struct virtqueue *vq = rxq->rxq_vq;
1593	struct virtio_softc *vsc = vq->vq_owner;
1594	struct vioif_softc *sc = device_private(virtio_child(vsc));
1595	u_int limit = UINT_MAX;
1596	bool more;
1597
1598	KASSERT(rxq->rxq_stopping);
1599
1600	mutex_enter(rxq->rxq_lock);
1601	for (;;) {
1602		more = vioif_rx_deq_locked(sc, vsc, rxq, limit);
1603		if (more == false)
1604			break;
1605	}
1606	mutex_exit(rxq->rxq_lock);
1607}
1608
1609/* dequeue received packets */
1610static bool
1611vioif_rx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
1612    struct vioif_rxqueue *rxq, u_int limit)
1613{
1614	struct virtqueue *vq = rxq->rxq_vq;
1615	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1616	struct mbuf *m;
1617	int slot, len;
1618	bool more = false, dequeued = false;
1619
1620	KASSERT(mutex_owned(rxq->rxq_lock));
1621
1622	if (virtio_vq_is_enqueued(vsc, vq) == false)
1623		return false;
1624
1625	for (;;) {
1626		if (limit-- == 0) {
1627			more = true;
1628			break;
1629		}
1630
1631		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
1632			break;
1633
1634		dequeued = true;
1635
1636		len -= sc->sc_hdr_size;
1637		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_hdr_dmamaps[slot],
1638		    0, sc->sc_hdr_size, BUS_DMASYNC_POSTREAD);
1639		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_dmamaps[slot],
1640		    0, MCLBYTES, BUS_DMASYNC_POSTREAD);
1641		m = rxq->rxq_mbufs[slot];
1642		KASSERT(m != NULL);
1643		bus_dmamap_unload(virtio_dmat(vsc), rxq->rxq_dmamaps[slot]);
1644		rxq->rxq_mbufs[slot] = NULL;
1645		virtio_dequeue_commit(vsc, vq, slot);
1646		m_set_rcvif(m, ifp);
1647		m->m_len = m->m_pkthdr.len = len;
1648
1649		mutex_exit(rxq->rxq_lock);
1650		if_percpuq_enqueue(ifp->if_percpuq, m);
1651		mutex_enter(rxq->rxq_lock);
1652
1653		if (rxq->rxq_stopping)
1654			break;
1655	}
1656
1657	if (dequeued)
1658		vioif_populate_rx_mbufs_locked(sc, rxq);
1659
1660	return more;
1661}
1662
1663/* rx interrupt; call _dequeue above and schedule a softint */
1664
1665static void
1666vioif_rx_handle_locked(void *xrxq, u_int limit)
1667{
1668	struct vioif_rxqueue *rxq = xrxq;
1669	struct virtqueue *vq = rxq->rxq_vq;
1670	struct virtio_softc *vsc = vq->vq_owner;
1671	struct vioif_softc *sc = device_private(virtio_child(vsc));
1672	bool more;
1673
1674	KASSERT(!rxq->rxq_stopping);
1675
1676	more = vioif_rx_deq_locked(sc, vsc, rxq, limit);
1677	if (more) {
1678		vioif_rx_sched_handle(sc, rxq);
1679		return;
1680	}
1681	more = virtio_start_vq_intr(vsc, rxq->rxq_vq);
1682	if (more) {
1683		vioif_rx_sched_handle(sc, rxq);
1684		return;
1685	}
1686	atomic_store_relaxed(&rxq->rxq_active, false);
1687}
1688
1689static int
1690vioif_rx_intr(void *arg)
1691{
1692	struct vioif_rxqueue *rxq = arg;
1693	struct virtqueue *vq = rxq->rxq_vq;
1694	struct virtio_softc *vsc = vq->vq_owner;
1695	struct vioif_softc *sc = device_private(virtio_child(vsc));
1696	u_int limit;
1697
1698	limit = sc->sc_rx_intr_process_limit;
1699
1700	if (atomic_load_relaxed(&rxq->rxq_active) == true)
1701		return 1;
1702
1703	mutex_enter(rxq->rxq_lock);
1704
1705	if (!rxq->rxq_stopping) {
1706		rxq->rxq_workqueue = sc->sc_txrx_workqueue_sysctl;
1707
1708		virtio_stop_vq_intr(vsc, vq);
1709		atomic_store_relaxed(&rxq->rxq_active, true);
1710
1711		vioif_rx_handle_locked(rxq, limit);
1712	}
1713
1714	mutex_exit(rxq->rxq_lock);
1715	return 1;
1716}
1717
1718static void
1719vioif_rx_handle(void *xrxq)
1720{
1721	struct vioif_rxqueue *rxq = xrxq;
1722	struct virtqueue *vq = rxq->rxq_vq;
1723	struct virtio_softc *vsc = vq->vq_owner;
1724	struct vioif_softc *sc = device_private(virtio_child(vsc));
1725	u_int limit;
1726
1727	limit = sc->sc_rx_process_limit;
1728
1729	mutex_enter(rxq->rxq_lock);
1730
1731	if (!rxq->rxq_stopping)
1732		vioif_rx_handle_locked(rxq, limit);
1733
1734	mutex_exit(rxq->rxq_lock);
1735}
1736
1737static void
1738vioif_rx_sched_handle(struct vioif_softc *sc, struct vioif_rxqueue *rxq)
1739{
1740
1741	KASSERT(mutex_owned(rxq->rxq_lock));
1742
1743	if (rxq->rxq_stopping)
1744		return;
1745
1746	if (rxq->rxq_workqueue)
1747		vioif_work_add(sc->sc_txrx_workqueue, &rxq->rxq_work);
1748	else
1749		softint_schedule(rxq->rxq_handle_si);
1750}
1751
1752/* free all the mbufs; called from if_stop(disable) */
1753static void
1754vioif_rx_drain(struct vioif_rxqueue *rxq)
1755{
1756	struct virtqueue *vq = rxq->rxq_vq;
1757	int i;
1758
1759	for (i = 0; i < vq->vq_num; i++) {
1760		if (rxq->rxq_mbufs[i] == NULL)
1761			continue;
1762		vioif_free_rx_mbuf(rxq, i);
1763	}
1764}
1765
1766/*
 * Transmission implementation
1768 */
1769/* actual transmission is done in if_start */
1770/* tx interrupt; dequeue and free mbufs */
1771/*
1772 * tx interrupt is actually disabled; this should be called upon
1773 * tx vq full and watchdog
1774 */
1775
1776static void
1777vioif_tx_handle_locked(struct vioif_txqueue *txq, u_int limit)
1778{
1779	struct virtqueue *vq = txq->txq_vq;
1780	struct virtio_softc *vsc = vq->vq_owner;
1781	struct vioif_softc *sc = device_private(virtio_child(vsc));
1782	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1783	bool more;
1784
1785	KASSERT(!txq->txq_stopping);
1786
1787	more = vioif_tx_deq_locked(sc, vsc, txq, limit);
1788	if (more) {
1789		vioif_tx_sched_handle(sc, txq);
1790		return;
1791	}
1792
1793	if (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX)
1794		more = virtio_postpone_intr_smart(vsc, vq);
1795	else
1796		more = virtio_start_vq_intr(vsc, vq);
1797	if (more) {
1798		vioif_tx_sched_handle(sc, txq);
1799		return;
1800	}
1801
1802	atomic_store_relaxed(&txq->txq_active, false);
1803	/* for ALTQ */
1804	if (txq == &sc->sc_txq[0]) {
1805		if_schedule_deferred_start(ifp);
1806		ifp->if_flags &= ~IFF_OACTIVE;
1807	}
1808	softint_schedule(txq->txq_deferred_transmit);
1809}
1810
1811
1812static int
1813vioif_tx_intr(void *arg)
1814{
1815	struct vioif_txqueue *txq = arg;
1816	struct virtqueue *vq = txq->txq_vq;
1817	struct virtio_softc *vsc = vq->vq_owner;
1818	struct vioif_softc *sc = device_private(virtio_child(vsc));
1819	u_int limit;
1820
1821	limit = sc->sc_tx_intr_process_limit;
1822
1823	if (atomic_load_relaxed(&txq->txq_active) == true)
1824		return 1;
1825
1826	mutex_enter(txq->txq_lock);
1827
1828	if (!txq->txq_stopping) {
1829		txq->txq_workqueue = sc->sc_txrx_workqueue_sysctl;
1830
1831		virtio_stop_vq_intr(vsc, vq);
1832		atomic_store_relaxed(&txq->txq_active, true);
1833
1834		vioif_tx_handle_locked(txq, limit);
1835	}
1836
1837	mutex_exit(txq->txq_lock);
1838
1839	return 1;
1840}
1841
1842static void
1843vioif_tx_handle(void *xtxq)
1844{
1845	struct vioif_txqueue *txq = xtxq;
1846	struct virtqueue *vq = txq->txq_vq;
1847	struct virtio_softc *vsc = vq->vq_owner;
1848	struct vioif_softc *sc = device_private(virtio_child(vsc));
1849	u_int limit;
1850
1851	limit = sc->sc_tx_process_limit;
1852
1853	mutex_enter(txq->txq_lock);
1854	if (!txq->txq_stopping)
1855		vioif_tx_handle_locked(txq, limit);
1856	mutex_exit(txq->txq_lock);
1857}
1858
1859static void
1860vioif_tx_sched_handle(struct vioif_softc *sc, struct vioif_txqueue *txq)
1861{
1862
1863	KASSERT(mutex_owned(txq->txq_lock));
1864
1865	if (txq->txq_stopping)
1866		return;
1867
1868	if (txq->txq_workqueue)
1869		vioif_work_add(sc->sc_txrx_workqueue, &txq->txq_work);
1870	else
1871		softint_schedule(txq->txq_handle_si);
1872}
1873
1874static void
1875vioif_tx_queue_clear(struct vioif_txqueue *txq)
1876{
1877	struct virtqueue *vq = txq->txq_vq;
1878	struct virtio_softc *vsc = vq->vq_owner;
1879	struct vioif_softc *sc = device_private(virtio_child(vsc));
1880	u_int limit = UINT_MAX;
1881	bool more;
1882
1883	mutex_enter(txq->txq_lock);
1884	for (;;) {
1885		more = vioif_tx_deq_locked(sc, vsc, txq, limit);
1886		if (more == false)
1887			break;
1888	}
1889	mutex_exit(txq->txq_lock);
1890}
1891
1892static bool
1893vioif_tx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
1894    struct vioif_txqueue *txq, u_int limit)
1895{
1896	struct virtqueue *vq = txq->txq_vq;
1897	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1898	struct mbuf *m;
1899	int slot, len;
1900	bool more = false;
1901
1902	KASSERT(mutex_owned(txq->txq_lock));
1903
1904	if (virtio_vq_is_enqueued(vsc, vq) == false)
1905		return false;
1906
1907	for (;;) {
1908		if (limit-- == 0) {
1909			more = true;
1910			break;
1911		}
1912
1913		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
1914			break;
1915
1916		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_hdr_dmamaps[slot],
1917		    0, sc->sc_hdr_size, BUS_DMASYNC_POSTWRITE);
1918		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_dmamaps[slot],
1919		    0, txq->txq_dmamaps[slot]->dm_mapsize,
1920		    BUS_DMASYNC_POSTWRITE);
1921		m = txq->txq_mbufs[slot];
1922		bus_dmamap_unload(virtio_dmat(vsc), txq->txq_dmamaps[slot]);
1923		txq->txq_mbufs[slot] = NULL;
1924		virtio_dequeue_commit(vsc, vq, slot);
1925		if_statinc(ifp, if_opackets);
1926		m_freem(m);
1927	}
1928
1929	return more;
1930}
1931
1932/* free all the mbufs already put on vq; called from if_stop(disable) */
1933static void
1934vioif_tx_drain(struct vioif_txqueue *txq)
1935{
1936	struct virtqueue *vq = txq->txq_vq;
1937	struct virtio_softc *vsc = vq->vq_owner;
1938	int i;
1939
1940	KASSERT(txq->txq_stopping);
1941
1942	for (i = 0; i < vq->vq_num; i++) {
1943		if (txq->txq_mbufs[i] == NULL)
1944			continue;
1945		bus_dmamap_unload(virtio_dmat(vsc), txq->txq_dmamaps[i]);
1946		m_freem(txq->txq_mbufs[i]);
1947		txq->txq_mbufs[i] = NULL;
1948	}
1949}
1950
1951/*
1952 * Control vq
1953 */
1954/* issue a VIRTIO_NET_CTRL_RX class command and wait for completion */
1955static void
1956vioif_ctrl_acquire(struct vioif_softc *sc)
1957{
1958	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1959
1960	mutex_enter(&ctrlq->ctrlq_wait_lock);
1961	while (ctrlq->ctrlq_inuse != FREE)
1962		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
1963	ctrlq->ctrlq_inuse = INUSE;
1964	ctrlq->ctrlq_owner = curlwp;
1965	mutex_exit(&ctrlq->ctrlq_wait_lock);
1966}
1967
1968static void
1969vioif_ctrl_release(struct vioif_softc *sc)
1970{
1971	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1972
1973	KASSERT(ctrlq->ctrlq_inuse != FREE);
1974	KASSERT(ctrlq->ctrlq_owner == curlwp);
1975
1976	mutex_enter(&ctrlq->ctrlq_wait_lock);
1977	ctrlq->ctrlq_inuse = FREE;
1978	ctrlq->ctrlq_owner = NULL;
1979	cv_signal(&ctrlq->ctrlq_wait);
1980	mutex_exit(&ctrlq->ctrlq_wait_lock);
1981}
1982
1983static int
1984vioif_ctrl_load_cmdspec(struct vioif_softc *sc,
1985    struct vioif_ctrl_cmdspec *specs, int nspecs)
1986{
1987	struct virtio_softc *vsc = sc->sc_virtio;
1988	int i, r, loaded;
1989
1990	loaded = 0;
1991	for (i = 0; i < nspecs; i++) {
1992		r = bus_dmamap_load(virtio_dmat(vsc),
1993		    specs[i].dmamap, specs[i].buf, specs[i].bufsize,
1994		    NULL, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1995		if (r) {
1996			sc->sc_ctrlq.ctrlq_cmd_load_failed.ev_count++;
1997			goto err;
1998		}
1999		loaded++;
2000
2001	}
2002
2003	return r;
2004
2005err:
2006	for (i = 0; i < loaded; i++) {
2007		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2008	}
2009
2010	return r;
2011}
2012
2013static void
2014vioif_ctrl_unload_cmdspec(struct vioif_softc *sc,
2015    struct vioif_ctrl_cmdspec *specs, int nspecs)
2016{
2017	struct virtio_softc *vsc = sc->sc_virtio;
2018	int i;
2019
2020	for (i = 0; i < nspecs; i++) {
2021		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2022	}
2023}
2024
2025static int
2026vioif_ctrl_send_command(struct vioif_softc *sc, uint8_t class, uint8_t cmd,
2027    struct vioif_ctrl_cmdspec *specs, int nspecs)
2028{
2029	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2030	struct virtqueue *vq = ctrlq->ctrlq_vq;
2031	struct virtio_softc *vsc = sc->sc_virtio;
2032	int i, r, slot;
2033
2034	ctrlq->ctrlq_cmd->class = class;
2035	ctrlq->ctrlq_cmd->command = cmd;
2036
2037	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap,
2038	    0, sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_PREWRITE);
2039	for (i = 0; i < nspecs; i++) {
2040		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap,
2041		    0, specs[i].bufsize, BUS_DMASYNC_PREWRITE);
2042	}
2043	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap,
2044	    0, sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_PREREAD);
2045
2046	/* we need to explicitly (re)start vq intr when using RING EVENT IDX */
2047	if (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX)
2048		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
2049
2050	r = virtio_enqueue_prep(vsc, vq, &slot);
2051	if (r != 0)
2052		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2053	r = virtio_enqueue_reserve(vsc, vq, slot, nspecs + 2);
2054	if (r != 0)
2055		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2056	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_cmd_dmamap, true);
2057	for (i = 0; i < nspecs; i++) {
2058		virtio_enqueue(vsc, vq, slot, specs[i].dmamap, true);
2059	}
2060	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_status_dmamap, false);
2061	virtio_enqueue_commit(vsc, vq, slot, true);
2062
2063	/* wait for done */
2064	mutex_enter(&ctrlq->ctrlq_wait_lock);
2065	while (ctrlq->ctrlq_inuse != DONE)
2066		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
2067	mutex_exit(&ctrlq->ctrlq_wait_lock);
2068	/* already dequeueued */
2069
2070	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap, 0,
2071	    sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
2072	for (i = 0; i < nspecs; i++) {
2073		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap, 0,
2074		    specs[i].bufsize, BUS_DMASYNC_POSTWRITE);
2075	}
2076	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap, 0,
2077	    sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_POSTREAD);
2078
2079	if (ctrlq->ctrlq_status->ack == VIRTIO_NET_OK)
2080		r = 0;
2081	else {
2082		device_printf(sc->sc_dev, "failed setting rx mode\n");
2083		sc->sc_ctrlq.ctrlq_cmd_failed.ev_count++;
2084		r = EIO;
2085	}
2086
2087	return r;
2088}
2089
2090static int
2091vioif_ctrl_rx(struct vioif_softc *sc, int cmd, bool onoff)
2092{
2093	struct virtio_net_ctrl_rx *rx = sc->sc_ctrlq.ctrlq_rx;
2094	struct vioif_ctrl_cmdspec specs[1];
2095	int r;
2096
2097	if (!sc->sc_has_ctrl)
2098		return ENOTSUP;
2099
2100	vioif_ctrl_acquire(sc);
2101
2102	rx->onoff = onoff;
2103	specs[0].dmamap = sc->sc_ctrlq.ctrlq_rx_dmamap;
2104	specs[0].buf = rx;
2105	specs[0].bufsize = sizeof(*rx);
2106
2107	r = vioif_ctrl_send_command(sc, VIRTIO_NET_CTRL_RX, cmd,
2108	    specs, __arraycount(specs));
2109
2110	vioif_ctrl_release(sc);
2111	return r;
2112}
2113
2114static int
2115vioif_set_promisc(struct vioif_softc *sc, bool onoff)
2116{
2117	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, onoff);
2118}
2119
2120static int
2121vioif_set_allmulti(struct vioif_softc *sc, bool onoff)
2122{
2123	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, onoff);
2124}
2125
2126/* issue VIRTIO_NET_CTRL_MAC_TABLE_SET command and wait for completion */
2127static int
2128vioif_set_rx_filter(struct vioif_softc *sc)
2129{
2130	/* filter already set in ctrlq->ctrlq_mac_tbl */
2131	struct virtio_softc *vsc = sc->sc_virtio;
2132	struct virtio_net_ctrl_mac_tbl *mac_tbl_uc, *mac_tbl_mc;
2133	struct vioif_ctrl_cmdspec specs[2];
2134	int nspecs = __arraycount(specs);
2135	int r;
2136
2137	mac_tbl_uc = sc->sc_ctrlq.ctrlq_mac_tbl_uc;
2138	mac_tbl_mc = sc->sc_ctrlq.ctrlq_mac_tbl_mc;
2139
2140	if (!sc->sc_has_ctrl)
2141		return ENOTSUP;
2142
2143	vioif_ctrl_acquire(sc);
2144
2145	specs[0].dmamap = sc->sc_ctrlq.ctrlq_tbl_uc_dmamap;
2146	specs[0].buf = mac_tbl_uc;
2147	specs[0].bufsize = sizeof(*mac_tbl_uc)
2148	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_uc->nentries));
2149
2150	specs[1].dmamap = sc->sc_ctrlq.ctrlq_tbl_mc_dmamap;
2151	specs[1].buf = mac_tbl_mc;
2152	specs[1].bufsize = sizeof(*mac_tbl_mc)
2153	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_mc->nentries));
2154
2155	r = vioif_ctrl_load_cmdspec(sc, specs, nspecs);
2156	if (r != 0)
2157		goto out;
2158
2159	r = vioif_ctrl_send_command(sc,
2160	    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET,
2161	    specs, nspecs);
2162
2163	vioif_ctrl_unload_cmdspec(sc, specs, nspecs);
2164
2165out:
2166	vioif_ctrl_release(sc);
2167
2168	return r;
2169}
2170
2171static int
2172vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *sc, int nvq_pairs)
2173{
2174	struct virtio_net_ctrl_mq *mq = sc->sc_ctrlq.ctrlq_mq;
2175	struct vioif_ctrl_cmdspec specs[1];
2176	int r;
2177
2178	if (!sc->sc_has_ctrl)
2179		return ENOTSUP;
2180
2181	if (nvq_pairs <= 1)
2182		return EINVAL;
2183
2184	vioif_ctrl_acquire(sc);
2185
2186	mq->virtqueue_pairs = virtio_rw16(sc->sc_virtio, nvq_pairs);
2187	specs[0].dmamap = sc->sc_ctrlq.ctrlq_mq_dmamap;
2188	specs[0].buf = mq;
2189	specs[0].bufsize = sizeof(*mq);
2190
2191	r = vioif_ctrl_send_command(sc,
2192	    VIRTIO_NET_CTRL_MQ, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
2193	    specs, __arraycount(specs));
2194
2195	vioif_ctrl_release(sc);
2196
2197	return r;
2198}
2199
2200/* ctrl vq interrupt; wake up the command issuer */
2201static int
2202vioif_ctrl_intr(void *arg)
2203{
2204	struct vioif_ctrlqueue *ctrlq = arg;
2205	struct virtqueue *vq = ctrlq->ctrlq_vq;
2206	struct virtio_softc *vsc = vq->vq_owner;
2207	int r, slot;
2208
2209	if (virtio_vq_is_enqueued(vsc, vq) == false)
2210		return 0;
2211
2212	r = virtio_dequeue(vsc, vq, &slot, NULL);
2213	if (r == ENOENT)
2214		return 0;
2215	virtio_dequeue_commit(vsc, vq, slot);
2216
2217	mutex_enter(&ctrlq->ctrlq_wait_lock);
2218	ctrlq->ctrlq_inuse = DONE;
2219	cv_signal(&ctrlq->ctrlq_wait);
2220	mutex_exit(&ctrlq->ctrlq_wait_lock);
2221
2222	return 1;
2223}
2224
2225/*
2226 * If IFF_PROMISC requested,  set promiscuous
2227 * If multicast filter small enough (<=MAXENTRIES) set rx filter
2228 * If large multicast filter exist use ALLMULTI
2229 */
2230/*
2231 * If setting rx filter fails fall back to ALLMULTI
2232 * If ALLMULTI fails fall back to PROMISC
2233 */
2234static int
2235vioif_rx_filter(struct vioif_softc *sc)
2236{
2237	struct virtio_softc *vsc = sc->sc_virtio;
2238	struct ethercom *ec = &sc->sc_ethercom;
2239	struct ifnet *ifp = &ec->ec_if;
2240	struct ether_multi *enm;
2241	struct ether_multistep step;
2242	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2243	int nentries;
2244	int promisc = 0, allmulti = 0, rxfilter = 0;
2245	int r;
2246
2247	if (!sc->sc_has_ctrl) {	/* no ctrl vq; always promisc */
2248		ifp->if_flags |= IFF_PROMISC;
2249		return 0;
2250	}
2251
2252	if (ifp->if_flags & IFF_PROMISC) {
2253		promisc = 1;
2254		goto set;
2255	}
2256
2257	nentries = -1;
2258	ETHER_LOCK(ec);
2259	ETHER_FIRST_MULTI(step, ec, enm);
2260	while (nentries++, enm != NULL) {
2261		if (nentries >= VIRTIO_NET_CTRL_MAC_MAXENTRIES) {
2262			allmulti = 1;
2263			goto set_unlock;
2264		}
2265		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
2266			allmulti = 1;
2267			goto set_unlock;
2268		}
2269		memcpy(ctrlq->ctrlq_mac_tbl_mc->macs[nentries],
2270		    enm->enm_addrlo, ETHER_ADDR_LEN);
2271		ETHER_NEXT_MULTI(step, enm);
2272	}
2273	rxfilter = 1;
2274
2275set_unlock:
2276	ETHER_UNLOCK(ec);
2277
2278set:
2279	if (rxfilter) {
2280		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 0);
2281		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, nentries);
2282		r = vioif_set_rx_filter(sc);
2283		if (r != 0) {
2284			rxfilter = 0;
2285			allmulti = 1; /* fallback */
2286		}
2287	} else {
2288		/* remove rx filter */
2289		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 0);
2290		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, 0);
2291		r = vioif_set_rx_filter(sc);
2292		/* what to do on failure? */
2293	}
2294	if (allmulti) {
2295		r = vioif_set_allmulti(sc, true);
2296		if (r != 0) {
2297			allmulti = 0;
2298			promisc = 1; /* fallback */
2299		}
2300	} else {
2301		r = vioif_set_allmulti(sc, false);
2302		/* what to do on failure? */
2303	}
2304	if (promisc) {
2305		r = vioif_set_promisc(sc, true);
2306	} else {
2307		r = vioif_set_promisc(sc, false);
2308	}
2309
2310	return r;
2311}
2312
2313static bool
2314vioif_is_link_up(struct vioif_softc *sc)
2315{
2316	struct virtio_softc *vsc = sc->sc_virtio;
2317	uint16_t status;
2318
2319	if (virtio_features(vsc) & VIRTIO_NET_F_STATUS)
2320		status = virtio_read_device_config_2(vsc,
2321		    VIRTIO_NET_CONFIG_STATUS);
2322	else
2323		status = VIRTIO_NET_S_LINK_UP;
2324
2325	return ((status & VIRTIO_NET_S_LINK_UP) != 0);
2326}
2327
2328/* change link status */
2329static void
2330vioif_update_link_status(struct vioif_softc *sc)
2331{
2332	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2333	struct vioif_txqueue *txq;
2334	bool active, changed;
2335	int link, i;
2336
2337	mutex_enter(&sc->sc_lock);
2338
2339	active = vioif_is_link_up(sc);
2340	changed = false;
2341
2342	if (active) {
2343		if (!sc->sc_link_active)
2344			changed = true;
2345
2346		link = LINK_STATE_UP;
2347		sc->sc_link_active = true;
2348	} else {
2349		if (sc->sc_link_active)
2350			changed = true;
2351
2352		link = LINK_STATE_DOWN;
2353		sc->sc_link_active = false;
2354	}
2355
2356	if (changed) {
2357		for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
2358			txq = &sc->sc_txq[i];
2359
2360			mutex_enter(txq->txq_lock);
2361			txq->txq_link_active = sc->sc_link_active;
2362			mutex_exit(txq->txq_lock);
2363		}
2364
2365		if_link_state_change(ifp, link);
2366	}
2367
2368	mutex_exit(&sc->sc_lock);
2369}
2370
2371static int
2372vioif_config_change(struct virtio_softc *vsc)
2373{
2374	struct vioif_softc *sc = device_private(virtio_child(vsc));
2375
2376	softint_schedule(sc->sc_ctl_softint);
2377	return 0;
2378}
2379
2380static void
2381vioif_ctl_softint(void *arg)
2382{
2383	struct vioif_softc *sc = arg;
2384	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2385
2386	vioif_update_link_status(sc);
2387	vioif_start(ifp);
2388}
2389
2390static struct workqueue *
2391vioif_workq_create(const char *name, pri_t prio, int ipl, int flags)
2392{
2393	struct workqueue *wq;
2394	int error;
2395
2396	error = workqueue_create(&wq, name, vioif_workq_work, NULL,
2397	    prio, ipl, flags);
2398
2399	if (error)
2400		return NULL;
2401
2402	return wq;
2403}
2404
/*
 * Destroy a workqueue obtained from vioif_workq_create().
 */
static void
vioif_workq_destroy(struct workqueue *wq)
{
	workqueue_destroy(wq);
}
2411
2412static void
2413vioif_workq_work(struct work *wk, void *context)
2414{
2415	struct vioif_work *work;
2416
2417	work = container_of(wk, struct vioif_work, cookie);
2418
2419	atomic_store_relaxed(&work->added, 0);
2420	work->func(work->arg);
2421}
2422
2423static void
2424vioif_work_set(struct vioif_work *work, void (*func)(void *), void *arg)
2425{
2426
2427	memset(work, 0, sizeof(*work));
2428	work->func = func;
2429	work->arg = arg;
2430}
2431
2432static void
2433vioif_work_add(struct workqueue *wq, struct vioif_work *work)
2434{
2435
2436	if (atomic_load_relaxed(&work->added) != 0)
2437		return;
2438
2439	atomic_store_relaxed(&work->added, 1);
2440	kpreempt_disable();
2441	workqueue_enqueue(wq, &work->cookie, NULL);
2442	kpreempt_enable();
2443}
2444
2445static void
2446vioif_work_wait(struct workqueue *wq, struct vioif_work *work)
2447{
2448
2449	workqueue_wait(wq, &work->cookie);
2450}
2451
2452static int
2453vioif_setup_sysctl(struct vioif_softc *sc)
2454{
2455	const char *devname;
2456	struct sysctllog **log;
2457	const struct sysctlnode *rnode, *rxnode, *txnode;
2458	int error;
2459
2460	log = &sc->sc_sysctllog;
2461	devname = device_xname(sc->sc_dev);
2462
2463	error = sysctl_createv(log, 0, NULL, &rnode,
2464	    0, CTLTYPE_NODE, devname,
2465	    SYSCTL_DESCR("virtio-net information and settings"),
2466	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);
2467	if (error)
2468		goto out;
2469
2470	error = sysctl_createv(log, 0, &rnode, NULL,
2471	    CTLFLAG_READWRITE, CTLTYPE_BOOL, "txrx_workqueue",
2472	    SYSCTL_DESCR("Use workqueue for packet processing"),
2473	    NULL, 0, &sc->sc_txrx_workqueue_sysctl, 0, CTL_CREATE, CTL_EOL);
2474	if (error)
2475		goto out;
2476
2477	error = sysctl_createv(log, 0, &rnode, &rxnode,
2478	    0, CTLTYPE_NODE, "rx",
2479	    SYSCTL_DESCR("virtio-net information and settings for Rx"),
2480	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
2481	if (error)
2482		goto out;
2483
2484	error = sysctl_createv(log, 0, &rxnode, NULL,
2485	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
2486	    SYSCTL_DESCR("max number of Rx packets to process for interrupt processing"),
2487	    NULL, 0, &sc->sc_rx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
2488	if (error)
2489		goto out;
2490
2491	error = sysctl_createv(log, 0, &rxnode, NULL,
2492	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
2493	    SYSCTL_DESCR("max number of Rx packets to process for deferred processing"),
2494	    NULL, 0, &sc->sc_rx_process_limit, 0, CTL_CREATE, CTL_EOL);
2495	if (error)
2496		goto out;
2497
2498	error = sysctl_createv(log, 0, &rnode, &txnode,
2499	    0, CTLTYPE_NODE, "tx",
2500	    SYSCTL_DESCR("virtio-net information and settings for Tx"),
2501	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
2502	if (error)
2503		goto out;
2504
2505	error = sysctl_createv(log, 0, &txnode, NULL,
2506	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
2507	    SYSCTL_DESCR("max number of Tx packets to process for interrupt processing"),
2508	    NULL, 0, &sc->sc_tx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
2509	if (error)
2510		goto out;
2511
2512	error = sysctl_createv(log, 0, &txnode, NULL,
2513	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
2514	    SYSCTL_DESCR("max number of Tx packets to process for deferred processing"),
2515	    NULL, 0, &sc->sc_tx_process_limit, 0, CTL_CREATE, CTL_EOL);
2516
2517out:
2518	if (error)
2519		sysctl_teardown(log);
2520
2521	return error;
2522}
2523
2524static void
2525vioif_setup_stats(struct vioif_softc *sc)
2526{
2527	struct vioif_rxqueue *rxq;
2528	struct vioif_txqueue *txq;
2529	int i;
2530
2531	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
2532		rxq = &sc->sc_rxq[i];
2533		txq = &sc->sc_txq[i];
2534
2535		snprintf(txq->txq_evgroup, sizeof(txq->txq_evgroup), "%s-TX%d",
2536		    device_xname(sc->sc_dev), i);
2537		evcnt_attach_dynamic(&txq->txq_defrag_failed, EVCNT_TYPE_MISC,
2538		    NULL, txq->txq_evgroup, "tx m_defrag() failed");
2539		evcnt_attach_dynamic(&txq->txq_mbuf_load_failed, EVCNT_TYPE_MISC,
2540		    NULL, txq->txq_evgroup, "tx dmamap load failed");
2541		evcnt_attach_dynamic(&txq->txq_enqueue_reserve_failed, EVCNT_TYPE_MISC,
2542		    NULL, txq->txq_evgroup, "virtio_enqueue_reserve failed");
2543
2544		snprintf(rxq->rxq_evgroup, sizeof(rxq->rxq_evgroup), "%s-RX%d",
2545		    device_xname(sc->sc_dev), i);
2546		evcnt_attach_dynamic(&rxq->rxq_mbuf_add_failed, EVCNT_TYPE_MISC,
2547		    NULL, rxq->rxq_evgroup, "rx mbuf allocation failed");
2548	}
2549
2550	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_load_failed, EVCNT_TYPE_MISC,
2551	    NULL, device_xname(sc->sc_dev), "control command dmamap load failed");
2552	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_failed, EVCNT_TYPE_MISC,
2553	    NULL, device_xname(sc->sc_dev), "control command failed");
2554}
2555
2556MODULE(MODULE_CLASS_DRIVER, if_vioif, "virtio");
2557
2558#ifdef _MODULE
2559#include "ioconf.c"
2560#endif
2561
2562static int
2563if_vioif_modcmd(modcmd_t cmd, void *opaque)
2564{
2565	int error = 0;
2566
2567#ifdef _MODULE
2568	switch (cmd) {
2569	case MODULE_CMD_INIT:
2570		error = config_init_component(cfdriver_ioconf_if_vioif,
2571		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2572		break;
2573	case MODULE_CMD_FINI:
2574		error = config_fini_component(cfdriver_ioconf_if_vioif,
2575		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2576		break;
2577	default:
2578		error = ENOTTY;
2579		break;
2580	}
2581#endif
2582
2583	return error;
2584}
2585