1/*	$NetBSD: if_vioif.c,v 1.77 2022/03/31 06:17:34 yamaguchi Exp $	*/
2
3/*
4 * Copyright (c) 2020 The NetBSD Foundation, Inc.
5 * Copyright (c) 2010 Minoura Makoto.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__KERNEL_RCSID(0, "$NetBSD: if_vioif.c,v 1.77 2022/03/31 06:17:34 yamaguchi Exp $");
31
32#ifdef _KERNEL_OPT
33#include "opt_net_mpsafe.h"
34#endif
35
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/kernel.h>
39#include <sys/atomic.h>
40#include <sys/bus.h>
41#include <sys/condvar.h>
42#include <sys/device.h>
43#include <sys/evcnt.h>
44#include <sys/intr.h>
45#include <sys/kmem.h>
46#include <sys/mbuf.h>
47#include <sys/mutex.h>
48#include <sys/sockio.h>
49#include <sys/syslog.h>
50#include <sys/cpu.h>
51#include <sys/module.h>
52#include <sys/pcq.h>
53#include <sys/workqueue.h>
54
55#include <dev/pci/virtioreg.h>
56#include <dev/pci/virtiovar.h>
57
58#include <net/if.h>
59#include <net/if_dl.h>
60#include <net/if_media.h>
61#include <net/if_ether.h>
62
63#include <net/bpf.h>
64
65#include "ioconf.h"
66
67#ifdef NET_MPSAFE
68#define VIOIF_MPSAFE	1
69#define VIOIF_MULTIQ	1
70#endif
71
72/*
73 * if_vioifreg.h:
74 */
75/* Configuration registers */
76#define VIRTIO_NET_CONFIG_MAC		 0 /* 8bit x 6byte */
77#define VIRTIO_NET_CONFIG_STATUS	 6 /* 16bit */
78#define VIRTIO_NET_CONFIG_MAX_VQ_PAIRS	 8 /* 16bit */
79#define VIRTIO_NET_CONFIG_MTU		10 /* 16bit */
80
81/* Feature bits */
82#define VIRTIO_NET_F_CSUM		__BIT(0)
83#define VIRTIO_NET_F_GUEST_CSUM		__BIT(1)
84#define VIRTIO_NET_F_MAC		__BIT(5)
85#define VIRTIO_NET_F_GSO		__BIT(6)
86#define VIRTIO_NET_F_GUEST_TSO4		__BIT(7)
87#define VIRTIO_NET_F_GUEST_TSO6		__BIT(8)
88#define VIRTIO_NET_F_GUEST_ECN		__BIT(9)
89#define VIRTIO_NET_F_GUEST_UFO		__BIT(10)
90#define VIRTIO_NET_F_HOST_TSO4		__BIT(11)
91#define VIRTIO_NET_F_HOST_TSO6		__BIT(12)
92#define VIRTIO_NET_F_HOST_ECN		__BIT(13)
93#define VIRTIO_NET_F_HOST_UFO		__BIT(14)
94#define VIRTIO_NET_F_MRG_RXBUF		__BIT(15)
95#define VIRTIO_NET_F_STATUS		__BIT(16)
96#define VIRTIO_NET_F_CTRL_VQ		__BIT(17)
97#define VIRTIO_NET_F_CTRL_RX		__BIT(18)
98#define VIRTIO_NET_F_CTRL_VLAN		__BIT(19)
99#define VIRTIO_NET_F_CTRL_RX_EXTRA	__BIT(20)
100#define VIRTIO_NET_F_GUEST_ANNOUNCE	__BIT(21)
101#define VIRTIO_NET_F_MQ			__BIT(22)
102
103#define VIRTIO_NET_FLAG_BITS \
104	VIRTIO_COMMON_FLAG_BITS \
105	"\x17""MQ" \
106	"\x16""GUEST_ANNOUNCE" \
107	"\x15""CTRL_RX_EXTRA" \
108	"\x14""CTRL_VLAN" \
109	"\x13""CTRL_RX" \
110	"\x12""CTRL_VQ" \
111	"\x11""STATUS" \
112	"\x10""MRG_RXBUF" \
113	"\x0f""HOST_UFO" \
114	"\x0e""HOST_ECN" \
115	"\x0d""HOST_TSO6" \
116	"\x0c""HOST_TSO4" \
117	"\x0b""GUEST_UFO" \
118	"\x0a""GUEST_ECN" \
119	"\x09""GUEST_TSO6" \
120	"\x08""GUEST_TSO4" \
121	"\x07""GSO" \
122	"\x06""MAC" \
123	"\x02""GUEST_CSUM" \
124	"\x01""CSUM"
125
126/* Status */
127#define VIRTIO_NET_S_LINK_UP	1
128
129/* Packet header structure */
130struct virtio_net_hdr {
131	uint8_t		flags;
132	uint8_t		gso_type;
133	uint16_t	hdr_len;
134	uint16_t	gso_size;
135	uint16_t	csum_start;
136	uint16_t	csum_offset;
137
138	uint16_t	num_buffers; /* VIRTIO_NET_F_MRG_RXBUF enabled or v1 */
139} __packed;
140
141#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
142#define VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
143#define VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
144#define VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
145#define VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
146#define VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */
147
148#define VIRTIO_NET_MAX_GSO_LEN		(65536+ETHER_HDR_LEN)
149
150/* Control virtqueue */
151struct virtio_net_ctrl_cmd {
152	uint8_t	class;
153	uint8_t	command;
154} __packed;
155#define VIRTIO_NET_CTRL_RX		0
156# define VIRTIO_NET_CTRL_RX_PROMISC	0
157# define VIRTIO_NET_CTRL_RX_ALLMULTI	1
158
159#define VIRTIO_NET_CTRL_MAC		1
160# define VIRTIO_NET_CTRL_MAC_TABLE_SET	0
161# define  VIRTIO_NET_CTRL_MAC_ADDR_SET	1
162
163#define VIRTIO_NET_CTRL_VLAN		2
164# define VIRTIO_NET_CTRL_VLAN_ADD	0
165# define VIRTIO_NET_CTRL_VLAN_DEL	1
166
167#define VIRTIO_NET_CTRL_MQ			4
168# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET	0
169# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN	1
170# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX	0x8000
171
172struct virtio_net_ctrl_status {
173	uint8_t	ack;
174} __packed;
175#define VIRTIO_NET_OK			0
176#define VIRTIO_NET_ERR			1
177
178struct virtio_net_ctrl_rx {
179	uint8_t	onoff;
180} __packed;
181
182struct virtio_net_ctrl_mac_tbl {
183	uint32_t nentries;
184	uint8_t macs[][ETHER_ADDR_LEN];
185} __packed;
186
187struct virtio_net_ctrl_mac_addr {
188	uint8_t mac[ETHER_ADDR_LEN];
189} __packed;
190
191struct virtio_net_ctrl_vlan {
192	uint16_t id;
193} __packed;
194
195struct virtio_net_ctrl_mq {
196	uint16_t virtqueue_pairs;
197} __packed;
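/*
 * A control request is laid out on the control virtqueue as one
 * device-readable virtio_net_ctrl_cmd, followed by zero or more
 * device-readable command-specific buffers (e.g. a virtio_net_ctrl_rx or a
 * MAC table), and finally one device-writable virtio_net_ctrl_status; see
 * vioif_ctrl_send_command() below.
 */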
198
199/*
200 * if_vioifvar.h:
201 */
202
203/*
204 * Locking notes:
205 * + a field in vioif_txqueue is protected by txq_lock (a spin mutex), and
206 *   a field in vioif_rxqueue is protected by rxq_lock (a spin mutex).
207 *      - no more than one of these locks may be held at once
208 * + ctrlq_inuse is protected by ctrlq_wait_lock.
209 *      - other fields in vioif_ctrlqueue are protected by ctrlq_inuse
210 *      - txq_lock or rxq_lock cannot be held along with ctrlq_wait_lock
211 * + fields in vioif_softc other than the queues are protected by
212 *   sc->sc_lock (an adaptive mutex)
213 *      - sc_lock is acquired before any of the other locks
214 */
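/*
 * For illustration, the lock ordering implied above is roughly:
 *
 *	mutex_enter(&sc->sc_lock);	-- adaptive; taken first when needed
 *	mutex_enter(txq->txq_lock);	-- or rxq_lock, but never both at once
 *	...
 *	mutex_exit(txq->txq_lock);
 *	mutex_exit(&sc->sc_lock);
 *
 * ctrlq_wait_lock is only taken with no tx/rx queue lock held.
 */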
215
216struct vioif_ctrl_cmdspec {
217	bus_dmamap_t	dmamap;
218	void		*buf;
219	bus_size_t	bufsize;
220};
221
222struct vioif_work {
223	struct work	 cookie;
224	void		(*func)(void *);
225	void		*arg;
226	unsigned int	 added;
227};
228
229struct vioif_txqueue {
230	kmutex_t		*txq_lock;	/* lock for tx operations */
231
232	struct virtqueue	*txq_vq;
233	bool			txq_stopping;
234	bool			txq_link_active;
235	pcq_t			*txq_intrq;
236
237	struct virtio_net_hdr	*txq_hdrs;
238	bus_dmamap_t		*txq_hdr_dmamaps;
239
240	struct mbuf		**txq_mbufs;
241	bus_dmamap_t		*txq_dmamaps;
242
243	void			*txq_deferred_transmit;
244	void			*txq_handle_si;
245	struct vioif_work	 txq_work;
246	bool			 txq_workqueue;
247	bool			 txq_active;
248
249	char			 txq_evgroup[16];
250	struct evcnt		 txq_defrag_failed;
251	struct evcnt		 txq_mbuf_load_failed;
252	struct evcnt		 txq_enqueue_reserve_failed;
253};
254
255struct vioif_rxqueue {
256	kmutex_t		*rxq_lock;	/* lock for rx operations */
257
258	struct virtqueue	*rxq_vq;
259	bool			rxq_stopping;
260
261	struct virtio_net_hdr	*rxq_hdrs;
262	bus_dmamap_t		*rxq_hdr_dmamaps;
263
264	struct mbuf		**rxq_mbufs;
265	bus_dmamap_t		*rxq_dmamaps;
266
267	void			*rxq_handle_si;
268	struct vioif_work	 rxq_work;
269	bool			 rxq_workqueue;
270	bool			 rxq_active;
271
272	char			 rxq_evgroup[16];
273	struct evcnt		 rxq_mbuf_add_failed;
274};
275
276struct vioif_ctrlqueue {
277	struct virtqueue		*ctrlq_vq;
278	enum {
279		FREE, INUSE, DONE
280	}				ctrlq_inuse;
281	kcondvar_t			ctrlq_wait;
282	kmutex_t			ctrlq_wait_lock;
283	struct lwp			*ctrlq_owner;
284
285	struct virtio_net_ctrl_cmd	*ctrlq_cmd;
286	struct virtio_net_ctrl_status	*ctrlq_status;
287	struct virtio_net_ctrl_rx	*ctrlq_rx;
288	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_uc;
289	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_mc;
290	struct virtio_net_ctrl_mac_addr	*ctrlq_mac_addr;
291	struct virtio_net_ctrl_mq	*ctrlq_mq;
292
293	bus_dmamap_t			ctrlq_cmd_dmamap;
294	bus_dmamap_t			ctrlq_status_dmamap;
295	bus_dmamap_t			ctrlq_rx_dmamap;
296	bus_dmamap_t			ctrlq_tbl_uc_dmamap;
297	bus_dmamap_t			ctrlq_tbl_mc_dmamap;
298	bus_dmamap_t			ctrlq_mac_addr_dmamap;
299	bus_dmamap_t			ctrlq_mq_dmamap;
300
301	struct evcnt			ctrlq_cmd_load_failed;
302	struct evcnt			ctrlq_cmd_failed;
303};
304
305struct vioif_softc {
306	device_t		sc_dev;
307	kmutex_t		sc_lock;
308	struct sysctllog	*sc_sysctllog;
309
310	struct virtio_softc	*sc_virtio;
311	struct virtqueue	*sc_vqs;
312	u_int			 sc_hdr_size;
313
314	int			sc_max_nvq_pairs;
315	int			sc_req_nvq_pairs;
316	int			sc_act_nvq_pairs;
317
318	uint8_t			sc_mac[ETHER_ADDR_LEN];
319	struct ethercom		sc_ethercom;
320	bool			sc_link_active;
321
322	struct vioif_txqueue	*sc_txq;
323	struct vioif_rxqueue	*sc_rxq;
324
325	bool			sc_has_ctrl;
326	struct vioif_ctrlqueue	sc_ctrlq;
327
328	bus_dma_segment_t	sc_hdr_segs[1];
329	void			*sc_dmamem;
330	void			*sc_kmem;
331
332	void			*sc_ctl_softint;
333
334	struct workqueue	*sc_txrx_workqueue;
335	bool			 sc_txrx_workqueue_sysctl;
336	u_int			 sc_tx_intr_process_limit;
337	u_int			 sc_tx_process_limit;
338	u_int			 sc_rx_intr_process_limit;
339	u_int			 sc_rx_process_limit;
340};
341#define VIRTIO_NET_TX_MAXNSEGS		(16) /* XXX */
342#define VIRTIO_NET_CTRL_MAC_MAXENTRIES	(64) /* XXX */
343
344#define VIOIF_TX_INTR_PROCESS_LIMIT	256
345#define VIOIF_TX_PROCESS_LIMIT		256
346#define VIOIF_RX_INTR_PROCESS_LIMIT	0U
347#define VIOIF_RX_PROCESS_LIMIT		256
348
349#define VIOIF_WORKQUEUE_PRI		PRI_SOFTNET
350
351/* cfattach interface functions */
352static int	vioif_match(device_t, cfdata_t, void *);
353static void	vioif_attach(device_t, device_t, void *);
354static int	vioif_finalize_teardown(device_t);
355
356/* ifnet interface functions */
357static int	vioif_init(struct ifnet *);
358static void	vioif_stop(struct ifnet *, int);
359static void	vioif_start(struct ifnet *);
360static void	vioif_start_locked(struct ifnet *, struct vioif_txqueue *);
361static int	vioif_transmit(struct ifnet *, struct mbuf *);
362static void	vioif_transmit_locked(struct ifnet *, struct vioif_txqueue *);
363static int	vioif_ioctl(struct ifnet *, u_long, void *);
364static void	vioif_watchdog(struct ifnet *);
365static int	vioif_ifflags_cb(struct ethercom *);
366
367/* rx */
368static int	vioif_add_rx_mbuf(struct vioif_rxqueue *, int);
369static void	vioif_free_rx_mbuf(struct vioif_rxqueue *, int);
370static void	vioif_populate_rx_mbufs_locked(struct vioif_softc *,
371		    struct vioif_rxqueue *);
372static void	vioif_rx_queue_clear(struct vioif_rxqueue *);
373static bool	vioif_rx_deq_locked(struct vioif_softc *, struct virtio_softc *,
374		    struct vioif_rxqueue *, u_int);
375static int	vioif_rx_intr(void *);
376static void	vioif_rx_handle(void *);
377static void	vioif_rx_sched_handle(struct vioif_softc *,
378		    struct vioif_rxqueue *);
379static void	vioif_rx_drain(struct vioif_rxqueue *);
380
381/* tx */
382static int	vioif_tx_intr(void *);
383static void	vioif_tx_handle(void *);
384static void	vioif_tx_sched_handle(struct vioif_softc *,
385		    struct vioif_txqueue *);
386static void	vioif_tx_queue_clear(struct vioif_txqueue *);
387static bool	vioif_tx_deq_locked(struct vioif_softc *, struct virtio_softc *,
388		    struct vioif_txqueue *, u_int);
389static void	vioif_tx_drain(struct vioif_txqueue *);
390static void	vioif_deferred_transmit(void *);
391
392/* workqueue */
393static struct workqueue*
394		vioif_workq_create(const char *, pri_t, int, int);
395static void	vioif_workq_destroy(struct workqueue *);
396static void	vioif_workq_work(struct work *, void *);
397static void	vioif_work_set(struct vioif_work *, void(*)(void *), void *);
398static void	vioif_work_add(struct workqueue *, struct vioif_work *);
399static void	vioif_work_wait(struct workqueue *, struct vioif_work *);
400
401/* other control */
402static bool	vioif_is_link_up(struct vioif_softc *);
403static void	vioif_update_link_status(struct vioif_softc *);
404static int	vioif_ctrl_rx(struct vioif_softc *, int, bool);
405static int	vioif_set_promisc(struct vioif_softc *, bool);
406static int	vioif_set_allmulti(struct vioif_softc *, bool);
407static int	vioif_set_rx_filter(struct vioif_softc *);
408static int	vioif_rx_filter(struct vioif_softc *);
409static int	vioif_set_mac_addr(struct vioif_softc *);
410static int	vioif_ctrl_intr(void *);
411static int	vioif_config_change(struct virtio_softc *);
412static void	vioif_ctl_softint(void *);
413static int	vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *, int);
414static void	vioif_enable_interrupt_vqpairs(struct vioif_softc *);
415static void	vioif_disable_interrupt_vqpairs(struct vioif_softc *);
416static int	vioif_setup_sysctl(struct vioif_softc *);
417static void	vioif_setup_stats(struct vioif_softc *);
418static int	vioif_ifflags(struct vioif_softc *);
419
420CFATTACH_DECL_NEW(vioif, sizeof(struct vioif_softc),
421		  vioif_match, vioif_attach, NULL, NULL);
422
423static int
424vioif_match(device_t parent, cfdata_t match, void *aux)
425{
426	struct virtio_attach_args *va = aux;
427
428	if (va->sc_childdevid == VIRTIO_DEVICE_ID_NETWORK)
429		return 1;
430
431	return 0;
432}
433
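/*
 * Helpers for bus_dma(9) map management: vioif_dmamap_create() wraps
 * bus_dmamap_create() with the flags used throughout the driver and
 * reports errors; vioif_dmamap_create_load() additionally loads a fixed
 * buffer into the new map and destroys the map again on failure.
 */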
434static int
435vioif_dmamap_create(struct vioif_softc *sc, bus_dmamap_t *map,
436    bus_size_t size, int nsegs, const char *usage)
437{
438	int r;
439
440	r = bus_dmamap_create(virtio_dmat(sc->sc_virtio), size,
441	    nsegs, size, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, map);
442
443	if (r != 0) {
444		aprint_error_dev(sc->sc_dev, "%s dmamap creation failed, "
445		    "error code %d\n", usage, r);
446	}
447
448	return r;
449}
450
451static void
452vioif_dmamap_destroy(struct vioif_softc *sc, bus_dmamap_t *map)
453{
454
455	if (*map) {
456		bus_dmamap_destroy(virtio_dmat(sc->sc_virtio), *map);
457		*map = NULL;
458	}
459}
460
461static int
462vioif_dmamap_create_load(struct vioif_softc *sc, bus_dmamap_t *map,
463    void *buf, bus_size_t size, int nsegs, int rw, const char *usage)
464{
465	int r;
466
467	r = vioif_dmamap_create(sc, map, size, nsegs, usage);
468	if (r != 0)
469		return 1;
470
471	r = bus_dmamap_load(virtio_dmat(sc->sc_virtio), *map, buf,
472	    size, NULL, rw | BUS_DMA_NOWAIT);
473	if (r != 0) {
474		vioif_dmamap_destroy(sc, map);
475		aprint_error_dev(sc->sc_dev, "%s dmamap load failed. "
476		    "error code %d\n", usage, r);
477	}
478
479	return r;
480}
481
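/*
 * Carve a chunk of the given size out of the region starting at *p and
 * advance *p past it; used below to parcel out the single bus_dmamem and
 * kmem allocations made in vioif_alloc_mems().
 */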
482static void *
483vioif_assign_mem(intptr_t *p, size_t size)
484{
485	intptr_t rv;
486
487	rv = *p;
488	*p += size;
489
490	return (void *)rv;
491}
492
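/*
 * Allocate the per-queue state and the virtqueue array: one rx and one tx
 * virtqueue per queue pair, plus one control virtqueue when the device
 * offers it.
 */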
493static void
494vioif_alloc_queues(struct vioif_softc *sc)
495{
496	int nvq_pairs = sc->sc_max_nvq_pairs;
497	int nvqs = nvq_pairs * 2;
498	int i;
499
500	KASSERT(nvq_pairs <= VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX);
501
502	sc->sc_rxq = kmem_zalloc(sizeof(sc->sc_rxq[0]) * nvq_pairs,
503	    KM_SLEEP);
504	sc->sc_txq = kmem_zalloc(sizeof(sc->sc_txq[0]) * nvq_pairs,
505	    KM_SLEEP);
506
507	if (sc->sc_has_ctrl)
508		nvqs++;
509
510	sc->sc_vqs = kmem_zalloc(sizeof(sc->sc_vqs[0]) * nvqs, KM_SLEEP);
511	nvqs = 0;
512	for (i = 0; i < nvq_pairs; i++) {
513		sc->sc_rxq[i].rxq_vq = &sc->sc_vqs[nvqs++];
514		sc->sc_txq[i].txq_vq = &sc->sc_vqs[nvqs++];
515	}
516
517	if (sc->sc_has_ctrl)
518		sc->sc_ctrlq.ctrlq_vq = &sc->sc_vqs[nvqs++];
519}
520
521static void
522vioif_free_queues(struct vioif_softc *sc)
523{
524	int nvq_pairs = sc->sc_max_nvq_pairs;
525	int nvqs = nvq_pairs * 2;
526
527	if (sc->sc_ctrlq.ctrlq_vq)
528		nvqs++;
529
530	if (sc->sc_txq) {
531		kmem_free(sc->sc_txq, sizeof(sc->sc_txq[0]) * nvq_pairs);
532		sc->sc_txq = NULL;
533	}
534
535	if (sc->sc_rxq) {
536		kmem_free(sc->sc_rxq, sizeof(sc->sc_rxq[0]) * nvq_pairs);
537		sc->sc_rxq = NULL;
538	}
539
540	if (sc->sc_vqs) {
541		kmem_free(sc->sc_vqs, sizeof(sc->sc_vqs[0]) * nvqs);
542		sc->sc_vqs = NULL;
543	}
544}
545
546/* allocate memory */
547/*
548 * dma memory is used for:
549 *   rxq_hdrs[slot]:	 metadata array for received frames (READ)
550 *   txq_hdrs[slot]:	 metadata array for frames to be sent (WRITE)
551 *   ctrlq_cmd:		 command to be sent via ctrl vq (WRITE)
552 *   ctrlq_status:	 return value for a command via ctrl vq (READ)
553 *   ctrlq_rx:		 parameter for a VIRTIO_NET_CTRL_RX class command
554 *			 (WRITE)
555 *   ctrlq_mac_tbl_uc:	 unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
556 *			 class command (WRITE)
557 *   ctrlq_mac_tbl_mc:	 multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
558 *			 class command (WRITE)
559 * Only one instance of each ctrlq_* structure is allocated; they are
560 * protected by the ctrlq_inuse variable and the ctrlq_wait condvar.
561 */
562/*
563 * dynamically allocated memory is used for:
564 *   rxq_hdr_dmamaps[slot]:	bus_dmamap_t array for sc_rx_hdrs[slot]
565 *   txq_hdr_dmamaps[slot]:	bus_dmamap_t array for sc_tx_hdrs[slot]
566 *   rxq_dmamaps[slot]:		bus_dmamap_t array for received payload
567 *   txq_dmamaps[slot]:		bus_dmamap_t array for sent payload
568 *   rxq_mbufs[slot]:		mbuf pointer array for received frames
569 *   txq_mbufs[slot]:		mbuf pointer array for sent frames
570 */
571static int
572vioif_alloc_mems(struct vioif_softc *sc)
573{
574	struct virtio_softc *vsc = sc->sc_virtio;
575	struct vioif_txqueue *txq;
576	struct vioif_rxqueue *rxq;
577	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
578	int allocsize, allocsize2, r, rsegs, i, qid;
579	void *vaddr;
580	intptr_t p;
581
582	allocsize = 0;
583	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
584		rxq = &sc->sc_rxq[qid];
585		txq = &sc->sc_txq[qid];
586
587		allocsize += sizeof(struct virtio_net_hdr) *
588			(rxq->rxq_vq->vq_num + txq->txq_vq->vq_num);
589	}
590	if (sc->sc_has_ctrl) {
591		allocsize += sizeof(struct virtio_net_ctrl_cmd);
592		allocsize += sizeof(struct virtio_net_ctrl_status);
593		allocsize += sizeof(struct virtio_net_ctrl_rx);
594		allocsize += sizeof(struct virtio_net_ctrl_mac_tbl)
595		    + ETHER_ADDR_LEN;
596		allocsize += sizeof(struct virtio_net_ctrl_mac_tbl)
597		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES;
598		allocsize += sizeof(struct virtio_net_ctrl_mac_addr);
599		allocsize += sizeof(struct virtio_net_ctrl_mq);
600	}
601	r = bus_dmamem_alloc(virtio_dmat(vsc), allocsize, 0, 0,
602	    &sc->sc_hdr_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
603	if (r != 0) {
604		aprint_error_dev(sc->sc_dev,
605		    "DMA memory allocation failed, size %d, "
606		    "error code %d\n", allocsize, r);
607		goto err_none;
608	}
609	r = bus_dmamem_map(virtio_dmat(vsc),
610	    &sc->sc_hdr_segs[0], 1, allocsize, &vaddr, BUS_DMA_NOWAIT);
611	if (r != 0) {
612		aprint_error_dev(sc->sc_dev,
613		    "DMA memory map failed, error code %d\n", r);
614		goto err_dmamem_alloc;
615	}
616
617	memset(vaddr, 0, allocsize);
618	sc->sc_dmamem = vaddr;
619	p = (intptr_t) vaddr;
620
621	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
622		rxq = &sc->sc_rxq[qid];
623		txq = &sc->sc_txq[qid];
624
625		rxq->rxq_hdrs = vioif_assign_mem(&p,
626		    sizeof(struct virtio_net_hdr) * rxq->rxq_vq->vq_num);
627		txq->txq_hdrs = vioif_assign_mem(&p,
628		    sizeof(struct virtio_net_hdr) * txq->txq_vq->vq_num);
629	}
630	if (sc->sc_has_ctrl) {
631		ctrlq->ctrlq_cmd = vioif_assign_mem(&p,
632		    sizeof(*ctrlq->ctrlq_cmd));
633		ctrlq->ctrlq_status = vioif_assign_mem(&p,
634		    sizeof(*ctrlq->ctrlq_status));
635		ctrlq->ctrlq_rx = vioif_assign_mem(&p,
636		    sizeof(*ctrlq->ctrlq_rx));
637		ctrlq->ctrlq_mac_tbl_uc = vioif_assign_mem(&p,
638		    sizeof(*ctrlq->ctrlq_mac_tbl_uc)
639		    + ETHER_ADDR_LEN);
640		ctrlq->ctrlq_mac_tbl_mc = vioif_assign_mem(&p,
641		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
642		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES);
643		ctrlq->ctrlq_mac_addr = vioif_assign_mem(&p,
644		    sizeof(*ctrlq->ctrlq_mac_addr));
645		ctrlq->ctrlq_mq = vioif_assign_mem(&p, sizeof(*ctrlq->ctrlq_mq));
646	}
647
648	allocsize2 = 0;
649	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
650		int rxqsize, txqsize;
651
652		rxq = &sc->sc_rxq[qid];
653		txq = &sc->sc_txq[qid];
654		rxqsize = rxq->rxq_vq->vq_num;
655		txqsize = txq->txq_vq->vq_num;
656
657		allocsize2 += sizeof(rxq->rxq_dmamaps[0]) * rxqsize;
658		allocsize2 += sizeof(rxq->rxq_hdr_dmamaps[0]) * rxqsize;
659		allocsize2 += sizeof(rxq->rxq_mbufs[0]) * rxqsize;
660
661		allocsize2 += sizeof(txq->txq_dmamaps[0]) * txqsize;
662		allocsize2 += sizeof(txq->txq_hdr_dmamaps[0]) * txqsize;
663		allocsize2 += sizeof(txq->txq_mbufs[0]) * txqsize;
664	}
665	vaddr = kmem_zalloc(allocsize2, KM_SLEEP);
666	sc->sc_kmem = vaddr;
667	p = (intptr_t) vaddr;
668
669	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
670		int rxqsize, txqsize;
671		rxq = &sc->sc_rxq[qid];
672		txq = &sc->sc_txq[qid];
673		rxqsize = rxq->rxq_vq->vq_num;
674		txqsize = txq->txq_vq->vq_num;
675
676		rxq->rxq_hdr_dmamaps = vioif_assign_mem(&p,
677		    sizeof(rxq->rxq_hdr_dmamaps[0]) * rxqsize);
678		txq->txq_hdr_dmamaps = vioif_assign_mem(&p,
679		    sizeof(txq->txq_hdr_dmamaps[0]) * txqsize);
680		rxq->rxq_dmamaps = vioif_assign_mem(&p,
681		    sizeof(rxq->rxq_dmamaps[0]) * rxqsize);
682		txq->txq_dmamaps = vioif_assign_mem(&p,
683		    sizeof(txq->txq_dmamaps[0]) * txqsize);
684		rxq->rxq_mbufs = vioif_assign_mem(&p,
685		    sizeof(rxq->rxq_mbufs[0]) * rxqsize);
686		txq->txq_mbufs = vioif_assign_mem(&p,
687		    sizeof(txq->txq_mbufs[0]) * txqsize);
688	}
689
690	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
691		rxq = &sc->sc_rxq[qid];
692		txq = &sc->sc_txq[qid];
693
694		for (i = 0; i < rxq->rxq_vq->vq_num; i++) {
695			r = vioif_dmamap_create_load(sc, &rxq->rxq_hdr_dmamaps[i],
696			    &rxq->rxq_hdrs[i], sc->sc_hdr_size, 1,
697			    BUS_DMA_READ, "rx header");
698			if (r != 0)
699				goto err_reqs;
700
701			r = vioif_dmamap_create(sc, &rxq->rxq_dmamaps[i],
702			    MCLBYTES, 1, "rx payload");
703			if (r != 0)
704				goto err_reqs;
705		}
706
707		for (i = 0; i < txq->txq_vq->vq_num; i++) {
708			r = vioif_dmamap_create_load(sc, &txq->txq_hdr_dmamaps[i],
709			    &txq->txq_hdrs[i], sc->sc_hdr_size, 1,
710			    BUS_DMA_READ, "tx header");
711			if (r != 0)
712				goto err_reqs;
713
714			r = vioif_dmamap_create(sc, &txq->txq_dmamaps[i], ETHER_MAX_LEN,
715			    VIRTIO_NET_TX_MAXNSEGS, "tx payload");
716			if (r != 0)
717				goto err_reqs;
718		}
719	}
720
721	if (sc->sc_has_ctrl) {
722		/* control vq class & command */
723		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_cmd_dmamap,
724		    ctrlq->ctrlq_cmd, sizeof(*ctrlq->ctrlq_cmd), 1,
725		    BUS_DMA_WRITE, "control command");
726		if (r != 0)
727			goto err_reqs;
728
729		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_status_dmamap,
730		    ctrlq->ctrlq_status, sizeof(*ctrlq->ctrlq_status), 1,
731		    BUS_DMA_READ, "control status");
732		if (r != 0)
733			goto err_reqs;
734
735		/* control vq rx mode command parameter */
736		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_rx_dmamap,
737		    ctrlq->ctrlq_rx, sizeof(*ctrlq->ctrlq_rx), 1,
738		    BUS_DMA_WRITE, "rx mode control command");
739		if (r != 0)
740			goto err_reqs;
741
742		/* multiqueue set command */
743		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_mq_dmamap,
744		    ctrlq->ctrlq_mq, sizeof(*ctrlq->ctrlq_mq), 1,
745		    BUS_DMA_WRITE, "multiqueue set command");
746		if (r != 0)
747			goto err_reqs;
748
749		/* control vq MAC filter table for unicast */
750		/* do not load now since its length is variable */
751		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_uc_dmamap,
752		    sizeof(*ctrlq->ctrlq_mac_tbl_uc)
753		    + ETHER_ADDR_LEN, 1,
754		    "unicast MAC address filter command");
755		if (r != 0)
756			goto err_reqs;
757
758		/* control vq MAC filter table for multicast */
759		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_mc_dmamap,
760		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
761		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES, 1,
762		    "multicast MAC address filter command");
763		if (r != 0)
764			goto err_reqs;
765
766		/* control vq MAC address set command */
767		r = vioif_dmamap_create_load(sc,
768		    &ctrlq->ctrlq_mac_addr_dmamap,
769		    ctrlq->ctrlq_mac_addr,
770		    sizeof(*ctrlq->ctrlq_mac_addr), 1,
771		    BUS_DMA_WRITE, "mac addr set command");
772		if (r != 0)
773			goto err_reqs;
774	}
775
776	return 0;
777
778err_reqs:
779	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_mc_dmamap);
780	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_uc_dmamap);
781	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_rx_dmamap);
782	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_status_dmamap);
783	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_cmd_dmamap);
784	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_mac_addr_dmamap);
785	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
786		rxq = &sc->sc_rxq[qid];
787		txq = &sc->sc_txq[qid];
788
789		for (i = 0; i < txq->txq_vq->vq_num; i++) {
790			vioif_dmamap_destroy(sc, &txq->txq_dmamaps[i]);
791			vioif_dmamap_destroy(sc, &txq->txq_hdr_dmamaps[i]);
792		}
793		for (i = 0; i < rxq->rxq_vq->vq_num; i++) {
794			vioif_dmamap_destroy(sc, &rxq->rxq_dmamaps[i]);
795			vioif_dmamap_destroy(sc, &rxq->rxq_hdr_dmamaps[i]);
796		}
797	}
798	if (sc->sc_kmem) {
799		kmem_free(sc->sc_kmem, allocsize2);
800		sc->sc_kmem = NULL;
801	}
802	bus_dmamem_unmap(virtio_dmat(vsc), sc->sc_dmamem, allocsize);
803err_dmamem_alloc:
804	bus_dmamem_free(virtio_dmat(vsc), &sc->sc_hdr_segs[0], 1);
805err_none:
806	return -1;
807}
808
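/*
 * Attach: negotiate features, read (or synthesize) the MAC address,
 * allocate virtqueues, softints and DMA memory, and register the ifnet.
 */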
809static void
810vioif_attach(device_t parent, device_t self, void *aux)
811{
812	struct vioif_softc *sc = device_private(self);
813	struct virtio_softc *vsc = device_private(parent);
814	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
815	struct vioif_txqueue *txq;
816	struct vioif_rxqueue *rxq;
817	uint64_t features, req_features;
818	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
819	u_int softint_flags;
820	int r, i, nvqs=0, req_flags;
821	char xnamebuf[MAXCOMLEN];
822
823	if (virtio_child(vsc) != NULL) {
824		aprint_normal(": child already attached for %s; "
825		    "something wrong...\n", device_xname(parent));
826		return;
827	}
828
829	sc->sc_dev = self;
830	sc->sc_virtio = vsc;
831	sc->sc_link_active = false;
832
833	sc->sc_max_nvq_pairs = 1;
834	sc->sc_req_nvq_pairs = 1;
835	sc->sc_act_nvq_pairs = 1;
836	sc->sc_txrx_workqueue_sysctl = true;
837	sc->sc_tx_intr_process_limit = VIOIF_TX_INTR_PROCESS_LIMIT;
838	sc->sc_tx_process_limit = VIOIF_TX_PROCESS_LIMIT;
839	sc->sc_rx_intr_process_limit = VIOIF_RX_INTR_PROCESS_LIMIT;
840	sc->sc_rx_process_limit = VIOIF_RX_PROCESS_LIMIT;
841
842	mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);
843
844	snprintf(xnamebuf, sizeof(xnamebuf), "%s_txrx", device_xname(self));
845	sc->sc_txrx_workqueue = vioif_workq_create(xnamebuf, VIOIF_WORKQUEUE_PRI,
846	    IPL_NET, WQ_PERCPU | WQ_MPSAFE);
847	if (sc->sc_txrx_workqueue == NULL)
848		goto err;
849
850	req_flags = 0;
851
852#ifdef VIOIF_MPSAFE
853	req_flags |= VIRTIO_F_INTR_MPSAFE;
854#endif
855	req_flags |= VIRTIO_F_INTR_MSIX;
856
857	req_features =
858	    VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | VIRTIO_NET_F_CTRL_VQ |
859	    VIRTIO_NET_F_CTRL_RX | VIRTIO_F_NOTIFY_ON_EMPTY;
860	req_features |= VIRTIO_F_RING_EVENT_IDX;
861#ifdef VIOIF_MULTIQ
862	req_features |= VIRTIO_NET_F_MQ;
863#endif
864	virtio_child_attach_start(vsc, self, IPL_NET, NULL,
865	    vioif_config_change, virtio_vq_intrhand, req_flags,
866	    req_features, VIRTIO_NET_FLAG_BITS);
867
868	features = virtio_features(vsc);
869	if (features == 0)
870		goto err;
871
872	if (features & VIRTIO_NET_F_MAC) {
873		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
874			sc->sc_mac[i] = virtio_read_device_config_1(vsc,
875			    VIRTIO_NET_CONFIG_MAC + i);
876		}
877	} else {
878		/* code stolen from sys/net/if_tap.c */
879		struct timeval tv;
880		uint32_t ui;
881		getmicrouptime(&tv);
882		ui = (tv.tv_sec ^ tv.tv_usec) & 0xffffff;
883		memcpy(sc->sc_mac+3, (uint8_t *)&ui, 3);
884		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
885			virtio_write_device_config_1(vsc,
886			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
887		}
888	}
889
890	/* 'Ethernet' is capitalized to match other Ethernet drivers' attach messages */
891	aprint_normal_dev(self, "Ethernet address %s\n",
892	    ether_sprintf(sc->sc_mac));
893
894	if (features & (VIRTIO_NET_F_MRG_RXBUF | VIRTIO_F_VERSION_1)) {
895		sc->sc_hdr_size = sizeof(struct virtio_net_hdr);
896	} else {
897		sc->sc_hdr_size = offsetof(struct virtio_net_hdr, num_buffers);
898	}
899
900	if ((features & VIRTIO_NET_F_CTRL_VQ) &&
901	    (features & VIRTIO_NET_F_CTRL_RX)) {
902		sc->sc_has_ctrl = true;
903
904		cv_init(&ctrlq->ctrlq_wait, "ctrl_vq");
905		mutex_init(&ctrlq->ctrlq_wait_lock, MUTEX_DEFAULT, IPL_NET);
906		ctrlq->ctrlq_inuse = FREE;
907	} else {
908		sc->sc_has_ctrl = false;
909	}
910
911	if (sc->sc_has_ctrl && (features & VIRTIO_NET_F_MQ)) {
912		sc->sc_max_nvq_pairs = virtio_read_device_config_2(vsc,
913		    VIRTIO_NET_CONFIG_MAX_VQ_PAIRS);
914
915		if (sc->sc_max_nvq_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
916			goto err;
917
918		/* Limit the number of queue pairs to use */
919		sc->sc_req_nvq_pairs = MIN(sc->sc_max_nvq_pairs, ncpu);
920	}
921
922	vioif_alloc_queues(sc);
923	virtio_child_attach_set_vqs(vsc, sc->sc_vqs, sc->sc_req_nvq_pairs);
924
925#ifdef VIOIF_MPSAFE
926	softint_flags = SOFTINT_NET | SOFTINT_MPSAFE;
927#else
928	softint_flags = SOFTINT_NET;
929#endif
930
931	/*
932	 * Allocating virtqueues
933	 */
934	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
935		rxq = &sc->sc_rxq[i];
936		txq = &sc->sc_txq[i];
937		char qname[32];
938
939		rxq->rxq_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
940
941		rxq->rxq_handle_si = softint_establish(softint_flags,
942		    vioif_rx_handle, rxq);
943		if (rxq->rxq_handle_si == NULL) {
944			aprint_error_dev(self, "cannot establish rx softint\n");
945			goto err;
946		}
947
948		snprintf(qname, sizeof(qname), "rx%d", i);
949		r = virtio_alloc_vq(vsc, rxq->rxq_vq, nvqs,
950		    MCLBYTES + sc->sc_hdr_size, 2, qname);
951		if (r != 0)
952			goto err;
953		nvqs++;
954		rxq->rxq_vq->vq_intrhand = vioif_rx_intr;
955		rxq->rxq_vq->vq_intrhand_arg = (void *)rxq;
956		rxq->rxq_stopping = true;
957		vioif_work_set(&rxq->rxq_work, vioif_rx_handle, rxq);
958
959		txq->txq_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
960
961		txq->txq_deferred_transmit = softint_establish(softint_flags,
962		    vioif_deferred_transmit, txq);
963		if (txq->txq_deferred_transmit == NULL) {
964			aprint_error_dev(self, "cannot establish tx softint\n");
965			goto err;
966		}
967		txq->txq_handle_si = softint_establish(softint_flags,
968		    vioif_tx_handle, txq);
969		if (txq->txq_handle_si == NULL) {
970			aprint_error_dev(self, "cannot establish tx softint\n");
971			goto err;
972		}
973
974		snprintf(qname, sizeof(qname), "tx%d", i);
975		r = virtio_alloc_vq(vsc, txq->txq_vq, nvqs,
976		    sc->sc_hdr_size + (ETHER_MAX_LEN - ETHER_HDR_LEN),
977		    VIRTIO_NET_TX_MAXNSEGS + 1, qname);
978		if (r != 0)
979			goto err;
980		nvqs++;
981		txq->txq_vq->vq_intrhand = vioif_tx_intr;
982		txq->txq_vq->vq_intrhand_arg = (void *)txq;
983		txq->txq_link_active = sc->sc_link_active;
984		txq->txq_stopping = false;
985		txq->txq_intrq = pcq_create(txq->txq_vq->vq_num, KM_SLEEP);
986		vioif_work_set(&txq->txq_work, vioif_tx_handle, txq);
987	}
988
989	if (sc->sc_has_ctrl) {
990		/*
991		 * Allocating a virtqueue for control channel
992		 */
993		r = virtio_alloc_vq(vsc, ctrlq->ctrlq_vq, nvqs,
994		    NBPG, 1, "control");
995		if (r != 0) {
996			aprint_error_dev(self, "failed to allocate "
997			    "a virtqueue for control channel, error code %d\n",
998			    r);
999
1000			sc->sc_has_ctrl = false;
1001			cv_destroy(&ctrlq->ctrlq_wait);
1002			mutex_destroy(&ctrlq->ctrlq_wait_lock);
1003		} else {
1004			nvqs++;
1005			ctrlq->ctrlq_vq->vq_intrhand = vioif_ctrl_intr;
1006			ctrlq->ctrlq_vq->vq_intrhand_arg = (void *) ctrlq;
1007		}
1008	}
1009
1010	sc->sc_ctl_softint = softint_establish(softint_flags,
1011	    vioif_ctl_softint, sc);
1012	if (sc->sc_ctl_softint == NULL) {
1013		aprint_error_dev(self, "cannot establish ctl softint\n");
1014		goto err;
1015	}
1016
1017	if (vioif_alloc_mems(sc) < 0)
1018		goto err;
1019
1020	if (virtio_child_attach_finish(vsc) != 0)
1021		goto err;
1022
1023	if (vioif_setup_sysctl(sc) != 0) {
1024		aprint_error_dev(self, "unable to create sysctl node\n");
1025		/* continue */
1026	}
1027
1028	vioif_setup_stats(sc);
1029
1030	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
1031	ifp->if_softc = sc;
1032	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1033#ifdef VIOIF_MPSAFE
1034	ifp->if_extflags = IFEF_MPSAFE;
1035#endif
1036	ifp->if_start = vioif_start;
1037	if (sc->sc_req_nvq_pairs > 1)
1038		ifp->if_transmit = vioif_transmit;
1039	ifp->if_ioctl = vioif_ioctl;
1040	ifp->if_init = vioif_init;
1041	ifp->if_stop = vioif_stop;
1042	ifp->if_capabilities = 0;
1043	ifp->if_watchdog = vioif_watchdog;
1044	txq = &sc->sc_txq[0];
1045	IFQ_SET_MAXLEN(&ifp->if_snd, MAX(txq->txq_vq->vq_num, IFQ_MAXLEN));
1046	IFQ_SET_READY(&ifp->if_snd);
1047
1048	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
1049
1050	if_attach(ifp);
1051	if_deferred_start_init(ifp, NULL);
1052	ether_ifattach(ifp, sc->sc_mac);
1053	ether_set_ifflags_cb(&sc->sc_ethercom, vioif_ifflags_cb);
1054
1055	return;
1056
1057err:
1058	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
1059		rxq = &sc->sc_rxq[i];
1060		txq = &sc->sc_txq[i];
1061
1062		if (rxq->rxq_lock) {
1063			mutex_obj_free(rxq->rxq_lock);
1064			rxq->rxq_lock = NULL;
1065		}
1066
1067		if (rxq->rxq_handle_si) {
1068			softint_disestablish(rxq->rxq_handle_si);
1069			rxq->rxq_handle_si = NULL;
1070		}
1071
1072		if (txq->txq_lock) {
1073			mutex_obj_free(txq->txq_lock);
1074			txq->txq_lock = NULL;
1075		}
1076
1077		if (txq->txq_handle_si) {
1078			softint_disestablish(txq->txq_handle_si);
1079			txq->txq_handle_si = NULL;
1080		}
1081
1082		if (txq->txq_deferred_transmit) {
1083			softint_disestablish(txq->txq_deferred_transmit);
1084			txq->txq_deferred_transmit = NULL;
1085		}
1086
1087		if (txq->txq_intrq) {
1088			pcq_destroy(txq->txq_intrq);
1089			txq->txq_intrq = NULL;
1090		}
1091	}
1092
1093	if (sc->sc_has_ctrl) {
1094		cv_destroy(&ctrlq->ctrlq_wait);
1095		mutex_destroy(&ctrlq->ctrlq_wait_lock);
1096	}
1097
1098	while (nvqs > 0)
1099		virtio_free_vq(vsc, &sc->sc_vqs[--nvqs]);
1100
1101	vioif_free_queues(sc);
1102	mutex_destroy(&sc->sc_lock);
1103	virtio_child_attach_failed(vsc);
1104	config_finalize_register(self, vioif_finalize_teardown);
1105
1106	return;
1107}
1108
1109static int
1110vioif_finalize_teardown(device_t self)
1111{
1112	struct vioif_softc *sc = device_private(self);
1113
1114	if (sc->sc_txrx_workqueue != NULL) {
1115		vioif_workq_destroy(sc->sc_txrx_workqueue);
1116		sc->sc_txrx_workqueue = NULL;
1117	}
1118
1119	return 0;
1120}
1121
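/* enable/disable the rx and tx virtqueue interrupts of the active pairs */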
1122static void
1123vioif_enable_interrupt_vqpairs(struct vioif_softc *sc)
1124{
1125	struct virtio_softc *vsc = sc->sc_virtio;
1126	struct vioif_txqueue *txq;
1127	struct vioif_rxqueue *rxq;
1128	int i;
1129
1130	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1131		txq = &sc->sc_txq[i];
1132		rxq = &sc->sc_rxq[i];
1133
1134		virtio_start_vq_intr(vsc, txq->txq_vq);
1135		virtio_start_vq_intr(vsc, rxq->rxq_vq);
1136	}
1137}
1138
1139static void
1140vioif_disable_interrupt_vqpairs(struct vioif_softc *sc)
1141{
1142	struct virtio_softc *vsc = sc->sc_virtio;
1143	struct vioif_txqueue *txq;
1144	struct vioif_rxqueue *rxq;
1145	int i;
1146
1147	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1148		rxq = &sc->sc_rxq[i];
1149		txq = &sc->sc_txq[i];
1150
1151		virtio_stop_vq_intr(vsc, rxq->rxq_vq);
1152		virtio_stop_vq_intr(vsc, txq->txq_vq);
1153	}
1154}
1155
1156/*
1157 * Interface functions for ifnet
1158 */
1159static int
1160vioif_init(struct ifnet *ifp)
1161{
1162	struct vioif_softc *sc = ifp->if_softc;
1163	struct virtio_softc *vsc = sc->sc_virtio;
1164	struct vioif_rxqueue *rxq;
1165	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1166	int r, i;
1167
1168	vioif_stop(ifp, 0);
1169
1170	r = virtio_reinit_start(vsc);
1171	if (r != 0) {
1172		log(LOG_ERR, "%s: reset failed\n", ifp->if_xname);
1173		return EIO;
1174	}
1175
1176	virtio_negotiate_features(vsc, virtio_features(vsc));
1177
1178	for (i = 0; i < sc->sc_req_nvq_pairs; i++) {
1179		rxq = &sc->sc_rxq[i];
1180
1181		/* rxq_stopping must be set false before vioif_populate_rx_mbufs */
1182		mutex_enter(rxq->rxq_lock);
1183		rxq->rxq_stopping = false;
1184		vioif_populate_rx_mbufs_locked(sc, rxq);
1185		mutex_exit(rxq->rxq_lock);
1186
1187	}
1188
1189	virtio_reinit_end(vsc);
1190
1191	if (sc->sc_has_ctrl)
1192		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
1193
1194	r = vioif_ctrl_mq_vq_pairs_set(sc, sc->sc_req_nvq_pairs);
1195	if (r == 0)
1196		sc->sc_act_nvq_pairs = sc->sc_req_nvq_pairs;
1197	else
1198		sc->sc_act_nvq_pairs = 1;
1199
1200	for (i = 0; i < sc->sc_act_nvq_pairs; i++)
1201		sc->sc_txq[i].txq_stopping = false;
1202
1203	vioif_enable_interrupt_vqpairs(sc);
1204
1205	vioif_update_link_status(sc);
1206	ifp->if_flags |= IFF_RUNNING;
1207	ifp->if_flags &= ~IFF_OACTIVE;
1208	r = vioif_rx_filter(sc);
1209
1210	return r;
1211}
1212
1213static void
1214vioif_stop(struct ifnet *ifp, int disable)
1215{
1216	struct vioif_softc *sc = ifp->if_softc;
1217	struct virtio_softc *vsc = sc->sc_virtio;
1218	struct vioif_txqueue *txq;
1219	struct vioif_rxqueue *rxq;
1220	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1221	int i;
1222
1223	/* disable interrupts */
1224	vioif_disable_interrupt_vqpairs(sc);
1225	if (sc->sc_has_ctrl)
1226		virtio_stop_vq_intr(vsc, ctrlq->ctrlq_vq);
1227
1228	/*
1229	 * stop all packet processing:
1230	 * 1. stop the interrupt handlers via rxq_stopping and txq_stopping
1231	 * 2. wait for the packet-processing workqueue to stop
1232	 */
1233	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1234		txq = &sc->sc_txq[i];
1235		rxq = &sc->sc_rxq[i];
1236
1237		mutex_enter(rxq->rxq_lock);
1238		rxq->rxq_stopping = true;
1239		mutex_exit(rxq->rxq_lock);
1240		vioif_work_wait(sc->sc_txrx_workqueue, &rxq->rxq_work);
1241
1242		mutex_enter(txq->txq_lock);
1243		txq->txq_stopping = true;
1244		mutex_exit(txq->txq_lock);
1245		vioif_work_wait(sc->sc_txrx_workqueue, &txq->txq_work);
1246	}
1247
1248	/* only way to stop I/O and DMA is resetting... */
1249	virtio_reset(vsc);
1250
1251	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1252		vioif_rx_queue_clear(&sc->sc_rxq[i]);
1253		vioif_tx_queue_clear(&sc->sc_txq[i]);
1254	}
1255
1256	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1257	sc->sc_link_active = false;
1258
1259	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1260		txq = &sc->sc_txq[i];
1261		rxq = &sc->sc_rxq[i];
1262
1263		txq->txq_link_active = false;
1264
1265		if (disable)
1266			vioif_rx_drain(rxq);
1267
1268		vioif_tx_drain(txq);
1269	}
1270}
1271
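/*
 * Common transmit path for if_start and if_transmit: dequeue mbufs from
 * the pcq or if_snd, load each with bus_dmamap_load_mbuf() (defragmenting
 * once on failure), prepend a virtio_net_hdr, and enqueue header and
 * payload onto the tx virtqueue.  Called with txq_lock held.
 */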
1272static void
1273vioif_send_common_locked(struct ifnet *ifp, struct vioif_txqueue *txq,
1274    bool is_transmit)
1275{
1276	struct vioif_softc *sc = ifp->if_softc;
1277	struct virtio_softc *vsc = sc->sc_virtio;
1278	struct virtqueue *vq = txq->txq_vq;
1279	struct virtio_net_hdr *hdr;
1280	struct mbuf *m;
1281	int queued = 0;
1282
1283	KASSERT(mutex_owned(txq->txq_lock));
1284
1285	if ((ifp->if_flags & IFF_RUNNING) == 0)
1286		return;
1287
1288	if (!txq->txq_link_active || txq->txq_stopping)
1289		return;
1290
1291	if ((ifp->if_flags & IFF_OACTIVE) != 0 && !is_transmit)
1292		return;
1293
1294	for (;;) {
1295		int slot, r;
1296
1297		if (is_transmit)
1298			m = pcq_get(txq->txq_intrq);
1299		else
1300			IFQ_DEQUEUE(&ifp->if_snd, m);
1301
1302		if (m == NULL)
1303			break;
1304
1305		r = virtio_enqueue_prep(vsc, vq, &slot);
1306		if (r == EAGAIN) {
1307			ifp->if_flags |= IFF_OACTIVE;
1308			m_freem(m);
1309			break;
1310		}
1311		if (r != 0)
1312			panic("enqueue_prep for a tx buffer");
1313
1314		r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1315		    txq->txq_dmamaps[slot], m, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1316		if (r != 0) {
1317			/* maybe just too fragmented */
1318			struct mbuf *newm;
1319
1320			newm = m_defrag(m, M_NOWAIT);
1321			if (newm == NULL) {
1322				txq->txq_defrag_failed.ev_count++;
1323				goto skip;
1324			}
1325
1326			m = newm;
1327			r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1328			    txq->txq_dmamaps[slot], m,
1329			    BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1330			if (r != 0) {
1331				txq->txq_mbuf_load_failed.ev_count++;
1332skip:
1333				m_freem(m);
1334				virtio_enqueue_abort(vsc, vq, slot);
1335				continue;
1336			}
1337		}
1338
1339		/* This should actually never fail */
1340		r = virtio_enqueue_reserve(vsc, vq, slot,
1341		    txq->txq_dmamaps[slot]->dm_nsegs + 1);
1342		if (r != 0) {
1343			txq->txq_enqueue_reserve_failed.ev_count++;
1344			bus_dmamap_unload(virtio_dmat(vsc),
1345			     txq->txq_dmamaps[slot]);
1346			/* slot already freed by virtio_enqueue_reserve */
1347			m_freem(m);
1348			continue;
1349		}
1350
1351		txq->txq_mbufs[slot] = m;
1352
1353		hdr = &txq->txq_hdrs[slot];
1354		memset(hdr, 0, sc->sc_hdr_size);
1355		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_dmamaps[slot],
1356		    0, txq->txq_dmamaps[slot]->dm_mapsize,
1357		    BUS_DMASYNC_PREWRITE);
1358		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_hdr_dmamaps[slot],
1359		    0, txq->txq_hdr_dmamaps[slot]->dm_mapsize,
1360		    BUS_DMASYNC_PREWRITE);
1361		virtio_enqueue(vsc, vq, slot, txq->txq_hdr_dmamaps[slot], true);
1362		virtio_enqueue(vsc, vq, slot, txq->txq_dmamaps[slot], true);
1363		virtio_enqueue_commit(vsc, vq, slot, false);
1364
1365		queued++;
1366		bpf_mtap(ifp, m, BPF_D_OUT);
1367	}
1368
1369	if (queued > 0) {
1370		virtio_enqueue_commit(vsc, vq, -1, true);
1371		ifp->if_timer = 5;
1372	}
1373}
1374
1375static void
1376vioif_start_locked(struct ifnet *ifp, struct vioif_txqueue *txq)
1377{
1378
1379	/*
1380	 * ifp->if_obytes and ifp->if_omcasts are added in if_transmit()@if.c.
1381	 */
1382	vioif_send_common_locked(ifp, txq, false);
1383
1384}
1385
1386static void
1387vioif_start(struct ifnet *ifp)
1388{
1389	struct vioif_softc *sc = ifp->if_softc;
1390	struct vioif_txqueue *txq = &sc->sc_txq[0];
1391
1392#ifdef VIOIF_MPSAFE
1393	KASSERT(if_is_mpsafe(ifp));
1394#endif
1395
1396	mutex_enter(txq->txq_lock);
1397	vioif_start_locked(ifp, txq);
1398	mutex_exit(txq->txq_lock);
1399}
1400
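/* map the current CPU onto one of the active tx queue pairs */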
1401static inline int
1402vioif_select_txqueue(struct ifnet *ifp, struct mbuf *m)
1403{
1404	struct vioif_softc *sc = ifp->if_softc;
1405	u_int cpuid = cpu_index(curcpu());
1406
1407	return cpuid % sc->sc_act_nvq_pairs;
1408}
1409
1410static void
1411vioif_transmit_locked(struct ifnet *ifp, struct vioif_txqueue *txq)
1412{
1413
1414	vioif_send_common_locked(ifp, txq, true);
1415}
1416
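/*
 * if_transmit: pick a tx queue based on the current CPU, append the packet
 * to that queue's pcq and, if the queue lock is uncontended, send it
 * immediately; otherwise it is left in the pcq to be picked up later, e.g.
 * by the deferred-transmit softint.
 */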
1417static int
1418vioif_transmit(struct ifnet *ifp, struct mbuf *m)
1419{
1420	struct vioif_softc *sc = ifp->if_softc;
1421	struct vioif_txqueue *txq;
1422	int qid;
1423
1424	qid = vioif_select_txqueue(ifp, m);
1425	txq = &sc->sc_txq[qid];
1426
1427	if (__predict_false(!pcq_put(txq->txq_intrq, m))) {
1428		m_freem(m);
1429		return ENOBUFS;
1430	}
1431
1432	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
1433	if_statadd_ref(nsr, if_obytes, m->m_pkthdr.len);
1434	if (m->m_flags & M_MCAST)
1435		if_statinc_ref(nsr, if_omcasts);
1436	IF_STAT_PUTREF(ifp);
1437
1438	if (mutex_tryenter(txq->txq_lock)) {
1439		vioif_transmit_locked(ifp, txq);
1440		mutex_exit(txq->txq_lock);
1441	}
1442
1443	return 0;
1444}
1445
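/* softint handler: retry sending packets queued on the pcq by if_transmit */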
1446static void
1447vioif_deferred_transmit(void *arg)
1448{
1449	struct vioif_txqueue *txq = arg;
1450	struct virtio_softc *vsc = txq->txq_vq->vq_owner;
1451	struct vioif_softc *sc = device_private(virtio_child(vsc));
1452	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1453
1454	mutex_enter(txq->txq_lock);
1455	vioif_send_common_locked(ifp, txq, true);
1456	mutex_exit(txq->txq_lock);
1457}
1458
1459static int
1460vioif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1461{
1462	int s, r;
1463
1464	s = splnet();
1465
1466	r = ether_ioctl(ifp, cmd, data);
1467	if (r == ENETRESET && (cmd == SIOCADDMULTI || cmd == SIOCDELMULTI)) {
1468		if (ifp->if_flags & IFF_RUNNING) {
1469			r = vioif_rx_filter(ifp->if_softc);
1470		} else {
1471			r = 0;
1472		}
1473	}
1474
1475	splx(s);
1476
1477	return r;
1478}
1479
1480void
1481vioif_watchdog(struct ifnet *ifp)
1482{
1483	struct vioif_softc *sc = ifp->if_softc;
1484	int i;
1485
1486	if (ifp->if_flags & IFF_RUNNING) {
1487		for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1488			vioif_tx_queue_clear(&sc->sc_txq[i]);
1489		}
1490	}
1491}
1492
1493/*
1494 * Receive implementation
1495 */
1496/* allocate and initialize an mbuf for receive */
1497static int
1498vioif_add_rx_mbuf(struct vioif_rxqueue *rxq, int i)
1499{
1500	struct virtio_softc *vsc = rxq->rxq_vq->vq_owner;
1501	struct mbuf *m;
1502	int r;
1503
1504	MGETHDR(m, M_DONTWAIT, MT_DATA);
1505	if (m == NULL)
1506		return ENOBUFS;
1507	MCLGET(m, M_DONTWAIT);
1508	if ((m->m_flags & M_EXT) == 0) {
1509		m_freem(m);
1510		return ENOBUFS;
1511	}
1512	rxq->rxq_mbufs[i] = m;
1513	m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
1514	r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1515	    rxq->rxq_dmamaps[i], m, BUS_DMA_READ | BUS_DMA_NOWAIT);
1516	if (r) {
1517		m_freem(m);
1518		rxq->rxq_mbufs[i] = NULL;
1519		return r;
1520	}
1521
1522	return 0;
1523}
1524
1525/* free an mbuf for receive */
1526static void
1527vioif_free_rx_mbuf(struct vioif_rxqueue *rxq, int i)
1528{
1529	struct virtio_softc *vsc = rxq->rxq_vq->vq_owner;
1530
1531	bus_dmamap_unload(virtio_dmat(vsc), rxq->rxq_dmamaps[i]);
1532	m_freem(rxq->rxq_mbufs[i]);
1533	rxq->rxq_mbufs[i] = NULL;
1534}
1535
1536/* add mbufs for all the empty receive slots */
1537static void
1538vioif_populate_rx_mbufs_locked(struct vioif_softc *sc, struct vioif_rxqueue *rxq)
1539{
1540	struct virtqueue *vq = rxq->rxq_vq;
1541	struct virtio_softc *vsc = vq->vq_owner;
1542	int i, r, ndone = 0;
1543
1544	KASSERT(mutex_owned(rxq->rxq_lock));
1545
1546	if (rxq->rxq_stopping)
1547		return;
1548
1549	for (i = 0; i < vq->vq_num; i++) {
1550		int slot;
1551		r = virtio_enqueue_prep(vsc, vq, &slot);
1552		if (r == EAGAIN)
1553			break;
1554		if (r != 0)
1555			panic("enqueue_prep for rx buffers");
1556		if (rxq->rxq_mbufs[slot] == NULL) {
1557			r = vioif_add_rx_mbuf(rxq, slot);
1558			if (r != 0) {
1559				rxq->rxq_mbuf_add_failed.ev_count++;
1560				break;
1561			}
1562		}
1563		r = virtio_enqueue_reserve(vsc, vq, slot,
1564		    rxq->rxq_dmamaps[slot]->dm_nsegs + 1);
1565		if (r != 0) {
1566			vioif_free_rx_mbuf(rxq, slot);
1567			break;
1568		}
1569		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_hdr_dmamaps[slot],
1570		    0, sc->sc_hdr_size, BUS_DMASYNC_PREREAD);
1571		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_dmamaps[slot],
1572		    0, MCLBYTES, BUS_DMASYNC_PREREAD);
1573		virtio_enqueue(vsc, vq, slot, rxq->rxq_hdr_dmamaps[slot],
1574		    false);
1575		virtio_enqueue(vsc, vq, slot, rxq->rxq_dmamaps[slot], false);
1576		virtio_enqueue_commit(vsc, vq, slot, false);
1577		ndone++;
1578	}
1579	if (ndone > 0)
1580		virtio_enqueue_commit(vsc, vq, -1, true);
1581}
1582
1583static void
1584vioif_rx_queue_clear(struct vioif_rxqueue *rxq)
1585{
1586	struct virtqueue *vq = rxq->rxq_vq;
1587	struct virtio_softc *vsc = vq->vq_owner;
1588	struct vioif_softc *sc = device_private(virtio_child(vsc));
1589	u_int limit = UINT_MAX;
1590	bool more;
1591
1592	KASSERT(rxq->rxq_stopping);
1593
1594	mutex_enter(rxq->rxq_lock);
1595	for (;;) {
1596		more = vioif_rx_deq_locked(sc, vsc, rxq, limit);
1597		if (more == false)
1598			break;
1599	}
1600	mutex_exit(rxq->rxq_lock);
1601}
1602
1603/* dequeue received packets */
1604static bool
1605vioif_rx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
1606    struct vioif_rxqueue *rxq, u_int limit)
1607{
1608	struct virtqueue *vq = rxq->rxq_vq;
1609	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1610	struct mbuf *m;
1611	int slot, len;
1612	bool more = false, dequeued = false;
1613
1614	KASSERT(mutex_owned(rxq->rxq_lock));
1615
1616	if (virtio_vq_is_enqueued(vsc, vq) == false)
1617		return false;
1618
1619	for (;;) {
1620		if (limit-- == 0) {
1621			more = true;
1622			break;
1623		}
1624
1625		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
1626			break;
1627
1628		dequeued = true;
1629
1630		len -= sc->sc_hdr_size;
1631		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_hdr_dmamaps[slot],
1632		    0, sc->sc_hdr_size, BUS_DMASYNC_POSTREAD);
1633		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_dmamaps[slot],
1634		    0, MCLBYTES, BUS_DMASYNC_POSTREAD);
1635		m = rxq->rxq_mbufs[slot];
1636		KASSERT(m != NULL);
1637		bus_dmamap_unload(virtio_dmat(vsc), rxq->rxq_dmamaps[slot]);
1638		rxq->rxq_mbufs[slot] = NULL;
1639		virtio_dequeue_commit(vsc, vq, slot);
1640		m_set_rcvif(m, ifp);
1641		m->m_len = m->m_pkthdr.len = len;
1642
1643		mutex_exit(rxq->rxq_lock);
1644		if_percpuq_enqueue(ifp->if_percpuq, m);
1645		mutex_enter(rxq->rxq_lock);
1646
1647		if (rxq->rxq_stopping)
1648			break;
1649	}
1650
1651	if (dequeued)
1652		vioif_populate_rx_mbufs_locked(sc, rxq);
1653
1654	return more;
1655}
1656
1657/* rx interrupt; call _dequeue above and schedule a softint */
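/*
 * vioif_rx_intr() disables the vq interrupt and dequeues up to
 * sc_rx_intr_process_limit packets; remaining work is deferred to
 * vioif_rx_handle() via softint or the txrx workqueue, which continues
 * with sc_rx_process_limit per pass and re-enables the interrupt once the
 * ring is drained.
 */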
1658
1659static void
1660vioif_rx_handle_locked(void *xrxq, u_int limit)
1661{
1662	struct vioif_rxqueue *rxq = xrxq;
1663	struct virtqueue *vq = rxq->rxq_vq;
1664	struct virtio_softc *vsc = vq->vq_owner;
1665	struct vioif_softc *sc = device_private(virtio_child(vsc));
1666	bool more;
1667
1668	KASSERT(!rxq->rxq_stopping);
1669
1670	more = vioif_rx_deq_locked(sc, vsc, rxq, limit);
1671	if (more) {
1672		vioif_rx_sched_handle(sc, rxq);
1673		return;
1674	}
1675	more = virtio_start_vq_intr(vsc, rxq->rxq_vq);
1676	if (more) {
1677		vioif_rx_sched_handle(sc, rxq);
1678		return;
1679	}
1680	atomic_store_relaxed(&rxq->rxq_active, false);
1681}
1682
1683static int
1684vioif_rx_intr(void *arg)
1685{
1686	struct vioif_rxqueue *rxq = arg;
1687	struct virtqueue *vq = rxq->rxq_vq;
1688	struct virtio_softc *vsc = vq->vq_owner;
1689	struct vioif_softc *sc = device_private(virtio_child(vsc));
1690	u_int limit;
1691
1692	limit = sc->sc_rx_intr_process_limit;
1693
1694	if (atomic_load_relaxed(&rxq->rxq_active) == true)
1695		return 1;
1696
1697	mutex_enter(rxq->rxq_lock);
1698
1699	if (!rxq->rxq_stopping) {
1700		rxq->rxq_workqueue = sc->sc_txrx_workqueue_sysctl;
1701
1702		virtio_stop_vq_intr(vsc, vq);
1703		atomic_store_relaxed(&rxq->rxq_active, true);
1704
1705		vioif_rx_handle_locked(rxq, limit);
1706	}
1707
1708	mutex_exit(rxq->rxq_lock);
1709	return 1;
1710}
1711
1712static void
1713vioif_rx_handle(void *xrxq)
1714{
1715	struct vioif_rxqueue *rxq = xrxq;
1716	struct virtqueue *vq = rxq->rxq_vq;
1717	struct virtio_softc *vsc = vq->vq_owner;
1718	struct vioif_softc *sc = device_private(virtio_child(vsc));
1719	u_int limit;
1720
1721	limit = sc->sc_rx_process_limit;
1722
1723	mutex_enter(rxq->rxq_lock);
1724
1725	if (!rxq->rxq_stopping)
1726		vioif_rx_handle_locked(rxq, limit);
1727
1728	mutex_exit(rxq->rxq_lock);
1729}
1730
1731static void
1732vioif_rx_sched_handle(struct vioif_softc *sc, struct vioif_rxqueue *rxq)
1733{
1734
1735	KASSERT(mutex_owned(rxq->rxq_lock));
1736
1737	if (rxq->rxq_stopping)
1738		return;
1739
1740	if (rxq->rxq_workqueue)
1741		vioif_work_add(sc->sc_txrx_workqueue, &rxq->rxq_work);
1742	else
1743		softint_schedule(rxq->rxq_handle_si);
1744}
1745
1746/* free all the mbufs; called from if_stop(disable) */
1747static void
1748vioif_rx_drain(struct vioif_rxqueue *rxq)
1749{
1750	struct virtqueue *vq = rxq->rxq_vq;
1751	int i;
1752
1753	for (i = 0; i < vq->vq_num; i++) {
1754		if (rxq->rxq_mbufs[i] == NULL)
1755			continue;
1756		vioif_free_rx_mbuf(rxq, i);
1757	}
1758}
1759
1760/*
1761 * Transmission implementation
1762 */
1763/* actual transmission is done in if_start */
1764/* tx interrupt; dequeue and free mbufs */
1765/*
1766 * the tx interrupt is actually disabled; this should be called when
1767 * the tx vq becomes full and from the watchdog
1768 */
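/*
 * As on the rx side, vioif_tx_intr() processes up to
 * sc_tx_intr_process_limit completed transmissions directly and defers the
 * remainder to vioif_tx_handle() via softint or the txrx workqueue.
 */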
1769
1770static void
1771vioif_tx_handle_locked(struct vioif_txqueue *txq, u_int limit)
1772{
1773	struct virtqueue *vq = txq->txq_vq;
1774	struct virtio_softc *vsc = vq->vq_owner;
1775	struct vioif_softc *sc = device_private(virtio_child(vsc));
1776	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1777	bool more;
1778
1779	KASSERT(!txq->txq_stopping);
1780
1781	more = vioif_tx_deq_locked(sc, vsc, txq, limit);
1782	if (more) {
1783		vioif_tx_sched_handle(sc, txq);
1784		return;
1785	}
1786
1787	if (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX)
1788		more = virtio_postpone_intr_smart(vsc, vq);
1789	else
1790		more = virtio_start_vq_intr(vsc, vq);
1791	if (more) {
1792		vioif_tx_sched_handle(sc, txq);
1793		return;
1794	}
1795
1796	atomic_store_relaxed(&txq->txq_active, false);
1797	/* for ALTQ */
1798	if (txq == &sc->sc_txq[0]) {
1799		if_schedule_deferred_start(ifp);
1800		ifp->if_flags &= ~IFF_OACTIVE;
1801	}
1802	softint_schedule(txq->txq_deferred_transmit);
1803}
1804
1805
1806static int
1807vioif_tx_intr(void *arg)
1808{
1809	struct vioif_txqueue *txq = arg;
1810	struct virtqueue *vq = txq->txq_vq;
1811	struct virtio_softc *vsc = vq->vq_owner;
1812	struct vioif_softc *sc = device_private(virtio_child(vsc));
1813	u_int limit;
1814
1815	limit = sc->sc_tx_intr_process_limit;
1816
1817	if (atomic_load_relaxed(&txq->txq_active) == true)
1818		return 1;
1819
1820	mutex_enter(txq->txq_lock);
1821
1822	if (!txq->txq_stopping) {
1823		txq->txq_workqueue = sc->sc_txrx_workqueue_sysctl;
1824
1825		virtio_stop_vq_intr(vsc, vq);
1826		atomic_store_relaxed(&txq->txq_active, true);
1827
1828		vioif_tx_handle_locked(txq, limit);
1829	}
1830
1831	mutex_exit(txq->txq_lock);
1832
1833	return 1;
1834}
1835
1836static void
1837vioif_tx_handle(void *xtxq)
1838{
1839	struct vioif_txqueue *txq = xtxq;
1840	struct virtqueue *vq = txq->txq_vq;
1841	struct virtio_softc *vsc = vq->vq_owner;
1842	struct vioif_softc *sc = device_private(virtio_child(vsc));
1843	u_int limit;
1844
1845	limit = sc->sc_tx_process_limit;
1846
1847	mutex_enter(txq->txq_lock);
1848	if (!txq->txq_stopping)
1849		vioif_tx_handle_locked(txq, limit);
1850	mutex_exit(txq->txq_lock);
1851}
1852
1853static void
1854vioif_tx_sched_handle(struct vioif_softc *sc, struct vioif_txqueue *txq)
1855{
1856
1857	KASSERT(mutex_owned(txq->txq_lock));
1858
1859	if (txq->txq_stopping)
1860		return;
1861
1862	if (txq->txq_workqueue)
1863		vioif_work_add(sc->sc_txrx_workqueue, &txq->txq_work);
1864	else
1865		softint_schedule(txq->txq_handle_si);
1866}
1867
1868static void
1869vioif_tx_queue_clear(struct vioif_txqueue *txq)
1870{
1871	struct virtqueue *vq = txq->txq_vq;
1872	struct virtio_softc *vsc = vq->vq_owner;
1873	struct vioif_softc *sc = device_private(virtio_child(vsc));
1874	u_int limit = UINT_MAX;
1875	bool more;
1876
1877	mutex_enter(txq->txq_lock);
1878	for (;;) {
1879		more = vioif_tx_deq_locked(sc, vsc, txq, limit);
1880		if (more == false)
1881			break;
1882	}
1883	mutex_exit(txq->txq_lock);
1884}
1885
1886static bool
1887vioif_tx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
1888    struct vioif_txqueue *txq, u_int limit)
1889{
1890	struct virtqueue *vq = txq->txq_vq;
1891	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1892	struct mbuf *m;
1893	int slot, len;
1894	bool more = false;
1895
1896	KASSERT(mutex_owned(txq->txq_lock));
1897
1898	if (virtio_vq_is_enqueued(vsc, vq) == false)
1899		return false;
1900
1901	for (;;) {
1902		if (limit-- == 0) {
1903			more = true;
1904			break;
1905		}
1906
1907		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
1908			break;
1909
1910		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_hdr_dmamaps[slot],
1911		    0, sc->sc_hdr_size, BUS_DMASYNC_POSTWRITE);
1912		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_dmamaps[slot],
1913		    0, txq->txq_dmamaps[slot]->dm_mapsize,
1914		    BUS_DMASYNC_POSTWRITE);
1915		m = txq->txq_mbufs[slot];
1916		bus_dmamap_unload(virtio_dmat(vsc), txq->txq_dmamaps[slot]);
1917		txq->txq_mbufs[slot] = NULL;
1918		virtio_dequeue_commit(vsc, vq, slot);
1919		if_statinc(ifp, if_opackets);
1920		m_freem(m);
1921	}
1922
1923	return more;
1924}
1925
1926/* free all the mbufs already put on vq; called from if_stop(disable) */
1927static void
1928vioif_tx_drain(struct vioif_txqueue *txq)
1929{
1930	struct virtqueue *vq = txq->txq_vq;
1931	struct virtio_softc *vsc = vq->vq_owner;
1932	int i;
1933
1934	KASSERT(txq->txq_stopping);
1935
1936	for (i = 0; i < vq->vq_num; i++) {
1937		if (txq->txq_mbufs[i] == NULL)
1938			continue;
1939		bus_dmamap_unload(virtio_dmat(vsc), txq->txq_dmamaps[i]);
1940		m_freem(txq->txq_mbufs[i]);
1941		txq->txq_mbufs[i] = NULL;
1942	}
1943}
1944
1945/*
1946 * Control vq
1947 */
1948/* acquire exclusive use of the control virtqueue; wait until it is free */
1949static void
1950vioif_ctrl_acquire(struct vioif_softc *sc)
1951{
1952	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1953
1954	mutex_enter(&ctrlq->ctrlq_wait_lock);
1955	while (ctrlq->ctrlq_inuse != FREE)
1956		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
1957	ctrlq->ctrlq_inuse = INUSE;
1958	ctrlq->ctrlq_owner = curlwp;
1959	mutex_exit(&ctrlq->ctrlq_wait_lock);
1960}
1961
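/* release the control virtqueue and wake up the next waiting issuer */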
1962static void
1963vioif_ctrl_release(struct vioif_softc *sc)
1964{
1965	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1966
1967	KASSERT(ctrlq->ctrlq_inuse != FREE);
1968	KASSERT(ctrlq->ctrlq_owner == curlwp);
1969
1970	mutex_enter(&ctrlq->ctrlq_wait_lock);
1971	ctrlq->ctrlq_inuse = FREE;
1972	ctrlq->ctrlq_owner = NULL;
1973	cv_signal(&ctrlq->ctrlq_wait);
1974	mutex_exit(&ctrlq->ctrlq_wait_lock);
1975}
1976
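/* load the dmamaps for a set of command buffers; unwind them on failure */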
1977static int
1978vioif_ctrl_load_cmdspec(struct vioif_softc *sc,
1979    struct vioif_ctrl_cmdspec *specs, int nspecs)
1980{
1981	struct virtio_softc *vsc = sc->sc_virtio;
1982	int i, r, loaded;
1983
1984	loaded = 0;
1985	for (i = 0; i < nspecs; i++) {
1986		r = bus_dmamap_load(virtio_dmat(vsc),
1987		    specs[i].dmamap, specs[i].buf, specs[i].bufsize,
1988		    NULL, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1989		if (r) {
1990			sc->sc_ctrlq.ctrlq_cmd_load_failed.ev_count++;
1991			goto err;
1992		}
1993		loaded++;
1994
1995	}
1996
1997	return r;
1998
1999err:
2000	for (i = 0; i < loaded; i++) {
2001		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2002	}
2003
2004	return r;
2005}
2006
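/* unload the dmamaps loaded by vioif_ctrl_load_cmdspec() */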
2007static void
2008vioif_ctrl_unload_cmdspec(struct vioif_softc *sc,
2009    struct vioif_ctrl_cmdspec *specs, int nspecs)
2010{
2011	struct virtio_softc *vsc = sc->sc_virtio;
2012	int i;
2013
2014	for (i = 0; i < nspecs; i++) {
2015		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2016	}
2017}
2018
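/*
 * enqueue a control command with its buffers and status descriptor, then
 * sleep until vioif_ctrl_intr() marks it DONE; the caller must hold the
 * control queue via vioif_ctrl_acquire()
 */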
2019static int
2020vioif_ctrl_send_command(struct vioif_softc *sc, uint8_t class, uint8_t cmd,
2021    struct vioif_ctrl_cmdspec *specs, int nspecs)
2022{
2023	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2024	struct virtqueue *vq = ctrlq->ctrlq_vq;
2025	struct virtio_softc *vsc = sc->sc_virtio;
2026	int i, r, slot;
2027
2028	ctrlq->ctrlq_cmd->class = class;
2029	ctrlq->ctrlq_cmd->command = cmd;
2030
2031	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap,
2032	    0, sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_PREWRITE);
2033	for (i = 0; i < nspecs; i++) {
2034		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap,
2035		    0, specs[i].bufsize, BUS_DMASYNC_PREWRITE);
2036	}
2037	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap,
2038	    0, sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_PREREAD);
2039
2040	/* we need to explicitly (re)start vq intr when using RING EVENT IDX */
2041	if (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX)
2042		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
2043
2044	r = virtio_enqueue_prep(vsc, vq, &slot);
2045	if (r != 0)
2046		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2047	r = virtio_enqueue_reserve(vsc, vq, slot, nspecs + 2);
2048	if (r != 0)
2049		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2050	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_cmd_dmamap, true);
2051	for (i = 0; i < nspecs; i++) {
2052		virtio_enqueue(vsc, vq, slot, specs[i].dmamap, true);
2053	}
2054	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_status_dmamap, false);
2055	virtio_enqueue_commit(vsc, vq, slot, true);
2056
2057	/* wait for done */
2058	mutex_enter(&ctrlq->ctrlq_wait_lock);
2059	while (ctrlq->ctrlq_inuse != DONE)
2060		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
2061	mutex_exit(&ctrlq->ctrlq_wait_lock);
2062	/* already dequeued */
2063
2064	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap, 0,
2065	    sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
2066	for (i = 0; i < nspecs; i++) {
2067		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap, 0,
2068		    specs[i].bufsize, BUS_DMASYNC_POSTWRITE);
2069	}
2070	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap, 0,
2071	    sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_POSTREAD);
2072
2073	if (ctrlq->ctrlq_status->ack == VIRTIO_NET_OK)
2074		r = 0;
2075	else {
2076		device_printf(sc->sc_dev, "control command failed\n");
2077		sc->sc_ctrlq.ctrlq_cmd_failed.ev_count++;
2078		r = EIO;
2079	}
2080
2081	return r;
2082}
2083
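/* issue a VIRTIO_NET_CTRL_RX class command and wait for completion */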
2084static int
2085vioif_ctrl_rx(struct vioif_softc *sc, int cmd, bool onoff)
2086{
2087	struct virtio_net_ctrl_rx *rx = sc->sc_ctrlq.ctrlq_rx;
2088	struct vioif_ctrl_cmdspec specs[1];
2089	int r;
2090
2091	if (!sc->sc_has_ctrl)
2092		return ENOTSUP;
2093
2094	vioif_ctrl_acquire(sc);
2095
2096	rx->onoff = onoff;
2097	specs[0].dmamap = sc->sc_ctrlq.ctrlq_rx_dmamap;
2098	specs[0].buf = rx;
2099	specs[0].bufsize = sizeof(*rx);
2100
2101	r = vioif_ctrl_send_command(sc, VIRTIO_NET_CTRL_RX, cmd,
2102	    specs, __arraycount(specs));
2103
2104	vioif_ctrl_release(sc);
2105	return r;
2106}
2107
2108static int
2109vioif_set_promisc(struct vioif_softc *sc, bool onoff)
2110{
2111	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, onoff);
2112}
2113
2114static int
2115vioif_set_allmulti(struct vioif_softc *sc, bool onoff)
2116{
2117	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, onoff);
2118}
2119
2120/* issue VIRTIO_NET_CTRL_MAC_TABLE_SET command and wait for completion */
2121static int
2122vioif_set_rx_filter(struct vioif_softc *sc)
2123{
2124	/* filter already set in ctrlq->ctrlq_mac_tbl */
2125	struct virtio_softc *vsc = sc->sc_virtio;
2126	struct virtio_net_ctrl_mac_tbl *mac_tbl_uc, *mac_tbl_mc;
2127	struct vioif_ctrl_cmdspec specs[2];
2128	int nspecs = __arraycount(specs);
2129	int r;
2130
2131	mac_tbl_uc = sc->sc_ctrlq.ctrlq_mac_tbl_uc;
2132	mac_tbl_mc = sc->sc_ctrlq.ctrlq_mac_tbl_mc;
2133
2134	if (!sc->sc_has_ctrl)
2135		return ENOTSUP;
2136
2137	vioif_ctrl_acquire(sc);
2138
2139	specs[0].dmamap = sc->sc_ctrlq.ctrlq_tbl_uc_dmamap;
2140	specs[0].buf = mac_tbl_uc;
2141	specs[0].bufsize = sizeof(*mac_tbl_uc)
2142	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_uc->nentries));
2143
2144	specs[1].dmamap = sc->sc_ctrlq.ctrlq_tbl_mc_dmamap;
2145	specs[1].buf = mac_tbl_mc;
2146	specs[1].bufsize = sizeof(*mac_tbl_mc)
2147	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_mc->nentries));
2148
2149	r = vioif_ctrl_load_cmdspec(sc, specs, nspecs);
2150	if (r != 0)
2151		goto out;
2152
2153	r = vioif_ctrl_send_command(sc,
2154	    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET,
2155	    specs, nspecs);
2156
2157	vioif_ctrl_unload_cmdspec(sc, specs, nspecs);
2158
2159out:
2160	vioif_ctrl_release(sc);
2161
2162	return r;
2163}
2164
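/* issue VIRTIO_NET_CTRL_MAC_ADDR_SET command and wait for completion */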
2165static int
2166vioif_set_mac_addr(struct vioif_softc *sc)
2167{
2168	struct virtio_net_ctrl_mac_addr *ma =
2169	    sc->sc_ctrlq.ctrlq_mac_addr;
2170	struct vioif_ctrl_cmdspec specs[1];
2171	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2172	int nspecs = __arraycount(specs);
2173	int r;
2174
2175	if (!sc->sc_has_ctrl)
2176		return ENOTSUP;
2177
2178	vioif_ctrl_acquire(sc);
2179
2180	memcpy(ma->mac, CLLADDR(ifp->if_sadl), ETHER_ADDR_LEN);
2181	specs[0].dmamap = sc->sc_ctrlq.ctrlq_mac_addr_dmamap;
2182	specs[0].buf = ma;
2183	specs[0].bufsize = sizeof(*ma);
2184
2185	r = vioif_ctrl_send_command(sc,
2186	    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_ADDR_SET,
2187	    specs, nspecs);
2188
2189	vioif_ctrl_release(sc);
2190
2191	return r;
2192}
2193
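/* issue VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET to activate nvq_pairs queue pairs */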
2194static int
2195vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *sc, int nvq_pairs)
2196{
2197	struct virtio_net_ctrl_mq *mq = sc->sc_ctrlq.ctrlq_mq;
2198	struct vioif_ctrl_cmdspec specs[1];
2199	int r;
2200
2201	if (!sc->sc_has_ctrl)
2202		return ENOTSUP;
2203
2204	if (nvq_pairs <= 1)
2205		return EINVAL;
2206
2207	vioif_ctrl_acquire(sc);
2208
2209	mq->virtqueue_pairs = virtio_rw16(sc->sc_virtio, nvq_pairs);
2210	specs[0].dmamap = sc->sc_ctrlq.ctrlq_mq_dmamap;
2211	specs[0].buf = mq;
2212	specs[0].bufsize = sizeof(*mq);
2213
2214	r = vioif_ctrl_send_command(sc,
2215	    VIRTIO_NET_CTRL_MQ, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
2216	    specs, __arraycount(specs));
2217
2218	vioif_ctrl_release(sc);
2219
2220	return r;
2221}
2222
2223/* ctrl vq interrupt; wake up the command issuer */
2224static int
2225vioif_ctrl_intr(void *arg)
2226{
2227	struct vioif_ctrlqueue *ctrlq = arg;
2228	struct virtqueue *vq = ctrlq->ctrlq_vq;
2229	struct virtio_softc *vsc = vq->vq_owner;
2230	int r, slot;
2231
2232	if (virtio_vq_is_enqueued(vsc, vq) == false)
2233		return 0;
2234
2235	r = virtio_dequeue(vsc, vq, &slot, NULL);
2236	if (r == ENOENT)
2237		return 0;
2238	virtio_dequeue_commit(vsc, vq, slot);
2239
2240	mutex_enter(&ctrlq->ctrlq_wait_lock);
2241	ctrlq->ctrlq_inuse = DONE;
2242	cv_signal(&ctrlq->ctrlq_wait);
2243	mutex_exit(&ctrlq->ctrlq_wait_lock);
2244
2245	return 1;
2246}
2247
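/*
 * apply IFF_PROMISC and IFF_ALLMULTI to the device; without a control vq
 * the device is always promisc + allmulti, so just reflect that in the
 * interface flags
 */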
2248static int
2249vioif_ifflags(struct vioif_softc *sc)
2250{
2251	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2252	bool onoff;
2253	int r;
2254
2255	if (!sc->sc_has_ctrl) {
2256		/* no ctrl vq; always promisc and allmulti */
2257		ifp->if_flags |= (IFF_PROMISC | IFF_ALLMULTI);
2258		return 0;
2259	}
2260
2261	onoff = ifp->if_flags & IFF_ALLMULTI ? true : false;
2262	r = vioif_set_allmulti(sc, onoff);
2263	if (r != 0) {
2264		log(LOG_WARNING,
2265		    "%s: couldn't %sable ALLMULTI\n",
2266		    ifp->if_xname, onoff ? "en" : "dis");
2267		if (onoff == false) {
2268			ifp->if_flags |= IFF_ALLMULTI;
2269		}
2270	}
2271
2272	onoff = ifp->if_flags & IFF_PROMISC ? true : false;
2273	r = vioif_set_promisc(sc, onoff);
2274	if (r != 0) {
2275		log(LOG_WARNING,
2276		    "%s: couldn't %sable PROMISC\n",
2277		    ifp->if_xname, onoff ? "en" : "dis");
2278		if (onoff == false) {
2279			ifp->if_flags |= IFF_PROMISC;
2280		}
2281	}
2282
2283	return 0;
2284}
2285
2286static int
2287vioif_ifflags_cb(struct ethercom *ec)
2288{
2289	struct ifnet *ifp = &ec->ec_if;
2290	struct vioif_softc *sc = ifp->if_softc;
2291
2292	return vioif_ifflags(sc);
2293}
2294
2295/*
2296 * If the multicast filter is small enough (<= MAXENTRIES), program the
2297 * rx filter with the individual addresses.  If the filter is too large,
2298 * or if programming the rx filter fails, fall back to ALLMULTI.
2299 */
2300static int
2301vioif_rx_filter(struct vioif_softc *sc)
2302{
2303	struct virtio_softc *vsc = sc->sc_virtio;
2304	struct ethercom *ec = &sc->sc_ethercom;
2305	struct ifnet *ifp = &ec->ec_if;
2306	struct ether_multi *enm;
2307	struct ether_multistep step;
2308	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2309	int nentries;
2310	bool allmulti = false;
2311	int r;
2312
2313	if (!sc->sc_has_ctrl) {
2314		goto set_ifflags;
2315	}
2316
2317	memcpy(ctrlq->ctrlq_mac_tbl_uc->macs[0],
2318	    CLLADDR(ifp->if_sadl), ETHER_ADDR_LEN);
2319
2320	nentries = 0;
2321	allmulti = false;
2322
2323	ETHER_LOCK(ec);
2324	for (ETHER_FIRST_MULTI(step, ec, enm); enm != NULL;
2325	    ETHER_NEXT_MULTI(step, enm)) {
2326		if (nentries >= VIRTIO_NET_CTRL_MAC_MAXENTRIES) {
2327			allmulti = true;
2328			break;
2329		}
2330		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
2331			allmulti = true;
2332			break;
2333		}
2334
2335		memcpy(ctrlq->ctrlq_mac_tbl_mc->macs[nentries],
2336		    enm->enm_addrlo, ETHER_ADDR_LEN);
2337		nentries++;
2338	}
2339	ETHER_UNLOCK(ec);
2340
2341	r = vioif_set_mac_addr(sc);
2342	if (r != 0) {
2343		log(LOG_WARNING, "%s: couldn't set MAC address\n",
2344		    ifp->if_xname);
2345	}
2346
2347	if (!allmulti) {
2348		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 1);
2349		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, nentries);
2350		r = vioif_set_rx_filter(sc);
2351		if (r != 0) {
2352			allmulti = true; /* fallback */
2353		}
2354	}
2355
2356	if (allmulti) {
2357		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 0);
2358		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, 0);
2359		r = vioif_set_rx_filter(sc);
2360		if (r != 0) {
2361			log(LOG_DEBUG, "%s: couldn't clear RX filter\n",
2362			    ifp->if_xname);
2363			/* what to do on failure? */
2364		}
2365
2366		ifp->if_flags |= IFF_ALLMULTI;
2367	}
2368
2369set_ifflags:
2370	r = vioif_ifflags(sc);
2371
2372	return r;
2373}
2374
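/* read the link state from device config; assume up without F_STATUS */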
2375static bool
2376vioif_is_link_up(struct vioif_softc *sc)
2377{
2378	struct virtio_softc *vsc = sc->sc_virtio;
2379	uint16_t status;
2380
2381	if (virtio_features(vsc) & VIRTIO_NET_F_STATUS)
2382		status = virtio_read_device_config_2(vsc,
2383		    VIRTIO_NET_CONFIG_STATUS);
2384	else
2385		status = VIRTIO_NET_S_LINK_UP;
2386
2387	return ((status & VIRTIO_NET_S_LINK_UP) != 0);
2388}
2389
2390/* change link status */
2391static void
2392vioif_update_link_status(struct vioif_softc *sc)
2393{
2394	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2395	struct vioif_txqueue *txq;
2396	bool active, changed;
2397	int link, i;
2398
2399	mutex_enter(&sc->sc_lock);
2400
2401	active = vioif_is_link_up(sc);
2402	changed = false;
2403
2404	if (active) {
2405		if (!sc->sc_link_active)
2406			changed = true;
2407
2408		link = LINK_STATE_UP;
2409		sc->sc_link_active = true;
2410	} else {
2411		if (sc->sc_link_active)
2412			changed = true;
2413
2414		link = LINK_STATE_DOWN;
2415		sc->sc_link_active = false;
2416	}
2417
2418	if (changed) {
2419		for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
2420			txq = &sc->sc_txq[i];
2421
2422			mutex_enter(txq->txq_lock);
2423			txq->txq_link_active = sc->sc_link_active;
2424			mutex_exit(txq->txq_lock);
2425		}
2426
2427		if_link_state_change(ifp, link);
2428	}
2429
2430	mutex_exit(&sc->sc_lock);
2431}
2432
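/* config change interrupt: defer link state handling to softint context */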
2433static int
2434vioif_config_change(struct virtio_softc *vsc)
2435{
2436	struct vioif_softc *sc = device_private(virtio_child(vsc));
2437
2438	softint_schedule(sc->sc_ctl_softint);
2439	return 0;
2440}
2441
2442static void
2443vioif_ctl_softint(void *arg)
2444{
2445	struct vioif_softc *sc = arg;
2446	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2447
2448	vioif_update_link_status(sc);
2449	vioif_start(ifp);
2450}
2451
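/*
 * thin wrappers around workqueue(9); vioif_work_add() uses the "added"
 * flag so that each vioif_work is enqueued at most once until its work
 * function has started running
 */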
2452static struct workqueue *
2453vioif_workq_create(const char *name, pri_t prio, int ipl, int flags)
2454{
2455	struct workqueue *wq;
2456	int error;
2457
2458	error = workqueue_create(&wq, name, vioif_workq_work, NULL,
2459	    prio, ipl, flags);
2460
2461	if (error)
2462		return NULL;
2463
2464	return wq;
2465}
2466
2467static void
2468vioif_workq_destroy(struct workqueue *wq)
2469{
2470
2471	workqueue_destroy(wq);
2472}
2473
2474static void
2475vioif_workq_work(struct work *wk, void *context)
2476{
2477	struct vioif_work *work;
2478
2479	work = container_of(wk, struct vioif_work, cookie);
2480
2481	atomic_store_relaxed(&work->added, 0);
2482	work->func(work->arg);
2483}
2484
2485static void
2486vioif_work_set(struct vioif_work *work, void (*func)(void *), void *arg)
2487{
2488
2489	memset(work, 0, sizeof(*work));
2490	work->func = func;
2491	work->arg = arg;
2492}
2493
2494static void
2495vioif_work_add(struct workqueue *wq, struct vioif_work *work)
2496{
2497
2498	if (atomic_load_relaxed(&work->added) != 0)
2499		return;
2500
2501	atomic_store_relaxed(&work->added, 1);
2502	kpreempt_disable();
2503	workqueue_enqueue(wq, &work->cookie, NULL);
2504	kpreempt_enable();
2505}
2506
2507static void
2508vioif_work_wait(struct workqueue *wq, struct vioif_work *work)
2509{
2510
2511	workqueue_wait(wq, &work->cookie);
2512}
2513
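/* create the hw.<devname> sysctl subtree with Rx/Tx processing limit knobs */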
2514static int
2515vioif_setup_sysctl(struct vioif_softc *sc)
2516{
2517	const char *devname;
2518	struct sysctllog **log;
2519	const struct sysctlnode *rnode, *rxnode, *txnode;
2520	int error;
2521
2522	log = &sc->sc_sysctllog;
2523	devname = device_xname(sc->sc_dev);
2524
2525	error = sysctl_createv(log, 0, NULL, &rnode,
2526	    0, CTLTYPE_NODE, devname,
2527	    SYSCTL_DESCR("virtio-net information and settings"),
2528	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);
2529	if (error)
2530		goto out;
2531
2532	error = sysctl_createv(log, 0, &rnode, NULL,
2533	    CTLFLAG_READWRITE, CTLTYPE_BOOL, "txrx_workqueue",
2534	    SYSCTL_DESCR("Use workqueue for packet processing"),
2535	    NULL, 0, &sc->sc_txrx_workqueue_sysctl, 0, CTL_CREATE, CTL_EOL);
2536	if (error)
2537		goto out;
2538
2539	error = sysctl_createv(log, 0, &rnode, &rxnode,
2540	    0, CTLTYPE_NODE, "rx",
2541	    SYSCTL_DESCR("virtio-net information and settings for Rx"),
2542	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
2543	if (error)
2544		goto out;
2545
2546	error = sysctl_createv(log, 0, &rxnode, NULL,
2547	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
2548	    SYSCTL_DESCR("max number of Rx packets to process for interrupt processing"),
2549	    NULL, 0, &sc->sc_rx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
2550	if (error)
2551		goto out;
2552
2553	error = sysctl_createv(log, 0, &rxnode, NULL,
2554	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
2555	    SYSCTL_DESCR("max number of Rx packets to process for deferred processing"),
2556	    NULL, 0, &sc->sc_rx_process_limit, 0, CTL_CREATE, CTL_EOL);
2557	if (error)
2558		goto out;
2559
2560	error = sysctl_createv(log, 0, &rnode, &txnode,
2561	    0, CTLTYPE_NODE, "tx",
2562	    SYSCTL_DESCR("virtio-net information and settings for Tx"),
2563	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
2564	if (error)
2565		goto out;
2566
2567	error = sysctl_createv(log, 0, &txnode, NULL,
2568	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
2569	    SYSCTL_DESCR("max number of Tx packets to process for interrupt processing"),
2570	    NULL, 0, &sc->sc_tx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
2571	if (error)
2572		goto out;
2573
2574	error = sysctl_createv(log, 0, &txnode, NULL,
2575	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
2576	    SYSCTL_DESCR("max number of Tx packets to process for deferred processing"),
2577	    NULL, 0, &sc->sc_tx_process_limit, 0, CTL_CREATE, CTL_EOL);
2578
2579out:
2580	if (error)
2581		sysctl_teardown(log);
2582
2583	return error;
2584}
2585
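/* attach event counters for per-queue and control queue failures */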
2586static void
2587vioif_setup_stats(struct vioif_softc *sc)
2588{
2589	struct vioif_rxqueue *rxq;
2590	struct vioif_txqueue *txq;
2591	int i;
2592
2593	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
2594		rxq = &sc->sc_rxq[i];
2595		txq = &sc->sc_txq[i];
2596
2597		snprintf(txq->txq_evgroup, sizeof(txq->txq_evgroup), "%s-TX%d",
2598		    device_xname(sc->sc_dev), i);
2599		evcnt_attach_dynamic(&txq->txq_defrag_failed, EVCNT_TYPE_MISC,
2600		    NULL, txq->txq_evgroup, "tx m_defrag() failed");
2601		evcnt_attach_dynamic(&txq->txq_mbuf_load_failed, EVCNT_TYPE_MISC,
2602		    NULL, txq->txq_evgroup, "tx dmamap load failed");
2603		evcnt_attach_dynamic(&txq->txq_enqueue_reserve_failed, EVCNT_TYPE_MISC,
2604		    NULL, txq->txq_evgroup, "virtio_enqueue_reserve failed");
2605
2606		snprintf(rxq->rxq_evgroup, sizeof(rxq->rxq_evgroup), "%s-RX%d",
2607		    device_xname(sc->sc_dev), i);
2608		evcnt_attach_dynamic(&rxq->rxq_mbuf_add_failed, EVCNT_TYPE_MISC,
2609		    NULL, rxq->rxq_evgroup, "rx mbuf allocation failed");
2610	}
2611
2612	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_load_failed, EVCNT_TYPE_MISC,
2613	    NULL, device_xname(sc->sc_dev), "control command dmamap load failed");
2614	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_failed, EVCNT_TYPE_MISC,
2615	    NULL, device_xname(sc->sc_dev), "control command failed");
2616}
2617
2618MODULE(MODULE_CLASS_DRIVER, if_vioif, "virtio");
2619
2620#ifdef _MODULE
2621#include "ioconf.c"
2622#endif
2623
2624static int
2625if_vioif_modcmd(modcmd_t cmd, void *opaque)
2626{
2627	int error = 0;
2628
2629#ifdef _MODULE
2630	switch (cmd) {
2631	case MODULE_CMD_INIT:
2632		error = config_init_component(cfdriver_ioconf_if_vioif,
2633		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2634		break;
2635	case MODULE_CMD_FINI:
2636		error = config_fini_component(cfdriver_ioconf_if_vioif,
2637		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2638		break;
2639	default:
2640		error = ENOTTY;
2641		break;
2642	}
2643#endif
2644
2645	return error;
2646}
2647