1/*	$NetBSD: if_vioif.c,v 1.76 2022/03/29 01:57:51 yamaguchi Exp $	*/
2
3/*
4 * Copyright (c) 2020 The NetBSD Foundation, Inc.
5 * Copyright (c) 2010 Minoura Makoto.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__KERNEL_RCSID(0, "$NetBSD: if_vioif.c,v 1.76 2022/03/29 01:57:51 yamaguchi Exp $");
31
32#ifdef _KERNEL_OPT
33#include "opt_net_mpsafe.h"
34#endif
35
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/kernel.h>
39#include <sys/atomic.h>
40#include <sys/bus.h>
41#include <sys/condvar.h>
42#include <sys/device.h>
43#include <sys/evcnt.h>
44#include <sys/intr.h>
45#include <sys/kmem.h>
46#include <sys/mbuf.h>
47#include <sys/mutex.h>
48#include <sys/sockio.h>
49#include <sys/syslog.h>
50#include <sys/cpu.h>
51#include <sys/module.h>
52#include <sys/pcq.h>
53#include <sys/workqueue.h>
54
55#include <dev/pci/virtioreg.h>
56#include <dev/pci/virtiovar.h>
57
58#include <net/if.h>
59#include <net/if_dl.h>
60#include <net/if_media.h>
61#include <net/if_ether.h>
62
63#include <net/bpf.h>
64
65#include "ioconf.h"
66
67#ifdef NET_MPSAFE
68#define VIOIF_MPSAFE	1
69#define VIOIF_MULTIQ	1
70#endif
71
72/*
73 * if_vioifreg.h:
74 */
75/* Configuration registers */
76#define VIRTIO_NET_CONFIG_MAC		 0 /* 8bit x 6byte */
77#define VIRTIO_NET_CONFIG_STATUS	 6 /* 16bit */
78#define VIRTIO_NET_CONFIG_MAX_VQ_PAIRS	 8 /* 16bit */
79#define VIRTIO_NET_CONFIG_MTU		10 /* 16bit */
80
81/* Feature bits */
82#define VIRTIO_NET_F_CSUM		__BIT(0)
83#define VIRTIO_NET_F_GUEST_CSUM		__BIT(1)
84#define VIRTIO_NET_F_MAC		__BIT(5)
85#define VIRTIO_NET_F_GSO		__BIT(6)
86#define VIRTIO_NET_F_GUEST_TSO4		__BIT(7)
87#define VIRTIO_NET_F_GUEST_TSO6		__BIT(8)
88#define VIRTIO_NET_F_GUEST_ECN		__BIT(9)
89#define VIRTIO_NET_F_GUEST_UFO		__BIT(10)
90#define VIRTIO_NET_F_HOST_TSO4		__BIT(11)
91#define VIRTIO_NET_F_HOST_TSO6		__BIT(12)
92#define VIRTIO_NET_F_HOST_ECN		__BIT(13)
93#define VIRTIO_NET_F_HOST_UFO		__BIT(14)
94#define VIRTIO_NET_F_MRG_RXBUF		__BIT(15)
95#define VIRTIO_NET_F_STATUS		__BIT(16)
96#define VIRTIO_NET_F_CTRL_VQ		__BIT(17)
97#define VIRTIO_NET_F_CTRL_RX		__BIT(18)
98#define VIRTIO_NET_F_CTRL_VLAN		__BIT(19)
99#define VIRTIO_NET_F_CTRL_RX_EXTRA	__BIT(20)
100#define VIRTIO_NET_F_GUEST_ANNOUNCE	__BIT(21)
101#define VIRTIO_NET_F_MQ			__BIT(22)
102
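/*
 * snprintb(9)-style description of the device-specific feature bits above;
 * each "\xNN" byte is the 1-origin bit number of the flag name following it
 * (e.g. "\x17" = bit 23, 1-origin, which is VIRTIO_NET_F_MQ = __BIT(22)).
 */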
103#define VIRTIO_NET_FLAG_BITS \
104	VIRTIO_COMMON_FLAG_BITS \
105	"\x17""MQ" \
106	"\x16""GUEST_ANNOUNCE" \
107	"\x15""CTRL_RX_EXTRA" \
108	"\x14""CTRL_VLAN" \
109	"\x13""CTRL_RX" \
110	"\x12""CTRL_VQ" \
111	"\x11""STATUS" \
112	"\x10""MRG_RXBUF" \
113	"\x0f""HOST_UFO" \
114	"\x0e""HOST_ECN" \
115	"\x0d""HOST_TSO6" \
116	"\x0c""HOST_TSO4" \
117	"\x0b""GUEST_UFO" \
118	"\x0a""GUEST_ECN" \
119	"\x09""GUEST_TSO6" \
120	"\x08""GUEST_TSO4" \
121	"\x07""GSO" \
122	"\x06""MAC" \
123	"\x02""GUEST_CSUM" \
124	"\x01""CSUM"
125
126/* Status */
127#define VIRTIO_NET_S_LINK_UP	1
128
129/* Packet header structure */
130struct virtio_net_hdr {
131	uint8_t		flags;
132	uint8_t		gso_type;
133	uint16_t	hdr_len;
134	uint16_t	gso_size;
135	uint16_t	csum_start;
136	uint16_t	csum_offset;
137
138	uint16_t	num_buffers; /* VIRTIO_NET_F_MRG_RXBUF enabled or v1 */
139} __packed;
140
141#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
142#define VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
143#define VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
144#define VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
145#define VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
146#define VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */
147
148#define VIRTIO_NET_MAX_GSO_LEN		(65536+ETHER_HDR_LEN)
149
150/* Control virtqueue */
151struct virtio_net_ctrl_cmd {
152	uint8_t	class;
153	uint8_t	command;
154} __packed;
155#define VIRTIO_NET_CTRL_RX		0
156# define VIRTIO_NET_CTRL_RX_PROMISC	0
157# define VIRTIO_NET_CTRL_RX_ALLMULTI	1
158
159#define VIRTIO_NET_CTRL_MAC		1
160# define VIRTIO_NET_CTRL_MAC_TABLE_SET	0
161# define  VIRTIO_NET_CTRL_MAC_ADDR_SET	1
162
163#define VIRTIO_NET_CTRL_VLAN		2
164# define VIRTIO_NET_CTRL_VLAN_ADD	0
165# define VIRTIO_NET_CTRL_VLAN_DEL	1
166
167#define VIRTIO_NET_CTRL_MQ			4
168# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET	0
169# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN	1
170# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX	0x8000
171
172struct virtio_net_ctrl_status {
173	uint8_t	ack;
174} __packed;
175#define VIRTIO_NET_OK			0
176#define VIRTIO_NET_ERR			1
177
178struct virtio_net_ctrl_rx {
179	uint8_t	onoff;
180} __packed;
181
182struct virtio_net_ctrl_mac_tbl {
183	uint32_t nentries;
184	uint8_t macs[][ETHER_ADDR_LEN];
185} __packed;
186
187struct virtio_net_ctrl_mac_addr {
188	uint8_t mac[ETHER_ADDR_LEN];
189} __packed;
190
191struct virtio_net_ctrl_vlan {
192	uint16_t id;
193} __packed;
194
195struct virtio_net_ctrl_mq {
196	uint16_t virtqueue_pairs;
197} __packed;
198
199/*
200 * if_vioifvar.h:
201 */
202
203/*
204 * Locking notes:
205 * + fields in vioif_txqueue are protected by txq_lock (a spin mutex), and
206 *   fields in vioif_rxqueue are protected by rxq_lock (a spin mutex).
207 *      - no more than one of these locks may be held at once
208 * + ctrlq_inuse is protected by ctrlq_wait_lock.
209 *      - other fields in vioif_ctrlqueue are protected by ctrlq_inuse
210 *      - txq_lock and rxq_lock cannot be held together with ctrlq_wait_lock
211 * + fields in vioif_softc other than the queues are protected by
212 *   sc->sc_lock (an adaptive mutex)
213 *      - this lock must be taken before acquiring any of the other locks
214 */
215
216struct vioif_ctrl_cmdspec {
217	bus_dmamap_t	dmamap;
218	void		*buf;
219	bus_size_t	bufsize;
220};
221
222struct vioif_work {
223	struct work	 cookie;
224	void		(*func)(void *);
225	void		*arg;
226	unsigned int	 added;
227};
228
229struct vioif_txqueue {
230	kmutex_t		*txq_lock;	/* lock for tx operations */
231
232	struct virtqueue	*txq_vq;
233	bool			txq_stopping;
234	bool			txq_link_active;
235	pcq_t			*txq_intrq;
236
237	struct virtio_net_hdr	*txq_hdrs;
238	bus_dmamap_t		*txq_hdr_dmamaps;
239
240	struct mbuf		**txq_mbufs;
241	bus_dmamap_t		*txq_dmamaps;
242
243	void			*txq_deferred_transmit;
244	void			*txq_handle_si;
245	struct vioif_work	 txq_work;
246	bool			 txq_workqueue;
247	bool			 txq_active;
248
249	char			 txq_evgroup[16];
250	struct evcnt		 txq_defrag_failed;
251	struct evcnt		 txq_mbuf_load_failed;
252	struct evcnt		 txq_enqueue_reserve_failed;
253};
254
255struct vioif_rxqueue {
256	kmutex_t		*rxq_lock;	/* lock for rx operations */
257
258	struct virtqueue	*rxq_vq;
259	bool			rxq_stopping;
260
261	struct virtio_net_hdr	*rxq_hdrs;
262	bus_dmamap_t		*rxq_hdr_dmamaps;
263
264	struct mbuf		**rxq_mbufs;
265	bus_dmamap_t		*rxq_dmamaps;
266
267	void			*rxq_handle_si;
268	struct vioif_work	 rxq_work;
269	bool			 rxq_workqueue;
270	bool			 rxq_active;
271
272	char			 rxq_evgroup[16];
273	struct evcnt		 rxq_mbuf_add_failed;
274};
275
276struct vioif_ctrlqueue {
277	struct virtqueue		*ctrlq_vq;
278	enum {
279		FREE, INUSE, DONE
280	}				ctrlq_inuse;
281	kcondvar_t			ctrlq_wait;
282	kmutex_t			ctrlq_wait_lock;
283	struct lwp			*ctrlq_owner;
284
285	struct virtio_net_ctrl_cmd	*ctrlq_cmd;
286	struct virtio_net_ctrl_status	*ctrlq_status;
287	struct virtio_net_ctrl_rx	*ctrlq_rx;
288	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_uc;
289	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_mc;
290	struct virtio_net_ctrl_mac_addr	*ctrlq_mac_addr;
291	struct virtio_net_ctrl_mq	*ctrlq_mq;
292
293	bus_dmamap_t			ctrlq_cmd_dmamap;
294	bus_dmamap_t			ctrlq_status_dmamap;
295	bus_dmamap_t			ctrlq_rx_dmamap;
296	bus_dmamap_t			ctrlq_tbl_uc_dmamap;
297	bus_dmamap_t			ctrlq_tbl_mc_dmamap;
298	bus_dmamap_t			ctrlq_mac_addr_dmamap;
299	bus_dmamap_t			ctrlq_mq_dmamap;
300
301	struct evcnt			ctrlq_cmd_load_failed;
302	struct evcnt			ctrlq_cmd_failed;
303};
304
305struct vioif_softc {
306	device_t		sc_dev;
307	kmutex_t		sc_lock;
308	struct sysctllog	*sc_sysctllog;
309
310	struct virtio_softc	*sc_virtio;
311	struct virtqueue	*sc_vqs;
312	u_int			 sc_hdr_size;
313
314	int			sc_max_nvq_pairs;
315	int			sc_req_nvq_pairs;
316	int			sc_act_nvq_pairs;
317
318	uint8_t			sc_mac[ETHER_ADDR_LEN];
319	struct ethercom		sc_ethercom;
320	bool			sc_link_active;
321
322	struct vioif_txqueue	*sc_txq;
323	struct vioif_rxqueue	*sc_rxq;
324
325	bool			sc_has_ctrl;
326	struct vioif_ctrlqueue	sc_ctrlq;
327
328	bus_dma_segment_t	sc_hdr_segs[1];
329	void			*sc_dmamem;
330	void			*sc_kmem;
331
332	void			*sc_ctl_softint;
333
334	struct workqueue	*sc_txrx_workqueue;
335	bool			 sc_txrx_workqueue_sysctl;
336	u_int			 sc_tx_intr_process_limit;
337	u_int			 sc_tx_process_limit;
338	u_int			 sc_rx_intr_process_limit;
339	u_int			 sc_rx_process_limit;
340};
341#define VIRTIO_NET_TX_MAXNSEGS		(16) /* XXX */
342#define VIRTIO_NET_CTRL_MAC_MAXENTRIES	(64) /* XXX */
343
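/*
 * Limits on the number of packets handled in one pass of the interrupt
 * handler and of the softint/workqueue handler; the rx interrupt limit of
 * 0 defers all rx processing to softint or workqueue context.
 */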
344#define VIOIF_TX_INTR_PROCESS_LIMIT	256
345#define VIOIF_TX_PROCESS_LIMIT		256
346#define VIOIF_RX_INTR_PROCESS_LIMIT	0U
347#define VIOIF_RX_PROCESS_LIMIT		256
348
349#define VIOIF_WORKQUEUE_PRI		PRI_SOFTNET
350
351/* cfattach interface functions */
352static int	vioif_match(device_t, cfdata_t, void *);
353static void	vioif_attach(device_t, device_t, void *);
354static int	vioif_finalize_teardown(device_t);
355
356/* ifnet interface functions */
357static int	vioif_init(struct ifnet *);
358static void	vioif_stop(struct ifnet *, int);
359static void	vioif_start(struct ifnet *);
360static void	vioif_start_locked(struct ifnet *, struct vioif_txqueue *);
361static int	vioif_transmit(struct ifnet *, struct mbuf *);
362static void	vioif_transmit_locked(struct ifnet *, struct vioif_txqueue *);
363static int	vioif_ioctl(struct ifnet *, u_long, void *);
364static void	vioif_watchdog(struct ifnet *);
365static int	vioif_ifflags_cb(struct ethercom *);
366
367/* rx */
368static int	vioif_add_rx_mbuf(struct vioif_rxqueue *, int);
369static void	vioif_free_rx_mbuf(struct vioif_rxqueue *, int);
370static void	vioif_populate_rx_mbufs_locked(struct vioif_softc *,
371		    struct vioif_rxqueue *);
372static void	vioif_rx_queue_clear(struct vioif_rxqueue *);
373static bool	vioif_rx_deq_locked(struct vioif_softc *, struct virtio_softc *,
374		    struct vioif_rxqueue *, u_int);
375static int	vioif_rx_intr(void *);
376static void	vioif_rx_handle(void *);
377static void	vioif_rx_sched_handle(struct vioif_softc *,
378		    struct vioif_rxqueue *);
379static void	vioif_rx_drain(struct vioif_rxqueue *);
380
381/* tx */
382static int	vioif_tx_intr(void *);
383static void	vioif_tx_handle(void *);
384static void	vioif_tx_sched_handle(struct vioif_softc *,
385		    struct vioif_txqueue *);
386static void	vioif_tx_queue_clear(struct vioif_txqueue *);
387static bool	vioif_tx_deq_locked(struct vioif_softc *, struct virtio_softc *,
388		    struct vioif_txqueue *, u_int);
389static void	vioif_tx_drain(struct vioif_txqueue *);
390static void	vioif_deferred_transmit(void *);
391
392/* workqueue */
393static struct workqueue*
394		vioif_workq_create(const char *, pri_t, int, int);
395static void	vioif_workq_destroy(struct workqueue *);
396static void	vioif_workq_work(struct work *, void *);
397static void	vioif_work_set(struct vioif_work *, void(*)(void *), void *);
398static void	vioif_work_add(struct workqueue *, struct vioif_work *);
399static void	vioif_work_wait(struct workqueue *, struct vioif_work *);
400
401/* other control */
402static bool	vioif_is_link_up(struct vioif_softc *);
403static void	vioif_update_link_status(struct vioif_softc *);
404static int	vioif_ctrl_rx(struct vioif_softc *, int, bool);
405static int	vioif_set_promisc(struct vioif_softc *, bool);
406static int	vioif_set_allmulti(struct vioif_softc *, bool);
407static int	vioif_set_rx_filter(struct vioif_softc *);
408static int	vioif_rx_filter(struct vioif_softc *);
409static int	vioif_set_mac_addr(struct vioif_softc *);
410static int	vioif_ctrl_intr(void *);
411static int	vioif_config_change(struct virtio_softc *);
412static void	vioif_ctl_softint(void *);
413static int	vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *, int);
414static void	vioif_enable_interrupt_vqpairs(struct vioif_softc *);
415static void	vioif_disable_interrupt_vqpairs(struct vioif_softc *);
416static int	vioif_setup_sysctl(struct vioif_softc *);
417static void	vioif_setup_stats(struct vioif_softc *);
418static int	vioif_ifflags(struct vioif_softc *);
419
420CFATTACH_DECL_NEW(vioif, sizeof(struct vioif_softc),
421		  vioif_match, vioif_attach, NULL, NULL);
422
423static int
424vioif_match(device_t parent, cfdata_t match, void *aux)
425{
426	struct virtio_attach_args *va = aux;
427
428	if (va->sc_childdevid == VIRTIO_DEVICE_ID_NETWORK)
429		return 1;
430
431	return 0;
432}
433
434static int
435vioif_dmamap_create(struct vioif_softc *sc, bus_dmamap_t *map,
436    bus_size_t size, int nsegs, const char *usage)
437{
438	int r;
439
440	r = bus_dmamap_create(virtio_dmat(sc->sc_virtio), size,
441	    nsegs, size, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, map);
442
443	if (r != 0) {
444		aprint_error_dev(sc->sc_dev, "%s dmamap creation failed, "
445		    "error code %d\n", usage, r);
446	}
447
448	return r;
449}
450
451static void
452vioif_dmamap_destroy(struct vioif_softc *sc, bus_dmamap_t *map)
453{
454
455	if (*map) {
456		bus_dmamap_destroy(virtio_dmat(sc->sc_virtio), *map);
457		*map = NULL;
458	}
459}
460
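/*
 * Convenience wrapper around vioif_dmamap_create(): create a DMA map and
 * immediately load buf into it; on load failure the map is destroyed again.
 */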
461static int
462vioif_dmamap_create_load(struct vioif_softc *sc, bus_dmamap_t *map,
463    void *buf, bus_size_t size, int nsegs, int rw, const char *usage)
464{
465	int r;
466
467	r = vioif_dmamap_create(sc, map, size, nsegs, usage);
468	if (r != 0)
469		return 1;
470
471	r = bus_dmamap_load(virtio_dmat(sc->sc_virtio), *map, buf,
472	    size, NULL, rw | BUS_DMA_NOWAIT);
473	if (r != 0) {
474		vioif_dmamap_destroy(sc, map);
475		aprint_error_dev(sc->sc_dev, "%s dmamap load failed, "
476		    "error code %d\n", usage, r);
477	}
478
479	return r;
480}
481
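/*
 * Carve the next size bytes out of a region already allocated by the
 * caller and advance the running pointer (a simple bump allocator).
 */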
482static void *
483vioif_assign_mem(intptr_t *p, size_t size)
484{
485	intptr_t rv;
486
487	rv = *p;
488	*p += size;
489
490	return (void *)rv;
491}
492
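/*
 * Allocate the per-queue-pair rx/tx state and the shared virtqueue array;
 * the rx and tx virtqueues of a pair are adjacent in sc_vqs, and the
 * control virtqueue, if negotiated, is placed last.
 */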
493static void
494vioif_alloc_queues(struct vioif_softc *sc)
495{
496	int nvq_pairs = sc->sc_max_nvq_pairs;
497	int nvqs = nvq_pairs * 2;
498	int i;
499
500	KASSERT(nvq_pairs <= VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX);
501
502	sc->sc_rxq = kmem_zalloc(sizeof(sc->sc_rxq[0]) * nvq_pairs,
503	    KM_SLEEP);
504	sc->sc_txq = kmem_zalloc(sizeof(sc->sc_txq[0]) * nvq_pairs,
505	    KM_SLEEP);
506
507	if (sc->sc_has_ctrl)
508		nvqs++;
509
510	sc->sc_vqs = kmem_zalloc(sizeof(sc->sc_vqs[0]) * nvqs, KM_SLEEP);
511	nvqs = 0;
512	for (i = 0; i < nvq_pairs; i++) {
513		sc->sc_rxq[i].rxq_vq = &sc->sc_vqs[nvqs++];
514		sc->sc_txq[i].txq_vq = &sc->sc_vqs[nvqs++];
515	}
516
517	if (sc->sc_has_ctrl)
518		sc->sc_ctrlq.ctrlq_vq = &sc->sc_vqs[nvqs++];
519}
520
521static void
522vioif_free_queues(struct vioif_softc *sc)
523{
524	int nvq_pairs = sc->sc_max_nvq_pairs;
525	int nvqs = nvq_pairs * 2;
526
527	if (sc->sc_ctrlq.ctrlq_vq)
528		nvqs++;
529
530	if (sc->sc_txq) {
531		kmem_free(sc->sc_txq, sizeof(sc->sc_txq[0]) * nvq_pairs);
532		sc->sc_txq = NULL;
533	}
534
535	if (sc->sc_rxq) {
536		kmem_free(sc->sc_rxq, sizeof(sc->sc_rxq[0]) * nvq_pairs);
537		sc->sc_rxq = NULL;
538	}
539
540	if (sc->sc_vqs) {
541		kmem_free(sc->sc_vqs, sizeof(sc->sc_vqs[0]) * nvqs);
542		sc->sc_vqs = NULL;
543	}
544}
545
546/* allocate memory */
547/*
548 * dma memory is used for:
549 *   rxq_hdrs[slot]:	 metadata array for received frames (READ)
550 *   txq_hdrs[slot]:	 metadata array for frames to be sent (WRITE)
551 *   ctrlq_cmd:		 command to be sent via ctrl vq (WRITE)
552 *   ctrlq_status:	 return value for a command via ctrl vq (READ)
553 *   ctrlq_rx:		 parameter for a VIRTIO_NET_CTRL_RX class command
554 *			 (WRITE)
555 *   ctrlq_mac_tbl_uc:	 unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
556 *			 class command (WRITE)
557 *   ctrlq_mac_tbl_mc:	 multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
558 *			 class command (WRITE)
559 * Only one instance of each ctrlq_* structure is allocated; they are
560 * protected by the ctrlq_inuse variable and the ctrlq_wait condvar.
561 */
562/*
563 * dynamically allocated memory is used for:
564 *   rxq_hdr_dmamaps[slot]:	bus_dmamap_t array for rxq_hdrs[slot]
565 *   txq_hdr_dmamaps[slot]:	bus_dmamap_t array for txq_hdrs[slot]
566 *   rxq_dmamaps[slot]:		bus_dmamap_t array for received payload
567 *   txq_dmamaps[slot]:		bus_dmamap_t array for sent payload
568 *   rxq_mbufs[slot]:		mbuf pointer array for received frames
569 *   txq_mbufs[slot]:		mbuf pointer array for sent frames
570 */
571static int
572vioif_alloc_mems(struct vioif_softc *sc)
573{
574	struct virtio_softc *vsc = sc->sc_virtio;
575	struct vioif_txqueue *txq;
576	struct vioif_rxqueue *rxq;
577	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
578	int allocsize, allocsize2, r, rsegs, i, qid;
579	void *vaddr;
580	intptr_t p;
581
582	allocsize = 0;
583	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
584		rxq = &sc->sc_rxq[qid];
585		txq = &sc->sc_txq[qid];
586
587		allocsize += sizeof(struct virtio_net_hdr) *
588			(rxq->rxq_vq->vq_num + txq->txq_vq->vq_num);
589	}
590	if (sc->sc_has_ctrl) {
591		allocsize += sizeof(struct virtio_net_ctrl_cmd);
592		allocsize += sizeof(struct virtio_net_ctrl_status);
593		allocsize += sizeof(struct virtio_net_ctrl_rx);
594		allocsize += sizeof(struct virtio_net_ctrl_mac_tbl)
595		    + ETHER_ADDR_LEN;
596		allocsize += sizeof(struct virtio_net_ctrl_mac_tbl)
597		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES;
598		allocsize += sizeof(struct virtio_net_ctrl_mac_addr);
599		allocsize += sizeof(struct virtio_net_ctrl_mq);
600	}
601	r = bus_dmamem_alloc(virtio_dmat(vsc), allocsize, 0, 0,
602	    &sc->sc_hdr_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
603	if (r != 0) {
604		aprint_error_dev(sc->sc_dev,
605		    "DMA memory allocation failed, size %d, "
606		    "error code %d\n", allocsize, r);
607		goto err_none;
608	}
609	r = bus_dmamem_map(virtio_dmat(vsc),
610	    &sc->sc_hdr_segs[0], 1, allocsize, &vaddr, BUS_DMA_NOWAIT);
611	if (r != 0) {
612		aprint_error_dev(sc->sc_dev,
613		    "DMA memory map failed, error code %d\n", r);
614		goto err_dmamem_alloc;
615	}
616
617	memset(vaddr, 0, allocsize);
618	sc->sc_dmamem = vaddr;
619	p = (intptr_t) vaddr;
620
621	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
622		rxq = &sc->sc_rxq[qid];
623		txq = &sc->sc_txq[qid];
624
625		rxq->rxq_hdrs = vioif_assign_mem(&p,
626		    sizeof(struct virtio_net_hdr) * rxq->rxq_vq->vq_num);
627		txq->txq_hdrs = vioif_assign_mem(&p,
628		    sizeof(struct virtio_net_hdr) * txq->txq_vq->vq_num);
629	}
630	if (sc->sc_has_ctrl) {
631		ctrlq->ctrlq_cmd = vioif_assign_mem(&p,
632		    sizeof(*ctrlq->ctrlq_cmd));
633		ctrlq->ctrlq_status = vioif_assign_mem(&p,
634		    sizeof(*ctrlq->ctrlq_status));
635		ctrlq->ctrlq_rx = vioif_assign_mem(&p,
636		    sizeof(*ctrlq->ctrlq_rx));
637		ctrlq->ctrlq_mac_tbl_uc = vioif_assign_mem(&p,
638		    sizeof(*ctrlq->ctrlq_mac_tbl_uc)
639		    + ETHER_ADDR_LEN);
640		ctrlq->ctrlq_mac_tbl_mc = vioif_assign_mem(&p,
641		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
642		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES);
643		ctrlq->ctrlq_mac_addr = vioif_assign_mem(&p,
644		    sizeof(*ctrlq->ctrlq_mac_addr));
645		ctrlq->ctrlq_mq = vioif_assign_mem(&p, sizeof(*ctrlq->ctrlq_mq));
646	}
647
648	allocsize2 = 0;
649	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
650		int rxqsize, txqsize;
651
652		rxq = &sc->sc_rxq[qid];
653		txq = &sc->sc_txq[qid];
654		rxqsize = rxq->rxq_vq->vq_num;
655		txqsize = txq->txq_vq->vq_num;
656
657		allocsize2 += sizeof(rxq->rxq_dmamaps[0]) * rxqsize;
658		allocsize2 += sizeof(rxq->rxq_hdr_dmamaps[0]) * rxqsize;
659		allocsize2 += sizeof(rxq->rxq_mbufs[0]) * rxqsize;
660
661		allocsize2 += sizeof(txq->txq_dmamaps[0]) * txqsize;
662		allocsize2 += sizeof(txq->txq_hdr_dmamaps[0]) * txqsize;
663		allocsize2 += sizeof(txq->txq_mbufs[0]) * txqsize;
664	}
665	vaddr = kmem_zalloc(allocsize2, KM_SLEEP);
666	sc->sc_kmem = vaddr;
667	p = (intptr_t) vaddr;
668
669	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
670		int rxqsize, txqsize;
671		rxq = &sc->sc_rxq[qid];
672		txq = &sc->sc_txq[qid];
673		rxqsize = rxq->rxq_vq->vq_num;
674		txqsize = txq->txq_vq->vq_num;
675
676		rxq->rxq_hdr_dmamaps = vioif_assign_mem(&p,
677		    sizeof(rxq->rxq_hdr_dmamaps[0]) * rxqsize);
678		txq->txq_hdr_dmamaps = vioif_assign_mem(&p,
679		    sizeof(txq->txq_hdr_dmamaps[0]) * txqsize);
680		rxq->rxq_dmamaps = vioif_assign_mem(&p,
681		    sizeof(rxq->rxq_dmamaps[0]) * rxqsize);
682		txq->txq_dmamaps = vioif_assign_mem(&p,
683		    sizeof(txq->txq_dmamaps[0]) * txqsize);
684		rxq->rxq_mbufs = vioif_assign_mem(&p,
685		    sizeof(rxq->rxq_mbufs[0]) * rxqsize);
686		txq->txq_mbufs = vioif_assign_mem(&p,
687		    sizeof(txq->txq_mbufs[0]) * txqsize);
688	}
689
690	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
691		rxq = &sc->sc_rxq[qid];
692		txq = &sc->sc_txq[qid];
693
694		for (i = 0; i < rxq->rxq_vq->vq_num; i++) {
695			r = vioif_dmamap_create_load(sc, &rxq->rxq_hdr_dmamaps[i],
696			    &rxq->rxq_hdrs[i], sc->sc_hdr_size, 1,
697			    BUS_DMA_READ, "rx header");
698			if (r != 0)
699				goto err_reqs;
700
701			r = vioif_dmamap_create(sc, &rxq->rxq_dmamaps[i],
702			    MCLBYTES, 1, "rx payload");
703			if (r != 0)
704				goto err_reqs;
705		}
706
707		for (i = 0; i < txq->txq_vq->vq_num; i++) {
708			r = vioif_dmamap_create_load(sc, &txq->txq_hdr_dmamaps[i],
709			    &txq->txq_hdrs[i], sc->sc_hdr_size, 1,
710			    BUS_DMA_READ, "tx header");
711			if (r != 0)
712				goto err_reqs;
713
714			r = vioif_dmamap_create(sc, &txq->txq_dmamaps[i], ETHER_MAX_LEN,
715			    VIRTIO_NET_TX_MAXNSEGS, "tx payload");
716			if (r != 0)
717				goto err_reqs;
718		}
719	}
720
721	if (sc->sc_has_ctrl) {
722		/* control vq class & command */
723		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_cmd_dmamap,
724		    ctrlq->ctrlq_cmd, sizeof(*ctrlq->ctrlq_cmd), 1,
725		    BUS_DMA_WRITE, "control command");
726		if (r != 0)
727			goto err_reqs;
728
729		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_status_dmamap,
730		    ctrlq->ctrlq_status, sizeof(*ctrlq->ctrlq_status), 1,
731		    BUS_DMA_READ, "control status");
732		if (r != 0)
733			goto err_reqs;
734
735		/* control vq rx mode command parameter */
736		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_rx_dmamap,
737		    ctrlq->ctrlq_rx, sizeof(*ctrlq->ctrlq_rx), 1,
738		    BUS_DMA_WRITE, "rx mode control command");
739		if (r != 0)
740			goto err_reqs;
741
742		/* multiqueue set command */
743		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_mq_dmamap,
744		    ctrlq->ctrlq_mq, sizeof(*ctrlq->ctrlq_mq), 1,
745		    BUS_DMA_WRITE, "multiqueue set command");
746		if (r != 0)
747			goto err_reqs;
748
749		/* control vq MAC filter table for unicast */
750		/* do not load now since its length is variable */
751		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_uc_dmamap,
752		    sizeof(*ctrlq->ctrlq_mac_tbl_uc)
753		    + ETHER_ADDR_LEN, 1,
754		    "unicast MAC address filter command");
755		if (r != 0)
756			goto err_reqs;
757
758		/* control vq MAC filter table for multicast */
759		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_mc_dmamap,
760		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
761		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES, 1,
762		    "multicast MAC address filter command");
763		if (r != 0)
764			goto err_reqs;
765
766		/* control vq MAC address set command */
767		r = vioif_dmamap_create_load(sc,
768		    &ctrlq->ctrlq_mac_addr_dmamap,
769		    ctrlq->ctrlq_mac_addr,
770		    sizeof(*ctrlq->ctrlq_mac_addr), 1,
771		    BUS_DMA_WRITE, "mac addr set command");
772		if (r != 0)
773			goto err_reqs;
774	}
775
776	return 0;
777
778err_reqs:
779	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_mc_dmamap);
780	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_uc_dmamap);
781	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_rx_dmamap);
782	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_status_dmamap);
783	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_cmd_dmamap);
784	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_mac_addr_dmamap);
785	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
786		rxq = &sc->sc_rxq[qid];
787		txq = &sc->sc_txq[qid];
788
789		for (i = 0; i < txq->txq_vq->vq_num; i++) {
790			vioif_dmamap_destroy(sc, &txq->txq_dmamaps[i]);
791			vioif_dmamap_destroy(sc, &txq->txq_hdr_dmamaps[i]);
792		}
793		for (i = 0; i < rxq->rxq_vq->vq_num; i++) {
794			vioif_dmamap_destroy(sc, &rxq->rxq_dmamaps[i]);
795			vioif_dmamap_destroy(sc, &rxq->rxq_hdr_dmamaps[i]);
796		}
797	}
798	if (sc->sc_kmem) {
799		kmem_free(sc->sc_kmem, allocsize2);
800		sc->sc_kmem = NULL;
801	}
802	bus_dmamem_unmap(virtio_dmat(vsc), sc->sc_dmamem, allocsize);
803err_dmamem_alloc:
804	bus_dmamem_free(virtio_dmat(vsc), &sc->sc_hdr_segs[0], 1);
805err_none:
806	return -1;
807}
808
809static void
810vioif_attach(device_t parent, device_t self, void *aux)
811{
812	struct vioif_softc *sc = device_private(self);
813	struct virtio_softc *vsc = device_private(parent);
814	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
815	struct vioif_txqueue *txq;
816	struct vioif_rxqueue *rxq;
817	uint64_t features, req_features;
818	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
819	u_int softint_flags;
820	int r, i, nvqs=0, req_flags;
821	char xnamebuf[MAXCOMLEN];
822
823	if (virtio_child(vsc) != NULL) {
824		aprint_normal(": child already attached for %s; "
825		    "something wrong...\n", device_xname(parent));
826		return;
827	}
828
829	sc->sc_dev = self;
830	sc->sc_virtio = vsc;
831	sc->sc_link_active = false;
832
833	sc->sc_max_nvq_pairs = 1;
834	sc->sc_req_nvq_pairs = 1;
835	sc->sc_act_nvq_pairs = 1;
836	sc->sc_txrx_workqueue_sysctl = true;
837	sc->sc_tx_intr_process_limit = VIOIF_TX_INTR_PROCESS_LIMIT;
838	sc->sc_tx_process_limit = VIOIF_TX_PROCESS_LIMIT;
839	sc->sc_rx_intr_process_limit = VIOIF_RX_INTR_PROCESS_LIMIT;
840	sc->sc_rx_process_limit = VIOIF_RX_PROCESS_LIMIT;
841
842	mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);
843
844	snprintf(xnamebuf, sizeof(xnamebuf), "%s_txrx", device_xname(self));
845	sc->sc_txrx_workqueue = vioif_workq_create(xnamebuf, VIOIF_WORKQUEUE_PRI,
846	    IPL_NET, WQ_PERCPU | WQ_MPSAFE);
847	if (sc->sc_txrx_workqueue == NULL)
848		goto err;
849
850	req_flags = 0;
851
852#ifdef VIOIF_MPSAFE
853	req_flags |= VIRTIO_F_INTR_MPSAFE;
854#endif
855	req_flags |= VIRTIO_F_INTR_MSIX;
856
857	req_features =
858	    VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | VIRTIO_NET_F_CTRL_VQ |
859	    VIRTIO_NET_F_CTRL_RX | VIRTIO_F_NOTIFY_ON_EMPTY;
860	req_features |= VIRTIO_F_RING_EVENT_IDX;
861#ifdef VIOIF_MULTIQ
862	req_features |= VIRTIO_NET_F_MQ;
863#endif
864	virtio_child_attach_start(vsc, self, IPL_NET, NULL,
865	    vioif_config_change, virtio_vq_intrhand, req_flags,
866	    req_features, VIRTIO_NET_FLAG_BITS);
867
868	features = virtio_features(vsc);
869	if (features == 0)
870		goto err;
871
872	if (features & VIRTIO_NET_F_MAC) {
873		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
874			sc->sc_mac[i] = virtio_read_device_config_1(vsc,
875			    VIRTIO_NET_CONFIG_MAC + i);
876		}
877	} else {
878		/* code stolen from sys/net/if_tap.c */
879		struct timeval tv;
880		uint32_t ui;
881		getmicrouptime(&tv);
882		ui = (tv.tv_sec ^ tv.tv_usec) & 0xffffff;
883		memcpy(sc->sc_mac+3, (uint8_t *)&ui, 3);
884		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
885			virtio_write_device_config_1(vsc,
886			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
887		}
888	}
889
890	/* 'Ethernet' is capitalized to match other Ethernet drivers' attach output */
891	aprint_normal_dev(self, "Ethernet address %s\n",
892	    ether_sprintf(sc->sc_mac));
893
894	if (features & (VIRTIO_NET_F_MRG_RXBUF | VIRTIO_F_VERSION_1)) {
895		sc->sc_hdr_size = sizeof(struct virtio_net_hdr);
896	} else {
897		sc->sc_hdr_size = offsetof(struct virtio_net_hdr, num_buffers);
898	}
899
900	if ((features & VIRTIO_NET_F_CTRL_VQ) &&
901	    (features & VIRTIO_NET_F_CTRL_RX)) {
902		sc->sc_has_ctrl = true;
903
904		cv_init(&ctrlq->ctrlq_wait, "ctrl_vq");
905		mutex_init(&ctrlq->ctrlq_wait_lock, MUTEX_DEFAULT, IPL_NET);
906		ctrlq->ctrlq_inuse = FREE;
907	} else {
908		sc->sc_has_ctrl = false;
909	}
910
911	if (sc->sc_has_ctrl && (features & VIRTIO_NET_F_MQ)) {
912		sc->sc_max_nvq_pairs = virtio_read_device_config_2(vsc,
913		    VIRTIO_NET_CONFIG_MAX_VQ_PAIRS);
914
915		if (sc->sc_max_nvq_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
916			goto err;
917
918		/* Limit the number of queue pairs to use */
919		sc->sc_req_nvq_pairs = MIN(sc->sc_max_nvq_pairs, ncpu);
920	}
921
922	vioif_alloc_queues(sc);
923	virtio_child_attach_set_vqs(vsc, sc->sc_vqs, sc->sc_req_nvq_pairs);
924
925#ifdef VIOIF_MPSAFE
926	softint_flags = SOFTINT_NET | SOFTINT_MPSAFE;
927#else
928	softint_flags = SOFTINT_NET;
929#endif
930
931	/*
932	 * Allocating virtqueues
933	 */
934	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
935		rxq = &sc->sc_rxq[i];
936		txq = &sc->sc_txq[i];
937		char qname[32];
938
939		rxq->rxq_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
940
941		rxq->rxq_handle_si = softint_establish(softint_flags,
942		    vioif_rx_handle, rxq);
943		if (rxq->rxq_handle_si == NULL) {
944			aprint_error_dev(self, "cannot establish rx softint\n");
945			goto err;
946		}
947
948		snprintf(qname, sizeof(qname), "rx%d", i);
949		r = virtio_alloc_vq(vsc, rxq->rxq_vq, nvqs,
950		    MCLBYTES + sc->sc_hdr_size, 2, qname);
951		if (r != 0)
952			goto err;
953		nvqs++;
954		rxq->rxq_vq->vq_intrhand = vioif_rx_intr;
955		rxq->rxq_vq->vq_intrhand_arg = (void *)rxq;
956		rxq->rxq_stopping = true;
957		vioif_work_set(&rxq->rxq_work, vioif_rx_handle, rxq);
958
959		txq->txq_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
960
961		txq->txq_deferred_transmit = softint_establish(softint_flags,
962		    vioif_deferred_transmit, txq);
963		if (txq->txq_deferred_transmit == NULL) {
964			aprint_error_dev(self, "cannot establish tx softint\n");
965			goto err;
966		}
967		txq->txq_handle_si = softint_establish(softint_flags,
968		    vioif_tx_handle, txq);
969		if (txq->txq_handle_si == NULL) {
970			aprint_error_dev(self, "cannot establish tx softint\n");
971			goto err;
972		}
973
974		snprintf(qname, sizeof(qname), "tx%d", i);
975		r = virtio_alloc_vq(vsc, txq->txq_vq, nvqs,
976		    sc->sc_hdr_size + (ETHER_MAX_LEN - ETHER_HDR_LEN),
977		    VIRTIO_NET_TX_MAXNSEGS + 1, qname);
978		if (r != 0)
979			goto err;
980		nvqs++;
981		txq->txq_vq->vq_intrhand = vioif_tx_intr;
982		txq->txq_vq->vq_intrhand_arg = (void *)txq;
983		txq->txq_link_active = sc->sc_link_active;
984		txq->txq_stopping = false;
985		txq->txq_intrq = pcq_create(txq->txq_vq->vq_num, KM_SLEEP);
986		vioif_work_set(&txq->txq_work, vioif_tx_handle, txq);
987	}
988
989	if (sc->sc_has_ctrl) {
990		/*
991		 * Allocating a virtqueue for control channel
992		 */
993		r = virtio_alloc_vq(vsc, ctrlq->ctrlq_vq, nvqs,
994		    NBPG, 1, "control");
995		if (r != 0) {
996			aprint_error_dev(self, "failed to allocate "
997			    "a virtqueue for control channel, error code %d\n",
998			    r);
999
1000			sc->sc_has_ctrl = false;
1001			cv_destroy(&ctrlq->ctrlq_wait);
1002			mutex_destroy(&ctrlq->ctrlq_wait_lock);
1003		} else {
1004			nvqs++;
1005			ctrlq->ctrlq_vq->vq_intrhand = vioif_ctrl_intr;
1006			ctrlq->ctrlq_vq->vq_intrhand_arg = (void *) ctrlq;
1007		}
1008	}
1009
1010	sc->sc_ctl_softint = softint_establish(softint_flags,
1011	    vioif_ctl_softint, sc);
1012	if (sc->sc_ctl_softint == NULL) {
1013		aprint_error_dev(self, "cannot establish ctl softint\n");
1014		goto err;
1015	}
1016
1017	if (vioif_alloc_mems(sc) < 0)
1018		goto err;
1019
1020	if (virtio_child_attach_finish(vsc) != 0)
1021		goto err;
1022
1023	if (vioif_setup_sysctl(sc) != 0) {
1024		aprint_error_dev(self, "unable to create sysctl node\n");
1025		/* continue */
1026	}
1027
1028	vioif_setup_stats(sc);
1029
1030	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
1031	ifp->if_softc = sc;
1032	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1033#ifdef VIOIF_MPSAFE
1034	ifp->if_extflags = IFEF_MPSAFE;
1035#endif
1036	ifp->if_start = vioif_start;
1037	if (sc->sc_req_nvq_pairs > 1)
1038		ifp->if_transmit = vioif_transmit;
1039	ifp->if_ioctl = vioif_ioctl;
1040	ifp->if_init = vioif_init;
1041	ifp->if_stop = vioif_stop;
1042	ifp->if_capabilities = 0;
1043	ifp->if_watchdog = vioif_watchdog;
1044	txq = &sc->sc_txq[0];
1045	IFQ_SET_MAXLEN(&ifp->if_snd, MAX(txq->txq_vq->vq_num, IFQ_MAXLEN));
1046	IFQ_SET_READY(&ifp->if_snd);
1047
1048	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
1049
1050	if_attach(ifp);
1051	if_deferred_start_init(ifp, NULL);
1052	ether_ifattach(ifp, sc->sc_mac);
1053	ether_set_ifflags_cb(&sc->sc_ethercom, vioif_ifflags_cb);
1054
1055	return;
1056
1057err:
1058	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
1059		rxq = &sc->sc_rxq[i];
1060		txq = &sc->sc_txq[i];
1061
1062		if (rxq->rxq_lock) {
1063			mutex_obj_free(rxq->rxq_lock);
1064			rxq->rxq_lock = NULL;
1065		}
1066
1067		if (rxq->rxq_handle_si) {
1068			softint_disestablish(rxq->rxq_handle_si);
1069			rxq->rxq_handle_si = NULL;
1070		}
1071
1072		if (txq->txq_lock) {
1073			mutex_obj_free(txq->txq_lock);
1074			txq->txq_lock = NULL;
1075		}
1076
1077		if (txq->txq_handle_si) {
1078			softint_disestablish(txq->txq_handle_si);
1079			txq->txq_handle_si = NULL;
1080		}
1081
1082		if (txq->txq_deferred_transmit) {
1083			softint_disestablish(txq->txq_deferred_transmit);
1084			txq->txq_deferred_transmit = NULL;
1085		}
1086
1087		if (txq->txq_intrq) {
1088			pcq_destroy(txq->txq_intrq);
1089			txq->txq_intrq = NULL;
1090		}
1091	}
1092
1093	if (sc->sc_has_ctrl) {
1094		cv_destroy(&ctrlq->ctrlq_wait);
1095		mutex_destroy(&ctrlq->ctrlq_wait_lock);
1096	}
1097
1098	while (nvqs > 0)
1099		virtio_free_vq(vsc, &sc->sc_vqs[--nvqs]);
1100
1101	vioif_free_queues(sc);
1102	mutex_destroy(&sc->sc_lock);
1103	virtio_child_attach_failed(vsc);
1104	config_finalize_register(self, vioif_finalize_teardown);
1105
1106	return;
1107}
1108
1109static int
1110vioif_finalize_teardown(device_t self)
1111{
1112	struct vioif_softc *sc = device_private(self);
1113
1114	if (sc->sc_txrx_workqueue != NULL) {
1115		vioif_workq_destroy(sc->sc_txrx_workqueue);
1116		sc->sc_txrx_workqueue = NULL;
1117	}
1118
1119	return 0;
1120}
1121
1122static void
1123vioif_enable_interrupt_vqpairs(struct vioif_softc *sc)
1124{
1125	struct virtio_softc *vsc = sc->sc_virtio;
1126	struct vioif_txqueue *txq;
1127	struct vioif_rxqueue *rxq;
1128	int i;
1129
1130	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1131		txq = &sc->sc_txq[i];
1132		rxq = &sc->sc_rxq[i];
1133
1134		virtio_start_vq_intr(vsc, txq->txq_vq);
1135		virtio_start_vq_intr(vsc, rxq->rxq_vq);
1136	}
1137}
1138
1139static void
1140vioif_disable_interrupt_vqpairs(struct vioif_softc *sc)
1141{
1142	struct virtio_softc *vsc = sc->sc_virtio;
1143	struct vioif_txqueue *txq;
1144	struct vioif_rxqueue *rxq;
1145	int i;
1146
1147	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1148		rxq = &sc->sc_rxq[i];
1149		txq = &sc->sc_txq[i];
1150
1151		virtio_stop_vq_intr(vsc, rxq->rxq_vq);
1152		virtio_stop_vq_intr(vsc, txq->txq_vq);
1153	}
1154}
1155
1156/*
1157 * Interface functions for ifnet
1158 */
1159static int
1160vioif_init(struct ifnet *ifp)
1161{
1162	struct vioif_softc *sc = ifp->if_softc;
1163	struct virtio_softc *vsc = sc->sc_virtio;
1164	struct vioif_rxqueue *rxq;
1165	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1166	int r, i;
1167
1168	vioif_stop(ifp, 0);
1169
1170	r = virtio_reinit_start(vsc);
1171	if (r != 0) {
1172		log(LOG_ERR, "%s: reset failed\n", ifp->if_xname);
1173		return EIO;
1174	}
1175
1176	virtio_negotiate_features(vsc, virtio_features(vsc));
1177
1178	for (i = 0; i < sc->sc_req_nvq_pairs; i++) {
1179		rxq = &sc->sc_rxq[i];
1180
1181		/* rxq_stopping must be set to false before vioif_populate_rx_mbufs */
1182		mutex_enter(rxq->rxq_lock);
1183		rxq->rxq_stopping = false;
1184		vioif_populate_rx_mbufs_locked(sc, rxq);
1185		mutex_exit(rxq->rxq_lock);
1186
1187	}
1188
1189	virtio_reinit_end(vsc);
1190
1191	if (sc->sc_has_ctrl)
1192		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
1193
1194	r = vioif_ctrl_mq_vq_pairs_set(sc, sc->sc_req_nvq_pairs);
1195	if (r == 0)
1196		sc->sc_act_nvq_pairs = sc->sc_req_nvq_pairs;
1197	else
1198		sc->sc_act_nvq_pairs = 1;
1199
1200	for (i = 0; i < sc->sc_act_nvq_pairs; i++)
1201		sc->sc_txq[i].txq_stopping = false;
1202
1203	vioif_enable_interrupt_vqpairs(sc);
1204
1205	vioif_update_link_status(sc);
1206	ifp->if_flags |= IFF_RUNNING;
1207	ifp->if_flags &= ~IFF_OACTIVE;
1208	r = vioif_rx_filter(sc);
1209
1210	return r;
1211}
1212
1213static void
1214vioif_stop_rendezvous(struct vioif_softc *sc)
1215{
1216	struct vioif_txqueue *txq;
1217	struct vioif_rxqueue *rxq;
1218	int i;
1219
1220	/*
1221	 * stop all packet processing:
1222	 * 1. acquire and release each queue lock to wait for any interrupt
1223	 *    handler still running on the queue to finish
1224	 * 2. wait for pending workqueue-based packet processing to complete
1225	 */
1226
1227	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1228		txq = &sc->sc_txq[i];
1229		rxq = &sc->sc_rxq[i];
1230
1231		mutex_enter(rxq->rxq_lock);
1232		mutex_exit(rxq->rxq_lock);
1233		vioif_work_wait(sc->sc_txrx_workqueue, &rxq->rxq_work);
1234
1235		mutex_enter(txq->txq_lock);
1236		mutex_exit(txq->txq_lock);
1237		vioif_work_wait(sc->sc_txrx_workqueue, &txq->txq_work);
1238	}
1239}
1240
1241static void
1242vioif_stop(struct ifnet *ifp, int disable)
1243{
1244	struct vioif_softc *sc = ifp->if_softc;
1245	struct virtio_softc *vsc = sc->sc_virtio;
1246	struct vioif_txqueue *txq;
1247	struct vioif_rxqueue *rxq;
1248	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1249	int i;
1250
1251	/* Take the locks to ensure that ongoing TX/RX finish */
1252	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1253		txq = &sc->sc_txq[i];
1254		rxq = &sc->sc_rxq[i];
1255
1256		mutex_enter(rxq->rxq_lock);
1257		rxq->rxq_stopping = true;
1258		mutex_exit(rxq->rxq_lock);
1259
1260		mutex_enter(txq->txq_lock);
1261		txq->txq_stopping = true;
1262		mutex_exit(txq->txq_lock);
1263	}
1264
1265	/* disable interrupts */
1266	vioif_disable_interrupt_vqpairs(sc);
1267
1268	if (sc->sc_has_ctrl)
1269		virtio_stop_vq_intr(vsc, ctrlq->ctrlq_vq);
1270
1271	/* the only way to stop I/O and DMA is to reset the device */
1272	virtio_reset(vsc);
1273
1274	vioif_stop_rendezvous(sc);
1275
1276	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1277		vioif_rx_queue_clear(&sc->sc_rxq[i]);
1278		vioif_tx_queue_clear(&sc->sc_txq[i]);
1279	}
1280
1281	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1282	sc->sc_link_active = false;
1283
1284	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1285		txq = &sc->sc_txq[i];
1286		rxq = &sc->sc_rxq[i];
1287
1288		txq->txq_link_active = false;
1289
1290		if (disable)
1291			vioif_rx_drain(rxq);
1292
1293		vioif_tx_drain(txq);
1294	}
1295}
1296
1297static void
1298vioif_send_common_locked(struct ifnet *ifp, struct vioif_txqueue *txq,
1299    bool is_transmit)
1300{
1301	struct vioif_softc *sc = ifp->if_softc;
1302	struct virtio_softc *vsc = sc->sc_virtio;
1303	struct virtqueue *vq = txq->txq_vq;
1304	struct virtio_net_hdr *hdr;
1305	struct mbuf *m;
1306	int queued = 0;
1307
1308	KASSERT(mutex_owned(txq->txq_lock));
1309
1310	if ((ifp->if_flags & IFF_RUNNING) == 0)
1311		return;
1312
1313	if (!txq->txq_link_active || txq->txq_stopping)
1314		return;
1315
1316	if ((ifp->if_flags & IFF_OACTIVE) != 0 && !is_transmit)
1317		return;
1318
1319	for (;;) {
1320		int slot, r;
1321
1322		if (is_transmit)
1323			m = pcq_get(txq->txq_intrq);
1324		else
1325			IFQ_DEQUEUE(&ifp->if_snd, m);
1326
1327		if (m == NULL)
1328			break;
1329
1330		r = virtio_enqueue_prep(vsc, vq, &slot);
1331		if (r == EAGAIN) {
1332			ifp->if_flags |= IFF_OACTIVE;
1333			m_freem(m);
1334			break;
1335		}
1336		if (r != 0)
1337			panic("enqueue_prep for a tx buffer");
1338
1339		r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1340		    txq->txq_dmamaps[slot], m, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1341		if (r != 0) {
1342			/* maybe just too fragmented */
1343			struct mbuf *newm;
1344
1345			newm = m_defrag(m, M_NOWAIT);
1346			if (newm == NULL) {
1347				txq->txq_defrag_failed.ev_count++;
1348				goto skip;
1349			}
1350
1351			m = newm;
1352			r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1353			    txq->txq_dmamaps[slot], m,
1354			    BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1355			if (r != 0) {
1356				txq->txq_mbuf_load_failed.ev_count++;
1357skip:
1358				m_freem(m);
1359				virtio_enqueue_abort(vsc, vq, slot);
1360				continue;
1361			}
1362		}
1363
1364		/* This should actually never fail */
1365		r = virtio_enqueue_reserve(vsc, vq, slot,
1366		    txq->txq_dmamaps[slot]->dm_nsegs + 1);
1367		if (r != 0) {
1368			txq->txq_enqueue_reserve_failed.ev_count++;
1369			bus_dmamap_unload(virtio_dmat(vsc),
1370			     txq->txq_dmamaps[slot]);
1371			/* slot already freed by virtio_enqueue_reserve */
1372			m_freem(m);
1373			continue;
1374		}
1375
1376		txq->txq_mbufs[slot] = m;
1377
1378		hdr = &txq->txq_hdrs[slot];
1379		memset(hdr, 0, sc->sc_hdr_size);
1380		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_dmamaps[slot],
1381		    0, txq->txq_dmamaps[slot]->dm_mapsize,
1382		    BUS_DMASYNC_PREWRITE);
1383		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_hdr_dmamaps[slot],
1384		    0, txq->txq_hdr_dmamaps[slot]->dm_mapsize,
1385		    BUS_DMASYNC_PREWRITE);
1386		virtio_enqueue(vsc, vq, slot, txq->txq_hdr_dmamaps[slot], true);
1387		virtio_enqueue(vsc, vq, slot, txq->txq_dmamaps[slot], true);
1388		virtio_enqueue_commit(vsc, vq, slot, false);
1389
1390		queued++;
1391		bpf_mtap(ifp, m, BPF_D_OUT);
1392	}
1393
1394	if (queued > 0) {
1395		virtio_enqueue_commit(vsc, vq, -1, true);
1396		ifp->if_timer = 5;
1397	}
1398}
1399
1400static void
1401vioif_start_locked(struct ifnet *ifp, struct vioif_txqueue *txq)
1402{
1403
1404	/*
1405	 * ifp->if_obytes and ifp->if_omcasts are added in if_transmit()@if.c.
1406	 */
1407	vioif_send_common_locked(ifp, txq, false);
1408
1409}
1410
1411static void
1412vioif_start(struct ifnet *ifp)
1413{
1414	struct vioif_softc *sc = ifp->if_softc;
1415	struct vioif_txqueue *txq = &sc->sc_txq[0];
1416
1417#ifdef VIOIF_MPSAFE
1418	KASSERT(if_is_mpsafe(ifp));
1419#endif
1420
1421	mutex_enter(txq->txq_lock);
1422	vioif_start_locked(ifp, txq);
1423	mutex_exit(txq->txq_lock);
1424}
1425
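/*
 * Map the sending CPU onto one of the active tx queues so that, given
 * enough queue pairs, each CPU tends to keep using the same queue.
 */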
1426static inline int
1427vioif_select_txqueue(struct ifnet *ifp, struct mbuf *m)
1428{
1429	struct vioif_softc *sc = ifp->if_softc;
1430	u_int cpuid = cpu_index(curcpu());
1431
1432	return cpuid % sc->sc_act_nvq_pairs;
1433}
1434
1435static void
1436vioif_transmit_locked(struct ifnet *ifp, struct vioif_txqueue *txq)
1437{
1438
1439	vioif_send_common_locked(ifp, txq, true);
1440}
1441
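/*
 * if_transmit hook, installed only when more than one queue pair is
 * requested (see vioif_attach); enqueue onto the per-queue pcq and try to
 * send immediately if the tx lock can be taken without blocking.
 */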
1442static int
1443vioif_transmit(struct ifnet *ifp, struct mbuf *m)
1444{
1445	struct vioif_softc *sc = ifp->if_softc;
1446	struct vioif_txqueue *txq;
1447	int qid;
1448
1449	qid = vioif_select_txqueue(ifp, m);
1450	txq = &sc->sc_txq[qid];
1451
1452	if (__predict_false(!pcq_put(txq->txq_intrq, m))) {
1453		m_freem(m);
1454		return ENOBUFS;
1455	}
1456
1457	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
1458	if_statadd_ref(nsr, if_obytes, m->m_pkthdr.len);
1459	if (m->m_flags & M_MCAST)
1460		if_statinc_ref(nsr, if_omcasts);
1461	IF_STAT_PUTREF(ifp);
1462
1463	if (mutex_tryenter(txq->txq_lock)) {
1464		vioif_transmit_locked(ifp, txq);
1465		mutex_exit(txq->txq_lock);
1466	}
1467
1468	return 0;
1469}
1470
1471static void
1472vioif_deferred_transmit(void *arg)
1473{
1474	struct vioif_txqueue *txq = arg;
1475	struct virtio_softc *vsc = txq->txq_vq->vq_owner;
1476	struct vioif_softc *sc = device_private(virtio_child(vsc));
1477	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1478
1479	mutex_enter(txq->txq_lock);
1480	vioif_send_common_locked(ifp, txq, true);
1481	mutex_exit(txq->txq_lock);
1482}
1483
1484static int
1485vioif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1486{
1487	int s, r;
1488
1489	s = splnet();
1490
1491	r = ether_ioctl(ifp, cmd, data);
1492	if (r == ENETRESET && (cmd == SIOCADDMULTI || cmd == SIOCDELMULTI)) {
1493		if (ifp->if_flags & IFF_RUNNING) {
1494			r = vioif_rx_filter(ifp->if_softc);
1495		} else {
1496			r = 0;
1497		}
1498	}
1499
1500	splx(s);
1501
1502	return r;
1503}
1504
1505static void
1506vioif_watchdog(struct ifnet *ifp)
1507{
1508	struct vioif_softc *sc = ifp->if_softc;
1509	int i;
1510
1511	if (ifp->if_flags & IFF_RUNNING) {
1512		for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1513			vioif_tx_queue_clear(&sc->sc_txq[i]);
1514		}
1515	}
1516}
1517
1518/*
1519 * Receive implementation
1520 */
1521/* allocate and initialize an mbuf for receive */
1522static int
1523vioif_add_rx_mbuf(struct vioif_rxqueue *rxq, int i)
1524{
1525	struct virtio_softc *vsc = rxq->rxq_vq->vq_owner;
1526	struct mbuf *m;
1527	int r;
1528
1529	MGETHDR(m, M_DONTWAIT, MT_DATA);
1530	if (m == NULL)
1531		return ENOBUFS;
1532	MCLGET(m, M_DONTWAIT);
1533	if ((m->m_flags & M_EXT) == 0) {
1534		m_freem(m);
1535		return ENOBUFS;
1536	}
1537	rxq->rxq_mbufs[i] = m;
1538	m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
1539	r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1540	    rxq->rxq_dmamaps[i], m, BUS_DMA_READ | BUS_DMA_NOWAIT);
1541	if (r) {
1542		m_freem(m);
1543		rxq->rxq_mbufs[i] = NULL;
1544		return r;
1545	}
1546
1547	return 0;
1548}
1549
1550/* free an mbuf for receive */
1551static void
1552vioif_free_rx_mbuf(struct vioif_rxqueue *rxq, int i)
1553{
1554	struct virtio_softc *vsc = rxq->rxq_vq->vq_owner;
1555
1556	bus_dmamap_unload(virtio_dmat(vsc), rxq->rxq_dmamaps[i]);
1557	m_freem(rxq->rxq_mbufs[i]);
1558	rxq->rxq_mbufs[i] = NULL;
1559}
1560
1561/* add mbufs for all the empty receive slots */
1562static void
1563vioif_populate_rx_mbufs_locked(struct vioif_softc *sc, struct vioif_rxqueue *rxq)
1564{
1565	struct virtqueue *vq = rxq->rxq_vq;
1566	struct virtio_softc *vsc = vq->vq_owner;
1567	int i, r, ndone = 0;
1568
1569	KASSERT(mutex_owned(rxq->rxq_lock));
1570
1571	if (rxq->rxq_stopping)
1572		return;
1573
1574	for (i = 0; i < vq->vq_num; i++) {
1575		int slot;
1576		r = virtio_enqueue_prep(vsc, vq, &slot);
1577		if (r == EAGAIN)
1578			break;
1579		if (r != 0)
1580			panic("enqueue_prep for rx buffers");
1581		if (rxq->rxq_mbufs[slot] == NULL) {
1582			r = vioif_add_rx_mbuf(rxq, slot);
1583			if (r != 0) {
1584				rxq->rxq_mbuf_add_failed.ev_count++;
1585				break;
1586			}
1587		}
1588		r = virtio_enqueue_reserve(vsc, vq, slot,
1589		    rxq->rxq_dmamaps[slot]->dm_nsegs + 1);
1590		if (r != 0) {
1591			vioif_free_rx_mbuf(rxq, slot);
1592			break;
1593		}
1594		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_hdr_dmamaps[slot],
1595		    0, sc->sc_hdr_size, BUS_DMASYNC_PREREAD);
1596		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_dmamaps[slot],
1597		    0, MCLBYTES, BUS_DMASYNC_PREREAD);
1598		virtio_enqueue(vsc, vq, slot, rxq->rxq_hdr_dmamaps[slot],
1599		    false);
1600		virtio_enqueue(vsc, vq, slot, rxq->rxq_dmamaps[slot], false);
1601		virtio_enqueue_commit(vsc, vq, slot, false);
1602		ndone++;
1603	}
1604	if (ndone > 0)
1605		virtio_enqueue_commit(vsc, vq, -1, true);
1606}
1607
1608static void
1609vioif_rx_queue_clear(struct vioif_rxqueue *rxq)
1610{
1611	struct virtqueue *vq = rxq->rxq_vq;
1612	struct virtio_softc *vsc = vq->vq_owner;
1613	struct vioif_softc *sc = device_private(virtio_child(vsc));
1614	u_int limit = UINT_MAX;
1615	bool more;
1616
1617	KASSERT(rxq->rxq_stopping);
1618
1619	mutex_enter(rxq->rxq_lock);
1620	for (;;) {
1621		more = vioif_rx_deq_locked(sc, vsc, rxq, limit);
1622		if (more == false)
1623			break;
1624	}
1625	mutex_exit(rxq->rxq_lock);
1626}
1627
1628/* dequeue received packets */
1629static bool
1630vioif_rx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
1631    struct vioif_rxqueue *rxq, u_int limit)
1632{
1633	struct virtqueue *vq = rxq->rxq_vq;
1634	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1635	struct mbuf *m;
1636	int slot, len;
1637	bool more = false, dequeued = false;
1638
1639	KASSERT(mutex_owned(rxq->rxq_lock));
1640
1641	if (virtio_vq_is_enqueued(vsc, vq) == false)
1642		return false;
1643
1644	for (;;) {
1645		if (limit-- == 0) {
1646			more = true;
1647			break;
1648		}
1649
1650		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
1651			break;
1652
1653		dequeued = true;
1654
1655		len -= sc->sc_hdr_size;
1656		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_hdr_dmamaps[slot],
1657		    0, sc->sc_hdr_size, BUS_DMASYNC_POSTREAD);
1658		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_dmamaps[slot],
1659		    0, MCLBYTES, BUS_DMASYNC_POSTREAD);
1660		m = rxq->rxq_mbufs[slot];
1661		KASSERT(m != NULL);
1662		bus_dmamap_unload(virtio_dmat(vsc), rxq->rxq_dmamaps[slot]);
1663		rxq->rxq_mbufs[slot] = NULL;
1664		virtio_dequeue_commit(vsc, vq, slot);
1665		m_set_rcvif(m, ifp);
1666		m->m_len = m->m_pkthdr.len = len;
1667
1668		mutex_exit(rxq->rxq_lock);
1669		if_percpuq_enqueue(ifp->if_percpuq, m);
1670		mutex_enter(rxq->rxq_lock);
1671
1672		if (rxq->rxq_stopping)
1673			break;
1674	}
1675
1676	if (dequeued)
1677		vioif_populate_rx_mbufs_locked(sc, rxq);
1678
1679	return more;
1680}
1681
1682/* rx interrupt; call _dequeue above and schedule a softint */
1683
1684static void
1685vioif_rx_handle_locked(void *xrxq, u_int limit)
1686{
1687	struct vioif_rxqueue *rxq = xrxq;
1688	struct virtqueue *vq = rxq->rxq_vq;
1689	struct virtio_softc *vsc = vq->vq_owner;
1690	struct vioif_softc *sc = device_private(virtio_child(vsc));
1691	bool more;
1692
1693	KASSERT(!rxq->rxq_stopping);
1694
1695	more = vioif_rx_deq_locked(sc, vsc, rxq, limit);
1696	if (more) {
1697		vioif_rx_sched_handle(sc, rxq);
1698		return;
1699	}
1700	more = virtio_start_vq_intr(vsc, rxq->rxq_vq);
1701	if (more) {
1702		vioif_rx_sched_handle(sc, rxq);
1703		return;
1704	}
1705	atomic_store_relaxed(&rxq->rxq_active, false);
1706}
1707
1708static int
1709vioif_rx_intr(void *arg)
1710{
1711	struct vioif_rxqueue *rxq = arg;
1712	struct virtqueue *vq = rxq->rxq_vq;
1713	struct virtio_softc *vsc = vq->vq_owner;
1714	struct vioif_softc *sc = device_private(virtio_child(vsc));
1715	u_int limit;
1716
1717	limit = sc->sc_rx_intr_process_limit;
1718
1719	if (atomic_load_relaxed(&rxq->rxq_active) == true)
1720		return 1;
1721
1722	mutex_enter(rxq->rxq_lock);
1723
1724	if (!rxq->rxq_stopping) {
1725		rxq->rxq_workqueue = sc->sc_txrx_workqueue_sysctl;
1726
1727		virtio_stop_vq_intr(vsc, vq);
1728		atomic_store_relaxed(&rxq->rxq_active, true);
1729
1730		vioif_rx_handle_locked(rxq, limit);
1731	}
1732
1733	mutex_exit(rxq->rxq_lock);
1734	return 1;
1735}
1736
1737static void
1738vioif_rx_handle(void *xrxq)
1739{
1740	struct vioif_rxqueue *rxq = xrxq;
1741	struct virtqueue *vq = rxq->rxq_vq;
1742	struct virtio_softc *vsc = vq->vq_owner;
1743	struct vioif_softc *sc = device_private(virtio_child(vsc));
1744	u_int limit;
1745
1746	limit = sc->sc_rx_process_limit;
1747
1748	mutex_enter(rxq->rxq_lock);
1749
1750	if (!rxq->rxq_stopping)
1751		vioif_rx_handle_locked(rxq, limit);
1752
1753	mutex_exit(rxq->rxq_lock);
1754}
1755
1756static void
1757vioif_rx_sched_handle(struct vioif_softc *sc, struct vioif_rxqueue *rxq)
1758{
1759
1760	KASSERT(mutex_owned(rxq->rxq_lock));
1761
1762	if (rxq->rxq_stopping)
1763		return;
1764
1765	if (rxq->rxq_workqueue)
1766		vioif_work_add(sc->sc_txrx_workqueue, &rxq->rxq_work);
1767	else
1768		softint_schedule(rxq->rxq_handle_si);
1769}
1770
1771/* free all the mbufs; called from if_stop(disable) */
1772static void
1773vioif_rx_drain(struct vioif_rxqueue *rxq)
1774{
1775	struct virtqueue *vq = rxq->rxq_vq;
1776	int i;
1777
1778	for (i = 0; i < vq->vq_num; i++) {
1779		if (rxq->rxq_mbufs[i] == NULL)
1780			continue;
1781		vioif_free_rx_mbuf(rxq, i);
1782	}
1783}
1784
1785/*
1786 * Transmission implementation
1787 */
1788/* actual transmission is done in if_start */
1789/* tx interrupt; dequeue and free mbufs */
1790/*
1791 * the tx interrupt is normally disabled; this path is expected to run
1792 * only when the tx vq becomes full or from the watchdog
1793 */
1794
1795static void
1796vioif_tx_handle_locked(struct vioif_txqueue *txq, u_int limit)
1797{
1798	struct virtqueue *vq = txq->txq_vq;
1799	struct virtio_softc *vsc = vq->vq_owner;
1800	struct vioif_softc *sc = device_private(virtio_child(vsc));
1801	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1802	bool more;
1803
1804	KASSERT(!txq->txq_stopping);
1805
1806	more = vioif_tx_deq_locked(sc, vsc, txq, limit);
1807	if (more) {
1808		vioif_tx_sched_handle(sc, txq);
1809		return;
1810	}
1811
1812	if (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX)
1813		more = virtio_postpone_intr_smart(vsc, vq);
1814	else
1815		more = virtio_start_vq_intr(vsc, vq);
1816	if (more) {
1817		vioif_tx_sched_handle(sc, txq);
1818		return;
1819	}
1820
1821	atomic_store_relaxed(&txq->txq_active, false);
1822	/* for ALTQ */
1823	if (txq == &sc->sc_txq[0]) {
1824		if_schedule_deferred_start(ifp);
1825		ifp->if_flags &= ~IFF_OACTIVE;
1826	}
1827	softint_schedule(txq->txq_deferred_transmit);
1828}
1829
1830
1831static int
1832vioif_tx_intr(void *arg)
1833{
1834	struct vioif_txqueue *txq = arg;
1835	struct virtqueue *vq = txq->txq_vq;
1836	struct virtio_softc *vsc = vq->vq_owner;
1837	struct vioif_softc *sc = device_private(virtio_child(vsc));
1838	u_int limit;
1839
1840	limit = sc->sc_tx_intr_process_limit;
1841
1842	if (atomic_load_relaxed(&txq->txq_active) == true)
1843		return 1;
1844
1845	mutex_enter(txq->txq_lock);
1846
1847	if (!txq->txq_stopping) {
1848		txq->txq_workqueue = sc->sc_txrx_workqueue_sysctl;
1849
1850		virtio_stop_vq_intr(vsc, vq);
1851		atomic_store_relaxed(&txq->txq_active, true);
1852
1853		vioif_tx_handle_locked(txq, limit);
1854	}
1855
1856	mutex_exit(txq->txq_lock);
1857
1858	return 1;
1859}
1860
1861static void
1862vioif_tx_handle(void *xtxq)
1863{
1864	struct vioif_txqueue *txq = xtxq;
1865	struct virtqueue *vq = txq->txq_vq;
1866	struct virtio_softc *vsc = vq->vq_owner;
1867	struct vioif_softc *sc = device_private(virtio_child(vsc));
1868	u_int limit;
1869
1870	limit = sc->sc_tx_process_limit;
1871
1872	mutex_enter(txq->txq_lock);
1873	if (!txq->txq_stopping)
1874		vioif_tx_handle_locked(txq, limit);
1875	mutex_exit(txq->txq_lock);
1876}
1877
1878static void
1879vioif_tx_sched_handle(struct vioif_softc *sc, struct vioif_txqueue *txq)
1880{
1881
1882	KASSERT(mutex_owned(txq->txq_lock));
1883
1884	if (txq->txq_stopping)
1885		return;
1886
1887	if (txq->txq_workqueue)
1888		vioif_work_add(sc->sc_txrx_workqueue, &txq->txq_work);
1889	else
1890		softint_schedule(txq->txq_handle_si);
1891}
1892
1893static void
1894vioif_tx_queue_clear(struct vioif_txqueue *txq)
1895{
1896	struct virtqueue *vq = txq->txq_vq;
1897	struct virtio_softc *vsc = vq->vq_owner;
1898	struct vioif_softc *sc = device_private(virtio_child(vsc));
1899	u_int limit = UINT_MAX;
1900	bool more;
1901
1902	mutex_enter(txq->txq_lock);
1903	for (;;) {
1904		more = vioif_tx_deq_locked(sc, vsc, txq, limit);
1905		if (more == false)
1906			break;
1907	}
1908	mutex_exit(txq->txq_lock);
1909}
1910
1911static bool
1912vioif_tx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
1913    struct vioif_txqueue *txq, u_int limit)
1914{
1915	struct virtqueue *vq = txq->txq_vq;
1916	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1917	struct mbuf *m;
1918	int slot, len;
1919	bool more = false;
1920
1921	KASSERT(mutex_owned(txq->txq_lock));
1922
1923	if (virtio_vq_is_enqueued(vsc, vq) == false)
1924		return false;
1925
1926	for (;;) {
1927		if (limit-- == 0) {
1928			more = true;
1929			break;
1930		}
1931
1932		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
1933			break;
1934
1935		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_hdr_dmamaps[slot],
1936		    0, sc->sc_hdr_size, BUS_DMASYNC_POSTWRITE);
1937		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_dmamaps[slot],
1938		    0, txq->txq_dmamaps[slot]->dm_mapsize,
1939		    BUS_DMASYNC_POSTWRITE);
1940		m = txq->txq_mbufs[slot];
1941		bus_dmamap_unload(virtio_dmat(vsc), txq->txq_dmamaps[slot]);
1942		txq->txq_mbufs[slot] = NULL;
1943		virtio_dequeue_commit(vsc, vq, slot);
1944		if_statinc(ifp, if_opackets);
1945		m_freem(m);
1946	}
1947
1948	return more;
1949}
1950
1951/* free all the mbufs already put on vq; called from if_stop(disable) */
1952static void
1953vioif_tx_drain(struct vioif_txqueue *txq)
1954{
1955	struct virtqueue *vq = txq->txq_vq;
1956	struct virtio_softc *vsc = vq->vq_owner;
1957	int i;
1958
1959	KASSERT(txq->txq_stopping);
1960
1961	for (i = 0; i < vq->vq_num; i++) {
1962		if (txq->txq_mbufs[i] == NULL)
1963			continue;
1964		bus_dmamap_unload(virtio_dmat(vsc), txq->txq_dmamaps[i]);
1965		m_freem(txq->txq_mbufs[i]);
1966		txq->txq_mbufs[i] = NULL;
1967	}
1968}
1969
1970/*
1971 * Control vq
1972 */
1973/* get exclusive use of the control virtqueue; sleeps until it is free */
1974static void
1975vioif_ctrl_acquire(struct vioif_softc *sc)
1976{
1977	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1978
1979	mutex_enter(&ctrlq->ctrlq_wait_lock);
1980	while (ctrlq->ctrlq_inuse != FREE)
1981		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
1982	ctrlq->ctrlq_inuse = INUSE;
1983	ctrlq->ctrlq_owner = curlwp;
1984	mutex_exit(&ctrlq->ctrlq_wait_lock);
1985}
1986
1987static void
1988vioif_ctrl_release(struct vioif_softc *sc)
1989{
1990	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1991
1992	KASSERT(ctrlq->ctrlq_inuse != FREE);
1993	KASSERT(ctrlq->ctrlq_owner == curlwp);
1994
1995	mutex_enter(&ctrlq->ctrlq_wait_lock);
1996	ctrlq->ctrlq_inuse = FREE;
1997	ctrlq->ctrlq_owner = NULL;
1998	cv_signal(&ctrlq->ctrlq_wait);
1999	mutex_exit(&ctrlq->ctrlq_wait_lock);
2000}
2001
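/* load the buffers of a control command into their dmamaps; unwind on failure */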
2002static int
2003vioif_ctrl_load_cmdspec(struct vioif_softc *sc,
2004    struct vioif_ctrl_cmdspec *specs, int nspecs)
2005{
2006	struct virtio_softc *vsc = sc->sc_virtio;
2007	int i, r, loaded;
2008
2009	loaded = 0;
2010	for (i = 0; i < nspecs; i++) {
2011		r = bus_dmamap_load(virtio_dmat(vsc),
2012		    specs[i].dmamap, specs[i].buf, specs[i].bufsize,
2013		    NULL, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
2014		if (r) {
2015			sc->sc_ctrlq.ctrlq_cmd_load_failed.ev_count++;
2016			goto err;
2017		}
2018		loaded++;
2019
2020	}
2021
2022	return 0;
2023
2024err:
2025	for (i = 0; i < loaded; i++) {
2026		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2027	}
2028
2029	return r;
2030}
2031
2032static void
2033vioif_ctrl_unload_cmdspec(struct vioif_softc *sc,
2034    struct vioif_ctrl_cmdspec *specs, int nspecs)
2035{
2036	struct virtio_softc *vsc = sc->sc_virtio;
2037	int i;
2038
2039	for (i = 0; i < nspecs; i++) {
2040		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2041	}
2042}
2043
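/*
 * Send one control command and sleep until vioif_ctrl_intr() marks it DONE.
 * The descriptor chain is the command header and the caller's payload
 * buffers (device-readable) followed by the status byte (device-writable).
 * Called with the control queue held via vioif_ctrl_acquire().
 */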
2044static int
2045vioif_ctrl_send_command(struct vioif_softc *sc, uint8_t class, uint8_t cmd,
2046    struct vioif_ctrl_cmdspec *specs, int nspecs)
2047{
2048	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2049	struct virtqueue *vq = ctrlq->ctrlq_vq;
2050	struct virtio_softc *vsc = sc->sc_virtio;
2051	int i, r, slot;
2052
2053	ctrlq->ctrlq_cmd->class = class;
2054	ctrlq->ctrlq_cmd->command = cmd;
2055
2056	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap,
2057	    0, sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_PREWRITE);
2058	for (i = 0; i < nspecs; i++) {
2059		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap,
2060		    0, specs[i].bufsize, BUS_DMASYNC_PREWRITE);
2061	}
2062	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap,
2063	    0, sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_PREREAD);
2064
2065	/* we need to explicitly (re)start vq intr when using RING EVENT IDX */
2066	if (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX)
2067		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
2068
2069	r = virtio_enqueue_prep(vsc, vq, &slot);
2070	if (r != 0)
2071		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2072	r = virtio_enqueue_reserve(vsc, vq, slot, nspecs + 2);
2073	if (r != 0)
2074		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2075	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_cmd_dmamap, true);
2076	for (i = 0; i < nspecs; i++) {
2077		virtio_enqueue(vsc, vq, slot, specs[i].dmamap, true);
2078	}
2079	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_status_dmamap, false);
2080	virtio_enqueue_commit(vsc, vq, slot, true);
2081
2082	/* wait for done */
2083	mutex_enter(&ctrlq->ctrlq_wait_lock);
2084	while (ctrlq->ctrlq_inuse != DONE)
2085		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
2086	mutex_exit(&ctrlq->ctrlq_wait_lock);
2087	/* already dequeued */
2088
2089	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap, 0,
2090	    sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
2091	for (i = 0; i < nspecs; i++) {
2092		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap, 0,
2093		    specs[i].bufsize, BUS_DMASYNC_POSTWRITE);
2094	}
2095	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap, 0,
2096	    sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_POSTREAD);
2097
2098	if (ctrlq->ctrlq_status->ack == VIRTIO_NET_OK) {
2099		r = 0;
2100	} else {
2101		device_printf(sc->sc_dev, "control command failed\n");
2102		sc->sc_ctrlq.ctrlq_cmd_failed.ev_count++;
2103		r = EIO;
2104	}
2105
2106	return r;
2107}
2108
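/* issue a VIRTIO_NET_CTRL_RX class command and wait for completion */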
2109static int
2110vioif_ctrl_rx(struct vioif_softc *sc, int cmd, bool onoff)
2111{
2112	struct virtio_net_ctrl_rx *rx = sc->sc_ctrlq.ctrlq_rx;
2113	struct vioif_ctrl_cmdspec specs[1];
2114	int r;
2115
2116	if (!sc->sc_has_ctrl)
2117		return ENOTSUP;
2118
2119	vioif_ctrl_acquire(sc);
2120
2121	rx->onoff = onoff;
2122	specs[0].dmamap = sc->sc_ctrlq.ctrlq_rx_dmamap;
2123	specs[0].buf = rx;
2124	specs[0].bufsize = sizeof(*rx);
2125
2126	r = vioif_ctrl_send_command(sc, VIRTIO_NET_CTRL_RX, cmd,
2127	    specs, __arraycount(specs));
2128
2129	vioif_ctrl_release(sc);
2130	return r;
2131}
2132
2133static int
2134vioif_set_promisc(struct vioif_softc *sc, bool onoff)
2135{
2136	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, onoff);
2137}
2138
2139static int
2140vioif_set_allmulti(struct vioif_softc *sc, bool onoff)
2141{
2142	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, onoff);
2143}
2144
2145/* issue VIRTIO_NET_CTRL_MAC_TABLE_SET command and wait for completion */
2146static int
2147vioif_set_rx_filter(struct vioif_softc *sc)
2148{
2149	/* filter already set in ctrlq->ctrlq_mac_tbl */
2150	struct virtio_softc *vsc = sc->sc_virtio;
2151	struct virtio_net_ctrl_mac_tbl *mac_tbl_uc, *mac_tbl_mc;
2152	struct vioif_ctrl_cmdspec specs[2];
2153	int nspecs = __arraycount(specs);
2154	int r;
2155
2156	mac_tbl_uc = sc->sc_ctrlq.ctrlq_mac_tbl_uc;
2157	mac_tbl_mc = sc->sc_ctrlq.ctrlq_mac_tbl_mc;
2158
2159	if (!sc->sc_has_ctrl)
2160		return ENOTSUP;
2161
2162	vioif_ctrl_acquire(sc);
2163
2164	specs[0].dmamap = sc->sc_ctrlq.ctrlq_tbl_uc_dmamap;
2165	specs[0].buf = mac_tbl_uc;
2166	specs[0].bufsize = sizeof(*mac_tbl_uc)
2167	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_uc->nentries));
2168
2169	specs[1].dmamap = sc->sc_ctrlq.ctrlq_tbl_mc_dmamap;
2170	specs[1].buf = mac_tbl_mc;
2171	specs[1].bufsize = sizeof(*mac_tbl_mc)
2172	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_mc->nentries));
2173
2174	r = vioif_ctrl_load_cmdspec(sc, specs, nspecs);
2175	if (r != 0)
2176		goto out;
2177
2178	r = vioif_ctrl_send_command(sc,
2179	    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET,
2180	    specs, nspecs);
2181
2182	vioif_ctrl_unload_cmdspec(sc, specs, nspecs);
2183
2184out:
2185	vioif_ctrl_release(sc);
2186
2187	return r;
2188}
2189
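/* issue VIRTIO_NET_CTRL_MAC_ADDR_SET command and wait for completion */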
2190static int
2191vioif_set_mac_addr(struct vioif_softc *sc)
2192{
2193	struct virtio_net_ctrl_mac_addr *ma =
2194	    sc->sc_ctrlq.ctrlq_mac_addr;
2195	struct vioif_ctrl_cmdspec specs[1];
2196	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2197	int nspecs = __arraycount(specs);
2198	int r;
2199
2200	if (!sc->sc_has_ctrl)
2201		return ENOTSUP;
2202
2203	vioif_ctrl_acquire(sc);
2204
2205	memcpy(ma->mac, CLLADDR(ifp->if_sadl), ETHER_ADDR_LEN);
2206	specs[0].dmamap = sc->sc_ctrlq.ctrlq_mac_addr_dmamap;
2207	specs[0].buf = ma;
2208	specs[0].bufsize = sizeof(*ma);
2209
2210	r = vioif_ctrl_send_command(sc,
2211	    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_ADDR_SET,
2212	    specs, nspecs);
2213
2214	vioif_ctrl_release(sc);
2215
2216	return r;
2217}
2218
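/* issue VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET command to set the number of active queue pairs */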
2219static int
2220vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *sc, int nvq_pairs)
2221{
2222	struct virtio_net_ctrl_mq *mq = sc->sc_ctrlq.ctrlq_mq;
2223	struct vioif_ctrl_cmdspec specs[1];
2224	int r;
2225
2226	if (!sc->sc_has_ctrl)
2227		return ENOTSUP;
2228
2229	if (nvq_pairs <= 1)
2230		return EINVAL;
2231
2232	vioif_ctrl_acquire(sc);
2233
2234	mq->virtqueue_pairs = virtio_rw16(sc->sc_virtio, nvq_pairs);
2235	specs[0].dmamap = sc->sc_ctrlq.ctrlq_mq_dmamap;
2236	specs[0].buf = mq;
2237	specs[0].bufsize = sizeof(*mq);
2238
2239	r = vioif_ctrl_send_command(sc,
2240	    VIRTIO_NET_CTRL_MQ, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
2241	    specs, __arraycount(specs));
2242
2243	vioif_ctrl_release(sc);
2244
2245	return r;
2246}
2247
2248/* ctrl vq interrupt; wake up the command issuer */
2249static int
2250vioif_ctrl_intr(void *arg)
2251{
2252	struct vioif_ctrlqueue *ctrlq = arg;
2253	struct virtqueue *vq = ctrlq->ctrlq_vq;
2254	struct virtio_softc *vsc = vq->vq_owner;
2255	int r, slot;
2256
2257	if (virtio_vq_is_enqueued(vsc, vq) == false)
2258		return 0;
2259
2260	r = virtio_dequeue(vsc, vq, &slot, NULL);
2261	if (r == ENOENT)
2262		return 0;
2263	virtio_dequeue_commit(vsc, vq, slot);
2264
2265	mutex_enter(&ctrlq->ctrlq_wait_lock);
2266	ctrlq->ctrlq_inuse = DONE;
2267	cv_signal(&ctrlq->ctrlq_wait);
2268	mutex_exit(&ctrlq->ctrlq_wait_lock);
2269
2270	return 1;
2271}
2272
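/* apply IFF_PROMISC and IFF_ALLMULTI to the device via the control vq */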
2273static int
2274vioif_ifflags(struct vioif_softc *sc)
2275{
2276	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2277	bool onoff;
2278	int r;
2279
2280	if (!sc->sc_has_ctrl) {
2281		/* no ctrl vq; always promisc and allmulti */
2282		ifp->if_flags |= (IFF_PROMISC | IFF_ALLMULTI);
2283		return 0;
2284	}
2285
2286	onoff = ifp->if_flags & IFF_ALLMULTI ? true : false;
2287	r = vioif_set_allmulti(sc, onoff);
2288	if (r != 0) {
2289		log(LOG_WARNING,
2290		    "%s: couldn't %sable ALLMULTI\n",
2291		    ifp->if_xname, onoff ? "en" : "dis");
2292		if (onoff == false) {
2293			ifp->if_flags |= IFF_ALLMULTI;
2294		}
2295	}
2296
2297	onoff = ifp->if_flags & IFF_PROMISC ? true : false;
2298	r = vioif_set_promisc(sc, onoff);
2299	if (r != 0) {
2300		log(LOG_WARNING,
2301		    "%s: couldn't %sable PROMISC\n",
2302		    ifp->if_xname, onoff ? "en" : "dis");
2303		if (onoff == false) {
2304			ifp->if_flags |= IFF_PROMISC;
2305		}
2306	}
2307
2308	return 0;
2309}
2310
2311static int
2312vioif_ifflags_cb(struct ethercom *ec)
2313{
2314	struct ifnet *ifp = &ec->ec_if;
2315	struct vioif_softc *sc = ifp->if_softc;
2316
2317	return vioif_ifflags(sc);
2318}
2319
2320/*
2321 * If the multicast filter is small enough (<= MAXENTRIES), program the
2322 * device's RX filter with the individual addresses.  If the filter is too
2323 * large, or if programming it fails, fall back to ALLMULTI.
2324 */
2325static int
2326vioif_rx_filter(struct vioif_softc *sc)
2327{
2328	struct virtio_softc *vsc = sc->sc_virtio;
2329	struct ethercom *ec = &sc->sc_ethercom;
2330	struct ifnet *ifp = &ec->ec_if;
2331	struct ether_multi *enm;
2332	struct ether_multistep step;
2333	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2334	int nentries;
2335	bool allmulti = false;
2336	int r;
2337
2338	if (!sc->sc_has_ctrl) {
2339		goto set_ifflags;
2340	}
2341
2342	memcpy(ctrlq->ctrlq_mac_tbl_uc->macs[0],
2343	    CLLADDR(ifp->if_sadl), ETHER_ADDR_LEN);
2344
2345	nentries = 0;
2346	allmulti = false;
2347
2348	ETHER_LOCK(ec);
2349	for (ETHER_FIRST_MULTI(step, ec, enm); enm != NULL;
2350	    ETHER_NEXT_MULTI(step, enm)) {
2351		if (nentries >= VIRTIO_NET_CTRL_MAC_MAXENTRIES) {
2352			allmulti = true;
2353			break;
2354		}
2355		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
2356			allmulti = true;
2357			break;
2358		}
2359
2360		memcpy(ctrlq->ctrlq_mac_tbl_mc->macs[nentries],
2361		    enm->enm_addrlo, ETHER_ADDR_LEN);
2362		nentries++;
2363	}
2364	ETHER_UNLOCK(ec);
2365
2366	r = vioif_set_mac_addr(sc);
2367	if (r != 0) {
2368		log(LOG_WARNING, "%s: couldn't set MAC address\n",
2369		    ifp->if_xname);
2370	}
2371
2372	if (!allmulti) {
2373		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 1);
2374		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, nentries);
2375		r = vioif_set_rx_filter(sc);
2376		if (r != 0) {
2377			allmulti = true; /* fallback */
2378		}
2379	}
2380
2381	if (allmulti) {
2382		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 0);
2383		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, 0);
2384		r = vioif_set_rx_filter(sc);
2385		if (r != 0) {
2386			log(LOG_DEBUG, "%s: couldn't clear RX filter\n",
2387			    ifp->if_xname);
2388			/* what to do on failure? */
2389		}
2390
2391		ifp->if_flags |= IFF_ALLMULTI;
2392	}
2393
2394set_ifflags:
2395	r = vioif_ifflags(sc);
2396
2397	return r;
2398}
2399
2400static bool
2401vioif_is_link_up(struct vioif_softc *sc)
2402{
2403	struct virtio_softc *vsc = sc->sc_virtio;
2404	uint16_t status;
2405
2406	if (virtio_features(vsc) & VIRTIO_NET_F_STATUS)
2407		status = virtio_read_device_config_2(vsc,
2408		    VIRTIO_NET_CONFIG_STATUS);
2409	else
2410		status = VIRTIO_NET_S_LINK_UP;
2411
2412	return ((status & VIRTIO_NET_S_LINK_UP) != 0);
2413}
2414
2415/* update cached link state and propagate changes to the Tx queues and ifnet */
2416static void
2417vioif_update_link_status(struct vioif_softc *sc)
2418{
2419	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2420	struct vioif_txqueue *txq;
2421	bool active, changed;
2422	int link, i;
2423
2424	mutex_enter(&sc->sc_lock);
2425
2426	active = vioif_is_link_up(sc);
2427	changed = false;
2428
2429	if (active) {
2430		if (!sc->sc_link_active)
2431			changed = true;
2432
2433		link = LINK_STATE_UP;
2434		sc->sc_link_active = true;
2435	} else {
2436		if (sc->sc_link_active)
2437			changed = true;
2438
2439		link = LINK_STATE_DOWN;
2440		sc->sc_link_active = false;
2441	}
2442
2443	if (changed) {
2444		for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
2445			txq = &sc->sc_txq[i];
2446
2447			mutex_enter(txq->txq_lock);
2448			txq->txq_link_active = sc->sc_link_active;
2449			mutex_exit(txq->txq_lock);
2450		}
2451
2452		if_link_state_change(ifp, link);
2453	}
2454
2455	mutex_exit(&sc->sc_lock);
2456}
2457
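/* device config change interrupt; defer handling to vioif_ctl_softint */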
2458static int
2459vioif_config_change(struct virtio_softc *vsc)
2460{
2461	struct vioif_softc *sc = device_private(virtio_child(vsc));
2462
2463	softint_schedule(sc->sc_ctl_softint);
2464	return 0;
2465}
2466
2467static void
2468vioif_ctl_softint(void *arg)
2469{
2470	struct vioif_softc *sc = arg;
2471	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2472
2473	vioif_update_link_status(sc);
2474	vioif_start(ifp);
2475}
2476
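/*
 * Thin wrappers around workqueue(9): the "added" flag ensures each
 * vioif_work is enqueued at most once until it has run.
 */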
2477static struct workqueue *
2478vioif_workq_create(const char *name, pri_t prio, int ipl, int flags)
2479{
2480	struct workqueue *wq;
2481	int error;
2482
2483	error = workqueue_create(&wq, name, vioif_workq_work, NULL,
2484	    prio, ipl, flags);
2485
2486	if (error)
2487		return NULL;
2488
2489	return wq;
2490}
2491
2492static void
2493vioif_workq_destroy(struct workqueue *wq)
2494{
2495
2496	workqueue_destroy(wq);
2497}
2498
2499static void
2500vioif_workq_work(struct work *wk, void *context)
2501{
2502	struct vioif_work *work;
2503
2504	work = container_of(wk, struct vioif_work, cookie);
2505
2506	atomic_store_relaxed(&work->added, 0);
2507	work->func(work->arg);
2508}
2509
2510static void
2511vioif_work_set(struct vioif_work *work, void (*func)(void *), void *arg)
2512{
2513
2514	memset(work, 0, sizeof(*work));
2515	work->func = func;
2516	work->arg = arg;
2517}
2518
2519static void
2520vioif_work_add(struct workqueue *wq, struct vioif_work *work)
2521{
2522
2523	if (atomic_load_relaxed(&work->added) != 0)
2524		return;
2525
2526	atomic_store_relaxed(&work->added, 1);
2527	kpreempt_disable();
2528	workqueue_enqueue(wq, &work->cookie, NULL);
2529	kpreempt_enable();
2530}
2531
2532static void
2533vioif_work_wait(struct workqueue *wq, struct vioif_work *work)
2534{
2535
2536	workqueue_wait(wq, &work->cookie);
2537}
2538
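/*
 * Per-device sysctl subtree, e.g. for vioif0 (illustrative device name):
 *   hw.vioif0.txrx_workqueue
 *   hw.vioif0.rx.intr_process_limit   hw.vioif0.rx.process_limit
 *   hw.vioif0.tx.intr_process_limit   hw.vioif0.tx.process_limit
 * e.g. "sysctl -w hw.vioif0.txrx_workqueue=1" selects workqueue processing.
 */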
2539static int
2540vioif_setup_sysctl(struct vioif_softc *sc)
2541{
2542	const char *devname;
2543	struct sysctllog **log;
2544	const struct sysctlnode *rnode, *rxnode, *txnode;
2545	int error;
2546
2547	log = &sc->sc_sysctllog;
2548	devname = device_xname(sc->sc_dev);
2549
2550	error = sysctl_createv(log, 0, NULL, &rnode,
2551	    0, CTLTYPE_NODE, devname,
2552	    SYSCTL_DESCR("virtio-net information and settings"),
2553	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);
2554	if (error)
2555		goto out;
2556
2557	error = sysctl_createv(log, 0, &rnode, NULL,
2558	    CTLFLAG_READWRITE, CTLTYPE_BOOL, "txrx_workqueue",
2559	    SYSCTL_DESCR("Use workqueue for packet processing"),
2560	    NULL, 0, &sc->sc_txrx_workqueue_sysctl, 0, CTL_CREATE, CTL_EOL);
2561	if (error)
2562		goto out;
2563
2564	error = sysctl_createv(log, 0, &rnode, &rxnode,
2565	    0, CTLTYPE_NODE, "rx",
2566	    SYSCTL_DESCR("virtio-net information and settings for Rx"),
2567	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
2568	if (error)
2569		goto out;
2570
2571	error = sysctl_createv(log, 0, &rxnode, NULL,
2572	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
2573	    SYSCTL_DESCR("max number of Rx packets to process for interrupt processing"),
2574	    NULL, 0, &sc->sc_rx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
2575	if (error)
2576		goto out;
2577
2578	error = sysctl_createv(log, 0, &rxnode, NULL,
2579	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
2580	    SYSCTL_DESCR("max number of Rx packets to process for deferred processing"),
2581	    NULL, 0, &sc->sc_rx_process_limit, 0, CTL_CREATE, CTL_EOL);
2582	if (error)
2583		goto out;
2584
2585	error = sysctl_createv(log, 0, &rnode, &txnode,
2586	    0, CTLTYPE_NODE, "tx",
2587	    SYSCTL_DESCR("virtio-net information and settings for Tx"),
2588	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
2589	if (error)
2590		goto out;
2591
2592	error = sysctl_createv(log, 0, &txnode, NULL,
2593	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
2594	    SYSCTL_DESCR("max number of Tx packets to process for interrupt processing"),
2595	    NULL, 0, &sc->sc_tx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
2596	if (error)
2597		goto out;
2598
2599	error = sysctl_createv(log, 0, &txnode, NULL,
2600	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
2601	    SYSCTL_DESCR("max number of Tx packets to process for deferred processing"),
2602	    NULL, 0, &sc->sc_tx_process_limit, 0, CTL_CREATE, CTL_EOL);
2603
2604out:
2605	if (error)
2606		sysctl_teardown(log);
2607
2608	return error;
2609}
2610
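/* attach evcnt(9) counters for per-queue Tx/Rx errors and control command failures */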
2611static void
2612vioif_setup_stats(struct vioif_softc *sc)
2613{
2614	struct vioif_rxqueue *rxq;
2615	struct vioif_txqueue *txq;
2616	int i;
2617
2618	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
2619		rxq = &sc->sc_rxq[i];
2620		txq = &sc->sc_txq[i];
2621
2622		snprintf(txq->txq_evgroup, sizeof(txq->txq_evgroup), "%s-TX%d",
2623		    device_xname(sc->sc_dev), i);
2624		evcnt_attach_dynamic(&txq->txq_defrag_failed, EVCNT_TYPE_MISC,
2625		    NULL, txq->txq_evgroup, "tx m_defrag() failed");
2626		evcnt_attach_dynamic(&txq->txq_mbuf_load_failed, EVCNT_TYPE_MISC,
2627		    NULL, txq->txq_evgroup, "tx dmamap load failed");
2628		evcnt_attach_dynamic(&txq->txq_enqueue_reserve_failed, EVCNT_TYPE_MISC,
2629		    NULL, txq->txq_evgroup, "virtio_enqueue_reserve failed");
2630
2631		snprintf(rxq->rxq_evgroup, sizeof(rxq->rxq_evgroup), "%s-RX%d",
2632		    device_xname(sc->sc_dev), i);
2633		evcnt_attach_dynamic(&rxq->rxq_mbuf_add_failed, EVCNT_TYPE_MISC,
2634		    NULL, rxq->rxq_evgroup, "rx mbuf allocation failed");
2635	}
2636
2637	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_load_failed, EVCNT_TYPE_MISC,
2638	    NULL, device_xname(sc->sc_dev), "control command dmamap load failed");
2639	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_failed, EVCNT_TYPE_MISC,
2640	    NULL, device_xname(sc->sc_dev), "control command failed");
2641}
2642
2643MODULE(MODULE_CLASS_DRIVER, if_vioif, "virtio");
2644
2645#ifdef _MODULE
2646#include "ioconf.c"
2647#endif
2648
2649static int
2650if_vioif_modcmd(modcmd_t cmd, void *opaque)
2651{
2652	int error = 0;
2653
2654#ifdef _MODULE
2655	switch (cmd) {
2656	case MODULE_CMD_INIT:
2657		error = config_init_component(cfdriver_ioconf_if_vioif,
2658		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2659		break;
2660	case MODULE_CMD_FINI:
2661		error = config_fini_component(cfdriver_ioconf_if_vioif,
2662		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2663		break;
2664	default:
2665		error = ENOTTY;
2666		break;
2667	}
2668#endif
2669
2670	return error;
2671}
2672