if_vioif.c revision 1.94
1/*	$NetBSD: if_vioif.c,v 1.94 2023/03/23 02:15:53 yamaguchi Exp $	*/
2
3/*
4 * Copyright (c) 2020 The NetBSD Foundation, Inc.
5 * Copyright (c) 2010 Minoura Makoto.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__KERNEL_RCSID(0, "$NetBSD: if_vioif.c,v 1.94 2023/03/23 02:15:53 yamaguchi Exp $");
31
32#ifdef _KERNEL_OPT
33#include "opt_net_mpsafe.h"
34#endif
35
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/kernel.h>
39#include <sys/atomic.h>
40#include <sys/bus.h>
41#include <sys/condvar.h>
42#include <sys/device.h>
43#include <sys/evcnt.h>
44#include <sys/intr.h>
45#include <sys/kmem.h>
46#include <sys/mbuf.h>
47#include <sys/mutex.h>
48#include <sys/sockio.h>
49#include <sys/syslog.h>
50#include <sys/cpu.h>
51#include <sys/module.h>
52#include <sys/pcq.h>
53#include <sys/workqueue.h>
54#include <sys/xcall.h>
55
56#include <dev/pci/virtioreg.h>
57#include <dev/pci/virtiovar.h>
58
59#include <net/if.h>
60#include <net/if_dl.h>
61#include <net/if_media.h>
62#include <net/if_ether.h>
63
64#include <net/bpf.h>
65
66#include "ioconf.h"
67
68#ifdef NET_MPSAFE
69#define VIOIF_MPSAFE	1
70#define VIOIF_MULTIQ	1
71#endif
72
73/*
74 * if_vioifreg.h:
75 */
76/* Configuration registers */
77#define VIRTIO_NET_CONFIG_MAC		 0 /* 8bit x 6byte */
78#define VIRTIO_NET_CONFIG_STATUS	 6 /* 16bit */
79#define VIRTIO_NET_CONFIG_MAX_VQ_PAIRS	 8 /* 16bit */
80#define VIRTIO_NET_CONFIG_MTU		10 /* 16bit */
81
82/* Feature bits */
83#define VIRTIO_NET_F_CSUM		__BIT(0)
84#define VIRTIO_NET_F_GUEST_CSUM		__BIT(1)
85#define VIRTIO_NET_F_MAC		__BIT(5)
86#define VIRTIO_NET_F_GSO		__BIT(6)
87#define VIRTIO_NET_F_GUEST_TSO4		__BIT(7)
88#define VIRTIO_NET_F_GUEST_TSO6		__BIT(8)
89#define VIRTIO_NET_F_GUEST_ECN		__BIT(9)
90#define VIRTIO_NET_F_GUEST_UFO		__BIT(10)
91#define VIRTIO_NET_F_HOST_TSO4		__BIT(11)
92#define VIRTIO_NET_F_HOST_TSO6		__BIT(12)
93#define VIRTIO_NET_F_HOST_ECN		__BIT(13)
94#define VIRTIO_NET_F_HOST_UFO		__BIT(14)
95#define VIRTIO_NET_F_MRG_RXBUF		__BIT(15)
96#define VIRTIO_NET_F_STATUS		__BIT(16)
97#define VIRTIO_NET_F_CTRL_VQ		__BIT(17)
98#define VIRTIO_NET_F_CTRL_RX		__BIT(18)
99#define VIRTIO_NET_F_CTRL_VLAN		__BIT(19)
100#define VIRTIO_NET_F_CTRL_RX_EXTRA	__BIT(20)
101#define VIRTIO_NET_F_GUEST_ANNOUNCE	__BIT(21)
102#define VIRTIO_NET_F_MQ			__BIT(22)
103#define VIRTIO_NET_F_CTRL_MAC_ADDR	__BIT(23)
104
105#define VIRTIO_NET_FLAG_BITS			\
106	VIRTIO_COMMON_FLAG_BITS			\
107	"b\x17" "CTRL_MAC\0"			\
108	"b\x16" "MQ\0"				\
109	"b\x15" "GUEST_ANNOUNCE\0"		\
110	"b\x14" "CTRL_RX_EXTRA\0"		\
111	"b\x13" "CTRL_VLAN\0"			\
112	"b\x12" "CTRL_RX\0"			\
113	"b\x11" "CTRL_VQ\0"			\
114	"b\x10" "STATUS\0"			\
115	"b\x0f" "MRG_RXBUF\0"			\
116	"b\x0e" "HOST_UFO\0"			\
117	"b\x0d" "HOST_ECN\0"			\
118	"b\x0c" "HOST_TSO6\0"			\
119	"b\x0b" "HOST_TSO4\0"			\
120	"b\x0a" "GUEST_UFO\0"			\
121	"b\x09" "GUEST_ECN\0"			\
122	"b\x08" "GUEST_TSO6\0"			\
123	"b\x07" "GUEST_TSO4\0"			\
124	"b\x06" "GSO\0"				\
125	"b\x05" "MAC\0"				\
126	"b\x01" "GUEST_CSUM\0"			\
127	"b\x00" "CSUM\0"
128
129/* Status */
130#define VIRTIO_NET_S_LINK_UP	1
131
132/* Packet header structure */
133struct virtio_net_hdr {
134	uint8_t		flags;
135	uint8_t		gso_type;
136	uint16_t	hdr_len;
137	uint16_t	gso_size;
138	uint16_t	csum_start;
139	uint16_t	csum_offset;
140
141	uint16_t	num_buffers; /* VIRTIO_NET_F_MRG_RXBUF enabled or v1 */
142} __packed;
143
144#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
145#define VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
146#define VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
147#define VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
148#define VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
149#define VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */
150
151#define VIRTIO_NET_MAX_GSO_LEN		(65536+ETHER_HDR_LEN)
152
153/* Control virtqueue */
154struct virtio_net_ctrl_cmd {
155	uint8_t	class;
156	uint8_t	command;
157} __packed;
158#define VIRTIO_NET_CTRL_RX		0
159# define VIRTIO_NET_CTRL_RX_PROMISC	0
160# define VIRTIO_NET_CTRL_RX_ALLMULTI	1
161
162#define VIRTIO_NET_CTRL_MAC		1
163# define VIRTIO_NET_CTRL_MAC_TABLE_SET	0
164# define  VIRTIO_NET_CTRL_MAC_ADDR_SET	1
165
166#define VIRTIO_NET_CTRL_VLAN		2
167# define VIRTIO_NET_CTRL_VLAN_ADD	0
168# define VIRTIO_NET_CTRL_VLAN_DEL	1
169
170#define VIRTIO_NET_CTRL_MQ			4
171# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET	0
172# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN	1
173# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX	0x8000
174
175struct virtio_net_ctrl_status {
176	uint8_t	ack;
177} __packed;
178#define VIRTIO_NET_OK			0
179#define VIRTIO_NET_ERR			1
180
181struct virtio_net_ctrl_rx {
182	uint8_t	onoff;
183} __packed;
184
185struct virtio_net_ctrl_mac_tbl {
186	uint32_t nentries;
187	uint8_t macs[][ETHER_ADDR_LEN];
188} __packed;
189
190struct virtio_net_ctrl_mac_addr {
191	uint8_t mac[ETHER_ADDR_LEN];
192} __packed;
193
194struct virtio_net_ctrl_vlan {
195	uint16_t id;
196} __packed;
197
198struct virtio_net_ctrl_mq {
199	uint16_t virtqueue_pairs;
200} __packed;
201
202/*
203 * if_vioifvar.h:
204 */
205
206/*
207 * Locking notes:
208 * + fields in vioif_txqueue are protected by txq_lock (a spin mutex), and
209 *   fields in vioif_rxqueue are protected by rxq_lock (a spin mutex).
210 *      - no more than one of these locks may be held at once
211 * + ctrlq_inuse is protected by ctrlq_wait_lock.
212 *      - other fields in vioif_ctrlqueue are protected by ctrlq_inuse
213 *      - txq_lock or rxq_lock cannot be held along with ctrlq_wait_lock
214 * + fields in vioif_softc other than the queues are protected by
215 *   sc->sc_lock (an adaptive mutex)
216 *      - sc_lock is acquired before any of the other locks
217 */
218
219struct vioif_ctrl_cmdspec {
220	bus_dmamap_t	dmamap;
221	void		*buf;
222	bus_size_t	bufsize;
223};
224
225struct vioif_work {
226	struct work	 cookie;
227	void		(*func)(void *);
228	void		*arg;
229	unsigned int	 added;
230};
231
232struct vioif_txqueue {
233	kmutex_t		*txq_lock;	/* lock for tx operations */
234
235	struct virtqueue	*txq_vq;
236	bool			txq_stopping;
237	bool			txq_link_active;
238	pcq_t			*txq_intrq;
239
240	struct virtio_net_hdr	*txq_hdrs;
241	bus_dmamap_t		*txq_hdr_dmamaps;
242
243	struct mbuf		**txq_mbufs;
244	bus_dmamap_t		*txq_dmamaps;
245
246	void			*txq_deferred_transmit;
247	void			*txq_handle_si;
248	struct vioif_work	 txq_work;
249	bool			 txq_workqueue;
250	bool			 txq_running_handle;
251
252	char			 txq_evgroup[16];
253	struct evcnt		 txq_defrag_failed;
254	struct evcnt		 txq_mbuf_load_failed;
255	struct evcnt		 txq_enqueue_reserve_failed;
256};
257
258struct vioif_rxqueue {
259	kmutex_t		*rxq_lock;	/* lock for rx operations */
260
261	struct virtqueue	*rxq_vq;
262	bool			rxq_stopping;
263
264	struct virtio_net_hdr	*rxq_hdrs;
265	bus_dmamap_t		*rxq_hdr_dmamaps;
266
267	struct mbuf		**rxq_mbufs;
268	bus_dmamap_t		*rxq_dmamaps;
269
270	void			*rxq_handle_si;
271	struct vioif_work	 rxq_work;
272	bool			 rxq_workqueue;
273	bool			 rxq_running_handle;
274
275	char			 rxq_evgroup[16];
276	struct evcnt		 rxq_mbuf_enobufs;
277	struct evcnt		 rxq_mbuf_load_failed;
278	struct evcnt		 rxq_enqueue_reserve_failed;
279};
280
281struct vioif_ctrlqueue {
282	struct virtqueue		*ctrlq_vq;
283	enum {
284		FREE, INUSE, DONE
285	}				ctrlq_inuse;
286	kcondvar_t			ctrlq_wait;
287	kmutex_t			ctrlq_wait_lock;
288	struct lwp			*ctrlq_owner;
289
290	struct virtio_net_ctrl_cmd	*ctrlq_cmd;
291	struct virtio_net_ctrl_status	*ctrlq_status;
292	struct virtio_net_ctrl_rx	*ctrlq_rx;
293	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_uc;
294	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_mc;
295	struct virtio_net_ctrl_mac_addr	*ctrlq_mac_addr;
296	struct virtio_net_ctrl_mq	*ctrlq_mq;
297
298	bus_dmamap_t			ctrlq_cmd_dmamap;
299	bus_dmamap_t			ctrlq_status_dmamap;
300	bus_dmamap_t			ctrlq_rx_dmamap;
301	bus_dmamap_t			ctrlq_tbl_uc_dmamap;
302	bus_dmamap_t			ctrlq_tbl_mc_dmamap;
303	bus_dmamap_t			ctrlq_mac_addr_dmamap;
304	bus_dmamap_t			ctrlq_mq_dmamap;
305
306	struct evcnt			ctrlq_cmd_load_failed;
307	struct evcnt			ctrlq_cmd_failed;
308};
309
310struct vioif_softc {
311	device_t		sc_dev;
312	kmutex_t		sc_lock;
313	struct sysctllog	*sc_sysctllog;
314
315	struct virtio_softc	*sc_virtio;
316	struct virtqueue	*sc_vqs;
317	u_int			 sc_hdr_size;
318
319	int			sc_max_nvq_pairs;
320	int			sc_req_nvq_pairs;
321	int			sc_act_nvq_pairs;
322
323	uint8_t			sc_mac[ETHER_ADDR_LEN];
324	struct ethercom		sc_ethercom;
325	int			sc_link_state;
326
327	struct vioif_txqueue	*sc_txq;
328	struct vioif_rxqueue	*sc_rxq;
329
330	bool			sc_has_ctrl;
331	struct vioif_ctrlqueue	sc_ctrlq;
332
333	bus_dma_segment_t	sc_hdr_segs[1];
334	void			*sc_dmamem;
335	void			*sc_kmem;
336
337	void			*sc_ctl_softint;
338
339	struct workqueue	*sc_txrx_workqueue;
340	bool			 sc_txrx_workqueue_sysctl;
341	u_int			 sc_tx_intr_process_limit;
342	u_int			 sc_tx_process_limit;
343	u_int			 sc_rx_intr_process_limit;
344	u_int			 sc_rx_process_limit;
345};
346#define VIRTIO_NET_TX_MAXNSEGS		(16) /* XXX */
347#define VIRTIO_NET_CTRL_MAC_MAXENTRIES	(64) /* XXX */
348
349#define VIOIF_TX_INTR_PROCESS_LIMIT	256
350#define VIOIF_TX_PROCESS_LIMIT		256
351#define VIOIF_RX_INTR_PROCESS_LIMIT	0U
352#define VIOIF_RX_PROCESS_LIMIT		256
353
354#define VIOIF_WORKQUEUE_PRI		PRI_SOFTNET
355#define VIOIF_IS_LINK_ACTIVE(_sc)	((_sc)->sc_link_state == LINK_STATE_UP ? \
356					    true : false)
357
358/* cfattach interface functions */
359static int	vioif_match(device_t, cfdata_t, void *);
360static void	vioif_attach(device_t, device_t, void *);
361static int	vioif_finalize_teardown(device_t);
362
363/* ifnet interface functions */
364static int	vioif_init(struct ifnet *);
365static void	vioif_stop(struct ifnet *, int);
366static void	vioif_start(struct ifnet *);
367static void	vioif_start_locked(struct ifnet *, struct vioif_txqueue *);
368static int	vioif_transmit(struct ifnet *, struct mbuf *);
369static void	vioif_transmit_locked(struct ifnet *, struct vioif_txqueue *);
370static int	vioif_ioctl(struct ifnet *, u_long, void *);
371static void	vioif_watchdog(struct ifnet *);
372static int	vioif_ifflags_cb(struct ethercom *);
373
374/* rx */
375static void	vioif_populate_rx_mbufs_locked(struct vioif_softc *,
376		    struct vioif_rxqueue *);
377static void	vioif_rx_queue_clear(struct vioif_softc *, struct virtio_softc *,
378		    struct vioif_rxqueue *);
379static bool	vioif_rx_deq_locked(struct vioif_softc *, struct virtio_softc *,
380		    struct vioif_rxqueue *, u_int, size_t *);
381static int	vioif_rx_intr(void *);
382static void	vioif_rx_handle(void *);
383static void	vioif_rx_sched_handle(struct vioif_softc *,
384		    struct vioif_rxqueue *);
385
386/* tx */
387static int	vioif_tx_intr(void *);
388static void	vioif_tx_handle(void *);
389static void	vioif_tx_sched_handle(struct vioif_softc *,
390		    struct vioif_txqueue *);
391static void	vioif_tx_queue_clear(struct vioif_softc *, struct virtio_softc *,
392		    struct vioif_txqueue *);
393static bool	vioif_tx_deq_locked(struct vioif_softc *, struct virtio_softc *,
394		    struct vioif_txqueue *, u_int);
395static void	vioif_deferred_transmit(void *);
396
397/* workqueue */
398static struct workqueue*
399		vioif_workq_create(const char *, pri_t, int, int);
400static void	vioif_workq_destroy(struct workqueue *);
401static void	vioif_workq_work(struct work *, void *);
402static void	vioif_work_set(struct vioif_work *, void(*)(void *), void *);
403static void	vioif_work_add(struct workqueue *, struct vioif_work *);
404static void	vioif_work_wait(struct workqueue *, struct vioif_work *);
405
406/* other control */
407static int	vioif_get_link_status(struct vioif_softc *);
408static void	vioif_update_link_status(struct vioif_softc *);
409static int	vioif_ctrl_rx(struct vioif_softc *, int, bool);
410static int	vioif_set_promisc(struct vioif_softc *, bool);
411static int	vioif_set_allmulti(struct vioif_softc *, bool);
412static int	vioif_set_rx_filter(struct vioif_softc *);
413static int	vioif_rx_filter(struct vioif_softc *);
414static int	vioif_set_mac_addr(struct vioif_softc *);
415static int	vioif_ctrl_intr(void *);
416static int	vioif_config_change(struct virtio_softc *);
417static void	vioif_ctl_softint(void *);
418static int	vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *, int);
419static void	vioif_enable_interrupt_vqpairs(struct vioif_softc *);
420static void	vioif_disable_interrupt_vqpairs(struct vioif_softc *);
421static int	vioif_setup_sysctl(struct vioif_softc *);
422static void	vioif_setup_stats(struct vioif_softc *);
423static int	vioif_ifflags(struct vioif_softc *);
424static void	vioif_intr_barrier(void);
425
426CFATTACH_DECL_NEW(vioif, sizeof(struct vioif_softc),
427		  vioif_match, vioif_attach, NULL, NULL);
428
429static int
430vioif_match(device_t parent, cfdata_t match, void *aux)
431{
432	struct virtio_attach_args *va = aux;
433
434	if (va->sc_childdevid == VIRTIO_DEVICE_ID_NETWORK)
435		return 1;
436
437	return 0;
438}
439
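/*
 * Create a DMA map of `size' bytes with up to `nsegs' segments,
 * reporting any failure against `usage' via aprint_error_dev().
 */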
440static int
441vioif_dmamap_create(struct vioif_softc *sc, bus_dmamap_t *map,
442    bus_size_t size, int nsegs, const char *usage)
443{
444	int r;
445
446	r = bus_dmamap_create(virtio_dmat(sc->sc_virtio), size,
447	    nsegs, size, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, map);
448
449	if (r != 0) {
450		aprint_error_dev(sc->sc_dev, "%s dmamap creation failed, "
451		    "error code %d\n", usage, r);
452	}
453
454	return r;
455}
456
457static void
458vioif_dmamap_destroy(struct vioif_softc *sc, bus_dmamap_t *map)
459{
460
461	if (*map) {
462		bus_dmamap_destroy(virtio_dmat(sc->sc_virtio), *map);
463		*map = NULL;
464	}
465}
466
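/*
 * Create a DMA map and immediately load `buf' into it; used for the
 * header and control structures that stay mapped while the driver is
 * attached.
 */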
467static int
468vioif_dmamap_create_load(struct vioif_softc *sc, bus_dmamap_t *map,
469    void *buf, bus_size_t size, int nsegs, int rw, const char *usage)
470{
471	int r;
472
473	r = vioif_dmamap_create(sc, map, size, nsegs, usage);
474	if (r != 0)
475		return 1;
476
477	r = bus_dmamap_load(virtio_dmat(sc->sc_virtio), *map, buf,
478	    size, NULL, rw | BUS_DMA_NOWAIT);
479	if (r != 0) {
480		vioif_dmamap_destroy(sc, map);
481		aprint_error_dev(sc->sc_dev, "%s dmamap load failed. "
482		    "error code %d\n", usage, r);
483	}
484
485	return r;
486}
487
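/*
 * Hand out `size' bytes from the linear region tracked by *p and advance
 * the cursor; used to slice the DMA and kmem allocations into per-queue
 * arrays.
 */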
488static void *
489vioif_assign_mem(intptr_t *p, size_t size)
490{
491	intptr_t rv;
492
493	rv = *p;
494	*p += size;
495
496	return (void *)rv;
497}
498
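/*
 * Allocate the rx/tx queue state and the virtqueue array: one rx/tx pair
 * of virtqueues per queue pair, plus one extra virtqueue for the control
 * channel when the device provides it.
 */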
499static void
500vioif_alloc_queues(struct vioif_softc *sc)
501{
502	int nvq_pairs = sc->sc_max_nvq_pairs;
503	int nvqs = nvq_pairs * 2;
504	int i;
505
506	KASSERT(nvq_pairs <= VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX);
507
508	sc->sc_rxq = kmem_zalloc(sizeof(sc->sc_rxq[0]) * nvq_pairs,
509	    KM_SLEEP);
510	sc->sc_txq = kmem_zalloc(sizeof(sc->sc_txq[0]) * nvq_pairs,
511	    KM_SLEEP);
512
513	if (sc->sc_has_ctrl)
514		nvqs++;
515
516	sc->sc_vqs = kmem_zalloc(sizeof(sc->sc_vqs[0]) * nvqs, KM_SLEEP);
517	nvqs = 0;
518	for (i = 0; i < nvq_pairs; i++) {
519		sc->sc_rxq[i].rxq_vq = &sc->sc_vqs[nvqs++];
520		sc->sc_txq[i].txq_vq = &sc->sc_vqs[nvqs++];
521	}
522
523	if (sc->sc_has_ctrl)
524		sc->sc_ctrlq.ctrlq_vq = &sc->sc_vqs[nvqs++];
525}
526
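/* Free the queue state and virtqueue array set up by vioif_alloc_queues(). */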
527static void
528vioif_free_queues(struct vioif_softc *sc)
529{
530	int nvq_pairs = sc->sc_max_nvq_pairs;
531	int nvqs = nvq_pairs * 2;
532
533	if (sc->sc_ctrlq.ctrlq_vq)
534		nvqs++;
535
536	if (sc->sc_txq) {
537		kmem_free(sc->sc_txq, sizeof(sc->sc_txq[0]) * nvq_pairs);
538		sc->sc_txq = NULL;
539	}
540
541	if (sc->sc_rxq) {
542		kmem_free(sc->sc_rxq, sizeof(sc->sc_rxq[0]) * nvq_pairs);
543		sc->sc_rxq = NULL;
544	}
545
546	if (sc->sc_vqs) {
547		kmem_free(sc->sc_vqs, sizeof(sc->sc_vqs[0]) * nvqs);
548		sc->sc_vqs = NULL;
549	}
550}
551
552/* allocate memory */
553/*
554 * dma memory is used for:
555 *   rxq_hdrs[slot]:	 metadata array for received frames (READ)
556 *   txq_hdrs[slot]:	 metadata array for frames to be sent (WRITE)
557 *   ctrlq_cmd:		 command to be sent via ctrl vq (WRITE)
558 *   ctrlq_status:	 return value for a command via ctrl vq (READ)
559 *   ctrlq_rx:		 parameter for a VIRTIO_NET_CTRL_RX class command
560 *			 (WRITE)
561 *   ctrlq_mac_tbl_uc:	 unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
562 *			 class command (WRITE)
563 *   ctrlq_mac_tbl_mc:	 multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
564 *			 class command (WRITE)
565 * Only one instance of each ctrlq_* structure is allocated; they are
566 * protected by the ctrlq_inuse variable and the ctrlq_wait condvar.
567 */
568/*
569 * dynamically allocated memory is used for:
570 *   rxq_hdr_dmamaps[slot]:	bus_dmamap_t array for rxq_hdrs[slot]
571 *   txq_hdr_dmamaps[slot]:	bus_dmamap_t array for txq_hdrs[slot]
572 *   rxq_dmamaps[slot]:		bus_dmamap_t array for received payload
573 *   txq_dmamaps[slot]:		bus_dmamap_t array for sent payload
574 *   rxq_mbufs[slot]:		mbuf pointer array for received frames
575 *   txq_mbufs[slot]:		mbuf pointer array for sent frames
576 */
577static int
578vioif_alloc_mems(struct vioif_softc *sc)
579{
580	struct virtio_softc *vsc = sc->sc_virtio;
581	struct vioif_txqueue *txq;
582	struct vioif_rxqueue *rxq;
583	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
584	int allocsize, allocsize2, r, rsegs, i, qid;
585	void *vaddr;
586	intptr_t p;
587
588	allocsize = 0;
589	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
590		rxq = &sc->sc_rxq[qid];
591		txq = &sc->sc_txq[qid];
592
593		allocsize += sizeof(struct virtio_net_hdr) *
594			(rxq->rxq_vq->vq_num + txq->txq_vq->vq_num);
595	}
596	if (sc->sc_has_ctrl) {
597		allocsize += sizeof(struct virtio_net_ctrl_cmd);
598		allocsize += sizeof(struct virtio_net_ctrl_status);
599		allocsize += sizeof(struct virtio_net_ctrl_rx);
600		allocsize += sizeof(struct virtio_net_ctrl_mac_tbl)
601		    + ETHER_ADDR_LEN;
602		allocsize += sizeof(struct virtio_net_ctrl_mac_tbl)
603		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES;
604		allocsize += sizeof(struct virtio_net_ctrl_mac_addr);
605		allocsize += sizeof(struct virtio_net_ctrl_mq);
606	}
607	r = bus_dmamem_alloc(virtio_dmat(vsc), allocsize, 0, 0,
608	    &sc->sc_hdr_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
609	if (r != 0) {
610		aprint_error_dev(sc->sc_dev,
611		    "DMA memory allocation failed, size %d, "
612		    "error code %d\n", allocsize, r);
613		goto err_none;
614	}
615	r = bus_dmamem_map(virtio_dmat(vsc),
616	    &sc->sc_hdr_segs[0], 1, allocsize, &vaddr, BUS_DMA_NOWAIT);
617	if (r != 0) {
618		aprint_error_dev(sc->sc_dev,
619		    "DMA memory map failed, error code %d\n", r);
620		goto err_dmamem_alloc;
621	}
622
623	memset(vaddr, 0, allocsize);
624	sc->sc_dmamem = vaddr;
625	p = (intptr_t) vaddr;
626
627	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
628		rxq = &sc->sc_rxq[qid];
629		txq = &sc->sc_txq[qid];
630
631		rxq->rxq_hdrs = vioif_assign_mem(&p,
632		    sizeof(struct virtio_net_hdr) * rxq->rxq_vq->vq_num);
633		txq->txq_hdrs = vioif_assign_mem(&p,
634		    sizeof(struct virtio_net_hdr) * txq->txq_vq->vq_num);
635	}
636	if (sc->sc_has_ctrl) {
637		ctrlq->ctrlq_cmd = vioif_assign_mem(&p,
638		    sizeof(*ctrlq->ctrlq_cmd));
639		ctrlq->ctrlq_status = vioif_assign_mem(&p,
640		    sizeof(*ctrlq->ctrlq_status));
641		ctrlq->ctrlq_rx = vioif_assign_mem(&p,
642		    sizeof(*ctrlq->ctrlq_rx));
643		ctrlq->ctrlq_mac_tbl_uc = vioif_assign_mem(&p,
644		    sizeof(*ctrlq->ctrlq_mac_tbl_uc)
645		    + ETHER_ADDR_LEN);
646		ctrlq->ctrlq_mac_tbl_mc = vioif_assign_mem(&p,
647		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
648		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES);
649		ctrlq->ctrlq_mac_addr = vioif_assign_mem(&p,
650		    sizeof(*ctrlq->ctrlq_mac_addr));
651		ctrlq->ctrlq_mq = vioif_assign_mem(&p, sizeof(*ctrlq->ctrlq_mq));
652	}
653
654	allocsize2 = 0;
655	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
656		int rxqsize, txqsize;
657
658		rxq = &sc->sc_rxq[qid];
659		txq = &sc->sc_txq[qid];
660		rxqsize = rxq->rxq_vq->vq_num;
661		txqsize = txq->txq_vq->vq_num;
662
663		allocsize2 += sizeof(rxq->rxq_dmamaps[0]) * rxqsize;
664		allocsize2 += sizeof(rxq->rxq_hdr_dmamaps[0]) * rxqsize;
665		allocsize2 += sizeof(rxq->rxq_mbufs[0]) * rxqsize;
666
667		allocsize2 += sizeof(txq->txq_dmamaps[0]) * txqsize;
668		allocsize2 += sizeof(txq->txq_hdr_dmamaps[0]) * txqsize;
669		allocsize2 += sizeof(txq->txq_mbufs[0]) * txqsize;
670	}
671	vaddr = kmem_zalloc(allocsize2, KM_SLEEP);
672	sc->sc_kmem = vaddr;
673	p = (intptr_t) vaddr;
674
675	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
676		int rxqsize, txqsize;
677		rxq = &sc->sc_rxq[qid];
678		txq = &sc->sc_txq[qid];
679		rxqsize = rxq->rxq_vq->vq_num;
680		txqsize = txq->txq_vq->vq_num;
681
682		rxq->rxq_hdr_dmamaps = vioif_assign_mem(&p,
683		    sizeof(rxq->rxq_hdr_dmamaps[0]) * rxqsize);
684		txq->txq_hdr_dmamaps = vioif_assign_mem(&p,
685		    sizeof(txq->txq_hdr_dmamaps[0]) * txqsize);
686		rxq->rxq_dmamaps = vioif_assign_mem(&p,
687		    sizeof(rxq->rxq_dmamaps[0]) * rxqsize);
688		txq->txq_dmamaps = vioif_assign_mem(&p,
689		    sizeof(txq->txq_dmamaps[0]) * txqsize);
690		rxq->rxq_mbufs = vioif_assign_mem(&p,
691		    sizeof(rxq->rxq_mbufs[0]) * rxqsize);
692		txq->txq_mbufs = vioif_assign_mem(&p,
693		    sizeof(txq->txq_mbufs[0]) * txqsize);
694	}
695
696	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
697		rxq = &sc->sc_rxq[qid];
698		txq = &sc->sc_txq[qid];
699
700		for (i = 0; i < rxq->rxq_vq->vq_num; i++) {
701			r = vioif_dmamap_create_load(sc, &rxq->rxq_hdr_dmamaps[i],
702			    &rxq->rxq_hdrs[i], sc->sc_hdr_size, 1,
703			    BUS_DMA_READ, "rx header");
704			if (r != 0)
705				goto err_reqs;
706
707			r = vioif_dmamap_create(sc, &rxq->rxq_dmamaps[i],
708			    MCLBYTES - ETHER_ALIGN, 1, "rx payload");
709			if (r != 0)
710				goto err_reqs;
711		}
712
713		for (i = 0; i < txq->txq_vq->vq_num; i++) {
714			r = vioif_dmamap_create_load(sc, &txq->txq_hdr_dmamaps[i],
715			    &txq->txq_hdrs[i], sc->sc_hdr_size, 1,
716			    BUS_DMA_READ, "tx header");
717			if (r != 0)
718				goto err_reqs;
719
720			r = vioif_dmamap_create(sc, &txq->txq_dmamaps[i], ETHER_MAX_LEN,
721			    VIRTIO_NET_TX_MAXNSEGS, "tx payload");
722			if (r != 0)
723				goto err_reqs;
724		}
725	}
726
727	if (sc->sc_has_ctrl) {
728		/* control vq class & command */
729		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_cmd_dmamap,
730		    ctrlq->ctrlq_cmd, sizeof(*ctrlq->ctrlq_cmd), 1,
731		    BUS_DMA_WRITE, "control command");
732		if (r != 0)
733			goto err_reqs;
734
735		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_status_dmamap,
736		    ctrlq->ctrlq_status, sizeof(*ctrlq->ctrlq_status), 1,
737		    BUS_DMA_READ, "control status");
738		if (r != 0)
739			goto err_reqs;
740
741		/* control vq rx mode command parameter */
742		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_rx_dmamap,
743		    ctrlq->ctrlq_rx, sizeof(*ctrlq->ctrlq_rx), 1,
744		    BUS_DMA_WRITE, "rx mode control command");
745		if (r != 0)
746			goto err_reqs;
747
748		/* multiqueue set command */
749		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_mq_dmamap,
750		    ctrlq->ctrlq_mq, sizeof(*ctrlq->ctrlq_mq), 1,
751		    BUS_DMA_WRITE, "multiqueue set command");
752		if (r != 0)
753			goto err_reqs;
754
755		/* control vq MAC filter table for unicast */
756		/* do not load now since its length is variable */
757		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_uc_dmamap,
758		    sizeof(*ctrlq->ctrlq_mac_tbl_uc)
759		    + ETHER_ADDR_LEN, 1,
760		    "unicast MAC address filter command");
761		if (r != 0)
762			goto err_reqs;
763
764		/* control vq MAC filter table for multicast */
765		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_mc_dmamap,
766		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
767		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES, 1,
768		    "multicast MAC address filter command");
769		if (r != 0)
770			goto err_reqs;
771
772		/* control vq MAC address set command */
773		r = vioif_dmamap_create_load(sc,
774		    &ctrlq->ctrlq_mac_addr_dmamap,
775		    ctrlq->ctrlq_mac_addr,
776		    sizeof(*ctrlq->ctrlq_mac_addr), 1,
777		    BUS_DMA_WRITE, "mac addr set command");
778		if (r != 0)
779			goto err_reqs;
780	}
781
782	return 0;
783
784err_reqs:
785	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_mc_dmamap);
786	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_uc_dmamap);
787	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_rx_dmamap);
788	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_status_dmamap);
789	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_cmd_dmamap);
790	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_mac_addr_dmamap);
791	for (qid = 0; qid < sc->sc_max_nvq_pairs; qid++) {
792		rxq = &sc->sc_rxq[qid];
793		txq = &sc->sc_txq[qid];
794
795		for (i = 0; i < txq->txq_vq->vq_num; i++) {
796			vioif_dmamap_destroy(sc, &txq->txq_dmamaps[i]);
797			vioif_dmamap_destroy(sc, &txq->txq_hdr_dmamaps[i]);
798		}
799		for (i = 0; i < rxq->rxq_vq->vq_num; i++) {
800			vioif_dmamap_destroy(sc, &rxq->rxq_dmamaps[i]);
801			vioif_dmamap_destroy(sc, &rxq->rxq_hdr_dmamaps[i]);
802		}
803	}
804	if (sc->sc_kmem) {
805		kmem_free(sc->sc_kmem, allocsize2);
806		sc->sc_kmem = NULL;
807	}
808	bus_dmamem_unmap(virtio_dmat(vsc), sc->sc_dmamem, allocsize);
809err_dmamem_alloc:
810	bus_dmamem_free(virtio_dmat(vsc), &sc->sc_hdr_segs[0], 1);
811err_none:
812	return -1;
813}
814
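/*
 * Attach the interface: negotiate device features, obtain or generate the
 * MAC address, allocate virtqueues, softints and DMA memory, and register
 * the resulting ifnet with the network stack.
 */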
815static void
816vioif_attach(device_t parent, device_t self, void *aux)
817{
818	struct vioif_softc *sc = device_private(self);
819	struct virtio_softc *vsc = device_private(parent);
820	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
821	struct vioif_txqueue *txq;
822	struct vioif_rxqueue *rxq;
823	uint64_t features, req_features;
824	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
825	u_int softint_flags;
826	int r, i, nvqs = 0, req_flags;
827	char xnamebuf[MAXCOMLEN];
828
829	if (virtio_child(vsc) != NULL) {
830		aprint_normal(": child already attached for %s; "
831		    "something wrong...\n", device_xname(parent));
832		return;
833	}
834
835	sc->sc_dev = self;
836	sc->sc_virtio = vsc;
837	sc->sc_link_state = LINK_STATE_UNKNOWN;
838
839	sc->sc_max_nvq_pairs = 1;
840	sc->sc_req_nvq_pairs = 1;
841	sc->sc_act_nvq_pairs = 1;
842	sc->sc_txrx_workqueue_sysctl = true;
843	sc->sc_tx_intr_process_limit = VIOIF_TX_INTR_PROCESS_LIMIT;
844	sc->sc_tx_process_limit = VIOIF_TX_PROCESS_LIMIT;
845	sc->sc_rx_intr_process_limit = VIOIF_RX_INTR_PROCESS_LIMIT;
846	sc->sc_rx_process_limit = VIOIF_RX_PROCESS_LIMIT;
847
848	mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);
849
850	snprintf(xnamebuf, sizeof(xnamebuf), "%s_txrx", device_xname(self));
851	sc->sc_txrx_workqueue = vioif_workq_create(xnamebuf, VIOIF_WORKQUEUE_PRI,
852	    IPL_NET, WQ_PERCPU | WQ_MPSAFE);
853	if (sc->sc_txrx_workqueue == NULL)
854		goto err;
855
856	req_flags = 0;
857
858#ifdef VIOIF_MPSAFE
859	req_flags |= VIRTIO_F_INTR_MPSAFE;
860#endif
861	req_flags |= VIRTIO_F_INTR_MSIX;
862
863	req_features =
864	    VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | VIRTIO_NET_F_CTRL_VQ |
865	    VIRTIO_NET_F_CTRL_RX | VIRTIO_F_NOTIFY_ON_EMPTY;
866	req_features |= VIRTIO_F_RING_EVENT_IDX;
867	req_features |= VIRTIO_NET_F_CTRL_MAC_ADDR;
868#ifdef VIOIF_MULTIQ
869	req_features |= VIRTIO_NET_F_MQ;
870#endif
871	virtio_child_attach_start(vsc, self, IPL_NET, NULL,
872	    vioif_config_change, virtio_vq_intrhand, req_flags,
873	    req_features, VIRTIO_NET_FLAG_BITS);
874
875	features = virtio_features(vsc);
876	if (features == 0)
877		goto err;
878
879	if (features & VIRTIO_NET_F_MAC) {
880		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
881			sc->sc_mac[i] = virtio_read_device_config_1(vsc,
882			    VIRTIO_NET_CONFIG_MAC + i);
883		}
884	} else {
885		/* code stolen from sys/net/if_tap.c */
886		struct timeval tv;
887		uint32_t ui;
888		getmicrouptime(&tv);
889		ui = (tv.tv_sec ^ tv.tv_usec) & 0xffffff;
890		memcpy(sc->sc_mac+3, (uint8_t *)&ui, 3);
891		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
892			virtio_write_device_config_1(vsc,
893			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
894		}
895	}
896
897	/* 'Ethernet' with capital follows other ethernet driver attachment */
898	aprint_normal_dev(self, "Ethernet address %s\n",
899	    ether_sprintf(sc->sc_mac));
900
901	if (features & (VIRTIO_NET_F_MRG_RXBUF | VIRTIO_F_VERSION_1)) {
902		sc->sc_hdr_size = sizeof(struct virtio_net_hdr);
903	} else {
904		sc->sc_hdr_size = offsetof(struct virtio_net_hdr, num_buffers);
905	}
906
907	if ((features & VIRTIO_NET_F_CTRL_VQ) &&
908	    (features & VIRTIO_NET_F_CTRL_RX)) {
909		sc->sc_has_ctrl = true;
910
911		cv_init(&ctrlq->ctrlq_wait, "ctrl_vq");
912		mutex_init(&ctrlq->ctrlq_wait_lock, MUTEX_DEFAULT, IPL_NET);
913		ctrlq->ctrlq_inuse = FREE;
914	} else {
915		sc->sc_has_ctrl = false;
916	}
917
918	if (sc->sc_has_ctrl && (features & VIRTIO_NET_F_MQ)) {
919		sc->sc_max_nvq_pairs = virtio_read_device_config_2(vsc,
920		    VIRTIO_NET_CONFIG_MAX_VQ_PAIRS);
921
922		if (sc->sc_max_nvq_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
923			goto err;
924
925		/* Limit the number of queue pairs to use */
926		sc->sc_req_nvq_pairs = MIN(sc->sc_max_nvq_pairs, ncpu);
927	}
928
929	vioif_alloc_queues(sc);
930	virtio_child_attach_set_vqs(vsc, sc->sc_vqs, sc->sc_req_nvq_pairs);
931
932#ifdef VIOIF_MPSAFE
933	softint_flags = SOFTINT_NET | SOFTINT_MPSAFE;
934#else
935	softint_flags = SOFTINT_NET;
936#endif
937
938	/*
939	 * Allocating virtqueues
940	 */
941	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
942		rxq = &sc->sc_rxq[i];
943		txq = &sc->sc_txq[i];
944		char qname[32];
945
946		rxq->rxq_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
947
948		rxq->rxq_handle_si = softint_establish(softint_flags,
949		    vioif_rx_handle, rxq);
950		if (rxq->rxq_handle_si == NULL) {
951			aprint_error_dev(self, "cannot establish rx softint\n");
952			goto err;
953		}
954
955		snprintf(qname, sizeof(qname), "rx%d", i);
956		r = virtio_alloc_vq(vsc, rxq->rxq_vq, nvqs,
957		    MCLBYTES + sc->sc_hdr_size, 2, qname);
958		if (r != 0)
959			goto err;
960		nvqs++;
961		rxq->rxq_vq->vq_intrhand = vioif_rx_intr;
962		rxq->rxq_vq->vq_intrhand_arg = (void *)rxq;
963		rxq->rxq_stopping = false;
964		rxq->rxq_running_handle = false;
965		vioif_work_set(&rxq->rxq_work, vioif_rx_handle, rxq);
966
967		txq->txq_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
968
969		txq->txq_deferred_transmit = softint_establish(softint_flags,
970		    vioif_deferred_transmit, txq);
971		if (txq->txq_deferred_transmit == NULL) {
972			aprint_error_dev(self, "cannot establish tx softint\n");
973			goto err;
974		}
975		txq->txq_handle_si = softint_establish(softint_flags,
976		    vioif_tx_handle, txq);
977		if (txq->txq_handle_si == NULL) {
978			aprint_error_dev(self, "cannot establish tx softint\n");
979			goto err;
980		}
981
982		snprintf(qname, sizeof(qname), "tx%d", i);
983		r = virtio_alloc_vq(vsc, txq->txq_vq, nvqs,
984		    sc->sc_hdr_size + (ETHER_MAX_LEN - ETHER_HDR_LEN),
985		    VIRTIO_NET_TX_MAXNSEGS + 1, qname);
986		if (r != 0)
987			goto err;
988		nvqs++;
989		txq->txq_vq->vq_intrhand = vioif_tx_intr;
990		txq->txq_vq->vq_intrhand_arg = (void *)txq;
991		txq->txq_link_active = VIOIF_IS_LINK_ACTIVE(sc);
992		txq->txq_stopping = false;
993		txq->txq_running_handle = false;
994		txq->txq_intrq = pcq_create(txq->txq_vq->vq_num, KM_SLEEP);
995		vioif_work_set(&txq->txq_work, vioif_tx_handle, txq);
996	}
997
998	if (sc->sc_has_ctrl) {
999		/*
1000		 * Allocating a virtqueue for control channel
1001		 */
1002		r = virtio_alloc_vq(vsc, ctrlq->ctrlq_vq, nvqs,
1003		    NBPG, 1, "control");
1004		if (r != 0) {
1005			aprint_error_dev(self, "failed to allocate "
1006			    "a virtqueue for control channel, error code %d\n",
1007			    r);
1008
1009			sc->sc_has_ctrl = false;
1010			cv_destroy(&ctrlq->ctrlq_wait);
1011			mutex_destroy(&ctrlq->ctrlq_wait_lock);
1012		} else {
1013			nvqs++;
1014			ctrlq->ctrlq_vq->vq_intrhand = vioif_ctrl_intr;
1015			ctrlq->ctrlq_vq->vq_intrhand_arg = (void *) ctrlq;
1016		}
1017	}
1018
1019	sc->sc_ctl_softint = softint_establish(softint_flags,
1020	    vioif_ctl_softint, sc);
1021	if (sc->sc_ctl_softint == NULL) {
1022		aprint_error_dev(self, "cannot establish ctl softint\n");
1023		goto err;
1024	}
1025
1026	if (vioif_alloc_mems(sc) < 0)
1027		goto err;
1028
1029	if (virtio_child_attach_finish(vsc) != 0)
1030		goto err;
1031
1032	if (vioif_setup_sysctl(sc) != 0) {
1033		aprint_error_dev(self, "unable to create sysctl node\n");
1034		/* continue */
1035	}
1036
1037	vioif_setup_stats(sc);
1038
1039	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
1040	ifp->if_softc = sc;
1041	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1042#ifdef VIOIF_MPSAFE
1043	ifp->if_extflags = IFEF_MPSAFE;
1044#endif
1045	ifp->if_start = vioif_start;
1046	if (sc->sc_req_nvq_pairs > 1)
1047		ifp->if_transmit = vioif_transmit;
1048	ifp->if_ioctl = vioif_ioctl;
1049	ifp->if_init = vioif_init;
1050	ifp->if_stop = vioif_stop;
1051	ifp->if_capabilities = 0;
1052	ifp->if_watchdog = vioif_watchdog;
1053	txq = &sc->sc_txq[0];
1054	IFQ_SET_MAXLEN(&ifp->if_snd, MAX(txq->txq_vq->vq_num, IFQ_MAXLEN));
1055	IFQ_SET_READY(&ifp->if_snd);
1056
1057	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
1058
1059	if_attach(ifp);
1060	if_deferred_start_init(ifp, NULL);
1061	ether_ifattach(ifp, sc->sc_mac);
1062	ether_set_ifflags_cb(&sc->sc_ethercom, vioif_ifflags_cb);
1063
1064	return;
1065
1066err:
1067	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
1068		rxq = &sc->sc_rxq[i];
1069		txq = &sc->sc_txq[i];
1070
1071		if (rxq->rxq_lock) {
1072			mutex_obj_free(rxq->rxq_lock);
1073			rxq->rxq_lock = NULL;
1074		}
1075
1076		if (rxq->rxq_handle_si) {
1077			softint_disestablish(rxq->rxq_handle_si);
1078			rxq->rxq_handle_si = NULL;
1079		}
1080
1081		if (txq->txq_lock) {
1082			mutex_obj_free(txq->txq_lock);
1083			txq->txq_lock = NULL;
1084		}
1085
1086		if (txq->txq_handle_si) {
1087			softint_disestablish(txq->txq_handle_si);
1088			txq->txq_handle_si = NULL;
1089		}
1090
1091		if (txq->txq_deferred_transmit) {
1092			softint_disestablish(txq->txq_deferred_transmit);
1093			txq->txq_deferred_transmit = NULL;
1094		}
1095
1096		if (txq->txq_intrq) {
1097			pcq_destroy(txq->txq_intrq);
1098			txq->txq_intrq = NULL;
1099		}
1100	}
1101
1102	if (sc->sc_has_ctrl) {
1103		cv_destroy(&ctrlq->ctrlq_wait);
1104		mutex_destroy(&ctrlq->ctrlq_wait_lock);
1105	}
1106
1107	while (nvqs > 0)
1108		virtio_free_vq(vsc, &sc->sc_vqs[--nvqs]);
1109
1110	vioif_free_queues(sc);
1111	mutex_destroy(&sc->sc_lock);
1112	virtio_child_attach_failed(vsc);
1113	config_finalize_register(self, vioif_finalize_teardown);
1114
1115	return;
1116}
1117
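/*
 * Registered from the attach error path via config_finalize_register():
 * destroy the tx/rx workqueue once autoconfiguration has completed.
 */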
1118static int
1119vioif_finalize_teardown(device_t self)
1120{
1121	struct vioif_softc *sc = device_private(self);
1122
1123	if (sc->sc_txrx_workqueue != NULL) {
1124		vioif_workq_destroy(sc->sc_txrx_workqueue);
1125		sc->sc_txrx_workqueue = NULL;
1126	}
1127
1128	return 0;
1129}
1130
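/*
 * Enable the rx/tx virtqueue interrupts of every active queue pair;
 * vioif_disable_interrupt_vqpairs() below does the reverse.
 */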
1131static void
1132vioif_enable_interrupt_vqpairs(struct vioif_softc *sc)
1133{
1134	struct virtio_softc *vsc = sc->sc_virtio;
1135	struct vioif_txqueue *txq;
1136	struct vioif_rxqueue *rxq;
1137	int i;
1138
1139	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1140		txq = &sc->sc_txq[i];
1141		rxq = &sc->sc_rxq[i];
1142
1143		virtio_start_vq_intr(vsc, txq->txq_vq);
1144		virtio_start_vq_intr(vsc, rxq->rxq_vq);
1145	}
1146}
1147
1148static void
1149vioif_disable_interrupt_vqpairs(struct vioif_softc *sc)
1150{
1151	struct virtio_softc *vsc = sc->sc_virtio;
1152	struct vioif_txqueue *txq;
1153	struct vioif_rxqueue *rxq;
1154	int i;
1155
1156	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1157		rxq = &sc->sc_rxq[i];
1158		txq = &sc->sc_txq[i];
1159
1160		virtio_stop_vq_intr(vsc, rxq->rxq_vq);
1161		virtio_stop_vq_intr(vsc, txq->txq_vq);
1162	}
1163}
1164
1165/*
1166 * Interface functions for ifnet
1167 */
1168static int
1169vioif_init(struct ifnet *ifp)
1170{
1171	struct vioif_softc *sc = ifp->if_softc;
1172	struct virtio_softc *vsc = sc->sc_virtio;
1173	struct vioif_rxqueue *rxq;
1174	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1175	int r, i;
1176
1177	vioif_stop(ifp, 0);
1178
1179	r = virtio_reinit_start(vsc);
1180	if (r != 0) {
1181		log(LOG_ERR, "%s: reset failed\n", ifp->if_xname);
1182		return EIO;
1183	}
1184
1185	virtio_negotiate_features(vsc, virtio_features(vsc));
1186
1187	for (i = 0; i < sc->sc_req_nvq_pairs; i++) {
1188		rxq = &sc->sc_rxq[i];
1189
1190		mutex_enter(rxq->rxq_lock);
1191		vioif_populate_rx_mbufs_locked(sc, rxq);
1192		mutex_exit(rxq->rxq_lock);
1193
1194	}
1195
1196	virtio_reinit_end(vsc);
1197
1198	if (sc->sc_has_ctrl)
1199		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
1200
1201	r = vioif_ctrl_mq_vq_pairs_set(sc, sc->sc_req_nvq_pairs);
1202	if (r == 0)
1203		sc->sc_act_nvq_pairs = sc->sc_req_nvq_pairs;
1204	else
1205		sc->sc_act_nvq_pairs = 1;
1206
1207	vioif_enable_interrupt_vqpairs(sc);
1208
1209	vioif_update_link_status(sc);
1210	ifp->if_flags |= IFF_RUNNING;
1211	ifp->if_flags &= ~IFF_OACTIVE;
1212	r = vioif_rx_filter(sc);
1213
1214	return r;
1215}
1216
1217static void
1218vioif_stop(struct ifnet *ifp, int disable)
1219{
1220	struct vioif_softc *sc = ifp->if_softc;
1221	struct virtio_softc *vsc = sc->sc_virtio;
1222	struct vioif_txqueue *txq;
1223	struct vioif_rxqueue *rxq;
1224	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1225	int i;
1226
1228	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1229		txq = &sc->sc_txq[i];
1230		rxq = &sc->sc_rxq[i];
1231
1232		mutex_enter(rxq->rxq_lock);
1233		rxq->rxq_stopping = true;
1234		mutex_exit(rxq->rxq_lock);
1235
1236		mutex_enter(txq->txq_lock);
1237		txq->txq_stopping = true;
1238		mutex_exit(txq->txq_lock);
1239	}
1240
1241	/* disable interrupts */
1242	vioif_disable_interrupt_vqpairs(sc);
1243	if (sc->sc_has_ctrl)
1244		virtio_stop_vq_intr(vsc, ctrlq->ctrlq_vq);
1245
1246	/*
1247	 * The only way to stop interrupts, I/O and DMA is to reset the device.
1248	 *
1249	 * NOTE: devices based on the VirtIO draft specification cannot stop
1250	 * interrupts completely even if virtio_stop_vq_intr() is called.
1251	 */
1252	virtio_reset(vsc);
1253
1254	vioif_intr_barrier();
1255
1256	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1257		txq = &sc->sc_txq[i];
1258		rxq = &sc->sc_rxq[i];
1259
1260		vioif_work_wait(sc->sc_txrx_workqueue, &rxq->rxq_work);
1261		vioif_work_wait(sc->sc_txrx_workqueue, &txq->txq_work);
1262	}
1263
1264	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1265		vioif_rx_queue_clear(sc, vsc, &sc->sc_rxq[i]);
1266		vioif_tx_queue_clear(sc, vsc, &sc->sc_txq[i]);
1267	}
1268
1269	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1270
1271	/* all packet processing is stopped */
1272	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1273		txq = &sc->sc_txq[i];
1274		rxq = &sc->sc_rxq[i];
1275
1276		mutex_enter(rxq->rxq_lock);
1277		rxq->rxq_stopping = false;
1278		KASSERT(!rxq->rxq_running_handle);
1279		mutex_exit(rxq->rxq_lock);
1280
1281		mutex_enter(txq->txq_lock);
1282		txq->txq_stopping = false;
1283		KASSERT(!txq->txq_running_handle);
1284		mutex_exit(txq->txq_lock);
1285	}
1286}
1287
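/*
 * Common transmit path for if_start and if_transmit: dequeue mbufs from
 * if_snd or the per-queue pcq, load each one into a DMA map (defragmenting
 * it if necessary) and enqueue it on the tx virtqueue together with its
 * virtio-net header.
 */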
1288static void
1289vioif_send_common_locked(struct ifnet *ifp, struct vioif_txqueue *txq,
1290    bool is_transmit)
1291{
1292	struct vioif_softc *sc = ifp->if_softc;
1293	struct virtio_softc *vsc = sc->sc_virtio;
1294	struct virtqueue *vq = txq->txq_vq;
1295	struct virtio_net_hdr *hdr;
1296	struct mbuf *m;
1297	int queued = 0;
1298
1299	KASSERT(mutex_owned(txq->txq_lock));
1300
1301	if ((ifp->if_flags & IFF_RUNNING) == 0)
1302		return;
1303
1304	if (!txq->txq_link_active || txq->txq_stopping)
1305		return;
1306
1307	if ((ifp->if_flags & IFF_OACTIVE) != 0 && !is_transmit)
1308		return;
1309
1310	for (;;) {
1311		int slot, r;
1312
1313		if (is_transmit)
1314			m = pcq_get(txq->txq_intrq);
1315		else
1316			IFQ_DEQUEUE(&ifp->if_snd, m);
1317
1318		if (m == NULL)
1319			break;
1320
1321		r = virtio_enqueue_prep(vsc, vq, &slot);
1322		if (r == EAGAIN) {
1323			ifp->if_flags |= IFF_OACTIVE;
1324			m_freem(m);
1325			if_statinc(ifp, if_oerrors);
1326			break;
1327		}
1328		if (r != 0)
1329			panic("enqueue_prep for a tx buffer");
1330
1331		r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1332		    txq->txq_dmamaps[slot], m, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1333		if (r != 0) {
1334			/* maybe just too fragmented */
1335			struct mbuf *newm;
1336
1337			newm = m_defrag(m, M_NOWAIT);
1338			if (newm == NULL) {
1339				txq->txq_defrag_failed.ev_count++;
1340				goto skip;
1341			}
1342
1343			m = newm;
1344			r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1345			    txq->txq_dmamaps[slot], m,
1346			    BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1347			if (r != 0) {
1348				txq->txq_mbuf_load_failed.ev_count++;
1349skip:
1350				m_freem(m);
1351				if_statinc(ifp, if_oerrors);
1352				virtio_enqueue_abort(vsc, vq, slot);
1353				continue;
1354			}
1355		}
1356
1357		/* This should actually never fail */
1358		r = virtio_enqueue_reserve(vsc, vq, slot,
1359		    txq->txq_dmamaps[slot]->dm_nsegs + 1);
1360		if (r != 0) {
1361			txq->txq_enqueue_reserve_failed.ev_count++;
1362			bus_dmamap_unload(virtio_dmat(vsc),
1363			     txq->txq_dmamaps[slot]);
1364			/* slot already freed by virtio_enqueue_reserve */
1365			m_freem(m);
1366			if_statinc(ifp, if_oerrors);
1367			continue;
1368		}
1369
1370		txq->txq_mbufs[slot] = m;
1371
1372		hdr = &txq->txq_hdrs[slot];
1373		memset(hdr, 0, sc->sc_hdr_size);
1374		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_dmamaps[slot],
1375		    0, txq->txq_dmamaps[slot]->dm_mapsize,
1376		    BUS_DMASYNC_PREWRITE);
1377		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_hdr_dmamaps[slot],
1378		    0, txq->txq_hdr_dmamaps[slot]->dm_mapsize,
1379		    BUS_DMASYNC_PREWRITE);
1380		virtio_enqueue(vsc, vq, slot, txq->txq_hdr_dmamaps[slot], true);
1381		virtio_enqueue(vsc, vq, slot, txq->txq_dmamaps[slot], true);
1382		virtio_enqueue_commit(vsc, vq, slot, false);
1383
1384		queued++;
1385		bpf_mtap(ifp, m, BPF_D_OUT);
1386	}
1387
1388	if (queued > 0) {
1389		virtio_enqueue_commit(vsc, vq, -1, true);
1390		ifp->if_timer = 5;
1391	}
1392}
1393
1394static void
1395vioif_start_locked(struct ifnet *ifp, struct vioif_txqueue *txq)
1396{
1397
1398	/*
1399	 * ifp->if_obytes and ifp->if_omcasts are added in if_transmit()@if.c.
1400	 */
1401	vioif_send_common_locked(ifp, txq, false);
1402
1403}
1404
1405static void
1406vioif_start(struct ifnet *ifp)
1407{
1408	struct vioif_softc *sc = ifp->if_softc;
1409	struct vioif_txqueue *txq = &sc->sc_txq[0];
1410
1411#ifdef VIOIF_MPSAFE
1412	KASSERT(if_is_mpsafe(ifp));
1413#endif
1414
1415	mutex_enter(txq->txq_lock);
1416	vioif_start_locked(ifp, txq);
1417	mutex_exit(txq->txq_lock);
1418}
1419
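/*
 * Choose the tx queue for an outgoing mbuf: a simple spread over the
 * active queue pairs keyed by the current CPU index.
 */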
1420static inline int
1421vioif_select_txqueue(struct ifnet *ifp, struct mbuf *m)
1422{
1423	struct vioif_softc *sc = ifp->if_softc;
1424	u_int cpuid = cpu_index(curcpu());
1425
1426	return cpuid % sc->sc_act_nvq_pairs;
1427}
1428
1429static void
1430vioif_transmit_locked(struct ifnet *ifp, struct vioif_txqueue *txq)
1431{
1432
1433	vioif_send_common_locked(ifp, txq, true);
1434}
1435
1436static int
1437vioif_transmit(struct ifnet *ifp, struct mbuf *m)
1438{
1439	struct vioif_softc *sc = ifp->if_softc;
1440	struct vioif_txqueue *txq;
1441	int qid;
1442
1443	qid = vioif_select_txqueue(ifp, m);
1444	txq = &sc->sc_txq[qid];
1445
1446	if (__predict_false(!pcq_put(txq->txq_intrq, m))) {
1447		m_freem(m);
1448		return ENOBUFS;
1449	}
1450
1451	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
1452	if_statadd_ref(nsr, if_obytes, m->m_pkthdr.len);
1453	if (m->m_flags & M_MCAST)
1454		if_statinc_ref(nsr, if_omcasts);
1455	IF_STAT_PUTREF(ifp);
1456
1457	if (mutex_tryenter(txq->txq_lock)) {
1458		vioif_transmit_locked(ifp, txq);
1459		mutex_exit(txq->txq_lock);
1460	}
1461
1462	return 0;
1463}
1464
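/*
 * Softint handler that restarts transmission once tx descriptors have
 * been reclaimed by the tx completion handler.
 */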
1465static void
1466vioif_deferred_transmit(void *arg)
1467{
1468	struct vioif_txqueue *txq = arg;
1469	struct virtio_softc *vsc = txq->txq_vq->vq_owner;
1470	struct vioif_softc *sc = device_private(virtio_child(vsc));
1471	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1472
1473	mutex_enter(txq->txq_lock);
1474	vioif_send_common_locked(ifp, txq, true);
1475	mutex_exit(txq->txq_lock);
1476}
1477
1478static int
1479vioif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1480{
1481	int s, r;
1482
1483	s = splnet();
1484
1485	r = ether_ioctl(ifp, cmd, data);
1486	if (r == ENETRESET && (cmd == SIOCADDMULTI || cmd == SIOCDELMULTI)) {
1487		if (ifp->if_flags & IFF_RUNNING) {
1488			r = vioif_rx_filter(ifp->if_softc);
1489		} else {
1490			r = 0;
1491		}
1492	}
1493
1494	splx(s);
1495
1496	return r;
1497}
1498
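/*
 * Watchdog timeout: kick the tx completion handlers so that descriptors
 * the device has already finished with are reclaimed.
 */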
1499static void
1500vioif_watchdog(struct ifnet *ifp)
1501{
1502	struct vioif_softc *sc = ifp->if_softc;
1503	struct vioif_txqueue *txq;
1504	int i;
1505
1506	if (ifp->if_flags & IFF_RUNNING) {
1507		for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
1508			txq = &sc->sc_txq[i];
1509
1510			mutex_enter(txq->txq_lock);
1511			if (!txq->txq_running_handle) {
1512				txq->txq_running_handle = true;
1513				vioif_tx_sched_handle(sc, txq);
1514			}
1515			mutex_exit(txq->txq_lock);
1516		}
1517	}
1518}
1519
1520/*
1521 * Receive implementation
1522 */
1523/* add mbufs for all the empty receive slots */
1524static void
1525vioif_populate_rx_mbufs_locked(struct vioif_softc *sc, struct vioif_rxqueue *rxq)
1526{
1527	struct virtqueue *vq = rxq->rxq_vq;
1528	struct virtio_softc *vsc = vq->vq_owner;
1529	struct mbuf *m;
1530	int i, r, ndone = 0;
1531
1532	KASSERT(mutex_owned(rxq->rxq_lock));
1533
1534	for (i = 0; i < vq->vq_num; i++) {
1535		int slot;
1536		r = virtio_enqueue_prep(vsc, vq, &slot);
1537		if (r == EAGAIN)
1538			break;
1539		if (r != 0)
1540			panic("enqueue_prep for rx buffers");
1541
1542		MGETHDR(m, M_DONTWAIT, MT_DATA);
1543		if (m == NULL) {
1544			virtio_enqueue_abort(vsc, vq, slot);
1545			rxq->rxq_mbuf_enobufs.ev_count++;
1546			break;
1547		}
1548		MCLGET(m, M_DONTWAIT);
1549		if ((m->m_flags & M_EXT) == 0) {
1550			virtio_enqueue_abort(vsc, vq, slot);
1551			m_freem(m);
1552			rxq->rxq_mbuf_enobufs.ev_count++;
1553			break;
1554		}
1555
1556		m->m_len = m->m_pkthdr.len = MCLBYTES;
1557		m_adj(m, ETHER_ALIGN);
1558
1559		r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1560		    rxq->rxq_dmamaps[slot], m, BUS_DMA_READ | BUS_DMA_NOWAIT);
1561
1562		if (r != 0) {
1563			virtio_enqueue_abort(vsc, vq, slot);
1564			m_freem(m);
1565			rxq->rxq_mbuf_load_failed.ev_count++;
1566			break;
1567		}
1568
1569		r = virtio_enqueue_reserve(vsc, vq, slot,
1570		    rxq->rxq_dmamaps[slot]->dm_nsegs + 1);
1571		if (r != 0) {
1572			rxq->rxq_enqueue_reserve_failed.ev_count++;
1573			bus_dmamap_unload(virtio_dmat(vsc), rxq->rxq_dmamaps[slot]);
1574			m_freem(m);
1575			/* slot already freed by virtio_enqueue_reserve */
1576			break;
1577		}
1578		KASSERT(rxq->rxq_mbufs[slot] == NULL);
1579		rxq->rxq_mbufs[slot] = m;
1580		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_hdr_dmamaps[slot],
1581		    0, sc->sc_hdr_size, BUS_DMASYNC_PREREAD);
1582		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_dmamaps[slot],
1583		    0, rxq->rxq_dmamaps[slot]->dm_mapsize, BUS_DMASYNC_PREREAD);
1584		virtio_enqueue(vsc, vq, slot, rxq->rxq_hdr_dmamaps[slot],
1585		    false);
1586		virtio_enqueue(vsc, vq, slot, rxq->rxq_dmamaps[slot], false);
1587		virtio_enqueue_commit(vsc, vq, slot, false);
1588		ndone++;
1589	}
1590	if (ndone > 0)
1591		virtio_enqueue_commit(vsc, vq, -1, true);
1592}
1593
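/*
 * Drain the rx virtqueue and free any mbufs still attached to its slots;
 * called from vioif_stop() after the device has been reset.
 */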
1594static void
1595vioif_rx_queue_clear(struct vioif_softc *sc, struct virtio_softc *vsc,
1596    struct vioif_rxqueue *rxq)
1597{
1598	struct mbuf *m;
1599	unsigned int i, vq_num;
1600	bool more;
1601
1602	mutex_enter(rxq->rxq_lock);
1603	vq_num = rxq->rxq_vq->vq_num;
1604
1605	for (;;) {
1606		more = vioif_rx_deq_locked(sc, vsc, rxq, vq_num, NULL);
1607		if (more == false)
1608			break;
1609	}
1610
1611	for (i = 0; i < vq_num; i++) {
1612		m = rxq->rxq_mbufs[i];
1613		if (m == NULL)
1614			continue;
1615		rxq->rxq_mbufs[i] = NULL;
1616
1617		bus_dmamap_unload(virtio_dmat(vsc), rxq->rxq_dmamaps[i]);
1618		m_freem(m);
1619	}
1620	mutex_exit(rxq->rxq_lock);
1621}
1622
1623/* dequeue received packets */
1624static bool
1625vioif_rx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
1626    struct vioif_rxqueue *rxq, u_int limit, size_t *ndeqp)
1627{
1628	struct virtqueue *vq = rxq->rxq_vq;
1629	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1630	struct mbuf *m;
1631	int slot, len;
1632	bool more;
1633	size_t ndeq;
1634
1635	KASSERT(mutex_owned(rxq->rxq_lock));
1636
1637	more = false;
1638	ndeq = 0;
1639
1640	if (virtio_vq_is_enqueued(vsc, vq) == false)
1641		goto done;
1642
1643	for (;;ndeq++) {
1644		if (ndeq >= limit) {
1645			more = true;
1646			break;
1647		}
1648
1649		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
1650			break;
1651
1652		len -= sc->sc_hdr_size;
1653		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_hdr_dmamaps[slot],
1654		    0, sc->sc_hdr_size, BUS_DMASYNC_POSTREAD);
1655		bus_dmamap_sync(virtio_dmat(vsc), rxq->rxq_dmamaps[slot],
1656		    0, rxq->rxq_dmamaps[slot]->dm_mapsize, BUS_DMASYNC_POSTREAD);
1657		m = rxq->rxq_mbufs[slot];
1658		KASSERT(m != NULL);
1659		bus_dmamap_unload(virtio_dmat(vsc), rxq->rxq_dmamaps[slot]);
1660		rxq->rxq_mbufs[slot] = NULL;
1661		virtio_dequeue_commit(vsc, vq, slot);
1662		m_set_rcvif(m, ifp);
1663		m->m_len = m->m_pkthdr.len = len;
1664
1665		if_percpuq_enqueue(ifp->if_percpuq, m);
1666	}
1667
1669done:
1670	if (ndeqp != NULL)
1671		*ndeqp = ndeq;
1672
1673	return more;
1674}
1675
1676/* rx interrupt; call _dequeue above and schedule a softint */
1677
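/*
 * Dequeue up to `limit' received packets and refill the ring; if more
 * work remains, or new packets arrive while the interrupt is being
 * re-enabled, reschedule the handler instead of looping here.
 */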
1678static void
1679vioif_rx_handle_locked(void *xrxq, u_int limit)
1680{
1681	struct vioif_rxqueue *rxq = xrxq;
1682	struct virtqueue *vq = rxq->rxq_vq;
1683	struct virtio_softc *vsc = vq->vq_owner;
1684	struct vioif_softc *sc = device_private(virtio_child(vsc));
1685	bool more;
1686	int enqueued;
1687	size_t ndeq;
1688
1689	KASSERT(mutex_owned(rxq->rxq_lock));
1690	KASSERT(!rxq->rxq_stopping);
1691
1692	more = vioif_rx_deq_locked(sc, vsc, rxq, limit, &ndeq);
1693	if (ndeq > 0)
1694		vioif_populate_rx_mbufs_locked(sc, rxq);
1695
1696	if (more) {
1697		vioif_rx_sched_handle(sc, rxq);
1698		return;
1699	}
1700
1701	enqueued = virtio_start_vq_intr(vsc, rxq->rxq_vq);
1702	if (enqueued != 0) {
1703		virtio_stop_vq_intr(vsc, rxq->rxq_vq);
1704		vioif_rx_sched_handle(sc, rxq);
1705		return;
1706	}
1707
1708	rxq->rxq_running_handle = false;
1709}
1710
1711static int
1712vioif_rx_intr(void *arg)
1713{
1714	struct vioif_rxqueue *rxq = arg;
1715	struct virtqueue *vq = rxq->rxq_vq;
1716	struct virtio_softc *vsc = vq->vq_owner;
1717	struct vioif_softc *sc = device_private(virtio_child(vsc));
1718	u_int limit;
1719
1721	mutex_enter(rxq->rxq_lock);
1722
1723	/* rx handler is already running in softint/workqueue */
1724	if (rxq->rxq_running_handle)
1725		goto done;
1726
1727	if (rxq->rxq_stopping)
1728		goto done;
1729
1730	rxq->rxq_running_handle = true;
1731
1732	limit = sc->sc_rx_intr_process_limit;
1733	virtio_stop_vq_intr(vsc, vq);
1734	vioif_rx_handle_locked(rxq, limit);
1735
1736done:
1737	mutex_exit(rxq->rxq_lock);
1738	return 1;
1739}
1740
1741static void
1742vioif_rx_handle(void *xrxq)
1743{
1744	struct vioif_rxqueue *rxq = xrxq;
1745	struct virtqueue *vq = rxq->rxq_vq;
1746	struct virtio_softc *vsc = vq->vq_owner;
1747	struct vioif_softc *sc = device_private(virtio_child(vsc));
1748	u_int limit;
1749
1750	mutex_enter(rxq->rxq_lock);
1751
1752	KASSERT(rxq->rxq_running_handle);
1753
1754	if (rxq->rxq_stopping) {
1755		rxq->rxq_running_handle = false;
1756		goto done;
1757	}
1758
1759	limit = sc->sc_rx_process_limit;
1760	vioif_rx_handle_locked(rxq, limit);
1761
1762done:
1763	mutex_exit(rxq->rxq_lock);
1764}
1765
1766static void
1767vioif_rx_sched_handle(struct vioif_softc *sc, struct vioif_rxqueue *rxq)
1768{
1769
1770	KASSERT(mutex_owned(rxq->rxq_lock));
1771
1772	if (rxq->rxq_stopping)
1773		return;
1774
1775	if (rxq->rxq_workqueue)
1776		vioif_work_add(sc->sc_txrx_workqueue, &rxq->rxq_work);
1777	else
1778		softint_schedule(rxq->rxq_handle_si);
1779}
1780
1781/*
1782 * Transmission implementation
1783 */
1784/* actual transmission is done in if_start */
1785/* tx interrupt; dequeue and free mbufs */
1786/*
1787 * the tx interrupt is normally kept disabled; this is called when the
1788 * tx vq becomes full and from the watchdog
1789 */
1790
1791static void
1792vioif_tx_handle_locked(struct vioif_txqueue *txq, u_int limit)
1793{
1794	struct virtqueue *vq = txq->txq_vq;
1795	struct virtio_softc *vsc = vq->vq_owner;
1796	struct vioif_softc *sc = device_private(virtio_child(vsc));
1797	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1798	bool more;
1799	int enqueued;
1800
1801	KASSERT(mutex_owned(txq->txq_lock));
1802	KASSERT(!txq->txq_stopping);
1803
1804	more = vioif_tx_deq_locked(sc, vsc, txq, limit);
1805	if (more) {
1806		vioif_tx_sched_handle(sc, txq);
1807		return;
1808	}
1809
1810	enqueued = (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX) ?
1811	    virtio_postpone_intr_smart(vsc, vq):
1812	    virtio_start_vq_intr(vsc, vq);
1813	if (enqueued != 0) {
1814		virtio_stop_vq_intr(vsc, vq);
1815		vioif_tx_sched_handle(sc, txq);
1816		return;
1817	}
1818
1819	txq->txq_running_handle = false;
1820
1821	/* for ALTQ */
1822	if (txq == &sc->sc_txq[0]) {
1823		if_schedule_deferred_start(ifp);
1824		ifp->if_flags &= ~IFF_OACTIVE;
1825	}
1826	softint_schedule(txq->txq_deferred_transmit);
1827}
1828
1830static int
1831vioif_tx_intr(void *arg)
1832{
1833	struct vioif_txqueue *txq = arg;
1834	struct virtqueue *vq = txq->txq_vq;
1835	struct virtio_softc *vsc = vq->vq_owner;
1836	struct vioif_softc *sc = device_private(virtio_child(vsc));
1837	u_int limit;
1838
1839	limit = sc->sc_tx_intr_process_limit;
1840
1841	mutex_enter(txq->txq_lock);
1842
1843	/* tx handler is already running in softint/workqueue */
1844	if (txq->txq_running_handle)
1845		goto done;
1846
1847	if (txq->txq_stopping)
1848		goto done;
1849
1850	txq->txq_running_handle = true;
1851
1852	virtio_stop_vq_intr(vsc, vq);
1853	txq->txq_workqueue = sc->sc_txrx_workqueue_sysctl;
1854	vioif_tx_handle_locked(txq, limit);
1855
1856done:
1857	mutex_exit(txq->txq_lock);
1858	return 1;
1859}
1860
1861static void
1862vioif_tx_handle(void *xtxq)
1863{
1864	struct vioif_txqueue *txq = xtxq;
1865	struct virtqueue *vq = txq->txq_vq;
1866	struct virtio_softc *vsc = vq->vq_owner;
1867	struct vioif_softc *sc = device_private(virtio_child(vsc));
1868	u_int limit;
1869
1870	mutex_enter(txq->txq_lock);
1871
1872	KASSERT(txq->txq_running_handle);
1873
1874	if (txq->txq_stopping) {
1875		txq->txq_running_handle = false;
1876		goto done;
1877	}
1878
1879	limit = sc->sc_tx_process_limit;
1880	vioif_tx_handle_locked(txq, limit);
1881
1882done:
1883	mutex_exit(txq->txq_lock);
1884}
1885
1886static void
1887vioif_tx_sched_handle(struct vioif_softc *sc, struct vioif_txqueue *txq)
1888{
1889
1890	KASSERT(mutex_owned(txq->txq_lock));
1891
1892	if (txq->txq_stopping)
1893		return;
1894
1895	if (txq->txq_workqueue)
1896		vioif_work_add(sc->sc_txrx_workqueue, &txq->txq_work);
1897	else
1898		softint_schedule(txq->txq_handle_si);
1899}
1900
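/*
 * Drain the tx virtqueue and free any mbufs still attached to its slots;
 * the tx counterpart of vioif_rx_queue_clear().
 */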
1901static void
1902vioif_tx_queue_clear(struct vioif_softc *sc, struct virtio_softc *vsc,
1903    struct vioif_txqueue *txq)
1904{
1905	struct mbuf *m;
1906	unsigned int i, vq_num;
1907	bool more;
1908
1909	mutex_enter(txq->txq_lock);
1910
1911	vq_num = txq->txq_vq->vq_num;
1912	for (;;) {
1913		more = vioif_tx_deq_locked(sc, vsc, txq, vq_num);
1914		if (more == false)
1915			break;
1916	}
1917
1918	for (i = 0; i < vq_num; i++) {
1919		m = txq->txq_mbufs[i];
1920		if (m == NULL)
1921			continue;
1922		txq->txq_mbufs[i] = NULL;
1923
1924		bus_dmamap_unload(virtio_dmat(vsc), txq->txq_dmamaps[i]);
1925		m_freem(m);
1926	}
1927	mutex_exit(txq->txq_lock);
1928}
1929
1930static bool
1931vioif_tx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
1932    struct vioif_txqueue *txq, u_int limit)
1933{
1934	struct virtqueue *vq = txq->txq_vq;
1935	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1936	struct mbuf *m;
1937	int slot, len;
1938	bool more = false;
1939
1940	KASSERT(mutex_owned(txq->txq_lock));
1941
1942	if (virtio_vq_is_enqueued(vsc, vq) == false)
1943		return false;
1944
1945	for (;;) {
1946		if (limit-- == 0) {
1947			more = true;
1948			break;
1949		}
1950
1951		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
1952			break;
1953
1954		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_hdr_dmamaps[slot],
1955		    0, sc->sc_hdr_size, BUS_DMASYNC_POSTWRITE);
1956		bus_dmamap_sync(virtio_dmat(vsc), txq->txq_dmamaps[slot],
1957		    0, txq->txq_dmamaps[slot]->dm_mapsize,
1958		    BUS_DMASYNC_POSTWRITE);
1959		m = txq->txq_mbufs[slot];
1960		bus_dmamap_unload(virtio_dmat(vsc), txq->txq_dmamaps[slot]);
1961		txq->txq_mbufs[slot] = NULL;
1962		virtio_dequeue_commit(vsc, vq, slot);
1963		if_statinc(ifp, if_opackets);
1964		m_freem(m);
1965	}
1966
1967	return more;
1968}
1969
1970/*
1971 * Control vq
1972 */
1973/* acquire exclusive use of the control virtqueue, sleeping until it is free */
1974static void
1975vioif_ctrl_acquire(struct vioif_softc *sc)
1976{
1977	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1978
1979	mutex_enter(&ctrlq->ctrlq_wait_lock);
1980	while (ctrlq->ctrlq_inuse != FREE)
1981		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
1982	ctrlq->ctrlq_inuse = INUSE;
1983	ctrlq->ctrlq_owner = curlwp;
1984	mutex_exit(&ctrlq->ctrlq_wait_lock);
1985}
1986
1987static void
1988vioif_ctrl_release(struct vioif_softc *sc)
1989{
1990	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1991
1992	KASSERT(ctrlq->ctrlq_inuse != FREE);
1993	KASSERT(ctrlq->ctrlq_owner == curlwp);
1994
1995	mutex_enter(&ctrlq->ctrlq_wait_lock);
1996	ctrlq->ctrlq_inuse = FREE;
1997	ctrlq->ctrlq_owner = NULL;
1998	cv_signal(&ctrlq->ctrlq_wait);
1999	mutex_exit(&ctrlq->ctrlq_wait_lock);
2000}
2001
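/*
 * vioif_ctrl_load_cmdspec: load the DMA maps for a set of control command
 * buffers; on failure, unload whatever was already loaded.
 */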
2002static int
2003vioif_ctrl_load_cmdspec(struct vioif_softc *sc,
2004    struct vioif_ctrl_cmdspec *specs, int nspecs)
2005{
2006	struct virtio_softc *vsc = sc->sc_virtio;
2007	int i, r, loaded;
2008
2009	loaded = 0;
2010	for (i = 0; i < nspecs; i++) {
2011		r = bus_dmamap_load(virtio_dmat(vsc),
2012		    specs[i].dmamap, specs[i].buf, specs[i].bufsize,
2013		    NULL, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
2014		if (r) {
2015			sc->sc_ctrlq.ctrlq_cmd_load_failed.ev_count++;
2016			goto err;
2017		}
2018		loaded++;
2019
2020	}
2021
2022	return r;
2023
2024err:
2025	for (i = 0; i < loaded; i++) {
2026		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2027	}
2028
2029	return r;
2030}
2031
2032static void
2033vioif_ctrl_unload_cmdspec(struct vioif_softc *sc,
2034    struct vioif_ctrl_cmdspec *specs, int nspecs)
2035{
2036	struct virtio_softc *vsc = sc->sc_virtio;
2037	int i;
2038
2039	for (i = 0; i < nspecs; i++) {
2040		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2041	}
2042}
2043
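/*
 * vioif_ctrl_send_command: enqueue the command header, the payload buffers
 * described by "specs" and the status buffer on the control virtqueue, kick
 * the device and sleep until vioif_ctrl_intr reports completion.  Returns 0
 * if the device acknowledged the command with VIRTIO_NET_OK, else EIO.
 */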
2044static int
2045vioif_ctrl_send_command(struct vioif_softc *sc, uint8_t class, uint8_t cmd,
2046    struct vioif_ctrl_cmdspec *specs, int nspecs)
2047{
2048	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2049	struct virtqueue *vq = ctrlq->ctrlq_vq;
2050	struct virtio_softc *vsc = sc->sc_virtio;
2051	int i, r, slot;
2052
2053	ctrlq->ctrlq_cmd->class = class;
2054	ctrlq->ctrlq_cmd->command = cmd;
2055
2056	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap,
2057	    0, sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_PREWRITE);
2058	for (i = 0; i < nspecs; i++) {
2059		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap,
2060		    0, specs[i].bufsize, BUS_DMASYNC_PREWRITE);
2061	}
2062	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap,
2063	    0, sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_PREREAD);
2064
2065	/* we need to explicitly (re)start vq intr when using RING EVENT IDX */
2066	if (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX)
2067		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
2068
2069	r = virtio_enqueue_prep(vsc, vq, &slot);
2070	if (r != 0)
2071		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2072	r = virtio_enqueue_reserve(vsc, vq, slot, nspecs + 2);
2073	if (r != 0)
2074		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2075	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_cmd_dmamap, true);
2076	for (i = 0; i < nspecs; i++) {
2077		virtio_enqueue(vsc, vq, slot, specs[i].dmamap, true);
2078	}
2079	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_status_dmamap, false);
2080	virtio_enqueue_commit(vsc, vq, slot, true);
2081
2082	/* wait for the command to complete */
2083	mutex_enter(&ctrlq->ctrlq_wait_lock);
2084	while (ctrlq->ctrlq_inuse != DONE)
2085		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
2086	mutex_exit(&ctrlq->ctrlq_wait_lock);
2087	/* already dequeued by vioif_ctrl_intr */
2088
2089	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap, 0,
2090	    sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
2091	for (i = 0; i < nspecs; i++) {
2092		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap, 0,
2093		    specs[i].bufsize, BUS_DMASYNC_POSTWRITE);
2094	}
2095	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap, 0,
2096	    sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_POSTREAD);
2097
2098	if (ctrlq->ctrlq_status->ack == VIRTIO_NET_OK)
2099		r = 0;
2100	else {
2101		device_printf(sc->sc_dev, "control command failed\n");
2102		sc->sc_ctrlq.ctrlq_cmd_failed.ev_count++;
2103		r = EIO;
2104	}
2105
2106	return r;
2107}
2108
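/* issue a VIRTIO_NET_CTRL_RX class command (promisc/allmulti) and wait for completion */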
2109static int
2110vioif_ctrl_rx(struct vioif_softc *sc, int cmd, bool onoff)
2111{
2112	struct virtio_net_ctrl_rx *rx = sc->sc_ctrlq.ctrlq_rx;
2113	struct vioif_ctrl_cmdspec specs[1];
2114	int r;
2115
2116	if (!sc->sc_has_ctrl)
2117		return ENOTSUP;
2118
2119	vioif_ctrl_acquire(sc);
2120
2121	rx->onoff = onoff;
2122	specs[0].dmamap = sc->sc_ctrlq.ctrlq_rx_dmamap;
2123	specs[0].buf = rx;
2124	specs[0].bufsize = sizeof(*rx);
2125
2126	r = vioif_ctrl_send_command(sc, VIRTIO_NET_CTRL_RX, cmd,
2127	    specs, __arraycount(specs));
2128
2129	vioif_ctrl_release(sc);
2130	return r;
2131}
2132
2133static int
2134vioif_set_promisc(struct vioif_softc *sc, bool onoff)
2135{
2136	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, onoff);
2137}
2138
2139static int
2140vioif_set_allmulti(struct vioif_softc *sc, bool onoff)
2141{
2142	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, onoff);
2143}
2144
2145/* issue VIRTIO_NET_CTRL_MAC_TABLE_SET command and wait for completion */
2146static int
2147vioif_set_rx_filter(struct vioif_softc *sc)
2148{
2149	/* the mac tables in ctrlq_mac_tbl_{uc,mc} were already filled by the caller */
2150	struct virtio_softc *vsc = sc->sc_virtio;
2151	struct virtio_net_ctrl_mac_tbl *mac_tbl_uc, *mac_tbl_mc;
2152	struct vioif_ctrl_cmdspec specs[2];
2153	int nspecs = __arraycount(specs);
2154	int r;
2155
2156	mac_tbl_uc = sc->sc_ctrlq.ctrlq_mac_tbl_uc;
2157	mac_tbl_mc = sc->sc_ctrlq.ctrlq_mac_tbl_mc;
2158
2159	if (!sc->sc_has_ctrl)
2160		return ENOTSUP;
2161
2162	vioif_ctrl_acquire(sc);
2163
2164	specs[0].dmamap = sc->sc_ctrlq.ctrlq_tbl_uc_dmamap;
2165	specs[0].buf = mac_tbl_uc;
2166	specs[0].bufsize = sizeof(*mac_tbl_uc)
2167	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_uc->nentries));
2168
2169	specs[1].dmamap = sc->sc_ctrlq.ctrlq_tbl_mc_dmamap;
2170	specs[1].buf = mac_tbl_mc;
2171	specs[1].bufsize = sizeof(*mac_tbl_mc)
2172	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_mc->nentries));
2173
2174	r = vioif_ctrl_load_cmdspec(sc, specs, nspecs);
2175	if (r != 0)
2176		goto out;
2177
2178	r = vioif_ctrl_send_command(sc,
2179	    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET,
2180	    specs, nspecs);
2181
2182	vioif_ctrl_unload_cmdspec(sc, specs, nspecs);
2183
2184out:
2185	vioif_ctrl_release(sc);
2186
2187	return r;
2188}
2189
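/*
 * vioif_set_mac_addr: push a changed interface address to the device, via
 * VIRTIO_NET_CTRL_MAC_ADDR_SET when that feature was negotiated, otherwise
 * by rewriting the MAC bytes in the device config space.
 */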
2190static int
2191vioif_set_mac_addr(struct vioif_softc *sc)
2192{
2193	struct virtio_net_ctrl_mac_addr *ma =
2194	    sc->sc_ctrlq.ctrlq_mac_addr;
2195	struct vioif_ctrl_cmdspec specs[1];
2196	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2197	int nspecs = __arraycount(specs);
2198	uint64_t features;
2199	int r;
2200	size_t i;
2201
2202	if (!sc->sc_has_ctrl)
2203		return ENOTSUP;
2204
2205	if (memcmp(CLLADDR(ifp->if_sadl), sc->sc_mac,
2206	    ETHER_ADDR_LEN) == 0) {
2207		return 0;
2208	}
2209
2210	memcpy(sc->sc_mac, CLLADDR(ifp->if_sadl), ETHER_ADDR_LEN);
2211
2212	features = virtio_features(sc->sc_virtio);
2213	if (features & VIRTIO_NET_F_CTRL_MAC_ADDR) {
2214		vioif_ctrl_acquire(sc);
2215
2216		memcpy(ma->mac, sc->sc_mac, ETHER_ADDR_LEN);
2217		specs[0].dmamap = sc->sc_ctrlq.ctrlq_mac_addr_dmamap;
2218		specs[0].buf = ma;
2219		specs[0].bufsize = sizeof(*ma);
2220
2221		r = vioif_ctrl_send_command(sc,
2222		    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_ADDR_SET,
2223		    specs, nspecs);
2224
2225		vioif_ctrl_release(sc);
2226	} else {
2227		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
2228			virtio_write_device_config_1(sc->sc_virtio,
2229			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
2230		}
2231		r = 0;
2232	}
2233
2234	return r;
2235}
2236
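/*
 * vioif_ctrl_mq_vq_pairs_set: tell the device how many rx/tx virtqueue
 * pairs to use (VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET).
 */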
2237static int
2238vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *sc, int nvq_pairs)
2239{
2240	struct virtio_net_ctrl_mq *mq = sc->sc_ctrlq.ctrlq_mq;
2241	struct vioif_ctrl_cmdspec specs[1];
2242	int r;
2243
2244	if (!sc->sc_has_ctrl)
2245		return ENOTSUP;
2246
2247	if (nvq_pairs <= 1)
2248		return EINVAL;
2249
2250	vioif_ctrl_acquire(sc);
2251
2252	mq->virtqueue_pairs = virtio_rw16(sc->sc_virtio, nvq_pairs);
2253	specs[0].dmamap = sc->sc_ctrlq.ctrlq_mq_dmamap;
2254	specs[0].buf = mq;
2255	specs[0].bufsize = sizeof(*mq);
2256
2257	r = vioif_ctrl_send_command(sc,
2258	    VIRTIO_NET_CTRL_MQ, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
2259	    specs, __arraycount(specs));
2260
2261	vioif_ctrl_release(sc);
2262
2263	return r;
2264}
2265
2266/* ctrl vq interrupt; wake up the command issuer */
2267static int
2268vioif_ctrl_intr(void *arg)
2269{
2270	struct vioif_ctrlqueue *ctrlq = arg;
2271	struct virtqueue *vq = ctrlq->ctrlq_vq;
2272	struct virtio_softc *vsc = vq->vq_owner;
2273	int r, slot;
2274
2275	if (virtio_vq_is_enqueued(vsc, vq) == false)
2276		return 0;
2277
2278	r = virtio_dequeue(vsc, vq, &slot, NULL);
2279	if (r == ENOENT)
2280		return 0;
2281	virtio_dequeue_commit(vsc, vq, slot);
2282
2283	mutex_enter(&ctrlq->ctrlq_wait_lock);
2284	ctrlq->ctrlq_inuse = DONE;
2285	cv_signal(&ctrlq->ctrlq_wait);
2286	mutex_exit(&ctrlq->ctrlq_wait_lock);
2287
2288	return 1;
2289}
2290
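/*
 * vioif_ifflags: apply IFF_PROMISC and IFF_ALLMULTI through the control
 * virtqueue; on failure, or without a control virtqueue, the corresponding
 * flag is forced on to reflect what the device actually receives.
 */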
2291static int
2292vioif_ifflags(struct vioif_softc *sc)
2293{
2294	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2295	bool onoff;
2296	int r;
2297
2298	if (!sc->sc_has_ctrl) {
2299		/* no ctrl vq; always promisc and allmulti */
2300		ifp->if_flags |= (IFF_PROMISC | IFF_ALLMULTI);
2301		return 0;
2302	}
2303
2304	onoff = ifp->if_flags & IFF_ALLMULTI ? true : false;
2305	r = vioif_set_allmulti(sc, onoff);
2306	if (r != 0) {
2307		log(LOG_WARNING,
2308		    "%s: couldn't %sable ALLMULTI\n",
2309		    ifp->if_xname, onoff ? "en" : "dis");
2310		if (onoff == false) {
2311			ifp->if_flags |= IFF_ALLMULTI;
2312		}
2313	}
2314
2315	onoff = ifp->if_flags & IFF_PROMISC ? true : false;
2316	r = vioif_set_promisc(sc, onoff);
2317	if (r != 0) {
2318		log(LOG_WARNING,
2319		    "%s: couldn't %sable PROMISC\n",
2320		    ifp->if_xname, onoff ? "en" : "dis");
2321		if (onoff == false) {
2322			ifp->if_flags |= IFF_PROMISC;
2323		}
2324	}
2325
2326	return 0;
2327}
2328
2329static int
2330vioif_ifflags_cb(struct ethercom *ec)
2331{
2332	struct ifnet *ifp = &ec->ec_if;
2333	struct vioif_softc *sc = ifp->if_softc;
2334
2335	return vioif_ifflags(sc);
2336}
2337
2338/*
2339 * If the multicast filter is small enough (<= MAXENTRIES), program the rx filter.
2340 * If the filter is too large, or contains address ranges, use ALLMULTI.
2341 * If programming the rx filter fails, fall back to ALLMULTI as well.
2342 */
2343static int
2344vioif_rx_filter(struct vioif_softc *sc)
2345{
2346	struct virtio_softc *vsc = sc->sc_virtio;
2347	struct ethercom *ec = &sc->sc_ethercom;
2348	struct ifnet *ifp = &ec->ec_if;
2349	struct ether_multi *enm;
2350	struct ether_multistep step;
2351	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2352	int nentries;
2353	bool allmulti = false;
2354	int r;
2355
2356	if (!sc->sc_has_ctrl) {
2357		goto set_ifflags;
2358	}
2359
2360	memcpy(ctrlq->ctrlq_mac_tbl_uc->macs[0],
2361	    CLLADDR(ifp->if_sadl), ETHER_ADDR_LEN);
2362
2363	nentries = 0;
2364	allmulti = false;
2365
2366	ETHER_LOCK(ec);
2367	for (ETHER_FIRST_MULTI(step, ec, enm); enm != NULL;
2368	    ETHER_NEXT_MULTI(step, enm)) {
2369		if (nentries >= VIRTIO_NET_CTRL_MAC_MAXENTRIES) {
2370			allmulti = true;
2371			break;
2372		}
2373		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
2374			allmulti = true;
2375			break;
2376		}
2377
2378		memcpy(ctrlq->ctrlq_mac_tbl_mc->macs[nentries],
2379		    enm->enm_addrlo, ETHER_ADDR_LEN);
2380		nentries++;
2381	}
2382	ETHER_UNLOCK(ec);
2383
2384	r = vioif_set_mac_addr(sc);
2385	if (r != 0) {
2386		log(LOG_WARNING, "%s: couldn't set MAC address\n",
2387		    ifp->if_xname);
2388	}
2389
2390	if (!allmulti) {
2391		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 1);
2392		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, nentries);
2393		r = vioif_set_rx_filter(sc);
2394		if (r != 0) {
2395			allmulti = true; /* fallback */
2396		}
2397	}
2398
2399	if (allmulti) {
2400		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 0);
2401		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, 0);
2402		r = vioif_set_rx_filter(sc);
2403		if (r != 0) {
2404			log(LOG_DEBUG, "%s: couldn't clear RX filter\n",
2405			    ifp->if_xname);
2406			/* what to do on failure? */
2407		}
2408
2409		ifp->if_flags |= IFF_ALLMULTI;
2410	}
2411
2412set_ifflags:
2413	r = vioif_ifflags(sc);
2414
2415	return r;
2416}
2417
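/*
 * vioif_get_link_status: read the link state from the device config space,
 * or assume the link is up when VIRTIO_NET_F_STATUS was not negotiated.
 */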
2418static int
2419vioif_get_link_status(struct vioif_softc *sc)
2420{
2421	struct virtio_softc *vsc = sc->sc_virtio;
2422	uint16_t status;
2423
2424	if (virtio_features(vsc) & VIRTIO_NET_F_STATUS)
2425		status = virtio_read_device_config_2(vsc,
2426		    VIRTIO_NET_CONFIG_STATUS);
2427	else
2428		status = VIRTIO_NET_S_LINK_UP;
2429
2430	if ((status & VIRTIO_NET_S_LINK_UP) != 0)
2431		return LINK_STATE_UP;
2432
2433	return LINK_STATE_DOWN;
2434}
2435
2436/* re-read the link state and propagate any change to the tx queues and the interface */
2437static void
2438vioif_update_link_status(struct vioif_softc *sc)
2439{
2440	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2441	struct vioif_txqueue *txq;
2442	bool active;
2443	int link, i;
2444
2445	mutex_enter(&sc->sc_lock);
2446
2447	link = vioif_get_link_status(sc);
2448
2449	if (link == sc->sc_link_state)
2450		goto done;
2451
2452	sc->sc_link_state = link;
2453
2454	active = VIOIF_IS_LINK_ACTIVE(sc);
2455	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
2456		txq = &sc->sc_txq[i];
2457
2458		mutex_enter(txq->txq_lock);
2459		txq->txq_link_active = active;
2460		mutex_exit(txq->txq_lock);
2461	}
2462
2463	if_link_state_change(ifp, sc->sc_link_state);
2464
2465done:
2466	mutex_exit(&sc->sc_lock);
2467}
2468
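/* config change interrupt: defer the handling to the control softint */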
2469static int
2470vioif_config_change(struct virtio_softc *vsc)
2471{
2472	struct vioif_softc *sc = device_private(virtio_child(vsc));
2473
2474	softint_schedule(sc->sc_ctl_softint);
2475	return 0;
2476}
2477
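/* control softint: refresh the link state and restart transmission */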
2478static void
2479vioif_ctl_softint(void *arg)
2480{
2481	struct vioif_softc *sc = arg;
2482	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2483
2484	vioif_update_link_status(sc);
2485	vioif_start(ifp);
2486}
2487
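/*
 * Thin wrappers around workqueue(9).  The "added" flag in struct vioif_work
 * ensures a work item is enqueued at most once until vioif_workq_work has
 * started running it.
 */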
2488static struct workqueue *
2489vioif_workq_create(const char *name, pri_t prio, int ipl, int flags)
2490{
2491	struct workqueue *wq;
2492	int error;
2493
2494	error = workqueue_create(&wq, name, vioif_workq_work, NULL,
2495	    prio, ipl, flags);
2496
2497	if (error)
2498		return NULL;
2499
2500	return wq;
2501}
2502
2503static void
2504vioif_workq_destroy(struct workqueue *wq)
2505{
2506
2507	workqueue_destroy(wq);
2508}
2509
2510static void
2511vioif_workq_work(struct work *wk, void *context)
2512{
2513	struct vioif_work *work;
2514
2515	work = container_of(wk, struct vioif_work, cookie);
2516
2517	atomic_store_relaxed(&work->added, 0);
2518	work->func(work->arg);
2519}
2520
2521static void
2522vioif_work_set(struct vioif_work *work, void (*func)(void *), void *arg)
2523{
2524
2525	memset(work, 0, sizeof(*work));
2526	work->func = func;
2527	work->arg = arg;
2528}
2529
2530static void
2531vioif_work_add(struct workqueue *wq, struct vioif_work *work)
2532{
2533
2534	if (atomic_load_relaxed(&work->added) != 0)
2535		return;
2536
2537	atomic_store_relaxed(&work->added, 1);
2538	kpreempt_disable();
2539	workqueue_enqueue(wq, &work->cookie, NULL);
2540	kpreempt_enable();
2541}
2542
2543static void
2544vioif_work_wait(struct workqueue *wq, struct vioif_work *work)
2545{
2546
2547	workqueue_wait(wq, &work->cookie);
2548}
2549
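/*
 * vioif_setup_sysctl: create the per-device sysctl subtree (under hw.)
 * controlling workqueue use and the rx/tx packet processing limits.
 */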
2550static int
2551vioif_setup_sysctl(struct vioif_softc *sc)
2552{
2553	const char *devname;
2554	struct sysctllog **log;
2555	const struct sysctlnode *rnode, *rxnode, *txnode;
2556	int error;
2557
2558	log = &sc->sc_sysctllog;
2559	devname = device_xname(sc->sc_dev);
2560
2561	error = sysctl_createv(log, 0, NULL, &rnode,
2562	    0, CTLTYPE_NODE, devname,
2563	    SYSCTL_DESCR("virtio-net information and settings"),
2564	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);
2565	if (error)
2566		goto out;
2567
2568	error = sysctl_createv(log, 0, &rnode, NULL,
2569	    CTLFLAG_READWRITE, CTLTYPE_BOOL, "txrx_workqueue",
2570	    SYSCTL_DESCR("Use workqueue for packet processing"),
2571	    NULL, 0, &sc->sc_txrx_workqueue_sysctl, 0, CTL_CREATE, CTL_EOL);
2572	if (error)
2573		goto out;
2574
2575	error = sysctl_createv(log, 0, &rnode, &rxnode,
2576	    0, CTLTYPE_NODE, "rx",
2577	    SYSCTL_DESCR("virtio-net information and settings for Rx"),
2578	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
2579	if (error)
2580		goto out;
2581
2582	error = sysctl_createv(log, 0, &rxnode, NULL,
2583	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
2584	    SYSCTL_DESCR("max number of Rx packets to process for interrupt processing"),
2585	    NULL, 0, &sc->sc_rx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
2586	if (error)
2587		goto out;
2588
2589	error = sysctl_createv(log, 0, &rxnode, NULL,
2590	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
2591	    SYSCTL_DESCR("max number of Rx packets to process for deferred processing"),
2592	    NULL, 0, &sc->sc_rx_process_limit, 0, CTL_CREATE, CTL_EOL);
2593	if (error)
2594		goto out;
2595
2596	error = sysctl_createv(log, 0, &rnode, &txnode,
2597	    0, CTLTYPE_NODE, "tx",
2598	    SYSCTL_DESCR("virtio-net information and settings for Tx"),
2599	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
2600	if (error)
2601		goto out;
2602
2603	error = sysctl_createv(log, 0, &txnode, NULL,
2604	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
2605	    SYSCTL_DESCR("max number of Tx packets to process for interrupt processing"),
2606	    NULL, 0, &sc->sc_tx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
2607	if (error)
2608		goto out;
2609
2610	error = sysctl_createv(log, 0, &txnode, NULL,
2611	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
2612	    SYSCTL_DESCR("max number of Tx packets to process for deferred processing"),
2613	    NULL, 0, &sc->sc_tx_process_limit, 0, CTL_CREATE, CTL_EOL);
2614
2615out:
2616	if (error)
2617		sysctl_teardown(log);
2618
2619	return error;
2620}
2621
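/* attach per-queue and control queue event counters */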
2622static void
2623vioif_setup_stats(struct vioif_softc *sc)
2624{
2625	struct vioif_rxqueue *rxq;
2626	struct vioif_txqueue *txq;
2627	int i;
2628
2629	for (i = 0; i < sc->sc_max_nvq_pairs; i++) {
2630		rxq = &sc->sc_rxq[i];
2631		txq = &sc->sc_txq[i];
2632
2633		snprintf(txq->txq_evgroup, sizeof(txq->txq_evgroup), "%s-TX%d",
2634		    device_xname(sc->sc_dev), i);
2635		evcnt_attach_dynamic(&txq->txq_defrag_failed, EVCNT_TYPE_MISC,
2636		    NULL, txq->txq_evgroup, "tx m_defrag() failed");
2637		evcnt_attach_dynamic(&txq->txq_mbuf_load_failed, EVCNT_TYPE_MISC,
2638		    NULL, txq->txq_evgroup, "tx dmamap load failed");
2639		evcnt_attach_dynamic(&txq->txq_enqueue_reserve_failed, EVCNT_TYPE_MISC,
2640		    NULL, txq->txq_evgroup, "virtio_enqueue_reserve failed");
2641
2642		snprintf(rxq->rxq_evgroup, sizeof(rxq->rxq_evgroup), "%s-RX%d",
2643		    device_xname(sc->sc_dev), i);
2644		evcnt_attach_dynamic(&rxq->rxq_mbuf_enobufs, EVCNT_TYPE_MISC,
2645		    NULL, rxq->rxq_evgroup, "no receive buffer");
2646		evcnt_attach_dynamic(&rxq->rxq_mbuf_load_failed, EVCNT_TYPE_MISC,
2647		    NULL, rxq->rxq_evgroup, "rx dmamap load failed");
2648		evcnt_attach_dynamic(&rxq->rxq_enqueue_reserve_failed, EVCNT_TYPE_MISC,
2649		    NULL, rxq->rxq_evgroup, "virtio_enqueue_reserve failed");
2650	}
2651
2652	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_load_failed, EVCNT_TYPE_MISC,
2653	    NULL, device_xname(sc->sc_dev), "control command dmamap load failed");
2654	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_failed, EVCNT_TYPE_MISC,
2655	    NULL, device_xname(sc->sc_dev), "control command failed");
2656}
2657
2658static void
2659vioif_intr_barrier(void)
2660{
2661
2662	/* wait for all in-flight interrupt handlers to finish */
2663	xc_barrier(0);
2664}
2665
2666MODULE(MODULE_CLASS_DRIVER, if_vioif, "virtio");
2667
2668#ifdef _MODULE
2669#include "ioconf.c"
2670#endif
2671
2672static int
2673if_vioif_modcmd(modcmd_t cmd, void *opaque)
2674{
2675	int error = 0;
2676
2677#ifdef _MODULE
2678	switch (cmd) {
2679	case MODULE_CMD_INIT:
2680		error = config_init_component(cfdriver_ioconf_if_vioif,
2681		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2682		break;
2683	case MODULE_CMD_FINI:
2684		error = config_fini_component(cfdriver_ioconf_if_vioif,
2685		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2686		break;
2687	default:
2688		error = ENOTTY;
2689		break;
2690	}
2691#endif
2692
2693	return error;
2694}
2695