1/*	$NetBSD: if_vioif.c,v 1.111 2024/03/21 12:33:21 isaki Exp $	*/
2
3/*
4 * Copyright (c) 2020 The NetBSD Foundation, Inc.
5 * Copyright (c) 2010 Minoura Makoto.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__KERNEL_RCSID(0, "$NetBSD: if_vioif.c,v 1.111 2024/03/21 12:33:21 isaki Exp $");
31
32#ifdef _KERNEL_OPT
33#include "opt_net_mpsafe.h"
34#endif
35
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/kernel.h>
39#include <sys/atomic.h>
40#include <sys/bus.h>
41#include <sys/condvar.h>
42#include <sys/device.h>
43#include <sys/evcnt.h>
44#include <sys/intr.h>
45#include <sys/kmem.h>
46#include <sys/mbuf.h>
47#include <sys/mutex.h>
48#include <sys/sockio.h>
49#include <sys/syslog.h>
50#include <sys/cpu.h>
51#include <sys/module.h>
52#include <sys/pcq.h>
53#include <sys/workqueue.h>
54#include <sys/xcall.h>
55
56#include <dev/pci/virtioreg.h>
57#include <dev/pci/virtiovar.h>
58
59#include <net/if.h>
60#include <net/if_dl.h>
61#include <net/if_media.h>
62#include <net/if_ether.h>
63
64#include <net/bpf.h>
65
66#include "ioconf.h"
67
/*
 * When the kernel is built with NET_MPSAFE, turn on both the MP-safe
 * paths of this driver (VIOIF_MPSAFE: MP-safe interrupts, softints and
 * ifnet) and multiqueue support (VIOIF_MULTIQ: request VIRTIO_NET_F_MQ).
 */
#ifdef NET_MPSAFE
#define VIOIF_MPSAFE	1
#define VIOIF_MULTIQ	1
#endif
72
73/*
74 * if_vioifreg.h:
75 */
/*
 * Configuration registers: offsets handed to
 * virtio_read_device_config_*() / virtio_write_device_config_*().
 * The trailing comments give the width of each register.
 */
#define VIRTIO_NET_CONFIG_MAC		 0 /* 8bit x 6byte */
#define VIRTIO_NET_CONFIG_STATUS	 6 /* 16bit */
#define VIRTIO_NET_CONFIG_MAX_VQ_PAIRS	 8 /* 16bit */
#define VIRTIO_NET_CONFIG_MTU		10 /* 16bit */
81
/*
 * Feature bits.  vioif_attach() requests a subset of these through
 * virtio_child_attach_start() and then tests the negotiated set
 * returned by virtio_features().
 */
#define VIRTIO_NET_F_CSUM		__BIT(0)
#define VIRTIO_NET_F_GUEST_CSUM		__BIT(1)
#define VIRTIO_NET_F_MAC		__BIT(5)
#define VIRTIO_NET_F_GSO		__BIT(6)
#define VIRTIO_NET_F_GUEST_TSO4		__BIT(7)
#define VIRTIO_NET_F_GUEST_TSO6		__BIT(8)
#define VIRTIO_NET_F_GUEST_ECN		__BIT(9)
#define VIRTIO_NET_F_GUEST_UFO		__BIT(10)
#define VIRTIO_NET_F_HOST_TSO4		__BIT(11)
#define VIRTIO_NET_F_HOST_TSO6		__BIT(12)
#define VIRTIO_NET_F_HOST_ECN		__BIT(13)
#define VIRTIO_NET_F_HOST_UFO		__BIT(14)
#define VIRTIO_NET_F_MRG_RXBUF		__BIT(15)
#define VIRTIO_NET_F_STATUS		__BIT(16)
#define VIRTIO_NET_F_CTRL_VQ		__BIT(17)
#define VIRTIO_NET_F_CTRL_RX		__BIT(18)
#define VIRTIO_NET_F_CTRL_VLAN		__BIT(19)
#define VIRTIO_NET_F_CTRL_RX_EXTRA	__BIT(20)
#define VIRTIO_NET_F_GUEST_ANNOUNCE	__BIT(21)
#define VIRTIO_NET_F_MQ			__BIT(22)
#define VIRTIO_NET_F_CTRL_MAC_ADDR 	__BIT(23)
104
/*
 * Bit-name table passed to virtio_child_attach_start() together with
 * the requested features.  Each entry is "b\xNN" (the bit number in
 * hex) followed by a NUL-terminated name, one per VIRTIO_NET_F_* bit
 * above, appended to the common virtio names.
 * NOTE(review): this looks like the snprintb(3) "new" format -- confirm.
 */
#define VIRTIO_NET_FLAG_BITS			\
	VIRTIO_COMMON_FLAG_BITS			\
	"b\x17" "CTRL_MAC\0"			\
	"b\x16" "MQ\0"				\
	"b\x15" "GUEST_ANNOUNCE\0"		\
	"b\x14" "CTRL_RX_EXTRA\0"		\
	"b\x13" "CTRL_VLAN\0"			\
	"b\x12" "CTRL_RX\0"			\
	"b\x11" "CTRL_VQ\0"			\
	"b\x10" "STATUS\0"			\
	"b\x0f" "MRG_RXBUF\0"			\
	"b\x0e" "HOST_UFO\0"			\
	"b\x0d" "HOST_ECN\0"			\
	"b\x0c" "HOST_TSO6\0"			\
	"b\x0b" "HOST_TSO4\0"			\
	"b\x0a" "GUEST_UFO\0"			\
	"b\x09" "GUEST_ECN\0"			\
	"b\x08" "GUEST_TSO6\0"			\
	"b\x07" "GUEST_TSO4\0"			\
	"b\x06" "GSO\0"				\
	"b\x05" "MAC\0"				\
	"b\x01" "GUEST_CSUM\0"			\
	"b\x00" "CSUM\0"
128
129/* Status */
130#define VIRTIO_NET_S_LINK_UP	1
131
/*
 * Packet header structure.
 *
 * The driver does not always use the whole structure: vioif_attach()
 * sets sc_hdr_size to sizeof(struct virtio_net_hdr) when
 * VIRTIO_NET_F_MRG_RXBUF or VIRTIO_F_VERSION_1 was negotiated, and to
 * offsetof(struct virtio_net_hdr, num_buffers) otherwise.
 */
struct virtio_net_hdr {
	uint8_t		flags;
	uint8_t		gso_type;
	uint16_t	hdr_len;
	uint16_t	gso_size;
	uint16_t	csum_start;
	uint16_t	csum_offset;

	uint16_t	num_buffers; /* VIRTIO_NET_F_MRG_RXBUF enabled or v1 */
} __packed;
143
/*
 * Values for the 'flags' and 'gso_type' members of struct
 * virtio_net_hdr; the trailing comment on each line names the member
 * it applies to.
 */
#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
#define VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
#define VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
#define VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
#define VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
#define VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */

/* 64KiB of payload plus one Ethernet header */
#define VIRTIO_NET_MAX_GSO_LEN		(65536+ETHER_HDR_LEN)
152
/*
 * Control virtqueue.
 *
 * A command is identified by a (class, command) pair.  Below, the
 * unindented "#define"s are class codes and the indented "# define"s
 * are the command codes belonging to the class directly above them.
 */
struct virtio_net_ctrl_cmd {
	uint8_t	class;
	uint8_t	command;
} __packed;
#define VIRTIO_NET_CTRL_RX		0
# define VIRTIO_NET_CTRL_RX_PROMISC	0
# define VIRTIO_NET_CTRL_RX_ALLMULTI	1

#define VIRTIO_NET_CTRL_MAC		1
# define VIRTIO_NET_CTRL_MAC_TABLE_SET	0
# define  VIRTIO_NET_CTRL_MAC_ADDR_SET	1

#define VIRTIO_NET_CTRL_VLAN		2
# define VIRTIO_NET_CTRL_VLAN_ADD	0
# define VIRTIO_NET_CTRL_VLAN_DEL	1

/*
 * VQ_PAIRS_MAX is also the upper bound vioif_attach() enforces on the
 * pair count read from VIRTIO_NET_CONFIG_MAX_VQ_PAIRS.
 */
#define VIRTIO_NET_CTRL_MQ			4
# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET	0
# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN	1
# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX	0x8000
174
/*
 * Return value for a command sent via the control virtqueue: a single
 * 'ack' byte holding VIRTIO_NET_OK or VIRTIO_NET_ERR.
 */
struct virtio_net_ctrl_status {
	uint8_t	ack;
} __packed;
#define VIRTIO_NET_OK			0
#define VIRTIO_NET_ERR			1
180
/*
 * Parameter blocks that accompany control commands.  All are __packed
 * because their layout is shared with the device.
 */

/* Parameter for a VIRTIO_NET_CTRL_RX class command: on/off switch. */
struct virtio_net_ctrl_rx {
	uint8_t	onoff;
} __packed;

/*
 * MAC address filter table: an entry count followed by that many
 * ETHER_ADDR_LEN-byte addresses (flexible array member).
 */
struct virtio_net_ctrl_mac_tbl {
	uint32_t nentries;
	uint8_t macs[][ETHER_ADDR_LEN];
} __packed;

/* A single MAC address. */
struct virtio_net_ctrl_mac_addr {
	uint8_t mac[ETHER_ADDR_LEN];
} __packed;

/* A VLAN id. */
struct virtio_net_ctrl_vlan {
	uint16_t id;
} __packed;

/* Number of virtqueue pairs for a VIRTIO_NET_CTRL_MQ class command. */
struct virtio_net_ctrl_mq {
	uint16_t virtqueue_pairs;
} __packed;
201
202/*
203 * if_vioifvar.h:
204 */
205
206/*
207 * Locking notes:
208 * + a field in vioif_netqueue is protected by netq_lock (a spin mutex)
 *      - more than one lock cannot be held at once
210 * + a field in vioif_tx_context and vioif_rx_context is also protected
211 *   by netq_lock.
212 * + ctrlq_inuse is protected by ctrlq_wait_lock.
213 *      - other fields in vioif_ctrlqueue are protected by ctrlq_inuse
214 *      - netq_lock cannot be held along with ctrlq_wait_lock
215 * + fields in vioif_softc except queues are protected by
216 *   sc->sc_lock(an adaptive mutex)
217 *      - the lock is held before acquisition of other locks
218 */
219
/*
 * Groups a DMA map with a buffer pointer and the buffer's size.
 * NOTE(review): presumably describes one argument buffer of a control
 * command -- confirm against the users of this structure.
 */
struct vioif_ctrl_cmdspec {
	bus_dmamap_t	dmamap;
	void		*buf;
	bus_size_t	bufsize;
};
225
/*
 * A workqueue item bundled with the callback and argument to run.
 * Managed through vioif_work_set(), vioif_work_add() and
 * vioif_work_wait().
 * NOTE(review): 'added' presumably records whether the item is
 * currently queued -- confirm in vioif_work_add().
 */
struct vioif_work {
	struct work	 cookie;
	void		(*func)(void *);
	void		*arg;
	unsigned int	 added;
};
232
/*
 * Per-frame bookkeeping: a virtio_net_hdr and an mbuf, each paired
 * with its own DMA map.
 */
struct vioif_net_map {
	struct virtio_net_hdr	*vnm_hdr;
	bus_dmamap_t		 vnm_hdr_map;
	struct mbuf		*vnm_mbuf;
	bus_dmamap_t		 vnm_mbuf_map;
};
239
/*
 * Network queue numbering.  Queues are laid out in rx/tx pairs: pair n
 * owns queue 2n (rx) and queue 2n+1 (tx).
 *
 *   VIOIF_NETQ_DIR(qid)      direction of a queue (VIOIF_NETQ_RX/TX)
 *   VIOIF_NETQ_PAIRIDX(qid)  pair index a queue belongs to
 *   VIOIF_NETQ_RXQID(n)      queue id of the rx queue of pair n
 *   VIOIF_NETQ_TXQID(n)      queue id of the tx queue of pair n
 */
#define VIOIF_NETQ_RX		0
#define VIOIF_NETQ_TX		1
#define VIOIF_NETQ_IDX		2
#define VIOIF_NETQ_DIR(n)	((n) % VIOIF_NETQ_IDX)
#define VIOIF_NETQ_PAIRIDX(n)	((n) / VIOIF_NETQ_IDX)
#define VIOIF_NETQ_RXQID(n)	((n) * VIOIF_NETQ_IDX + VIOIF_NETQ_RX)
#define VIOIF_NETQ_TXQID(n)	((n) * VIOIF_NETQ_IDX + VIOIF_NETQ_TX)
247
/*
 * State of one network queue (either direction).  Per the locking
 * notes, fields are protected by netq_lock, a spin mutex.
 */
struct vioif_netqueue {
	kmutex_t		 netq_lock;
	struct virtqueue	*netq_vq;
	/* set for the duration of vioif_stop(), cleared when it finishes */
	bool			 netq_stopping;
	/* set before a handler is scheduled via vioif_net_sched_handle() */
	bool			 netq_running_handle;
	/* metadata array for received / sent frames, in DMA memory */
	void			*netq_maps_kva;
	struct vioif_net_map	*netq_maps;

	/*
	 * Deferred processing contexts.
	 * NOTE(review): netq_workqueue presumably selects between the
	 * softint and the workqueue -- confirm in vioif_net_sched_handle().
	 */
	void			*netq_softint;
	struct vioif_work	 netq_work;
	bool			 netq_workqueue;

	/* event counters; netq_evgroup is the group name they attach under */
	char			 netq_evgroup[32];
	struct evcnt		 netq_mbuf_load_failed;
	struct evcnt		 netq_enqueue_failed;

	/* struct vioif_rx_context or vioif_tx_context, by direction */
	void			*netq_ctx;
};
266
/*
 * Transmit-only state, reached through netq_ctx of a tx queue.  Per the
 * locking notes, protected by the owning queue's netq_lock.
 */
struct vioif_tx_context {
	bool			 txc_link_active;
	bool			 txc_no_free_slots;
	/* packets queued by vioif_transmit() via pcq_put() */
	pcq_t			*txc_intrq;
	void			*txc_deferred_transmit;

	/* event counter: "m_defrag() failed" */
	struct evcnt		 txc_defrag_failed;
};
275
/*
 * Receive-only state, reached through netq_ctx of an rx queue.  Per the
 * locking notes, protected by the owning queue's netq_lock.
 */
struct vioif_rx_context {
	/* event counter: "no receive buffer" */
	struct evcnt		 rxc_mbuf_enobufs;
};
/*
 * State of the control virtqueue.
 *
 * Per the locking notes, ctrlq_inuse is protected by ctrlq_wait_lock
 * and the remaining fields are protected by ctrlq_inuse.
 */
struct vioif_ctrlqueue {
	struct virtqueue		*ctrlq_vq;
	/* initialised to FREE in vioif_attach() */
	enum {
		FREE, INUSE, DONE
	}				ctrlq_inuse;
	kcondvar_t			ctrlq_wait;
	kmutex_t			ctrlq_wait_lock;
	struct lwp			*ctrlq_owner;

	/* command, status and parameter buffers; only one of each exists */
	struct virtio_net_ctrl_cmd	*ctrlq_cmd;
	struct virtio_net_ctrl_status	*ctrlq_status;
	struct virtio_net_ctrl_rx	*ctrlq_rx;
	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_uc;
	struct virtio_net_ctrl_mac_tbl	*ctrlq_mac_tbl_mc;
	struct virtio_net_ctrl_mac_addr	*ctrlq_mac_addr;
	struct virtio_net_ctrl_mq	*ctrlq_mq;

	/* DMA maps for the buffers above */
	bus_dmamap_t			ctrlq_cmd_dmamap;
	bus_dmamap_t			ctrlq_status_dmamap;
	bus_dmamap_t			ctrlq_rx_dmamap;
	bus_dmamap_t			ctrlq_tbl_uc_dmamap;
	bus_dmamap_t			ctrlq_tbl_mc_dmamap;
	bus_dmamap_t			ctrlq_mac_addr_dmamap;
	bus_dmamap_t			ctrlq_mq_dmamap;

	/* event counters, attached under the device name */
	struct evcnt			ctrlq_cmd_load_failed;
	struct evcnt			ctrlq_cmd_failed;
};
307
/*
 * Per-device state.  Per the locking notes, fields other than the
 * queues are protected by sc_lock (an adaptive mutex), which is taken
 * before any other lock.
 */
struct vioif_softc {
	device_t		sc_dev;
	kmutex_t		sc_lock;
	struct sysctllog	*sc_sysctllog;

	struct virtio_softc	*sc_virtio;
	struct virtqueue	*sc_vqs;
	/* size of the virtio_net_hdr in use; chosen in vioif_attach() */
	u_int			 sc_hdr_size;

	/*
	 * Queue pair counts:
	 *   max: read from the device (1 without multiqueue)
	 *   req: MIN(max, ncpu), what vioif_init() asks the device for
	 *   act: what is in effect; falls back to 1 if the request fails
	 */
	int			sc_max_nvq_pairs;
	int			sc_req_nvq_pairs;
	int			sc_act_nvq_pairs;

	uint8_t			sc_mac[ETHER_ADDR_LEN];
	struct ethercom		sc_ethercom;
	int			sc_link_state;

	/* network queues, indexed by queue id (see VIOIF_NETQ_*) */
	struct vioif_netqueue	*sc_netqs;

	/* true when both CTRL_VQ and CTRL_RX were negotiated */
	bool			sc_has_ctrl;
	struct vioif_ctrlqueue	sc_ctrlq;

	bus_dma_segment_t	 sc_segs[1];
	void			*sc_dmamem;
	void			*sc_kmem;

	/* softint running vioif_cfg_softint() */
	void			*sc_cfg_softint;

	/* packet processing knobs, exported by vioif_setup_sysctl() */
	struct workqueue	*sc_txrx_workqueue;
	bool			 sc_txrx_workqueue_sysctl;
	u_int			 sc_tx_intr_process_limit;
	u_int			 sc_tx_process_limit;
	u_int			 sc_rx_intr_process_limit;
	u_int			 sc_rx_process_limit;
};
/* Fixed sizing limits (marked XXX by the original author). */
#define VIRTIO_NET_TX_MAXNSEGS		(16) /* XXX */
#define VIRTIO_NET_CTRL_MAC_MAXENTRIES	(64) /* XXX */

/*
 * Initial values of the sc_{tx,rx}_{intr_,}process_limit knobs,
 * assigned in vioif_attach().
 */
#define VIOIF_TX_INTR_PROCESS_LIMIT	256
#define VIOIF_TX_PROCESS_LIMIT		256
#define VIOIF_RX_INTR_PROCESS_LIMIT	0U
#define VIOIF_RX_PROCESS_LIMIT		256

/* Priority of the tx/rx workqueue created in vioif_attach(). */
#define VIOIF_WORKQUEUE_PRI		PRI_SOFTNET
/* True iff the cached link state is LINK_STATE_UP. */
#define VIOIF_IS_LINK_ACTIVE(_sc)	((_sc)->sc_link_state == LINK_STATE_UP ? \
					    true : false)
354
355/* cfattach interface functions */
356static int	vioif_match(device_t, cfdata_t, void *);
357static void	vioif_attach(device_t, device_t, void *);
358static int	vioif_finalize_teardown(device_t);
359
360/* ifnet interface functions */
361static int	vioif_init(struct ifnet *);
362static void	vioif_stop(struct ifnet *, int);
363static void	vioif_start(struct ifnet *);
364static int	vioif_transmit(struct ifnet *, struct mbuf *);
365static int	vioif_ioctl(struct ifnet *, u_long, void *);
366static void	vioif_watchdog(struct ifnet *);
367static int	vioif_ifflags(struct vioif_softc *);
368static int	vioif_ifflags_cb(struct ethercom *);
369
370/* tx & rx */
371static int	vioif_netqueue_init(struct vioif_softc *,
372		    struct virtio_softc *, size_t, u_int);
373static void	vioif_netqueue_teardown(struct vioif_softc *,
374		    struct virtio_softc *, size_t);
375static void	vioif_net_intr_enable(struct vioif_softc *,
376		    struct virtio_softc *);
377static void	vioif_net_intr_disable(struct vioif_softc *,
378		    struct virtio_softc *);
379static void	vioif_net_sched_handle(struct vioif_softc *,
380		    struct vioif_netqueue *);
381
382/* rx */
383static void	vioif_populate_rx_mbufs_locked(struct vioif_softc *,
384		    struct vioif_netqueue *);
385static int	vioif_rx_intr(void *);
386static void	vioif_rx_handle(void *);
387static void	vioif_rx_queue_clear(struct vioif_softc *,
388		    struct virtio_softc *, struct vioif_netqueue *);
389
390/* tx */
391static void	vioif_start_locked(struct ifnet *, struct vioif_netqueue *);
392static void	vioif_transmit_locked(struct ifnet *, struct vioif_netqueue *);
393static void	vioif_deferred_transmit(void *);
394static int	vioif_tx_intr(void *);
395static void	vioif_tx_handle(void *);
396static void	vioif_tx_queue_clear(struct vioif_softc *, struct virtio_softc *,
397		    struct vioif_netqueue *);
398
399/* controls */
400static int	vioif_ctrl_intr(void *);
401static int	vioif_ctrl_rx(struct vioif_softc *, int, bool);
402static int	vioif_set_promisc(struct vioif_softc *, bool);
403static int	vioif_set_allmulti(struct vioif_softc *, bool);
404static int	vioif_set_rx_filter(struct vioif_softc *);
405static int	vioif_rx_filter(struct vioif_softc *);
406static int	vioif_set_mac_addr(struct vioif_softc *);
407static int	vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *, int);
408
409/* config interrupt */
410static int	vioif_config_change(struct virtio_softc *);
411static void	vioif_cfg_softint(void *);
412static void	vioif_update_link_status(struct vioif_softc *);
413
414/* others */
415static void	vioif_alloc_queues(struct vioif_softc *);
416static void	vioif_free_queues(struct vioif_softc *);
417static int	vioif_alloc_mems(struct vioif_softc *);
418static struct workqueue*
419		vioif_workq_create(const char *, pri_t, int, int);
420static void	vioif_workq_destroy(struct workqueue *);
421static void	vioif_work_set(struct vioif_work *, void(*)(void *), void *);
422static void	vioif_work_add(struct workqueue *, struct vioif_work *);
423static void	vioif_work_wait(struct workqueue *, struct vioif_work *);
424static int	vioif_setup_sysctl(struct vioif_softc *);
425static void	vioif_setup_stats(struct vioif_softc *);
426
/*
 * Autoconfiguration glue: softc size plus the match and attach
 * routines.
 * NOTE(review): the two NULL arguments are presumably the detach and
 * activate hooks, i.e. none are provided -- confirm against
 * CFATTACH_DECL_NEW().
 */
CFATTACH_DECL_NEW(vioif, sizeof(struct vioif_softc),
		  vioif_match, vioif_attach, NULL, NULL);
429
/*
 * Wait until all interrupt handlers have finished, using a cross-call
 * barrier.
 */
static void
vioif_intr_barrier(void)
{
	const unsigned int xc_flags = 0;

	/* returns once every running interrupt handler has completed */
	xc_barrier(xc_flags);
}
437
438static void
439vioif_notify(struct virtio_softc *vsc, struct virtqueue *vq)
440{
441
442	virtio_enqueue_commit(vsc, vq, -1, true);
443}
444
445static int
446vioif_match(device_t parent, cfdata_t match, void *aux)
447{
448	struct virtio_attach_args *va = aux;
449
450	if (va->sc_childdevid == VIRTIO_DEVICE_ID_NETWORK)
451		return 1;
452
453	return 0;
454}
455
456static void
457vioif_attach(device_t parent, device_t self, void *aux)
458{
459	struct vioif_softc *sc = device_private(self);
460	struct virtio_softc *vsc = device_private(parent);
461	struct vioif_netqueue *txq0;
462	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
463	uint64_t features, req_features;
464	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
465	u_int softint_flags;
466	int r, i, req_flags;
467	char xnamebuf[MAXCOMLEN];
468	size_t nvqs;
469
470	if (virtio_child(vsc) != NULL) {
471		aprint_normal(": child already attached for %s; "
472		    "something wrong...\n", device_xname(parent));
473		return;
474	}
475
476	sc->sc_dev = self;
477	sc->sc_virtio = vsc;
478	sc->sc_link_state = LINK_STATE_UNKNOWN;
479
480	sc->sc_max_nvq_pairs = 1;
481	sc->sc_req_nvq_pairs = 1;
482	sc->sc_act_nvq_pairs = 1;
483	sc->sc_txrx_workqueue_sysctl = true;
484	sc->sc_tx_intr_process_limit = VIOIF_TX_INTR_PROCESS_LIMIT;
485	sc->sc_tx_process_limit = VIOIF_TX_PROCESS_LIMIT;
486	sc->sc_rx_intr_process_limit = VIOIF_RX_INTR_PROCESS_LIMIT;
487	sc->sc_rx_process_limit = VIOIF_RX_PROCESS_LIMIT;
488
489	mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);
490
491	snprintf(xnamebuf, sizeof(xnamebuf), "%s_txrx", device_xname(self));
492	sc->sc_txrx_workqueue = vioif_workq_create(xnamebuf, VIOIF_WORKQUEUE_PRI,
493	    IPL_NET, WQ_PERCPU | WQ_MPSAFE);
494	if (sc->sc_txrx_workqueue == NULL)
495		goto err;
496
497	req_flags = 0;
498
499#ifdef VIOIF_MPSAFE
500	req_flags |= VIRTIO_F_INTR_MPSAFE;
501#endif
502	req_flags |= VIRTIO_F_INTR_MSIX;
503
504	req_features =
505	    VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | VIRTIO_NET_F_CTRL_VQ |
506	    VIRTIO_NET_F_CTRL_RX | VIRTIO_F_NOTIFY_ON_EMPTY;
507	req_features |= VIRTIO_F_RING_EVENT_IDX;
508	req_features |= VIRTIO_NET_F_CTRL_MAC_ADDR;
509#ifdef VIOIF_MULTIQ
510	req_features |= VIRTIO_NET_F_MQ;
511#endif
512
513	virtio_child_attach_start(vsc, self, IPL_NET,
514	    req_features, VIRTIO_NET_FLAG_BITS);
515	features = virtio_features(vsc);
516
517	if (features == 0)
518		goto err;
519
520	if (features & VIRTIO_NET_F_MAC) {
521		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
522			sc->sc_mac[i] = virtio_read_device_config_1(vsc,
523			    VIRTIO_NET_CONFIG_MAC + i);
524		}
525	} else {
526		/* code stolen from sys/net/if_tap.c */
527		struct timeval tv;
528		uint32_t ui;
529		getmicrouptime(&tv);
530		ui = (tv.tv_sec ^ tv.tv_usec) & 0xffffff;
531		memcpy(sc->sc_mac+3, (uint8_t *)&ui, 3);
532		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
533			virtio_write_device_config_1(vsc,
534			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
535		}
536	}
537
538	/* 'Ethernet' with capital follows other ethernet driver attachment */
539	aprint_normal_dev(self, "Ethernet address %s\n",
540	    ether_sprintf(sc->sc_mac));
541
542	if (features & (VIRTIO_NET_F_MRG_RXBUF | VIRTIO_F_VERSION_1)) {
543		sc->sc_hdr_size = sizeof(struct virtio_net_hdr);
544	} else {
545		sc->sc_hdr_size = offsetof(struct virtio_net_hdr, num_buffers);
546	}
547
548	if ((features & VIRTIO_NET_F_CTRL_VQ) &&
549	    (features & VIRTIO_NET_F_CTRL_RX)) {
550		sc->sc_has_ctrl = true;
551
552		cv_init(&ctrlq->ctrlq_wait, "ctrl_vq");
553		mutex_init(&ctrlq->ctrlq_wait_lock, MUTEX_DEFAULT, IPL_NET);
554		ctrlq->ctrlq_inuse = FREE;
555	} else {
556		sc->sc_has_ctrl = false;
557	}
558
559	if (sc->sc_has_ctrl && (features & VIRTIO_NET_F_MQ)) {
560		sc->sc_max_nvq_pairs = virtio_read_device_config_2(vsc,
561		    VIRTIO_NET_CONFIG_MAX_VQ_PAIRS);
562
563		if (sc->sc_max_nvq_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
564			goto err;
565
566		/* Limit the number of queue pairs to use */
567		sc->sc_req_nvq_pairs = MIN(sc->sc_max_nvq_pairs, ncpu);
568
569		if (sc->sc_max_nvq_pairs > 1)
570			req_flags |= VIRTIO_F_INTR_PERVQ;
571	}
572
573	vioif_alloc_queues(sc);
574
575#ifdef VIOIF_MPSAFE
576	softint_flags = SOFTINT_NET | SOFTINT_MPSAFE;
577#else
578	softint_flags = SOFTINT_NET;
579#endif
580
581	/*
582	 * Initialize network queues
583	 */
584	nvqs = sc->sc_max_nvq_pairs * 2;
585	for (i = 0; i < nvqs; i++) {
586		r = vioif_netqueue_init(sc, vsc, i, softint_flags);
587		if (r != 0)
588			goto err;
589	}
590
591	if (sc->sc_has_ctrl) {
592		int ctrlq_idx = nvqs;
593
594		nvqs++;
595		/*
596		 * Allocating a virtqueue for control channel
597		 */
598		sc->sc_ctrlq.ctrlq_vq = &sc->sc_vqs[ctrlq_idx];
599		virtio_init_vq(vsc, ctrlq->ctrlq_vq, ctrlq_idx,
600		    vioif_ctrl_intr, ctrlq);
601
602		r = virtio_alloc_vq(vsc, ctrlq->ctrlq_vq, NBPG, 1, "control");
603		if (r != 0) {
604			aprint_error_dev(self, "failed to allocate "
605			    "a virtqueue for control channel, error code %d\n",
606			    r);
607
608			sc->sc_has_ctrl = false;
609			cv_destroy(&ctrlq->ctrlq_wait);
610			mutex_destroy(&ctrlq->ctrlq_wait_lock);
611		}
612	}
613
614	sc->sc_cfg_softint = softint_establish(softint_flags,
615	    vioif_cfg_softint, sc);
616	if (sc->sc_cfg_softint == NULL) {
617		aprint_error_dev(self, "cannot establish ctl softint\n");
618		goto err;
619	}
620
621	if (vioif_alloc_mems(sc) < 0)
622		goto err;
623
624	r = virtio_child_attach_finish(vsc, sc->sc_vqs, nvqs,
625	    vioif_config_change, req_flags);
626	if (r != 0)
627		goto err;
628
629	if (vioif_setup_sysctl(sc) != 0) {
630		aprint_error_dev(self, "unable to create sysctl node\n");
631		/* continue */
632	}
633
634	vioif_setup_stats(sc);
635
636	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
637	ifp->if_softc = sc;
638	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
639#ifdef VIOIF_MPSAFE
640	ifp->if_extflags = IFEF_MPSAFE;
641#endif
642	ifp->if_start = vioif_start;
643	if (sc->sc_req_nvq_pairs > 1)
644		ifp->if_transmit = vioif_transmit;
645	ifp->if_ioctl = vioif_ioctl;
646	ifp->if_init = vioif_init;
647	ifp->if_stop = vioif_stop;
648	ifp->if_capabilities = 0;
649	ifp->if_watchdog = vioif_watchdog;
650	txq0 = &sc->sc_netqs[VIOIF_NETQ_TXQID(0)];
651	IFQ_SET_MAXLEN(&ifp->if_snd, MAX(txq0->netq_vq->vq_num, IFQ_MAXLEN));
652	IFQ_SET_READY(&ifp->if_snd);
653
654	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
655
656	if_attach(ifp);
657	if_deferred_start_init(ifp, NULL);
658	ether_ifattach(ifp, sc->sc_mac);
659	ether_set_ifflags_cb(&sc->sc_ethercom, vioif_ifflags_cb);
660
661	return;
662
663err:
664	nvqs = sc->sc_max_nvq_pairs * 2;
665	for (i = 0; i < nvqs; i++) {
666		vioif_netqueue_teardown(sc, vsc, i);
667	}
668
669	if (sc->sc_has_ctrl) {
670		cv_destroy(&ctrlq->ctrlq_wait);
671		mutex_destroy(&ctrlq->ctrlq_wait_lock);
672		virtio_free_vq(vsc, ctrlq->ctrlq_vq);
673		ctrlq->ctrlq_vq = NULL;
674	}
675
676	vioif_free_queues(sc);
677	mutex_destroy(&sc->sc_lock);
678	virtio_child_attach_failed(vsc);
679	config_finalize_register(self, vioif_finalize_teardown);
680
681	return;
682}
683
684static int
685vioif_finalize_teardown(device_t self)
686{
687	struct vioif_softc *sc = device_private(self);
688
689	if (sc->sc_txrx_workqueue != NULL) {
690		vioif_workq_destroy(sc->sc_txrx_workqueue);
691		sc->sc_txrx_workqueue = NULL;
692	}
693
694	return 0;
695}
696
697/*
698 * Interface functions for ifnet
699 */
700static int
701vioif_init(struct ifnet *ifp)
702{
703	struct vioif_softc *sc = ifp->if_softc;
704	struct virtio_softc *vsc = sc->sc_virtio;
705	struct vioif_netqueue *netq;
706	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
707	int r, i;
708
709	vioif_stop(ifp, 0);
710
711	r = virtio_reinit_start(vsc);
712	if (r != 0) {
713		log(LOG_ERR, "%s: reset failed\n", ifp->if_xname);
714		return EIO;
715	}
716
717	virtio_negotiate_features(vsc, virtio_features(vsc));
718
719	for (i = 0; i < sc->sc_req_nvq_pairs; i++) {
720		netq = &sc->sc_netqs[VIOIF_NETQ_RXQID(i)];
721
722		mutex_enter(&netq->netq_lock);
723		vioif_populate_rx_mbufs_locked(sc, netq);
724		mutex_exit(&netq->netq_lock);
725	}
726
727	virtio_reinit_end(vsc);
728
729	if (sc->sc_has_ctrl)
730		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
731
732	r = vioif_ctrl_mq_vq_pairs_set(sc, sc->sc_req_nvq_pairs);
733	if (r == 0)
734		sc->sc_act_nvq_pairs = sc->sc_req_nvq_pairs;
735	else
736		sc->sc_act_nvq_pairs = 1;
737
738	SET(ifp->if_flags, IFF_RUNNING);
739
740	vioif_net_intr_enable(sc, vsc);
741
742	vioif_update_link_status(sc);
743	r = vioif_rx_filter(sc);
744
745	return r;
746}
747
748static void
749vioif_stop(struct ifnet *ifp, int disable)
750{
751	struct vioif_softc *sc = ifp->if_softc;
752	struct virtio_softc *vsc = sc->sc_virtio;
753	struct vioif_netqueue *netq;
754	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
755	size_t i, act_qnum;
756
757	act_qnum = sc->sc_act_nvq_pairs * 2;
758
759	CLR(ifp->if_flags, IFF_RUNNING);
760	for (i = 0; i < act_qnum; i++) {
761		netq = &sc->sc_netqs[i];
762
763		mutex_enter(&netq->netq_lock);
764		netq->netq_stopping = true;
765		mutex_exit(&netq->netq_lock);
766	}
767
768	/* disable interrupts */
769	vioif_net_intr_disable(sc, vsc);
770	if (sc->sc_has_ctrl)
771		virtio_stop_vq_intr(vsc, ctrlq->ctrlq_vq);
772
773	/*
774	 * only way to stop interrupt, I/O and DMA is resetting...
775	 *
776	 * NOTE: Devices based on VirtIO draft specification can not
777	 * stop interrupt completely even if virtio_stop_vq_intr() is called.
778	 */
779	virtio_reset(vsc);
780
781	vioif_intr_barrier();
782
783	for (i = 0; i < act_qnum; i++) {
784		netq = &sc->sc_netqs[i];
785		vioif_work_wait(sc->sc_txrx_workqueue, &netq->netq_work);
786	}
787
788	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
789		netq = &sc->sc_netqs[VIOIF_NETQ_RXQID(i)];
790		vioif_rx_queue_clear(sc, vsc, netq);
791
792		netq = &sc->sc_netqs[VIOIF_NETQ_TXQID(i)];
793		vioif_tx_queue_clear(sc, vsc, netq);
794	}
795
796	/* all packet processing is stopped */
797	for (i = 0; i < act_qnum; i++) {
798		netq = &sc->sc_netqs[i];
799
800		mutex_enter(&netq->netq_lock);
801		netq->netq_stopping = false;
802		mutex_exit(&netq->netq_lock);
803	}
804}
805
806static void
807vioif_start(struct ifnet *ifp)
808{
809	struct vioif_softc *sc = ifp->if_softc;
810	struct vioif_netqueue *txq0 = &sc->sc_netqs[VIOIF_NETQ_TXQID(0)];
811
812#ifdef VIOIF_MPSAFE
813	KASSERT(if_is_mpsafe(ifp));
814#endif
815
816	mutex_enter(&txq0->netq_lock);
817	vioif_start_locked(ifp, txq0);
818	mutex_exit(&txq0->netq_lock);
819}
820
821static inline int
822vioif_select_txqueue(struct ifnet *ifp, struct mbuf *m)
823{
824	struct vioif_softc *sc = ifp->if_softc;
825	u_int cpuid = cpu_index(curcpu());
826
827	return VIOIF_NETQ_TXQID(cpuid % sc->sc_act_nvq_pairs);
828}
829
830static int
831vioif_transmit(struct ifnet *ifp, struct mbuf *m)
832{
833	struct vioif_softc *sc = ifp->if_softc;
834	struct vioif_netqueue *netq;
835	struct vioif_tx_context *txc;
836	int qid;
837
838	qid = vioif_select_txqueue(ifp, m);
839	netq = &sc->sc_netqs[qid];
840	txc = netq->netq_ctx;
841
842	if (__predict_false(!pcq_put(txc->txc_intrq, m))) {
843		m_freem(m);
844		return ENOBUFS;
845	}
846
847	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
848	if_statadd_ref(nsr, if_obytes, m->m_pkthdr.len);
849	if (m->m_flags & M_MCAST)
850		if_statinc_ref(nsr, if_omcasts);
851	IF_STAT_PUTREF(ifp);
852
853	if (mutex_tryenter(&netq->netq_lock)) {
854		vioif_transmit_locked(ifp, netq);
855		mutex_exit(&netq->netq_lock);
856	}
857
858	return 0;
859}
860
861void
862vioif_watchdog(struct ifnet *ifp)
863{
864	struct vioif_softc *sc = ifp->if_softc;
865	struct vioif_netqueue *netq;
866	int i;
867
868	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
869		if (ISSET(ifp->if_flags, IFF_DEBUG)) {
870			log(LOG_DEBUG, "%s: watchdog timed out\n",
871			    ifp->if_xname);
872		}
873
874		for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
875			netq = &sc->sc_netqs[VIOIF_NETQ_TXQID(i)];
876
877			mutex_enter(&netq->netq_lock);
878			if (!netq->netq_running_handle) {
879				netq->netq_running_handle = true;
880				vioif_net_sched_handle(sc, netq);
881			}
882			mutex_exit(&netq->netq_lock);
883		}
884	}
885}
886
887static int
888vioif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
889{
890	int s, r;
891
892	s = splnet();
893
894	r = ether_ioctl(ifp, cmd, data);
895	if (r == ENETRESET && (cmd == SIOCADDMULTI || cmd == SIOCDELMULTI)) {
896		if (ifp->if_flags & IFF_RUNNING) {
897			r = vioif_rx_filter(ifp->if_softc);
898		} else {
899			r = 0;
900		}
901	}
902
903	splx(s);
904
905	return r;
906}
907
908static int
909vioif_ifflags(struct vioif_softc *sc)
910{
911	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
912	bool onoff;
913	int r;
914
915	if (!sc->sc_has_ctrl) {
916		/* no ctrl vq; always promisc and allmulti */
917		ifp->if_flags |= (IFF_PROMISC | IFF_ALLMULTI);
918		return 0;
919	}
920
921	onoff = ifp->if_flags & IFF_ALLMULTI ? true : false;
922	r = vioif_set_allmulti(sc, onoff);
923	if (r != 0) {
924		log(LOG_WARNING,
925		    "%s: couldn't %sable ALLMULTI\n",
926		    ifp->if_xname, onoff ? "en" : "dis");
927		if (onoff) {
928			CLR(ifp->if_flags, IFF_ALLMULTI);
929		} else {
930			SET(ifp->if_flags, IFF_ALLMULTI);
931		}
932	}
933
934	onoff = ifp->if_flags & IFF_PROMISC ? true : false;
935	r = vioif_set_promisc(sc, onoff);
936	if (r != 0) {
937		log(LOG_WARNING,
938		    "%s: couldn't %sable PROMISC\n",
939		    ifp->if_xname, onoff ? "en" : "dis");
940		if (onoff) {
941			CLR(ifp->if_flags, IFF_PROMISC);
942		} else {
943			SET(ifp->if_flags, IFF_PROMISC);
944		}
945	}
946
947	return 0;
948}
949
950static int
951vioif_ifflags_cb(struct ethercom *ec)
952{
953	struct ifnet *ifp = &ec->ec_if;
954	struct vioif_softc *sc = ifp->if_softc;
955
956	return vioif_ifflags(sc);
957}
958
959static int
960vioif_setup_sysctl(struct vioif_softc *sc)
961{
962	const char *devname;
963	struct sysctllog **log;
964	const struct sysctlnode *rnode, *rxnode, *txnode;
965	int error;
966
967	log = &sc->sc_sysctllog;
968	devname = device_xname(sc->sc_dev);
969
970	error = sysctl_createv(log, 0, NULL, &rnode,
971	    0, CTLTYPE_NODE, devname,
972	    SYSCTL_DESCR("virtio-net information and settings"),
973	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);
974	if (error)
975		goto out;
976
977	error = sysctl_createv(log, 0, &rnode, NULL,
978	    CTLFLAG_READWRITE, CTLTYPE_BOOL, "txrx_workqueue",
979	    SYSCTL_DESCR("Use workqueue for packet processing"),
980	    NULL, 0, &sc->sc_txrx_workqueue_sysctl, 0, CTL_CREATE, CTL_EOL);
981	if (error)
982		goto out;
983
984	error = sysctl_createv(log, 0, &rnode, &rxnode,
985	    0, CTLTYPE_NODE, "rx",
986	    SYSCTL_DESCR("virtio-net information and settings for Rx"),
987	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
988	if (error)
989		goto out;
990
991	error = sysctl_createv(log, 0, &rxnode, NULL,
992	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
993	    SYSCTL_DESCR("max number of Rx packets to process for interrupt processing"),
994	    NULL, 0, &sc->sc_rx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
995	if (error)
996		goto out;
997
998	error = sysctl_createv(log, 0, &rxnode, NULL,
999	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
1000	    SYSCTL_DESCR("max number of Rx packets to process for deferred processing"),
1001	    NULL, 0, &sc->sc_rx_process_limit, 0, CTL_CREATE, CTL_EOL);
1002	if (error)
1003		goto out;
1004
1005	error = sysctl_createv(log, 0, &rnode, &txnode,
1006	    0, CTLTYPE_NODE, "tx",
1007	    SYSCTL_DESCR("virtio-net information and settings for Tx"),
1008	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
1009	if (error)
1010		goto out;
1011
1012	error = sysctl_createv(log, 0, &txnode, NULL,
1013	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
1014	    SYSCTL_DESCR("max number of Tx packets to process for interrupt processing"),
1015	    NULL, 0, &sc->sc_tx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
1016	if (error)
1017		goto out;
1018
1019	error = sysctl_createv(log, 0, &txnode, NULL,
1020	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
1021	    SYSCTL_DESCR("max number of Tx packets to process for deferred processing"),
1022	    NULL, 0, &sc->sc_tx_process_limit, 0, CTL_CREATE, CTL_EOL);
1023
1024out:
1025	if (error)
1026		sysctl_teardown(log);
1027
1028	return error;
1029}
1030
1031static void
1032vioif_setup_stats(struct vioif_softc *sc)
1033{
1034	struct vioif_netqueue *netq;
1035	struct vioif_tx_context *txc;
1036	struct vioif_rx_context *rxc;
1037	size_t i, netq_num;
1038
1039	netq_num = sc->sc_max_nvq_pairs * 2;
1040	for (i = 0; i < netq_num; i++) {
1041		netq = &sc->sc_netqs[i];
1042		evcnt_attach_dynamic(&netq->netq_mbuf_load_failed, EVCNT_TYPE_MISC,
1043		    NULL, netq->netq_evgroup, "failed to load mbuf to DMA");
1044		evcnt_attach_dynamic(&netq->netq_enqueue_failed,
1045		    EVCNT_TYPE_MISC, NULL, netq->netq_evgroup,
1046		    "virtqueue enqueue failed failed");
1047
1048		switch (VIOIF_NETQ_DIR(i)) {
1049		case VIOIF_NETQ_RX:
1050			rxc = netq->netq_ctx;
1051			evcnt_attach_dynamic(&rxc->rxc_mbuf_enobufs,
1052			    EVCNT_TYPE_MISC, NULL, netq->netq_evgroup,
1053			    "no receive buffer");
1054			break;
1055		case VIOIF_NETQ_TX:
1056			txc = netq->netq_ctx;
1057			evcnt_attach_dynamic(&txc->txc_defrag_failed,
1058			    EVCNT_TYPE_MISC, NULL, netq->netq_evgroup,
1059			    "m_defrag() failed");
1060			break;
1061		}
1062	}
1063
1064	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_load_failed, EVCNT_TYPE_MISC,
1065	    NULL, device_xname(sc->sc_dev), "control command dmamap load failed");
1066	evcnt_attach_dynamic(&sc->sc_ctrlq.ctrlq_cmd_failed, EVCNT_TYPE_MISC,
1067	    NULL, device_xname(sc->sc_dev), "control command failed");
1068}
1069
1070/*
1071 * allocate memory
1072 */
1073static int
1074vioif_dmamap_create(struct vioif_softc *sc, bus_dmamap_t *map,
1075    bus_size_t size, int nsegs, const char *usage)
1076{
1077	int r;
1078
1079	r = bus_dmamap_create(virtio_dmat(sc->sc_virtio), size,
1080	    nsegs, size, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, map);
1081
1082	if (r != 0) {
1083		aprint_error_dev(sc->sc_dev, "%s dmamap creation failed, "
1084		    "error code %d\n", usage, r);
1085	}
1086
1087	return r;
1088}
1089
1090static void
1091vioif_dmamap_destroy(struct vioif_softc *sc, bus_dmamap_t *map)
1092{
1093
1094	if (*map) {
1095		bus_dmamap_destroy(virtio_dmat(sc->sc_virtio), *map);
1096		*map = NULL;
1097	}
1098}
1099
1100static int
1101vioif_dmamap_create_load(struct vioif_softc *sc, bus_dmamap_t *map,
1102    void *buf, bus_size_t size, int nsegs, int rw, const char *usage)
1103{
1104	int r;
1105
1106	r = vioif_dmamap_create(sc, map, size, nsegs, usage);
1107	if (r != 0)
1108		return 1;
1109
1110	r = bus_dmamap_load(virtio_dmat(sc->sc_virtio), *map, buf,
1111	    size, NULL, rw | BUS_DMA_NOWAIT);
1112	if (r != 0) {
1113		vioif_dmamap_destroy(sc, map);
1114		aprint_error_dev(sc->sc_dev, "%s dmamap load failed. "
1115		    "error code %d\n", usage, r);
1116	}
1117
1118	return r;
1119}
1120
/*
 * Carve "size" bytes off the region tracked by the cursor *p: return the
 * current cursor position as a pointer and advance the cursor by "size".
 */
static void *
vioif_assign_mem(intptr_t *p, size_t size)
{
	void *const chunk = (void *)*p;

	*p += size;

	return chunk;
}
1131
1132/*
1133 * dma memory is used for:
1134 *   netq_maps_kva:	 metadata array for received frames (READ) and
1135 *			 sent frames (WRITE)
1136 *   ctrlq_cmd:		 command to be sent via ctrl vq (WRITE)
1137 *   ctrlq_status:	 return value for a command via ctrl vq (READ)
1138 *   ctrlq_rx:		 parameter for a VIRTIO_NET_CTRL_RX class command
1139 *			 (WRITE)
1140 *   ctrlq_mac_tbl_uc:	 unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
1141 *			 class command (WRITE)
1142 *   ctrlq_mac_tbl_mc:	 multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
1143 *			 class command (WRITE)
1144 * ctrlq_* structures are allocated only one each; they are protected by
1145 * ctrlq_inuse variable and ctrlq_wait condvar.
1146 */
1147static int
1148vioif_alloc_mems(struct vioif_softc *sc)
1149{
1150	struct virtio_softc *vsc = sc->sc_virtio;
1151	struct vioif_netqueue *netq;
1152	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
1153	struct vioif_net_map *maps;
1154	unsigned int vq_num;
1155	int r, rsegs;
1156	bus_size_t dmamemsize;
1157	size_t qid, i, netq_num, kmemsize;
1158	void *vaddr;
1159	intptr_t p;
1160
1161	netq_num = sc->sc_max_nvq_pairs * 2;
1162
1163	/* allocate DMA memory */
1164	dmamemsize = 0;
1165
1166	for (qid = 0; qid < netq_num; qid++) {
1167		maps = sc->sc_netqs[qid].netq_maps;
1168		vq_num = sc->sc_netqs[qid].netq_vq->vq_num;
1169		dmamemsize += sizeof(*maps[0].vnm_hdr) * vq_num;
1170	}
1171
1172	if (sc->sc_has_ctrl) {
1173		dmamemsize += sizeof(struct virtio_net_ctrl_cmd);
1174		dmamemsize += sizeof(struct virtio_net_ctrl_status);
1175		dmamemsize += sizeof(struct virtio_net_ctrl_rx);
1176		dmamemsize += sizeof(struct virtio_net_ctrl_mac_tbl)
1177		    + ETHER_ADDR_LEN;
1178		dmamemsize += sizeof(struct virtio_net_ctrl_mac_tbl)
1179		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES;
1180		dmamemsize += sizeof(struct virtio_net_ctrl_mac_addr);
1181		dmamemsize += sizeof(struct virtio_net_ctrl_mq);
1182	}
1183
1184	r = bus_dmamem_alloc(virtio_dmat(vsc), dmamemsize, 0, 0,
1185	    &sc->sc_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
1186	if (r != 0) {
1187		aprint_error_dev(sc->sc_dev,
1188		    "DMA memory allocation failed, size %" PRIuBUSSIZE ", "
1189		    "error code %d\n", dmamemsize, r);
1190		goto err_none;
1191	}
1192	r = bus_dmamem_map(virtio_dmat(vsc), &sc->sc_segs[0], 1,
1193	    dmamemsize, &vaddr, BUS_DMA_NOWAIT);
1194	if (r != 0) {
1195		aprint_error_dev(sc->sc_dev,
1196		    "DMA memory map failed, error code %d\n", r);
1197		goto err_dmamem_alloc;
1198	}
1199
1200	/* assign DMA memory */
1201	memset(vaddr, 0, dmamemsize);
1202	sc->sc_dmamem = vaddr;
1203	p = (intptr_t) vaddr;
1204
1205	for (qid = 0; qid < netq_num; qid++) {
1206		netq = &sc->sc_netqs[qid];
1207		maps = netq->netq_maps;
1208		vq_num = netq->netq_vq->vq_num;
1209
1210		netq->netq_maps_kva = vioif_assign_mem(&p,
1211		    sizeof(*maps[0].vnm_hdr) * vq_num);
1212	}
1213
1214	if (sc->sc_has_ctrl) {
1215		ctrlq->ctrlq_cmd = vioif_assign_mem(&p,
1216		    sizeof(*ctrlq->ctrlq_cmd));
1217		ctrlq->ctrlq_status = vioif_assign_mem(&p,
1218		    sizeof(*ctrlq->ctrlq_status));
1219		ctrlq->ctrlq_rx = vioif_assign_mem(&p,
1220		    sizeof(*ctrlq->ctrlq_rx));
1221		ctrlq->ctrlq_mac_tbl_uc = vioif_assign_mem(&p,
1222		    sizeof(*ctrlq->ctrlq_mac_tbl_uc)
1223		    + ETHER_ADDR_LEN);
1224		ctrlq->ctrlq_mac_tbl_mc = vioif_assign_mem(&p,
1225		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
1226		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES);
1227		ctrlq->ctrlq_mac_addr = vioif_assign_mem(&p,
1228		    sizeof(*ctrlq->ctrlq_mac_addr));
1229		ctrlq->ctrlq_mq = vioif_assign_mem(&p, sizeof(*ctrlq->ctrlq_mq));
1230	}
1231
1232	/* allocate kmem */
1233	kmemsize = 0;
1234
1235	for (qid = 0; qid < netq_num; qid++) {
1236		netq = &sc->sc_netqs[qid];
1237		vq_num = netq->netq_vq->vq_num;
1238
1239		kmemsize += sizeof(netq->netq_maps[0]) * vq_num;
1240	}
1241
1242	vaddr = kmem_zalloc(kmemsize, KM_SLEEP);
1243	sc->sc_kmem = vaddr;
1244
1245	/* assign allocated kmem */
1246	p = (intptr_t) vaddr;
1247
1248	for (qid = 0; qid < netq_num; qid++) {
1249		netq = &sc->sc_netqs[qid];
1250		vq_num = netq->netq_vq->vq_num;
1251
1252		netq->netq_maps = vioif_assign_mem(&p,
1253		    sizeof(netq->netq_maps[0]) * vq_num);
1254	}
1255
1256	/* prepare dmamaps */
1257	for (qid = 0; qid < netq_num; qid++) {
1258		static const struct {
1259			const char	*msg_hdr;
1260			const char	*msg_payload;
1261			int		 dma_flag;
1262			bus_size_t	 dma_size;
1263			int		 dma_nsegs;
1264		} dmaparams[VIOIF_NETQ_IDX] = {
1265			[VIOIF_NETQ_RX] = {
1266				.msg_hdr	= "rx header",
1267				.msg_payload	= "rx payload",
1268				.dma_flag	= BUS_DMA_READ,
1269				.dma_size	= MCLBYTES - ETHER_ALIGN,
1270				.dma_nsegs	= 1,
1271			},
1272			[VIOIF_NETQ_TX] = {
1273				.msg_hdr	= "tx header",
1274				.msg_payload	= "tx payload",
1275				.dma_flag	= BUS_DMA_WRITE,
1276				.dma_size	= ETHER_MAX_LEN,
1277				.dma_nsegs	= VIRTIO_NET_TX_MAXNSEGS,
1278			}
1279		};
1280
1281		struct virtio_net_hdr *hdrs;
1282		int dir;
1283		int nsegs;
1284
1285		dir = VIOIF_NETQ_DIR(qid);
1286		netq = &sc->sc_netqs[qid];
1287		vq_num = netq->netq_vq->vq_num;
1288		maps = netq->netq_maps;
1289		hdrs = netq->netq_maps_kva;
1290		nsegs = uimin(dmaparams[dir].dma_nsegs, vq_num - 1/*hdr*/);
1291
1292		for (i = 0; i < vq_num; i++) {
1293			maps[i].vnm_hdr = &hdrs[i];
1294
1295			r = vioif_dmamap_create_load(sc, &maps[i].vnm_hdr_map,
1296			    maps[i].vnm_hdr, sc->sc_hdr_size, 1,
1297			    dmaparams[dir].dma_flag, dmaparams[dir].msg_hdr);
1298			if (r != 0)
1299				goto err_reqs;
1300
1301			r = vioif_dmamap_create(sc, &maps[i].vnm_mbuf_map,
1302			    dmaparams[dir].dma_size, nsegs,
1303			    dmaparams[dir].msg_payload);
1304			if (r != 0)
1305				goto err_reqs;
1306		}
1307	}
1308
1309	if (sc->sc_has_ctrl) {
1310		/* control vq class & command */
1311		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_cmd_dmamap,
1312		    ctrlq->ctrlq_cmd, sizeof(*ctrlq->ctrlq_cmd), 1,
1313		    BUS_DMA_WRITE, "control command");
1314		if (r != 0)
1315			goto err_reqs;
1316
1317		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_status_dmamap,
1318		    ctrlq->ctrlq_status, sizeof(*ctrlq->ctrlq_status), 1,
1319		    BUS_DMA_READ, "control status");
1320		if (r != 0)
1321			goto err_reqs;
1322
1323		/* control vq rx mode command parameter */
1324		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_rx_dmamap,
1325		    ctrlq->ctrlq_rx, sizeof(*ctrlq->ctrlq_rx), 1,
1326		    BUS_DMA_WRITE, "rx mode control command");
1327		if (r != 0)
1328			goto err_reqs;
1329
1330		/* multiqueue set command */
1331		r = vioif_dmamap_create_load(sc, &ctrlq->ctrlq_mq_dmamap,
1332		    ctrlq->ctrlq_mq, sizeof(*ctrlq->ctrlq_mq), 1,
1333		    BUS_DMA_WRITE, "multiqueue set command");
1334		if (r != 0)
1335			goto err_reqs;
1336
1337		/* control vq MAC filter table for unicast */
1338		/* do not load now since its length is variable */
1339		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_uc_dmamap,
1340		    sizeof(*ctrlq->ctrlq_mac_tbl_uc)
1341		    + ETHER_ADDR_LEN, 1,
1342		    "unicast MAC address filter command");
1343		if (r != 0)
1344			goto err_reqs;
1345
1346		/* control vq MAC filter table for multicast */
1347		r = vioif_dmamap_create(sc, &ctrlq->ctrlq_tbl_mc_dmamap,
1348		    sizeof(*ctrlq->ctrlq_mac_tbl_mc)
1349		    + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MAXENTRIES, 1,
1350		    "multicast MAC address filter command");
1351		if (r != 0)
1352			goto err_reqs;
1353
1354		/* control vq MAC address set command */
1355		r = vioif_dmamap_create_load(sc,
1356		    &ctrlq->ctrlq_mac_addr_dmamap,
1357		    ctrlq->ctrlq_mac_addr,
1358		    sizeof(*ctrlq->ctrlq_mac_addr), 1,
1359		    BUS_DMA_WRITE, "mac addr set command");
1360		if (r != 0)
1361			goto err_reqs;
1362	}
1363
1364	return 0;
1365
1366err_reqs:
1367	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_mc_dmamap);
1368	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_tbl_uc_dmamap);
1369	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_rx_dmamap);
1370	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_status_dmamap);
1371	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_cmd_dmamap);
1372	vioif_dmamap_destroy(sc, &ctrlq->ctrlq_mac_addr_dmamap);
1373	for (qid = 0; qid < netq_num; qid++) {
1374		vq_num = sc->sc_netqs[qid].netq_vq->vq_num;
1375		maps = sc->sc_netqs[qid].netq_maps;
1376
1377		for (i = 0; i < vq_num; i++) {
1378			vioif_dmamap_destroy(sc, &maps[i].vnm_mbuf_map);
1379			vioif_dmamap_destroy(sc, &maps[i].vnm_hdr_map);
1380		}
1381	}
1382	if (sc->sc_kmem) {
1383		kmem_free(sc->sc_kmem, kmemsize);
1384		sc->sc_kmem = NULL;
1385	}
1386	bus_dmamem_unmap(virtio_dmat(vsc), sc->sc_dmamem, dmamemsize);
1387err_dmamem_alloc:
1388	bus_dmamem_free(virtio_dmat(vsc), &sc->sc_segs[0], 1);
1389err_none:
1390	return -1;
1391}
1392
1393static void
1394vioif_alloc_queues(struct vioif_softc *sc)
1395{
1396	int nvq_pairs = sc->sc_max_nvq_pairs;
1397	size_t nvqs, netq_num;
1398
1399	KASSERT(nvq_pairs <= VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX);
1400
1401	nvqs = netq_num = sc->sc_max_nvq_pairs * 2;
1402	if (sc->sc_has_ctrl)
1403		nvqs++;
1404
1405	sc->sc_vqs = kmem_zalloc(sizeof(sc->sc_vqs[0]) * nvqs, KM_SLEEP);
1406	sc->sc_netqs = kmem_zalloc(sizeof(sc->sc_netqs[0]) * netq_num,
1407	    KM_SLEEP);
1408}
1409
1410static void
1411vioif_free_queues(struct vioif_softc *sc)
1412{
1413	size_t nvqs, netq_num;
1414
1415	nvqs = netq_num = sc->sc_max_nvq_pairs * 2;
1416	if (sc->sc_ctrlq.ctrlq_vq)
1417		nvqs++;
1418
1419	kmem_free(sc->sc_netqs, sizeof(sc->sc_netqs[0]) * netq_num);
1420	kmem_free(sc->sc_vqs, sizeof(sc->sc_vqs[0]) * nvqs);
1421	sc->sc_netqs = NULL;
1422	sc->sc_vqs = NULL;
1423}
1424
1425/*
1426 * Network queues
1427 */
1428static int
1429vioif_netqueue_init(struct vioif_softc *sc, struct virtio_softc *vsc,
1430    size_t qid, u_int softint_flags)
1431{
1432	static const struct {
1433		const char	*dirname;
1434		int		 segsize;
1435		int		 nsegs;
1436		int 		(*intrhand)(void *);
1437		void		(*sihand)(void *);
1438	} params[VIOIF_NETQ_IDX] = {
1439		[VIOIF_NETQ_RX] = {
1440			.dirname	= "rx",
1441			.segsize	= MCLBYTES,
1442			.nsegs		= 2,
1443			.intrhand	= vioif_rx_intr,
1444			.sihand		= vioif_rx_handle,
1445		},
1446		[VIOIF_NETQ_TX] = {
1447			.dirname	= "tx",
1448			.segsize	= ETHER_MAX_LEN - ETHER_HDR_LEN,
1449			.nsegs		= 2,
1450			.intrhand	= vioif_tx_intr,
1451			.sihand		= vioif_tx_handle,
1452		}
1453	};
1454
1455	struct virtqueue *vq;
1456	struct vioif_netqueue *netq;
1457	struct vioif_tx_context *txc;
1458	struct vioif_rx_context *rxc;
1459	char qname[32];
1460	int r, dir;
1461
1462	txc = NULL;
1463	rxc = NULL;
1464	netq = &sc->sc_netqs[qid];
1465	vq = &sc->sc_vqs[qid];
1466	dir = VIOIF_NETQ_DIR(qid);
1467
1468	netq->netq_vq = &sc->sc_vqs[qid];
1469	netq->netq_stopping = false;
1470	netq->netq_running_handle = false;
1471
1472	snprintf(qname, sizeof(qname), "%s%zu",
1473	    params[dir].dirname, VIOIF_NETQ_PAIRIDX(qid));
1474	snprintf(netq->netq_evgroup, sizeof(netq->netq_evgroup),
1475	    "%s-%s", device_xname(sc->sc_dev), qname);
1476
1477	mutex_init(&netq->netq_lock, MUTEX_DEFAULT, IPL_NET);
1478	virtio_init_vq(vsc, vq, qid, params[dir].intrhand, netq);
1479
1480	r = virtio_alloc_vq(vsc, vq,
1481	    params[dir].segsize + sc->sc_hdr_size,
1482	    params[dir].nsegs, qname);
1483	if (r != 0)
1484		goto err;
1485	netq->netq_vq = vq;
1486
1487	netq->netq_softint = softint_establish(softint_flags,
1488	    params[dir].sihand, netq);
1489	if (netq->netq_softint == NULL) {
1490		aprint_error_dev(sc->sc_dev,
1491		    "couldn't establish %s softint\n",
1492		    params[dir].dirname);
1493		goto err;
1494	}
1495	vioif_work_set(&netq->netq_work, params[dir].sihand, netq);
1496
1497	switch (dir) {
1498	case VIOIF_NETQ_RX:
1499		rxc = kmem_zalloc(sizeof(*rxc), KM_SLEEP);
1500		netq->netq_ctx = rxc;
1501		/* nothing to do */
1502		break;
1503	case VIOIF_NETQ_TX:
1504		txc = kmem_zalloc(sizeof(*txc), KM_SLEEP);
1505		netq->netq_ctx = (void *)txc;
1506		txc->txc_deferred_transmit = softint_establish(softint_flags,
1507		    vioif_deferred_transmit, netq);
1508		if (txc->txc_deferred_transmit == NULL) {
1509			aprint_error_dev(sc->sc_dev,
1510			    "couldn't establish softint for "
1511			    "tx deferred transmit\n");
1512			goto err;
1513		}
1514		txc->txc_link_active = VIOIF_IS_LINK_ACTIVE(sc);
1515		txc->txc_no_free_slots = false;
1516		txc->txc_intrq = pcq_create(vq->vq_num, KM_SLEEP);
1517		break;
1518	}
1519
1520	return 0;
1521
1522err:
1523	netq->netq_ctx = NULL;
1524
1525	if (rxc != NULL) {
1526		kmem_free(rxc, sizeof(*rxc));
1527	}
1528
1529	if (txc != NULL) {
1530		if (txc->txc_deferred_transmit != NULL)
1531			softint_disestablish(txc->txc_deferred_transmit);
1532		if (txc->txc_intrq != NULL)
1533			pcq_destroy(txc->txc_intrq);
1534		kmem_free(txc, sizeof(txc));
1535	}
1536
1537	vioif_work_set(&netq->netq_work, NULL, NULL);
1538	if (netq->netq_softint != NULL) {
1539		softint_disestablish(netq->netq_softint);
1540		netq->netq_softint = NULL;
1541	}
1542
1543	virtio_free_vq(vsc, vq);
1544	mutex_destroy(&netq->netq_lock);
1545	netq->netq_vq = NULL;
1546
1547	return -1;
1548}
1549
1550static void
1551vioif_netqueue_teardown(struct vioif_softc *sc, struct virtio_softc *vsc,
1552    size_t qid)
1553{
1554	struct vioif_netqueue *netq;
1555	struct vioif_rx_context *rxc;
1556	struct vioif_tx_context *txc;
1557	int dir;
1558
1559	netq = &sc->sc_netqs[qid];
1560
1561	if (netq->netq_vq == NULL)
1562		return;
1563
1564	netq = &sc->sc_netqs[qid];
1565	dir = VIOIF_NETQ_DIR(qid);
1566	switch (dir) {
1567	case VIOIF_NETQ_RX:
1568		rxc = netq->netq_ctx;
1569		netq->netq_ctx = NULL;
1570		kmem_free(rxc, sizeof(*rxc));
1571		break;
1572	case VIOIF_NETQ_TX:
1573		txc = netq->netq_ctx;
1574		netq->netq_ctx = NULL;
1575		softint_disestablish(txc->txc_deferred_transmit);
1576		pcq_destroy(txc->txc_intrq);
1577		kmem_free(txc, sizeof(*txc));
1578		break;
1579	}
1580
1581	softint_disestablish(netq->netq_softint);
1582	virtio_free_vq(vsc, netq->netq_vq);
1583	mutex_destroy(&netq->netq_lock);
1584	netq->netq_vq = NULL;
1585}
1586
1587static void
1588vioif_net_sched_handle(struct vioif_softc *sc, struct vioif_netqueue *netq)
1589{
1590
1591	KASSERT(mutex_owned(&netq->netq_lock));
1592	KASSERT(!netq->netq_stopping);
1593
1594	if (netq->netq_workqueue) {
1595		vioif_work_add(sc->sc_txrx_workqueue, &netq->netq_work);
1596	} else {
1597		softint_schedule(netq->netq_softint);
1598	}
1599}
1600
1601static int
1602vioif_net_load_mbuf(struct virtio_softc *vsc, struct vioif_net_map *map,
1603   struct mbuf *m, int dma_flags)
1604{
1605	int r;
1606
1607	KASSERT(map->vnm_mbuf == NULL);
1608
1609	r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
1610	    map->vnm_mbuf_map, m, dma_flags | BUS_DMA_NOWAIT);
1611	if (r == 0) {
1612		map->vnm_mbuf = m;
1613	}
1614
1615	return r;
1616}
1617
1618static void
1619vioif_net_unload_mbuf(struct virtio_softc *vsc, struct vioif_net_map *map)
1620{
1621
1622	KASSERT(map->vnm_mbuf != NULL);
1623	bus_dmamap_unload(virtio_dmat(vsc), map->vnm_mbuf_map);
1624	map->vnm_mbuf = NULL;
1625}
1626
1627static int
1628vioif_net_enqueue(struct virtio_softc *vsc, struct virtqueue *vq,
1629    int slot, struct vioif_net_map *map, int dma_ops, bool is_write)
1630{
1631	int r;
1632
1633	KASSERT(map->vnm_mbuf != NULL);
1634
1635	/* This should actually never fail */
1636	r = virtio_enqueue_reserve(vsc, vq, slot,
1637	    map->vnm_mbuf_map->dm_nsegs + 1);
1638	if (r != 0) {
1639		/* slot already freed by virtio_enqueue_reserve */
1640		return r;
1641	}
1642
1643	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_mbuf_map,
1644	    0, map->vnm_mbuf_map->dm_mapsize, dma_ops);
1645	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_hdr_map,
1646	    0, map->vnm_hdr_map->dm_mapsize, dma_ops);
1647
1648	virtio_enqueue(vsc, vq, slot, map->vnm_hdr_map, is_write);
1649	virtio_enqueue(vsc, vq, slot, map->vnm_mbuf_map, is_write);
1650	virtio_enqueue_commit(vsc, vq, slot, false);
1651
1652	return 0;
1653}
1654
1655static int
1656vioif_net_enqueue_tx(struct virtio_softc *vsc, struct virtqueue *vq,
1657    int slot, struct vioif_net_map *map)
1658{
1659
1660	return vioif_net_enqueue(vsc, vq, slot, map,
1661	    BUS_DMASYNC_PREWRITE, true);
1662}
1663
1664static int
1665vioif_net_enqueue_rx(struct virtio_softc *vsc, struct virtqueue *vq,
1666    int slot, struct vioif_net_map *map)
1667{
1668
1669	return vioif_net_enqueue(vsc, vq, slot, map,
1670	    BUS_DMASYNC_PREREAD, false);
1671}
1672
1673static struct mbuf *
1674vioif_net_dequeue_commit(struct virtio_softc *vsc, struct virtqueue *vq,
1675   int slot, struct vioif_net_map *map, int dma_flags)
1676{
1677	struct mbuf *m;
1678
1679	m = map->vnm_mbuf;
1680	KASSERT(m != NULL);
1681	map->vnm_mbuf = NULL;
1682
1683	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_hdr_map,
1684	    0, map->vnm_hdr_map->dm_mapsize, dma_flags);
1685	bus_dmamap_sync(virtio_dmat(vsc), map->vnm_mbuf_map,
1686	    0, map->vnm_mbuf_map->dm_mapsize, dma_flags);
1687
1688	bus_dmamap_unload(virtio_dmat(vsc), map->vnm_mbuf_map);
1689	virtio_dequeue_commit(vsc, vq, slot);
1690
1691	return m;
1692}
1693
1694static void
1695vioif_net_intr_enable(struct vioif_softc *sc, struct virtio_softc *vsc)
1696{
1697	struct vioif_netqueue *netq;
1698	size_t i, act_qnum;
1699	int enqueued;
1700
1701	act_qnum = sc->sc_act_nvq_pairs * 2;
1702	for (i = 0; i < act_qnum; i++) {
1703		netq = &sc->sc_netqs[i];
1704
1705		KASSERT(!netq->netq_stopping);
1706		KASSERT(!netq->netq_running_handle);
1707
1708		enqueued = virtio_start_vq_intr(vsc, netq->netq_vq);
1709		if (enqueued != 0) {
1710			virtio_stop_vq_intr(vsc, netq->netq_vq);
1711
1712			mutex_enter(&netq->netq_lock);
1713			netq->netq_running_handle = true;
1714			vioif_net_sched_handle(sc, netq);
1715			mutex_exit(&netq->netq_lock);
1716		}
1717	}
1718}
1719
1720static void
1721vioif_net_intr_disable(struct vioif_softc *sc, struct virtio_softc *vsc)
1722{
1723	struct vioif_netqueue *netq;
1724	size_t i, act_qnum;
1725
1726	act_qnum = sc->sc_act_nvq_pairs * 2;
1727	for (i = 0; i < act_qnum; i++) {
1728		netq = &sc->sc_netqs[i];
1729
1730		virtio_stop_vq_intr(vsc, netq->netq_vq);
1731	}
1732}
1733
1734/*
1735 * Receive implementation
1736 */
1737/* enqueue mbufs to receive slots */
1738static void
1739vioif_populate_rx_mbufs_locked(struct vioif_softc *sc, struct vioif_netqueue *netq)
1740{
1741	struct virtqueue *vq = netq->netq_vq;
1742	struct virtio_softc *vsc = vq->vq_owner;
1743	struct vioif_rx_context *rxc;
1744	struct vioif_net_map *map;
1745	struct mbuf *m;
1746	int i, r, ndone = 0;
1747
1748	KASSERT(mutex_owned(&netq->netq_lock));
1749
1750	rxc = netq->netq_ctx;
1751
1752	for (i = 0; i < vq->vq_num; i++) {
1753		int slot;
1754		r = virtio_enqueue_prep(vsc, vq, &slot);
1755		if (r == EAGAIN)
1756			break;
1757		if (__predict_false(r != 0))
1758			panic("enqueue_prep for rx buffers");
1759
1760		MGETHDR(m, M_DONTWAIT, MT_DATA);
1761		if (m == NULL) {
1762			virtio_enqueue_abort(vsc, vq, slot);
1763			rxc->rxc_mbuf_enobufs.ev_count++;
1764			break;
1765		}
1766		MCLGET(m, M_DONTWAIT);
1767		if ((m->m_flags & M_EXT) == 0) {
1768			virtio_enqueue_abort(vsc, vq, slot);
1769			m_freem(m);
1770			rxc->rxc_mbuf_enobufs.ev_count++;
1771			break;
1772		}
1773
1774		m->m_len = m->m_pkthdr.len = MCLBYTES;
1775		m_adj(m, ETHER_ALIGN);
1776
1777		map = &netq->netq_maps[slot];
1778		r = vioif_net_load_mbuf(vsc, map, m, BUS_DMA_READ);
1779		if (r != 0) {
1780			virtio_enqueue_abort(vsc, vq, slot);
1781			m_freem(m);
1782			netq->netq_mbuf_load_failed.ev_count++;
1783			break;
1784		}
1785
1786		r = vioif_net_enqueue_rx(vsc, vq, slot, map);
1787		if (r != 0) {
1788			vioif_net_unload_mbuf(vsc, map);
1789			netq->netq_enqueue_failed.ev_count++;
1790			m_freem(m);
1791			/* slot already freed by vioif_net_enqueue_rx */
1792			break;
1793		}
1794
1795		ndone++;
1796	}
1797
1798	if (ndone > 0)
1799		vioif_notify(vsc, vq);
1800}
1801
1802/* dequeue received packets */
1803static bool
1804vioif_rx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
1805    struct vioif_netqueue *netq, u_int limit, size_t *ndeqp)
1806{
1807	struct virtqueue *vq = netq->netq_vq;
1808	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1809	struct vioif_net_map *map;
1810	struct mbuf *m;
1811	int slot, len;
1812	bool more;
1813	size_t ndeq;
1814
1815	KASSERT(mutex_owned(&netq->netq_lock));
1816
1817	more = false;
1818	ndeq = 0;
1819
1820	if (virtio_vq_is_enqueued(vsc, vq) == false)
1821		goto done;
1822
1823	for (;;ndeq++) {
1824		if (ndeq >= limit) {
1825			more = true;
1826			break;
1827		}
1828
1829		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
1830			break;
1831
1832		map = &netq->netq_maps[slot];
1833		KASSERT(map->vnm_mbuf != NULL);
1834		m = vioif_net_dequeue_commit(vsc, vq, slot,
1835		    map, BUS_DMASYNC_POSTREAD);
1836		KASSERT(m != NULL);
1837
1838		m->m_len = m->m_pkthdr.len = len - sc->sc_hdr_size;
1839		m_set_rcvif(m, ifp);
1840		if_percpuq_enqueue(ifp->if_percpuq, m);
1841	}
1842
1843done:
1844	if (ndeqp != NULL)
1845		*ndeqp = ndeq;
1846
1847	return more;
1848}
1849
1850static void
1851vioif_rx_queue_clear(struct vioif_softc *sc, struct virtio_softc *vsc,
1852    struct vioif_netqueue *netq)
1853{
1854	struct vioif_net_map *map;
1855	struct mbuf *m;
1856	unsigned int i, vq_num;
1857	bool more;
1858
1859	mutex_enter(&netq->netq_lock);
1860
1861	vq_num = netq->netq_vq->vq_num;
1862	for (;;) {
1863		more = vioif_rx_deq_locked(sc, vsc, netq, vq_num, NULL);
1864		if (more == false)
1865			break;
1866	}
1867
1868	for (i = 0; i < vq_num; i++) {
1869		map = &netq->netq_maps[i];
1870
1871		m = map->vnm_mbuf;
1872		if (m == NULL)
1873			continue;
1874
1875		vioif_net_unload_mbuf(vsc, map);
1876		m_freem(m);
1877	}
1878	mutex_exit(&netq->netq_lock);
1879}
1880
1881static void
1882vioif_rx_handle_locked(void *xnetq, u_int limit)
1883{
1884	struct vioif_netqueue *netq = xnetq;
1885	struct virtqueue *vq = netq->netq_vq;
1886	struct virtio_softc *vsc = vq->vq_owner;
1887	struct vioif_softc *sc = device_private(virtio_child(vsc));
1888	bool more;
1889	int enqueued;
1890	size_t ndeq;
1891
1892	KASSERT(mutex_owned(&netq->netq_lock));
1893	KASSERT(!netq->netq_stopping);
1894
1895	more = vioif_rx_deq_locked(sc, vsc, netq, limit, &ndeq);
1896	if (ndeq > 0)
1897		vioif_populate_rx_mbufs_locked(sc, netq);
1898
1899	if (more) {
1900		vioif_net_sched_handle(sc, netq);
1901		return;
1902	}
1903
1904	enqueued = virtio_start_vq_intr(vsc, netq->netq_vq);
1905	if (enqueued != 0) {
1906		virtio_stop_vq_intr(vsc, netq->netq_vq);
1907		vioif_net_sched_handle(sc, netq);
1908		return;
1909	}
1910
1911	netq->netq_running_handle = false;
1912}
1913
1914static int
1915vioif_rx_intr(void *arg)
1916{
1917	struct vioif_netqueue *netq = arg;
1918	struct virtqueue *vq = netq->netq_vq;
1919	struct virtio_softc *vsc = vq->vq_owner;
1920	struct vioif_softc *sc = device_private(virtio_child(vsc));
1921	u_int limit;
1922
1923	mutex_enter(&netq->netq_lock);
1924
1925	/* handler is already running in softint/workqueue */
1926	if (netq->netq_running_handle)
1927		goto done;
1928
1929	if (netq->netq_stopping)
1930		goto done;
1931
1932	netq->netq_running_handle = true;
1933
1934	limit = sc->sc_rx_intr_process_limit;
1935	virtio_stop_vq_intr(vsc, vq);
1936	vioif_rx_handle_locked(netq, limit);
1937
1938done:
1939	mutex_exit(&netq->netq_lock);
1940	return 1;
1941}
1942
1943static void
1944vioif_rx_handle(void *xnetq)
1945{
1946	struct vioif_netqueue *netq = xnetq;
1947	struct virtqueue *vq = netq->netq_vq;
1948	struct virtio_softc *vsc = vq->vq_owner;
1949	struct vioif_softc *sc = device_private(virtio_child(vsc));
1950	u_int limit;
1951
1952	mutex_enter(&netq->netq_lock);
1953
1954	KASSERT(netq->netq_running_handle);
1955
1956	if (netq->netq_stopping) {
1957		netq->netq_running_handle = false;
1958		goto done;
1959	}
1960
1961	limit = sc->sc_rx_process_limit;
1962	vioif_rx_handle_locked(netq, limit);
1963
1964done:
1965	mutex_exit(&netq->netq_lock);
1966}
1967
1968/*
1969 * Transmission implementation
1970 */
1971/* enqueue mbufs to send */
1972static void
1973vioif_send_common_locked(struct ifnet *ifp, struct vioif_netqueue *netq,
1974    bool is_transmit)
1975{
1976	struct vioif_softc *sc = ifp->if_softc;
1977	struct virtio_softc *vsc = sc->sc_virtio;
1978	struct virtqueue *vq = netq->netq_vq;
1979	struct vioif_tx_context *txc;
1980	struct vioif_net_map *map;
1981	struct mbuf *m;
1982	int queued = 0;
1983
1984	KASSERT(mutex_owned(&netq->netq_lock));
1985
1986	if (netq->netq_stopping ||
1987	    !ISSET(ifp->if_flags, IFF_RUNNING))
1988		return;
1989
1990	txc = netq->netq_ctx;
1991
1992	if (!txc->txc_link_active ||
1993	    txc->txc_no_free_slots)
1994		return;
1995
1996	for (;;) {
1997		int slot, r;
1998		r = virtio_enqueue_prep(vsc, vq, &slot);
1999		if (r == EAGAIN) {
2000			txc->txc_no_free_slots = true;
2001			break;
2002		}
2003		if (__predict_false(r != 0))
2004			panic("enqueue_prep for tx buffers");
2005
2006		if (is_transmit)
2007			m = pcq_get(txc->txc_intrq);
2008		else
2009			IFQ_DEQUEUE(&ifp->if_snd, m);
2010
2011		if (m == NULL) {
2012			virtio_enqueue_abort(vsc, vq, slot);
2013			break;
2014		}
2015
2016		map = &netq->netq_maps[slot];
2017		KASSERT(map->vnm_mbuf == NULL);
2018
2019		r = vioif_net_load_mbuf(vsc, map, m, BUS_DMA_WRITE);
2020		if (r != 0) {
2021			/* maybe just too fragmented */
2022			struct mbuf *newm;
2023
2024			newm = m_defrag(m, M_NOWAIT);
2025			if (newm != NULL) {
2026				m = newm;
2027				r = vioif_net_load_mbuf(vsc, map, m,
2028				    BUS_DMA_WRITE);
2029			} else {
2030				txc->txc_defrag_failed.ev_count++;
2031				r = -1;
2032			}
2033
2034			if (r != 0) {
2035				netq->netq_mbuf_load_failed.ev_count++;
2036				m_freem(m);
2037				if_statinc(ifp, if_oerrors);
2038				virtio_enqueue_abort(vsc, vq, slot);
2039				continue;
2040			}
2041		}
2042
2043		memset(map->vnm_hdr, 0, sc->sc_hdr_size);
2044
2045		r = vioif_net_enqueue_tx(vsc, vq, slot, map);
2046		if (r != 0) {
2047			netq->netq_enqueue_failed.ev_count++;
2048			vioif_net_unload_mbuf(vsc, map);
2049			m_freem(m);
2050			/* slot already freed by vioif_net_enqueue_tx */
2051
2052			if_statinc(ifp, if_oerrors);
2053			continue;
2054		}
2055
2056		queued++;
2057		bpf_mtap(ifp, m, BPF_D_OUT);
2058	}
2059
2060	if (queued > 0) {
2061		vioif_notify(vsc, vq);
2062		ifp->if_timer = 5;
2063	}
2064}
2065
2066/* dequeue sent mbufs */
2067static bool
2068vioif_tx_deq_locked(struct vioif_softc *sc, struct virtio_softc *vsc,
2069    struct vioif_netqueue *netq, u_int limit, size_t *ndeqp)
2070{
2071	struct virtqueue *vq = netq->netq_vq;
2072	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2073	struct vioif_net_map *map;
2074	struct mbuf *m;
2075	int slot, len;
2076	bool more;
2077	size_t ndeq;
2078
2079	KASSERT(mutex_owned(&netq->netq_lock));
2080
2081	more = false;
2082	ndeq = 0;
2083
2084	if (virtio_vq_is_enqueued(vsc, vq) == false)
2085		goto done;
2086
2087	for (;;ndeq++) {
2088		if (limit-- == 0) {
2089			more = true;
2090			break;
2091		}
2092
2093		if (virtio_dequeue(vsc, vq, &slot, &len) != 0)
2094			break;
2095
2096		map = &netq->netq_maps[slot];
2097		KASSERT(map->vnm_mbuf != NULL);
2098		m = vioif_net_dequeue_commit(vsc, vq, slot,
2099		    map, BUS_DMASYNC_POSTWRITE);
2100		KASSERT(m != NULL);
2101
2102		if_statinc(ifp, if_opackets);
2103		m_freem(m);
2104	}
2105
2106done:
2107	if (ndeqp != NULL)
2108		*ndeqp = ndeq;
2109	return more;
2110}
2111
2112static void
2113vioif_tx_queue_clear(struct vioif_softc *sc, struct virtio_softc *vsc,
2114    struct vioif_netqueue *netq)
2115{
2116	struct vioif_tx_context *txc;
2117	struct vioif_net_map *map;
2118	struct mbuf *m;
2119	unsigned int i, vq_num;
2120	bool more;
2121
2122	mutex_enter(&netq->netq_lock);
2123
2124	txc = netq->netq_ctx;
2125	vq_num = netq->netq_vq->vq_num;
2126
2127	for (;;) {
2128		more = vioif_tx_deq_locked(sc, vsc, netq, vq_num, NULL);
2129		if (more == false)
2130			break;
2131	}
2132
2133	for (i = 0; i < vq_num; i++) {
2134		map = &netq->netq_maps[i];
2135
2136		m = map->vnm_mbuf;
2137		if (m == NULL)
2138			continue;
2139
2140		vioif_net_unload_mbuf(vsc, map);
2141		m_freem(m);
2142	}
2143
2144	txc->txc_no_free_slots = false;
2145
2146	mutex_exit(&netq->netq_lock);
2147}
2148
/*
 * Send the packets queued on ifp->if_snd through "netq".
 */
static void
vioif_start_locked(struct ifnet *ifp, struct vioif_netqueue *netq)
{
	const bool is_transmit = false;

	/*
	 * ifp->if_obytes and ifp->if_omcasts are added in if_transmit()@if.c.
	 */
	vioif_send_common_locked(ifp, netq, is_transmit);
}
2159
/*
 * Send the packets queued on the pcq of "netq".
 */
static void
vioif_transmit_locked(struct ifnet *ifp, struct vioif_netqueue *netq)
{
	const bool is_transmit = true;

	vioif_send_common_locked(ifp, netq, is_transmit);
}
2166
2167static void
2168vioif_deferred_transmit(void *arg)
2169{
2170	struct vioif_netqueue *netq = arg;
2171	struct virtio_softc *vsc = netq->netq_vq->vq_owner;
2172	struct vioif_softc *sc = device_private(virtio_child(vsc));
2173	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2174
2175	mutex_enter(&netq->netq_lock);
2176	vioif_send_common_locked(ifp, netq, true);
2177	mutex_exit(&netq->netq_lock);
2178}
2179
2180static void
2181vioif_tx_handle_locked(struct vioif_netqueue *netq, u_int limit)
2182{
2183	struct virtqueue *vq = netq->netq_vq;
2184	struct vioif_tx_context *txc = netq->netq_ctx;
2185	struct virtio_softc *vsc = vq->vq_owner;
2186	struct vioif_softc *sc = device_private(virtio_child(vsc));
2187	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2188	bool more;
2189	int enqueued;
2190	size_t ndeq;
2191
2192	KASSERT(mutex_owned(&netq->netq_lock));
2193	KASSERT(!netq->netq_stopping);
2194
2195	more = vioif_tx_deq_locked(sc, vsc, netq, limit, &ndeq);
2196	if (txc->txc_no_free_slots && ndeq > 0) {
2197		txc->txc_no_free_slots = false;
2198		softint_schedule(txc->txc_deferred_transmit);
2199	}
2200
2201	if (more) {
2202		vioif_net_sched_handle(sc, netq);
2203		return;
2204	}
2205
2206	enqueued = (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX) ?
2207	    virtio_postpone_intr_smart(vsc, vq):
2208	    virtio_start_vq_intr(vsc, vq);
2209	if (enqueued != 0) {
2210		virtio_stop_vq_intr(vsc, vq);
2211		vioif_net_sched_handle(sc, netq);
2212		return;
2213	}
2214
2215	netq->netq_running_handle = false;
2216
2217	/* for ALTQ */
2218	if (netq == &sc->sc_netqs[VIOIF_NETQ_TXQID(0)])
2219		if_schedule_deferred_start(ifp);
2220
2221	softint_schedule(txc->txc_deferred_transmit);
2222}
2223
2224static int
2225vioif_tx_intr(void *arg)
2226{
2227	struct vioif_netqueue *netq = arg;
2228	struct virtqueue *vq = netq->netq_vq;
2229	struct virtio_softc *vsc = vq->vq_owner;
2230	struct vioif_softc *sc = device_private(virtio_child(vsc));
2231	u_int limit;
2232
2233	mutex_enter(&netq->netq_lock);
2234
2235	/* tx handler is already running in softint/workqueue */
2236	if (netq->netq_running_handle)
2237		goto done;
2238
2239	if (netq->netq_stopping)
2240		goto done;
2241
2242	netq->netq_running_handle = true;
2243
2244	virtio_stop_vq_intr(vsc, vq);
2245	netq->netq_workqueue = sc->sc_txrx_workqueue_sysctl;
2246	limit = sc->sc_tx_intr_process_limit;
2247	vioif_tx_handle_locked(netq, limit);
2248
2249done:
2250	mutex_exit(&netq->netq_lock);
2251	return 1;
2252}
2253
2254static void
2255vioif_tx_handle(void *xnetq)
2256{
2257	struct vioif_netqueue *netq = xnetq;
2258	struct virtqueue *vq = netq->netq_vq;
2259	struct virtio_softc *vsc = vq->vq_owner;
2260	struct vioif_softc *sc = device_private(virtio_child(vsc));
2261	u_int limit;
2262
2263	mutex_enter(&netq->netq_lock);
2264
2265	KASSERT(netq->netq_running_handle);
2266
2267	if (netq->netq_stopping) {
2268		netq->netq_running_handle = false;
2269		goto done;
2270	}
2271
2272	limit = sc->sc_tx_process_limit;
2273	vioif_tx_handle_locked(netq, limit);
2274
2275done:
2276	mutex_exit(&netq->netq_lock);
2277}
2278
2279/*
2280 * Control vq
2281 */
2282/* issue a VIRTIO_NET_CTRL_RX class command and wait for completion */
2283static void
2284vioif_ctrl_acquire(struct vioif_softc *sc)
2285{
2286	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2287
2288	mutex_enter(&ctrlq->ctrlq_wait_lock);
2289	while (ctrlq->ctrlq_inuse != FREE)
2290		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
2291	ctrlq->ctrlq_inuse = INUSE;
2292	ctrlq->ctrlq_owner = curlwp;
2293	mutex_exit(&ctrlq->ctrlq_wait_lock);
2294}
2295
2296static void
2297vioif_ctrl_release(struct vioif_softc *sc)
2298{
2299	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2300
2301	KASSERT(ctrlq->ctrlq_inuse != FREE);
2302	KASSERT(ctrlq->ctrlq_owner == curlwp);
2303
2304	mutex_enter(&ctrlq->ctrlq_wait_lock);
2305	ctrlq->ctrlq_inuse = FREE;
2306	ctrlq->ctrlq_owner = NULL;
2307	cv_signal(&ctrlq->ctrlq_wait);
2308	mutex_exit(&ctrlq->ctrlq_wait_lock);
2309}
2310
2311static int
2312vioif_ctrl_load_cmdspec(struct vioif_softc *sc,
2313    struct vioif_ctrl_cmdspec *specs, int nspecs)
2314{
2315	struct virtio_softc *vsc = sc->sc_virtio;
2316	int i, r, loaded;
2317
2318	loaded = 0;
2319	for (i = 0; i < nspecs; i++) {
2320		r = bus_dmamap_load(virtio_dmat(vsc),
2321		    specs[i].dmamap, specs[i].buf, specs[i].bufsize,
2322		    NULL, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
2323		if (r) {
2324			sc->sc_ctrlq.ctrlq_cmd_load_failed.ev_count++;
2325			goto err;
2326		}
2327		loaded++;
2328
2329	}
2330
2331	return r;
2332
2333err:
2334	for (i = 0; i < loaded; i++) {
2335		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2336	}
2337
2338	return r;
2339}
2340
2341static void
2342vioif_ctrl_unload_cmdspec(struct vioif_softc *sc,
2343    struct vioif_ctrl_cmdspec *specs, int nspecs)
2344{
2345	struct virtio_softc *vsc = sc->sc_virtio;
2346	int i;
2347
2348	for (i = 0; i < nspecs; i++) {
2349		bus_dmamap_unload(virtio_dmat(vsc), specs[i].dmamap);
2350	}
2351}
2352
2353static int
2354vioif_ctrl_send_command(struct vioif_softc *sc, uint8_t class, uint8_t cmd,
2355    struct vioif_ctrl_cmdspec *specs, int nspecs)
2356{
2357	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2358	struct virtqueue *vq = ctrlq->ctrlq_vq;
2359	struct virtio_softc *vsc = sc->sc_virtio;
2360	int i, r, slot;
2361
2362	ctrlq->ctrlq_cmd->class = class;
2363	ctrlq->ctrlq_cmd->command = cmd;
2364
2365	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap,
2366	    0, sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_PREWRITE);
2367	for (i = 0; i < nspecs; i++) {
2368		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap,
2369		    0, specs[i].bufsize, BUS_DMASYNC_PREWRITE);
2370	}
2371	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap,
2372	    0, sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_PREREAD);
2373
2374	/* we need to explicitly (re)start vq intr when using RING EVENT IDX */
2375	if (virtio_features(vsc) & VIRTIO_F_RING_EVENT_IDX)
2376		virtio_start_vq_intr(vsc, ctrlq->ctrlq_vq);
2377
2378	r = virtio_enqueue_prep(vsc, vq, &slot);
2379	if (r != 0)
2380		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2381	r = virtio_enqueue_reserve(vsc, vq, slot, nspecs + 2);
2382	if (r != 0)
2383		panic("%s: control vq busy!?", device_xname(sc->sc_dev));
2384	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_cmd_dmamap, true);
2385	for (i = 0; i < nspecs; i++) {
2386		virtio_enqueue(vsc, vq, slot, specs[i].dmamap, true);
2387	}
2388	virtio_enqueue(vsc, vq, slot, ctrlq->ctrlq_status_dmamap, false);
2389	virtio_enqueue_commit(vsc, vq, slot, true);
2390
2391	/* wait for done */
2392	mutex_enter(&ctrlq->ctrlq_wait_lock);
2393	while (ctrlq->ctrlq_inuse != DONE)
2394		cv_wait(&ctrlq->ctrlq_wait, &ctrlq->ctrlq_wait_lock);
2395	mutex_exit(&ctrlq->ctrlq_wait_lock);
2396	/* already dequeued */
2397
2398	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_cmd_dmamap, 0,
2399	    sizeof(struct virtio_net_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
2400	for (i = 0; i < nspecs; i++) {
2401		bus_dmamap_sync(virtio_dmat(vsc), specs[i].dmamap, 0,
2402		    specs[i].bufsize, BUS_DMASYNC_POSTWRITE);
2403	}
2404	bus_dmamap_sync(virtio_dmat(vsc), ctrlq->ctrlq_status_dmamap, 0,
2405	    sizeof(struct virtio_net_ctrl_status), BUS_DMASYNC_POSTREAD);
2406
2407	if (ctrlq->ctrlq_status->ack == VIRTIO_NET_OK)
2408		r = 0;
2409	else {
2410		device_printf(sc->sc_dev, "failed setting rx mode\n");
2411		sc->sc_ctrlq.ctrlq_cmd_failed.ev_count++;
2412		r = EIO;
2413	}
2414
2415	return r;
2416}
2417
2418/* ctrl vq interrupt; wake up the command issuer */
2419static int
2420vioif_ctrl_intr(void *arg)
2421{
2422	struct vioif_ctrlqueue *ctrlq = arg;
2423	struct virtqueue *vq = ctrlq->ctrlq_vq;
2424	struct virtio_softc *vsc = vq->vq_owner;
2425	int r, slot;
2426
2427	if (virtio_vq_is_enqueued(vsc, vq) == false)
2428		return 0;
2429
2430	r = virtio_dequeue(vsc, vq, &slot, NULL);
2431	if (r == ENOENT)
2432		return 0;
2433	virtio_dequeue_commit(vsc, vq, slot);
2434
2435	mutex_enter(&ctrlq->ctrlq_wait_lock);
2436	ctrlq->ctrlq_inuse = DONE;
2437	cv_signal(&ctrlq->ctrlq_wait);
2438	mutex_exit(&ctrlq->ctrlq_wait_lock);
2439
2440	return 1;
2441}
2442
2443static int
2444vioif_ctrl_rx(struct vioif_softc *sc, int cmd, bool onoff)
2445{
2446	struct virtio_net_ctrl_rx *rx = sc->sc_ctrlq.ctrlq_rx;
2447	struct vioif_ctrl_cmdspec specs[1];
2448	int r;
2449
2450	if (!sc->sc_has_ctrl)
2451		return ENOTSUP;
2452
2453	vioif_ctrl_acquire(sc);
2454
2455	rx->onoff = onoff;
2456	specs[0].dmamap = sc->sc_ctrlq.ctrlq_rx_dmamap;
2457	specs[0].buf = rx;
2458	specs[0].bufsize = sizeof(*rx);
2459
2460	r = vioif_ctrl_send_command(sc, VIRTIO_NET_CTRL_RX, cmd,
2461	    specs, __arraycount(specs));
2462
2463	vioif_ctrl_release(sc);
2464	return r;
2465}
2466
2467static int
2468vioif_set_promisc(struct vioif_softc *sc, bool onoff)
2469{
2470	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, onoff);
2471}
2472
2473static int
2474vioif_set_allmulti(struct vioif_softc *sc, bool onoff)
2475{
2476	return vioif_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, onoff);
2477}
2478
2479static int
2480vioif_ctrl_mq_vq_pairs_set(struct vioif_softc *sc, int nvq_pairs)
2481{
2482	struct virtio_net_ctrl_mq *mq = sc->sc_ctrlq.ctrlq_mq;
2483	struct vioif_ctrl_cmdspec specs[1];
2484	int r;
2485
2486	if (!sc->sc_has_ctrl)
2487		return ENOTSUP;
2488
2489	if (nvq_pairs <= 1)
2490		return EINVAL;
2491
2492	vioif_ctrl_acquire(sc);
2493
2494	mq->virtqueue_pairs = virtio_rw16(sc->sc_virtio, nvq_pairs);
2495	specs[0].dmamap = sc->sc_ctrlq.ctrlq_mq_dmamap;
2496	specs[0].buf = mq;
2497	specs[0].bufsize = sizeof(*mq);
2498
2499	r = vioif_ctrl_send_command(sc,
2500	    VIRTIO_NET_CTRL_MQ, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
2501	    specs, __arraycount(specs));
2502
2503	vioif_ctrl_release(sc);
2504
2505	return r;
2506}
2507
2508static int
2509vioif_set_mac_addr(struct vioif_softc *sc)
2510{
2511	struct virtio_net_ctrl_mac_addr *ma =
2512	    sc->sc_ctrlq.ctrlq_mac_addr;
2513	struct vioif_ctrl_cmdspec specs[1];
2514	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2515	int nspecs = __arraycount(specs);
2516	uint64_t features;
2517	int r;
2518	size_t i;
2519
2520	if (!sc->sc_has_ctrl)
2521		return ENOTSUP;
2522
2523	if (memcmp(CLLADDR(ifp->if_sadl), sc->sc_mac,
2524	    ETHER_ADDR_LEN) == 0) {
2525		return 0;
2526	}
2527
2528	memcpy(sc->sc_mac, CLLADDR(ifp->if_sadl), ETHER_ADDR_LEN);
2529
2530	features = virtio_features(sc->sc_virtio);
2531	if (features & VIRTIO_NET_F_CTRL_MAC_ADDR) {
2532		vioif_ctrl_acquire(sc);
2533
2534		memcpy(ma->mac, sc->sc_mac, ETHER_ADDR_LEN);
2535		specs[0].dmamap = sc->sc_ctrlq.ctrlq_mac_addr_dmamap;
2536		specs[0].buf = ma;
2537		specs[0].bufsize = sizeof(*ma);
2538
2539		r = vioif_ctrl_send_command(sc,
2540		    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_ADDR_SET,
2541		    specs, nspecs);
2542
2543		vioif_ctrl_release(sc);
2544	} else {
2545		for (i = 0; i < __arraycount(sc->sc_mac); i++) {
2546			virtio_write_device_config_1(sc->sc_virtio,
2547			    VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
2548		}
2549		r = 0;
2550	}
2551
2552	return r;
2553}
2554
2555static int
2556vioif_set_rx_filter(struct vioif_softc *sc)
2557{
2558	/* filter already set in ctrlq->ctrlq_mac_tbl */
2559	struct virtio_softc *vsc = sc->sc_virtio;
2560	struct virtio_net_ctrl_mac_tbl *mac_tbl_uc, *mac_tbl_mc;
2561	struct vioif_ctrl_cmdspec specs[2];
2562	int nspecs = __arraycount(specs);
2563	int r;
2564
2565	mac_tbl_uc = sc->sc_ctrlq.ctrlq_mac_tbl_uc;
2566	mac_tbl_mc = sc->sc_ctrlq.ctrlq_mac_tbl_mc;
2567
2568	if (!sc->sc_has_ctrl)
2569		return ENOTSUP;
2570
2571	vioif_ctrl_acquire(sc);
2572
2573	specs[0].dmamap = sc->sc_ctrlq.ctrlq_tbl_uc_dmamap;
2574	specs[0].buf = mac_tbl_uc;
2575	specs[0].bufsize = sizeof(*mac_tbl_uc)
2576	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_uc->nentries));
2577
2578	specs[1].dmamap = sc->sc_ctrlq.ctrlq_tbl_mc_dmamap;
2579	specs[1].buf = mac_tbl_mc;
2580	specs[1].bufsize = sizeof(*mac_tbl_mc)
2581	    + (ETHER_ADDR_LEN * virtio_rw32(vsc, mac_tbl_mc->nentries));
2582
2583	r = vioif_ctrl_load_cmdspec(sc, specs, nspecs);
2584	if (r != 0)
2585		goto out;
2586
2587	r = vioif_ctrl_send_command(sc,
2588	    VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET,
2589	    specs, nspecs);
2590
2591	vioif_ctrl_unload_cmdspec(sc, specs, nspecs);
2592
2593out:
2594	vioif_ctrl_release(sc);
2595
2596	return r;
2597}
2598
2599/*
2600 * If multicast filter small enough (<=MAXENTRIES) set rx filter
2601 * If large multicast filter exist use ALLMULTI
2602 * If setting rx filter fails fall back to ALLMULTI
2603 */
2604static int
2605vioif_rx_filter(struct vioif_softc *sc)
2606{
2607	struct virtio_softc *vsc = sc->sc_virtio;
2608	struct ethercom *ec = &sc->sc_ethercom;
2609	struct ifnet *ifp = &ec->ec_if;
2610	struct ether_multi *enm;
2611	struct ether_multistep step;
2612	struct vioif_ctrlqueue *ctrlq = &sc->sc_ctrlq;
2613	int nentries;
2614	bool allmulti = 0;
2615	int r;
2616
2617	if (!sc->sc_has_ctrl) {
2618		goto set_ifflags;
2619	}
2620
2621	memcpy(ctrlq->ctrlq_mac_tbl_uc->macs[0],
2622	    CLLADDR(ifp->if_sadl), ETHER_ADDR_LEN);
2623
2624	nentries = 0;
2625	allmulti = false;
2626
2627	ETHER_LOCK(ec);
2628	for (ETHER_FIRST_MULTI(step, ec, enm); enm != NULL;
2629	    ETHER_NEXT_MULTI(step, enm)) {
2630		if (nentries >= VIRTIO_NET_CTRL_MAC_MAXENTRIES) {
2631			allmulti = true;
2632			break;
2633		}
2634		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
2635			allmulti = true;
2636			break;
2637		}
2638
2639		memcpy(ctrlq->ctrlq_mac_tbl_mc->macs[nentries],
2640		    enm->enm_addrlo, ETHER_ADDR_LEN);
2641		nentries++;
2642	}
2643	ETHER_UNLOCK(ec);
2644
2645	r = vioif_set_mac_addr(sc);
2646	if (r != 0) {
2647		log(LOG_WARNING, "%s: couldn't set MAC address\n",
2648		    ifp->if_xname);
2649	}
2650
2651	if (!allmulti) {
2652		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 1);
2653		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, nentries);
2654		r = vioif_set_rx_filter(sc);
2655		if (r != 0) {
2656			allmulti = true; /* fallback */
2657		}
2658	}
2659
2660	if (allmulti) {
2661		ctrlq->ctrlq_mac_tbl_uc->nentries = virtio_rw32(vsc, 0);
2662		ctrlq->ctrlq_mac_tbl_mc->nentries = virtio_rw32(vsc, 0);
2663		r = vioif_set_rx_filter(sc);
2664		if (r != 0) {
2665			log(LOG_DEBUG, "%s: couldn't clear RX filter\n",
2666			    ifp->if_xname);
2667			/* what to do on failure? */
2668		}
2669
2670		ifp->if_flags |= IFF_ALLMULTI;
2671	}
2672
2673set_ifflags:
2674	r = vioif_ifflags(sc);
2675
2676	return r;
2677}
2678
2679/*
2680 * VM configuration changes
2681 */
2682static int
2683vioif_config_change(struct virtio_softc *vsc)
2684{
2685	struct vioif_softc *sc = device_private(virtio_child(vsc));
2686
2687	softint_schedule(sc->sc_cfg_softint);
2688	return 0;
2689}
2690
2691static void
2692vioif_cfg_softint(void *arg)
2693{
2694	struct vioif_softc *sc = arg;
2695	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2696
2697	vioif_update_link_status(sc);
2698	vioif_start(ifp);
2699}
2700
2701static int
2702vioif_get_link_status(struct vioif_softc *sc)
2703{
2704	struct virtio_softc *vsc = sc->sc_virtio;
2705	uint16_t status;
2706
2707	if (virtio_features(vsc) & VIRTIO_NET_F_STATUS)
2708		status = virtio_read_device_config_2(vsc,
2709		    VIRTIO_NET_CONFIG_STATUS);
2710	else
2711		status = VIRTIO_NET_S_LINK_UP;
2712
2713	if ((status & VIRTIO_NET_S_LINK_UP) != 0)
2714		return LINK_STATE_UP;
2715
2716	return LINK_STATE_DOWN;
2717}
2718
2719static void
2720vioif_update_link_status(struct vioif_softc *sc)
2721{
2722	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
2723	struct vioif_netqueue *netq;
2724	struct vioif_tx_context *txc;
2725	bool active;
2726	int link, i;
2727
2728	mutex_enter(&sc->sc_lock);
2729
2730	link = vioif_get_link_status(sc);
2731
2732	if (link == sc->sc_link_state)
2733		goto done;
2734
2735	sc->sc_link_state = link;
2736
2737	active = VIOIF_IS_LINK_ACTIVE(sc);
2738	for (i = 0; i < sc->sc_act_nvq_pairs; i++) {
2739		netq = &sc->sc_netqs[VIOIF_NETQ_TXQID(i)];
2740
2741		mutex_enter(&netq->netq_lock);
2742		txc = netq->netq_ctx;
2743		txc->txc_link_active = active;
2744		mutex_exit(&netq->netq_lock);
2745	}
2746
2747	if_link_state_change(ifp, sc->sc_link_state);
2748
2749done:
2750	mutex_exit(&sc->sc_lock);
2751}
2752
2753static void
2754vioif_workq_work(struct work *wk, void *context)
2755{
2756	struct vioif_work *work;
2757
2758	work = container_of(wk, struct vioif_work, cookie);
2759
2760	atomic_store_relaxed(&work->added, 0);
2761	work->func(work->arg);
2762}
2763
2764static struct workqueue *
2765vioif_workq_create(const char *name, pri_t prio, int ipl, int flags)
2766{
2767	struct workqueue *wq;
2768	int error;
2769
2770	error = workqueue_create(&wq, name, vioif_workq_work, NULL,
2771	    prio, ipl, flags);
2772
2773	if (error)
2774		return NULL;
2775
2776	return wq;
2777}
2778
/*
 * Destroy a workqueue obtained from vioif_workq_create().
 */
static void
vioif_workq_destroy(struct workqueue *wq)
{
	workqueue_destroy(wq);
}
2785
2786static void
2787vioif_work_set(struct vioif_work *work, void (*func)(void *), void *arg)
2788{
2789
2790	memset(work, 0, sizeof(*work));
2791	work->func = func;
2792	work->arg = arg;
2793}
2794
2795static void
2796vioif_work_add(struct workqueue *wq, struct vioif_work *work)
2797{
2798
2799	if (atomic_load_relaxed(&work->added) != 0)
2800		return;
2801
2802	atomic_store_relaxed(&work->added, 1);
2803	kpreempt_disable();
2804	workqueue_enqueue(wq, &work->cookie, NULL);
2805	kpreempt_enable();
2806}
2807
2808static void
2809vioif_work_wait(struct workqueue *wq, struct vioif_work *work)
2810{
2811
2812	workqueue_wait(wq, &work->cookie);
2813}
2814
2815MODULE(MODULE_CLASS_DRIVER, if_vioif, "virtio");
2816
2817#ifdef _MODULE
2818#include "ioconf.c"
2819#endif
2820
2821static int
2822if_vioif_modcmd(modcmd_t cmd, void *opaque)
2823{
2824	int error = 0;
2825
2826#ifdef _MODULE
2827	switch (cmd) {
2828	case MODULE_CMD_INIT:
2829		error = config_init_component(cfdriver_ioconf_if_vioif,
2830		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2831		break;
2832	case MODULE_CMD_FINI:
2833		error = config_fini_component(cfdriver_ioconf_if_vioif,
2834		    cfattach_ioconf_if_vioif, cfdata_ioconf_if_vioif);
2835		break;
2836	default:
2837		error = ENOTTY;
2838		break;
2839	}
2840#endif
2841
2842	return error;
2843}
2844