1221828Sgrehan/*-
2221828Sgrehan * Copyright (c) 2011 NetApp, Inc.
3221828Sgrehan * All rights reserved.
4221828Sgrehan *
5221828Sgrehan * Redistribution and use in source and binary forms, with or without
6221828Sgrehan * modification, are permitted provided that the following conditions
7221828Sgrehan * are met:
8221828Sgrehan * 1. Redistributions of source code must retain the above copyright
9221828Sgrehan *    notice, this list of conditions and the following disclaimer.
10221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright
11221828Sgrehan *    notice, this list of conditions and the following disclaimer in the
12221828Sgrehan *    documentation and/or other materials provided with the distribution.
13221828Sgrehan *
14221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17221828Sgrehan * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24221828Sgrehan * SUCH DAMAGE.
25221828Sgrehan *
26221828Sgrehan * $FreeBSD$
27221828Sgrehan */
28221828Sgrehan
29221828Sgrehan#include <sys/cdefs.h>
30221828Sgrehan__FBSDID("$FreeBSD$");
31221828Sgrehan
32221828Sgrehan#include <sys/param.h>
33221828Sgrehan#include <sys/linker_set.h>
34221828Sgrehan#include <sys/select.h>
35221828Sgrehan#include <sys/uio.h>
36221828Sgrehan#include <sys/ioctl.h>
37252682Sgrehan#include <net/ethernet.h>
38221828Sgrehan
39221828Sgrehan#include <errno.h>
40221828Sgrehan#include <fcntl.h>
41221828Sgrehan#include <stdio.h>
42221828Sgrehan#include <stdlib.h>
43221828Sgrehan#include <stdint.h>
44221828Sgrehan#include <string.h>
45221828Sgrehan#include <strings.h>
46221828Sgrehan#include <unistd.h>
47221828Sgrehan#include <assert.h>
48221828Sgrehan#include <md5.h>
49221828Sgrehan#include <pthread.h>
50249917Sgrehan#include <pthread_np.h>
51221828Sgrehan
52244167Sgrehan#include "bhyverun.h"
53221828Sgrehan#include "pci_emul.h"
54221828Sgrehan#include "mevent.h"
55221828Sgrehan#include "virtio.h"
56221828Sgrehan
/* Number of descriptors configured for each virtqueue. */
#define	VTNET_RINGSZ		1024

/* Upper bound on the segments accepted in one descriptor chain. */
#define	VTNET_MAXSEGS		32
60221828Sgrehan
/*
 * virtio-net feature bits.  Only the subset collected in
 * VTNET_S_HOSTCAPS below is actually offered to the guest.
 */
#define	VIRTIO_NET_F_CSUM		(1 << 0)  /* host handles partial cksum */
#define	VIRTIO_NET_F_GUEST_CSUM		(1 << 1)  /* guest handles partial cksum */
#define	VIRTIO_NET_F_MAC		(1 << 5)  /* host supplies MAC */
#define	VIRTIO_NET_F_GSO_DEPREC		(1 << 6)  /* deprecated: host handles GSO */
#define	VIRTIO_NET_F_GUEST_TSO4		(1 << 7)  /* guest can rcv TSOv4 */
#define	VIRTIO_NET_F_GUEST_TSO6		(1 << 8)  /* guest can rcv TSOv6 */
#define	VIRTIO_NET_F_GUEST_ECN		(1 << 9)  /* guest can rcv TSO with ECN */
#define	VIRTIO_NET_F_GUEST_UFO		(1 << 10) /* guest can rcv UFO */
#define	VIRTIO_NET_F_HOST_TSO4		(1 << 11) /* host can rcv TSOv4 */
#define	VIRTIO_NET_F_HOST_TSO6		(1 << 12) /* host can rcv TSOv6 */
#define	VIRTIO_NET_F_HOST_ECN		(1 << 13) /* host can rcv TSO with ECN */
#define	VIRTIO_NET_F_HOST_UFO		(1 << 14) /* host can rcv UFO */
#define	VIRTIO_NET_F_MRG_RXBUF		(1 << 15) /* host can merge RX buffers */
#define	VIRTIO_NET_F_STATUS		(1 << 16) /* config status field available */
#define	VIRTIO_NET_F_CTRL_VQ		(1 << 17) /* control channel available */
#define	VIRTIO_NET_F_CTRL_RX		(1 << 18) /* control channel RX mode support */
#define	VIRTIO_NET_F_CTRL_VLAN		(1 << 19) /* control channel VLAN filtering */
#define	VIRTIO_NET_F_GUEST_ANNOUNCE	(1 << 21) /* guest can send gratuitous pkts */

/* Capabilities this device model advertises. */
#define	VTNET_S_HOSTCAPS						\
	(VIRTIO_NET_F_MAC | VIRTIO_NET_F_MRG_RXBUF |			\
	 VIRTIO_NET_F_STATUS | VIRTIO_F_NOTIFY_ON_EMPTY)
87221828Sgrehan
/*
 * Device-specific configuration "registers".  pci_vtnet_cfgread() and
 * pci_vtnet_cfgwrite() address this structure by byte offset.
 */
struct virtio_net_config {
	uint8_t		mac[6];		/* MAC address presented to the guest */
	uint16_t	status;		/* link status; set to 1 at init */
} __packed;
95221828Sgrehan
/*
 * Virtqueue indices.
 */
#define	VTNET_RXQ	0	/* receive queue */
#define	VTNET_TXQ	1	/* transmit queue */
#define	VTNET_CTLQ	2	/* control queue; NB: not yet supported */

/* Queue count including the (unsupported) control queue. */
#define	VTNET_MAXQ	3
104221828Sgrehan
/*
 * Fixed-size header placed in front of every received frame.  The
 * trailing vrh_bufs field is only part of the header when merged rx
 * buffers are in use (see pci_vtnet_neg_features()).
 */
struct virtio_net_rxhdr {
	uint8_t		vrh_flags;
	uint8_t		vrh_gso_type;
	uint16_t	vrh_hdr_len;
	uint16_t	vrh_gso_size;
	uint16_t	vrh_csum_start;
	uint16_t	vrh_csum_offset;
	uint16_t	vrh_bufs;	/* set to 1 by the rx path when merging */
} __packed;
117221828Sgrehan
/*
 * Debug printf.
 *
 * Both macros take a parenthesised printf argument list, e.g.
 * DPRINTF(("x=%d\n", x)).  DPRINTF only prints when pci_vtnet_debug is
 * non-zero; WPRINTF always prints.  Each expands to a single statement
 * (do/while(0)) so that it is safe in an unbraced if/else: a bare
 * "if (...) printf" expansion would swallow a following "else".
 */
static int pci_vtnet_debug;
#define DPRINTF(params)							\
	do {								\
		if (pci_vtnet_debug)					\
			printf params;					\
	} while (0)
#define WPRINTF(params)							\
	do {								\
		printf params;						\
	} while (0)
124221828Sgrehan
125221828Sgrehan/*
126221828Sgrehan * Per-device softc
127221828Sgrehan */
128221828Sgrehanstruct pci_vtnet_softc {
129253440Sgrehan	struct virtio_softc vsc_vs;
130253440Sgrehan	struct vqueue_info vsc_queues[VTNET_MAXQ - 1];
131221828Sgrehan	pthread_mutex_t vsc_mtx;
132221828Sgrehan	struct mevent	*vsc_mevp;
133221828Sgrehan
134221828Sgrehan	int		vsc_tapfd;
135221828Sgrehan	int		vsc_rx_ready;
136253440Sgrehan	volatile int	resetting;	/* set and checked outside lock */
137221828Sgrehan
138271685Sgrehan	uint64_t	vsc_features;	/* negotiated features */
139271685Sgrehan
140253440Sgrehan	struct virtio_net_config vsc_config;
141221828Sgrehan
142250083Sneel	pthread_mutex_t	rx_mtx;
143250083Sneel	int		rx_in_progress;
144271685Sgrehan	int		rx_vhdrlen;
145271685Sgrehan	int		rx_merge;	/* merged rx bufs in use */
146250083Sneel
147249917Sgrehan	pthread_t 	tx_tid;
148249917Sgrehan	pthread_mutex_t	tx_mtx;
149249917Sgrehan	pthread_cond_t	tx_cond;
150250083Sneel	int		tx_in_progress;
151221828Sgrehan};
152221828Sgrehan
153253440Sgrehanstatic void pci_vtnet_reset(void *);
154253440Sgrehan/* static void pci_vtnet_notify(void *, struct vqueue_info *); */
155253440Sgrehanstatic int pci_vtnet_cfgread(void *, int, int, uint32_t *);
156253440Sgrehanstatic int pci_vtnet_cfgwrite(void *, int, int, uint32_t);
157271685Sgrehanstatic void pci_vtnet_neg_features(void *, uint64_t);
158246109Sneel
159253440Sgrehanstatic struct virtio_consts vtnet_vi_consts = {
160253440Sgrehan	"vtnet",		/* our name */
161253440Sgrehan	VTNET_MAXQ - 1,		/* we currently support 2 virtqueues */
162253440Sgrehan	sizeof(struct virtio_net_config), /* config reg size */
163253440Sgrehan	pci_vtnet_reset,	/* reset */
164253440Sgrehan	NULL,			/* device-wide qnotify -- not used */
165253440Sgrehan	pci_vtnet_cfgread,	/* read PCI config */
166253440Sgrehan	pci_vtnet_cfgwrite,	/* write PCI config */
167271685Sgrehan	pci_vtnet_neg_features,	/* apply negotiated features */
168253440Sgrehan	VTNET_S_HOSTCAPS,	/* our capabilities */
169253440Sgrehan};
170221828Sgrehan
171250083Sneel/*
172250083Sneel * If the transmit thread is active then stall until it is done.
173250083Sneel */
174244160Sgrehanstatic void
175250083Sneelpci_vtnet_txwait(struct pci_vtnet_softc *sc)
176250083Sneel{
177250083Sneel
178250083Sneel	pthread_mutex_lock(&sc->tx_mtx);
179250083Sneel	while (sc->tx_in_progress) {
180250083Sneel		pthread_mutex_unlock(&sc->tx_mtx);
181250083Sneel		usleep(10000);
182250083Sneel		pthread_mutex_lock(&sc->tx_mtx);
183250083Sneel	}
184250083Sneel	pthread_mutex_unlock(&sc->tx_mtx);
185250083Sneel}
186250083Sneel
187250083Sneel/*
188250083Sneel * If the receive thread is active then stall until it is done.
189250083Sneel */
190250083Sneelstatic void
191250083Sneelpci_vtnet_rxwait(struct pci_vtnet_softc *sc)
192250083Sneel{
193250083Sneel
194250083Sneel	pthread_mutex_lock(&sc->rx_mtx);
195250083Sneel	while (sc->rx_in_progress) {
196250083Sneel		pthread_mutex_unlock(&sc->rx_mtx);
197250083Sneel		usleep(10000);
198250083Sneel		pthread_mutex_lock(&sc->rx_mtx);
199250083Sneel	}
200250083Sneel	pthread_mutex_unlock(&sc->rx_mtx);
201250083Sneel}
202250083Sneel
203250083Sneelstatic void
204253440Sgrehanpci_vtnet_reset(void *vsc)
205221828Sgrehan{
206253440Sgrehan	struct pci_vtnet_softc *sc = vsc;
207244160Sgrehan
208253440Sgrehan	DPRINTF(("vtnet: device reset requested !\n"));
209249917Sgrehan
210253440Sgrehan	sc->resetting = 1;
211250083Sneel
212253440Sgrehan	/*
213253440Sgrehan	 * Wait for the transmit and receive threads to finish their
214253440Sgrehan	 * processing.
215253440Sgrehan	 */
216253440Sgrehan	pci_vtnet_txwait(sc);
217253440Sgrehan	pci_vtnet_rxwait(sc);
218250083Sneel
219253440Sgrehan	sc->vsc_rx_ready = 0;
220271685Sgrehan	sc->rx_merge = 1;
221271685Sgrehan	sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr);
222250086Sneel
223253440Sgrehan	/* now reset rings, MSI-X vectors, and negotiated capabilities */
224253440Sgrehan	vi_reset_dev(&sc->vsc_vs);
225250086Sneel
226253440Sgrehan	sc->resetting = 0;
227221828Sgrehan}
228221828Sgrehan
229221828Sgrehan/*
230221828Sgrehan * Called to send a buffer chain out to the tap device
231221828Sgrehan */
232221828Sgrehanstatic void
233221828Sgrehanpci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
234221828Sgrehan		 int len)
235221828Sgrehan{
236253440Sgrehan	static char pad[60]; /* all zero bytes */
237221828Sgrehan
238221828Sgrehan	if (sc->vsc_tapfd == -1)
239221828Sgrehan		return;
240221828Sgrehan
241221828Sgrehan	/*
242221828Sgrehan	 * If the length is < 60, pad out to that and add the
243221828Sgrehan	 * extra zero'd segment to the iov. It is guaranteed that
244221828Sgrehan	 * there is always an extra iov available by the caller.
245221828Sgrehan	 */
246221828Sgrehan	if (len < 60) {
247221828Sgrehan		iov[iovcnt].iov_base = pad;
248221828Sgrehan		iov[iovcnt].iov_len = 60 - len;
249221828Sgrehan		iovcnt++;
250221828Sgrehan	}
251221828Sgrehan	(void) writev(sc->vsc_tapfd, iov, iovcnt);
252221828Sgrehan}
253221828Sgrehan
/*
 * Scratch buffer that the receive path reads into when a frame from the
 * tap device has to be dropped.
 *  MP note: the contents are never looked at, so the buffer does not
 * need to be per-device or protected by a lock.
 */
static uint8_t dummybuf[2048];
262221828Sgrehan
/*
 * Strip the first 'tlen' bytes from the front of an iovec array.
 *
 * The first segment must hold at least 'tlen' bytes (asserted).  If it
 * is consumed entirely, the returned pointer is the second segment and
 * *niov is decremented (at least two segments are then required);
 * otherwise the first segment is advanced in place and returned.
 */
static __inline struct iovec *
rx_iov_trim(struct iovec *iov, int *niov, int tlen)
{
	struct iovec *head = &iov[0];

	/* XXX short-cut: assume first segment is >= tlen */
	assert(head->iov_len >= tlen);

	head->iov_len -= tlen;
	if (head->iov_len != 0) {
		/* Bytes remain: step the base past the trimmed region. */
		head->iov_base = (void *)((uintptr_t)head->iov_base + tlen);
		return (head);
	}

	/* First segment used up; the data starts in the next one. */
	assert(*niov > 1);
	*niov -= 1;
	return (&iov[1]);
}
283271685Sgrehan
284221828Sgrehanstatic void
285221828Sgrehanpci_vtnet_tap_rx(struct pci_vtnet_softc *sc)
286221828Sgrehan{
287271685Sgrehan	struct iovec iov[VTNET_MAXSEGS], *riov;
288253440Sgrehan	struct vqueue_info *vq;
289271685Sgrehan	void *vrx;
290271685Sgrehan	int len, n;
291221828Sgrehan
292221828Sgrehan	/*
293221828Sgrehan	 * Should never be called without a valid tap fd
294221828Sgrehan	 */
295221828Sgrehan	assert(sc->vsc_tapfd != -1);
296221828Sgrehan
297221828Sgrehan	/*
298221828Sgrehan	 * But, will be called when the rx ring hasn't yet
299250083Sneel	 * been set up or the guest is resetting the device.
300221828Sgrehan	 */
301250083Sneel	if (!sc->vsc_rx_ready || sc->resetting) {
302221828Sgrehan		/*
303221828Sgrehan		 * Drop the packet and try later.
304221828Sgrehan		 */
305221828Sgrehan		(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
306221828Sgrehan		return;
307221828Sgrehan	}
308221828Sgrehan
309221828Sgrehan	/*
310253440Sgrehan	 * Check for available rx buffers
311221828Sgrehan	 */
312253440Sgrehan	vq = &sc->vsc_queues[VTNET_RXQ];
313253440Sgrehan	vq_startchains(vq);
314253440Sgrehan	if (!vq_has_descs(vq)) {
315221828Sgrehan		/*
316253440Sgrehan		 * Drop the packet and try later.  Interrupt on
317253440Sgrehan		 * empty, if that's negotiated.
318221828Sgrehan		 */
319221828Sgrehan		(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
320253440Sgrehan		vq_endchains(vq, 1);
321221828Sgrehan		return;
322221828Sgrehan	}
323221828Sgrehan
324253440Sgrehan	do {
325221828Sgrehan		/*
326271685Sgrehan		 * Get descriptor chain.
327221828Sgrehan		 */
328271685Sgrehan		n = vq_getchain(vq, iov, VTNET_MAXSEGS, NULL);
329271685Sgrehan		assert(n >= 1 && n <= VTNET_MAXSEGS);
330221828Sgrehan
331221828Sgrehan		/*
332221828Sgrehan		 * Get a pointer to the rx header, and use the
333221828Sgrehan		 * data immediately following it for the packet buffer.
334221828Sgrehan		 */
335271685Sgrehan		vrx = iov[0].iov_base;
336271685Sgrehan		riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen);
337221828Sgrehan
338271685Sgrehan		len = readv(sc->vsc_tapfd, riov, n);
339221828Sgrehan
340221828Sgrehan		if (len < 0 && errno == EWOULDBLOCK) {
341253440Sgrehan			/*
342253440Sgrehan			 * No more packets, but still some avail ring
343253440Sgrehan			 * entries.  Interrupt if needed/appropriate.
344253440Sgrehan			 */
345253440Sgrehan			vq_endchains(vq, 0);
346253440Sgrehan			return;
347221828Sgrehan		}
348221828Sgrehan
349221828Sgrehan		/*
350221828Sgrehan		 * The only valid field in the rx packet header is the
351271685Sgrehan		 * number of buffers if merged rx bufs were negotiated.
352221828Sgrehan		 */
353271685Sgrehan		memset(vrx, 0, sc->rx_vhdrlen);
354221828Sgrehan
355271685Sgrehan		if (sc->rx_merge) {
356271685Sgrehan			struct virtio_net_rxhdr *vrxh;
357271685Sgrehan
358271685Sgrehan			vrxh = vrx;
359271685Sgrehan			vrxh->vrh_bufs = 1;
360271685Sgrehan		}
361271685Sgrehan
362221828Sgrehan		/*
363253440Sgrehan		 * Release this chain and handle more chains.
364221828Sgrehan		 */
365271685Sgrehan		vq_relchain(vq, len + sc->rx_vhdrlen);
366253440Sgrehan	} while (vq_has_descs(vq));
367221828Sgrehan
368253440Sgrehan	/* Interrupt if needed, including for NOTIFY_ON_EMPTY. */
369253440Sgrehan	vq_endchains(vq, 1);
370221828Sgrehan}
371221828Sgrehan
372221828Sgrehanstatic void
373221828Sgrehanpci_vtnet_tap_callback(int fd, enum ev_type type, void *param)
374221828Sgrehan{
375221828Sgrehan	struct pci_vtnet_softc *sc = param;
376221828Sgrehan
377250083Sneel	pthread_mutex_lock(&sc->rx_mtx);
378250083Sneel	sc->rx_in_progress = 1;
379221828Sgrehan	pci_vtnet_tap_rx(sc);
380250083Sneel	sc->rx_in_progress = 0;
381250083Sneel	pthread_mutex_unlock(&sc->rx_mtx);
382221828Sgrehan
383221828Sgrehan}
384221828Sgrehan
385221828Sgrehanstatic void
386253440Sgrehanpci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq)
387221828Sgrehan{
388253440Sgrehan	struct pci_vtnet_softc *sc = vsc;
389253440Sgrehan
390221828Sgrehan	/*
391221828Sgrehan	 * A qnotify means that the rx process can now begin
392221828Sgrehan	 */
393221828Sgrehan	if (sc->vsc_rx_ready == 0) {
394221828Sgrehan		sc->vsc_rx_ready = 1;
395221828Sgrehan	}
396221828Sgrehan}
397221828Sgrehan
398221828Sgrehanstatic void
399253440Sgrehanpci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq)
400221828Sgrehan{
401221828Sgrehan	struct iovec iov[VTNET_MAXSEGS + 1];
402253440Sgrehan	int i, n;
403253440Sgrehan	int plen, tlen;
404221828Sgrehan
405221828Sgrehan	/*
406253440Sgrehan	 * Obtain chain of descriptors.  The first one is
407253440Sgrehan	 * really the header descriptor, so we need to sum
408253440Sgrehan	 * up two lengths: packet length and transfer length.
409221828Sgrehan	 */
410253440Sgrehan	n = vq_getchain(vq, iov, VTNET_MAXSEGS, NULL);
411253440Sgrehan	assert(n >= 1 && n <= VTNET_MAXSEGS);
412253440Sgrehan	plen = 0;
413253440Sgrehan	tlen = iov[0].iov_len;
414253440Sgrehan	for (i = 1; i < n; i++) {
415253440Sgrehan		plen += iov[i].iov_len;
416253440Sgrehan		tlen += iov[i].iov_len;
417221828Sgrehan	}
418221828Sgrehan
419253440Sgrehan	DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, n));
420253440Sgrehan	pci_vtnet_tap_tx(sc, &iov[1], n - 1, plen);
421221828Sgrehan
422253440Sgrehan	/* chain is processed, release it and set tlen */
423253440Sgrehan	vq_relchain(vq, tlen);
424221828Sgrehan}
425221828Sgrehan
426221828Sgrehanstatic void
427253440Sgrehanpci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq)
428221828Sgrehan{
429253440Sgrehan	struct pci_vtnet_softc *sc = vsc;
430221828Sgrehan
431221828Sgrehan	/*
432253440Sgrehan	 * Any ring entries to process?
433221828Sgrehan	 */
434253440Sgrehan	if (!vq_has_descs(vq))
435221828Sgrehan		return;
436221828Sgrehan
437249917Sgrehan	/* Signal the tx thread for processing */
438249917Sgrehan	pthread_mutex_lock(&sc->tx_mtx);
439249917Sgrehan	if (sc->tx_in_progress == 0)
440249917Sgrehan		pthread_cond_signal(&sc->tx_cond);
441249917Sgrehan	pthread_mutex_unlock(&sc->tx_mtx);
442221828Sgrehan}
443221828Sgrehan
444249917Sgrehan/*
445249917Sgrehan * Thread which will handle processing of TX desc
446249917Sgrehan */
447249917Sgrehanstatic void *
448249917Sgrehanpci_vtnet_tx_thread(void *param)
449249917Sgrehan{
450253440Sgrehan	struct pci_vtnet_softc *sc = param;
451253440Sgrehan	struct vqueue_info *vq;
452253440Sgrehan	int have_work, error;
453253440Sgrehan
454253440Sgrehan	vq = &sc->vsc_queues[VTNET_TXQ];
455253440Sgrehan
456253440Sgrehan	/*
457253440Sgrehan	 * Let us wait till the tx queue pointers get initialised &
458253440Sgrehan	 * first tx signaled
459249917Sgrehan	 */
460249917Sgrehan	pthread_mutex_lock(&sc->tx_mtx);
461249917Sgrehan	error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
462249917Sgrehan	assert(error == 0);
463253440Sgrehan
464249917Sgrehan	for (;;) {
465253440Sgrehan		/* note - tx mutex is locked here */
466253440Sgrehan		do {
467250197Sneel			if (sc->resetting)
468253440Sgrehan				have_work = 0;
469250197Sneel			else
470253440Sgrehan				have_work = vq_has_descs(vq);
471250197Sneel
472253440Sgrehan			if (!have_work) {
473253440Sgrehan				sc->tx_in_progress = 0;
474253440Sgrehan				error = pthread_cond_wait(&sc->tx_cond,
475253440Sgrehan							  &sc->tx_mtx);
476253440Sgrehan				assert(error == 0);
477253440Sgrehan			}
478253440Sgrehan		} while (!have_work);
479249917Sgrehan		sc->tx_in_progress = 1;
480249917Sgrehan		pthread_mutex_unlock(&sc->tx_mtx);
481249917Sgrehan
482253440Sgrehan		vq_startchains(vq);
483253440Sgrehan		do {
484249917Sgrehan			/*
485253440Sgrehan			 * Run through entries, placing them into
486253440Sgrehan			 * iovecs and sending when an end-of-packet
487253440Sgrehan			 * is found
488249917Sgrehan			 */
489253440Sgrehan			pci_vtnet_proctx(sc, vq);
490253440Sgrehan		} while (vq_has_descs(vq));
491250197Sneel
492250197Sneel		/*
493250197Sneel		 * Generate an interrupt if needed.
494250197Sneel		 */
495253440Sgrehan		vq_endchains(vq, 1);
496253440Sgrehan
497253440Sgrehan		pthread_mutex_lock(&sc->tx_mtx);
498249917Sgrehan	}
499221828Sgrehan}
500221828Sgrehan
#ifdef notyet
/*
 * Notify handler for the control queue.  Only compiled when 'notyet'
 * is defined; it logs the notification and does nothing else.
 */
static void
pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq)
{

	DPRINTF(("vtnet: control qnotify!\n\r"));
}
#endif /* notyet */
509221828Sgrehan
510221828Sgrehanstatic int
511252682Sgrehanpci_vtnet_parsemac(char *mac_str, uint8_t *mac_addr)
512252682Sgrehan{
513252682Sgrehan        struct ether_addr *ea;
514252682Sgrehan        char *tmpstr;
515252682Sgrehan        char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
516252682Sgrehan
517252682Sgrehan        tmpstr = strsep(&mac_str,"=");
518252682Sgrehan
519252682Sgrehan        if ((mac_str != NULL) && (!strcmp(tmpstr,"mac"))) {
520252682Sgrehan                ea = ether_aton(mac_str);
521252682Sgrehan
522252682Sgrehan                if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) ||
523252682Sgrehan                    memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) {
524252682Sgrehan			fprintf(stderr, "Invalid MAC %s\n", mac_str);
525252682Sgrehan                        return (EINVAL);
526252682Sgrehan                } else
527252682Sgrehan                        memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN);
528252682Sgrehan        }
529252682Sgrehan
530252682Sgrehan        return (0);
531252682Sgrehan}
532252682Sgrehan
533252682Sgrehan
534252682Sgrehanstatic int
535221828Sgrehanpci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
536221828Sgrehan{
537221828Sgrehan	MD5_CTX mdctx;
538221828Sgrehan	unsigned char digest[16];
539221828Sgrehan	char nstr[80];
540249917Sgrehan	char tname[MAXCOMLEN + 1];
541221828Sgrehan	struct pci_vtnet_softc *sc;
542252682Sgrehan	char *devname;
543252682Sgrehan	char *vtopts;
544252682Sgrehan	int mac_provided;
545221828Sgrehan
546268953Sjhb	sc = calloc(1, sizeof(struct pci_vtnet_softc));
547221828Sgrehan
548253440Sgrehan	pthread_mutex_init(&sc->vsc_mtx, NULL);
549221828Sgrehan
550253440Sgrehan	vi_softc_linkup(&sc->vsc_vs, &vtnet_vi_consts, sc, pi, sc->vsc_queues);
551267393Sjhb	sc->vsc_vs.vs_mtx = &sc->vsc_mtx;
552267393Sjhb
553253440Sgrehan	sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ;
554253440Sgrehan	sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq;
555253440Sgrehan	sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ;
556253440Sgrehan	sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq;
557253440Sgrehan#ifdef notyet
558253440Sgrehan	sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ;
559253440Sgrehan        sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq;
560253440Sgrehan#endif
561246109Sneel
562246109Sneel	/*
563252682Sgrehan	 * Attempt to open the tap device and read the MAC address
564252682Sgrehan	 * if specified
565221828Sgrehan	 */
566252682Sgrehan	mac_provided = 0;
567221828Sgrehan	sc->vsc_tapfd = -1;
568221828Sgrehan	if (opts != NULL) {
569221828Sgrehan		char tbuf[80];
570252682Sgrehan		int err;
571221828Sgrehan
572252682Sgrehan		devname = vtopts = strdup(opts);
573252682Sgrehan		(void) strsep(&vtopts, ",");
574252682Sgrehan
575252682Sgrehan		if (vtopts != NULL) {
576253440Sgrehan			err = pci_vtnet_parsemac(vtopts, sc->vsc_config.mac);
577252682Sgrehan			if (err != 0) {
578252682Sgrehan				free(devname);
579252682Sgrehan				return (err);
580252682Sgrehan			}
581252682Sgrehan			mac_provided = 1;
582252682Sgrehan		}
583252682Sgrehan
584221828Sgrehan		strcpy(tbuf, "/dev/");
585252682Sgrehan		strlcat(tbuf, devname, sizeof(tbuf));
586221828Sgrehan
587252682Sgrehan		free(devname);
588252682Sgrehan
589221828Sgrehan		sc->vsc_tapfd = open(tbuf, O_RDWR);
590221828Sgrehan		if (sc->vsc_tapfd == -1) {
591221828Sgrehan			WPRINTF(("open of tap device %s failed\n", tbuf));
592221828Sgrehan		} else {
593221828Sgrehan			/*
594221828Sgrehan			 * Set non-blocking and register for read
595221828Sgrehan			 * notifications with the event loop
596221828Sgrehan			 */
597221828Sgrehan			int opt = 1;
598221828Sgrehan			if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) {
599221828Sgrehan				WPRINTF(("tap device O_NONBLOCK failed\n"));
600221828Sgrehan				close(sc->vsc_tapfd);
601221828Sgrehan				sc->vsc_tapfd = -1;
602221828Sgrehan			}
603221828Sgrehan
604221828Sgrehan			sc->vsc_mevp = mevent_add(sc->vsc_tapfd,
605221828Sgrehan						  EVF_READ,
606221828Sgrehan						  pci_vtnet_tap_callback,
607221828Sgrehan						  sc);
608221828Sgrehan			if (sc->vsc_mevp == NULL) {
609221828Sgrehan				WPRINTF(("Could not register event\n"));
610221828Sgrehan				close(sc->vsc_tapfd);
611221828Sgrehan				sc->vsc_tapfd = -1;
612221828Sgrehan			}
613221828Sgrehan		}
614221828Sgrehan	}
615221828Sgrehan
616221828Sgrehan	/*
617252682Sgrehan	 * The default MAC address is the standard NetApp OUI of 00-a0-98,
618252682Sgrehan	 * followed by an MD5 of the PCI slot/func number and dev name
619221828Sgrehan	 */
620252682Sgrehan	if (!mac_provided) {
621244159Sgrehan		snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot,
622259301Sgrehan		    pi->pi_func, vmname);
623221828Sgrehan
624252682Sgrehan		MD5Init(&mdctx);
625252682Sgrehan		MD5Update(&mdctx, nstr, strlen(nstr));
626252682Sgrehan		MD5Final(digest, &mdctx);
627221828Sgrehan
628253440Sgrehan		sc->vsc_config.mac[0] = 0x00;
629253440Sgrehan		sc->vsc_config.mac[1] = 0xa0;
630253440Sgrehan		sc->vsc_config.mac[2] = 0x98;
631253440Sgrehan		sc->vsc_config.mac[3] = digest[0];
632253440Sgrehan		sc->vsc_config.mac[4] = digest[1];
633253440Sgrehan		sc->vsc_config.mac[5] = digest[2];
634252682Sgrehan	}
635221828Sgrehan
636221828Sgrehan	/* initialize config space */
637221828Sgrehan	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET);
638221828Sgrehan	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
639221828Sgrehan	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
640221828Sgrehan	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET);
641253440Sgrehan
642267393Sjhb	pci_lintr_request(pi);
643267393Sjhb
644253440Sgrehan	/* link always up */
645253440Sgrehan	sc->vsc_config.status = 1;
646246109Sneel
647253440Sgrehan	/* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */
648256755Sgrehan	if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix()))
649253440Sgrehan		return (1);
650246109Sneel
651253440Sgrehan	/* use BAR 0 to map config regs in IO space */
652253440Sgrehan	vi_set_io_bar(&sc->vsc_vs, 0);
653246109Sneel
654250083Sneel	sc->resetting = 0;
655250083Sneel
656271685Sgrehan	sc->rx_merge = 1;
657271685Sgrehan	sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr);
658250083Sneel	sc->rx_in_progress = 0;
659250083Sneel	pthread_mutex_init(&sc->rx_mtx, NULL);
660250083Sneel
661249917Sgrehan	/*
662253440Sgrehan	 * Initialize tx semaphore & spawn TX processing thread.
663249917Sgrehan	 * As of now, only one thread for TX desc processing is
664249917Sgrehan	 * spawned.
665249917Sgrehan	 */
666249917Sgrehan	sc->tx_in_progress = 0;
667249917Sgrehan	pthread_mutex_init(&sc->tx_mtx, NULL);
668249917Sgrehan	pthread_cond_init(&sc->tx_cond, NULL);
669249917Sgrehan	pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc);
670259301Sgrehan	snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot,
671259301Sgrehan	    pi->pi_func);
672249917Sgrehan        pthread_set_name_np(sc->tx_tid, tname);
673221828Sgrehan
674221828Sgrehan	return (0);
675221828Sgrehan}
676221828Sgrehan
677253440Sgrehanstatic int
678253440Sgrehanpci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value)
679246109Sneel{
680253440Sgrehan	struct pci_vtnet_softc *sc = vsc;
681222830Sgrehan	void *ptr;
682222830Sgrehan
683253440Sgrehan	if (offset < 6) {
684253440Sgrehan		assert(offset + size <= 6);
685221828Sgrehan		/*
686221828Sgrehan		 * The driver is allowed to change the MAC address
687221828Sgrehan		 */
688253440Sgrehan		ptr = &sc->vsc_config.mac[offset];
689253440Sgrehan		memcpy(ptr, &value, size);
690253440Sgrehan	} else {
691271685Sgrehan		/* silently ignore other writes */
692253440Sgrehan		DPRINTF(("vtnet: write to readonly reg %d\n\r", offset));
693221828Sgrehan	}
694271685Sgrehan
695253440Sgrehan	return (0);
696221828Sgrehan}
697221828Sgrehan
698253440Sgrehanstatic int
699253440Sgrehanpci_vtnet_cfgread(void *vsc, int offset, int size, uint32_t *retval)
700221828Sgrehan{
701253440Sgrehan	struct pci_vtnet_softc *sc = vsc;
702222830Sgrehan	void *ptr;
703221828Sgrehan
704253440Sgrehan	ptr = (uint8_t *)&sc->vsc_config + offset;
705253440Sgrehan	memcpy(retval, ptr, size);
706253440Sgrehan	return (0);
707221828Sgrehan}
708221828Sgrehan
709271685Sgrehanstatic void
710271685Sgrehanpci_vtnet_neg_features(void *vsc, uint64_t negotiated_features)
711271685Sgrehan{
712271685Sgrehan	struct pci_vtnet_softc *sc = vsc;
713271685Sgrehan
714271685Sgrehan	sc->vsc_features = negotiated_features;
715271685Sgrehan
716271685Sgrehan	if (!(sc->vsc_features & VIRTIO_NET_F_MRG_RXBUF)) {
717271685Sgrehan		sc->rx_merge = 0;
718271685Sgrehan		/* non-merge rx header is 2 bytes shorter */
719271685Sgrehan		sc->rx_vhdrlen -= 2;
720271685Sgrehan	}
721271685Sgrehan}
722271685Sgrehan
723221828Sgrehanstruct pci_devemu pci_de_vnet = {
724241744Sgrehan	.pe_emu = 	"virtio-net",
725241744Sgrehan	.pe_init =	pci_vtnet_init,
726253440Sgrehan	.pe_barwrite =	vi_pci_write,
727253440Sgrehan	.pe_barread =	vi_pci_read
728221828Sgrehan};
729221828SgrehanPCI_EMUL_SET(pci_de_vnet);
730