1/*	$NetBSD: if_virt.c,v 1.25 2011/10/31 13:25:21 yamt Exp $	*/
2
3/*
4 * Copyright (c) 2008 Antti Kantee.  All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__KERNEL_RCSID(0, "$NetBSD: if_virt.c,v 1.25 2011/10/31 13:25:21 yamt Exp $");
30
31#include <sys/param.h>
32#include <sys/condvar.h>
33#include <sys/fcntl.h>
34#include <sys/kernel.h>
35#include <sys/kmem.h>
36#include <sys/kthread.h>
37#include <sys/mutex.h>
38#include <sys/poll.h>
39#include <sys/sockio.h>
40#include <sys/socketvar.h>
41#include <sys/cprng.h>
42
43#include <net/bpf.h>
44#include <net/if.h>
45#include <net/if_ether.h>
46#include <net/if_tap.h>
47
48#include <netinet/in.h>
49#include <netinet/in_var.h>
50
51#include <rump/rump.h>
52#include <rump/rumpuser.h>
53
54#include "rump_private.h"
55#include "rump_net_private.h"
56
/*
 * Virtual interface for userspace purposes.  Uses tap(4) to
 * interface with the host kernel and simply shovels data
 * to/from /dev/tapN.
 */
62
/* Name prefix used for the interface cloner and for every created instance. */
#define VIRTIF_BASE "virt"

/* ifnet callbacks installed on each interface by rump_virtif_create(). */
static int	virtif_init(struct ifnet *);
static int	virtif_ioctl(struct ifnet *, u_long, void *);
static void	virtif_start(struct ifnet *);
static void	virtif_stop(struct ifnet *, int);
69
70struct virtif_sc {
71	struct ethercom sc_ec;
72	int sc_tapfd;
73	bool sc_dying;
74	struct lwp *sc_l_snd, *sc_l_rcv;
75	kmutex_t sc_mtx;
76	kcondvar_t sc_cv;
77};
78
/* Worker thread bodies; both take the interface's struct ifnet * as arg. */
static void virtif_receiver(void *);
static void virtif_sender(void *);
/* Create/destroy hooks referenced by virtif_cloner below. */
static int  virtif_clone(struct if_clone *, int);
static int  virtif_unclone(struct ifnet *);

/* Interface cloner for the "virt" name, wired to the two hooks above. */
struct if_clone virtif_cloner =
    IF_CLONE_INITIALIZER(VIRTIF_BASE, virtif_clone, virtif_unclone);
86
87int
88rump_virtif_create(int num)
89{
90	struct virtif_sc *sc;
91	struct ifnet *ifp;
92	uint8_t enaddr[ETHER_ADDR_LEN] = { 0xb2, 0x0a, 0x00, 0x0b, 0x0e, 0x01 };
93	char tapdev[16];
94	int fd, error = 0;
95
96	if (num >= 0x100)
97		return E2BIG;
98
99	snprintf(tapdev, sizeof(tapdev), "/dev/tap%d", num);
100	fd = rumpuser_open(tapdev, O_RDWR, &error);
101	if (fd == -1) {
102		printf("virtif_create: can't open /dev/tap%d: %d\n",
103		    num, error);
104		return error;
105	}
106	enaddr[2] = cprng_fast32() & 0xff;
107	enaddr[5] = num;
108
109	sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
110	sc->sc_dying = false;
111	sc->sc_tapfd = fd;
112
113	mutex_init(&sc->sc_mtx, MUTEX_DEFAULT, IPL_NONE);
114	cv_init(&sc->sc_cv, "virtsnd");
115	ifp = &sc->sc_ec.ec_if;
116	sprintf(ifp->if_xname, "%s%d", VIRTIF_BASE, num);
117	ifp->if_softc = sc;
118
119	if (rump_threads) {
120		if ((error = kthread_create(PRI_NONE, KTHREAD_MUSTJOIN, NULL,
121		    virtif_receiver, ifp, &sc->sc_l_rcv, "virtifr")) != 0)
122			goto out;
123
124		if ((error = kthread_create(PRI_NONE,
125		    KTHREAD_MUSTJOIN | KTHREAD_MPSAFE, NULL,
126		    virtif_sender, ifp, &sc->sc_l_snd, "virtifs")) != 0)
127			goto out;
128	} else {
129		printf("WARNING: threads not enabled, receive NOT working\n");
130	}
131
132	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
133	ifp->if_init = virtif_init;
134	ifp->if_ioctl = virtif_ioctl;
135	ifp->if_start = virtif_start;
136	ifp->if_stop = virtif_stop;
137	IFQ_SET_READY(&ifp->if_snd);
138
139	if_attach(ifp);
140	ether_ifattach(ifp, enaddr);
141
142 out:
143	if (error) {
144		virtif_unclone(ifp);
145	}
146
147	return error;
148}
149
/*
 * Cloner create hook: forwards the requested unit number to
 * rump_virtif_create() and returns its result.  ifc is not used.
 */
static int
virtif_clone(struct if_clone *ifc, int unit)
{
	int rv;

	rv = rump_virtif_create(unit);
	return rv;
}
156
157static int
158virtif_unclone(struct ifnet *ifp)
159{
160	struct virtif_sc *sc = ifp->if_softc;
161
162	mutex_enter(&sc->sc_mtx);
163	if (sc->sc_dying) {
164		mutex_exit(&sc->sc_mtx);
165		return EINPROGRESS;
166	}
167	sc->sc_dying = true;
168	cv_broadcast(&sc->sc_cv);
169	mutex_exit(&sc->sc_mtx);
170
171	virtif_stop(ifp, 1);
172	if_down(ifp);
173
174	if (sc->sc_l_snd) {
175		kthread_join(sc->sc_l_snd);
176		sc->sc_l_snd = NULL;
177	}
178	if (sc->sc_l_rcv) {
179		kthread_join(sc->sc_l_rcv);
180		sc->sc_l_rcv = NULL;
181	}
182
183	rumpuser_close(sc->sc_tapfd, NULL);
184
185	mutex_destroy(&sc->sc_mtx);
186	cv_destroy(&sc->sc_cv);
187	kmem_free(sc, sizeof(*sc));
188
189	ether_ifdetach(ifp);
190	if_detach(ifp);
191
192	return 0;
193}
194
195static int
196virtif_init(struct ifnet *ifp)
197{
198	struct virtif_sc *sc = ifp->if_softc;
199
200	ifp->if_flags |= IFF_RUNNING;
201
202	mutex_enter(&sc->sc_mtx);
203	cv_broadcast(&sc->sc_cv);
204	mutex_exit(&sc->sc_mtx);
205
206	return 0;
207}
208
209static int
210virtif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
211{
212	int s, rv;
213
214	s = splnet();
215	rv = ether_ioctl(ifp, cmd, data);
216	if (rv == ENETRESET)
217		rv = 0;
218	splx(s);
219
220	return rv;
221}
222
223/* just send everything in-context */
224static void
225virtif_start(struct ifnet *ifp)
226{
227	struct virtif_sc *sc = ifp->if_softc;
228
229	mutex_enter(&sc->sc_mtx);
230	ifp->if_flags |= IFF_OACTIVE;
231	cv_broadcast(&sc->sc_cv);
232	mutex_exit(&sc->sc_mtx);
233}
234
235static void
236virtif_stop(struct ifnet *ifp, int disable)
237{
238	struct virtif_sc *sc = ifp->if_softc;
239
240	ifp->if_flags &= ~IFF_RUNNING;
241
242	mutex_enter(&sc->sc_mtx);
243	cv_broadcast(&sc->sc_cv);
244	mutex_exit(&sc->sc_mtx);
245}
246
/*
 * Timeout handed to rumpuser_poll().  It bounds how long the receiver
 * can go without rechecking sc_dying.
 */
#define POLLTIMO_MS 1
/*
 * Receiver thread body.  arg is the interface's struct ifnet *.
 *
 * Loops forever reading frames from the tap descriptor into an mbuf and
 * passing them to ether_input().  The loop is left, and the thread
 * exits, once sc_dying is observed after a poll.
 */
static void
virtif_receiver(void *arg)
{
	struct ifnet *ifp = arg;
	struct virtif_sc *sc = ifp->if_softc;
	struct mbuf *m;
	/* size of the external buffer and of each read request */
	size_t plen = ETHER_MAX_LEN_JUMBO+1;
	struct pollfd pfd;
	ssize_t n;
	int error, rv;

	pfd.fd = sc->sc_tapfd;
	pfd.events = POLLIN;

	for (;;) {
		/* header mbuf with plen bytes of external storage */
		m = m_gethdr(M_WAIT, MT_DATA);
		MEXTMALLOC(m, plen, M_WAIT);

		/*
		 * Retry point: every "goto again" below reuses the mbuf
		 * allocated above instead of allocating a new one.
		 */
 again:
		/* poll, but periodically check if we should die */
		rv = rumpuser_poll(&pfd, 1, POLLTIMO_MS, &error);
		if (sc->sc_dying) {
			m_freem(m);
			break;
		}
		/* rv == 0: nothing to read yet, poll again */
		if (rv == 0)
			goto again;

		n = rumpuser_read(sc->sc_tapfd, mtod(m, void *), plen, &error);
		KASSERT(n < ETHER_MAX_LEN_JUMBO);
		if (__predict_false(n < 0)) {
			/* EAGAIN: retry immediately */
			if (n == -1 && error == EAGAIN) {
				goto again;
			}

			/*
			 * Any other read failure: report it, then wait on
			 * sc_cv for up to hz ticks before trying again.
			 */
			printf("%s: read from /dev/tap failed. host is down?\n",
			    ifp->if_xname);
			mutex_enter(&sc->sc_mtx);
			/* could check if need go, done soon anyway */
			cv_timedwait(&sc->sc_cv, &sc->sc_mtx, hz);
			mutex_exit(&sc->sc_mtx);
			goto again;
		}

		/* tap sometimes returns EOF.  don't sweat it and plow on */
		if (__predict_false(n == 0))
			goto again;

		/* discard if we're not up */
		if ((ifp->if_flags & IFF_RUNNING) == 0)
			goto again;

		/* n bytes were read: set the lengths, tap bpf, pass it up */
		m->m_len = m->m_pkthdr.len = n;
		m->m_pkthdr.rcvif = ifp;
		bpf_mtap(ifp, m);
		ether_input(ifp, m);
		/* the next loop iteration allocates a fresh mbuf */
	}

	kthread_exit(0);
}
308
309/* lazy bum stetson-harrison magic value */
310#define LB_SH 32
311static void
312virtif_sender(void *arg)
313{
314	struct ifnet *ifp = arg;
315	struct virtif_sc *sc = ifp->if_softc;
316	struct mbuf *m, *m0;
317	struct rumpuser_iovec io[LB_SH];
318	int i, error;
319
320	mutex_enter(&sc->sc_mtx);
321	KERNEL_LOCK(1, NULL);
322	while (!sc->sc_dying) {
323		if (!(ifp->if_flags & IFF_RUNNING)) {
324			cv_wait(&sc->sc_cv, &sc->sc_mtx);
325			continue;
326		}
327		IF_DEQUEUE(&ifp->if_snd, m0);
328		if (!m0) {
329			ifp->if_flags &= ~IFF_OACTIVE;
330			cv_wait(&sc->sc_cv, &sc->sc_mtx);
331			continue;
332		}
333		mutex_exit(&sc->sc_mtx);
334
335		m = m0;
336		for (i = 0; i < LB_SH && m; i++) {
337			io[i].iov_base = mtod(m, void *);
338			io[i].iov_len = m->m_len;
339			m = m->m_next;
340		}
341		if (i == LB_SH)
342			panic("lazy bum");
343		bpf_mtap(ifp, m0);
344		KERNEL_UNLOCK_LAST(curlwp);
345
346		rumpuser_writev(sc->sc_tapfd, io, i, &error);
347
348		KERNEL_LOCK(1, NULL);
349		m_freem(m0);
350		mutex_enter(&sc->sc_mtx);
351	}
352	KERNEL_UNLOCK_LAST(curlwp);
353
354	mutex_exit(&sc->sc_mtx);
355
356	kthread_exit(0);
357}
358
359/*
360 * dummyif is a nada-interface.
361 * As it requires nothing external, it can be used for testing
362 * interface configuration.
363 */
/* ifnet callbacks installed on the dummy interface. */
static int	dummyif_init(struct ifnet *);
static void	dummyif_start(struct ifnet *);
366
367void
368rump_dummyif_create()
369{
370	struct ifnet *ifp;
371	struct ethercom *ec;
372	uint8_t enaddr[ETHER_ADDR_LEN] = { 0xb2, 0x0a, 0x00, 0x0b, 0x0e, 0x01 };
373
374	enaddr[2] = cprng_fast32() & 0xff;
375	enaddr[5] = cprng_fast32() & 0xff;
376
377	ec = kmem_zalloc(sizeof(*ec), KM_SLEEP);
378
379	ifp = &ec->ec_if;
380	strlcpy(ifp->if_xname, "dummy0", sizeof(ifp->if_xname));
381	ifp->if_softc = ifp;
382	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
383	ifp->if_init = dummyif_init;
384	ifp->if_ioctl = virtif_ioctl;
385	ifp->if_start = dummyif_start;
386
387	if_attach(ifp);
388	ether_ifattach(ifp, enaddr);
389}
390
391static int
392dummyif_init(struct ifnet *ifp)
393{
394
395	ifp->if_flags |= IFF_RUNNING;
396	return 0;
397}
398
/*
 * if_start callback for the dummy interface.  Intentionally empty: the
 * function does nothing with ifp or its send queue.
 */
static void
dummyif_start(struct ifnet *ifp)
{

}
404