1// SPDX-License-Identifier: GPL-2.0
2#define _GNU_SOURCE
3#include <getopt.h>
4#include <limits.h>
5#include <string.h>
6#include <poll.h>
7#include <sys/eventfd.h>
8#include <stdlib.h>
9#include <assert.h>
10#include <unistd.h>
11#include <sys/ioctl.h>
12#include <sys/stat.h>
13#include <sys/types.h>
14#include <fcntl.h>
15#include <stdbool.h>
16#include <linux/vhost.h>
17#include <linux/if.h>
18#include <linux/if_tun.h>
19#include <linux/in.h>
20#include <linux/if_packet.h>
21#include <linux/virtio_net.h>
22#include <netinet/ether.h>
23
/* Size of the virtio-net header that sits in front of every frame. */
#define HDR_LEN		(sizeof(struct virtio_net_hdr_mrg_rxbuf))
/* Size in bytes of the Ethernet test frame (header included). */
#define TEST_BUF_LEN	(256)
/* EtherType stamped on, and filtered for, the test frames. */
#define TEST_PTYPE	(ETH_P_LOOPBACK)
/* Number of descriptors requested for each virtqueue. */
#define DESC_NUM	(256)
28
/*
 * Hooks consumed by the kmalloc()/kfree() emulation in
 * tools/virtio/linux/kernel.h; this file only has to define them.
 */
void *__kmalloc_fake;
void *__kfree_ignore_start;
void *__kfree_ignore_end;
31
32struct vq_info {
33	int kick;
34	int call;
35	int idx;
36	long started;
37	long completed;
38	struct pollfd fds;
39	void *ring;
40	/* copy used for control */
41	struct vring vring;
42	struct virtqueue *vq;
43};
44
45struct vdev_info {
46	struct virtio_device vdev;
47	int control;
48	struct vq_info vqs[2];
49	int nvqs;
50	void *buf;
51	size_t buf_size;
52	char *test_buf;
53	char *res_buf;
54	struct vhost_memory *mem;
55	int sock;
56	int ifindex;
57	unsigned char mac[ETHER_ADDR_LEN];
58};
59
60static int tun_alloc(struct vdev_info *dev, char *tun_name)
61{
62	struct ifreq ifr;
63	int len = HDR_LEN;
64	int fd, e;
65
66	fd = open("/dev/net/tun", O_RDWR);
67	if (fd < 0) {
68		perror("Cannot open /dev/net/tun");
69		return fd;
70	}
71
72	memset(&ifr, 0, sizeof(ifr));
73
74	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
75	strncpy(ifr.ifr_name, tun_name, IFNAMSIZ);
76
77	e = ioctl(fd, TUNSETIFF, &ifr);
78	if (e < 0) {
79		perror("ioctl[TUNSETIFF]");
80		close(fd);
81		return e;
82	}
83
84	e = ioctl(fd, TUNSETVNETHDRSZ, &len);
85	if (e < 0) {
86		perror("ioctl[TUNSETVNETHDRSZ]");
87		close(fd);
88		return e;
89	}
90
91	e = ioctl(fd, SIOCGIFHWADDR, &ifr);
92	if (e < 0) {
93		perror("ioctl[SIOCGIFHWADDR]");
94		close(fd);
95		return e;
96	}
97
98	memcpy(dev->mac, &ifr.ifr_hwaddr.sa_data, ETHER_ADDR_LEN);
99	return fd;
100}
101
102static void vdev_create_socket(struct vdev_info *dev, char *tun_name)
103{
104	struct ifreq ifr;
105
106	dev->sock = socket(AF_PACKET, SOCK_RAW, htons(TEST_PTYPE));
107	assert(dev->sock != -1);
108
109	strncpy(ifr.ifr_name, tun_name, IFNAMSIZ);
110	assert(ioctl(dev->sock, SIOCGIFINDEX, &ifr) >= 0);
111
112	dev->ifindex = ifr.ifr_ifindex;
113
114	/* Set the flags that bring the device up */
115	assert(ioctl(dev->sock, SIOCGIFFLAGS, &ifr) >= 0);
116	ifr.ifr_flags |= (IFF_UP | IFF_RUNNING);
117	assert(ioctl(dev->sock, SIOCSIFFLAGS, &ifr) >= 0);
118}
119
120static void vdev_send_packet(struct vdev_info *dev)
121{
122	char *sendbuf = dev->test_buf + HDR_LEN;
123	struct sockaddr_ll saddrll = {0};
124	int sockfd = dev->sock;
125	int ret;
126
127	saddrll.sll_family = PF_PACKET;
128	saddrll.sll_ifindex = dev->ifindex;
129	saddrll.sll_halen = ETH_ALEN;
130	saddrll.sll_protocol = htons(TEST_PTYPE);
131
132	ret = sendto(sockfd, sendbuf, TEST_BUF_LEN, 0,
133		     (struct sockaddr *)&saddrll,
134		     sizeof(struct sockaddr_ll));
135	assert(ret >= 0);
136}
137
138static bool vq_notify(struct virtqueue *vq)
139{
140	struct vq_info *info = vq->priv;
141	unsigned long long v = 1;
142	int r;
143
144	r = write(info->kick, &v, sizeof(v));
145	assert(r == sizeof(v));
146
147	return true;
148}
149
150static void vhost_vq_setup(struct vdev_info *dev, struct vq_info *info)
151{
152	struct vhost_vring_addr addr = {
153		.index = info->idx,
154		.desc_user_addr = (uint64_t)(unsigned long)info->vring.desc,
155		.avail_user_addr = (uint64_t)(unsigned long)info->vring.avail,
156		.used_user_addr = (uint64_t)(unsigned long)info->vring.used,
157	};
158	struct vhost_vring_state state = { .index = info->idx };
159	struct vhost_vring_file file = { .index = info->idx };
160	int r;
161
162	state.num = info->vring.num;
163	r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state);
164	assert(r >= 0);
165
166	state.num = 0;
167	r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state);
168	assert(r >= 0);
169
170	r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr);
171	assert(r >= 0);
172
173	file.fd = info->kick;
174	r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file);
175	assert(r >= 0);
176}
177
178static void vq_reset(struct vq_info *info, int num, struct virtio_device *vdev)
179{
180	if (info->vq)
181		vring_del_virtqueue(info->vq);
182
183	memset(info->ring, 0, vring_size(num, 4096));
184	vring_init(&info->vring, num, info->ring, 4096);
185	info->vq = vring_new_virtqueue(info->idx, num, 4096, vdev, true, false,
186				       info->ring, vq_notify, NULL, "test");
187	assert(info->vq);
188	info->vq->priv = info;
189}
190
191static void vq_info_add(struct vdev_info *dev, int idx, int num, int fd)
192{
193	struct vhost_vring_file backend = { .index = idx, .fd = fd };
194	struct vq_info *info = &dev->vqs[idx];
195	int r;
196
197	info->idx = idx;
198	info->kick = eventfd(0, EFD_NONBLOCK);
199	r = posix_memalign(&info->ring, 4096, vring_size(num, 4096));
200	assert(r >= 0);
201	vq_reset(info, num, &dev->vdev);
202	vhost_vq_setup(dev, info);
203
204	r = ioctl(dev->control, VHOST_NET_SET_BACKEND, &backend);
205	assert(!r);
206}
207
208static void vdev_info_init(struct vdev_info *dev, unsigned long long features)
209{
210	struct ether_header *eh;
211	int i, r;
212
213	dev->vdev.features = features;
214	INIT_LIST_HEAD(&dev->vdev.vqs);
215	spin_lock_init(&dev->vdev.vqs_list_lock);
216
217	dev->buf_size = (HDR_LEN + TEST_BUF_LEN) * 2;
218	dev->buf = malloc(dev->buf_size);
219	assert(dev->buf);
220	dev->test_buf = dev->buf;
221	dev->res_buf = dev->test_buf + HDR_LEN + TEST_BUF_LEN;
222
223	memset(dev->test_buf, 0, HDR_LEN + TEST_BUF_LEN);
224	eh = (struct ether_header *)(dev->test_buf + HDR_LEN);
225	eh->ether_type = htons(TEST_PTYPE);
226	memcpy(eh->ether_dhost, dev->mac, ETHER_ADDR_LEN);
227	memcpy(eh->ether_shost, dev->mac, ETHER_ADDR_LEN);
228
229	for (i = sizeof(*eh); i < TEST_BUF_LEN; i++)
230		dev->test_buf[i + HDR_LEN] = (char)i;
231
232	dev->control = open("/dev/vhost-net", O_RDWR);
233	assert(dev->control >= 0);
234
235	r = ioctl(dev->control, VHOST_SET_OWNER, NULL);
236	assert(r >= 0);
237
238	dev->mem = malloc(offsetof(struct vhost_memory, regions) +
239			  sizeof(dev->mem->regions[0]));
240	assert(dev->mem);
241	memset(dev->mem, 0, offsetof(struct vhost_memory, regions) +
242	       sizeof(dev->mem->regions[0]));
243	dev->mem->nregions = 1;
244	dev->mem->regions[0].guest_phys_addr = (long)dev->buf;
245	dev->mem->regions[0].userspace_addr = (long)dev->buf;
246	dev->mem->regions[0].memory_size = dev->buf_size;
247
248	r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
249	assert(r >= 0);
250
251	r = ioctl(dev->control, VHOST_SET_FEATURES, &features);
252	assert(r >= 0);
253
254	dev->nvqs = 2;
255}
256
257static void wait_for_interrupt(struct vq_info *vq)
258{
259	unsigned long long val;
260
261	poll(&vq->fds, 1, 100);
262
263	if (vq->fds.revents & POLLIN)
264		read(vq->fds.fd, &val, sizeof(val));
265}
266
267static void verify_res_buf(char *res_buf)
268{
269	int i;
270
271	for (i = ETHER_HDR_LEN; i < TEST_BUF_LEN; i++)
272		assert(res_buf[i] == (char)i);
273}
274
275static void run_tx_test(struct vdev_info *dev, struct vq_info *vq,
276			bool delayed, int bufs)
277{
278	long long spurious = 0;
279	struct scatterlist sl;
280	unsigned int len;
281	int r;
282
283	for (;;) {
284		long started_before = vq->started;
285		long completed_before = vq->completed;
286
287		virtqueue_disable_cb(vq->vq);
288		do {
289			while (vq->started < bufs &&
290			       (vq->started - vq->completed) < 1) {
291				sg_init_one(&sl, dev->test_buf, HDR_LEN + TEST_BUF_LEN);
292				r = virtqueue_add_outbuf(vq->vq, &sl, 1,
293							 dev->test_buf + vq->started,
294							 GFP_ATOMIC);
295				if (unlikely(r != 0))
296					break;
297
298				++vq->started;
299
300				if (unlikely(!virtqueue_kick(vq->vq))) {
301					r = -1;
302					break;
303				}
304			}
305
306			if (vq->started >= bufs)
307				r = -1;
308
309			/* Flush out completed bufs if any */
310			while (virtqueue_get_buf(vq->vq, &len)) {
311				int n;
312
313				n = recvfrom(dev->sock, dev->res_buf, TEST_BUF_LEN, 0, NULL, NULL);
314				assert(n == TEST_BUF_LEN);
315				verify_res_buf(dev->res_buf);
316
317				++vq->completed;
318				r = 0;
319			}
320		} while (r == 0);
321
322		if (vq->completed == completed_before && vq->started == started_before)
323			++spurious;
324
325		assert(vq->completed <= bufs);
326		assert(vq->started <= bufs);
327		if (vq->completed == bufs)
328			break;
329
330		if (delayed) {
331			if (virtqueue_enable_cb_delayed(vq->vq))
332				wait_for_interrupt(vq);
333		} else {
334			if (virtqueue_enable_cb(vq->vq))
335				wait_for_interrupt(vq);
336		}
337	}
338	printf("TX spurious wakeups: 0x%llx started=0x%lx completed=0x%lx\n",
339	       spurious, vq->started, vq->completed);
340}
341
342static void run_rx_test(struct vdev_info *dev, struct vq_info *vq,
343			bool delayed, int bufs)
344{
345	long long spurious = 0;
346	struct scatterlist sl;
347	unsigned int len;
348	int r;
349
350	for (;;) {
351		long started_before = vq->started;
352		long completed_before = vq->completed;
353
354		do {
355			while (vq->started < bufs &&
356			       (vq->started - vq->completed) < 1) {
357				sg_init_one(&sl, dev->res_buf, HDR_LEN + TEST_BUF_LEN);
358
359				r = virtqueue_add_inbuf(vq->vq, &sl, 1,
360							dev->res_buf + vq->started,
361							GFP_ATOMIC);
362				if (unlikely(r != 0))
363					break;
364
365				++vq->started;
366
367				vdev_send_packet(dev);
368
369				if (unlikely(!virtqueue_kick(vq->vq))) {
370					r = -1;
371					break;
372				}
373			}
374
375			if (vq->started >= bufs)
376				r = -1;
377
378			/* Flush out completed bufs if any */
379			while (virtqueue_get_buf(vq->vq, &len)) {
380				struct ether_header *eh;
381
382				eh = (struct ether_header *)(dev->res_buf + HDR_LEN);
383
384				/* tun netdev is up and running, only handle the
385				 * TEST_PTYPE packet.
386				 */
387				if (eh->ether_type == htons(TEST_PTYPE)) {
388					assert(len == TEST_BUF_LEN + HDR_LEN);
389					verify_res_buf(dev->res_buf + HDR_LEN);
390				}
391
392				++vq->completed;
393				r = 0;
394			}
395		} while (r == 0);
396
397		if (vq->completed == completed_before && vq->started == started_before)
398			++spurious;
399
400		assert(vq->completed <= bufs);
401		assert(vq->started <= bufs);
402		if (vq->completed == bufs)
403			break;
404	}
405
406	printf("RX spurious wakeups: 0x%llx started=0x%lx completed=0x%lx\n",
407	       spurious, vq->started, vq->completed);
408}
409
/* Only --help has a short form; every other option is long-only. */
static const char optstring[] = "h";

/*
 * Long options handed to getopt_long(). Each entry leaves .flag unset, so
 * getopt_long() returns .val for it.
 */
static const struct option longopts[] = {
	{ .name = "help",			.val = 'h' },
	{ .name = "event-idx",			.val = 'E' },
	{ .name = "no-event-idx",		.val = 'e' },
	{ .name = "indirect",			.val = 'I' },
	{ .name = "no-indirect",		.val = 'i' },
	{ .name = "virtio-1",			.val = '1' },
	{ .name = "no-virtio-1",		.val = '0' },
	{ .name = "delayed-interrupt",		.val = 'D' },
	{ .name = "no-delayed-interrupt",	.val = 'd' },
	{ .name = "buf-num",	.has_arg = required_argument,	.val = 'n' },
	{ .name = "batch",	.has_arg = required_argument,	.val = 'b' },
	{ .name = NULL },	/* all-zero terminator */
};
461
/* Print the usage line on stderr and terminate with exit code @status. */
static void help(int status)
{
	static const char usage[] =
		"Usage: vhost_net_test [--help]"
		" [--no-indirect]"
		" [--no-event-idx]"
		" [--no-virtio-1]"
		" [--delayed-interrupt]"
		" [--buf-num]"
		"\n";

	fputs(usage, stderr);

	exit(status);
}
474
475int main(int argc, char **argv)
476{
477	unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) |
478		(1ULL << VIRTIO_RING_F_EVENT_IDX) | (1ULL << VIRTIO_F_VERSION_1);
479	char tun_name[IFNAMSIZ];
480	long nbufs = 0x100000;
481	struct vdev_info dev;
482	bool delayed = false;
483	int o, fd;
484
485	for (;;) {
486		o = getopt_long(argc, argv, optstring, longopts, NULL);
487		switch (o) {
488		case -1:
489			goto done;
490		case '?':
491			help(2);
492		case 'e':
493			features &= ~(1ULL << VIRTIO_RING_F_EVENT_IDX);
494			break;
495		case 'h':
496			help(0);
497		case 'i':
498			features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC);
499			break;
500		case '0':
501			features &= ~(1ULL << VIRTIO_F_VERSION_1);
502			break;
503		case 'D':
504			delayed = true;
505			break;
506		case 'n':
507			nbufs = strtol(optarg, NULL, 10);
508			assert(nbufs > 0);
509			break;
510		default:
511			assert(0);
512			break;
513		}
514	}
515
516done:
517	memset(&dev, 0, sizeof(dev));
518	snprintf(tun_name, IFNAMSIZ, "tun_%d", getpid());
519
520	fd = tun_alloc(&dev, tun_name);
521	assert(fd >= 0);
522
523	vdev_info_init(&dev, features);
524	vq_info_add(&dev, 0, DESC_NUM, fd);
525	vq_info_add(&dev, 1, DESC_NUM, fd);
526	vdev_create_socket(&dev, tun_name);
527
528	run_rx_test(&dev, &dev.vqs[0], delayed, nbufs);
529	run_tx_test(&dev, &dev.vqs[1], delayed, nbufs);
530
531	return 0;
532}
533