/* pkt-gen.c revision 227614 */
1/*
2 * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 */
25
26/*
27 * $FreeBSD: head/tools/tools/netmap/pkt-gen.c 227614 2011-11-17 12:17:39Z luigi $
28 * $Id: pkt-gen.c 9638 2011-11-07 18:07:43Z luigi $
29 *
30 * Example program to show how to build a multithreaded packet
31 * source/sink using the netmap device.
32 *
33 * In this example we create a programmable number of threads
34 * to take care of all the queues of the interface used to
35 * send or receive traffic.
36 *
37 */
38
/*
 * Text used to fill the body of generated packets; initialize_packet()
 * copies it repeatedly until the requested payload length is reached.
 */
const char *default_payload="netmap pkt-gen Luigi Rizzo and Matteo Landi\n"
	"http://info.iet.unipi.it/~luigi/netmap/ ";
41
42#include <errno.h>
43#include <pthread.h>	/* pthread_* */
44#include <pthread_np.h>	/* pthread w/ affinity */
45#include <signal.h>	/* signal */
46#include <stdlib.h>
47#include <stdio.h>
48#include <string.h>	/* strcmp */
49#include <fcntl.h>	/* open */
50#include <unistd.h>	/* close */
51#include <ifaddrs.h>	/* getifaddrs */
52
53#include <sys/mman.h>	/* PROT_* */
54#include <sys/ioctl.h>	/* ioctl */
55#include <sys/poll.h>
56#include <sys/socket.h>	/* sockaddr.. */
57#include <arpa/inet.h>	/* ntohs */
58#include <sys/param.h>
59#include <sys/cpuset.h>	/* cpu_set */
60#include <sys/sysctl.h>	/* sysctl */
61#include <sys/time.h>	/* timersub */
62
63#include <net/ethernet.h>
64#include <net/if.h>	/* ifreq */
65#include <net/if_dl.h>	/* LLADDR */
66
67#include <netinet/in.h>
68#include <netinet/ip.h>
69#include <netinet/udp.h>
70
71#include <net/netmap.h>
72#include <net/netmap_user.h>
73#include <pcap/pcap.h>
74
75
/* Return the smaller of the two arguments (b when they compare equal). */
static inline int
min(int a, int b)
{
	if (a < b)
		return a;
	return b;
}
77
/*
 * debug support: print a printf-style message on stderr, prefixed with
 * the name of the calling function and the source line number.
 * A trailing newline is appended by the macro itself.
 */
#define D(format, ...)				\
	fprintf(stderr, "%s [%d] " format "\n", 	\
	__FUNCTION__, __LINE__, ##__VA_ARGS__)
82
/* The TSC timestamp helpers below are compiled only when EXPERIMENTAL is non-zero. */
#ifndef EXPERIMENTAL
#define EXPERIMENTAL 0
#endif

/* Verbosity level; incremented once for every -v on the command line. */
int verbose = 0;
#define MAX_QUEUES 64	/* no need to limit */

/* Passed by receiver_body() to receive_packets() as its skip_payload argument. */
#define SKIP_PAYLOAD 1 /* do not check payload. */
91
#if EXPERIMENTAL
/*
 * Wrapper around `rdtsc' to take reliable timestamps flushing the pipeline.
 * A cpuid instruction is issued right before reading the counter; the
 * result of rdtsc() is stored in (t).
 */
#define netmap_rdtsc(t) \
	do { \
		u_int __regs[4];					\
									\
		do_cpuid(0, __regs);					\
		(t) = rdtsc();						\
	} while (0)

/*
 * Execute the cpuid instruction with eax = ax and store the resulting
 * eax, ebx, ecx, edx values in p[0..3].
 */
static __inline void
do_cpuid(u_int ax, u_int *p)
{
	__asm __volatile("cpuid"
			 : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
			 :  "0" (ax));
}

/*
 * Read the CPU timestamp counter.
 * NOTE(review): the "=A" constraint names the edx:eax pair on 32-bit x86;
 * on x86-64 it may not capture both halves of the counter -- confirm
 * before enabling EXPERIMENTAL on a 64-bit build.
 */
static __inline uint64_t
rdtsc(void)
{
	uint64_t rv;

	__asm __volatile("rdtsc" : "=A" (rv));
	return (rv);
}
/* NOTE(review): MAX_SAMPLES is not referenced in the visible part of this file. */
#define MAX_SAMPLES 100000
#endif /* EXPERIMENTAL */
120
121
/*
 * Template of a generated frame: ethernet header, IPv4 header, UDP header
 * and payload, laid out back to back (packed) so that the structure can
 * be copied as-is into a transmit buffer (see send_packets()).
 */
struct pkt {
	struct ether_header eh;
	struct ip ip;
	struct udphdr udp;
	uint8_t body[NETMAP_BUF_SIZE];
} __attribute__((__packed__));
128
/*
 * global arguments for all threads.
 * Filled in by main() from defaults and command line options, then
 * shared (through targ->g) by every worker thread.
 */
struct glob_arg {
	const char *src_ip;	/* source IP address string (-s) */
	const char *dst_ip;	/* destination IP address string (-d) */
	const char *src_mac;	/* source MAC address string (-S) */
	const char *dst_mac;	/* destination MAC address string (-D) */
	int pkt_size;		/* frame size in bytes (-l) */
	int burst;		/* max packets handled per ring per pass (-b) */
	int npackets;	/* total packets to send */
	int nthreads;		/* number of worker threads (-p) */
	int cpus;		/* number of cpus to spread threads on (-c) */
	int use_pcap;		/* non-zero: use libpcap instead of netmap */
	pcap_t *p;		/* pcap handle; never opened in the visible code */
};
145
/* NOTE(review): this structure is not referenced in the visible part of the file. */
struct mystat {
	uint64_t containers[8];
};
149
/*
 * Arguments for a new thread. The same structure is used by
 * the source and the sink
 */
struct targ {
	struct glob_arg *g;	/* shared global arguments */
	int used;		/* 1 while the thread is active, 0 once it is done or cancelled */
	int completed;		/* set to 1 by the thread when it terminates normally */
	int fd;			/* per-thread netmap descriptor (-1 in pcap mode) */
	struct nmreq nmr;	/* request used to register the interface on fd */
	struct netmap_if *nifp;	/* netmap interface descriptor (NULL in pcap mode) */
	uint16_t	qfirst, qlast; /* range of queues to scan */
	uint64_t count;		/* packets sent or received so far */
	struct timeval tic, toc;	/* start and end time of the thread's main loop */
	int me;			/* index of this thread in the targs array */
	pthread_t thread;
	int affinity;		/* cpu to bind the thread to, -1 for none */

	/* addresses parsed by initialize_packet() and optional sweep ranges */
	uint8_t	dst_mac[6];
	uint8_t	src_mac[6];
	u_int dst_mac_range;
	u_int src_mac_range;
	uint32_t dst_ip;
	uint32_t src_ip;
	u_int dst_ip_range;
	u_int src_ip_range;

	struct pkt pkt;		/* template of the packet to transmit */
};
179
180
/* Per-thread state, allocated by main() with one entry per worker thread. */
static struct targ *targs;
/* Copy of the thread count, set by main() and read by the SIGINT handler. */
static int global_nthreads;
183
184/* control-C handler */
185static void
186sigint_h(__unused int sig)
187{
188	for (int i = 0; i < global_nthreads; i++) {
189		/* cancel active threads. */
190		if (targs[i].used == 0)
191			continue;
192
193		D("Cancelling thread #%d\n", i);
194		pthread_cancel(targs[i].thread);
195		targs[i].used = 0;
196	}
197
198	signal(SIGINT, SIG_DFL);
199}
200
201
/*
 * sysctl wrapper to return the number of active CPUs.
 *
 * Queries hw.ncpu; if the query fails or reports a non-positive value
 * a message is logged and 1 is returned, so callers always get a
 * usable count.
 */
static int
system_ncpus(void)
{
	int mib[2] = { CTL_HW, HW_NCPU };
	int ncpus = 0;
	size_t len = sizeof(ncpus);	/* size of the output buffer, not of mib */

	if (sysctl(mib, 2, &ncpus, &len, NULL, 0) == -1 || ncpus < 1) {
		D("sysctl hw.ncpu failed, assuming 1 cpu");
		return (1);
	}

	return (ncpus);
}
216
217/*
218 * locate the src mac address for our interface, put it
219 * into the user-supplied buffer. return 0 if ok, -1 on error.
220 */
221static int
222source_hwaddr(const char *ifname, char *buf)
223{
224	struct ifaddrs *ifaphead, *ifap;
225	int l = sizeof(ifap->ifa_name);
226
227	if (getifaddrs(&ifaphead) != 0) {
228		D("getifaddrs %s failed", ifname);
229		return (-1);
230	}
231
232	for (ifap = ifaphead; ifap; ifap = ifap->ifa_next) {
233		struct sockaddr_dl *sdl =
234			(struct sockaddr_dl *)ifap->ifa_addr;
235		uint8_t *mac;
236
237		if (!sdl || sdl->sdl_family != AF_LINK)
238			continue;
239		if (strncmp(ifap->ifa_name, ifname, l) != 0)
240			continue;
241		mac = (uint8_t *)LLADDR(sdl);
242		sprintf(buf, "%02x:%02x:%02x:%02x:%02x:%02x",
243			mac[0], mac[1], mac[2],
244			mac[3], mac[4], mac[5]);
245		if (verbose)
246			D("source hwaddr %s", buf);
247		break;
248	}
249	freeifaddrs(ifaphead);
250	return ifap ? 0 : 1;
251}
252
253
254/* set the thread affinity. */
255static int
256setaffinity(pthread_t me, int i)
257{
258	cpuset_t cpumask;
259
260	if (i == -1)
261		return 0;
262
263	/* Set thread affinity affinity.*/
264	CPU_ZERO(&cpumask);
265	CPU_SET(i, &cpumask);
266
267	if (pthread_setaffinity_np(me, sizeof(cpuset_t), &cpumask) != 0) {
268		D("Unable to set affinity");
269		return 1;
270	}
271	return 0;
272}
273
/*
 * Compute the Internet (one's complement) checksum of a buffer, as
 * needed for the ip header.
 *
 * The data is summed as big-endian 16-bit words (an odd trailing byte
 * is treated as the high byte of a final word), carries are folded
 * back, and the complemented result is returned in network byte order.
 */
static uint16_t
checksum(const void *data, uint16_t len)
{
	const uint8_t *bytes = data;
	uint32_t acc = 0;
	uint16_t folded;
	int pos;

	for (pos = 0; pos + 1 < len; pos += 2)
		acc += ((uint32_t)bytes[pos] << 8) + bytes[pos + 1];

	/* odd length: the last byte is the upper half of a 16-bit word */
	if (pos < len)
		acc += (uint32_t)bytes[pos] << 8;

	/* fold the carries into the low 16 bits */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += (acc >> 16);

	folded = htons((uint16_t)acc);

	return (uint16_t)~folded;
}
297
298/*
299 * Fill a packet with some payload.
300 */
301static void
302initialize_packet(struct targ *targ)
303{
304	struct pkt *pkt = &targ->pkt;
305	struct ether_header *eh;
306	struct ip *ip;
307	struct udphdr *udp;
308	uint16_t paylen = targ->g->pkt_size - sizeof(*eh) - sizeof(*ip);
309	int i, l, l0 = strlen(default_payload);
310	char *p;
311
312	for (i = 0; i < paylen;) {
313		l = min(l0, paylen - i);
314		bcopy(default_payload, pkt->body + i, l);
315		i += l;
316	}
317	pkt->body[i-1] = '\0';
318
319	udp = &pkt->udp;
320	udp->uh_sport = htons(1234);
321        udp->uh_dport = htons(4321);
322	udp->uh_ulen = htons(paylen);
323	udp->uh_sum = 0; // checksum(udp, sizeof(*udp));
324
325	ip = &pkt->ip;
326        ip->ip_v = IPVERSION;
327        ip->ip_hl = 5;
328        ip->ip_id = 0;
329        ip->ip_tos = IPTOS_LOWDELAY;
330	ip->ip_len = ntohs(targ->g->pkt_size - sizeof(*eh));
331        ip->ip_id = 0;
332        ip->ip_off = htons(IP_DF); /* Don't fragment */
333        ip->ip_ttl = IPDEFTTL;
334	ip->ip_p = IPPROTO_UDP;
335	inet_aton(targ->g->src_ip, (struct in_addr *)&ip->ip_src);
336	inet_aton(targ->g->dst_ip, (struct in_addr *)&ip->ip_dst);
337	targ->dst_ip = ip->ip_dst.s_addr;
338	targ->src_ip = ip->ip_src.s_addr;
339	p = index(targ->g->src_ip, '-');
340	if (p) {
341		targ->dst_ip_range = atoi(p+1);
342		D("dst-ip sweep %d addresses", targ->dst_ip_range);
343	}
344	ip->ip_sum = checksum(ip, sizeof(*ip));
345
346	eh = &pkt->eh;
347	bcopy(ether_aton(targ->g->src_mac), targ->src_mac, 6);
348	bcopy(targ->src_mac, eh->ether_shost, 6);
349	p = index(targ->g->src_mac, '-');
350	if (p)
351		targ->src_mac_range = atoi(p+1);
352
353	bcopy(ether_aton(targ->g->dst_mac), targ->dst_mac, 6);
354	bcopy(targ->dst_mac, eh->ether_dhost, 6);
355	p = index(targ->g->dst_mac, '-');
356	if (p)
357		targ->dst_mac_range = atoi(p+1);
358	eh->ether_type = htons(ETHERTYPE_IP);
359}
360
/* Check the payload of the packet for errors (use it for debug).
 * Look for consecutive ascii representations of the size of the packet.
 *
 * p		start of the received frame.
 * psize	length of the frame in bytes.
 *
 * The first 14 bytes (packet header) are skipped; the rest is read in
 * fields as wide as the decimal form of psize plus one separator.
 * Each field is copied into a terminated scratch buffer before being
 * parsed, so the scan never runs past the field and an unparsable
 * field is reported instead of comparing an uninitialized value.
 */
static void
check_payload(char *p, int psize)
{
	char temp[64];
	int n_read, size, sizelen;

	/* get the length in ASCII of the length of the packet. */
	sizelen = sprintf(temp, "%d", psize) + 1; // include a whitespace

	/* dummy payload. */
	p += 14; /* skip packet header. */
	n_read = 14;
	while (psize - n_read >= sizelen) {
		/* sizelen is at most 12, so temp can always hold the field */
		memcpy(temp, p, sizelen);
		temp[sizelen] = '\0';
		if (sscanf(temp, "%d", &size) != 1) {
			D("No size field at offset %d", n_read);
			break;
		}
		if (size != psize) {
			D("Read %d instead of %d", size, psize);
			break;
		}

		p += sizelen;
		n_read += sizelen;
	}
}
387
388
389/*
390 * create and enqueue a batch of packets on a ring.
391 * On the last one set NS_REPORT to tell the driver to generate
392 * an interrupt when done.
393 */
394static int
395send_packets(struct netmap_ring *ring, struct pkt *pkt,
396		int size, u_int count, int fill_all)
397{
398	u_int sent, cur = ring->cur;
399
400	if (ring->avail < count)
401		count = ring->avail;
402
403	for (sent = 0; sent < count; sent++) {
404		struct netmap_slot *slot = &ring->slot[cur];
405		char *p = NETMAP_BUF(ring, slot->buf_idx);
406
407		if (fill_all)
408			memcpy(p, pkt, size);
409
410		slot->len = size;
411		if (sent == count - 1)
412			slot->flags |= NS_REPORT;
413		cur = NETMAP_RING_NEXT(ring, cur);
414	}
415	ring->avail -= sent;
416	ring->cur = cur;
417
418	return (sent);
419}
420
/*
 * Body of a sender thread.
 *
 * 'data' points to the struct targ of the thread. The thread sends its
 * share of packets (npackets / nthreads), either with pcap_inject() when
 * use_pcap is set or through the netmap transmit rings qfirst..qlast-1.
 * On normal termination toc, completed and count are updated; on every
 * exit path the ``used`` flag is cleared. Always returns NULL.
 */
static void *
sender_body(void *data)
{
	struct targ *targ = (struct targ *) data;

	struct pollfd fds[1];
	struct netmap_if *nifp = targ->nifp;
	struct netmap_ring *txring;
	int i, n = targ->g->npackets / targ->g->nthreads, sent = 0;
	int fill_all = 1;

	if (setaffinity(targ->thread, targ->affinity))
		goto quit;
	/* setup poll(2) mechanism. */
	memset(fds, 0, sizeof(fds));
	fds[0].fd = targ->fd;
	fds[0].events = (POLLOUT);

	/* main loop.*/
	gettimeofday(&targ->tic, NULL);
    if (targ->g->use_pcap) {
	int size = targ->g->pkt_size;
	void *pkt = &targ->pkt;
	pcap_t *p = targ->g->p;

	/* pcap mode: inject one packet at a time, stop on the first error */
	for (; sent < n; sent++) {
		if (pcap_inject(p, pkt, size) == -1)
			break;
	}
    } else {
	while (sent < n) {

		/*
		 * wait for available room in the send queue(s)
		 */
		if (poll(fds, 1, 2000) <= 0) {
			D("poll error/timeout on queue %d\n", targ->me);
			/* leaves without setting toc/completed */
			goto quit;
		}
		/*
		 * scan our queues and send on those with room
		 */
		/*
		 * after 100000 packets stop copying the template into the
		 * buffers -- presumably every buffer already holds it by
		 * then; TODO confirm for large rings.
		 */
		if (sent > 100000)
			fill_all = 0;
		for (i = targ->qfirst; i < targ->qlast; i++) {
			int m, limit = MIN(n - sent, targ->g->burst);

			txring = NETMAP_TXRING(nifp, i);
			if (txring->avail == 0)
				continue;
			m = send_packets(txring, &targ->pkt, targ->g->pkt_size,
					 limit, fill_all);
			sent += m;
			/* publish progress for the reporting loop in main() */
			targ->count = sent;
		}
	}
	/* Tell the interface that we have new packets. */
	ioctl(fds[0].fd, NIOCTXSYNC, NULL);

	/* final part: wait all the TX queues to be empty. */
	for (i = targ->qfirst; i < targ->qlast; i++) {
		txring = NETMAP_TXRING(nifp, i);
		while (!NETMAP_TX_RING_EMPTY(txring)) {
			ioctl(fds[0].fd, NIOCTXSYNC, NULL);
			usleep(1); /* wait 1 tick */
		}
	}
    }

	gettimeofday(&targ->toc, NULL);
	targ->completed = 1;
	targ->count = sent;

quit:
	/* reset the ``used`` flag. */
	targ->used = 0;

	return (NULL);
}
500
501
502static void
503receive_pcap(u_char *user, __unused const struct pcap_pkthdr * h,
504	__unused const u_char * bytes)
505{
506	int *count = (int *)user;
507	(*count)++;
508}
509
510static int
511receive_packets(struct netmap_ring *ring, u_int limit, int skip_payload)
512{
513	u_int cur, rx;
514
515	cur = ring->cur;
516	if (ring->avail < limit)
517		limit = ring->avail;
518	for (rx = 0; rx < limit; rx++) {
519		struct netmap_slot *slot = &ring->slot[cur];
520		char *p = NETMAP_BUF(ring, slot->buf_idx);
521
522		if (!skip_payload)
523			check_payload(p, slot->len);
524
525		cur = NETMAP_RING_NEXT(ring, cur);
526	}
527	ring->avail -= rx;
528	ring->cur = cur;
529
530	return (rx);
531}
532
533static void *
534receiver_body(void *data)
535{
536	struct targ *targ = (struct targ *) data;
537	struct pollfd fds[1];
538	struct netmap_if *nifp = targ->nifp;
539	struct netmap_ring *rxring;
540	int i, received = 0;
541
542	if (setaffinity(targ->thread, targ->affinity))
543		goto quit;
544
545	/* setup poll(2) machanism. */
546	memset(fds, 0, sizeof(fds));
547	fds[0].fd = targ->fd;
548	fds[0].events = (POLLIN);
549
550	/* unbounded wait for the first packet. */
551	for (;;) {
552		i = poll(fds, 1, 1000);
553		if (i > 0 && !(fds[0].revents & POLLERR))
554			break;
555		D("waiting for initial packets, poll returns %d %d", i, fds[0].revents);
556	}
557
558	/* main loop, exit after 1s silence */
559	gettimeofday(&targ->tic, NULL);
560    if (targ->g->use_pcap) {
561	for (;;) {
562		pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap, NULL);
563	}
564    } else {
565	while (1) {
566		/* Once we started to receive packets, wait at most 1 seconds
567		   before quitting. */
568		if (poll(fds, 1, 1 * 1000) <= 0) {
569			gettimeofday(&targ->toc, NULL);
570			targ->toc.tv_sec -= 1; /* Substract timeout time. */
571			break;
572		}
573
574		for (i = targ->qfirst; i < targ->qlast; i++) {
575			int m;
576
577			rxring = NETMAP_RXRING(nifp, i);
578			if (rxring->avail == 0)
579				continue;
580
581			m = receive_packets(rxring, targ->g->burst,
582					SKIP_PAYLOAD);
583			received += m;
584			targ->count = received;
585		}
586
587		// tell the card we have read the data
588		//ioctl(fds[0].fd, NIOCRXSYNC, NULL);
589	}
590    }
591
592	targ->completed = 1;
593	targ->count = received;
594
595quit:
596	/* reset the ``used`` flag. */
597	targ->used = 0;
598
599	return (NULL);
600}
601
/*
 * Scale *value down by factors of 1000 and return the matching unit
 * prefix: '\0' (none), 'K', 'M' or 'G'. Scaling stops at 'G', so very
 * large values are reported as a big number of G units instead of
 * indexing past the end of the prefix table.
 */
static char
scale_rate(double *value)
{
	static const char units[] = { '\0', 'K', 'M', 'G' };
	size_t u = 0;

	while (*value >= 1000 && u < sizeof(units) - 1) {
		*value /= 1000;
		u++;
	}
	return (units[u]);
}

/*
 * Print the transmit summary.
 *
 * sent		number of packets sent.
 * size		size of each packet in bytes.
 * delta	elapsed time in seconds; when it is not positive the
 *		rates are reported as 0 rather than dividing by zero.
 */
static void
tx_output(uint64_t sent, int size, double delta)
{
	double amount = 0.0;	/* bits per second */
	double pps = 0.0;	/* packets per second */
	char aunit, punit;

	if (delta > 0) {
		amount = 8.0 * (1.0 * size * sent) / delta;
		pps = sent / delta;
	}
	aunit = scale_rate(&amount);
	punit = scale_rate(&pps);

	/* uint64_t is not necessarily unsigned long long: cast for %llu */
	printf("Sent %llu packets, %d bytes each, in %.2f seconds.\n",
	       (unsigned long long)sent, size, delta);
	printf("Speed: %.2f%cpps. Bandwidth: %.2f%cbps.\n",
	       pps, punit, amount, aunit);
}


/*
 * Print the receive summary.
 *
 * received	number of packets received.
 * delta	elapsed time in seconds; when it is not positive the
 *		rate is reported as 0 rather than dividing by zero.
 */
static void
rx_output(uint64_t received, double delta)
{
	double pps = 0.0;	/* packets per second */
	char punit;

	if (delta > 0)
		pps = received / delta;
	punit = scale_rate(&pps);

	printf("Received %llu packets, in %.2f seconds.\n",
	       (unsigned long long)received, delta);
	printf("Speed: %.2f%cpps.\n", pps, punit);
}
642
/*
 * Print the command line help on stderr and terminate the program
 * (exit status 0). Never returns.
 *
 * The sweep syntax shown here matches the parser in
 * initialize_packet(), which looks for a '-' followed by a count.
 */
static void
usage(void)
{
	const char *cmd = "pkt-gen";
	fprintf(stderr,
		"Usage:\n"
		"%s arguments\n"
		"\t-i interface		interface name\n"
		"\t-t pkts_to_send	also forces send mode\n"
		"\t-r pkts_to_receive	also forces receive mode\n"
		"\t-l pkts_size		in bytes excluding CRC\n"
		"\t-d dst-ip		end with -n to sweep n addresses\n"
		"\t-s src-ip		end with -n to sweep n addresses\n"
		"\t-D dst-mac		end with -n to sweep n addresses\n"
		"\t-S src-mac		end with -n to sweep n addresses\n"
		"\t-b burst size		testing, mostly\n"
		"\t-c cores		cores to use\n"
		"\t-p threads		processes/threads to use\n"
		"\t-T report_ms		milliseconds between reports\n"
		"\t-w wait_for_link_time	in seconds\n"
		"\t-v			be verbose\n"
		"",
		cmd);

	exit(0);
}
668
669
670int
671main(int arc, char **argv)
672{
673	int i, fd;
674
675	struct glob_arg g;
676
677	struct nmreq nmr;
678	void *mmap_addr;		/* the mmap address */
679	void *(*td_body)(void *) = receiver_body;
680	int ch;
681	int report_interval = 1000;	/* report interval */
682	char *ifname = NULL;
683	int wait_link = 2;
684	int devqueues = 1;	/* how many device queues */
685
686	bzero(&g, sizeof(g));
687
688	g.src_ip = "10.0.0.1";
689	g.dst_ip = "10.1.0.1";
690	g.dst_mac = "ff:ff:ff:ff:ff:ff";
691	g.src_mac = NULL;
692	g.pkt_size = 60;
693	g.burst = 512;		// default
694	g.nthreads = 1;
695	g.cpus = 1;
696
697	while ( (ch = getopt(arc, argv,
698			"i:t:r:l:d:s:D:S:b:c:p:T:w:v")) != -1) {
699		switch(ch) {
700		default:
701			D("bad option %c %s", ch, optarg);
702			usage();
703			break;
704		case 'i':	/* interface */
705			ifname = optarg;
706			break;
707		case 't':	/* send */
708			td_body = sender_body;
709			g.npackets = atoi(optarg);
710			break;
711		case 'r':	/* receive */
712			td_body = receiver_body;
713			g.npackets = atoi(optarg);
714			break;
715		case 'l':	/* pkt_size */
716			g.pkt_size = atoi(optarg);
717			break;
718		case 'd':
719			g.dst_ip = optarg;
720			break;
721		case 's':
722			g.src_ip = optarg;
723			break;
724		case 'T':	/* report interval */
725			report_interval = atoi(optarg);
726			break;
727		case 'w':
728			wait_link = atoi(optarg);
729			break;
730		case 'b':	/* burst */
731			g.burst = atoi(optarg);
732			break;
733		case 'c':
734			g.cpus = atoi(optarg);
735			break;
736		case 'p':
737			g.nthreads = atoi(optarg);
738			break;
739
740		case 'P':
741			g.use_pcap = 1;
742			break;
743
744		case 'D': /* destination mac */
745			g.dst_mac = optarg;
746	{
747		struct ether_addr *mac = ether_aton(g.dst_mac);
748		D("ether_aton(%s) gives %p", g.dst_mac, mac);
749	}
750			break;
751		case 'S': /* source mac */
752			g.src_mac = optarg;
753			break;
754		case 'v':
755			verbose++;
756		}
757	}
758
759	if (ifname == NULL) {
760		D("missing ifname");
761		usage();
762	}
763	{
764		int n = system_ncpus();
765		if (g.cpus < 0 || g.cpus > n) {
766			D("%d cpus is too high, have only %d cpus", g.cpus, n);
767			usage();
768		}
769		if (g.cpus == 0)
770			g.cpus = n;
771	}
772	if (g.pkt_size < 16 || g.pkt_size > 1536) {
773		D("bad pktsize %d\n", g.pkt_size);
774		usage();
775	}
776
777	bzero(&nmr, sizeof(nmr));
778	/*
779	 * Open the netmap device to fetch the number of queues of our
780	 * interface.
781	 *
782	 * The first NIOCREGIF also detaches the card from the
783	 * protocol stack and may cause a reset of the card,
784	 * which in turn may take some time for the PHY to
785	 * reconfigure.
786	 */
787	fd = open("/dev/netmap", O_RDWR);
788	if (fd == -1) {
789		D("Unable to open /dev/netmap");
790		// fail later
791	} else {
792		if ((ioctl(fd, NIOCGINFO, &nmr)) == -1) {
793			D("Unable to get if info without name");
794		} else {
795			D("map size is %d Kb", nmr.nr_memsize >> 10);
796		}
797		bzero(&nmr, sizeof(nmr));
798		strncpy(nmr.nr_name, ifname, sizeof(nmr.nr_name));
799		if ((ioctl(fd, NIOCGINFO, &nmr)) == -1) {
800			D("Unable to get if info for %s", ifname);
801		}
802		devqueues = nmr.nr_numrings;
803	}
804
805	/* validate provided nthreads. */
806	if (g.nthreads < 1 || g.nthreads > devqueues) {
807		D("bad nthreads %d, have %d queues", g.nthreads, devqueues);
808		// continue, fail later
809	}
810
811	if (td_body == sender_body && g.src_mac == NULL) {
812		static char mybuf[20] = "ff:ff:ff:ff:ff:ff";
813		/* retrieve source mac address. */
814		if (source_hwaddr(ifname, mybuf) == -1) {
815			D("Unable to retrieve source mac");
816			// continue, fail later
817		}
818		g.src_mac = mybuf;
819	}
820
821	/*
822	 * Map the netmap shared memory: instead of issuing mmap()
823	 * inside the body of the threads, we prefer to keep this
824	 * operation here to simplify the thread logic.
825	 */
826	D("mmapping %d Kbytes", nmr.nr_memsize>>10);
827	mmap_addr = (struct netmap_d *) mmap(0, nmr.nr_memsize,
828					    PROT_WRITE | PROT_READ,
829					    MAP_SHARED, fd, 0);
830	if (mmap_addr == MAP_FAILED) {
831		D("Unable to mmap %d KB", nmr.nr_memsize >> 10);
832		// continue, fail later
833	}
834
835	/*
836	 * Register the interface on the netmap device: from now on,
837	 * we can operate on the network interface without any
838	 * interference from the legacy network stack.
839	 *
840	 * We decide to put the first interface registration here to
841	 * give time to cards that take a long time to reset the PHY.
842	 */
843	if (ioctl(fd, NIOCREGIF, &nmr) == -1) {
844		D("Unable to register interface %s", ifname);
845		//continue, fail later
846	}
847
848
849	/* Print some debug information. */
850	fprintf(stdout,
851		"%s %s: %d queues, %d threads and %d cpus.\n",
852		(td_body == sender_body) ? "Sending on" : "Receiving from",
853		ifname,
854		devqueues,
855		g.nthreads,
856		g.cpus);
857	if (td_body == sender_body) {
858		fprintf(stdout, "%s -> %s (%s -> %s)\n",
859			g.src_ip, g.dst_ip,
860			g.src_mac, g.dst_mac);
861	}
862
863	/* Exit if something went wrong. */
864	if (fd < 0) {
865		D("aborting");
866		usage();
867	}
868
869
870	/* Wait for PHY reset. */
871	D("Wait %d secs for phy reset", wait_link);
872	sleep(wait_link);
873	D("Ready...");
874
875	/* Install ^C handler. */
876	global_nthreads = g.nthreads;
877	signal(SIGINT, sigint_h);
878
879	if (g.use_pcap) {
880		// XXX g.p = pcap_open_live(..);
881	}
882
883	targs = calloc(g.nthreads, sizeof(*targs));
884	/*
885	 * Now create the desired number of threads, each one
886	 * using a single descriptor.
887 	 */
888	for (i = 0; i < g.nthreads; i++) {
889		struct netmap_if *tnifp;
890		struct nmreq tifreq;
891		int tfd;
892
893	    if (g.use_pcap) {
894		tfd = -1;
895		tnifp = NULL;
896	    } else {
897		/* register interface. */
898		tfd = open("/dev/netmap", O_RDWR);
899		if (tfd == -1) {
900			D("Unable to open /dev/netmap");
901			continue;
902		}
903
904		bzero(&tifreq, sizeof(tifreq));
905		strncpy(tifreq.nr_name, ifname, sizeof(tifreq.nr_name));
906		tifreq.nr_ringid = (g.nthreads > 1) ? (i | NETMAP_HW_RING) : 0;
907
908		/*
909		 * if we are acting as a receiver only, do not touch the transmit ring.
910		 * This is not the default because many apps may use the interface
911		 * in both directions, but a pure receiver does not.
912		 */
913		if (td_body == receiver_body) {
914			tifreq.nr_ringid |= NETMAP_NO_TX_POLL;
915		}
916
917		if ((ioctl(tfd, NIOCREGIF, &tifreq)) == -1) {
918			D("Unable to register %s", ifname);
919			continue;
920		}
921		tnifp = NETMAP_IF(mmap_addr, tifreq.nr_offset);
922	    }
923		/* start threads. */
924		bzero(&targs[i], sizeof(targs[i]));
925		targs[i].g = &g;
926		targs[i].used = 1;
927		targs[i].completed = 0;
928		targs[i].fd = tfd;
929		targs[i].nmr = tifreq;
930		targs[i].nifp = tnifp;
931		targs[i].qfirst = (g.nthreads > 1) ? i : 0;
932		targs[i].qlast = (g.nthreads > 1) ? i+1 : tifreq.nr_numrings;
933		targs[i].me = i;
934		targs[i].affinity = g.cpus ? i % g.cpus : -1;
935		if (td_body == sender_body) {
936			/* initialize the packet to send. */
937			initialize_packet(&targs[i]);
938		}
939
940		if (pthread_create(&targs[i].thread, NULL, td_body,
941				   &targs[i]) == -1) {
942			D("Unable to create thread %d", i);
943			targs[i].used = 0;
944		}
945	}
946
947    {
948	uint64_t my_count = 0, prev = 0;
949	uint64_t count = 0;
950	double delta_t;
951	struct timeval tic, toc;
952
953	gettimeofday(&toc, NULL);
954	for (;;) {
955		struct timeval now, delta;
956		uint64_t pps;
957		int done = 0;
958
959		delta.tv_sec = report_interval/1000;
960		delta.tv_usec = (report_interval%1000)*1000;
961		select(0, NULL, NULL, NULL, &delta);
962		gettimeofday(&now, NULL);
963		timersub(&now, &toc, &toc);
964		my_count = 0;
965		for (i = 0; i < g.nthreads; i++) {
966			my_count += targs[i].count;
967			if (targs[i].used == 0)
968				done++;
969		}
970		pps = toc.tv_sec* 1000000 + toc.tv_usec;
971		if (pps < 10000)
972			continue;
973		pps = (my_count - prev)*1000000 / pps;
974		D("%llu pps", pps);
975		prev = my_count;
976		toc = now;
977		if (done == g.nthreads)
978			break;
979	}
980
981	timerclear(&tic);
982	timerclear(&toc);
983	for (i = 0; i < g.nthreads; i++) {
984		/*
985		 * Join active threads, unregister interfaces and close
986		 * file descriptors.
987		 */
988		pthread_join(targs[i].thread, NULL);
989		ioctl(targs[i].fd, NIOCUNREGIF, &targs[i].nmr);
990		close(targs[i].fd);
991
992		if (targs[i].completed == 0)
993			continue;
994
995		/*
996		 * Collect threads o1utput and extract information about
997		 * how log it took to send all the packets.
998		 */
999		count += targs[i].count;
1000		if (!timerisset(&tic) || timercmp(&targs[i].tic, &tic, <))
1001			tic = targs[i].tic;
1002		if (!timerisset(&toc) || timercmp(&targs[i].toc, &toc, >))
1003			toc = targs[i].toc;
1004	}
1005
1006	/* print output. */
1007	timersub(&toc, &tic, &toc);
1008	delta_t = toc.tv_sec + 1e-6* toc.tv_usec;
1009	if (td_body == sender_body)
1010		tx_output(count, g.pkt_size, delta_t);
1011	else
1012		rx_output(count, delta_t);
1013    }
1014
1015	ioctl(fd, NIOCUNREGIF, &nmr);
1016	munmap(mmap_addr, nmr.nr_memsize);
1017	close(fd);
1018
1019	return (0);
1020}
1021/* end of file */
1022