pkt-gen.c revision 238165
1/*
2 * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *   1. Redistributions of source code must retain the above copyright
8 *      notice, this list of conditions and the following disclaimer.
9 *   2. Redistributions in binary form must reproduce the above copyright
10 *      notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 */
25
26/*
27 * $FreeBSD: head/tools/tools/netmap/pkt-gen.c 238165 2012-07-06 13:21:23Z emaste $
28 * $Id: pkt-gen.c 10967 2012-05-03 11:29:23Z luigi $
29 *
30 * Example program to show how to build a multithreaded packet
31 * source/sink using the netmap device.
32 *
33 * In this example we create a programmable number of threads
34 * to take care of all the queues of the interface used to
35 * send or receive traffic.
36 *
37 */
38
/* Filler text that is replicated into the body of every generated packet. */
const char *default_payload =
	"netmap pkt-gen Luigi Rizzo and Matteo Landi\n"
	"http://info.iet.unipi.it/~luigi/netmap/ ";
41
42#include <errno.h>
43#include <pthread.h>	/* pthread_* */
44#include <pthread_np.h>	/* pthread w/ affinity */
45#include <signal.h>	/* signal */
46#include <stdlib.h>
47#include <stdio.h>
48#include <inttypes.h>	/* PRI* macros */
49#include <string.h>	/* strcmp */
50#include <fcntl.h>	/* open */
51#include <unistd.h>	/* close */
52#include <ifaddrs.h>	/* getifaddrs */
53
54#include <sys/mman.h>	/* PROT_* */
55#include <sys/ioctl.h>	/* ioctl */
56#include <sys/poll.h>
57#include <sys/socket.h>	/* sockaddr.. */
58#include <arpa/inet.h>	/* ntohs */
59#include <sys/param.h>
60#include <sys/cpuset.h>	/* cpu_set */
61#include <sys/sysctl.h>	/* sysctl */
62#include <sys/time.h>	/* timersub */
63
64#include <net/ethernet.h>
65#include <net/if.h>	/* ifreq */
66#include <net/if_dl.h>	/* LLADDR */
67
68#include <netinet/in.h>
69#include <netinet/ip.h>
70#include <netinet/udp.h>
71
72#include <net/netmap.h>
73#include <net/netmap_user.h>
74#include <pcap/pcap.h>
75
76
/* Return the smaller of two ints (b when they compare equal). */
static inline int
min(int a, int b)
{
	if (a < b)
		return a;
	return b;
}
78
/*
 * Diagnostics: D() prints a printf-style message on stderr, prefixed
 * with the name of the calling function and the source line, and
 * followed by a newline.
 */
#define D(format, ...)						\
	fprintf(stderr, "%s [%d] " format "\n",			\
	    __FUNCTION__, __LINE__, ##__VA_ARGS__)

/* The TSC timestamp helpers are compiled only when EXPERIMENTAL is non-zero. */
#if !defined(EXPERIMENTAL)
#define EXPERIMENTAL 0
#endif

int verbose = 0;	/* verbosity level for extra diagnostics */

#define MAX_QUEUES 64	/* no need to limit */

#define SKIP_PAYLOAD 1	/* do not check payload. */
92
/*
 * Ask the CPU to start loading the cache line that holds *x (x86
 * "prefetcht0"). This is only a hint: the pointed-to data is not modified.
 *
 * Declared "static inline": a plain "inline" definition in C99/C11 does
 * not emit an out-of-line copy, so any call the compiler chooses not to
 * inline (e.g. when building without optimization) would fail to link.
 */
static inline void
prefetch(const void *x)
{
	__asm volatile("prefetcht0 %0" :: "m" (*(const unsigned long *)x));
}
97
/*
 * Copy l bytes from _src to _dst; note the bcopy-style (source first)
 * argument order.
 *
 * Lengths of 1024 bytes or more are handed to bcopy(). Shorter lengths
 * are copied in whole 64-byte chunks (eight 64-bit words each), so the
 * length is effectively rounded up to the next multiple of 64 and both
 * buffers must be large enough for that.
 * XXX only for multiples of 64 bytes, non overlapped.
 */
static inline void
pkt_copy(void *_src, void *_dst, int l)
{
#define likely(x)       __builtin_expect(!!(x), 1)
#define unlikely(x)       __builtin_expect(!!(x), 0)
	const uint64_t *from = _src;
	uint64_t *to = _dst;
	int remaining, w;

	if (unlikely(l >= 1024)) {
		bcopy(_src, _dst, l);
		return;
	}

	/* one 64-byte chunk per outer iteration */
	for (remaining = l; remaining > 0; remaining -= 64) {
		for (w = 0; w < 8; w++)
			to[w] = from[w];
		from += 8;
		to += 8;
	}
}
121
122
123#if EXPERIMENTAL
/*
 * Store the current TSC value in (t). A cpuid instruction is issued
 * right before rdtsc so that the pipeline is flushed and the timestamp
 * is reliable; the cpuid output itself is discarded.
 */
#define netmap_rdtsc(t)							\
	do {								\
		u_int cpuid_out_[4];					\
									\
		do_cpuid(0, cpuid_out_);				\
		(t) = rdtsc();						\
	} while (0)
132
/*
 * Execute the x86 cpuid instruction with eax = ax and store the
 * resulting eax, ebx, ecx and edx values in p[0] .. p[3].
 */
static __inline void
do_cpuid(u_int ax, u_int *p)
{
	u_int eax, ebx, ecx, edx;

	__asm __volatile("cpuid"
			 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
			 :  "0" (ax));
	p[0] = eax;
	p[1] = ebx;
	p[2] = ecx;
	p[3] = edx;
}
140
/*
 * Read the CPU time stamp counter.
 *
 * rdtsc returns the low 32 bits in eax and the high 32 bits in edx.
 * The two halves are fetched separately and recombined: the "=A"
 * constraint only denotes the edx:eax pair on 32-bit x86, while on
 * x86-64 it selects a single register and the upper half is lost.
 */
static __inline uint64_t
rdtsc(void)
{
	uint32_t lo, hi;

	__asm __volatile("rdtsc" : "=a" (lo), "=d" (hi));
	return (((uint64_t)hi << 32) | lo);
}
#define MAX_SAMPLES 100000
150#endif /* EXPERIMENTAL */
151
152
153struct pkt {
154	struct ether_header eh;
155	struct ip ip;
156	struct udphdr udp;
157	uint8_t body[2048];	// XXX hardwired
158} __attribute__((__packed__));
159
160/*
161 * global arguments for all threads
162 */
163struct glob_arg {
164	const char *src_ip;
165	const char *dst_ip;
166	const char *src_mac;
167	const char *dst_mac;
168	int pkt_size;
169	int burst;
170	int npackets;	/* total packets to send */
171	int nthreads;
172	int cpus;
173	int options;	/* testing */
174#define OPT_PREFETCH	1
175#define OPT_ACCESS	2
176#define OPT_COPY	4
177#define OPT_MEMCPY	8
178	int use_pcap;
179	pcap_t *p;
180};
181
/* Block of eight 64-bit counters (not referenced elsewhere in this file). */
struct mystat {
	uint64_t containers[8];
};
185
186/*
187 * Arguments for a new thread. The same structure is used by
188 * the source and the sink
189 */
190struct targ {
191	struct glob_arg *g;
192	int used;
193	int completed;
194	int cancel;
195	int fd;
196	struct nmreq nmr;
197	struct netmap_if *nifp;
198	uint16_t	qfirst, qlast; /* range of queues to scan */
199	uint64_t count;
200	struct timeval tic, toc;
201	int me;
202	pthread_t thread;
203	int affinity;
204
205	uint8_t	dst_mac[6];
206	uint8_t	src_mac[6];
207	u_int dst_mac_range;
208	u_int src_mac_range;
209	uint32_t dst_ip;
210	uint32_t src_ip;
211	u_int dst_ip_range;
212	u_int src_ip_range;
213
214	struct pkt pkt;
215};
216
217
218static struct targ *targs;
219static int global_nthreads;
220
221/* control-C handler */
222static void
223sigint_h(__unused int sig)
224{
225	for (int i = 0; i < global_nthreads; i++)
226		targs[i].cancel = 1;
227
228	signal(SIGINT, SIG_DFL);
229}
230
231
232/* sysctl wrapper to return the number of active CPUs */
233static int
234system_ncpus(void)
235{
236	int mib[2], ncpus;
237	size_t len;
238
239	mib[0] = CTL_HW;
240	mib[1] = HW_NCPU;
241	len = sizeof(mib);
242	sysctl(mib, 2, &ncpus, &len, NULL, 0);
243
244	return (ncpus);
245}
246
247/*
248 * locate the src mac address for our interface, put it
249 * into the user-supplied buffer. return 0 if ok, -1 on error.
250 */
251static int
252source_hwaddr(const char *ifname, char *buf)
253{
254	struct ifaddrs *ifaphead, *ifap;
255	int l = sizeof(ifap->ifa_name);
256
257	if (getifaddrs(&ifaphead) != 0) {
258		D("getifaddrs %s failed", ifname);
259		return (-1);
260	}
261
262	for (ifap = ifaphead; ifap; ifap = ifap->ifa_next) {
263		struct sockaddr_dl *sdl =
264			(struct sockaddr_dl *)ifap->ifa_addr;
265		uint8_t *mac;
266
267		if (!sdl || sdl->sdl_family != AF_LINK)
268			continue;
269		if (strncmp(ifap->ifa_name, ifname, l) != 0)
270			continue;
271		mac = (uint8_t *)LLADDR(sdl);
272		sprintf(buf, "%02x:%02x:%02x:%02x:%02x:%02x",
273			mac[0], mac[1], mac[2],
274			mac[3], mac[4], mac[5]);
275		if (verbose)
276			D("source hwaddr %s", buf);
277		break;
278	}
279	freeifaddrs(ifaphead);
280	return ifap ? 0 : 1;
281}
282
283
284/* set the thread affinity. */
285static int
286setaffinity(pthread_t me, int i)
287{
288	cpuset_t cpumask;
289
290	if (i == -1)
291		return 0;
292
293	/* Set thread affinity affinity.*/
294	CPU_ZERO(&cpumask);
295	CPU_SET(i, &cpumask);
296
297	if (pthread_setaffinity_np(me, sizeof(cpuset_t), &cpumask) != 0) {
298		D("Unable to set affinity");
299		return 1;
300	}
301	return 0;
302}
303
/*
 * Compute the 16-bit one's-complement Internet checksum over `len`
 * bytes starting at `data` (used here for the IP header).
 *
 * The bytes are summed as big-endian 16-bit words, an odd trailing byte
 * being padded with a zero low byte; the carries are folded back in and
 * the complement is returned in network byte order.
 */
static uint16_t
checksum(const void *data, uint16_t len)
{
	const uint8_t *bytes = data;
	uint32_t acc = 0;
	uint16_t left;

	for (left = len; left >= 2; left -= 2, bytes += 2)
		acc += ((uint32_t)bytes[0] << 8) + bytes[1];

	if (left != 0)	/* one odd byte remains */
		acc += (uint32_t)bytes[0] << 8;

	/* fold the carries into the low 16 bits */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += (acc >> 16);

	acc = htons(acc);

	return ~acc;
}
327
328/*
329 * Fill a packet with some payload.
330 */
331static void
332initialize_packet(struct targ *targ)
333{
334	struct pkt *pkt = &targ->pkt;
335	struct ether_header *eh;
336	struct ip *ip;
337	struct udphdr *udp;
338	uint16_t paylen = targ->g->pkt_size - sizeof(*eh) - sizeof(*ip);
339	int i, l, l0 = strlen(default_payload);
340	char *p;
341
342	for (i = 0; i < paylen;) {
343		l = min(l0, paylen - i);
344		bcopy(default_payload, pkt->body + i, l);
345		i += l;
346	}
347	pkt->body[i-1] = '\0';
348
349	udp = &pkt->udp;
350	udp->uh_sport = htons(1234);
351        udp->uh_dport = htons(4321);
352	udp->uh_ulen = htons(paylen);
353	udp->uh_sum = 0; // checksum(udp, sizeof(*udp));
354
355	ip = &pkt->ip;
356        ip->ip_v = IPVERSION;
357        ip->ip_hl = 5;
358        ip->ip_id = 0;
359        ip->ip_tos = IPTOS_LOWDELAY;
360	ip->ip_len = ntohs(targ->g->pkt_size - sizeof(*eh));
361        ip->ip_id = 0;
362        ip->ip_off = htons(IP_DF); /* Don't fragment */
363        ip->ip_ttl = IPDEFTTL;
364	ip->ip_p = IPPROTO_UDP;
365	inet_aton(targ->g->src_ip, (struct in_addr *)&ip->ip_src);
366	inet_aton(targ->g->dst_ip, (struct in_addr *)&ip->ip_dst);
367	targ->dst_ip = ip->ip_dst.s_addr;
368	targ->src_ip = ip->ip_src.s_addr;
369	p = index(targ->g->src_ip, '-');
370	if (p) {
371		targ->dst_ip_range = atoi(p+1);
372		D("dst-ip sweep %d addresses", targ->dst_ip_range);
373	}
374	ip->ip_sum = checksum(ip, sizeof(*ip));
375
376	eh = &pkt->eh;
377	bcopy(ether_aton(targ->g->src_mac), targ->src_mac, 6);
378	bcopy(targ->src_mac, eh->ether_shost, 6);
379	p = index(targ->g->src_mac, '-');
380	if (p)
381		targ->src_mac_range = atoi(p+1);
382
383	bcopy(ether_aton(targ->g->dst_mac), targ->dst_mac, 6);
384	bcopy(targ->dst_mac, eh->ether_dhost, 6);
385	p = index(targ->g->dst_mac, '-');
386	if (p)
387		targ->dst_mac_range = atoi(p+1);
388	eh->ether_type = htons(ETHERTYPE_IP);
389}
390
/* Check the payload of the packet for errors (use it for debug).
 * Look for consecutive ascii representations of the size of the packet.
 *
 * p points to the start of the frame and psize is its length. The
 * first 14 bytes (the packet header) are skipped; after that every
 * field is expected to be the decimal packet size followed by one
 * separator character. The scan stops at the first field that cannot
 * be parsed or does not match, reporting it with D().
 */
static void
check_payload(char *p, int psize)
{
	char temp[64];
	int n_read, size, sizelen;

	/* get the length in ASCII of the length of the packet. */
	sizelen = snprintf(temp, sizeof(temp), "%d", psize) + 1; // include a whitespace

	/* dummy payload. */
	p += 14; /* skip packet header. */
	n_read = 14;
	while (psize - n_read >= sizelen) {
		/* without this check `size` would be compared uninitialized */
		if (sscanf(p, "%d", &size) != 1) {
			D("No packet size found at offset %d", n_read);
			break;
		}
		if (size != psize) {
			D("Read %d instead of %d", size, psize);
			break;
		}

		p += sizelen;
		n_read += sizelen;
	}
}
417
418
419/*
420 * create and enqueue a batch of packets on a ring.
421 * On the last one set NS_REPORT to tell the driver to generate
422 * an interrupt when done.
423 */
424static int
425send_packets(struct netmap_ring *ring, struct pkt *pkt,
426		int size, u_int count, int options)
427{
428	u_int sent, cur = ring->cur;
429
430	if (ring->avail < count)
431		count = ring->avail;
432
433#if 0
434	if (options & (OPT_COPY | OPT_PREFETCH) ) {
435		for (sent = 0; sent < count; sent++) {
436			struct netmap_slot *slot = &ring->slot[cur];
437			char *p = NETMAP_BUF(ring, slot->buf_idx);
438
439			prefetch(p);
440			cur = NETMAP_RING_NEXT(ring, cur);
441		}
442		cur = ring->cur;
443	}
444#endif
445	for (sent = 0; sent < count; sent++) {
446		struct netmap_slot *slot = &ring->slot[cur];
447		char *p = NETMAP_BUF(ring, slot->buf_idx);
448
449		if (options & OPT_COPY)
450			pkt_copy(pkt, p, size);
451		else if (options & OPT_MEMCPY)
452			memcpy(p, pkt, size);
453		else if (options & OPT_PREFETCH)
454			prefetch(p);
455
456		slot->len = size;
457		if (sent == count - 1)
458			slot->flags |= NS_REPORT;
459		cur = NETMAP_RING_NEXT(ring, cur);
460	}
461	ring->avail -= sent;
462	ring->cur = cur;
463
464	return (sent);
465}
466
467static void *
468sender_body(void *data)
469{
470	struct targ *targ = (struct targ *) data;
471
472	struct pollfd fds[1];
473	struct netmap_if *nifp = targ->nifp;
474	struct netmap_ring *txring;
475	int i, n = targ->g->npackets / targ->g->nthreads, sent = 0;
476	int options = targ->g->options | OPT_COPY;
477D("start");
478	if (setaffinity(targ->thread, targ->affinity))
479		goto quit;
480	/* setup poll(2) mechanism. */
481	memset(fds, 0, sizeof(fds));
482	fds[0].fd = targ->fd;
483	fds[0].events = (POLLOUT);
484
485	/* main loop.*/
486	gettimeofday(&targ->tic, NULL);
487    if (targ->g->use_pcap) {
488	int size = targ->g->pkt_size;
489	void *pkt = &targ->pkt;
490	pcap_t *p = targ->g->p;
491
492	for (i = 0; sent < n && !targ->cancel; i++) {
493		if (pcap_inject(p, pkt, size) != -1)
494			sent++;
495		if (i > 10000) {
496			targ->count = sent;
497			i = 0;
498		}
499	}
500    } else {
501	while (sent < n) {
502
503		/*
504		 * wait for available room in the send queue(s)
505		 */
506		if (poll(fds, 1, 2000) <= 0) {
507			if (targ->cancel)
508				break;
509			D("poll error/timeout on queue %d\n", targ->me);
510			goto quit;
511		}
512		/*
513		 * scan our queues and send on those with room
514		 */
515		if (sent > 100000 && !(targ->g->options & OPT_COPY) )
516			options &= ~OPT_COPY;
517		for (i = targ->qfirst; i < targ->qlast && !targ->cancel; i++) {
518			int m, limit = MIN(n - sent, targ->g->burst);
519
520			txring = NETMAP_TXRING(nifp, i);
521			if (txring->avail == 0)
522				continue;
523			m = send_packets(txring, &targ->pkt, targ->g->pkt_size,
524					 limit, options);
525			sent += m;
526			targ->count = sent;
527		}
528		if (targ->cancel)
529			break;
530	}
531	/* flush any remaining packets */
532	ioctl(fds[0].fd, NIOCTXSYNC, NULL);
533
534	/* final part: wait all the TX queues to be empty. */
535	for (i = targ->qfirst; i < targ->qlast; i++) {
536		txring = NETMAP_TXRING(nifp, i);
537		while (!NETMAP_TX_RING_EMPTY(txring)) {
538			ioctl(fds[0].fd, NIOCTXSYNC, NULL);
539			usleep(1); /* wait 1 tick */
540		}
541	}
542    }
543
544	gettimeofday(&targ->toc, NULL);
545	targ->completed = 1;
546	targ->count = sent;
547
548quit:
549	/* reset the ``used`` flag. */
550	targ->used = 0;
551
552	return (NULL);
553}
554
555
556static void
557receive_pcap(u_char *user, __unused const struct pcap_pkthdr * h,
558	__unused const u_char * bytes)
559{
560	int *count = (int *)user;
561	(*count)++;
562}
563
564static int
565receive_packets(struct netmap_ring *ring, u_int limit, int skip_payload)
566{
567	u_int cur, rx;
568
569	cur = ring->cur;
570	if (ring->avail < limit)
571		limit = ring->avail;
572	for (rx = 0; rx < limit; rx++) {
573		struct netmap_slot *slot = &ring->slot[cur];
574		char *p = NETMAP_BUF(ring, slot->buf_idx);
575
576		if (!skip_payload)
577			check_payload(p, slot->len);
578
579		cur = NETMAP_RING_NEXT(ring, cur);
580	}
581	ring->avail -= rx;
582	ring->cur = cur;
583
584	return (rx);
585}
586
587static void *
588receiver_body(void *data)
589{
590	struct targ *targ = (struct targ *) data;
591	struct pollfd fds[1];
592	struct netmap_if *nifp = targ->nifp;
593	struct netmap_ring *rxring;
594	int i, received = 0;
595
596	if (setaffinity(targ->thread, targ->affinity))
597		goto quit;
598
599	/* setup poll(2) mechanism. */
600	memset(fds, 0, sizeof(fds));
601	fds[0].fd = targ->fd;
602	fds[0].events = (POLLIN);
603
604	/* unbounded wait for the first packet. */
605	for (;;) {
606		i = poll(fds, 1, 1000);
607		if (i > 0 && !(fds[0].revents & POLLERR))
608			break;
609		D("waiting for initial packets, poll returns %d %d", i, fds[0].revents);
610	}
611
612	/* main loop, exit after 1s silence */
613	gettimeofday(&targ->tic, NULL);
614    if (targ->g->use_pcap) {
615	while (!targ->cancel) {
616		pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap, NULL);
617	}
618    } else {
619	while (!targ->cancel) {
620		/* Once we started to receive packets, wait at most 1 seconds
621		   before quitting. */
622		if (poll(fds, 1, 1 * 1000) <= 0) {
623			gettimeofday(&targ->toc, NULL);
624			targ->toc.tv_sec -= 1; /* Subtract timeout time. */
625			break;
626		}
627
628		for (i = targ->qfirst; i < targ->qlast; i++) {
629			int m;
630
631			rxring = NETMAP_RXRING(nifp, i);
632			if (rxring->avail == 0)
633				continue;
634
635			m = receive_packets(rxring, targ->g->burst,
636					SKIP_PAYLOAD);
637			received += m;
638			targ->count = received;
639		}
640
641		// tell the card we have read the data
642		//ioctl(fds[0].fd, NIOCRXSYNC, NULL);
643	}
644    }
645
646	targ->completed = 1;
647	targ->count = received;
648
649quit:
650	/* reset the ``used`` flag. */
651	targ->used = 0;
652
653	return (NULL);
654}
655
/*
 * Print the transmit summary: packets sent, packet size, elapsed time,
 * packet rate and bandwidth.
 *
 * sent   number of packets transmitted
 * size   size of each packet in bytes
 * delta  elapsed time in seconds
 *
 * Rates are scaled to K/M/G units; the scaling stops at G so the unit
 * table is never indexed out of bounds. When delta is not positive no
 * rate can be computed and 0 is reported instead.
 */
static void
tx_output(uint64_t sent, int size, double delta)
{
	static const char *const units[] = { "", "K", "M", "G" };
	const int last = (int)(sizeof(units) / sizeof(units[0])) - 1;
	double amount = 0.0, pps = 0.0;
	int aunit = 0, punit = 0;

	if (delta > 0.0) {
		amount = 8.0 * (1.0 * size * sent) / delta;
		pps = sent / delta;
	}

	while (amount >= 1000 && aunit < last) {
		amount /= 1000;
		aunit += 1;
	}
	while (pps >= 1000 && punit < last) {
		pps /= 1000;
		punit += 1;
	}

	printf("Sent %" PRIu64 " packets, %d bytes each, in %.2f seconds.\n",
	       sent, size, delta);
	printf("Speed: %.2f%spps. Bandwidth: %.2f%sbps.\n",
	       pps, units[punit], amount, units[aunit]);
}
678
679
/*
 * Print the receive summary: packets received, elapsed time and packet
 * rate.
 *
 * received  number of packets received
 * delta     elapsed time in seconds
 *
 * The rate is scaled to K/M/G units; the scaling stops at G so the unit
 * table is never indexed out of bounds. When delta is not positive no
 * rate can be computed and 0 is reported instead.
 */
static void
rx_output(uint64_t received, double delta)
{
	static const char *const units[] = { "", "K", "M", "G" };
	const int last = (int)(sizeof(units) / sizeof(units[0])) - 1;
	double pps = 0.0;
	int punit = 0;

	if (delta > 0.0)
		pps = received / delta;

	while (pps >= 1000 && punit < last) {
		pps /= 1000;
		punit += 1;
	}

	printf("Received %" PRIu64 " packets, in %.2f seconds.\n", received, delta);
	printf("Speed: %.2f%spps.\n", pps, units[punit]);
}
696
/*
 * Print the command line synopsis on stderr and terminate the program
 * with exit status 0. Does not return.
 */
static void
usage(void)
{
	static const char progname[] = "pkt-gen";

	fprintf(stderr,
		"Usage:\n"
		"%s arguments\n"
		"\t-i interface		interface name\n"
		"\t-t pkts_to_send	also forces send mode\n"
		"\t-r pkts_to_receive	also forces receive mode\n"
		"\t-l pkts_size		in bytes excluding CRC\n"
		"\t-d dst-ip		end with %%n to sweep n addresses\n"
		"\t-s src-ip		end with %%n to sweep n addresses\n"
		"\t-D dst-mac		end with %%n to sweep n addresses\n"
		"\t-S src-mac		end with %%n to sweep n addresses\n"
		"\t-b burst size		testing, mostly\n"
		"\t-c cores		cores to use\n"
		"\t-p threads		processes/threads to use\n"
		"\t-T report_ms		milliseconds between reports\n"
		"\t-w wait_for_link_time	in seconds\n",
		progname);

	exit(0);
}
722
723
724int
725main(int arc, char **argv)
726{
727	int i, fd;
728	char pcap_errbuf[PCAP_ERRBUF_SIZE];
729
730	struct glob_arg g;
731
732	struct nmreq nmr;
733	void *mmap_addr;		/* the mmap address */
734	void *(*td_body)(void *) = receiver_body;
735	int ch;
736	int report_interval = 1000;	/* report interval */
737	char *ifname = NULL;
738	int wait_link = 2;
739	int devqueues = 1;	/* how many device queues */
740
741	bzero(&g, sizeof(g));
742
743	g.src_ip = "10.0.0.1";
744	g.dst_ip = "10.1.0.1";
745	g.dst_mac = "ff:ff:ff:ff:ff:ff";
746	g.src_mac = NULL;
747	g.pkt_size = 60;
748	g.burst = 512;		// default
749	g.nthreads = 1;
750	g.cpus = 1;
751
752	while ( (ch = getopt(arc, argv,
753			"i:t:r:l:d:s:D:S:b:c:o:p:PT:w:v")) != -1) {
754		switch(ch) {
755		default:
756			D("bad option %c %s", ch, optarg);
757			usage();
758			break;
759		case 'o':
760			g.options = atoi(optarg);
761			break;
762		case 'i':	/* interface */
763			ifname = optarg;
764			break;
765		case 't':	/* send */
766			td_body = sender_body;
767			g.npackets = atoi(optarg);
768			break;
769		case 'r':	/* receive */
770			td_body = receiver_body;
771			g.npackets = atoi(optarg);
772			break;
773		case 'l':	/* pkt_size */
774			g.pkt_size = atoi(optarg);
775			break;
776		case 'd':
777			g.dst_ip = optarg;
778			break;
779		case 's':
780			g.src_ip = optarg;
781			break;
782		case 'T':	/* report interval */
783			report_interval = atoi(optarg);
784			break;
785		case 'w':
786			wait_link = atoi(optarg);
787			break;
788		case 'b':	/* burst */
789			g.burst = atoi(optarg);
790			break;
791		case 'c':
792			g.cpus = atoi(optarg);
793			break;
794		case 'p':
795			g.nthreads = atoi(optarg);
796			break;
797
798		case 'P':
799			g.use_pcap = 1;
800			break;
801
802		case 'D': /* destination mac */
803			g.dst_mac = optarg;
804	{
805		struct ether_addr *mac = ether_aton(g.dst_mac);
806		D("ether_aton(%s) gives %p", g.dst_mac, mac);
807	}
808			break;
809		case 'S': /* source mac */
810			g.src_mac = optarg;
811			break;
812		case 'v':
813			verbose++;
814		}
815	}
816
817	if (ifname == NULL) {
818		D("missing ifname");
819		usage();
820	}
821	{
822		int n = system_ncpus();
823		if (g.cpus < 0 || g.cpus > n) {
824			D("%d cpus is too high, have only %d cpus", g.cpus, n);
825			usage();
826		}
827		if (g.cpus == 0)
828			g.cpus = n;
829	}
830	if (g.pkt_size < 16 || g.pkt_size > 1536) {
831		D("bad pktsize %d\n", g.pkt_size);
832		usage();
833	}
834
835	if (td_body == sender_body && g.src_mac == NULL) {
836		static char mybuf[20] = "ff:ff:ff:ff:ff:ff";
837		/* retrieve source mac address. */
838		if (source_hwaddr(ifname, mybuf) == -1) {
839			D("Unable to retrieve source mac");
840			// continue, fail later
841		}
842		g.src_mac = mybuf;
843	}
844
845    if (g.use_pcap) {
846	D("using pcap on %s", ifname);
847	g.p = pcap_open_live(ifname, 0, 1, 100, pcap_errbuf);
848	if (g.p == NULL) {
849		D("cannot open pcap on %s", ifname);
850		usage();
851	}
852	mmap_addr = NULL;
853	fd = -1;
854    } else {
855	bzero(&nmr, sizeof(nmr));
856	nmr.nr_version = NETMAP_API;
857	/*
858	 * Open the netmap device to fetch the number of queues of our
859	 * interface.
860	 *
861	 * The first NIOCREGIF also detaches the card from the
862	 * protocol stack and may cause a reset of the card,
863	 * which in turn may take some time for the PHY to
864	 * reconfigure.
865	 */
866	fd = open("/dev/netmap", O_RDWR);
867	if (fd == -1) {
868		D("Unable to open /dev/netmap");
869		// fail later
870	} else {
871		if ((ioctl(fd, NIOCGINFO, &nmr)) == -1) {
872			D("Unable to get if info without name");
873		} else {
874			D("map size is %d Kb", nmr.nr_memsize >> 10);
875		}
876		bzero(&nmr, sizeof(nmr));
877		nmr.nr_version = NETMAP_API;
878		strncpy(nmr.nr_name, ifname, sizeof(nmr.nr_name));
879		if ((ioctl(fd, NIOCGINFO, &nmr)) == -1) {
880			D("Unable to get if info for %s", ifname);
881		}
882		devqueues = nmr.nr_rx_rings;
883	}
884
885	/* validate provided nthreads. */
886	if (g.nthreads < 1 || g.nthreads > devqueues) {
887		D("bad nthreads %d, have %d queues", g.nthreads, devqueues);
888		// continue, fail later
889	}
890
891	/*
892	 * Map the netmap shared memory: instead of issuing mmap()
893	 * inside the body of the threads, we prefer to keep this
894	 * operation here to simplify the thread logic.
895	 */
896	D("mmapping %d Kbytes", nmr.nr_memsize>>10);
897	mmap_addr = (struct netmap_d *) mmap(0, nmr.nr_memsize,
898					    PROT_WRITE | PROT_READ,
899					    MAP_SHARED, fd, 0);
900	if (mmap_addr == MAP_FAILED) {
901		D("Unable to mmap %d KB", nmr.nr_memsize >> 10);
902		// continue, fail later
903	}
904
905	/*
906	 * Register the interface on the netmap device: from now on,
907	 * we can operate on the network interface without any
908	 * interference from the legacy network stack.
909	 *
910	 * We decide to put the first interface registration here to
911	 * give time to cards that take a long time to reset the PHY.
912	 */
913	nmr.nr_version = NETMAP_API;
914	if (ioctl(fd, NIOCREGIF, &nmr) == -1) {
915		D("Unable to register interface %s", ifname);
916		//continue, fail later
917	}
918
919
920	/* Print some debug information. */
921	fprintf(stdout,
922		"%s %s: %d queues, %d threads and %d cpus.\n",
923		(td_body == sender_body) ? "Sending on" : "Receiving from",
924		ifname,
925		devqueues,
926		g.nthreads,
927		g.cpus);
928	if (td_body == sender_body) {
929		fprintf(stdout, "%s -> %s (%s -> %s)\n",
930			g.src_ip, g.dst_ip,
931			g.src_mac, g.dst_mac);
932	}
933
934	/* Exit if something went wrong. */
935	if (fd < 0) {
936		D("aborting");
937		usage();
938	}
939    }
940
941	if (g.options) {
942		D("special options:%s%s%s%s\n",
943			g.options & OPT_PREFETCH ? " prefetch" : "",
944			g.options & OPT_ACCESS ? " access" : "",
945			g.options & OPT_MEMCPY ? " memcpy" : "",
946			g.options & OPT_COPY ? " copy" : "");
947	}
948	/* Wait for PHY reset. */
949	D("Wait %d secs for phy reset", wait_link);
950	sleep(wait_link);
951	D("Ready...");
952
953	/* Install ^C handler. */
954	global_nthreads = g.nthreads;
955	signal(SIGINT, sigint_h);
956
957	if (g.use_pcap) {
958		g.p = pcap_open_live(ifname, 0, 1, 100, NULL);
959		if (g.p == NULL) {
960			D("cannot open pcap on %s", ifname);
961			usage();
962		} else
963			D("using pcap %p on %s", g.p, ifname);
964	}
965
966	targs = calloc(g.nthreads, sizeof(*targs));
967	/*
968	 * Now create the desired number of threads, each one
969	 * using a single descriptor.
970 	 */
971	for (i = 0; i < g.nthreads; i++) {
972		struct netmap_if *tnifp;
973		struct nmreq tifreq;
974		int tfd;
975
976	    if (g.use_pcap) {
977		tfd = -1;
978		tnifp = NULL;
979	    } else {
980		/* register interface. */
981		tfd = open("/dev/netmap", O_RDWR);
982		if (tfd == -1) {
983			D("Unable to open /dev/netmap");
984			continue;
985		}
986
987		bzero(&tifreq, sizeof(tifreq));
988		strncpy(tifreq.nr_name, ifname, sizeof(tifreq.nr_name));
989		tifreq.nr_version = NETMAP_API;
990		tifreq.nr_ringid = (g.nthreads > 1) ? (i | NETMAP_HW_RING) : 0;
991
992		/*
993		 * if we are acting as a receiver only, do not touch the transmit ring.
994		 * This is not the default because many apps may use the interface
995		 * in both directions, but a pure receiver does not.
996		 */
997		if (td_body == receiver_body) {
998			tifreq.nr_ringid |= NETMAP_NO_TX_POLL;
999		}
1000
1001		if ((ioctl(tfd, NIOCREGIF, &tifreq)) == -1) {
1002			D("Unable to register %s", ifname);
1003			continue;
1004		}
1005		tnifp = NETMAP_IF(mmap_addr, tifreq.nr_offset);
1006	    }
1007		/* start threads. */
1008		bzero(&targs[i], sizeof(targs[i]));
1009		targs[i].g = &g;
1010		targs[i].used = 1;
1011		targs[i].completed = 0;
1012		targs[i].fd = tfd;
1013		targs[i].nmr = tifreq;
1014		targs[i].nifp = tnifp;
1015		targs[i].qfirst = (g.nthreads > 1) ? i : 0;
1016		targs[i].qlast = (g.nthreads > 1) ? i+1 :
1017			(td_body == receiver_body ? tifreq.nr_rx_rings : tifreq.nr_tx_rings);
1018		targs[i].me = i;
1019		targs[i].affinity = g.cpus ? i % g.cpus : -1;
1020		if (td_body == sender_body) {
1021			/* initialize the packet to send. */
1022			initialize_packet(&targs[i]);
1023		}
1024
1025		if (pthread_create(&targs[i].thread, NULL, td_body,
1026				   &targs[i]) == -1) {
1027			D("Unable to create thread %d", i);
1028			targs[i].used = 0;
1029		}
1030	}
1031
1032    {
1033	uint64_t my_count = 0, prev = 0;
1034	uint64_t count = 0;
1035	double delta_t;
1036	struct timeval tic, toc;
1037
1038	gettimeofday(&toc, NULL);
1039	for (;;) {
1040		struct timeval now, delta;
1041		uint64_t pps;
1042		int done = 0;
1043
1044		delta.tv_sec = report_interval/1000;
1045		delta.tv_usec = (report_interval%1000)*1000;
1046		select(0, NULL, NULL, NULL, &delta);
1047		gettimeofday(&now, NULL);
1048		timersub(&now, &toc, &toc);
1049		my_count = 0;
1050		for (i = 0; i < g.nthreads; i++) {
1051			my_count += targs[i].count;
1052			if (targs[i].used == 0)
1053				done++;
1054		}
1055		pps = toc.tv_sec* 1000000 + toc.tv_usec;
1056		if (pps < 10000)
1057			continue;
1058		pps = (my_count - prev)*1000000 / pps;
1059		D("%" PRIu64 " pps", pps);
1060		prev = my_count;
1061		toc = now;
1062		if (done == g.nthreads)
1063			break;
1064	}
1065
1066	timerclear(&tic);
1067	timerclear(&toc);
1068	for (i = 0; i < g.nthreads; i++) {
1069		/*
1070		 * Join active threads, unregister interfaces and close
1071		 * file descriptors.
1072		 */
1073		pthread_join(targs[i].thread, NULL);
1074		ioctl(targs[i].fd, NIOCUNREGIF, &targs[i].nmr);
1075		close(targs[i].fd);
1076
1077		if (targs[i].completed == 0)
1078			continue;
1079
1080		/*
1081		 * Collect threads output and extract information about
1082		 * how long it took to send all the packets.
1083		 */
1084		count += targs[i].count;
1085		if (!timerisset(&tic) || timercmp(&targs[i].tic, &tic, <))
1086			tic = targs[i].tic;
1087		if (!timerisset(&toc) || timercmp(&targs[i].toc, &toc, >))
1088			toc = targs[i].toc;
1089	}
1090
1091	/* print output. */
1092	timersub(&toc, &tic, &toc);
1093	delta_t = toc.tv_sec + 1e-6* toc.tv_usec;
1094	if (td_body == sender_body)
1095		tx_output(count, g.pkt_size, delta_t);
1096	else
1097		rx_output(count, delta_t);
1098    }
1099
1100    if (g.use_pcap == 0) {
1101	ioctl(fd, NIOCUNREGIF, &nmr);
1102	munmap(mmap_addr, nmr.nr_memsize);
1103	close(fd);
1104    }
1105
1106	return (0);
1107}
1108/* end of file */
1109