pkt-gen.c revision 238175
1/*
2 * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *   1. Redistributions of source code must retain the above copyright
8 *      notice, this list of conditions and the following disclaimer.
9 *   2. Redistributions in binary form must reproduce the above copyright
10 *      notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 */
25
26/*
27 * $FreeBSD: head/tools/tools/netmap/pkt-gen.c 238175 2012-07-06 17:03:43Z emaste $
28 * $Id: pkt-gen.c 10967 2012-05-03 11:29:23Z luigi $
29 *
30 * Example program to show how to build a multithreaded packet
31 * source/sink using the netmap device.
32 *
33 * In this example we create a programmable number of threads
34 * to take care of all the queues of the interface used to
35 * send or receive traffic.
36 *
37 */
38
/*
 * Text used to fill the body of generated packets: initialize_packet()
 * copies it repeatedly into the packet template until the payload
 * length is reached.
 */
const char *default_payload="netmap pkt-gen Luigi Rizzo and Matteo Landi\n"
	"http://info.iet.unipi.it/~luigi/netmap/ ";
41
42#include <errno.h>
43#include <pthread.h>	/* pthread_* */
44#include <pthread_np.h>	/* pthread w/ affinity */
45#include <signal.h>	/* signal */
46#include <stdlib.h>
47#include <stdio.h>
48#include <inttypes.h>	/* PRI* macros */
49#include <string.h>	/* strcmp */
50#include <fcntl.h>	/* open */
51#include <unistd.h>	/* close */
52#include <ifaddrs.h>	/* getifaddrs */
53
54#include <sys/mman.h>	/* PROT_* */
55#include <sys/ioctl.h>	/* ioctl */
56#include <sys/poll.h>
57#include <sys/socket.h>	/* sockaddr.. */
58#include <arpa/inet.h>	/* ntohs */
59#include <sys/param.h>
60#include <sys/cpuset.h>	/* cpu_set */
61#include <sys/sysctl.h>	/* sysctl */
62#include <sys/time.h>	/* timersub */
63
64#include <net/ethernet.h>
65#include <net/if.h>	/* ifreq */
66#include <net/if_dl.h>	/* LLADDR */
67
68#include <netinet/in.h>
69#include <netinet/ip.h>
70#include <netinet/udp.h>
71
72#include <net/netmap.h>
73#include <net/netmap_user.h>
74#include <pcap/pcap.h>
75
76
/* Return the smaller of the two arguments (b when they compare equal). */
static inline int
min(int a, int b)
{
	if (a < b)
		return a;
	return b;
}
78
/*
 * debug support
 *
 * D(format, ...) prints a printf-style message on stderr, prefixed
 * with the name of the calling function and the source line number.
 * A trailing newline is appended by the macro itself, so callers
 * normally do not put one in the format string.
 */
#define D(format, ...)				\
	fprintf(stderr, "%s [%d] " format "\n", 	\
	__FUNCTION__, __LINE__, ##__VA_ARGS__)
83
/*
 * EXPERIMENTAL enables the cpuid/rdtsc timestamp helpers further down.
 * It defaults to 0 (disabled) unless defined on the compiler command line.
 */
#ifndef EXPERIMENTAL
#define EXPERIMENTAL 0
#endif

/* verbosity level, incremented by each -v on the command line */
int verbose = 0;
#define MAX_QUEUES 64	/* no need to limit */

/* value passed by receiver_body() as the skip_payload argument */
#define SKIP_PAYLOAD 1 /* do not check payload. */
92
/*
 * Hint the CPU to bring the cache line containing *x into the cache.
 *
 * Declared static inline: a plain `inline' definition without a matching
 * external definition may leave an unresolved symbol at link time under
 * C99/C11 inline rules when the call is not inlined.
 *
 * __builtin_prefetch(x, 0, 3) requests a read prefetch with maximum
 * temporal locality, which compiles to `prefetcht0' on x86 and to the
 * closest equivalent (or nothing) on other architectures.
 */
static inline void prefetch (const void *x)
{
	__builtin_prefetch(x, 0, 3);
}
97
// XXX only for multiples of 64 bytes, non overlapped.
/*
 * Copy a packet of l bytes from _src to _dst.
 *
 * Lengths of 1024 bytes or more are handed to bcopy() and copied
 * exactly. Shorter lengths are copied in rounds of eight 64-bit words,
 * so the amount actually written is l rounded up to a multiple of 64:
 * both buffers must have room for that. Nothing is copied when l <= 0.
 */
static inline void
pkt_copy(void *_src, void *_dst, int l)
{
	uint64_t *from = _src;
	uint64_t *to = _dst;
	int word;
#define likely(x)       __builtin_expect(!!(x), 1)
#define unlikely(x)       __builtin_expect(!!(x), 0)

	/* large packets: let the library routine do the work */
	if (unlikely(l >= 1024)) {
		bcopy(from, to, l);
		return;
	}

	/* small packets: one 64-byte chunk per round */
	while (l > 0) {
		for (word = 0; word < 8; word++)
			to[word] = from[word];
		from += 8;
		to += 8;
		l -= 64;
	}
}
121
122
123#if EXPERIMENTAL
/*
 * Wrapper around `rdtsc' to take reliable timestamps flushing the pipeline.
 *
 * The macro first calls do_cpuid() (leaf 0, results discarded into a
 * scratch array) and then stores the value returned by rdtsc() into the
 * lvalue t. It is a single statement thanks to the do { } while (0) form.
 */
#define netmap_rdtsc(t) \
	do { \
		u_int __regs[4];					\
									\
		do_cpuid(0, __regs);					\
		(t) = rdtsc();						\
	} while (0)
132
133static __inline void
134do_cpuid(u_int ax, u_int *p)
135{
136	__asm __volatile("cpuid"
137			 : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
138			 :  "0" (ax));
139}
140
/*
 * Read the x86 time stamp counter and return it as a 64-bit value.
 *
 * `rdtsc' returns the low 32 bits in EAX and the high 32 bits in EDX.
 * The two halves are read through explicit "=a" and "=d" outputs and
 * combined by hand: the "=A" constraint only means the EDX:EAX pair on
 * 32-bit x86, while on x86-64 it selects a single register and loses
 * half of the counter.
 */
static __inline uint64_t
rdtsc(void)
{
	uint32_t lo, hi;

	__asm __volatile("rdtsc" : "=a" (lo), "=d" (hi));
	return (((uint64_t)hi << 32) | lo);
}
149#define MAX_SAMPLES 100000
150#endif /* EXPERIMENTAL */
151
152
/*
 * Layout of the packet template that is sent: Ethernet header, IPv4
 * header, UDP header and payload, packed so that no padding is inserted
 * between the members. The body is a fixed 2048-byte array regardless
 * of the configured packet size.
 */
struct pkt {
	struct ether_header eh;
	struct ip ip;
	struct udphdr udp;
	uint8_t body[2048];	// XXX hardwired
} __attribute__((__packed__));
159
/*
 * global arguments for all threads
 *
 * Filled in by main() from defaults and command line options; every
 * thread receives a pointer to the same instance through targ->g.
 */
struct glob_arg {
	const char *src_ip;	/* -s, textual source IP */
	const char *dst_ip;	/* -d, textual destination IP */
	const char *src_mac;	/* -S, textual source MAC */
	const char *dst_mac;	/* -D, textual destination MAC */
	int pkt_size;		/* -l, packet size in bytes */
	int burst;		/* -b, max packets handled per ring per round */
	int npackets;	/* total packets to send */
	int nthreads;		/* -p, number of worker threads */
	int cpus;		/* -c, number of cpus the threads are spread on */
	int options;	/* testing */
	/* bit values for the options field (-o) */
#define OPT_PREFETCH	1
#define OPT_ACCESS	2
#define OPT_COPY	4
#define OPT_MEMCPY	8
	int use_pcap;		/* -P, use libpcap instead of netmap */
	pcap_t *p;		/* pcap handle, valid when use_pcap is set */
};
181
/*
 * Set of eight 64-bit counters.
 * NOTE(review): not referenced anywhere else in the visible source;
 * purpose to be confirmed.
 */
struct mystat {
	uint64_t containers[8];
};
185
/*
 * Arguments for a new thread. The same structure is used by
 * the source and the sink
 *
 * One instance per worker thread, set up by main() before the thread
 * is created. Several fields (used, completed, cancel, count) are
 * written by one thread and read by another without locking.
 * NOTE(review): cancel is also written from the SIGINT handler but is
 * a plain int rather than volatile sig_atomic_t.
 */
struct targ {
	struct glob_arg *g;	/* shared global arguments */
	int used;		/* set by main(), cleared by the thread on exit */
	int completed;		/* set by the thread when its main loop ends */
	int cancel;		/* set by the SIGINT handler to stop the thread */
	int fd;			/* netmap descriptor of this thread, -1 with pcap */
	struct nmreq nmr;	/* copy of the request used to register fd */
	struct netmap_if *nifp;	/* netmap interface of this thread */
	uint16_t	qfirst, qlast; /* range of queues to scan */
	uint64_t count;		/* packets handled so far, polled by main() */
	struct timeval tic, toc;	/* start and end time of the run */
	int me;			/* index of this thread */
	pthread_t thread;
	int affinity;		/* cpu to bind to, -1 for no binding */

	/* addresses and sweep ranges parsed by initialize_packet() */
	uint8_t	dst_mac[6];
	uint8_t	src_mac[6];
	u_int dst_mac_range;
	u_int src_mac_range;
	uint32_t dst_ip;
	uint32_t src_ip;
	u_int dst_ip_range;
	u_int src_ip_range;

	struct pkt pkt;		/* template of the packet to send */
};
216
217
/*
 * Per-thread argument array allocated by main(), and the number of
 * entries in it. Both are file-scope so that the SIGINT handler can
 * reach every thread.
 */
static struct targ *targs;
static int global_nthreads;
220
221/* control-C handler */
222static void
223sigint_h(__unused int sig)
224{
225	for (int i = 0; i < global_nthreads; i++)
226		targs[i].cancel = 1;
227
228	signal(SIGINT, SIG_DFL);
229}
230
231
/*
 * sysctl wrapper to return the number of active CPUs.
 *
 * Queries hw.ncpu. The length passed to sysctl() is the size of the
 * output variable (not of the mib array), and the result of the call is
 * checked: on failure, or if a nonsensical value is reported, 1 is
 * returned so that callers always get a usable count.
 */
static int
system_ncpus(void)
{
	int mib[2] = { CTL_HW, HW_NCPU };
	int ncpus = 1;
	size_t len = sizeof(ncpus);

	if (sysctl(mib, 2, &ncpus, &len, NULL, 0) == -1 || ncpus < 1) {
		D("sysctl hw.ncpu failed, assuming 1 cpu");
		return (1);
	}

	return (ncpus);
}
246
247/*
248 * locate the src mac address for our interface, put it
249 * into the user-supplied buffer. return 0 if ok, -1 on error.
250 */
251static int
252source_hwaddr(const char *ifname, char *buf)
253{
254	struct ifaddrs *ifaphead, *ifap;
255	int l = sizeof(ifap->ifa_name);
256
257	if (getifaddrs(&ifaphead) != 0) {
258		D("getifaddrs %s failed", ifname);
259		return (-1);
260	}
261
262	for (ifap = ifaphead; ifap; ifap = ifap->ifa_next) {
263		struct sockaddr_dl *sdl =
264			(struct sockaddr_dl *)ifap->ifa_addr;
265		uint8_t *mac;
266
267		if (!sdl || sdl->sdl_family != AF_LINK)
268			continue;
269		if (strncmp(ifap->ifa_name, ifname, l) != 0)
270			continue;
271		mac = (uint8_t *)LLADDR(sdl);
272		sprintf(buf, "%02x:%02x:%02x:%02x:%02x:%02x",
273			mac[0], mac[1], mac[2],
274			mac[3], mac[4], mac[5]);
275		if (verbose)
276			D("source hwaddr %s", buf);
277		break;
278	}
279	freeifaddrs(ifaphead);
280	return ifap ? 0 : 1;
281}
282
283
284/* set the thread affinity. */
285static int
286setaffinity(pthread_t me, int i)
287{
288	cpuset_t cpumask;
289
290	if (i == -1)
291		return 0;
292
293	/* Set thread affinity affinity.*/
294	CPU_ZERO(&cpumask);
295	CPU_SET(i, &cpumask);
296
297	if (pthread_setaffinity_np(me, sizeof(cpuset_t), &cpumask) != 0) {
298		D("Unable to set affinity");
299		return 1;
300	}
301	return 0;
302}
303
/*
 * Compute the checksum of the given ip header.
 *
 * The len bytes at data are summed as big-endian 16-bit words (a
 * trailing odd byte is used as the high half of a last word), the
 * carries are folded back into the low 16 bits, and the one's
 * complement of the result is returned in network byte order.
 */
static uint16_t
checksum(const void *data, uint16_t len)
{
	const uint8_t *cur = data;
	const uint8_t *end = cur + len;
	uint32_t acc = 0;

	for (; end - cur >= 2; cur += 2)
		acc += ((uint32_t)cur[0] << 8) | cur[1];

	/* odd length: last byte is the high half of a final word */
	if (cur < end)
		acc += (uint32_t)*cur << 8;

	/* fold the carries back in */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += (acc >> 16);

	acc = htons(acc);

	return ~acc;
}
327
328/*
329 * Fill a packet with some payload.
330 */
331static void
332initialize_packet(struct targ *targ)
333{
334	struct pkt *pkt = &targ->pkt;
335	struct ether_header *eh;
336	struct ip *ip;
337	struct udphdr *udp;
338	uint16_t paylen = targ->g->pkt_size - sizeof(*eh) - sizeof(*ip);
339	int i, l, l0 = strlen(default_payload);
340	char *p;
341
342	for (i = 0; i < paylen;) {
343		l = min(l0, paylen - i);
344		bcopy(default_payload, pkt->body + i, l);
345		i += l;
346	}
347	pkt->body[i-1] = '\0';
348
349	udp = &pkt->udp;
350	udp->uh_sport = htons(1234);
351        udp->uh_dport = htons(4321);
352	udp->uh_ulen = htons(paylen);
353	udp->uh_sum = 0; // checksum(udp, sizeof(*udp));
354
355	ip = &pkt->ip;
356        ip->ip_v = IPVERSION;
357        ip->ip_hl = 5;
358        ip->ip_id = 0;
359        ip->ip_tos = IPTOS_LOWDELAY;
360	ip->ip_len = ntohs(targ->g->pkt_size - sizeof(*eh));
361        ip->ip_id = 0;
362        ip->ip_off = htons(IP_DF); /* Don't fragment */
363        ip->ip_ttl = IPDEFTTL;
364	ip->ip_p = IPPROTO_UDP;
365	inet_aton(targ->g->src_ip, (struct in_addr *)&ip->ip_src);
366	inet_aton(targ->g->dst_ip, (struct in_addr *)&ip->ip_dst);
367	targ->dst_ip = ip->ip_dst.s_addr;
368	targ->src_ip = ip->ip_src.s_addr;
369	p = index(targ->g->src_ip, '-');
370	if (p) {
371		targ->dst_ip_range = atoi(p+1);
372		D("dst-ip sweep %d addresses", targ->dst_ip_range);
373	}
374	ip->ip_sum = checksum(ip, sizeof(*ip));
375
376	eh = &pkt->eh;
377	bcopy(ether_aton(targ->g->src_mac), targ->src_mac, 6);
378	bcopy(targ->src_mac, eh->ether_shost, 6);
379	p = index(targ->g->src_mac, '-');
380	if (p)
381		targ->src_mac_range = atoi(p+1);
382
383	bcopy(ether_aton(targ->g->dst_mac), targ->dst_mac, 6);
384	bcopy(targ->dst_mac, eh->ether_dhost, 6);
385	p = index(targ->g->dst_mac, '-');
386	if (p)
387		targ->dst_mac_range = atoi(p+1);
388	eh->ether_type = htons(ETHERTYPE_IP);
389}
390
/* Check the payload of the packet for errors (use it for debug).
 * Look for consecutive ascii representations of the size of the packet.
 *
 * p points to the start of the packet and psize is its length. The
 * first 14 bytes are skipped, then the rest is scanned in steps as wide
 * as the decimal form of psize plus one separator. Scanning stops with
 * a diagnostic at the first position that does not hold a number, or
 * holds a number different from psize.
 */
static void
check_payload(char *p, int psize)
{
	char temp[64];
	int n_read, size, sizelen;

	/* get the length in ASCII of the length of the packet. */
	sizelen = snprintf(temp, sizeof(temp), "%d", psize) + 1; // include a whitespace

	/* dummy payload. */
	p += 14; /* skip packet header. */
	n_read = 14;
	while (psize - n_read >= sizelen) {
		/* size is only meaningful when the conversion succeeded */
		if (sscanf(p, "%d", &size) != 1) {
			D("No packet size found at offset %d", n_read);
			break;
		}
		if (size != psize) {
			D("Read %d instead of %d", size, psize);
			break;
		}

		p += sizelen;
		n_read += sizelen;
	}
}
417
418
419/*
420 * create and enqueue a batch of packets on a ring.
421 * On the last one set NS_REPORT to tell the driver to generate
422 * an interrupt when done.
423 */
424static int
425send_packets(struct netmap_ring *ring, struct pkt *pkt,
426		int size, u_int count, int options)
427{
428	u_int sent, cur = ring->cur;
429
430	if (ring->avail < count)
431		count = ring->avail;
432
433#if 0
434	if (options & (OPT_COPY | OPT_PREFETCH) ) {
435		for (sent = 0; sent < count; sent++) {
436			struct netmap_slot *slot = &ring->slot[cur];
437			char *p = NETMAP_BUF(ring, slot->buf_idx);
438
439			prefetch(p);
440			cur = NETMAP_RING_NEXT(ring, cur);
441		}
442		cur = ring->cur;
443	}
444#endif
445	for (sent = 0; sent < count; sent++) {
446		struct netmap_slot *slot = &ring->slot[cur];
447		char *p = NETMAP_BUF(ring, slot->buf_idx);
448
449		if (options & OPT_COPY)
450			pkt_copy(pkt, p, size);
451		else if (options & OPT_MEMCPY)
452			memcpy(p, pkt, size);
453		else if (options & OPT_PREFETCH)
454			prefetch(p);
455
456		slot->len = size;
457		if (sent == count - 1)
458			slot->flags |= NS_REPORT;
459		cur = NETMAP_RING_NEXT(ring, cur);
460	}
461	ring->avail -= sent;
462	ring->cur = cur;
463
464	return (sent);
465}
466
467static void *
468sender_body(void *data)
469{
470	struct targ *targ = (struct targ *) data;
471
472	struct pollfd fds[1];
473	struct netmap_if *nifp = targ->nifp;
474	struct netmap_ring *txring;
475	int i, pkts_per_td = targ->g->npackets / targ->g->nthreads, sent = 0;
476	int continuous = 0;
477	int options = targ->g->options | OPT_COPY;
478D("start");
479	if (pkts_per_td == 0) {
480		continuous = 1;
481		pkts_per_td = 100000;
482	}
483	if (setaffinity(targ->thread, targ->affinity))
484		goto quit;
485	/* setup poll(2) mechanism. */
486	memset(fds, 0, sizeof(fds));
487	fds[0].fd = targ->fd;
488	fds[0].events = (POLLOUT);
489
490	/* main loop.*/
491	gettimeofday(&targ->tic, NULL);
492    if (targ->g->use_pcap) {
493	int size = targ->g->pkt_size;
494	void *pkt = &targ->pkt;
495	pcap_t *p = targ->g->p;
496
497	for (i = 0; (sent < pkts_per_td && !targ->cancel) || continuous; i++) {
498		if (pcap_inject(p, pkt, size) != -1)
499			sent++;
500		if (i > 10000) {
501			targ->count = sent;
502			i = 0;
503		}
504	}
505    } else {
506	while (sent < pkts_per_td || continuous) {
507
508		/*
509		 * wait for available room in the send queue(s)
510		 */
511		if (poll(fds, 1, 2000) <= 0) {
512			if (targ->cancel)
513				break;
514			D("poll error/timeout on queue %d\n", targ->me);
515			goto quit;
516		}
517		/*
518		 * scan our queues and send on those with room
519		 */
520		if (sent > 100000 && !(targ->g->options & OPT_COPY) )
521			options &= ~OPT_COPY;
522		for (i = targ->qfirst; i < targ->qlast && !targ->cancel; i++) {
523			int m, limit = targ->g->burst;
524			if (!continuous && pkts_per_td - sent < limit)
525				limit = pkts_per_td - sent;
526
527			txring = NETMAP_TXRING(nifp, i);
528			if (txring->avail == 0)
529				continue;
530			m = send_packets(txring, &targ->pkt, targ->g->pkt_size,
531					 limit, options);
532			sent += m;
533			targ->count = sent;
534		}
535		if (targ->cancel)
536			break;
537	}
538	/* flush any remaining packets */
539	ioctl(fds[0].fd, NIOCTXSYNC, NULL);
540
541	/* final part: wait all the TX queues to be empty. */
542	for (i = targ->qfirst; i < targ->qlast; i++) {
543		txring = NETMAP_TXRING(nifp, i);
544		while (!NETMAP_TX_RING_EMPTY(txring)) {
545			ioctl(fds[0].fd, NIOCTXSYNC, NULL);
546			usleep(1); /* wait 1 tick */
547		}
548	}
549    }
550
551	gettimeofday(&targ->toc, NULL);
552	targ->completed = 1;
553	targ->count = sent;
554
555quit:
556	/* reset the ``used`` flag. */
557	targ->used = 0;
558
559	return (NULL);
560}
561
562
563static void
564receive_pcap(u_char *user, __unused const struct pcap_pkthdr * h,
565	__unused const u_char * bytes)
566{
567	int *count = (int *)user;
568	(*count)++;
569}
570
571static int
572receive_packets(struct netmap_ring *ring, u_int limit, int skip_payload)
573{
574	u_int cur, rx;
575
576	cur = ring->cur;
577	if (ring->avail < limit)
578		limit = ring->avail;
579	for (rx = 0; rx < limit; rx++) {
580		struct netmap_slot *slot = &ring->slot[cur];
581		char *p = NETMAP_BUF(ring, slot->buf_idx);
582
583		if (!skip_payload)
584			check_payload(p, slot->len);
585
586		cur = NETMAP_RING_NEXT(ring, cur);
587	}
588	ring->avail -= rx;
589	ring->cur = cur;
590
591	return (rx);
592}
593
594static void *
595receiver_body(void *data)
596{
597	struct targ *targ = (struct targ *) data;
598	struct pollfd fds[1];
599	struct netmap_if *nifp = targ->nifp;
600	struct netmap_ring *rxring;
601	int i, received = 0;
602
603	if (setaffinity(targ->thread, targ->affinity))
604		goto quit;
605
606	/* setup poll(2) mechanism. */
607	memset(fds, 0, sizeof(fds));
608	fds[0].fd = targ->fd;
609	fds[0].events = (POLLIN);
610
611	/* unbounded wait for the first packet. */
612	while (!targ->cancel) {
613		i = poll(fds, 1, 1000);
614		if (i > 0 && !(fds[0].revents & POLLERR))
615			break;
616		D("waiting for initial packets, poll returns %d %d", i, fds[0].revents);
617	}
618
619	/* main loop, exit after 1s silence */
620	gettimeofday(&targ->tic, NULL);
621    if (targ->g->use_pcap) {
622	while (!targ->cancel) {
623		pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap, NULL);
624	}
625    } else {
626	while (!targ->cancel) {
627		/* Once we started to receive packets, wait at most 1 seconds
628		   before quitting. */
629		if (poll(fds, 1, 1 * 1000) <= 0) {
630			gettimeofday(&targ->toc, NULL);
631			targ->toc.tv_sec -= 1; /* Subtract timeout time. */
632			break;
633		}
634
635		for (i = targ->qfirst; i < targ->qlast; i++) {
636			int m;
637
638			rxring = NETMAP_RXRING(nifp, i);
639			if (rxring->avail == 0)
640				continue;
641
642			m = receive_packets(rxring, targ->g->burst,
643					SKIP_PAYLOAD);
644			received += m;
645			targ->count = received;
646		}
647
648		// tell the card we have read the data
649		//ioctl(fds[0].fd, NIOCRXSYNC, NULL);
650	}
651    }
652
653	targ->completed = 1;
654	targ->count = received;
655
656quit:
657	/* reset the ``used`` flag. */
658	targ->used = 0;
659
660	return (NULL);
661}
662
/*
 * Format val with two decimals and a K, M or G suffix, dividing by 1000
 * for each step up (at most three times). The result lives in a static
 * buffer that is overwritten by the next call, so it must be consumed
 * before calling again.
 */
static char *
scaled_val(double val)
{
	static char buf[64];
	static const char *const suffix[] = {"", "K", "M", "G"};
	int idx;

	for (idx = 0; idx < 3 && val >= 1000; idx++)
		val /= 1000;

	snprintf(buf, sizeof(buf), "%.2f%s", val, suffix[idx]);
	return (buf);
}
677
/*
 * Print the transmit summary on stdout: number of packets, their size,
 * elapsed time, packet rate and bandwidth, the latter both for the
 * packet bytes alone and with the per-packet Ethernet overhead added.
 *
 * sent is the packet count, size the bytes per packet and delta the
 * elapsed time in seconds.
 */
static void
tx_output(uint64_t sent, int size, double delta)
{
	/*
	 * Assume Ethernet overhead of 24 bytes per packet excluding header:
	 * FCS       4 bytes
	 * Preamble  8 bytes
	 * IFG      12 bytes
	 */
	const uint64_t payload_bytes = sent * size;
	const uint64_t wire_bytes = payload_bytes + sent * 24;
	const double rate_pps = sent / delta;
	const double rate_bps = 8.0 * payload_bytes / delta;
	const double rate_wire_bps = 8.0 * wire_bytes / delta;

	printf("Sent %" PRIu64 " packets, %d bytes each, in %.2f seconds.\n",
	       sent, size, delta);
	/* one printf per value: scaled_val() reuses a single static buffer */
	printf("Speed: %spps. ", scaled_val(rate_pps));
	printf("Bandwidth: %sbps ", scaled_val(rate_bps));
	printf("(%sbps with overhead).\n", scaled_val(rate_wire_bps));

}
699
700
/*
 * Print the receive summary on stdout: number of packets, elapsed time
 * and packet rate.
 *
 * received is the packet count and delta the elapsed time in seconds.
 * The rate is formatted with scaled_val(), like the transmit summary:
 * the unit suffix is bounded to K/M/G and no stray NUL byte is printed
 * for rates below 1000 pps.
 */
static void
rx_output(uint64_t received, double delta)
{
	double pps = received / delta;

	printf("Received %" PRIu64 " packets, in %.2f seconds.\n", received, delta);
	printf("Speed: %spps.\n", scaled_val(pps));
}
717
/*
 * Print the command line help on stderr and terminate the program with
 * exit status 0. Never returns.
 *
 * The text lists every option accepted by the getopt() string in
 * main(), and describes the address sweep suffix as "-n", which is the
 * separator the parsing code looks for.
 */
static void
usage(void)
{
	const char *cmd = "pkt-gen";
	fprintf(stderr,
		"Usage:\n"
		"%s arguments\n"
		"\t-i interface\t\tinterface name\n"
		"\t-t pkts_to_send\talso forces send mode, 0 = continuous\n"
		"\t-r pkts_to_receive\talso forces receive mode\n"
		"\t-l pkts_size\t\tin bytes excluding CRC\n"
		"\t-d dst-ip\t\tend with -n to sweep n addresses\n"
		"\t-s src-ip\t\tend with -n to sweep n addresses\n"
		"\t-D dst-mac\t\tend with -n to sweep n addresses\n"
		"\t-S src-mac\t\tend with -n to sweep n addresses\n"
		"\t-b burst size\t\ttesting, mostly\n"
		"\t-c cores\t\tcores to use\n"
		"\t-p threads\t\tprocesses/threads to use\n"
		"\t-T report_ms\t\tmilliseconds between reports\n"
		"\t-w wait_for_link_time\tin seconds\n"
		"\t-o options\t\ttesting: 1 prefetch, 2 access, 4 copy, 8 memcpy\n"
		"\t-P\t\t\tuse libpcap instead of netmap\n"
		"\t-v\t\t\tbe verbose\n"
		"",
		cmd);

	exit(0);
}
743
744
745int
746main(int arc, char **argv)
747{
748	int i, fd;
749	char pcap_errbuf[PCAP_ERRBUF_SIZE];
750
751	struct glob_arg g;
752
753	struct nmreq nmr;
754	void *mmap_addr;		/* the mmap address */
755	void *(*td_body)(void *) = receiver_body;
756	int ch;
757	int report_interval = 1000;	/* report interval */
758	char *ifname = NULL;
759	int wait_link = 2;
760	int devqueues = 1;	/* how many device queues */
761
762	bzero(&g, sizeof(g));
763
764	g.src_ip = "10.0.0.1";
765	g.dst_ip = "10.1.0.1";
766	g.dst_mac = "ff:ff:ff:ff:ff:ff";
767	g.src_mac = NULL;
768	g.pkt_size = 60;
769	g.burst = 512;		// default
770	g.nthreads = 1;
771	g.cpus = 1;
772
773	while ( (ch = getopt(arc, argv,
774			"i:t:r:l:d:s:D:S:b:c:o:p:PT:w:v")) != -1) {
775		switch(ch) {
776		default:
777			D("bad option %c %s", ch, optarg);
778			usage();
779			break;
780		case 'o':
781			g.options = atoi(optarg);
782			break;
783		case 'i':	/* interface */
784			ifname = optarg;
785			break;
786		case 't':	/* send */
787			td_body = sender_body;
788			g.npackets = atoi(optarg);
789			break;
790		case 'r':	/* receive */
791			td_body = receiver_body;
792			g.npackets = atoi(optarg);
793			break;
794		case 'l':	/* pkt_size */
795			g.pkt_size = atoi(optarg);
796			break;
797		case 'd':
798			g.dst_ip = optarg;
799			break;
800		case 's':
801			g.src_ip = optarg;
802			break;
803		case 'T':	/* report interval */
804			report_interval = atoi(optarg);
805			break;
806		case 'w':
807			wait_link = atoi(optarg);
808			break;
809		case 'b':	/* burst */
810			g.burst = atoi(optarg);
811			break;
812		case 'c':
813			g.cpus = atoi(optarg);
814			break;
815		case 'p':
816			g.nthreads = atoi(optarg);
817			break;
818
819		case 'P':
820			g.use_pcap = 1;
821			break;
822
823		case 'D': /* destination mac */
824			g.dst_mac = optarg;
825	{
826		struct ether_addr *mac = ether_aton(g.dst_mac);
827		D("ether_aton(%s) gives %p", g.dst_mac, mac);
828	}
829			break;
830		case 'S': /* source mac */
831			g.src_mac = optarg;
832			break;
833		case 'v':
834			verbose++;
835		}
836	}
837
838	if (ifname == NULL) {
839		D("missing ifname");
840		usage();
841	}
842	{
843		int n = system_ncpus();
844		if (g.cpus < 0 || g.cpus > n) {
845			D("%d cpus is too high, have only %d cpus", g.cpus, n);
846			usage();
847		}
848		if (g.cpus == 0)
849			g.cpus = n;
850	}
851	if (g.pkt_size < 16 || g.pkt_size > 1536) {
852		D("bad pktsize %d\n", g.pkt_size);
853		usage();
854	}
855
856	if (td_body == sender_body && g.src_mac == NULL) {
857		static char mybuf[20] = "ff:ff:ff:ff:ff:ff";
858		/* retrieve source mac address. */
859		if (source_hwaddr(ifname, mybuf) == -1) {
860			D("Unable to retrieve source mac");
861			// continue, fail later
862		}
863		g.src_mac = mybuf;
864	}
865
866    if (g.use_pcap) {
867	D("using pcap on %s", ifname);
868	g.p = pcap_open_live(ifname, 0, 1, 100, pcap_errbuf);
869	if (g.p == NULL) {
870		D("cannot open pcap on %s", ifname);
871		usage();
872	}
873	mmap_addr = NULL;
874	fd = -1;
875    } else {
876	bzero(&nmr, sizeof(nmr));
877	nmr.nr_version = NETMAP_API;
878	/*
879	 * Open the netmap device to fetch the number of queues of our
880	 * interface.
881	 *
882	 * The first NIOCREGIF also detaches the card from the
883	 * protocol stack and may cause a reset of the card,
884	 * which in turn may take some time for the PHY to
885	 * reconfigure.
886	 */
887	fd = open("/dev/netmap", O_RDWR);
888	if (fd == -1) {
889		D("Unable to open /dev/netmap");
890		// fail later
891	} else {
892		if ((ioctl(fd, NIOCGINFO, &nmr)) == -1) {
893			D("Unable to get if info without name");
894		} else {
895			D("map size is %d Kb", nmr.nr_memsize >> 10);
896		}
897		bzero(&nmr, sizeof(nmr));
898		nmr.nr_version = NETMAP_API;
899		strncpy(nmr.nr_name, ifname, sizeof(nmr.nr_name));
900		if ((ioctl(fd, NIOCGINFO, &nmr)) == -1) {
901			D("Unable to get if info for %s", ifname);
902		}
903		devqueues = nmr.nr_rx_rings;
904	}
905
906	/* validate provided nthreads. */
907	if (g.nthreads < 1 || g.nthreads > devqueues) {
908		D("bad nthreads %d, have %d queues", g.nthreads, devqueues);
909		// continue, fail later
910	}
911
912	/*
913	 * Map the netmap shared memory: instead of issuing mmap()
914	 * inside the body of the threads, we prefer to keep this
915	 * operation here to simplify the thread logic.
916	 */
917	D("mmapping %d Kbytes", nmr.nr_memsize>>10);
918	mmap_addr = (struct netmap_d *) mmap(0, nmr.nr_memsize,
919					    PROT_WRITE | PROT_READ,
920					    MAP_SHARED, fd, 0);
921	if (mmap_addr == MAP_FAILED) {
922		D("Unable to mmap %d KB", nmr.nr_memsize >> 10);
923		// continue, fail later
924	}
925
926	/*
927	 * Register the interface on the netmap device: from now on,
928	 * we can operate on the network interface without any
929	 * interference from the legacy network stack.
930	 *
931	 * We decide to put the first interface registration here to
932	 * give time to cards that take a long time to reset the PHY.
933	 */
934	nmr.nr_version = NETMAP_API;
935	if (ioctl(fd, NIOCREGIF, &nmr) == -1) {
936		D("Unable to register interface %s", ifname);
937		//continue, fail later
938	}
939
940
941	/* Print some debug information. */
942	fprintf(stdout,
943		"%s %s: %d queues, %d threads and %d cpus.\n",
944		(td_body == sender_body) ? "Sending on" : "Receiving from",
945		ifname,
946		devqueues,
947		g.nthreads,
948		g.cpus);
949	if (td_body == sender_body) {
950		fprintf(stdout, "%s -> %s (%s -> %s)\n",
951			g.src_ip, g.dst_ip,
952			g.src_mac, g.dst_mac);
953	}
954
955	/* Exit if something went wrong. */
956	if (fd < 0) {
957		D("aborting");
958		usage();
959	}
960    }
961
962	if (g.options) {
963		D("special options:%s%s%s%s\n",
964			g.options & OPT_PREFETCH ? " prefetch" : "",
965			g.options & OPT_ACCESS ? " access" : "",
966			g.options & OPT_MEMCPY ? " memcpy" : "",
967			g.options & OPT_COPY ? " copy" : "");
968	}
969	/* Wait for PHY reset. */
970	D("Wait %d secs for phy reset", wait_link);
971	sleep(wait_link);
972	D("Ready...");
973
974	/* Install ^C handler. */
975	global_nthreads = g.nthreads;
976	signal(SIGINT, sigint_h);
977
978	if (g.use_pcap) {
979		g.p = pcap_open_live(ifname, 0, 1, 100, NULL);
980		if (g.p == NULL) {
981			D("cannot open pcap on %s", ifname);
982			usage();
983		} else
984			D("using pcap %p on %s", g.p, ifname);
985	}
986
987	targs = calloc(g.nthreads, sizeof(*targs));
988	/*
989	 * Now create the desired number of threads, each one
990	 * using a single descriptor.
991 	 */
992	for (i = 0; i < g.nthreads; i++) {
993		struct netmap_if *tnifp;
994		struct nmreq tifreq;
995		int tfd;
996
997	    if (g.use_pcap) {
998		tfd = -1;
999		tnifp = NULL;
1000	    } else {
1001		/* register interface. */
1002		tfd = open("/dev/netmap", O_RDWR);
1003		if (tfd == -1) {
1004			D("Unable to open /dev/netmap");
1005			continue;
1006		}
1007
1008		bzero(&tifreq, sizeof(tifreq));
1009		strncpy(tifreq.nr_name, ifname, sizeof(tifreq.nr_name));
1010		tifreq.nr_version = NETMAP_API;
1011		tifreq.nr_ringid = (g.nthreads > 1) ? (i | NETMAP_HW_RING) : 0;
1012
1013		/*
1014		 * if we are acting as a receiver only, do not touch the transmit ring.
1015		 * This is not the default because many apps may use the interface
1016		 * in both directions, but a pure receiver does not.
1017		 */
1018		if (td_body == receiver_body) {
1019			tifreq.nr_ringid |= NETMAP_NO_TX_POLL;
1020		}
1021
1022		if ((ioctl(tfd, NIOCREGIF, &tifreq)) == -1) {
1023			D("Unable to register %s", ifname);
1024			continue;
1025		}
1026		tnifp = NETMAP_IF(mmap_addr, tifreq.nr_offset);
1027	    }
1028		/* start threads. */
1029		bzero(&targs[i], sizeof(targs[i]));
1030		targs[i].g = &g;
1031		targs[i].used = 1;
1032		targs[i].completed = 0;
1033		targs[i].fd = tfd;
1034		targs[i].nmr = tifreq;
1035		targs[i].nifp = tnifp;
1036		targs[i].qfirst = (g.nthreads > 1) ? i : 0;
1037		targs[i].qlast = (g.nthreads > 1) ? i+1 :
1038			(td_body == receiver_body ? tifreq.nr_rx_rings : tifreq.nr_tx_rings);
1039		targs[i].me = i;
1040		targs[i].affinity = g.cpus ? i % g.cpus : -1;
1041		if (td_body == sender_body) {
1042			/* initialize the packet to send. */
1043			initialize_packet(&targs[i]);
1044		}
1045
1046		if (pthread_create(&targs[i].thread, NULL, td_body,
1047				   &targs[i]) == -1) {
1048			D("Unable to create thread %d", i);
1049			targs[i].used = 0;
1050		}
1051	}
1052
1053    {
1054	uint64_t my_count = 0, prev = 0;
1055	uint64_t count = 0;
1056	double delta_t;
1057	struct timeval tic, toc;
1058
1059	gettimeofday(&toc, NULL);
1060	for (;;) {
1061		struct timeval now, delta;
1062		uint64_t pps;
1063		int done = 0;
1064
1065		delta.tv_sec = report_interval/1000;
1066		delta.tv_usec = (report_interval%1000)*1000;
1067		select(0, NULL, NULL, NULL, &delta);
1068		gettimeofday(&now, NULL);
1069		timersub(&now, &toc, &toc);
1070		my_count = 0;
1071		for (i = 0; i < g.nthreads; i++) {
1072			my_count += targs[i].count;
1073			if (targs[i].used == 0)
1074				done++;
1075		}
1076		pps = toc.tv_sec* 1000000 + toc.tv_usec;
1077		if (pps < 10000)
1078			continue;
1079		pps = (my_count - prev)*1000000 / pps;
1080		D("%" PRIu64 " pps", pps);
1081		prev = my_count;
1082		toc = now;
1083		if (done == g.nthreads)
1084			break;
1085	}
1086
1087	timerclear(&tic);
1088	timerclear(&toc);
1089	for (i = 0; i < g.nthreads; i++) {
1090		/*
1091		 * Join active threads, unregister interfaces and close
1092		 * file descriptors.
1093		 */
1094		pthread_join(targs[i].thread, NULL);
1095		ioctl(targs[i].fd, NIOCUNREGIF, &targs[i].nmr);
1096		close(targs[i].fd);
1097
1098		if (targs[i].completed == 0)
1099			continue;
1100
1101		/*
1102		 * Collect threads output and extract information about
1103		 * how long it took to send all the packets.
1104		 */
1105		count += targs[i].count;
1106		if (!timerisset(&tic) || timercmp(&targs[i].tic, &tic, <))
1107			tic = targs[i].tic;
1108		if (!timerisset(&toc) || timercmp(&targs[i].toc, &toc, >))
1109			toc = targs[i].toc;
1110	}
1111
1112	/* print output. */
1113	timersub(&toc, &tic, &toc);
1114	delta_t = toc.tv_sec + 1e-6* toc.tv_usec;
1115	if (td_body == sender_body)
1116		tx_output(count, g.pkt_size, delta_t);
1117	else
1118		rx_output(count, delta_t);
1119    }
1120
1121    if (g.use_pcap == 0) {
1122	ioctl(fd, NIOCUNREGIF, &nmr);
1123	munmap(mmap_addr, nmr.nr_memsize);
1124	close(fd);
1125    }
1126
1127	return (0);
1128}
1129/* end of file */
1130