1/* 2 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. 3 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27/* 28 * $FreeBSD$ 29 * $Id: pkt-gen.c 12346 2013-06-12 17:36:25Z luigi $ 30 * 31 * Example program to show how to build a multithreaded packet 32 * source/sink using the netmap device. 33 * 34 * In this example we create a programmable number of threads 35 * to take care of all the queues of the interface used to 36 * send or receive traffic. 37 * 38 */ 39 40#define _GNU_SOURCE /* for CPU_SET() */ 41#include <stdio.h> 42#define NETMAP_WITH_LIBS 43#include <net/netmap_user.h> 44 45 46#include <ctype.h> // isprint() 47#include <unistd.h> // sysconf() 48#include <sys/poll.h> 49#include <arpa/inet.h> /* ntohs */ 50#include <sys/sysctl.h> /* sysctl */ 51#include <ifaddrs.h> /* getifaddrs */ 52#include <net/ethernet.h> 53#include <netinet/in.h> 54#include <netinet/ip.h> 55#include <netinet/udp.h> 56 57#include <pthread.h> 58 59#ifndef NO_PCAP 60#include <pcap/pcap.h> 61#endif 62 63#ifdef linux 64 65#define cpuset_t cpu_set_t 66 67#define ifr_flagshigh ifr_flags /* only the low 16 bits here */ 68#define IFF_PPROMISC IFF_PROMISC /* IFF_PPROMISC does not exist */ 69#include <linux/ethtool.h> 70#include <linux/sockios.h> 71 72#define CLOCK_REALTIME_PRECISE CLOCK_REALTIME 73#include <netinet/ether.h> /* ether_aton */ 74#include <linux/if_packet.h> /* sockaddr_ll */ 75#endif /* linux */ 76 77#ifdef __FreeBSD__ 78#include <sys/endian.h> /* le64toh */ 79#include <machine/param.h> 80 81#include <pthread_np.h> /* pthread w/ affinity */ 82#include <sys/cpuset.h> /* cpu_set */ 83#include <net/if_dl.h> /* LLADDR */ 84#endif /* __FreeBSD__ */ 85 86#ifdef __APPLE__ 87 88#define cpuset_t uint64_t // XXX 89static inline void CPU_ZERO(cpuset_t *p) 90{ 91 *p = 0; 92} 93 94static inline void CPU_SET(uint32_t i, cpuset_t *p) 95{ 96 *p |= 1<< (i & 0x3f); 97} 98 99#define pthread_setaffinity_np(a, b, c) ((void)a, 0) 100 101#define ifr_flagshigh ifr_flags // XXX 102#define IFF_PPROMISC IFF_PROMISC 103#include <net/if_dl.h> /* LLADDR */ 104#define clock_gettime(a,b) \ 105 do {struct timespec t0 = {0,0}; *(b) = t0; } while (0) 106#endif /* __APPLE__ */ 107 108const char *default_payload="netmap pkt-gen DIRECT payload\n" 109 "http://info.iet.unipi.it/~luigi/netmap/ "; 110 111const char *indirect_payload="netmap pkt-gen indirect payload\n" 112 "http://info.iet.unipi.it/~luigi/netmap/ "; 113 114int verbose = 0; 115 116#define SKIP_PAYLOAD 1 /* do not check payload. XXX unused */ 117 118 119#define VIRT_HDR_1 10 /* length of a base vnet-hdr */ 120#define VIRT_HDR_2 12 /* length of the extenede vnet-hdr */ 121#define VIRT_HDR_MAX VIRT_HDR_2 122struct virt_header { 123 uint8_t fields[VIRT_HDR_MAX]; 124}; 125 126struct pkt { 127 struct virt_header vh; 128 struct ether_header eh; 129 struct ip ip; 130 struct udphdr udp; 131 uint8_t body[2048]; // XXX hardwired 132} __attribute__((__packed__)); 133 134struct ip_range { 135 char *name; 136 uint32_t start, end; /* same as struct in_addr */ 137 uint16_t port0, port1; 138}; 139 140struct mac_range { 141 char *name; 142 struct ether_addr start, end; 143}; 144 145/* ifname can be netmap:foo-xxxx */ 146#define MAX_IFNAMELEN 64 /* our buffer for ifname */ 147/* 148 * global arguments for all threads 149 */ 150 151struct glob_arg { 152 struct ip_range src_ip; 153 struct ip_range dst_ip; 154 struct mac_range dst_mac; 155 struct mac_range src_mac; 156 int pkt_size; 157 int burst; 158 int forever; 159 int npackets; /* total packets to send */ 160 int frags; /* fragments per packet */ 161 int nthreads; 162 int cpus; 163 int options; /* testing */ 164#define OPT_PREFETCH 1 165#define OPT_ACCESS 2 166#define OPT_COPY 4 167#define OPT_MEMCPY 8 168#define OPT_TS 16 /* add a timestamp */ 169#define OPT_INDIRECT 32 /* use indirect buffers, tx only */ 170#define OPT_DUMP 64 /* dump rx/tx traffic */ 171 int dev_type; 172#ifndef NO_PCAP 173 pcap_t *p; 174#endif 175 176 int tx_rate; 177 struct timespec tx_period; 178 179 int affinity; 180 int main_fd; 181 struct nm_desc *nmd; 182 uint64_t nmd_flags; 183 int report_interval; /* milliseconds between prints */ 184 void *(*td_body)(void *); 185 void *mmap_addr; 186 char ifname[MAX_IFNAMELEN]; 187 char *nmr_config; 188 int dummy_send; 189 int virt_header; /* send also the virt_header */ 190 int extra_bufs; /* goes in nr_arg3 */ 191}; 192enum dev_type { DEV_NONE, DEV_NETMAP, DEV_PCAP, DEV_TAP }; 193 194 195/* 196 * Arguments for a new thread. The same structure is used by 197 * the source and the sink 198 */ 199struct targ { 200 struct glob_arg *g; 201 int used; 202 int completed; 203 int cancel; 204 int fd; 205 struct nm_desc *nmd; 206 volatile uint64_t count; 207 struct timespec tic, toc; 208 int me; 209 pthread_t thread; 210 int affinity; 211 212 struct pkt pkt; 213}; 214 215 216/* 217 * extract the extremes from a range of ipv4 addresses. 218 * addr_lo[-addr_hi][:port_lo[-port_hi]] 219 */ 220static void 221extract_ip_range(struct ip_range *r) 222{ 223 char *ap, *pp; 224 struct in_addr a; 225 226 if (verbose) 227 D("extract IP range from %s", r->name); 228 r->port0 = r->port1 = 0; 229 r->start = r->end = 0; 230 231 /* the first - splits start/end of range */ 232 ap = index(r->name, '-'); /* do we have ports ? */ 233 if (ap) { 234 *ap++ = '\0'; 235 } 236 /* grab the initial values (mandatory) */ 237 pp = index(r->name, ':'); 238 if (pp) { 239 *pp++ = '\0'; 240 r->port0 = r->port1 = strtol(pp, NULL, 0); 241 }; 242 inet_aton(r->name, &a); 243 r->start = r->end = ntohl(a.s_addr); 244 if (ap) { 245 pp = index(ap, ':'); 246 if (pp) { 247 *pp++ = '\0'; 248 if (*pp) 249 r->port1 = strtol(pp, NULL, 0); 250 } 251 if (*ap) { 252 inet_aton(ap, &a); 253 r->end = ntohl(a.s_addr); 254 } 255 } 256 if (r->port0 > r->port1) { 257 uint16_t tmp = r->port0; 258 r->port0 = r->port1; 259 r->port1 = tmp; 260 } 261 if (r->start > r->end) { 262 uint32_t tmp = r->start; 263 r->start = r->end; 264 r->end = tmp; 265 } 266 { 267 struct in_addr a; 268 char buf1[16]; // one ip address 269 270 a.s_addr = htonl(r->end); 271 strncpy(buf1, inet_ntoa(a), sizeof(buf1)); 272 a.s_addr = htonl(r->start); 273 if (1) 274 D("range is %s:%d to %s:%d", 275 inet_ntoa(a), r->port0, buf1, r->port1); 276 } 277} 278 279static void 280extract_mac_range(struct mac_range *r) 281{ 282 if (verbose) 283 D("extract MAC range from %s", r->name); 284 bcopy(ether_aton(r->name), &r->start, 6); 285 bcopy(ether_aton(r->name), &r->end, 6); 286#if 0 287 bcopy(targ->src_mac, eh->ether_shost, 6); 288 p = index(targ->g->src_mac, '-'); 289 if (p) 290 targ->src_mac_range = atoi(p+1); 291 292 bcopy(ether_aton(targ->g->dst_mac), targ->dst_mac, 6); 293 bcopy(targ->dst_mac, eh->ether_dhost, 6); 294 p = index(targ->g->dst_mac, '-'); 295 if (p) 296 targ->dst_mac_range = atoi(p+1); 297#endif 298 if (verbose) 299 D("%s starts at %s", r->name, ether_ntoa(&r->start)); 300} 301 302static struct targ *targs; 303static int global_nthreads; 304 305/* control-C handler */ 306static void 307sigint_h(int sig) 308{ 309 int i; 310 311 (void)sig; /* UNUSED */ 312 for (i = 0; i < global_nthreads; i++) { 313 targs[i].cancel = 1; 314 } 315 signal(SIGINT, SIG_DFL); 316} 317 318/* sysctl wrapper to return the number of active CPUs */ 319static int 320system_ncpus(void) 321{ 322 int ncpus; 323#if defined (__FreeBSD__) 324 int mib[2] = { CTL_HW, HW_NCPU }; 325 size_t len = sizeof(mib); 326 sysctl(mib, 2, &ncpus, &len, NULL, 0); 327#elif defined(linux) 328 ncpus = sysconf(_SC_NPROCESSORS_ONLN); 329#else /* others */ 330 ncpus = 1; 331#endif /* others */ 332 return (ncpus); 333} 334 335#ifdef __linux__ 336#define sockaddr_dl sockaddr_ll 337#define sdl_family sll_family 338#define AF_LINK AF_PACKET 339#define LLADDR(s) s->sll_addr; 340#include <linux/if_tun.h> 341#define TAP_CLONEDEV "/dev/net/tun" 342#endif /* __linux__ */ 343 344#ifdef __FreeBSD__ 345#include <net/if_tun.h> 346#define TAP_CLONEDEV "/dev/tap" 347#endif /* __FreeBSD */ 348 349#ifdef __APPLE__ 350// #warning TAP not supported on apple ? 351#include <net/if_utun.h> 352#define TAP_CLONEDEV "/dev/tap" 353#endif /* __APPLE__ */ 354 355 356/* 357 * parse the vale configuration in conf and put it in nmr. 358 * Return the flag set if necessary. 359 * The configuration may consist of 0 to 4 numbers separated 360 * by commas: #tx-slots,#rx-slots,#tx-rings,#rx-rings. 361 * Missing numbers or zeroes stand for default values. 362 * As an additional convenience, if exactly one number 363 * is specified, then this is assigned to both #tx-slots and #rx-slots. 364 * If there is no 4th number, then the 3rd is assigned to both #tx-rings 365 * and #rx-rings. 366 */ 367int 368parse_nmr_config(const char* conf, struct nmreq *nmr) 369{ 370 char *w, *tok; 371 int i, v; 372 373 nmr->nr_tx_rings = nmr->nr_rx_rings = 0; 374 nmr->nr_tx_slots = nmr->nr_rx_slots = 0; 375 if (conf == NULL || ! *conf) 376 return 0; 377 w = strdup(conf); 378 for (i = 0, tok = strtok(w, ","); tok; i++, tok = strtok(NULL, ",")) { 379 v = atoi(tok); 380 switch (i) { 381 case 0: 382 nmr->nr_tx_slots = nmr->nr_rx_slots = v; 383 break; 384 case 1: 385 nmr->nr_rx_slots = v; 386 break; 387 case 2: 388 nmr->nr_tx_rings = nmr->nr_rx_rings = v; 389 break; 390 case 3: 391 nmr->nr_rx_rings = v; 392 break; 393 default: 394 D("ignored config: %s", tok); 395 break; 396 } 397 } 398 D("txr %d txd %d rxr %d rxd %d", 399 nmr->nr_tx_rings, nmr->nr_tx_slots, 400 nmr->nr_rx_rings, nmr->nr_rx_slots); 401 free(w); 402 return (nmr->nr_tx_rings || nmr->nr_tx_slots || 403 nmr->nr_rx_rings || nmr->nr_rx_slots) ? 404 NM_OPEN_RING_CFG : 0; 405} 406 407 408/* 409 * locate the src mac address for our interface, put it 410 * into the user-supplied buffer. return 0 if ok, -1 on error. 411 */ 412static int 413source_hwaddr(const char *ifname, char *buf) 414{ 415 struct ifaddrs *ifaphead, *ifap; 416 int l = sizeof(ifap->ifa_name); 417 418 if (getifaddrs(&ifaphead) != 0) { 419 D("getifaddrs %s failed", ifname); 420 return (-1); 421 } 422 423 for (ifap = ifaphead; ifap; ifap = ifap->ifa_next) { 424 struct sockaddr_dl *sdl = 425 (struct sockaddr_dl *)ifap->ifa_addr; 426 uint8_t *mac; 427 428 if (!sdl || sdl->sdl_family != AF_LINK) 429 continue; 430 if (strncmp(ifap->ifa_name, ifname, l) != 0) 431 continue; 432 mac = (uint8_t *)LLADDR(sdl); 433 sprintf(buf, "%02x:%02x:%02x:%02x:%02x:%02x", 434 mac[0], mac[1], mac[2], 435 mac[3], mac[4], mac[5]); 436 if (verbose) 437 D("source hwaddr %s", buf); 438 break; 439 } 440 freeifaddrs(ifaphead); 441 return ifap ? 0 : 1; 442} 443 444 445/* set the thread affinity. */ 446static int 447setaffinity(pthread_t me, int i) 448{ 449 cpuset_t cpumask; 450 451 if (i == -1) 452 return 0; 453 454 /* Set thread affinity affinity.*/ 455 CPU_ZERO(&cpumask); 456 CPU_SET(i, &cpumask); 457 458 if (pthread_setaffinity_np(me, sizeof(cpuset_t), &cpumask) != 0) { 459 D("Unable to set affinity: %s", strerror(errno)); 460 return 1; 461 } 462 return 0; 463} 464 465/* Compute the checksum of the given ip header. */ 466static uint16_t 467checksum(const void *data, uint16_t len, uint32_t sum) 468{ 469 const uint8_t *addr = data; 470 uint32_t i; 471 472 /* Checksum all the pairs of bytes first... */ 473 for (i = 0; i < (len & ~1U); i += 2) { 474 sum += (u_int16_t)ntohs(*((u_int16_t *)(addr + i))); 475 if (sum > 0xFFFF) 476 sum -= 0xFFFF; 477 } 478 /* 479 * If there's a single byte left over, checksum it, too. 480 * Network byte order is big-endian, so the remaining byte is 481 * the high byte. 482 */ 483 if (i < len) { 484 sum += addr[i] << 8; 485 if (sum > 0xFFFF) 486 sum -= 0xFFFF; 487 } 488 return sum; 489} 490 491static u_int16_t 492wrapsum(u_int32_t sum) 493{ 494 sum = ~sum & 0xFFFF; 495 return (htons(sum)); 496} 497 498/* Check the payload of the packet for errors (use it for debug). 499 * Look for consecutive ascii representations of the size of the packet. 500 */ 501static void 502dump_payload(char *p, int len, struct netmap_ring *ring, int cur) 503{ 504 char buf[128]; 505 int i, j, i0; 506 507 /* get the length in ASCII of the length of the packet. */ 508 509 printf("ring %p cur %5d [buf %6d flags 0x%04x len %5d]\n", 510 ring, cur, ring->slot[cur].buf_idx, 511 ring->slot[cur].flags, len); 512 /* hexdump routine */ 513 for (i = 0; i < len; ) { 514 memset(buf, sizeof(buf), ' '); 515 sprintf(buf, "%5d: ", i); 516 i0 = i; 517 for (j=0; j < 16 && i < len; i++, j++) 518 sprintf(buf+7+j*3, "%02x ", (uint8_t)(p[i])); 519 i = i0; 520 for (j=0; j < 16 && i < len; i++, j++) 521 sprintf(buf+7+j + 48, "%c", 522 isprint(p[i]) ? p[i] : '.'); 523 printf("%s\n", buf); 524 } 525} 526 527/* 528 * Fill a packet with some payload. 529 * We create a UDP packet so the payload starts at 530 * 14+20+8 = 42 bytes. 531 */ 532#ifdef __linux__ 533#define uh_sport source 534#define uh_dport dest 535#define uh_ulen len 536#define uh_sum check 537#endif /* linux */ 538 539/* 540 * increment the addressed in the packet, 541 * starting from the least significant field. 542 * DST_IP DST_PORT SRC_IP SRC_PORT 543 */ 544static void 545update_addresses(struct pkt *pkt, struct glob_arg *g) 546{ 547 uint32_t a; 548 uint16_t p; 549 struct ip *ip = &pkt->ip; 550 struct udphdr *udp = &pkt->udp; 551 552 do { 553 p = ntohs(udp->uh_sport); 554 if (p < g->src_ip.port1) { /* just inc, no wrap */ 555 udp->uh_sport = htons(p + 1); 556 break; 557 } 558 udp->uh_sport = htons(g->src_ip.port0); 559 560 a = ntohl(ip->ip_src.s_addr); 561 if (a < g->src_ip.end) { /* just inc, no wrap */ 562 ip->ip_src.s_addr = htonl(a + 1); 563 break; 564 } 565 ip->ip_src.s_addr = htonl(g->src_ip.start); 566 567 udp->uh_sport = htons(g->src_ip.port0); 568 p = ntohs(udp->uh_dport); 569 if (p < g->dst_ip.port1) { /* just inc, no wrap */ 570 udp->uh_dport = htons(p + 1); 571 break; 572 } 573 udp->uh_dport = htons(g->dst_ip.port0); 574 575 a = ntohl(ip->ip_dst.s_addr); 576 if (a < g->dst_ip.end) { /* just inc, no wrap */ 577 ip->ip_dst.s_addr = htonl(a + 1); 578 break; 579 } 580 ip->ip_dst.s_addr = htonl(g->dst_ip.start); 581 } while (0); 582 // update checksum 583} 584 585/* 586 * initialize one packet and prepare for the next one. 587 * The copy could be done better instead of repeating it each time. 588 */ 589static void 590initialize_packet(struct targ *targ) 591{ 592 struct pkt *pkt = &targ->pkt; 593 struct ether_header *eh; 594 struct ip *ip; 595 struct udphdr *udp; 596 uint16_t paylen = targ->g->pkt_size - sizeof(*eh) - sizeof(struct ip); 597 const char *payload = targ->g->options & OPT_INDIRECT ? 598 indirect_payload : default_payload; 599 int i, l0 = strlen(payload); 600 601 /* create a nice NUL-terminated string */ 602 for (i = 0; i < paylen; i += l0) { 603 if (l0 > paylen - i) 604 l0 = paylen - i; // last round 605 bcopy(payload, pkt->body + i, l0); 606 } 607 pkt->body[i-1] = '\0'; 608 ip = &pkt->ip; 609 610 /* prepare the headers */ 611 ip->ip_v = IPVERSION; 612 ip->ip_hl = 5; 613 ip->ip_id = 0; 614 ip->ip_tos = IPTOS_LOWDELAY; 615 ip->ip_len = ntohs(targ->g->pkt_size - sizeof(*eh)); 616 ip->ip_id = 0; 617 ip->ip_off = htons(IP_DF); /* Don't fragment */ 618 ip->ip_ttl = IPDEFTTL; 619 ip->ip_p = IPPROTO_UDP; 620 ip->ip_dst.s_addr = htonl(targ->g->dst_ip.start); 621 ip->ip_src.s_addr = htonl(targ->g->src_ip.start); 622 ip->ip_sum = wrapsum(checksum(ip, sizeof(*ip), 0)); 623 624 625 udp = &pkt->udp; 626 udp->uh_sport = htons(targ->g->src_ip.port0); 627 udp->uh_dport = htons(targ->g->dst_ip.port0); 628 udp->uh_ulen = htons(paylen); 629 /* Magic: taken from sbin/dhclient/packet.c */ 630 udp->uh_sum = wrapsum(checksum(udp, sizeof(*udp), 631 checksum(pkt->body, 632 paylen - sizeof(*udp), 633 checksum(&ip->ip_src, 2 * sizeof(ip->ip_src), 634 IPPROTO_UDP + (u_int32_t)ntohs(udp->uh_ulen) 635 ) 636 ) 637 )); 638 639 eh = &pkt->eh; 640 bcopy(&targ->g->src_mac.start, eh->ether_shost, 6); 641 bcopy(&targ->g->dst_mac.start, eh->ether_dhost, 6); 642 eh->ether_type = htons(ETHERTYPE_IP); 643 644 bzero(&pkt->vh, sizeof(pkt->vh)); 645 // dump_payload((void *)pkt, targ->g->pkt_size, NULL, 0); 646} 647 648 649 650/* 651 * create and enqueue a batch of packets on a ring. 652 * On the last one set NS_REPORT to tell the driver to generate 653 * an interrupt when done. 654 */ 655static int 656send_packets(struct netmap_ring *ring, struct pkt *pkt, void *frame, 657 int size, struct glob_arg *g, u_int count, int options, 658 u_int nfrags) 659{ 660 u_int n, sent, cur = ring->cur; 661 u_int fcnt; 662 663 n = nm_ring_space(ring); 664 if (n < count) 665 count = n; 666 if (count < nfrags) { 667 D("truncating packet, no room for frags %d %d", 668 count, nfrags); 669 } 670#if 0 671 if (options & (OPT_COPY | OPT_PREFETCH) ) { 672 for (sent = 0; sent < count; sent++) { 673 struct netmap_slot *slot = &ring->slot[cur]; 674 char *p = NETMAP_BUF(ring, slot->buf_idx); 675 676 __builtin_prefetch(p); 677 cur = nm_ring_next(ring, cur); 678 } 679 cur = ring->cur; 680 } 681#endif 682 for (fcnt = nfrags, sent = 0; sent < count; sent++) { 683 struct netmap_slot *slot = &ring->slot[cur]; 684 char *p = NETMAP_BUF(ring, slot->buf_idx); 685 686 slot->flags = 0; 687 if (options & OPT_INDIRECT) { 688 slot->flags |= NS_INDIRECT; 689 slot->ptr = (uint64_t)frame; 690 } else if (options & OPT_COPY) { 691 nm_pkt_copy(frame, p, size); 692 if (fcnt == nfrags) 693 update_addresses(pkt, g); 694 } else if (options & OPT_MEMCPY) { 695 memcpy(p, frame, size); 696 if (fcnt == nfrags) 697 update_addresses(pkt, g); 698 } else if (options & OPT_PREFETCH) { 699 __builtin_prefetch(p); 700 } 701 if (options & OPT_DUMP) 702 dump_payload(p, size, ring, cur); 703 slot->len = size; 704 if (--fcnt > 0) 705 slot->flags |= NS_MOREFRAG; 706 else 707 fcnt = nfrags; 708 if (sent == count - 1) { 709 slot->flags &= ~NS_MOREFRAG; 710 slot->flags |= NS_REPORT; 711 } 712 cur = nm_ring_next(ring, cur); 713 } 714 ring->head = ring->cur = cur; 715 716 return (sent); 717} 718 719/* 720 * Send a packet, and wait for a response. 721 * The payload (after UDP header, ofs 42) has a 4-byte sequence 722 * followed by a struct timeval (or bintime?) 723 */ 724#define PAY_OFS 42 /* where in the pkt... */ 725 726static void * 727pinger_body(void *data) 728{ 729 struct targ *targ = (struct targ *) data; 730 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 731 struct netmap_if *nifp = targ->nmd->nifp; 732 int i, rx = 0, n = targ->g->npackets; 733 void *frame; 734 int size; 735 uint32_t sent = 0; 736 struct timespec ts, now, last_print; 737 uint32_t count = 0, min = 1000000000, av = 0; 738 739 frame = &targ->pkt; 740 frame += sizeof(targ->pkt.vh) - targ->g->virt_header; 741 size = targ->g->pkt_size + targ->g->virt_header; 742 743 744 if (targ->g->nthreads > 1) { 745 D("can only ping with 1 thread"); 746 return NULL; 747 } 748 749 clock_gettime(CLOCK_REALTIME_PRECISE, &last_print); 750 now = last_print; 751 while (n == 0 || (int)sent < n) { 752 struct netmap_ring *ring = NETMAP_TXRING(nifp, 0); 753 struct netmap_slot *slot; 754 char *p; 755 for (i = 0; i < 1; i++) { /* XXX why the loop for 1 pkt ? */ 756 slot = &ring->slot[ring->cur]; 757 slot->len = size; 758 p = NETMAP_BUF(ring, slot->buf_idx); 759 760 if (nm_ring_empty(ring)) { 761 D("-- ouch, cannot send"); 762 } else { 763 nm_pkt_copy(frame, p, size); 764 clock_gettime(CLOCK_REALTIME_PRECISE, &ts); 765 bcopy(&sent, p+42, sizeof(sent)); 766 bcopy(&ts, p+46, sizeof(ts)); 767 sent++; 768 ring->head = ring->cur = nm_ring_next(ring, ring->cur); 769 } 770 } 771 /* should use a parameter to decide how often to send */ 772 if (poll(&pfd, 1, 3000) <= 0) { 773 D("poll error/timeout on queue %d: %s", targ->me, 774 strerror(errno)); 775 continue; 776 } 777 /* see what we got back */ 778 for (i = targ->nmd->first_tx_ring; 779 i <= targ->nmd->last_tx_ring; i++) { 780 ring = NETMAP_RXRING(nifp, i); 781 while (!nm_ring_empty(ring)) { 782 uint32_t seq; 783 slot = &ring->slot[ring->cur]; 784 p = NETMAP_BUF(ring, slot->buf_idx); 785 786 clock_gettime(CLOCK_REALTIME_PRECISE, &now); 787 bcopy(p+42, &seq, sizeof(seq)); 788 bcopy(p+46, &ts, sizeof(ts)); 789 ts.tv_sec = now.tv_sec - ts.tv_sec; 790 ts.tv_nsec = now.tv_nsec - ts.tv_nsec; 791 if (ts.tv_nsec < 0) { 792 ts.tv_nsec += 1000000000; 793 ts.tv_sec--; 794 } 795 if (1) D("seq %d/%d delta %d.%09d", seq, sent, 796 (int)ts.tv_sec, (int)ts.tv_nsec); 797 if (ts.tv_nsec < (int)min) 798 min = ts.tv_nsec; 799 count ++; 800 av += ts.tv_nsec; 801 ring->head = ring->cur = nm_ring_next(ring, ring->cur); 802 rx++; 803 } 804 } 805 //D("tx %d rx %d", sent, rx); 806 //usleep(100000); 807 ts.tv_sec = now.tv_sec - last_print.tv_sec; 808 ts.tv_nsec = now.tv_nsec - last_print.tv_nsec; 809 if (ts.tv_nsec < 0) { 810 ts.tv_nsec += 1000000000; 811 ts.tv_sec--; 812 } 813 if (ts.tv_sec >= 1) { 814 D("count %d min %d av %d", 815 count, min, av/count); 816 count = 0; 817 av = 0; 818 min = 100000000; 819 last_print = now; 820 } 821 } 822 return NULL; 823} 824 825 826/* 827 * reply to ping requests 828 */ 829static void * 830ponger_body(void *data) 831{ 832 struct targ *targ = (struct targ *) data; 833 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 834 struct netmap_if *nifp = targ->nmd->nifp; 835 struct netmap_ring *txring, *rxring; 836 int i, rx = 0, sent = 0, n = targ->g->npackets; 837 838 if (targ->g->nthreads > 1) { 839 D("can only reply ping with 1 thread"); 840 return NULL; 841 } 842 D("understood ponger %d but don't know how to do it", n); 843 while (n == 0 || sent < n) { 844 uint32_t txcur, txavail; 845//#define BUSYWAIT 846#ifdef BUSYWAIT 847 ioctl(pfd.fd, NIOCRXSYNC, NULL); 848#else 849 if (poll(&pfd, 1, 1000) <= 0) { 850 D("poll error/timeout on queue %d: %s", targ->me, 851 strerror(errno)); 852 continue; 853 } 854#endif 855 txring = NETMAP_TXRING(nifp, 0); 856 txcur = txring->cur; 857 txavail = nm_ring_space(txring); 858 /* see what we got back */ 859 for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) { 860 rxring = NETMAP_RXRING(nifp, i); 861 while (!nm_ring_empty(rxring)) { 862 uint16_t *spkt, *dpkt; 863 uint32_t cur = rxring->cur; 864 struct netmap_slot *slot = &rxring->slot[cur]; 865 char *src, *dst; 866 src = NETMAP_BUF(rxring, slot->buf_idx); 867 //D("got pkt %p of size %d", src, slot->len); 868 rxring->head = rxring->cur = nm_ring_next(rxring, cur); 869 rx++; 870 if (txavail == 0) 871 continue; 872 dst = NETMAP_BUF(txring, 873 txring->slot[txcur].buf_idx); 874 /* copy... */ 875 dpkt = (uint16_t *)dst; 876 spkt = (uint16_t *)src; 877 nm_pkt_copy(src, dst, slot->len); 878 dpkt[0] = spkt[3]; 879 dpkt[1] = spkt[4]; 880 dpkt[2] = spkt[5]; 881 dpkt[3] = spkt[0]; 882 dpkt[4] = spkt[1]; 883 dpkt[5] = spkt[2]; 884 txring->slot[txcur].len = slot->len; 885 /* XXX swap src dst mac */ 886 txcur = nm_ring_next(txring, txcur); 887 txavail--; 888 sent++; 889 } 890 } 891 txring->head = txring->cur = txcur; 892 targ->count = sent; 893#ifdef BUSYWAIT 894 ioctl(pfd.fd, NIOCTXSYNC, NULL); 895#endif 896 //D("tx %d rx %d", sent, rx); 897 } 898 return NULL; 899} 900 901static __inline int 902timespec_ge(const struct timespec *a, const struct timespec *b) 903{ 904 905 if (a->tv_sec > b->tv_sec) 906 return (1); 907 if (a->tv_sec < b->tv_sec) 908 return (0); 909 if (a->tv_nsec >= b->tv_nsec) 910 return (1); 911 return (0); 912} 913 914static __inline struct timespec 915timeval2spec(const struct timeval *a) 916{ 917 struct timespec ts = { 918 .tv_sec = a->tv_sec, 919 .tv_nsec = a->tv_usec * 1000 920 }; 921 return ts; 922} 923 924static __inline struct timeval 925timespec2val(const struct timespec *a) 926{ 927 struct timeval tv = { 928 .tv_sec = a->tv_sec, 929 .tv_usec = a->tv_nsec / 1000 930 }; 931 return tv; 932} 933 934 935static __inline struct timespec 936timespec_add(struct timespec a, struct timespec b) 937{ 938 struct timespec ret = { a.tv_sec + b.tv_sec, a.tv_nsec + b.tv_nsec }; 939 if (ret.tv_nsec >= 1000000000) { 940 ret.tv_sec++; 941 ret.tv_nsec -= 1000000000; 942 } 943 return ret; 944} 945 946static __inline struct timespec 947timespec_sub(struct timespec a, struct timespec b) 948{ 949 struct timespec ret = { a.tv_sec - b.tv_sec, a.tv_nsec - b.tv_nsec }; 950 if (ret.tv_nsec < 0) { 951 ret.tv_sec--; 952 ret.tv_nsec += 1000000000; 953 } 954 return ret; 955} 956 957 958/* 959 * wait until ts, either busy or sleeping if more than 1ms. 960 * Return wakeup time. 961 */ 962static struct timespec 963wait_time(struct timespec ts) 964{ 965 for (;;) { 966 struct timespec w, cur; 967 clock_gettime(CLOCK_REALTIME_PRECISE, &cur); 968 w = timespec_sub(ts, cur); 969 if (w.tv_sec < 0) 970 return cur; 971 else if (w.tv_sec > 0 || w.tv_nsec > 1000000) 972 poll(NULL, 0, 1); 973 } 974} 975 976static void * 977sender_body(void *data) 978{ 979 struct targ *targ = (struct targ *) data; 980 struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT }; 981 struct netmap_if *nifp = targ->nmd->nifp; 982 struct netmap_ring *txring; 983 int i, n = targ->g->npackets / targ->g->nthreads; 984 int64_t sent = 0; 985 int options = targ->g->options | OPT_COPY; 986 struct timespec nexttime = { 0, 0}; // XXX silence compiler 987 int rate_limit = targ->g->tx_rate; 988 struct pkt *pkt = &targ->pkt; 989 void *frame; 990 int size; 991 992 frame = pkt; 993 frame += sizeof(pkt->vh) - targ->g->virt_header; 994 size = targ->g->pkt_size + targ->g->virt_header; 995 996 D("start"); 997 if (setaffinity(targ->thread, targ->affinity)) 998 goto quit; 999 1000 /* main loop.*/ 1001 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 1002 if (rate_limit) { 1003 targ->tic = timespec_add(targ->tic, (struct timespec){2,0}); 1004 targ->tic.tv_nsec = 0; 1005 wait_time(targ->tic); 1006 nexttime = targ->tic; 1007 } 1008 if (targ->g->dev_type == DEV_TAP) { 1009 D("writing to file desc %d", targ->g->main_fd); 1010 1011 for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { 1012 if (write(targ->g->main_fd, frame, size) != -1) 1013 sent++; 1014 update_addresses(pkt, targ->g); 1015 if (i > 10000) { 1016 targ->count = sent; 1017 i = 0; 1018 } 1019 } 1020#ifndef NO_PCAP 1021 } else if (targ->g->dev_type == DEV_PCAP) { 1022 pcap_t *p = targ->g->p; 1023 1024 for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { 1025 if (pcap_inject(p, frame, size) != -1) 1026 sent++; 1027 update_addresses(pkt, targ->g); 1028 if (i > 10000) { 1029 targ->count = sent; 1030 i = 0; 1031 } 1032 } 1033#endif /* NO_PCAP */ 1034 } else { 1035 int tosend = 0; 1036 int frags = targ->g->frags; 1037 1038 while (!targ->cancel && (n == 0 || sent < n)) { 1039 1040 if (rate_limit && tosend <= 0) { 1041 tosend = targ->g->burst; 1042 nexttime = timespec_add(nexttime, targ->g->tx_period); 1043 wait_time(nexttime); 1044 } 1045 1046 /* 1047 * wait for available room in the send queue(s) 1048 */ 1049 if (poll(&pfd, 1, 2000) <= 0) { 1050 if (targ->cancel) 1051 break; 1052 D("poll error/timeout on queue %d: %s", targ->me, 1053 strerror(errno)); 1054 // goto quit; 1055 } 1056 if (pfd.revents & POLLERR) { 1057 D("poll error"); 1058 goto quit; 1059 } 1060 /* 1061 * scan our queues and send on those with room 1062 */ 1063 if (options & OPT_COPY && sent > 100000 && !(targ->g->options & OPT_COPY) ) { 1064 D("drop copy"); 1065 options &= ~OPT_COPY; 1066 } 1067 for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { 1068 int m, limit = rate_limit ? tosend : targ->g->burst; 1069 if (n > 0 && n - sent < limit) 1070 limit = n - sent; 1071 txring = NETMAP_TXRING(nifp, i); 1072 if (nm_ring_empty(txring)) 1073 continue; 1074 if (frags > 1) 1075 limit = ((limit + frags - 1) / frags) * frags; 1076 1077 m = send_packets(txring, pkt, frame, size, targ->g, 1078 limit, options, frags); 1079 ND("limit %d tail %d frags %d m %d", 1080 limit, txring->tail, frags, m); 1081 sent += m; 1082 targ->count = sent; 1083 if (rate_limit) { 1084 tosend -= m; 1085 if (tosend <= 0) 1086 break; 1087 } 1088 } 1089 } 1090 /* flush any remaining packets */ 1091 ioctl(pfd.fd, NIOCTXSYNC, NULL); 1092 1093 /* final part: wait all the TX queues to be empty. */ 1094 for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { 1095 txring = NETMAP_TXRING(nifp, i); 1096 while (nm_tx_pending(txring)) { 1097 ioctl(pfd.fd, NIOCTXSYNC, NULL); 1098 usleep(1); /* wait 1 tick */ 1099 } 1100 } 1101 } /* end DEV_NETMAP */ 1102 1103 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 1104 targ->completed = 1; 1105 targ->count = sent; 1106 1107quit: 1108 /* reset the ``used`` flag. */ 1109 targ->used = 0; 1110 1111 return (NULL); 1112} 1113 1114 1115#ifndef NO_PCAP 1116static void 1117receive_pcap(u_char *user, const struct pcap_pkthdr * h, 1118 const u_char * bytes) 1119{ 1120 int *count = (int *)user; 1121 (void)h; /* UNUSED */ 1122 (void)bytes; /* UNUSED */ 1123 (*count)++; 1124} 1125#endif /* !NO_PCAP */ 1126 1127static int 1128receive_packets(struct netmap_ring *ring, u_int limit, int dump) 1129{ 1130 u_int cur, rx, n; 1131 1132 cur = ring->cur; 1133 n = nm_ring_space(ring); 1134 if (n < limit) 1135 limit = n; 1136 for (rx = 0; rx < limit; rx++) { 1137 struct netmap_slot *slot = &ring->slot[cur]; 1138 char *p = NETMAP_BUF(ring, slot->buf_idx); 1139 1140 if (dump) 1141 dump_payload(p, slot->len, ring, cur); 1142 1143 cur = nm_ring_next(ring, cur); 1144 } 1145 ring->head = ring->cur = cur; 1146 1147 return (rx); 1148} 1149 1150static void * 1151receiver_body(void *data) 1152{ 1153 struct targ *targ = (struct targ *) data; 1154 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 1155 struct netmap_if *nifp = targ->nmd->nifp; 1156 struct netmap_ring *rxring; 1157 int i; 1158 uint64_t received = 0; 1159 1160 if (setaffinity(targ->thread, targ->affinity)) 1161 goto quit; 1162 1163 /* unbounded wait for the first packet. */ 1164 for (;;) { 1165 i = poll(&pfd, 1, 1000); 1166 if (i > 0 && !(pfd.revents & POLLERR)) 1167 break; 1168 RD(1, "waiting for initial packets, poll returns %d %d", 1169 i, pfd.revents); 1170 } 1171 1172 /* main loop, exit after 1s silence */ 1173 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 1174 if (targ->g->dev_type == DEV_TAP) { 1175 D("reading from %s fd %d", targ->g->ifname, targ->g->main_fd); 1176 while (!targ->cancel) { 1177 char buf[2048]; 1178 /* XXX should we poll ? */ 1179 if (read(targ->g->main_fd, buf, sizeof(buf)) > 0) 1180 targ->count++; 1181 } 1182#ifndef NO_PCAP 1183 } else if (targ->g->dev_type == DEV_PCAP) { 1184 while (!targ->cancel) { 1185 /* XXX should we poll ? */ 1186 pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap, NULL); 1187 } 1188#endif /* !NO_PCAP */ 1189 } else { 1190 int dump = targ->g->options & OPT_DUMP; 1191 while (!targ->cancel) { 1192 /* Once we started to receive packets, wait at most 1 seconds 1193 before quitting. */ 1194 if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) { 1195 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 1196 targ->toc.tv_sec -= 1; /* Subtract timeout time. */ 1197 goto out; 1198 } 1199 1200 if (pfd.revents & POLLERR) { 1201 D("poll err"); 1202 goto quit; 1203 } 1204 1205 for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) { 1206 int m; 1207 1208 rxring = NETMAP_RXRING(nifp, i); 1209 if (nm_ring_empty(rxring)) 1210 continue; 1211 1212 m = receive_packets(rxring, targ->g->burst, dump); 1213 received += m; 1214 } 1215 targ->count = received; 1216 } 1217 } 1218 1219 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 1220 1221out: 1222 targ->completed = 1; 1223 targ->count = received; 1224 1225quit: 1226 /* reset the ``used`` flag. */ 1227 targ->used = 0; 1228 1229 return (NULL); 1230} 1231 1232/* very crude code to print a number in normalized form. 1233 * Caller has to make sure that the buffer is large enough. 1234 */ 1235static const char * 1236norm(char *buf, double val) 1237{ 1238 char *units[] = { "", "K", "M", "G", "T" }; 1239 u_int i; 1240 1241 for (i = 0; val >=1000 && i < sizeof(units)/sizeof(char *) - 1; i++) 1242 val /= 1000; 1243 sprintf(buf, "%.2f %s", val, units[i]); 1244 return buf; 1245} 1246 1247static void 1248tx_output(uint64_t sent, int size, double delta) 1249{ 1250 double bw, raw_bw, pps; 1251 char b1[40], b2[80], b3[80]; 1252 1253 printf("Sent %llu packets, %d bytes each, in %.2f seconds.\n", 1254 (unsigned long long)sent, size, delta); 1255 if (delta == 0) 1256 delta = 1e-6; 1257 if (size < 60) /* correct for min packet size */ 1258 size = 60; 1259 pps = sent / delta; 1260 bw = (8.0 * size * sent) / delta; 1261 /* raw packets have4 bytes crc + 20 bytes framing */ 1262 raw_bw = (8.0 * (size + 24) * sent) / delta; 1263 1264 printf("Speed: %spps Bandwidth: %sbps (raw %sbps)\n", 1265 norm(b1, pps), norm(b2, bw), norm(b3, raw_bw) ); 1266} 1267 1268 1269static void 1270rx_output(uint64_t received, double delta) 1271{ 1272 double pps; 1273 char b1[40]; 1274 1275 printf("Received %llu packets, in %.2f seconds.\n", 1276 (unsigned long long) received, delta); 1277 1278 if (delta == 0) 1279 delta = 1e-6; 1280 pps = received / delta; 1281 printf("Speed: %spps\n", norm(b1, pps)); 1282} 1283 1284static void 1285usage(void) 1286{ 1287 const char *cmd = "pkt-gen"; 1288 fprintf(stderr, 1289 "Usage:\n" 1290 "%s arguments\n" 1291 "\t-i interface interface name\n" 1292 "\t-f function tx rx ping pong\n" 1293 "\t-n count number of iterations (can be 0)\n" 1294 "\t-t pkts_to_send also forces tx mode\n" 1295 "\t-r pkts_to_receive also forces rx mode\n" 1296 "\t-l pkt_size in bytes excluding CRC\n" 1297 "\t-d dst_ip[:port[-dst_ip:port]] single or range\n" 1298 "\t-s src_ip[:port[-src_ip:port]] single or range\n" 1299 "\t-D dst-mac\n" 1300 "\t-S src-mac\n" 1301 "\t-a cpu_id use setaffinity\n" 1302 "\t-b burst size testing, mostly\n" 1303 "\t-c cores cores to use\n" 1304 "\t-p threads processes/threads to use\n" 1305 "\t-T report_ms milliseconds between reports\n" 1306 "\t-P use libpcap instead of netmap\n" 1307 "\t-w wait_for_link_time in seconds\n" 1308 "\t-R rate in packets per second\n" 1309 "\t-X dump payload\n" 1310 "\t-H len add empty virtio-net-header with size 'len'\n" 1311 "", 1312 cmd); 1313 1314 exit(0); 1315} 1316 1317static void 1318start_threads(struct glob_arg *g) 1319{ 1320 int i; 1321 1322 targs = calloc(g->nthreads, sizeof(*targs)); 1323 /* 1324 * Now create the desired number of threads, each one 1325 * using a single descriptor. 1326 */ 1327 for (i = 0; i < g->nthreads; i++) { 1328 struct targ *t = &targs[i]; 1329 1330 bzero(t, sizeof(*t)); 1331 t->fd = -1; /* default, with pcap */ 1332 t->g = g; 1333 1334 if (g->dev_type == DEV_NETMAP) { 1335 struct nm_desc nmd = *g->nmd; /* copy, we overwrite ringid */ 1336 1337 if (g->nthreads > 1) { 1338 if (nmd.req.nr_flags != NR_REG_ALL_NIC) { 1339 D("invalid nthreads mode %d", nmd.req.nr_flags); 1340 continue; 1341 } 1342 nmd.req.nr_flags = NR_REG_ONE_NIC; 1343 nmd.req.nr_ringid = i; 1344 } 1345 /* Only touch one of the rings (rx is already ok) */ 1346 if (g->td_body == receiver_body) 1347 nmd.req.nr_ringid |= NETMAP_NO_TX_POLL; 1348 1349 /* register interface. Override ifname and ringid etc. */ 1350 1351 t->nmd = nm_open(t->g->ifname, NULL, g->nmd_flags | 1352 NM_OPEN_IFNAME | NM_OPEN_NO_MMAP, g->nmd); 1353 if (t->nmd == NULL) { 1354 D("Unable to open %s: %s", 1355 t->g->ifname, strerror(errno)); 1356 continue; 1357 } 1358 t->fd = t->nmd->fd; 1359 1360 } else { 1361 targs[i].fd = g->main_fd; 1362 } 1363 t->used = 1; 1364 t->me = i; 1365 if (g->affinity >= 0) { 1366 if (g->affinity < g->cpus) 1367 t->affinity = g->affinity; 1368 else 1369 t->affinity = i % g->cpus; 1370 } else { 1371 t->affinity = -1; 1372 } 1373 /* default, init packets */ 1374 initialize_packet(t); 1375 1376 if (pthread_create(&t->thread, NULL, g->td_body, t) == -1) { 1377 D("Unable to create thread %d: %s", i, strerror(errno)); 1378 t->used = 0; 1379 } 1380 } 1381} 1382 1383static void 1384main_thread(struct glob_arg *g) 1385{ 1386 int i; 1387 1388 uint64_t prev = 0; 1389 uint64_t count = 0; 1390 double delta_t; 1391 struct timeval tic, toc; 1392 1393 gettimeofday(&toc, NULL); 1394 for (;;) { 1395 struct timeval now, delta; 1396 uint64_t pps, usec, my_count, npkts; 1397 int done = 0; 1398 1399 delta.tv_sec = g->report_interval/1000; 1400 delta.tv_usec = (g->report_interval%1000)*1000; 1401 select(0, NULL, NULL, NULL, &delta); 1402 gettimeofday(&now, NULL); 1403 timersub(&now, &toc, &toc); 1404 my_count = 0; 1405 for (i = 0; i < g->nthreads; i++) { 1406 my_count += targs[i].count; 1407 if (targs[i].used == 0) 1408 done++; 1409 } 1410 usec = toc.tv_sec* 1000000 + toc.tv_usec; 1411 if (usec < 10000) 1412 continue; 1413 npkts = my_count - prev; 1414 pps = (npkts*1000000 + usec/2) / usec; 1415 D("%llu pps (%llu pkts in %llu usec)", 1416 (unsigned long long)pps, 1417 (unsigned long long)npkts, 1418 (unsigned long long)usec); 1419 prev = my_count; 1420 toc = now; 1421 if (done == g->nthreads) 1422 break; 1423 } 1424 1425 timerclear(&tic); 1426 timerclear(&toc); 1427 for (i = 0; i < g->nthreads; i++) { 1428 struct timespec t_tic, t_toc; 1429 /* 1430 * Join active threads, unregister interfaces and close 1431 * file descriptors. 1432 */ 1433 if (targs[i].used) 1434 pthread_join(targs[i].thread, NULL); 1435 close(targs[i].fd); 1436 1437 if (targs[i].completed == 0) 1438 D("ouch, thread %d exited with error", i); 1439 1440 /* 1441 * Collect threads output and extract information about 1442 * how long it took to send all the packets. 1443 */ 1444 count += targs[i].count; 1445 t_tic = timeval2spec(&tic); 1446 t_toc = timeval2spec(&toc); 1447 if (!timerisset(&tic) || timespec_ge(&targs[i].tic, &t_tic)) 1448 tic = timespec2val(&targs[i].tic); 1449 if (!timerisset(&toc) || timespec_ge(&targs[i].toc, &t_toc)) 1450 toc = timespec2val(&targs[i].toc); 1451 } 1452 1453 /* print output. */ 1454 timersub(&toc, &tic, &toc); 1455 delta_t = toc.tv_sec + 1e-6* toc.tv_usec; 1456 if (g->td_body == sender_body) 1457 tx_output(count, g->pkt_size, delta_t); 1458 else 1459 rx_output(count, delta_t); 1460 1461 if (g->dev_type == DEV_NETMAP) { 1462 munmap(g->nmd->mem, g->nmd->req.nr_memsize); 1463 close(g->main_fd); 1464 } 1465} 1466 1467 1468struct sf { 1469 char *key; 1470 void *f; 1471}; 1472 1473static struct sf func[] = { 1474 { "tx", sender_body }, 1475 { "rx", receiver_body }, 1476 { "ping", pinger_body }, 1477 { "pong", ponger_body }, 1478 { NULL, NULL } 1479}; 1480 1481static int 1482tap_alloc(char *dev) 1483{ 1484 struct ifreq ifr; 1485 int fd, err; 1486 char *clonedev = TAP_CLONEDEV; 1487 1488 (void)err; 1489 (void)dev; 1490 /* Arguments taken by the function: 1491 * 1492 * char *dev: the name of an interface (or '\0'). MUST have enough 1493 * space to hold the interface name if '\0' is passed 1494 * int flags: interface flags (eg, IFF_TUN etc.) 1495 */ 1496 1497#ifdef __FreeBSD__ 1498 if (dev[3]) { /* tapSomething */ 1499 static char buf[128]; 1500 snprintf(buf, sizeof(buf), "/dev/%s", dev); 1501 clonedev = buf; 1502 } 1503#endif 1504 /* open the device */ 1505 if( (fd = open(clonedev, O_RDWR)) < 0 ) { 1506 return fd; 1507 } 1508 D("%s open successful", clonedev); 1509 1510 /* preparation of the struct ifr, of type "struct ifreq" */ 1511 memset(&ifr, 0, sizeof(ifr)); 1512 1513#ifdef linux 1514 ifr.ifr_flags = IFF_TAP | IFF_NO_PI; 1515 1516 if (*dev) { 1517 /* if a device name was specified, put it in the structure; otherwise, 1518 * the kernel will try to allocate the "next" device of the 1519 * specified type */ 1520 strncpy(ifr.ifr_name, dev, IFNAMSIZ); 1521 } 1522 1523 /* try to create the device */ 1524 if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ) { 1525 D("failed to to a TUNSETIFF: %s", strerror(errno)); 1526 close(fd); 1527 return err; 1528 } 1529 1530 /* if the operation was successful, write back the name of the 1531 * interface to the variable "dev", so the caller can know 1532 * it. Note that the caller MUST reserve space in *dev (see calling 1533 * code below) */ 1534 strcpy(dev, ifr.ifr_name); 1535 D("new name is %s", dev); 1536#endif /* linux */ 1537 1538 /* this is the special file descriptor that the caller will use to talk 1539 * with the virtual interface */ 1540 return fd; 1541} 1542 1543int 1544main(int arc, char **argv) 1545{ 1546 int i; 1547 1548 struct glob_arg g; 1549 1550 int ch; 1551 int wait_link = 2; 1552 int devqueues = 1; /* how many device queues */ 1553 1554 bzero(&g, sizeof(g)); 1555 1556 g.main_fd = -1; 1557 g.td_body = receiver_body; 1558 g.report_interval = 1000; /* report interval */ 1559 g.affinity = -1; 1560 /* ip addresses can also be a range x.x.x.x-x.x.x.y */ 1561 g.src_ip.name = "10.0.0.1"; 1562 g.dst_ip.name = "10.1.0.1"; 1563 g.dst_mac.name = "ff:ff:ff:ff:ff:ff"; 1564 g.src_mac.name = NULL; 1565 g.pkt_size = 60; 1566 g.burst = 512; // default 1567 g.nthreads = 1; 1568 g.cpus = 1; 1569 g.forever = 1; 1570 g.tx_rate = 0; 1571 g.frags = 1; 1572 g.nmr_config = ""; 1573 g.virt_header = 0; 1574 1575 while ( (ch = getopt(arc, argv, 1576 "a:f:F:n:i:Il:d:s:D:S:b:c:o:p:T:w:WvR:XC:H:e:")) != -1) { 1577 struct sf *fn; 1578 1579 switch(ch) { 1580 default: 1581 D("bad option %c %s", ch, optarg); 1582 usage(); 1583 break; 1584 1585 case 'n': 1586 g.npackets = atoi(optarg); 1587 break; 1588 1589 case 'F': 1590 i = atoi(optarg); 1591 if (i < 1 || i > 63) { 1592 D("invalid frags %d [1..63], ignore", i); 1593 break; 1594 } 1595 g.frags = i; 1596 break; 1597 1598 case 'f': 1599 for (fn = func; fn->key; fn++) { 1600 if (!strcmp(fn->key, optarg)) 1601 break; 1602 } 1603 if (fn->key) 1604 g.td_body = fn->f; 1605 else 1606 D("unrecognised function %s", optarg); 1607 break; 1608 1609 case 'o': /* data generation options */ 1610 g.options = atoi(optarg); 1611 break; 1612 1613 case 'a': /* force affinity */ 1614 g.affinity = atoi(optarg); 1615 break; 1616 1617 case 'i': /* interface */ 1618 /* a prefix of tap: netmap: or pcap: forces the mode. 1619 * otherwise we guess 1620 */ 1621 D("interface is %s", optarg); 1622 if (strlen(optarg) > MAX_IFNAMELEN - 8) { 1623 D("ifname too long %s", optarg); 1624 break; 1625 } 1626 strcpy(g.ifname, optarg); 1627 if (!strcmp(optarg, "null")) { 1628 g.dev_type = DEV_NETMAP; 1629 g.dummy_send = 1; 1630 } else if (!strncmp(optarg, "tap:", 4)) { 1631 g.dev_type = DEV_TAP; 1632 strcpy(g.ifname, optarg + 4); 1633 } else if (!strncmp(optarg, "pcap:", 5)) { 1634 g.dev_type = DEV_PCAP; 1635 strcpy(g.ifname, optarg + 5); 1636 } else if (!strncmp(optarg, "netmap:", 7) || 1637 !strncmp(optarg, "vale", 4)) { 1638 g.dev_type = DEV_NETMAP; 1639 } else if (!strncmp(optarg, "tap", 3)) { 1640 g.dev_type = DEV_TAP; 1641 } else { /* prepend netmap: */ 1642 g.dev_type = DEV_NETMAP; 1643 sprintf(g.ifname, "netmap:%s", optarg); 1644 } 1645 break; 1646 1647 case 'I': 1648 g.options |= OPT_INDIRECT; /* XXX use indirect buffer */ 1649 break; 1650 1651 case 'l': /* pkt_size */ 1652 g.pkt_size = atoi(optarg); 1653 break; 1654 1655 case 'd': 1656 g.dst_ip.name = optarg; 1657 break; 1658 1659 case 's': 1660 g.src_ip.name = optarg; 1661 break; 1662 1663 case 'T': /* report interval */ 1664 g.report_interval = atoi(optarg); 1665 break; 1666 1667 case 'w': 1668 wait_link = atoi(optarg); 1669 break; 1670 1671 case 'W': /* XXX changed default */ 1672 g.forever = 0; /* do not exit rx even with no traffic */ 1673 break; 1674 1675 case 'b': /* burst */ 1676 g.burst = atoi(optarg); 1677 break; 1678 case 'c': 1679 g.cpus = atoi(optarg); 1680 break; 1681 case 'p': 1682 g.nthreads = atoi(optarg); 1683 break; 1684 1685 case 'D': /* destination mac */ 1686 g.dst_mac.name = optarg; 1687 break; 1688 1689 case 'S': /* source mac */ 1690 g.src_mac.name = optarg; 1691 break; 1692 case 'v': 1693 verbose++; 1694 break; 1695 case 'R': 1696 g.tx_rate = atoi(optarg); 1697 break; 1698 case 'X': 1699 g.options |= OPT_DUMP; 1700 break; 1701 case 'C': 1702 g.nmr_config = strdup(optarg); 1703 break; 1704 case 'H': 1705 g.virt_header = atoi(optarg); 1706 break; 1707 case 'e': /* extra bufs */ 1708 g.extra_bufs = atoi(optarg); 1709 break; 1710 } 1711 } 1712 1713 if (g.ifname == NULL) { 1714 D("missing ifname"); 1715 usage(); 1716 } 1717 1718 i = system_ncpus(); 1719 if (g.cpus < 0 || g.cpus > i) { 1720 D("%d cpus is too high, have only %d cpus", g.cpus, i); 1721 usage(); 1722 } 1723 if (g.cpus == 0) 1724 g.cpus = i; 1725 1726 if (g.pkt_size < 16 || g.pkt_size > 1536) { 1727 D("bad pktsize %d\n", g.pkt_size); 1728 usage(); 1729 } 1730 1731 if (g.src_mac.name == NULL) { 1732 static char mybuf[20] = "00:00:00:00:00:00"; 1733 /* retrieve source mac address. */ 1734 if (source_hwaddr(g.ifname, mybuf) == -1) { 1735 D("Unable to retrieve source mac"); 1736 // continue, fail later 1737 } 1738 g.src_mac.name = mybuf; 1739 } 1740 /* extract address ranges */ 1741 extract_ip_range(&g.src_ip); 1742 extract_ip_range(&g.dst_ip); 1743 extract_mac_range(&g.src_mac); 1744 extract_mac_range(&g.dst_mac); 1745 1746 if (g.src_ip.start != g.src_ip.end || 1747 g.src_ip.port0 != g.src_ip.port1 || 1748 g.dst_ip.start != g.dst_ip.end || 1749 g.dst_ip.port0 != g.dst_ip.port1) 1750 g.options |= OPT_COPY; 1751 1752 if (g.virt_header != 0 && g.virt_header != VIRT_HDR_1 1753 && g.virt_header != VIRT_HDR_2) { 1754 D("bad virtio-net-header length"); 1755 usage(); 1756 } 1757 1758 if (g.dev_type == DEV_TAP) { 1759 D("want to use tap %s", g.ifname); 1760 g.main_fd = tap_alloc(g.ifname); 1761 if (g.main_fd < 0) { 1762 D("cannot open tap %s", g.ifname); 1763 usage(); 1764 } 1765#ifndef NO_PCAP 1766 } else if (g.dev_type == DEV_PCAP) { 1767 char pcap_errbuf[PCAP_ERRBUF_SIZE]; 1768 1769 D("using pcap on %s", g.ifname); 1770 pcap_errbuf[0] = '\0'; // init the buffer 1771 g.p = pcap_open_live(g.ifname, 0, 1, 100, pcap_errbuf); 1772 if (g.p == NULL) { 1773 D("cannot open pcap on %s", g.ifname); 1774 usage(); 1775 } 1776#endif /* !NO_PCAP */ 1777 } else if (g.dummy_send) { /* but DEV_NETMAP */ 1778 D("using a dummy send routine"); 1779 } else { 1780 struct nm_desc base_nmd; 1781 1782 bzero(&base_nmd, sizeof(base_nmd)); 1783 1784 g.nmd_flags = 0; 1785 g.nmd_flags |= parse_nmr_config(g.nmr_config, &base_nmd.req); 1786 if (g.extra_bufs) { 1787 base_nmd.req.nr_arg3 = g.extra_bufs; 1788 g.nmd_flags |= NM_OPEN_ARG3; 1789 } 1790 1791 /* 1792 * Open the netmap device using nm_open(). 1793 * 1794 * protocol stack and may cause a reset of the card, 1795 * which in turn may take some time for the PHY to 1796 * reconfigure. We do the open here to have time to reset. 1797 */ 1798 g.nmd = nm_open(g.ifname, NULL, g.nmd_flags, &base_nmd); 1799 if (g.nmd == NULL) { 1800 D("Unable to open %s: %s", g.ifname, strerror(errno)); 1801 goto out; 1802 } 1803 g.main_fd = g.nmd->fd; 1804 D("mapped %dKB at %p", g.nmd->req.nr_memsize>>10, g.nmd->mem); 1805 1806 devqueues = g.nmd->req.nr_rx_rings; 1807 1808 /* validate provided nthreads. */ 1809 if (g.nthreads < 1 || g.nthreads > devqueues) { 1810 D("bad nthreads %d, have %d queues", g.nthreads, devqueues); 1811 // continue, fail later 1812 } 1813 1814 if (verbose) { 1815 struct netmap_if *nifp = g.nmd->nifp; 1816 struct nmreq *req = &g.nmd->req; 1817 1818 D("nifp at offset %d, %d tx %d rx region %d", 1819 req->nr_offset, req->nr_tx_rings, req->nr_rx_rings, 1820 req->nr_arg2); 1821 for (i = 0; i <= req->nr_tx_rings; i++) { 1822 D(" TX%d at 0x%lx", i, 1823 (char *)NETMAP_TXRING(nifp, i) - (char *)nifp); 1824 } 1825 for (i = 0; i <= req->nr_rx_rings; i++) { 1826 D(" RX%d at 0x%lx", i, 1827 (char *)NETMAP_RXRING(nifp, i) - (char *)nifp); 1828 } 1829 } 1830 1831 /* Print some debug information. */ 1832 fprintf(stdout, 1833 "%s %s: %d queues, %d threads and %d cpus.\n", 1834 (g.td_body == sender_body) ? "Sending on" : "Receiving from", 1835 g.ifname, 1836 devqueues, 1837 g.nthreads, 1838 g.cpus); 1839 if (g.td_body == sender_body) { 1840 fprintf(stdout, "%s -> %s (%s -> %s)\n", 1841 g.src_ip.name, g.dst_ip.name, 1842 g.src_mac.name, g.dst_mac.name); 1843 } 1844 1845out: 1846 /* Exit if something went wrong. */ 1847 if (g.main_fd < 0) { 1848 D("aborting"); 1849 usage(); 1850 } 1851 } 1852 1853 1854 if (g.options) { 1855 D("--- SPECIAL OPTIONS:%s%s%s%s%s\n", 1856 g.options & OPT_PREFETCH ? " prefetch" : "", 1857 g.options & OPT_ACCESS ? " access" : "", 1858 g.options & OPT_MEMCPY ? " memcpy" : "", 1859 g.options & OPT_INDIRECT ? " indirect" : "", 1860 g.options & OPT_COPY ? " copy" : ""); 1861 } 1862 1863 g.tx_period.tv_sec = g.tx_period.tv_nsec = 0; 1864 if (g.tx_rate > 0) { 1865 /* try to have at least something every second, 1866 * reducing the burst size to some 0.01s worth of data 1867 * (but no less than one full set of fragments) 1868 */ 1869 uint64_t x; 1870 int lim = (g.tx_rate)/300; 1871 if (g.burst > lim) 1872 g.burst = lim; 1873 if (g.burst < g.frags) 1874 g.burst = g.frags; 1875 x = ((uint64_t)1000000000 * (uint64_t)g.burst) / (uint64_t) g.tx_rate; 1876 g.tx_period.tv_nsec = x; 1877 g.tx_period.tv_sec = g.tx_period.tv_nsec / 1000000000; 1878 g.tx_period.tv_nsec = g.tx_period.tv_nsec % 1000000000; 1879 } 1880 if (g.td_body == sender_body) 1881 D("Sending %d packets every %ld.%09ld s", 1882 g.burst, g.tx_period.tv_sec, g.tx_period.tv_nsec); 1883 /* Wait for PHY reset. */ 1884 D("Wait %d secs for phy reset", wait_link); 1885 sleep(wait_link); 1886 D("Ready..."); 1887 1888 /* Install ^C handler. */ 1889 global_nthreads = g.nthreads; 1890 signal(SIGINT, sigint_h); 1891 1892 start_threads(&g); 1893 main_thread(&g); 1894 return 0; 1895} 1896 1897/* end of file */ 1898