/* pkt-gen.c, revision 302408 */
1/* 2 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. 3 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27/* 28 * $FreeBSD: stable/11/tools/tools/netmap/pkt-gen.c 281746 2015-04-19 17:07:51Z adrian $ 29 * $Id: pkt-gen.c 12346 2013-06-12 17:36:25Z luigi $ 30 * 31 * Example program to show how to build a multithreaded packet 32 * source/sink using the netmap device. 33 * 34 * In this example we create a programmable number of threads 35 * to take care of all the queues of the interface used to 36 * send or receive traffic. 
37 * 38 */ 39 40// #define TRASH_VHOST_HDR 41 42#define _GNU_SOURCE /* for CPU_SET() */ 43#include <stdio.h> 44#define NETMAP_WITH_LIBS 45#include <net/netmap_user.h> 46 47 48#include <ctype.h> // isprint() 49#include <unistd.h> // sysconf() 50#include <sys/poll.h> 51#include <arpa/inet.h> /* ntohs */ 52#include <sys/sysctl.h> /* sysctl */ 53#include <ifaddrs.h> /* getifaddrs */ 54#include <net/ethernet.h> 55#include <netinet/in.h> 56#include <netinet/ip.h> 57#include <netinet/udp.h> 58 59#include <pthread.h> 60 61#ifndef NO_PCAP 62#include <pcap/pcap.h> 63#endif 64 65#ifdef linux 66 67#define cpuset_t cpu_set_t 68 69#define ifr_flagshigh ifr_flags /* only the low 16 bits here */ 70#define IFF_PPROMISC IFF_PROMISC /* IFF_PPROMISC does not exist */ 71#include <linux/ethtool.h> 72#include <linux/sockios.h> 73 74#define CLOCK_REALTIME_PRECISE CLOCK_REALTIME 75#include <netinet/ether.h> /* ether_aton */ 76#include <linux/if_packet.h> /* sockaddr_ll */ 77#endif /* linux */ 78 79#ifdef __FreeBSD__ 80#include <sys/endian.h> /* le64toh */ 81#include <machine/param.h> 82 83#include <pthread_np.h> /* pthread w/ affinity */ 84#include <sys/cpuset.h> /* cpu_set */ 85#include <net/if_dl.h> /* LLADDR */ 86#endif /* __FreeBSD__ */ 87 88#ifdef __APPLE__ 89 90#define cpuset_t uint64_t // XXX 91static inline void CPU_ZERO(cpuset_t *p) 92{ 93 *p = 0; 94} 95 96static inline void CPU_SET(uint32_t i, cpuset_t *p) 97{ 98 *p |= 1<< (i & 0x3f); 99} 100 101#define pthread_setaffinity_np(a, b, c) ((void)a, 0) 102 103#define ifr_flagshigh ifr_flags // XXX 104#define IFF_PPROMISC IFF_PROMISC 105#include <net/if_dl.h> /* LLADDR */ 106#define clock_gettime(a,b) \ 107 do {struct timespec t0 = {0,0}; *(b) = t0; } while (0) 108#endif /* __APPLE__ */ 109 110const char *default_payload="netmap pkt-gen DIRECT payload\n" 111 "http://info.iet.unipi.it/~luigi/netmap/ "; 112 113const char *indirect_payload="netmap pkt-gen indirect payload\n" 114 "http://info.iet.unipi.it/~luigi/netmap/ "; 115 116int 
verbose = 0;	/* set by -v; gates diagnostic output throughout the file */

#define SKIP_PAYLOAD 1 /* do not check payload. XXX unused */


#define VIRT_HDR_1 10	/* length of a base vnet-hdr */
#define VIRT_HDR_2 12	/* length of the extenede vnet-hdr */
#define VIRT_HDR_MAX VIRT_HDR_2
/* optional virtio-net style header transmitted in front of the frame */
struct virt_header {
	uint8_t fields[VIRT_HDR_MAX];
};

#define MAX_BODYSIZE	16384

/*
 * Template frame: optional vnet header, Ethernet + IPv4 + UDP headers,
 * then the payload body. Packed so the in-memory layout matches the
 * wire layout byte for byte.
 */
struct pkt {
	struct virt_header vh;
	struct ether_header eh;
	struct ip ip;
	struct udphdr udp;
	uint8_t body[MAX_BODYSIZE];	// XXX hardwired
} __attribute__((__packed__));

/* an IPv4 address range plus a UDP port range, parsed from the command line */
struct ip_range {
	char *name;		/* original argument string; modified in place while parsing */
	uint32_t start, end;	/* same as struct in_addr, host byte order */
	uint16_t port0, port1;
};

struct mac_range {
	char *name;		/* original argument string */
	struct ether_addr start, end;
};

/* ifname can be netmap:foo-xxxx */
#define MAX_IFNAMELEN	64	/* our buffer for ifname */
//#define MAX_PKTSIZE	1536
#define MAX_PKTSIZE	MAX_BODYSIZE	/* XXX: + IP_HDR + ETH_HDR */

/* compact timestamp to fit into 60 byte packet. (enough to obtain RTT) */
struct tstamp {
	uint32_t sec;
	uint32_t nsec;
};

/*
 * global arguments for all threads
 */

struct glob_arg {
	struct ip_range src_ip;
	struct ip_range dst_ip;
	struct mac_range dst_mac;
	struct mac_range src_mac;
	int pkt_size;
	int burst;
	int forever;
	int npackets;	/* total packets to send */
	int frags;	/* fragments per packet */
	int nthreads;
	int cpus;
	int options;	/* testing */
#define OPT_PREFETCH	1
#define OPT_ACCESS	2
#define OPT_COPY	4
#define OPT_MEMCPY	8
#define OPT_TS		16	/* add a timestamp */
#define OPT_INDIRECT	32	/* use indirect buffers, tx only */
#define OPT_DUMP	64	/* dump rx/tx traffic */
#define OPT_MONITOR_TX	128
#define OPT_MONITOR_RX	256
#define OPT_RANDOM_SRC	512
#define OPT_RANDOM_DST	1024
	int dev_type;		/* enum dev_type below */
#ifndef NO_PCAP
	pcap_t *p;		/* pcap handle when dev_type == DEV_PCAP */
#endif

	int tx_rate;		/* target pps when rate limiting */
	struct timespec tx_period;	/* inter-burst interval derived from tx_rate */

	int affinity;
	int main_fd;
	struct nm_desc *nmd;
	int report_interval;	/* milliseconds between prints */
	void *(*td_body)(void *);	/* per-thread worker (sender/receiver/...) */
	void *mmap_addr;
	char ifname[MAX_IFNAMELEN];
	char *nmr_config;	/* ring/slot overrides, see parse_nmr_config() */
	int dummy_send;
	int virt_header;	/* send also the virt_header */
	int extra_bufs;		/* goes in nr_arg3 */
	char *packet_file;	/* -P option */
};
enum dev_type { DEV_NONE, DEV_NETMAP, DEV_PCAP, DEV_TAP };


/*
 * Arguments for a new thread. The same structure is used by
 * the source and the sink
 */
struct targ {
	struct glob_arg *g;	/* shared configuration */
	int used;		/* slot in use; cleared by the thread on exit */
	int completed;
	int cancel;		/* set (e.g. by SIGINT) to ask the thread to stop */
	int fd;
	struct nm_desc *nmd;
	volatile uint64_t count;	/* packets processed, sampled by the main thread */
	struct timespec tic, toc;	/* start/stop timestamps for rate reports */
	int me;			/* thread index */
	pthread_t thread;
	int affinity;

	struct pkt pkt;		/* template frame, mutated between sends */
	void *frame;		/* alternate frame loaded from a pcap file (-P) */
};


/*
 * extract the extremes from a range of ipv4 addresses.
236 * addr_lo[-addr_hi][:port_lo[-port_hi]] 237 */ 238static void 239extract_ip_range(struct ip_range *r) 240{ 241 char *ap, *pp; 242 struct in_addr a; 243 244 if (verbose) 245 D("extract IP range from %s", r->name); 246 r->port0 = r->port1 = 0; 247 r->start = r->end = 0; 248 249 /* the first - splits start/end of range */ 250 ap = index(r->name, '-'); /* do we have ports ? */ 251 if (ap) { 252 *ap++ = '\0'; 253 } 254 /* grab the initial values (mandatory) */ 255 pp = index(r->name, ':'); 256 if (pp) { 257 *pp++ = '\0'; 258 r->port0 = r->port1 = strtol(pp, NULL, 0); 259 }; 260 inet_aton(r->name, &a); 261 r->start = r->end = ntohl(a.s_addr); 262 if (ap) { 263 pp = index(ap, ':'); 264 if (pp) { 265 *pp++ = '\0'; 266 if (*pp) 267 r->port1 = strtol(pp, NULL, 0); 268 } 269 if (*ap) { 270 inet_aton(ap, &a); 271 r->end = ntohl(a.s_addr); 272 } 273 } 274 if (r->port0 > r->port1) { 275 uint16_t tmp = r->port0; 276 r->port0 = r->port1; 277 r->port1 = tmp; 278 } 279 if (r->start > r->end) { 280 uint32_t tmp = r->start; 281 r->start = r->end; 282 r->end = tmp; 283 } 284 { 285 struct in_addr a; 286 char buf1[16]; // one ip address 287 288 a.s_addr = htonl(r->end); 289 strncpy(buf1, inet_ntoa(a), sizeof(buf1)); 290 a.s_addr = htonl(r->start); 291 if (1) 292 D("range is %s:%d to %s:%d", 293 inet_ntoa(a), r->port0, buf1, r->port1); 294 } 295} 296 297static void 298extract_mac_range(struct mac_range *r) 299{ 300 if (verbose) 301 D("extract MAC range from %s", r->name); 302 bcopy(ether_aton(r->name), &r->start, 6); 303 bcopy(ether_aton(r->name), &r->end, 6); 304#if 0 305 bcopy(targ->src_mac, eh->ether_shost, 6); 306 p = index(targ->g->src_mac, '-'); 307 if (p) 308 targ->src_mac_range = atoi(p+1); 309 310 bcopy(ether_aton(targ->g->dst_mac), targ->dst_mac, 6); 311 bcopy(targ->dst_mac, eh->ether_dhost, 6); 312 p = index(targ->g->dst_mac, '-'); 313 if (p) 314 targ->dst_mac_range = atoi(p+1); 315#endif 316 if (verbose) 317 D("%s starts at %s", r->name, ether_ntoa(&r->start)); 318} 319 
320static struct targ *targs; 321static int global_nthreads; 322 323/* control-C handler */ 324static void 325sigint_h(int sig) 326{ 327 int i; 328 329 (void)sig; /* UNUSED */ 330 D("received control-C on thread %p", pthread_self()); 331 for (i = 0; i < global_nthreads; i++) { 332 targs[i].cancel = 1; 333 } 334 signal(SIGINT, SIG_DFL); 335} 336 337/* sysctl wrapper to return the number of active CPUs */ 338static int 339system_ncpus(void) 340{ 341 int ncpus; 342#if defined (__FreeBSD__) 343 int mib[2] = { CTL_HW, HW_NCPU }; 344 size_t len = sizeof(mib); 345 sysctl(mib, 2, &ncpus, &len, NULL, 0); 346#elif defined(linux) 347 ncpus = sysconf(_SC_NPROCESSORS_ONLN); 348#else /* others */ 349 ncpus = 1; 350#endif /* others */ 351 return (ncpus); 352} 353 354#ifdef __linux__ 355#define sockaddr_dl sockaddr_ll 356#define sdl_family sll_family 357#define AF_LINK AF_PACKET 358#define LLADDR(s) s->sll_addr; 359#include <linux/if_tun.h> 360#define TAP_CLONEDEV "/dev/net/tun" 361#endif /* __linux__ */ 362 363#ifdef __FreeBSD__ 364#include <net/if_tun.h> 365#define TAP_CLONEDEV "/dev/tap" 366#endif /* __FreeBSD */ 367 368#ifdef __APPLE__ 369// #warning TAP not supported on apple ? 370#include <net/if_utun.h> 371#define TAP_CLONEDEV "/dev/tap" 372#endif /* __APPLE__ */ 373 374 375/* 376 * parse the vale configuration in conf and put it in nmr. 377 * Return the flag set if necessary. 378 * The configuration may consist of 0 to 4 numbers separated 379 * by commas: #tx-slots,#rx-slots,#tx-rings,#rx-rings. 380 * Missing numbers or zeroes stand for default values. 381 * As an additional convenience, if exactly one number 382 * is specified, then this is assigned to both #tx-slots and #rx-slots. 383 * If there is no 4th number, then the 3rd is assigned to both #tx-rings 384 * and #rx-rings. 
385 */ 386int 387parse_nmr_config(const char* conf, struct nmreq *nmr) 388{ 389 char *w, *tok; 390 int i, v; 391 392 nmr->nr_tx_rings = nmr->nr_rx_rings = 0; 393 nmr->nr_tx_slots = nmr->nr_rx_slots = 0; 394 if (conf == NULL || ! *conf) 395 return 0; 396 w = strdup(conf); 397 for (i = 0, tok = strtok(w, ","); tok; i++, tok = strtok(NULL, ",")) { 398 v = atoi(tok); 399 switch (i) { 400 case 0: 401 nmr->nr_tx_slots = nmr->nr_rx_slots = v; 402 break; 403 case 1: 404 nmr->nr_rx_slots = v; 405 break; 406 case 2: 407 nmr->nr_tx_rings = nmr->nr_rx_rings = v; 408 break; 409 case 3: 410 nmr->nr_rx_rings = v; 411 break; 412 default: 413 D("ignored config: %s", tok); 414 break; 415 } 416 } 417 D("txr %d txd %d rxr %d rxd %d", 418 nmr->nr_tx_rings, nmr->nr_tx_slots, 419 nmr->nr_rx_rings, nmr->nr_rx_slots); 420 free(w); 421 return (nmr->nr_tx_rings || nmr->nr_tx_slots || 422 nmr->nr_rx_rings || nmr->nr_rx_slots) ? 423 NM_OPEN_RING_CFG : 0; 424} 425 426 427/* 428 * locate the src mac address for our interface, put it 429 * into the user-supplied buffer. return 0 if ok, -1 on error. 430 */ 431static int 432source_hwaddr(const char *ifname, char *buf) 433{ 434 struct ifaddrs *ifaphead, *ifap; 435 int l = sizeof(ifap->ifa_name); 436 437 if (getifaddrs(&ifaphead) != 0) { 438 D("getifaddrs %s failed", ifname); 439 return (-1); 440 } 441 442 for (ifap = ifaphead; ifap; ifap = ifap->ifa_next) { 443 struct sockaddr_dl *sdl = 444 (struct sockaddr_dl *)ifap->ifa_addr; 445 uint8_t *mac; 446 447 if (!sdl || sdl->sdl_family != AF_LINK) 448 continue; 449 if (strncmp(ifap->ifa_name, ifname, l) != 0) 450 continue; 451 mac = (uint8_t *)LLADDR(sdl); 452 sprintf(buf, "%02x:%02x:%02x:%02x:%02x:%02x", 453 mac[0], mac[1], mac[2], 454 mac[3], mac[4], mac[5]); 455 if (verbose) 456 D("source hwaddr %s", buf); 457 break; 458 } 459 freeifaddrs(ifaphead); 460 return ifap ? 0 : 1; 461} 462 463 464/* set the thread affinity. 
*/ 465static int 466setaffinity(pthread_t me, int i) 467{ 468 cpuset_t cpumask; 469 470 if (i == -1) 471 return 0; 472 473 /* Set thread affinity affinity.*/ 474 CPU_ZERO(&cpumask); 475 CPU_SET(i, &cpumask); 476 477 if (pthread_setaffinity_np(me, sizeof(cpuset_t), &cpumask) != 0) { 478 D("Unable to set affinity: %s", strerror(errno)); 479 return 1; 480 } 481 return 0; 482} 483 484/* Compute the checksum of the given ip header. */ 485static uint16_t 486checksum(const void *data, uint16_t len, uint32_t sum) 487{ 488 const uint8_t *addr = data; 489 uint32_t i; 490 491 /* Checksum all the pairs of bytes first... */ 492 for (i = 0; i < (len & ~1U); i += 2) { 493 sum += (u_int16_t)ntohs(*((u_int16_t *)(addr + i))); 494 if (sum > 0xFFFF) 495 sum -= 0xFFFF; 496 } 497 /* 498 * If there's a single byte left over, checksum it, too. 499 * Network byte order is big-endian, so the remaining byte is 500 * the high byte. 501 */ 502 if (i < len) { 503 sum += addr[i] << 8; 504 if (sum > 0xFFFF) 505 sum -= 0xFFFF; 506 } 507 return sum; 508} 509 510static u_int16_t 511wrapsum(u_int32_t sum) 512{ 513 sum = ~sum & 0xFFFF; 514 return (htons(sum)); 515} 516 517/* Check the payload of the packet for errors (use it for debug). 518 * Look for consecutive ascii representations of the size of the packet. 519 */ 520static void 521dump_payload(char *p, int len, struct netmap_ring *ring, int cur) 522{ 523 char buf[128]; 524 int i, j, i0; 525 526 /* get the length in ASCII of the length of the packet. */ 527 528 printf("ring %p cur %5d [buf %6d flags 0x%04x len %5d]\n", 529 ring, cur, ring->slot[cur].buf_idx, 530 ring->slot[cur].flags, len); 531 /* hexdump routine */ 532 for (i = 0; i < len; ) { 533 memset(buf, sizeof(buf), ' '); 534 sprintf(buf, "%5d: ", i); 535 i0 = i; 536 for (j=0; j < 16 && i < len; i++, j++) 537 sprintf(buf+7+j*3, "%02x ", (uint8_t)(p[i])); 538 i = i0; 539 for (j=0; j < 16 && i < len; i++, j++) 540 sprintf(buf+7+j + 48, "%c", 541 isprint(p[i]) ? 
p[i] : '.'); 542 printf("%s\n", buf); 543 } 544} 545 546/* 547 * Fill a packet with some payload. 548 * We create a UDP packet so the payload starts at 549 * 14+20+8 = 42 bytes. 550 */ 551#ifdef __linux__ 552#define uh_sport source 553#define uh_dport dest 554#define uh_ulen len 555#define uh_sum check 556#endif /* linux */ 557 558/* 559 * increment the addressed in the packet, 560 * starting from the least significant field. 561 * DST_IP DST_PORT SRC_IP SRC_PORT 562 */ 563static void 564update_addresses(struct pkt *pkt, struct glob_arg *g) 565{ 566 uint32_t a; 567 uint16_t p; 568 struct ip *ip = &pkt->ip; 569 struct udphdr *udp = &pkt->udp; 570 571 do { 572 /* XXX for now it doesn't handle non-random src, random dst */ 573 if (g->options & OPT_RANDOM_SRC) { 574 udp->uh_sport = random(); 575 ip->ip_src.s_addr = random(); 576 } else { 577 p = ntohs(udp->uh_sport); 578 if (p < g->src_ip.port1) { /* just inc, no wrap */ 579 udp->uh_sport = htons(p + 1); 580 break; 581 } 582 udp->uh_sport = htons(g->src_ip.port0); 583 584 a = ntohl(ip->ip_src.s_addr); 585 if (a < g->src_ip.end) { /* just inc, no wrap */ 586 ip->ip_src.s_addr = htonl(a + 1); 587 break; 588 } 589 ip->ip_src.s_addr = htonl(g->src_ip.start); 590 591 udp->uh_sport = htons(g->src_ip.port0); 592 } 593 594 if (g->options & OPT_RANDOM_DST) { 595 udp->uh_dport = random(); 596 ip->ip_dst.s_addr = random(); 597 } else { 598 p = ntohs(udp->uh_dport); 599 if (p < g->dst_ip.port1) { /* just inc, no wrap */ 600 udp->uh_dport = htons(p + 1); 601 break; 602 } 603 udp->uh_dport = htons(g->dst_ip.port0); 604 605 a = ntohl(ip->ip_dst.s_addr); 606 if (a < g->dst_ip.end) { /* just inc, no wrap */ 607 ip->ip_dst.s_addr = htonl(a + 1); 608 break; 609 } 610 } 611 ip->ip_dst.s_addr = htonl(g->dst_ip.start); 612 } while (0); 613 // update checksum 614} 615 616/* 617 * initialize one packet and prepare for the next one. 618 * The copy could be done better instead of repeating it each time. 
619 */ 620static void 621initialize_packet(struct targ *targ) 622{ 623 struct pkt *pkt = &targ->pkt; 624 struct ether_header *eh; 625 struct ip *ip; 626 struct udphdr *udp; 627 uint16_t paylen = targ->g->pkt_size - sizeof(*eh) - sizeof(struct ip); 628 const char *payload = targ->g->options & OPT_INDIRECT ? 629 indirect_payload : default_payload; 630 int i, l0 = strlen(payload); 631 632 char errbuf[PCAP_ERRBUF_SIZE]; 633 pcap_t *file; 634 struct pcap_pkthdr *header; 635 const unsigned char *packet; 636 637 /* Read a packet from a PCAP file if asked. */ 638 if (targ->g->packet_file != NULL) { 639 if ((file = pcap_open_offline(targ->g->packet_file, 640 errbuf)) == NULL) 641 D("failed to open pcap file %s", 642 targ->g->packet_file); 643 if (pcap_next_ex(file, &header, &packet) < 0) 644 D("failed to read packet from %s", 645 targ->g->packet_file); 646 if ((targ->frame = malloc(header->caplen)) == NULL) 647 D("out of memory"); 648 bcopy(packet, (unsigned char *)targ->frame, header->caplen); 649 targ->g->pkt_size = header->caplen; 650 pcap_close(file); 651 return; 652 } 653 654 /* create a nice NUL-terminated string */ 655 for (i = 0; i < paylen; i += l0) { 656 if (l0 > paylen - i) 657 l0 = paylen - i; // last round 658 bcopy(payload, pkt->body + i, l0); 659 } 660 pkt->body[i-1] = '\0'; 661 ip = &pkt->ip; 662 663 /* prepare the headers */ 664 ip->ip_v = IPVERSION; 665 ip->ip_hl = 5; 666 ip->ip_id = 0; 667 ip->ip_tos = IPTOS_LOWDELAY; 668 ip->ip_len = ntohs(targ->g->pkt_size - sizeof(*eh)); 669 ip->ip_id = 0; 670 ip->ip_off = htons(IP_DF); /* Don't fragment */ 671 ip->ip_ttl = IPDEFTTL; 672 ip->ip_p = IPPROTO_UDP; 673 ip->ip_dst.s_addr = htonl(targ->g->dst_ip.start); 674 ip->ip_src.s_addr = htonl(targ->g->src_ip.start); 675 ip->ip_sum = wrapsum(checksum(ip, sizeof(*ip), 0)); 676 677 678 udp = &pkt->udp; 679 udp->uh_sport = htons(targ->g->src_ip.port0); 680 udp->uh_dport = htons(targ->g->dst_ip.port0); 681 udp->uh_ulen = htons(paylen); 682 /* Magic: taken from 
sbin/dhclient/packet.c */ 683 udp->uh_sum = wrapsum(checksum(udp, sizeof(*udp), 684 checksum(pkt->body, 685 paylen - sizeof(*udp), 686 checksum(&ip->ip_src, 2 * sizeof(ip->ip_src), 687 IPPROTO_UDP + (u_int32_t)ntohs(udp->uh_ulen) 688 ) 689 ) 690 )); 691 692 eh = &pkt->eh; 693 bcopy(&targ->g->src_mac.start, eh->ether_shost, 6); 694 bcopy(&targ->g->dst_mac.start, eh->ether_dhost, 6); 695 eh->ether_type = htons(ETHERTYPE_IP); 696 697 bzero(&pkt->vh, sizeof(pkt->vh)); 698#ifdef TRASH_VHOST_HDR 699 /* set bogus content */ 700 pkt->vh.fields[0] = 0xff; 701 pkt->vh.fields[1] = 0xff; 702 pkt->vh.fields[2] = 0xff; 703 pkt->vh.fields[3] = 0xff; 704 pkt->vh.fields[4] = 0xff; 705 pkt->vh.fields[5] = 0xff; 706#endif /* TRASH_VHOST_HDR */ 707 // dump_payload((void *)pkt, targ->g->pkt_size, NULL, 0); 708} 709 710static void 711set_vnet_hdr_len(struct targ *t) 712{ 713 int err, l = t->g->virt_header; 714 struct nmreq req; 715 716 if (l == 0) 717 return; 718 719 memset(&req, 0, sizeof(req)); 720 bcopy(t->nmd->req.nr_name, req.nr_name, sizeof(req.nr_name)); 721 req.nr_version = NETMAP_API; 722 req.nr_cmd = NETMAP_BDG_VNET_HDR; 723 req.nr_arg1 = l; 724 err = ioctl(t->fd, NIOCREGIF, &req); 725 if (err) { 726 D("Unable to set vnet header length %d", l); 727 } 728} 729 730 731/* 732 * create and enqueue a batch of packets on a ring. 733 * On the last one set NS_REPORT to tell the driver to generate 734 * an interrupt when done. 
 */
static int
send_packets(struct netmap_ring *ring, struct pkt *pkt, void *frame,
		int size, struct glob_arg *g, u_int count, int options,
		u_int nfrags)
{
	u_int n, sent, cur = ring->cur;
	u_int fcnt;

	/* never queue more than the ring can hold */
	n = nm_ring_space(ring);
	if (n < count)
		count = n;
	if (count < nfrags) {
		/* not even room for one full multi-fragment packet */
		D("truncating packet, no room for frags %d %d",
				count, nfrags);
	}
#if 0
	if (options & (OPT_COPY | OPT_PREFETCH) ) {
		for (sent = 0; sent < count; sent++) {
			struct netmap_slot *slot = &ring->slot[cur];
			char *p = NETMAP_BUF(ring, slot->buf_idx);

			__builtin_prefetch(p);
			cur = nm_ring_next(ring, cur);
		}
		cur = ring->cur;
	}
#endif
	/* fcnt counts down the fragments of the current packet;
	 * the template is advanced (update_addresses) only on the
	 * first fragment of each packet */
	for (fcnt = nfrags, sent = 0; sent < count; sent++) {
		struct netmap_slot *slot = &ring->slot[cur];
		char *p = NETMAP_BUF(ring, slot->buf_idx);

		slot->flags = 0;
		if (options & OPT_INDIRECT) {
			/* zero-copy: the slot points at our template */
			slot->flags |= NS_INDIRECT;
			slot->ptr = (uint64_t)frame;
		} else if (options & OPT_COPY) {
			nm_pkt_copy(frame, p, size);
			if (fcnt == nfrags)
				update_addresses(pkt, g);
		} else if (options & OPT_MEMCPY) {
			memcpy(p, frame, size);
			if (fcnt == nfrags)
				update_addresses(pkt, g);
		} else if (options & OPT_PREFETCH) {
			__builtin_prefetch(p);
		}
		if (options & OPT_DUMP)
			dump_payload(p, size, ring, cur);
		slot->len = size;
		/* chain fragments with NS_MOREFRAG, reset fcnt at
		 * each packet boundary */
		if (--fcnt > 0)
			slot->flags |= NS_MOREFRAG;
		else
			fcnt = nfrags;
		if (sent == count - 1) {
			/* last slot of the batch: close any fragment chain
			 * and ask for an interrupt when transmitted */
			slot->flags &= ~NS_MOREFRAG;
			slot->flags |= NS_REPORT;
		}
		cur = nm_ring_next(ring, cur);
	}
	ring->head = ring->cur = cur;

	return (sent);
}

/*
 * Send a packet, and wait for a response.
 * The payload (after UDP header, ofs 42) has a 4-byte sequence
 * followed by a struct timeval (or bintime?)
 */
#define	PAY_OFS	42	/* where in the pkt...
*/ 806 807static void * 808pinger_body(void *data) 809{ 810 struct targ *targ = (struct targ *) data; 811 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 812 struct netmap_if *nifp = targ->nmd->nifp; 813 int i, rx = 0, n = targ->g->npackets; 814 void *frame; 815 int size; 816 uint32_t sent = 0; 817 struct timespec ts, now, last_print; 818 uint32_t count = 0, min = 1000000000, av = 0; 819 820 frame = &targ->pkt; 821 frame += sizeof(targ->pkt.vh) - targ->g->virt_header; 822 size = targ->g->pkt_size + targ->g->virt_header; 823 824 if (targ->g->nthreads > 1) { 825 D("can only ping with 1 thread"); 826 return NULL; 827 } 828 829 clock_gettime(CLOCK_REALTIME_PRECISE, &last_print); 830 now = last_print; 831 while (n == 0 || (int)sent < n) { 832 struct netmap_ring *ring = NETMAP_TXRING(nifp, 0); 833 struct netmap_slot *slot; 834 char *p; 835 for (i = 0; i < 1; i++) { /* XXX why the loop for 1 pkt ? */ 836 slot = &ring->slot[ring->cur]; 837 slot->len = size; 838 p = NETMAP_BUF(ring, slot->buf_idx); 839 840 if (nm_ring_empty(ring)) { 841 D("-- ouch, cannot send"); 842 } else { 843 struct tstamp *tp; 844 nm_pkt_copy(frame, p, size); 845 clock_gettime(CLOCK_REALTIME_PRECISE, &ts); 846 bcopy(&sent, p+42, sizeof(sent)); 847 tp = (struct tstamp *)(p+46); 848 tp->sec = (uint32_t)ts.tv_sec; 849 tp->nsec = (uint32_t)ts.tv_nsec; 850 sent++; 851 ring->head = ring->cur = nm_ring_next(ring, ring->cur); 852 } 853 } 854 /* should use a parameter to decide how often to send */ 855 if (poll(&pfd, 1, 3000) <= 0) { 856 D("poll error/timeout on queue %d: %s", targ->me, 857 strerror(errno)); 858 continue; 859 } 860 /* see what we got back */ 861 for (i = targ->nmd->first_tx_ring; 862 i <= targ->nmd->last_tx_ring; i++) { 863 ring = NETMAP_RXRING(nifp, i); 864 while (!nm_ring_empty(ring)) { 865 uint32_t seq; 866 struct tstamp *tp; 867 slot = &ring->slot[ring->cur]; 868 p = NETMAP_BUF(ring, slot->buf_idx); 869 870 clock_gettime(CLOCK_REALTIME_PRECISE, &now); 871 bcopy(p+42, &seq, 
sizeof(seq)); 872 tp = (struct tstamp *)(p+46); 873 ts.tv_sec = (time_t)tp->sec; 874 ts.tv_nsec = (long)tp->nsec; 875 ts.tv_sec = now.tv_sec - ts.tv_sec; 876 ts.tv_nsec = now.tv_nsec - ts.tv_nsec; 877 if (ts.tv_nsec < 0) { 878 ts.tv_nsec += 1000000000; 879 ts.tv_sec--; 880 } 881 if (1) D("seq %d/%d delta %d.%09d", seq, sent, 882 (int)ts.tv_sec, (int)ts.tv_nsec); 883 if (ts.tv_nsec < (int)min) 884 min = ts.tv_nsec; 885 count ++; 886 av += ts.tv_nsec; 887 ring->head = ring->cur = nm_ring_next(ring, ring->cur); 888 rx++; 889 } 890 } 891 //D("tx %d rx %d", sent, rx); 892 //usleep(100000); 893 ts.tv_sec = now.tv_sec - last_print.tv_sec; 894 ts.tv_nsec = now.tv_nsec - last_print.tv_nsec; 895 if (ts.tv_nsec < 0) { 896 ts.tv_nsec += 1000000000; 897 ts.tv_sec--; 898 } 899 if (ts.tv_sec >= 1) { 900 D("count %d min %d av %d", 901 count, min, av/count); 902 count = 0; 903 av = 0; 904 min = 100000000; 905 last_print = now; 906 } 907 } 908 return NULL; 909} 910 911 912/* 913 * reply to ping requests 914 */ 915static void * 916ponger_body(void *data) 917{ 918 struct targ *targ = (struct targ *) data; 919 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 920 struct netmap_if *nifp = targ->nmd->nifp; 921 struct netmap_ring *txring, *rxring; 922 int i, rx = 0, sent = 0, n = targ->g->npackets; 923 924 if (targ->g->nthreads > 1) { 925 D("can only reply ping with 1 thread"); 926 return NULL; 927 } 928 D("understood ponger %d but don't know how to do it", n); 929 while (n == 0 || sent < n) { 930 uint32_t txcur, txavail; 931//#define BUSYWAIT 932#ifdef BUSYWAIT 933 ioctl(pfd.fd, NIOCRXSYNC, NULL); 934#else 935 if (poll(&pfd, 1, 1000) <= 0) { 936 D("poll error/timeout on queue %d: %s", targ->me, 937 strerror(errno)); 938 continue; 939 } 940#endif 941 txring = NETMAP_TXRING(nifp, 0); 942 txcur = txring->cur; 943 txavail = nm_ring_space(txring); 944 /* see what we got back */ 945 for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) { 946 rxring = NETMAP_RXRING(nifp, 
i); 947 while (!nm_ring_empty(rxring)) { 948 uint16_t *spkt, *dpkt; 949 uint32_t cur = rxring->cur; 950 struct netmap_slot *slot = &rxring->slot[cur]; 951 char *src, *dst; 952 src = NETMAP_BUF(rxring, slot->buf_idx); 953 //D("got pkt %p of size %d", src, slot->len); 954 rxring->head = rxring->cur = nm_ring_next(rxring, cur); 955 rx++; 956 if (txavail == 0) 957 continue; 958 dst = NETMAP_BUF(txring, 959 txring->slot[txcur].buf_idx); 960 /* copy... */ 961 dpkt = (uint16_t *)dst; 962 spkt = (uint16_t *)src; 963 nm_pkt_copy(src, dst, slot->len); 964 dpkt[0] = spkt[3]; 965 dpkt[1] = spkt[4]; 966 dpkt[2] = spkt[5]; 967 dpkt[3] = spkt[0]; 968 dpkt[4] = spkt[1]; 969 dpkt[5] = spkt[2]; 970 txring->slot[txcur].len = slot->len; 971 /* XXX swap src dst mac */ 972 txcur = nm_ring_next(txring, txcur); 973 txavail--; 974 sent++; 975 } 976 } 977 txring->head = txring->cur = txcur; 978 targ->count = sent; 979#ifdef BUSYWAIT 980 ioctl(pfd.fd, NIOCTXSYNC, NULL); 981#endif 982 //D("tx %d rx %d", sent, rx); 983 } 984 return NULL; 985} 986 987static __inline int 988timespec_ge(const struct timespec *a, const struct timespec *b) 989{ 990 991 if (a->tv_sec > b->tv_sec) 992 return (1); 993 if (a->tv_sec < b->tv_sec) 994 return (0); 995 if (a->tv_nsec >= b->tv_nsec) 996 return (1); 997 return (0); 998} 999 1000static __inline struct timespec 1001timeval2spec(const struct timeval *a) 1002{ 1003 struct timespec ts = { 1004 .tv_sec = a->tv_sec, 1005 .tv_nsec = a->tv_usec * 1000 1006 }; 1007 return ts; 1008} 1009 1010static __inline struct timeval 1011timespec2val(const struct timespec *a) 1012{ 1013 struct timeval tv = { 1014 .tv_sec = a->tv_sec, 1015 .tv_usec = a->tv_nsec / 1000 1016 }; 1017 return tv; 1018} 1019 1020 1021static __inline struct timespec 1022timespec_add(struct timespec a, struct timespec b) 1023{ 1024 struct timespec ret = { a.tv_sec + b.tv_sec, a.tv_nsec + b.tv_nsec }; 1025 if (ret.tv_nsec >= 1000000000) { 1026 ret.tv_sec++; 1027 ret.tv_nsec -= 1000000000; 1028 } 1029 return 
ret; 1030} 1031 1032static __inline struct timespec 1033timespec_sub(struct timespec a, struct timespec b) 1034{ 1035 struct timespec ret = { a.tv_sec - b.tv_sec, a.tv_nsec - b.tv_nsec }; 1036 if (ret.tv_nsec < 0) { 1037 ret.tv_sec--; 1038 ret.tv_nsec += 1000000000; 1039 } 1040 return ret; 1041} 1042 1043 1044/* 1045 * wait until ts, either busy or sleeping if more than 1ms. 1046 * Return wakeup time. 1047 */ 1048static struct timespec 1049wait_time(struct timespec ts) 1050{ 1051 for (;;) { 1052 struct timespec w, cur; 1053 clock_gettime(CLOCK_REALTIME_PRECISE, &cur); 1054 w = timespec_sub(ts, cur); 1055 if (w.tv_sec < 0) 1056 return cur; 1057 else if (w.tv_sec > 0 || w.tv_nsec > 1000000) 1058 poll(NULL, 0, 1); 1059 } 1060} 1061 1062static void * 1063sender_body(void *data) 1064{ 1065 struct targ *targ = (struct targ *) data; 1066 struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT }; 1067 struct netmap_if *nifp; 1068 struct netmap_ring *txring; 1069 int i, n = targ->g->npackets / targ->g->nthreads; 1070 int64_t sent = 0; 1071 int options = targ->g->options | OPT_COPY; 1072 struct timespec nexttime = { 0, 0}; // XXX silence compiler 1073 int rate_limit = targ->g->tx_rate; 1074 struct pkt *pkt = &targ->pkt; 1075 void *frame; 1076 int size; 1077 1078 if (targ->frame == NULL) { 1079 frame = pkt; 1080 frame += sizeof(pkt->vh) - targ->g->virt_header; 1081 size = targ->g->pkt_size + targ->g->virt_header; 1082 } else { 1083 frame = targ->frame; 1084 size = targ->g->pkt_size; 1085 } 1086 1087 D("start, fd %d main_fd %d", targ->fd, targ->g->main_fd); 1088 if (setaffinity(targ->thread, targ->affinity)) 1089 goto quit; 1090 1091 /* main loop.*/ 1092 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 1093 if (rate_limit) { 1094 targ->tic = timespec_add(targ->tic, (struct timespec){2,0}); 1095 targ->tic.tv_nsec = 0; 1096 wait_time(targ->tic); 1097 nexttime = targ->tic; 1098 } 1099 if (targ->g->dev_type == DEV_TAP) { 1100 D("writing to file desc %d", targ->g->main_fd); 1101 
		/* plain write() loop; targ->count is refreshed every
		 * ~10000 iterations so the main thread sees progress */
		for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) {
			if (write(targ->g->main_fd, frame, size) != -1)
				sent++;
			update_addresses(pkt, targ->g);
			if (i > 10000) {
				targ->count = sent;
				i = 0;
			}
		}
#ifndef NO_PCAP
	} else if (targ->g->dev_type == DEV_PCAP) {
		pcap_t *p = targ->g->p;

		for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) {
			if (pcap_inject(p, frame, size) != -1)
				sent++;
			update_addresses(pkt, targ->g);
			if (i > 10000) {
				targ->count = sent;
				i = 0;
			}
		}
#endif /* NO_PCAP */
	} else {
		int tosend = 0;
		int frags = targ->g->frags;

		nifp = targ->nmd->nifp;
		while (!targ->cancel && (n == 0 || sent < n)) {

			if (rate_limit && tosend <= 0) {
				/* refill the per-period budget and sleep
				 * until the next transmission slot */
				tosend = targ->g->burst;
				nexttime = timespec_add(nexttime, targ->g->tx_period);
				wait_time(nexttime);
			}

			/*
			 * wait for available room in the send queue(s)
			 */
			if (poll(&pfd, 1, 2000) <= 0) {
				if (targ->cancel)
					break;
				D("poll error/timeout on queue %d: %s", targ->me,
					strerror(errno));
				// goto quit;
			}
			if (pfd.revents & POLLERR) {
				D("poll error");
				goto quit;
			}
			/*
			 * scan our queues and send on those with room
			 */
			/* after a warm-up of 100k packets, stop copying if
			 * the user did not explicitly request OPT_COPY */
			if (options & OPT_COPY && sent > 100000 && !(targ->g->options & OPT_COPY) ) {
				D("drop copy");
				options &= ~OPT_COPY;
			}
			for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) {
				int m, limit = rate_limit ?  tosend : targ->g->burst;
				if (n > 0 && n - sent < limit)
					limit = n - sent;
				txring = NETMAP_TXRING(nifp, i);
				if (nm_ring_empty(txring))
					continue;
				/* round up so we only send whole packets,
				 * never a partial fragment chain */
				if (frags > 1)
					limit = ((limit + frags - 1) / frags) * frags;

				m = send_packets(txring, pkt, frame, size, targ->g,
						 limit, options, frags);
				ND("limit %d tail %d frags %d m %d",
					limit, txring->tail, frags, m);
				sent += m;
				targ->count = sent;
				if (rate_limit) {
					tosend -= m;
					if (tosend <= 0)
						break;
				}
			}
		}
		/* flush any remaining packets */
		D("flush tail %d head %d on thread %p",
			txring->tail, txring->head,
			pthread_self());
		ioctl(pfd.fd, NIOCTXSYNC, NULL);

		/* final part: wait all the TX queues to be empty. */
		for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) {
			txring = NETMAP_TXRING(nifp, i);
			while (nm_tx_pending(txring)) {
				RD(5, "pending tx tail %d head %d on ring %d",
					txring->tail, txring->head, i);
				ioctl(pfd.fd, NIOCTXSYNC, NULL);
				usleep(1); /* wait 1 tick */
			}
		}
	} /* end DEV_NETMAP */

	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
	targ->completed = 1;
	targ->count = sent;

quit:
	/* reset the ``used`` flag. */
	targ->used = 0;

	return (NULL);
}


#ifndef NO_PCAP
/* pcap_dispatch() callback: just count the packet, 'user' points to
 * the per-thread counter */
static void
receive_pcap(u_char *user, const struct pcap_pkthdr * h,
	const u_char * bytes)
{
	int *count = (int *)user;
	(void)h;	/* UNUSED */
	(void)bytes;	/* UNUSED */
	(*count)++;
}
#endif /* !NO_PCAP */

/* drain at most 'limit' packets from an RX ring; returns the number
 * actually consumed. Optionally hexdump each payload. */
static int
receive_packets(struct netmap_ring *ring, u_int limit, int dump)
{
	u_int cur, rx, n;

	cur = ring->cur;
	n = nm_ring_space(ring);
	if (n < limit)
		limit = n;
	for (rx = 0; rx < limit; rx++) {
		struct netmap_slot *slot = &ring->slot[cur];
		char *p = NETMAP_BUF(ring, slot->buf_idx);

		if (dump)
			dump_payload(p, slot->len, ring, cur);

		cur = nm_ring_next(ring, cur);
	}
	ring->head = ring->cur = cur;

	return (rx);
}

/* sink thread: count incoming packets on tap, pcap or netmap fds */
static void *
receiver_body(void *data)
{
	struct targ *targ = (struct targ *) data;
	struct pollfd pfd = { .fd = targ->fd, .events = POLLIN };
	struct netmap_if *nifp;
	struct netmap_ring *rxring;
	int i;
	uint64_t received = 0;

	if (setaffinity(targ->thread, targ->affinity))
		goto quit;

	D("reading from %s fd %d main_fd %d",
		targ->g->ifname, targ->fd, targ->g->main_fd);
	/* unbounded wait for the first packet. */
	for (;!targ->cancel;) {
		i = poll(&pfd, 1, 1000);
		if (i > 0 && !(pfd.revents & POLLERR))
			break;
		RD(1, "waiting for initial packets, poll returns %d %d",
			i, pfd.revents);
	}
	/* main loop, exit after 1s silence */
	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic);
	if (targ->g->dev_type == DEV_TAP) {
		while (!targ->cancel) {
			char buf[MAX_BODYSIZE];
			/* XXX should we poll ? */
			if (read(targ->g->main_fd, buf, sizeof(buf)) > 0)
				targ->count++;
		}
#ifndef NO_PCAP
	} else if (targ->g->dev_type == DEV_PCAP) {
		while (!targ->cancel) {
			/* XXX should we poll ? */
			pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap,
				(u_char *)&targ->count);
		}
#endif /* !NO_PCAP */
	} else {
		int dump = targ->g->options & OPT_DUMP;

		nifp = targ->nmd->nifp;
		while (!targ->cancel) {
			/* Once we started to receive packets, wait at most 1 seconds
			   before quitting. */
			if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) {
				clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
				targ->toc.tv_sec -= 1; /* Subtract timeout time. */
				goto out;
			}

			if (pfd.revents & POLLERR) {
				D("poll err");
				goto quit;
			}

			for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) {
				int m;

				rxring = NETMAP_RXRING(nifp, i);
				if (nm_ring_empty(rxring))
					continue;

				m = receive_packets(rxring, targ->g->burst, dump);
				received += m;
			}
			targ->count = received;
		}
	}

	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);

out:
	targ->completed = 1;
	targ->count = received;

quit:
	/* reset the ``used`` flag. */
	targ->used = 0;

	return (NULL);
}

/* very crude code to print a number in normalized form.
 * Caller has to make sure that the buffer is large enough.
1334 */ 1335static const char * 1336norm(char *buf, double val) 1337{ 1338 char *units[] = { "", "K", "M", "G", "T" }; 1339 u_int i; 1340 1341 for (i = 0; val >=1000 && i < sizeof(units)/sizeof(char *) - 1; i++) 1342 val /= 1000; 1343 sprintf(buf, "%.2f %s", val, units[i]); 1344 return buf; 1345} 1346 1347static void 1348tx_output(uint64_t sent, int size, double delta) 1349{ 1350 double bw, raw_bw, pps; 1351 char b1[40], b2[80], b3[80]; 1352 1353 printf("Sent %llu packets, %d bytes each, in %.2f seconds.\n", 1354 (unsigned long long)sent, size, delta); 1355 if (delta == 0) 1356 delta = 1e-6; 1357 if (size < 60) /* correct for min packet size */ 1358 size = 60; 1359 pps = sent / delta; 1360 bw = (8.0 * size * sent) / delta; 1361 /* raw packets have4 bytes crc + 20 bytes framing */ 1362 raw_bw = (8.0 * (size + 24) * sent) / delta; 1363 1364 printf("Speed: %spps Bandwidth: %sbps (raw %sbps)\n", 1365 norm(b1, pps), norm(b2, bw), norm(b3, raw_bw) ); 1366} 1367 1368 1369static void 1370rx_output(uint64_t received, double delta) 1371{ 1372 double pps; 1373 char b1[40]; 1374 1375 printf("Received %llu packets, in %.2f seconds.\n", 1376 (unsigned long long) received, delta); 1377 1378 if (delta == 0) 1379 delta = 1e-6; 1380 pps = received / delta; 1381 printf("Speed: %spps\n", norm(b1, pps)); 1382} 1383 1384static void 1385usage(void) 1386{ 1387 const char *cmd = "pkt-gen"; 1388 fprintf(stderr, 1389 "Usage:\n" 1390 "%s arguments\n" 1391 "\t-i interface interface name\n" 1392 "\t-f function tx rx ping pong\n" 1393 "\t-n count number of iterations (can be 0)\n" 1394 "\t-t pkts_to_send also forces tx mode\n" 1395 "\t-r pkts_to_receive also forces rx mode\n" 1396 "\t-l pkt_size in bytes excluding CRC\n" 1397 "\t-d dst_ip[:port[-dst_ip:port]] single or range\n" 1398 "\t-s src_ip[:port[-src_ip:port]] single or range\n" 1399 "\t-D dst-mac\n" 1400 "\t-S src-mac\n" 1401 "\t-a cpu_id use setaffinity\n" 1402 "\t-b burst size testing, mostly\n" 1403 "\t-c cores cores to use\n" 1404 
"\t-p threads processes/threads to use\n" 1405 "\t-T report_ms milliseconds between reports\n" 1406 "\t-P use libpcap instead of netmap\n" 1407 "\t-w wait_for_link_time in seconds\n" 1408 "\t-R rate in packets per second\n" 1409 "\t-X dump payload\n" 1410 "\t-H len add empty virtio-net-header with size 'len'\n" 1411 "\t-P file load packet from pcap file\n" 1412 "\t-z use random IPv4 src address/port\n" 1413 "\t-Z use random IPv4 dst address/port\n" 1414 "", 1415 cmd); 1416 1417 exit(0); 1418} 1419 1420static void 1421start_threads(struct glob_arg *g) 1422{ 1423 int i; 1424 1425 targs = calloc(g->nthreads, sizeof(*targs)); 1426 /* 1427 * Now create the desired number of threads, each one 1428 * using a single descriptor. 1429 */ 1430 for (i = 0; i < g->nthreads; i++) { 1431 struct targ *t = &targs[i]; 1432 1433 bzero(t, sizeof(*t)); 1434 t->fd = -1; /* default, with pcap */ 1435 t->g = g; 1436 1437 if (g->dev_type == DEV_NETMAP) { 1438 struct nm_desc nmd = *g->nmd; /* copy, we overwrite ringid */ 1439 uint64_t nmd_flags = 0; 1440 nmd.self = &nmd; 1441 1442 if (g->nthreads > 1) { 1443 if (nmd.req.nr_flags != NR_REG_ALL_NIC) { 1444 D("invalid nthreads mode %d", nmd.req.nr_flags); 1445 continue; 1446 } 1447 nmd.req.nr_flags = NR_REG_ONE_NIC; 1448 nmd.req.nr_ringid = i; 1449 } 1450 /* Only touch one of the rings (rx is already ok) */ 1451 if (g->td_body == receiver_body) 1452 nmd_flags |= NETMAP_NO_TX_POLL; 1453 1454 /* register interface. Override ifname and ringid etc. 
*/ 1455 if (g->options & OPT_MONITOR_TX) 1456 nmd.req.nr_flags |= NR_MONITOR_TX; 1457 if (g->options & OPT_MONITOR_RX) 1458 nmd.req.nr_flags |= NR_MONITOR_RX; 1459 1460 t->nmd = nm_open(t->g->ifname, NULL, nmd_flags | 1461 NM_OPEN_IFNAME | NM_OPEN_NO_MMAP, &nmd); 1462 if (t->nmd == NULL) { 1463 D("Unable to open %s: %s", 1464 t->g->ifname, strerror(errno)); 1465 continue; 1466 } 1467 t->fd = t->nmd->fd; 1468 set_vnet_hdr_len(t); 1469 1470 } else { 1471 targs[i].fd = g->main_fd; 1472 } 1473 t->used = 1; 1474 t->me = i; 1475 if (g->affinity >= 0) { 1476 if (g->affinity < g->cpus) 1477 t->affinity = g->affinity; 1478 else 1479 t->affinity = i % g->cpus; 1480 } else { 1481 t->affinity = -1; 1482 } 1483 /* default, init packets */ 1484 initialize_packet(t); 1485 1486 if (pthread_create(&t->thread, NULL, g->td_body, t) == -1) { 1487 D("Unable to create thread %d: %s", i, strerror(errno)); 1488 t->used = 0; 1489 } 1490 } 1491} 1492 1493static void 1494main_thread(struct glob_arg *g) 1495{ 1496 int i; 1497 1498 uint64_t prev = 0; 1499 uint64_t count = 0; 1500 double delta_t; 1501 struct timeval tic, toc; 1502 1503 gettimeofday(&toc, NULL); 1504 for (;;) { 1505 struct timeval now, delta; 1506 uint64_t pps, usec, my_count, npkts; 1507 int done = 0; 1508 1509 delta.tv_sec = g->report_interval/1000; 1510 delta.tv_usec = (g->report_interval%1000)*1000; 1511 select(0, NULL, NULL, NULL, &delta); 1512 gettimeofday(&now, NULL); 1513 timersub(&now, &toc, &toc); 1514 my_count = 0; 1515 for (i = 0; i < g->nthreads; i++) { 1516 my_count += targs[i].count; 1517 if (targs[i].used == 0) 1518 done++; 1519 } 1520 usec = toc.tv_sec* 1000000 + toc.tv_usec; 1521 if (usec < 10000) 1522 continue; 1523 npkts = my_count - prev; 1524 pps = (npkts*1000000 + usec/2) / usec; 1525 D("%llu pps (%llu pkts in %llu usec)", 1526 (unsigned long long)pps, 1527 (unsigned long long)npkts, 1528 (unsigned long long)usec); 1529 prev = my_count; 1530 toc = now; 1531 if (done == g->nthreads) 1532 break; 1533 } 1534 
1535 timerclear(&tic); 1536 timerclear(&toc); 1537 for (i = 0; i < g->nthreads; i++) { 1538 struct timespec t_tic, t_toc; 1539 /* 1540 * Join active threads, unregister interfaces and close 1541 * file descriptors. 1542 */ 1543 if (targs[i].used) 1544 pthread_join(targs[i].thread, NULL); 1545 close(targs[i].fd); 1546 1547 if (targs[i].completed == 0) 1548 D("ouch, thread %d exited with error", i); 1549 1550 /* 1551 * Collect threads output and extract information about 1552 * how long it took to send all the packets. 1553 */ 1554 count += targs[i].count; 1555 t_tic = timeval2spec(&tic); 1556 t_toc = timeval2spec(&toc); 1557 if (!timerisset(&tic) || timespec_ge(&targs[i].tic, &t_tic)) 1558 tic = timespec2val(&targs[i].tic); 1559 if (!timerisset(&toc) || timespec_ge(&targs[i].toc, &t_toc)) 1560 toc = timespec2val(&targs[i].toc); 1561 } 1562 1563 /* print output. */ 1564 timersub(&toc, &tic, &toc); 1565 delta_t = toc.tv_sec + 1e-6* toc.tv_usec; 1566 if (g->td_body == sender_body) 1567 tx_output(count, g->pkt_size, delta_t); 1568 else 1569 rx_output(count, delta_t); 1570 1571 if (g->dev_type == DEV_NETMAP) { 1572 munmap(g->nmd->mem, g->nmd->req.nr_memsize); 1573 close(g->main_fd); 1574 } 1575} 1576 1577 1578struct sf { 1579 char *key; 1580 void *f; 1581}; 1582 1583static struct sf func[] = { 1584 { "tx", sender_body }, 1585 { "rx", receiver_body }, 1586 { "ping", pinger_body }, 1587 { "pong", ponger_body }, 1588 { NULL, NULL } 1589}; 1590 1591static int 1592tap_alloc(char *dev) 1593{ 1594 struct ifreq ifr; 1595 int fd, err; 1596 char *clonedev = TAP_CLONEDEV; 1597 1598 (void)err; 1599 (void)dev; 1600 /* Arguments taken by the function: 1601 * 1602 * char *dev: the name of an interface (or '\0'). MUST have enough 1603 * space to hold the interface name if '\0' is passed 1604 * int flags: interface flags (eg, IFF_TUN etc.) 
1605 */ 1606 1607#ifdef __FreeBSD__ 1608 if (dev[3]) { /* tapSomething */ 1609 static char buf[128]; 1610 snprintf(buf, sizeof(buf), "/dev/%s", dev); 1611 clonedev = buf; 1612 } 1613#endif 1614 /* open the device */ 1615 if( (fd = open(clonedev, O_RDWR)) < 0 ) { 1616 return fd; 1617 } 1618 D("%s open successful", clonedev); 1619 1620 /* preparation of the struct ifr, of type "struct ifreq" */ 1621 memset(&ifr, 0, sizeof(ifr)); 1622 1623#ifdef linux 1624 ifr.ifr_flags = IFF_TAP | IFF_NO_PI; 1625 1626 if (*dev) { 1627 /* if a device name was specified, put it in the structure; otherwise, 1628 * the kernel will try to allocate the "next" device of the 1629 * specified type */ 1630 strncpy(ifr.ifr_name, dev, IFNAMSIZ); 1631 } 1632 1633 /* try to create the device */ 1634 if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ) { 1635 D("failed to to a TUNSETIFF: %s", strerror(errno)); 1636 close(fd); 1637 return err; 1638 } 1639 1640 /* if the operation was successful, write back the name of the 1641 * interface to the variable "dev", so the caller can know 1642 * it. 
Note that the caller MUST reserve space in *dev (see calling 1643 * code below) */ 1644 strcpy(dev, ifr.ifr_name); 1645 D("new name is %s", dev); 1646#endif /* linux */ 1647 1648 /* this is the special file descriptor that the caller will use to talk 1649 * with the virtual interface */ 1650 return fd; 1651} 1652 1653int 1654main(int arc, char **argv) 1655{ 1656 int i; 1657 1658 struct glob_arg g; 1659 1660 int ch; 1661 int wait_link = 2; 1662 int devqueues = 1; /* how many device queues */ 1663 1664 bzero(&g, sizeof(g)); 1665 1666 g.main_fd = -1; 1667 g.td_body = receiver_body; 1668 g.report_interval = 1000; /* report interval */ 1669 g.affinity = -1; 1670 /* ip addresses can also be a range x.x.x.x-x.x.x.y */ 1671 g.src_ip.name = "10.0.0.1"; 1672 g.dst_ip.name = "10.1.0.1"; 1673 g.dst_mac.name = "ff:ff:ff:ff:ff:ff"; 1674 g.src_mac.name = NULL; 1675 g.pkt_size = 60; 1676 g.burst = 512; // default 1677 g.nthreads = 1; 1678 g.cpus = 1; 1679 g.forever = 1; 1680 g.tx_rate = 0; 1681 g.frags = 1; 1682 g.nmr_config = ""; 1683 g.virt_header = 0; 1684 1685 while ( (ch = getopt(arc, argv, 1686 "a:f:F:n:i:Il:d:s:D:S:b:c:o:p:T:w:WvR:XC:H:e:m:P:zZ")) != -1) { 1687 struct sf *fn; 1688 1689 switch(ch) { 1690 default: 1691 D("bad option %c %s", ch, optarg); 1692 usage(); 1693 break; 1694 1695 case 'n': 1696 g.npackets = atoi(optarg); 1697 break; 1698 1699 case 'F': 1700 i = atoi(optarg); 1701 if (i < 1 || i > 63) { 1702 D("invalid frags %d [1..63], ignore", i); 1703 break; 1704 } 1705 g.frags = i; 1706 break; 1707 1708 case 'f': 1709 for (fn = func; fn->key; fn++) { 1710 if (!strcmp(fn->key, optarg)) 1711 break; 1712 } 1713 if (fn->key) 1714 g.td_body = fn->f; 1715 else 1716 D("unrecognised function %s", optarg); 1717 break; 1718 1719 case 'o': /* data generation options */ 1720 g.options = atoi(optarg); 1721 break; 1722 1723 case 'a': /* force affinity */ 1724 g.affinity = atoi(optarg); 1725 break; 1726 1727 case 'i': /* interface */ 1728 /* a prefix of tap: netmap: or pcap: 
forces the mode. 1729 * otherwise we guess 1730 */ 1731 D("interface is %s", optarg); 1732 if (strlen(optarg) > MAX_IFNAMELEN - 8) { 1733 D("ifname too long %s", optarg); 1734 break; 1735 } 1736 strcpy(g.ifname, optarg); 1737 if (!strcmp(optarg, "null")) { 1738 g.dev_type = DEV_NETMAP; 1739 g.dummy_send = 1; 1740 } else if (!strncmp(optarg, "tap:", 4)) { 1741 g.dev_type = DEV_TAP; 1742 strcpy(g.ifname, optarg + 4); 1743 } else if (!strncmp(optarg, "pcap:", 5)) { 1744 g.dev_type = DEV_PCAP; 1745 strcpy(g.ifname, optarg + 5); 1746 } else if (!strncmp(optarg, "netmap:", 7) || 1747 !strncmp(optarg, "vale", 4)) { 1748 g.dev_type = DEV_NETMAP; 1749 } else if (!strncmp(optarg, "tap", 3)) { 1750 g.dev_type = DEV_TAP; 1751 } else { /* prepend netmap: */ 1752 g.dev_type = DEV_NETMAP; 1753 sprintf(g.ifname, "netmap:%s", optarg); 1754 } 1755 break; 1756 1757 case 'I': 1758 g.options |= OPT_INDIRECT; /* XXX use indirect buffer */ 1759 break; 1760 1761 case 'l': /* pkt_size */ 1762 g.pkt_size = atoi(optarg); 1763 break; 1764 1765 case 'd': 1766 g.dst_ip.name = optarg; 1767 break; 1768 1769 case 's': 1770 g.src_ip.name = optarg; 1771 break; 1772 1773 case 'T': /* report interval */ 1774 g.report_interval = atoi(optarg); 1775 break; 1776 1777 case 'w': 1778 wait_link = atoi(optarg); 1779 break; 1780 1781 case 'W': /* XXX changed default */ 1782 g.forever = 0; /* do not exit rx even with no traffic */ 1783 break; 1784 1785 case 'b': /* burst */ 1786 g.burst = atoi(optarg); 1787 break; 1788 case 'c': 1789 g.cpus = atoi(optarg); 1790 break; 1791 case 'p': 1792 g.nthreads = atoi(optarg); 1793 break; 1794 1795 case 'D': /* destination mac */ 1796 g.dst_mac.name = optarg; 1797 break; 1798 1799 case 'S': /* source mac */ 1800 g.src_mac.name = optarg; 1801 break; 1802 case 'v': 1803 verbose++; 1804 break; 1805 case 'R': 1806 g.tx_rate = atoi(optarg); 1807 break; 1808 case 'X': 1809 g.options |= OPT_DUMP; 1810 break; 1811 case 'C': 1812 g.nmr_config = strdup(optarg); 1813 break; 1814 case 
'H': 1815 g.virt_header = atoi(optarg); 1816 break; 1817 case 'e': /* extra bufs */ 1818 g.extra_bufs = atoi(optarg); 1819 break; 1820 case 'm': 1821 if (strcmp(optarg, "tx") == 0) { 1822 g.options |= OPT_MONITOR_TX; 1823 } else if (strcmp(optarg, "rx") == 0) { 1824 g.options |= OPT_MONITOR_RX; 1825 } else { 1826 D("unrecognized monitor mode %s", optarg); 1827 } 1828 break; 1829 case 'P': 1830 g.packet_file = strdup(optarg); 1831 break; 1832 case 'z': 1833 g.options |= OPT_RANDOM_SRC; 1834 break; 1835 case 'Z': 1836 g.options |= OPT_RANDOM_DST; 1837 break; 1838 } 1839 } 1840 1841 if (strlen(g.ifname) <=0 ) { 1842 D("missing ifname"); 1843 usage(); 1844 } 1845 1846 i = system_ncpus(); 1847 if (g.cpus < 0 || g.cpus > i) { 1848 D("%d cpus is too high, have only %d cpus", g.cpus, i); 1849 usage(); 1850 } 1851 if (g.cpus == 0) 1852 g.cpus = i; 1853 1854 if (g.pkt_size < 16 || g.pkt_size > MAX_PKTSIZE) { 1855 D("bad pktsize %d [16..%d]\n", g.pkt_size, MAX_PKTSIZE); 1856 usage(); 1857 } 1858 1859 if (g.src_mac.name == NULL) { 1860 static char mybuf[20] = "00:00:00:00:00:00"; 1861 /* retrieve source mac address. 
*/ 1862 if (source_hwaddr(g.ifname, mybuf) == -1) { 1863 D("Unable to retrieve source mac"); 1864 // continue, fail later 1865 } 1866 g.src_mac.name = mybuf; 1867 } 1868 /* extract address ranges */ 1869 extract_ip_range(&g.src_ip); 1870 extract_ip_range(&g.dst_ip); 1871 extract_mac_range(&g.src_mac); 1872 extract_mac_range(&g.dst_mac); 1873 1874 if (g.src_ip.start != g.src_ip.end || 1875 g.src_ip.port0 != g.src_ip.port1 || 1876 g.dst_ip.start != g.dst_ip.end || 1877 g.dst_ip.port0 != g.dst_ip.port1) 1878 g.options |= OPT_COPY; 1879 1880 if (g.virt_header != 0 && g.virt_header != VIRT_HDR_1 1881 && g.virt_header != VIRT_HDR_2) { 1882 D("bad virtio-net-header length"); 1883 usage(); 1884 } 1885 1886 if (g.dev_type == DEV_TAP) { 1887 D("want to use tap %s", g.ifname); 1888 g.main_fd = tap_alloc(g.ifname); 1889 if (g.main_fd < 0) { 1890 D("cannot open tap %s", g.ifname); 1891 usage(); 1892 } 1893#ifndef NO_PCAP 1894 } else if (g.dev_type == DEV_PCAP) { 1895 char pcap_errbuf[PCAP_ERRBUF_SIZE]; 1896 1897 pcap_errbuf[0] = '\0'; // init the buffer 1898 g.p = pcap_open_live(g.ifname, 256 /* XXX */, 1, 100, pcap_errbuf); 1899 if (g.p == NULL) { 1900 D("cannot open pcap on %s", g.ifname); 1901 usage(); 1902 } 1903 g.main_fd = pcap_fileno(g.p); 1904 D("using pcap on %s fileno %d", g.ifname, g.main_fd); 1905#endif /* !NO_PCAP */ 1906 } else if (g.dummy_send) { /* but DEV_NETMAP */ 1907 D("using a dummy send routine"); 1908 } else { 1909 struct nmreq base_nmd; 1910 1911 bzero(&base_nmd, sizeof(base_nmd)); 1912 1913 parse_nmr_config(g.nmr_config, &base_nmd); 1914 if (g.extra_bufs) { 1915 base_nmd.nr_arg3 = g.extra_bufs; 1916 } 1917 1918 /* 1919 * Open the netmap device using nm_open(). 1920 * 1921 * protocol stack and may cause a reset of the card, 1922 * which in turn may take some time for the PHY to 1923 * reconfigure. We do the open here to have time to reset. 
1924 */ 1925 g.nmd = nm_open(g.ifname, &base_nmd, 0, NULL); 1926 if (g.nmd == NULL) { 1927 D("Unable to open %s: %s", g.ifname, strerror(errno)); 1928 goto out; 1929 } 1930 g.main_fd = g.nmd->fd; 1931 D("mapped %dKB at %p", g.nmd->req.nr_memsize>>10, g.nmd->mem); 1932 1933 /* get num of queues in tx or rx */ 1934 if (g.td_body == sender_body) 1935 devqueues = g.nmd->req.nr_tx_rings; 1936 else 1937 devqueues = g.nmd->req.nr_rx_rings; 1938 1939 /* validate provided nthreads. */ 1940 if (g.nthreads < 1 || g.nthreads > devqueues) { 1941 D("bad nthreads %d, have %d queues", g.nthreads, devqueues); 1942 // continue, fail later 1943 } 1944 1945 if (verbose) { 1946 struct netmap_if *nifp = g.nmd->nifp; 1947 struct nmreq *req = &g.nmd->req; 1948 1949 D("nifp at offset %d, %d tx %d rx region %d", 1950 req->nr_offset, req->nr_tx_rings, req->nr_rx_rings, 1951 req->nr_arg2); 1952 for (i = 0; i <= req->nr_tx_rings; i++) { 1953 struct netmap_ring *ring = NETMAP_TXRING(nifp, i); 1954 D(" TX%d at 0x%lx slots %d", i, 1955 (char *)ring - (char *)nifp, ring->num_slots); 1956 } 1957 for (i = 0; i <= req->nr_rx_rings; i++) { 1958 struct netmap_ring *ring = NETMAP_RXRING(nifp, i); 1959 D(" RX%d at 0x%lx slots %d", i, 1960 (char *)ring - (char *)nifp, ring->num_slots); 1961 } 1962 } 1963 1964 /* Print some debug information. */ 1965 fprintf(stdout, 1966 "%s %s: %d queues, %d threads and %d cpus.\n", 1967 (g.td_body == sender_body) ? "Sending on" : "Receiving from", 1968 g.ifname, 1969 devqueues, 1970 g.nthreads, 1971 g.cpus); 1972 if (g.td_body == sender_body) { 1973 fprintf(stdout, "%s -> %s (%s -> %s)\n", 1974 g.src_ip.name, g.dst_ip.name, 1975 g.src_mac.name, g.dst_mac.name); 1976 } 1977 1978out: 1979 /* Exit if something went wrong. */ 1980 if (g.main_fd < 0) { 1981 D("aborting"); 1982 usage(); 1983 } 1984 } 1985 1986 1987 if (g.options) { 1988 D("--- SPECIAL OPTIONS:%s%s%s%s%s\n", 1989 g.options & OPT_PREFETCH ? " prefetch" : "", 1990 g.options & OPT_ACCESS ? 
" access" : "", 1991 g.options & OPT_MEMCPY ? " memcpy" : "", 1992 g.options & OPT_INDIRECT ? " indirect" : "", 1993 g.options & OPT_COPY ? " copy" : ""); 1994 } 1995 1996 g.tx_period.tv_sec = g.tx_period.tv_nsec = 0; 1997 if (g.tx_rate > 0) { 1998 /* try to have at least something every second, 1999 * reducing the burst size to some 0.01s worth of data 2000 * (but no less than one full set of fragments) 2001 */ 2002 uint64_t x; 2003 int lim = (g.tx_rate)/300; 2004 if (g.burst > lim) 2005 g.burst = lim; 2006 if (g.burst < g.frags) 2007 g.burst = g.frags; 2008 x = ((uint64_t)1000000000 * (uint64_t)g.burst) / (uint64_t) g.tx_rate; 2009 g.tx_period.tv_nsec = x; 2010 g.tx_period.tv_sec = g.tx_period.tv_nsec / 1000000000; 2011 g.tx_period.tv_nsec = g.tx_period.tv_nsec % 1000000000; 2012 } 2013 if (g.td_body == sender_body) 2014 D("Sending %d packets every %ld.%09ld s", 2015 g.burst, g.tx_period.tv_sec, g.tx_period.tv_nsec); 2016 /* Wait for PHY reset. */ 2017 D("Wait %d secs for phy reset", wait_link); 2018 sleep(wait_link); 2019 D("Ready..."); 2020 2021 /* Install ^C handler. */ 2022 global_nthreads = g.nthreads; 2023 signal(SIGINT, sigint_h); 2024 2025 start_threads(&g); 2026 main_thread(&g); 2027 return 0; 2028} 2029 2030/* end of file */ 2031