pkt-gen.c revision 260700
1/* 2 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. 3 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27/* 28 * $FreeBSD: head/tools/tools/netmap/pkt-gen.c 260700 2014-01-16 00:20:42Z luigi $ 29 * $Id: pkt-gen.c 12346 2013-06-12 17:36:25Z luigi $ 30 * 31 * Example program to show how to build a multithreaded packet 32 * source/sink using the netmap device. 33 * 34 * In this example we create a programmable number of threads 35 * to take care of all the queues of the interface used to 36 * send or receive traffic. 37 * 38 */ 39 40#define MY_PCAP 41#include "nm_util.h" 42// #include <net/netmap_user.h> 43 44#include <ctype.h> // isprint() 45 46#ifndef NO_PCAP 47#include <pcap/pcap.h> 48#endif 49const char *default_payload="netmap pkt-gen DIRECT payload\n" 50 "http://info.iet.unipi.it/~luigi/netmap/ "; 51 52const char *indirect_payload="netmap pkt-gen indirect payload\n" 53 "http://info.iet.unipi.it/~luigi/netmap/ "; 54 55int time_second; // support for RD() debugging macro 56 57int verbose = 0; 58 59#define SKIP_PAYLOAD 1 /* do not check payload. */ 60 61 62#define VIRT_HDR_1 10 /* length of a base vnet-hdr */ 63#define VIRT_HDR_2 12 /* length of the extenede vnet-hdr */ 64#define VIRT_HDR_MAX VIRT_HDR_2 65struct virt_header { 66 uint8_t fields[VIRT_HDR_MAX]; 67}; 68 69struct pkt { 70 struct virt_header vh; 71 struct ether_header eh; 72 struct ip ip; 73 struct udphdr udp; 74 uint8_t body[2048]; // XXX hardwired 75} __attribute__((__packed__)); 76 77struct ip_range { 78 char *name; 79 uint32_t start, end; /* same as struct in_addr */ 80 uint16_t port0, port1; 81}; 82 83struct mac_range { 84 char *name; 85 struct ether_addr start, end; 86}; 87 88/* 89 * global arguments for all threads 90 */ 91 92struct glob_arg { 93 struct ip_range src_ip; 94 struct ip_range dst_ip; 95 struct mac_range dst_mac; 96 struct mac_range src_mac; 97 int pkt_size; 98 int burst; 99 int forever; 100 int npackets; /* total packets to send */ 101 int frags; /* fragments per packet */ 102 int nthreads; 103 int cpus; 104 int options; /* testing */ 105#define OPT_PREFETCH 1 106#define OPT_ACCESS 2 107#define OPT_COPY 4 108#define OPT_MEMCPY 8 109#define OPT_TS 16 /* add a timestamp */ 110#define OPT_INDIRECT 32 /* use indirect buffers, tx only */ 111#define OPT_DUMP 64 /* dump rx/tx traffic */ 112 int dev_type; 113#ifndef NO_PCAP 114 pcap_t *p; 115#endif 116 117 int tx_rate; 118 struct timespec tx_period; 119 120 int affinity; 121 int main_fd; 122 int report_interval; /* milliseconds between prints */ 123 void *(*td_body)(void *); 124 void *mmap_addr; 125 int mmap_size; 126 char *ifname; 127 char *nmr_config; 128 int dummy_send; 129 int virt_header; /* send also the virt_header */ 130 int host_ring; 131}; 132enum dev_type { DEV_NONE, DEV_NETMAP, DEV_PCAP, DEV_TAP }; 133 134 135/* 136 * Arguments for a new thread. The same structure is used by 137 * the source and the sink 138 */ 139struct targ { 140 struct glob_arg *g; 141 int used; 142 int completed; 143 int cancel; 144 int fd; 145 struct nmreq nmr; 146 struct netmap_if *nifp; 147 uint16_t qfirst, qlast; /* range of queues to scan */ 148 volatile uint64_t count; 149 struct timespec tic, toc; 150 int me; 151 pthread_t thread; 152 int affinity; 153 154 struct pkt pkt; 155}; 156 157 158/* 159 * extract the extremes from a range of ipv4 addresses. 160 * addr_lo[-addr_hi][:port_lo[-port_hi]] 161 */ 162static void 163extract_ip_range(struct ip_range *r) 164{ 165 char *ap, *pp; 166 struct in_addr a; 167 168 if (verbose) 169 D("extract IP range from %s", r->name); 170 r->port0 = r->port1 = 0; 171 r->start = r->end = 0; 172 173 /* the first - splits start/end of range */ 174 ap = index(r->name, '-'); /* do we have ports ? */ 175 if (ap) { 176 *ap++ = '\0'; 177 } 178 /* grab the initial values (mandatory) */ 179 pp = index(r->name, ':'); 180 if (pp) { 181 *pp++ = '\0'; 182 r->port0 = r->port1 = strtol(pp, NULL, 0); 183 }; 184 inet_aton(r->name, &a); 185 r->start = r->end = ntohl(a.s_addr); 186 if (ap) { 187 pp = index(ap, ':'); 188 if (pp) { 189 *pp++ = '\0'; 190 if (*pp) 191 r->port1 = strtol(pp, NULL, 0); 192 } 193 if (*ap) { 194 inet_aton(ap, &a); 195 r->end = ntohl(a.s_addr); 196 } 197 } 198 if (r->port0 > r->port1) { 199 uint16_t tmp = r->port0; 200 r->port0 = r->port1; 201 r->port1 = tmp; 202 } 203 if (r->start > r->end) { 204 uint32_t tmp = r->start; 205 r->start = r->end; 206 r->end = tmp; 207 } 208 { 209 struct in_addr a; 210 char buf1[16]; // one ip address 211 212 a.s_addr = htonl(r->end); 213 strncpy(buf1, inet_ntoa(a), sizeof(buf1)); 214 a.s_addr = htonl(r->start); 215 if (1) 216 D("range is %s:%d to %s:%d", 217 inet_ntoa(a), r->port0, buf1, r->port1); 218 } 219} 220 221static void 222extract_mac_range(struct mac_range *r) 223{ 224 if (verbose) 225 D("extract MAC range from %s", r->name); 226 bcopy(ether_aton(r->name), &r->start, 6); 227 bcopy(ether_aton(r->name), &r->end, 6); 228#if 0 229 bcopy(targ->src_mac, eh->ether_shost, 6); 230 p = index(targ->g->src_mac, '-'); 231 if (p) 232 targ->src_mac_range = atoi(p+1); 233 234 bcopy(ether_aton(targ->g->dst_mac), targ->dst_mac, 6); 235 bcopy(targ->dst_mac, eh->ether_dhost, 6); 236 p = index(targ->g->dst_mac, '-'); 237 if (p) 238 targ->dst_mac_range = atoi(p+1); 239#endif 240 if (verbose) 241 D("%s starts at %s", r->name, ether_ntoa(&r->start)); 242} 243 244static struct targ *targs; 245static int global_nthreads; 246 247/* control-C handler */ 248static void 249sigint_h(int sig) 250{ 251 int i; 252 253 (void)sig; /* UNUSED */ 254 for (i = 0; i < global_nthreads; i++) { 255 targs[i].cancel = 1; 256 } 257 signal(SIGINT, SIG_DFL); 258} 259 260/* sysctl wrapper to return the number of active CPUs */ 261static int 262system_ncpus(void) 263{ 264#ifdef __FreeBSD__ 265 int mib[2], ncpus; 266 size_t len; 267 268 mib[0] = CTL_HW; 269 mib[1] = HW_NCPU; 270 len = sizeof(mib); 271 sysctl(mib, 2, &ncpus, &len, NULL, 0); 272 273 return (ncpus); 274#else 275 return 1; 276#endif /* !__FreeBSD__ */ 277} 278 279#ifdef __linux__ 280#define sockaddr_dl sockaddr_ll 281#define sdl_family sll_family 282#define AF_LINK AF_PACKET 283#define LLADDR(s) s->sll_addr; 284#include <linux/if_tun.h> 285#define TAP_CLONEDEV "/dev/net/tun" 286#endif /* __linux__ */ 287 288#ifdef __FreeBSD__ 289#include <net/if_tun.h> 290#define TAP_CLONEDEV "/dev/tap" 291#endif /* __FreeBSD */ 292 293#ifdef __APPLE__ 294// #warning TAP not supported on apple ? 295#include <net/if_utun.h> 296#define TAP_CLONEDEV "/dev/tap" 297#endif /* __APPLE__ */ 298 299 300/* 301 * parse the vale configuration in conf and put it in nmr. 302 * The configuration may consist of 0 to 4 numbers separated 303 * by commas: #tx-slots,#rx-slots,#tx-rings,#rx-rings. 304 * Missing numbers or zeroes stand for default values. 305 * As an additional convenience, if exactly one number 306 * is specified, then this is assigned to both #tx-slots and #rx-slots. 307 * If there is no 4th number, then the 3rd is assigned to both #tx-rings 308 * and #rx-rings. 309 */ 310void parse_nmr_config(const char* conf, struct nmreq *nmr) 311{ 312 char *w, *tok; 313 int i, v; 314 315 nmr->nr_tx_rings = nmr->nr_rx_rings = 0; 316 nmr->nr_tx_slots = nmr->nr_rx_slots = 0; 317 if (conf == NULL || ! *conf) 318 return; 319 w = strdup(conf); 320 for (i = 0, tok = strtok(w, ","); tok; i++, tok = strtok(NULL, ",")) { 321 v = atoi(tok); 322 switch (i) { 323 case 0: 324 nmr->nr_tx_slots = nmr->nr_rx_slots = v; 325 break; 326 case 1: 327 nmr->nr_rx_slots = v; 328 break; 329 case 2: 330 nmr->nr_tx_rings = nmr->nr_rx_rings = v; 331 break; 332 case 3: 333 nmr->nr_rx_rings = v; 334 break; 335 default: 336 D("ignored config: %s", tok); 337 break; 338 } 339 } 340 D("txr %d txd %d rxr %d rxd %d", 341 nmr->nr_tx_rings, nmr->nr_tx_slots, 342 nmr->nr_rx_rings, nmr->nr_rx_slots); 343 free(w); 344} 345 346 347/* 348 * locate the src mac address for our interface, put it 349 * into the user-supplied buffer. return 0 if ok, -1 on error. 350 */ 351static int 352source_hwaddr(const char *ifname, char *buf) 353{ 354 struct ifaddrs *ifaphead, *ifap; 355 int l = sizeof(ifap->ifa_name); 356 357 if (getifaddrs(&ifaphead) != 0) { 358 D("getifaddrs %s failed", ifname); 359 return (-1); 360 } 361 362 for (ifap = ifaphead; ifap; ifap = ifap->ifa_next) { 363 struct sockaddr_dl *sdl = 364 (struct sockaddr_dl *)ifap->ifa_addr; 365 uint8_t *mac; 366 367 if (!sdl || sdl->sdl_family != AF_LINK) 368 continue; 369 if (strncmp(ifap->ifa_name, ifname, l) != 0) 370 continue; 371 mac = (uint8_t *)LLADDR(sdl); 372 sprintf(buf, "%02x:%02x:%02x:%02x:%02x:%02x", 373 mac[0], mac[1], mac[2], 374 mac[3], mac[4], mac[5]); 375 if (verbose) 376 D("source hwaddr %s", buf); 377 break; 378 } 379 freeifaddrs(ifaphead); 380 return ifap ? 0 : 1; 381} 382 383 384/* set the thread affinity. */ 385static int 386setaffinity(pthread_t me, int i) 387{ 388#if 1 // def __FreeBSD__ 389 cpuset_t cpumask; 390 391 if (i == -1) 392 return 0; 393 394 /* Set thread affinity affinity.*/ 395 CPU_ZERO(&cpumask); 396 CPU_SET(i, &cpumask); 397 398 if (pthread_setaffinity_np(me, sizeof(cpuset_t), &cpumask) != 0) { 399 D("Unable to set affinity: %s", strerror(errno)); 400 return 1; 401 } 402#else 403 (void)me; /* suppress 'unused' warnings */ 404 (void)i; 405#endif /* __FreeBSD__ */ 406 return 0; 407} 408 409/* Compute the checksum of the given ip header. */ 410static uint16_t 411checksum(const void *data, uint16_t len, uint32_t sum) 412{ 413 const uint8_t *addr = data; 414 uint32_t i; 415 416 /* Checksum all the pairs of bytes first... */ 417 for (i = 0; i < (len & ~1U); i += 2) { 418 sum += (u_int16_t)ntohs(*((u_int16_t *)(addr + i))); 419 if (sum > 0xFFFF) 420 sum -= 0xFFFF; 421 } 422 /* 423 * If there's a single byte left over, checksum it, too. 424 * Network byte order is big-endian, so the remaining byte is 425 * the high byte. 426 */ 427 if (i < len) { 428 sum += addr[i] << 8; 429 if (sum > 0xFFFF) 430 sum -= 0xFFFF; 431 } 432 return sum; 433} 434 435static u_int16_t 436wrapsum(u_int32_t sum) 437{ 438 sum = ~sum & 0xFFFF; 439 return (htons(sum)); 440} 441 442/* Check the payload of the packet for errors (use it for debug). 443 * Look for consecutive ascii representations of the size of the packet. 444 */ 445static void 446dump_payload(char *p, int len, struct netmap_ring *ring, int cur) 447{ 448 char buf[128]; 449 int i, j, i0; 450 451 /* get the length in ASCII of the length of the packet. */ 452 453 printf("ring %p cur %5d [buf %6d flags 0x%04x len %5d]\n", 454 ring, cur, ring->slot[cur].buf_idx, 455 ring->slot[cur].flags, len); 456 /* hexdump routine */ 457 for (i = 0; i < len; ) { 458 memset(buf, sizeof(buf), ' '); 459 sprintf(buf, "%5d: ", i); 460 i0 = i; 461 for (j=0; j < 16 && i < len; i++, j++) 462 sprintf(buf+7+j*3, "%02x ", (uint8_t)(p[i])); 463 i = i0; 464 for (j=0; j < 16 && i < len; i++, j++) 465 sprintf(buf+7+j + 48, "%c", 466 isprint(p[i]) ? p[i] : '.'); 467 printf("%s\n", buf); 468 } 469} 470 471/* 472 * Fill a packet with some payload. 473 * We create a UDP packet so the payload starts at 474 * 14+20+8 = 42 bytes. 475 */ 476#ifdef __linux__ 477#define uh_sport source 478#define uh_dport dest 479#define uh_ulen len 480#define uh_sum check 481#endif /* linux */ 482 483/* 484 * increment the addressed in the packet, 485 * starting from the least significant field. 486 * DST_IP DST_PORT SRC_IP SRC_PORT 487 */ 488static void 489update_addresses(struct pkt *pkt, struct glob_arg *g) 490{ 491 uint32_t a; 492 uint16_t p; 493 struct ip *ip = &pkt->ip; 494 struct udphdr *udp = &pkt->udp; 495 496 do { 497 p = ntohs(udp->uh_sport); 498 if (p < g->src_ip.port1) { /* just inc, no wrap */ 499 udp->uh_sport = htons(p + 1); 500 break; 501 } 502 udp->uh_sport = htons(g->src_ip.port0); 503 504 a = ntohl(ip->ip_src.s_addr); 505 if (a < g->src_ip.end) { /* just inc, no wrap */ 506 ip->ip_src.s_addr = htonl(a + 1); 507 break; 508 } 509 ip->ip_src.s_addr = htonl(g->src_ip.start); 510 511 udp->uh_sport = htons(g->src_ip.port0); 512 p = ntohs(udp->uh_dport); 513 if (p < g->dst_ip.port1) { /* just inc, no wrap */ 514 udp->uh_dport = htons(p + 1); 515 break; 516 } 517 udp->uh_dport = htons(g->dst_ip.port0); 518 519 a = ntohl(ip->ip_dst.s_addr); 520 if (a < g->dst_ip.end) { /* just inc, no wrap */ 521 ip->ip_dst.s_addr = htonl(a + 1); 522 break; 523 } 524 ip->ip_dst.s_addr = htonl(g->dst_ip.start); 525 } while (0); 526 // update checksum 527} 528 529/* 530 * initialize one packet and prepare for the next one. 531 * The copy could be done better instead of repeating it each time. 532 */ 533static void 534initialize_packet(struct targ *targ) 535{ 536 struct pkt *pkt = &targ->pkt; 537 struct ether_header *eh; 538 struct ip *ip; 539 struct udphdr *udp; 540 uint16_t paylen = targ->g->pkt_size - sizeof(*eh) - sizeof(struct ip); 541 const char *payload = targ->g->options & OPT_INDIRECT ? 542 indirect_payload : default_payload; 543 int i, l0 = strlen(payload); 544 545 /* create a nice NUL-terminated string */ 546 for (i = 0; i < paylen; i += l0) { 547 if (l0 > paylen - i) 548 l0 = paylen - i; // last round 549 bcopy(payload, pkt->body + i, l0); 550 } 551 pkt->body[i-1] = '\0'; 552 ip = &pkt->ip; 553 554 /* prepare the headers */ 555 ip->ip_v = IPVERSION; 556 ip->ip_hl = 5; 557 ip->ip_id = 0; 558 ip->ip_tos = IPTOS_LOWDELAY; 559 ip->ip_len = ntohs(targ->g->pkt_size - sizeof(*eh)); 560 ip->ip_id = 0; 561 ip->ip_off = htons(IP_DF); /* Don't fragment */ 562 ip->ip_ttl = IPDEFTTL; 563 ip->ip_p = IPPROTO_UDP; 564 ip->ip_dst.s_addr = htonl(targ->g->dst_ip.start); 565 ip->ip_src.s_addr = htonl(targ->g->src_ip.start); 566 ip->ip_sum = wrapsum(checksum(ip, sizeof(*ip), 0)); 567 568 569 udp = &pkt->udp; 570 udp->uh_sport = htons(targ->g->src_ip.port0); 571 udp->uh_dport = htons(targ->g->dst_ip.port0); 572 udp->uh_ulen = htons(paylen); 573 /* Magic: taken from sbin/dhclient/packet.c */ 574 udp->uh_sum = wrapsum(checksum(udp, sizeof(*udp), 575 checksum(pkt->body, 576 paylen - sizeof(*udp), 577 checksum(&ip->ip_src, 2 * sizeof(ip->ip_src), 578 IPPROTO_UDP + (u_int32_t)ntohs(udp->uh_ulen) 579 ) 580 ) 581 )); 582 583 eh = &pkt->eh; 584 bcopy(&targ->g->src_mac.start, eh->ether_shost, 6); 585 bcopy(&targ->g->dst_mac.start, eh->ether_dhost, 6); 586 eh->ether_type = htons(ETHERTYPE_IP); 587 588 bzero(&pkt->vh, sizeof(pkt->vh)); 589 // dump_payload((void *)pkt, targ->g->pkt_size, NULL, 0); 590} 591 592 593 594/* 595 * create and enqueue a batch of packets on a ring. 596 * On the last one set NS_REPORT to tell the driver to generate 597 * an interrupt when done. 598 */ 599static int 600send_packets(struct netmap_ring *ring, struct pkt *pkt, void *frame, 601 int size, struct glob_arg *g, u_int count, int options, 602 u_int nfrags) 603{ 604 u_int n, sent, cur = ring->cur; 605 u_int fcnt; 606 607 n = nm_ring_space(ring); 608 if (n < count) 609 count = n; 610 if (count < nfrags) { 611 D("truncating packet, no room for frags %d %d", 612 count, nfrags); 613 } 614#if 0 615 if (options & (OPT_COPY | OPT_PREFETCH) ) { 616 for (sent = 0; sent < count; sent++) { 617 struct netmap_slot *slot = &ring->slot[cur]; 618 char *p = NETMAP_BUF(ring, slot->buf_idx); 619 620 __builtin_prefetch(p); 621 cur = nm_ring_next(ring, cur); 622 } 623 cur = ring->cur; 624 } 625#endif 626 for (fcnt = nfrags, sent = 0; sent < count; sent++) { 627 struct netmap_slot *slot = &ring->slot[cur]; 628 char *p = NETMAP_BUF(ring, slot->buf_idx); 629 630 slot->flags = 0; 631 if (options & OPT_INDIRECT) { 632 slot->flags |= NS_INDIRECT; 633 slot->ptr = (uint64_t)frame; 634 } else if (options & OPT_COPY) { 635 pkt_copy(frame, p, size); 636 if (fcnt == nfrags) 637 update_addresses(pkt, g); 638 } else if (options & OPT_MEMCPY) { 639 memcpy(p, frame, size); 640 if (fcnt == nfrags) 641 update_addresses(pkt, g); 642 } else if (options & OPT_PREFETCH) { 643 __builtin_prefetch(p); 644 } 645 if (options & OPT_DUMP) 646 dump_payload(p, size, ring, cur); 647 slot->len = size; 648 if (--fcnt > 0) 649 slot->flags |= NS_MOREFRAG; 650 else 651 fcnt = nfrags; 652 if (sent == count - 1) { 653 slot->flags &= ~NS_MOREFRAG; 654 slot->flags |= NS_REPORT; 655 } 656 cur = nm_ring_next(ring, cur); 657 } 658 ring->head = ring->cur = cur; 659 660 return (sent); 661} 662 663/* 664 * Send a packet, and wait for a response. 665 * The payload (after UDP header, ofs 42) has a 4-byte sequence 666 * followed by a struct timeval (or bintime?) 667 */ 668#define PAY_OFS 42 /* where in the pkt... */ 669 670static void * 671pinger_body(void *data) 672{ 673 struct targ *targ = (struct targ *) data; 674 struct pollfd fds[1]; 675 struct netmap_if *nifp = targ->nifp; 676 int i, rx = 0, n = targ->g->npackets; 677 void *frame; 678 int size; 679 680 frame = &targ->pkt; 681 frame += sizeof(targ->pkt.vh) - targ->g->virt_header; 682 size = targ->g->pkt_size + targ->g->virt_header; 683 684 fds[0].fd = targ->fd; 685 fds[0].events = (POLLIN); 686 static uint32_t sent; 687 struct timespec ts, now, last_print; 688 uint32_t count = 0, min = 1000000000, av = 0; 689 690 if (targ->g->nthreads > 1) { 691 D("can only ping with 1 thread"); 692 return NULL; 693 } 694 695 clock_gettime(CLOCK_REALTIME_PRECISE, &last_print); 696 now = last_print; 697 while (n == 0 || (int)sent < n) { 698 struct netmap_ring *ring = NETMAP_TXRING(nifp, 0); 699 struct netmap_slot *slot; 700 char *p; 701 for (i = 0; i < 1; i++) { /* XXX why the loop for 1 pkt ? */ 702 slot = &ring->slot[ring->cur]; 703 slot->len = size; 704 p = NETMAP_BUF(ring, slot->buf_idx); 705 706 if (nm_ring_empty(ring)) { 707 D("-- ouch, cannot send"); 708 } else { 709 pkt_copy(frame, p, size); 710 clock_gettime(CLOCK_REALTIME_PRECISE, &ts); 711 bcopy(&sent, p+42, sizeof(sent)); 712 bcopy(&ts, p+46, sizeof(ts)); 713 sent++; 714 ring->head = ring->cur = nm_ring_next(ring, ring->cur); 715 } 716 } 717 /* should use a parameter to decide how often to send */ 718 if (poll(fds, 1, 3000) <= 0) { 719 D("poll error/timeout on queue %d: %s", targ->me, 720 strerror(errno)); 721 continue; 722 } 723 /* see what we got back */ 724 for (i = targ->qfirst; i < targ->qlast; i++) { 725 ring = NETMAP_RXRING(nifp, i); 726 while (!nm_ring_empty(ring)) { 727 uint32_t seq; 728 slot = &ring->slot[ring->cur]; 729 p = NETMAP_BUF(ring, slot->buf_idx); 730 731 clock_gettime(CLOCK_REALTIME_PRECISE, &now); 732 bcopy(p+42, &seq, sizeof(seq)); 733 bcopy(p+46, &ts, sizeof(ts)); 734 ts.tv_sec = now.tv_sec - ts.tv_sec; 735 ts.tv_nsec = now.tv_nsec - ts.tv_nsec; 736 if (ts.tv_nsec < 0) { 737 ts.tv_nsec += 1000000000; 738 ts.tv_sec--; 739 } 740 if (1) D("seq %d/%d delta %d.%09d", seq, sent, 741 (int)ts.tv_sec, (int)ts.tv_nsec); 742 if (ts.tv_nsec < (int)min) 743 min = ts.tv_nsec; 744 count ++; 745 av += ts.tv_nsec; 746 ring->head = ring->cur = nm_ring_next(ring, ring->cur); 747 rx++; 748 } 749 } 750 //D("tx %d rx %d", sent, rx); 751 //usleep(100000); 752 ts.tv_sec = now.tv_sec - last_print.tv_sec; 753 ts.tv_nsec = now.tv_nsec - last_print.tv_nsec; 754 if (ts.tv_nsec < 0) { 755 ts.tv_nsec += 1000000000; 756 ts.tv_sec--; 757 } 758 if (ts.tv_sec >= 1) { 759 D("count %d min %d av %d", 760 count, min, av/count); 761 count = 0; 762 av = 0; 763 min = 100000000; 764 last_print = now; 765 } 766 } 767 return NULL; 768} 769 770 771/* 772 * reply to ping requests 773 */ 774static void * 775ponger_body(void *data) 776{ 777 struct targ *targ = (struct targ *) data; 778 struct pollfd fds[1]; 779 struct netmap_if *nifp = targ->nifp; 780 struct netmap_ring *txring, *rxring; 781 int i, rx = 0, sent = 0, n = targ->g->npackets; 782 fds[0].fd = targ->fd; 783 fds[0].events = (POLLIN); 784 785 if (targ->g->nthreads > 1) { 786 D("can only reply ping with 1 thread"); 787 return NULL; 788 } 789 D("understood ponger %d but don't know how to do it", n); 790 while (n == 0 || sent < n) { 791 uint32_t txcur, txavail; 792//#define BUSYWAIT 793#ifdef BUSYWAIT 794 ioctl(fds[0].fd, NIOCRXSYNC, NULL); 795#else 796 if (poll(fds, 1, 1000) <= 0) { 797 D("poll error/timeout on queue %d: %s", targ->me, 798 strerror(errno)); 799 continue; 800 } 801#endif 802 txring = NETMAP_TXRING(nifp, 0); 803 txcur = txring->cur; 804 txavail = nm_ring_space(txring); 805 /* see what we got back */ 806 for (i = targ->qfirst; i < targ->qlast; i++) { 807 rxring = NETMAP_RXRING(nifp, i); 808 while (!nm_ring_empty(rxring)) { 809 uint16_t *spkt, *dpkt; 810 uint32_t cur = rxring->cur; 811 struct netmap_slot *slot = &rxring->slot[cur]; 812 char *src, *dst; 813 src = NETMAP_BUF(rxring, slot->buf_idx); 814 //D("got pkt %p of size %d", src, slot->len); 815 rxring->head = rxring->cur = nm_ring_next(rxring, cur); 816 rx++; 817 if (txavail == 0) 818 continue; 819 dst = NETMAP_BUF(txring, 820 txring->slot[txcur].buf_idx); 821 /* copy... */ 822 dpkt = (uint16_t *)dst; 823 spkt = (uint16_t *)src; 824 pkt_copy(src, dst, slot->len); 825 dpkt[0] = spkt[3]; 826 dpkt[1] = spkt[4]; 827 dpkt[2] = spkt[5]; 828 dpkt[3] = spkt[0]; 829 dpkt[4] = spkt[1]; 830 dpkt[5] = spkt[2]; 831 txring->slot[txcur].len = slot->len; 832 /* XXX swap src dst mac */ 833 txcur = nm_ring_next(txring, txcur); 834 txavail--; 835 sent++; 836 } 837 } 838 txring->head = txring->cur = txcur; 839 targ->count = sent; 840#ifdef BUSYWAIT 841 ioctl(fds[0].fd, NIOCTXSYNC, NULL); 842#endif 843 //D("tx %d rx %d", sent, rx); 844 } 845 return NULL; 846} 847 848static __inline int 849timespec_ge(const struct timespec *a, const struct timespec *b) 850{ 851 852 if (a->tv_sec > b->tv_sec) 853 return (1); 854 if (a->tv_sec < b->tv_sec) 855 return (0); 856 if (a->tv_nsec >= b->tv_nsec) 857 return (1); 858 return (0); 859} 860 861static __inline struct timespec 862timeval2spec(const struct timeval *a) 863{ 864 struct timespec ts = { 865 .tv_sec = a->tv_sec, 866 .tv_nsec = a->tv_usec * 1000 867 }; 868 return ts; 869} 870 871static __inline struct timeval 872timespec2val(const struct timespec *a) 873{ 874 struct timeval tv = { 875 .tv_sec = a->tv_sec, 876 .tv_usec = a->tv_nsec / 1000 877 }; 878 return tv; 879} 880 881 882static __inline struct timespec 883timespec_add(struct timespec a, struct timespec b) 884{ 885 struct timespec ret = { a.tv_sec + b.tv_sec, a.tv_nsec + b.tv_nsec }; 886 if (ret.tv_nsec >= 1000000000) { 887 ret.tv_sec++; 888 ret.tv_nsec -= 1000000000; 889 } 890 return ret; 891} 892 893static __inline struct timespec 894timespec_sub(struct timespec a, struct timespec b) 895{ 896 struct timespec ret = { a.tv_sec - b.tv_sec, a.tv_nsec - b.tv_nsec }; 897 if (ret.tv_nsec < 0) { 898 ret.tv_sec--; 899 ret.tv_nsec += 1000000000; 900 } 901 return ret; 902} 903 904 905/* 906 * wait until ts, either busy or sleeping if more than 1ms. 907 * Return wakeup time. 908 */ 909static struct timespec 910wait_time(struct timespec ts) 911{ 912 for (;;) { 913 struct timespec w, cur; 914 clock_gettime(CLOCK_REALTIME_PRECISE, &cur); 915 w = timespec_sub(ts, cur); 916 if (w.tv_sec < 0) 917 return cur; 918 else if (w.tv_sec > 0 || w.tv_nsec > 1000000) 919 poll(NULL, 0, 1); 920 } 921} 922 923static void * 924sender_body(void *data) 925{ 926 struct targ *targ = (struct targ *) data; 927 928 struct pollfd fds[1]; 929 struct netmap_if *nifp = targ->nifp; 930 struct netmap_ring *txring; 931 int i, n = targ->g->npackets / targ->g->nthreads, sent = 0; 932 int options = targ->g->options | OPT_COPY; 933 struct timespec nexttime = { 0, 0}; // XXX silence compiler 934 int rate_limit = targ->g->tx_rate; 935 struct pkt *pkt = &targ->pkt; 936 void *frame; 937 int size; 938 939 frame = pkt; 940 frame += sizeof(pkt->vh) - targ->g->virt_header; 941 size = targ->g->pkt_size + targ->g->virt_header; 942 943 D("start"); 944 if (setaffinity(targ->thread, targ->affinity)) 945 goto quit; 946 /* setup poll(2) mechanism. */ 947 memset(fds, 0, sizeof(fds)); 948 fds[0].fd = targ->fd; 949 fds[0].events = (POLLOUT); 950 951 /* main loop.*/ 952 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 953 if (rate_limit) { 954 targ->tic = timespec_add(targ->tic, (struct timespec){2,0}); 955 targ->tic.tv_nsec = 0; 956 wait_time(targ->tic); 957 nexttime = targ->tic; 958 } 959 if (targ->g->dev_type == DEV_TAP) { 960 D("writing to file desc %d", targ->g->main_fd); 961 962 for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { 963 if (write(targ->g->main_fd, frame, size) != -1) 964 sent++; 965 update_addresses(pkt, targ->g); 966 if (i > 10000) { 967 targ->count = sent; 968 i = 0; 969 } 970 } 971#ifndef NO_PCAP 972 } else if (targ->g->dev_type == DEV_PCAP) { 973 pcap_t *p = targ->g->p; 974 975 for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { 976 if (pcap_inject(p, frame, size) != -1) 977 sent++; 978 update_addresses(pkt, targ->g); 979 if (i > 10000) { 980 targ->count = sent; 981 i = 0; 982 } 983 } 984#endif /* NO_PCAP */ 985 } else { 986 int tosend = 0; 987 int frags = targ->g->frags; 988 989 while (!targ->cancel && (n == 0 || sent < n)) { 990 991 if (rate_limit && tosend <= 0) { 992 tosend = targ->g->burst; 993 nexttime = timespec_add(nexttime, targ->g->tx_period); 994 wait_time(nexttime); 995 } 996 997 /* 998 * wait for available room in the send queue(s) 999 */ 1000 if (poll(fds, 1, 2000) <= 0) { 1001 if (targ->cancel) 1002 break; 1003 D("poll error/timeout on queue %d: %s", targ->me, 1004 strerror(errno)); 1005 goto quit; 1006 } 1007 if (fds[0].revents & POLLERR) { 1008 D("poll error"); 1009 goto quit; 1010 } 1011 /* 1012 * scan our queues and send on those with room 1013 */ 1014 if (options & OPT_COPY && sent > 100000 && !(targ->g->options & OPT_COPY) ) { 1015 D("drop copy"); 1016 options &= ~OPT_COPY; 1017 } 1018 for (i = targ->qfirst; i < targ->qlast; i++) { 1019 int m, limit = rate_limit ? tosend : targ->g->burst; 1020 if (n > 0 && n - sent < limit) 1021 limit = n - sent; 1022 txring = NETMAP_TXRING(nifp, i); 1023 if (nm_ring_empty(txring)) 1024 continue; 1025 if (frags > 1) 1026 limit = ((limit + frags - 1) / frags) * frags; 1027 1028 m = send_packets(txring, pkt, frame, size, targ->g, 1029 limit, options, frags); 1030 ND("limit %d tail %d frags %d m %d", 1031 limit, txring->tail, frags, m); 1032 sent += m; 1033 targ->count = sent; 1034 if (rate_limit) { 1035 tosend -= m; 1036 if (tosend <= 0) 1037 break; 1038 } 1039 } 1040 } 1041 /* flush any remaining packets */ 1042 ioctl(fds[0].fd, NIOCTXSYNC, NULL); 1043 1044 /* final part: wait all the TX queues to be empty. */ 1045 for (i = targ->qfirst; i < targ->qlast; i++) { 1046 txring = NETMAP_TXRING(nifp, i); 1047 while (nm_tx_pending(txring)) { 1048 ioctl(fds[0].fd, NIOCTXSYNC, NULL); 1049 usleep(1); /* wait 1 tick */ 1050 } 1051 } 1052 } /* end DEV_NETMAP */ 1053 1054 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 1055 targ->completed = 1; 1056 targ->count = sent; 1057 1058quit: 1059 /* reset the ``used`` flag. */ 1060 targ->used = 0; 1061 1062 return (NULL); 1063} 1064 1065 1066#ifndef NO_PCAP 1067static void 1068receive_pcap(u_char *user, const struct pcap_pkthdr * h, 1069 const u_char * bytes) 1070{ 1071 int *count = (int *)user; 1072 (void)h; /* UNUSED */ 1073 (void)bytes; /* UNUSED */ 1074 (*count)++; 1075} 1076#endif /* !NO_PCAP */ 1077 1078static int 1079receive_packets(struct netmap_ring *ring, u_int limit, int dump) 1080{ 1081 u_int cur, rx, n; 1082 1083 cur = ring->cur; 1084 n = nm_ring_space(ring); 1085 if (n < limit) 1086 limit = n; 1087 for (rx = 0; rx < limit; rx++) { 1088 struct netmap_slot *slot = &ring->slot[cur]; 1089 char *p = NETMAP_BUF(ring, slot->buf_idx); 1090 1091 if (dump) 1092 dump_payload(p, slot->len, ring, cur); 1093 1094 cur = nm_ring_next(ring, cur); 1095 } 1096 ring->head = ring->cur = cur; 1097 1098 return (rx); 1099} 1100 1101static void * 1102receiver_body(void *data) 1103{ 1104 struct targ *targ = (struct targ *) data; 1105 struct pollfd fds[1]; 1106 struct netmap_if *nifp = targ->nifp; 1107 struct netmap_ring *rxring; 1108 int i; 1109 uint64_t received = 0; 1110 1111 if (setaffinity(targ->thread, targ->affinity)) 1112 goto quit; 1113 1114 /* setup poll(2) mechanism. */ 1115 memset(fds, 0, sizeof(fds)); 1116 fds[0].fd = targ->fd; 1117 fds[0].events = (POLLIN); 1118 1119 /* unbounded wait for the first packet. */ 1120 for (;;) { 1121 i = poll(fds, 1, 1000); 1122 if (i > 0 && !(fds[0].revents & POLLERR)) 1123 break; 1124 RD(1, "waiting for initial packets, poll returns %d %d", i, fds[0].revents); 1125 } 1126 1127 /* main loop, exit after 1s silence */ 1128 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 1129 if (targ->g->dev_type == DEV_TAP) { 1130 D("reading from %s fd %d", targ->g->ifname, targ->g->main_fd); 1131 while (!targ->cancel) { 1132 char buf[2048]; 1133 /* XXX should we poll ? */ 1134 if (read(targ->g->main_fd, buf, sizeof(buf)) > 0) 1135 targ->count++; 1136 } 1137#ifndef NO_PCAP 1138 } else if (targ->g->dev_type == DEV_PCAP) { 1139 while (!targ->cancel) { 1140 /* XXX should we poll ? */ 1141 pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap, NULL); 1142 } 1143#endif /* !NO_PCAP */ 1144 } else { 1145 int dump = targ->g->options & OPT_DUMP; 1146 while (!targ->cancel) { 1147 /* Once we started to receive packets, wait at most 1 seconds 1148 before quitting. */ 1149 if (poll(fds, 1, 1 * 1000) <= 0 && !targ->g->forever) { 1150 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 1151 targ->toc.tv_sec -= 1; /* Subtract timeout time. */ 1152 break; 1153 } 1154 1155 if (fds[0].revents & POLLERR) { 1156 D("poll err"); 1157 goto quit; 1158 } 1159 1160 for (i = targ->qfirst; i < targ->qlast; i++) { 1161 int m; 1162 1163 rxring = NETMAP_RXRING(nifp, i); 1164 if (nm_ring_empty(rxring)) 1165 continue; 1166 1167 m = receive_packets(rxring, targ->g->burst, dump); 1168 received += m; 1169 } 1170 targ->count = received; 1171 1172 // tell the card we have read the data 1173 //ioctl(fds[0].fd, NIOCRXSYNC, NULL); 1174 } 1175 } 1176 1177 targ->completed = 1; 1178 targ->count = received; 1179 1180quit: 1181 /* reset the ``used`` flag. */ 1182 targ->used = 0; 1183 1184 return (NULL); 1185} 1186 1187/* very crude code to print a number in normalized form. 1188 * Caller has to make sure that the buffer is large enough. 1189 */ 1190static const char * 1191norm(char *buf, double val) 1192{ 1193 char *units[] = { "", "K", "M", "G" }; 1194 u_int i; 1195 1196 for (i = 0; val >=1000 && i < sizeof(units)/sizeof(char *); i++) 1197 val /= 1000; 1198 sprintf(buf, "%.2f %s", val, units[i]); 1199 return buf; 1200} 1201 1202static void 1203tx_output(uint64_t sent, int size, double delta) 1204{ 1205 double bw, raw_bw, pps; 1206 char b1[40], b2[80], b3[80]; 1207 1208 printf("Sent %" PRIu64 " packets, %d bytes each, in %.2f seconds.\n", 1209 sent, size, delta); 1210 if (delta == 0) 1211 delta = 1e-6; 1212 if (size < 60) /* correct for min packet size */ 1213 size = 60; 1214 pps = sent / delta; 1215 bw = (8.0 * size * sent) / delta; 1216 /* raw packets have4 bytes crc + 20 bytes framing */ 1217 raw_bw = (8.0 * (size + 24) * sent) / delta; 1218 1219 printf("Speed: %spps Bandwidth: %sbps (raw %sbps)\n", 1220 norm(b1, pps), norm(b2, bw), norm(b3, raw_bw) ); 1221} 1222 1223 1224static void 1225rx_output(uint64_t received, double delta) 1226{ 1227 double pps; 1228 char b1[40]; 1229 1230 printf("Received %" PRIu64 " packets, in %.2f seconds.\n", received, delta); 1231 1232 if (delta == 0) 1233 delta = 1e-6; 1234 pps = received / delta; 1235 printf("Speed: %spps\n", norm(b1, pps)); 1236} 1237 1238static void 1239usage(void) 1240{ 1241 const char *cmd = "pkt-gen"; 1242 fprintf(stderr, 1243 "Usage:\n" 1244 "%s arguments\n" 1245 "\t-i interface interface name\n" 1246 "\t-f function tx rx ping pong\n" 1247 "\t-n count number of iterations (can be 0)\n" 1248 "\t-t pkts_to_send also forces tx mode\n" 1249 "\t-r pkts_to_receive also forces rx mode\n" 1250 "\t-l pkt_size in bytes excluding CRC\n" 1251 "\t-d dst_ip[:port[-dst_ip:port]] single or range\n" 1252 "\t-s src_ip[:port[-src_ip:port]] single or range\n" 1253 "\t-D dst-mac\n" 1254 "\t-S src-mac\n" 1255 "\t-a cpu_id use setaffinity\n" 1256 "\t-b burst size testing, mostly\n" 1257 "\t-c cores cores to use\n" 1258 "\t-p threads processes/threads to use\n" 1259 "\t-T report_ms milliseconds between reports\n" 1260 "\t-P use libpcap instead of netmap\n" 1261 "\t-w wait_for_link_time in seconds\n" 1262 "\t-R rate in packets per second\n" 1263 "\t-X dump payload\n" 1264 "\t-H len add empty virtio-net-header with size 'len'\n" 1265 "\t-h use host ring\n" 1266 "", 1267 cmd); 1268 1269 exit(0); 1270} 1271 1272static void 1273start_threads(struct glob_arg *g) 1274{ 1275 int i; 1276 1277 targs = calloc(g->nthreads, sizeof(*targs)); 1278 /* 1279 * Now create the desired number of threads, each one 1280 * using a single descriptor. 1281 */ 1282 for (i = 0; i < g->nthreads; i++) { 1283 bzero(&targs[i], sizeof(targs[i])); 1284 targs[i].fd = -1; /* default, with pcap */ 1285 targs[i].g = g; 1286 1287 if (g->dev_type == DEV_NETMAP) { 1288 struct nmreq tifreq; 1289 int tfd; 1290 1291 /* register interface. */ 1292 tfd = open("/dev/netmap", O_RDWR); 1293 if (tfd == -1) { 1294 D("Unable to open /dev/netmap: %s", strerror(errno)); 1295 continue; 1296 } 1297 targs[i].fd = tfd; 1298 1299 bzero(&tifreq, sizeof(tifreq)); 1300 strncpy(tifreq.nr_name, g->ifname, sizeof(tifreq.nr_name)); 1301 tifreq.nr_version = NETMAP_API; 1302 if (g->host_ring) { 1303 tifreq.nr_ringid = NETMAP_SW_RING; 1304 } else { 1305 tifreq.nr_ringid = (g->nthreads > 1) ? (i | NETMAP_HW_RING) : 0; 1306 } 1307 parse_nmr_config(g->nmr_config, &tifreq); 1308 1309 /* 1310 * if we are acting as a receiver only, do not touch the transmit ring. 1311 * This is not the default because many apps may use the interface 1312 * in both directions, but a pure receiver does not. 1313 */ 1314 if (g->td_body == receiver_body) { 1315 tifreq.nr_ringid |= NETMAP_NO_TX_POLL; 1316 } 1317 1318 if ((ioctl(tfd, NIOCREGIF, &tifreq)) == -1) { 1319 D("Unable to register %s: %s", g->ifname, strerror(errno)); 1320 continue; 1321 } 1322 D("memsize is %d MB", tifreq.nr_memsize >> 20); 1323 targs[i].nmr = tifreq; 1324 targs[i].nifp = NETMAP_IF(g->mmap_addr, tifreq.nr_offset); 1325 D("nifp flags 0x%x", targs[i].nifp->ni_flags); 1326 /* start threads. */ 1327 if (g->host_ring) { 1328 targs[i].qfirst = (g->td_body == receiver_body ? tifreq.nr_rx_rings : tifreq.nr_tx_rings); 1329 targs[i].qlast = targs[i].qfirst + 1; 1330 } else { 1331 targs[i].qfirst = (g->nthreads > 1) ? i : 0; 1332 targs[i].qlast = (g->nthreads > 1) ? i+1 : 1333 (g->td_body == receiver_body ? tifreq.nr_rx_rings : tifreq.nr_tx_rings); 1334 } 1335 } else { 1336 targs[i].fd = g->main_fd; 1337 } 1338 targs[i].used = 1; 1339 targs[i].me = i; 1340 if (g->affinity >= 0) { 1341 if (g->affinity < g->cpus) 1342 targs[i].affinity = g->affinity; 1343 else 1344 targs[i].affinity = i % g->cpus; 1345 } else 1346 targs[i].affinity = -1; 1347 /* default, init packets */ 1348 initialize_packet(&targs[i]); 1349 1350 if (pthread_create(&targs[i].thread, NULL, g->td_body, 1351 &targs[i]) == -1) { 1352 D("Unable to create thread %d: %s", i, strerror(errno)); 1353 targs[i].used = 0; 1354 } 1355 } 1356} 1357 1358static void 1359main_thread(struct glob_arg *g) 1360{ 1361 int i; 1362 1363 uint64_t prev = 0; 1364 uint64_t count = 0; 1365 double delta_t; 1366 struct timeval tic, toc; 1367 1368 gettimeofday(&toc, NULL); 1369 for (;;) { 1370 struct timeval now, delta; 1371 uint64_t pps, usec, my_count, npkts; 1372 int done = 0; 1373 1374 delta.tv_sec = g->report_interval/1000; 1375 delta.tv_usec = (g->report_interval%1000)*1000; 1376 select(0, NULL, NULL, NULL, &delta); 1377 gettimeofday(&now, NULL); 1378 time_second = now.tv_sec; 1379 timersub(&now, &toc, &toc); 1380 my_count = 0; 1381 for (i = 0; i < g->nthreads; i++) { 1382 my_count += targs[i].count; 1383 if (targs[i].used == 0) 1384 done++; 1385 } 1386 usec = toc.tv_sec* 1000000 + toc.tv_usec; 1387 if (usec < 10000) 1388 continue; 1389 npkts = my_count - prev; 1390 pps = (npkts*1000000 + usec/2) / usec; 1391 D("%" PRIu64 " pps (%" PRIu64 " pkts in %" PRIu64 " usec)", 1392 pps, npkts, usec); 1393 prev = my_count; 1394 toc = now; 1395 if (done == g->nthreads) 1396 break; 1397 } 1398 1399 timerclear(&tic); 1400 timerclear(&toc); 1401 for (i = 0; i < g->nthreads; i++) { 1402 struct timespec t_tic, t_toc; 1403 /* 1404 * Join active threads, unregister interfaces and close 1405 * file descriptors. 1406 */ 1407 if (targs[i].used) 1408 pthread_join(targs[i].thread, NULL); 1409 close(targs[i].fd); 1410 1411 if (targs[i].completed == 0) 1412 D("ouch, thread %d exited with error", i); 1413 1414 /* 1415 * Collect threads output and extract information about 1416 * how long it took to send all the packets. 1417 */ 1418 count += targs[i].count; 1419 t_tic = timeval2spec(&tic); 1420 t_toc = timeval2spec(&toc); 1421 if (!timerisset(&tic) || timespec_ge(&targs[i].tic, &t_tic)) 1422 tic = timespec2val(&targs[i].tic); 1423 if (!timerisset(&toc) || timespec_ge(&targs[i].toc, &t_toc)) 1424 toc = timespec2val(&targs[i].toc); 1425 } 1426 1427 /* print output. */ 1428 timersub(&toc, &tic, &toc); 1429 delta_t = toc.tv_sec + 1e-6* toc.tv_usec; 1430 if (g->td_body == sender_body) 1431 tx_output(count, g->pkt_size, delta_t); 1432 else 1433 rx_output(count, delta_t); 1434 1435 if (g->dev_type == DEV_NETMAP) { 1436 munmap(g->mmap_addr, g->mmap_size); 1437 close(g->main_fd); 1438 } 1439} 1440 1441 1442struct sf { 1443 char *key; 1444 void *f; 1445}; 1446 1447static struct sf func[] = { 1448 { "tx", sender_body }, 1449 { "rx", receiver_body }, 1450 { "ping", pinger_body }, 1451 { "pong", ponger_body }, 1452 { NULL, NULL } 1453}; 1454 1455static int 1456tap_alloc(char *dev) 1457{ 1458 struct ifreq ifr; 1459 int fd, err; 1460 char *clonedev = TAP_CLONEDEV; 1461 1462 (void)err; 1463 (void)dev; 1464 /* Arguments taken by the function: 1465 * 1466 * char *dev: the name of an interface (or '\0'). MUST have enough 1467 * space to hold the interface name if '\0' is passed 1468 * int flags: interface flags (eg, IFF_TUN etc.) 1469 */ 1470 1471#ifdef __FreeBSD__ 1472 if (dev[3]) { /* tapSomething */ 1473 static char buf[128]; 1474 snprintf(buf, sizeof(buf), "/dev/%s", dev); 1475 clonedev = buf; 1476 } 1477#endif 1478 /* open the device */ 1479 if( (fd = open(clonedev, O_RDWR)) < 0 ) { 1480 return fd; 1481 } 1482 D("%s open successful", clonedev); 1483 1484 /* preparation of the struct ifr, of type "struct ifreq" */ 1485 memset(&ifr, 0, sizeof(ifr)); 1486 1487#ifdef linux 1488 ifr.ifr_flags = IFF_TAP | IFF_NO_PI; 1489 1490 if (*dev) { 1491 /* if a device name was specified, put it in the structure; otherwise, 1492 * the kernel will try to allocate the "next" device of the 1493 * specified type */ 1494 strncpy(ifr.ifr_name, dev, IFNAMSIZ); 1495 } 1496 1497 /* try to create the device */ 1498 if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ) { 1499 D("failed to to a TUNSETIFF: %s", strerror(errno)); 1500 close(fd); 1501 return err; 1502 } 1503 1504 /* if the operation was successful, write back the name of the 1505 * interface to the variable "dev", so the caller can know 1506 * it. Note that the caller MUST reserve space in *dev (see calling 1507 * code below) */ 1508 strcpy(dev, ifr.ifr_name); 1509 D("new name is %s", dev); 1510#endif /* linux */ 1511 1512 /* this is the special file descriptor that the caller will use to talk 1513 * with the virtual interface */ 1514 return fd; 1515} 1516 1517int 1518main(int arc, char **argv) 1519{ 1520 int i; 1521 1522 struct glob_arg g; 1523 1524 struct nmreq nmr; 1525 int ch; 1526 int wait_link = 2; 1527 int devqueues = 1; /* how many device queues */ 1528 1529 bzero(&g, sizeof(g)); 1530 1531 g.main_fd = -1; 1532 g.td_body = receiver_body; 1533 g.report_interval = 1000; /* report interval */ 1534 g.affinity = -1; 1535 /* ip addresses can also be a range x.x.x.x-x.x.x.y */ 1536 g.src_ip.name = "10.0.0.1"; 1537 g.dst_ip.name = "10.1.0.1"; 1538 g.dst_mac.name = "ff:ff:ff:ff:ff:ff"; 1539 g.src_mac.name = NULL; 1540 g.pkt_size = 60; 1541 g.burst = 512; // default 1542 g.nthreads = 1; 1543 g.cpus = 1; 1544 g.forever = 1; 1545 g.tx_rate = 0; 1546 g.frags = 1; 1547 g.nmr_config = ""; 1548 g.virt_header = 0; 1549 1550 while ( (ch = getopt(arc, argv, 1551 "a:f:F:n:i:It:r:l:d:s:D:S:b:c:o:p:T:w:WvR:XC:H:h")) != -1) { 1552 struct sf *fn; 1553 1554 switch(ch) { 1555 default: 1556 D("bad option %c %s", ch, optarg); 1557 usage(); 1558 break; 1559 1560 case 'n': 1561 g.npackets = atoi(optarg); 1562 break; 1563 1564 case 'F': 1565 i = atoi(optarg); 1566 if (i < 1 || i > 63) { 1567 D("invalid frags %d [1..63], ignore", i); 1568 break; 1569 } 1570 g.frags = i; 1571 break; 1572 1573 case 'f': 1574 for (fn = func; fn->key; fn++) { 1575 if (!strcmp(fn->key, optarg)) 1576 break; 1577 } 1578 if (fn->key) 1579 g.td_body = fn->f; 1580 else 1581 D("unrecognised function %s", optarg); 1582 break; 1583 1584 case 'o': /* data generation options */ 1585 g.options = atoi(optarg); 1586 break; 1587 1588 case 'a': /* force affinity */ 1589 g.affinity = atoi(optarg); 1590 break; 1591 1592 case 'i': /* interface */ 1593 /* a prefix of tap: netmap: or pcap: forces the mode. 1594 * otherwise we guess 1595 */ 1596 D("interface is %s", optarg); 1597 g.ifname = optarg; 1598 if (!strcmp(optarg, "null")) { 1599 g.dev_type = DEV_NETMAP; 1600 g.dummy_send = 1; 1601 } else if (!strncmp(optarg, "tap:", 4)) { 1602 g.dev_type = DEV_TAP; 1603 g.ifname = optarg + 4; 1604 } else if (!strncmp(optarg, "pcap:", 5)) { 1605 g.dev_type = DEV_PCAP; 1606 g.ifname = optarg + 5; 1607 } else if (!strncmp(optarg, "netmap:", 7)) { 1608 g.dev_type = DEV_NETMAP; 1609 g.ifname = optarg + 7; 1610 } else if (!strncmp(optarg, "tap", 3)) { 1611 g.dev_type = DEV_TAP; 1612 } else { 1613 g.dev_type = DEV_NETMAP; 1614 } 1615 break; 1616 1617 case 'I': 1618 g.options |= OPT_INDIRECT; /* XXX use indirect buffer */ 1619 break; 1620 1621 case 't': /* send, deprecated */ 1622 D("-t deprecated, please use -f tx -n %s", optarg); 1623 g.td_body = sender_body; 1624 g.npackets = atoi(optarg); 1625 break; 1626 1627 case 'r': /* receive */ 1628 D("-r deprecated, please use -f rx -n %s", optarg); 1629 g.td_body = receiver_body; 1630 g.npackets = atoi(optarg); 1631 break; 1632 1633 case 'l': /* pkt_size */ 1634 g.pkt_size = atoi(optarg); 1635 break; 1636 1637 case 'd': 1638 g.dst_ip.name = optarg; 1639 break; 1640 1641 case 's': 1642 g.src_ip.name = optarg; 1643 break; 1644 1645 case 'T': /* report interval */ 1646 g.report_interval = atoi(optarg); 1647 break; 1648 1649 case 'w': 1650 wait_link = atoi(optarg); 1651 break; 1652 1653 case 'W': /* XXX changed default */ 1654 g.forever = 0; /* do not exit rx even with no traffic */ 1655 break; 1656 1657 case 'b': /* burst */ 1658 g.burst = atoi(optarg); 1659 break; 1660 case 'c': 1661 g.cpus = atoi(optarg); 1662 break; 1663 case 'p': 1664 g.nthreads = atoi(optarg); 1665 break; 1666 1667 case 'D': /* destination mac */ 1668 g.dst_mac.name = optarg; 1669 break; 1670 1671 case 'S': /* source mac */ 1672 g.src_mac.name = optarg; 1673 break; 1674 case 'v': 1675 verbose++; 1676 break; 1677 case 'R': 1678 g.tx_rate = atoi(optarg); 1679 break; 1680 case 'X': 1681 g.options |= OPT_DUMP; 1682 break; 1683 case 'C': 1684 g.nmr_config = strdup(optarg); 1685 break; 1686 case 'H': 1687 g.virt_header = atoi(optarg); 1688 break; 1689 case 'h': 1690 g.host_ring = 1; 1691 break; 1692 } 1693 } 1694 1695 if (g.ifname == NULL) { 1696 D("missing ifname"); 1697 usage(); 1698 } 1699 1700 i = system_ncpus(); 1701 if (g.cpus < 0 || g.cpus > i) { 1702 D("%d cpus is too high, have only %d cpus", g.cpus, i); 1703 usage(); 1704 } 1705 if (g.cpus == 0) 1706 g.cpus = i; 1707 1708 if (g.pkt_size < 16 || g.pkt_size > 1536) { 1709 D("bad pktsize %d\n", g.pkt_size); 1710 usage(); 1711 } 1712 1713 if (g.src_mac.name == NULL) { 1714 static char mybuf[20] = "00:00:00:00:00:00"; 1715 /* retrieve source mac address. */ 1716 if (source_hwaddr(g.ifname, mybuf) == -1) { 1717 D("Unable to retrieve source mac"); 1718 // continue, fail later 1719 } 1720 g.src_mac.name = mybuf; 1721 } 1722 /* extract address ranges */ 1723 extract_ip_range(&g.src_ip); 1724 extract_ip_range(&g.dst_ip); 1725 extract_mac_range(&g.src_mac); 1726 extract_mac_range(&g.dst_mac); 1727 1728 if (g.src_ip.start != g.src_ip.end || 1729 g.src_ip.port0 != g.src_ip.port1 || 1730 g.dst_ip.start != g.dst_ip.end || 1731 g.dst_ip.port0 != g.dst_ip.port1) 1732 g.options |= OPT_COPY; 1733 1734 if (g.virt_header != 0 && g.virt_header != VIRT_HDR_1 1735 && g.virt_header != VIRT_HDR_2) { 1736 D("bad virtio-net-header length"); 1737 usage(); 1738 } 1739 1740 if (g.dev_type == DEV_TAP) { 1741 D("want to use tap %s", g.ifname); 1742 g.main_fd = tap_alloc(g.ifname); 1743 if (g.main_fd < 0) { 1744 D("cannot open tap %s", g.ifname); 1745 usage(); 1746 } 1747#ifndef NO_PCAP 1748 } else if (g.dev_type == DEV_PCAP) { 1749 char pcap_errbuf[PCAP_ERRBUF_SIZE]; 1750 1751 D("using pcap on %s", g.ifname); 1752 pcap_errbuf[0] = '\0'; // init the buffer 1753 g.p = pcap_open_live(g.ifname, 0, 1, 100, pcap_errbuf); 1754 if (g.p == NULL) { 1755 D("cannot open pcap on %s", g.ifname); 1756 usage(); 1757 } 1758#endif /* !NO_PCAP */ 1759 } else if (g.dummy_send) { /* but DEV_NETMAP */ 1760 D("using a dummy send routine"); 1761 } else { 1762 bzero(&nmr, sizeof(nmr)); 1763 nmr.nr_version = NETMAP_API; 1764 /* 1765 * Open the netmap device to fetch the number of queues of our 1766 * interface. 1767 * 1768 * The first NIOCREGIF also detaches the card from the 1769 * protocol stack and may cause a reset of the card, 1770 * which in turn may take some time for the PHY to 1771 * reconfigure. 1772 */ 1773 g.main_fd = open("/dev/netmap", O_RDWR); 1774 if (g.main_fd == -1) { 1775 D("Unable to open /dev/netmap: %s", strerror(errno)); 1776 // fail later 1777 } 1778 /* 1779 * Register the interface on the netmap device: from now on, 1780 * we can operate on the network interface without any 1781 * interference from the legacy network stack. 1782 * 1783 * We decide to put the first interface registration here to 1784 * give time to cards that take a long time to reset the PHY. 1785 */ 1786 bzero(&nmr, sizeof(nmr)); 1787 nmr.nr_version = NETMAP_API; 1788 strncpy(nmr.nr_name, g.ifname, sizeof(nmr.nr_name)); 1789 parse_nmr_config(g.nmr_config, &nmr); 1790 if (ioctl(g.main_fd, NIOCREGIF, &nmr) == -1) { 1791 D("Unable to register interface %s: %s", g.ifname, strerror(errno)); 1792 //continue, fail later 1793 } 1794 ND("%s: txr %d txd %d rxr %d rxd %d", g.ifname, 1795 nmr.nr_tx_rings, nmr.nr_tx_slots, 1796 nmr.nr_rx_rings, nmr.nr_rx_slots); 1797 devqueues = nmr.nr_rx_rings; 1798 1799 /* validate provided nthreads. */ 1800 if (g.nthreads < 1 || g.nthreads > devqueues) { 1801 D("bad nthreads %d, have %d queues", g.nthreads, devqueues); 1802 // continue, fail later 1803 } 1804 1805 /* 1806 * Map the netmap shared memory: instead of issuing mmap() 1807 * inside the body of the threads, we prefer to keep this 1808 * operation here to simplify the thread logic. 1809 */ 1810 D("mapping %d Kbytes", nmr.nr_memsize>>10); 1811 g.mmap_size = nmr.nr_memsize; 1812 g.mmap_addr = (struct netmap_d *) mmap(0, nmr.nr_memsize, 1813 PROT_WRITE | PROT_READ, 1814 MAP_SHARED, g.main_fd, 0); 1815 if (g.mmap_addr == MAP_FAILED) { 1816 D("Unable to mmap %d KB: %s", nmr.nr_memsize >> 10, strerror(errno)); 1817 // continue, fail later 1818 } 1819 1820 if (verbose) { 1821 struct netmap_if *nifp = NETMAP_IF(g.mmap_addr, nmr.nr_offset); 1822 1823 D("nifp at offset %d, %d tx %d rx rings %s", 1824 nmr.nr_offset, nmr.nr_tx_rings, nmr.nr_rx_rings, 1825 nmr.nr_ringid & NETMAP_PRIV_MEM ? "PRIVATE" : "common" ); 1826 for (i = 0; i <= nmr.nr_tx_rings; i++) { 1827 D(" TX%d at 0x%lx", i, 1828 (char *)NETMAP_TXRING(nifp, i) - (char *)nifp); 1829 } 1830 for (i = 0; i <= nmr.nr_rx_rings; i++) { 1831 D(" RX%d at 0x%lx", i, 1832 (char *)NETMAP_RXRING(nifp, i) - (char *)nifp); 1833 } 1834 } 1835 1836 /* Print some debug information. */ 1837 fprintf(stdout, 1838 "%s %s: %d queues, %d threads and %d cpus.\n", 1839 (g.td_body == sender_body) ? "Sending on" : "Receiving from", 1840 g.ifname, 1841 devqueues, 1842 g.nthreads, 1843 g.cpus); 1844 if (g.td_body == sender_body) { 1845 fprintf(stdout, "%s -> %s (%s -> %s)\n", 1846 g.src_ip.name, g.dst_ip.name, 1847 g.src_mac.name, g.dst_mac.name); 1848 } 1849 1850 /* Exit if something went wrong. */ 1851 if (g.main_fd < 0) { 1852 D("aborting"); 1853 usage(); 1854 } 1855 } 1856 1857 1858 if (g.options) { 1859 D("--- SPECIAL OPTIONS:%s%s%s%s%s\n", 1860 g.options & OPT_PREFETCH ? " prefetch" : "", 1861 g.options & OPT_ACCESS ? " access" : "", 1862 g.options & OPT_MEMCPY ? " memcpy" : "", 1863 g.options & OPT_INDIRECT ? " indirect" : "", 1864 g.options & OPT_COPY ? " copy" : ""); 1865 } 1866 1867 g.tx_period.tv_sec = g.tx_period.tv_nsec = 0; 1868 if (g.tx_rate > 0) { 1869 /* try to have at least something every second, 1870 * reducing the burst size to some 0.01s worth of data 1871 * (but no less than one full set of fragments) 1872 */ 1873 uint64_t x; 1874 int lim = (g.tx_rate)/300; 1875 if (g.burst > lim) 1876 g.burst = lim; 1877 if (g.burst < g.frags) 1878 g.burst = g.frags; 1879 x = ((uint64_t)1000000000 * (uint64_t)g.burst) / (uint64_t) g.tx_rate; 1880 g.tx_period.tv_nsec = x; 1881 g.tx_period.tv_sec = g.tx_period.tv_nsec / 1000000000; 1882 g.tx_period.tv_nsec = g.tx_period.tv_nsec % 1000000000; 1883 } 1884 if (g.td_body == sender_body) 1885 D("Sending %d packets every %ld.%09ld s", 1886 g.burst, g.tx_period.tv_sec, g.tx_period.tv_nsec); 1887 /* Wait for PHY reset. */ 1888 D("Wait %d secs for phy reset", wait_link); 1889 sleep(wait_link); 1890 D("Ready..."); 1891 1892 /* Install ^C handler. */ 1893 global_nthreads = g.nthreads; 1894 signal(SIGINT, sigint_h); 1895 1896 start_threads(&g); 1897 main_thread(&g); 1898 return 0; 1899} 1900 1901/* end of file */ 1902