pkt-gen.c revision 270252
1/* 2 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. 3 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27/* 28 * $FreeBSD: stable/10/tools/tools/netmap/pkt-gen.c 270252 2014-08-20 23:34:36Z luigi $ 29 * $Id: pkt-gen.c 12346 2013-06-12 17:36:25Z luigi $ 30 * 31 * Example program to show how to build a multithreaded packet 32 * source/sink using the netmap device. 33 * 34 * In this example we create a programmable number of threads 35 * to take care of all the queues of the interface used to 36 * send or receive traffic. 37 * 38 */ 39 40// #define TRASH_VHOST_HDR 41 42#define _GNU_SOURCE /* for CPU_SET() */ 43#include <stdio.h> 44#define NETMAP_WITH_LIBS 45#include <net/netmap_user.h> 46 47 48#include <ctype.h> // isprint() 49#include <unistd.h> // sysconf() 50#include <sys/poll.h> 51#include <arpa/inet.h> /* ntohs */ 52#include <sys/sysctl.h> /* sysctl */ 53#include <ifaddrs.h> /* getifaddrs */ 54#include <net/ethernet.h> 55#include <netinet/in.h> 56#include <netinet/ip.h> 57#include <netinet/udp.h> 58 59#include <pthread.h> 60 61#ifndef NO_PCAP 62#include <pcap/pcap.h> 63#endif 64 65#ifdef linux 66 67#define cpuset_t cpu_set_t 68 69#define ifr_flagshigh ifr_flags /* only the low 16 bits here */ 70#define IFF_PPROMISC IFF_PROMISC /* IFF_PPROMISC does not exist */ 71#include <linux/ethtool.h> 72#include <linux/sockios.h> 73 74#define CLOCK_REALTIME_PRECISE CLOCK_REALTIME 75#include <netinet/ether.h> /* ether_aton */ 76#include <linux/if_packet.h> /* sockaddr_ll */ 77#endif /* linux */ 78 79#ifdef __FreeBSD__ 80#include <sys/endian.h> /* le64toh */ 81#include <machine/param.h> 82 83#include <pthread_np.h> /* pthread w/ affinity */ 84#include <sys/cpuset.h> /* cpu_set */ 85#include <net/if_dl.h> /* LLADDR */ 86#endif /* __FreeBSD__ */ 87 88#ifdef __APPLE__ 89 90#define cpuset_t uint64_t // XXX 91static inline void CPU_ZERO(cpuset_t *p) 92{ 93 *p = 0; 94} 95 96static inline void CPU_SET(uint32_t i, cpuset_t *p) 97{ 98 *p |= 1<< (i & 0x3f); 99} 100 101#define pthread_setaffinity_np(a, b, c) ((void)a, 0) 102 103#define ifr_flagshigh ifr_flags // XXX 104#define IFF_PPROMISC IFF_PROMISC 105#include <net/if_dl.h> /* LLADDR */ 106#define clock_gettime(a,b) \ 107 do {struct timespec t0 = {0,0}; *(b) = t0; } while (0) 108#endif /* __APPLE__ */ 109 110const char *default_payload="netmap pkt-gen DIRECT payload\n" 111 "http://info.iet.unipi.it/~luigi/netmap/ "; 112 113const char *indirect_payload="netmap pkt-gen indirect payload\n" 114 "http://info.iet.unipi.it/~luigi/netmap/ "; 115 116int verbose = 0; 117 118#define SKIP_PAYLOAD 1 /* do not check payload. XXX unused */ 119 120 121#define VIRT_HDR_1 10 /* length of a base vnet-hdr */ 122#define VIRT_HDR_2 12 /* length of the extenede vnet-hdr */ 123#define VIRT_HDR_MAX VIRT_HDR_2 124struct virt_header { 125 uint8_t fields[VIRT_HDR_MAX]; 126}; 127 128#define MAX_BODYSIZE 16384 129 130struct pkt { 131 struct virt_header vh; 132 struct ether_header eh; 133 struct ip ip; 134 struct udphdr udp; 135 uint8_t body[MAX_BODYSIZE]; // XXX hardwired 136} __attribute__((__packed__)); 137 138struct ip_range { 139 char *name; 140 uint32_t start, end; /* same as struct in_addr */ 141 uint16_t port0, port1; 142}; 143 144struct mac_range { 145 char *name; 146 struct ether_addr start, end; 147}; 148 149/* ifname can be netmap:foo-xxxx */ 150#define MAX_IFNAMELEN 64 /* our buffer for ifname */ 151//#define MAX_PKTSIZE 1536 152#define MAX_PKTSIZE MAX_BODYSIZE /* XXX: + IP_HDR + ETH_HDR */ 153 154/* compact timestamp to fit into 60 byte packet. (enough to obtain RTT) */ 155struct tstamp { 156 uint32_t sec; 157 uint32_t nsec; 158}; 159 160/* 161 * global arguments for all threads 162 */ 163 164struct glob_arg { 165 struct ip_range src_ip; 166 struct ip_range dst_ip; 167 struct mac_range dst_mac; 168 struct mac_range src_mac; 169 int pkt_size; 170 int burst; 171 int forever; 172 int npackets; /* total packets to send */ 173 int frags; /* fragments per packet */ 174 int nthreads; 175 int cpus; 176 int options; /* testing */ 177#define OPT_PREFETCH 1 178#define OPT_ACCESS 2 179#define OPT_COPY 4 180#define OPT_MEMCPY 8 181#define OPT_TS 16 /* add a timestamp */ 182#define OPT_INDIRECT 32 /* use indirect buffers, tx only */ 183#define OPT_DUMP 64 /* dump rx/tx traffic */ 184#define OPT_MONITOR_TX 128 185#define OPT_MONITOR_RX 256 186 int dev_type; 187#ifndef NO_PCAP 188 pcap_t *p; 189#endif 190 191 int tx_rate; 192 struct timespec tx_period; 193 194 int affinity; 195 int main_fd; 196 struct nm_desc *nmd; 197 int report_interval; /* milliseconds between prints */ 198 void *(*td_body)(void *); 199 void *mmap_addr; 200 char ifname[MAX_IFNAMELEN]; 201 char *nmr_config; 202 int dummy_send; 203 int virt_header; /* send also the virt_header */ 204 int extra_bufs; /* goes in nr_arg3 */ 205}; 206enum dev_type { DEV_NONE, DEV_NETMAP, DEV_PCAP, DEV_TAP }; 207 208 209/* 210 * Arguments for a new thread. The same structure is used by 211 * the source and the sink 212 */ 213struct targ { 214 struct glob_arg *g; 215 int used; 216 int completed; 217 int cancel; 218 int fd; 219 struct nm_desc *nmd; 220 volatile uint64_t count; 221 struct timespec tic, toc; 222 int me; 223 pthread_t thread; 224 int affinity; 225 226 struct pkt pkt; 227}; 228 229 230/* 231 * extract the extremes from a range of ipv4 addresses. 232 * addr_lo[-addr_hi][:port_lo[-port_hi]] 233 */ 234static void 235extract_ip_range(struct ip_range *r) 236{ 237 char *ap, *pp; 238 struct in_addr a; 239 240 if (verbose) 241 D("extract IP range from %s", r->name); 242 r->port0 = r->port1 = 0; 243 r->start = r->end = 0; 244 245 /* the first - splits start/end of range */ 246 ap = index(r->name, '-'); /* do we have ports ? */ 247 if (ap) { 248 *ap++ = '\0'; 249 } 250 /* grab the initial values (mandatory) */ 251 pp = index(r->name, ':'); 252 if (pp) { 253 *pp++ = '\0'; 254 r->port0 = r->port1 = strtol(pp, NULL, 0); 255 }; 256 inet_aton(r->name, &a); 257 r->start = r->end = ntohl(a.s_addr); 258 if (ap) { 259 pp = index(ap, ':'); 260 if (pp) { 261 *pp++ = '\0'; 262 if (*pp) 263 r->port1 = strtol(pp, NULL, 0); 264 } 265 if (*ap) { 266 inet_aton(ap, &a); 267 r->end = ntohl(a.s_addr); 268 } 269 } 270 if (r->port0 > r->port1) { 271 uint16_t tmp = r->port0; 272 r->port0 = r->port1; 273 r->port1 = tmp; 274 } 275 if (r->start > r->end) { 276 uint32_t tmp = r->start; 277 r->start = r->end; 278 r->end = tmp; 279 } 280 { 281 struct in_addr a; 282 char buf1[16]; // one ip address 283 284 a.s_addr = htonl(r->end); 285 strncpy(buf1, inet_ntoa(a), sizeof(buf1)); 286 a.s_addr = htonl(r->start); 287 if (1) 288 D("range is %s:%d to %s:%d", 289 inet_ntoa(a), r->port0, buf1, r->port1); 290 } 291} 292 293static void 294extract_mac_range(struct mac_range *r) 295{ 296 if (verbose) 297 D("extract MAC range from %s", r->name); 298 bcopy(ether_aton(r->name), &r->start, 6); 299 bcopy(ether_aton(r->name), &r->end, 6); 300#if 0 301 bcopy(targ->src_mac, eh->ether_shost, 6); 302 p = index(targ->g->src_mac, '-'); 303 if (p) 304 targ->src_mac_range = atoi(p+1); 305 306 bcopy(ether_aton(targ->g->dst_mac), targ->dst_mac, 6); 307 bcopy(targ->dst_mac, eh->ether_dhost, 6); 308 p = index(targ->g->dst_mac, '-'); 309 if (p) 310 targ->dst_mac_range = atoi(p+1); 311#endif 312 if (verbose) 313 D("%s starts at %s", r->name, ether_ntoa(&r->start)); 314} 315 316static struct targ *targs; 317static int global_nthreads; 318 319/* control-C handler */ 320static void 321sigint_h(int sig) 322{ 323 int i; 324 325 (void)sig; /* UNUSED */ 326 D("received control-C on thread %p", pthread_self()); 327 for (i = 0; i < global_nthreads; i++) { 328 targs[i].cancel = 1; 329 } 330 signal(SIGINT, SIG_DFL); 331} 332 333/* sysctl wrapper to return the number of active CPUs */ 334static int 335system_ncpus(void) 336{ 337 int ncpus; 338#if defined (__FreeBSD__) 339 int mib[2] = { CTL_HW, HW_NCPU }; 340 size_t len = sizeof(mib); 341 sysctl(mib, 2, &ncpus, &len, NULL, 0); 342#elif defined(linux) 343 ncpus = sysconf(_SC_NPROCESSORS_ONLN); 344#else /* others */ 345 ncpus = 1; 346#endif /* others */ 347 return (ncpus); 348} 349 350#ifdef __linux__ 351#define sockaddr_dl sockaddr_ll 352#define sdl_family sll_family 353#define AF_LINK AF_PACKET 354#define LLADDR(s) s->sll_addr; 355#include <linux/if_tun.h> 356#define TAP_CLONEDEV "/dev/net/tun" 357#endif /* __linux__ */ 358 359#ifdef __FreeBSD__ 360#include <net/if_tun.h> 361#define TAP_CLONEDEV "/dev/tap" 362#endif /* __FreeBSD */ 363 364#ifdef __APPLE__ 365// #warning TAP not supported on apple ? 366#include <net/if_utun.h> 367#define TAP_CLONEDEV "/dev/tap" 368#endif /* __APPLE__ */ 369 370 371/* 372 * parse the vale configuration in conf and put it in nmr. 373 * Return the flag set if necessary. 374 * The configuration may consist of 0 to 4 numbers separated 375 * by commas: #tx-slots,#rx-slots,#tx-rings,#rx-rings. 376 * Missing numbers or zeroes stand for default values. 377 * As an additional convenience, if exactly one number 378 * is specified, then this is assigned to both #tx-slots and #rx-slots. 379 * If there is no 4th number, then the 3rd is assigned to both #tx-rings 380 * and #rx-rings. 381 */ 382int 383parse_nmr_config(const char* conf, struct nmreq *nmr) 384{ 385 char *w, *tok; 386 int i, v; 387 388 nmr->nr_tx_rings = nmr->nr_rx_rings = 0; 389 nmr->nr_tx_slots = nmr->nr_rx_slots = 0; 390 if (conf == NULL || ! *conf) 391 return 0; 392 w = strdup(conf); 393 for (i = 0, tok = strtok(w, ","); tok; i++, tok = strtok(NULL, ",")) { 394 v = atoi(tok); 395 switch (i) { 396 case 0: 397 nmr->nr_tx_slots = nmr->nr_rx_slots = v; 398 break; 399 case 1: 400 nmr->nr_rx_slots = v; 401 break; 402 case 2: 403 nmr->nr_tx_rings = nmr->nr_rx_rings = v; 404 break; 405 case 3: 406 nmr->nr_rx_rings = v; 407 break; 408 default: 409 D("ignored config: %s", tok); 410 break; 411 } 412 } 413 D("txr %d txd %d rxr %d rxd %d", 414 nmr->nr_tx_rings, nmr->nr_tx_slots, 415 nmr->nr_rx_rings, nmr->nr_rx_slots); 416 free(w); 417 return (nmr->nr_tx_rings || nmr->nr_tx_slots || 418 nmr->nr_rx_rings || nmr->nr_rx_slots) ? 419 NM_OPEN_RING_CFG : 0; 420} 421 422 423/* 424 * locate the src mac address for our interface, put it 425 * into the user-supplied buffer. return 0 if ok, -1 on error. 426 */ 427static int 428source_hwaddr(const char *ifname, char *buf) 429{ 430 struct ifaddrs *ifaphead, *ifap; 431 int l = sizeof(ifap->ifa_name); 432 433 if (getifaddrs(&ifaphead) != 0) { 434 D("getifaddrs %s failed", ifname); 435 return (-1); 436 } 437 438 for (ifap = ifaphead; ifap; ifap = ifap->ifa_next) { 439 struct sockaddr_dl *sdl = 440 (struct sockaddr_dl *)ifap->ifa_addr; 441 uint8_t *mac; 442 443 if (!sdl || sdl->sdl_family != AF_LINK) 444 continue; 445 if (strncmp(ifap->ifa_name, ifname, l) != 0) 446 continue; 447 mac = (uint8_t *)LLADDR(sdl); 448 sprintf(buf, "%02x:%02x:%02x:%02x:%02x:%02x", 449 mac[0], mac[1], mac[2], 450 mac[3], mac[4], mac[5]); 451 if (verbose) 452 D("source hwaddr %s", buf); 453 break; 454 } 455 freeifaddrs(ifaphead); 456 return ifap ? 0 : 1; 457} 458 459 460/* set the thread affinity. */ 461static int 462setaffinity(pthread_t me, int i) 463{ 464 cpuset_t cpumask; 465 466 if (i == -1) 467 return 0; 468 469 /* Set thread affinity affinity.*/ 470 CPU_ZERO(&cpumask); 471 CPU_SET(i, &cpumask); 472 473 if (pthread_setaffinity_np(me, sizeof(cpuset_t), &cpumask) != 0) { 474 D("Unable to set affinity: %s", strerror(errno)); 475 return 1; 476 } 477 return 0; 478} 479 480/* Compute the checksum of the given ip header. */ 481static uint16_t 482checksum(const void *data, uint16_t len, uint32_t sum) 483{ 484 const uint8_t *addr = data; 485 uint32_t i; 486 487 /* Checksum all the pairs of bytes first... */ 488 for (i = 0; i < (len & ~1U); i += 2) { 489 sum += (u_int16_t)ntohs(*((u_int16_t *)(addr + i))); 490 if (sum > 0xFFFF) 491 sum -= 0xFFFF; 492 } 493 /* 494 * If there's a single byte left over, checksum it, too. 495 * Network byte order is big-endian, so the remaining byte is 496 * the high byte. 497 */ 498 if (i < len) { 499 sum += addr[i] << 8; 500 if (sum > 0xFFFF) 501 sum -= 0xFFFF; 502 } 503 return sum; 504} 505 506static u_int16_t 507wrapsum(u_int32_t sum) 508{ 509 sum = ~sum & 0xFFFF; 510 return (htons(sum)); 511} 512 513/* Check the payload of the packet for errors (use it for debug). 514 * Look for consecutive ascii representations of the size of the packet. 515 */ 516static void 517dump_payload(char *p, int len, struct netmap_ring *ring, int cur) 518{ 519 char buf[128]; 520 int i, j, i0; 521 522 /* get the length in ASCII of the length of the packet. */ 523 524 printf("ring %p cur %5d [buf %6d flags 0x%04x len %5d]\n", 525 ring, cur, ring->slot[cur].buf_idx, 526 ring->slot[cur].flags, len); 527 /* hexdump routine */ 528 for (i = 0; i < len; ) { 529 memset(buf, sizeof(buf), ' '); 530 sprintf(buf, "%5d: ", i); 531 i0 = i; 532 for (j=0; j < 16 && i < len; i++, j++) 533 sprintf(buf+7+j*3, "%02x ", (uint8_t)(p[i])); 534 i = i0; 535 for (j=0; j < 16 && i < len; i++, j++) 536 sprintf(buf+7+j + 48, "%c", 537 isprint(p[i]) ? p[i] : '.'); 538 printf("%s\n", buf); 539 } 540} 541 542/* 543 * Fill a packet with some payload. 544 * We create a UDP packet so the payload starts at 545 * 14+20+8 = 42 bytes. 546 */ 547#ifdef __linux__ 548#define uh_sport source 549#define uh_dport dest 550#define uh_ulen len 551#define uh_sum check 552#endif /* linux */ 553 554/* 555 * increment the addressed in the packet, 556 * starting from the least significant field. 557 * DST_IP DST_PORT SRC_IP SRC_PORT 558 */ 559static void 560update_addresses(struct pkt *pkt, struct glob_arg *g) 561{ 562 uint32_t a; 563 uint16_t p; 564 struct ip *ip = &pkt->ip; 565 struct udphdr *udp = &pkt->udp; 566 567 do { 568 p = ntohs(udp->uh_sport); 569 if (p < g->src_ip.port1) { /* just inc, no wrap */ 570 udp->uh_sport = htons(p + 1); 571 break; 572 } 573 udp->uh_sport = htons(g->src_ip.port0); 574 575 a = ntohl(ip->ip_src.s_addr); 576 if (a < g->src_ip.end) { /* just inc, no wrap */ 577 ip->ip_src.s_addr = htonl(a + 1); 578 break; 579 } 580 ip->ip_src.s_addr = htonl(g->src_ip.start); 581 582 udp->uh_sport = htons(g->src_ip.port0); 583 p = ntohs(udp->uh_dport); 584 if (p < g->dst_ip.port1) { /* just inc, no wrap */ 585 udp->uh_dport = htons(p + 1); 586 break; 587 } 588 udp->uh_dport = htons(g->dst_ip.port0); 589 590 a = ntohl(ip->ip_dst.s_addr); 591 if (a < g->dst_ip.end) { /* just inc, no wrap */ 592 ip->ip_dst.s_addr = htonl(a + 1); 593 break; 594 } 595 ip->ip_dst.s_addr = htonl(g->dst_ip.start); 596 } while (0); 597 // update checksum 598} 599 600/* 601 * initialize one packet and prepare for the next one. 602 * The copy could be done better instead of repeating it each time. 603 */ 604static void 605initialize_packet(struct targ *targ) 606{ 607 struct pkt *pkt = &targ->pkt; 608 struct ether_header *eh; 609 struct ip *ip; 610 struct udphdr *udp; 611 uint16_t paylen = targ->g->pkt_size - sizeof(*eh) - sizeof(struct ip); 612 const char *payload = targ->g->options & OPT_INDIRECT ? 613 indirect_payload : default_payload; 614 int i, l0 = strlen(payload); 615 616 /* create a nice NUL-terminated string */ 617 for (i = 0; i < paylen; i += l0) { 618 if (l0 > paylen - i) 619 l0 = paylen - i; // last round 620 bcopy(payload, pkt->body + i, l0); 621 } 622 pkt->body[i-1] = '\0'; 623 ip = &pkt->ip; 624 625 /* prepare the headers */ 626 ip->ip_v = IPVERSION; 627 ip->ip_hl = 5; 628 ip->ip_id = 0; 629 ip->ip_tos = IPTOS_LOWDELAY; 630 ip->ip_len = ntohs(targ->g->pkt_size - sizeof(*eh)); 631 ip->ip_id = 0; 632 ip->ip_off = htons(IP_DF); /* Don't fragment */ 633 ip->ip_ttl = IPDEFTTL; 634 ip->ip_p = IPPROTO_UDP; 635 ip->ip_dst.s_addr = htonl(targ->g->dst_ip.start); 636 ip->ip_src.s_addr = htonl(targ->g->src_ip.start); 637 ip->ip_sum = wrapsum(checksum(ip, sizeof(*ip), 0)); 638 639 640 udp = &pkt->udp; 641 udp->uh_sport = htons(targ->g->src_ip.port0); 642 udp->uh_dport = htons(targ->g->dst_ip.port0); 643 udp->uh_ulen = htons(paylen); 644 /* Magic: taken from sbin/dhclient/packet.c */ 645 udp->uh_sum = wrapsum(checksum(udp, sizeof(*udp), 646 checksum(pkt->body, 647 paylen - sizeof(*udp), 648 checksum(&ip->ip_src, 2 * sizeof(ip->ip_src), 649 IPPROTO_UDP + (u_int32_t)ntohs(udp->uh_ulen) 650 ) 651 ) 652 )); 653 654 eh = &pkt->eh; 655 bcopy(&targ->g->src_mac.start, eh->ether_shost, 6); 656 bcopy(&targ->g->dst_mac.start, eh->ether_dhost, 6); 657 eh->ether_type = htons(ETHERTYPE_IP); 658 659 bzero(&pkt->vh, sizeof(pkt->vh)); 660#ifdef TRASH_VHOST_HDR 661 /* set bogus content */ 662 pkt->vh.fields[0] = 0xff; 663 pkt->vh.fields[1] = 0xff; 664 pkt->vh.fields[2] = 0xff; 665 pkt->vh.fields[3] = 0xff; 666 pkt->vh.fields[4] = 0xff; 667 pkt->vh.fields[5] = 0xff; 668#endif /* TRASH_VHOST_HDR */ 669 // dump_payload((void *)pkt, targ->g->pkt_size, NULL, 0); 670} 671 672static void 673set_vnet_hdr_len(struct targ *t) 674{ 675 int err, l = t->g->virt_header; 676 struct nmreq req; 677 678 if (l == 0) 679 return; 680 681 memset(&req, 0, sizeof(req)); 682 bcopy(t->nmd->req.nr_name, req.nr_name, sizeof(req.nr_name)); 683 req.nr_version = NETMAP_API; 684 req.nr_cmd = NETMAP_BDG_VNET_HDR; 685 req.nr_arg1 = l; 686 err = ioctl(t->fd, NIOCREGIF, &req); 687 if (err) { 688 D("Unable to set vnet header length %d", l); 689 } 690} 691 692 693/* 694 * create and enqueue a batch of packets on a ring. 695 * On the last one set NS_REPORT to tell the driver to generate 696 * an interrupt when done. 697 */ 698static int 699send_packets(struct netmap_ring *ring, struct pkt *pkt, void *frame, 700 int size, struct glob_arg *g, u_int count, int options, 701 u_int nfrags) 702{ 703 u_int n, sent, cur = ring->cur; 704 u_int fcnt; 705 706 n = nm_ring_space(ring); 707 if (n < count) 708 count = n; 709 if (count < nfrags) { 710 D("truncating packet, no room for frags %d %d", 711 count, nfrags); 712 } 713#if 0 714 if (options & (OPT_COPY | OPT_PREFETCH) ) { 715 for (sent = 0; sent < count; sent++) { 716 struct netmap_slot *slot = &ring->slot[cur]; 717 char *p = NETMAP_BUF(ring, slot->buf_idx); 718 719 __builtin_prefetch(p); 720 cur = nm_ring_next(ring, cur); 721 } 722 cur = ring->cur; 723 } 724#endif 725 for (fcnt = nfrags, sent = 0; sent < count; sent++) { 726 struct netmap_slot *slot = &ring->slot[cur]; 727 char *p = NETMAP_BUF(ring, slot->buf_idx); 728 729 slot->flags = 0; 730 if (options & OPT_INDIRECT) { 731 slot->flags |= NS_INDIRECT; 732 slot->ptr = (uint64_t)frame; 733 } else if (options & OPT_COPY) { 734 nm_pkt_copy(frame, p, size); 735 if (fcnt == nfrags) 736 update_addresses(pkt, g); 737 } else if (options & OPT_MEMCPY) { 738 memcpy(p, frame, size); 739 if (fcnt == nfrags) 740 update_addresses(pkt, g); 741 } else if (options & OPT_PREFETCH) { 742 __builtin_prefetch(p); 743 } 744 if (options & OPT_DUMP) 745 dump_payload(p, size, ring, cur); 746 slot->len = size; 747 if (--fcnt > 0) 748 slot->flags |= NS_MOREFRAG; 749 else 750 fcnt = nfrags; 751 if (sent == count - 1) { 752 slot->flags &= ~NS_MOREFRAG; 753 slot->flags |= NS_REPORT; 754 } 755 cur = nm_ring_next(ring, cur); 756 } 757 ring->head = ring->cur = cur; 758 759 return (sent); 760} 761 762/* 763 * Send a packet, and wait for a response. 764 * The payload (after UDP header, ofs 42) has a 4-byte sequence 765 * followed by a struct timeval (or bintime?) 766 */ 767#define PAY_OFS 42 /* where in the pkt... */ 768 769static void * 770pinger_body(void *data) 771{ 772 struct targ *targ = (struct targ *) data; 773 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 774 struct netmap_if *nifp = targ->nmd->nifp; 775 int i, rx = 0, n = targ->g->npackets; 776 void *frame; 777 int size; 778 uint32_t sent = 0; 779 struct timespec ts, now, last_print; 780 uint32_t count = 0, min = 1000000000, av = 0; 781 782 frame = &targ->pkt; 783 frame += sizeof(targ->pkt.vh) - targ->g->virt_header; 784 size = targ->g->pkt_size + targ->g->virt_header; 785 786 787 if (targ->g->nthreads > 1) { 788 D("can only ping with 1 thread"); 789 return NULL; 790 } 791 792 clock_gettime(CLOCK_REALTIME_PRECISE, &last_print); 793 now = last_print; 794 while (n == 0 || (int)sent < n) { 795 struct netmap_ring *ring = NETMAP_TXRING(nifp, 0); 796 struct netmap_slot *slot; 797 char *p; 798 for (i = 0; i < 1; i++) { /* XXX why the loop for 1 pkt ? */ 799 slot = &ring->slot[ring->cur]; 800 slot->len = size; 801 p = NETMAP_BUF(ring, slot->buf_idx); 802 803 if (nm_ring_empty(ring)) { 804 D("-- ouch, cannot send"); 805 } else { 806 struct tstamp *tp; 807 nm_pkt_copy(frame, p, size); 808 clock_gettime(CLOCK_REALTIME_PRECISE, &ts); 809 bcopy(&sent, p+42, sizeof(sent)); 810 tp = (struct tstamp *)(p+46); 811 tp->sec = (uint32_t)ts.tv_sec; 812 tp->nsec = (uint32_t)ts.tv_nsec; 813 sent++; 814 ring->head = ring->cur = nm_ring_next(ring, ring->cur); 815 } 816 } 817 /* should use a parameter to decide how often to send */ 818 if (poll(&pfd, 1, 3000) <= 0) { 819 D("poll error/timeout on queue %d: %s", targ->me, 820 strerror(errno)); 821 continue; 822 } 823 /* see what we got back */ 824 for (i = targ->nmd->first_tx_ring; 825 i <= targ->nmd->last_tx_ring; i++) { 826 ring = NETMAP_RXRING(nifp, i); 827 while (!nm_ring_empty(ring)) { 828 uint32_t seq; 829 struct tstamp *tp; 830 slot = &ring->slot[ring->cur]; 831 p = NETMAP_BUF(ring, slot->buf_idx); 832 833 clock_gettime(CLOCK_REALTIME_PRECISE, &now); 834 bcopy(p+42, &seq, sizeof(seq)); 835 tp = (struct tstamp *)(p+46); 836 ts.tv_sec = (time_t)tp->sec; 837 ts.tv_nsec = (long)tp->nsec; 838 ts.tv_sec = now.tv_sec - ts.tv_sec; 839 ts.tv_nsec = now.tv_nsec - ts.tv_nsec; 840 if (ts.tv_nsec < 0) { 841 ts.tv_nsec += 1000000000; 842 ts.tv_sec--; 843 } 844 if (1) D("seq %d/%d delta %d.%09d", seq, sent, 845 (int)ts.tv_sec, (int)ts.tv_nsec); 846 if (ts.tv_nsec < (int)min) 847 min = ts.tv_nsec; 848 count ++; 849 av += ts.tv_nsec; 850 ring->head = ring->cur = nm_ring_next(ring, ring->cur); 851 rx++; 852 } 853 } 854 //D("tx %d rx %d", sent, rx); 855 //usleep(100000); 856 ts.tv_sec = now.tv_sec - last_print.tv_sec; 857 ts.tv_nsec = now.tv_nsec - last_print.tv_nsec; 858 if (ts.tv_nsec < 0) { 859 ts.tv_nsec += 1000000000; 860 ts.tv_sec--; 861 } 862 if (ts.tv_sec >= 1) { 863 D("count %d min %d av %d", 864 count, min, av/count); 865 count = 0; 866 av = 0; 867 min = 100000000; 868 last_print = now; 869 } 870 } 871 return NULL; 872} 873 874 875/* 876 * reply to ping requests 877 */ 878static void * 879ponger_body(void *data) 880{ 881 struct targ *targ = (struct targ *) data; 882 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 883 struct netmap_if *nifp = targ->nmd->nifp; 884 struct netmap_ring *txring, *rxring; 885 int i, rx = 0, sent = 0, n = targ->g->npackets; 886 887 if (targ->g->nthreads > 1) { 888 D("can only reply ping with 1 thread"); 889 return NULL; 890 } 891 D("understood ponger %d but don't know how to do it", n); 892 while (n == 0 || sent < n) { 893 uint32_t txcur, txavail; 894//#define BUSYWAIT 895#ifdef BUSYWAIT 896 ioctl(pfd.fd, NIOCRXSYNC, NULL); 897#else 898 if (poll(&pfd, 1, 1000) <= 0) { 899 D("poll error/timeout on queue %d: %s", targ->me, 900 strerror(errno)); 901 continue; 902 } 903#endif 904 txring = NETMAP_TXRING(nifp, 0); 905 txcur = txring->cur; 906 txavail = nm_ring_space(txring); 907 /* see what we got back */ 908 for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) { 909 rxring = NETMAP_RXRING(nifp, i); 910 while (!nm_ring_empty(rxring)) { 911 uint16_t *spkt, *dpkt; 912 uint32_t cur = rxring->cur; 913 struct netmap_slot *slot = &rxring->slot[cur]; 914 char *src, *dst; 915 src = NETMAP_BUF(rxring, slot->buf_idx); 916 //D("got pkt %p of size %d", src, slot->len); 917 rxring->head = rxring->cur = nm_ring_next(rxring, cur); 918 rx++; 919 if (txavail == 0) 920 continue; 921 dst = NETMAP_BUF(txring, 922 txring->slot[txcur].buf_idx); 923 /* copy... */ 924 dpkt = (uint16_t *)dst; 925 spkt = (uint16_t *)src; 926 nm_pkt_copy(src, dst, slot->len); 927 dpkt[0] = spkt[3]; 928 dpkt[1] = spkt[4]; 929 dpkt[2] = spkt[5]; 930 dpkt[3] = spkt[0]; 931 dpkt[4] = spkt[1]; 932 dpkt[5] = spkt[2]; 933 txring->slot[txcur].len = slot->len; 934 /* XXX swap src dst mac */ 935 txcur = nm_ring_next(txring, txcur); 936 txavail--; 937 sent++; 938 } 939 } 940 txring->head = txring->cur = txcur; 941 targ->count = sent; 942#ifdef BUSYWAIT 943 ioctl(pfd.fd, NIOCTXSYNC, NULL); 944#endif 945 //D("tx %d rx %d", sent, rx); 946 } 947 return NULL; 948} 949 950static __inline int 951timespec_ge(const struct timespec *a, const struct timespec *b) 952{ 953 954 if (a->tv_sec > b->tv_sec) 955 return (1); 956 if (a->tv_sec < b->tv_sec) 957 return (0); 958 if (a->tv_nsec >= b->tv_nsec) 959 return (1); 960 return (0); 961} 962 963static __inline struct timespec 964timeval2spec(const struct timeval *a) 965{ 966 struct timespec ts = { 967 .tv_sec = a->tv_sec, 968 .tv_nsec = a->tv_usec * 1000 969 }; 970 return ts; 971} 972 973static __inline struct timeval 974timespec2val(const struct timespec *a) 975{ 976 struct timeval tv = { 977 .tv_sec = a->tv_sec, 978 .tv_usec = a->tv_nsec / 1000 979 }; 980 return tv; 981} 982 983 984static __inline struct timespec 985timespec_add(struct timespec a, struct timespec b) 986{ 987 struct timespec ret = { a.tv_sec + b.tv_sec, a.tv_nsec + b.tv_nsec }; 988 if (ret.tv_nsec >= 1000000000) { 989 ret.tv_sec++; 990 ret.tv_nsec -= 1000000000; 991 } 992 return ret; 993} 994 995static __inline struct timespec 996timespec_sub(struct timespec a, struct timespec b) 997{ 998 struct timespec ret = { a.tv_sec - b.tv_sec, a.tv_nsec - b.tv_nsec }; 999 if (ret.tv_nsec < 0) { 1000 ret.tv_sec--; 1001 ret.tv_nsec += 1000000000; 1002 } 1003 return ret; 1004} 1005 1006 1007/* 1008 * wait until ts, either busy or sleeping if more than 1ms. 1009 * Return wakeup time. 1010 */ 1011static struct timespec 1012wait_time(struct timespec ts) 1013{ 1014 for (;;) { 1015 struct timespec w, cur; 1016 clock_gettime(CLOCK_REALTIME_PRECISE, &cur); 1017 w = timespec_sub(ts, cur); 1018 if (w.tv_sec < 0) 1019 return cur; 1020 else if (w.tv_sec > 0 || w.tv_nsec > 1000000) 1021 poll(NULL, 0, 1); 1022 } 1023} 1024 1025static void * 1026sender_body(void *data) 1027{ 1028 struct targ *targ = (struct targ *) data; 1029 struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT }; 1030 struct netmap_if *nifp; 1031 struct netmap_ring *txring; 1032 int i, n = targ->g->npackets / targ->g->nthreads; 1033 int64_t sent = 0; 1034 int options = targ->g->options | OPT_COPY; 1035 struct timespec nexttime = { 0, 0}; // XXX silence compiler 1036 int rate_limit = targ->g->tx_rate; 1037 struct pkt *pkt = &targ->pkt; 1038 void *frame; 1039 int size; 1040 1041 frame = pkt; 1042 frame += sizeof(pkt->vh) - targ->g->virt_header; 1043 size = targ->g->pkt_size + targ->g->virt_header; 1044 1045 D("start, fd %d main_fd %d", targ->fd, targ->g->main_fd); 1046 if (setaffinity(targ->thread, targ->affinity)) 1047 goto quit; 1048 1049 /* main loop.*/ 1050 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 1051 if (rate_limit) { 1052 targ->tic = timespec_add(targ->tic, (struct timespec){2,0}); 1053 targ->tic.tv_nsec = 0; 1054 wait_time(targ->tic); 1055 nexttime = targ->tic; 1056 } 1057 if (targ->g->dev_type == DEV_TAP) { 1058 D("writing to file desc %d", targ->g->main_fd); 1059 1060 for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { 1061 if (write(targ->g->main_fd, frame, size) != -1) 1062 sent++; 1063 update_addresses(pkt, targ->g); 1064 if (i > 10000) { 1065 targ->count = sent; 1066 i = 0; 1067 } 1068 } 1069#ifndef NO_PCAP 1070 } else if (targ->g->dev_type == DEV_PCAP) { 1071 pcap_t *p = targ->g->p; 1072 1073 for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { 1074 if (pcap_inject(p, frame, size) != -1) 1075 sent++; 1076 update_addresses(pkt, targ->g); 1077 if (i > 10000) { 1078 targ->count = sent; 1079 i = 0; 1080 } 1081 } 1082#endif /* NO_PCAP */ 1083 } else { 1084 int tosend = 0; 1085 int frags = targ->g->frags; 1086 1087 nifp = targ->nmd->nifp; 1088 while (!targ->cancel && (n == 0 || sent < n)) { 1089 1090 if (rate_limit && tosend <= 0) { 1091 tosend = targ->g->burst; 1092 nexttime = timespec_add(nexttime, targ->g->tx_period); 1093 wait_time(nexttime); 1094 } 1095 1096 /* 1097 * wait for available room in the send queue(s) 1098 */ 1099 if (poll(&pfd, 1, 2000) <= 0) { 1100 if (targ->cancel) 1101 break; 1102 D("poll error/timeout on queue %d: %s", targ->me, 1103 strerror(errno)); 1104 // goto quit; 1105 } 1106 if (pfd.revents & POLLERR) { 1107 D("poll error"); 1108 goto quit; 1109 } 1110 /* 1111 * scan our queues and send on those with room 1112 */ 1113 if (options & OPT_COPY && sent > 100000 && !(targ->g->options & OPT_COPY) ) { 1114 D("drop copy"); 1115 options &= ~OPT_COPY; 1116 } 1117 for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { 1118 int m, limit = rate_limit ? tosend : targ->g->burst; 1119 if (n > 0 && n - sent < limit) 1120 limit = n - sent; 1121 txring = NETMAP_TXRING(nifp, i); 1122 if (nm_ring_empty(txring)) 1123 continue; 1124 if (frags > 1) 1125 limit = ((limit + frags - 1) / frags) * frags; 1126 1127 m = send_packets(txring, pkt, frame, size, targ->g, 1128 limit, options, frags); 1129 ND("limit %d tail %d frags %d m %d", 1130 limit, txring->tail, frags, m); 1131 sent += m; 1132 targ->count = sent; 1133 if (rate_limit) { 1134 tosend -= m; 1135 if (tosend <= 0) 1136 break; 1137 } 1138 } 1139 } 1140 /* flush any remaining packets */ 1141 D("flush tail %d head %d on thread %p", 1142 txring->tail, txring->head, 1143 pthread_self()); 1144 ioctl(pfd.fd, NIOCTXSYNC, NULL); 1145 1146 /* final part: wait all the TX queues to be empty. */ 1147 for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { 1148 txring = NETMAP_TXRING(nifp, i); 1149 while (nm_tx_pending(txring)) { 1150 RD(5, "pending tx tail %d head %d on ring %d", 1151 txring->tail, txring->head, i); 1152 ioctl(pfd.fd, NIOCTXSYNC, NULL); 1153 usleep(1); /* wait 1 tick */ 1154 } 1155 } 1156 } /* end DEV_NETMAP */ 1157 1158 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 1159 targ->completed = 1; 1160 targ->count = sent; 1161 1162quit: 1163 /* reset the ``used`` flag. */ 1164 targ->used = 0; 1165 1166 return (NULL); 1167} 1168 1169 1170#ifndef NO_PCAP 1171static void 1172receive_pcap(u_char *user, const struct pcap_pkthdr * h, 1173 const u_char * bytes) 1174{ 1175 int *count = (int *)user; 1176 (void)h; /* UNUSED */ 1177 (void)bytes; /* UNUSED */ 1178 (*count)++; 1179} 1180#endif /* !NO_PCAP */ 1181 1182static int 1183receive_packets(struct netmap_ring *ring, u_int limit, int dump) 1184{ 1185 u_int cur, rx, n; 1186 1187 cur = ring->cur; 1188 n = nm_ring_space(ring); 1189 if (n < limit) 1190 limit = n; 1191 for (rx = 0; rx < limit; rx++) { 1192 struct netmap_slot *slot = &ring->slot[cur]; 1193 char *p = NETMAP_BUF(ring, slot->buf_idx); 1194 1195 if (dump) 1196 dump_payload(p, slot->len, ring, cur); 1197 1198 cur = nm_ring_next(ring, cur); 1199 } 1200 ring->head = ring->cur = cur; 1201 1202 return (rx); 1203} 1204 1205static void * 1206receiver_body(void *data) 1207{ 1208 struct targ *targ = (struct targ *) data; 1209 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 1210 struct netmap_if *nifp; 1211 struct netmap_ring *rxring; 1212 int i; 1213 uint64_t received = 0; 1214 1215 if (setaffinity(targ->thread, targ->affinity)) 1216 goto quit; 1217 1218 D("reading from %s fd %d main_fd %d", 1219 targ->g->ifname, targ->fd, targ->g->main_fd); 1220 /* unbounded wait for the first packet. */ 1221 for (;!targ->cancel;) { 1222 i = poll(&pfd, 1, 1000); 1223 if (i > 0 && !(pfd.revents & POLLERR)) 1224 break; 1225 RD(1, "waiting for initial packets, poll returns %d %d", 1226 i, pfd.revents); 1227 } 1228 /* main loop, exit after 1s silence */ 1229 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 1230 if (targ->g->dev_type == DEV_TAP) { 1231 while (!targ->cancel) { 1232 char buf[MAX_BODYSIZE]; 1233 /* XXX should we poll ? */ 1234 if (read(targ->g->main_fd, buf, sizeof(buf)) > 0) 1235 targ->count++; 1236 } 1237#ifndef NO_PCAP 1238 } else if (targ->g->dev_type == DEV_PCAP) { 1239 while (!targ->cancel) { 1240 /* XXX should we poll ? */ 1241 pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap, 1242 (u_char *)&targ->count); 1243 } 1244#endif /* !NO_PCAP */ 1245 } else { 1246 int dump = targ->g->options & OPT_DUMP; 1247 1248 nifp = targ->nmd->nifp; 1249 while (!targ->cancel) { 1250 /* Once we started to receive packets, wait at most 1 seconds 1251 before quitting. */ 1252 if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) { 1253 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 1254 targ->toc.tv_sec -= 1; /* Subtract timeout time. */ 1255 goto out; 1256 } 1257 1258 if (pfd.revents & POLLERR) { 1259 D("poll err"); 1260 goto quit; 1261 } 1262 1263 for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) { 1264 int m; 1265 1266 rxring = NETMAP_RXRING(nifp, i); 1267 if (nm_ring_empty(rxring)) 1268 continue; 1269 1270 m = receive_packets(rxring, targ->g->burst, dump); 1271 received += m; 1272 } 1273 targ->count = received; 1274 } 1275 } 1276 1277 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 1278 1279out: 1280 targ->completed = 1; 1281 targ->count = received; 1282 1283quit: 1284 /* reset the ``used`` flag. */ 1285 targ->used = 0; 1286 1287 return (NULL); 1288} 1289 1290/* very crude code to print a number in normalized form. 1291 * Caller has to make sure that the buffer is large enough. 1292 */ 1293static const char * 1294norm(char *buf, double val) 1295{ 1296 char *units[] = { "", "K", "M", "G", "T" }; 1297 u_int i; 1298 1299 for (i = 0; val >=1000 && i < sizeof(units)/sizeof(char *) - 1; i++) 1300 val /= 1000; 1301 sprintf(buf, "%.2f %s", val, units[i]); 1302 return buf; 1303} 1304 1305static void 1306tx_output(uint64_t sent, int size, double delta) 1307{ 1308 double bw, raw_bw, pps; 1309 char b1[40], b2[80], b3[80]; 1310 1311 printf("Sent %llu packets, %d bytes each, in %.2f seconds.\n", 1312 (unsigned long long)sent, size, delta); 1313 if (delta == 0) 1314 delta = 1e-6; 1315 if (size < 60) /* correct for min packet size */ 1316 size = 60; 1317 pps = sent / delta; 1318 bw = (8.0 * size * sent) / delta; 1319 /* raw packets have4 bytes crc + 20 bytes framing */ 1320 raw_bw = (8.0 * (size + 24) * sent) / delta; 1321 1322 printf("Speed: %spps Bandwidth: %sbps (raw %sbps)\n", 1323 norm(b1, pps), norm(b2, bw), norm(b3, raw_bw) ); 1324} 1325 1326 1327static void 1328rx_output(uint64_t received, double delta) 1329{ 1330 double pps; 1331 char b1[40]; 1332 1333 printf("Received %llu packets, in %.2f seconds.\n", 1334 (unsigned long long) received, delta); 1335 1336 if (delta == 0) 1337 delta = 1e-6; 1338 pps = received / delta; 1339 printf("Speed: %spps\n", norm(b1, pps)); 1340} 1341 1342static void 1343usage(void) 1344{ 1345 const char *cmd = "pkt-gen"; 1346 fprintf(stderr, 1347 "Usage:\n" 1348 "%s arguments\n" 1349 "\t-i interface interface name\n" 1350 "\t-f function tx rx ping pong\n" 1351 "\t-n count number of iterations (can be 0)\n" 1352 "\t-t pkts_to_send also forces tx mode\n" 1353 "\t-r pkts_to_receive also forces rx mode\n" 1354 "\t-l pkt_size in bytes excluding CRC\n" 1355 "\t-d dst_ip[:port[-dst_ip:port]] single or range\n" 1356 "\t-s src_ip[:port[-src_ip:port]] single or range\n" 1357 "\t-D dst-mac\n" 1358 "\t-S src-mac\n" 1359 "\t-a cpu_id use setaffinity\n" 1360 "\t-b burst size testing, mostly\n" 1361 "\t-c cores cores to use\n" 1362 "\t-p threads processes/threads to use\n" 1363 "\t-T report_ms milliseconds between reports\n" 1364 "\t-P use libpcap instead of netmap\n" 1365 "\t-w wait_for_link_time in seconds\n" 1366 "\t-R rate in packets per second\n" 1367 "\t-X dump payload\n" 1368 "\t-H len add empty virtio-net-header with size 'len'\n" 1369 "", 1370 cmd); 1371 1372 exit(0); 1373} 1374 1375static void 1376start_threads(struct glob_arg *g) 1377{ 1378 int i; 1379 1380 targs = calloc(g->nthreads, sizeof(*targs)); 1381 /* 1382 * Now create the desired number of threads, each one 1383 * using a single descriptor. 1384 */ 1385 for (i = 0; i < g->nthreads; i++) { 1386 struct targ *t = &targs[i]; 1387 1388 bzero(t, sizeof(*t)); 1389 t->fd = -1; /* default, with pcap */ 1390 t->g = g; 1391 1392 if (g->dev_type == DEV_NETMAP) { 1393 struct nm_desc nmd = *g->nmd; /* copy, we overwrite ringid */ 1394 uint64_t nmd_flags = 0; 1395 nmd.self = &nmd; 1396 1397 if (g->nthreads > 1) { 1398 if (nmd.req.nr_flags != NR_REG_ALL_NIC) { 1399 D("invalid nthreads mode %d", nmd.req.nr_flags); 1400 continue; 1401 } 1402 nmd.req.nr_flags = NR_REG_ONE_NIC; 1403 nmd.req.nr_ringid = i; 1404 } 1405 /* Only touch one of the rings (rx is already ok) */ 1406 if (g->td_body == receiver_body) 1407 nmd_flags |= NETMAP_NO_TX_POLL; 1408 1409 /* register interface. Override ifname and ringid etc. */ 1410 if (g->options & OPT_MONITOR_TX) 1411 nmd.req.nr_flags |= NR_MONITOR_TX; 1412 if (g->options & OPT_MONITOR_RX) 1413 nmd.req.nr_flags |= NR_MONITOR_RX; 1414 1415 t->nmd = nm_open(t->g->ifname, NULL, nmd_flags | 1416 NM_OPEN_IFNAME | NM_OPEN_NO_MMAP, &nmd); 1417 if (t->nmd == NULL) { 1418 D("Unable to open %s: %s", 1419 t->g->ifname, strerror(errno)); 1420 continue; 1421 } 1422 t->fd = t->nmd->fd; 1423 set_vnet_hdr_len(t); 1424 1425 } else { 1426 targs[i].fd = g->main_fd; 1427 } 1428 t->used = 1; 1429 t->me = i; 1430 if (g->affinity >= 0) { 1431 if (g->affinity < g->cpus) 1432 t->affinity = g->affinity; 1433 else 1434 t->affinity = i % g->cpus; 1435 } else { 1436 t->affinity = -1; 1437 } 1438 /* default, init packets */ 1439 initialize_packet(t); 1440 1441 if (pthread_create(&t->thread, NULL, g->td_body, t) == -1) { 1442 D("Unable to create thread %d: %s", i, strerror(errno)); 1443 t->used = 0; 1444 } 1445 } 1446} 1447 1448static void 1449main_thread(struct glob_arg *g) 1450{ 1451 int i; 1452 1453 uint64_t prev = 0; 1454 uint64_t count = 0; 1455 double delta_t; 1456 struct timeval tic, toc; 1457 1458 gettimeofday(&toc, NULL); 1459 for (;;) { 1460 struct timeval now, delta; 1461 uint64_t pps, usec, my_count, npkts; 1462 int done = 0; 1463 1464 delta.tv_sec = g->report_interval/1000; 1465 delta.tv_usec = (g->report_interval%1000)*1000; 1466 select(0, NULL, NULL, NULL, &delta); 1467 gettimeofday(&now, NULL); 1468 timersub(&now, &toc, &toc); 1469 my_count = 0; 1470 for (i = 0; i < g->nthreads; i++) { 1471 my_count += targs[i].count; 1472 if (targs[i].used == 0) 1473 done++; 1474 } 1475 usec = toc.tv_sec* 1000000 + toc.tv_usec; 1476 if (usec < 10000) 1477 continue; 1478 npkts = my_count - prev; 1479 pps = (npkts*1000000 + usec/2) / usec; 1480 D("%llu pps (%llu pkts in %llu usec)", 1481 (unsigned long long)pps, 1482 (unsigned long long)npkts, 1483 (unsigned long long)usec); 1484 prev = my_count; 1485 toc = now; 1486 if (done == g->nthreads) 1487 break; 1488 } 1489 1490 timerclear(&tic); 1491 timerclear(&toc); 1492 for (i = 0; i < g->nthreads; i++) { 1493 struct timespec t_tic, t_toc; 1494 /* 1495 * Join active threads, unregister interfaces and close 1496 * file descriptors. 1497 */ 1498 if (targs[i].used) 1499 pthread_join(targs[i].thread, NULL); 1500 close(targs[i].fd); 1501 1502 if (targs[i].completed == 0) 1503 D("ouch, thread %d exited with error", i); 1504 1505 /* 1506 * Collect threads output and extract information about 1507 * how long it took to send all the packets. 1508 */ 1509 count += targs[i].count; 1510 t_tic = timeval2spec(&tic); 1511 t_toc = timeval2spec(&toc); 1512 if (!timerisset(&tic) || timespec_ge(&targs[i].tic, &t_tic)) 1513 tic = timespec2val(&targs[i].tic); 1514 if (!timerisset(&toc) || timespec_ge(&targs[i].toc, &t_toc)) 1515 toc = timespec2val(&targs[i].toc); 1516 } 1517 1518 /* print output. */ 1519 timersub(&toc, &tic, &toc); 1520 delta_t = toc.tv_sec + 1e-6* toc.tv_usec; 1521 if (g->td_body == sender_body) 1522 tx_output(count, g->pkt_size, delta_t); 1523 else 1524 rx_output(count, delta_t); 1525 1526 if (g->dev_type == DEV_NETMAP) { 1527 munmap(g->nmd->mem, g->nmd->req.nr_memsize); 1528 close(g->main_fd); 1529 } 1530} 1531 1532 1533struct sf { 1534 char *key; 1535 void *f; 1536}; 1537 1538static struct sf func[] = { 1539 { "tx", sender_body }, 1540 { "rx", receiver_body }, 1541 { "ping", pinger_body }, 1542 { "pong", ponger_body }, 1543 { NULL, NULL } 1544}; 1545 1546static int 1547tap_alloc(char *dev) 1548{ 1549 struct ifreq ifr; 1550 int fd, err; 1551 char *clonedev = TAP_CLONEDEV; 1552 1553 (void)err; 1554 (void)dev; 1555 /* Arguments taken by the function: 1556 * 1557 * char *dev: the name of an interface (or '\0'). MUST have enough 1558 * space to hold the interface name if '\0' is passed 1559 * int flags: interface flags (eg, IFF_TUN etc.) 1560 */ 1561 1562#ifdef __FreeBSD__ 1563 if (dev[3]) { /* tapSomething */ 1564 static char buf[128]; 1565 snprintf(buf, sizeof(buf), "/dev/%s", dev); 1566 clonedev = buf; 1567 } 1568#endif 1569 /* open the device */ 1570 if( (fd = open(clonedev, O_RDWR)) < 0 ) { 1571 return fd; 1572 } 1573 D("%s open successful", clonedev); 1574 1575 /* preparation of the struct ifr, of type "struct ifreq" */ 1576 memset(&ifr, 0, sizeof(ifr)); 1577 1578#ifdef linux 1579 ifr.ifr_flags = IFF_TAP | IFF_NO_PI; 1580 1581 if (*dev) { 1582 /* if a device name was specified, put it in the structure; otherwise, 1583 * the kernel will try to allocate the "next" device of the 1584 * specified type */ 1585 strncpy(ifr.ifr_name, dev, IFNAMSIZ); 1586 } 1587 1588 /* try to create the device */ 1589 if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ) { 1590 D("failed to to a TUNSETIFF: %s", strerror(errno)); 1591 close(fd); 1592 return err; 1593 } 1594 1595 /* if the operation was successful, write back the name of the 1596 * interface to the variable "dev", so the caller can know 1597 * it. Note that the caller MUST reserve space in *dev (see calling 1598 * code below) */ 1599 strcpy(dev, ifr.ifr_name); 1600 D("new name is %s", dev); 1601#endif /* linux */ 1602 1603 /* this is the special file descriptor that the caller will use to talk 1604 * with the virtual interface */ 1605 return fd; 1606} 1607 1608int 1609main(int arc, char **argv) 1610{ 1611 int i; 1612 1613 struct glob_arg g; 1614 1615 int ch; 1616 int wait_link = 2; 1617 int devqueues = 1; /* how many device queues */ 1618 1619 bzero(&g, sizeof(g)); 1620 1621 g.main_fd = -1; 1622 g.td_body = receiver_body; 1623 g.report_interval = 1000; /* report interval */ 1624 g.affinity = -1; 1625 /* ip addresses can also be a range x.x.x.x-x.x.x.y */ 1626 g.src_ip.name = "10.0.0.1"; 1627 g.dst_ip.name = "10.1.0.1"; 1628 g.dst_mac.name = "ff:ff:ff:ff:ff:ff"; 1629 g.src_mac.name = NULL; 1630 g.pkt_size = 60; 1631 g.burst = 512; // default 1632 g.nthreads = 1; 1633 g.cpus = 1; 1634 g.forever = 1; 1635 g.tx_rate = 0; 1636 g.frags = 1; 1637 g.nmr_config = ""; 1638 g.virt_header = 0; 1639 1640 while ( (ch = getopt(arc, argv, 1641 "a:f:F:n:i:Il:d:s:D:S:b:c:o:p:T:w:WvR:XC:H:e:m:")) != -1) { 1642 struct sf *fn; 1643 1644 switch(ch) { 1645 default: 1646 D("bad option %c %s", ch, optarg); 1647 usage(); 1648 break; 1649 1650 case 'n': 1651 g.npackets = atoi(optarg); 1652 break; 1653 1654 case 'F': 1655 i = atoi(optarg); 1656 if (i < 1 || i > 63) { 1657 D("invalid frags %d [1..63], ignore", i); 1658 break; 1659 } 1660 g.frags = i; 1661 break; 1662 1663 case 'f': 1664 for (fn = func; fn->key; fn++) { 1665 if (!strcmp(fn->key, optarg)) 1666 break; 1667 } 1668 if (fn->key) 1669 g.td_body = fn->f; 1670 else 1671 D("unrecognised function %s", optarg); 1672 break; 1673 1674 case 'o': /* data generation options */ 1675 g.options = atoi(optarg); 1676 break; 1677 1678 case 'a': /* force affinity */ 1679 g.affinity = atoi(optarg); 1680 break; 1681 1682 case 'i': /* interface */ 1683 /* a prefix of tap: netmap: or pcap: forces the mode. 1684 * otherwise we guess 1685 */ 1686 D("interface is %s", optarg); 1687 if (strlen(optarg) > MAX_IFNAMELEN - 8) { 1688 D("ifname too long %s", optarg); 1689 break; 1690 } 1691 strcpy(g.ifname, optarg); 1692 if (!strcmp(optarg, "null")) { 1693 g.dev_type = DEV_NETMAP; 1694 g.dummy_send = 1; 1695 } else if (!strncmp(optarg, "tap:", 4)) { 1696 g.dev_type = DEV_TAP; 1697 strcpy(g.ifname, optarg + 4); 1698 } else if (!strncmp(optarg, "pcap:", 5)) { 1699 g.dev_type = DEV_PCAP; 1700 strcpy(g.ifname, optarg + 5); 1701 } else if (!strncmp(optarg, "netmap:", 7) || 1702 !strncmp(optarg, "vale", 4)) { 1703 g.dev_type = DEV_NETMAP; 1704 } else if (!strncmp(optarg, "tap", 3)) { 1705 g.dev_type = DEV_TAP; 1706 } else { /* prepend netmap: */ 1707 g.dev_type = DEV_NETMAP; 1708 sprintf(g.ifname, "netmap:%s", optarg); 1709 } 1710 break; 1711 1712 case 'I': 1713 g.options |= OPT_INDIRECT; /* XXX use indirect buffer */ 1714 break; 1715 1716 case 'l': /* pkt_size */ 1717 g.pkt_size = atoi(optarg); 1718 break; 1719 1720 case 'd': 1721 g.dst_ip.name = optarg; 1722 break; 1723 1724 case 's': 1725 g.src_ip.name = optarg; 1726 break; 1727 1728 case 'T': /* report interval */ 1729 g.report_interval = atoi(optarg); 1730 break; 1731 1732 case 'w': 1733 wait_link = atoi(optarg); 1734 break; 1735 1736 case 'W': /* XXX changed default */ 1737 g.forever = 0; /* do not exit rx even with no traffic */ 1738 break; 1739 1740 case 'b': /* burst */ 1741 g.burst = atoi(optarg); 1742 break; 1743 case 'c': 1744 g.cpus = atoi(optarg); 1745 break; 1746 case 'p': 1747 g.nthreads = atoi(optarg); 1748 break; 1749 1750 case 'D': /* destination mac */ 1751 g.dst_mac.name = optarg; 1752 break; 1753 1754 case 'S': /* source mac */ 1755 g.src_mac.name = optarg; 1756 break; 1757 case 'v': 1758 verbose++; 1759 break; 1760 case 'R': 1761 g.tx_rate = atoi(optarg); 1762 break; 1763 case 'X': 1764 g.options |= OPT_DUMP; 1765 break; 1766 case 'C': 1767 g.nmr_config = strdup(optarg); 1768 break; 1769 case 'H': 1770 g.virt_header = atoi(optarg); 1771 break; 1772 case 'e': /* extra bufs */ 1773 g.extra_bufs = atoi(optarg); 1774 break; 1775 case 'm': 1776 if (strcmp(optarg, "tx") == 0) { 1777 g.options |= OPT_MONITOR_TX; 1778 } else if (strcmp(optarg, "rx") == 0) { 1779 g.options |= OPT_MONITOR_RX; 1780 } else { 1781 D("unrecognized monitor mode %s", optarg); 1782 } 1783 break; 1784 } 1785 } 1786 1787 if (g.ifname == NULL) { 1788 D("missing ifname"); 1789 usage(); 1790 } 1791 1792 i = system_ncpus(); 1793 if (g.cpus < 0 || g.cpus > i) { 1794 D("%d cpus is too high, have only %d cpus", g.cpus, i); 1795 usage(); 1796 } 1797 if (g.cpus == 0) 1798 g.cpus = i; 1799 1800 if (g.pkt_size < 16 || g.pkt_size > MAX_PKTSIZE) { 1801 D("bad pktsize %d [16..%d]\n", g.pkt_size, MAX_PKTSIZE); 1802 usage(); 1803 } 1804 1805 if (g.src_mac.name == NULL) { 1806 static char mybuf[20] = "00:00:00:00:00:00"; 1807 /* retrieve source mac address. */ 1808 if (source_hwaddr(g.ifname, mybuf) == -1) { 1809 D("Unable to retrieve source mac"); 1810 // continue, fail later 1811 } 1812 g.src_mac.name = mybuf; 1813 } 1814 /* extract address ranges */ 1815 extract_ip_range(&g.src_ip); 1816 extract_ip_range(&g.dst_ip); 1817 extract_mac_range(&g.src_mac); 1818 extract_mac_range(&g.dst_mac); 1819 1820 if (g.src_ip.start != g.src_ip.end || 1821 g.src_ip.port0 != g.src_ip.port1 || 1822 g.dst_ip.start != g.dst_ip.end || 1823 g.dst_ip.port0 != g.dst_ip.port1) 1824 g.options |= OPT_COPY; 1825 1826 if (g.virt_header != 0 && g.virt_header != VIRT_HDR_1 1827 && g.virt_header != VIRT_HDR_2) { 1828 D("bad virtio-net-header length"); 1829 usage(); 1830 } 1831 1832 if (g.dev_type == DEV_TAP) { 1833 D("want to use tap %s", g.ifname); 1834 g.main_fd = tap_alloc(g.ifname); 1835 if (g.main_fd < 0) { 1836 D("cannot open tap %s", g.ifname); 1837 usage(); 1838 } 1839#ifndef NO_PCAP 1840 } else if (g.dev_type == DEV_PCAP) { 1841 char pcap_errbuf[PCAP_ERRBUF_SIZE]; 1842 1843 pcap_errbuf[0] = '\0'; // init the buffer 1844 g.p = pcap_open_live(g.ifname, 256 /* XXX */, 1, 100, pcap_errbuf); 1845 if (g.p == NULL) { 1846 D("cannot open pcap on %s", g.ifname); 1847 usage(); 1848 } 1849 g.main_fd = pcap_fileno(g.p); 1850 D("using pcap on %s fileno %d", g.ifname, g.main_fd); 1851#endif /* !NO_PCAP */ 1852 } else if (g.dummy_send) { /* but DEV_NETMAP */ 1853 D("using a dummy send routine"); 1854 } else { 1855 struct nmreq base_nmd; 1856 1857 bzero(&base_nmd, sizeof(base_nmd)); 1858 1859 parse_nmr_config(g.nmr_config, &base_nmd); 1860 if (g.extra_bufs) { 1861 base_nmd.nr_arg3 = g.extra_bufs; 1862 } 1863 1864 /* 1865 * Open the netmap device using nm_open(). 1866 * 1867 * protocol stack and may cause a reset of the card, 1868 * which in turn may take some time for the PHY to 1869 * reconfigure. We do the open here to have time to reset. 1870 */ 1871 g.nmd = nm_open(g.ifname, &base_nmd, 0, NULL); 1872 if (g.nmd == NULL) { 1873 D("Unable to open %s: %s", g.ifname, strerror(errno)); 1874 goto out; 1875 } 1876 g.main_fd = g.nmd->fd; 1877 D("mapped %dKB at %p", g.nmd->req.nr_memsize>>10, g.nmd->mem); 1878 1879 /* get num of queues in tx or rx */ 1880 if (g.td_body == sender_body) 1881 devqueues = g.nmd->req.nr_tx_rings; 1882 else 1883 devqueues = g.nmd->req.nr_rx_rings; 1884 1885 /* validate provided nthreads. */ 1886 if (g.nthreads < 1 || g.nthreads > devqueues) { 1887 D("bad nthreads %d, have %d queues", g.nthreads, devqueues); 1888 // continue, fail later 1889 } 1890 1891 if (verbose) { 1892 struct netmap_if *nifp = g.nmd->nifp; 1893 struct nmreq *req = &g.nmd->req; 1894 1895 D("nifp at offset %d, %d tx %d rx region %d", 1896 req->nr_offset, req->nr_tx_rings, req->nr_rx_rings, 1897 req->nr_arg2); 1898 for (i = 0; i <= req->nr_tx_rings; i++) { 1899 struct netmap_ring *ring = NETMAP_TXRING(nifp, i); 1900 D(" TX%d at 0x%lx slots %d", i, 1901 (char *)ring - (char *)nifp, ring->num_slots); 1902 } 1903 for (i = 0; i <= req->nr_rx_rings; i++) { 1904 struct netmap_ring *ring = NETMAP_RXRING(nifp, i); 1905 D(" RX%d at 0x%lx slots %d", i, 1906 (char *)ring - (char *)nifp, ring->num_slots); 1907 } 1908 } 1909 1910 /* Print some debug information. */ 1911 fprintf(stdout, 1912 "%s %s: %d queues, %d threads and %d cpus.\n", 1913 (g.td_body == sender_body) ? "Sending on" : "Receiving from", 1914 g.ifname, 1915 devqueues, 1916 g.nthreads, 1917 g.cpus); 1918 if (g.td_body == sender_body) { 1919 fprintf(stdout, "%s -> %s (%s -> %s)\n", 1920 g.src_ip.name, g.dst_ip.name, 1921 g.src_mac.name, g.dst_mac.name); 1922 } 1923 1924out: 1925 /* Exit if something went wrong. */ 1926 if (g.main_fd < 0) { 1927 D("aborting"); 1928 usage(); 1929 } 1930 } 1931 1932 1933 if (g.options) { 1934 D("--- SPECIAL OPTIONS:%s%s%s%s%s\n", 1935 g.options & OPT_PREFETCH ? " prefetch" : "", 1936 g.options & OPT_ACCESS ? " access" : "", 1937 g.options & OPT_MEMCPY ? " memcpy" : "", 1938 g.options & OPT_INDIRECT ? " indirect" : "", 1939 g.options & OPT_COPY ? " copy" : ""); 1940 } 1941 1942 g.tx_period.tv_sec = g.tx_period.tv_nsec = 0; 1943 if (g.tx_rate > 0) { 1944 /* try to have at least something every second, 1945 * reducing the burst size to some 0.01s worth of data 1946 * (but no less than one full set of fragments) 1947 */ 1948 uint64_t x; 1949 int lim = (g.tx_rate)/300; 1950 if (g.burst > lim) 1951 g.burst = lim; 1952 if (g.burst < g.frags) 1953 g.burst = g.frags; 1954 x = ((uint64_t)1000000000 * (uint64_t)g.burst) / (uint64_t) g.tx_rate; 1955 g.tx_period.tv_nsec = x; 1956 g.tx_period.tv_sec = g.tx_period.tv_nsec / 1000000000; 1957 g.tx_period.tv_nsec = g.tx_period.tv_nsec % 1000000000; 1958 } 1959 if (g.td_body == sender_body) 1960 D("Sending %d packets every %ld.%09ld s", 1961 g.burst, g.tx_period.tv_sec, g.tx_period.tv_nsec); 1962 /* Wait for PHY reset. */ 1963 D("Wait %d secs for phy reset", wait_link); 1964 sleep(wait_link); 1965 D("Ready..."); 1966 1967 /* Install ^C handler. */ 1968 global_nthreads = g.nthreads; 1969 signal(SIGINT, sigint_h); 1970 1971 start_threads(&g); 1972 main_thread(&g); 1973 return 0; 1974} 1975 1976/* end of file */ 1977