dummynet.c revision 193715
160786Sps/* 260786Sps * Copyright (c) 2002-2003 Luigi Rizzo 360786Sps * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp 460786Sps * Copyright (c) 1994 Ugen J.S.Antsilevich 560786Sps * 660786Sps * Idea and grammar partially left from: 760786Sps * Copyright (c) 1993 Daniel Boulet 860786Sps * 960786Sps * Redistribution and use in source forms, with and without modification, 1060786Sps * are permitted provided that this entire comment appears intact. 1160786Sps * 1260786Sps * Redistribution in binary form may occur without any restrictions. 1360786Sps * Obviously, it would be nice if you gave credit where credit is due 1460786Sps * but requiring it would be too onerous. 1560786Sps * 1660786Sps * This software is provided ``AS IS'' without any warranties of any kind. 1760786Sps * 1860786Sps * NEW command line interface for IP firewall facility 1960786Sps * 2060786Sps * $FreeBSD: head/sbin/ipfw/dummynet.c 193715 2009-06-08 14:32:29Z luigi $ 2160786Sps * 2260786Sps * dummynet support 2360786Sps */ 2460786Sps 2560786Sps#include <sys/types.h> 2660786Sps#include <sys/socket.h> 2760786Sps#include <sys/queue.h> 2860786Sps/* XXX there are several sysctl leftover here */ 2960786Sps#include <sys/sysctl.h> 3060786Sps 3160786Sps#include "ipfw2.h" 3260786Sps 3360786Sps#include <ctype.h> 3460786Sps#include <err.h> 3560786Sps#include <netdb.h> 3660786Sps#include <stdio.h> 3760786Sps#include <stdlib.h> 3860786Sps#include <string.h> 3960786Sps#include <sysexits.h> 4060786Sps 4160786Sps#include <net/if.h> 4260786Sps#include <netinet/in.h> 4360786Sps#include <netinet/ip_fw.h> 4460786Sps#include <netinet/ip_dummynet.h> 4560786Sps#include <arpa/inet.h> /* inet_ntoa */ 4660786Sps 4760786Spsstatic struct _s_x dummynet_params[] = { 4860786Sps { "plr", TOK_PLR }, 4960786Sps { "noerror", TOK_NOERROR }, 5060786Sps { "buckets", TOK_BUCKETS }, 5160786Sps { "dst-ip", TOK_DSTIP }, 5260786Sps { "src-ip", TOK_SRCIP }, 5360786Sps { "dst-port", TOK_DSTPORT }, 5460786Sps { "src-port", TOK_SRCPORT }, 5560786Sps { "proto", TOK_PROTO }, 5660786Sps { "weight", TOK_WEIGHT }, 5760786Sps { "all", TOK_ALL }, 5860786Sps { "mask", TOK_MASK }, 5960786Sps { "droptail", TOK_DROPTAIL }, 6060786Sps { "red", TOK_RED }, 6160786Sps { "gred", TOK_GRED }, 6260786Sps { "bw", TOK_BW }, 6360786Sps { "bandwidth", TOK_BW }, 6460786Sps { "delay", TOK_DELAY }, 6560786Sps { "pipe", TOK_PIPE }, 6660786Sps { "queue", TOK_QUEUE }, 6760786Sps { "flow-id", TOK_FLOWID}, 6860786Sps { "dst-ipv6", TOK_DSTIP6}, 6960786Sps { "dst-ip6", TOK_DSTIP6}, 7060786Sps { "src-ipv6", TOK_SRCIP6}, 7160786Sps { "src-ip6", TOK_SRCIP6}, 7260786Sps { "profile", TOK_PIPE_PROFILE}, 7360786Sps { "dummynet-params", TOK_NULL }, 7460786Sps { NULL, 0 } /* terminator */ 7560786Sps}; 7660786Sps 7760786Spsstatic int 7860786Spssort_q(const void *pa, const void *pb) 7960786Sps{ 8060786Sps int rev = (co.do_sort < 0); 8160786Sps int field = rev ? -co.do_sort : co.do_sort; 8260786Sps long long res = 0; 8360786Sps const struct dn_flow_queue *a = pa; 8460786Sps const struct dn_flow_queue *b = pb; 8560786Sps 8660786Sps switch (field) { 8760786Sps case 1: /* pkts */ 8860786Sps res = a->len - b->len; 8960786Sps break; 9060786Sps case 2: /* bytes */ 9160786Sps res = a->len_bytes - b->len_bytes; 9260786Sps break; 9360786Sps 9460786Sps case 3: /* tot pkts */ 9560786Sps res = a->tot_pkts - b->tot_pkts; 9660786Sps break; 9760786Sps 9860786Sps case 4: /* tot bytes */ 9963128Sps res = a->tot_bytes - b->tot_bytes; 10063128Sps break; 10163128Sps } 10263128Sps if (res < 0) 10363128Sps res = -1; 10463128Sps if (res > 0) 10563128Sps res = 1; 10660786Sps return (int)(rev ? res : -res); 10760786Sps} 10860786Sps 10960786Spsstatic void 11060786Spslist_queues(struct dn_flow_set *fs, struct dn_flow_queue *q) 11160786Sps{ 11260786Sps int l; 11363128Sps int index_printed, indexes = 0; 11460786Sps char buff[255]; 11560786Sps struct protoent *pe; 11660786Sps 11760786Sps if (fs->rq_elements == 0) 11860786Sps return; 11960786Sps 12060786Sps if (co.do_sort != 0) 12160786Sps heapsort(q, fs->rq_elements, sizeof *q, sort_q); 12260786Sps 12360786Sps /* Print IPv4 flows */ 12460786Sps index_printed = 0; 12560786Sps for (l = 0; l < fs->rq_elements; l++) { 12660786Sps struct in_addr ina; 12760786Sps 12860786Sps /* XXX: Should check for IPv4 flows */ 12960786Sps if (IS_IP6_FLOW_ID(&(q[l].id))) 13060786Sps continue; 13160786Sps 13260786Sps if (!index_printed) { 13360786Sps index_printed = 1; 13460786Sps if (indexes > 0) /* currently a no-op */ 13560786Sps printf("\n"); 13660786Sps indexes++; 13760786Sps printf(" " 13860786Sps "mask: 0x%02x 0x%08x/0x%04x -> 0x%08x/0x%04x\n", 13960786Sps fs->flow_mask.proto, 14060786Sps fs->flow_mask.src_ip, fs->flow_mask.src_port, 14160786Sps fs->flow_mask.dst_ip, fs->flow_mask.dst_port); 14260786Sps 14360786Sps printf("BKT Prot ___Source IP/port____ " 14460786Sps "____Dest. IP/port____ " 14560786Sps "Tot_pkt/bytes Pkt/Byte Drp\n"); 14660786Sps } 14760786Sps 14860786Sps printf("%3d ", q[l].hash_slot); 14960786Sps pe = getprotobynumber(q[l].id.proto); 15060786Sps if (pe) 15160786Sps printf("%-4s ", pe->p_name); 15260786Sps else 15360786Sps printf("%4u ", q[l].id.proto); 15460786Sps ina.s_addr = htonl(q[l].id.src_ip); 15560786Sps printf("%15s/%-5d ", 15660786Sps inet_ntoa(ina), q[l].id.src_port); 15760786Sps ina.s_addr = htonl(q[l].id.dst_ip); 15860786Sps printf("%15s/%-5d ", 15960786Sps inet_ntoa(ina), q[l].id.dst_port); 16060786Sps printf("%4llu %8llu %2u %4u %3u\n", 16160786Sps align_uint64(&q[l].tot_pkts), 16260786Sps align_uint64(&q[l].tot_bytes), 16360786Sps q[l].len, q[l].len_bytes, q[l].drops); 16460786Sps if (co.verbose) 16560786Sps printf(" S %20llu F %20llu\n", 16660786Sps align_uint64(&q[l].S), align_uint64(&q[l].F)); 16760786Sps } 16860786Sps 16960786Sps /* Print IPv6 flows */ 17060786Sps index_printed = 0; 17160786Sps for (l = 0; l < fs->rq_elements; l++) { 17260786Sps if (!IS_IP6_FLOW_ID(&(q[l].id))) 17360786Sps continue; 17460786Sps 17560786Sps if (!index_printed) { 17660786Sps index_printed = 1; 17760786Sps if (indexes > 0) 17860786Sps printf("\n"); 17960786Sps indexes++; 18060786Sps printf("\n mask: proto: 0x%02x, flow_id: 0x%08x, ", 18160786Sps fs->flow_mask.proto, fs->flow_mask.flow_id6); 18260786Sps inet_ntop(AF_INET6, &(fs->flow_mask.src_ip6), 18360786Sps buff, sizeof(buff)); 18460786Sps printf("%s/0x%04x -> ", buff, fs->flow_mask.src_port); 18560786Sps inet_ntop( AF_INET6, &(fs->flow_mask.dst_ip6), 18660786Sps buff, sizeof(buff) ); 18760786Sps printf("%s/0x%04x\n", buff, fs->flow_mask.dst_port); 18860786Sps 18960786Sps printf("BKT ___Prot___ _flow-id_ " 19060786Sps "______________Source IPv6/port_______________ " 19160786Sps "_______________Dest. IPv6/port_______________ " 19260786Sps "Tot_pkt/bytes Pkt/Byte Drp\n"); 19360786Sps } 19460786Sps printf("%3d ", q[l].hash_slot); 19560786Sps pe = getprotobynumber(q[l].id.proto); 19660786Sps if (pe != NULL) 19760786Sps printf("%9s ", pe->p_name); 19860786Sps else 19960786Sps printf("%9u ", q[l].id.proto); 20060786Sps printf("%7d %39s/%-5d ", q[l].id.flow_id6, 20160786Sps inet_ntop(AF_INET6, &(q[l].id.src_ip6), buff, sizeof(buff)), 20260786Sps q[l].id.src_port); 20360786Sps printf(" %39s/%-5d ", 20460786Sps inet_ntop(AF_INET6, &(q[l].id.dst_ip6), buff, sizeof(buff)), 20560786Sps q[l].id.dst_port); 20660786Sps printf(" %4llu %8llu %2u %4u %3u\n", 20760786Sps align_uint64(&q[l].tot_pkts), 20860786Sps align_uint64(&q[l].tot_bytes), 20960786Sps q[l].len, q[l].len_bytes, q[l].drops); 21060786Sps if (co.verbose) 21160786Sps printf(" S %20llu F %20llu\n", 21260786Sps align_uint64(&q[l].S), 21360786Sps align_uint64(&q[l].F)); 21460786Sps } 21560786Sps} 21660786Sps 21760786Spsstatic void 21860786Spsprint_flowset_parms(struct dn_flow_set *fs, char *prefix) 21960786Sps{ 22060786Sps int l; 22160786Sps char qs[30]; 22260786Sps char plr[30]; 22360786Sps char red[90]; /* Display RED parameters */ 22460786Sps 22560786Sps l = fs->qsize; 22660786Sps if (fs->flags_fs & DN_QSIZE_IS_BYTES) { 22760786Sps if (l >= 8192) 22860786Sps sprintf(qs, "%d KB", l / 1024); 22960786Sps else 23060786Sps sprintf(qs, "%d B", l); 23160786Sps } else 23260786Sps sprintf(qs, "%3d sl.", l); 23360786Sps if (fs->plr) 23460786Sps sprintf(plr, "plr %f", 1.0 * fs->plr / (double)(0x7fffffff)); 23560786Sps else 23660786Sps plr[0] = '\0'; 23760786Sps if (fs->flags_fs & DN_IS_RED) /* RED parameters */ 23860786Sps sprintf(red, 23960786Sps "\n\t %cRED w_q %f min_th %d max_th %d max_p %f", 24089019Sps (fs->flags_fs & DN_IS_GENTLE_RED) ? 'G' : ' ', 24189019Sps 1.0 * fs->w_q / (double)(1 << SCALE_RED), 24289019Sps SCALE_VAL(fs->min_th), 24389019Sps SCALE_VAL(fs->max_th), 24460786Sps 1.0 * fs->max_p / (double)(1 << SCALE_RED)); 24560786Sps else 24660786Sps sprintf(red, "droptail"); 24760786Sps 24860786Sps printf("%s %s%s %d queues (%d buckets) %s\n", 24960786Sps prefix, qs, plr, fs->rq_elements, fs->rq_size, red); 25060786Sps} 25160786Sps 25260786Spsstatic void 25360786Spsprint_extra_delay_parms(struct dn_pipe *p, char *prefix) 25460786Sps{ 25560786Sps double loss; 25689019Sps if (p->samples_no <= 0) 25789019Sps return; 25889019Sps 25989019Sps loss = p->loss_level; 26089019Sps loss /= p->samples_no; 26189019Sps printf("%s profile: name \"%s\" loss %f samples %d\n", 26260786Sps prefix, p->name, loss, p->samples_no); 26360786Sps} 26460786Sps 26560786Spsvoid 26689019Spsipfw_list_pipes(void *data, uint nbytes, int ac, char *av[]) 26789019Sps{ 26889019Sps int rulenum; 26989019Sps void *next = data; 27089019Sps struct dn_pipe *p = (struct dn_pipe *) data; 27189019Sps struct dn_flow_set *fs; 27289019Sps struct dn_flow_queue *q; 27389019Sps int l; 27489019Sps 27589019Sps if (ac > 0) 27689019Sps rulenum = strtoul(*av++, NULL, 10); 27789019Sps else 27889019Sps rulenum = 0; 27989019Sps for (; nbytes >= sizeof *p; p = (struct dn_pipe *)next) { 28089019Sps double b = p->bandwidth; 28189019Sps char buf[30]; 28289019Sps char prefix[80]; 28389019Sps 28489019Sps if (SLIST_NEXT(p, next) != (struct dn_pipe *)DN_IS_PIPE) 28589019Sps break; /* done with pipes, now queues */ 28689019Sps 28789019Sps /* 28889019Sps * compute length, as pipe have variable size 28989019Sps */ 29089019Sps l = sizeof(*p) + p->fs.rq_elements * sizeof(*q); 29189019Sps next = (char *)p + l; 29289019Sps nbytes -= l; 29389019Sps 29489019Sps if ((rulenum != 0 && rulenum != p->pipe_nr) || co.do_pipe == 2) 29589019Sps continue; 29689019Sps 29789019Sps /* 29889019Sps * Print rate (or clocking interface) 29989019Sps */ 30089019Sps if (p->if_name[0] != '\0') 30160786Sps sprintf(buf, "%s", p->if_name); 30260786Sps else if (b == 0) 30360786Sps sprintf(buf, "unlimited"); 30460786Sps else if (b >= 1000000) 30560786Sps sprintf(buf, "%7.3f Mbit/s", b/1000000); 30660786Sps else if (b >= 1000) 30760786Sps sprintf(buf, "%7.3f Kbit/s", b/1000); 30860786Sps else 30960786Sps sprintf(buf, "%7.3f bit/s ", b); 31060786Sps 31160786Sps sprintf(prefix, "%05d: %s %4d ms ", 31260786Sps p->pipe_nr, buf, p->delay); 31360786Sps 31460786Sps print_extra_delay_parms(p, prefix); 31560786Sps 31689019Sps print_flowset_parms(&(p->fs), prefix); 31760786Sps 31860786Sps q = (struct dn_flow_queue *)(p+1); 31960786Sps list_queues(&(p->fs), q); 32060786Sps } 32160786Sps for (fs = next; nbytes >= sizeof *fs; fs = next) { 32260786Sps char prefix[80]; 32360786Sps 32460786Sps if (SLIST_NEXT(fs, next) != (struct dn_flow_set *)DN_IS_QUEUE) 32560786Sps break; 32660786Sps l = sizeof(*fs) + fs->rq_elements * sizeof(*q); 32760786Sps next = (char *)fs + l; 328 nbytes -= l; 329 330 if (rulenum != 0 && ((rulenum != fs->fs_nr && co.do_pipe == 2) || 331 (rulenum != fs->parent_nr && co.do_pipe == 1))) { 332 continue; 333 } 334 335 q = (struct dn_flow_queue *)(fs+1); 336 sprintf(prefix, "q%05d: weight %d pipe %d ", 337 fs->fs_nr, fs->weight, fs->parent_nr); 338 print_flowset_parms(fs, prefix); 339 list_queues(fs, q); 340 } 341} 342 343/* 344 * Delete pipe or queue i 345 */ 346int 347ipfw_delete_pipe(int pipe_or_queue, int i) 348{ 349 struct dn_pipe p; 350 351 memset(&p, 0, sizeof p); 352 if (pipe_or_queue == 1) 353 p.pipe_nr = i; /* pipe */ 354 else 355 p.fs.fs_nr = i; /* queue */ 356 i = do_cmd(IP_DUMMYNET_DEL, &p, sizeof p); 357 if (i) { 358 i = 1; 359 warn("rule %u: setsockopt(IP_DUMMYNET_DEL)", i); 360 } 361 return i; 362} 363 364/* 365 * Code to parse delay profiles. 366 * 367 * Some link types introduce extra delays in the transmission 368 * of a packet, e.g. because of MAC level framing, contention on 369 * the use of the channel, MAC level retransmissions and so on. 370 * From our point of view, the channel is effectively unavailable 371 * for this extra time, which is constant or variable depending 372 * on the link type. Additionally, packets may be dropped after this 373 * time (e.g. on a wireless link after too many retransmissions). 374 * We can model the additional delay with an empirical curve 375 * that represents its distribution. 376 * 377 * cumulative probability 378 * 1.0 ^ 379 * | 380 * L +-- loss-level x 381 * | ****** 382 * | * 383 * | ***** 384 * | * 385 * | ** 386 * | * 387 * +-------*-------------------> 388 * delay 389 * 390 * The empirical curve may have both vertical and horizontal lines. 391 * Vertical lines represent constant delay for a range of 392 * probabilities; horizontal lines correspond to a discontinuty 393 * in the delay distribution: the pipe will use the largest delay 394 * for a given probability. 395 * 396 * To pass the curve to dummynet, we must store the parameters 397 * in a file as described below, and issue the command 398 * 399 * ipfw pipe <n> config ... bw XXX profile <filename> ... 400 * 401 * The file format is the following, with whitespace acting as 402 * a separator and '#' indicating the beginning a comment: 403 * 404 * samples N 405 * the number of samples used in the internal 406 * representation (2..1024; default 100); 407 * 408 * loss-level L 409 * The probability above which packets are lost. 410 * (0.0 <= L <= 1.0, default 1.0 i.e. no loss); 411 * 412 * name identifier 413 * Optional a name (listed by "ipfw pipe show") 414 * to identify the distribution; 415 * 416 * "delay prob" | "prob delay" 417 * One of these two lines is mandatory and defines 418 * the format of the following lines with data points. 419 * 420 * XXX YYY 421 * 2 or more lines representing points in the curve, 422 * with either delay or probability first, according 423 * to the chosen format. 424 * The unit for delay is milliseconds. 425 * 426 * Data points does not need to be ordered or equal to the number 427 * specified in the "samples" line. ipfw will sort and interpolate 428 * the curve as needed. 429 * 430 * Example of a profile file: 431 432 name bla_bla_bla 433 samples 100 434 loss-level 0.86 435 prob delay 436 0 200 # minimum overhead is 200ms 437 0.5 200 438 0.5 300 439 0.8 1000 440 0.9 1300 441 1 1300 442 443 * Internally, we will convert the curve to a fixed number of 444 * samples, and when it is time to transmit a packet we will 445 * model the extra delay as extra bits in the packet. 446 * 447 */ 448 449#define ED_MAX_LINE_LEN 256+ED_MAX_NAME_LEN 450#define ED_TOK_SAMPLES "samples" 451#define ED_TOK_LOSS "loss-level" 452#define ED_TOK_NAME "name" 453#define ED_TOK_DELAY "delay" 454#define ED_TOK_PROB "prob" 455#define ED_TOK_BW "bw" 456#define ED_SEPARATORS " \t\n" 457#define ED_MIN_SAMPLES_NO 2 458 459/* 460 * returns 1 if s is a non-negative number, with at least one '.' 461 */ 462static int 463is_valid_number(const char *s) 464{ 465 int i, dots_found = 0; 466 int len = strlen(s); 467 468 for (i = 0; i<len; ++i) 469 if (!isdigit(s[i]) && (s[i] !='.' || ++dots_found > 1)) 470 return 0; 471 return 1; 472} 473 474/* 475 * Take as input a string describing a bandwidth value 476 * and return the numeric bandwidth value. 477 * set clocking interface or bandwidth value 478 */ 479void 480read_bandwidth(char *arg, int *bandwidth, char *if_name, int namelen) 481{ 482 if (*bandwidth != -1) 483 warn("duplicate token, override bandwidth value!"); 484 485 if (arg[0] >= 'a' && arg[0] <= 'z') { 486 if (namelen >= IFNAMSIZ) 487 warn("interface name truncated"); 488 namelen--; 489 /* interface name */ 490 strncpy(if_name, arg, namelen); 491 if_name[namelen] = '\0'; 492 *bandwidth = 0; 493 } else { /* read bandwidth value */ 494 int bw; 495 char *end = NULL; 496 497 bw = strtoul(arg, &end, 0); 498 if (*end == 'K' || *end == 'k') { 499 end++; 500 bw *= 1000; 501 } else if (*end == 'M') { 502 end++; 503 bw *= 1000000; 504 } 505 if ((*end == 'B' && 506 _substrcmp2(end, "Bi", "Bit/s") != 0) || 507 _substrcmp2(end, "by", "bytes") == 0) 508 bw *= 8; 509 510 if (bw < 0) 511 errx(EX_DATAERR, "bandwidth too large"); 512 513 *bandwidth = bw; 514 if_name[0] = '\0'; 515 } 516} 517 518struct point { 519 double prob; 520 double delay; 521}; 522 523int 524compare_points(const void *vp1, const void *vp2) 525{ 526 const struct point *p1 = vp1; 527 const struct point *p2 = vp2; 528 double res = 0; 529 530 res = p1->prob - p2->prob; 531 if (res == 0) 532 res = p1->delay - p2->delay; 533 if (res < 0) 534 return -1; 535 else if (res > 0) 536 return 1; 537 else 538 return 0; 539} 540 541#define ED_EFMT(s) EX_DATAERR,"error in %s at line %d: "#s,filename,lineno 542 543static void 544load_extra_delays(const char *filename, struct dn_pipe *p) 545{ 546 char line[ED_MAX_LINE_LEN]; 547 FILE *f; 548 int lineno = 0; 549 int i; 550 551 int samples = -1; 552 double loss = -1.0; 553 char profile_name[ED_MAX_NAME_LEN]; 554 int delay_first = -1; 555 int do_points = 0; 556 struct point points[ED_MAX_SAMPLES_NO]; 557 int points_no = 0; 558 559 profile_name[0] = '\0'; 560 f = fopen(filename, "r"); 561 if (f == NULL) 562 err(EX_UNAVAILABLE, "fopen: %s", filename); 563 564 while (fgets(line, ED_MAX_LINE_LEN, f)) { /* read commands */ 565 char *s, *cur = line, *name = NULL, *arg = NULL; 566 567 ++lineno; 568 569 /* parse the line */ 570 while (cur) { 571 s = strsep(&cur, ED_SEPARATORS); 572 if (s == NULL || *s == '#') 573 break; 574 if (*s == '\0') 575 continue; 576 if (arg) 577 errx(ED_EFMT("too many arguments")); 578 if (name == NULL) 579 name = s; 580 else 581 arg = s; 582 } 583 if (name == NULL) /* empty line */ 584 continue; 585 if (arg == NULL) 586 errx(ED_EFMT("missing arg for %s"), name); 587 588 if (!strcasecmp(name, ED_TOK_SAMPLES)) { 589 if (samples > 0) 590 errx(ED_EFMT("duplicate ``samples'' line")); 591 if (atoi(arg) <=0) 592 errx(ED_EFMT("invalid number of samples")); 593 samples = atoi(arg); 594 if (samples>ED_MAX_SAMPLES_NO) 595 errx(ED_EFMT("too many samples, maximum is %d"), 596 ED_MAX_SAMPLES_NO); 597 do_points = 0; 598 } else if (!strcasecmp(name, ED_TOK_BW)) { 599 read_bandwidth(arg, &p->bandwidth, p->if_name, sizeof(p->if_name)); 600 } else if (!strcasecmp(name, ED_TOK_LOSS)) { 601 if (loss != -1.0) 602 errx(ED_EFMT("duplicated token: %s"), name); 603 if (!is_valid_number(arg)) 604 errx(ED_EFMT("invalid %s"), arg); 605 loss = atof(arg); 606 if (loss > 1) 607 errx(ED_EFMT("%s greater than 1.0"), name); 608 do_points = 0; 609 } else if (!strcasecmp(name, ED_TOK_NAME)) { 610 if (profile_name[0] != '\0') 611 errx(ED_EFMT("duplicated token: %s"), name); 612 strncpy(profile_name, arg, sizeof(profile_name) - 1); 613 profile_name[sizeof(profile_name)-1] = '\0'; 614 do_points = 0; 615 } else if (!strcasecmp(name, ED_TOK_DELAY)) { 616 if (do_points) 617 errx(ED_EFMT("duplicated token: %s"), name); 618 delay_first = 1; 619 do_points = 1; 620 } else if (!strcasecmp(name, ED_TOK_PROB)) { 621 if (do_points) 622 errx(ED_EFMT("duplicated token: %s"), name); 623 delay_first = 0; 624 do_points = 1; 625 } else if (do_points) { 626 if (!is_valid_number(name) || !is_valid_number(arg)) 627 errx(ED_EFMT("invalid point found")); 628 if (delay_first) { 629 points[points_no].delay = atof(name); 630 points[points_no].prob = atof(arg); 631 } else { 632 points[points_no].delay = atof(arg); 633 points[points_no].prob = atof(name); 634 } 635 if (points[points_no].prob > 1.0) 636 errx(ED_EFMT("probability greater than 1.0")); 637 ++points_no; 638 } else { 639 errx(ED_EFMT("unrecognised command '%s'"), name); 640 } 641 } 642 643 if (samples == -1) { 644 warnx("'%s' not found, assuming 100", ED_TOK_SAMPLES); 645 samples = 100; 646 } 647 648 if (loss == -1.0) { 649 warnx("'%s' not found, assuming no loss", ED_TOK_LOSS); 650 loss = 1; 651 } 652 653 /* make sure that there are enough points. */ 654 if (points_no < ED_MIN_SAMPLES_NO) 655 errx(ED_EFMT("too few samples, need at least %d"), 656 ED_MIN_SAMPLES_NO); 657 658 qsort(points, points_no, sizeof(struct point), compare_points); 659 660 /* interpolation */ 661 for (i = 0; i<points_no-1; ++i) { 662 double y1 = points[i].prob * samples; 663 double x1 = points[i].delay; 664 double y2 = points[i+1].prob * samples; 665 double x2 = points[i+1].delay; 666 667 int index = y1; 668 int stop = y2; 669 670 if (x1 == x2) { 671 for (; index<stop; ++index) 672 p->samples[index] = x1; 673 } else { 674 double m = (y2-y1)/(x2-x1); 675 double c = y1 - m*x1; 676 for (; index<stop ; ++index) 677 p->samples[index] = (index - c)/m; 678 } 679 } 680 p->samples_no = samples; 681 p->loss_level = loss * samples; 682 strncpy(p->name, profile_name, sizeof(p->name)); 683} 684 685void 686ipfw_config_pipe(int ac, char **av) 687{ 688 int samples[ED_MAX_SAMPLES_NO]; 689 struct dn_pipe p; 690 int i; 691 char *end; 692 void *par = NULL; 693 694 memset(&p, 0, sizeof p); 695 p.bandwidth = -1; 696 697 av++; ac--; 698 /* Pipe number */ 699 if (ac && isdigit(**av)) { 700 i = atoi(*av); av++; ac--; 701 if (co.do_pipe == 1) 702 p.pipe_nr = i; 703 else 704 p.fs.fs_nr = i; 705 } 706 while (ac > 0) { 707 double d; 708 int tok = match_token(dummynet_params, *av); 709 ac--; av++; 710 711 switch(tok) { 712 case TOK_NOERROR: 713 p.fs.flags_fs |= DN_NOERROR; 714 break; 715 716 case TOK_PLR: 717 NEED1("plr needs argument 0..1\n"); 718 d = strtod(av[0], NULL); 719 if (d > 1) 720 d = 1; 721 else if (d < 0) 722 d = 0; 723 p.fs.plr = (int)(d*0x7fffffff); 724 ac--; av++; 725 break; 726 727 case TOK_QUEUE: 728 NEED1("queue needs queue size\n"); 729 end = NULL; 730 p.fs.qsize = strtoul(av[0], &end, 0); 731 if (*end == 'K' || *end == 'k') { 732 p.fs.flags_fs |= DN_QSIZE_IS_BYTES; 733 p.fs.qsize *= 1024; 734 } else if (*end == 'B' || 735 _substrcmp2(end, "by", "bytes") == 0) { 736 p.fs.flags_fs |= DN_QSIZE_IS_BYTES; 737 } 738 ac--; av++; 739 break; 740 741 case TOK_BUCKETS: 742 NEED1("buckets needs argument\n"); 743 p.fs.rq_size = strtoul(av[0], NULL, 0); 744 ac--; av++; 745 break; 746 747 case TOK_MASK: 748 NEED1("mask needs mask specifier\n"); 749 /* 750 * per-flow queue, mask is dst_ip, dst_port, 751 * src_ip, src_port, proto measured in bits 752 */ 753 par = NULL; 754 755 bzero(&p.fs.flow_mask, sizeof(p.fs.flow_mask)); 756 end = NULL; 757 758 while (ac >= 1) { 759 uint32_t *p32 = NULL; 760 uint16_t *p16 = NULL; 761 uint32_t *p20 = NULL; 762 struct in6_addr *pa6 = NULL; 763 uint32_t a; 764 765 tok = match_token(dummynet_params, *av); 766 ac--; av++; 767 switch(tok) { 768 case TOK_ALL: 769 /* 770 * special case, all bits significant 771 */ 772 p.fs.flow_mask.dst_ip = ~0; 773 p.fs.flow_mask.src_ip = ~0; 774 p.fs.flow_mask.dst_port = ~0; 775 p.fs.flow_mask.src_port = ~0; 776 p.fs.flow_mask.proto = ~0; 777 n2mask(&(p.fs.flow_mask.dst_ip6), 128); 778 n2mask(&(p.fs.flow_mask.src_ip6), 128); 779 p.fs.flow_mask.flow_id6 = ~0; 780 p.fs.flags_fs |= DN_HAVE_FLOW_MASK; 781 goto end_mask; 782 783 case TOK_DSTIP: 784 p32 = &p.fs.flow_mask.dst_ip; 785 break; 786 787 case TOK_SRCIP: 788 p32 = &p.fs.flow_mask.src_ip; 789 break; 790 791 case TOK_DSTIP6: 792 pa6 = &(p.fs.flow_mask.dst_ip6); 793 break; 794 795 case TOK_SRCIP6: 796 pa6 = &(p.fs.flow_mask.src_ip6); 797 break; 798 799 case TOK_FLOWID: 800 p20 = &p.fs.flow_mask.flow_id6; 801 break; 802 803 case TOK_DSTPORT: 804 p16 = &p.fs.flow_mask.dst_port; 805 break; 806 807 case TOK_SRCPORT: 808 p16 = &p.fs.flow_mask.src_port; 809 break; 810 811 case TOK_PROTO: 812 break; 813 814 default: 815 ac++; av--; /* backtrack */ 816 goto end_mask; 817 } 818 if (ac < 1) 819 errx(EX_USAGE, "mask: value missing"); 820 if (*av[0] == '/') { 821 a = strtoul(av[0]+1, &end, 0); 822 if (pa6 == NULL) 823 a = (a == 32) ? ~0 : (1 << a) - 1; 824 } else 825 a = strtoul(av[0], &end, 0); 826 if (p32 != NULL) 827 *p32 = a; 828 else if (p16 != NULL) { 829 if (a > 0xFFFF) 830 errx(EX_DATAERR, 831 "port mask must be 16 bit"); 832 *p16 = (uint16_t)a; 833 } else if (p20 != NULL) { 834 if (a > 0xfffff) 835 errx(EX_DATAERR, 836 "flow_id mask must be 20 bit"); 837 *p20 = (uint32_t)a; 838 } else if (pa6 != NULL) { 839 if (a > 128) 840 errx(EX_DATAERR, 841 "in6addr invalid mask len"); 842 else 843 n2mask(pa6, a); 844 } else { 845 if (a > 0xFF) 846 errx(EX_DATAERR, 847 "proto mask must be 8 bit"); 848 p.fs.flow_mask.proto = (uint8_t)a; 849 } 850 if (a != 0) 851 p.fs.flags_fs |= DN_HAVE_FLOW_MASK; 852 ac--; av++; 853 } /* end while, config masks */ 854end_mask: 855 break; 856 857 case TOK_RED: 858 case TOK_GRED: 859 NEED1("red/gred needs w_q/min_th/max_th/max_p\n"); 860 p.fs.flags_fs |= DN_IS_RED; 861 if (tok == TOK_GRED) 862 p.fs.flags_fs |= DN_IS_GENTLE_RED; 863 /* 864 * the format for parameters is w_q/min_th/max_th/max_p 865 */ 866 if ((end = strsep(&av[0], "/"))) { 867 double w_q = strtod(end, NULL); 868 if (w_q > 1 || w_q <= 0) 869 errx(EX_DATAERR, "0 < w_q <= 1"); 870 p.fs.w_q = (int) (w_q * (1 << SCALE_RED)); 871 } 872 if ((end = strsep(&av[0], "/"))) { 873 p.fs.min_th = strtoul(end, &end, 0); 874 if (*end == 'K' || *end == 'k') 875 p.fs.min_th *= 1024; 876 } 877 if ((end = strsep(&av[0], "/"))) { 878 p.fs.max_th = strtoul(end, &end, 0); 879 if (*end == 'K' || *end == 'k') 880 p.fs.max_th *= 1024; 881 } 882 if ((end = strsep(&av[0], "/"))) { 883 double max_p = strtod(end, NULL); 884 if (max_p > 1 || max_p <= 0) 885 errx(EX_DATAERR, "0 < max_p <= 1"); 886 p.fs.max_p = (int)(max_p * (1 << SCALE_RED)); 887 } 888 ac--; av++; 889 break; 890 891 case TOK_DROPTAIL: 892 p.fs.flags_fs &= ~(DN_IS_RED|DN_IS_GENTLE_RED); 893 break; 894 895 case TOK_BW: 896 NEED1("bw needs bandwidth or interface\n"); 897 if (co.do_pipe != 1) 898 errx(EX_DATAERR, "bandwidth only valid for pipes"); 899 read_bandwidth(av[0], &p.bandwidth, p.if_name, sizeof(p.if_name)); 900 ac--; av++; 901 break; 902 903 case TOK_DELAY: 904 if (co.do_pipe != 1) 905 errx(EX_DATAERR, "delay only valid for pipes"); 906 NEED1("delay needs argument 0..10000ms\n"); 907 p.delay = strtoul(av[0], NULL, 0); 908 ac--; av++; 909 break; 910 911 case TOK_WEIGHT: 912 if (co.do_pipe == 1) 913 errx(EX_DATAERR,"weight only valid for queues"); 914 NEED1("weight needs argument 0..100\n"); 915 p.fs.weight = strtoul(av[0], &end, 0); 916 ac--; av++; 917 break; 918 919 case TOK_PIPE: 920 if (co.do_pipe == 1) 921 errx(EX_DATAERR,"pipe only valid for queues"); 922 NEED1("pipe needs pipe_number\n"); 923 p.fs.parent_nr = strtoul(av[0], &end, 0); 924 ac--; av++; 925 break; 926 927 case TOK_PIPE_PROFILE: 928 if (co.do_pipe != 1) 929 errx(EX_DATAERR, "extra delay only valid for pipes"); 930 NEED1("extra delay needs the file name\n"); 931 p.samples = &samples[0]; 932 load_extra_delays(av[0], &p); 933 --ac; ++av; 934 break; 935 936 default: 937 errx(EX_DATAERR, "unrecognised option ``%s''", av[-1]); 938 } 939 } 940 if (co.do_pipe == 1) { 941 if (p.pipe_nr == 0) 942 errx(EX_DATAERR, "pipe_nr must be > 0"); 943 if (p.delay > 10000) 944 errx(EX_DATAERR, "delay must be < 10000"); 945 } else { /* co.do_pipe == 2, queue */ 946 if (p.fs.parent_nr == 0) 947 errx(EX_DATAERR, "pipe must be > 0"); 948 if (p.fs.weight >100) 949 errx(EX_DATAERR, "weight must be <= 100"); 950 } 951 952 /* check for bandwidth value */ 953 if (p.bandwidth == -1) { 954 p.bandwidth = 0; 955 if (p.samples_no > 0) 956 errx(EX_DATAERR, "profile requires a bandwidth limit"); 957 } 958 959 if (p.fs.flags_fs & DN_QSIZE_IS_BYTES) { 960 size_t len; 961 long limit; 962 963 len = sizeof(limit); 964 if (sysctlbyname("net.inet.ip.dummynet.pipe_byte_limit", 965 &limit, &len, NULL, 0) == -1) 966 limit = 1024*1024; 967 if (p.fs.qsize > limit) 968 errx(EX_DATAERR, "queue size must be < %ldB", limit); 969 } else { 970 size_t len; 971 long limit; 972 973 len = sizeof(limit); 974 if (sysctlbyname("net.inet.ip.dummynet.pipe_slot_limit", 975 &limit, &len, NULL, 0) == -1) 976 limit = 100; 977 if (p.fs.qsize > limit) 978 errx(EX_DATAERR, "2 <= queue size <= %ld", limit); 979 } 980 if (p.fs.flags_fs & DN_IS_RED) { 981 size_t len; 982 int lookup_depth, avg_pkt_size; 983 double s, idle, weight, w_q; 984 struct clockinfo ck; 985 int t; 986 987 if (p.fs.min_th >= p.fs.max_th) 988 errx(EX_DATAERR, "min_th %d must be < than max_th %d", 989 p.fs.min_th, p.fs.max_th); 990 if (p.fs.max_th == 0) 991 errx(EX_DATAERR, "max_th must be > 0"); 992 993 len = sizeof(int); 994 if (sysctlbyname("net.inet.ip.dummynet.red_lookup_depth", 995 &lookup_depth, &len, NULL, 0) == -1) 996 errx(1, "sysctlbyname(\"%s\")", 997 "net.inet.ip.dummynet.red_lookup_depth"); 998 if (lookup_depth == 0) 999 errx(EX_DATAERR, "net.inet.ip.dummynet.red_lookup_depth" 1000 " must be greater than zero"); 1001 1002 len = sizeof(int); 1003 if (sysctlbyname("net.inet.ip.dummynet.red_avg_pkt_size", 1004 &avg_pkt_size, &len, NULL, 0) == -1) 1005 1006 errx(1, "sysctlbyname(\"%s\")", 1007 "net.inet.ip.dummynet.red_avg_pkt_size"); 1008 if (avg_pkt_size == 0) 1009 errx(EX_DATAERR, 1010 "net.inet.ip.dummynet.red_avg_pkt_size must" 1011 " be greater than zero"); 1012 1013 len = sizeof(struct clockinfo); 1014 if (sysctlbyname("kern.clockrate", &ck, &len, NULL, 0) == -1) 1015 errx(1, "sysctlbyname(\"%s\")", "kern.clockrate"); 1016 1017 /* 1018 * Ticks needed for sending a medium-sized packet. 1019 * Unfortunately, when we are configuring a WF2Q+ queue, we 1020 * do not have bandwidth information, because that is stored 1021 * in the parent pipe, and also we have multiple queues 1022 * competing for it. So we set s=0, which is not very 1023 * correct. But on the other hand, why do we want RED with 1024 * WF2Q+ ? 1025 */ 1026 if (p.bandwidth==0) /* this is a WF2Q+ queue */ 1027 s = 0; 1028 else 1029 s = (double)ck.hz * avg_pkt_size * 8 / p.bandwidth; 1030 1031 /* 1032 * max idle time (in ticks) before avg queue size becomes 0. 1033 * NOTA: (3/w_q) is approx the value x so that 1034 * (1-w_q)^x < 10^-3. 1035 */ 1036 w_q = ((double)p.fs.w_q) / (1 << SCALE_RED); 1037 idle = s * 3. / w_q; 1038 p.fs.lookup_step = (int)idle / lookup_depth; 1039 if (!p.fs.lookup_step) 1040 p.fs.lookup_step = 1; 1041 weight = 1 - w_q; 1042 for (t = p.fs.lookup_step; t > 1; --t) 1043 weight *= 1 - w_q; 1044 p.fs.lookup_weight = (int)(weight * (1 << SCALE_RED)); 1045 } 1046 if (p.samples_no <= 0) { 1047 i = do_cmd(IP_DUMMYNET_CONFIGURE, &p, sizeof p); 1048 } else { 1049 struct dn_pipe_max pm; 1050 int len = sizeof(pm); 1051 1052 memcpy(&pm.pipe, &p, sizeof(pm.pipe)); 1053 memcpy(&pm.samples, samples, sizeof(pm.samples)); 1054 1055 i = do_cmd(IP_DUMMYNET_CONFIGURE, &pm, len); 1056 } 1057 1058 if (i) 1059 err(1, "setsockopt(%s)", "IP_DUMMYNET_CONFIGURE"); 1060} 1061