dummynet.c revision 194930
1/* 2 * Copyright (c) 2002-2003 Luigi Rizzo 3 * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp 4 * Copyright (c) 1994 Ugen J.S.Antsilevich 5 * 6 * Idea and grammar partially left from: 7 * Copyright (c) 1993 Daniel Boulet 8 * 9 * Redistribution and use in source forms, with and without modification, 10 * are permitted provided that this entire comment appears intact. 11 * 12 * Redistribution in binary form may occur without any restrictions. 13 * Obviously, it would be nice if you gave credit where credit is due 14 * but requiring it would be too onerous. 15 * 16 * This software is provided ``AS IS'' without any warranties of any kind. 17 * 18 * NEW command line interface for IP firewall facility 19 * 20 * $FreeBSD: head/sbin/ipfw/dummynet.c 194930 2009-06-24 22:57:07Z oleg $ 21 * 22 * dummynet support 23 */ 24 25#include <sys/types.h> 26#include <sys/socket.h> 27#include <sys/queue.h> 28/* XXX there are several sysctl leftover here */ 29#include <sys/sysctl.h> 30 31#include "ipfw2.h" 32 33#include <ctype.h> 34#include <err.h> 35#include <errno.h> 36#include <libutil.h> 37#include <netdb.h> 38#include <stdio.h> 39#include <stdlib.h> 40#include <string.h> 41#include <sysexits.h> 42 43#include <net/if.h> 44#include <netinet/in.h> 45#include <netinet/ip_fw.h> 46#include <netinet/ip_dummynet.h> 47#include <arpa/inet.h> /* inet_ntoa */ 48 49static struct _s_x dummynet_params[] = { 50 { "plr", TOK_PLR }, 51 { "noerror", TOK_NOERROR }, 52 { "buckets", TOK_BUCKETS }, 53 { "dst-ip", TOK_DSTIP }, 54 { "src-ip", TOK_SRCIP }, 55 { "dst-port", TOK_DSTPORT }, 56 { "src-port", TOK_SRCPORT }, 57 { "proto", TOK_PROTO }, 58 { "weight", TOK_WEIGHT }, 59 { "all", TOK_ALL }, 60 { "mask", TOK_MASK }, 61 { "droptail", TOK_DROPTAIL }, 62 { "red", TOK_RED }, 63 { "gred", TOK_GRED }, 64 { "bw", TOK_BW }, 65 { "bandwidth", TOK_BW }, 66 { "delay", TOK_DELAY }, 67 { "pipe", TOK_PIPE }, 68 { "queue", TOK_QUEUE }, 69 { "flow-id", TOK_FLOWID}, 70 { "dst-ipv6", TOK_DSTIP6}, 71 { "dst-ip6", TOK_DSTIP6}, 72 { "src-ipv6", TOK_SRCIP6}, 73 { "src-ip6", TOK_SRCIP6}, 74 { "profile", TOK_PIPE_PROFILE}, 75 { "burst", TOK_BURST}, 76 { "dummynet-params", TOK_NULL }, 77 { NULL, 0 } /* terminator */ 78}; 79 80static int 81sort_q(const void *pa, const void *pb) 82{ 83 int rev = (co.do_sort < 0); 84 int field = rev ? -co.do_sort : co.do_sort; 85 long long res = 0; 86 const struct dn_flow_queue *a = pa; 87 const struct dn_flow_queue *b = pb; 88 89 switch (field) { 90 case 1: /* pkts */ 91 res = a->len - b->len; 92 break; 93 case 2: /* bytes */ 94 res = a->len_bytes - b->len_bytes; 95 break; 96 97 case 3: /* tot pkts */ 98 res = a->tot_pkts - b->tot_pkts; 99 break; 100 101 case 4: /* tot bytes */ 102 res = a->tot_bytes - b->tot_bytes; 103 break; 104 } 105 if (res < 0) 106 res = -1; 107 if (res > 0) 108 res = 1; 109 return (int)(rev ? res : -res); 110} 111 112static void 113list_queues(struct dn_flow_set *fs, struct dn_flow_queue *q) 114{ 115 int l; 116 int index_printed, indexes = 0; 117 char buff[255]; 118 struct protoent *pe; 119 120 if (fs->rq_elements == 0) 121 return; 122 123 if (co.do_sort != 0) 124 heapsort(q, fs->rq_elements, sizeof *q, sort_q); 125 126 /* Print IPv4 flows */ 127 index_printed = 0; 128 for (l = 0; l < fs->rq_elements; l++) { 129 struct in_addr ina; 130 131 /* XXX: Should check for IPv4 flows */ 132 if (IS_IP6_FLOW_ID(&(q[l].id))) 133 continue; 134 135 if (!index_printed) { 136 index_printed = 1; 137 if (indexes > 0) /* currently a no-op */ 138 printf("\n"); 139 indexes++; 140 printf(" " 141 "mask: 0x%02x 0x%08x/0x%04x -> 0x%08x/0x%04x\n", 142 fs->flow_mask.proto, 143 fs->flow_mask.src_ip, fs->flow_mask.src_port, 144 fs->flow_mask.dst_ip, fs->flow_mask.dst_port); 145 146 printf("BKT Prot ___Source IP/port____ " 147 "____Dest. IP/port____ " 148 "Tot_pkt/bytes Pkt/Byte Drp\n"); 149 } 150 151 printf("%3d ", q[l].hash_slot); 152 pe = getprotobynumber(q[l].id.proto); 153 if (pe) 154 printf("%-4s ", pe->p_name); 155 else 156 printf("%4u ", q[l].id.proto); 157 ina.s_addr = htonl(q[l].id.src_ip); 158 printf("%15s/%-5d ", 159 inet_ntoa(ina), q[l].id.src_port); 160 ina.s_addr = htonl(q[l].id.dst_ip); 161 printf("%15s/%-5d ", 162 inet_ntoa(ina), q[l].id.dst_port); 163 printf("%4llu %8llu %2u %4u %3u\n", 164 align_uint64(&q[l].tot_pkts), 165 align_uint64(&q[l].tot_bytes), 166 q[l].len, q[l].len_bytes, q[l].drops); 167 if (co.verbose) 168 printf(" S %20llu F %20llu\n", 169 align_uint64(&q[l].S), align_uint64(&q[l].F)); 170 } 171 172 /* Print IPv6 flows */ 173 index_printed = 0; 174 for (l = 0; l < fs->rq_elements; l++) { 175 if (!IS_IP6_FLOW_ID(&(q[l].id))) 176 continue; 177 178 if (!index_printed) { 179 index_printed = 1; 180 if (indexes > 0) 181 printf("\n"); 182 indexes++; 183 printf("\n mask: proto: 0x%02x, flow_id: 0x%08x, ", 184 fs->flow_mask.proto, fs->flow_mask.flow_id6); 185 inet_ntop(AF_INET6, &(fs->flow_mask.src_ip6), 186 buff, sizeof(buff)); 187 printf("%s/0x%04x -> ", buff, fs->flow_mask.src_port); 188 inet_ntop( AF_INET6, &(fs->flow_mask.dst_ip6), 189 buff, sizeof(buff) ); 190 printf("%s/0x%04x\n", buff, fs->flow_mask.dst_port); 191 192 printf("BKT ___Prot___ _flow-id_ " 193 "______________Source IPv6/port_______________ " 194 "_______________Dest. IPv6/port_______________ " 195 "Tot_pkt/bytes Pkt/Byte Drp\n"); 196 } 197 printf("%3d ", q[l].hash_slot); 198 pe = getprotobynumber(q[l].id.proto); 199 if (pe != NULL) 200 printf("%9s ", pe->p_name); 201 else 202 printf("%9u ", q[l].id.proto); 203 printf("%7d %39s/%-5d ", q[l].id.flow_id6, 204 inet_ntop(AF_INET6, &(q[l].id.src_ip6), buff, sizeof(buff)), 205 q[l].id.src_port); 206 printf(" %39s/%-5d ", 207 inet_ntop(AF_INET6, &(q[l].id.dst_ip6), buff, sizeof(buff)), 208 q[l].id.dst_port); 209 printf(" %4llu %8llu %2u %4u %3u\n", 210 align_uint64(&q[l].tot_pkts), 211 align_uint64(&q[l].tot_bytes), 212 q[l].len, q[l].len_bytes, q[l].drops); 213 if (co.verbose) 214 printf(" S %20llu F %20llu\n", 215 align_uint64(&q[l].S), 216 align_uint64(&q[l].F)); 217 } 218} 219 220static void 221print_flowset_parms(struct dn_flow_set *fs, char *prefix) 222{ 223 int l; 224 char qs[30]; 225 char plr[30]; 226 char red[90]; /* Display RED parameters */ 227 228 l = fs->qsize; 229 if (fs->flags_fs & DN_QSIZE_IS_BYTES) { 230 if (l >= 8192) 231 sprintf(qs, "%d KB", l / 1024); 232 else 233 sprintf(qs, "%d B", l); 234 } else 235 sprintf(qs, "%3d sl.", l); 236 if (fs->plr) 237 sprintf(plr, "plr %f", 1.0 * fs->plr / (double)(0x7fffffff)); 238 else 239 plr[0] = '\0'; 240 if (fs->flags_fs & DN_IS_RED) /* RED parameters */ 241 sprintf(red, 242 "\n\t %cRED w_q %f min_th %d max_th %d max_p %f", 243 (fs->flags_fs & DN_IS_GENTLE_RED) ? 'G' : ' ', 244 1.0 * fs->w_q / (double)(1 << SCALE_RED), 245 SCALE_VAL(fs->min_th), 246 SCALE_VAL(fs->max_th), 247 1.0 * fs->max_p / (double)(1 << SCALE_RED)); 248 else 249 sprintf(red, "droptail"); 250 251 printf("%s %s%s %d queues (%d buckets) %s\n", 252 prefix, qs, plr, fs->rq_elements, fs->rq_size, red); 253} 254 255static void 256print_extra_delay_parms(struct dn_pipe *p) 257{ 258 double loss; 259 if (p->samples_no <= 0) 260 return; 261 262 loss = p->loss_level; 263 loss /= p->samples_no; 264 printf("\t profile: name \"%s\" loss %f samples %d\n", 265 p->name, loss, p->samples_no); 266} 267 268void 269ipfw_list_pipes(void *data, uint nbytes, int ac, char *av[]) 270{ 271 int rulenum; 272 void *next = data; 273 struct dn_pipe *p = (struct dn_pipe *) data; 274 struct dn_flow_set *fs; 275 struct dn_flow_queue *q; 276 int l; 277 278 if (ac > 0) 279 rulenum = strtoul(*av++, NULL, 10); 280 else 281 rulenum = 0; 282 for (; nbytes >= sizeof *p; p = (struct dn_pipe *)next) { 283 double b = p->bandwidth; 284 char buf[30]; 285 char prefix[80]; 286 char burst[5 + 7]; 287 288 if (SLIST_NEXT(p, next) != (struct dn_pipe *)DN_IS_PIPE) 289 break; /* done with pipes, now queues */ 290 291 /* 292 * compute length, as pipe have variable size 293 */ 294 l = sizeof(*p) + p->fs.rq_elements * sizeof(*q); 295 next = (char *)p + l; 296 nbytes -= l; 297 298 if ((rulenum != 0 && rulenum != p->pipe_nr) || co.do_pipe == 2) 299 continue; 300 301 /* 302 * Print rate (or clocking interface) 303 */ 304 if (p->if_name[0] != '\0') 305 sprintf(buf, "%s", p->if_name); 306 else if (b == 0) 307 sprintf(buf, "unlimited"); 308 else if (b >= 1000000) 309 sprintf(buf, "%7.3f Mbit/s", b/1000000); 310 else if (b >= 1000) 311 sprintf(buf, "%7.3f Kbit/s", b/1000); 312 else 313 sprintf(buf, "%7.3f bit/s ", b); 314 315 sprintf(prefix, "%05d: %s %4d ms ", 316 p->pipe_nr, buf, p->delay); 317 318 print_flowset_parms(&(p->fs), prefix); 319 320 if (humanize_number(burst, sizeof(burst), p->burst, 321 "Byte", HN_AUTOSCALE, 0) < 0 || co.verbose) 322 printf("\t burst: %ju Byte\n", p->burst); 323 else 324 printf("\t burst: %s\n", burst); 325 326 print_extra_delay_parms(p); 327 328 q = (struct dn_flow_queue *)(p+1); 329 list_queues(&(p->fs), q); 330 } 331 for (fs = next; nbytes >= sizeof *fs; fs = next) { 332 char prefix[80]; 333 334 if (SLIST_NEXT(fs, next) != (struct dn_flow_set *)DN_IS_QUEUE) 335 break; 336 l = sizeof(*fs) + fs->rq_elements * sizeof(*q); 337 next = (char *)fs + l; 338 nbytes -= l; 339 340 if (rulenum != 0 && ((rulenum != fs->fs_nr && co.do_pipe == 2) || 341 (rulenum != fs->parent_nr && co.do_pipe == 1))) { 342 continue; 343 } 344 345 q = (struct dn_flow_queue *)(fs+1); 346 sprintf(prefix, "q%05d: weight %d pipe %d ", 347 fs->fs_nr, fs->weight, fs->parent_nr); 348 print_flowset_parms(fs, prefix); 349 list_queues(fs, q); 350 } 351} 352 353/* 354 * Delete pipe or queue i 355 */ 356int 357ipfw_delete_pipe(int pipe_or_queue, int i) 358{ 359 struct dn_pipe p; 360 361 memset(&p, 0, sizeof p); 362 if (pipe_or_queue == 1) 363 p.pipe_nr = i; /* pipe */ 364 else 365 p.fs.fs_nr = i; /* queue */ 366 i = do_cmd(IP_DUMMYNET_DEL, &p, sizeof p); 367 if (i) { 368 i = 1; 369 warn("rule %u: setsockopt(IP_DUMMYNET_DEL)", i); 370 } 371 return i; 372} 373 374/* 375 * Code to parse delay profiles. 376 * 377 * Some link types introduce extra delays in the transmission 378 * of a packet, e.g. because of MAC level framing, contention on 379 * the use of the channel, MAC level retransmissions and so on. 380 * From our point of view, the channel is effectively unavailable 381 * for this extra time, which is constant or variable depending 382 * on the link type. Additionally, packets may be dropped after this 383 * time (e.g. on a wireless link after too many retransmissions). 384 * We can model the additional delay with an empirical curve 385 * that represents its distribution. 386 * 387 * cumulative probability 388 * 1.0 ^ 389 * | 390 * L +-- loss-level x 391 * | ****** 392 * | * 393 * | ***** 394 * | * 395 * | ** 396 * | * 397 * +-------*-------------------> 398 * delay 399 * 400 * The empirical curve may have both vertical and horizontal lines. 401 * Vertical lines represent constant delay for a range of 402 * probabilities; horizontal lines correspond to a discontinuty 403 * in the delay distribution: the pipe will use the largest delay 404 * for a given probability. 405 * 406 * To pass the curve to dummynet, we must store the parameters 407 * in a file as described below, and issue the command 408 * 409 * ipfw pipe <n> config ... bw XXX profile <filename> ... 410 * 411 * The file format is the following, with whitespace acting as 412 * a separator and '#' indicating the beginning a comment: 413 * 414 * samples N 415 * the number of samples used in the internal 416 * representation (2..1024; default 100); 417 * 418 * loss-level L 419 * The probability above which packets are lost. 420 * (0.0 <= L <= 1.0, default 1.0 i.e. no loss); 421 * 422 * name identifier 423 * Optional a name (listed by "ipfw pipe show") 424 * to identify the distribution; 425 * 426 * "delay prob" | "prob delay" 427 * One of these two lines is mandatory and defines 428 * the format of the following lines with data points. 429 * 430 * XXX YYY 431 * 2 or more lines representing points in the curve, 432 * with either delay or probability first, according 433 * to the chosen format. 434 * The unit for delay is milliseconds. 435 * 436 * Data points does not need to be ordered or equal to the number 437 * specified in the "samples" line. ipfw will sort and interpolate 438 * the curve as needed. 439 * 440 * Example of a profile file: 441 442 name bla_bla_bla 443 samples 100 444 loss-level 0.86 445 prob delay 446 0 200 # minimum overhead is 200ms 447 0.5 200 448 0.5 300 449 0.8 1000 450 0.9 1300 451 1 1300 452 453 * Internally, we will convert the curve to a fixed number of 454 * samples, and when it is time to transmit a packet we will 455 * model the extra delay as extra bits in the packet. 456 * 457 */ 458 459#define ED_MAX_LINE_LEN 256+ED_MAX_NAME_LEN 460#define ED_TOK_SAMPLES "samples" 461#define ED_TOK_LOSS "loss-level" 462#define ED_TOK_NAME "name" 463#define ED_TOK_DELAY "delay" 464#define ED_TOK_PROB "prob" 465#define ED_TOK_BW "bw" 466#define ED_SEPARATORS " \t\n" 467#define ED_MIN_SAMPLES_NO 2 468 469/* 470 * returns 1 if s is a non-negative number, with at least one '.' 471 */ 472static int 473is_valid_number(const char *s) 474{ 475 int i, dots_found = 0; 476 int len = strlen(s); 477 478 for (i = 0; i<len; ++i) 479 if (!isdigit(s[i]) && (s[i] !='.' || ++dots_found > 1)) 480 return 0; 481 return 1; 482} 483 484/* 485 * Take as input a string describing a bandwidth value 486 * and return the numeric bandwidth value. 487 * set clocking interface or bandwidth value 488 */ 489void 490read_bandwidth(char *arg, int *bandwidth, char *if_name, int namelen) 491{ 492 if (*bandwidth != -1) 493 warn("duplicate token, override bandwidth value!"); 494 495 if (arg[0] >= 'a' && arg[0] <= 'z') { 496 if (namelen >= IFNAMSIZ) 497 warn("interface name truncated"); 498 namelen--; 499 /* interface name */ 500 strncpy(if_name, arg, namelen); 501 if_name[namelen] = '\0'; 502 *bandwidth = 0; 503 } else { /* read bandwidth value */ 504 int bw; 505 char *end = NULL; 506 507 bw = strtoul(arg, &end, 0); 508 if (*end == 'K' || *end == 'k') { 509 end++; 510 bw *= 1000; 511 } else if (*end == 'M') { 512 end++; 513 bw *= 1000000; 514 } 515 if ((*end == 'B' && 516 _substrcmp2(end, "Bi", "Bit/s") != 0) || 517 _substrcmp2(end, "by", "bytes") == 0) 518 bw *= 8; 519 520 if (bw < 0) 521 errx(EX_DATAERR, "bandwidth too large"); 522 523 *bandwidth = bw; 524 if_name[0] = '\0'; 525 } 526} 527 528struct point { 529 double prob; 530 double delay; 531}; 532 533int 534compare_points(const void *vp1, const void *vp2) 535{ 536 const struct point *p1 = vp1; 537 const struct point *p2 = vp2; 538 double res = 0; 539 540 res = p1->prob - p2->prob; 541 if (res == 0) 542 res = p1->delay - p2->delay; 543 if (res < 0) 544 return -1; 545 else if (res > 0) 546 return 1; 547 else 548 return 0; 549} 550 551#define ED_EFMT(s) EX_DATAERR,"error in %s at line %d: "#s,filename,lineno 552 553static void 554load_extra_delays(const char *filename, struct dn_pipe *p) 555{ 556 char line[ED_MAX_LINE_LEN]; 557 FILE *f; 558 int lineno = 0; 559 int i; 560 561 int samples = -1; 562 double loss = -1.0; 563 char profile_name[ED_MAX_NAME_LEN]; 564 int delay_first = -1; 565 int do_points = 0; 566 struct point points[ED_MAX_SAMPLES_NO]; 567 int points_no = 0; 568 569 profile_name[0] = '\0'; 570 f = fopen(filename, "r"); 571 if (f == NULL) 572 err(EX_UNAVAILABLE, "fopen: %s", filename); 573 574 while (fgets(line, ED_MAX_LINE_LEN, f)) { /* read commands */ 575 char *s, *cur = line, *name = NULL, *arg = NULL; 576 577 ++lineno; 578 579 /* parse the line */ 580 while (cur) { 581 s = strsep(&cur, ED_SEPARATORS); 582 if (s == NULL || *s == '#') 583 break; 584 if (*s == '\0') 585 continue; 586 if (arg) 587 errx(ED_EFMT("too many arguments")); 588 if (name == NULL) 589 name = s; 590 else 591 arg = s; 592 } 593 if (name == NULL) /* empty line */ 594 continue; 595 if (arg == NULL) 596 errx(ED_EFMT("missing arg for %s"), name); 597 598 if (!strcasecmp(name, ED_TOK_SAMPLES)) { 599 if (samples > 0) 600 errx(ED_EFMT("duplicate ``samples'' line")); 601 if (atoi(arg) <=0) 602 errx(ED_EFMT("invalid number of samples")); 603 samples = atoi(arg); 604 if (samples>ED_MAX_SAMPLES_NO) 605 errx(ED_EFMT("too many samples, maximum is %d"), 606 ED_MAX_SAMPLES_NO); 607 do_points = 0; 608 } else if (!strcasecmp(name, ED_TOK_BW)) { 609 read_bandwidth(arg, &p->bandwidth, p->if_name, sizeof(p->if_name)); 610 } else if (!strcasecmp(name, ED_TOK_LOSS)) { 611 if (loss != -1.0) 612 errx(ED_EFMT("duplicated token: %s"), name); 613 if (!is_valid_number(arg)) 614 errx(ED_EFMT("invalid %s"), arg); 615 loss = atof(arg); 616 if (loss > 1) 617 errx(ED_EFMT("%s greater than 1.0"), name); 618 do_points = 0; 619 } else if (!strcasecmp(name, ED_TOK_NAME)) { 620 if (profile_name[0] != '\0') 621 errx(ED_EFMT("duplicated token: %s"), name); 622 strncpy(profile_name, arg, sizeof(profile_name) - 1); 623 profile_name[sizeof(profile_name)-1] = '\0'; 624 do_points = 0; 625 } else if (!strcasecmp(name, ED_TOK_DELAY)) { 626 if (do_points) 627 errx(ED_EFMT("duplicated token: %s"), name); 628 delay_first = 1; 629 do_points = 1; 630 } else if (!strcasecmp(name, ED_TOK_PROB)) { 631 if (do_points) 632 errx(ED_EFMT("duplicated token: %s"), name); 633 delay_first = 0; 634 do_points = 1; 635 } else if (do_points) { 636 if (!is_valid_number(name) || !is_valid_number(arg)) 637 errx(ED_EFMT("invalid point found")); 638 if (delay_first) { 639 points[points_no].delay = atof(name); 640 points[points_no].prob = atof(arg); 641 } else { 642 points[points_no].delay = atof(arg); 643 points[points_no].prob = atof(name); 644 } 645 if (points[points_no].prob > 1.0) 646 errx(ED_EFMT("probability greater than 1.0")); 647 ++points_no; 648 } else { 649 errx(ED_EFMT("unrecognised command '%s'"), name); 650 } 651 } 652 653 if (samples == -1) { 654 warnx("'%s' not found, assuming 100", ED_TOK_SAMPLES); 655 samples = 100; 656 } 657 658 if (loss == -1.0) { 659 warnx("'%s' not found, assuming no loss", ED_TOK_LOSS); 660 loss = 1; 661 } 662 663 /* make sure that there are enough points. */ 664 if (points_no < ED_MIN_SAMPLES_NO) 665 errx(ED_EFMT("too few samples, need at least %d"), 666 ED_MIN_SAMPLES_NO); 667 668 qsort(points, points_no, sizeof(struct point), compare_points); 669 670 /* interpolation */ 671 for (i = 0; i<points_no-1; ++i) { 672 double y1 = points[i].prob * samples; 673 double x1 = points[i].delay; 674 double y2 = points[i+1].prob * samples; 675 double x2 = points[i+1].delay; 676 677 int index = y1; 678 int stop = y2; 679 680 if (x1 == x2) { 681 for (; index<stop; ++index) 682 p->samples[index] = x1; 683 } else { 684 double m = (y2-y1)/(x2-x1); 685 double c = y1 - m*x1; 686 for (; index<stop ; ++index) 687 p->samples[index] = (index - c)/m; 688 } 689 } 690 p->samples_no = samples; 691 p->loss_level = loss * samples; 692 strncpy(p->name, profile_name, sizeof(p->name)); 693} 694 695void 696ipfw_config_pipe(int ac, char **av) 697{ 698 int samples[ED_MAX_SAMPLES_NO]; 699 struct dn_pipe p; 700 int i; 701 char *end; 702 void *par = NULL; 703 704 memset(&p, 0, sizeof p); 705 p.bandwidth = -1; 706 707 av++; ac--; 708 /* Pipe number */ 709 if (ac && isdigit(**av)) { 710 i = atoi(*av); av++; ac--; 711 if (co.do_pipe == 1) 712 p.pipe_nr = i; 713 else 714 p.fs.fs_nr = i; 715 } 716 while (ac > 0) { 717 double d; 718 int tok = match_token(dummynet_params, *av); 719 ac--; av++; 720 721 switch(tok) { 722 case TOK_NOERROR: 723 p.fs.flags_fs |= DN_NOERROR; 724 break; 725 726 case TOK_PLR: 727 NEED1("plr needs argument 0..1\n"); 728 d = strtod(av[0], NULL); 729 if (d > 1) 730 d = 1; 731 else if (d < 0) 732 d = 0; 733 p.fs.plr = (int)(d*0x7fffffff); 734 ac--; av++; 735 break; 736 737 case TOK_QUEUE: 738 NEED1("queue needs queue size\n"); 739 end = NULL; 740 p.fs.qsize = strtoul(av[0], &end, 0); 741 if (*end == 'K' || *end == 'k') { 742 p.fs.flags_fs |= DN_QSIZE_IS_BYTES; 743 p.fs.qsize *= 1024; 744 } else if (*end == 'B' || 745 _substrcmp2(end, "by", "bytes") == 0) { 746 p.fs.flags_fs |= DN_QSIZE_IS_BYTES; 747 } 748 ac--; av++; 749 break; 750 751 case TOK_BUCKETS: 752 NEED1("buckets needs argument\n"); 753 p.fs.rq_size = strtoul(av[0], NULL, 0); 754 ac--; av++; 755 break; 756 757 case TOK_MASK: 758 NEED1("mask needs mask specifier\n"); 759 /* 760 * per-flow queue, mask is dst_ip, dst_port, 761 * src_ip, src_port, proto measured in bits 762 */ 763 par = NULL; 764 765 bzero(&p.fs.flow_mask, sizeof(p.fs.flow_mask)); 766 end = NULL; 767 768 while (ac >= 1) { 769 uint32_t *p32 = NULL; 770 uint16_t *p16 = NULL; 771 uint32_t *p20 = NULL; 772 struct in6_addr *pa6 = NULL; 773 uint32_t a; 774 775 tok = match_token(dummynet_params, *av); 776 ac--; av++; 777 switch(tok) { 778 case TOK_ALL: 779 /* 780 * special case, all bits significant 781 */ 782 p.fs.flow_mask.dst_ip = ~0; 783 p.fs.flow_mask.src_ip = ~0; 784 p.fs.flow_mask.dst_port = ~0; 785 p.fs.flow_mask.src_port = ~0; 786 p.fs.flow_mask.proto = ~0; 787 n2mask(&(p.fs.flow_mask.dst_ip6), 128); 788 n2mask(&(p.fs.flow_mask.src_ip6), 128); 789 p.fs.flow_mask.flow_id6 = ~0; 790 p.fs.flags_fs |= DN_HAVE_FLOW_MASK; 791 goto end_mask; 792 793 case TOK_DSTIP: 794 p32 = &p.fs.flow_mask.dst_ip; 795 break; 796 797 case TOK_SRCIP: 798 p32 = &p.fs.flow_mask.src_ip; 799 break; 800 801 case TOK_DSTIP6: 802 pa6 = &(p.fs.flow_mask.dst_ip6); 803 break; 804 805 case TOK_SRCIP6: 806 pa6 = &(p.fs.flow_mask.src_ip6); 807 break; 808 809 case TOK_FLOWID: 810 p20 = &p.fs.flow_mask.flow_id6; 811 break; 812 813 case TOK_DSTPORT: 814 p16 = &p.fs.flow_mask.dst_port; 815 break; 816 817 case TOK_SRCPORT: 818 p16 = &p.fs.flow_mask.src_port; 819 break; 820 821 case TOK_PROTO: 822 break; 823 824 default: 825 ac++; av--; /* backtrack */ 826 goto end_mask; 827 } 828 if (ac < 1) 829 errx(EX_USAGE, "mask: value missing"); 830 if (*av[0] == '/') { 831 a = strtoul(av[0]+1, &end, 0); 832 if (pa6 == NULL) 833 a = (a == 32) ? ~0 : (1 << a) - 1; 834 } else 835 a = strtoul(av[0], &end, 0); 836 if (p32 != NULL) 837 *p32 = a; 838 else if (p16 != NULL) { 839 if (a > 0xFFFF) 840 errx(EX_DATAERR, 841 "port mask must be 16 bit"); 842 *p16 = (uint16_t)a; 843 } else if (p20 != NULL) { 844 if (a > 0xfffff) 845 errx(EX_DATAERR, 846 "flow_id mask must be 20 bit"); 847 *p20 = (uint32_t)a; 848 } else if (pa6 != NULL) { 849 if (a > 128) 850 errx(EX_DATAERR, 851 "in6addr invalid mask len"); 852 else 853 n2mask(pa6, a); 854 } else { 855 if (a > 0xFF) 856 errx(EX_DATAERR, 857 "proto mask must be 8 bit"); 858 p.fs.flow_mask.proto = (uint8_t)a; 859 } 860 if (a != 0) 861 p.fs.flags_fs |= DN_HAVE_FLOW_MASK; 862 ac--; av++; 863 } /* end while, config masks */ 864end_mask: 865 break; 866 867 case TOK_RED: 868 case TOK_GRED: 869 NEED1("red/gred needs w_q/min_th/max_th/max_p\n"); 870 p.fs.flags_fs |= DN_IS_RED; 871 if (tok == TOK_GRED) 872 p.fs.flags_fs |= DN_IS_GENTLE_RED; 873 /* 874 * the format for parameters is w_q/min_th/max_th/max_p 875 */ 876 if ((end = strsep(&av[0], "/"))) { 877 double w_q = strtod(end, NULL); 878 if (w_q > 1 || w_q <= 0) 879 errx(EX_DATAERR, "0 < w_q <= 1"); 880 p.fs.w_q = (int) (w_q * (1 << SCALE_RED)); 881 } 882 if ((end = strsep(&av[0], "/"))) { 883 p.fs.min_th = strtoul(end, &end, 0); 884 if (*end == 'K' || *end == 'k') 885 p.fs.min_th *= 1024; 886 } 887 if ((end = strsep(&av[0], "/"))) { 888 p.fs.max_th = strtoul(end, &end, 0); 889 if (*end == 'K' || *end == 'k') 890 p.fs.max_th *= 1024; 891 } 892 if ((end = strsep(&av[0], "/"))) { 893 double max_p = strtod(end, NULL); 894 if (max_p > 1 || max_p <= 0) 895 errx(EX_DATAERR, "0 < max_p <= 1"); 896 p.fs.max_p = (int)(max_p * (1 << SCALE_RED)); 897 } 898 ac--; av++; 899 break; 900 901 case TOK_DROPTAIL: 902 p.fs.flags_fs &= ~(DN_IS_RED|DN_IS_GENTLE_RED); 903 break; 904 905 case TOK_BW: 906 NEED1("bw needs bandwidth or interface\n"); 907 if (co.do_pipe != 1) 908 errx(EX_DATAERR, "bandwidth only valid for pipes"); 909 read_bandwidth(av[0], &p.bandwidth, p.if_name, sizeof(p.if_name)); 910 ac--; av++; 911 break; 912 913 case TOK_DELAY: 914 if (co.do_pipe != 1) 915 errx(EX_DATAERR, "delay only valid for pipes"); 916 NEED1("delay needs argument 0..10000ms\n"); 917 p.delay = strtoul(av[0], NULL, 0); 918 ac--; av++; 919 break; 920 921 case TOK_WEIGHT: 922 if (co.do_pipe == 1) 923 errx(EX_DATAERR,"weight only valid for queues"); 924 NEED1("weight needs argument 0..100\n"); 925 p.fs.weight = strtoul(av[0], &end, 0); 926 ac--; av++; 927 break; 928 929 case TOK_PIPE: 930 if (co.do_pipe == 1) 931 errx(EX_DATAERR,"pipe only valid for queues"); 932 NEED1("pipe needs pipe_number\n"); 933 p.fs.parent_nr = strtoul(av[0], &end, 0); 934 ac--; av++; 935 break; 936 937 case TOK_PIPE_PROFILE: 938 if (co.do_pipe != 1) 939 errx(EX_DATAERR, "extra delay only valid for pipes"); 940 NEED1("extra delay needs the file name\n"); 941 p.samples = &samples[0]; 942 load_extra_delays(av[0], &p); 943 --ac; ++av; 944 break; 945 946 case TOK_BURST: 947 if (co.do_pipe != 1) 948 errx(EX_DATAERR, "burst only valid for pipes"); 949 NEED1("burst needs argument\n"); 950 errno = 0; 951 if (expand_number(av[0], &p.burst) < 0) 952 if (errno != ERANGE) 953 errx(EX_DATAERR, 954 "burst: invalid argument"); 955 if (errno || p.burst > (1ULL << 48) - 1) 956 errx(EX_DATAERR, 957 "burst: out of range (0..2^48-1)"); 958 ac--; av++; 959 break; 960 961 default: 962 errx(EX_DATAERR, "unrecognised option ``%s''", av[-1]); 963 } 964 } 965 if (co.do_pipe == 1) { 966 if (p.pipe_nr == 0) 967 errx(EX_DATAERR, "pipe_nr must be > 0"); 968 if (p.delay > 10000) 969 errx(EX_DATAERR, "delay must be < 10000"); 970 } else { /* co.do_pipe == 2, queue */ 971 if (p.fs.parent_nr == 0) 972 errx(EX_DATAERR, "pipe must be > 0"); 973 if (p.fs.weight >100) 974 errx(EX_DATAERR, "weight must be <= 100"); 975 } 976 977 /* check for bandwidth value */ 978 if (p.bandwidth == -1) { 979 p.bandwidth = 0; 980 if (p.samples_no > 0) 981 errx(EX_DATAERR, "profile requires a bandwidth limit"); 982 } 983 984 if (p.fs.flags_fs & DN_QSIZE_IS_BYTES) { 985 size_t len; 986 long limit; 987 988 len = sizeof(limit); 989 if (sysctlbyname("net.inet.ip.dummynet.pipe_byte_limit", 990 &limit, &len, NULL, 0) == -1) 991 limit = 1024*1024; 992 if (p.fs.qsize > limit) 993 errx(EX_DATAERR, "queue size must be < %ldB", limit); 994 } else { 995 size_t len; 996 long limit; 997 998 len = sizeof(limit); 999 if (sysctlbyname("net.inet.ip.dummynet.pipe_slot_limit", 1000 &limit, &len, NULL, 0) == -1) 1001 limit = 100; 1002 if (p.fs.qsize > limit) 1003 errx(EX_DATAERR, "2 <= queue size <= %ld", limit); 1004 } 1005 if (p.fs.flags_fs & DN_IS_RED) { 1006 size_t len; 1007 int lookup_depth, avg_pkt_size; 1008 double s, idle, weight, w_q; 1009 struct clockinfo ck; 1010 int t; 1011 1012 if (p.fs.min_th >= p.fs.max_th) 1013 errx(EX_DATAERR, "min_th %d must be < than max_th %d", 1014 p.fs.min_th, p.fs.max_th); 1015 if (p.fs.max_th == 0) 1016 errx(EX_DATAERR, "max_th must be > 0"); 1017 1018 len = sizeof(int); 1019 if (sysctlbyname("net.inet.ip.dummynet.red_lookup_depth", 1020 &lookup_depth, &len, NULL, 0) == -1) 1021 errx(1, "sysctlbyname(\"%s\")", 1022 "net.inet.ip.dummynet.red_lookup_depth"); 1023 if (lookup_depth == 0) 1024 errx(EX_DATAERR, "net.inet.ip.dummynet.red_lookup_depth" 1025 " must be greater than zero"); 1026 1027 len = sizeof(int); 1028 if (sysctlbyname("net.inet.ip.dummynet.red_avg_pkt_size", 1029 &avg_pkt_size, &len, NULL, 0) == -1) 1030 1031 errx(1, "sysctlbyname(\"%s\")", 1032 "net.inet.ip.dummynet.red_avg_pkt_size"); 1033 if (avg_pkt_size == 0) 1034 errx(EX_DATAERR, 1035 "net.inet.ip.dummynet.red_avg_pkt_size must" 1036 " be greater than zero"); 1037 1038 len = sizeof(struct clockinfo); 1039 if (sysctlbyname("kern.clockrate", &ck, &len, NULL, 0) == -1) 1040 errx(1, "sysctlbyname(\"%s\")", "kern.clockrate"); 1041 1042 /* 1043 * Ticks needed for sending a medium-sized packet. 1044 * Unfortunately, when we are configuring a WF2Q+ queue, we 1045 * do not have bandwidth information, because that is stored 1046 * in the parent pipe, and also we have multiple queues 1047 * competing for it. So we set s=0, which is not very 1048 * correct. But on the other hand, why do we want RED with 1049 * WF2Q+ ? 1050 */ 1051 if (p.bandwidth==0) /* this is a WF2Q+ queue */ 1052 s = 0; 1053 else 1054 s = (double)ck.hz * avg_pkt_size * 8 / p.bandwidth; 1055 1056 /* 1057 * max idle time (in ticks) before avg queue size becomes 0. 1058 * NOTA: (3/w_q) is approx the value x so that 1059 * (1-w_q)^x < 10^-3. 1060 */ 1061 w_q = ((double)p.fs.w_q) / (1 << SCALE_RED); 1062 idle = s * 3. / w_q; 1063 p.fs.lookup_step = (int)idle / lookup_depth; 1064 if (!p.fs.lookup_step) 1065 p.fs.lookup_step = 1; 1066 weight = 1 - w_q; 1067 for (t = p.fs.lookup_step; t > 1; --t) 1068 weight *= 1 - w_q; 1069 p.fs.lookup_weight = (int)(weight * (1 << SCALE_RED)); 1070 } 1071 if (p.samples_no <= 0) { 1072 i = do_cmd(IP_DUMMYNET_CONFIGURE, &p, sizeof p); 1073 } else { 1074 struct dn_pipe_max pm; 1075 int len = sizeof(pm); 1076 1077 memcpy(&pm.pipe, &p, sizeof(pm.pipe)); 1078 memcpy(&pm.samples, samples, sizeof(pm.samples)); 1079 1080 i = do_cmd(IP_DUMMYNET_CONFIGURE, &pm, len); 1081 } 1082 1083 if (i) 1084 err(1, "setsockopt(%s)", "IP_DUMMYNET_CONFIGURE"); 1085} 1086