/* dummynet.c revision 190865 */
1/* 2 * Copyright (c) 2002-2003 Luigi Rizzo 3 * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp 4 * Copyright (c) 1994 Ugen J.S.Antsilevich 5 * 6 * Idea and grammar partially left from: 7 * Copyright (c) 1993 Daniel Boulet 8 * 9 * Redistribution and use in source forms, with and without modification, 10 * are permitted provided that this entire comment appears intact. 11 * 12 * Redistribution in binary form may occur without any restrictions. 13 * Obviously, it would be nice if you gave credit where credit is due 14 * but requiring it would be too onerous. 15 * 16 * This software is provided ``AS IS'' without any warranties of any kind. 17 * 18 * NEW command line interface for IP firewall facility 19 * 20 * $FreeBSD: head/sbin/ipfw/dummynet.c 190865 2009-04-09 12:46:00Z luigi $ 21 * 22 * dummynet support 23 */ 24 25#include <sys/types.h> 26#include <sys/socket.h> 27#include <sys/queue.h> 28/* XXX there are several sysctl leftover here */ 29#include <sys/sysctl.h> 30 31#include "ipfw2.h" 32 33#include <ctype.h> 34#include <err.h> 35#include <netdb.h> 36#include <stdio.h> 37#include <stdlib.h> 38#include <string.h> 39#include <sysexits.h> 40 41#include <net/if.h> 42#include <netinet/in.h> 43#include <netinet/ip_fw.h> 44#include <netinet/ip_dummynet.h> 45#include <arpa/inet.h> /* inet_ntoa */ 46 47static struct _s_x dummynet_params[] = { 48 { "plr", TOK_PLR }, 49 { "noerror", TOK_NOERROR }, 50 { "buckets", TOK_BUCKETS }, 51 { "dst-ip", TOK_DSTIP }, 52 { "src-ip", TOK_SRCIP }, 53 { "dst-port", TOK_DSTPORT }, 54 { "src-port", TOK_SRCPORT }, 55 { "proto", TOK_PROTO }, 56 { "weight", TOK_WEIGHT }, 57 { "all", TOK_ALL }, 58 { "mask", TOK_MASK }, 59 { "droptail", TOK_DROPTAIL }, 60 { "red", TOK_RED }, 61 { "gred", TOK_GRED }, 62 { "bw", TOK_BW }, 63 { "bandwidth", TOK_BW }, 64 { "delay", TOK_DELAY }, 65 { "pipe", TOK_PIPE }, 66 { "queue", TOK_QUEUE }, 67 { "flow-id", TOK_FLOWID}, 68 { "dst-ipv6", TOK_DSTIP6}, 69 { "dst-ip6", TOK_DSTIP6}, 70 { "src-ipv6", 
TOK_SRCIP6}, 71 { "src-ip6", TOK_SRCIP6}, 72 { "profile", TOK_PIPE_PROFILE}, 73 { "dummynet-params", TOK_NULL }, 74 { NULL, 0 } /* terminator */ 75}; 76 77static int 78sort_q(const void *pa, const void *pb) 79{ 80 int rev = (co.do_sort < 0); 81 int field = rev ? -co.do_sort : co.do_sort; 82 long long res = 0; 83 const struct dn_flow_queue *a = pa; 84 const struct dn_flow_queue *b = pb; 85 86 switch (field) { 87 case 1: /* pkts */ 88 res = a->len - b->len; 89 break; 90 case 2: /* bytes */ 91 res = a->len_bytes - b->len_bytes; 92 break; 93 94 case 3: /* tot pkts */ 95 res = a->tot_pkts - b->tot_pkts; 96 break; 97 98 case 4: /* tot bytes */ 99 res = a->tot_bytes - b->tot_bytes; 100 break; 101 } 102 if (res < 0) 103 res = -1; 104 if (res > 0) 105 res = 1; 106 return (int)(rev ? res : -res); 107} 108 109static void 110list_queues(struct dn_flow_set *fs, struct dn_flow_queue *q) 111{ 112 int l; 113 int index_printed, indexes = 0; 114 char buff[255]; 115 struct protoent *pe; 116 117 if (fs->rq_elements == 0) 118 return; 119 120 if (co.do_sort != 0) 121 heapsort(q, fs->rq_elements, sizeof *q, sort_q); 122 123 /* Print IPv4 flows */ 124 index_printed = 0; 125 for (l = 0; l < fs->rq_elements; l++) { 126 struct in_addr ina; 127 128 /* XXX: Should check for IPv4 flows */ 129 if (IS_IP6_FLOW_ID(&(q[l].id))) 130 continue; 131 132 if (!index_printed) { 133 index_printed = 1; 134 if (indexes > 0) /* currently a no-op */ 135 printf("\n"); 136 indexes++; 137 printf(" " 138 "mask: 0x%02x 0x%08x/0x%04x -> 0x%08x/0x%04x\n", 139 fs->flow_mask.proto, 140 fs->flow_mask.src_ip, fs->flow_mask.src_port, 141 fs->flow_mask.dst_ip, fs->flow_mask.dst_port); 142 143 printf("BKT Prot ___Source IP/port____ " 144 "____Dest. 
IP/port____ " 145 "Tot_pkt/bytes Pkt/Byte Drp\n"); 146 } 147 148 printf("%3d ", q[l].hash_slot); 149 pe = getprotobynumber(q[l].id.proto); 150 if (pe) 151 printf("%-4s ", pe->p_name); 152 else 153 printf("%4u ", q[l].id.proto); 154 ina.s_addr = htonl(q[l].id.src_ip); 155 printf("%15s/%-5d ", 156 inet_ntoa(ina), q[l].id.src_port); 157 ina.s_addr = htonl(q[l].id.dst_ip); 158 printf("%15s/%-5d ", 159 inet_ntoa(ina), q[l].id.dst_port); 160 printf("%4llu %8llu %2u %4u %3u\n", 161 align_uint64(&q[l].tot_pkts), 162 align_uint64(&q[l].tot_bytes), 163 q[l].len, q[l].len_bytes, q[l].drops); 164 if (co.verbose) 165 printf(" S %20llu F %20llu\n", 166 align_uint64(&q[l].S), align_uint64(&q[l].F)); 167 } 168 169 /* Print IPv6 flows */ 170 index_printed = 0; 171 for (l = 0; l < fs->rq_elements; l++) { 172 if (!IS_IP6_FLOW_ID(&(q[l].id))) 173 continue; 174 175 if (!index_printed) { 176 index_printed = 1; 177 if (indexes > 0) 178 printf("\n"); 179 indexes++; 180 printf("\n mask: proto: 0x%02x, flow_id: 0x%08x, ", 181 fs->flow_mask.proto, fs->flow_mask.flow_id6); 182 inet_ntop(AF_INET6, &(fs->flow_mask.src_ip6), 183 buff, sizeof(buff)); 184 printf("%s/0x%04x -> ", buff, fs->flow_mask.src_port); 185 inet_ntop( AF_INET6, &(fs->flow_mask.dst_ip6), 186 buff, sizeof(buff) ); 187 printf("%s/0x%04x\n", buff, fs->flow_mask.dst_port); 188 189 printf("BKT ___Prot___ _flow-id_ " 190 "______________Source IPv6/port_______________ " 191 "_______________Dest. 
IPv6/port_______________ " 192 "Tot_pkt/bytes Pkt/Byte Drp\n"); 193 } 194 printf("%3d ", q[l].hash_slot); 195 pe = getprotobynumber(q[l].id.proto); 196 if (pe != NULL) 197 printf("%9s ", pe->p_name); 198 else 199 printf("%9u ", q[l].id.proto); 200 printf("%7d %39s/%-5d ", q[l].id.flow_id6, 201 inet_ntop(AF_INET6, &(q[l].id.src_ip6), buff, sizeof(buff)), 202 q[l].id.src_port); 203 printf(" %39s/%-5d ", 204 inet_ntop(AF_INET6, &(q[l].id.dst_ip6), buff, sizeof(buff)), 205 q[l].id.dst_port); 206 printf(" %4llu %8llu %2u %4u %3u\n", 207 align_uint64(&q[l].tot_pkts), 208 align_uint64(&q[l].tot_bytes), 209 q[l].len, q[l].len_bytes, q[l].drops); 210 if (co.verbose) 211 printf(" S %20llu F %20llu\n", 212 align_uint64(&q[l].S), 213 align_uint64(&q[l].F)); 214 } 215} 216 217static void 218print_flowset_parms(struct dn_flow_set *fs, char *prefix) 219{ 220 int l; 221 char qs[30]; 222 char plr[30]; 223 char red[90]; /* Display RED parameters */ 224 225 l = fs->qsize; 226 if (fs->flags_fs & DN_QSIZE_IS_BYTES) { 227 if (l >= 8192) 228 sprintf(qs, "%d KB", l / 1024); 229 else 230 sprintf(qs, "%d B", l); 231 } else 232 sprintf(qs, "%3d sl.", l); 233 if (fs->plr) 234 sprintf(plr, "plr %f", 1.0 * fs->plr / (double)(0x7fffffff)); 235 else 236 plr[0] = '\0'; 237 if (fs->flags_fs & DN_IS_RED) /* RED parameters */ 238 sprintf(red, 239 "\n\t %cRED w_q %f min_th %d max_th %d max_p %f", 240 (fs->flags_fs & DN_IS_GENTLE_RED) ? 
'G' : ' ', 241 1.0 * fs->w_q / (double)(1 << SCALE_RED), 242 SCALE_VAL(fs->min_th), 243 SCALE_VAL(fs->max_th), 244 1.0 * fs->max_p / (double)(1 << SCALE_RED)); 245 else 246 sprintf(red, "droptail"); 247 248 printf("%s %s%s %d queues (%d buckets) %s\n", 249 prefix, qs, plr, fs->rq_elements, fs->rq_size, red); 250} 251 252static void 253print_extra_delay_parms(struct dn_pipe *p, char *prefix) 254{ 255 double loss; 256 if (p->samples_no <= 0) 257 return; 258 259 loss = p->loss_level; 260 loss /= p->samples_no; 261 printf("%s profile: name \"%s\" loss %f samples %d\n", 262 prefix, p->name, loss, p->samples_no); 263} 264 265void 266ipfw_list_pipes(void *data, uint nbytes, int ac, char *av[]) 267{ 268 int rulenum; 269 void *next = data; 270 struct dn_pipe *p = (struct dn_pipe *) data; 271 struct dn_flow_set *fs; 272 struct dn_flow_queue *q; 273 int l; 274 275 if (ac > 0) 276 rulenum = strtoul(*av++, NULL, 10); 277 else 278 rulenum = 0; 279 for (; nbytes >= sizeof *p; p = (struct dn_pipe *)next) { 280 double b = p->bandwidth; 281 char buf[30]; 282 char prefix[80]; 283 284 if (SLIST_NEXT(p, next) != (struct dn_pipe *)DN_IS_PIPE) 285 break; /* done with pipes, now queues */ 286 287 /* 288 * compute length, as pipe have variable size 289 */ 290 l = sizeof(*p) + p->fs.rq_elements * sizeof(*q); 291 next = (char *)p + l; 292 nbytes -= l; 293 294 if ((rulenum != 0 && rulenum != p->pipe_nr) || co.do_pipe == 2) 295 continue; 296 297 /* 298 * Print rate (or clocking interface) 299 */ 300 if (p->if_name[0] != '\0') 301 sprintf(buf, "%s", p->if_name); 302 else if (b == 0) 303 sprintf(buf, "unlimited"); 304 else if (b >= 1000000) 305 sprintf(buf, "%7.3f Mbit/s", b/1000000); 306 else if (b >= 1000) 307 sprintf(buf, "%7.3f Kbit/s", b/1000); 308 else 309 sprintf(buf, "%7.3f bit/s ", b); 310 311 sprintf(prefix, "%05d: %s %4d ms ", 312 p->pipe_nr, buf, p->delay); 313 314 print_extra_delay_parms(p, prefix); 315 316 print_flowset_parms(&(p->fs), prefix); 317 if (co.verbose) 318 printf(" V 
%20llu\n", align_uint64(&p->V) >> MY_M); 319 320 q = (struct dn_flow_queue *)(p+1); 321 list_queues(&(p->fs), q); 322 } 323 for (fs = next; nbytes >= sizeof *fs; fs = next) { 324 char prefix[80]; 325 326 if (SLIST_NEXT(fs, next) != (struct dn_flow_set *)DN_IS_QUEUE) 327 break; 328 l = sizeof(*fs) + fs->rq_elements * sizeof(*q); 329 next = (char *)fs + l; 330 nbytes -= l; 331 332 if (rulenum != 0 && ((rulenum != fs->fs_nr && co.do_pipe == 2) || 333 (rulenum != fs->parent_nr && co.do_pipe == 1))) { 334 continue; 335 } 336 337 q = (struct dn_flow_queue *)(fs+1); 338 sprintf(prefix, "q%05d: weight %d pipe %d ", 339 fs->fs_nr, fs->weight, fs->parent_nr); 340 print_flowset_parms(fs, prefix); 341 list_queues(fs, q); 342 } 343} 344 345/* 346 * Delete pipe or queue i 347 */ 348int 349ipfw_delete_pipe(int pipe_or_queue, int i) 350{ 351 struct dn_pipe p; 352 353 memset(&p, 0, sizeof p); 354 if (pipe_or_queue == 1) 355 p.pipe_nr = i; /* pipe */ 356 else 357 p.fs.fs_nr = i; /* queue */ 358 i = do_cmd(IP_DUMMYNET_DEL, &p, sizeof p); 359 if (i) { 360 i = 1; 361 warn("rule %u: setsockopt(IP_DUMMYNET_DEL)", i); 362 } 363 return i; 364} 365 366/* 367 * Code to parse delay profiles. 368 * 369 * Some link types introduce extra delays in the transmission 370 * of a packet, e.g. because of MAC level framing, contention on 371 * the use of the channel, MAC level retransmissions and so on. 372 * From our point of view, the channel is effectively unavailable 373 * for this extra time, which is constant or variable depending 374 * on the link type. Additionally, packets may be dropped after this 375 * time (e.g. on a wireless link after too many retransmissions). 376 * We can model the additional delay with an empirical curve 377 * that represents its distribution. 
 *
 *      cumulative probability
 *      1.0 ^
 *          |
 *      L   +-- loss-level          x
 *          |                 ******
 *          |                *
 *          |           *****
 *          |          *
 *          |        **
 *          |       *
 *          +-------*------------------->
 *                      delay
 *
 * The empirical curve may have both vertical and horizontal lines.
 * Vertical lines represent constant delay for a range of
 * probabilities; horizontal lines correspond to a discontinuity
 * in the delay distribution: the pipe will use the largest delay
 * for a given probability.
 *
 * To pass the curve to dummynet, we must store the parameters
 * in a file as described below, and issue the command
 *
 *      ipfw pipe <n> config ... bw XXX profile <filename> ...
 *
 * The file format is the following, with whitespace acting as
 * a separator and '#' indicating the beginning of a comment:
 *
 * samples N
 *      the number of samples used in the internal
 *      representation (2..1024; default 100);
 *
 * loss-level L
 *      The probability above which packets are lost.
 *      (0.0 <= L <= 1.0, default 1.0 i.e. no loss);
 *
 * name identifier
 *      Optionally, a name (listed by "ipfw pipe show")
 *      to identify the distribution;
 *
 * "delay prob" | "prob delay"
 *      One of these two lines is mandatory and defines
 *      the format of the following lines with data points.
 *
 * XXX YYY
 *      2 or more lines representing points in the curve,
 *      with either delay or probability first, according
 *      to the chosen format.
 *      The unit for delay is milliseconds.
 *
 * Data points do not need to be ordered or equal to the number
 * specified in the "samples" line. ipfw will sort and interpolate
 * the curve as needed.
431 * 432 * Example of a profile file: 433 434 name bla_bla_bla 435 samples 100 436 loss-level 0.86 437 prob delay 438 0 200 # minimum overhead is 200ms 439 0.5 200 440 0.5 300 441 0.8 1000 442 0.9 1300 443 1 1300 444 445 * Internally, we will convert the curve to a fixed number of 446 * samples, and when it is time to transmit a packet we will 447 * model the extra delay as extra bits in the packet. 448 * 449 */ 450 451#define ED_MAX_LINE_LEN 256+ED_MAX_NAME_LEN 452#define ED_TOK_SAMPLES "samples" 453#define ED_TOK_LOSS "loss-level" 454#define ED_TOK_NAME "name" 455#define ED_TOK_DELAY "delay" 456#define ED_TOK_PROB "prob" 457#define ED_SEPARATORS " \t\n" 458#define ED_MIN_SAMPLES_NO 2 459 460/* 461 * returns 1 if s is a non-negative number, with at least one '.' 462 */ 463static int 464is_valid_number(const char *s) 465{ 466 int i, dots_found = 0; 467 int len = strlen(s); 468 469 for (i = 0; i<len; ++i) 470 if (!isdigit(s[i]) && (s[i] !='.' || ++dots_found > 1)) 471 return 0; 472 return 1; 473} 474 475struct point { 476 double prob; 477 double delay; 478}; 479 480int 481compare_points(const void *vp1, const void *vp2) 482{ 483 const struct point *p1 = vp1; 484 const struct point *p2 = vp2; 485 double res = 0; 486 487 res = p1->prob - p2->prob; 488 if (res == 0) 489 res = p1->delay - p2->delay; 490 if (res < 0) 491 return -1; 492 else if (res > 0) 493 return 1; 494 else 495 return 0; 496} 497 498#define ED_EFMT(s) EX_DATAERR,"error in %s at line %d: "#s,filename,lineno 499 500static void 501load_extra_delays(const char *filename, struct dn_pipe *p) 502{ 503 char line[ED_MAX_LINE_LEN]; 504 FILE *f; 505 int lineno = 0; 506 int i; 507 508 int samples = -1; 509 double loss = -1.0; 510 char profile_name[ED_MAX_NAME_LEN]; 511 int delay_first = -1; 512 int do_points = 0; 513 struct point points[ED_MAX_SAMPLES_NO]; 514 int points_no = 0; 515 516 profile_name[0] = '\0'; 517 f = fopen(filename, "r"); 518 if (f == NULL) 519 err(EX_UNAVAILABLE, "fopen: %s", filename); 520 521 
while (fgets(line, ED_MAX_LINE_LEN, f)) { /* read commands */ 522 char *s, *cur = line, *name = NULL, *arg = NULL; 523 524 ++lineno; 525 526 /* parse the line */ 527 while (cur) { 528 s = strsep(&cur, ED_SEPARATORS); 529 if (s == NULL || *s == '#') 530 break; 531 if (*s == '\0') 532 continue; 533 if (arg) 534 errx(ED_EFMT("too many arguments")); 535 if (name == NULL) 536 name = s; 537 else 538 arg = s; 539 } 540 if (name == NULL) /* empty line */ 541 continue; 542 if (arg == NULL) 543 errx(ED_EFMT("missing arg for %s"), name); 544 545 if (!strcasecmp(name, ED_TOK_SAMPLES)) { 546 if (samples > 0) 547 errx(ED_EFMT("duplicate ``samples'' line")); 548 if (atoi(arg) <=0) 549 errx(ED_EFMT("invalid number of samples")); 550 samples = atoi(arg); 551 if (samples>ED_MAX_SAMPLES_NO) 552 errx(ED_EFMT("too many samples, maximum is %d"), 553 ED_MAX_SAMPLES_NO); 554 do_points = 0; 555 } else if (!strcasecmp(name, ED_TOK_LOSS)) { 556 if (loss != -1.0) 557 errx(ED_EFMT("duplicated token: %s"), name); 558 if (!is_valid_number(arg)) 559 errx(ED_EFMT("invalid %s"), arg); 560 loss = atof(arg); 561 if (loss > 1) 562 errx(ED_EFMT("%s greater than 1.0"), name); 563 do_points = 0; 564 } else if (!strcasecmp(name, ED_TOK_NAME)) { 565 if (profile_name[0] != '\0') 566 errx(ED_EFMT("duplicated token: %s"), name); 567 strncpy(profile_name, arg, sizeof(profile_name) - 1); 568 profile_name[sizeof(profile_name)-1] = '\0'; 569 do_points = 0; 570 } else if (!strcasecmp(name, ED_TOK_DELAY)) { 571 if (do_points) 572 errx(ED_EFMT("duplicated token: %s"), name); 573 delay_first = 1; 574 do_points = 1; 575 } else if (!strcasecmp(name, ED_TOK_PROB)) { 576 if (do_points) 577 errx(ED_EFMT("duplicated token: %s"), name); 578 delay_first = 0; 579 do_points = 1; 580 } else if (do_points) { 581 if (!is_valid_number(name) || !is_valid_number(arg)) 582 errx(ED_EFMT("invalid point found")); 583 if (delay_first) { 584 points[points_no].delay = atof(name); 585 points[points_no].prob = atof(arg); 586 } else { 587 
points[points_no].delay = atof(arg); 588 points[points_no].prob = atof(name); 589 } 590 if (points[points_no].prob > 1.0) 591 errx(ED_EFMT("probability greater than 1.0")); 592 ++points_no; 593 } else { 594 errx(ED_EFMT("unrecognised command '%s'"), name); 595 } 596 } 597 598 if (samples == -1) { 599 warnx("'%s' not found, assuming 100", ED_TOK_SAMPLES); 600 samples = 100; 601 } 602 603 if (loss == -1.0) { 604 warnx("'%s' not found, assuming no loss", ED_TOK_LOSS); 605 loss = 1; 606 } 607 608 /* make sure that there are enough points. */ 609 if (points_no < ED_MIN_SAMPLES_NO) 610 errx(ED_EFMT("too few samples, need at least %d"), 611 ED_MIN_SAMPLES_NO); 612 613 qsort(points, points_no, sizeof(struct point), compare_points); 614 615 /* interpolation */ 616 for (i = 0; i<points_no-1; ++i) { 617 double y1 = points[i].prob * samples; 618 double x1 = points[i].delay; 619 double y2 = points[i+1].prob * samples; 620 double x2 = points[i+1].delay; 621 622 int index = y1; 623 int stop = y2; 624 625 if (x1 == x2) { 626 for (; index<stop; ++index) 627 p->samples[index] = x1; 628 } else { 629 double m = (y2-y1)/(x2-x1); 630 double c = y1 - m*x1; 631 for (; index<stop ; ++index) 632 p->samples[index] = (index - c)/m; 633 } 634 } 635 p->samples_no = samples; 636 p->loss_level = loss * samples; 637 strncpy(p->name, profile_name, sizeof(p->name)); 638} 639 640void 641ipfw_config_pipe(int ac, char **av) 642{ 643 int samples[ED_MAX_SAMPLES_NO]; 644 struct dn_pipe p; 645 int i; 646 char *end; 647 void *par = NULL; 648 649 memset(&p, 0, sizeof p); 650 651 av++; ac--; 652 /* Pipe number */ 653 if (ac && isdigit(**av)) { 654 i = atoi(*av); av++; ac--; 655 if (co.do_pipe == 1) 656 p.pipe_nr = i; 657 else 658 p.fs.fs_nr = i; 659 } 660 while (ac > 0) { 661 double d; 662 int tok = match_token(dummynet_params, *av); 663 ac--; av++; 664 665 switch(tok) { 666 case TOK_NOERROR: 667 p.fs.flags_fs |= DN_NOERROR; 668 break; 669 670 case TOK_PLR: 671 NEED1("plr needs argument 0..1\n"); 672 d = 
strtod(av[0], NULL); 673 if (d > 1) 674 d = 1; 675 else if (d < 0) 676 d = 0; 677 p.fs.plr = (int)(d*0x7fffffff); 678 ac--; av++; 679 break; 680 681 case TOK_QUEUE: 682 NEED1("queue needs queue size\n"); 683 end = NULL; 684 p.fs.qsize = strtoul(av[0], &end, 0); 685 if (*end == 'K' || *end == 'k') { 686 p.fs.flags_fs |= DN_QSIZE_IS_BYTES; 687 p.fs.qsize *= 1024; 688 } else if (*end == 'B' || 689 _substrcmp2(end, "by", "bytes") == 0) { 690 p.fs.flags_fs |= DN_QSIZE_IS_BYTES; 691 } 692 ac--; av++; 693 break; 694 695 case TOK_BUCKETS: 696 NEED1("buckets needs argument\n"); 697 p.fs.rq_size = strtoul(av[0], NULL, 0); 698 ac--; av++; 699 break; 700 701 case TOK_MASK: 702 NEED1("mask needs mask specifier\n"); 703 /* 704 * per-flow queue, mask is dst_ip, dst_port, 705 * src_ip, src_port, proto measured in bits 706 */ 707 par = NULL; 708 709 bzero(&p.fs.flow_mask, sizeof(p.fs.flow_mask)); 710 end = NULL; 711 712 while (ac >= 1) { 713 uint32_t *p32 = NULL; 714 uint16_t *p16 = NULL; 715 uint32_t *p20 = NULL; 716 struct in6_addr *pa6 = NULL; 717 uint32_t a; 718 719 tok = match_token(dummynet_params, *av); 720 ac--; av++; 721 switch(tok) { 722 case TOK_ALL: 723 /* 724 * special case, all bits significant 725 */ 726 p.fs.flow_mask.dst_ip = ~0; 727 p.fs.flow_mask.src_ip = ~0; 728 p.fs.flow_mask.dst_port = ~0; 729 p.fs.flow_mask.src_port = ~0; 730 p.fs.flow_mask.proto = ~0; 731 n2mask(&(p.fs.flow_mask.dst_ip6), 128); 732 n2mask(&(p.fs.flow_mask.src_ip6), 128); 733 p.fs.flow_mask.flow_id6 = ~0; 734 p.fs.flags_fs |= DN_HAVE_FLOW_MASK; 735 goto end_mask; 736 737 case TOK_DSTIP: 738 p32 = &p.fs.flow_mask.dst_ip; 739 break; 740 741 case TOK_SRCIP: 742 p32 = &p.fs.flow_mask.src_ip; 743 break; 744 745 case TOK_DSTIP6: 746 pa6 = &(p.fs.flow_mask.dst_ip6); 747 break; 748 749 case TOK_SRCIP6: 750 pa6 = &(p.fs.flow_mask.src_ip6); 751 break; 752 753 case TOK_FLOWID: 754 p20 = &p.fs.flow_mask.flow_id6; 755 break; 756 757 case TOK_DSTPORT: 758 p16 = &p.fs.flow_mask.dst_port; 759 break; 760 761 
case TOK_SRCPORT: 762 p16 = &p.fs.flow_mask.src_port; 763 break; 764 765 case TOK_PROTO: 766 break; 767 768 default: 769 ac++; av--; /* backtrack */ 770 goto end_mask; 771 } 772 if (ac < 1) 773 errx(EX_USAGE, "mask: value missing"); 774 if (*av[0] == '/') { 775 a = strtoul(av[0]+1, &end, 0); 776 if (pa6 == NULL) 777 a = (a == 32) ? ~0 : (1 << a) - 1; 778 } else 779 a = strtoul(av[0], &end, 0); 780 if (p32 != NULL) 781 *p32 = a; 782 else if (p16 != NULL) { 783 if (a > 0xFFFF) 784 errx(EX_DATAERR, 785 "port mask must be 16 bit"); 786 *p16 = (uint16_t)a; 787 } else if (p20 != NULL) { 788 if (a > 0xfffff) 789 errx(EX_DATAERR, 790 "flow_id mask must be 20 bit"); 791 *p20 = (uint32_t)a; 792 } else if (pa6 != NULL) { 793 if (a > 128) 794 errx(EX_DATAERR, 795 "in6addr invalid mask len"); 796 else 797 n2mask(pa6, a); 798 } else { 799 if (a > 0xFF) 800 errx(EX_DATAERR, 801 "proto mask must be 8 bit"); 802 p.fs.flow_mask.proto = (uint8_t)a; 803 } 804 if (a != 0) 805 p.fs.flags_fs |= DN_HAVE_FLOW_MASK; 806 ac--; av++; 807 } /* end while, config masks */ 808end_mask: 809 break; 810 811 case TOK_RED: 812 case TOK_GRED: 813 NEED1("red/gred needs w_q/min_th/max_th/max_p\n"); 814 p.fs.flags_fs |= DN_IS_RED; 815 if (tok == TOK_GRED) 816 p.fs.flags_fs |= DN_IS_GENTLE_RED; 817 /* 818 * the format for parameters is w_q/min_th/max_th/max_p 819 */ 820 if ((end = strsep(&av[0], "/"))) { 821 double w_q = strtod(end, NULL); 822 if (w_q > 1 || w_q <= 0) 823 errx(EX_DATAERR, "0 < w_q <= 1"); 824 p.fs.w_q = (int) (w_q * (1 << SCALE_RED)); 825 } 826 if ((end = strsep(&av[0], "/"))) { 827 p.fs.min_th = strtoul(end, &end, 0); 828 if (*end == 'K' || *end == 'k') 829 p.fs.min_th *= 1024; 830 } 831 if ((end = strsep(&av[0], "/"))) { 832 p.fs.max_th = strtoul(end, &end, 0); 833 if (*end == 'K' || *end == 'k') 834 p.fs.max_th *= 1024; 835 } 836 if ((end = strsep(&av[0], "/"))) { 837 double max_p = strtod(end, NULL); 838 if (max_p > 1 || max_p <= 0) 839 errx(EX_DATAERR, "0 < max_p <= 1"); 840 
p.fs.max_p = (int)(max_p * (1 << SCALE_RED)); 841 } 842 ac--; av++; 843 break; 844 845 case TOK_DROPTAIL: 846 p.fs.flags_fs &= ~(DN_IS_RED|DN_IS_GENTLE_RED); 847 break; 848 849 case TOK_BW: 850 NEED1("bw needs bandwidth or interface\n"); 851 if (co.do_pipe != 1) 852 errx(EX_DATAERR, "bandwidth only valid for pipes"); 853 /* 854 * set clocking interface or bandwidth value 855 */ 856 if (av[0][0] >= 'a' && av[0][0] <= 'z') { 857 int l = sizeof(p.if_name)-1; 858 /* interface name */ 859 strncpy(p.if_name, av[0], l); 860 p.if_name[l] = '\0'; 861 p.bandwidth = 0; 862 } else { 863 p.if_name[0] = '\0'; 864 p.bandwidth = strtoul(av[0], &end, 0); 865 if (*end == 'K' || *end == 'k') { 866 end++; 867 p.bandwidth *= 1000; 868 } else if (*end == 'M') { 869 end++; 870 p.bandwidth *= 1000000; 871 } 872 if ((*end == 'B' && 873 _substrcmp2(end, "Bi", "Bit/s") != 0) || 874 _substrcmp2(end, "by", "bytes") == 0) 875 p.bandwidth *= 8; 876 if (p.bandwidth < 0) 877 errx(EX_DATAERR, "bandwidth too large"); 878 } 879 ac--; av++; 880 break; 881 882 case TOK_DELAY: 883 if (co.do_pipe != 1) 884 errx(EX_DATAERR, "delay only valid for pipes"); 885 NEED1("delay needs argument 0..10000ms\n"); 886 p.delay = strtoul(av[0], NULL, 0); 887 ac--; av++; 888 break; 889 890 case TOK_WEIGHT: 891 if (co.do_pipe == 1) 892 errx(EX_DATAERR,"weight only valid for queues"); 893 NEED1("weight needs argument 0..100\n"); 894 p.fs.weight = strtoul(av[0], &end, 0); 895 ac--; av++; 896 break; 897 898 case TOK_PIPE: 899 if (co.do_pipe == 1) 900 errx(EX_DATAERR,"pipe only valid for queues"); 901 NEED1("pipe needs pipe_number\n"); 902 p.fs.parent_nr = strtoul(av[0], &end, 0); 903 ac--; av++; 904 break; 905 906 case TOK_PIPE_PROFILE: 907 if (co.do_pipe != 1) 908 errx(EX_DATAERR, "extra delay only valid for pipes"); 909 NEED1("extra delay needs the file name\n"); 910 p.samples = &samples[0]; 911 load_extra_delays(av[0], &p); 912 --ac; ++av; 913 break; 914 915 default: 916 errx(EX_DATAERR, "unrecognised option ``%s''", 
av[-1]); 917 } 918 } 919 if (co.do_pipe == 1) { 920 if (p.pipe_nr == 0) 921 errx(EX_DATAERR, "pipe_nr must be > 0"); 922 if (p.delay > 10000) 923 errx(EX_DATAERR, "delay must be < 10000"); 924 if (p.samples_no > 0 && p.bandwidth == 0) 925 errx(EX_DATAERR, 926 "profile requires a bandwidth limit"); 927 } else { /* co.do_pipe == 2, queue */ 928 if (p.fs.parent_nr == 0) 929 errx(EX_DATAERR, "pipe must be > 0"); 930 if (p.fs.weight >100) 931 errx(EX_DATAERR, "weight must be <= 100"); 932 } 933 if (p.fs.flags_fs & DN_QSIZE_IS_BYTES) { 934 size_t len; 935 long limit; 936 937 len = sizeof(limit); 938 if (sysctlbyname("net.inet.ip.dummynet.pipe_byte_limit", 939 &limit, &len, NULL, 0) == -1) 940 limit = 1024*1024; 941 if (p.fs.qsize > limit) 942 errx(EX_DATAERR, "queue size must be < %ldB", limit); 943 } else { 944 size_t len; 945 long limit; 946 947 len = sizeof(limit); 948 if (sysctlbyname("net.inet.ip.dummynet.pipe_slot_limit", 949 &limit, &len, NULL, 0) == -1) 950 limit = 100; 951 if (p.fs.qsize > limit) 952 errx(EX_DATAERR, "2 <= queue size <= %ld", limit); 953 } 954 if (p.fs.flags_fs & DN_IS_RED) { 955 size_t len; 956 int lookup_depth, avg_pkt_size; 957 double s, idle, weight, w_q; 958 struct clockinfo ck; 959 int t; 960 961 if (p.fs.min_th >= p.fs.max_th) 962 errx(EX_DATAERR, "min_th %d must be < than max_th %d", 963 p.fs.min_th, p.fs.max_th); 964 if (p.fs.max_th == 0) 965 errx(EX_DATAERR, "max_th must be > 0"); 966 967 len = sizeof(int); 968 if (sysctlbyname("net.inet.ip.dummynet.red_lookup_depth", 969 &lookup_depth, &len, NULL, 0) == -1) 970 errx(1, "sysctlbyname(\"%s\")", 971 "net.inet.ip.dummynet.red_lookup_depth"); 972 if (lookup_depth == 0) 973 errx(EX_DATAERR, "net.inet.ip.dummynet.red_lookup_depth" 974 " must be greater than zero"); 975 976 len = sizeof(int); 977 if (sysctlbyname("net.inet.ip.dummynet.red_avg_pkt_size", 978 &avg_pkt_size, &len, NULL, 0) == -1) 979 980 errx(1, "sysctlbyname(\"%s\")", 981 "net.inet.ip.dummynet.red_avg_pkt_size"); 982 if 
(avg_pkt_size == 0) 983 errx(EX_DATAERR, 984 "net.inet.ip.dummynet.red_avg_pkt_size must" 985 " be greater than zero"); 986 987 len = sizeof(struct clockinfo); 988 if (sysctlbyname("kern.clockrate", &ck, &len, NULL, 0) == -1) 989 errx(1, "sysctlbyname(\"%s\")", "kern.clockrate"); 990 991 /* 992 * Ticks needed for sending a medium-sized packet. 993 * Unfortunately, when we are configuring a WF2Q+ queue, we 994 * do not have bandwidth information, because that is stored 995 * in the parent pipe, and also we have multiple queues 996 * competing for it. So we set s=0, which is not very 997 * correct. But on the other hand, why do we want RED with 998 * WF2Q+ ? 999 */ 1000 if (p.bandwidth==0) /* this is a WF2Q+ queue */ 1001 s = 0; 1002 else 1003 s = (double)ck.hz * avg_pkt_size * 8 / p.bandwidth; 1004 1005 /* 1006 * max idle time (in ticks) before avg queue size becomes 0. 1007 * NOTA: (3/w_q) is approx the value x so that 1008 * (1-w_q)^x < 10^-3. 1009 */ 1010 w_q = ((double)p.fs.w_q) / (1 << SCALE_RED); 1011 idle = s * 3. / w_q; 1012 p.fs.lookup_step = (int)idle / lookup_depth; 1013 if (!p.fs.lookup_step) 1014 p.fs.lookup_step = 1; 1015 weight = 1 - w_q; 1016 for (t = p.fs.lookup_step; t > 1; --t) 1017 weight *= 1 - w_q; 1018 p.fs.lookup_weight = (int)(weight * (1 << SCALE_RED)); 1019 } 1020 if (p.samples_no <= 0) { 1021 i = do_cmd(IP_DUMMYNET_CONFIGURE, &p, sizeof p); 1022 } else { 1023 struct dn_pipe_max pm; 1024 int len = sizeof(pm); 1025 1026 memcpy(&pm.pipe, &p, sizeof(pm.pipe)); 1027 memcpy(&pm.samples, samples, sizeof(pm.samples)); 1028 1029 i = do_cmd(IP_DUMMYNET_CONFIGURE, &pm, len); 1030 } 1031 1032 if (i) 1033 err(1, "setsockopt(%s)", "IP_DUMMYNET_CONFIGURE"); 1034} 1035