/* dummynet.c revision 199626 */
160786Sps/* 260786Sps * Copyright (c) 2002-2003 Luigi Rizzo 360786Sps * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp 460786Sps * Copyright (c) 1994 Ugen J.S.Antsilevich 560786Sps * 660786Sps * Idea and grammar partially left from: 760786Sps * Copyright (c) 1993 Daniel Boulet 860786Sps * 960786Sps * Redistribution and use in source forms, with and without modification, 1060786Sps * are permitted provided that this entire comment appears intact. 1160786Sps * 1260786Sps * Redistribution in binary form may occur without any restrictions. 1360786Sps * Obviously, it would be nice if you gave credit where credit is due 1460786Sps * but requiring it would be too onerous. 1560786Sps * 1660786Sps * This software is provided ``AS IS'' without any warranties of any kind. 1760786Sps * 1860786Sps * NEW command line interface for IP firewall facility 1960786Sps * 2060786Sps * $FreeBSD: head/sbin/ipfw/dummynet.c 199626 2009-11-21 10:46:49Z netchild $ 2160786Sps * 2260786Sps * dummynet support 2360786Sps */ 2460786Sps 2560786Sps#include <sys/types.h> 2660786Sps#include <sys/socket.h> 2760786Sps#include <sys/queue.h> 2860786Sps/* XXX there are several sysctl leftover here */ 2960786Sps#include <sys/sysctl.h> 3060786Sps 3160786Sps#include "ipfw2.h" 3260786Sps 3360786Sps#include <ctype.h> 3460786Sps#include <err.h> 3560786Sps#include <errno.h> 3660786Sps#include <libutil.h> 3760786Sps#include <netdb.h> 3860786Sps#include <stdio.h> 3960786Sps#include <stdlib.h> 4060786Sps#include <string.h> 4160786Sps#include <sysexits.h> 4260786Sps 4360786Sps#include <net/if.h> 4460786Sps#include <netinet/in.h> 4560786Sps#include <netinet/ip_fw.h> 4660786Sps#include <netinet/ip_dummynet.h> 4760786Sps#include <arpa/inet.h> /* inet_ntoa */ 4860786Sps 4960786Spsstatic struct _s_x dummynet_params[] = { 5060786Sps { "plr", TOK_PLR }, 5160786Sps { "noerror", TOK_NOERROR }, 5260786Sps { "buckets", TOK_BUCKETS }, 5360786Sps { "dst-ip", TOK_DSTIP }, 5460786Sps { "src-ip", TOK_SRCIP }, 
5560786Sps { "dst-port", TOK_DSTPORT }, 5660786Sps { "src-port", TOK_SRCPORT }, 5760786Sps { "proto", TOK_PROTO }, 5860786Sps { "weight", TOK_WEIGHT }, 5960786Sps { "all", TOK_ALL }, 6060786Sps { "mask", TOK_MASK }, 6160786Sps { "droptail", TOK_DROPTAIL }, 6260786Sps { "red", TOK_RED }, 6360786Sps { "gred", TOK_GRED }, 6460786Sps { "bw", TOK_BW }, 6560786Sps { "bandwidth", TOK_BW }, 6660786Sps { "delay", TOK_DELAY }, 6760786Sps { "pipe", TOK_PIPE }, 6860786Sps { "queue", TOK_QUEUE }, 6960786Sps { "flow-id", TOK_FLOWID}, 7060786Sps { "dst-ipv6", TOK_DSTIP6}, 7160786Sps { "dst-ip6", TOK_DSTIP6}, 7260786Sps { "src-ipv6", TOK_SRCIP6}, 7360786Sps { "src-ip6", TOK_SRCIP6}, 7460786Sps { "profile", TOK_PIPE_PROFILE}, 7560786Sps { "burst", TOK_BURST}, 7660786Sps { "dummynet-params", TOK_NULL }, 7760786Sps { NULL, 0 } /* terminator */ 7860786Sps}; 7960786Sps 8060786Spsstatic int 8160786Spssort_q(const void *pa, const void *pb) 8260786Sps{ 8360786Sps int rev = (co.do_sort < 0); 8460786Sps int field = rev ? -co.do_sort : co.do_sort; 8560786Sps long long res = 0; 8660786Sps const struct dn_flow_queue *a = pa; 8760786Sps const struct dn_flow_queue *b = pb; 8860786Sps 8960786Sps switch (field) { 9060786Sps case 1: /* pkts */ 9160786Sps res = a->len - b->len; 9260786Sps break; 9360786Sps case 2: /* bytes */ 9460786Sps res = a->len_bytes - b->len_bytes; 9560786Sps break; 9660786Sps 9760786Sps case 3: /* tot pkts */ 9860786Sps res = a->tot_pkts - b->tot_pkts; 9960786Sps break; 10060786Sps 10160786Sps case 4: /* tot bytes */ 10260786Sps res = a->tot_bytes - b->tot_bytes; 10360786Sps break; 10460786Sps } 10560786Sps if (res < 0) 10660786Sps res = -1; 10760786Sps if (res > 0) 10860786Sps res = 1; 10960786Sps return (int)(rev ? 
res : -res); 11060786Sps} 11160786Sps 11260786Spsstatic void 11360786Spslist_queues(struct dn_flow_set *fs, struct dn_flow_queue *q) 11460786Sps{ 11560786Sps int l; 11660786Sps int index_printed, indexes = 0; 11760786Sps char buff[255]; 11860786Sps struct protoent *pe; 11960786Sps 12060786Sps if (fs->rq_elements == 0) 12160786Sps return; 12260786Sps 12360786Sps if (co.do_sort != 0) 12460786Sps heapsort(q, fs->rq_elements, sizeof *q, sort_q); 12560786Sps 12660786Sps /* Print IPv4 flows */ 12760786Sps index_printed = 0; 12860786Sps for (l = 0; l < fs->rq_elements; l++) { 12960786Sps struct in_addr ina; 13060786Sps 13160786Sps /* XXX: Should check for IPv4 flows */ 13260786Sps if (IS_IP6_FLOW_ID(&(q[l].id))) 13360786Sps continue; 13460786Sps 13560786Sps if (!index_printed) { 13660786Sps index_printed = 1; 13760786Sps if (indexes > 0) /* currently a no-op */ 13860786Sps printf("\n"); 13960786Sps indexes++; 14060786Sps printf(" " 14160786Sps "mask: 0x%02x 0x%08x/0x%04x -> 0x%08x/0x%04x\n", 14260786Sps fs->flow_mask.proto, 14360786Sps fs->flow_mask.src_ip, fs->flow_mask.src_port, 14460786Sps fs->flow_mask.dst_ip, fs->flow_mask.dst_port); 14560786Sps 14660786Sps printf("BKT Prot ___Source IP/port____ " 14760786Sps "____Dest. 
IP/port____ " 14860786Sps "Tot_pkt/bytes Pkt/Byte Drp\n"); 14960786Sps } 15060786Sps 15160786Sps printf("%3d ", q[l].hash_slot); 15260786Sps pe = getprotobynumber(q[l].id.proto); 15360786Sps if (pe) 15460786Sps printf("%-4s ", pe->p_name); 15560786Sps else 15660786Sps printf("%4u ", q[l].id.proto); 15760786Sps ina.s_addr = htonl(q[l].id.src_ip); 15860786Sps printf("%15s/%-5d ", 15960786Sps inet_ntoa(ina), q[l].id.src_port); 16060786Sps ina.s_addr = htonl(q[l].id.dst_ip); 16160786Sps printf("%15s/%-5d ", 16260786Sps inet_ntoa(ina), q[l].id.dst_port); 16360786Sps printf("%4llu %8llu %2u %4u %3u\n", 16460786Sps align_uint64(&q[l].tot_pkts), 16560786Sps align_uint64(&q[l].tot_bytes), 16660786Sps q[l].len, q[l].len_bytes, q[l].drops); 16760786Sps if (co.verbose) 16860786Sps printf(" S %20llu F %20llu\n", 16960786Sps align_uint64(&q[l].S), align_uint64(&q[l].F)); 17060786Sps } 17160786Sps 17260786Sps /* Print IPv6 flows */ 17360786Sps index_printed = 0; 17460786Sps for (l = 0; l < fs->rq_elements; l++) { 17560786Sps if (!IS_IP6_FLOW_ID(&(q[l].id))) 17660786Sps continue; 17760786Sps 17860786Sps if (!index_printed) { 17960786Sps index_printed = 1; 18060786Sps if (indexes > 0) 18160786Sps printf("\n"); 18260786Sps indexes++; 18360786Sps printf("\n mask: proto: 0x%02x, flow_id: 0x%08x, ", 18460786Sps fs->flow_mask.proto, fs->flow_mask.flow_id6); 18560786Sps inet_ntop(AF_INET6, &(fs->flow_mask.src_ip6), 18660786Sps buff, sizeof(buff)); 18760786Sps printf("%s/0x%04x -> ", buff, fs->flow_mask.src_port); 18860786Sps inet_ntop( AF_INET6, &(fs->flow_mask.dst_ip6), 18960786Sps buff, sizeof(buff) ); 19060786Sps printf("%s/0x%04x\n", buff, fs->flow_mask.dst_port); 19160786Sps 19260786Sps printf("BKT ___Prot___ _flow-id_ " 19360786Sps "______________Source IPv6/port_______________ " 19460786Sps "_______________Dest. 
IPv6/port_______________ " 19560786Sps "Tot_pkt/bytes Pkt/Byte Drp\n"); 19660786Sps } 19760786Sps printf("%3d ", q[l].hash_slot); 19860786Sps pe = getprotobynumber(q[l].id.proto); 19960786Sps if (pe != NULL) 20060786Sps printf("%9s ", pe->p_name); 20160786Sps else 20260786Sps printf("%9u ", q[l].id.proto); 20360786Sps printf("%7d %39s/%-5d ", q[l].id.flow_id6, 20460786Sps inet_ntop(AF_INET6, &(q[l].id.src_ip6), buff, sizeof(buff)), 20560786Sps q[l].id.src_port); 20660786Sps printf(" %39s/%-5d ", 20760786Sps inet_ntop(AF_INET6, &(q[l].id.dst_ip6), buff, sizeof(buff)), 20860786Sps q[l].id.dst_port); 20960786Sps printf(" %4llu %8llu %2u %4u %3u\n", 21060786Sps align_uint64(&q[l].tot_pkts), 21160786Sps align_uint64(&q[l].tot_bytes), 21260786Sps q[l].len, q[l].len_bytes, q[l].drops); 21360786Sps if (co.verbose) 21460786Sps printf(" S %20llu F %20llu\n", 21560786Sps align_uint64(&q[l].S), 21660786Sps align_uint64(&q[l].F)); 21760786Sps } 21860786Sps} 21960786Sps 22060786Spsstatic void 22160786Spsprint_flowset_parms(struct dn_flow_set *fs, char *prefix) 22260786Sps{ 22360786Sps int l; 22460786Sps char qs[30]; 22560786Sps char plr[30]; 22660786Sps char red[90]; /* Display RED parameters */ 22760786Sps 22860786Sps l = fs->qsize; 22960786Sps if (fs->flags_fs & DN_QSIZE_IS_BYTES) { 23060786Sps if (l >= 8192) 23160786Sps sprintf(qs, "%d KB", l / 1024); 23260786Sps else 23360786Sps sprintf(qs, "%d B", l); 23460786Sps } else 23560786Sps sprintf(qs, "%3d sl.", l); 23660786Sps if (fs->plr) 23760786Sps sprintf(plr, "plr %f", 1.0 * fs->plr / (double)(0x7fffffff)); 23860786Sps else 23960786Sps plr[0] = '\0'; 24060786Sps if (fs->flags_fs & DN_IS_RED) /* RED parameters */ 24160786Sps sprintf(red, 24260786Sps "\n\t %cRED w_q %f min_th %d max_th %d max_p %f", 24360786Sps (fs->flags_fs & DN_IS_GENTLE_RED) ? 
'G' : ' ', 24460786Sps 1.0 * fs->w_q / (double)(1 << SCALE_RED), 24560786Sps SCALE_VAL(fs->min_th), 24660786Sps SCALE_VAL(fs->max_th), 24760786Sps 1.0 * fs->max_p / (double)(1 << SCALE_RED)); 24860786Sps else 24960786Sps sprintf(red, "droptail"); 25060786Sps 25160786Sps printf("%s %s%s %d queues (%d buckets) %s\n", 25260786Sps prefix, qs, plr, fs->rq_elements, fs->rq_size, red); 25360786Sps} 25460786Sps 25560786Spsstatic void 25660786Spsprint_extra_delay_parms(struct dn_pipe *p) 25760786Sps{ 25860786Sps double loss; 25960786Sps if (p->samples_no <= 0) 26060786Sps return; 26160786Sps 26260786Sps loss = p->loss_level; 26360786Sps loss /= p->samples_no; 26460786Sps printf("\t profile: name \"%s\" loss %f samples %d\n", 26560786Sps p->name, loss, p->samples_no); 26660786Sps} 26760786Sps 26860786Spsvoid 26960786Spsipfw_list_pipes(void *data, uint nbytes, int ac, char *av[]) 27060786Sps{ 27160786Sps int rulenum; 27260786Sps void *next = data; 27360786Sps struct dn_pipe *p = (struct dn_pipe *) data; 27460786Sps struct dn_flow_set *fs; 27560786Sps struct dn_flow_queue *q; 27660786Sps int l; 27760786Sps 27860786Sps if (ac > 0) 27960786Sps rulenum = strtoul(*av++, NULL, 10); 28060786Sps else 28160786Sps rulenum = 0; 28260786Sps for (; nbytes >= sizeof *p; p = (struct dn_pipe *)next) { 28360786Sps double b = p->bandwidth; 28460786Sps char buf[30]; 28560786Sps char prefix[80]; 28660786Sps char burst[5 + 7]; 28760786Sps 28860786Sps if (SLIST_NEXT(p, next) != (struct dn_pipe *)DN_IS_PIPE) 28960786Sps break; /* done with pipes, now queues */ 29060786Sps 29160786Sps /* 29260786Sps * compute length, as pipe have variable size 29360786Sps */ 29460786Sps l = sizeof(*p) + p->fs.rq_elements * sizeof(*q); 29560786Sps next = (char *)p + l; 29660786Sps nbytes -= l; 29760786Sps 29860786Sps if ((rulenum != 0 && rulenum != p->pipe_nr) || co.do_pipe == 2) 29960786Sps continue; 30060786Sps 30160786Sps /* 30260786Sps * Print rate (or clocking interface) 30360786Sps */ 30460786Sps if 
(p->if_name[0] != '\0') 30560786Sps sprintf(buf, "%s", p->if_name); 30660786Sps else if (b == 0) 30760786Sps sprintf(buf, "unlimited"); 30860786Sps else if (b >= 1000000) 30960786Sps sprintf(buf, "%7.3f Mbit/s", b/1000000); 31060786Sps else if (b >= 1000) 31160786Sps sprintf(buf, "%7.3f Kbit/s", b/1000); 31260786Sps else 31360786Sps sprintf(buf, "%7.3f bit/s ", b); 31460786Sps 31560786Sps sprintf(prefix, "%05d: %s %4d ms ", 31660786Sps p->pipe_nr, buf, p->delay); 31760786Sps 31860786Sps print_flowset_parms(&(p->fs), prefix); 31960786Sps 32060786Sps if (humanize_number(burst, sizeof(burst), p->burst, 32160786Sps "Byte", HN_AUTOSCALE, 0) < 0 || co.verbose) 32260786Sps printf("\t burst: %ju Byte\n", p->burst); 32360786Sps else 32460786Sps printf("\t burst: %s\n", burst); 32560786Sps 32660786Sps print_extra_delay_parms(p); 32760786Sps 32860786Sps q = (struct dn_flow_queue *)(p+1); 32960786Sps list_queues(&(p->fs), q); 33060786Sps } 33160786Sps for (fs = next; nbytes >= sizeof *fs; fs = next) { 33260786Sps char prefix[80]; 33360786Sps 33460786Sps if (SLIST_NEXT(fs, next) != (struct dn_flow_set *)DN_IS_QUEUE) 33560786Sps break; 33660786Sps l = sizeof(*fs) + fs->rq_elements * sizeof(*q); 33760786Sps next = (char *)fs + l; 33860786Sps nbytes -= l; 33960786Sps 34060786Sps if (rulenum != 0 && ((rulenum != fs->fs_nr && co.do_pipe == 2) || 34160786Sps (rulenum != fs->parent_nr && co.do_pipe == 1))) { 34260786Sps continue; 34360786Sps } 34460786Sps 34560786Sps q = (struct dn_flow_queue *)(fs+1); 34660786Sps sprintf(prefix, "q%05d: weight %d pipe %d ", 34760786Sps fs->fs_nr, fs->weight, fs->parent_nr); 34860786Sps print_flowset_parms(fs, prefix); 34960786Sps list_queues(fs, q); 35060786Sps } 35160786Sps} 35260786Sps 35360786Sps/* 35460786Sps * Delete pipe or queue i 35560786Sps */ 35660786Spsint 35760786Spsipfw_delete_pipe(int pipe_or_queue, int i) 35860786Sps{ 35960786Sps struct dn_pipe p; 36060786Sps 36160786Sps memset(&p, 0, sizeof p); 36260786Sps if (pipe_or_queue == 1) 
36360786Sps p.pipe_nr = i; /* pipe */ 36460786Sps else 36560786Sps p.fs.fs_nr = i; /* queue */ 36660786Sps i = do_cmd(IP_DUMMYNET_DEL, &p, sizeof p); 36760786Sps if (i) { 36860786Sps i = 1; 36960786Sps warn("rule %u: setsockopt(IP_DUMMYNET_DEL)", i); 37060786Sps } 37160786Sps return i; 37260786Sps} 37360786Sps 37460786Sps/* 37560786Sps * Code to parse delay profiles. 37660786Sps * 37760786Sps * Some link types introduce extra delays in the transmission 37860786Sps * of a packet, e.g. because of MAC level framing, contention on 37960786Sps * the use of the channel, MAC level retransmissions and so on. 38060786Sps * From our point of view, the channel is effectively unavailable 38160786Sps * for this extra time, which is constant or variable depending 38260786Sps * on the link type. Additionally, packets may be dropped after this 383 * time (e.g. on a wireless link after too many retransmissions). 384 * We can model the additional delay with an empirical curve 385 * that represents its distribution. 386 * 387 * cumulative probability 388 * 1.0 ^ 389 * | 390 * L +-- loss-level x 391 * | ****** 392 * | * 393 * | ***** 394 * | * 395 * | ** 396 * | * 397 * +-------*-------------------> 398 * delay 399 * 400 * The empirical curve may have both vertical and horizontal lines. 401 * Vertical lines represent constant delay for a range of 402 * probabilities; horizontal lines correspond to a discontinuty 403 * in the delay distribution: the pipe will use the largest delay 404 * for a given probability. 405 * 406 * To pass the curve to dummynet, we must store the parameters 407 * in a file as described below, and issue the command 408 * 409 * ipfw pipe <n> config ... bw XXX profile <filename> ... 
410 * 411 * The file format is the following, with whitespace acting as 412 * a separator and '#' indicating the beginning a comment: 413 * 414 * samples N 415 * the number of samples used in the internal 416 * representation (2..1024; default 100); 417 * 418 * loss-level L 419 * The probability above which packets are lost. 420 * (0.0 <= L <= 1.0, default 1.0 i.e. no loss); 421 * 422 * name identifier 423 * Optional a name (listed by "ipfw pipe show") 424 * to identify the distribution; 425 * 426 * "delay prob" | "prob delay" 427 * One of these two lines is mandatory and defines 428 * the format of the following lines with data points. 429 * 430 * XXX YYY 431 * 2 or more lines representing points in the curve, 432 * with either delay or probability first, according 433 * to the chosen format. 434 * The unit for delay is milliseconds. 435 * 436 * Data points does not need to be ordered or equal to the number 437 * specified in the "samples" line. ipfw will sort and interpolate 438 * the curve as needed. 439 * 440 * Example of a profile file: 441 442 name bla_bla_bla 443 samples 100 444 loss-level 0.86 445 prob delay 446 0 200 # minimum overhead is 200ms 447 0.5 200 448 0.5 300 449 0.8 1000 450 0.9 1300 451 1 1300 452 453 * Internally, we will convert the curve to a fixed number of 454 * samples, and when it is time to transmit a packet we will 455 * model the extra delay as extra bits in the packet. 456 * 457 */ 458 459#define ED_MAX_LINE_LEN 256+ED_MAX_NAME_LEN 460#define ED_TOK_SAMPLES "samples" 461#define ED_TOK_LOSS "loss-level" 462#define ED_TOK_NAME "name" 463#define ED_TOK_DELAY "delay" 464#define ED_TOK_PROB "prob" 465#define ED_TOK_BW "bw" 466#define ED_SEPARATORS " \t\n" 467#define ED_MIN_SAMPLES_NO 2 468 469/* 470 * returns 1 if s is a non-negative number, with at least one '.' 
471 */ 472static int 473is_valid_number(const char *s) 474{ 475 int i, dots_found = 0; 476 int len = strlen(s); 477 478 for (i = 0; i<len; ++i) 479 if (!isdigit(s[i]) && (s[i] !='.' || ++dots_found > 1)) 480 return 0; 481 return 1; 482} 483 484/* 485 * Take as input a string describing a bandwidth value 486 * and return the numeric bandwidth value. 487 * set clocking interface or bandwidth value 488 */ 489void 490read_bandwidth(char *arg, int *bandwidth, char *if_name, int namelen) 491{ 492 if (*bandwidth != -1) 493 warn("duplicate token, override bandwidth value!"); 494 495 if (arg[0] >= 'a' && arg[0] <= 'z') { 496 if (namelen >= IFNAMSIZ) 497 warn("interface name truncated"); 498 namelen--; 499 /* interface name */ 500 strncpy(if_name, arg, namelen); 501 if_name[namelen] = '\0'; 502 *bandwidth = 0; 503 } else { /* read bandwidth value */ 504 int bw; 505 char *end = NULL; 506 507 bw = strtoul(arg, &end, 0); 508 if (*end == 'K' || *end == 'k') { 509 end++; 510 bw *= 1000; 511 } else if (*end == 'M') { 512 end++; 513 bw *= 1000000; 514 } 515 if ((*end == 'B' && 516 _substrcmp2(end, "Bi", "Bit/s") != 0) || 517 _substrcmp2(end, "by", "bytes") == 0) 518 bw *= 8; 519 520 if (bw < 0) 521 errx(EX_DATAERR, "bandwidth too large"); 522 523 *bandwidth = bw; 524 if_name[0] = '\0'; 525 } 526} 527 528struct point { 529 double prob; 530 double delay; 531}; 532 533int 534compare_points(const void *vp1, const void *vp2) 535{ 536 const struct point *p1 = vp1; 537 const struct point *p2 = vp2; 538 double res = 0; 539 540 res = p1->prob - p2->prob; 541 if (res == 0) 542 res = p1->delay - p2->delay; 543 if (res < 0) 544 return -1; 545 else if (res > 0) 546 return 1; 547 else 548 return 0; 549} 550 551#define ED_EFMT(s) EX_DATAERR,"error in %s at line %d: "#s,filename,lineno 552 553static void 554load_extra_delays(const char *filename, struct dn_pipe *p) 555{ 556 char line[ED_MAX_LINE_LEN]; 557 FILE *f; 558 int lineno = 0; 559 int i; 560 561 int samples = -1; 562 double loss = -1.0; 563 
char profile_name[ED_MAX_NAME_LEN]; 564 int delay_first = -1; 565 int do_points = 0; 566 struct point points[ED_MAX_SAMPLES_NO]; 567 int points_no = 0; 568 569 profile_name[0] = '\0'; 570 f = fopen(filename, "r"); 571 if (f == NULL) 572 err(EX_UNAVAILABLE, "fopen: %s", filename); 573 574 while (fgets(line, ED_MAX_LINE_LEN, f)) { /* read commands */ 575 char *s, *cur = line, *name = NULL, *arg = NULL; 576 577 ++lineno; 578 579 /* parse the line */ 580 while (cur) { 581 s = strsep(&cur, ED_SEPARATORS); 582 if (s == NULL || *s == '#') 583 break; 584 if (*s == '\0') 585 continue; 586 if (arg) 587 errx(ED_EFMT("too many arguments")); 588 if (name == NULL) 589 name = s; 590 else 591 arg = s; 592 } 593 if (name == NULL) /* empty line */ 594 continue; 595 if (arg == NULL) 596 errx(ED_EFMT("missing arg for %s"), name); 597 598 if (!strcasecmp(name, ED_TOK_SAMPLES)) { 599 if (samples > 0) 600 errx(ED_EFMT("duplicate ``samples'' line")); 601 if (atoi(arg) <=0) 602 errx(ED_EFMT("invalid number of samples")); 603 samples = atoi(arg); 604 if (samples>ED_MAX_SAMPLES_NO) 605 errx(ED_EFMT("too many samples, maximum is %d"), 606 ED_MAX_SAMPLES_NO); 607 do_points = 0; 608 } else if (!strcasecmp(name, ED_TOK_BW)) { 609 read_bandwidth(arg, &p->bandwidth, p->if_name, sizeof(p->if_name)); 610 } else if (!strcasecmp(name, ED_TOK_LOSS)) { 611 if (loss != -1.0) 612 errx(ED_EFMT("duplicated token: %s"), name); 613 if (!is_valid_number(arg)) 614 errx(ED_EFMT("invalid %s"), arg); 615 loss = atof(arg); 616 if (loss > 1) 617 errx(ED_EFMT("%s greater than 1.0"), name); 618 do_points = 0; 619 } else if (!strcasecmp(name, ED_TOK_NAME)) { 620 if (profile_name[0] != '\0') 621 errx(ED_EFMT("duplicated token: %s"), name); 622 strncpy(profile_name, arg, sizeof(profile_name) - 1); 623 profile_name[sizeof(profile_name)-1] = '\0'; 624 do_points = 0; 625 } else if (!strcasecmp(name, ED_TOK_DELAY)) { 626 if (do_points) 627 errx(ED_EFMT("duplicated token: %s"), name); 628 delay_first = 1; 629 do_points = 1; 
630 } else if (!strcasecmp(name, ED_TOK_PROB)) { 631 if (do_points) 632 errx(ED_EFMT("duplicated token: %s"), name); 633 delay_first = 0; 634 do_points = 1; 635 } else if (do_points) { 636 if (!is_valid_number(name) || !is_valid_number(arg)) 637 errx(ED_EFMT("invalid point found")); 638 if (delay_first) { 639 points[points_no].delay = atof(name); 640 points[points_no].prob = atof(arg); 641 } else { 642 points[points_no].delay = atof(arg); 643 points[points_no].prob = atof(name); 644 } 645 if (points[points_no].prob > 1.0) 646 errx(ED_EFMT("probability greater than 1.0")); 647 ++points_no; 648 } else { 649 errx(ED_EFMT("unrecognised command '%s'"), name); 650 } 651 } 652 653 fclose (f); 654 655 if (samples == -1) { 656 warnx("'%s' not found, assuming 100", ED_TOK_SAMPLES); 657 samples = 100; 658 } 659 660 if (loss == -1.0) { 661 warnx("'%s' not found, assuming no loss", ED_TOK_LOSS); 662 loss = 1; 663 } 664 665 /* make sure that there are enough points. */ 666 if (points_no < ED_MIN_SAMPLES_NO) 667 errx(ED_EFMT("too few samples, need at least %d"), 668 ED_MIN_SAMPLES_NO); 669 670 qsort(points, points_no, sizeof(struct point), compare_points); 671 672 /* interpolation */ 673 for (i = 0; i<points_no-1; ++i) { 674 double y1 = points[i].prob * samples; 675 double x1 = points[i].delay; 676 double y2 = points[i+1].prob * samples; 677 double x2 = points[i+1].delay; 678 679 int index = y1; 680 int stop = y2; 681 682 if (x1 == x2) { 683 for (; index<stop; ++index) 684 p->samples[index] = x1; 685 } else { 686 double m = (y2-y1)/(x2-x1); 687 double c = y1 - m*x1; 688 for (; index<stop ; ++index) 689 p->samples[index] = (index - c)/m; 690 } 691 } 692 p->samples_no = samples; 693 p->loss_level = loss * samples; 694 strncpy(p->name, profile_name, sizeof(p->name)); 695} 696 697void 698ipfw_config_pipe(int ac, char **av) 699{ 700 int samples[ED_MAX_SAMPLES_NO]; 701 struct dn_pipe p; 702 int i; 703 char *end; 704 void *par = NULL; 705 706 memset(&p, 0, sizeof p); 707 p.bandwidth = 
-1; 708 709 av++; ac--; 710 /* Pipe number */ 711 if (ac && isdigit(**av)) { 712 i = atoi(*av); av++; ac--; 713 if (co.do_pipe == 1) 714 p.pipe_nr = i; 715 else 716 p.fs.fs_nr = i; 717 } 718 while (ac > 0) { 719 double d; 720 int tok = match_token(dummynet_params, *av); 721 ac--; av++; 722 723 switch(tok) { 724 case TOK_NOERROR: 725 p.fs.flags_fs |= DN_NOERROR; 726 break; 727 728 case TOK_PLR: 729 NEED1("plr needs argument 0..1\n"); 730 d = strtod(av[0], NULL); 731 if (d > 1) 732 d = 1; 733 else if (d < 0) 734 d = 0; 735 p.fs.plr = (int)(d*0x7fffffff); 736 ac--; av++; 737 break; 738 739 case TOK_QUEUE: 740 NEED1("queue needs queue size\n"); 741 end = NULL; 742 p.fs.qsize = strtoul(av[0], &end, 0); 743 if (*end == 'K' || *end == 'k') { 744 p.fs.flags_fs |= DN_QSIZE_IS_BYTES; 745 p.fs.qsize *= 1024; 746 } else if (*end == 'B' || 747 _substrcmp2(end, "by", "bytes") == 0) { 748 p.fs.flags_fs |= DN_QSIZE_IS_BYTES; 749 } 750 ac--; av++; 751 break; 752 753 case TOK_BUCKETS: 754 NEED1("buckets needs argument\n"); 755 p.fs.rq_size = strtoul(av[0], NULL, 0); 756 ac--; av++; 757 break; 758 759 case TOK_MASK: 760 NEED1("mask needs mask specifier\n"); 761 /* 762 * per-flow queue, mask is dst_ip, dst_port, 763 * src_ip, src_port, proto measured in bits 764 */ 765 par = NULL; 766 767 bzero(&p.fs.flow_mask, sizeof(p.fs.flow_mask)); 768 end = NULL; 769 770 while (ac >= 1) { 771 uint32_t *p32 = NULL; 772 uint16_t *p16 = NULL; 773 uint32_t *p20 = NULL; 774 struct in6_addr *pa6 = NULL; 775 uint32_t a; 776 777 tok = match_token(dummynet_params, *av); 778 ac--; av++; 779 switch(tok) { 780 case TOK_ALL: 781 /* 782 * special case, all bits significant 783 */ 784 p.fs.flow_mask.dst_ip = ~0; 785 p.fs.flow_mask.src_ip = ~0; 786 p.fs.flow_mask.dst_port = ~0; 787 p.fs.flow_mask.src_port = ~0; 788 p.fs.flow_mask.proto = ~0; 789 n2mask(&(p.fs.flow_mask.dst_ip6), 128); 790 n2mask(&(p.fs.flow_mask.src_ip6), 128); 791 p.fs.flow_mask.flow_id6 = ~0; 792 p.fs.flags_fs |= DN_HAVE_FLOW_MASK; 793 goto 
end_mask; 794 795 case TOK_DSTIP: 796 p32 = &p.fs.flow_mask.dst_ip; 797 break; 798 799 case TOK_SRCIP: 800 p32 = &p.fs.flow_mask.src_ip; 801 break; 802 803 case TOK_DSTIP6: 804 pa6 = &(p.fs.flow_mask.dst_ip6); 805 break; 806 807 case TOK_SRCIP6: 808 pa6 = &(p.fs.flow_mask.src_ip6); 809 break; 810 811 case TOK_FLOWID: 812 p20 = &p.fs.flow_mask.flow_id6; 813 break; 814 815 case TOK_DSTPORT: 816 p16 = &p.fs.flow_mask.dst_port; 817 break; 818 819 case TOK_SRCPORT: 820 p16 = &p.fs.flow_mask.src_port; 821 break; 822 823 case TOK_PROTO: 824 break; 825 826 default: 827 ac++; av--; /* backtrack */ 828 goto end_mask; 829 } 830 if (ac < 1) 831 errx(EX_USAGE, "mask: value missing"); 832 if (*av[0] == '/') { 833 a = strtoul(av[0]+1, &end, 0); 834 if (pa6 == NULL) 835 a = (a == 32) ? ~0 : (1 << a) - 1; 836 } else 837 a = strtoul(av[0], &end, 0); 838 if (p32 != NULL) 839 *p32 = a; 840 else if (p16 != NULL) { 841 if (a > 0xFFFF) 842 errx(EX_DATAERR, 843 "port mask must be 16 bit"); 844 *p16 = (uint16_t)a; 845 } else if (p20 != NULL) { 846 if (a > 0xfffff) 847 errx(EX_DATAERR, 848 "flow_id mask must be 20 bit"); 849 *p20 = (uint32_t)a; 850 } else if (pa6 != NULL) { 851 if (a > 128) 852 errx(EX_DATAERR, 853 "in6addr invalid mask len"); 854 else 855 n2mask(pa6, a); 856 } else { 857 if (a > 0xFF) 858 errx(EX_DATAERR, 859 "proto mask must be 8 bit"); 860 p.fs.flow_mask.proto = (uint8_t)a; 861 } 862 if (a != 0) 863 p.fs.flags_fs |= DN_HAVE_FLOW_MASK; 864 ac--; av++; 865 } /* end while, config masks */ 866end_mask: 867 break; 868 869 case TOK_RED: 870 case TOK_GRED: 871 NEED1("red/gred needs w_q/min_th/max_th/max_p\n"); 872 p.fs.flags_fs |= DN_IS_RED; 873 if (tok == TOK_GRED) 874 p.fs.flags_fs |= DN_IS_GENTLE_RED; 875 /* 876 * the format for parameters is w_q/min_th/max_th/max_p 877 */ 878 if ((end = strsep(&av[0], "/"))) { 879 double w_q = strtod(end, NULL); 880 if (w_q > 1 || w_q <= 0) 881 errx(EX_DATAERR, "0 < w_q <= 1"); 882 p.fs.w_q = (int) (w_q * (1 << SCALE_RED)); 883 } 884 if 
((end = strsep(&av[0], "/"))) { 885 p.fs.min_th = strtoul(end, &end, 0); 886 if (*end == 'K' || *end == 'k') 887 p.fs.min_th *= 1024; 888 } 889 if ((end = strsep(&av[0], "/"))) { 890 p.fs.max_th = strtoul(end, &end, 0); 891 if (*end == 'K' || *end == 'k') 892 p.fs.max_th *= 1024; 893 } 894 if ((end = strsep(&av[0], "/"))) { 895 double max_p = strtod(end, NULL); 896 if (max_p > 1 || max_p <= 0) 897 errx(EX_DATAERR, "0 < max_p <= 1"); 898 p.fs.max_p = (int)(max_p * (1 << SCALE_RED)); 899 } 900 ac--; av++; 901 break; 902 903 case TOK_DROPTAIL: 904 p.fs.flags_fs &= ~(DN_IS_RED|DN_IS_GENTLE_RED); 905 break; 906 907 case TOK_BW: 908 NEED1("bw needs bandwidth or interface\n"); 909 if (co.do_pipe != 1) 910 errx(EX_DATAERR, "bandwidth only valid for pipes"); 911 read_bandwidth(av[0], &p.bandwidth, p.if_name, sizeof(p.if_name)); 912 ac--; av++; 913 break; 914 915 case TOK_DELAY: 916 if (co.do_pipe != 1) 917 errx(EX_DATAERR, "delay only valid for pipes"); 918 NEED1("delay needs argument 0..10000ms\n"); 919 p.delay = strtoul(av[0], NULL, 0); 920 ac--; av++; 921 break; 922 923 case TOK_WEIGHT: 924 if (co.do_pipe == 1) 925 errx(EX_DATAERR,"weight only valid for queues"); 926 NEED1("weight needs argument 0..100\n"); 927 p.fs.weight = strtoul(av[0], &end, 0); 928 ac--; av++; 929 break; 930 931 case TOK_PIPE: 932 if (co.do_pipe == 1) 933 errx(EX_DATAERR,"pipe only valid for queues"); 934 NEED1("pipe needs pipe_number\n"); 935 p.fs.parent_nr = strtoul(av[0], &end, 0); 936 ac--; av++; 937 break; 938 939 case TOK_PIPE_PROFILE: 940 if (co.do_pipe != 1) 941 errx(EX_DATAERR, "extra delay only valid for pipes"); 942 NEED1("extra delay needs the file name\n"); 943 p.samples = &samples[0]; 944 load_extra_delays(av[0], &p); 945 --ac; ++av; 946 break; 947 948 case TOK_BURST: 949 if (co.do_pipe != 1) 950 errx(EX_DATAERR, "burst only valid for pipes"); 951 NEED1("burst needs argument\n"); 952 errno = 0; 953 if (expand_number(av[0], &p.burst) < 0) 954 if (errno != ERANGE) 955 errx(EX_DATAERR, 
956 "burst: invalid argument"); 957 if (errno || p.burst > (1ULL << 48) - 1) 958 errx(EX_DATAERR, 959 "burst: out of range (0..2^48-1)"); 960 ac--; av++; 961 break; 962 963 default: 964 errx(EX_DATAERR, "unrecognised option ``%s''", av[-1]); 965 } 966 } 967 if (co.do_pipe == 1) { 968 if (p.pipe_nr == 0) 969 errx(EX_DATAERR, "pipe_nr must be > 0"); 970 if (p.delay > 10000) 971 errx(EX_DATAERR, "delay must be < 10000"); 972 } else { /* co.do_pipe == 2, queue */ 973 if (p.fs.parent_nr == 0) 974 errx(EX_DATAERR, "pipe must be > 0"); 975 if (p.fs.weight >100) 976 errx(EX_DATAERR, "weight must be <= 100"); 977 } 978 979 /* check for bandwidth value */ 980 if (p.bandwidth == -1) { 981 p.bandwidth = 0; 982 if (p.samples_no > 0) 983 errx(EX_DATAERR, "profile requires a bandwidth limit"); 984 } 985 986 if (p.fs.flags_fs & DN_QSIZE_IS_BYTES) { 987 size_t len; 988 long limit; 989 990 len = sizeof(limit); 991 if (sysctlbyname("net.inet.ip.dummynet.pipe_byte_limit", 992 &limit, &len, NULL, 0) == -1) 993 limit = 1024*1024; 994 if (p.fs.qsize > limit) 995 errx(EX_DATAERR, "queue size must be < %ldB", limit); 996 } else { 997 size_t len; 998 long limit; 999 1000 len = sizeof(limit); 1001 if (sysctlbyname("net.inet.ip.dummynet.pipe_slot_limit", 1002 &limit, &len, NULL, 0) == -1) 1003 limit = 100; 1004 if (p.fs.qsize > limit) 1005 errx(EX_DATAERR, "2 <= queue size <= %ld", limit); 1006 } 1007 if (p.fs.flags_fs & DN_IS_RED) { 1008 size_t len; 1009 int lookup_depth, avg_pkt_size; 1010 double s, idle, weight, w_q; 1011 struct clockinfo ck; 1012 int t; 1013 1014 if (p.fs.min_th >= p.fs.max_th) 1015 errx(EX_DATAERR, "min_th %d must be < than max_th %d", 1016 p.fs.min_th, p.fs.max_th); 1017 if (p.fs.max_th == 0) 1018 errx(EX_DATAERR, "max_th must be > 0"); 1019 1020 len = sizeof(int); 1021 if (sysctlbyname("net.inet.ip.dummynet.red_lookup_depth", 1022 &lookup_depth, &len, NULL, 0) == -1) 1023 errx(1, "sysctlbyname(\"%s\")", 1024 "net.inet.ip.dummynet.red_lookup_depth"); 1025 if 
(lookup_depth == 0) 1026 errx(EX_DATAERR, "net.inet.ip.dummynet.red_lookup_depth" 1027 " must be greater than zero"); 1028 1029 len = sizeof(int); 1030 if (sysctlbyname("net.inet.ip.dummynet.red_avg_pkt_size", 1031 &avg_pkt_size, &len, NULL, 0) == -1) 1032 1033 errx(1, "sysctlbyname(\"%s\")", 1034 "net.inet.ip.dummynet.red_avg_pkt_size"); 1035 if (avg_pkt_size == 0) 1036 errx(EX_DATAERR, 1037 "net.inet.ip.dummynet.red_avg_pkt_size must" 1038 " be greater than zero"); 1039 1040 len = sizeof(struct clockinfo); 1041 if (sysctlbyname("kern.clockrate", &ck, &len, NULL, 0) == -1) 1042 errx(1, "sysctlbyname(\"%s\")", "kern.clockrate"); 1043 1044 /* 1045 * Ticks needed for sending a medium-sized packet. 1046 * Unfortunately, when we are configuring a WF2Q+ queue, we 1047 * do not have bandwidth information, because that is stored 1048 * in the parent pipe, and also we have multiple queues 1049 * competing for it. So we set s=0, which is not very 1050 * correct. But on the other hand, why do we want RED with 1051 * WF2Q+ ? 1052 */ 1053 if (p.bandwidth==0) /* this is a WF2Q+ queue */ 1054 s = 0; 1055 else 1056 s = (double)ck.hz * avg_pkt_size * 8 / p.bandwidth; 1057 1058 /* 1059 * max idle time (in ticks) before avg queue size becomes 0. 1060 * NOTA: (3/w_q) is approx the value x so that 1061 * (1-w_q)^x < 10^-3. 1062 */ 1063 w_q = ((double)p.fs.w_q) / (1 << SCALE_RED); 1064 idle = s * 3. 
/ w_q; 1065 p.fs.lookup_step = (int)idle / lookup_depth; 1066 if (!p.fs.lookup_step) 1067 p.fs.lookup_step = 1; 1068 weight = 1 - w_q; 1069 for (t = p.fs.lookup_step; t > 1; --t) 1070 weight *= 1 - w_q; 1071 p.fs.lookup_weight = (int)(weight * (1 << SCALE_RED)); 1072 } 1073 if (p.samples_no <= 0) { 1074 i = do_cmd(IP_DUMMYNET_CONFIGURE, &p, sizeof p); 1075 } else { 1076 struct dn_pipe_max pm; 1077 int len = sizeof(pm); 1078 1079 memcpy(&pm.pipe, &p, sizeof(pm.pipe)); 1080 memcpy(&pm.samples, samples, sizeof(pm.samples)); 1081 1082 i = do_cmd(IP_DUMMYNET_CONFIGURE, &pm, len); 1083 } 1084 1085 if (i) 1086 err(1, "setsockopt(%s)", "IP_DUMMYNET_CONFIGURE"); 1087} 1088