/* dummynet.c revision 193715 */
/*
 * Copyright (c) 2002-2003 Luigi Rizzo
 * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp
 * Copyright (c) 1994 Ugen J.S.Antsilevich
 *
 * Idea and grammar partially left from:
 * Copyright (c) 1993 Daniel Boulet
 *
 * Redistribution and use in source forms, with and without modification,
 * are permitted provided that this entire comment appears intact.
 *
 * Redistribution in binary form may occur without any restrictions.
 * Obviously, it would be nice if you gave credit where credit is due
 * but requiring it would be too onerous.
 *
 * This software is provided ``AS IS'' without any warranties of any kind.
 *
 * NEW command line interface for IP firewall facility
 *
 * $FreeBSD: head/sbin/ipfw/dummynet.c 193715 2009-06-08 14:32:29Z luigi $
 *
 * dummynet support
 */

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/queue.h>
/* XXX there are several sysctl leftover here */
#include <sys/sysctl.h>

#include "ipfw2.h"

#include <ctype.h>
#include <err.h>
#include <limits.h>
#include <netdb.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>

#include <net/if.h>
#include <netinet/in.h>
#include <netinet/ip_fw.h>
#include <netinet/ip_dummynet.h>
#include <arpa/inet.h>	/* inet_ntoa */

4760786Spsstatic struct _s_x dummynet_params[] = {
4860786Sps	{ "plr",		TOK_PLR },
4960786Sps	{ "noerror",		TOK_NOERROR },
5060786Sps	{ "buckets",		TOK_BUCKETS },
5160786Sps	{ "dst-ip",		TOK_DSTIP },
5260786Sps	{ "src-ip",		TOK_SRCIP },
5360786Sps	{ "dst-port",		TOK_DSTPORT },
5460786Sps	{ "src-port",		TOK_SRCPORT },
5560786Sps	{ "proto",		TOK_PROTO },
5660786Sps	{ "weight",		TOK_WEIGHT },
5760786Sps	{ "all",		TOK_ALL },
5860786Sps	{ "mask",		TOK_MASK },
5960786Sps	{ "droptail",		TOK_DROPTAIL },
6060786Sps	{ "red",		TOK_RED },
6160786Sps	{ "gred",		TOK_GRED },
6260786Sps	{ "bw",			TOK_BW },
6360786Sps	{ "bandwidth",		TOK_BW },
6460786Sps	{ "delay",		TOK_DELAY },
6560786Sps	{ "pipe",		TOK_PIPE },
6660786Sps	{ "queue",		TOK_QUEUE },
6760786Sps	{ "flow-id",		TOK_FLOWID},
6860786Sps	{ "dst-ipv6",		TOK_DSTIP6},
6960786Sps	{ "dst-ip6",		TOK_DSTIP6},
7060786Sps	{ "src-ipv6",		TOK_SRCIP6},
7160786Sps	{ "src-ip6",		TOK_SRCIP6},
7260786Sps	{ "profile",		TOK_PIPE_PROFILE},
7360786Sps	{ "dummynet-params",	TOK_NULL },
7460786Sps	{ NULL, 0 }	/* terminator */
7560786Sps};
7660786Sps
7760786Spsstatic int
7860786Spssort_q(const void *pa, const void *pb)
7960786Sps{
8060786Sps	int rev = (co.do_sort < 0);
8160786Sps	int field = rev ? -co.do_sort : co.do_sort;
8260786Sps	long long res = 0;
8360786Sps	const struct dn_flow_queue *a = pa;
8460786Sps	const struct dn_flow_queue *b = pb;
8560786Sps
8660786Sps	switch (field) {
8760786Sps	case 1: /* pkts */
8860786Sps		res = a->len - b->len;
8960786Sps		break;
9060786Sps	case 2: /* bytes */
9160786Sps		res = a->len_bytes - b->len_bytes;
9260786Sps		break;
9360786Sps
9460786Sps	case 3: /* tot pkts */
9560786Sps		res = a->tot_pkts - b->tot_pkts;
9660786Sps		break;
9760786Sps
9860786Sps	case 4: /* tot bytes */
9963128Sps		res = a->tot_bytes - b->tot_bytes;
10063128Sps		break;
10163128Sps	}
10263128Sps	if (res < 0)
10363128Sps		res = -1;
10463128Sps	if (res > 0)
10563128Sps		res = 1;
10660786Sps	return (int)(rev ? res : -res);
10760786Sps}
10860786Sps
10960786Spsstatic void
11060786Spslist_queues(struct dn_flow_set *fs, struct dn_flow_queue *q)
11160786Sps{
11260786Sps	int l;
11363128Sps	int index_printed, indexes = 0;
11460786Sps	char buff[255];
11560786Sps	struct protoent *pe;
11660786Sps
11760786Sps	if (fs->rq_elements == 0)
11860786Sps		return;
11960786Sps
12060786Sps	if (co.do_sort != 0)
12160786Sps		heapsort(q, fs->rq_elements, sizeof *q, sort_q);
12260786Sps
12360786Sps	/* Print IPv4 flows */
12460786Sps	index_printed = 0;
12560786Sps	for (l = 0; l < fs->rq_elements; l++) {
12660786Sps		struct in_addr ina;
12760786Sps
12860786Sps		/* XXX: Should check for IPv4 flows */
12960786Sps		if (IS_IP6_FLOW_ID(&(q[l].id)))
13060786Sps			continue;
13160786Sps
13260786Sps		if (!index_printed) {
13360786Sps			index_printed = 1;
13460786Sps			if (indexes > 0)	/* currently a no-op */
13560786Sps				printf("\n");
13660786Sps			indexes++;
13760786Sps			printf("    "
13860786Sps			    "mask: 0x%02x 0x%08x/0x%04x -> 0x%08x/0x%04x\n",
13960786Sps			    fs->flow_mask.proto,
14060786Sps			    fs->flow_mask.src_ip, fs->flow_mask.src_port,
14160786Sps			    fs->flow_mask.dst_ip, fs->flow_mask.dst_port);
14260786Sps
14360786Sps			printf("BKT Prot ___Source IP/port____ "
14460786Sps			    "____Dest. IP/port____ "
14560786Sps			    "Tot_pkt/bytes Pkt/Byte Drp\n");
14660786Sps		}
14760786Sps
14860786Sps		printf("%3d ", q[l].hash_slot);
14960786Sps		pe = getprotobynumber(q[l].id.proto);
15060786Sps		if (pe)
15160786Sps			printf("%-4s ", pe->p_name);
15260786Sps		else
15360786Sps			printf("%4u ", q[l].id.proto);
15460786Sps		ina.s_addr = htonl(q[l].id.src_ip);
15560786Sps		printf("%15s/%-5d ",
15660786Sps		    inet_ntoa(ina), q[l].id.src_port);
15760786Sps		ina.s_addr = htonl(q[l].id.dst_ip);
15860786Sps		printf("%15s/%-5d ",
15960786Sps		    inet_ntoa(ina), q[l].id.dst_port);
16060786Sps		printf("%4llu %8llu %2u %4u %3u\n",
16160786Sps		    align_uint64(&q[l].tot_pkts),
16260786Sps		    align_uint64(&q[l].tot_bytes),
16360786Sps		    q[l].len, q[l].len_bytes, q[l].drops);
16460786Sps		if (co.verbose)
16560786Sps			printf("   S %20llu  F %20llu\n",
16660786Sps			    align_uint64(&q[l].S), align_uint64(&q[l].F));
16760786Sps	}
16860786Sps
16960786Sps	/* Print IPv6 flows */
17060786Sps	index_printed = 0;
17160786Sps	for (l = 0; l < fs->rq_elements; l++) {
17260786Sps		if (!IS_IP6_FLOW_ID(&(q[l].id)))
17360786Sps			continue;
17460786Sps
17560786Sps		if (!index_printed) {
17660786Sps			index_printed = 1;
17760786Sps			if (indexes > 0)
17860786Sps				printf("\n");
17960786Sps			indexes++;
18060786Sps			printf("\n        mask: proto: 0x%02x, flow_id: 0x%08x,  ",
18160786Sps			    fs->flow_mask.proto, fs->flow_mask.flow_id6);
18260786Sps			inet_ntop(AF_INET6, &(fs->flow_mask.src_ip6),
18360786Sps			    buff, sizeof(buff));
18460786Sps			printf("%s/0x%04x -> ", buff, fs->flow_mask.src_port);
18560786Sps			inet_ntop( AF_INET6, &(fs->flow_mask.dst_ip6),
18660786Sps			    buff, sizeof(buff) );
18760786Sps			printf("%s/0x%04x\n", buff, fs->flow_mask.dst_port);
18860786Sps
18960786Sps			printf("BKT ___Prot___ _flow-id_ "
19060786Sps			    "______________Source IPv6/port_______________ "
19160786Sps			    "_______________Dest. IPv6/port_______________ "
19260786Sps			    "Tot_pkt/bytes Pkt/Byte Drp\n");
19360786Sps		}
19460786Sps		printf("%3d ", q[l].hash_slot);
19560786Sps		pe = getprotobynumber(q[l].id.proto);
19660786Sps		if (pe != NULL)
19760786Sps			printf("%9s ", pe->p_name);
19860786Sps		else
19960786Sps			printf("%9u ", q[l].id.proto);
20060786Sps		printf("%7d  %39s/%-5d ", q[l].id.flow_id6,
20160786Sps		    inet_ntop(AF_INET6, &(q[l].id.src_ip6), buff, sizeof(buff)),
20260786Sps		    q[l].id.src_port);
20360786Sps		printf(" %39s/%-5d ",
20460786Sps		    inet_ntop(AF_INET6, &(q[l].id.dst_ip6), buff, sizeof(buff)),
20560786Sps		    q[l].id.dst_port);
20660786Sps		printf(" %4llu %8llu %2u %4u %3u\n",
20760786Sps		    align_uint64(&q[l].tot_pkts),
20860786Sps		    align_uint64(&q[l].tot_bytes),
20960786Sps		    q[l].len, q[l].len_bytes, q[l].drops);
21060786Sps		if (co.verbose)
21160786Sps			printf("   S %20llu  F %20llu\n",
21260786Sps			    align_uint64(&q[l].S),
21360786Sps			    align_uint64(&q[l].F));
21460786Sps	}
21560786Sps}
21660786Sps
21760786Spsstatic void
21860786Spsprint_flowset_parms(struct dn_flow_set *fs, char *prefix)
21960786Sps{
22060786Sps	int l;
22160786Sps	char qs[30];
22260786Sps	char plr[30];
22360786Sps	char red[90];	/* Display RED parameters */
22460786Sps
22560786Sps	l = fs->qsize;
22660786Sps	if (fs->flags_fs & DN_QSIZE_IS_BYTES) {
22760786Sps		if (l >= 8192)
22860786Sps			sprintf(qs, "%d KB", l / 1024);
22960786Sps		else
23060786Sps			sprintf(qs, "%d B", l);
23160786Sps	} else
23260786Sps		sprintf(qs, "%3d sl.", l);
23360786Sps	if (fs->plr)
23460786Sps		sprintf(plr, "plr %f", 1.0 * fs->plr / (double)(0x7fffffff));
23560786Sps	else
23660786Sps		plr[0] = '\0';
23760786Sps	if (fs->flags_fs & DN_IS_RED)	/* RED parameters */
23860786Sps		sprintf(red,
23960786Sps		    "\n\t  %cRED w_q %f min_th %d max_th %d max_p %f",
24089019Sps		    (fs->flags_fs & DN_IS_GENTLE_RED) ? 'G' : ' ',
24189019Sps		    1.0 * fs->w_q / (double)(1 << SCALE_RED),
24289019Sps		    SCALE_VAL(fs->min_th),
24389019Sps		    SCALE_VAL(fs->max_th),
24460786Sps		    1.0 * fs->max_p / (double)(1 << SCALE_RED));
24560786Sps	else
24660786Sps		sprintf(red, "droptail");
24760786Sps
24860786Sps	printf("%s %s%s %d queues (%d buckets) %s\n",
24960786Sps	    prefix, qs, plr, fs->rq_elements, fs->rq_size, red);
25060786Sps}
25160786Sps
25260786Spsstatic void
25360786Spsprint_extra_delay_parms(struct dn_pipe *p, char *prefix)
25460786Sps{
25560786Sps	double loss;
25689019Sps	if (p->samples_no <= 0)
25789019Sps		return;
25889019Sps
25989019Sps	loss = p->loss_level;
26089019Sps	loss /= p->samples_no;
26189019Sps	printf("%s profile: name \"%s\" loss %f samples %d\n",
26260786Sps		prefix, p->name, loss, p->samples_no);
26360786Sps}
26460786Sps
26560786Spsvoid
26689019Spsipfw_list_pipes(void *data, uint nbytes, int ac, char *av[])
26789019Sps{
26889019Sps	int rulenum;
26989019Sps	void *next = data;
27089019Sps	struct dn_pipe *p = (struct dn_pipe *) data;
27189019Sps	struct dn_flow_set *fs;
27289019Sps	struct dn_flow_queue *q;
27389019Sps	int l;
27489019Sps
27589019Sps	if (ac > 0)
27689019Sps		rulenum = strtoul(*av++, NULL, 10);
27789019Sps	else
27889019Sps		rulenum = 0;
27989019Sps	for (; nbytes >= sizeof *p; p = (struct dn_pipe *)next) {
28089019Sps		double b = p->bandwidth;
28189019Sps		char buf[30];
28289019Sps		char prefix[80];
28389019Sps
28489019Sps		if (SLIST_NEXT(p, next) != (struct dn_pipe *)DN_IS_PIPE)
28589019Sps			break;	/* done with pipes, now queues */
28689019Sps
28789019Sps		/*
28889019Sps		 * compute length, as pipe have variable size
28989019Sps		 */
29089019Sps		l = sizeof(*p) + p->fs.rq_elements * sizeof(*q);
29189019Sps		next = (char *)p + l;
29289019Sps		nbytes -= l;
29389019Sps
29489019Sps		if ((rulenum != 0 && rulenum != p->pipe_nr) || co.do_pipe == 2)
29589019Sps			continue;
29689019Sps
29789019Sps		/*
29889019Sps		 * Print rate (or clocking interface)
29989019Sps		 */
30089019Sps		if (p->if_name[0] != '\0')
30160786Sps			sprintf(buf, "%s", p->if_name);
30260786Sps		else if (b == 0)
30360786Sps			sprintf(buf, "unlimited");
30460786Sps		else if (b >= 1000000)
30560786Sps			sprintf(buf, "%7.3f Mbit/s", b/1000000);
30660786Sps		else if (b >= 1000)
30760786Sps			sprintf(buf, "%7.3f Kbit/s", b/1000);
30860786Sps		else
30960786Sps			sprintf(buf, "%7.3f bit/s ", b);
31060786Sps
31160786Sps		sprintf(prefix, "%05d: %s %4d ms ",
31260786Sps		    p->pipe_nr, buf, p->delay);
31360786Sps
31460786Sps		print_extra_delay_parms(p, prefix);
31560786Sps
31689019Sps		print_flowset_parms(&(p->fs), prefix);
31760786Sps
31860786Sps		q = (struct dn_flow_queue *)(p+1);
31960786Sps		list_queues(&(p->fs), q);
32060786Sps	}
32160786Sps	for (fs = next; nbytes >= sizeof *fs; fs = next) {
32260786Sps		char prefix[80];
32360786Sps
32460786Sps		if (SLIST_NEXT(fs, next) != (struct dn_flow_set *)DN_IS_QUEUE)
32560786Sps			break;
32660786Sps		l = sizeof(*fs) + fs->rq_elements * sizeof(*q);
32760786Sps		next = (char *)fs + l;
328		nbytes -= l;
329
330		if (rulenum != 0 && ((rulenum != fs->fs_nr && co.do_pipe == 2) ||
331		    (rulenum != fs->parent_nr && co.do_pipe == 1))) {
332			continue;
333		}
334
335		q = (struct dn_flow_queue *)(fs+1);
336		sprintf(prefix, "q%05d: weight %d pipe %d ",
337		    fs->fs_nr, fs->weight, fs->parent_nr);
338		print_flowset_parms(fs, prefix);
339		list_queues(fs, q);
340	}
341}
342
343/*
344 * Delete pipe or queue i
345 */
346int
347ipfw_delete_pipe(int pipe_or_queue, int i)
348{
349	struct dn_pipe p;
350
351	memset(&p, 0, sizeof p);
352	if (pipe_or_queue == 1)
353		p.pipe_nr = i;		/* pipe */
354	else
355		p.fs.fs_nr = i;		/* queue */
356	i = do_cmd(IP_DUMMYNET_DEL, &p, sizeof p);
357	if (i) {
358		i = 1;
359		warn("rule %u: setsockopt(IP_DUMMYNET_DEL)", i);
360	}
361	return i;
362}
363
/*
 * Code to parse delay profiles.
 *
 * Some link types introduce extra delays in the transmission
 * of a packet, e.g. because of MAC level framing, contention on
 * the use of the channel, MAC level retransmissions and so on.
 * From our point of view, the channel is effectively unavailable
 * for this extra time, which is constant or variable depending
 * on the link type. Additionally, packets may be dropped after this
 * time (e.g. on a wireless link after too many retransmissions).
 * We can model the additional delay with an empirical curve
 * that represents its distribution.
 *
 *	cumulative probability
 *	1.0 ^
 *	    |
 *	L   +-- loss-level          x
 *	    |                 ******
 *	    |                *
 *	    |           *****
 *	    |          *
 *	    |        **
 *	    |       *
 *	    +-------*------------------->
 *			delay
 *
 * The empirical curve may have both vertical and horizontal lines.
 * Vertical lines represent constant delay for a range of
 * probabilities; horizontal lines correspond to a discontinuity
 * in the delay distribution: the pipe will use the largest delay
 * for a given probability.
 *
 * To pass the curve to dummynet, we must store the parameters
 * in a file as described below, and issue the command
 *
 *      ipfw pipe <n> config ... bw XXX profile <filename> ...
 *
 * The file format is the following, with whitespace acting as
 * a separator and '#' indicating the beginning of a comment:
 *
 *	samples N
 *		the number of samples used in the internal
 *		representation (2..1024; default 100);
 *
 *	loss-level L
 *		The probability above which packets are lost.
 *               (0.0 <= L <= 1.0, default 1.0 i.e. no loss);
 *
 *	name identifier
 *		An optional name (listed by "ipfw pipe show")
 *		to identify the distribution;
 *
 *	"delay prob" | "prob delay"
 *		One of these two lines is mandatory and defines
 *		the format of the following lines with data points.
 *
 *	XXX YYY
 *		2 or more lines representing points in the curve,
 *		with either delay or probability first, according
 *		to the chosen format.
 *		The unit for delay is milliseconds.
 *
 * Data points do not need to be ordered, and their count does not
 * need to match the number given in the "samples" line. ipfw will
 * sort and interpolate the curve as needed.
 *
 * Example of a profile file:

        name    bla_bla_bla
        samples 100
        loss-level    0.86
        prob    delay
        0       200	# minimum overhead is 200ms
        0.5     200
        0.5     300
        0.8     1000
        0.9     1300
        1       1300

 * Internally, we will convert the curve to a fixed number of
 * samples, and when it is time to transmit a packet we will
 * model the extra delay as extra bits in the packet.
 *
 */

/*
 * Constants used by the delay-profile parser.
 *
 * ED_MAX_LINE_LEN is the size of the line buffer; the sum is parenthesised
 * so the macro can be used safely inside a larger expression.
 * ED_TOK_* are the keywords recognised in a profile file, ED_SEPARATORS
 * the characters that separate words on a line, and ED_MIN_SAMPLES_NO the
 * minimum number of data points a profile must contain.
 */
#define ED_MAX_LINE_LEN	(256 + ED_MAX_NAME_LEN)
#define ED_TOK_SAMPLES	"samples"
#define ED_TOK_LOSS	"loss-level"
#define ED_TOK_NAME	"name"
#define ED_TOK_DELAY	"delay"
#define ED_TOK_PROB	"prob"
#define ED_TOK_BW	"bw"
#define ED_SEPARATORS	" \t\n"
#define ED_MIN_SAMPLES_NO	2

/*
 * Returns 1 if s is a non-negative decimal number: a string made only of
 * digits with at most one '.', and containing at least one digit.
 * Returns 0 otherwise (sign, exponent, blanks, empty string or a lone
 * "." are all rejected).
 */
static int
is_valid_number(const char *s)
{
	int digits_found = 0, dots_found = 0;

	for (; *s != '\0'; ++s) {
		/* cast: isdigit() is undefined for negative char values */
		if (isdigit((unsigned char)*s))
			digits_found = 1;
		else if (*s != '.' || ++dots_found > 1)
			return 0;
	}
	return digits_found;
}

/*
 * Parse a bandwidth specification.
 *
 * arg is either the name of a clocking interface (when it starts with a
 * lowercase letter) or a number with an optional multiplier and unit:
 * 'K'/'k' multiplies by 1000, 'M' by 1000000, and a byte unit (a 'B'
 * suffix not spelling "Bit/s", or "bytes") multiplies by 8 so that the
 * result is always in bit/s.
 *
 *	arg		the string to parse;
 *	bandwidth	in: -1 if no bandwidth was set yet (anything else
 *			triggers a "duplicate token" warning);
 *			out: the value in bit/s, or 0 for an interface;
 *	if_name		out: interface name, or "" for a numeric value;
 *	namelen		size of the if_name buffer.
 *
 * Exits with EX_DATAERR if the value does not fit in an int.  The range
 * check is done before multiplying, so the arithmetic cannot overflow.
 */
void
read_bandwidth(char *arg, int *bandwidth, char *if_name, int namelen)
{
	if (*bandwidth != -1)
		warnx("duplicate token, override bandwidth value!");

	if (arg[0] >= 'a' && arg[0] <= 'z') {
		/* interface name */
		if (namelen <= 0)
			errx(EX_SOFTWARE, "no room for interface name");
		/* warn only when the name really does not fit */
		if (strlen(arg) >= (size_t)namelen)
			warnx("interface name truncated");
		namelen--;
		strncpy(if_name, arg, namelen);
		if_name[namelen] = '\0';
		*bandwidth = 0;
	} else {	/* read bandwidth value */
		unsigned long long bw, scale = 1;
		char *end = NULL;

		bw = strtoul(arg, &end, 0);
		if (*end == 'K' || *end == 'k') {
			end++;
			scale = 1000;
		} else if (*end == 'M') {
			end++;
			scale = 1000000;
		}
		if ((*end == 'B' &&
			_substrcmp2(end, "Bi", "Bit/s") != 0) ||
		    _substrcmp2(end, "by", "bytes") == 0)
			scale *= 8;

		/* bw * scale must fit in the int we hand back */
		if (bw > (unsigned long long)INT_MAX / scale)
			errx(EX_DATAERR, "bandwidth too large");

		*bandwidth = (int)(bw * scale);
		if_name[0] = '\0';
	}
}

/* One data point of a delay profile curve. */
struct point {
	double prob;	/* cumulative probability */
	double delay;	/* delay associated with that probability */
};

/*
 * qsort() callback ordering struct point by increasing probability and,
 * for equal probabilities, by increasing delay.
 * Returns -1, 0 or 1.
 */
int
compare_points(const void *vp1, const void *vp2)
{
	const struct point *p1 = vp1;
	const struct point *p2 = vp2;
	double res;

	res = p1->prob - p2->prob;
	if (res == 0)
		res = p1->delay - p2->delay;
	/* reduce the difference to its sign */
	return (res > 0) - (res < 0);
}

541#define ED_EFMT(s) EX_DATAERR,"error in %s at line %d: "#s,filename,lineno
542
543static void
544load_extra_delays(const char *filename, struct dn_pipe *p)
545{
546	char    line[ED_MAX_LINE_LEN];
547	FILE    *f;
548	int     lineno = 0;
549	int     i;
550
551	int     samples = -1;
552	double  loss = -1.0;
553	char    profile_name[ED_MAX_NAME_LEN];
554	int     delay_first = -1;
555	int     do_points = 0;
556	struct point    points[ED_MAX_SAMPLES_NO];
557	int     points_no = 0;
558
559	profile_name[0] = '\0';
560	f = fopen(filename, "r");
561	if (f == NULL)
562		err(EX_UNAVAILABLE, "fopen: %s", filename);
563
564	while (fgets(line, ED_MAX_LINE_LEN, f)) {         /* read commands */
565		char *s, *cur = line, *name = NULL, *arg = NULL;
566
567		++lineno;
568
569		/* parse the line */
570		while (cur) {
571			s = strsep(&cur, ED_SEPARATORS);
572			if (s == NULL || *s == '#')
573				break;
574			if (*s == '\0')
575				continue;
576			if (arg)
577				errx(ED_EFMT("too many arguments"));
578			if (name == NULL)
579				name = s;
580			else
581				arg = s;
582		}
583		if (name == NULL)	/* empty line */
584			continue;
585		if (arg == NULL)
586			errx(ED_EFMT("missing arg for %s"), name);
587
588		if (!strcasecmp(name, ED_TOK_SAMPLES)) {
589		    if (samples > 0)
590			errx(ED_EFMT("duplicate ``samples'' line"));
591		    if (atoi(arg) <=0)
592			errx(ED_EFMT("invalid number of samples"));
593		    samples = atoi(arg);
594		    if (samples>ED_MAX_SAMPLES_NO)
595			    errx(ED_EFMT("too many samples, maximum is %d"),
596				ED_MAX_SAMPLES_NO);
597		    do_points = 0;
598		} else if (!strcasecmp(name, ED_TOK_BW)) {
599		    read_bandwidth(arg, &p->bandwidth, p->if_name, sizeof(p->if_name));
600		} else if (!strcasecmp(name, ED_TOK_LOSS)) {
601		    if (loss != -1.0)
602			errx(ED_EFMT("duplicated token: %s"), name);
603		    if (!is_valid_number(arg))
604			errx(ED_EFMT("invalid %s"), arg);
605		    loss = atof(arg);
606		    if (loss > 1)
607			errx(ED_EFMT("%s greater than 1.0"), name);
608		    do_points = 0;
609		} else if (!strcasecmp(name, ED_TOK_NAME)) {
610		    if (profile_name[0] != '\0')
611			errx(ED_EFMT("duplicated token: %s"), name);
612		    strncpy(profile_name, arg, sizeof(profile_name) - 1);
613		    profile_name[sizeof(profile_name)-1] = '\0';
614		    do_points = 0;
615		} else if (!strcasecmp(name, ED_TOK_DELAY)) {
616		    if (do_points)
617			errx(ED_EFMT("duplicated token: %s"), name);
618		    delay_first = 1;
619		    do_points = 1;
620		} else if (!strcasecmp(name, ED_TOK_PROB)) {
621		    if (do_points)
622			errx(ED_EFMT("duplicated token: %s"), name);
623		    delay_first = 0;
624		    do_points = 1;
625		} else if (do_points) {
626		    if (!is_valid_number(name) || !is_valid_number(arg))
627			errx(ED_EFMT("invalid point found"));
628		    if (delay_first) {
629			points[points_no].delay = atof(name);
630			points[points_no].prob = atof(arg);
631		    } else {
632			points[points_no].delay = atof(arg);
633			points[points_no].prob = atof(name);
634		    }
635		    if (points[points_no].prob > 1.0)
636			errx(ED_EFMT("probability greater than 1.0"));
637		    ++points_no;
638		} else {
639		    errx(ED_EFMT("unrecognised command '%s'"), name);
640		}
641	}
642
643	if (samples == -1) {
644	    warnx("'%s' not found, assuming 100", ED_TOK_SAMPLES);
645	    samples = 100;
646	}
647
648	if (loss == -1.0) {
649	    warnx("'%s' not found, assuming no loss", ED_TOK_LOSS);
650	    loss = 1;
651	}
652
653	/* make sure that there are enough points. */
654	if (points_no < ED_MIN_SAMPLES_NO)
655	    errx(ED_EFMT("too few samples, need at least %d"),
656		ED_MIN_SAMPLES_NO);
657
658	qsort(points, points_no, sizeof(struct point), compare_points);
659
660	/* interpolation */
661	for (i = 0; i<points_no-1; ++i) {
662	    double y1 = points[i].prob * samples;
663	    double x1 = points[i].delay;
664	    double y2 = points[i+1].prob * samples;
665	    double x2 = points[i+1].delay;
666
667	    int index = y1;
668	    int stop = y2;
669
670	    if (x1 == x2) {
671		for (; index<stop; ++index)
672		    p->samples[index] = x1;
673	    } else {
674		double m = (y2-y1)/(x2-x1);
675		double c = y1 - m*x1;
676		for (; index<stop ; ++index)
677		    p->samples[index] = (index - c)/m;
678	    }
679	}
680	p->samples_no = samples;
681	p->loss_level = loss * samples;
682	strncpy(p->name, profile_name, sizeof(p->name));
683}
684
/*
 * Implement "ipfw pipe|queue [number] config ...".
 *
 * av[0] is skipped; an optional leading number becomes the pipe number
 * (co.do_pipe == 1) or the queue number (otherwise).  The remaining words
 * are matched against dummynet_params[] and accumulated into a
 * struct dn_pipe, which is then validated and passed to the kernel with
 * do_cmd(IP_DUMMYNET_CONFIGURE).  When a delay profile was loaded, the
 * pipe and its samples are sent together as a struct dn_pipe_max.
 *
 * Queue size limits and the RED parameters are read with sysctlbyname().
 *
 * The function returns only on success; every error terminates the
 * program through err()/errx().
 */
void
ipfw_config_pipe(int ac, char **av)
{
	int samples[ED_MAX_SAMPLES_NO];
	struct dn_pipe p;
	int i;
	char *end;

	memset(&p, 0, sizeof p);
	/*
	 * The whole array may be copied to the kernel below, while a
	 * profile does not necessarily fill every slot.
	 */
	memset(samples, 0, sizeof samples);
	p.bandwidth = -1;	/* "not set yet", see read_bandwidth() */

	av++; ac--;
	/* Pipe number */
	if (ac && isdigit((unsigned char)**av)) {
		i = atoi(*av); av++; ac--;
		if (co.do_pipe == 1)
			p.pipe_nr = i;
		else
			p.fs.fs_nr = i;
	}
	while (ac > 0) {
		double d;
		int tok = match_token(dummynet_params, *av);
		ac--; av++;

		switch(tok) {
		case TOK_NOERROR:
			p.fs.flags_fs |= DN_NOERROR;
			break;

		case TOK_PLR:
			NEED1("plr needs argument 0..1\n");
			d = strtod(av[0], NULL);
			/* clamp to 0..1, then scale to the integer form */
			if (d > 1)
				d = 1;
			else if (d < 0)
				d = 0;
			p.fs.plr = (int)(d*0x7fffffff);
			ac--; av++;
			break;

		case TOK_QUEUE:
			NEED1("queue needs queue size\n");
			end = NULL;
			p.fs.qsize = strtoul(av[0], &end, 0);
			if (*end == 'K' || *end == 'k') {
				p.fs.flags_fs |= DN_QSIZE_IS_BYTES;
				p.fs.qsize *= 1024;
			} else if (*end == 'B' ||
			    _substrcmp2(end, "by", "bytes") == 0) {
				p.fs.flags_fs |= DN_QSIZE_IS_BYTES;
			}
			ac--; av++;
			break;

		case TOK_BUCKETS:
			NEED1("buckets needs argument\n");
			p.fs.rq_size = strtoul(av[0], NULL, 0);
			ac--; av++;
			break;

		case TOK_MASK:
			NEED1("mask needs mask specifier\n");
			/*
			 * per-flow queue, mask is dst_ip, dst_port,
			 * src_ip, src_port, proto measured in bits
			 */
			bzero(&p.fs.flow_mask, sizeof(p.fs.flow_mask));
			end = NULL;

			while (ac >= 1) {
			    /* at most one of these is set per keyword */
			    uint32_t *p32 = NULL;
			    uint16_t *p16 = NULL;
			    uint32_t *p20 = NULL;
			    struct in6_addr *pa6 = NULL;
			    uint32_t a;

			    tok = match_token(dummynet_params, *av);
			    ac--; av++;
			    switch(tok) {
			    case TOK_ALL:
				    /*
				     * special case, all bits significant
				     */
				    p.fs.flow_mask.dst_ip = ~0;
				    p.fs.flow_mask.src_ip = ~0;
				    p.fs.flow_mask.dst_port = ~0;
				    p.fs.flow_mask.src_port = ~0;
				    p.fs.flow_mask.proto = ~0;
				    n2mask(&(p.fs.flow_mask.dst_ip6), 128);
				    n2mask(&(p.fs.flow_mask.src_ip6), 128);
				    p.fs.flow_mask.flow_id6 = ~0;
				    p.fs.flags_fs |= DN_HAVE_FLOW_MASK;
				    goto end_mask;

			    case TOK_DSTIP:
				    p32 = &p.fs.flow_mask.dst_ip;
				    break;

			    case TOK_SRCIP:
				    p32 = &p.fs.flow_mask.src_ip;
				    break;

			    case TOK_DSTIP6:
				    pa6 = &(p.fs.flow_mask.dst_ip6);
				    break;

			    case TOK_SRCIP6:
				    pa6 = &(p.fs.flow_mask.src_ip6);
				    break;

			    case TOK_FLOWID:
				    p20 = &p.fs.flow_mask.flow_id6;
				    break;

			    case TOK_DSTPORT:
				    p16 = &p.fs.flow_mask.dst_port;
				    break;

			    case TOK_SRCPORT:
				    p16 = &p.fs.flow_mask.src_port;
				    break;

			    case TOK_PROTO:
				    /* no pointer set: handled by the final else */
				    break;

			    default:
				    ac++; av--; /* backtrack */
				    goto end_mask;
			    }
			    if (ac < 1)
				    errx(EX_USAGE, "mask: value missing");
			    if (*av[0] == '/') {
				    /* "/n": a mask with the n low bits set */
				    a = strtoul(av[0]+1, &end, 0);
				    if (pa6 == NULL) {
					    /*
					     * Shifting by 32 or more is
					     * undefined, so reject it and
					     * shift an unsigned value.
					     */
					    if (a > 32)
						    errx(EX_DATAERR,
							"mask length must be <= 32");
					    a = (a == 32) ? ~0U : (1U << a) - 1;
				    }
			    } else
				    a = strtoul(av[0], &end, 0);
			    if (p32 != NULL)
				    *p32 = a;
			    else if (p16 != NULL) {
				    if (a > 0xFFFF)
					    errx(EX_DATAERR,
						"port mask must be 16 bit");
				    *p16 = (uint16_t)a;
			    } else if (p20 != NULL) {
				    if (a > 0xfffff)
					errx(EX_DATAERR,
					    "flow_id mask must be 20 bit");
				    *p20 = (uint32_t)a;
			    } else if (pa6 != NULL) {
				    if (a > 128)
					errx(EX_DATAERR,
					    "in6addr invalid mask len");
				    else
					n2mask(pa6, a);
			    } else {
				    if (a > 0xFF)
					    errx(EX_DATAERR,
						"proto mask must be 8 bit");
				    p.fs.flow_mask.proto = (uint8_t)a;
			    }
			    if (a != 0)
				    p.fs.flags_fs |= DN_HAVE_FLOW_MASK;
			    ac--; av++;
			} /* end while, config masks */
end_mask:
			break;

		case TOK_RED:
		case TOK_GRED:
			NEED1("red/gred needs w_q/min_th/max_th/max_p\n");
			p.fs.flags_fs |= DN_IS_RED;
			if (tok == TOK_GRED)
				p.fs.flags_fs |= DN_IS_GENTLE_RED;
			/*
			 * the format for parameters is w_q/min_th/max_th/max_p
			 */
			if ((end = strsep(&av[0], "/"))) {
			    double w_q = strtod(end, NULL);
			    if (w_q > 1 || w_q <= 0)
				errx(EX_DATAERR, "0 < w_q <= 1");
			    p.fs.w_q = (int) (w_q * (1 << SCALE_RED));
			}
			if ((end = strsep(&av[0], "/"))) {
			    p.fs.min_th = strtoul(end, &end, 0);
			    if (*end == 'K' || *end == 'k')
				p.fs.min_th *= 1024;
			}
			if ((end = strsep(&av[0], "/"))) {
			    p.fs.max_th = strtoul(end, &end, 0);
			    if (*end == 'K' || *end == 'k')
				p.fs.max_th *= 1024;
			}
			if ((end = strsep(&av[0], "/"))) {
			    double max_p = strtod(end, NULL);
			    if (max_p > 1 || max_p <= 0)
				errx(EX_DATAERR, "0 < max_p <= 1");
			    p.fs.max_p = (int)(max_p * (1 << SCALE_RED));
			}
			ac--; av++;
			break;

		case TOK_DROPTAIL:
			p.fs.flags_fs &= ~(DN_IS_RED|DN_IS_GENTLE_RED);
			break;

		case TOK_BW:
			NEED1("bw needs bandwidth or interface\n");
			if (co.do_pipe != 1)
			    errx(EX_DATAERR, "bandwidth only valid for pipes");
			read_bandwidth(av[0], &p.bandwidth, p.if_name, sizeof(p.if_name));
			ac--; av++;
			break;

		case TOK_DELAY:
			if (co.do_pipe != 1)
				errx(EX_DATAERR, "delay only valid for pipes");
			NEED1("delay needs argument 0..10000ms\n");
			p.delay = strtoul(av[0], NULL, 0);
			ac--; av++;
			break;

		case TOK_WEIGHT:
			if (co.do_pipe == 1)
				errx(EX_DATAERR,"weight only valid for queues");
			NEED1("weight needs argument 0..100\n");
			p.fs.weight = strtoul(av[0], &end, 0);
			ac--; av++;
			break;

		case TOK_PIPE:
			if (co.do_pipe == 1)
				errx(EX_DATAERR,"pipe only valid for queues");
			NEED1("pipe needs pipe_number\n");
			p.fs.parent_nr = strtoul(av[0], &end, 0);
			ac--; av++;
			break;

		case TOK_PIPE_PROFILE:
			if (co.do_pipe != 1)
			    errx(EX_DATAERR, "extra delay only valid for pipes");
			NEED1("extra delay needs the file name\n");
			/* the parser writes the curve into our local array */
			p.samples = &samples[0];
			load_extra_delays(av[0], &p);
			--ac; ++av;
			break;

		default:
			errx(EX_DATAERR, "unrecognised option ``%s''", av[-1]);
		}
	}
	if (co.do_pipe == 1) {
		if (p.pipe_nr == 0)
			errx(EX_DATAERR, "pipe_nr must be > 0");
		if (p.delay > 10000)
			errx(EX_DATAERR, "delay must be < 10000");
	} else { /* co.do_pipe == 2, queue */
		if (p.fs.parent_nr == 0)
			errx(EX_DATAERR, "pipe must be > 0");
		if (p.fs.weight >100)
			errx(EX_DATAERR, "weight must be <= 100");
	}

	/* check for bandwidth value */
	if (p.bandwidth == -1) {
		p.bandwidth = 0;
		if (p.samples_no > 0)
			errx(EX_DATAERR, "profile requires a bandwidth limit");
	}

	/*
	 * Check the queue size against the kernel limit; a fallback limit
	 * is used if the sysctl cannot be read.
	 */
	if (p.fs.flags_fs & DN_QSIZE_IS_BYTES) {
		size_t len;
		long limit;

		len = sizeof(limit);
		if (sysctlbyname("net.inet.ip.dummynet.pipe_byte_limit",
			&limit, &len, NULL, 0) == -1)
			limit = 1024*1024;
		if (p.fs.qsize > limit)
			errx(EX_DATAERR, "queue size must be < %ldB", limit);
	} else {
		size_t len;
		long limit;

		len = sizeof(limit);
		if (sysctlbyname("net.inet.ip.dummynet.pipe_slot_limit",
			&limit, &len, NULL, 0) == -1)
			limit = 100;
		if (p.fs.qsize > limit)
			errx(EX_DATAERR, "2 <= queue size <= %ld", limit);
	}
	if (p.fs.flags_fs & DN_IS_RED) {
		size_t len;
		int lookup_depth, avg_pkt_size;
		double s, idle, weight, w_q;
		struct clockinfo ck;
		int t;

		if (p.fs.min_th >= p.fs.max_th)
		    errx(EX_DATAERR, "min_th %d must be < than max_th %d",
			p.fs.min_th, p.fs.max_th);
		if (p.fs.max_th == 0)
		    errx(EX_DATAERR, "max_th must be > 0");

		len = sizeof(int);
		if (sysctlbyname("net.inet.ip.dummynet.red_lookup_depth",
			&lookup_depth, &len, NULL, 0) == -1)
		    errx(1, "sysctlbyname(\"%s\")",
			"net.inet.ip.dummynet.red_lookup_depth");
		if (lookup_depth == 0)
		    errx(EX_DATAERR, "net.inet.ip.dummynet.red_lookup_depth"
			" must be greater than zero");

		len = sizeof(int);
		if (sysctlbyname("net.inet.ip.dummynet.red_avg_pkt_size",
			&avg_pkt_size, &len, NULL, 0) == -1)

		    errx(1, "sysctlbyname(\"%s\")",
			"net.inet.ip.dummynet.red_avg_pkt_size");
		if (avg_pkt_size == 0)
			errx(EX_DATAERR,
			    "net.inet.ip.dummynet.red_avg_pkt_size must"
			    " be greater than zero");

		len = sizeof(struct clockinfo);
		if (sysctlbyname("kern.clockrate", &ck, &len, NULL, 0) == -1)
			errx(1, "sysctlbyname(\"%s\")", "kern.clockrate");

		/*
		 * Ticks needed for sending a medium-sized packet.
		 * Unfortunately, when we are configuring a WF2Q+ queue, we
		 * do not have bandwidth information, because that is stored
		 * in the parent pipe, and also we have multiple queues
		 * competing for it. So we set s=0, which is not very
		 * correct. But on the other hand, why do we want RED with
		 * WF2Q+ ?
		 */
		if (p.bandwidth==0) /* this is a WF2Q+ queue */
			s = 0;
		else
			s = (double)ck.hz * avg_pkt_size * 8 / p.bandwidth;

		/*
		 * max idle time (in ticks) before avg queue size becomes 0.
		 * NOTE:  (3/w_q) is approx the value x so that
		 * (1-w_q)^x < 10^-3.
		 */
		w_q = ((double)p.fs.w_q) / (1 << SCALE_RED);
		idle = s * 3. / w_q;
		p.fs.lookup_step = (int)idle / lookup_depth;
		if (!p.fs.lookup_step)
			p.fs.lookup_step = 1;
		/* weight = (1 - w_q) ^ lookup_step */
		weight = 1 - w_q;
		for (t = p.fs.lookup_step; t > 1; --t)
			weight *= 1 - w_q;
		p.fs.lookup_weight = (int)(weight * (1 << SCALE_RED));
	}
	if (p.samples_no <= 0) {
		/* no delay profile: the plain descriptor is enough */
		i = do_cmd(IP_DUMMYNET_CONFIGURE, &p, sizeof p);
	} else {
		/* send descriptor and samples together */
		struct dn_pipe_max pm;
		int len = sizeof(pm);

		memcpy(&pm.pipe, &p, sizeof(pm.pipe));
		memcpy(&pm.samples, samples, sizeof(pm.samples));

		i = do_cmd(IP_DUMMYNET_CONFIGURE, &pm, len);
	}

	if (i)
		err(1, "setsockopt(%s)", "IP_DUMMYNET_CONFIGURE");
}
