pfctl_altq.c revision 171172
1/*	$OpenBSD: pfctl_altq.c,v 1.91 2006/11/28 00:08:50 henning Exp $	*/
2
3/*
4 * Copyright (c) 2002
5 *	Sony Computer Science Laboratories Inc.
6 * Copyright (c) 2002, 2003 Henning Brauer <henning@openbsd.org>
7 *
8 * Permission to use, copy, modify, and distribute this software for any
9 * purpose with or without fee is hereby granted, provided that the above
10 * copyright notice and this permission notice appear in all copies.
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19 */
20
21#include <sys/cdefs.h>
22__FBSDID("$FreeBSD: head/contrib/pf/pfctl/pfctl_altq.c 171172 2007-07-03 12:30:03Z mlaier $");
23
24#include <sys/param.h>
25#include <sys/ioctl.h>
26#include <sys/socket.h>
27
28#include <net/if.h>
29#include <netinet/in.h>
30#include <net/pfvar.h>
31
32#include <err.h>
33#include <errno.h>
34#include <limits.h>
35#include <math.h>
36#include <stdio.h>
37#include <stdlib.h>
38#include <string.h>
39#include <unistd.h>
40
41#include <altq/altq.h>
42#include <altq/altq_cbq.h>
43#include <altq/altq_priq.h>
44#include <altq/altq_hfsc.h>
45
46#include "pfctl_parser.h"
47#include "pfctl.h"
48
49#define is_sc_null(sc)	(((sc) == NULL) || ((sc)->m1 == 0 && (sc)->m2 == 0))
50
51TAILQ_HEAD(altqs, pf_altq) altqs = TAILQ_HEAD_INITIALIZER(altqs);
52LIST_HEAD(gen_sc, segment) rtsc, lssc;
53
54struct pf_altq	*qname_to_pfaltq(const char *, const char *);
55u_int32_t	 qname_to_qid(const char *);
56
57static int	eval_pfqueue_cbq(struct pfctl *, struct pf_altq *);
58static int	cbq_compute_idletime(struct pfctl *, struct pf_altq *);
59static int	check_commit_cbq(int, int, struct pf_altq *);
60static int	print_cbq_opts(const struct pf_altq *);
61
62static int	eval_pfqueue_priq(struct pfctl *, struct pf_altq *);
63static int	check_commit_priq(int, int, struct pf_altq *);
64static int	print_priq_opts(const struct pf_altq *);
65
66static int	eval_pfqueue_hfsc(struct pfctl *, struct pf_altq *);
67static int	check_commit_hfsc(int, int, struct pf_altq *);
68static int	print_hfsc_opts(const struct pf_altq *,
69		    const struct node_queue_opt *);
70
71static void		 gsc_add_sc(struct gen_sc *, struct service_curve *);
72static int		 is_gsc_under_sc(struct gen_sc *,
73			     struct service_curve *);
74static void		 gsc_destroy(struct gen_sc *);
75static struct segment	*gsc_getentry(struct gen_sc *, double);
76static int		 gsc_add_seg(struct gen_sc *, double, double, double,
77			     double);
78static double		 sc_x2y(struct service_curve *, double);
79
80#ifdef __FreeBSD__
81u_int32_t	 getifspeed(int, char *);
82#else
83u_int32_t	 getifspeed(char *);
84#endif
85u_long		 getifmtu(char *);
86int		 eval_queue_opts(struct pf_altq *, struct node_queue_opt *,
87		     u_int32_t);
88u_int32_t	 eval_bwspec(struct node_queue_bw *, u_int32_t);
89void		 print_hfsc_sc(const char *, u_int, u_int, u_int,
90		     const struct node_hfsc_sc *);
91
92void
93pfaltq_store(struct pf_altq *a)
94{
95	struct pf_altq	*altq;
96
97	if ((altq = malloc(sizeof(*altq))) == NULL)
98		err(1, "malloc");
99	memcpy(altq, a, sizeof(struct pf_altq));
100	TAILQ_INSERT_TAIL(&altqs, altq, entries);
101}
102
103struct pf_altq *
104pfaltq_lookup(const char *ifname)
105{
106	struct pf_altq	*altq;
107
108	TAILQ_FOREACH(altq, &altqs, entries) {
109		if (strncmp(ifname, altq->ifname, IFNAMSIZ) == 0 &&
110		    altq->qname[0] == 0)
111			return (altq);
112	}
113	return (NULL);
114}
115
116struct pf_altq *
117qname_to_pfaltq(const char *qname, const char *ifname)
118{
119	struct pf_altq	*altq;
120
121	TAILQ_FOREACH(altq, &altqs, entries) {
122		if (strncmp(ifname, altq->ifname, IFNAMSIZ) == 0 &&
123		    strncmp(qname, altq->qname, PF_QNAME_SIZE) == 0)
124			return (altq);
125	}
126	return (NULL);
127}
128
129u_int32_t
130qname_to_qid(const char *qname)
131{
132	struct pf_altq	*altq;
133
134	/*
135	 * We guarantee that same named queues on different interfaces
136	 * have the same qid, so we do NOT need to limit matching on
137	 * one interface!
138	 */
139
140	TAILQ_FOREACH(altq, &altqs, entries) {
141		if (strncmp(qname, altq->qname, PF_QNAME_SIZE) == 0)
142			return (altq->qid);
143	}
144	return (0);
145}
146
147void
148print_altq(const struct pf_altq *a, unsigned level, struct node_queue_bw *bw,
149	struct node_queue_opt *qopts)
150{
151	if (a->qname[0] != 0) {
152		print_queue(a, level, bw, 1, qopts);
153		return;
154	}
155
156	printf("altq on %s ", a->ifname);
157
158	switch (a->scheduler) {
159	case ALTQT_CBQ:
160		if (!print_cbq_opts(a))
161			printf("cbq ");
162		break;
163	case ALTQT_PRIQ:
164		if (!print_priq_opts(a))
165			printf("priq ");
166		break;
167	case ALTQT_HFSC:
168		if (!print_hfsc_opts(a, qopts))
169			printf("hfsc ");
170		break;
171	}
172
173	if (bw != NULL && bw->bw_percent > 0) {
174		if (bw->bw_percent < 100)
175			printf("bandwidth %u%% ", bw->bw_percent);
176	} else
177		printf("bandwidth %s ", rate2str((double)a->ifbandwidth));
178
179	if (a->qlimit != DEFAULT_QLIMIT)
180		printf("qlimit %u ", a->qlimit);
181	printf("tbrsize %u ", a->tbrsize);
182}
183
184void
185print_queue(const struct pf_altq *a, unsigned level, struct node_queue_bw *bw,
186    int print_interface, struct node_queue_opt *qopts)
187{
188	unsigned	i;
189
190	printf("queue ");
191	for (i = 0; i < level; ++i)
192		printf(" ");
193	printf("%s ", a->qname);
194	if (print_interface)
195		printf("on %s ", a->ifname);
196	if (a->scheduler == ALTQT_CBQ || a->scheduler == ALTQT_HFSC) {
197		if (bw != NULL && bw->bw_percent > 0) {
198			if (bw->bw_percent < 100)
199				printf("bandwidth %u%% ", bw->bw_percent);
200		} else
201			printf("bandwidth %s ", rate2str((double)a->bandwidth));
202	}
203	if (a->priority != DEFAULT_PRIORITY)
204		printf("priority %u ", a->priority);
205	if (a->qlimit != DEFAULT_QLIMIT)
206		printf("qlimit %u ", a->qlimit);
207	switch (a->scheduler) {
208	case ALTQT_CBQ:
209		print_cbq_opts(a);
210		break;
211	case ALTQT_PRIQ:
212		print_priq_opts(a);
213		break;
214	case ALTQT_HFSC:
215		print_hfsc_opts(a, qopts);
216		break;
217	}
218}
219
220/*
221 * eval_pfaltq computes the discipline parameters.
222 */
/*
 * Compute the interface-level discipline parameters for "pa":
 * resolve the interface bandwidth (explicit, percentage, or probed
 * from the kernel) and pick a token-bucket regulator size when none
 * was given.  Returns the number of errors encountered (0 = ok).
 */
int
eval_pfaltq(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw,
    struct node_queue_opt *opts)
{
	u_int	rate, size, errors = 0;

	/*
	 * NOTE: the else-branch below is a single if/else-if statement
	 * whose header differs per platform; only the condition line is
	 * conditionally compiled.
	 */
	if (bw->bw_absolute > 0)
		pa->ifbandwidth = bw->bw_absolute;
	else
#ifdef __FreeBSD__
		if ((rate = getifspeed(pf->dev, pa->ifname)) == 0) {
#else
		if ((rate = getifspeed(pa->ifname)) == 0) {
#endif
			fprintf(stderr, "interface %s does not know its bandwidth, "
			    "please specify an absolute bandwidth\n",
			    pa->ifname);
			errors++;
		} else if ((pa->ifbandwidth = eval_bwspec(bw, rate)) == 0)
			pa->ifbandwidth = rate;

	errors += eval_queue_opts(pa, opts, pa->ifbandwidth);

	/* if tbrsize is not specified, use heuristics */
	if (pa->tbrsize == 0) {
		rate = pa->ifbandwidth;
		/* burst allowance in packets grows with the link speed */
		if (rate <= 1 * 1000 * 1000)
			size = 1;
		else if (rate <= 10 * 1000 * 1000)
			size = 4;
		else if (rate <= 200 * 1000 * 1000)
			size = 8;
		else
			size = 24;
		size = size * getifmtu(pa->ifname);
		/* tbrsize is a 16-bit field in the kernel; clamp */
		if (size > 0xffff)
			size = 0xffff;
		pa->tbrsize = size;
	}
	return (errors);
}
264
265/*
266 * check_commit_altq does consistency check for each interface
267 */
268int
269check_commit_altq(int dev, int opts)
270{
271	struct pf_altq	*altq;
272	int		 error = 0;
273
274	/* call the discipline check for each interface. */
275	TAILQ_FOREACH(altq, &altqs, entries) {
276		if (altq->qname[0] == 0) {
277			switch (altq->scheduler) {
278			case ALTQT_CBQ:
279				error = check_commit_cbq(dev, opts, altq);
280				break;
281			case ALTQT_PRIQ:
282				error = check_commit_priq(dev, opts, altq);
283				break;
284			case ALTQT_HFSC:
285				error = check_commit_hfsc(dev, opts, altq);
286				break;
287			default:
288				break;
289			}
290		}
291	}
292	return (error);
293}
294
295/*
296 * eval_pfqueue computes the queue parameters.
297 */
/*
 * Compute and validate the parameters of one queue: inherit the
 * scheduler and interface bandwidth from the interface record,
 * resolve the parent linkage, check bandwidth budgets, and finally
 * dispatch to the scheduler-specific evaluator.
 * Returns 0 on success, non-zero on error.
 */
int
eval_pfqueue(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw,
    struct node_queue_opt *opts)
{
	/* should be merged with expand_queue */
	struct pf_altq	*if_pa, *parent, *altq;
	u_int32_t	 bwsum;
	int		 error = 0;

	/* find the corresponding interface and copy fields used by queues */
	if ((if_pa = pfaltq_lookup(pa->ifname)) == NULL) {
		fprintf(stderr, "altq not defined on %s\n", pa->ifname);
		return (1);
	}
	pa->scheduler = if_pa->scheduler;
	pa->ifbandwidth = if_pa->ifbandwidth;

	/* a queue name may appear only once per interface */
	if (qname_to_pfaltq(pa->qname, pa->ifname) != NULL) {
		fprintf(stderr, "queue %s already exists on interface %s\n",
		    pa->qname, pa->ifname);
		return (1);
	}
	/* reuse the qid of a same-named queue on another interface, if any */
	pa->qid = qname_to_qid(pa->qname);

	parent = NULL;
	if (pa->parent[0] != 0) {
		parent = qname_to_pfaltq(pa->parent, pa->ifname);
		if (parent == NULL) {
			fprintf(stderr, "parent %s not found for %s\n",
			    pa->parent, pa->qname);
			return (1);
		}
		pa->parent_qid = parent->qid;
	}
	if (pa->qlimit == 0)
		pa->qlimit = DEFAULT_QLIMIT;

	/* only CBQ and HFSC assign per-queue bandwidth */
	if (pa->scheduler == ALTQT_CBQ || pa->scheduler == ALTQT_HFSC) {
		/* percentages are taken relative to the parent's share */
		pa->bandwidth = eval_bwspec(bw,
		    parent == NULL ? 0 : parent->bandwidth);

		if (pa->bandwidth > pa->ifbandwidth) {
			fprintf(stderr, "bandwidth for %s higher than "
			    "interface\n", pa->qname);
			return (1);
		}
		/* check the sum of the child bandwidth is under parent's */
		if (parent != NULL) {
			if (pa->bandwidth > parent->bandwidth) {
				warnx("bandwidth for %s higher than parent",
				    pa->qname);
				return (1);
			}
			/* sum over existing siblings plus this queue */
			bwsum = 0;
			TAILQ_FOREACH(altq, &altqs, entries) {
				if (strncmp(altq->ifname, pa->ifname,
				    IFNAMSIZ) == 0 &&
				    altq->qname[0] != 0 &&
				    strncmp(altq->parent, pa->parent,
				    PF_QNAME_SIZE) == 0)
					bwsum += altq->bandwidth;
			}
			bwsum += pa->bandwidth;
			/* over-commit is only warned about, not fatal */
			if (bwsum > parent->bandwidth) {
				warnx("the sum of the child bandwidth higher"
				    " than parent \"%s\"", parent->qname);
			}
		}
	}

	if (eval_queue_opts(pa, opts, parent == NULL? 0 : parent->bandwidth))
		return (1);

	switch (pa->scheduler) {
	case ALTQT_CBQ:
		error = eval_pfqueue_cbq(pf, pa);
		break;
	case ALTQT_PRIQ:
		error = eval_pfqueue_priq(pf, pa);
		break;
	case ALTQT_HFSC:
		error = eval_pfqueue_hfsc(pf, pa);
		break;
	default:
		break;
	}
	return (error);
}
386
387/*
388 * CBQ support functions
389 */
390#define	RM_FILTER_GAIN	5	/* log2 of gain, e.g., 5 => 31/32 */
391#define	RM_NS_PER_SEC	(1000000000)
392
393static int
394eval_pfqueue_cbq(struct pfctl *pf, struct pf_altq *pa)
395{
396	struct cbq_opts	*opts;
397	u_int		 ifmtu;
398
399	if (pa->priority >= CBQ_MAXPRI) {
400		warnx("priority out of range: max %d", CBQ_MAXPRI - 1);
401		return (-1);
402	}
403
404	ifmtu = getifmtu(pa->ifname);
405	opts = &pa->pq_u.cbq_opts;
406
407	if (opts->pktsize == 0) {	/* use default */
408		opts->pktsize = ifmtu;
409		if (opts->pktsize > MCLBYTES)	/* do what TCP does */
410			opts->pktsize &= ~MCLBYTES;
411	} else if (opts->pktsize > ifmtu)
412		opts->pktsize = ifmtu;
413	if (opts->maxpktsize == 0)	/* use default */
414		opts->maxpktsize = ifmtu;
415	else if (opts->maxpktsize > ifmtu)
416		opts->pktsize = ifmtu;
417
418	if (opts->pktsize > opts->maxpktsize)
419		opts->pktsize = opts->maxpktsize;
420
421	if (pa->parent[0] == 0)
422		opts->flags |= (CBQCLF_ROOTCLASS | CBQCLF_WRR);
423
424	cbq_compute_idletime(pf, pa);
425	return (0);
426}
427
428/*
429 * compute ns_per_byte, maxidle, minidle, and offtime
430 */
431static int
432cbq_compute_idletime(struct pfctl *pf, struct pf_altq *pa)
433{
434	struct cbq_opts	*opts;
435	double		 maxidle_s, maxidle, minidle;
436	double		 offtime, nsPerByte, ifnsPerByte, ptime, cptime;
437	double		 z, g, f, gton, gtom;
438	u_int		 minburst, maxburst;
439
440	opts = &pa->pq_u.cbq_opts;
441	ifnsPerByte = (1.0 / (double)pa->ifbandwidth) * RM_NS_PER_SEC * 8;
442	minburst = opts->minburst;
443	maxburst = opts->maxburst;
444
445	if (pa->bandwidth == 0)
446		f = 0.0001;	/* small enough? */
447	else
448		f = ((double) pa->bandwidth / (double) pa->ifbandwidth);
449
450	nsPerByte = ifnsPerByte / f;
451	ptime = (double)opts->pktsize * ifnsPerByte;
452	cptime = ptime * (1.0 - f) / f;
453
454	if (nsPerByte * (double)opts->maxpktsize > (double)INT_MAX) {
455		/*
456		 * this causes integer overflow in kernel!
457		 * (bandwidth < 6Kbps when max_pkt_size=1500)
458		 */
459		if (pa->bandwidth != 0 && (pf->opts & PF_OPT_QUIET) == 0)
460			warnx("queue bandwidth must be larger than %s",
461			    rate2str(ifnsPerByte * (double)opts->maxpktsize /
462			    (double)INT_MAX * (double)pa->ifbandwidth));
463			fprintf(stderr, "cbq: queue %s is too slow!\n",
464			    pa->qname);
465		nsPerByte = (double)(INT_MAX / opts->maxpktsize);
466	}
467
468	if (maxburst == 0) {  /* use default */
469		if (cptime > 10.0 * 1000000)
470			maxburst = 4;
471		else
472			maxburst = 16;
473	}
474	if (minburst == 0)  /* use default */
475		minburst = 2;
476	if (minburst > maxburst)
477		minburst = maxburst;
478
479	z = (double)(1 << RM_FILTER_GAIN);
480	g = (1.0 - 1.0 / z);
481	gton = pow(g, (double)maxburst);
482	gtom = pow(g, (double)(minburst-1));
483	maxidle = ((1.0 / f - 1.0) * ((1.0 - gton) / gton));
484	maxidle_s = (1.0 - g);
485	if (maxidle > maxidle_s)
486		maxidle = ptime * maxidle;
487	else
488		maxidle = ptime * maxidle_s;
489	offtime = cptime * (1.0 + 1.0/(1.0 - g) * (1.0 - gtom) / gtom);
490	minidle = -((double)opts->maxpktsize * (double)nsPerByte);
491
492	/* scale parameters */
493	maxidle = ((maxidle * 8.0) / nsPerByte) *
494	    pow(2.0, (double)RM_FILTER_GAIN);
495	offtime = (offtime * 8.0) / nsPerByte *
496	    pow(2.0, (double)RM_FILTER_GAIN);
497	minidle = ((minidle * 8.0) / nsPerByte) *
498	    pow(2.0, (double)RM_FILTER_GAIN);
499
500	maxidle = maxidle / 1000.0;
501	offtime = offtime / 1000.0;
502	minidle = minidle / 1000.0;
503
504	opts->minburst = minburst;
505	opts->maxburst = maxburst;
506	opts->ns_per_byte = (u_int)nsPerByte;
507	opts->maxidle = (u_int)fabs(maxidle);
508	opts->minidle = (int)minidle;
509	opts->offtime = (u_int)fabs(offtime);
510
511	return (0);
512}
513
514static int
515check_commit_cbq(int dev, int opts, struct pf_altq *pa)
516{
517	struct pf_altq	*altq;
518	int		 root_class, default_class;
519	int		 error = 0;
520
521	/*
522	 * check if cbq has one root queue and one default queue
523	 * for this interface
524	 */
525	root_class = default_class = 0;
526	TAILQ_FOREACH(altq, &altqs, entries) {
527		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
528			continue;
529		if (altq->qname[0] == 0)  /* this is for interface */
530			continue;
531		if (altq->pq_u.cbq_opts.flags & CBQCLF_ROOTCLASS)
532			root_class++;
533		if (altq->pq_u.cbq_opts.flags & CBQCLF_DEFCLASS)
534			default_class++;
535	}
536	if (root_class != 1) {
537		warnx("should have one root queue on %s", pa->ifname);
538		error++;
539	}
540	if (default_class != 1) {
541		warnx("should have one default queue on %s", pa->ifname);
542		error++;
543	}
544	return (error);
545}
546
547static int
548print_cbq_opts(const struct pf_altq *a)
549{
550	const struct cbq_opts	*opts;
551
552	opts = &a->pq_u.cbq_opts;
553	if (opts->flags) {
554		printf("cbq(");
555		if (opts->flags & CBQCLF_RED)
556			printf(" red");
557		if (opts->flags & CBQCLF_ECN)
558			printf(" ecn");
559		if (opts->flags & CBQCLF_RIO)
560			printf(" rio");
561		if (opts->flags & CBQCLF_CLEARDSCP)
562			printf(" cleardscp");
563		if (opts->flags & CBQCLF_FLOWVALVE)
564			printf(" flowvalve");
565		if (opts->flags & CBQCLF_BORROW)
566			printf(" borrow");
567		if (opts->flags & CBQCLF_WRR)
568			printf(" wrr");
569		if (opts->flags & CBQCLF_EFFICIENT)
570			printf(" efficient");
571		if (opts->flags & CBQCLF_ROOTCLASS)
572			printf(" root");
573		if (opts->flags & CBQCLF_DEFCLASS)
574			printf(" default");
575		printf(" ) ");
576
577		return (1);
578	} else
579		return (0);
580}
581
582/*
583 * PRIQ support functions
584 */
585static int
586eval_pfqueue_priq(struct pfctl *pf, struct pf_altq *pa)
587{
588	struct pf_altq	*altq;
589
590	if (pa->priority >= PRIQ_MAXPRI) {
591		warnx("priority out of range: max %d", PRIQ_MAXPRI - 1);
592		return (-1);
593	}
594	/* the priority should be unique for the interface */
595	TAILQ_FOREACH(altq, &altqs, entries) {
596		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) == 0 &&
597		    altq->qname[0] != 0 && altq->priority == pa->priority) {
598			warnx("%s and %s have the same priority",
599			    altq->qname, pa->qname);
600			return (-1);
601		}
602	}
603
604	return (0);
605}
606
607static int
608check_commit_priq(int dev, int opts, struct pf_altq *pa)
609{
610	struct pf_altq	*altq;
611	int		 default_class;
612	int		 error = 0;
613
614	/*
615	 * check if priq has one default class for this interface
616	 */
617	default_class = 0;
618	TAILQ_FOREACH(altq, &altqs, entries) {
619		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
620			continue;
621		if (altq->qname[0] == 0)  /* this is for interface */
622			continue;
623		if (altq->pq_u.priq_opts.flags & PRCF_DEFAULTCLASS)
624			default_class++;
625	}
626	if (default_class != 1) {
627		warnx("should have one default queue on %s", pa->ifname);
628		error++;
629	}
630	return (error);
631}
632
633static int
634print_priq_opts(const struct pf_altq *a)
635{
636	const struct priq_opts	*opts;
637
638	opts = &a->pq_u.priq_opts;
639
640	if (opts->flags) {
641		printf("priq(");
642		if (opts->flags & PRCF_RED)
643			printf(" red");
644		if (opts->flags & PRCF_ECN)
645			printf(" ecn");
646		if (opts->flags & PRCF_RIO)
647			printf(" rio");
648		if (opts->flags & PRCF_CLEARDSCP)
649			printf(" cleardscp");
650		if (opts->flags & PRCF_DEFAULTCLASS)
651			printf(" default");
652		printf(" ) ");
653
654		return (1);
655	} else
656		return (0);
657}
658
659/*
660 * HFSC support functions
661 */
/*
 * Validate and fill in the HFSC service curves of queue "pa" and run
 * admission control against the queues registered so far on the same
 * interface.  Uses the file-scope generalized-curve lists rtsc/lssc
 * as scratch space.  Returns 0 on success, -1 on failure.
 */
static int
eval_pfqueue_hfsc(struct pfctl *pf, struct pf_altq *pa)
{
	struct pf_altq		*altq, *parent;
	struct hfsc_opts	*opts;
	struct service_curve	 sc;

	opts = &pa->pq_u.hfsc_opts;

	if (pa->parent[0] == 0) {
		/* root queue: linkshare is the full interface bandwidth */
		opts->lssc_m1 = pa->ifbandwidth;
		opts->lssc_m2 = pa->ifbandwidth;
		opts->lssc_d = 0;
		return (0);
	}

	LIST_INIT(&rtsc);
	LIST_INIT(&lssc);

	/* if link_share is not specified, use bandwidth */
	if (opts->lssc_m2 == 0)
		opts->lssc_m2 = pa->bandwidth;

	/* a curve with a nonzero first slope needs a nonzero second slope */
	if ((opts->rtsc_m1 > 0 && opts->rtsc_m2 == 0) ||
	    (opts->lssc_m1 > 0 && opts->lssc_m2 == 0) ||
	    (opts->ulsc_m1 > 0 && opts->ulsc_m2 == 0)) {
		warnx("m2 is zero for %s", pa->qname);
		return (-1);
	}

	/* only concave two-piece curves (m1 >= m2, or m1 == 0) are valid */
	if ((opts->rtsc_m1 < opts->rtsc_m2 && opts->rtsc_m1 != 0) ||
	    (opts->lssc_m1 < opts->lssc_m2 && opts->lssc_m1 != 0) ||
	    (opts->ulsc_m1 < opts->ulsc_m2 && opts->ulsc_m1 != 0)) {
		warnx("m1 must be zero for convex curve: %s", pa->qname);
		return (-1);
	}

	/*
	 * admission control:
	 * for the real-time service curve, the sum of the service curves
	 * should not exceed 80% of the interface bandwidth.  20% is reserved
	 * not to over-commit the actual interface bandwidth.
	 * for the linkshare service curve, the sum of the child service
	 * curve should not exceed the parent service curve.
	 * for the upper-limit service curve, the assigned bandwidth should
	 * be smaller than the interface bandwidth, and the upper-limit should
	 * be larger than the real-time service curve when both are defined.
	 */
	parent = qname_to_pfaltq(pa->parent, pa->ifname);
	if (parent == NULL)
		errx(1, "parent %s not found for %s", pa->parent, pa->qname);

	/* accumulate the curves of the already-registered queues */
	TAILQ_FOREACH(altq, &altqs, entries) {
		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
			continue;
		if (altq->qname[0] == 0)  /* this is for interface */
			continue;

		/* if the class has a real-time service curve, add it. */
		if (opts->rtsc_m2 != 0 && altq->pq_u.hfsc_opts.rtsc_m2 != 0) {
			sc.m1 = altq->pq_u.hfsc_opts.rtsc_m1;
			sc.d = altq->pq_u.hfsc_opts.rtsc_d;
			sc.m2 = altq->pq_u.hfsc_opts.rtsc_m2;
			gsc_add_sc(&rtsc, &sc);
		}

		/* linkshare is only summed over siblings (same parent) */
		if (strncmp(altq->parent, pa->parent, PF_QNAME_SIZE) != 0)
			continue;

		/* if the class has a linkshare service curve, add it. */
		if (opts->lssc_m2 != 0 && altq->pq_u.hfsc_opts.lssc_m2 != 0) {
			sc.m1 = altq->pq_u.hfsc_opts.lssc_m1;
			sc.d = altq->pq_u.hfsc_opts.lssc_d;
			sc.m2 = altq->pq_u.hfsc_opts.lssc_m2;
			gsc_add_sc(&lssc, &sc);
		}
	}

	/* check the real-time service curve.  reserve 20% of interface bw */
	if (opts->rtsc_m2 != 0) {
		/* add this queue to the sum */
		sc.m1 = opts->rtsc_m1;
		sc.d = opts->rtsc_d;
		sc.m2 = opts->rtsc_m2;
		gsc_add_sc(&rtsc, &sc);
		/* compare the sum with 80% of the interface */
		sc.m1 = 0;
		sc.d = 0;
		sc.m2 = pa->ifbandwidth / 100 * 80;
		if (!is_gsc_under_sc(&rtsc, &sc)) {
			warnx("real-time sc exceeds 80%% of the interface "
			    "bandwidth (%s)", rate2str((double)sc.m2));
			goto err_ret;
		}
	}

	/* check the linkshare service curve. */
	if (opts->lssc_m2 != 0) {
		/* add this queue to the child sum */
		sc.m1 = opts->lssc_m1;
		sc.d = opts->lssc_d;
		sc.m2 = opts->lssc_m2;
		gsc_add_sc(&lssc, &sc);
		/* compare the sum of the children with parent's sc */
		sc.m1 = parent->pq_u.hfsc_opts.lssc_m1;
		sc.d = parent->pq_u.hfsc_opts.lssc_d;
		sc.m2 = parent->pq_u.hfsc_opts.lssc_m2;
		if (!is_gsc_under_sc(&lssc, &sc)) {
			warnx("linkshare sc exceeds parent's sc");
			goto err_ret;
		}
	}

	/* check the upper-limit service curve. */
	if (opts->ulsc_m2 != 0) {
		if (opts->ulsc_m1 > pa->ifbandwidth ||
		    opts->ulsc_m2 > pa->ifbandwidth) {
			warnx("upper-limit larger than interface bandwidth");
			goto err_ret;
		}
		if (opts->rtsc_m2 != 0 && opts->rtsc_m2 > opts->ulsc_m2) {
			warnx("upper-limit sc smaller than real-time sc");
			goto err_ret;
		}
	}

	gsc_destroy(&rtsc);
	gsc_destroy(&lssc);

	return (0);

err_ret:
	gsc_destroy(&rtsc);
	gsc_destroy(&lssc);
	return (-1);
}
799
800static int
801check_commit_hfsc(int dev, int opts, struct pf_altq *pa)
802{
803	struct pf_altq	*altq, *def = NULL;
804	int		 default_class;
805	int		 error = 0;
806
807	/* check if hfsc has one default queue for this interface */
808	default_class = 0;
809	TAILQ_FOREACH(altq, &altqs, entries) {
810		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
811			continue;
812		if (altq->qname[0] == 0)  /* this is for interface */
813			continue;
814		if (altq->parent[0] == 0)  /* dummy root */
815			continue;
816		if (altq->pq_u.hfsc_opts.flags & HFCF_DEFAULTCLASS) {
817			default_class++;
818			def = altq;
819		}
820	}
821	if (default_class != 1) {
822		warnx("should have one default queue on %s", pa->ifname);
823		return (1);
824	}
825	/* make sure the default queue is a leaf */
826	TAILQ_FOREACH(altq, &altqs, entries) {
827		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
828			continue;
829		if (altq->qname[0] == 0)  /* this is for interface */
830			continue;
831		if (strncmp(altq->parent, def->qname, PF_QNAME_SIZE) == 0) {
832			warnx("default queue is not a leaf");
833			error++;
834		}
835	}
836	return (error);
837}
838
839static int
840print_hfsc_opts(const struct pf_altq *a, const struct node_queue_opt *qopts)
841{
842	const struct hfsc_opts		*opts;
843	const struct node_hfsc_sc	*rtsc, *lssc, *ulsc;
844
845	opts = &a->pq_u.hfsc_opts;
846	if (qopts == NULL)
847		rtsc = lssc = ulsc = NULL;
848	else {
849		rtsc = &qopts->data.hfsc_opts.realtime;
850		lssc = &qopts->data.hfsc_opts.linkshare;
851		ulsc = &qopts->data.hfsc_opts.upperlimit;
852	}
853
854	if (opts->flags || opts->rtsc_m2 != 0 || opts->ulsc_m2 != 0 ||
855	    (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth ||
856	    opts->lssc_d != 0))) {
857		printf("hfsc(");
858		if (opts->flags & HFCF_RED)
859			printf(" red");
860		if (opts->flags & HFCF_ECN)
861			printf(" ecn");
862		if (opts->flags & HFCF_RIO)
863			printf(" rio");
864		if (opts->flags & HFCF_CLEARDSCP)
865			printf(" cleardscp");
866		if (opts->flags & HFCF_DEFAULTCLASS)
867			printf(" default");
868		if (opts->rtsc_m2 != 0)
869			print_hfsc_sc("realtime", opts->rtsc_m1, opts->rtsc_d,
870			    opts->rtsc_m2, rtsc);
871		if (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth ||
872		    opts->lssc_d != 0))
873			print_hfsc_sc("linkshare", opts->lssc_m1, opts->lssc_d,
874			    opts->lssc_m2, lssc);
875		if (opts->ulsc_m2 != 0)
876			print_hfsc_sc("upperlimit", opts->ulsc_m1, opts->ulsc_d,
877			    opts->ulsc_m2, ulsc);
878		printf(" ) ");
879
880		return (1);
881	} else
882		return (0);
883}
884
885/*
886 * admission control using generalized service curve
887 */
888#ifndef INFINITY
889#define	INFINITY	HUGE_VAL  /* positive infinity defined in <math.h> */
890#endif
891
892/* add a new service curve to a generalized service curve */
893static void
894gsc_add_sc(struct gen_sc *gsc, struct service_curve *sc)
895{
896	if (is_sc_null(sc))
897		return;
898	if (sc->d != 0)
899		gsc_add_seg(gsc, 0.0, 0.0, (double)sc->d, (double)sc->m1);
900	gsc_add_seg(gsc, (double)sc->d, 0.0, INFINITY, (double)sc->m2);
901}
902
903/*
904 * check whether all points of a generalized service curve have
905 * their y-coordinates no larger than a given two-piece linear
906 * service curve.
907 */
/*
 * Return 1 when every point of the generalized curve "gsc" lies at or
 * below the two-piece curve "sc", 0 otherwise.
 */
static int
is_gsc_under_sc(struct gen_sc *gsc, struct service_curve *sc)
{
	struct segment	*s, *last, *end;
	double		 y;

	if (is_sc_null(sc)) {
		/* sc is the zero curve: gsc must be flat at zero */
		if (LIST_EMPTY(gsc))
			return (1);
		LIST_FOREACH(s, gsc, _next) {
			if (s->m != 0)
				return (0);
		}
		return (1);
	}
	/*
	 * gsc has a dummy entry at the end with x = INFINITY.
	 * loop through up to this dummy entry.
	 */
	end = gsc_getentry(gsc, INFINITY);
	if (end == NULL)
		return (1);
	last = NULL;
	/* each segment start point must be under sc */
	for (s = LIST_FIRST(gsc); s != end; s = LIST_NEXT(s, _next)) {
		if (s->y > sc_x2y(sc, s->x))
			return (0);
		last = s;
	}
	/* last now holds the real last segment */
	if (last == NULL)
		return (1);
	/* beyond the last start point: compare slopes against sc's tail */
	if (last->m > sc->m2)
		return (0);
	/* if sc's knee lies inside the last segment, check the knee point */
	if (last->x < sc->d && last->m > sc->m1) {
		y = last->y + (sc->d - last->x) * last->m;
		if (y > sc_x2y(sc, sc->d))
			return (0);
	}
	return (1);
}
948
949static void
950gsc_destroy(struct gen_sc *gsc)
951{
952	struct segment	*s;
953
954	while ((s = LIST_FIRST(gsc)) != NULL) {
955		LIST_REMOVE(s, _next);
956		free(s);
957	}
958}
959
960/*
961 * return a segment entry starting at x.
962 * if gsc has no entry starting at x, a new entry is created at x.
963 */
/*
 * return a segment entry starting at x.
 * if gsc has no entry starting at x, a new entry is created at x,
 * splitting the segment it falls into so that the piecewise-linear
 * curve is unchanged.  Returns NULL on allocation failure.
 */
static struct segment *
gsc_getentry(struct gen_sc *gsc, double x)
{
	struct segment	*new, *prev, *s;

	/* find either an exact match or the segments around x */
	prev = NULL;
	LIST_FOREACH(s, gsc, _next) {
		if (s->x == x)
			return (s);	/* matching entry found */
		else if (s->x < x)
			prev = s;
		else
			break;
	}

	/* we have to create a new entry */
	if ((new = calloc(1, sizeof(struct segment))) == NULL)
		return (NULL);

	new->x = x;
	/* the new segment extends to the next entry (or is the dummy tail) */
	if (x == INFINITY || s == NULL)
		new->d = 0;
	else if (s->x == INFINITY)
		new->d = INFINITY;
	else
		new->d = s->x - x;
	if (prev == NULL) {
		/* insert the new entry at the head of the list */
		new->y = 0;
		new->m = 0;
		LIST_INSERT_HEAD(gsc, new, _next);
	} else {
		/*
		 * the start point intersects with the segment pointed by
		 * prev.  divide prev into 2 segments
		 */
		if (x == INFINITY) {
			prev->d = INFINITY;
			if (prev->m == 0)
				new->y = prev->y;
			else
				new->y = INFINITY;
		} else {
			prev->d = x - prev->x;
			/* y at x, by linear extension of prev */
			new->y = prev->d * prev->m + prev->y;
		}
		new->m = prev->m;
		LIST_INSERT_AFTER(prev, new, _next);
	}
	return (new);
}
1015
1016/* add a segment to a generalized service curve */
/*
 * add a segment to a generalized service curve: starting at (x, y),
 * slope m for length d (d may be INFINITY).  Existing segments are
 * split at the end points and the contribution is summed in.
 * Returns 0 on success, -1 on allocation failure.
 */
static int
gsc_add_seg(struct gen_sc *gsc, double x, double y, double d, double m)
{
	struct segment	*start, *end, *s;
	double		 x2;

	if (d == INFINITY)
		x2 = INFINITY;
	else
		x2 = x + d;
	/* ensure the list has break points exactly at x and x2 */
	start = gsc_getentry(gsc, x);
	end = gsc_getentry(gsc, x2);
	if (start == NULL || end == NULL)
		return (-1);

	/* within [x, x2): add the slope and the interpolated offset */
	for (s = start; s != end; s = LIST_NEXT(s, _next)) {
		s->m += m;
		s->y += y + (s->x - x) * m;
	}

	/* beyond x2: the segment contributes a constant offset of m*d */
	end = gsc_getentry(gsc, INFINITY);
	for (; s != end; s = LIST_NEXT(s, _next)) {
		s->y += m * d;
	}

	return (0);
}
1044
1045/* get y-projection of a service curve */
1046static double
1047sc_x2y(struct service_curve *sc, double x)
1048{
1049	double	y;
1050
1051	if (x <= (double)sc->d)
1052		/* y belongs to the 1st segment */
1053		y = x * (double)sc->m1;
1054	else
1055		/* y belongs to the 2nd segment */
1056		y = (double)sc->d * (double)sc->m1
1057			+ (x - (double)sc->d) * (double)sc->m2;
1058	return (y);
1059}
1060
1061/*
1062 * misc utilities
1063 */
#define	R2S_BUFS	8
#define	RATESTR_MAX	16

/*
 * Format a bit rate as a human-readable string with a ' '/K/M/G unit
 * suffix, e.g. "1.50Kb".  Returns a pointer into a small ring of
 * static buffers so up to R2S_BUFS results may be used concurrently
 * in one printf() call.
 */
char *
rate2str(double rate)
{
	char		*buf;
	static char	 r2sbuf[R2S_BUFS][RATESTR_MAX];  /* ring bufer */
	static int	 idx = 0;
	int		 i;
	static const char unit[] = " KMG";

	buf = r2sbuf[idx++];
	if (idx == R2S_BUFS)
		idx = 0;

	/*
	 * Bug fix: the original bound (i <= 3) let i reach 4, indexing
	 * the NUL terminator of unit[] for rates >= 1 Tb and embedding
	 * a '\0' in the output.  Stop at 'G' (index 3) instead.
	 */
	for (i = 0; rate >= 1000 && i < 3; i++)
		rate /= 1000;

	/* drop the decimals when the value is a whole number */
	if ((int)(rate * 100) % 100)
		snprintf(buf, RATESTR_MAX, "%.2f%cb", rate, unit[i]);
	else
		snprintf(buf, RATESTR_MAX, "%d%cb", (int)rate, unit[i]);

	return (buf);
}
1090
#ifdef __FreeBSD__
/*
 * XXX
 * FreeBSD does not have SIOCGIFDATA.
 * To emulate this, DIOCGIFSPEED ioctl added to pf.
 */
/*
 * Return the link speed of "ifname" in bits per second via the pf
 * device, or exit on ioctl failure.
 */
u_int32_t
getifspeed(int pfdev, char *ifname)
{
	struct pf_ifspeed io;

	bzero(&io, sizeof io);
	if (strlcpy(io.ifname, ifname, IFNAMSIZ) >=
	    sizeof(io.ifname))
		errx(1, "getifspeed: strlcpy");
	if (ioctl(pfdev, DIOCGIFSPEED, &io) == -1)
		err(1, "DIOCGIFSPEED");
	return ((u_int32_t)io.baudrate);
}
#else
/*
 * Return the link speed of "ifname" in bits per second using
 * SIOCGIFDATA on a throwaway datagram socket, or exit on failure.
 */
u_int32_t
getifspeed(char *ifname)
{
	int		s;
	struct ifreq	ifr;
	struct if_data	ifrdat;

	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
		err(1, "socket");
	bzero(&ifr, sizeof(ifr));
	if (strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)) >=
	    sizeof(ifr.ifr_name))
		errx(1, "getifspeed: strlcpy");
	ifr.ifr_data = (caddr_t)&ifrdat;
	if (ioctl(s, SIOCGIFDATA, (caddr_t)&ifr) == -1)
		err(1, "SIOCGIFDATA");
	if (shutdown(s, SHUT_RDWR) == -1)
		err(1, "shutdown");
	if (close(s))
		err(1, "close");
	return ((u_int32_t)ifrdat.ifi_baudrate);
}
#endif
1134
1135u_long
1136getifmtu(char *ifname)
1137{
1138	int		s;
1139	struct ifreq	ifr;
1140
1141	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
1142		err(1, "socket");
1143	bzero(&ifr, sizeof(ifr));
1144	if (strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)) >=
1145	    sizeof(ifr.ifr_name))
1146		errx(1, "getifmtu: strlcpy");
1147	if (ioctl(s, SIOCGIFMTU, (caddr_t)&ifr) == -1)
1148		err(1, "SIOCGIFMTU");
1149	if (shutdown(s, SHUT_RDWR) == -1)
1150		err(1, "shutdown");
1151	if (close(s))
1152		err(1, "close");
1153	if (ifr.ifr_mtu > 0)
1154		return (ifr.ifr_mtu);
1155	else {
1156		warnx("could not get mtu for %s, assuming 1500", ifname);
1157		return (1500);
1158	}
1159}
1160
/*
 * Copy the parsed, scheduler-specific options from "opts" into the
 * altq record "pa", resolving bandwidth specifications (absolute or
 * percentage) against "ref_bw".  Returns the number of errors.
 */
int
eval_queue_opts(struct pf_altq *pa, struct node_queue_opt *opts,
    u_int32_t ref_bw)
{
	int	errors = 0;

	switch (pa->scheduler) {
	case ALTQT_CBQ:
		pa->pq_u.cbq_opts = opts->data.cbq_opts;
		break;
	case ALTQT_PRIQ:
		pa->pq_u.priq_opts = opts->data.priq_opts;
		break;
	case ALTQT_HFSC:
		pa->pq_u.hfsc_opts.flags = opts->data.hfsc_opts.flags;
		/* each curve is copied only when it was given in the rules */
		if (opts->data.hfsc_opts.linkshare.used) {
			pa->pq_u.hfsc_opts.lssc_m1 =
			    eval_bwspec(&opts->data.hfsc_opts.linkshare.m1,
			    ref_bw);
			pa->pq_u.hfsc_opts.lssc_m2 =
			    eval_bwspec(&opts->data.hfsc_opts.linkshare.m2,
			    ref_bw);
			pa->pq_u.hfsc_opts.lssc_d =
			    opts->data.hfsc_opts.linkshare.d;
		}
		if (opts->data.hfsc_opts.realtime.used) {
			pa->pq_u.hfsc_opts.rtsc_m1 =
			    eval_bwspec(&opts->data.hfsc_opts.realtime.m1,
			    ref_bw);
			pa->pq_u.hfsc_opts.rtsc_m2 =
			    eval_bwspec(&opts->data.hfsc_opts.realtime.m2,
			    ref_bw);
			pa->pq_u.hfsc_opts.rtsc_d =
			    opts->data.hfsc_opts.realtime.d;
		}
		if (opts->data.hfsc_opts.upperlimit.used) {
			pa->pq_u.hfsc_opts.ulsc_m1 =
			    eval_bwspec(&opts->data.hfsc_opts.upperlimit.m1,
			    ref_bw);
			pa->pq_u.hfsc_opts.ulsc_m2 =
			    eval_bwspec(&opts->data.hfsc_opts.upperlimit.m2,
			    ref_bw);
			pa->pq_u.hfsc_opts.ulsc_d =
			    opts->data.hfsc_opts.upperlimit.d;
		}
		break;
	default:
		warnx("eval_queue_opts: unknown scheduler type %u",
		    opts->qtype);
		errors++;
		break;
	}

	return (errors);
}
1216
1217u_int32_t
1218eval_bwspec(struct node_queue_bw *bw, u_int32_t ref_bw)
1219{
1220	if (bw->bw_absolute > 0)
1221		return (bw->bw_absolute);
1222
1223	if (bw->bw_percent > 0)
1224		return (ref_bw / 100 * bw->bw_percent);
1225
1226	return (0);
1227}
1228
1229void
1230print_hfsc_sc(const char *scname, u_int m1, u_int d, u_int m2,
1231    const struct node_hfsc_sc *sc)
1232{
1233	printf(" %s", scname);
1234
1235	if (d != 0) {
1236		printf("(");
1237		if (sc != NULL && sc->m1.bw_percent > 0)
1238			printf("%u%%", sc->m1.bw_percent);
1239		else
1240			printf("%s", rate2str((double)m1));
1241		printf(" %u", d);
1242	}
1243
1244	if (sc != NULL && sc->m2.bw_percent > 0)
1245		printf(" %u%%", sc->m2.bw_percent);
1246	else
1247		printf(" %s", rate2str((double)m2));
1248
1249	if (d != 0)
1250		printf(")");
1251}
1252