/* pfctl_altq.c revision 126354 */
1/*	$OpenBSD: pfctl_altq.c,v 1.77 2003/08/22 21:50:34 david Exp $	*/
2
3/*
4 * Copyright (c) 2002
5 *	Sony Computer Science Laboratories Inc.
6 * Copyright (c) 2002, 2003 Henning Brauer <henning@openbsd.org>
7 *
8 * Permission to use, copy, modify, and distribute this software for any
9 * purpose with or without fee is hereby granted, provided that the above
10 * copyright notice and this permission notice appear in all copies.
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19 */
20
21#include <sys/types.h>
22#include <sys/ioctl.h>
23#include <sys/socket.h>
24#include <sys/limits.h>
25
26#include <net/if.h>
27#include <netinet/in.h>
28#include <net/pfvar.h>
29
30#include <err.h>
31#include <errno.h>
32#include <math.h>
33#include <stdio.h>
34#include <stdlib.h>
35#include <string.h>
36#include <unistd.h>
37
38#include <altq/altq.h>
39#include <altq/altq_cbq.h>
40#include <altq/altq_priq.h>
41#include <altq/altq_hfsc.h>
42
43#include "pfctl_parser.h"
44#include "pfctl.h"
45
46#define is_sc_null(sc)	(((sc) == NULL) || ((sc)->m1 == 0 && (sc)->m2 == 0))
47
48TAILQ_HEAD(altqs, pf_altq) altqs = TAILQ_HEAD_INITIALIZER(altqs);
49LIST_HEAD(gen_sc, segment) rtsc, lssc;
50
51struct pf_altq	*qname_to_pfaltq(const char *, const char *);
52u_int32_t	 qname_to_qid(const char *);
53
54static int	eval_pfqueue_cbq(struct pfctl *, struct pf_altq *);
55static int	cbq_compute_idletime(struct pfctl *, struct pf_altq *);
56static int	check_commit_cbq(int, int, struct pf_altq *);
57static int	print_cbq_opts(const struct pf_altq *);
58
59static int	eval_pfqueue_priq(struct pfctl *, struct pf_altq *);
60static int	check_commit_priq(int, int, struct pf_altq *);
61static int	print_priq_opts(const struct pf_altq *);
62
63static int	eval_pfqueue_hfsc(struct pfctl *, struct pf_altq *);
64static int	check_commit_hfsc(int, int, struct pf_altq *);
65static int	print_hfsc_opts(const struct pf_altq *,
66		    const struct node_queue_opt *);
67
68static void		 gsc_add_sc(struct gen_sc *, struct service_curve *);
69static int		 is_gsc_under_sc(struct gen_sc *,
70			     struct service_curve *);
71static void		 gsc_destroy(struct gen_sc *);
72static struct segment	*gsc_getentry(struct gen_sc *, double);
73static int		 gsc_add_seg(struct gen_sc *, double, double, double,
74			     double);
75static double		 sc_x2y(struct service_curve *, double);
76
77u_int32_t	 getifspeed(char *);
78u_long		 getifmtu(char *);
79int		 eval_queue_opts(struct pf_altq *, struct node_queue_opt *,
80		     u_int32_t);
81u_int32_t	 eval_bwspec(struct node_queue_bw *, u_int32_t);
82void		 print_hfsc_sc(const char *, u_int, u_int, u_int,
83		     const struct node_hfsc_sc *);
84
85static u_int32_t	 max_qid = 1;
86
87void
88pfaltq_store(struct pf_altq *a)
89{
90	struct pf_altq	*altq;
91
92	if ((altq = malloc(sizeof(*altq))) == NULL)
93		err(1, "malloc");
94	memcpy(altq, a, sizeof(struct pf_altq));
95	TAILQ_INSERT_TAIL(&altqs, altq, entries);
96}
97
98void
99pfaltq_free(struct pf_altq *a)
100{
101	struct pf_altq	*altq;
102
103	TAILQ_FOREACH(altq, &altqs, entries) {
104		if (strncmp(a->ifname, altq->ifname, IFNAMSIZ) == 0 &&
105		    strncmp(a->qname, altq->qname, PF_QNAME_SIZE) == 0) {
106			TAILQ_REMOVE(&altqs, altq, entries);
107			free(altq);
108			return;
109		}
110	}
111}
112
113struct pf_altq *
114pfaltq_lookup(const char *ifname)
115{
116	struct pf_altq	*altq;
117
118	TAILQ_FOREACH(altq, &altqs, entries) {
119		if (strncmp(ifname, altq->ifname, IFNAMSIZ) == 0 &&
120		    altq->qname[0] == 0)
121			return (altq);
122	}
123	return (NULL);
124}
125
126struct pf_altq *
127qname_to_pfaltq(const char *qname, const char *ifname)
128{
129	struct pf_altq	*altq;
130
131	TAILQ_FOREACH(altq, &altqs, entries) {
132		if (strncmp(ifname, altq->ifname, IFNAMSIZ) == 0 &&
133		    strncmp(qname, altq->qname, PF_QNAME_SIZE) == 0)
134			return (altq);
135	}
136	return (NULL);
137}
138
139u_int32_t
140qname_to_qid(const char *qname)
141{
142	struct pf_altq	*altq;
143
144	/*
145	 * We guarantee that same named queues on different interfaces
146	 * have the same qid, so we do NOT need to limit matching on
147	 * one interface!
148	 */
149
150	TAILQ_FOREACH(altq, &altqs, entries) {
151		if (strncmp(qname, altq->qname, PF_QNAME_SIZE) == 0)
152			return (altq->qid);
153	}
154	return (0);
155}
156
157void
158print_altq(const struct pf_altq *a, unsigned level, struct node_queue_bw *bw,
159	struct node_queue_opt *qopts)
160{
161	if (a->qname[0] != NULL) {
162		print_queue(a, level, bw, 0, qopts);
163		return;
164	}
165
166	printf("altq on %s ", a->ifname);
167
168	switch(a->scheduler) {
169	case ALTQT_CBQ:
170		if (!print_cbq_opts(a))
171			printf("cbq ");
172		break;
173	case ALTQT_PRIQ:
174		if (!print_priq_opts(a))
175			printf("priq ");
176		break;
177	case ALTQT_HFSC:
178		if (!print_hfsc_opts(a, qopts))
179			printf("hfsc ");
180		break;
181	}
182
183	if (bw != NULL && bw->bw_percent > 0) {
184		if (bw->bw_percent < 100)
185			printf("bandwidth %u%% ", bw->bw_percent);
186	} else
187		printf("bandwidth %s ", rate2str((double)a->ifbandwidth));
188
189	if (a->qlimit != DEFAULT_QLIMIT)
190		printf("qlimit %u ", a->qlimit);
191	printf("tbrsize %u ", a->tbrsize);
192}
193
194void
195print_queue(const struct pf_altq *a, unsigned level, struct node_queue_bw *bw,
196    int print_interface, struct node_queue_opt *qopts)
197{
198	unsigned	i;
199
200	printf("queue ");
201	for (i = 0; i < level; ++i)
202		printf(" ");
203	printf("%s ", a->qname);
204	if (print_interface)
205		printf("on %s ", a->ifname);
206	if (a->scheduler == ALTQT_CBQ || a->scheduler == ALTQT_HFSC) {
207		if (bw != NULL && bw->bw_percent > 0) {
208			if (bw->bw_percent < 100)
209				printf("bandwidth %u%% ", bw->bw_percent);
210		} else
211			printf("bandwidth %s ", rate2str((double)a->bandwidth));
212	}
213	if (a->priority != DEFAULT_PRIORITY)
214		printf("priority %u ", a->priority);
215	if (a->qlimit != DEFAULT_QLIMIT)
216		printf("qlimit %u ", a->qlimit);
217	switch (a->scheduler) {
218	case ALTQT_CBQ:
219		print_cbq_opts(a);
220		break;
221	case ALTQT_PRIQ:
222		print_priq_opts(a);
223		break;
224	case ALTQT_HFSC:
225		print_hfsc_opts(a, qopts);
226		break;
227	}
228}
229
230/*
231 * eval_pfaltq computes the discipline parameters.
232 */
233int
234eval_pfaltq(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw,
235    struct node_queue_opt *opts)
236{
237	u_int	rate, size, errors = 0;
238
239	if (bw->bw_absolute > 0)
240		pa->ifbandwidth = bw->bw_absolute;
241	else
242		if ((rate = getifspeed(pa->ifname)) == 0) {
243			fprintf(stderr, "cannot determine interface bandwidth "
244			    "for %s, specify an absolute bandwidth\n",
245			    pa->ifname);
246			errors++;
247		} else if ((pa->ifbandwidth = eval_bwspec(bw, rate)) == 0)
248			pa->ifbandwidth = rate;
249
250	errors += eval_queue_opts(pa, opts, pa->ifbandwidth);
251
252	/* if tbrsize is not specified, use heuristics */
253	if (pa->tbrsize == 0) {
254		rate = pa->ifbandwidth;
255		if (rate <= 1 * 1000 * 1000)
256			size = 1;
257		else if (rate <= 10 * 1000 * 1000)
258			size = 4;
259		else if (rate <= 200 * 1000 * 1000)
260			size = 8;
261		else
262			size = 24;
263		size = size * getifmtu(pa->ifname);
264		pa->tbrsize = size;
265	}
266	return (errors);
267}
268
269/*
270 * check_commit_altq does consistency check for each interface
271 */
272int
273check_commit_altq(int dev, int opts)
274{
275	struct pf_altq	*altq;
276	int		 error = 0;
277
278	/* call the discipline check for each interface. */
279	TAILQ_FOREACH(altq, &altqs, entries) {
280		if (altq->qname[0] == 0) {
281			switch (altq->scheduler) {
282			case ALTQT_CBQ:
283				error = check_commit_cbq(dev, opts, altq);
284				break;
285			case ALTQT_PRIQ:
286				error = check_commit_priq(dev, opts, altq);
287				break;
288			case ALTQT_HFSC:
289				error = check_commit_hfsc(dev, opts, altq);
290				break;
291			default:
292				break;
293			}
294		}
295	}
296	return (error);
297}
298
299/*
300 * eval_pfqueue computes the queue parameters.
301 */
/*
 * Resolve a queue definition against its interface and parent: copy the
 * scheduler and interface bandwidth from the interface entry, detect
 * duplicates, resolve the parent qid, compute the queue bandwidth and
 * validate it, then hand off to the scheduler-specific evaluator.
 * Returns 0 on success, non-zero on any validation error.
 */
int
eval_pfqueue(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw,
    struct node_queue_opt *opts)
{
	/* should be merged with expand_queue */
	struct pf_altq	*if_pa, *parent;
	int		 error = 0;

	/* find the corresponding interface and copy fields used by queues */
	if ((if_pa = pfaltq_lookup(pa->ifname)) == NULL) {
		fprintf(stderr, "altq not defined on %s\n", pa->ifname);
		return (1);
	}
	pa->scheduler = if_pa->scheduler;
	pa->ifbandwidth = if_pa->ifbandwidth;

	/* a queue name may appear only once per interface */
	if (qname_to_pfaltq(pa->qname, pa->ifname) != NULL) {
		fprintf(stderr, "queue %s already exists on interface %s\n",
		    pa->qname, pa->ifname);
		return (1);
	}
	/* reuse the qid of a same-named queue on another interface, if any */
	pa->qid = qname_to_qid(pa->qname);

	parent = NULL;
	if (pa->parent[0] != 0) {
		parent = qname_to_pfaltq(pa->parent, pa->ifname);
		if (parent == NULL) {
			fprintf(stderr, "parent %s not found for %s\n",
			    pa->parent, pa->qname);
			return (1);
		}
		pa->parent_qid = parent->qid;
	}
	if (pa->qlimit == 0)
		pa->qlimit = DEFAULT_QLIMIT;

	/* only CBQ and HFSC classes carry their own bandwidth */
	if (pa->scheduler == ALTQT_CBQ || pa->scheduler == ALTQT_HFSC) {
		/* percentage specs are relative to the parent's bandwidth */
		if ((pa->bandwidth = eval_bwspec(bw,
		    parent == NULL ? 0 : parent->bandwidth)) == 0) {
			fprintf(stderr, "bandwidth for %s invalid (%d / %d)\n",
			    pa->qname, bw->bw_absolute, bw->bw_percent);
			return (1);
		}

		if (pa->bandwidth > pa->ifbandwidth) {
			fprintf(stderr, "bandwidth for %s higher than "
			    "interface\n", pa->qname);
			return (1);
		}
		if (parent != NULL && pa->bandwidth > parent->bandwidth) {
			fprintf(stderr, "bandwidth for %s higher than parent\n",
			    pa->qname);
			return (1);
		}
	}

	if (eval_queue_opts(pa, opts, parent == NULL? 0 : parent->bandwidth))
		return (1);

	/* scheduler-specific parameter evaluation */
	switch (pa->scheduler) {
	case ALTQT_CBQ:
		error = eval_pfqueue_cbq(pf, pa);
		break;
	case ALTQT_PRIQ:
		error = eval_pfqueue_priq(pf, pa);
		break;
	case ALTQT_HFSC:
		error = eval_pfqueue_hfsc(pf, pa);
		break;
	default:
		break;
	}
	return (error);
}
376
377/*
378 * CBQ support functions
379 */
380#define	RM_FILTER_GAIN	5	/* log2 of gain, e.g., 5 => 31/32 */
381#define	RM_NS_PER_SEC	(1000000000)
382
383static int
384eval_pfqueue_cbq(struct pfctl *pf, struct pf_altq *pa)
385{
386	struct cbq_opts	*opts;
387	u_int		 ifmtu;
388
389	if (pa->priority >= CBQ_MAXPRI) {
390		warnx("priority out of range: max %d", CBQ_MAXPRI - 1);
391		return (-1);
392	}
393
394	ifmtu = getifmtu(pa->ifname);
395	opts = &pa->pq_u.cbq_opts;
396
397	if (opts->pktsize == 0) {	/* use default */
398		opts->pktsize = ifmtu;
399		if (opts->pktsize > MCLBYTES)	/* do what TCP does */
400			opts->pktsize &= ~MCLBYTES;
401	} else if (opts->pktsize > ifmtu)
402		opts->pktsize = ifmtu;
403	if (opts->maxpktsize == 0)	/* use default */
404		opts->maxpktsize = ifmtu;
405	else if (opts->maxpktsize > ifmtu)
406		opts->pktsize = ifmtu;
407
408	if (opts->pktsize > opts->maxpktsize)
409		opts->pktsize = opts->maxpktsize;
410
411	if (pa->parent[0] == 0)
412		opts->flags |= (CBQCLF_ROOTCLASS | CBQCLF_WRR);
413	else if (pa->qid == 0 && (opts->flags & CBQCLF_DEFCLASS) == 0)
414		pa->qid = ++max_qid;
415
416	cbq_compute_idletime(pf, pa);
417	return (0);
418}
419
420/*
421 * compute ns_per_byte, maxidle, minidle, and offtime
422 */
423static int
424cbq_compute_idletime(struct pfctl *pf, struct pf_altq *pa)
425{
426	struct cbq_opts	*opts;
427	double		 maxidle_s, maxidle, minidle;
428	double		 offtime, nsPerByte, ifnsPerByte, ptime, cptime;
429	double		 z, g, f, gton, gtom;
430	u_int		 minburst, maxburst;
431
432	opts = &pa->pq_u.cbq_opts;
433	ifnsPerByte = (1.0 / (double)pa->ifbandwidth) * RM_NS_PER_SEC * 8;
434	minburst = opts->minburst;
435	maxburst = opts->maxburst;
436
437	if (pa->bandwidth == 0)
438		f = 0.0001;	/* small enough? */
439	else
440		f = ((double) pa->bandwidth / (double) pa->ifbandwidth);
441
442	nsPerByte = ifnsPerByte / f;
443	ptime = (double)opts->pktsize * ifnsPerByte;
444	cptime = ptime * (1.0 - f) / f;
445
446	if (nsPerByte * (double)opts->maxpktsize > (double)INT_MAX) {
447		/*
448		 * this causes integer overflow in kernel!
449		 * (bandwidth < 6Kbps when max_pkt_size=1500)
450		 */
451		if (pa->bandwidth != 0 && (pf->opts & PF_OPT_QUIET) == 0)
452			warnx("queue bandwidth must be larger than %s",
453			    rate2str(ifnsPerByte * (double)opts->maxpktsize /
454			    (double)INT_MAX * (double)pa->ifbandwidth));
455			fprintf(stderr, "cbq: queue %s is too slow!\n",
456			    pa->qname);
457		nsPerByte = (double)(INT_MAX / opts->maxpktsize);
458	}
459
460	if (maxburst == 0) {  /* use default */
461		if (cptime > 10.0 * 1000000)
462			maxburst = 4;
463		else
464			maxburst = 16;
465	}
466	if (minburst == 0)  /* use default */
467		minburst = 2;
468	if (minburst > maxburst)
469		minburst = maxburst;
470
471	z = (double)(1 << RM_FILTER_GAIN);
472	g = (1.0 - 1.0 / z);
473	gton = pow(g, (double)maxburst);
474	gtom = pow(g, (double)(minburst-1));
475	maxidle = ((1.0 / f - 1.0) * ((1.0 - gton) / gton));
476	maxidle_s = (1.0 - g);
477	if (maxidle > maxidle_s)
478		maxidle = ptime * maxidle;
479	else
480		maxidle = ptime * maxidle_s;
481	if (minburst)
482		offtime = cptime * (1.0 + 1.0/(1.0 - g) * (1.0 - gtom) / gtom);
483	else
484		offtime = cptime;
485	minidle = -((double)opts->maxpktsize * (double)nsPerByte);
486
487	/* scale parameters */
488	maxidle = ((maxidle * 8.0) / nsPerByte) * pow(2.0, (double)RM_FILTER_GAIN);
489	offtime = (offtime * 8.0) / nsPerByte * pow(2.0, (double)RM_FILTER_GAIN);
490	minidle = ((minidle * 8.0) / nsPerByte) * pow(2.0, (double)RM_FILTER_GAIN);
491
492	maxidle = maxidle / 1000.0;
493	offtime = offtime / 1000.0;
494	minidle = minidle / 1000.0;
495
496	opts->minburst = minburst;
497	opts->maxburst = maxburst;
498	opts->ns_per_byte = (u_int) nsPerByte;
499	opts->maxidle = (u_int) fabs(maxidle);
500	opts->minidle = (int)minidle;
501	opts->offtime = (u_int) fabs(offtime);
502
503	return (0);
504}
505
506static int
507check_commit_cbq(int dev, int opts, struct pf_altq *pa)
508{
509	struct pf_altq	*altq;
510	int		 root_class, default_class;
511	int		 error = 0;
512
513	/*
514	 * check if cbq has one root queue and one default queue
515	 * for this interface
516	 */
517	root_class = default_class = 0;
518	TAILQ_FOREACH(altq, &altqs, entries) {
519		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
520			continue;
521		if (altq->qname[0] == 0)  /* this is for interface */
522			continue;
523		if (altq->pq_u.cbq_opts.flags & CBQCLF_ROOTCLASS)
524			root_class++;
525		if (altq->pq_u.cbq_opts.flags & CBQCLF_DEFCLASS)
526			default_class++;
527	}
528	if (root_class != 1) {
529		warnx("should have one root queue on %s", pa->ifname);
530		error++;
531	}
532	if (default_class != 1) {
533		warnx("should have one default queue on %s", pa->ifname);
534		error++;
535	}
536	return (error);
537}
538
539static int
540print_cbq_opts(const struct pf_altq *a)
541{
542	const struct cbq_opts	*opts;
543
544	opts = &a->pq_u.cbq_opts;
545	if (opts->flags) {
546		printf("cbq(");
547		if (opts->flags & CBQCLF_RED)
548			printf(" red");
549		if (opts->flags & CBQCLF_ECN)
550			printf(" ecn");
551		if (opts->flags & CBQCLF_RIO)
552			printf(" rio");
553		if (opts->flags & CBQCLF_CLEARDSCP)
554			printf(" cleardscp");
555		if (opts->flags & CBQCLF_FLOWVALVE)
556			printf(" flowvalve");
557		if (opts->flags & CBQCLF_BORROW)
558			printf(" borrow");
559		if (opts->flags & CBQCLF_WRR)
560			printf(" wrr");
561		if (opts->flags & CBQCLF_EFFICIENT)
562			printf(" efficient");
563		if (opts->flags & CBQCLF_ROOTCLASS)
564			printf(" root");
565		if (opts->flags & CBQCLF_DEFCLASS)
566			printf(" default");
567		printf(" ) ");
568
569		return (1);
570	} else
571		return (0);
572}
573
574/*
575 * PRIQ support functions
576 */
577static int
578eval_pfqueue_priq(struct pfctl *pf, struct pf_altq *pa)
579{
580	struct pf_altq	*altq;
581
582	if (pa->priority >= PRIQ_MAXPRI) {
583		warnx("priority out of range: max %d", PRIQ_MAXPRI - 1);
584		return (-1);
585	}
586	/* the priority should be unique for the interface */
587	TAILQ_FOREACH(altq, &altqs, entries) {
588		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) == 0 &&
589		    altq->qname[0] != 0 && altq->priority == pa->priority) {
590			warnx("%s and %s have the same priority",
591			    altq->qname, pa->qname);
592			return (-1);
593		}
594	}
595
596	if (pa->qid == 0)
597		pa->qid = ++max_qid;
598
599	return (0);
600}
601
602static int
603check_commit_priq(int dev, int opts, struct pf_altq *pa)
604{
605	struct pf_altq	*altq;
606	int		 default_class;
607	int		 error = 0;
608
609	/*
610	 * check if priq has one default class for this interface
611	 */
612	default_class = 0;
613	TAILQ_FOREACH(altq, &altqs, entries) {
614		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
615			continue;
616		if (altq->qname[0] == 0)  /* this is for interface */
617			continue;
618		if (altq->pq_u.priq_opts.flags & PRCF_DEFAULTCLASS)
619			default_class++;
620	}
621	if (default_class != 1) {
622		warnx("should have one default queue on %s", pa->ifname);
623		error++;
624	}
625	return (error);
626}
627
628static int
629print_priq_opts(const struct pf_altq *a)
630{
631	const struct priq_opts	*opts;
632
633	opts = &a->pq_u.priq_opts;
634
635	if (opts->flags) {
636		printf("priq(");
637		if (opts->flags & PRCF_RED)
638			printf(" red");
639		if (opts->flags & PRCF_ECN)
640			printf(" ecn");
641		if (opts->flags & PRCF_RIO)
642			printf(" rio");
643		if (opts->flags & PRCF_CLEARDSCP)
644			printf(" cleardscp");
645		if (opts->flags & PRCF_DEFAULTCLASS)
646			printf(" default");
647		printf(" ) ");
648
649		return (1);
650	} else
651		return (0);
652}
653
654/*
655 * HFSC support functions
656 */
657static int
658eval_pfqueue_hfsc(struct pfctl *pf, struct pf_altq *pa)
659{
660	struct pf_altq		*altq, *parent;
661	struct hfsc_opts	*opts;
662	struct service_curve	 sc;
663
664	opts = &pa->pq_u.hfsc_opts;
665
666	if (pa->parent[0] == 0) {
667		/* root queue */
668		pa->qid = HFSC_ROOTCLASS_HANDLE;
669		opts->lssc_m1 = pa->ifbandwidth;
670		opts->lssc_m2 = pa->ifbandwidth;
671		opts->lssc_d = 0;
672		return (0);
673	} else if (pa->qid == 0)
674		pa->qid = ++max_qid;
675
676	LIST_INIT(&rtsc);
677	LIST_INIT(&lssc);
678
679	/* if link_share is not specified, use bandwidth */
680	if (opts->lssc_m2 == 0)
681		opts->lssc_m2 = pa->bandwidth;
682
683	if ((opts->rtsc_m1 > 0 && opts->rtsc_m2 == 0) ||
684	    (opts->lssc_m1 > 0 && opts->lssc_m2 == 0) ||
685	    (opts->ulsc_m1 > 0 && opts->ulsc_m2 == 0)) {
686		warnx("m2 is zero for %s", pa->qname);
687		return (-1);
688	}
689
690	if ((opts->rtsc_m1 < opts->rtsc_m2 && opts->rtsc_m1 != 0) ||
691	    (opts->rtsc_m1 < opts->rtsc_m2 && opts->rtsc_m1 != 0) ||
692	    (opts->rtsc_m1 < opts->rtsc_m2 && opts->rtsc_m1 != 0)) {
693		warnx("m1 must be zero for convex curve: %s", pa->qname);
694		return (-1);
695	}
696
697	/*
698	 * admission control:
699	 * for the real-time service curve, the sum of the service curves
700	 * should not exceed 80% of the interface bandwidth.  20% is reserved
701	 * not to over-commit the actual interface bandwidth.
702	 * for the link-sharing service curve, the sum of the child service
703	 * curve should not exceed the parent service curve.
704	 * for the upper-limit service curve, the assigned bandwidth should
705	 * be smaller than the interface bandwidth, and the upper-limit should
706	 * be larger than the real-time service curve when both are defined.
707	 */
708	parent = qname_to_pfaltq(pa->parent, pa->ifname);
709	if (parent == NULL)
710		errx(1, "parent %s not found for %s", pa->parent, pa->qname);
711
712	TAILQ_FOREACH(altq, &altqs, entries) {
713		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
714			continue;
715		if (altq->qname[0] == 0)  /* this is for interface */
716			continue;
717
718		/* if the class has a real-time service curve, add it. */
719		if (opts->rtsc_m2 != 0 && altq->pq_u.hfsc_opts.rtsc_m2 != 0) {
720			sc.m1 = altq->pq_u.hfsc_opts.rtsc_m1;
721			sc.d  = altq->pq_u.hfsc_opts.rtsc_d;
722			sc.m2 = altq->pq_u.hfsc_opts.rtsc_m2;
723			gsc_add_sc(&rtsc, &sc);
724		}
725
726		if (strncmp(altq->parent, pa->parent, PF_QNAME_SIZE) != 0)
727			continue;
728
729		/* if the class has a link-sharing service curve, add it. */
730		if (opts->lssc_m2 != 0 && altq->pq_u.hfsc_opts.lssc_m2 != 0) {
731			sc.m1 = altq->pq_u.hfsc_opts.lssc_m1;
732			sc.d  = altq->pq_u.hfsc_opts.lssc_d;
733			sc.m2 = altq->pq_u.hfsc_opts.lssc_m2;
734			gsc_add_sc(&lssc, &sc);
735		}
736	}
737
738	/* check the real-time service curve.  reserve 20% of interface bw */
739	if (opts->rtsc_m2 != 0) {
740		sc.m1 = 0;
741		sc.d  = 0;
742		sc.m2 = pa->ifbandwidth / 100 * 80;
743		if (!is_gsc_under_sc(&rtsc, &sc)) {
744			warnx("real-time sc exceeds the interface bandwidth");
745			goto err_ret;
746		}
747	}
748
749	/* check the link-sharing service curve. */
750	if (opts->lssc_m2 != 0) {
751		sc.m1 = parent->pq_u.hfsc_opts.lssc_m1;
752		sc.d  = parent->pq_u.hfsc_opts.lssc_d;
753		sc.m2 = parent->pq_u.hfsc_opts.lssc_m2;
754		if (!is_gsc_under_sc(&lssc, &sc)) {
755			warnx("link-sharing sc exceeds parent's sc");
756			goto err_ret;
757		}
758	}
759
760	/* check the upper-limit service curve. */
761	if (opts->ulsc_m2 != 0) {
762		if (opts->ulsc_m1 > pa->ifbandwidth ||
763		    opts->ulsc_m2 > pa->ifbandwidth) {
764			warnx("upper-limit larger than interface bandwidth");
765			goto err_ret;
766		}
767		if (opts->rtsc_m2 != 0 && opts->rtsc_m2 > opts->ulsc_m2) {
768			warnx("upper-limit sc smaller than real-time sc");
769			goto err_ret;
770		}
771	}
772
773	gsc_destroy(&rtsc);
774	gsc_destroy(&lssc);
775
776	return (0);
777
778err_ret:
779	gsc_destroy(&rtsc);
780	gsc_destroy(&lssc);
781	return (-1);
782}
783
784static int
785check_commit_hfsc(int dev, int opts, struct pf_altq *pa)
786{
787	struct pf_altq	*altq, *def = NULL;
788	int		 default_class;
789	int		 error = 0;
790
791	/* check if hfsc has one default queue for this interface */
792	default_class = 0;
793	TAILQ_FOREACH(altq, &altqs, entries) {
794		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
795			continue;
796		if (altq->qname[0] == 0)  /* this is for interface */
797			continue;
798		if (altq->parent[0] == 0)  /* dummy root */
799			continue;
800		if (altq->pq_u.hfsc_opts.flags & HFCF_DEFAULTCLASS) {
801			default_class++;
802			def = altq;
803		}
804	}
805	if (default_class != 1) {
806		warnx("should have one default queue on %s", pa->ifname);
807		return (1);
808	}
809	/* make sure the default queue is a leaf */
810	TAILQ_FOREACH(altq, &altqs, entries) {
811		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
812			continue;
813		if (altq->qname[0] == 0)  /* this is for interface */
814			continue;
815		if (strncmp(altq->parent, def->qname, PF_QNAME_SIZE) == 0) {
816			warnx("default queue is not a leaf");
817			error++;
818		}
819	}
820	return (error);
821}
822
/*
 * Print the hfsc( ... ) option block for a queue.  The parsed qopts (if
 * given) let percentage-based curve specs be printed back as percentages.
 * Returns 1 if anything was printed, 0 when every option is at its
 * implicit default.
 */
static int
print_hfsc_opts(const struct pf_altq *a, const struct node_queue_opt *qopts)
{
	const struct hfsc_opts		*opts;
	const struct node_hfsc_sc	*rtsc, *lssc, *ulsc;

	opts = &a->pq_u.hfsc_opts;
	if (qopts == NULL)
		rtsc = lssc = ulsc = NULL;
	else {
		rtsc = &qopts->data.hfsc_opts.realtime;
		lssc = &qopts->data.hfsc_opts.linkshare;
		ulsc = &qopts->data.hfsc_opts.upperlimit;
	}

	/*
	 * a link-share curve equal to (bandwidth, d=0) is the implicit
	 * default and by itself does not warrant printing the block
	 */
	if (opts->flags || opts->rtsc_m2 != 0 || opts->ulsc_m2 != 0 ||
	    (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth ||
	    opts->lssc_d != 0))) {
		printf("hfsc(");
		if (opts->flags & HFCF_RED)
			printf(" red");
		if (opts->flags & HFCF_ECN)
			printf(" ecn");
		if (opts->flags & HFCF_RIO)
			printf(" rio");
		if (opts->flags & HFCF_CLEARDSCP)
			printf(" cleardscp");
		if (opts->flags & HFCF_DEFAULTCLASS)
			printf(" default");
		if (opts->rtsc_m2 != 0)
			print_hfsc_sc("realtime", opts->rtsc_m1, opts->rtsc_d,
			    opts->rtsc_m2, rtsc);
		/* same default-suppression condition as above */
		if (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth ||
		    opts->lssc_d != 0))
			print_hfsc_sc("linkshare", opts->lssc_m1, opts->lssc_d,
			    opts->lssc_m2, lssc);
		if (opts->ulsc_m2 != 0)
			print_hfsc_sc("upperlimit", opts->ulsc_m1, opts->ulsc_d,
			    opts->ulsc_m2, ulsc);
		printf(" ) ");

		return (1);
	} else
		return (0);
}
868
869/*
870 * admission control using generalized service curve
871 */
872#define	INFINITY	HUGE_VAL  /* positive infinity defined in <math.h> */
873
874/* add a new service curve to a generalized service curve */
875static void
876gsc_add_sc(struct gen_sc *gsc, struct service_curve *sc)
877{
878	if (is_sc_null(sc))
879		return;
880	if (sc->d != 0)
881		gsc_add_seg(gsc, 0.0, 0.0, (double)sc->d, (double)sc->m1);
882	gsc_add_seg(gsc, (double)sc->d, 0.0, INFINITY, (double)sc->m2);
883}
884
885/*
886 * check whether all points of a generalized service curve have
887 * their y-coordinates no larger than a given two-piece linear
888 * service curve.
889 */
static int
is_gsc_under_sc(struct gen_sc *gsc, struct service_curve *sc)
{
	struct segment	*s, *last, *end;
	double		 y;

	/* a null sc bounds gsc only if gsc is flat (all slopes zero) */
	if (is_sc_null(sc)) {
		if (LIST_EMPTY(gsc))
			return (1);
		LIST_FOREACH(s, gsc, _next) {
			if (s->m != 0)
				return (0);
		}
		return (1);
	}
	/*
	 * gsc has a dummy entry at the end with x = INFINITY.
	 * loop through up to this dummy entry.
	 */
	end = gsc_getentry(gsc, INFINITY);
	if (end == NULL)
		return (1);
	last = NULL;
	/* every segment start point must lie on or under sc */
	for (s = LIST_FIRST(gsc); s != end; s = LIST_NEXT(s, _next)) {
		if (s->y > sc_x2y(sc, s->x))
			return (0);
		last = s;
	}
	/* last now holds the real last segment */
	if (last == NULL)
		return (1);
	/* beyond the last start point, compare slopes against sc's tail */
	if (last->m > sc->m2)
		return (0);
	/* if the last segment crosses sc's knee at x = d, check the knee */
	if (last->x < sc->d && last->m > sc->m1) {
		y = last->y + (sc->d - last->x) * last->m;
		if (y > sc_x2y(sc, sc->d))
			return (0);
	}
	return (1);
}
930
931static void
932gsc_destroy(struct gen_sc *gsc)
933{
934	struct segment	*s;
935
936	while ((s = LIST_FIRST(gsc)) != NULL) {
937		LIST_REMOVE(s, _next);
938		free(s);
939	}
940}
941
942/*
943 * return a segment entry starting at x.
944 * if gsc has no entry starting at x, a new entry is created at x.
945 */
static struct segment *
gsc_getentry(struct gen_sc *gsc, double x)
{
	struct segment	*new, *prev, *s;

	/* the list is kept sorted by ascending x */
	prev = NULL;
	LIST_FOREACH(s, gsc, _next) {
		if (s->x == x)
			return (s);	/* matching entry found */
		else if (s->x < x)
			prev = s;
		else
			break;
	}

	/* we have to create a new entry */
	if ((new = calloc(1, sizeof(struct segment))) == NULL)
		return (NULL);

	new->x = x;
	/* the new entry extends to the next entry's start (s), if any */
	if (x == INFINITY || s == NULL)
		new->d = 0;
	else if (s->x == INFINITY)
		new->d = INFINITY;
	else
		new->d = s->x - x;
	if (prev == NULL) {
		/* insert the new entry at the head of the list */
		new->y = 0;
		new->m = 0;
		LIST_INSERT_HEAD(gsc, new, _next);
	} else {
		/*
		 * the start point intersects with the segment pointed by
		 * prev.  divide prev into 2 segments
		 */
		if (x == INFINITY) {
			prev->d = INFINITY;
			if (prev->m == 0)
				new->y = prev->y;
			else
				new->y = INFINITY;
		} else {
			prev->d = x - prev->x;
			/* y at x, extrapolated along prev's slope */
			new->y = prev->d * prev->m + prev->y;
		}
		new->m = prev->m;
		LIST_INSERT_AFTER(prev, new, _next);
	}
	return (new);
}
997
998/* add a segment to a generalized service curve */
static int
gsc_add_seg(struct gen_sc *gsc, double x, double y, double d, double m)
{
	struct segment	*start, *end, *s;
	double		 x2;

	/* the segment covers [x, x+d); d == INFINITY means it never ends */
	if (d == INFINITY)
		x2 = INFINITY;
	else
		x2 = x + d;
	start = gsc_getentry(gsc, x);
	end   = gsc_getentry(gsc, x2);
	if (start == NULL || end == NULL)
		return (-1);

	/* add slope m (and the matching offset) to entries inside [x, x2) */
	for (s = start; s != end; s = LIST_NEXT(s, _next)) {
		s->m += m;
		s->y += y + (s->x - x) * m;
	}

	/* entries past the segment keep their slope but shift up by m*d */
	end = gsc_getentry(gsc, INFINITY);
	for (; s != end; s = LIST_NEXT(s, _next)) {
		s->y += m * d;
	}

	return (0);
}
1026
1027/* get y-projection of a service curve */
1028static double
1029sc_x2y(struct service_curve *sc, double x)
1030{
1031	double	y;
1032
1033	if (x <= (double)sc->d)
1034		/* y belongs to the 1st segment */
1035		y = x * (double)sc->m1;
1036	else
1037		/* y belongs to the 2nd segment */
1038		y = (double)sc->d * (double)sc->m1
1039			+ (x - (double)sc->d) * (double)sc->m2;
1040	return (y);
1041}
1042
1043/*
1044 * misc utilities
1045 */
#define	R2S_BUFS	8
#define	RATESTR_MAX	16

/*
 * Format a bit rate in human-readable form ("1.50Kb", "2Mb", ...).
 * Uses a ring of static buffers so several results can be alive at once.
 */
char *
rate2str(double rate)
{
	static char	 r2sbuf[R2S_BUFS][RATESTR_MAX];  /* ring buffer */
	static int	 idx = 0;
	static const char unit[] = " KMG";
	char		*buf;
	int		 i;

	buf = r2sbuf[idx];
	if (++idx == R2S_BUFS)
		idx = 0;

	/* scale down by factors of 1000 while a larger unit fits */
	i = 0;
	while (rate >= 1000 && i <= 3) {
		rate /= 1000;
		i++;
	}

	if ((int)(rate * 100) % 100 != 0)
		snprintf(buf, RATESTR_MAX, "%.2f%cb", rate, unit[i]);
	else
		snprintf(buf, RATESTR_MAX, "%d%cb", (int)rate, unit[i]);

	return (buf);
}
1072
/*
 * Query the kernel for an interface's link speed (baud rate) via
 * SIOCGIFDATA on a throw-away UDP socket.  Exits the program on any
 * failure; returns the speed in bits per second.
 */
u_int32_t
getifspeed(char *ifname)
{
	int		s;
	struct ifreq	ifr;
	struct if_data	ifrdat;

	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
		err(1, "socket");
	if (strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)) >=
	    sizeof(ifr.ifr_name))
		errx(1, "getifspeed: strlcpy");
	/* the kernel fills ifrdat through the ifr_data pointer */
	ifr.ifr_data = (caddr_t)&ifrdat;
	if (ioctl(s, SIOCGIFDATA, (caddr_t)&ifr) == -1)
		err(1, "SIOCGIFDATA");
	if (shutdown(s, SHUT_RDWR) == -1)
		err(1, "shutdown");
	if (close(s))
		err(1, "close");
	return ((u_int32_t)ifrdat.ifi_baudrate);
}
1094
/*
 * Query the kernel for an interface's MTU via SIOCGIFMTU on a
 * throw-away UDP socket.  Exits on syscall failure; falls back to the
 * Ethernet default of 1500 when the kernel reports a non-positive MTU.
 */
u_long
getifmtu(char *ifname)
{
	int		s;
	struct ifreq	ifr;

	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
		err(1, "socket");
	if (strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)) >=
	    sizeof(ifr.ifr_name))
		errx(1, "getifmtu: strlcpy");
	if (ioctl(s, SIOCGIFMTU, (caddr_t)&ifr) == -1)
		err(1, "SIOCGIFMTU");
	if (shutdown(s, SHUT_RDWR) == -1)
		err(1, "shutdown");
	if (close(s))
		err(1, "close");
	if (ifr.ifr_mtu > 0)
		return (ifr.ifr_mtu);
	else {
		warnx("could not get mtu for %s, assuming 1500", ifname);
		return (1500);
	}
}
1119
/*
 * Copy the parser's scheduler-specific options into the pf_altq,
 * resolving percentage-based HFSC curve parameters against ref_bw.
 * Returns the number of errors encountered (0 or 1).
 */
int
eval_queue_opts(struct pf_altq *pa, struct node_queue_opt *opts,
    u_int32_t ref_bw)
{
	int	errors = 0;

	switch (pa->scheduler) {
	case ALTQT_CBQ:
		pa->pq_u.cbq_opts = opts->data.cbq_opts;
		break;
	case ALTQT_PRIQ:
		pa->pq_u.priq_opts = opts->data.priq_opts;
		break;
	case ALTQT_HFSC:
		pa->pq_u.hfsc_opts.flags = opts->data.hfsc_opts.flags;
		/* each service curve is (m1, d, m2); only the curves the
		 * user actually specified are copied */
		if (opts->data.hfsc_opts.linkshare.used) {
			pa->pq_u.hfsc_opts.lssc_m1 =
			    eval_bwspec(&opts->data.hfsc_opts.linkshare.m1,
			    ref_bw);
			pa->pq_u.hfsc_opts.lssc_m2 =
			    eval_bwspec(&opts->data.hfsc_opts.linkshare.m2,
			    ref_bw);
			pa->pq_u.hfsc_opts.lssc_d =
			    opts->data.hfsc_opts.linkshare.d;
		}
		if (opts->data.hfsc_opts.realtime.used) {
			pa->pq_u.hfsc_opts.rtsc_m1 =
			    eval_bwspec(&opts->data.hfsc_opts.realtime.m1,
			    ref_bw);
			pa->pq_u.hfsc_opts.rtsc_m2 =
			    eval_bwspec(&opts->data.hfsc_opts.realtime.m2,
			    ref_bw);
			pa->pq_u.hfsc_opts.rtsc_d =
			    opts->data.hfsc_opts.realtime.d;
		}
		if (opts->data.hfsc_opts.upperlimit.used) {
			pa->pq_u.hfsc_opts.ulsc_m1 =
			    eval_bwspec(&opts->data.hfsc_opts.upperlimit.m1,
			    ref_bw);
			pa->pq_u.hfsc_opts.ulsc_m2 =
			    eval_bwspec(&opts->data.hfsc_opts.upperlimit.m2,
			    ref_bw);
			pa->pq_u.hfsc_opts.ulsc_d =
			    opts->data.hfsc_opts.upperlimit.d;
		}
		break;
	default:
		warnx("eval_queue_opts: unknown scheduler type %u",
		    opts->qtype);
		errors++;
		break;
	}

	return (errors);
}
1175
1176u_int32_t
1177eval_bwspec(struct node_queue_bw *bw, u_int32_t ref_bw)
1178{
1179	if (bw->bw_absolute > 0)
1180		return (bw->bw_absolute);
1181
1182	if (bw->bw_percent > 0)
1183		return (ref_bw / 100 * bw->bw_percent);
1184
1185	return (0);
1186}
1187
1188void
1189print_hfsc_sc(const char *scname, u_int m1, u_int d, u_int m2,
1190    const struct node_hfsc_sc *sc)
1191{
1192	printf(" %s", scname);
1193
1194	if (d != 0) {
1195		printf("(");
1196		if (sc != NULL && sc->m1.bw_percent > 0)
1197			printf("%u%%", sc->m1.bw_percent);
1198		else
1199			printf("%s", rate2str((double)m1));
1200		printf(" %u", d);
1201	}
1202
1203	if (sc != NULL && sc->m2.bw_percent > 0)
1204		printf(" %u%%", sc->m2.bw_percent);
1205	else
1206		printf(" %s", rate2str((double)m2));
1207
1208	if (d != 0)
1209		printf(")");
1210}
1211