pfctl_altq.c revision 1.9
1/*	$NetBSD: pfctl_altq.c,v 1.9 2010/03/01 00:14:08 joerg Exp $	*/
2/*	$OpenBSD: pfctl_altq.c,v 1.92 2007/05/27 05:15:17 claudio Exp $	*/
3
4/*
5 * Copyright (c) 2002
6 *	Sony Computer Science Laboratories Inc.
7 * Copyright (c) 2002, 2003 Henning Brauer <henning@openbsd.org>
8 *
9 * Permission to use, copy, modify, and distribute this software for any
10 * purpose with or without fee is hereby granted, provided that the above
11 * copyright notice and this permission notice appear in all copies.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
14 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
15 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
16 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
17 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
18 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
19 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20 */
21
22#include <sys/types.h>
23#include <sys/ioctl.h>
24#include <sys/socket.h>
25#ifdef __NetBSD__
26#include <sys/param.h>
27#include <sys/mbuf.h>
28#endif
29
30#include <net/if.h>
31#include <netinet/in.h>
32#include <net/pfvar.h>
33
34#include <err.h>
35#include <errno.h>
36#include <limits.h>
37#include <math.h>
38#include <stdio.h>
39#include <stdlib.h>
40#include <string.h>
41#include <unistd.h>
42
43#include <altq/altq.h>
44#include <altq/altq_cbq.h>
45#include <altq/altq_priq.h>
46#include <altq/altq_hfsc.h>
47
48#include "pfctl_parser.h"
49#include "pfctl.h"
50
51#define is_sc_null(sc)	(((sc) == NULL) || ((sc)->m1 == 0 && (sc)->m2 == 0))
52
53TAILQ_HEAD(altqs, pf_altq) altqs = TAILQ_HEAD_INITIALIZER(altqs);
54LIST_HEAD(gen_sc, segment) rtsc, lssc;
55
56struct pf_altq	*qname_to_pfaltq(const char *, const char *);
57u_int32_t	 qname_to_qid(const char *);
58
59static int	eval_pfqueue_cbq(struct pfctl *, struct pf_altq *);
60static int	cbq_compute_idletime(struct pfctl *, struct pf_altq *);
61static int	check_commit_cbq(int, int, struct pf_altq *);
62static int	print_cbq_opts(const struct pf_altq *);
63
64static int	eval_pfqueue_priq(struct pfctl *, struct pf_altq *);
65static int	check_commit_priq(int, int, struct pf_altq *);
66static int	print_priq_opts(const struct pf_altq *);
67
68static int	eval_pfqueue_hfsc(struct pfctl *, struct pf_altq *);
69static int	check_commit_hfsc(int, int, struct pf_altq *);
70static int	print_hfsc_opts(const struct pf_altq *,
71		    const struct node_queue_opt *);
72
73static void		 gsc_add_sc(struct gen_sc *, struct service_curve *);
74static int		 is_gsc_under_sc(struct gen_sc *,
75			     struct service_curve *);
76static void		 gsc_destroy(struct gen_sc *);
77static struct segment	*gsc_getentry(struct gen_sc *, double);
78static int		 gsc_add_seg(struct gen_sc *, double, double, double,
79			     double);
80static double		 sc_x2y(struct service_curve *, double);
81
82u_int32_t	 getifspeed(char *);
83u_long		 getifmtu(char *);
84int		 eval_queue_opts(struct pf_altq *, struct node_queue_opt *,
85		     u_int32_t);
86u_int32_t	 eval_bwspec(struct node_queue_bw *, u_int32_t);
87void		 print_hfsc_sc(const char *, u_int, u_int, u_int,
88		     const struct node_hfsc_sc *);
89
90void
91pfaltq_store(struct pf_altq *a)
92{
93	struct pf_altq	*altq;
94
95	if ((altq = malloc(sizeof(*altq))) == NULL)
96		err(1, "malloc");
97	memcpy(altq, a, sizeof(struct pf_altq));
98	TAILQ_INSERT_TAIL(&altqs, altq, entries);
99}
100
101struct pf_altq *
102pfaltq_lookup(const char *ifname)
103{
104	struct pf_altq	*altq;
105
106	TAILQ_FOREACH(altq, &altqs, entries) {
107		if (strncmp(ifname, altq->ifname, IFNAMSIZ) == 0 &&
108		    altq->qname[0] == 0)
109			return (altq);
110	}
111	return (NULL);
112}
113
114struct pf_altq *
115qname_to_pfaltq(const char *qname, const char *ifname)
116{
117	struct pf_altq	*altq;
118
119	TAILQ_FOREACH(altq, &altqs, entries) {
120		if (strncmp(ifname, altq->ifname, IFNAMSIZ) == 0 &&
121		    strncmp(qname, altq->qname, PF_QNAME_SIZE) == 0)
122			return (altq);
123	}
124	return (NULL);
125}
126
127u_int32_t
128qname_to_qid(const char *qname)
129{
130	struct pf_altq	*altq;
131
132	/*
133	 * We guarantee that same named queues on different interfaces
134	 * have the same qid, so we do NOT need to limit matching on
135	 * one interface!
136	 */
137
138	TAILQ_FOREACH(altq, &altqs, entries) {
139		if (strncmp(qname, altq->qname, PF_QNAME_SIZE) == 0)
140			return (altq->qid);
141	}
142	return (0);
143}
144
145void
146print_altq(const struct pf_altq *a, unsigned level, struct node_queue_bw *bw,
147	struct node_queue_opt *qopts)
148{
149	if (a->qname[0] != 0) {
150		print_queue(a, level, bw, 1, qopts);
151		return;
152	}
153
154	printf("altq on %s ", a->ifname);
155
156	switch (a->scheduler) {
157	case ALTQT_CBQ:
158		if (!print_cbq_opts(a))
159			printf("cbq ");
160		break;
161	case ALTQT_PRIQ:
162		if (!print_priq_opts(a))
163			printf("priq ");
164		break;
165	case ALTQT_HFSC:
166		if (!print_hfsc_opts(a, qopts))
167			printf("hfsc ");
168		break;
169	}
170
171	if (bw != NULL && bw->bw_percent > 0) {
172		if (bw->bw_percent < 100)
173			printf("bandwidth %u%% ", bw->bw_percent);
174	} else
175		printf("bandwidth %s ", rate2str((double)a->ifbandwidth));
176
177	if (a->qlimit != DEFAULT_QLIMIT)
178		printf("qlimit %u ", a->qlimit);
179	printf("tbrsize %u ", a->tbrsize);
180}
181
182void
183print_queue(const struct pf_altq *a, unsigned level, struct node_queue_bw *bw,
184    int print_interface, struct node_queue_opt *qopts)
185{
186	unsigned	i;
187
188	printf("queue ");
189	for (i = 0; i < level; ++i)
190		printf(" ");
191	printf("%s ", a->qname);
192	if (print_interface)
193		printf("on %s ", a->ifname);
194	if (a->scheduler == ALTQT_CBQ || a->scheduler == ALTQT_HFSC) {
195		if (bw != NULL && bw->bw_percent > 0) {
196			if (bw->bw_percent < 100)
197				printf("bandwidth %u%% ", bw->bw_percent);
198		} else
199			printf("bandwidth %s ", rate2str((double)a->bandwidth));
200	}
201	if (a->priority != DEFAULT_PRIORITY)
202		printf("priority %u ", a->priority);
203	if (a->qlimit != DEFAULT_QLIMIT)
204		printf("qlimit %u ", a->qlimit);
205	switch (a->scheduler) {
206	case ALTQT_CBQ:
207		print_cbq_opts(a);
208		break;
209	case ALTQT_PRIQ:
210		print_priq_opts(a);
211		break;
212	case ALTQT_HFSC:
213		print_hfsc_opts(a, qopts);
214		break;
215	}
216}
217
218/*
219 * eval_pfaltq computes the discipline parameters.
220 */
221int
222eval_pfaltq(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw,
223    struct node_queue_opt *opts)
224{
225	u_int	rate, size, errors = 0;
226
227	if (bw->bw_absolute > 0)
228		pa->ifbandwidth = bw->bw_absolute;
229	else
230		if ((rate = getifspeed(pa->ifname)) == 0) {
231			fprintf(stderr, "interface %s does not know its bandwidth, "
232			    "please specify an absolute bandwidth\n",
233			    pa->ifname);
234			errors++;
235		} else if ((pa->ifbandwidth = eval_bwspec(bw, rate)) == 0)
236			pa->ifbandwidth = rate;
237
238	errors += eval_queue_opts(pa, opts, pa->ifbandwidth);
239
240	/* if tbrsize is not specified, use heuristics */
241	if (pa->tbrsize == 0) {
242		rate = pa->ifbandwidth;
243		if (rate <= 1 * 1000 * 1000)
244			size = 1;
245		else if (rate <= 10 * 1000 * 1000)
246			size = 4;
247		else if (rate <= 200 * 1000 * 1000)
248			size = 8;
249		else
250			size = 24;
251		size = size * getifmtu(pa->ifname);
252		if (size > 0xffff)
253			size = 0xffff;
254		pa->tbrsize = size;
255	}
256	return (errors);
257}
258
259/*
260 * check_commit_altq does consistency check for each interface
261 */
262int
263check_commit_altq(int dev, int opts)
264{
265	struct pf_altq	*altq;
266	int		 error = 0;
267
268	/* call the discipline check for each interface. */
269	TAILQ_FOREACH(altq, &altqs, entries) {
270		if (altq->qname[0] == 0) {
271			switch (altq->scheduler) {
272			case ALTQT_CBQ:
273				error = check_commit_cbq(dev, opts, altq);
274				break;
275			case ALTQT_PRIQ:
276				error = check_commit_priq(dev, opts, altq);
277				break;
278			case ALTQT_HFSC:
279				error = check_commit_hfsc(dev, opts, altq);
280				break;
281			default:
282				break;
283			}
284		}
285	}
286	return (error);
287}
288
289/*
290 * eval_pfqueue computes the queue parameters.
291 */
292int
293eval_pfqueue(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw,
294    struct node_queue_opt *opts)
295{
296	/* should be merged with expand_queue */
297	struct pf_altq	*if_pa, *parent, *altq;
298	u_int32_t	 bwsum;
299	int		 error = 0;
300
301	/* find the corresponding interface and copy fields used by queues */
302	if ((if_pa = pfaltq_lookup(pa->ifname)) == NULL) {
303		fprintf(stderr, "altq not defined on %s\n", pa->ifname);
304		return (1);
305	}
306	pa->scheduler = if_pa->scheduler;
307	pa->ifbandwidth = if_pa->ifbandwidth;
308
309	if (qname_to_pfaltq(pa->qname, pa->ifname) != NULL) {
310		fprintf(stderr, "queue %s already exists on interface %s\n",
311		    pa->qname, pa->ifname);
312		return (1);
313	}
314	pa->qid = qname_to_qid(pa->qname);
315
316	parent = NULL;
317	if (pa->parent[0] != 0) {
318		parent = qname_to_pfaltq(pa->parent, pa->ifname);
319		if (parent == NULL) {
320			fprintf(stderr, "parent %s not found for %s\n",
321			    pa->parent, pa->qname);
322			return (1);
323		}
324		pa->parent_qid = parent->qid;
325	}
326	if (pa->qlimit == 0)
327		pa->qlimit = DEFAULT_QLIMIT;
328
329	if (pa->scheduler == ALTQT_CBQ || pa->scheduler == ALTQT_HFSC) {
330		pa->bandwidth = eval_bwspec(bw,
331		    parent == NULL ? 0 : parent->bandwidth);
332
333		if (pa->bandwidth > pa->ifbandwidth) {
334			fprintf(stderr, "bandwidth for %s higher than "
335			    "interface\n", pa->qname);
336			return (1);
337		}
338		/* check the sum of the child bandwidth is under parent's */
339		if (parent != NULL) {
340			if (pa->bandwidth > parent->bandwidth) {
341				warnx("bandwidth for %s higher than parent",
342				    pa->qname);
343				return (1);
344			}
345			bwsum = 0;
346			TAILQ_FOREACH(altq, &altqs, entries) {
347				if (strncmp(altq->ifname, pa->ifname,
348				    IFNAMSIZ) == 0 &&
349				    altq->qname[0] != 0 &&
350				    strncmp(altq->parent, pa->parent,
351				    PF_QNAME_SIZE) == 0)
352					bwsum += altq->bandwidth;
353			}
354			bwsum += pa->bandwidth;
355			if (bwsum > parent->bandwidth) {
356				warnx("the sum of the child bandwidth higher"
357				    " than parent \"%s\"", parent->qname);
358			}
359		}
360	}
361
362	if (eval_queue_opts(pa, opts, parent == NULL? 0 : parent->bandwidth))
363		return (1);
364
365	switch (pa->scheduler) {
366	case ALTQT_CBQ:
367		error = eval_pfqueue_cbq(pf, pa);
368		break;
369	case ALTQT_PRIQ:
370		error = eval_pfqueue_priq(pf, pa);
371		break;
372	case ALTQT_HFSC:
373		error = eval_pfqueue_hfsc(pf, pa);
374		break;
375	default:
376		break;
377	}
378	return (error);
379}
380
381/*
382 * CBQ support functions
383 */
384#define	RM_FILTER_GAIN	5	/* log2 of gain, e.g., 5 => 31/32 */
385#define	RM_NS_PER_SEC	(1000000000)
386
387static int
388eval_pfqueue_cbq(struct pfctl *pf, struct pf_altq *pa)
389{
390	struct cbq_opts	*opts;
391	u_int		 ifmtu;
392
393	if (pa->priority >= CBQ_MAXPRI) {
394		warnx("priority out of range: max %d", CBQ_MAXPRI - 1);
395		return (-1);
396	}
397
398	ifmtu = getifmtu(pa->ifname);
399	opts = &pa->pq_u.cbq_opts;
400
401	if (opts->pktsize == 0) {	/* use default */
402		opts->pktsize = ifmtu;
403		if (opts->pktsize > MCLBYTES)	/* do what TCP does */
404			opts->pktsize &= ~MCLBYTES;
405	} else if (opts->pktsize > ifmtu)
406		opts->pktsize = ifmtu;
407	if (opts->maxpktsize == 0)	/* use default */
408		opts->maxpktsize = ifmtu;
409	else if (opts->maxpktsize > ifmtu)
410		opts->pktsize = ifmtu;
411
412	if (opts->pktsize > opts->maxpktsize)
413		opts->pktsize = opts->maxpktsize;
414
415	if (pa->parent[0] == 0)
416		opts->flags |= (CBQCLF_ROOTCLASS | CBQCLF_WRR);
417
418	cbq_compute_idletime(pf, pa);
419	return (0);
420}
421
422/*
423 * compute ns_per_byte, maxidle, minidle, and offtime
424 */
425static int
426cbq_compute_idletime(struct pfctl *pf, struct pf_altq *pa)
427{
428	struct cbq_opts	*opts;
429	double		 maxidle_s, maxidle, minidle;
430	double		 offtime, nsPerByte, ifnsPerByte, ptime, cptime;
431	double		 z, g, f, gton, gtom;
432	u_int		 minburst, maxburst;
433
434	opts = &pa->pq_u.cbq_opts;
435	ifnsPerByte = (1.0 / (double)pa->ifbandwidth) * RM_NS_PER_SEC * 8;
436	minburst = opts->minburst;
437	maxburst = opts->maxburst;
438
439	if (pa->bandwidth == 0)
440		f = 0.0001;	/* small enough? */
441	else
442		f = ((double) pa->bandwidth / (double) pa->ifbandwidth);
443
444	nsPerByte = ifnsPerByte / f;
445	ptime = (double)opts->pktsize * ifnsPerByte;
446	cptime = ptime * (1.0 - f) / f;
447
448	if (nsPerByte * (double)opts->maxpktsize > (double)INT_MAX) {
449		/*
450		 * this causes integer overflow in kernel!
451		 * (bandwidth < 6Kbps when max_pkt_size=1500)
452		 */
453		if (pa->bandwidth != 0 && (pf->opts & PF_OPT_QUIET) == 0)
454			warnx("queue bandwidth must be larger than %s",
455			    rate2str(ifnsPerByte * (double)opts->maxpktsize /
456			    (double)INT_MAX * (double)pa->ifbandwidth));
457			fprintf(stderr, "cbq: queue %s is too slow!\n",
458			    pa->qname);
459		nsPerByte = (double)(INT_MAX / opts->maxpktsize);
460	}
461
462	if (maxburst == 0) {  /* use default */
463		if (cptime > 10.0 * 1000000)
464			maxburst = 4;
465		else
466			maxburst = 16;
467	}
468	if (minburst == 0)  /* use default */
469		minburst = 2;
470	if (minburst > maxburst)
471		minburst = maxburst;
472
473	z = (double)(1 << RM_FILTER_GAIN);
474	g = (1.0 - 1.0 / z);
475	gton = pow(g, (double)maxburst);
476	gtom = pow(g, (double)(minburst-1));
477	maxidle = ((1.0 / f - 1.0) * ((1.0 - gton) / gton));
478	maxidle_s = (1.0 - g);
479	if (maxidle > maxidle_s)
480		maxidle = ptime * maxidle;
481	else
482		maxidle = ptime * maxidle_s;
483	offtime = cptime * (1.0 + 1.0/(1.0 - g) * (1.0 - gtom) / gtom);
484	minidle = -((double)opts->maxpktsize * (double)nsPerByte);
485
486	/* scale parameters */
487	maxidle = ((maxidle * 8.0) / nsPerByte) *
488	    pow(2.0, (double)RM_FILTER_GAIN);
489	offtime = (offtime * 8.0) / nsPerByte *
490	    pow(2.0, (double)RM_FILTER_GAIN);
491	minidle = ((minidle * 8.0) / nsPerByte) *
492	    pow(2.0, (double)RM_FILTER_GAIN);
493
494	maxidle = maxidle / 1000.0;
495	offtime = offtime / 1000.0;
496	minidle = minidle / 1000.0;
497
498	opts->minburst = minburst;
499	opts->maxburst = maxburst;
500	opts->ns_per_byte = (u_int)nsPerByte;
501	opts->maxidle = (u_int)fabs(maxidle);
502	opts->minidle = (int)minidle;
503	opts->offtime = (u_int)fabs(offtime);
504
505	return (0);
506}
507
508static int
509check_commit_cbq(int dev, int opts, struct pf_altq *pa)
510{
511	struct pf_altq	*altq;
512	int		 root_class, default_class;
513	int		 error = 0;
514
515	/*
516	 * check if cbq has one root queue and one default queue
517	 * for this interface
518	 */
519	root_class = default_class = 0;
520	TAILQ_FOREACH(altq, &altqs, entries) {
521		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
522			continue;
523		if (altq->qname[0] == 0)  /* this is for interface */
524			continue;
525		if (altq->pq_u.cbq_opts.flags & CBQCLF_ROOTCLASS)
526			root_class++;
527		if (altq->pq_u.cbq_opts.flags & CBQCLF_DEFCLASS)
528			default_class++;
529	}
530	if (root_class != 1) {
531		warnx("should have one root queue on %s", pa->ifname);
532		error++;
533	}
534	if (default_class != 1) {
535		warnx("should have one default queue on %s", pa->ifname);
536		error++;
537	}
538	return (error);
539}
540
541static int
542print_cbq_opts(const struct pf_altq *a)
543{
544	const struct cbq_opts	*opts;
545
546	opts = &a->pq_u.cbq_opts;
547	if (opts->flags) {
548		printf("cbq(");
549		if (opts->flags & CBQCLF_RED)
550			printf(" red");
551		if (opts->flags & CBQCLF_ECN)
552			printf(" ecn");
553		if (opts->flags & CBQCLF_RIO)
554			printf(" rio");
555		if (opts->flags & CBQCLF_CLEARDSCP)
556			printf(" cleardscp");
557		if (opts->flags & CBQCLF_FLOWVALVE)
558			printf(" flowvalve");
559#ifdef CBQCLF_BORROW
560		if (opts->flags & CBQCLF_BORROW)
561			printf(" borrow");
562#endif
563		if (opts->flags & CBQCLF_WRR)
564			printf(" wrr");
565		if (opts->flags & CBQCLF_EFFICIENT)
566			printf(" efficient");
567		if (opts->flags & CBQCLF_ROOTCLASS)
568			printf(" root");
569		if (opts->flags & CBQCLF_DEFCLASS)
570			printf(" default");
571		printf(" ) ");
572
573		return (1);
574	} else
575		return (0);
576}
577
578/*
579 * PRIQ support functions
580 */
581static int
582eval_pfqueue_priq(struct pfctl *pf, struct pf_altq *pa)
583{
584	struct pf_altq	*altq;
585
586	if (pa->priority >= PRIQ_MAXPRI) {
587		warnx("priority out of range: max %d", PRIQ_MAXPRI - 1);
588		return (-1);
589	}
590	/* the priority should be unique for the interface */
591	TAILQ_FOREACH(altq, &altqs, entries) {
592		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) == 0 &&
593		    altq->qname[0] != 0 && altq->priority == pa->priority) {
594			warnx("%s and %s have the same priority",
595			    altq->qname, pa->qname);
596			return (-1);
597		}
598	}
599
600	return (0);
601}
602
603static int
604check_commit_priq(int dev, int opts, struct pf_altq *pa)
605{
606	struct pf_altq	*altq;
607	int		 default_class;
608	int		 error = 0;
609
610	/*
611	 * check if priq has one default class for this interface
612	 */
613	default_class = 0;
614	TAILQ_FOREACH(altq, &altqs, entries) {
615		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
616			continue;
617		if (altq->qname[0] == 0)  /* this is for interface */
618			continue;
619		if (altq->pq_u.priq_opts.flags & PRCF_DEFAULTCLASS)
620			default_class++;
621	}
622	if (default_class != 1) {
623		warnx("should have one default queue on %s", pa->ifname);
624		error++;
625	}
626	return (error);
627}
628
629static int
630print_priq_opts(const struct pf_altq *a)
631{
632	const struct priq_opts	*opts;
633
634	opts = &a->pq_u.priq_opts;
635
636	if (opts->flags) {
637		printf("priq(");
638		if (opts->flags & PRCF_RED)
639			printf(" red");
640		if (opts->flags & PRCF_ECN)
641			printf(" ecn");
642		if (opts->flags & PRCF_RIO)
643			printf(" rio");
644		if (opts->flags & PRCF_CLEARDSCP)
645			printf(" cleardscp");
646		if (opts->flags & PRCF_DEFAULTCLASS)
647			printf(" default");
648		printf(" ) ");
649
650		return (1);
651	} else
652		return (0);
653}
654
655/*
656 * HFSC support functions
657 */
658static int
659eval_pfqueue_hfsc(struct pfctl *pf, struct pf_altq *pa)
660{
661	struct pf_altq		*altq, *parent;
662	struct hfsc_opts	*opts;
663	struct service_curve	 sc;
664
665	opts = &pa->pq_u.hfsc_opts;
666
667	if (pa->parent[0] == 0) {
668		/* root queue */
669		opts->lssc_m1 = pa->ifbandwidth;
670		opts->lssc_m2 = pa->ifbandwidth;
671		opts->lssc_d = 0;
672		return (0);
673	}
674
675	LIST_INIT(&rtsc);
676	LIST_INIT(&lssc);
677
678	/* if link_share is not specified, use bandwidth */
679	if (opts->lssc_m2 == 0)
680		opts->lssc_m2 = pa->bandwidth;
681
682	if ((opts->rtsc_m1 > 0 && opts->rtsc_m2 == 0) ||
683	    (opts->lssc_m1 > 0 && opts->lssc_m2 == 0) ||
684	    (opts->ulsc_m1 > 0 && opts->ulsc_m2 == 0)) {
685		warnx("m2 is zero for %s", pa->qname);
686		return (-1);
687	}
688
689	if ((opts->rtsc_m1 < opts->rtsc_m2 && opts->rtsc_m1 != 0) ||
690	    (opts->lssc_m1 < opts->lssc_m2 && opts->lssc_m1 != 0) ||
691	    (opts->ulsc_m1 < opts->ulsc_m2 && opts->ulsc_m1 != 0)) {
692		warnx("m1 must be zero for convex curve: %s", pa->qname);
693		return (-1);
694	}
695
696	/*
697	 * admission control:
698	 * for the real-time service curve, the sum of the service curves
699	 * should not exceed 80% of the interface bandwidth.  20% is reserved
700	 * not to over-commit the actual interface bandwidth.
701	 * for the linkshare service curve, the sum of the child service
702	 * curve should not exceed the parent service curve.
703	 * for the upper-limit service curve, the assigned bandwidth should
704	 * be smaller than the interface bandwidth, and the upper-limit should
705	 * be larger than the real-time service curve when both are defined.
706	 */
707	parent = qname_to_pfaltq(pa->parent, pa->ifname);
708	if (parent == NULL)
709		errx(1, "parent %s not found for %s", pa->parent, pa->qname);
710
711	TAILQ_FOREACH(altq, &altqs, entries) {
712		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
713			continue;
714		if (altq->qname[0] == 0)  /* this is for interface */
715			continue;
716
717		/* if the class has a real-time service curve, add it. */
718		if (opts->rtsc_m2 != 0 && altq->pq_u.hfsc_opts.rtsc_m2 != 0) {
719			sc.m1 = altq->pq_u.hfsc_opts.rtsc_m1;
720			sc.d = altq->pq_u.hfsc_opts.rtsc_d;
721			sc.m2 = altq->pq_u.hfsc_opts.rtsc_m2;
722			gsc_add_sc(&rtsc, &sc);
723		}
724
725		if (strncmp(altq->parent, pa->parent, PF_QNAME_SIZE) != 0)
726			continue;
727
728		/* if the class has a linkshare service curve, add it. */
729		if (opts->lssc_m2 != 0 && altq->pq_u.hfsc_opts.lssc_m2 != 0) {
730			sc.m1 = altq->pq_u.hfsc_opts.lssc_m1;
731			sc.d = altq->pq_u.hfsc_opts.lssc_d;
732			sc.m2 = altq->pq_u.hfsc_opts.lssc_m2;
733			gsc_add_sc(&lssc, &sc);
734		}
735	}
736
737	/* check the real-time service curve.  reserve 20% of interface bw */
738	if (opts->rtsc_m2 != 0) {
739		/* add this queue to the sum */
740		sc.m1 = opts->rtsc_m1;
741		sc.d = opts->rtsc_d;
742		sc.m2 = opts->rtsc_m2;
743		gsc_add_sc(&rtsc, &sc);
744		/* compare the sum with 80% of the interface */
745		sc.m1 = 0;
746		sc.d = 0;
747		sc.m2 = pa->ifbandwidth / 100 * 80;
748		if (!is_gsc_under_sc(&rtsc, &sc)) {
749			warnx("real-time sc exceeds 80%% of the interface "
750			    "bandwidth (%s)", rate2str((double)sc.m2));
751			goto err_ret;
752		}
753	}
754
755	/* check the linkshare service curve. */
756	if (opts->lssc_m2 != 0) {
757		/* add this queue to the child sum */
758		sc.m1 = opts->lssc_m1;
759		sc.d = opts->lssc_d;
760		sc.m2 = opts->lssc_m2;
761		gsc_add_sc(&lssc, &sc);
762		/* compare the sum of the children with parent's sc */
763		sc.m1 = parent->pq_u.hfsc_opts.lssc_m1;
764		sc.d = parent->pq_u.hfsc_opts.lssc_d;
765		sc.m2 = parent->pq_u.hfsc_opts.lssc_m2;
766		if (!is_gsc_under_sc(&lssc, &sc)) {
767			warnx("linkshare sc exceeds parent's sc");
768			goto err_ret;
769		}
770	}
771
772	/* check the upper-limit service curve. */
773	if (opts->ulsc_m2 != 0) {
774		if (opts->ulsc_m1 > pa->ifbandwidth ||
775		    opts->ulsc_m2 > pa->ifbandwidth) {
776			warnx("upper-limit larger than interface bandwidth");
777			goto err_ret;
778		}
779		if (opts->rtsc_m2 != 0 && opts->rtsc_m2 > opts->ulsc_m2) {
780			warnx("upper-limit sc smaller than real-time sc");
781			goto err_ret;
782		}
783	}
784
785	gsc_destroy(&rtsc);
786	gsc_destroy(&lssc);
787
788	return (0);
789
790err_ret:
791	gsc_destroy(&rtsc);
792	gsc_destroy(&lssc);
793	return (-1);
794}
795
796static int
797check_commit_hfsc(int dev, int opts, struct pf_altq *pa)
798{
799	struct pf_altq	*altq, *def = NULL;
800	int		 default_class;
801	int		 error = 0;
802
803	/* check if hfsc has one default queue for this interface */
804	default_class = 0;
805	TAILQ_FOREACH(altq, &altqs, entries) {
806		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
807			continue;
808		if (altq->qname[0] == 0)  /* this is for interface */
809			continue;
810		if (altq->parent[0] == 0)  /* dummy root */
811			continue;
812		if (altq->pq_u.hfsc_opts.flags & HFCF_DEFAULTCLASS) {
813			default_class++;
814			def = altq;
815		}
816	}
817	if (default_class != 1) {
818		warnx("should have one default queue on %s", pa->ifname);
819		return (1);
820	}
821	/* make sure the default queue is a leaf */
822	TAILQ_FOREACH(altq, &altqs, entries) {
823		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
824			continue;
825		if (altq->qname[0] == 0)  /* this is for interface */
826			continue;
827		if (strncmp(altq->parent, def->qname, PF_QNAME_SIZE) == 0) {
828			warnx("default queue is not a leaf");
829			error++;
830		}
831	}
832	return (error);
833}
834
835static int
836print_hfsc_opts(const struct pf_altq *a, const struct node_queue_opt *qopts)
837{
838	const struct hfsc_opts		*opts;
839	const struct node_hfsc_sc	*rtsc, *lssc, *ulsc;
840
841	opts = &a->pq_u.hfsc_opts;
842	if (qopts == NULL)
843		rtsc = lssc = ulsc = NULL;
844	else {
845		rtsc = &qopts->data.hfsc_opts.realtime;
846		lssc = &qopts->data.hfsc_opts.linkshare;
847		ulsc = &qopts->data.hfsc_opts.upperlimit;
848	}
849
850	if (opts->flags || opts->rtsc_m2 != 0 || opts->ulsc_m2 != 0 ||
851	    (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth ||
852	    opts->lssc_d != 0))) {
853		printf("hfsc(");
854		if (opts->flags & HFCF_RED)
855			printf(" red");
856		if (opts->flags & HFCF_ECN)
857			printf(" ecn");
858		if (opts->flags & HFCF_RIO)
859			printf(" rio");
860		if (opts->flags & HFCF_CLEARDSCP)
861			printf(" cleardscp");
862		if (opts->flags & HFCF_DEFAULTCLASS)
863			printf(" default");
864		if (opts->rtsc_m2 != 0)
865			print_hfsc_sc("realtime", opts->rtsc_m1, opts->rtsc_d,
866			    opts->rtsc_m2, rtsc);
867		if (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth ||
868		    opts->lssc_d != 0))
869			print_hfsc_sc("linkshare", opts->lssc_m1, opts->lssc_d,
870			    opts->lssc_m2, lssc);
871		if (opts->ulsc_m2 != 0)
872			print_hfsc_sc("upperlimit", opts->ulsc_m1, opts->ulsc_d,
873			    opts->ulsc_m2, ulsc);
874		printf(" ) ");
875
876		return (1);
877	} else
878		return (0);
879}
880
881/*
882 * admission control using generalized service curve
883 */
884
885/* add a new service curve to a generalized service curve */
886static void
887gsc_add_sc(struct gen_sc *gsc, struct service_curve *sc)
888{
889	if (is_sc_null(sc))
890		return;
891	if (sc->d != 0)
892		gsc_add_seg(gsc, 0.0, 0.0, (double)sc->d, (double)sc->m1);
893	gsc_add_seg(gsc, (double)sc->d, 0.0, HUGE_VAL, (double)sc->m2);
894}
895
896/*
897 * check whether all points of a generalized service curve have
898 * their y-coordinates no larger than a given two-piece linear
899 * service curve.
900 */
901static int
902is_gsc_under_sc(struct gen_sc *gsc, struct service_curve *sc)
903{
904	struct segment	*s, *last, *end;
905	double		 y;
906
907	if (is_sc_null(sc)) {
908		if (LIST_EMPTY(gsc))
909			return (1);
910		LIST_FOREACH(s, gsc, _next) {
911			if (s->m != 0)
912				return (0);
913		}
914		return (1);
915	}
916	/*
917	 * gsc has a dummy entry at the end with x = HUGE_VAL.
918	 * loop through up to this dummy entry.
919	 */
920	end = gsc_getentry(gsc, HUGE_VAL);
921	if (end == NULL)
922		return (1);
923	last = NULL;
924	for (s = LIST_FIRST(gsc); s != end; s = LIST_NEXT(s, _next)) {
925		if (s->y > sc_x2y(sc, s->x))
926			return (0);
927		last = s;
928	}
929	/* last now holds the real last segment */
930	if (last == NULL)
931		return (1);
932	if (last->m > sc->m2)
933		return (0);
934	if (last->x < sc->d && last->m > sc->m1) {
935		y = last->y + (sc->d - last->x) * last->m;
936		if (y > sc_x2y(sc, sc->d))
937			return (0);
938	}
939	return (1);
940}
941
942static void
943gsc_destroy(struct gen_sc *gsc)
944{
945	struct segment	*s;
946
947	while ((s = LIST_FIRST(gsc)) != NULL) {
948		LIST_REMOVE(s, _next);
949		free(s);
950	}
951}
952
953/*
954 * return a segment entry starting at x.
955 * if gsc has no entry starting at x, a new entry is created at x.
956 */
957static struct segment *
958gsc_getentry(struct gen_sc *gsc, double x)
959{
960	struct segment	*new, *prev, *s;
961
962	prev = NULL;
963	LIST_FOREACH(s, gsc, _next) {
964		if (s->x == x)
965			return (s);	/* matching entry found */
966		else if (s->x < x)
967			prev = s;
968		else
969			break;
970	}
971
972	/* we have to create a new entry */
973	if ((new = calloc(1, sizeof(struct segment))) == NULL)
974		return (NULL);
975
976	new->x = x;
977	if (x == HUGE_VAL || s == NULL)
978		new->d = 0;
979	else if (s->x == HUGE_VAL)
980		new->d = HUGE_VAL;
981	else
982		new->d = s->x - x;
983	if (prev == NULL) {
984		/* insert the new entry at the head of the list */
985		new->y = 0;
986		new->m = 0;
987		LIST_INSERT_HEAD(gsc, new, _next);
988	} else {
989		/*
990		 * the start point intersects with the segment pointed by
991		 * prev.  divide prev into 2 segments
992		 */
993		if (x == HUGE_VAL) {
994			prev->d = HUGE_VAL;
995			if (prev->m == 0)
996				new->y = prev->y;
997			else
998				new->y = HUGE_VAL;
999		} else {
1000			prev->d = x - prev->x;
1001			new->y = prev->d * prev->m + prev->y;
1002		}
1003		new->m = prev->m;
1004		LIST_INSERT_AFTER(prev, new, _next);
1005	}
1006	return (new);
1007}
1008
1009/* add a segment to a generalized service curve */
1010static int
1011gsc_add_seg(struct gen_sc *gsc, double x, double y, double d, double m)
1012{
1013	struct segment	*start, *end, *s;
1014	double		 x2;
1015
1016	if (d == HUGE_VAL)
1017		x2 = HUGE_VAL;
1018	else
1019		x2 = x + d;
1020	start = gsc_getentry(gsc, x);
1021	end = gsc_getentry(gsc, x2);
1022	if (start == NULL || end == NULL)
1023		return (-1);
1024
1025	for (s = start; s != end; s = LIST_NEXT(s, _next)) {
1026		s->m += m;
1027		s->y += y + (s->x - x) * m;
1028	}
1029
1030	end = gsc_getentry(gsc, HUGE_VAL);
1031	for (; s != end; s = LIST_NEXT(s, _next)) {
1032		s->y += m * d;
1033	}
1034
1035	return (0);
1036}
1037
1038/* get y-projection of a service curve */
1039static double
1040sc_x2y(struct service_curve *sc, double x)
1041{
1042	double	y;
1043
1044	if (x <= (double)sc->d)
1045		/* y belongs to the 1st segment */
1046		y = x * (double)sc->m1;
1047	else
1048		/* y belongs to the 2nd segment */
1049		y = (double)sc->d * (double)sc->m1
1050			+ (x - (double)sc->d) * (double)sc->m2;
1051	return (y);
1052}
1053
1054/*
1055 * misc utilities
1056 */
#define	R2S_BUFS	8
#define	RATESTR_MAX	16

/*
 * Format a rate as a short string with a unit character from " KMG"
 * followed by 'b', e.g. "1Kb", "1.50Mb", "500 b".  Two decimals are printed
 * only when the scaled value has a fractional part at that precision.
 *
 * Scaling stops at 'G', so larger rates are printed as a bigger number of
 * G (e.g. "5000Gb") instead of indexing past the last unit character.
 *
 * The result lives in a static ring of R2S_BUFS buffers: a returned pointer
 * stays valid until R2S_BUFS further calls have been made.
 */
char *
rate2str(double rate)
{
	char		*buf;
	static char	 r2sbuf[R2S_BUFS][RATESTR_MAX];  /* ring buffer */
	static int	 idx = 0;
	int		 i;
	static const char unit[] = " KMG";

	buf = r2sbuf[idx++];
	if (idx == R2S_BUFS)
		idx = 0;

	/* the last usable index is sizeof(unit) - 2; beyond it lies the NUL */
	for (i = 0; rate >= 1000 && i < (int)sizeof(unit) - 2; i++)
		rate /= 1000;

	if ((int)(rate * 100) % 100)
		snprintf(buf, RATESTR_MAX, "%.2f%cb", rate, unit[i]);
	else
		snprintf(buf, RATESTR_MAX, "%d%cb", (int)rate, unit[i]);

	return (buf);
}
1083
1084u_int32_t
1085getifspeed(char *ifname)
1086{
1087#ifdef __NetBSD__
1088	int			 s;
1089	struct ifdatareq	 ifdr;
1090	struct if_data		*ifrdat;
1091
1092	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
1093		err(1, "getifspeed: socket");
1094	memset(&ifdr, 0, sizeof(ifdr));
1095	if (strlcpy(ifdr.ifdr_name, ifname, sizeof(ifdr.ifdr_name)) >=
1096	    sizeof(ifdr.ifdr_name))
1097		errx(1, "getifspeed: strlcpy");
1098	if (ioctl(s, SIOCGIFDATA, &ifdr) == -1)
1099		err(1, "getifspeed: SIOCGIFDATA");
1100	ifrdat = &ifdr.ifdr_data;
1101	if (close(s) == -1)
1102		err(1, "getifspeed: close");
1103	return ((u_int32_t)ifrdat->ifi_baudrate);
1104#else
1105	int		s;
1106	struct ifreq	ifr;
1107	struct if_data	ifrdat;
1108
1109	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
1110		err(1, "socket");
1111	bzero(&ifr, sizeof(ifr));
1112	if (strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)) >=
1113	    sizeof(ifr.ifr_name))
1114		errx(1, "getifspeed: strlcpy");
1115	ifr.ifr_data = (caddr_t)&ifrdat;
1116	if (ioctl(s, SIOCGIFDATA, (caddr_t)&ifr) == -1)
1117		err(1, "SIOCGIFDATA");
1118	if (close(s))
1119		err(1, "close");
1120	return ((u_int32_t)ifrdat.ifi_baudrate);
1121#endif /* !__NetBSD__ */
1122}
1123
1124u_long
1125getifmtu(char *ifname)
1126{
1127	int		s;
1128	struct ifreq	ifr;
1129
1130	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
1131		err(1, "socket");
1132	bzero(&ifr, sizeof(ifr));
1133	if (strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)) >=
1134	    sizeof(ifr.ifr_name))
1135		errx(1, "getifmtu: strlcpy");
1136	if (ioctl(s, SIOCGIFMTU, (caddr_t)&ifr) == -1)
1137		err(1, "SIOCGIFMTU");
1138	if (close(s) == -1)
1139		err(1, "close");
1140	if (ifr.ifr_mtu > 0)
1141		return (ifr.ifr_mtu);
1142	else {
1143		warnx("could not get mtu for %s, assuming 1500", ifname);
1144		return (1500);
1145	}
1146}
1147
1148int
1149eval_queue_opts(struct pf_altq *pa, struct node_queue_opt *opts,
1150    u_int32_t ref_bw)
1151{
1152	int	errors = 0;
1153
1154	switch (pa->scheduler) {
1155	case ALTQT_CBQ:
1156		pa->pq_u.cbq_opts = opts->data.cbq_opts;
1157		break;
1158	case ALTQT_PRIQ:
1159		pa->pq_u.priq_opts = opts->data.priq_opts;
1160		break;
1161	case ALTQT_HFSC:
1162		pa->pq_u.hfsc_opts.flags = opts->data.hfsc_opts.flags;
1163		if (opts->data.hfsc_opts.linkshare.used) {
1164			pa->pq_u.hfsc_opts.lssc_m1 =
1165			    eval_bwspec(&opts->data.hfsc_opts.linkshare.m1,
1166			    ref_bw);
1167			pa->pq_u.hfsc_opts.lssc_m2 =
1168			    eval_bwspec(&opts->data.hfsc_opts.linkshare.m2,
1169			    ref_bw);
1170			pa->pq_u.hfsc_opts.lssc_d =
1171			    opts->data.hfsc_opts.linkshare.d;
1172		}
1173		if (opts->data.hfsc_opts.realtime.used) {
1174			pa->pq_u.hfsc_opts.rtsc_m1 =
1175			    eval_bwspec(&opts->data.hfsc_opts.realtime.m1,
1176			    ref_bw);
1177			pa->pq_u.hfsc_opts.rtsc_m2 =
1178			    eval_bwspec(&opts->data.hfsc_opts.realtime.m2,
1179			    ref_bw);
1180			pa->pq_u.hfsc_opts.rtsc_d =
1181			    opts->data.hfsc_opts.realtime.d;
1182		}
1183		if (opts->data.hfsc_opts.upperlimit.used) {
1184			pa->pq_u.hfsc_opts.ulsc_m1 =
1185			    eval_bwspec(&opts->data.hfsc_opts.upperlimit.m1,
1186			    ref_bw);
1187			pa->pq_u.hfsc_opts.ulsc_m2 =
1188			    eval_bwspec(&opts->data.hfsc_opts.upperlimit.m2,
1189			    ref_bw);
1190			pa->pq_u.hfsc_opts.ulsc_d =
1191			    opts->data.hfsc_opts.upperlimit.d;
1192		}
1193		break;
1194	default:
1195		warnx("eval_queue_opts: unknown scheduler type %u",
1196		    opts->qtype);
1197		errors++;
1198		break;
1199	}
1200
1201	return (errors);
1202}
1203
1204u_int32_t
1205eval_bwspec(struct node_queue_bw *bw, u_int32_t ref_bw)
1206{
1207	if (bw->bw_absolute > 0)
1208		return (bw->bw_absolute);
1209
1210	if (bw->bw_percent > 0)
1211		return (ref_bw / 100 * bw->bw_percent);
1212
1213	return (0);
1214}
1215
1216void
1217print_hfsc_sc(const char *scname, u_int m1, u_int d, u_int m2,
1218    const struct node_hfsc_sc *sc)
1219{
1220	printf(" %s", scname);
1221
1222	if (d != 0) {
1223		printf("(");
1224		if (sc != NULL && sc->m1.bw_percent > 0)
1225			printf("%u%%", sc->m1.bw_percent);
1226		else
1227			printf("%s", rate2str((double)m1));
1228		printf(" %u", d);
1229	}
1230
1231	if (sc != NULL && sc->m2.bw_percent > 0)
1232		printf(" %u%%", sc->m2.bw_percent);
1233	else
1234		printf(" %s", rate2str((double)m2));
1235
1236	if (d != 0)
1237		printf(")");
1238}
1239