pfctl_altq.c revision 145840
1/*	$OpenBSD: pfctl_altq.c,v 1.86 2005/02/28 14:04:51 henning Exp $	*/
2
3/*
4 * Copyright (c) 2002
5 *	Sony Computer Science Laboratories Inc.
6 * Copyright (c) 2002, 2003 Henning Brauer <henning@openbsd.org>
7 *
8 * Permission to use, copy, modify, and distribute this software for any
9 * purpose with or without fee is hereby granted, provided that the above
10 * copyright notice and this permission notice appear in all copies.
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19 */
20
21#include <sys/cdefs.h>
22__FBSDID("$FreeBSD: head/contrib/pf/pfctl/pfctl_altq.c 145840 2005-05-03 16:55:20Z mlaier $");
23
24#include <sys/param.h>
25#include <sys/ioctl.h>
26#include <sys/socket.h>
27
28#include <net/if.h>
29#include <netinet/in.h>
30#include <net/pfvar.h>
31
32#include <err.h>
33#include <errno.h>
34#include <limits.h>
35#include <math.h>
36#include <stdio.h>
37#include <stdlib.h>
38#include <string.h>
39#include <unistd.h>
40
41#include <altq/altq.h>
42#include <altq/altq_cbq.h>
43#include <altq/altq_priq.h>
44#include <altq/altq_hfsc.h>
45
46#include "pfctl_parser.h"
47#include "pfctl.h"
48
/*
 * True when a service curve pointer is NULL or when both of its slopes
 * (m1 and m2) are zero.
 */
#define is_sc_null(sc)	(((sc) == NULL) || ((sc)->m1 == 0 && (sc)->m2 == 0))

/* list of every altq/queue entry added through pfaltq_store() */
TAILQ_HEAD(altqs, pf_altq) altqs = TAILQ_HEAD_INITIALIZER(altqs);
/* scratch generalized service curves built by eval_pfqueue_hfsc() */
LIST_HEAD(gen_sc, segment) rtsc, lssc;
53
54struct pf_altq	*qname_to_pfaltq(const char *, const char *);
55u_int32_t	 qname_to_qid(const char *);
56
57static int	eval_pfqueue_cbq(struct pfctl *, struct pf_altq *);
58static int	cbq_compute_idletime(struct pfctl *, struct pf_altq *);
59static int	check_commit_cbq(int, int, struct pf_altq *);
60static int	print_cbq_opts(const struct pf_altq *);
61
62static int	eval_pfqueue_priq(struct pfctl *, struct pf_altq *);
63static int	check_commit_priq(int, int, struct pf_altq *);
64static int	print_priq_opts(const struct pf_altq *);
65
66static int	eval_pfqueue_hfsc(struct pfctl *, struct pf_altq *);
67static int	check_commit_hfsc(int, int, struct pf_altq *);
68static int	print_hfsc_opts(const struct pf_altq *,
69		    const struct node_queue_opt *);
70
71static void		 gsc_add_sc(struct gen_sc *, struct service_curve *);
72static int		 is_gsc_under_sc(struct gen_sc *,
73			     struct service_curve *);
74static void		 gsc_destroy(struct gen_sc *);
75static struct segment	*gsc_getentry(struct gen_sc *, double);
76static int		 gsc_add_seg(struct gen_sc *, double, double, double,
77			     double);
78static double		 sc_x2y(struct service_curve *, double);
79
80#ifdef __FreeBSD__
81u_int32_t	 getifspeed(int, char *);
82#else
83u_int32_t	 getifspeed(char *);
84#endif
85u_long		 getifmtu(char *);
86int		 eval_queue_opts(struct pf_altq *, struct node_queue_opt *,
87		     u_int32_t);
88u_int32_t	 eval_bwspec(struct node_queue_bw *, u_int32_t);
89void		 print_hfsc_sc(const char *, u_int, u_int, u_int,
90		     const struct node_hfsc_sc *);
91
92void
93pfaltq_store(struct pf_altq *a)
94{
95	struct pf_altq	*altq;
96
97	if ((altq = malloc(sizeof(*altq))) == NULL)
98		err(1, "malloc");
99	memcpy(altq, a, sizeof(struct pf_altq));
100	TAILQ_INSERT_TAIL(&altqs, altq, entries);
101}
102
103void
104pfaltq_free(struct pf_altq *a)
105{
106	struct pf_altq	*altq;
107
108	TAILQ_FOREACH(altq, &altqs, entries) {
109		if (strncmp(a->ifname, altq->ifname, IFNAMSIZ) == 0 &&
110		    strncmp(a->qname, altq->qname, PF_QNAME_SIZE) == 0) {
111			TAILQ_REMOVE(&altqs, altq, entries);
112			free(altq);
113			return;
114		}
115	}
116}
117
118struct pf_altq *
119pfaltq_lookup(const char *ifname)
120{
121	struct pf_altq	*altq;
122
123	TAILQ_FOREACH(altq, &altqs, entries) {
124		if (strncmp(ifname, altq->ifname, IFNAMSIZ) == 0 &&
125		    altq->qname[0] == 0)
126			return (altq);
127	}
128	return (NULL);
129}
130
131struct pf_altq *
132qname_to_pfaltq(const char *qname, const char *ifname)
133{
134	struct pf_altq	*altq;
135
136	TAILQ_FOREACH(altq, &altqs, entries) {
137		if (strncmp(ifname, altq->ifname, IFNAMSIZ) == 0 &&
138		    strncmp(qname, altq->qname, PF_QNAME_SIZE) == 0)
139			return (altq);
140	}
141	return (NULL);
142}
143
144u_int32_t
145qname_to_qid(const char *qname)
146{
147	struct pf_altq	*altq;
148
149	/*
150	 * We guarantee that same named queues on different interfaces
151	 * have the same qid, so we do NOT need to limit matching on
152	 * one interface!
153	 */
154
155	TAILQ_FOREACH(altq, &altqs, entries) {
156		if (strncmp(qname, altq->qname, PF_QNAME_SIZE) == 0)
157			return (altq->qid);
158	}
159	return (0);
160}
161
162void
163print_altq(const struct pf_altq *a, unsigned level, struct node_queue_bw *bw,
164	struct node_queue_opt *qopts)
165{
166	if (a->qname[0] != 0) {
167		print_queue(a, level, bw, 0, qopts);
168		return;
169	}
170
171	printf("altq on %s ", a->ifname);
172
173	switch (a->scheduler) {
174	case ALTQT_CBQ:
175		if (!print_cbq_opts(a))
176			printf("cbq ");
177		break;
178	case ALTQT_PRIQ:
179		if (!print_priq_opts(a))
180			printf("priq ");
181		break;
182	case ALTQT_HFSC:
183		if (!print_hfsc_opts(a, qopts))
184			printf("hfsc ");
185		break;
186	}
187
188	if (bw != NULL && bw->bw_percent > 0) {
189		if (bw->bw_percent < 100)
190			printf("bandwidth %u%% ", bw->bw_percent);
191	} else
192		printf("bandwidth %s ", rate2str((double)a->ifbandwidth));
193
194	if (a->qlimit != DEFAULT_QLIMIT)
195		printf("qlimit %u ", a->qlimit);
196	printf("tbrsize %u ", a->tbrsize);
197}
198
199void
200print_queue(const struct pf_altq *a, unsigned level, struct node_queue_bw *bw,
201    int print_interface, struct node_queue_opt *qopts)
202{
203	unsigned	i;
204
205	printf("queue ");
206	for (i = 0; i < level; ++i)
207		printf(" ");
208	printf("%s ", a->qname);
209	if (print_interface)
210		printf("on %s ", a->ifname);
211	if (a->scheduler == ALTQT_CBQ || a->scheduler == ALTQT_HFSC) {
212		if (bw != NULL && bw->bw_percent > 0) {
213			if (bw->bw_percent < 100)
214				printf("bandwidth %u%% ", bw->bw_percent);
215		} else
216			printf("bandwidth %s ", rate2str((double)a->bandwidth));
217	}
218	if (a->priority != DEFAULT_PRIORITY)
219		printf("priority %u ", a->priority);
220	if (a->qlimit != DEFAULT_QLIMIT)
221		printf("qlimit %u ", a->qlimit);
222	switch (a->scheduler) {
223	case ALTQT_CBQ:
224		print_cbq_opts(a);
225		break;
226	case ALTQT_PRIQ:
227		print_priq_opts(a);
228		break;
229	case ALTQT_HFSC:
230		print_hfsc_opts(a, qopts);
231		break;
232	}
233}
234
235/*
236 * eval_pfaltq computes the discipline parameters.
237 */
238int
239eval_pfaltq(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw,
240    struct node_queue_opt *opts)
241{
242	u_int	rate, size, errors = 0;
243
244	if (bw->bw_absolute > 0)
245		pa->ifbandwidth = bw->bw_absolute;
246	else
247#ifdef __FreeBSD__
248		if ((rate = getifspeed(pf->dev, pa->ifname)) == 0) {
249#else
250		if ((rate = getifspeed(pa->ifname)) == 0) {
251#endif
252			fprintf(stderr, "cannot determine interface bandwidth "
253			    "for %s, specify an absolute bandwidth\n",
254			    pa->ifname);
255			errors++;
256		} else if ((pa->ifbandwidth = eval_bwspec(bw, rate)) == 0)
257			pa->ifbandwidth = rate;
258
259	errors += eval_queue_opts(pa, opts, pa->ifbandwidth);
260
261	/* if tbrsize is not specified, use heuristics */
262	if (pa->tbrsize == 0) {
263		rate = pa->ifbandwidth;
264		if (rate <= 1 * 1000 * 1000)
265			size = 1;
266		else if (rate <= 10 * 1000 * 1000)
267			size = 4;
268		else if (rate <= 200 * 1000 * 1000)
269			size = 8;
270		else
271			size = 24;
272		size = size * getifmtu(pa->ifname);
273		if (size > 0xffff)
274			size = 0xffff;
275		pa->tbrsize = size;
276	}
277	return (errors);
278}
279
280/*
281 * check_commit_altq does consistency check for each interface
282 */
283int
284check_commit_altq(int dev, int opts)
285{
286	struct pf_altq	*altq;
287	int		 error = 0;
288
289	/* call the discipline check for each interface. */
290	TAILQ_FOREACH(altq, &altqs, entries) {
291		if (altq->qname[0] == 0) {
292			switch (altq->scheduler) {
293			case ALTQT_CBQ:
294				error = check_commit_cbq(dev, opts, altq);
295				break;
296			case ALTQT_PRIQ:
297				error = check_commit_priq(dev, opts, altq);
298				break;
299			case ALTQT_HFSC:
300				error = check_commit_hfsc(dev, opts, altq);
301				break;
302			default:
303				break;
304			}
305		}
306	}
307	return (error);
308}
309
310/*
311 * eval_pfqueue computes the queue parameters.
312 */
313int
314eval_pfqueue(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw,
315    struct node_queue_opt *opts)
316{
317	/* should be merged with expand_queue */
318	struct pf_altq	*if_pa, *parent, *altq;
319	u_int32_t	 bwsum;
320	int		 error = 0;
321
322	/* find the corresponding interface and copy fields used by queues */
323	if ((if_pa = pfaltq_lookup(pa->ifname)) == NULL) {
324		fprintf(stderr, "altq not defined on %s\n", pa->ifname);
325		return (1);
326	}
327	pa->scheduler = if_pa->scheduler;
328	pa->ifbandwidth = if_pa->ifbandwidth;
329
330	if (qname_to_pfaltq(pa->qname, pa->ifname) != NULL) {
331		fprintf(stderr, "queue %s already exists on interface %s\n",
332		    pa->qname, pa->ifname);
333		return (1);
334	}
335	pa->qid = qname_to_qid(pa->qname);
336
337	parent = NULL;
338	if (pa->parent[0] != 0) {
339		parent = qname_to_pfaltq(pa->parent, pa->ifname);
340		if (parent == NULL) {
341			fprintf(stderr, "parent %s not found for %s\n",
342			    pa->parent, pa->qname);
343			return (1);
344		}
345		pa->parent_qid = parent->qid;
346	}
347	if (pa->qlimit == 0)
348		pa->qlimit = DEFAULT_QLIMIT;
349
350	if (pa->scheduler == ALTQT_CBQ || pa->scheduler == ALTQT_HFSC) {
351		pa->bandwidth = eval_bwspec(bw,
352		    parent == NULL ? 0 : parent->bandwidth);
353
354		if (pa->bandwidth > pa->ifbandwidth) {
355			fprintf(stderr, "bandwidth for %s higher than "
356			    "interface\n", pa->qname);
357			return (1);
358		}
359		/* check the sum of the child bandwidth is under parent's */
360		if (parent != NULL) {
361			if (pa->bandwidth > parent->bandwidth) {
362				warnx("bandwidth for %s higher than parent",
363				    pa->qname);
364				return (1);
365			}
366			bwsum = 0;
367			TAILQ_FOREACH(altq, &altqs, entries) {
368				if (strncmp(altq->ifname, pa->ifname,
369				    IFNAMSIZ) == 0 &&
370				    altq->qname[0] != 0 &&
371				    strncmp(altq->parent, pa->parent,
372				    PF_QNAME_SIZE) == 0)
373					bwsum += altq->bandwidth;
374			}
375			bwsum += pa->bandwidth;
376			if (bwsum > parent->bandwidth) {
377				warnx("the sum of the child bandwidth higher"
378				    " than parent \"%s\"", parent->qname);
379			}
380		}
381	}
382
383	if (eval_queue_opts(pa, opts, parent == NULL? 0 : parent->bandwidth))
384		return (1);
385
386	switch (pa->scheduler) {
387	case ALTQT_CBQ:
388		error = eval_pfqueue_cbq(pf, pa);
389		break;
390	case ALTQT_PRIQ:
391		error = eval_pfqueue_priq(pf, pa);
392		break;
393	case ALTQT_HFSC:
394		error = eval_pfqueue_hfsc(pf, pa);
395		break;
396	default:
397		break;
398	}
399	return (error);
400}
401
402/*
403 * CBQ support functions
404 */
405#define	RM_FILTER_GAIN	5	/* log2 of gain, e.g., 5 => 31/32 */
406#define	RM_NS_PER_SEC	(1000000000)
407
408static int
409eval_pfqueue_cbq(struct pfctl *pf, struct pf_altq *pa)
410{
411	struct cbq_opts	*opts;
412	u_int		 ifmtu;
413
414	if (pa->priority >= CBQ_MAXPRI) {
415		warnx("priority out of range: max %d", CBQ_MAXPRI - 1);
416		return (-1);
417	}
418
419	ifmtu = getifmtu(pa->ifname);
420	opts = &pa->pq_u.cbq_opts;
421
422	if (opts->pktsize == 0) {	/* use default */
423		opts->pktsize = ifmtu;
424		if (opts->pktsize > MCLBYTES)	/* do what TCP does */
425			opts->pktsize &= ~MCLBYTES;
426	} else if (opts->pktsize > ifmtu)
427		opts->pktsize = ifmtu;
428	if (opts->maxpktsize == 0)	/* use default */
429		opts->maxpktsize = ifmtu;
430	else if (opts->maxpktsize > ifmtu)
431		opts->pktsize = ifmtu;
432
433	if (opts->pktsize > opts->maxpktsize)
434		opts->pktsize = opts->maxpktsize;
435
436	if (pa->parent[0] == 0)
437		opts->flags |= (CBQCLF_ROOTCLASS | CBQCLF_WRR);
438
439	cbq_compute_idletime(pf, pa);
440	return (0);
441}
442
443/*
444 * compute ns_per_byte, maxidle, minidle, and offtime
445 */
446static int
447cbq_compute_idletime(struct pfctl *pf, struct pf_altq *pa)
448{
449	struct cbq_opts	*opts;
450	double		 maxidle_s, maxidle, minidle;
451	double		 offtime, nsPerByte, ifnsPerByte, ptime, cptime;
452	double		 z, g, f, gton, gtom;
453	u_int		 minburst, maxburst;
454
455	opts = &pa->pq_u.cbq_opts;
456	ifnsPerByte = (1.0 / (double)pa->ifbandwidth) * RM_NS_PER_SEC * 8;
457	minburst = opts->minburst;
458	maxburst = opts->maxburst;
459
460	if (pa->bandwidth == 0)
461		f = 0.0001;	/* small enough? */
462	else
463		f = ((double) pa->bandwidth / (double) pa->ifbandwidth);
464
465	nsPerByte = ifnsPerByte / f;
466	ptime = (double)opts->pktsize * ifnsPerByte;
467	cptime = ptime * (1.0 - f) / f;
468
469	if (nsPerByte * (double)opts->maxpktsize > (double)INT_MAX) {
470		/*
471		 * this causes integer overflow in kernel!
472		 * (bandwidth < 6Kbps when max_pkt_size=1500)
473		 */
474		if (pa->bandwidth != 0 && (pf->opts & PF_OPT_QUIET) == 0)
475			warnx("queue bandwidth must be larger than %s",
476			    rate2str(ifnsPerByte * (double)opts->maxpktsize /
477			    (double)INT_MAX * (double)pa->ifbandwidth));
478			fprintf(stderr, "cbq: queue %s is too slow!\n",
479			    pa->qname);
480		nsPerByte = (double)(INT_MAX / opts->maxpktsize);
481	}
482
483	if (maxburst == 0) {  /* use default */
484		if (cptime > 10.0 * 1000000)
485			maxburst = 4;
486		else
487			maxburst = 16;
488	}
489	if (minburst == 0)  /* use default */
490		minburst = 2;
491	if (minburst > maxburst)
492		minburst = maxburst;
493
494	z = (double)(1 << RM_FILTER_GAIN);
495	g = (1.0 - 1.0 / z);
496	gton = pow(g, (double)maxburst);
497	gtom = pow(g, (double)(minburst-1));
498	maxidle = ((1.0 / f - 1.0) * ((1.0 - gton) / gton));
499	maxidle_s = (1.0 - g);
500	if (maxidle > maxidle_s)
501		maxidle = ptime * maxidle;
502	else
503		maxidle = ptime * maxidle_s;
504	if (minburst)
505		offtime = cptime * (1.0 + 1.0/(1.0 - g) * (1.0 - gtom) / gtom);
506	else
507		offtime = cptime;
508	minidle = -((double)opts->maxpktsize * (double)nsPerByte);
509
510	/* scale parameters */
511	maxidle = ((maxidle * 8.0) / nsPerByte) *
512	    pow(2.0, (double)RM_FILTER_GAIN);
513	offtime = (offtime * 8.0) / nsPerByte *
514	    pow(2.0, (double)RM_FILTER_GAIN);
515	minidle = ((minidle * 8.0) / nsPerByte) *
516	    pow(2.0, (double)RM_FILTER_GAIN);
517
518	maxidle = maxidle / 1000.0;
519	offtime = offtime / 1000.0;
520	minidle = minidle / 1000.0;
521
522	opts->minburst = minburst;
523	opts->maxburst = maxburst;
524	opts->ns_per_byte = (u_int)nsPerByte;
525	opts->maxidle = (u_int)fabs(maxidle);
526	opts->minidle = (int)minidle;
527	opts->offtime = (u_int)fabs(offtime);
528
529	return (0);
530}
531
532static int
533check_commit_cbq(int dev, int opts, struct pf_altq *pa)
534{
535	struct pf_altq	*altq;
536	int		 root_class, default_class;
537	int		 error = 0;
538
539	/*
540	 * check if cbq has one root queue and one default queue
541	 * for this interface
542	 */
543	root_class = default_class = 0;
544	TAILQ_FOREACH(altq, &altqs, entries) {
545		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
546			continue;
547		if (altq->qname[0] == 0)  /* this is for interface */
548			continue;
549		if (altq->pq_u.cbq_opts.flags & CBQCLF_ROOTCLASS)
550			root_class++;
551		if (altq->pq_u.cbq_opts.flags & CBQCLF_DEFCLASS)
552			default_class++;
553	}
554	if (root_class != 1) {
555		warnx("should have one root queue on %s", pa->ifname);
556		error++;
557	}
558	if (default_class != 1) {
559		warnx("should have one default queue on %s", pa->ifname);
560		error++;
561	}
562	return (error);
563}
564
565static int
566print_cbq_opts(const struct pf_altq *a)
567{
568	const struct cbq_opts	*opts;
569
570	opts = &a->pq_u.cbq_opts;
571	if (opts->flags) {
572		printf("cbq(");
573		if (opts->flags & CBQCLF_RED)
574			printf(" red");
575		if (opts->flags & CBQCLF_ECN)
576			printf(" ecn");
577		if (opts->flags & CBQCLF_RIO)
578			printf(" rio");
579		if (opts->flags & CBQCLF_CLEARDSCP)
580			printf(" cleardscp");
581		if (opts->flags & CBQCLF_FLOWVALVE)
582			printf(" flowvalve");
583		if (opts->flags & CBQCLF_BORROW)
584			printf(" borrow");
585		if (opts->flags & CBQCLF_WRR)
586			printf(" wrr");
587		if (opts->flags & CBQCLF_EFFICIENT)
588			printf(" efficient");
589		if (opts->flags & CBQCLF_ROOTCLASS)
590			printf(" root");
591		if (opts->flags & CBQCLF_DEFCLASS)
592			printf(" default");
593		printf(" ) ");
594
595		return (1);
596	} else
597		return (0);
598}
599
600/*
601 * PRIQ support functions
602 */
603static int
604eval_pfqueue_priq(struct pfctl *pf, struct pf_altq *pa)
605{
606	struct pf_altq	*altq;
607
608	if (pa->priority >= PRIQ_MAXPRI) {
609		warnx("priority out of range: max %d", PRIQ_MAXPRI - 1);
610		return (-1);
611	}
612	/* the priority should be unique for the interface */
613	TAILQ_FOREACH(altq, &altqs, entries) {
614		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) == 0 &&
615		    altq->qname[0] != 0 && altq->priority == pa->priority) {
616			warnx("%s and %s have the same priority",
617			    altq->qname, pa->qname);
618			return (-1);
619		}
620	}
621
622	return (0);
623}
624
625static int
626check_commit_priq(int dev, int opts, struct pf_altq *pa)
627{
628	struct pf_altq	*altq;
629	int		 default_class;
630	int		 error = 0;
631
632	/*
633	 * check if priq has one default class for this interface
634	 */
635	default_class = 0;
636	TAILQ_FOREACH(altq, &altqs, entries) {
637		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
638			continue;
639		if (altq->qname[0] == 0)  /* this is for interface */
640			continue;
641		if (altq->pq_u.priq_opts.flags & PRCF_DEFAULTCLASS)
642			default_class++;
643	}
644	if (default_class != 1) {
645		warnx("should have one default queue on %s", pa->ifname);
646		error++;
647	}
648	return (error);
649}
650
651static int
652print_priq_opts(const struct pf_altq *a)
653{
654	const struct priq_opts	*opts;
655
656	opts = &a->pq_u.priq_opts;
657
658	if (opts->flags) {
659		printf("priq(");
660		if (opts->flags & PRCF_RED)
661			printf(" red");
662		if (opts->flags & PRCF_ECN)
663			printf(" ecn");
664		if (opts->flags & PRCF_RIO)
665			printf(" rio");
666		if (opts->flags & PRCF_CLEARDSCP)
667			printf(" cleardscp");
668		if (opts->flags & PRCF_DEFAULTCLASS)
669			printf(" default");
670		printf(" ) ");
671
672		return (1);
673	} else
674		return (0);
675}
676
677/*
678 * HFSC support functions
679 */
680static int
681eval_pfqueue_hfsc(struct pfctl *pf, struct pf_altq *pa)
682{
683	struct pf_altq		*altq, *parent;
684	struct hfsc_opts	*opts;
685	struct service_curve	 sc;
686
687	opts = &pa->pq_u.hfsc_opts;
688
689	if (pa->parent[0] == 0) {
690		/* root queue */
691		opts->lssc_m1 = pa->ifbandwidth;
692		opts->lssc_m2 = pa->ifbandwidth;
693		opts->lssc_d = 0;
694		return (0);
695	}
696
697	LIST_INIT(&rtsc);
698	LIST_INIT(&lssc);
699
700	/* if link_share is not specified, use bandwidth */
701	if (opts->lssc_m2 == 0)
702		opts->lssc_m2 = pa->bandwidth;
703
704	if ((opts->rtsc_m1 > 0 && opts->rtsc_m2 == 0) ||
705	    (opts->lssc_m1 > 0 && opts->lssc_m2 == 0) ||
706	    (opts->ulsc_m1 > 0 && opts->ulsc_m2 == 0)) {
707		warnx("m2 is zero for %s", pa->qname);
708		return (-1);
709	}
710
711	if ((opts->rtsc_m1 < opts->rtsc_m2 && opts->rtsc_m1 != 0) ||
712	    (opts->rtsc_m1 < opts->rtsc_m2 && opts->rtsc_m1 != 0) ||
713	    (opts->rtsc_m1 < opts->rtsc_m2 && opts->rtsc_m1 != 0)) {
714		warnx("m1 must be zero for convex curve: %s", pa->qname);
715		return (-1);
716	}
717
718	/*
719	 * admission control:
720	 * for the real-time service curve, the sum of the service curves
721	 * should not exceed 80% of the interface bandwidth.  20% is reserved
722	 * not to over-commit the actual interface bandwidth.
723	 * for the linkshare service curve, the sum of the child service
724	 * curve should not exceed the parent service curve.
725	 * for the upper-limit service curve, the assigned bandwidth should
726	 * be smaller than the interface bandwidth, and the upper-limit should
727	 * be larger than the real-time service curve when both are defined.
728	 */
729	parent = qname_to_pfaltq(pa->parent, pa->ifname);
730	if (parent == NULL)
731		errx(1, "parent %s not found for %s", pa->parent, pa->qname);
732
733	TAILQ_FOREACH(altq, &altqs, entries) {
734		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
735			continue;
736		if (altq->qname[0] == 0)  /* this is for interface */
737			continue;
738
739		/* if the class has a real-time service curve, add it. */
740		if (opts->rtsc_m2 != 0 && altq->pq_u.hfsc_opts.rtsc_m2 != 0) {
741			sc.m1 = altq->pq_u.hfsc_opts.rtsc_m1;
742			sc.d = altq->pq_u.hfsc_opts.rtsc_d;
743			sc.m2 = altq->pq_u.hfsc_opts.rtsc_m2;
744			gsc_add_sc(&rtsc, &sc);
745		}
746
747		if (strncmp(altq->parent, pa->parent, PF_QNAME_SIZE) != 0)
748			continue;
749
750		/* if the class has a linkshare service curve, add it. */
751		if (opts->lssc_m2 != 0 && altq->pq_u.hfsc_opts.lssc_m2 != 0) {
752			sc.m1 = altq->pq_u.hfsc_opts.lssc_m1;
753			sc.d = altq->pq_u.hfsc_opts.lssc_d;
754			sc.m2 = altq->pq_u.hfsc_opts.lssc_m2;
755			gsc_add_sc(&lssc, &sc);
756		}
757	}
758
759	/* check the real-time service curve.  reserve 20% of interface bw */
760	if (opts->rtsc_m2 != 0) {
761		/* add this queue to the sum */
762		sc.m1 = opts->rtsc_m1;
763		sc.d = opts->rtsc_d;
764		sc.m2 = opts->rtsc_m2;
765		gsc_add_sc(&rtsc, &sc);
766		/* compare the sum with 80% of the interface */
767		sc.m1 = 0;
768		sc.d = 0;
769		sc.m2 = pa->ifbandwidth / 100 * 80;
770		if (!is_gsc_under_sc(&rtsc, &sc)) {
771			warnx("real-time sc exceeds 80%% of the interface "
772			    "bandwidth (%s)", rate2str((double)sc.m2));
773			goto err_ret;
774		}
775	}
776
777	/* check the linkshare service curve. */
778	if (opts->lssc_m2 != 0) {
779		/* add this queue to the child sum */
780		sc.m1 = opts->lssc_m1;
781		sc.d = opts->lssc_d;
782		sc.m2 = opts->lssc_m2;
783		gsc_add_sc(&lssc, &sc);
784		/* compare the sum of the children with parent's sc */
785		sc.m1 = parent->pq_u.hfsc_opts.lssc_m1;
786		sc.d = parent->pq_u.hfsc_opts.lssc_d;
787		sc.m2 = parent->pq_u.hfsc_opts.lssc_m2;
788		if (!is_gsc_under_sc(&lssc, &sc)) {
789			warnx("linkshare sc exceeds parent's sc");
790			goto err_ret;
791		}
792	}
793
794	/* check the upper-limit service curve. */
795	if (opts->ulsc_m2 != 0) {
796		if (opts->ulsc_m1 > pa->ifbandwidth ||
797		    opts->ulsc_m2 > pa->ifbandwidth) {
798			warnx("upper-limit larger than interface bandwidth");
799			goto err_ret;
800		}
801		if (opts->rtsc_m2 != 0 && opts->rtsc_m2 > opts->ulsc_m2) {
802			warnx("upper-limit sc smaller than real-time sc");
803			goto err_ret;
804		}
805	}
806
807	gsc_destroy(&rtsc);
808	gsc_destroy(&lssc);
809
810	return (0);
811
812err_ret:
813	gsc_destroy(&rtsc);
814	gsc_destroy(&lssc);
815	return (-1);
816}
817
818static int
819check_commit_hfsc(int dev, int opts, struct pf_altq *pa)
820{
821	struct pf_altq	*altq, *def = NULL;
822	int		 default_class;
823	int		 error = 0;
824
825	/* check if hfsc has one default queue for this interface */
826	default_class = 0;
827	TAILQ_FOREACH(altq, &altqs, entries) {
828		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
829			continue;
830		if (altq->qname[0] == 0)  /* this is for interface */
831			continue;
832		if (altq->parent[0] == 0)  /* dummy root */
833			continue;
834		if (altq->pq_u.hfsc_opts.flags & HFCF_DEFAULTCLASS) {
835			default_class++;
836			def = altq;
837		}
838	}
839	if (default_class != 1) {
840		warnx("should have one default queue on %s", pa->ifname);
841		return (1);
842	}
843	/* make sure the default queue is a leaf */
844	TAILQ_FOREACH(altq, &altqs, entries) {
845		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
846			continue;
847		if (altq->qname[0] == 0)  /* this is for interface */
848			continue;
849		if (strncmp(altq->parent, def->qname, PF_QNAME_SIZE) == 0) {
850			warnx("default queue is not a leaf");
851			error++;
852		}
853	}
854	return (error);
855}
856
857static int
858print_hfsc_opts(const struct pf_altq *a, const struct node_queue_opt *qopts)
859{
860	const struct hfsc_opts		*opts;
861	const struct node_hfsc_sc	*rtsc, *lssc, *ulsc;
862
863	opts = &a->pq_u.hfsc_opts;
864	if (qopts == NULL)
865		rtsc = lssc = ulsc = NULL;
866	else {
867		rtsc = &qopts->data.hfsc_opts.realtime;
868		lssc = &qopts->data.hfsc_opts.linkshare;
869		ulsc = &qopts->data.hfsc_opts.upperlimit;
870	}
871
872	if (opts->flags || opts->rtsc_m2 != 0 || opts->ulsc_m2 != 0 ||
873	    (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth ||
874	    opts->lssc_d != 0))) {
875		printf("hfsc(");
876		if (opts->flags & HFCF_RED)
877			printf(" red");
878		if (opts->flags & HFCF_ECN)
879			printf(" ecn");
880		if (opts->flags & HFCF_RIO)
881			printf(" rio");
882		if (opts->flags & HFCF_CLEARDSCP)
883			printf(" cleardscp");
884		if (opts->flags & HFCF_DEFAULTCLASS)
885			printf(" default");
886		if (opts->rtsc_m2 != 0)
887			print_hfsc_sc("realtime", opts->rtsc_m1, opts->rtsc_d,
888			    opts->rtsc_m2, rtsc);
889		if (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth ||
890		    opts->lssc_d != 0))
891			print_hfsc_sc("linkshare", opts->lssc_m1, opts->lssc_d,
892			    opts->lssc_m2, lssc);
893		if (opts->ulsc_m2 != 0)
894			print_hfsc_sc("upperlimit", opts->ulsc_m1, opts->ulsc_d,
895			    opts->ulsc_m2, ulsc);
896		printf(" ) ");
897
898		return (1);
899	} else
900		return (0);
901}
902
903/*
904 * admission control using generalized service curve
905 */
906#ifndef INFINITY
907#define	INFINITY	HUGE_VAL  /* positive infinity defined in <math.h> */
908#endif
909
910/* add a new service curve to a generalized service curve */
911static void
912gsc_add_sc(struct gen_sc *gsc, struct service_curve *sc)
913{
914	if (is_sc_null(sc))
915		return;
916	if (sc->d != 0)
917		gsc_add_seg(gsc, 0.0, 0.0, (double)sc->d, (double)sc->m1);
918	gsc_add_seg(gsc, (double)sc->d, 0.0, INFINITY, (double)sc->m2);
919}
920
921/*
922 * check whether all points of a generalized service curve have
923 * their y-coordinates no larger than a given two-piece linear
924 * service curve.
925 */
926static int
927is_gsc_under_sc(struct gen_sc *gsc, struct service_curve *sc)
928{
929	struct segment	*s, *last, *end;
930	double		 y;
931
932	if (is_sc_null(sc)) {
933		if (LIST_EMPTY(gsc))
934			return (1);
935		LIST_FOREACH(s, gsc, _next) {
936			if (s->m != 0)
937				return (0);
938		}
939		return (1);
940	}
941	/*
942	 * gsc has a dummy entry at the end with x = INFINITY.
943	 * loop through up to this dummy entry.
944	 */
945	end = gsc_getentry(gsc, INFINITY);
946	if (end == NULL)
947		return (1);
948	last = NULL;
949	for (s = LIST_FIRST(gsc); s != end; s = LIST_NEXT(s, _next)) {
950		if (s->y > sc_x2y(sc, s->x))
951			return (0);
952		last = s;
953	}
954	/* last now holds the real last segment */
955	if (last == NULL)
956		return (1);
957	if (last->m > sc->m2)
958		return (0);
959	if (last->x < sc->d && last->m > sc->m1) {
960		y = last->y + (sc->d - last->x) * last->m;
961		if (y > sc_x2y(sc, sc->d))
962			return (0);
963	}
964	return (1);
965}
966
967static void
968gsc_destroy(struct gen_sc *gsc)
969{
970	struct segment	*s;
971
972	while ((s = LIST_FIRST(gsc)) != NULL) {
973		LIST_REMOVE(s, _next);
974		free(s);
975	}
976}
977
978/*
979 * return a segment entry starting at x.
980 * if gsc has no entry starting at x, a new entry is created at x.
981 */
982static struct segment *
983gsc_getentry(struct gen_sc *gsc, double x)
984{
985	struct segment	*new, *prev, *s;
986
987	prev = NULL;
988	LIST_FOREACH(s, gsc, _next) {
989		if (s->x == x)
990			return (s);	/* matching entry found */
991		else if (s->x < x)
992			prev = s;
993		else
994			break;
995	}
996
997	/* we have to create a new entry */
998	if ((new = calloc(1, sizeof(struct segment))) == NULL)
999		return (NULL);
1000
1001	new->x = x;
1002	if (x == INFINITY || s == NULL)
1003		new->d = 0;
1004	else if (s->x == INFINITY)
1005		new->d = INFINITY;
1006	else
1007		new->d = s->x - x;
1008	if (prev == NULL) {
1009		/* insert the new entry at the head of the list */
1010		new->y = 0;
1011		new->m = 0;
1012		LIST_INSERT_HEAD(gsc, new, _next);
1013	} else {
1014		/*
1015		 * the start point intersects with the segment pointed by
1016		 * prev.  divide prev into 2 segments
1017		 */
1018		if (x == INFINITY) {
1019			prev->d = INFINITY;
1020			if (prev->m == 0)
1021				new->y = prev->y;
1022			else
1023				new->y = INFINITY;
1024		} else {
1025			prev->d = x - prev->x;
1026			new->y = prev->d * prev->m + prev->y;
1027		}
1028		new->m = prev->m;
1029		LIST_INSERT_AFTER(prev, new, _next);
1030	}
1031	return (new);
1032}
1033
1034/* add a segment to a generalized service curve */
1035static int
1036gsc_add_seg(struct gen_sc *gsc, double x, double y, double d, double m)
1037{
1038	struct segment	*start, *end, *s;
1039	double		 x2;
1040
1041	if (d == INFINITY)
1042		x2 = INFINITY;
1043	else
1044		x2 = x + d;
1045	start = gsc_getentry(gsc, x);
1046	end = gsc_getentry(gsc, x2);
1047	if (start == NULL || end == NULL)
1048		return (-1);
1049
1050	for (s = start; s != end; s = LIST_NEXT(s, _next)) {
1051		s->m += m;
1052		s->y += y + (s->x - x) * m;
1053	}
1054
1055	end = gsc_getentry(gsc, INFINITY);
1056	for (; s != end; s = LIST_NEXT(s, _next)) {
1057		s->y += m * d;
1058	}
1059
1060	return (0);
1061}
1062
1063/* get y-projection of a service curve */
1064static double
1065sc_x2y(struct service_curve *sc, double x)
1066{
1067	double	y;
1068
1069	if (x <= (double)sc->d)
1070		/* y belongs to the 1st segment */
1071		y = x * (double)sc->m1;
1072	else
1073		/* y belongs to the 2nd segment */
1074		y = (double)sc->d * (double)sc->m1
1075			+ (x - (double)sc->d) * (double)sc->m2;
1076	return (y);
1077}
1078
1079/*
1080 * misc utilities
1081 */
#define	R2S_BUFS	8
#define	RATESTR_MAX	16

/*
 * Format a rate as a short string with a unit suffix: " b", "Kb", "Mb"
 * or "Gb".  The rate is divided by 1000 until it is below 1000 or the
 * largest unit (G) is reached.  Two decimals are printed when the
 * scaled value has a non-zero fractional hundredths part, otherwise it
 * is printed as an integer.
 *
 * The result lives in a static ring of R2S_BUFS buffers, so a returned
 * string is overwritten after R2S_BUFS further calls and must not be
 * freed by the caller.
 */
char *
rate2str(double rate)
{
	char		*buf;
	static char	 r2sbuf[R2S_BUFS][RATESTR_MAX];  /* ring buffer */
	static int	 idx = 0;
	int		 i;
	static const char unit[] = " KMG";

	buf = r2sbuf[idx++];
	if (idx == R2S_BUFS)
		idx = 0;

	/*
	 * Stop at index 3 ('G'): going one step further would select the
	 * string terminator of unit[] and truncate the output.
	 */
	for (i = 0; rate >= 1000 && i < 3; i++)
		rate /= 1000;

	if ((int)(rate * 100) % 100)
		snprintf(buf, RATESTR_MAX, "%.2f%cb", rate, unit[i]);
	else
		snprintf(buf, RATESTR_MAX, "%d%cb", (int)rate, unit[i]);

	return (buf);
}
1108
1109#ifdef __FreeBSD__
1110/*
1111 * XXX
1112 * FreeBSD does not have SIOCGIFDATA.
1113 * To emulate this, DIOCGIFSPEED ioctl added to pf.
1114 */
1115u_int32_t
1116getifspeed(int pfdev, char *ifname)
1117{
1118	struct pf_ifspeed io;
1119
1120	bzero(&io, sizeof io);
1121	if (strlcpy(io.ifname, ifname, IFNAMSIZ) >=
1122	    sizeof(io.ifname))
1123		errx(1, "getifspeed: strlcpy");
1124	if (ioctl(pfdev, DIOCGIFSPEED, &io) == -1)
1125		err(1, "DIOCGIFSPEED");
1126	return ((u_int32_t)io.baudrate);
1127}
1128#else
1129u_int32_t
1130getifspeed(char *ifname)
1131{
1132	int		s;
1133	struct ifreq	ifr;
1134	struct if_data	ifrdat;
1135
1136	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
1137		err(1, "socket");
1138	bzero(&ifr, sizeof(ifr));
1139	if (strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)) >=
1140	    sizeof(ifr.ifr_name))
1141		errx(1, "getifspeed: strlcpy");
1142	ifr.ifr_data = (caddr_t)&ifrdat;
1143	if (ioctl(s, SIOCGIFDATA, (caddr_t)&ifr) == -1)
1144		err(1, "SIOCGIFDATA");
1145	if (shutdown(s, SHUT_RDWR) == -1)
1146		err(1, "shutdown");
1147	if (close(s))
1148		err(1, "close");
1149	return ((u_int32_t)ifrdat.ifi_baudrate);
1150}
1151#endif
1152
1153u_long
1154getifmtu(char *ifname)
1155{
1156	int		s;
1157	struct ifreq	ifr;
1158
1159	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
1160		err(1, "socket");
1161	bzero(&ifr, sizeof(ifr));
1162	if (strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)) >=
1163	    sizeof(ifr.ifr_name))
1164		errx(1, "getifmtu: strlcpy");
1165	if (ioctl(s, SIOCGIFMTU, (caddr_t)&ifr) == -1)
1166		err(1, "SIOCGIFMTU");
1167	if (shutdown(s, SHUT_RDWR) == -1)
1168		err(1, "shutdown");
1169	if (close(s))
1170		err(1, "close");
1171	if (ifr.ifr_mtu > 0)
1172		return (ifr.ifr_mtu);
1173	else {
1174		warnx("could not get mtu for %s, assuming 1500", ifname);
1175		return (1500);
1176	}
1177}
1178
1179int
1180eval_queue_opts(struct pf_altq *pa, struct node_queue_opt *opts,
1181    u_int32_t ref_bw)
1182{
1183	int	errors = 0;
1184
1185	switch (pa->scheduler) {
1186	case ALTQT_CBQ:
1187		pa->pq_u.cbq_opts = opts->data.cbq_opts;
1188		break;
1189	case ALTQT_PRIQ:
1190		pa->pq_u.priq_opts = opts->data.priq_opts;
1191		break;
1192	case ALTQT_HFSC:
1193		pa->pq_u.hfsc_opts.flags = opts->data.hfsc_opts.flags;
1194		if (opts->data.hfsc_opts.linkshare.used) {
1195			pa->pq_u.hfsc_opts.lssc_m1 =
1196			    eval_bwspec(&opts->data.hfsc_opts.linkshare.m1,
1197			    ref_bw);
1198			pa->pq_u.hfsc_opts.lssc_m2 =
1199			    eval_bwspec(&opts->data.hfsc_opts.linkshare.m2,
1200			    ref_bw);
1201			pa->pq_u.hfsc_opts.lssc_d =
1202			    opts->data.hfsc_opts.linkshare.d;
1203		}
1204		if (opts->data.hfsc_opts.realtime.used) {
1205			pa->pq_u.hfsc_opts.rtsc_m1 =
1206			    eval_bwspec(&opts->data.hfsc_opts.realtime.m1,
1207			    ref_bw);
1208			pa->pq_u.hfsc_opts.rtsc_m2 =
1209			    eval_bwspec(&opts->data.hfsc_opts.realtime.m2,
1210			    ref_bw);
1211			pa->pq_u.hfsc_opts.rtsc_d =
1212			    opts->data.hfsc_opts.realtime.d;
1213		}
1214		if (opts->data.hfsc_opts.upperlimit.used) {
1215			pa->pq_u.hfsc_opts.ulsc_m1 =
1216			    eval_bwspec(&opts->data.hfsc_opts.upperlimit.m1,
1217			    ref_bw);
1218			pa->pq_u.hfsc_opts.ulsc_m2 =
1219			    eval_bwspec(&opts->data.hfsc_opts.upperlimit.m2,
1220			    ref_bw);
1221			pa->pq_u.hfsc_opts.ulsc_d =
1222			    opts->data.hfsc_opts.upperlimit.d;
1223		}
1224		break;
1225	default:
1226		warnx("eval_queue_opts: unknown scheduler type %u",
1227		    opts->qtype);
1228		errors++;
1229		break;
1230	}
1231
1232	return (errors);
1233}
1234
1235u_int32_t
1236eval_bwspec(struct node_queue_bw *bw, u_int32_t ref_bw)
1237{
1238	if (bw->bw_absolute > 0)
1239		return (bw->bw_absolute);
1240
1241	if (bw->bw_percent > 0)
1242		return (ref_bw / 100 * bw->bw_percent);
1243
1244	return (0);
1245}
1246
1247void
1248print_hfsc_sc(const char *scname, u_int m1, u_int d, u_int m2,
1249    const struct node_hfsc_sc *sc)
1250{
1251	printf(" %s", scname);
1252
1253	if (d != 0) {
1254		printf("(");
1255		if (sc != NULL && sc->m1.bw_percent > 0)
1256			printf("%u%%", sc->m1.bw_percent);
1257		else
1258			printf("%s", rate2str((double)m1));
1259		printf(" %u", d);
1260	}
1261
1262	if (sc != NULL && sc->m2.bw_percent > 0)
1263		printf(" %u%%", sc->m2.bw_percent);
1264	else
1265		printf(" %s", rate2str((double)m2));
1266
1267	if (d != 0)
1268		printf(")");
1269}
1270