pfctl_altq.c revision 126355
/*	$FreeBSD: head/contrib/pf/pfctl/pfctl_altq.c 126355 2004-02-28 17:32:53Z mlaier $	*/
/*	$OpenBSD: pfctl_altq.c,v 1.77 2003/08/22 21:50:34 david Exp $	*/

/*
 * Copyright (c) 2002
 *	Sony Computer Science Laboratories Inc.
 * Copyright (c) 2002, 2003 Henning Brauer <henning@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#if defined(__FreeBSD__)
#include <sys/limits.h>
#endif

#include <net/if.h>
#include <netinet/in.h>
#include <net/pfvar.h>

#include <err.h>
#include <errno.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <altq/altq.h>
#include <altq/altq_cbq.h>
#include <altq/altq_priq.h>
#include <altq/altq_hfsc.h>

#include "pfctl_parser.h"
#include "pfctl.h"

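/*
 * A service curve with both slopes (m1, m2) equal to zero is treated as
 * "not specified"; the helper below captures that convention.
 */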
#define is_sc_null(sc)	(((sc) == NULL) || ((sc)->m1 == 0 && (sc)->m2 == 0))

TAILQ_HEAD(altqs, pf_altq) altqs = TAILQ_HEAD_INITIALIZER(altqs);
LIST_HEAD(gen_sc, segment) rtsc, lssc;

struct pf_altq	*qname_to_pfaltq(const char *, const char *);
u_int32_t	 qname_to_qid(const char *);

static int	eval_pfqueue_cbq(struct pfctl *, struct pf_altq *);
static int	cbq_compute_idletime(struct pfctl *, struct pf_altq *);
static int	check_commit_cbq(int, int, struct pf_altq *);
static int	print_cbq_opts(const struct pf_altq *);

static int	eval_pfqueue_priq(struct pfctl *, struct pf_altq *);
static int	check_commit_priq(int, int, struct pf_altq *);
static int	print_priq_opts(const struct pf_altq *);

static int	eval_pfqueue_hfsc(struct pfctl *, struct pf_altq *);
static int	check_commit_hfsc(int, int, struct pf_altq *);
static int	print_hfsc_opts(const struct pf_altq *,
		    const struct node_queue_opt *);

static void		 gsc_add_sc(struct gen_sc *, struct service_curve *);
static int		 is_gsc_under_sc(struct gen_sc *,
			     struct service_curve *);
static void		 gsc_destroy(struct gen_sc *);
static struct segment	*gsc_getentry(struct gen_sc *, double);
static int		 gsc_add_seg(struct gen_sc *, double, double, double,
			     double);
static double		 sc_x2y(struct service_curve *, double);

#if defined(__FreeBSD__)
u_int32_t	 getifspeed(int, char *);
#else
u_int32_t	 getifspeed(char *);
#endif
u_long		 getifmtu(char *);
int		 eval_queue_opts(struct pf_altq *, struct node_queue_opt *,
		     u_int32_t);
u_int32_t	 eval_bwspec(struct node_queue_bw *, u_int32_t);
void		 print_hfsc_sc(const char *, u_int, u_int, u_int,
		     const struct node_hfsc_sc *);

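/*
 * Queue IDs are handed out from this counter as queues are evaluated;
 * qname_to_qid() reuses an existing qid for same-named queues, so the
 * counter only advances for genuinely new names.
 */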
static u_int32_t	 max_qid = 1;

void
pfaltq_store(struct pf_altq *a)
{
	struct pf_altq	*altq;

	if ((altq = malloc(sizeof(*altq))) == NULL)
		err(1, "malloc");
	memcpy(altq, a, sizeof(struct pf_altq));
	TAILQ_INSERT_TAIL(&altqs, altq, entries);
}

void
pfaltq_free(struct pf_altq *a)
{
	struct pf_altq	*altq;

	TAILQ_FOREACH(altq, &altqs, entries) {
		if (strncmp(a->ifname, altq->ifname, IFNAMSIZ) == 0 &&
		    strncmp(a->qname, altq->qname, PF_QNAME_SIZE) == 0) {
			TAILQ_REMOVE(&altqs, altq, entries);
			free(altq);
			return;
		}
	}
}

struct pf_altq *
pfaltq_lookup(const char *ifname)
{
	struct pf_altq	*altq;

	TAILQ_FOREACH(altq, &altqs, entries) {
		if (strncmp(ifname, altq->ifname, IFNAMSIZ) == 0 &&
		    altq->qname[0] == 0)
			return (altq);
	}
	return (NULL);
}

struct pf_altq *
qname_to_pfaltq(const char *qname, const char *ifname)
{
	struct pf_altq	*altq;

	TAILQ_FOREACH(altq, &altqs, entries) {
		if (strncmp(ifname, altq->ifname, IFNAMSIZ) == 0 &&
		    strncmp(qname, altq->qname, PF_QNAME_SIZE) == 0)
			return (altq);
	}
	return (NULL);
}

u_int32_t
qname_to_qid(const char *qname)
{
	struct pf_altq	*altq;

	/*
	 * We guarantee that same-named queues on different interfaces
	 * have the same qid, so we do NOT need to limit the search to
	 * one interface.
	 */

	TAILQ_FOREACH(altq, &altqs, entries) {
		if (strncmp(qname, altq->qname, PF_QNAME_SIZE) == 0)
			return (altq->qid);
	}
	return (0);
}

void
print_altq(const struct pf_altq *a, unsigned level, struct node_queue_bw *bw,
	struct node_queue_opt *qopts)
{
	if (a->qname[0] != 0) {
		print_queue(a, level, bw, 0, qopts);
		return;
	}

	printf("altq on %s ", a->ifname);

	switch (a->scheduler) {
	case ALTQT_CBQ:
		if (!print_cbq_opts(a))
			printf("cbq ");
		break;
	case ALTQT_PRIQ:
		if (!print_priq_opts(a))
			printf("priq ");
		break;
	case ALTQT_HFSC:
		if (!print_hfsc_opts(a, qopts))
			printf("hfsc ");
		break;
	}

	if (bw != NULL && bw->bw_percent > 0) {
		if (bw->bw_percent < 100)
			printf("bandwidth %u%% ", bw->bw_percent);
	} else
		printf("bandwidth %s ", rate2str((double)a->ifbandwidth));

	if (a->qlimit != DEFAULT_QLIMIT)
		printf("qlimit %u ", a->qlimit);
	printf("tbrsize %u ", a->tbrsize);
}

void
print_queue(const struct pf_altq *a, unsigned level, struct node_queue_bw *bw,
    int print_interface, struct node_queue_opt *qopts)
{
	unsigned	i;

	printf("queue ");
	for (i = 0; i < level; ++i)
		printf(" ");
	printf("%s ", a->qname);
	if (print_interface)
		printf("on %s ", a->ifname);
	if (a->scheduler == ALTQT_CBQ || a->scheduler == ALTQT_HFSC) {
		if (bw != NULL && bw->bw_percent > 0) {
			if (bw->bw_percent < 100)
				printf("bandwidth %u%% ", bw->bw_percent);
		} else
			printf("bandwidth %s ", rate2str((double)a->bandwidth));
	}
	if (a->priority != DEFAULT_PRIORITY)
		printf("priority %u ", a->priority);
	if (a->qlimit != DEFAULT_QLIMIT)
		printf("qlimit %u ", a->qlimit);
	switch (a->scheduler) {
	case ALTQT_CBQ:
		print_cbq_opts(a);
		break;
	case ALTQT_PRIQ:
		print_priq_opts(a);
		break;
	case ALTQT_HFSC:
		print_hfsc_opts(a, qopts);
		break;
	}
}

/*
 * eval_pfaltq computes the discipline parameters.
 */
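/*
 * In short: pick the interface bandwidth (the explicit value if one was
 * given, otherwise queried from the kernel), fold in the scheduler
 * options, and size the token-bucket regulator if the config did not.
 */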
int
eval_pfaltq(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw,
    struct node_queue_opt *opts)
{
	u_int	rate, size, errors = 0;

	if (bw->bw_absolute > 0)
		pa->ifbandwidth = bw->bw_absolute;
	else
#if defined(__FreeBSD__)
		if ((rate = getifspeed(pf->dev, pa->ifname)) == 0) {
#else
		if ((rate = getifspeed(pa->ifname)) == 0) {
#endif
			fprintf(stderr, "cannot determine interface bandwidth "
			    "for %s, specify an absolute bandwidth\n",
			    pa->ifname);
			errors++;
		} else if ((pa->ifbandwidth = eval_bwspec(bw, rate)) == 0)
			pa->ifbandwidth = rate;

	errors += eval_queue_opts(pa, opts, pa->ifbandwidth);

	/* if tbrsize is not specified, use heuristics */
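	/*
	 * The factor grows with link speed so that the token bucket can
	 * hold a few MTU-sized packets; e.g. a 100Mbps interface with a
	 * 1500-byte MTU ends up with tbrsize = 8 * 1500 = 12000 bytes
	 * (illustrative numbers only).
	 */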
	if (pa->tbrsize == 0) {
		rate = pa->ifbandwidth;
		if (rate <= 1 * 1000 * 1000)
			size = 1;
		else if (rate <= 10 * 1000 * 1000)
			size = 4;
		else if (rate <= 200 * 1000 * 1000)
			size = 8;
		else
			size = 24;
		size = size * getifmtu(pa->ifname);
		pa->tbrsize = size;
	}
	return (errors);
}

/*
 * check_commit_altq does consistency check for each interface
 */
int
check_commit_altq(int dev, int opts)
{
	struct pf_altq	*altq;
	int		 error = 0;

	/* call the discipline check for each interface. */
	TAILQ_FOREACH(altq, &altqs, entries) {
		if (altq->qname[0] == 0) {
			switch (altq->scheduler) {
			case ALTQT_CBQ:
				error = check_commit_cbq(dev, opts, altq);
				break;
			case ALTQT_PRIQ:
				error = check_commit_priq(dev, opts, altq);
				break;
			case ALTQT_HFSC:
				error = check_commit_hfsc(dev, opts, altq);
				break;
			default:
				break;
			}
		}
	}
	return (error);
}

/*
 * eval_pfqueue computes the queue parameters.
 */
int
eval_pfqueue(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw,
    struct node_queue_opt *opts)
{
	/* should be merged with expand_queue */
	struct pf_altq	*if_pa, *parent;
	int		 error = 0;

	/* find the corresponding interface and copy fields used by queues */
	if ((if_pa = pfaltq_lookup(pa->ifname)) == NULL) {
		fprintf(stderr, "altq not defined on %s\n", pa->ifname);
		return (1);
	}
	pa->scheduler = if_pa->scheduler;
	pa->ifbandwidth = if_pa->ifbandwidth;

	if (qname_to_pfaltq(pa->qname, pa->ifname) != NULL) {
		fprintf(stderr, "queue %s already exists on interface %s\n",
		    pa->qname, pa->ifname);
		return (1);
	}
	pa->qid = qname_to_qid(pa->qname);

	parent = NULL;
	if (pa->parent[0] != 0) {
		parent = qname_to_pfaltq(pa->parent, pa->ifname);
		if (parent == NULL) {
			fprintf(stderr, "parent %s not found for %s\n",
			    pa->parent, pa->qname);
			return (1);
		}
		pa->parent_qid = parent->qid;
	}
	if (pa->qlimit == 0)
		pa->qlimit = DEFAULT_QLIMIT;

	if (pa->scheduler == ALTQT_CBQ || pa->scheduler == ALTQT_HFSC) {
		if ((pa->bandwidth = eval_bwspec(bw,
		    parent == NULL ? 0 : parent->bandwidth)) == 0) {
			fprintf(stderr, "bandwidth for %s invalid (%d / %d)\n",
			    pa->qname, bw->bw_absolute, bw->bw_percent);
			return (1);
		}

		if (pa->bandwidth > pa->ifbandwidth) {
			fprintf(stderr, "bandwidth for %s higher than "
			    "interface\n", pa->qname);
			return (1);
		}
		if (parent != NULL && pa->bandwidth > parent->bandwidth) {
			fprintf(stderr, "bandwidth for %s higher than parent\n",
			    pa->qname);
			return (1);
		}
	}

	if (eval_queue_opts(pa, opts, parent == NULL ? 0 : parent->bandwidth))
		return (1);

	switch (pa->scheduler) {
	case ALTQT_CBQ:
		error = eval_pfqueue_cbq(pf, pa);
		break;
	case ALTQT_PRIQ:
		error = eval_pfqueue_priq(pf, pa);
		break;
	case ALTQT_HFSC:
		error = eval_pfqueue_hfsc(pf, pa);
		break;
	default:
		break;
	}
	return (error);
}

/*
 * CBQ support functions
 */
#define	RM_FILTER_GAIN	5	/* log2 of gain, e.g., 5 => 31/32 */
#define	RM_NS_PER_SEC	(1000000000)

static int
eval_pfqueue_cbq(struct pfctl *pf, struct pf_altq *pa)
{
	struct cbq_opts	*opts;
	u_int		 ifmtu;

	if (pa->priority >= CBQ_MAXPRI) {
		warnx("priority out of range: max %d", CBQ_MAXPRI - 1);
		return (-1);
	}

	ifmtu = getifmtu(pa->ifname);
	opts = &pa->pq_u.cbq_opts;

	if (opts->pktsize == 0) {	/* use default */
		opts->pktsize = ifmtu;
		if (opts->pktsize > MCLBYTES)	/* do what TCP does */
			opts->pktsize &= ~MCLBYTES;
	} else if (opts->pktsize > ifmtu)
		opts->pktsize = ifmtu;
	if (opts->maxpktsize == 0)	/* use default */
		opts->maxpktsize = ifmtu;
	else if (opts->maxpktsize > ifmtu)
		opts->maxpktsize = ifmtu;

	if (opts->pktsize > opts->maxpktsize)
		opts->pktsize = opts->maxpktsize;

	if (pa->parent[0] == 0)
		opts->flags |= (CBQCLF_ROOTCLASS | CBQCLF_WRR);
	else if (pa->qid == 0 && (opts->flags & CBQCLF_DEFCLASS) == 0)
		pa->qid = ++max_qid;

	cbq_compute_idletime(pf, pa);
	return (0);
}

/*
 * compute ns_per_byte, maxidle, minidle, and offtime
 */
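/*
 * Rough intuition (after the CBQ link-sharing estimator): ns_per_byte is
 * the per-byte transmission budget at the class's allocated rate, maxidle
 * caps how much unused "idle" credit a class may accumulate (bounding its
 * burst size), minidle bounds how far into debt it may go, and offtime is
 * how long an over-limit class is kept suspended.  The values below are
 * derived from the configured bandwidth share f and the min/max burst
 * sizes; this summary is editorial, the authoritative definitions live in
 * the ALTQ CBQ code.
 */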
static int
cbq_compute_idletime(struct pfctl *pf, struct pf_altq *pa)
{
	struct cbq_opts	*opts;
	double		 maxidle_s, maxidle, minidle;
	double		 offtime, nsPerByte, ifnsPerByte, ptime, cptime;
	double		 z, g, f, gton, gtom;
	u_int		 minburst, maxburst;

	opts = &pa->pq_u.cbq_opts;
	ifnsPerByte = (1.0 / (double)pa->ifbandwidth) * RM_NS_PER_SEC * 8;
	minburst = opts->minburst;
	maxburst = opts->maxburst;

	if (pa->bandwidth == 0)
		f = 0.0001;	/* small enough? */
	else
		f = ((double) pa->bandwidth / (double) pa->ifbandwidth);

	nsPerByte = ifnsPerByte / f;
	ptime = (double)opts->pktsize * ifnsPerByte;
	cptime = ptime * (1.0 - f) / f;

	if (nsPerByte * (double)opts->maxpktsize > (double)INT_MAX) {
		/*
		 * this causes integer overflow in kernel!
		 * (bandwidth < 6Kbps when max_pkt_size=1500)
		 */
		if (pa->bandwidth != 0 && (pf->opts & PF_OPT_QUIET) == 0) {
			warnx("queue bandwidth must be larger than %s",
			    rate2str(ifnsPerByte * (double)opts->maxpktsize /
			    (double)INT_MAX * (double)pa->ifbandwidth));
			fprintf(stderr, "cbq: queue %s is too slow!\n",
			    pa->qname);
		}
		nsPerByte = (double)(INT_MAX / opts->maxpktsize);
	}

	if (maxburst == 0) {  /* use default */
		if (cptime > 10.0 * 1000000)
			maxburst = 4;
		else
			maxburst = 16;
	}
	if (minburst == 0)  /* use default */
		minburst = 2;
	if (minburst > maxburst)
		minburst = maxburst;

	z = (double)(1 << RM_FILTER_GAIN);
	g = (1.0 - 1.0 / z);
	gton = pow(g, (double)maxburst);
	gtom = pow(g, (double)(minburst-1));
	maxidle = ((1.0 / f - 1.0) * ((1.0 - gton) / gton));
	maxidle_s = (1.0 - g);
	if (maxidle > maxidle_s)
		maxidle = ptime * maxidle;
	else
		maxidle = ptime * maxidle_s;
	if (minburst)
		offtime = cptime * (1.0 + 1.0/(1.0 - g) * (1.0 - gtom) / gtom);
	else
		offtime = cptime;
	minidle = -((double)opts->maxpktsize * (double)nsPerByte);

	/* scale parameters */
	maxidle = ((maxidle * 8.0) / nsPerByte) * pow(2.0, (double)RM_FILTER_GAIN);
	offtime = (offtime * 8.0) / nsPerByte * pow(2.0, (double)RM_FILTER_GAIN);
	minidle = ((minidle * 8.0) / nsPerByte) * pow(2.0, (double)RM_FILTER_GAIN);

	maxidle = maxidle / 1000.0;
	offtime = offtime / 1000.0;
	minidle = minidle / 1000.0;

	opts->minburst = minburst;
	opts->maxburst = maxburst;
	opts->ns_per_byte = (u_int) nsPerByte;
	opts->maxidle = (u_int) fabs(maxidle);
	opts->minidle = (int)minidle;
	opts->offtime = (u_int) fabs(offtime);

	return (0);
}

static int
check_commit_cbq(int dev, int opts, struct pf_altq *pa)
{
	struct pf_altq	*altq;
	int		 root_class, default_class;
	int		 error = 0;

	/*
	 * check if cbq has one root queue and one default queue
	 * for this interface
	 */
	root_class = default_class = 0;
	TAILQ_FOREACH(altq, &altqs, entries) {
		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
			continue;
		if (altq->qname[0] == 0)  /* this is for interface */
			continue;
		if (altq->pq_u.cbq_opts.flags & CBQCLF_ROOTCLASS)
			root_class++;
		if (altq->pq_u.cbq_opts.flags & CBQCLF_DEFCLASS)
			default_class++;
	}
	if (root_class != 1) {
		warnx("should have one root queue on %s", pa->ifname);
		error++;
	}
	if (default_class != 1) {
		warnx("should have one default queue on %s", pa->ifname);
		error++;
	}
	return (error);
}

static int
print_cbq_opts(const struct pf_altq *a)
{
	const struct cbq_opts	*opts;

	opts = &a->pq_u.cbq_opts;
	if (opts->flags) {
		printf("cbq(");
		if (opts->flags & CBQCLF_RED)
			printf(" red");
		if (opts->flags & CBQCLF_ECN)
			printf(" ecn");
		if (opts->flags & CBQCLF_RIO)
			printf(" rio");
		if (opts->flags & CBQCLF_CLEARDSCP)
			printf(" cleardscp");
		if (opts->flags & CBQCLF_FLOWVALVE)
			printf(" flowvalve");
		if (opts->flags & CBQCLF_BORROW)
			printf(" borrow");
		if (opts->flags & CBQCLF_WRR)
			printf(" wrr");
		if (opts->flags & CBQCLF_EFFICIENT)
			printf(" efficient");
		if (opts->flags & CBQCLF_ROOTCLASS)
			printf(" root");
		if (opts->flags & CBQCLF_DEFCLASS)
			printf(" default");
		printf(" ) ");

		return (1);
	} else
		return (0);
}

/*
 * PRIQ support functions
 */
static int
eval_pfqueue_priq(struct pfctl *pf, struct pf_altq *pa)
{
	struct pf_altq	*altq;

	if (pa->priority >= PRIQ_MAXPRI) {
		warnx("priority out of range: max %d", PRIQ_MAXPRI - 1);
		return (-1);
	}
	/* the priority should be unique for the interface */
	TAILQ_FOREACH(altq, &altqs, entries) {
		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) == 0 &&
		    altq->qname[0] != 0 && altq->priority == pa->priority) {
			warnx("%s and %s have the same priority",
			    altq->qname, pa->qname);
			return (-1);
		}
	}

	if (pa->qid == 0)
		pa->qid = ++max_qid;

	return (0);
}

static int
check_commit_priq(int dev, int opts, struct pf_altq *pa)
{
	struct pf_altq	*altq;
	int		 default_class;
	int		 error = 0;

	/*
	 * check if priq has one default class for this interface
	 */
	default_class = 0;
	TAILQ_FOREACH(altq, &altqs, entries) {
		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
			continue;
		if (altq->qname[0] == 0)  /* this is for interface */
			continue;
		if (altq->pq_u.priq_opts.flags & PRCF_DEFAULTCLASS)
			default_class++;
	}
	if (default_class != 1) {
		warnx("should have one default queue on %s", pa->ifname);
		error++;
	}
	return (error);
}

static int
print_priq_opts(const struct pf_altq *a)
{
	const struct priq_opts	*opts;

	opts = &a->pq_u.priq_opts;

	if (opts->flags) {
		printf("priq(");
		if (opts->flags & PRCF_RED)
			printf(" red");
		if (opts->flags & PRCF_ECN)
			printf(" ecn");
		if (opts->flags & PRCF_RIO)
			printf(" rio");
		if (opts->flags & PRCF_CLEARDSCP)
			printf(" cleardscp");
		if (opts->flags & PRCF_DEFAULTCLASS)
			printf(" default");
		printf(" ) ");

		return (1);
	} else
		return (0);
}

/*
 * HFSC support functions
 */
static int
eval_pfqueue_hfsc(struct pfctl *pf, struct pf_altq *pa)
{
	struct pf_altq		*altq, *parent;
	struct hfsc_opts	*opts;
	struct service_curve	 sc;

	opts = &pa->pq_u.hfsc_opts;

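	/*
	 * The root class simply gets the whole interface bandwidth as its
	 * link-sharing curve; real admission control happens for the
	 * child classes below.
	 */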
	if (pa->parent[0] == 0) {
		/* root queue */
		pa->qid = HFSC_ROOTCLASS_HANDLE;
		opts->lssc_m1 = pa->ifbandwidth;
		opts->lssc_m2 = pa->ifbandwidth;
		opts->lssc_d = 0;
		return (0);
	} else if (pa->qid == 0)
		pa->qid = ++max_qid;

	LIST_INIT(&rtsc);
	LIST_INIT(&lssc);

	/* if link_share is not specified, use bandwidth */
	if (opts->lssc_m2 == 0)
		opts->lssc_m2 = pa->bandwidth;

	if ((opts->rtsc_m1 > 0 && opts->rtsc_m2 == 0) ||
	    (opts->lssc_m1 > 0 && opts->lssc_m2 == 0) ||
	    (opts->ulsc_m1 > 0 && opts->ulsc_m2 == 0)) {
		warnx("m2 is zero for %s", pa->qname);
		return (-1);
	}

	if ((opts->rtsc_m1 < opts->rtsc_m2 && opts->rtsc_m1 != 0) ||
	    (opts->lssc_m1 < opts->lssc_m2 && opts->lssc_m1 != 0) ||
	    (opts->ulsc_m1 < opts->ulsc_m2 && opts->ulsc_m1 != 0)) {
		warnx("m1 must be zero for convex curve: %s", pa->qname);
		return (-1);
	}

	/*
	 * admission control:
	 * for the real-time service curve, the sum of the service curves
	 * should not exceed 80% of the interface bandwidth.  20% is reserved
	 * so as not to over-commit the actual interface bandwidth.
	 * for the link-sharing service curve, the sum of the child service
	 * curves should not exceed the parent service curve.
	 * for the upper-limit service curve, the assigned bandwidth should
	 * be smaller than the interface bandwidth, and the upper-limit should
	 * be larger than the real-time service curve when both are defined.
	 */
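	/*
	 * For example (illustrative only): on a 100Mbps interface the
	 * real-time curves of all classes together must stay below
	 * 80Mbps at every point in time, while each link-sharing curve
	 * is checked only against its own parent's curve.
	 */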
	parent = qname_to_pfaltq(pa->parent, pa->ifname);
	if (parent == NULL)
		errx(1, "parent %s not found for %s", pa->parent, pa->qname);

	TAILQ_FOREACH(altq, &altqs, entries) {
		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
			continue;
		if (altq->qname[0] == 0)  /* this is for interface */
			continue;

		/* if the class has a real-time service curve, add it. */
		if (opts->rtsc_m2 != 0 && altq->pq_u.hfsc_opts.rtsc_m2 != 0) {
			sc.m1 = altq->pq_u.hfsc_opts.rtsc_m1;
			sc.d  = altq->pq_u.hfsc_opts.rtsc_d;
			sc.m2 = altq->pq_u.hfsc_opts.rtsc_m2;
			gsc_add_sc(&rtsc, &sc);
		}

		if (strncmp(altq->parent, pa->parent, PF_QNAME_SIZE) != 0)
			continue;

		/* if the class has a link-sharing service curve, add it. */
		if (opts->lssc_m2 != 0 && altq->pq_u.hfsc_opts.lssc_m2 != 0) {
			sc.m1 = altq->pq_u.hfsc_opts.lssc_m1;
			sc.d  = altq->pq_u.hfsc_opts.lssc_d;
			sc.m2 = altq->pq_u.hfsc_opts.lssc_m2;
			gsc_add_sc(&lssc, &sc);
		}
	}

	/* check the real-time service curve.  reserve 20% of interface bw */
	if (opts->rtsc_m2 != 0) {
		sc.m1 = 0;
		sc.d  = 0;
		sc.m2 = pa->ifbandwidth / 100 * 80;
		if (!is_gsc_under_sc(&rtsc, &sc)) {
			warnx("real-time sc exceeds the interface bandwidth");
			goto err_ret;
		}
	}

	/* check the link-sharing service curve. */
	if (opts->lssc_m2 != 0) {
		sc.m1 = parent->pq_u.hfsc_opts.lssc_m1;
		sc.d  = parent->pq_u.hfsc_opts.lssc_d;
		sc.m2 = parent->pq_u.hfsc_opts.lssc_m2;
		if (!is_gsc_under_sc(&lssc, &sc)) {
			warnx("link-sharing sc exceeds parent's sc");
			goto err_ret;
		}
	}

	/* check the upper-limit service curve. */
	if (opts->ulsc_m2 != 0) {
		if (opts->ulsc_m1 > pa->ifbandwidth ||
		    opts->ulsc_m2 > pa->ifbandwidth) {
			warnx("upper-limit larger than interface bandwidth");
			goto err_ret;
		}
		if (opts->rtsc_m2 != 0 && opts->rtsc_m2 > opts->ulsc_m2) {
			warnx("upper-limit sc smaller than real-time sc");
			goto err_ret;
		}
	}

	gsc_destroy(&rtsc);
	gsc_destroy(&lssc);

	return (0);

err_ret:
	gsc_destroy(&rtsc);
	gsc_destroy(&lssc);
	return (-1);
}

static int
check_commit_hfsc(int dev, int opts, struct pf_altq *pa)
{
	struct pf_altq	*altq, *def = NULL;
	int		 default_class;
	int		 error = 0;

	/* check if hfsc has one default queue for this interface */
	default_class = 0;
	TAILQ_FOREACH(altq, &altqs, entries) {
		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
			continue;
		if (altq->qname[0] == 0)  /* this is for interface */
			continue;
		if (altq->parent[0] == 0)  /* dummy root */
			continue;
		if (altq->pq_u.hfsc_opts.flags & HFCF_DEFAULTCLASS) {
			default_class++;
			def = altq;
		}
	}
	if (default_class != 1) {
		warnx("should have one default queue on %s", pa->ifname);
		return (1);
	}
	/* make sure the default queue is a leaf */
	TAILQ_FOREACH(altq, &altqs, entries) {
		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
			continue;
		if (altq->qname[0] == 0)  /* this is for interface */
			continue;
		if (strncmp(altq->parent, def->qname, PF_QNAME_SIZE) == 0) {
			warnx("default queue is not a leaf");
			error++;
		}
	}
	return (error);
}

static int
print_hfsc_opts(const struct pf_altq *a, const struct node_queue_opt *qopts)
{
	const struct hfsc_opts		*opts;
	const struct node_hfsc_sc	*rtsc, *lssc, *ulsc;

	opts = &a->pq_u.hfsc_opts;
	if (qopts == NULL)
		rtsc = lssc = ulsc = NULL;
	else {
		rtsc = &qopts->data.hfsc_opts.realtime;
		lssc = &qopts->data.hfsc_opts.linkshare;
		ulsc = &qopts->data.hfsc_opts.upperlimit;
	}

	if (opts->flags || opts->rtsc_m2 != 0 || opts->ulsc_m2 != 0 ||
	    (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth ||
	    opts->lssc_d != 0))) {
		printf("hfsc(");
		if (opts->flags & HFCF_RED)
			printf(" red");
		if (opts->flags & HFCF_ECN)
			printf(" ecn");
		if (opts->flags & HFCF_RIO)
			printf(" rio");
		if (opts->flags & HFCF_CLEARDSCP)
			printf(" cleardscp");
		if (opts->flags & HFCF_DEFAULTCLASS)
			printf(" default");
		if (opts->rtsc_m2 != 0)
			print_hfsc_sc("realtime", opts->rtsc_m1, opts->rtsc_d,
			    opts->rtsc_m2, rtsc);
		if (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth ||
		    opts->lssc_d != 0))
			print_hfsc_sc("linkshare", opts->lssc_m1, opts->lssc_d,
			    opts->lssc_m2, lssc);
		if (opts->ulsc_m2 != 0)
			print_hfsc_sc("upperlimit", opts->ulsc_m1, opts->ulsc_d,
			    opts->ulsc_m2, ulsc);
		printf(" ) ");

		return (1);
	} else
		return (0);
}

/*
 * admission control using generalized service curve
 */
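/*
 * A generalized service curve (gen_sc) is kept as a list of linear
 * segments sorted by x (time), each with a start point (x, y), a length d
 * and a slope m.  gsc_add_sc() folds a two-piece HFSC curve into that
 * list, and is_gsc_under_sc() then checks that the accumulated curve
 * never rises above a given two-piece curve.
 */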
#if defined(__FreeBSD__)
#if defined(INFINITY)
#undef INFINITY
#endif
#define	INFINITY	HUGE_VAL  /* positive infinity defined in <math.h> */
#else
#define	INFINITY	HUGE_VAL  /* positive infinity defined in <math.h> */
#endif

/* add a new service curve to a generalized service curve */
static void
gsc_add_sc(struct gen_sc *gsc, struct service_curve *sc)
{
	if (is_sc_null(sc))
		return;
	if (sc->d != 0)
		gsc_add_seg(gsc, 0.0, 0.0, (double)sc->d, (double)sc->m1);
	gsc_add_seg(gsc, (double)sc->d, 0.0, INFINITY, (double)sc->m2);
}

/*
 * check whether all points of a generalized service curve have
 * their y-coordinates no larger than a given two-piece linear
 * service curve.
 */
static int
is_gsc_under_sc(struct gen_sc *gsc, struct service_curve *sc)
{
	struct segment	*s, *last, *end;
	double		 y;

	if (is_sc_null(sc)) {
		if (LIST_EMPTY(gsc))
			return (1);
		LIST_FOREACH(s, gsc, _next) {
			if (s->m != 0)
				return (0);
		}
		return (1);
	}
	/*
	 * gsc has a dummy entry at the end with x = INFINITY.
	 * loop through up to this dummy entry.
	 */
	end = gsc_getentry(gsc, INFINITY);
	if (end == NULL)
		return (1);
	last = NULL;
	for (s = LIST_FIRST(gsc); s != end; s = LIST_NEXT(s, _next)) {
		if (s->y > sc_x2y(sc, s->x))
			return (0);
		last = s;
	}
	/* last now holds the real last segment */
	if (last == NULL)
		return (1);
	if (last->m > sc->m2)
		return (0);
	if (last->x < sc->d && last->m > sc->m1) {
		y = last->y + (sc->d - last->x) * last->m;
		if (y > sc_x2y(sc, sc->d))
			return (0);
	}
	return (1);
}

static void
gsc_destroy(struct gen_sc *gsc)
{
	struct segment	*s;

	while ((s = LIST_FIRST(gsc)) != NULL) {
		LIST_REMOVE(s, _next);
		free(s);
	}
}

/*
 * return a segment entry starting at x.
 * if gsc has no entry starting at x, a new entry is created at x.
 */
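/*
 * When x falls inside an existing segment, that segment is split in two:
 * the earlier part keeps its start and is shortened, and the new entry
 * starts at x with the same slope and the interpolated y value, so the
 * shape of the curve is unchanged.
 */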
static struct segment *
gsc_getentry(struct gen_sc *gsc, double x)
{
	struct segment	*new, *prev, *s;

	prev = NULL;
	LIST_FOREACH(s, gsc, _next) {
		if (s->x == x)
			return (s);	/* matching entry found */
		else if (s->x < x)
			prev = s;
		else
			break;
	}

	/* we have to create a new entry */
	if ((new = calloc(1, sizeof(struct segment))) == NULL)
		return (NULL);

	new->x = x;
	if (x == INFINITY || s == NULL)
		new->d = 0;
	else if (s->x == INFINITY)
		new->d = INFINITY;
	else
		new->d = s->x - x;
	if (prev == NULL) {
		/* insert the new entry at the head of the list */
		new->y = 0;
		new->m = 0;
		LIST_INSERT_HEAD(gsc, new, _next);
	} else {
		/*
		 * the start point intersects with the segment pointed by
		 * prev.  divide prev into 2 segments
		 */
		if (x == INFINITY) {
			prev->d = INFINITY;
			if (prev->m == 0)
				new->y = prev->y;
			else
				new->y = INFINITY;
		} else {
			prev->d = x - prev->x;
			new->y = prev->d * prev->m + prev->y;
		}
		new->m = prev->m;
		LIST_INSERT_AFTER(prev, new, _next);
	}
	return (new);
}

/* add a segment to a generalized service curve */
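/*
 * gsc_add_seg() adds the line starting at (x, y) with slope m and length d
 * on top of whatever is already in the curve: every overlapping segment
 * gets its slope and offset increased, and everything to the right of the
 * added piece is shifted up by the piece's total rise (m * d), so the list
 * always represents the sum of all curves added so far.
 */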
static int
gsc_add_seg(struct gen_sc *gsc, double x, double y, double d, double m)
{
	struct segment	*start, *end, *s;
	double		 x2;

	if (d == INFINITY)
		x2 = INFINITY;
	else
		x2 = x + d;
	start = gsc_getentry(gsc, x);
	end   = gsc_getentry(gsc, x2);
	if (start == NULL || end == NULL)
		return (-1);

	for (s = start; s != end; s = LIST_NEXT(s, _next)) {
		s->m += m;
		s->y += y + (s->x - x) * m;
	}

	end = gsc_getentry(gsc, INFINITY);
	for (; s != end; s = LIST_NEXT(s, _next)) {
		s->y += m * d;
	}

	return (0);
}

/* get y-projection of a service curve */
static double
sc_x2y(struct service_curve *sc, double x)
{
	double	y;

	if (x <= (double)sc->d)
		/* y belongs to the 1st segment */
		y = x * (double)sc->m1;
	else
		/* y belongs to the 2nd segment */
		y = (double)sc->d * (double)sc->m1
			+ (x - (double)sc->d) * (double)sc->m2;
	return (y);
}

/*
 * misc utilities
 */
#define	R2S_BUFS	8
#define	RATESTR_MAX	16

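/*
 * rate2str() formats a bit rate with K/M/G suffixes, e.g. 250000 becomes
 * "250Kb" and 1540000 becomes "1.54Mb".  It hands out results from a ring
 * of R2S_BUFS static buffers so that several calls can appear in a single
 * printf() without overwriting each other.
 */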
char *
rate2str(double rate)
{
	char		*buf;
	static char	 r2sbuf[R2S_BUFS][RATESTR_MAX];  /* ring buffer */
	static int	 idx = 0;
	int		 i;
	static const char unit[] = " KMG";

	buf = r2sbuf[idx++];
	if (idx == R2S_BUFS)
		idx = 0;

	for (i = 0; rate >= 1000 && i <= 3; i++)
		rate /= 1000;

	if ((int)(rate * 100) % 100)
		snprintf(buf, RATESTR_MAX, "%.2f%cb", rate, unit[i]);
	else
		snprintf(buf, RATESTR_MAX, "%d%cb", (int)rate, unit[i]);

	return (buf);
}

#if defined(__FreeBSD__)
/*
 * XXX
 * FreeBSD does not have SIOCGIFDATA.
 * To emulate it, the DIOCGIFSPEED ioctl was added to pf.
 */
u_int32_t
getifspeed(int pfdev, char *ifname)
{
	struct pf_ifspeed io;

	bzero(&io, sizeof io);
	if (strlcpy(io.ifname, ifname, IFNAMSIZ) >=
	    sizeof(io.ifname))
		errx(1, "getifspeed: strlcpy");
	if (ioctl(pfdev, DIOCGIFSPEED, &io) == -1)
		err(1, "DIOCGIFSPEED");
	return ((u_int32_t)io.baudrate);
}
#else
u_int32_t
getifspeed(char *ifname)
{
	int		s;
	struct ifreq	ifr;
	struct if_data	ifrdat;

	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
		err(1, "socket");
	if (strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)) >=
	    sizeof(ifr.ifr_name))
		errx(1, "getifspeed: strlcpy");
	ifr.ifr_data = (caddr_t)&ifrdat;
	if (ioctl(s, SIOCGIFDATA, (caddr_t)&ifr) == -1)
		err(1, "SIOCGIFDATA");
	if (shutdown(s, SHUT_RDWR) == -1)
		err(1, "shutdown");
	if (close(s))
		err(1, "close");
	return ((u_int32_t)ifrdat.ifi_baudrate);
}
#endif

u_long
getifmtu(char *ifname)
{
	int		s;
	struct ifreq	ifr;

	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
		err(1, "socket");
	if (strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)) >=
	    sizeof(ifr.ifr_name))
		errx(1, "getifmtu: strlcpy");
	if (ioctl(s, SIOCGIFMTU, (caddr_t)&ifr) == -1)
		err(1, "SIOCGIFMTU");
	if (shutdown(s, SHUT_RDWR) == -1)
		err(1, "shutdown");
	if (close(s))
		err(1, "close");
	if (ifr.ifr_mtu > 0)
		return (ifr.ifr_mtu);
	else {
		warnx("could not get mtu for %s, assuming 1500", ifname);
		return (1500);
	}
}

int
eval_queue_opts(struct pf_altq *pa, struct node_queue_opt *opts,
    u_int32_t ref_bw)
{
	int	errors = 0;

	switch (pa->scheduler) {
	case ALTQT_CBQ:
		pa->pq_u.cbq_opts = opts->data.cbq_opts;
		break;
	case ALTQT_PRIQ:
		pa->pq_u.priq_opts = opts->data.priq_opts;
		break;
	case ALTQT_HFSC:
		pa->pq_u.hfsc_opts.flags = opts->data.hfsc_opts.flags;
		if (opts->data.hfsc_opts.linkshare.used) {
			pa->pq_u.hfsc_opts.lssc_m1 =
			    eval_bwspec(&opts->data.hfsc_opts.linkshare.m1,
			    ref_bw);
			pa->pq_u.hfsc_opts.lssc_m2 =
			    eval_bwspec(&opts->data.hfsc_opts.linkshare.m2,
			    ref_bw);
			pa->pq_u.hfsc_opts.lssc_d =
			    opts->data.hfsc_opts.linkshare.d;
		}
		if (opts->data.hfsc_opts.realtime.used) {
			pa->pq_u.hfsc_opts.rtsc_m1 =
			    eval_bwspec(&opts->data.hfsc_opts.realtime.m1,
			    ref_bw);
			pa->pq_u.hfsc_opts.rtsc_m2 =
			    eval_bwspec(&opts->data.hfsc_opts.realtime.m2,
			    ref_bw);
			pa->pq_u.hfsc_opts.rtsc_d =
			    opts->data.hfsc_opts.realtime.d;
		}
		if (opts->data.hfsc_opts.upperlimit.used) {
			pa->pq_u.hfsc_opts.ulsc_m1 =
			    eval_bwspec(&opts->data.hfsc_opts.upperlimit.m1,
			    ref_bw);
			pa->pq_u.hfsc_opts.ulsc_m2 =
			    eval_bwspec(&opts->data.hfsc_opts.upperlimit.m2,
			    ref_bw);
			pa->pq_u.hfsc_opts.ulsc_d =
			    opts->data.hfsc_opts.upperlimit.d;
		}
		break;
	default:
		warnx("eval_queue_opts: unknown scheduler type %u",
		    opts->qtype);
		errors++;
		break;
	}

	return (errors);
}

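/*
 * A bandwidth spec is either an absolute rate or a percentage of ref_bw.
 * Note that the percentage case uses integer arithmetic (ref_bw / 100
 * first), so the result is effectively rounded down to 1% granularity.
 */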
u_int32_t
eval_bwspec(struct node_queue_bw *bw, u_int32_t ref_bw)
{
	if (bw->bw_absolute > 0)
		return (bw->bw_absolute);

	if (bw->bw_percent > 0)
		return (ref_bw / 100 * bw->bw_percent);

	return (0);
}

void
print_hfsc_sc(const char *scname, u_int m1, u_int d, u_int m2,
    const struct node_hfsc_sc *sc)
{
	printf(" %s", scname);

	if (d != 0) {
		printf("(");
		if (sc != NULL && sc->m1.bw_percent > 0)
			printf("%u%%", sc->m1.bw_percent);
		else
			printf("%s", rate2str((double)m1));
		printf(" %u", d);
	}

	if (sc != NULL && sc->m2.bw_percent > 0)
		printf(" %u%%", sc->m2.bw_percent);
	else
		printf(" %s", rate2str((double)m2));

	if (d != 0)
		printf(")");
}
1250