altq_cdnr.c revision 164033
1/*	$FreeBSD: head/sys/contrib/altq/altq/altq_cdnr.c 164033 2006-11-06 13:42:10Z rwatson $	*/
2/*	$KAME: altq_cdnr.c,v 1.14 2003/09/05 22:40:36 itojun Exp $	*/
3
4/*
5 * Copyright (C) 1999-2002
6 *	Sony Computer Science Laboratories Inc.  All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#if defined(__FreeBSD__) || defined(__NetBSD__)
31#include "opt_altq.h"
32#if (__FreeBSD__ != 2)
33#include "opt_inet.h"
34#ifdef __FreeBSD__
35#include "opt_inet6.h"
36#endif
37#endif
38#endif /* __FreeBSD__ || __NetBSD__ */
39
40#include <sys/param.h>
41#include <sys/malloc.h>
42#include <sys/mbuf.h>
43#include <sys/socket.h>
44#include <sys/sockio.h>
45#include <sys/systm.h>
46#include <sys/proc.h>
47#include <sys/errno.h>
48#include <sys/kernel.h>
49#include <sys/queue.h>
50
51#include <net/if.h>
52#include <net/if_types.h>
53#include <netinet/in.h>
54#include <netinet/in_systm.h>
55#include <netinet/ip.h>
56#ifdef INET6
57#include <netinet/ip6.h>
58#endif
59
60#include <altq/altq.h>
61#ifdef ALTQ3_COMPAT
62#include <altq/altq_conf.h>
63#endif
64#include <altq/altq_cdnr.h>
65
66#ifdef ALTQ3_COMPAT
67/*
68 * diffserv traffic conditioning module
69 */
70
71int altq_cdnr_enabled = 0;
72
73/* traffic conditioner is enabled by ALTQ_CDNR option in opt_altq.h */
74#ifdef ALTQ_CDNR
75
/* tcb_list keeps all top-level cdnr's allocated (one per attached interface). */
77static LIST_HEAD(, top_cdnr) tcb_list;
78
79static int altq_cdnr_input(struct mbuf *, int);
80static struct top_cdnr *tcb_lookup(char *ifname);
81static struct cdnr_block *cdnr_handle2cb(u_long);
82static u_long cdnr_cb2handle(struct cdnr_block *);
83static void *cdnr_cballoc(struct top_cdnr *, int,
84       struct tc_action *(*)(struct cdnr_block *, struct cdnr_pktinfo *));
85static void cdnr_cbdestroy(void *);
86static int tca_verify_action(struct tc_action *);
87static void tca_import_action(struct tc_action *, struct tc_action *);
88static void tca_invalidate_action(struct tc_action *);
89
90static int generic_element_destroy(struct cdnr_block *);
91static struct top_cdnr *top_create(struct ifaltq *);
92static int top_destroy(struct top_cdnr *);
93static struct cdnr_block *element_create(struct top_cdnr *, struct tc_action *);
94static int element_destroy(struct cdnr_block *);
95static void tb_import_profile(struct tbe *, struct tb_profile *);
96static struct tbmeter *tbm_create(struct top_cdnr *, struct tb_profile *,
97				  struct tc_action *, struct tc_action *);
98static int tbm_destroy(struct tbmeter *);
99static struct tc_action *tbm_input(struct cdnr_block *, struct cdnr_pktinfo *);
100static struct trtcm *trtcm_create(struct top_cdnr *,
101		  struct tb_profile *, struct tb_profile *,
102		  struct tc_action *, struct tc_action *, struct tc_action *,
103		  int);
104static int trtcm_destroy(struct trtcm *);
105static struct tc_action *trtcm_input(struct cdnr_block *, struct cdnr_pktinfo *);
106static struct tswtcm *tswtcm_create(struct top_cdnr *,
107		  u_int32_t, u_int32_t, u_int32_t,
108		  struct tc_action *, struct tc_action *, struct tc_action *);
109static int tswtcm_destroy(struct tswtcm *);
110static struct tc_action *tswtcm_input(struct cdnr_block *, struct cdnr_pktinfo *);
111
112static int cdnrcmd_if_attach(char *);
113static int cdnrcmd_if_detach(char *);
114static int cdnrcmd_add_element(struct cdnr_add_element *);
115static int cdnrcmd_delete_element(struct cdnr_delete_element *);
116static int cdnrcmd_add_filter(struct cdnr_add_filter *);
117static int cdnrcmd_delete_filter(struct cdnr_delete_filter *);
118static int cdnrcmd_add_tbm(struct cdnr_add_tbmeter *);
119static int cdnrcmd_modify_tbm(struct cdnr_modify_tbmeter *);
120static int cdnrcmd_tbm_stats(struct cdnr_tbmeter_stats *);
121static int cdnrcmd_add_trtcm(struct cdnr_add_trtcm *);
122static int cdnrcmd_modify_trtcm(struct cdnr_modify_trtcm *);
123static int cdnrcmd_tcm_stats(struct cdnr_tcm_stats *);
124static int cdnrcmd_add_tswtcm(struct cdnr_add_tswtcm *);
125static int cdnrcmd_modify_tswtcm(struct cdnr_modify_tswtcm *);
126static int cdnrcmd_get_stats(struct cdnr_get_stats *);
127
128altqdev_decl(cdnr);
129
130/*
131 * top level input function called from ip_input.
132 * should be called before converting header fields to host-byte-order.
133 */
134int
135altq_cdnr_input(m, af)
136	struct mbuf	*m;
137	int		af;	/* address family */
138{
139	struct ifnet		*ifp;
140	struct ip		*ip;
141	struct top_cdnr		*top;
142	struct tc_action	*tca;
143	struct cdnr_block	*cb;
144	struct cdnr_pktinfo	pktinfo;
145
146	ifp = m->m_pkthdr.rcvif;
147	if (!ALTQ_IS_CNDTNING(&ifp->if_snd))
148		/* traffic conditioner is not enabled on this interface */
149		return (1);
150
151	top = ifp->if_snd.altq_cdnr;
152
153	ip = mtod(m, struct ip *);
154#ifdef INET6
155	if (af == AF_INET6) {
156		u_int32_t flowlabel;
157
158		flowlabel = ((struct ip6_hdr *)ip)->ip6_flow;
159		pktinfo.pkt_dscp = (ntohl(flowlabel) >> 20) & DSCP_MASK;
160	} else
161#endif
162		pktinfo.pkt_dscp = ip->ip_tos & DSCP_MASK;
163	pktinfo.pkt_len = m_pktlen(m);
164
165	tca = NULL;
166
167	cb = acc_classify(&top->tc_classifier, m, af);
168	if (cb != NULL)
169		tca = &cb->cb_action;
170
171	if (tca == NULL)
172		tca = &top->tc_block.cb_action;
173
174	while (1) {
175		PKTCNTR_ADD(&top->tc_cnts[tca->tca_code], pktinfo.pkt_len);
176
177		switch (tca->tca_code) {
178		case TCACODE_PASS:
179			return (1);
180		case TCACODE_DROP:
181			m_freem(m);
182			return (0);
183		case TCACODE_RETURN:
184			return (0);
185		case TCACODE_MARK:
186#ifdef INET6
187			if (af == AF_INET6) {
188				struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
189				u_int32_t flowlabel;
190
191				flowlabel = ntohl(ip6->ip6_flow);
192				flowlabel = (tca->tca_dscp << 20) |
193					(flowlabel & ~(DSCP_MASK << 20));
194				ip6->ip6_flow = htonl(flowlabel);
195			} else
196#endif
197				ip->ip_tos = tca->tca_dscp |
198					(ip->ip_tos & DSCP_CUMASK);
199			return (1);
200		case TCACODE_NEXT:
201			cb = tca->tca_next;
202			tca = (*cb->cb_input)(cb, &pktinfo);
203			break;
204		case TCACODE_NONE:
205		default:
206			return (1);
207		}
208	}
209}
210
211static struct top_cdnr *
212tcb_lookup(ifname)
213	char *ifname;
214{
215	struct top_cdnr *top;
216	struct ifnet *ifp;
217
218	if ((ifp = ifunit(ifname)) != NULL)
219		LIST_FOREACH(top, &tcb_list, tc_next)
220			if (top->tc_ifq->altq_ifp == ifp)
221				return (top);
222	return (NULL);
223}
224
225static struct cdnr_block *
226cdnr_handle2cb(handle)
227	u_long handle;
228{
229	struct cdnr_block *cb;
230
231	cb = (struct cdnr_block *)handle;
232	if (handle != ALIGN(cb))
233		return (NULL);
234
235	if (cb == NULL || cb->cb_handle != handle)
236		return (NULL);
237	return (cb);
238}
239
240static u_long
241cdnr_cb2handle(cb)
242	struct cdnr_block *cb;
243{
244	return (cb->cb_handle);
245}
246
247static void *
248cdnr_cballoc(top, type, input_func)
249	struct top_cdnr *top;
250	int type;
251	struct tc_action *(*input_func)(struct cdnr_block *,
252					struct cdnr_pktinfo *);
253{
254	struct cdnr_block *cb;
255	int size;
256
257	switch (type) {
258	case TCETYPE_TOP:
259		size = sizeof(struct top_cdnr);
260		break;
261	case TCETYPE_ELEMENT:
262		size = sizeof(struct cdnr_block);
263		break;
264	case TCETYPE_TBMETER:
265		size = sizeof(struct tbmeter);
266		break;
267	case TCETYPE_TRTCM:
268		size = sizeof(struct trtcm);
269		break;
270	case TCETYPE_TSWTCM:
271		size = sizeof(struct tswtcm);
272		break;
273	default:
274		return (NULL);
275	}
276
277	MALLOC(cb, struct cdnr_block *, size, M_DEVBUF, M_WAITOK);
278	if (cb == NULL)
279		return (NULL);
280	bzero(cb, size);
281
282	cb->cb_len = size;
283	cb->cb_type = type;
284	cb->cb_ref = 0;
285	cb->cb_handle = (u_long)cb;
286	if (top == NULL)
287		cb->cb_top = (struct top_cdnr *)cb;
288	else
289		cb->cb_top = top;
290
291	if (input_func != NULL) {
292		/*
293		 * if this cdnr has an action function,
294		 * make tc_action to call itself.
295		 */
296		cb->cb_action.tca_code = TCACODE_NEXT;
297		cb->cb_action.tca_next = cb;
298		cb->cb_input = input_func;
299	} else
300		cb->cb_action.tca_code = TCACODE_NONE;
301
302	/* if this isn't top, register the element to the top level cdnr */
303	if (top != NULL)
304		LIST_INSERT_HEAD(&top->tc_elements, cb, cb_next);
305
306	return ((void *)cb);
307}
308
309static void
310cdnr_cbdestroy(cblock)
311	void *cblock;
312{
313	struct cdnr_block *cb = cblock;
314
315	/* delete filters belonging to this cdnr */
316	acc_discard_filters(&cb->cb_top->tc_classifier, cb, 0);
317
318	/* remove from the top level cdnr */
319	if (cb->cb_top != cblock)
320		LIST_REMOVE(cb, cb_next);
321
322	FREE(cb, M_DEVBUF);
323}
324
325/*
326 * conditioner common destroy routine
327 */
328static int
329generic_element_destroy(cb)
330	struct cdnr_block *cb;
331{
332	int error = 0;
333
334	switch (cb->cb_type) {
335	case TCETYPE_TOP:
336		error = top_destroy((struct top_cdnr *)cb);
337		break;
338	case TCETYPE_ELEMENT:
339		error = element_destroy(cb);
340		break;
341	case TCETYPE_TBMETER:
342		error = tbm_destroy((struct tbmeter *)cb);
343		break;
344	case TCETYPE_TRTCM:
345		error = trtcm_destroy((struct trtcm *)cb);
346		break;
347	case TCETYPE_TSWTCM:
348		error = tswtcm_destroy((struct tswtcm *)cb);
349		break;
350	default:
351		error = EINVAL;
352	}
353	return (error);
354}
355
356static int
357tca_verify_action(utca)
358	struct tc_action *utca;
359{
360	switch (utca->tca_code) {
361	case TCACODE_PASS:
362	case TCACODE_DROP:
363	case TCACODE_MARK:
364		/* these are ok */
365		break;
366
367	case TCACODE_HANDLE:
368		/* verify handle value */
369		if (cdnr_handle2cb(utca->tca_handle) == NULL)
370			return (-1);
371		break;
372
373	case TCACODE_NONE:
374	case TCACODE_RETURN:
375	case TCACODE_NEXT:
376	default:
377		/* should not be passed from a user */
378		return (-1);
379	}
380	return (0);
381}
382
383static void
384tca_import_action(ktca, utca)
385	struct tc_action *ktca, *utca;
386{
387	struct cdnr_block *cb;
388
389	*ktca = *utca;
390	if (ktca->tca_code == TCACODE_HANDLE) {
391		cb = cdnr_handle2cb(ktca->tca_handle);
392		if (cb == NULL) {
393			ktca->tca_code = TCACODE_NONE;
394			return;
395		}
396		ktca->tca_code = TCACODE_NEXT;
397		ktca->tca_next = cb;
398		cb->cb_ref++;
399	} else if (ktca->tca_code == TCACODE_MARK) {
400		ktca->tca_dscp &= DSCP_MASK;
401	}
402	return;
403}
404
405static void
406tca_invalidate_action(tca)
407	struct tc_action *tca;
408{
409	struct cdnr_block *cb;
410
411	if (tca->tca_code == TCACODE_NEXT) {
412		cb = tca->tca_next;
413		if (cb == NULL)
414			return;
415		cb->cb_ref--;
416	}
417	tca->tca_code = TCACODE_NONE;
418}
419
420/*
421 * top level traffic conditioner
422 */
423static struct top_cdnr *
424top_create(ifq)
425	struct ifaltq *ifq;
426{
427	struct top_cdnr *top;
428
429	if ((top = cdnr_cballoc(NULL, TCETYPE_TOP, NULL)) == NULL)
430		return (NULL);
431
432	top->tc_ifq = ifq;
433	/* set default action for the top level conditioner */
434	top->tc_block.cb_action.tca_code = TCACODE_PASS;
435
436	LIST_INSERT_HEAD(&tcb_list, top, tc_next);
437
438	ifq->altq_cdnr = top;
439
440	return (top);
441}
442
443static int
444top_destroy(top)
445	struct top_cdnr *top;
446{
447	struct cdnr_block *cb;
448
449	if (ALTQ_IS_CNDTNING(top->tc_ifq))
450		ALTQ_CLEAR_CNDTNING(top->tc_ifq);
451	top->tc_ifq->altq_cdnr = NULL;
452
453	/*
454	 * destroy all the conditioner elements belonging to this interface
455	 */
456	while ((cb = LIST_FIRST(&top->tc_elements)) != NULL) {
457		while (cb != NULL && cb->cb_ref > 0)
458			cb = LIST_NEXT(cb, cb_next);
459		if (cb != NULL)
460			generic_element_destroy(cb);
461	}
462
463	LIST_REMOVE(top, tc_next);
464
465	cdnr_cbdestroy(top);
466
467	/* if there is no active conditioner, remove the input hook */
468	if (altq_input != NULL) {
469		LIST_FOREACH(top, &tcb_list, tc_next)
470			if (ALTQ_IS_CNDTNING(top->tc_ifq))
471				break;
472		if (top == NULL)
473			altq_input = NULL;
474	}
475
476	return (0);
477}
478
479/*
480 * simple tc elements without input function (e.g., dropper and makers).
481 */
482static struct cdnr_block *
483element_create(top, action)
484	struct top_cdnr *top;
485	struct tc_action *action;
486{
487	struct cdnr_block *cb;
488
489	if (tca_verify_action(action) < 0)
490		return (NULL);
491
492	if ((cb = cdnr_cballoc(top, TCETYPE_ELEMENT, NULL)) == NULL)
493		return (NULL);
494
495	tca_import_action(&cb->cb_action, action);
496
497	return (cb);
498}
499
500static int
501element_destroy(cb)
502	struct cdnr_block *cb;
503{
504	if (cb->cb_ref > 0)
505		return (EBUSY);
506
507	tca_invalidate_action(&cb->cb_action);
508
509	cdnr_cbdestroy(cb);
510	return (0);
511}
512
513/*
514 * internal representation of token bucket parameters
515 *	rate: 	byte_per_unittime << 32
516 *		(((bits_per_sec) / 8) << 32) / machclk_freq
517 *	depth:	byte << 32
518 *
519 */
520#define	TB_SHIFT	32
521#define	TB_SCALE(x)	((u_int64_t)(x) << TB_SHIFT)
522#define	TB_UNSCALE(x)	((x) >> TB_SHIFT)
523
524static void
525tb_import_profile(tb, profile)
526	struct tbe *tb;
527	struct tb_profile *profile;
528{
529	tb->rate = TB_SCALE(profile->rate / 8) / machclk_freq;
530	tb->depth = TB_SCALE(profile->depth);
531	if (tb->rate > 0)
532		tb->filluptime = tb->depth / tb->rate;
533	else
534		tb->filluptime = 0xffffffffffffffffLL;
535	tb->token = tb->depth;
536	tb->last = read_machclk();
537}
538
539/*
540 * simple token bucket meter
541 */
542static struct tbmeter *
543tbm_create(top, profile, in_action, out_action)
544	struct top_cdnr *top;
545	struct tb_profile *profile;
546	struct tc_action *in_action, *out_action;
547{
548	struct tbmeter *tbm = NULL;
549
550	if (tca_verify_action(in_action) < 0
551	    || tca_verify_action(out_action) < 0)
552		return (NULL);
553
554	if ((tbm = cdnr_cballoc(top, TCETYPE_TBMETER,
555				tbm_input)) == NULL)
556		return (NULL);
557
558	tb_import_profile(&tbm->tb, profile);
559
560	tca_import_action(&tbm->in_action, in_action);
561	tca_import_action(&tbm->out_action, out_action);
562
563	return (tbm);
564}
565
566static int
567tbm_destroy(tbm)
568	struct tbmeter *tbm;
569{
570	if (tbm->cdnrblk.cb_ref > 0)
571		return (EBUSY);
572
573	tca_invalidate_action(&tbm->in_action);
574	tca_invalidate_action(&tbm->out_action);
575
576	cdnr_cbdestroy(tbm);
577	return (0);
578}
579
580static struct tc_action *
581tbm_input(cb, pktinfo)
582	struct cdnr_block *cb;
583	struct cdnr_pktinfo *pktinfo;
584{
585	struct tbmeter *tbm = (struct tbmeter *)cb;
586	u_int64_t	len;
587	u_int64_t	interval, now;
588
589	len = TB_SCALE(pktinfo->pkt_len);
590
591	if (tbm->tb.token < len) {
592		now = read_machclk();
593		interval = now - tbm->tb.last;
594		if (interval >= tbm->tb.filluptime)
595			tbm->tb.token = tbm->tb.depth;
596		else {
597			tbm->tb.token += interval * tbm->tb.rate;
598			if (tbm->tb.token > tbm->tb.depth)
599				tbm->tb.token = tbm->tb.depth;
600		}
601		tbm->tb.last = now;
602	}
603
604	if (tbm->tb.token < len) {
605		PKTCNTR_ADD(&tbm->out_cnt, pktinfo->pkt_len);
606		return (&tbm->out_action);
607	}
608
609	tbm->tb.token -= len;
610	PKTCNTR_ADD(&tbm->in_cnt, pktinfo->pkt_len);
611	return (&tbm->in_action);
612}
613
614/*
615 * two rate three color marker
616 * as described in draft-heinanen-diffserv-trtcm-01.txt
617 */
618static struct trtcm *
619trtcm_create(top, cmtd_profile, peak_profile,
620	     green_action, yellow_action, red_action, coloraware)
621	struct top_cdnr *top;
622	struct tb_profile *cmtd_profile, *peak_profile;
623	struct tc_action *green_action, *yellow_action, *red_action;
624	int	coloraware;
625{
626	struct trtcm *tcm = NULL;
627
628	if (tca_verify_action(green_action) < 0
629	    || tca_verify_action(yellow_action) < 0
630	    || tca_verify_action(red_action) < 0)
631		return (NULL);
632
633	if ((tcm = cdnr_cballoc(top, TCETYPE_TRTCM,
634				trtcm_input)) == NULL)
635		return (NULL);
636
637	tb_import_profile(&tcm->cmtd_tb, cmtd_profile);
638	tb_import_profile(&tcm->peak_tb, peak_profile);
639
640	tca_import_action(&tcm->green_action, green_action);
641	tca_import_action(&tcm->yellow_action, yellow_action);
642	tca_import_action(&tcm->red_action, red_action);
643
644	/* set dscps to use */
645	if (tcm->green_action.tca_code == TCACODE_MARK)
646		tcm->green_dscp = tcm->green_action.tca_dscp & DSCP_MASK;
647	else
648		tcm->green_dscp = DSCP_AF11;
649	if (tcm->yellow_action.tca_code == TCACODE_MARK)
650		tcm->yellow_dscp = tcm->yellow_action.tca_dscp & DSCP_MASK;
651	else
652		tcm->yellow_dscp = DSCP_AF12;
653	if (tcm->red_action.tca_code == TCACODE_MARK)
654		tcm->red_dscp = tcm->red_action.tca_dscp & DSCP_MASK;
655	else
656		tcm->red_dscp = DSCP_AF13;
657
658	tcm->coloraware = coloraware;
659
660	return (tcm);
661}
662
663static int
664trtcm_destroy(tcm)
665	struct trtcm *tcm;
666{
667	if (tcm->cdnrblk.cb_ref > 0)
668		return (EBUSY);
669
670	tca_invalidate_action(&tcm->green_action);
671	tca_invalidate_action(&tcm->yellow_action);
672	tca_invalidate_action(&tcm->red_action);
673
674	cdnr_cbdestroy(tcm);
675	return (0);
676}
677
678static struct tc_action *
679trtcm_input(cb, pktinfo)
680	struct cdnr_block *cb;
681	struct cdnr_pktinfo *pktinfo;
682{
683	struct trtcm *tcm = (struct trtcm *)cb;
684	u_int64_t	len;
685	u_int64_t	interval, now;
686	u_int8_t	color;
687
688	len = TB_SCALE(pktinfo->pkt_len);
689	if (tcm->coloraware) {
690		color = pktinfo->pkt_dscp;
691		if (color != tcm->yellow_dscp && color != tcm->red_dscp)
692			color = tcm->green_dscp;
693	} else {
694		/* if color-blind, precolor it as green */
695		color = tcm->green_dscp;
696	}
697
698	now = read_machclk();
699	if (tcm->cmtd_tb.token < len) {
700		interval = now - tcm->cmtd_tb.last;
701		if (interval >= tcm->cmtd_tb.filluptime)
702			tcm->cmtd_tb.token = tcm->cmtd_tb.depth;
703		else {
704			tcm->cmtd_tb.token += interval * tcm->cmtd_tb.rate;
705			if (tcm->cmtd_tb.token > tcm->cmtd_tb.depth)
706				tcm->cmtd_tb.token = tcm->cmtd_tb.depth;
707		}
708		tcm->cmtd_tb.last = now;
709	}
710	if (tcm->peak_tb.token < len) {
711		interval = now - tcm->peak_tb.last;
712		if (interval >= tcm->peak_tb.filluptime)
713			tcm->peak_tb.token = tcm->peak_tb.depth;
714		else {
715			tcm->peak_tb.token += interval * tcm->peak_tb.rate;
716			if (tcm->peak_tb.token > tcm->peak_tb.depth)
717				tcm->peak_tb.token = tcm->peak_tb.depth;
718		}
719		tcm->peak_tb.last = now;
720	}
721
722	if (color == tcm->red_dscp || tcm->peak_tb.token < len) {
723		pktinfo->pkt_dscp = tcm->red_dscp;
724		PKTCNTR_ADD(&tcm->red_cnt, pktinfo->pkt_len);
725		return (&tcm->red_action);
726	}
727
728	if (color == tcm->yellow_dscp || tcm->cmtd_tb.token < len) {
729		pktinfo->pkt_dscp = tcm->yellow_dscp;
730		tcm->peak_tb.token -= len;
731		PKTCNTR_ADD(&tcm->yellow_cnt, pktinfo->pkt_len);
732		return (&tcm->yellow_action);
733	}
734
735	pktinfo->pkt_dscp = tcm->green_dscp;
736	tcm->cmtd_tb.token -= len;
737	tcm->peak_tb.token -= len;
738	PKTCNTR_ADD(&tcm->green_cnt, pktinfo->pkt_len);
739	return (&tcm->green_action);
740}
741
742/*
743 * time sliding window three color marker
744 * as described in draft-fang-diffserv-tc-tswtcm-00.txt
745 */
746static struct tswtcm *
747tswtcm_create(top, cmtd_rate, peak_rate, avg_interval,
748	      green_action, yellow_action, red_action)
749	struct top_cdnr *top;
750	u_int32_t	cmtd_rate, peak_rate, avg_interval;
751	struct tc_action *green_action, *yellow_action, *red_action;
752{
753	struct tswtcm *tsw;
754
755	if (tca_verify_action(green_action) < 0
756	    || tca_verify_action(yellow_action) < 0
757	    || tca_verify_action(red_action) < 0)
758		return (NULL);
759
760	if ((tsw = cdnr_cballoc(top, TCETYPE_TSWTCM,
761				tswtcm_input)) == NULL)
762		return (NULL);
763
764	tca_import_action(&tsw->green_action, green_action);
765	tca_import_action(&tsw->yellow_action, yellow_action);
766	tca_import_action(&tsw->red_action, red_action);
767
768	/* set dscps to use */
769	if (tsw->green_action.tca_code == TCACODE_MARK)
770		tsw->green_dscp = tsw->green_action.tca_dscp & DSCP_MASK;
771	else
772		tsw->green_dscp = DSCP_AF11;
773	if (tsw->yellow_action.tca_code == TCACODE_MARK)
774		tsw->yellow_dscp = tsw->yellow_action.tca_dscp & DSCP_MASK;
775	else
776		tsw->yellow_dscp = DSCP_AF12;
777	if (tsw->red_action.tca_code == TCACODE_MARK)
778		tsw->red_dscp = tsw->red_action.tca_dscp & DSCP_MASK;
779	else
780		tsw->red_dscp = DSCP_AF13;
781
782	/* convert rates from bits/sec to bytes/sec */
783	tsw->cmtd_rate = cmtd_rate / 8;
784	tsw->peak_rate = peak_rate / 8;
785	tsw->avg_rate = 0;
786
787	/* timewin is converted from msec to machine clock unit */
788	tsw->timewin = (u_int64_t)machclk_freq * avg_interval / 1000;
789
790	return (tsw);
791}
792
793static int
794tswtcm_destroy(tsw)
795	struct tswtcm *tsw;
796{
797	if (tsw->cdnrblk.cb_ref > 0)
798		return (EBUSY);
799
800	tca_invalidate_action(&tsw->green_action);
801	tca_invalidate_action(&tsw->yellow_action);
802	tca_invalidate_action(&tsw->red_action);
803
804	cdnr_cbdestroy(tsw);
805	return (0);
806}
807
808static struct tc_action *
809tswtcm_input(cb, pktinfo)
810	struct cdnr_block *cb;
811	struct cdnr_pktinfo *pktinfo;
812{
813	struct tswtcm	*tsw = (struct tswtcm *)cb;
814	int		len;
815	u_int32_t	avg_rate;
816	u_int64_t	interval, now, tmp;
817
818	/*
819	 * rate estimator
820	 */
821	len = pktinfo->pkt_len;
822	now = read_machclk();
823
824	interval = now - tsw->t_front;
825	/*
826	 * calculate average rate:
827	 *	avg = (avg * timewin + pkt_len)/(timewin + interval)
828	 * pkt_len needs to be multiplied by machclk_freq in order to
829	 * get (bytes/sec).
830	 * note: when avg_rate (bytes/sec) and timewin (machclk unit) are
831	 * less than 32 bits, the following 64-bit operation has enough
832	 * precision.
833	 */
834	tmp = ((u_int64_t)tsw->avg_rate * tsw->timewin
835	       + (u_int64_t)len * machclk_freq) / (tsw->timewin + interval);
836	tsw->avg_rate = avg_rate = (u_int32_t)tmp;
837	tsw->t_front = now;
838
839	/*
840	 * marker
841	 */
842	if (avg_rate > tsw->cmtd_rate) {
843		u_int32_t randval = arc4random() % avg_rate;
844
845		if (avg_rate > tsw->peak_rate) {
846			if (randval < avg_rate - tsw->peak_rate) {
847				/* mark red */
848				pktinfo->pkt_dscp = tsw->red_dscp;
849				PKTCNTR_ADD(&tsw->red_cnt, len);
850				return (&tsw->red_action);
851			} else if (randval < avg_rate - tsw->cmtd_rate)
852				goto mark_yellow;
853		} else {
854			/* peak_rate >= avg_rate > cmtd_rate */
855			if (randval < avg_rate - tsw->cmtd_rate) {
856			mark_yellow:
857				pktinfo->pkt_dscp = tsw->yellow_dscp;
858				PKTCNTR_ADD(&tsw->yellow_cnt, len);
859				return (&tsw->yellow_action);
860			}
861		}
862	}
863
864	/* mark green */
865	pktinfo->pkt_dscp = tsw->green_dscp;
866	PKTCNTR_ADD(&tsw->green_cnt, len);
867	return (&tsw->green_action);
868}
869
870/*
871 * ioctl requests
872 */
873static int
874cdnrcmd_if_attach(ifname)
875	char *ifname;
876{
877	struct ifnet *ifp;
878	struct top_cdnr *top;
879
880	if ((ifp = ifunit(ifname)) == NULL)
881		return (EBADF);
882
883	if (ifp->if_snd.altq_cdnr != NULL)
884		return (EBUSY);
885
886	if ((top = top_create(&ifp->if_snd)) == NULL)
887		return (ENOMEM);
888	return (0);
889}
890
891static int
892cdnrcmd_if_detach(ifname)
893	char *ifname;
894{
895	struct top_cdnr *top;
896
897	if ((top = tcb_lookup(ifname)) == NULL)
898		return (EBADF);
899
900	return top_destroy(top);
901}
902
903static int
904cdnrcmd_add_element(ap)
905	struct cdnr_add_element *ap;
906{
907	struct top_cdnr *top;
908	struct cdnr_block *cb;
909
910	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
911		return (EBADF);
912
913	cb = element_create(top, &ap->action);
914	if (cb == NULL)
915		return (EINVAL);
916	/* return a class handle to the user */
917	ap->cdnr_handle = cdnr_cb2handle(cb);
918	return (0);
919}
920
921static int
922cdnrcmd_delete_element(ap)
923	struct cdnr_delete_element *ap;
924{
925	struct top_cdnr *top;
926	struct cdnr_block *cb;
927
928	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
929		return (EBADF);
930
931	if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
932		return (EINVAL);
933
934	if (cb->cb_type != TCETYPE_ELEMENT)
935		return generic_element_destroy(cb);
936
937	return element_destroy(cb);
938}
939
940static int
941cdnrcmd_add_filter(ap)
942	struct cdnr_add_filter *ap;
943{
944	struct top_cdnr *top;
945	struct cdnr_block *cb;
946
947	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
948		return (EBADF);
949
950	if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
951		return (EINVAL);
952
953	return acc_add_filter(&top->tc_classifier, &ap->filter,
954			      cb, &ap->filter_handle);
955}
956
957static int
958cdnrcmd_delete_filter(ap)
959	struct cdnr_delete_filter *ap;
960{
961	struct top_cdnr *top;
962
963	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
964		return (EBADF);
965
966	return acc_delete_filter(&top->tc_classifier, ap->filter_handle);
967}
968
969static int
970cdnrcmd_add_tbm(ap)
971	struct cdnr_add_tbmeter *ap;
972{
973	struct top_cdnr *top;
974	struct tbmeter *tbm;
975
976	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
977		return (EBADF);
978
979	tbm = tbm_create(top, &ap->profile, &ap->in_action, &ap->out_action);
980	if (tbm == NULL)
981		return (EINVAL);
982	/* return a class handle to the user */
983	ap->cdnr_handle = cdnr_cb2handle(&tbm->cdnrblk);
984	return (0);
985}
986
987static int
988cdnrcmd_modify_tbm(ap)
989	struct cdnr_modify_tbmeter *ap;
990{
991	struct tbmeter *tbm;
992
993	if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
994		return (EINVAL);
995
996	tb_import_profile(&tbm->tb, &ap->profile);
997
998	return (0);
999}
1000
1001static int
1002cdnrcmd_tbm_stats(ap)
1003	struct cdnr_tbmeter_stats *ap;
1004{
1005	struct tbmeter *tbm;
1006
1007	if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
1008		return (EINVAL);
1009
1010	ap->in_cnt = tbm->in_cnt;
1011	ap->out_cnt = tbm->out_cnt;
1012
1013	return (0);
1014}
1015
1016static int
1017cdnrcmd_add_trtcm(ap)
1018	struct cdnr_add_trtcm *ap;
1019{
1020	struct top_cdnr *top;
1021	struct trtcm *tcm;
1022
1023	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
1024		return (EBADF);
1025
1026	tcm = trtcm_create(top, &ap->cmtd_profile, &ap->peak_profile,
1027			   &ap->green_action, &ap->yellow_action,
1028			   &ap->red_action, ap->coloraware);
1029	if (tcm == NULL)
1030		return (EINVAL);
1031
1032	/* return a class handle to the user */
1033	ap->cdnr_handle = cdnr_cb2handle(&tcm->cdnrblk);
1034	return (0);
1035}
1036
1037static int
1038cdnrcmd_modify_trtcm(ap)
1039	struct cdnr_modify_trtcm *ap;
1040{
1041	struct trtcm *tcm;
1042
1043	if ((tcm = (struct trtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
1044		return (EINVAL);
1045
1046	tb_import_profile(&tcm->cmtd_tb, &ap->cmtd_profile);
1047	tb_import_profile(&tcm->peak_tb, &ap->peak_profile);
1048
1049	return (0);
1050}
1051
1052static int
1053cdnrcmd_tcm_stats(ap)
1054	struct cdnr_tcm_stats *ap;
1055{
1056	struct cdnr_block *cb;
1057
1058	if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
1059		return (EINVAL);
1060
1061	if (cb->cb_type == TCETYPE_TRTCM) {
1062	    struct trtcm *tcm = (struct trtcm *)cb;
1063
1064	    ap->green_cnt = tcm->green_cnt;
1065	    ap->yellow_cnt = tcm->yellow_cnt;
1066	    ap->red_cnt = tcm->red_cnt;
1067	} else if (cb->cb_type == TCETYPE_TSWTCM) {
1068	    struct tswtcm *tsw = (struct tswtcm *)cb;
1069
1070	    ap->green_cnt = tsw->green_cnt;
1071	    ap->yellow_cnt = tsw->yellow_cnt;
1072	    ap->red_cnt = tsw->red_cnt;
1073	} else
1074	    return (EINVAL);
1075
1076	return (0);
1077}
1078
1079static int
1080cdnrcmd_add_tswtcm(ap)
1081	struct cdnr_add_tswtcm *ap;
1082{
1083	struct top_cdnr *top;
1084	struct tswtcm *tsw;
1085
1086	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
1087		return (EBADF);
1088
1089	if (ap->cmtd_rate > ap->peak_rate)
1090		return (EINVAL);
1091
1092	tsw = tswtcm_create(top, ap->cmtd_rate, ap->peak_rate,
1093			    ap->avg_interval, &ap->green_action,
1094			    &ap->yellow_action, &ap->red_action);
1095	if (tsw == NULL)
1096	    return (EINVAL);
1097
1098	/* return a class handle to the user */
1099	ap->cdnr_handle = cdnr_cb2handle(&tsw->cdnrblk);
1100	return (0);
1101}
1102
1103static int
1104cdnrcmd_modify_tswtcm(ap)
1105	struct cdnr_modify_tswtcm *ap;
1106{
1107	struct tswtcm *tsw;
1108
1109	if ((tsw = (struct tswtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
1110		return (EINVAL);
1111
1112	if (ap->cmtd_rate > ap->peak_rate)
1113		return (EINVAL);
1114
1115	/* convert rates from bits/sec to bytes/sec */
1116	tsw->cmtd_rate = ap->cmtd_rate / 8;
1117	tsw->peak_rate = ap->peak_rate / 8;
1118	tsw->avg_rate = 0;
1119
1120	/* timewin is converted from msec to machine clock unit */
1121	tsw->timewin = (u_int64_t)machclk_freq * ap->avg_interval / 1000;
1122
1123	return (0);
1124}
1125
1126static int
1127cdnrcmd_get_stats(ap)
1128	struct cdnr_get_stats *ap;
1129{
1130	struct top_cdnr *top;
1131	struct cdnr_block *cb;
1132	struct tbmeter *tbm;
1133	struct trtcm *tcm;
1134	struct tswtcm *tsw;
1135	struct tce_stats tce, *usp;
1136	int error, n, nskip, nelements;
1137
1138	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
1139		return (EBADF);
1140
1141	/* copy action stats */
1142	bcopy(top->tc_cnts, ap->cnts, sizeof(ap->cnts));
1143
1144	/* stats for each element */
1145	nelements = ap->nelements;
1146	usp = ap->tce_stats;
1147	if (nelements <= 0 || usp == NULL)
1148		return (0);
1149
1150	nskip = ap->nskip;
1151	n = 0;
1152	LIST_FOREACH(cb, &top->tc_elements, cb_next) {
1153		if (nskip > 0) {
1154			nskip--;
1155			continue;
1156		}
1157
1158		bzero(&tce, sizeof(tce));
1159		tce.tce_handle = cb->cb_handle;
1160		tce.tce_type = cb->cb_type;
1161		switch (cb->cb_type) {
1162		case TCETYPE_TBMETER:
1163			tbm = (struct tbmeter *)cb;
1164			tce.tce_cnts[0] = tbm->in_cnt;
1165			tce.tce_cnts[1] = tbm->out_cnt;
1166			break;
1167		case TCETYPE_TRTCM:
1168			tcm = (struct trtcm *)cb;
1169			tce.tce_cnts[0] = tcm->green_cnt;
1170			tce.tce_cnts[1] = tcm->yellow_cnt;
1171			tce.tce_cnts[2] = tcm->red_cnt;
1172			break;
1173		case TCETYPE_TSWTCM:
1174			tsw = (struct tswtcm *)cb;
1175			tce.tce_cnts[0] = tsw->green_cnt;
1176			tce.tce_cnts[1] = tsw->yellow_cnt;
1177			tce.tce_cnts[2] = tsw->red_cnt;
1178			break;
1179		default:
1180			continue;
1181		}
1182
1183		if ((error = copyout((caddr_t)&tce, (caddr_t)usp++,
1184				     sizeof(tce))) != 0)
1185			return (error);
1186
1187		if (++n == nelements)
1188			break;
1189	}
1190	ap->nelements = n;
1191
1192	return (0);
1193}
1194
1195/*
1196 * conditioner device interface
1197 */
1198int
1199cdnropen(dev, flag, fmt, p)
1200	dev_t dev;
1201	int flag, fmt;
1202#if (__FreeBSD_version > 500000)
1203	struct thread *p;
1204#else
1205	struct proc *p;
1206#endif
1207{
1208	if (machclk_freq == 0)
1209		init_machclk();
1210
1211	if (machclk_freq == 0) {
1212		printf("cdnr: no cpu clock available!\n");
1213		return (ENXIO);
1214	}
1215
1216	/* everything will be done when the queueing scheme is attached. */
1217	return 0;
1218}
1219
1220int
1221cdnrclose(dev, flag, fmt, p)
1222	dev_t dev;
1223	int flag, fmt;
1224#if (__FreeBSD_version > 500000)
1225	struct thread *p;
1226#else
1227	struct proc *p;
1228#endif
1229{
1230	struct top_cdnr *top;
1231	int err, error = 0;
1232
1233	while ((top = LIST_FIRST(&tcb_list)) != NULL) {
1234		/* destroy all */
1235		err = top_destroy(top);
1236		if (err != 0 && error == 0)
1237			error = err;
1238	}
1239	altq_input = NULL;
1240
1241	return (error);
1242}
1243
1244int
1245cdnrioctl(dev, cmd, addr, flag, p)
1246	dev_t dev;
1247	ioctlcmd_t cmd;
1248	caddr_t addr;
1249	int flag;
1250#if (__FreeBSD_version > 500000)
1251	struct thread *p;
1252#else
1253	struct proc *p;
1254#endif
1255{
1256	struct top_cdnr *top;
1257	struct cdnr_interface *ifacep;
1258	int	s, error = 0;
1259
1260	/* check super-user privilege */
1261	switch (cmd) {
1262	case CDNR_GETSTATS:
1263		break;
1264	default:
1265#if (__FreeBSD_version > 700000)
1266		if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
1267#elsif (__FreeBSD_version > 400000)
1268		if ((error = suser(p)) != 0)
1269#else
1270		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
1271#endif
1272			return (error);
1273		break;
1274	}
1275
1276#ifdef __NetBSD__
1277	s = splnet();
1278#else
1279	s = splimp();
1280#endif
1281	switch (cmd) {
1282
1283	case CDNR_IF_ATTACH:
1284		ifacep = (struct cdnr_interface *)addr;
1285		error = cdnrcmd_if_attach(ifacep->cdnr_ifname);
1286		break;
1287
1288	case CDNR_IF_DETACH:
1289		ifacep = (struct cdnr_interface *)addr;
1290		error = cdnrcmd_if_detach(ifacep->cdnr_ifname);
1291		break;
1292
1293	case CDNR_ENABLE:
1294	case CDNR_DISABLE:
1295		ifacep = (struct cdnr_interface *)addr;
1296		if ((top = tcb_lookup(ifacep->cdnr_ifname)) == NULL) {
1297			error = EBADF;
1298			break;
1299		}
1300
1301		switch (cmd) {
1302
1303		case CDNR_ENABLE:
1304			ALTQ_SET_CNDTNING(top->tc_ifq);
1305			if (altq_input == NULL)
1306				altq_input = altq_cdnr_input;
1307			break;
1308
1309		case CDNR_DISABLE:
1310			ALTQ_CLEAR_CNDTNING(top->tc_ifq);
1311			LIST_FOREACH(top, &tcb_list, tc_next)
1312				if (ALTQ_IS_CNDTNING(top->tc_ifq))
1313					break;
1314			if (top == NULL)
1315				altq_input = NULL;
1316			break;
1317		}
1318		break;
1319
1320	case CDNR_ADD_ELEM:
1321		error = cdnrcmd_add_element((struct cdnr_add_element *)addr);
1322		break;
1323
1324	case CDNR_DEL_ELEM:
1325		error = cdnrcmd_delete_element((struct cdnr_delete_element *)addr);
1326		break;
1327
1328	case CDNR_ADD_TBM:
1329		error = cdnrcmd_add_tbm((struct cdnr_add_tbmeter *)addr);
1330		break;
1331
1332	case CDNR_MOD_TBM:
1333		error = cdnrcmd_modify_tbm((struct cdnr_modify_tbmeter *)addr);
1334		break;
1335
1336	case CDNR_TBM_STATS:
1337		error = cdnrcmd_tbm_stats((struct cdnr_tbmeter_stats *)addr);
1338		break;
1339
1340	case CDNR_ADD_TCM:
1341		error = cdnrcmd_add_trtcm((struct cdnr_add_trtcm *)addr);
1342		break;
1343
1344	case CDNR_MOD_TCM:
1345		error = cdnrcmd_modify_trtcm((struct cdnr_modify_trtcm *)addr);
1346		break;
1347
1348	case CDNR_TCM_STATS:
1349		error = cdnrcmd_tcm_stats((struct cdnr_tcm_stats *)addr);
1350		break;
1351
1352	case CDNR_ADD_FILTER:
1353		error = cdnrcmd_add_filter((struct cdnr_add_filter *)addr);
1354		break;
1355
1356	case CDNR_DEL_FILTER:
1357		error = cdnrcmd_delete_filter((struct cdnr_delete_filter *)addr);
1358		break;
1359
1360	case CDNR_GETSTATS:
1361		error = cdnrcmd_get_stats((struct cdnr_get_stats *)addr);
1362		break;
1363
1364	case CDNR_ADD_TSW:
1365		error = cdnrcmd_add_tswtcm((struct cdnr_add_tswtcm *)addr);
1366		break;
1367
1368	case CDNR_MOD_TSW:
1369		error = cdnrcmd_modify_tswtcm((struct cdnr_modify_tswtcm *)addr);
1370		break;
1371
1372	default:
1373		error = EINVAL;
1374		break;
1375	}
1376	splx(s);
1377
1378	return error;
1379}
1380
1381#ifdef KLD_MODULE
1382
1383static struct altqsw cdnr_sw =
1384	{"cdnr", cdnropen, cdnrclose, cdnrioctl};
1385
1386ALTQ_MODULE(altq_cdnr, ALTQT_CDNR, &cdnr_sw);
1387
1388#endif /* KLD_MODULE */
1389
1390#endif /* ALTQ3_COMPAT */
1391#endif /* ALTQ_CDNR */
1392