1/*	$FreeBSD$	*/
2/*	$KAME: altq_cdnr.c,v 1.15 2005/04/13 03:44:24 suz Exp $	*/
3
4/*
5 * Copyright (C) 1999-2002
6 *	Sony Computer Science Laboratories Inc.  All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#if defined(__FreeBSD__) || defined(__NetBSD__)
31#include "opt_altq.h"
32#include "opt_inet.h"
33#ifdef __FreeBSD__
34#include "opt_inet6.h"
35#endif
36#endif /* __FreeBSD__ || __NetBSD__ */
37
38#include <sys/param.h>
39#include <sys/malloc.h>
40#include <sys/mbuf.h>
41#include <sys/socket.h>
42#include <sys/sockio.h>
43#include <sys/systm.h>
44#include <sys/proc.h>
45#include <sys/errno.h>
46#include <sys/kernel.h>
47#include <sys/queue.h>
48
49#include <net/if.h>
50#include <net/if_types.h>
51#include <netinet/in.h>
52#include <netinet/in_systm.h>
53#include <netinet/ip.h>
54#ifdef INET6
55#include <netinet/ip6.h>
56#endif
57
58#include <altq/altq.h>
59#ifdef ALTQ3_COMPAT
60#include <altq/altq_conf.h>
61#endif
62#include <altq/altq_cdnr.h>
63
64#ifdef ALTQ3_COMPAT
65/*
66 * diffserv traffic conditioning module
67 */
68
/*
 * Module-wide flag, initialised to 0.  Nothing in the visible part of this
 * file reads or writes it.
 * NOTE(review): presumably consumed by other ALTQ code -- confirm before
 * changing or removing.
 */
int altq_cdnr_enabled = 0;
70
71/* traffic conditioner is enabled by ALTQ_CDNR option in opt_altq.h */
72#ifdef ALTQ_CDNR
73
/*
 * tcb_list links every top-level conditioner (struct top_cdnr) through its
 * tc_next field: top_create() inserts entries, top_destroy() removes them.
 */
static LIST_HEAD(, top_cdnr) tcb_list;
76
77static int altq_cdnr_input(struct mbuf *, int);
78static struct top_cdnr *tcb_lookup(char *ifname);
79static struct cdnr_block *cdnr_handle2cb(u_long);
80static u_long cdnr_cb2handle(struct cdnr_block *);
81static void *cdnr_cballoc(struct top_cdnr *, int,
82       struct tc_action *(*)(struct cdnr_block *, struct cdnr_pktinfo *));
83static void cdnr_cbdestroy(void *);
84static int tca_verify_action(struct tc_action *);
85static void tca_import_action(struct tc_action *, struct tc_action *);
86static void tca_invalidate_action(struct tc_action *);
87
88static int generic_element_destroy(struct cdnr_block *);
89static struct top_cdnr *top_create(struct ifaltq *);
90static int top_destroy(struct top_cdnr *);
91static struct cdnr_block *element_create(struct top_cdnr *, struct tc_action *);
92static int element_destroy(struct cdnr_block *);
93static void tb_import_profile(struct tbe *, struct tb_profile *);
94static struct tbmeter *tbm_create(struct top_cdnr *, struct tb_profile *,
95				  struct tc_action *, struct tc_action *);
96static int tbm_destroy(struct tbmeter *);
97static struct tc_action *tbm_input(struct cdnr_block *, struct cdnr_pktinfo *);
98static struct trtcm *trtcm_create(struct top_cdnr *,
99		  struct tb_profile *, struct tb_profile *,
100		  struct tc_action *, struct tc_action *, struct tc_action *,
101		  int);
102static int trtcm_destroy(struct trtcm *);
103static struct tc_action *trtcm_input(struct cdnr_block *, struct cdnr_pktinfo *);
104static struct tswtcm *tswtcm_create(struct top_cdnr *,
105		  u_int32_t, u_int32_t, u_int32_t,
106		  struct tc_action *, struct tc_action *, struct tc_action *);
107static int tswtcm_destroy(struct tswtcm *);
108static struct tc_action *tswtcm_input(struct cdnr_block *, struct cdnr_pktinfo *);
109
110static int cdnrcmd_if_attach(char *);
111static int cdnrcmd_if_detach(char *);
112static int cdnrcmd_add_element(struct cdnr_add_element *);
113static int cdnrcmd_delete_element(struct cdnr_delete_element *);
114static int cdnrcmd_add_filter(struct cdnr_add_filter *);
115static int cdnrcmd_delete_filter(struct cdnr_delete_filter *);
116static int cdnrcmd_add_tbm(struct cdnr_add_tbmeter *);
117static int cdnrcmd_modify_tbm(struct cdnr_modify_tbmeter *);
118static int cdnrcmd_tbm_stats(struct cdnr_tbmeter_stats *);
119static int cdnrcmd_add_trtcm(struct cdnr_add_trtcm *);
120static int cdnrcmd_modify_trtcm(struct cdnr_modify_trtcm *);
121static int cdnrcmd_tcm_stats(struct cdnr_tcm_stats *);
122static int cdnrcmd_add_tswtcm(struct cdnr_add_tswtcm *);
123static int cdnrcmd_modify_tswtcm(struct cdnr_modify_tswtcm *);
124static int cdnrcmd_get_stats(struct cdnr_get_stats *);
125
126altqdev_decl(cdnr);
127
128/*
129 * top level input function called from ip_input.
130 * should be called before converting header fields to host-byte-order.
131 */
132int
133altq_cdnr_input(m, af)
134	struct mbuf	*m;
135	int		af;	/* address family */
136{
137	struct ifnet		*ifp;
138	struct ip		*ip;
139	struct top_cdnr		*top;
140	struct tc_action	*tca;
141	struct cdnr_block	*cb;
142	struct cdnr_pktinfo	pktinfo;
143
144	ifp = m->m_pkthdr.rcvif;
145	if (!ALTQ_IS_CNDTNING(&ifp->if_snd))
146		/* traffic conditioner is not enabled on this interface */
147		return (1);
148
149	top = ifp->if_snd.altq_cdnr;
150
151	ip = mtod(m, struct ip *);
152#ifdef INET6
153	if (af == AF_INET6) {
154		u_int32_t flowlabel;
155
156		flowlabel = ((struct ip6_hdr *)ip)->ip6_flow;
157		pktinfo.pkt_dscp = (ntohl(flowlabel) >> 20) & DSCP_MASK;
158	} else
159#endif
160		pktinfo.pkt_dscp = ip->ip_tos & DSCP_MASK;
161	pktinfo.pkt_len = m_pktlen(m);
162
163	tca = NULL;
164
165	cb = acc_classify(&top->tc_classifier, m, af);
166	if (cb != NULL)
167		tca = &cb->cb_action;
168
169	if (tca == NULL)
170		tca = &top->tc_block.cb_action;
171
172	while (1) {
173		PKTCNTR_ADD(&top->tc_cnts[tca->tca_code], pktinfo.pkt_len);
174
175		switch (tca->tca_code) {
176		case TCACODE_PASS:
177			return (1);
178		case TCACODE_DROP:
179			m_freem(m);
180			return (0);
181		case TCACODE_RETURN:
182			return (0);
183		case TCACODE_MARK:
184#ifdef INET6
185			if (af == AF_INET6) {
186				struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
187				u_int32_t flowlabel;
188
189				flowlabel = ntohl(ip6->ip6_flow);
190				flowlabel = (tca->tca_dscp << 20) |
191					(flowlabel & ~(DSCP_MASK << 20));
192				ip6->ip6_flow = htonl(flowlabel);
193			} else
194#endif
195				ip->ip_tos = tca->tca_dscp |
196					(ip->ip_tos & DSCP_CUMASK);
197			return (1);
198		case TCACODE_NEXT:
199			cb = tca->tca_next;
200			tca = (*cb->cb_input)(cb, &pktinfo);
201			break;
202		case TCACODE_NONE:
203		default:
204			return (1);
205		}
206	}
207}
208
209static struct top_cdnr *
210tcb_lookup(ifname)
211	char *ifname;
212{
213	struct top_cdnr *top;
214	struct ifnet *ifp;
215
216	if ((ifp = ifunit(ifname)) != NULL)
217		LIST_FOREACH(top, &tcb_list, tc_next)
218			if (top->tc_ifq->altq_ifp == ifp)
219				return (top);
220	return (NULL);
221}
222
223static struct cdnr_block *
224cdnr_handle2cb(handle)
225	u_long handle;
226{
227	struct cdnr_block *cb;
228
229	cb = (struct cdnr_block *)handle;
230	if (handle != ALIGN(cb))
231		return (NULL);
232
233	if (cb == NULL || cb->cb_handle != handle)
234		return (NULL);
235	return (cb);
236}
237
238static u_long
239cdnr_cb2handle(cb)
240	struct cdnr_block *cb;
241{
242	return (cb->cb_handle);
243}
244
245static void *
246cdnr_cballoc(top, type, input_func)
247	struct top_cdnr *top;
248	int type;
249	struct tc_action *(*input_func)(struct cdnr_block *,
250					struct cdnr_pktinfo *);
251{
252	struct cdnr_block *cb;
253	int size;
254
255	switch (type) {
256	case TCETYPE_TOP:
257		size = sizeof(struct top_cdnr);
258		break;
259	case TCETYPE_ELEMENT:
260		size = sizeof(struct cdnr_block);
261		break;
262	case TCETYPE_TBMETER:
263		size = sizeof(struct tbmeter);
264		break;
265	case TCETYPE_TRTCM:
266		size = sizeof(struct trtcm);
267		break;
268	case TCETYPE_TSWTCM:
269		size = sizeof(struct tswtcm);
270		break;
271	default:
272		return (NULL);
273	}
274
275	cb = malloc(size, M_DEVBUF, M_WAITOK);
276	if (cb == NULL)
277		return (NULL);
278	bzero(cb, size);
279
280	cb->cb_len = size;
281	cb->cb_type = type;
282	cb->cb_ref = 0;
283	cb->cb_handle = (u_long)cb;
284	if (top == NULL)
285		cb->cb_top = (struct top_cdnr *)cb;
286	else
287		cb->cb_top = top;
288
289	if (input_func != NULL) {
290		/*
291		 * if this cdnr has an action function,
292		 * make tc_action to call itself.
293		 */
294		cb->cb_action.tca_code = TCACODE_NEXT;
295		cb->cb_action.tca_next = cb;
296		cb->cb_input = input_func;
297	} else
298		cb->cb_action.tca_code = TCACODE_NONE;
299
300	/* if this isn't top, register the element to the top level cdnr */
301	if (top != NULL)
302		LIST_INSERT_HEAD(&top->tc_elements, cb, cb_next);
303
304	return ((void *)cb);
305}
306
307static void
308cdnr_cbdestroy(cblock)
309	void *cblock;
310{
311	struct cdnr_block *cb = cblock;
312
313	/* delete filters belonging to this cdnr */
314	acc_discard_filters(&cb->cb_top->tc_classifier, cb, 0);
315
316	/* remove from the top level cdnr */
317	if (cb->cb_top != cblock)
318		LIST_REMOVE(cb, cb_next);
319
320	free(cb, M_DEVBUF);
321}
322
323/*
324 * conditioner common destroy routine
325 */
326static int
327generic_element_destroy(cb)
328	struct cdnr_block *cb;
329{
330	int error = 0;
331
332	switch (cb->cb_type) {
333	case TCETYPE_TOP:
334		error = top_destroy((struct top_cdnr *)cb);
335		break;
336	case TCETYPE_ELEMENT:
337		error = element_destroy(cb);
338		break;
339	case TCETYPE_TBMETER:
340		error = tbm_destroy((struct tbmeter *)cb);
341		break;
342	case TCETYPE_TRTCM:
343		error = trtcm_destroy((struct trtcm *)cb);
344		break;
345	case TCETYPE_TSWTCM:
346		error = tswtcm_destroy((struct tswtcm *)cb);
347		break;
348	default:
349		error = EINVAL;
350	}
351	return (error);
352}
353
354static int
355tca_verify_action(utca)
356	struct tc_action *utca;
357{
358	switch (utca->tca_code) {
359	case TCACODE_PASS:
360	case TCACODE_DROP:
361	case TCACODE_MARK:
362		/* these are ok */
363		break;
364
365	case TCACODE_HANDLE:
366		/* verify handle value */
367		if (cdnr_handle2cb(utca->tca_handle) == NULL)
368			return (-1);
369		break;
370
371	case TCACODE_NONE:
372	case TCACODE_RETURN:
373	case TCACODE_NEXT:
374	default:
375		/* should not be passed from a user */
376		return (-1);
377	}
378	return (0);
379}
380
381static void
382tca_import_action(ktca, utca)
383	struct tc_action *ktca, *utca;
384{
385	struct cdnr_block *cb;
386
387	*ktca = *utca;
388	if (ktca->tca_code == TCACODE_HANDLE) {
389		cb = cdnr_handle2cb(ktca->tca_handle);
390		if (cb == NULL) {
391			ktca->tca_code = TCACODE_NONE;
392			return;
393		}
394		ktca->tca_code = TCACODE_NEXT;
395		ktca->tca_next = cb;
396		cb->cb_ref++;
397	} else if (ktca->tca_code == TCACODE_MARK) {
398		ktca->tca_dscp &= DSCP_MASK;
399	}
400	return;
401}
402
403static void
404tca_invalidate_action(tca)
405	struct tc_action *tca;
406{
407	struct cdnr_block *cb;
408
409	if (tca->tca_code == TCACODE_NEXT) {
410		cb = tca->tca_next;
411		if (cb == NULL)
412			return;
413		cb->cb_ref--;
414	}
415	tca->tca_code = TCACODE_NONE;
416}
417
418/*
419 * top level traffic conditioner
420 */
421static struct top_cdnr *
422top_create(ifq)
423	struct ifaltq *ifq;
424{
425	struct top_cdnr *top;
426
427	if ((top = cdnr_cballoc(NULL, TCETYPE_TOP, NULL)) == NULL)
428		return (NULL);
429
430	top->tc_ifq = ifq;
431	/* set default action for the top level conditioner */
432	top->tc_block.cb_action.tca_code = TCACODE_PASS;
433
434	LIST_INSERT_HEAD(&tcb_list, top, tc_next);
435
436	ifq->altq_cdnr = top;
437
438	return (top);
439}
440
/*
 * Tear down a top-level conditioner: turn conditioning off on its
 * interface queue, destroy every element registered with it, take it off
 * tcb_list, free it, and clear the global input hook when no remaining
 * conditioner is enabled.  Always returns 0.
 */
static int
top_destroy(top)
	struct top_cdnr *top;
{
	struct cdnr_block *cb;

	/* detach from the interface queue before freeing anything */
	if (ALTQ_IS_CNDTNING(top->tc_ifq))
		ALTQ_CLEAR_CNDTNING(top->tc_ifq);
	top->tc_ifq->altq_cdnr = NULL;

	/*
	 * destroy all the conditioner elements belonging to this interface.
	 * Each pass restarts at the list head and skips elements that are
	 * still referenced (cb_ref > 0); destroying an unreferenced element
	 * invalidates its own actions, which releases the elements it
	 * points to so that they can go in a later pass.
	 *
	 * NOTE(review): if every remaining element has cb_ref > 0, or if
	 * generic_element_destroy() fails without unlinking the element,
	 * this loop makes no progress and does not terminate -- confirm
	 * that such states cannot be reached.
	 */
	while ((cb = LIST_FIRST(&top->tc_elements)) != NULL) {
		while (cb != NULL && cb->cb_ref > 0)
			cb = LIST_NEXT(cb, cb_next);
		if (cb != NULL)
			generic_element_destroy(cb);
	}

	LIST_REMOVE(top, tc_next);

	cdnr_cbdestroy(top);

	/*
	 * if there is no active conditioner, remove the input hook.
	 * `top' was freed above and is reused here purely as the list
	 * cursor; it ends up NULL when the scan finds no enabled entry.
	 */
	if (altq_input != NULL) {
		LIST_FOREACH(top, &tcb_list, tc_next)
			if (ALTQ_IS_CNDTNING(top->tc_ifq))
				break;
		if (top == NULL)
			altq_input = NULL;
	}

	return (0);
}
476
477/*
478 * simple tc elements without input function (e.g., dropper and makers).
479 */
480static struct cdnr_block *
481element_create(top, action)
482	struct top_cdnr *top;
483	struct tc_action *action;
484{
485	struct cdnr_block *cb;
486
487	if (tca_verify_action(action) < 0)
488		return (NULL);
489
490	if ((cb = cdnr_cballoc(top, TCETYPE_ELEMENT, NULL)) == NULL)
491		return (NULL);
492
493	tca_import_action(&cb->cb_action, action);
494
495	return (cb);
496}
497
498static int
499element_destroy(cb)
500	struct cdnr_block *cb;
501{
502	if (cb->cb_ref > 0)
503		return (EBUSY);
504
505	tca_invalidate_action(&cb->cb_action);
506
507	cdnr_cbdestroy(cb);
508	return (0);
509}
510
/*
 * internal representation of token bucket parameters
 *	rate: 	byte_per_unittime << 32
 *		(((bits_per_sec) / 8) << 32) / machclk_freq
 *	depth:	byte << 32
 *
 * TB_SCALE() widens its argument to 64 bits before shifting left by
 * TB_SHIFT; TB_UNSCALE() shifts right by TB_SHIFT without any cast, so
 * the result has the (promoted) type of its argument.
 */
#define	TB_SHIFT	32
#define	TB_SCALE(x)	((u_int64_t)(x) << TB_SHIFT)
#define	TB_UNSCALE(x)	((x) >> TB_SHIFT)
521
522static void
523tb_import_profile(tb, profile)
524	struct tbe *tb;
525	struct tb_profile *profile;
526{
527	tb->rate = TB_SCALE(profile->rate / 8) / machclk_freq;
528	tb->depth = TB_SCALE(profile->depth);
529	if (tb->rate > 0)
530		tb->filluptime = tb->depth / tb->rate;
531	else
532		tb->filluptime = 0xffffffffffffffffLL;
533	tb->token = tb->depth;
534	tb->last = read_machclk();
535}
536
537/*
538 * simple token bucket meter
539 */
540static struct tbmeter *
541tbm_create(top, profile, in_action, out_action)
542	struct top_cdnr *top;
543	struct tb_profile *profile;
544	struct tc_action *in_action, *out_action;
545{
546	struct tbmeter *tbm = NULL;
547
548	if (tca_verify_action(in_action) < 0
549	    || tca_verify_action(out_action) < 0)
550		return (NULL);
551
552	if ((tbm = cdnr_cballoc(top, TCETYPE_TBMETER,
553				tbm_input)) == NULL)
554		return (NULL);
555
556	tb_import_profile(&tbm->tb, profile);
557
558	tca_import_action(&tbm->in_action, in_action);
559	tca_import_action(&tbm->out_action, out_action);
560
561	return (tbm);
562}
563
564static int
565tbm_destroy(tbm)
566	struct tbmeter *tbm;
567{
568	if (tbm->cdnrblk.cb_ref > 0)
569		return (EBUSY);
570
571	tca_invalidate_action(&tbm->in_action);
572	tca_invalidate_action(&tbm->out_action);
573
574	cdnr_cbdestroy(tbm);
575	return (0);
576}
577
578static struct tc_action *
579tbm_input(cb, pktinfo)
580	struct cdnr_block *cb;
581	struct cdnr_pktinfo *pktinfo;
582{
583	struct tbmeter *tbm = (struct tbmeter *)cb;
584	u_int64_t	len;
585	u_int64_t	interval, now;
586
587	len = TB_SCALE(pktinfo->pkt_len);
588
589	if (tbm->tb.token < len) {
590		now = read_machclk();
591		interval = now - tbm->tb.last;
592		if (interval >= tbm->tb.filluptime)
593			tbm->tb.token = tbm->tb.depth;
594		else {
595			tbm->tb.token += interval * tbm->tb.rate;
596			if (tbm->tb.token > tbm->tb.depth)
597				tbm->tb.token = tbm->tb.depth;
598		}
599		tbm->tb.last = now;
600	}
601
602	if (tbm->tb.token < len) {
603		PKTCNTR_ADD(&tbm->out_cnt, pktinfo->pkt_len);
604		return (&tbm->out_action);
605	}
606
607	tbm->tb.token -= len;
608	PKTCNTR_ADD(&tbm->in_cnt, pktinfo->pkt_len);
609	return (&tbm->in_action);
610}
611
612/*
613 * two rate three color marker
614 * as described in draft-heinanen-diffserv-trtcm-01.txt
615 */
616static struct trtcm *
617trtcm_create(top, cmtd_profile, peak_profile,
618	     green_action, yellow_action, red_action, coloraware)
619	struct top_cdnr *top;
620	struct tb_profile *cmtd_profile, *peak_profile;
621	struct tc_action *green_action, *yellow_action, *red_action;
622	int	coloraware;
623{
624	struct trtcm *tcm = NULL;
625
626	if (tca_verify_action(green_action) < 0
627	    || tca_verify_action(yellow_action) < 0
628	    || tca_verify_action(red_action) < 0)
629		return (NULL);
630
631	if ((tcm = cdnr_cballoc(top, TCETYPE_TRTCM,
632				trtcm_input)) == NULL)
633		return (NULL);
634
635	tb_import_profile(&tcm->cmtd_tb, cmtd_profile);
636	tb_import_profile(&tcm->peak_tb, peak_profile);
637
638	tca_import_action(&tcm->green_action, green_action);
639	tca_import_action(&tcm->yellow_action, yellow_action);
640	tca_import_action(&tcm->red_action, red_action);
641
642	/* set dscps to use */
643	if (tcm->green_action.tca_code == TCACODE_MARK)
644		tcm->green_dscp = tcm->green_action.tca_dscp & DSCP_MASK;
645	else
646		tcm->green_dscp = DSCP_AF11;
647	if (tcm->yellow_action.tca_code == TCACODE_MARK)
648		tcm->yellow_dscp = tcm->yellow_action.tca_dscp & DSCP_MASK;
649	else
650		tcm->yellow_dscp = DSCP_AF12;
651	if (tcm->red_action.tca_code == TCACODE_MARK)
652		tcm->red_dscp = tcm->red_action.tca_dscp & DSCP_MASK;
653	else
654		tcm->red_dscp = DSCP_AF13;
655
656	tcm->coloraware = coloraware;
657
658	return (tcm);
659}
660
661static int
662trtcm_destroy(tcm)
663	struct trtcm *tcm;
664{
665	if (tcm->cdnrblk.cb_ref > 0)
666		return (EBUSY);
667
668	tca_invalidate_action(&tcm->green_action);
669	tca_invalidate_action(&tcm->yellow_action);
670	tca_invalidate_action(&tcm->red_action);
671
672	cdnr_cbdestroy(tcm);
673	return (0);
674}
675
676static struct tc_action *
677trtcm_input(cb, pktinfo)
678	struct cdnr_block *cb;
679	struct cdnr_pktinfo *pktinfo;
680{
681	struct trtcm *tcm = (struct trtcm *)cb;
682	u_int64_t	len;
683	u_int64_t	interval, now;
684	u_int8_t	color;
685
686	len = TB_SCALE(pktinfo->pkt_len);
687	if (tcm->coloraware) {
688		color = pktinfo->pkt_dscp;
689		if (color != tcm->yellow_dscp && color != tcm->red_dscp)
690			color = tcm->green_dscp;
691	} else {
692		/* if color-blind, precolor it as green */
693		color = tcm->green_dscp;
694	}
695
696	now = read_machclk();
697	if (tcm->cmtd_tb.token < len) {
698		interval = now - tcm->cmtd_tb.last;
699		if (interval >= tcm->cmtd_tb.filluptime)
700			tcm->cmtd_tb.token = tcm->cmtd_tb.depth;
701		else {
702			tcm->cmtd_tb.token += interval * tcm->cmtd_tb.rate;
703			if (tcm->cmtd_tb.token > tcm->cmtd_tb.depth)
704				tcm->cmtd_tb.token = tcm->cmtd_tb.depth;
705		}
706		tcm->cmtd_tb.last = now;
707	}
708	if (tcm->peak_tb.token < len) {
709		interval = now - tcm->peak_tb.last;
710		if (interval >= tcm->peak_tb.filluptime)
711			tcm->peak_tb.token = tcm->peak_tb.depth;
712		else {
713			tcm->peak_tb.token += interval * tcm->peak_tb.rate;
714			if (tcm->peak_tb.token > tcm->peak_tb.depth)
715				tcm->peak_tb.token = tcm->peak_tb.depth;
716		}
717		tcm->peak_tb.last = now;
718	}
719
720	if (color == tcm->red_dscp || tcm->peak_tb.token < len) {
721		pktinfo->pkt_dscp = tcm->red_dscp;
722		PKTCNTR_ADD(&tcm->red_cnt, pktinfo->pkt_len);
723		return (&tcm->red_action);
724	}
725
726	if (color == tcm->yellow_dscp || tcm->cmtd_tb.token < len) {
727		pktinfo->pkt_dscp = tcm->yellow_dscp;
728		tcm->peak_tb.token -= len;
729		PKTCNTR_ADD(&tcm->yellow_cnt, pktinfo->pkt_len);
730		return (&tcm->yellow_action);
731	}
732
733	pktinfo->pkt_dscp = tcm->green_dscp;
734	tcm->cmtd_tb.token -= len;
735	tcm->peak_tb.token -= len;
736	PKTCNTR_ADD(&tcm->green_cnt, pktinfo->pkt_len);
737	return (&tcm->green_action);
738}
739
740/*
741 * time sliding window three color marker
742 * as described in draft-fang-diffserv-tc-tswtcm-00.txt
743 */
744static struct tswtcm *
745tswtcm_create(top, cmtd_rate, peak_rate, avg_interval,
746	      green_action, yellow_action, red_action)
747	struct top_cdnr *top;
748	u_int32_t	cmtd_rate, peak_rate, avg_interval;
749	struct tc_action *green_action, *yellow_action, *red_action;
750{
751	struct tswtcm *tsw;
752
753	if (tca_verify_action(green_action) < 0
754	    || tca_verify_action(yellow_action) < 0
755	    || tca_verify_action(red_action) < 0)
756		return (NULL);
757
758	if ((tsw = cdnr_cballoc(top, TCETYPE_TSWTCM,
759				tswtcm_input)) == NULL)
760		return (NULL);
761
762	tca_import_action(&tsw->green_action, green_action);
763	tca_import_action(&tsw->yellow_action, yellow_action);
764	tca_import_action(&tsw->red_action, red_action);
765
766	/* set dscps to use */
767	if (tsw->green_action.tca_code == TCACODE_MARK)
768		tsw->green_dscp = tsw->green_action.tca_dscp & DSCP_MASK;
769	else
770		tsw->green_dscp = DSCP_AF11;
771	if (tsw->yellow_action.tca_code == TCACODE_MARK)
772		tsw->yellow_dscp = tsw->yellow_action.tca_dscp & DSCP_MASK;
773	else
774		tsw->yellow_dscp = DSCP_AF12;
775	if (tsw->red_action.tca_code == TCACODE_MARK)
776		tsw->red_dscp = tsw->red_action.tca_dscp & DSCP_MASK;
777	else
778		tsw->red_dscp = DSCP_AF13;
779
780	/* convert rates from bits/sec to bytes/sec */
781	tsw->cmtd_rate = cmtd_rate / 8;
782	tsw->peak_rate = peak_rate / 8;
783	tsw->avg_rate = 0;
784
785	/* timewin is converted from msec to machine clock unit */
786	tsw->timewin = (u_int64_t)machclk_freq * avg_interval / 1000;
787
788	return (tsw);
789}
790
791static int
792tswtcm_destroy(tsw)
793	struct tswtcm *tsw;
794{
795	if (tsw->cdnrblk.cb_ref > 0)
796		return (EBUSY);
797
798	tca_invalidate_action(&tsw->green_action);
799	tca_invalidate_action(&tsw->yellow_action);
800	tca_invalidate_action(&tsw->red_action);
801
802	cdnr_cbdestroy(tsw);
803	return (0);
804}
805
806static struct tc_action *
807tswtcm_input(cb, pktinfo)
808	struct cdnr_block *cb;
809	struct cdnr_pktinfo *pktinfo;
810{
811	struct tswtcm	*tsw = (struct tswtcm *)cb;
812	int		len;
813	u_int32_t	avg_rate;
814	u_int64_t	interval, now, tmp;
815
816	/*
817	 * rate estimator
818	 */
819	len = pktinfo->pkt_len;
820	now = read_machclk();
821
822	interval = now - tsw->t_front;
823	/*
824	 * calculate average rate:
825	 *	avg = (avg * timewin + pkt_len)/(timewin + interval)
826	 * pkt_len needs to be multiplied by machclk_freq in order to
827	 * get (bytes/sec).
828	 * note: when avg_rate (bytes/sec) and timewin (machclk unit) are
829	 * less than 32 bits, the following 64-bit operation has enough
830	 * precision.
831	 */
832	tmp = ((u_int64_t)tsw->avg_rate * tsw->timewin
833	       + (u_int64_t)len * machclk_freq) / (tsw->timewin + interval);
834	tsw->avg_rate = avg_rate = (u_int32_t)tmp;
835	tsw->t_front = now;
836
837	/*
838	 * marker
839	 */
840	if (avg_rate > tsw->cmtd_rate) {
841		u_int32_t randval = arc4random() % avg_rate;
842
843		if (avg_rate > tsw->peak_rate) {
844			if (randval < avg_rate - tsw->peak_rate) {
845				/* mark red */
846				pktinfo->pkt_dscp = tsw->red_dscp;
847				PKTCNTR_ADD(&tsw->red_cnt, len);
848				return (&tsw->red_action);
849			} else if (randval < avg_rate - tsw->cmtd_rate)
850				goto mark_yellow;
851		} else {
852			/* peak_rate >= avg_rate > cmtd_rate */
853			if (randval < avg_rate - tsw->cmtd_rate) {
854			mark_yellow:
855				pktinfo->pkt_dscp = tsw->yellow_dscp;
856				PKTCNTR_ADD(&tsw->yellow_cnt, len);
857				return (&tsw->yellow_action);
858			}
859		}
860	}
861
862	/* mark green */
863	pktinfo->pkt_dscp = tsw->green_dscp;
864	PKTCNTR_ADD(&tsw->green_cnt, len);
865	return (&tsw->green_action);
866}
867
868/*
869 * ioctl requests
870 */
871static int
872cdnrcmd_if_attach(ifname)
873	char *ifname;
874{
875	struct ifnet *ifp;
876	struct top_cdnr *top;
877
878	if ((ifp = ifunit(ifname)) == NULL)
879		return (EBADF);
880
881	if (ifp->if_snd.altq_cdnr != NULL)
882		return (EBUSY);
883
884	if ((top = top_create(&ifp->if_snd)) == NULL)
885		return (ENOMEM);
886	return (0);
887}
888
889static int
890cdnrcmd_if_detach(ifname)
891	char *ifname;
892{
893	struct top_cdnr *top;
894
895	if ((top = tcb_lookup(ifname)) == NULL)
896		return (EBADF);
897
898	return top_destroy(top);
899}
900
901static int
902cdnrcmd_add_element(ap)
903	struct cdnr_add_element *ap;
904{
905	struct top_cdnr *top;
906	struct cdnr_block *cb;
907
908	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
909		return (EBADF);
910
911	cb = element_create(top, &ap->action);
912	if (cb == NULL)
913		return (EINVAL);
914	/* return a class handle to the user */
915	ap->cdnr_handle = cdnr_cb2handle(cb);
916	return (0);
917}
918
919static int
920cdnrcmd_delete_element(ap)
921	struct cdnr_delete_element *ap;
922{
923	struct top_cdnr *top;
924	struct cdnr_block *cb;
925
926	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
927		return (EBADF);
928
929	if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
930		return (EINVAL);
931
932	if (cb->cb_type != TCETYPE_ELEMENT)
933		return generic_element_destroy(cb);
934
935	return element_destroy(cb);
936}
937
938static int
939cdnrcmd_add_filter(ap)
940	struct cdnr_add_filter *ap;
941{
942	struct top_cdnr *top;
943	struct cdnr_block *cb;
944
945	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
946		return (EBADF);
947
948	if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
949		return (EINVAL);
950
951	return acc_add_filter(&top->tc_classifier, &ap->filter,
952			      cb, &ap->filter_handle);
953}
954
955static int
956cdnrcmd_delete_filter(ap)
957	struct cdnr_delete_filter *ap;
958{
959	struct top_cdnr *top;
960
961	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
962		return (EBADF);
963
964	return acc_delete_filter(&top->tc_classifier, ap->filter_handle);
965}
966
967static int
968cdnrcmd_add_tbm(ap)
969	struct cdnr_add_tbmeter *ap;
970{
971	struct top_cdnr *top;
972	struct tbmeter *tbm;
973
974	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
975		return (EBADF);
976
977	tbm = tbm_create(top, &ap->profile, &ap->in_action, &ap->out_action);
978	if (tbm == NULL)
979		return (EINVAL);
980	/* return a class handle to the user */
981	ap->cdnr_handle = cdnr_cb2handle(&tbm->cdnrblk);
982	return (0);
983}
984
985static int
986cdnrcmd_modify_tbm(ap)
987	struct cdnr_modify_tbmeter *ap;
988{
989	struct tbmeter *tbm;
990
991	if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
992		return (EINVAL);
993
994	tb_import_profile(&tbm->tb, &ap->profile);
995
996	return (0);
997}
998
999static int
1000cdnrcmd_tbm_stats(ap)
1001	struct cdnr_tbmeter_stats *ap;
1002{
1003	struct tbmeter *tbm;
1004
1005	if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
1006		return (EINVAL);
1007
1008	ap->in_cnt = tbm->in_cnt;
1009	ap->out_cnt = tbm->out_cnt;
1010
1011	return (0);
1012}
1013
1014static int
1015cdnrcmd_add_trtcm(ap)
1016	struct cdnr_add_trtcm *ap;
1017{
1018	struct top_cdnr *top;
1019	struct trtcm *tcm;
1020
1021	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
1022		return (EBADF);
1023
1024	tcm = trtcm_create(top, &ap->cmtd_profile, &ap->peak_profile,
1025			   &ap->green_action, &ap->yellow_action,
1026			   &ap->red_action, ap->coloraware);
1027	if (tcm == NULL)
1028		return (EINVAL);
1029
1030	/* return a class handle to the user */
1031	ap->cdnr_handle = cdnr_cb2handle(&tcm->cdnrblk);
1032	return (0);
1033}
1034
1035static int
1036cdnrcmd_modify_trtcm(ap)
1037	struct cdnr_modify_trtcm *ap;
1038{
1039	struct trtcm *tcm;
1040
1041	if ((tcm = (struct trtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
1042		return (EINVAL);
1043
1044	tb_import_profile(&tcm->cmtd_tb, &ap->cmtd_profile);
1045	tb_import_profile(&tcm->peak_tb, &ap->peak_profile);
1046
1047	return (0);
1048}
1049
1050static int
1051cdnrcmd_tcm_stats(ap)
1052	struct cdnr_tcm_stats *ap;
1053{
1054	struct cdnr_block *cb;
1055
1056	if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
1057		return (EINVAL);
1058
1059	if (cb->cb_type == TCETYPE_TRTCM) {
1060	    struct trtcm *tcm = (struct trtcm *)cb;
1061
1062	    ap->green_cnt = tcm->green_cnt;
1063	    ap->yellow_cnt = tcm->yellow_cnt;
1064	    ap->red_cnt = tcm->red_cnt;
1065	} else if (cb->cb_type == TCETYPE_TSWTCM) {
1066	    struct tswtcm *tsw = (struct tswtcm *)cb;
1067
1068	    ap->green_cnt = tsw->green_cnt;
1069	    ap->yellow_cnt = tsw->yellow_cnt;
1070	    ap->red_cnt = tsw->red_cnt;
1071	} else
1072	    return (EINVAL);
1073
1074	return (0);
1075}
1076
1077static int
1078cdnrcmd_add_tswtcm(ap)
1079	struct cdnr_add_tswtcm *ap;
1080{
1081	struct top_cdnr *top;
1082	struct tswtcm *tsw;
1083
1084	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
1085		return (EBADF);
1086
1087	if (ap->cmtd_rate > ap->peak_rate)
1088		return (EINVAL);
1089
1090	tsw = tswtcm_create(top, ap->cmtd_rate, ap->peak_rate,
1091			    ap->avg_interval, &ap->green_action,
1092			    &ap->yellow_action, &ap->red_action);
1093	if (tsw == NULL)
1094	    return (EINVAL);
1095
1096	/* return a class handle to the user */
1097	ap->cdnr_handle = cdnr_cb2handle(&tsw->cdnrblk);
1098	return (0);
1099}
1100
1101static int
1102cdnrcmd_modify_tswtcm(ap)
1103	struct cdnr_modify_tswtcm *ap;
1104{
1105	struct tswtcm *tsw;
1106
1107	if ((tsw = (struct tswtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
1108		return (EINVAL);
1109
1110	if (ap->cmtd_rate > ap->peak_rate)
1111		return (EINVAL);
1112
1113	/* convert rates from bits/sec to bytes/sec */
1114	tsw->cmtd_rate = ap->cmtd_rate / 8;
1115	tsw->peak_rate = ap->peak_rate / 8;
1116	tsw->avg_rate = 0;
1117
1118	/* timewin is converted from msec to machine clock unit */
1119	tsw->timewin = (u_int64_t)machclk_freq * ap->avg_interval / 1000;
1120
1121	return (0);
1122}
1123
1124static int
1125cdnrcmd_get_stats(ap)
1126	struct cdnr_get_stats *ap;
1127{
1128	struct top_cdnr *top;
1129	struct cdnr_block *cb;
1130	struct tbmeter *tbm;
1131	struct trtcm *tcm;
1132	struct tswtcm *tsw;
1133	struct tce_stats tce, *usp;
1134	int error, n, nskip, nelements;
1135
1136	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
1137		return (EBADF);
1138
1139	/* copy action stats */
1140	bcopy(top->tc_cnts, ap->cnts, sizeof(ap->cnts));
1141
1142	/* stats for each element */
1143	nelements = ap->nelements;
1144	usp = ap->tce_stats;
1145	if (nelements <= 0 || usp == NULL)
1146		return (0);
1147
1148	nskip = ap->nskip;
1149	n = 0;
1150	LIST_FOREACH(cb, &top->tc_elements, cb_next) {
1151		if (nskip > 0) {
1152			nskip--;
1153			continue;
1154		}
1155
1156		bzero(&tce, sizeof(tce));
1157		tce.tce_handle = cb->cb_handle;
1158		tce.tce_type = cb->cb_type;
1159		switch (cb->cb_type) {
1160		case TCETYPE_TBMETER:
1161			tbm = (struct tbmeter *)cb;
1162			tce.tce_cnts[0] = tbm->in_cnt;
1163			tce.tce_cnts[1] = tbm->out_cnt;
1164			break;
1165		case TCETYPE_TRTCM:
1166			tcm = (struct trtcm *)cb;
1167			tce.tce_cnts[0] = tcm->green_cnt;
1168			tce.tce_cnts[1] = tcm->yellow_cnt;
1169			tce.tce_cnts[2] = tcm->red_cnt;
1170			break;
1171		case TCETYPE_TSWTCM:
1172			tsw = (struct tswtcm *)cb;
1173			tce.tce_cnts[0] = tsw->green_cnt;
1174			tce.tce_cnts[1] = tsw->yellow_cnt;
1175			tce.tce_cnts[2] = tsw->red_cnt;
1176			break;
1177		default:
1178			continue;
1179		}
1180
1181		if ((error = copyout((caddr_t)&tce, (caddr_t)usp++,
1182				     sizeof(tce))) != 0)
1183			return (error);
1184
1185		if (++n == nelements)
1186			break;
1187	}
1188	ap->nelements = n;
1189
1190	return (0);
1191}
1192
1193/*
1194 * conditioner device interface
1195 */
1196int
1197cdnropen(dev, flag, fmt, p)
1198	dev_t dev;
1199	int flag, fmt;
1200#if (__FreeBSD_version > 500000)
1201	struct thread *p;
1202#else
1203	struct proc *p;
1204#endif
1205{
1206	if (machclk_freq == 0)
1207		init_machclk();
1208
1209	if (machclk_freq == 0) {
1210		printf("cdnr: no cpu clock available!\n");
1211		return (ENXIO);
1212	}
1213
1214	/* everything will be done when the queueing scheme is attached. */
1215	return 0;
1216}
1217
1218int
1219cdnrclose(dev, flag, fmt, p)
1220	dev_t dev;
1221	int flag, fmt;
1222#if (__FreeBSD_version > 500000)
1223	struct thread *p;
1224#else
1225	struct proc *p;
1226#endif
1227{
1228	struct top_cdnr *top;
1229	int err, error = 0;
1230
1231	while ((top = LIST_FIRST(&tcb_list)) != NULL) {
1232		/* destroy all */
1233		err = top_destroy(top);
1234		if (err != 0 && error == 0)
1235			error = err;
1236	}
1237	altq_input = NULL;
1238
1239	return (error);
1240}
1241
1242int
1243cdnrioctl(dev, cmd, addr, flag, p)
1244	dev_t dev;
1245	ioctlcmd_t cmd;
1246	caddr_t addr;
1247	int flag;
1248#if (__FreeBSD_version > 500000)
1249	struct thread *p;
1250#else
1251	struct proc *p;
1252#endif
1253{
1254	struct top_cdnr *top;
1255	struct cdnr_interface *ifacep;
1256	int	s, error = 0;
1257
1258	/* check super-user privilege */
1259	switch (cmd) {
1260	case CDNR_GETSTATS:
1261		break;
1262	default:
1263#if (__FreeBSD_version > 700000)
1264		if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
1265#elsif (__FreeBSD_version > 400000)
1266		if ((error = suser(p)) != 0)
1267#else
1268		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
1269#endif
1270			return (error);
1271		break;
1272	}
1273
1274#ifdef __NetBSD__
1275	s = splnet();
1276#else
1277	s = splimp();
1278#endif
1279	switch (cmd) {
1280
1281	case CDNR_IF_ATTACH:
1282		ifacep = (struct cdnr_interface *)addr;
1283		error = cdnrcmd_if_attach(ifacep->cdnr_ifname);
1284		break;
1285
1286	case CDNR_IF_DETACH:
1287		ifacep = (struct cdnr_interface *)addr;
1288		error = cdnrcmd_if_detach(ifacep->cdnr_ifname);
1289		break;
1290
1291	case CDNR_ENABLE:
1292	case CDNR_DISABLE:
1293		ifacep = (struct cdnr_interface *)addr;
1294		if ((top = tcb_lookup(ifacep->cdnr_ifname)) == NULL) {
1295			error = EBADF;
1296			break;
1297		}
1298
1299		switch (cmd) {
1300
1301		case CDNR_ENABLE:
1302			ALTQ_SET_CNDTNING(top->tc_ifq);
1303			if (altq_input == NULL)
1304				altq_input = altq_cdnr_input;
1305			break;
1306
1307		case CDNR_DISABLE:
1308			ALTQ_CLEAR_CNDTNING(top->tc_ifq);
1309			LIST_FOREACH(top, &tcb_list, tc_next)
1310				if (ALTQ_IS_CNDTNING(top->tc_ifq))
1311					break;
1312			if (top == NULL)
1313				altq_input = NULL;
1314			break;
1315		}
1316		break;
1317
1318	case CDNR_ADD_ELEM:
1319		error = cdnrcmd_add_element((struct cdnr_add_element *)addr);
1320		break;
1321
1322	case CDNR_DEL_ELEM:
1323		error = cdnrcmd_delete_element((struct cdnr_delete_element *)addr);
1324		break;
1325
1326	case CDNR_ADD_TBM:
1327		error = cdnrcmd_add_tbm((struct cdnr_add_tbmeter *)addr);
1328		break;
1329
1330	case CDNR_MOD_TBM:
1331		error = cdnrcmd_modify_tbm((struct cdnr_modify_tbmeter *)addr);
1332		break;
1333
1334	case CDNR_TBM_STATS:
1335		error = cdnrcmd_tbm_stats((struct cdnr_tbmeter_stats *)addr);
1336		break;
1337
1338	case CDNR_ADD_TCM:
1339		error = cdnrcmd_add_trtcm((struct cdnr_add_trtcm *)addr);
1340		break;
1341
1342	case CDNR_MOD_TCM:
1343		error = cdnrcmd_modify_trtcm((struct cdnr_modify_trtcm *)addr);
1344		break;
1345
1346	case CDNR_TCM_STATS:
1347		error = cdnrcmd_tcm_stats((struct cdnr_tcm_stats *)addr);
1348		break;
1349
1350	case CDNR_ADD_FILTER:
1351		error = cdnrcmd_add_filter((struct cdnr_add_filter *)addr);
1352		break;
1353
1354	case CDNR_DEL_FILTER:
1355		error = cdnrcmd_delete_filter((struct cdnr_delete_filter *)addr);
1356		break;
1357
1358	case CDNR_GETSTATS:
1359		error = cdnrcmd_get_stats((struct cdnr_get_stats *)addr);
1360		break;
1361
1362	case CDNR_ADD_TSW:
1363		error = cdnrcmd_add_tswtcm((struct cdnr_add_tswtcm *)addr);
1364		break;
1365
1366	case CDNR_MOD_TSW:
1367		error = cdnrcmd_modify_tswtcm((struct cdnr_modify_tswtcm *)addr);
1368		break;
1369
1370	default:
1371		error = EINVAL;
1372		break;
1373	}
1374	splx(s);
1375
1376	return error;
1377}
1378
#ifdef KLD_MODULE

/*
 * Switch entry for the conditioner, built only when KLD_MODULE is
 * defined.  It is initialized positionally with the name string and the
 * three device entry points of this file.
 */
static struct altqsw cdnr_sw = {
	"cdnr",		/* name string */
	cdnropen,	/* open entry point */
	cdnrclose,	/* close entry point */
	cdnrioctl	/* ioctl entry point */
};

/*
 * NOTE(review): presumably declares the loadable-module glue for this
 * switch entry under type ALTQT_CDNR -- confirm against the definition
 * of ALTQ_MODULE.
 */
ALTQ_MODULE(altq_cdnr, ALTQT_CDNR, &cdnr_sw);

#endif /* KLD_MODULE */
1387
1388#endif /* ALTQ3_COMPAT */
1389#endif /* ALTQ_CDNR */
1390