1/*-
2 * Copyright (C) 1999-2002
3 *	Sony Computer Science Laboratories Inc.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $KAME: altq_cdnr.c,v 1.15 2005/04/13 03:44:24 suz Exp $
27 * $FreeBSD$
28 */
29
30#include "opt_altq.h"
31#include "opt_inet.h"
32#include "opt_inet6.h"
33
34#include <sys/param.h>
35#include <sys/malloc.h>
36#include <sys/mbuf.h>
37#include <sys/socket.h>
38#include <sys/sockio.h>
39#include <sys/systm.h>
40#include <sys/proc.h>
41#include <sys/errno.h>
42#include <sys/kernel.h>
43#include <sys/queue.h>
44
45#include <net/if.h>
46#include <net/if_types.h>
47#include <netinet/in.h>
48#include <netinet/in_systm.h>
49#include <netinet/ip.h>
50#ifdef INET6
51#include <netinet/ip6.h>
52#endif
53
54#include <net/altq/if_altq.h>
55#include <net/altq/altq.h>
56#ifdef ALTQ3_COMPAT
57#include <net/altq/altq_conf.h>
58#endif
59#include <net/altq/altq_cdnr.h>
60
61#ifdef ALTQ3_COMPAT
62/*
63 * diffserv traffic conditioning module
64 */
65
/*
 * Global indicator for the traffic conditioner; starts out as 0.
 * Nothing in the visible part of this file reads or writes it --
 * presumably it is consulted by other ALTQ code (TODO confirm).
 */
int altq_cdnr_enabled = 0;
67
68/* traffic conditioner is enabled by ALTQ_CDNR option in opt_altq.h */
69#ifdef ALTQ_CDNR
70
/*
 * tcb_list links every top-level conditioner (struct top_cdnr) that
 * top_create() has allocated; top_destroy() unlinks the entry again.
 */
static LIST_HEAD(, top_cdnr) tcb_list;
73
74static int altq_cdnr_input(struct mbuf *, int);
75static struct top_cdnr *tcb_lookup(char *ifname);
76static struct cdnr_block *cdnr_handle2cb(u_long);
77static u_long cdnr_cb2handle(struct cdnr_block *);
78static void *cdnr_cballoc(struct top_cdnr *, int,
79       struct tc_action *(*)(struct cdnr_block *, struct cdnr_pktinfo *));
80static void cdnr_cbdestroy(void *);
81static int tca_verify_action(struct tc_action *);
82static void tca_import_action(struct tc_action *, struct tc_action *);
83static void tca_invalidate_action(struct tc_action *);
84
85static int generic_element_destroy(struct cdnr_block *);
86static struct top_cdnr *top_create(struct ifaltq *);
87static int top_destroy(struct top_cdnr *);
88static struct cdnr_block *element_create(struct top_cdnr *, struct tc_action *);
89static int element_destroy(struct cdnr_block *);
90static void tb_import_profile(struct tbe *, struct tb_profile *);
91static struct tbmeter *tbm_create(struct top_cdnr *, struct tb_profile *,
92				  struct tc_action *, struct tc_action *);
93static int tbm_destroy(struct tbmeter *);
94static struct tc_action *tbm_input(struct cdnr_block *, struct cdnr_pktinfo *);
95static struct trtcm *trtcm_create(struct top_cdnr *,
96		  struct tb_profile *, struct tb_profile *,
97		  struct tc_action *, struct tc_action *, struct tc_action *,
98		  int);
99static int trtcm_destroy(struct trtcm *);
100static struct tc_action *trtcm_input(struct cdnr_block *, struct cdnr_pktinfo *);
101static struct tswtcm *tswtcm_create(struct top_cdnr *,
102		  u_int32_t, u_int32_t, u_int32_t,
103		  struct tc_action *, struct tc_action *, struct tc_action *);
104static int tswtcm_destroy(struct tswtcm *);
105static struct tc_action *tswtcm_input(struct cdnr_block *, struct cdnr_pktinfo *);
106
107static int cdnrcmd_if_attach(char *);
108static int cdnrcmd_if_detach(char *);
109static int cdnrcmd_add_element(struct cdnr_add_element *);
110static int cdnrcmd_delete_element(struct cdnr_delete_element *);
111static int cdnrcmd_add_filter(struct cdnr_add_filter *);
112static int cdnrcmd_delete_filter(struct cdnr_delete_filter *);
113static int cdnrcmd_add_tbm(struct cdnr_add_tbmeter *);
114static int cdnrcmd_modify_tbm(struct cdnr_modify_tbmeter *);
115static int cdnrcmd_tbm_stats(struct cdnr_tbmeter_stats *);
116static int cdnrcmd_add_trtcm(struct cdnr_add_trtcm *);
117static int cdnrcmd_modify_trtcm(struct cdnr_modify_trtcm *);
118static int cdnrcmd_tcm_stats(struct cdnr_tcm_stats *);
119static int cdnrcmd_add_tswtcm(struct cdnr_add_tswtcm *);
120static int cdnrcmd_modify_tswtcm(struct cdnr_modify_tswtcm *);
121static int cdnrcmd_get_stats(struct cdnr_get_stats *);
122
123altqdev_decl(cdnr);
124
125/*
126 * top level input function called from ip_input.
127 * should be called before converting header fields to host-byte-order.
128 */
129int
130altq_cdnr_input(m, af)
131	struct mbuf	*m;
132	int		af;	/* address family */
133{
134	struct ifnet		*ifp;
135	struct ip		*ip;
136	struct top_cdnr		*top;
137	struct tc_action	*tca;
138	struct cdnr_block	*cb;
139	struct cdnr_pktinfo	pktinfo;
140
141	ifp = m->m_pkthdr.rcvif;
142	if (!ALTQ_IS_CNDTNING(&ifp->if_snd))
143		/* traffic conditioner is not enabled on this interface */
144		return (1);
145
146	top = ifp->if_snd.altq_cdnr;
147
148	ip = mtod(m, struct ip *);
149#ifdef INET6
150	if (af == AF_INET6) {
151		u_int32_t flowlabel;
152
153		flowlabel = ((struct ip6_hdr *)ip)->ip6_flow;
154		pktinfo.pkt_dscp = (ntohl(flowlabel) >> 20) & DSCP_MASK;
155	} else
156#endif
157		pktinfo.pkt_dscp = ip->ip_tos & DSCP_MASK;
158	pktinfo.pkt_len = m_pktlen(m);
159
160	tca = NULL;
161
162	cb = acc_classify(&top->tc_classifier, m, af);
163	if (cb != NULL)
164		tca = &cb->cb_action;
165
166	if (tca == NULL)
167		tca = &top->tc_block.cb_action;
168
169	while (1) {
170		PKTCNTR_ADD(&top->tc_cnts[tca->tca_code], pktinfo.pkt_len);
171
172		switch (tca->tca_code) {
173		case TCACODE_PASS:
174			return (1);
175		case TCACODE_DROP:
176			m_freem(m);
177			return (0);
178		case TCACODE_RETURN:
179			return (0);
180		case TCACODE_MARK:
181#ifdef INET6
182			if (af == AF_INET6) {
183				struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
184				u_int32_t flowlabel;
185
186				flowlabel = ntohl(ip6->ip6_flow);
187				flowlabel = (tca->tca_dscp << 20) |
188					(flowlabel & ~(DSCP_MASK << 20));
189				ip6->ip6_flow = htonl(flowlabel);
190			} else
191#endif
192				ip->ip_tos = tca->tca_dscp |
193					(ip->ip_tos & DSCP_CUMASK);
194			return (1);
195		case TCACODE_NEXT:
196			cb = tca->tca_next;
197			tca = (*cb->cb_input)(cb, &pktinfo);
198			break;
199		case TCACODE_NONE:
200		default:
201			return (1);
202		}
203	}
204}
205
206static struct top_cdnr *
207tcb_lookup(ifname)
208	char *ifname;
209{
210	struct top_cdnr *top;
211	struct ifnet *ifp;
212
213	if ((ifp = ifunit(ifname)) != NULL)
214		LIST_FOREACH(top, &tcb_list, tc_next)
215			if (top->tc_ifq->altq_ifp == ifp)
216				return (top);
217	return (NULL);
218}
219
220static struct cdnr_block *
221cdnr_handle2cb(handle)
222	u_long handle;
223{
224	struct cdnr_block *cb;
225
226	cb = (struct cdnr_block *)handle;
227	if (handle != ALIGN(cb))
228		return (NULL);
229
230	if (cb == NULL || cb->cb_handle != handle)
231		return (NULL);
232	return (cb);
233}
234
235static u_long
236cdnr_cb2handle(cb)
237	struct cdnr_block *cb;
238{
239	return (cb->cb_handle);
240}
241
242static void *
243cdnr_cballoc(top, type, input_func)
244	struct top_cdnr *top;
245	int type;
246	struct tc_action *(*input_func)(struct cdnr_block *,
247					struct cdnr_pktinfo *);
248{
249	struct cdnr_block *cb;
250	int size;
251
252	switch (type) {
253	case TCETYPE_TOP:
254		size = sizeof(struct top_cdnr);
255		break;
256	case TCETYPE_ELEMENT:
257		size = sizeof(struct cdnr_block);
258		break;
259	case TCETYPE_TBMETER:
260		size = sizeof(struct tbmeter);
261		break;
262	case TCETYPE_TRTCM:
263		size = sizeof(struct trtcm);
264		break;
265	case TCETYPE_TSWTCM:
266		size = sizeof(struct tswtcm);
267		break;
268	default:
269		return (NULL);
270	}
271
272	cb = malloc(size, M_DEVBUF, M_WAITOK);
273	if (cb == NULL)
274		return (NULL);
275	bzero(cb, size);
276
277	cb->cb_len = size;
278	cb->cb_type = type;
279	cb->cb_ref = 0;
280	cb->cb_handle = (u_long)cb;
281	if (top == NULL)
282		cb->cb_top = (struct top_cdnr *)cb;
283	else
284		cb->cb_top = top;
285
286	if (input_func != NULL) {
287		/*
288		 * if this cdnr has an action function,
289		 * make tc_action to call itself.
290		 */
291		cb->cb_action.tca_code = TCACODE_NEXT;
292		cb->cb_action.tca_next = cb;
293		cb->cb_input = input_func;
294	} else
295		cb->cb_action.tca_code = TCACODE_NONE;
296
297	/* if this isn't top, register the element to the top level cdnr */
298	if (top != NULL)
299		LIST_INSERT_HEAD(&top->tc_elements, cb, cb_next);
300
301	return ((void *)cb);
302}
303
304static void
305cdnr_cbdestroy(cblock)
306	void *cblock;
307{
308	struct cdnr_block *cb = cblock;
309
310	/* delete filters belonging to this cdnr */
311	acc_discard_filters(&cb->cb_top->tc_classifier, cb, 0);
312
313	/* remove from the top level cdnr */
314	if (cb->cb_top != cblock)
315		LIST_REMOVE(cb, cb_next);
316
317	free(cb, M_DEVBUF);
318}
319
320/*
321 * conditioner common destroy routine
322 */
323static int
324generic_element_destroy(cb)
325	struct cdnr_block *cb;
326{
327	int error = 0;
328
329	switch (cb->cb_type) {
330	case TCETYPE_TOP:
331		error = top_destroy((struct top_cdnr *)cb);
332		break;
333	case TCETYPE_ELEMENT:
334		error = element_destroy(cb);
335		break;
336	case TCETYPE_TBMETER:
337		error = tbm_destroy((struct tbmeter *)cb);
338		break;
339	case TCETYPE_TRTCM:
340		error = trtcm_destroy((struct trtcm *)cb);
341		break;
342	case TCETYPE_TSWTCM:
343		error = tswtcm_destroy((struct tswtcm *)cb);
344		break;
345	default:
346		error = EINVAL;
347	}
348	return (error);
349}
350
351static int
352tca_verify_action(utca)
353	struct tc_action *utca;
354{
355	switch (utca->tca_code) {
356	case TCACODE_PASS:
357	case TCACODE_DROP:
358	case TCACODE_MARK:
359		/* these are ok */
360		break;
361
362	case TCACODE_HANDLE:
363		/* verify handle value */
364		if (cdnr_handle2cb(utca->tca_handle) == NULL)
365			return (-1);
366		break;
367
368	case TCACODE_NONE:
369	case TCACODE_RETURN:
370	case TCACODE_NEXT:
371	default:
372		/* should not be passed from a user */
373		return (-1);
374	}
375	return (0);
376}
377
378static void
379tca_import_action(ktca, utca)
380	struct tc_action *ktca, *utca;
381{
382	struct cdnr_block *cb;
383
384	*ktca = *utca;
385	if (ktca->tca_code == TCACODE_HANDLE) {
386		cb = cdnr_handle2cb(ktca->tca_handle);
387		if (cb == NULL) {
388			ktca->tca_code = TCACODE_NONE;
389			return;
390		}
391		ktca->tca_code = TCACODE_NEXT;
392		ktca->tca_next = cb;
393		cb->cb_ref++;
394	} else if (ktca->tca_code == TCACODE_MARK) {
395		ktca->tca_dscp &= DSCP_MASK;
396	}
397	return;
398}
399
400static void
401tca_invalidate_action(tca)
402	struct tc_action *tca;
403{
404	struct cdnr_block *cb;
405
406	if (tca->tca_code == TCACODE_NEXT) {
407		cb = tca->tca_next;
408		if (cb == NULL)
409			return;
410		cb->cb_ref--;
411	}
412	tca->tca_code = TCACODE_NONE;
413}
414
415/*
416 * top level traffic conditioner
417 */
418static struct top_cdnr *
419top_create(ifq)
420	struct ifaltq *ifq;
421{
422	struct top_cdnr *top;
423
424	if ((top = cdnr_cballoc(NULL, TCETYPE_TOP, NULL)) == NULL)
425		return (NULL);
426
427	top->tc_ifq = ifq;
428	/* set default action for the top level conditioner */
429	top->tc_block.cb_action.tca_code = TCACODE_PASS;
430
431	LIST_INSERT_HEAD(&tcb_list, top, tc_next);
432
433	ifq->altq_cdnr = top;
434
435	return (top);
436}
437
438static int
439top_destroy(top)
440	struct top_cdnr *top;
441{
442	struct cdnr_block *cb;
443
444	if (ALTQ_IS_CNDTNING(top->tc_ifq))
445		ALTQ_CLEAR_CNDTNING(top->tc_ifq);
446	top->tc_ifq->altq_cdnr = NULL;
447
448	/*
449	 * destroy all the conditioner elements belonging to this interface
450	 */
451	while ((cb = LIST_FIRST(&top->tc_elements)) != NULL) {
452		while (cb != NULL && cb->cb_ref > 0)
453			cb = LIST_NEXT(cb, cb_next);
454		if (cb != NULL)
455			generic_element_destroy(cb);
456	}
457
458	LIST_REMOVE(top, tc_next);
459
460	cdnr_cbdestroy(top);
461
462	/* if there is no active conditioner, remove the input hook */
463	if (altq_input != NULL) {
464		LIST_FOREACH(top, &tcb_list, tc_next)
465			if (ALTQ_IS_CNDTNING(top->tc_ifq))
466				break;
467		if (top == NULL)
468			altq_input = NULL;
469	}
470
471	return (0);
472}
473
474/*
475 * simple tc elements without input function (e.g., dropper and makers).
476 */
477static struct cdnr_block *
478element_create(top, action)
479	struct top_cdnr *top;
480	struct tc_action *action;
481{
482	struct cdnr_block *cb;
483
484	if (tca_verify_action(action) < 0)
485		return (NULL);
486
487	if ((cb = cdnr_cballoc(top, TCETYPE_ELEMENT, NULL)) == NULL)
488		return (NULL);
489
490	tca_import_action(&cb->cb_action, action);
491
492	return (cb);
493}
494
495static int
496element_destroy(cb)
497	struct cdnr_block *cb;
498{
499	if (cb->cb_ref > 0)
500		return (EBUSY);
501
502	tca_invalidate_action(&cb->cb_action);
503
504	cdnr_cbdestroy(cb);
505	return (0);
506}
507
508/*
509 * internal representation of token bucket parameters
510 *	rate: 	byte_per_unittime << 32
511 *		(((bits_per_sec) / 8) << 32) / machclk_freq
512 *	depth:	byte << 32
513 *
514 */
515#define	TB_SHIFT	32
516#define	TB_SCALE(x)	((u_int64_t)(x) << TB_SHIFT)
517#define	TB_UNSCALE(x)	((x) >> TB_SHIFT)
518
519static void
520tb_import_profile(tb, profile)
521	struct tbe *tb;
522	struct tb_profile *profile;
523{
524	tb->rate = TB_SCALE(profile->rate / 8) / machclk_freq;
525	tb->depth = TB_SCALE(profile->depth);
526	if (tb->rate > 0)
527		tb->filluptime = tb->depth / tb->rate;
528	else
529		tb->filluptime = 0xffffffffffffffffLL;
530	tb->token = tb->depth;
531	tb->last = read_machclk();
532}
533
534/*
535 * simple token bucket meter
536 */
537static struct tbmeter *
538tbm_create(top, profile, in_action, out_action)
539	struct top_cdnr *top;
540	struct tb_profile *profile;
541	struct tc_action *in_action, *out_action;
542{
543	struct tbmeter *tbm = NULL;
544
545	if (tca_verify_action(in_action) < 0
546	    || tca_verify_action(out_action) < 0)
547		return (NULL);
548
549	if ((tbm = cdnr_cballoc(top, TCETYPE_TBMETER,
550				tbm_input)) == NULL)
551		return (NULL);
552
553	tb_import_profile(&tbm->tb, profile);
554
555	tca_import_action(&tbm->in_action, in_action);
556	tca_import_action(&tbm->out_action, out_action);
557
558	return (tbm);
559}
560
561static int
562tbm_destroy(tbm)
563	struct tbmeter *tbm;
564{
565	if (tbm->cdnrblk.cb_ref > 0)
566		return (EBUSY);
567
568	tca_invalidate_action(&tbm->in_action);
569	tca_invalidate_action(&tbm->out_action);
570
571	cdnr_cbdestroy(tbm);
572	return (0);
573}
574
575static struct tc_action *
576tbm_input(cb, pktinfo)
577	struct cdnr_block *cb;
578	struct cdnr_pktinfo *pktinfo;
579{
580	struct tbmeter *tbm = (struct tbmeter *)cb;
581	u_int64_t	len;
582	u_int64_t	interval, now;
583
584	len = TB_SCALE(pktinfo->pkt_len);
585
586	if (tbm->tb.token < len) {
587		now = read_machclk();
588		interval = now - tbm->tb.last;
589		if (interval >= tbm->tb.filluptime)
590			tbm->tb.token = tbm->tb.depth;
591		else {
592			tbm->tb.token += interval * tbm->tb.rate;
593			if (tbm->tb.token > tbm->tb.depth)
594				tbm->tb.token = tbm->tb.depth;
595		}
596		tbm->tb.last = now;
597	}
598
599	if (tbm->tb.token < len) {
600		PKTCNTR_ADD(&tbm->out_cnt, pktinfo->pkt_len);
601		return (&tbm->out_action);
602	}
603
604	tbm->tb.token -= len;
605	PKTCNTR_ADD(&tbm->in_cnt, pktinfo->pkt_len);
606	return (&tbm->in_action);
607}
608
609/*
610 * two rate three color marker
611 * as described in draft-heinanen-diffserv-trtcm-01.txt
612 */
613static struct trtcm *
614trtcm_create(top, cmtd_profile, peak_profile,
615	     green_action, yellow_action, red_action, coloraware)
616	struct top_cdnr *top;
617	struct tb_profile *cmtd_profile, *peak_profile;
618	struct tc_action *green_action, *yellow_action, *red_action;
619	int	coloraware;
620{
621	struct trtcm *tcm = NULL;
622
623	if (tca_verify_action(green_action) < 0
624	    || tca_verify_action(yellow_action) < 0
625	    || tca_verify_action(red_action) < 0)
626		return (NULL);
627
628	if ((tcm = cdnr_cballoc(top, TCETYPE_TRTCM,
629				trtcm_input)) == NULL)
630		return (NULL);
631
632	tb_import_profile(&tcm->cmtd_tb, cmtd_profile);
633	tb_import_profile(&tcm->peak_tb, peak_profile);
634
635	tca_import_action(&tcm->green_action, green_action);
636	tca_import_action(&tcm->yellow_action, yellow_action);
637	tca_import_action(&tcm->red_action, red_action);
638
639	/* set dscps to use */
640	if (tcm->green_action.tca_code == TCACODE_MARK)
641		tcm->green_dscp = tcm->green_action.tca_dscp & DSCP_MASK;
642	else
643		tcm->green_dscp = DSCP_AF11;
644	if (tcm->yellow_action.tca_code == TCACODE_MARK)
645		tcm->yellow_dscp = tcm->yellow_action.tca_dscp & DSCP_MASK;
646	else
647		tcm->yellow_dscp = DSCP_AF12;
648	if (tcm->red_action.tca_code == TCACODE_MARK)
649		tcm->red_dscp = tcm->red_action.tca_dscp & DSCP_MASK;
650	else
651		tcm->red_dscp = DSCP_AF13;
652
653	tcm->coloraware = coloraware;
654
655	return (tcm);
656}
657
658static int
659trtcm_destroy(tcm)
660	struct trtcm *tcm;
661{
662	if (tcm->cdnrblk.cb_ref > 0)
663		return (EBUSY);
664
665	tca_invalidate_action(&tcm->green_action);
666	tca_invalidate_action(&tcm->yellow_action);
667	tca_invalidate_action(&tcm->red_action);
668
669	cdnr_cbdestroy(tcm);
670	return (0);
671}
672
673static struct tc_action *
674trtcm_input(cb, pktinfo)
675	struct cdnr_block *cb;
676	struct cdnr_pktinfo *pktinfo;
677{
678	struct trtcm *tcm = (struct trtcm *)cb;
679	u_int64_t	len;
680	u_int64_t	interval, now;
681	u_int8_t	color;
682
683	len = TB_SCALE(pktinfo->pkt_len);
684	if (tcm->coloraware) {
685		color = pktinfo->pkt_dscp;
686		if (color != tcm->yellow_dscp && color != tcm->red_dscp)
687			color = tcm->green_dscp;
688	} else {
689		/* if color-blind, precolor it as green */
690		color = tcm->green_dscp;
691	}
692
693	now = read_machclk();
694	if (tcm->cmtd_tb.token < len) {
695		interval = now - tcm->cmtd_tb.last;
696		if (interval >= tcm->cmtd_tb.filluptime)
697			tcm->cmtd_tb.token = tcm->cmtd_tb.depth;
698		else {
699			tcm->cmtd_tb.token += interval * tcm->cmtd_tb.rate;
700			if (tcm->cmtd_tb.token > tcm->cmtd_tb.depth)
701				tcm->cmtd_tb.token = tcm->cmtd_tb.depth;
702		}
703		tcm->cmtd_tb.last = now;
704	}
705	if (tcm->peak_tb.token < len) {
706		interval = now - tcm->peak_tb.last;
707		if (interval >= tcm->peak_tb.filluptime)
708			tcm->peak_tb.token = tcm->peak_tb.depth;
709		else {
710			tcm->peak_tb.token += interval * tcm->peak_tb.rate;
711			if (tcm->peak_tb.token > tcm->peak_tb.depth)
712				tcm->peak_tb.token = tcm->peak_tb.depth;
713		}
714		tcm->peak_tb.last = now;
715	}
716
717	if (color == tcm->red_dscp || tcm->peak_tb.token < len) {
718		pktinfo->pkt_dscp = tcm->red_dscp;
719		PKTCNTR_ADD(&tcm->red_cnt, pktinfo->pkt_len);
720		return (&tcm->red_action);
721	}
722
723	if (color == tcm->yellow_dscp || tcm->cmtd_tb.token < len) {
724		pktinfo->pkt_dscp = tcm->yellow_dscp;
725		tcm->peak_tb.token -= len;
726		PKTCNTR_ADD(&tcm->yellow_cnt, pktinfo->pkt_len);
727		return (&tcm->yellow_action);
728	}
729
730	pktinfo->pkt_dscp = tcm->green_dscp;
731	tcm->cmtd_tb.token -= len;
732	tcm->peak_tb.token -= len;
733	PKTCNTR_ADD(&tcm->green_cnt, pktinfo->pkt_len);
734	return (&tcm->green_action);
735}
736
737/*
738 * time sliding window three color marker
739 * as described in draft-fang-diffserv-tc-tswtcm-00.txt
740 */
741static struct tswtcm *
742tswtcm_create(top, cmtd_rate, peak_rate, avg_interval,
743	      green_action, yellow_action, red_action)
744	struct top_cdnr *top;
745	u_int32_t	cmtd_rate, peak_rate, avg_interval;
746	struct tc_action *green_action, *yellow_action, *red_action;
747{
748	struct tswtcm *tsw;
749
750	if (tca_verify_action(green_action) < 0
751	    || tca_verify_action(yellow_action) < 0
752	    || tca_verify_action(red_action) < 0)
753		return (NULL);
754
755	if ((tsw = cdnr_cballoc(top, TCETYPE_TSWTCM,
756				tswtcm_input)) == NULL)
757		return (NULL);
758
759	tca_import_action(&tsw->green_action, green_action);
760	tca_import_action(&tsw->yellow_action, yellow_action);
761	tca_import_action(&tsw->red_action, red_action);
762
763	/* set dscps to use */
764	if (tsw->green_action.tca_code == TCACODE_MARK)
765		tsw->green_dscp = tsw->green_action.tca_dscp & DSCP_MASK;
766	else
767		tsw->green_dscp = DSCP_AF11;
768	if (tsw->yellow_action.tca_code == TCACODE_MARK)
769		tsw->yellow_dscp = tsw->yellow_action.tca_dscp & DSCP_MASK;
770	else
771		tsw->yellow_dscp = DSCP_AF12;
772	if (tsw->red_action.tca_code == TCACODE_MARK)
773		tsw->red_dscp = tsw->red_action.tca_dscp & DSCP_MASK;
774	else
775		tsw->red_dscp = DSCP_AF13;
776
777	/* convert rates from bits/sec to bytes/sec */
778	tsw->cmtd_rate = cmtd_rate / 8;
779	tsw->peak_rate = peak_rate / 8;
780	tsw->avg_rate = 0;
781
782	/* timewin is converted from msec to machine clock unit */
783	tsw->timewin = (u_int64_t)machclk_freq * avg_interval / 1000;
784
785	return (tsw);
786}
787
788static int
789tswtcm_destroy(tsw)
790	struct tswtcm *tsw;
791{
792	if (tsw->cdnrblk.cb_ref > 0)
793		return (EBUSY);
794
795	tca_invalidate_action(&tsw->green_action);
796	tca_invalidate_action(&tsw->yellow_action);
797	tca_invalidate_action(&tsw->red_action);
798
799	cdnr_cbdestroy(tsw);
800	return (0);
801}
802
803static struct tc_action *
804tswtcm_input(cb, pktinfo)
805	struct cdnr_block *cb;
806	struct cdnr_pktinfo *pktinfo;
807{
808	struct tswtcm	*tsw = (struct tswtcm *)cb;
809	int		len;
810	u_int32_t	avg_rate;
811	u_int64_t	interval, now, tmp;
812
813	/*
814	 * rate estimator
815	 */
816	len = pktinfo->pkt_len;
817	now = read_machclk();
818
819	interval = now - tsw->t_front;
820	/*
821	 * calculate average rate:
822	 *	avg = (avg * timewin + pkt_len)/(timewin + interval)
823	 * pkt_len needs to be multiplied by machclk_freq in order to
824	 * get (bytes/sec).
825	 * note: when avg_rate (bytes/sec) and timewin (machclk unit) are
826	 * less than 32 bits, the following 64-bit operation has enough
827	 * precision.
828	 */
829	tmp = ((u_int64_t)tsw->avg_rate * tsw->timewin
830	       + (u_int64_t)len * machclk_freq) / (tsw->timewin + interval);
831	tsw->avg_rate = avg_rate = (u_int32_t)tmp;
832	tsw->t_front = now;
833
834	/*
835	 * marker
836	 */
837	if (avg_rate > tsw->cmtd_rate) {
838		u_int32_t randval = arc4random() % avg_rate;
839
840		if (avg_rate > tsw->peak_rate) {
841			if (randval < avg_rate - tsw->peak_rate) {
842				/* mark red */
843				pktinfo->pkt_dscp = tsw->red_dscp;
844				PKTCNTR_ADD(&tsw->red_cnt, len);
845				return (&tsw->red_action);
846			} else if (randval < avg_rate - tsw->cmtd_rate)
847				goto mark_yellow;
848		} else {
849			/* peak_rate >= avg_rate > cmtd_rate */
850			if (randval < avg_rate - tsw->cmtd_rate) {
851			mark_yellow:
852				pktinfo->pkt_dscp = tsw->yellow_dscp;
853				PKTCNTR_ADD(&tsw->yellow_cnt, len);
854				return (&tsw->yellow_action);
855			}
856		}
857	}
858
859	/* mark green */
860	pktinfo->pkt_dscp = tsw->green_dscp;
861	PKTCNTR_ADD(&tsw->green_cnt, len);
862	return (&tsw->green_action);
863}
864
865/*
866 * ioctl requests
867 */
868static int
869cdnrcmd_if_attach(ifname)
870	char *ifname;
871{
872	struct ifnet *ifp;
873	struct top_cdnr *top;
874
875	if ((ifp = ifunit(ifname)) == NULL)
876		return (EBADF);
877
878	if (ifp->if_snd.altq_cdnr != NULL)
879		return (EBUSY);
880
881	if ((top = top_create(&ifp->if_snd)) == NULL)
882		return (ENOMEM);
883	return (0);
884}
885
886static int
887cdnrcmd_if_detach(ifname)
888	char *ifname;
889{
890	struct top_cdnr *top;
891
892	if ((top = tcb_lookup(ifname)) == NULL)
893		return (EBADF);
894
895	return top_destroy(top);
896}
897
898static int
899cdnrcmd_add_element(ap)
900	struct cdnr_add_element *ap;
901{
902	struct top_cdnr *top;
903	struct cdnr_block *cb;
904
905	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
906		return (EBADF);
907
908	cb = element_create(top, &ap->action);
909	if (cb == NULL)
910		return (EINVAL);
911	/* return a class handle to the user */
912	ap->cdnr_handle = cdnr_cb2handle(cb);
913	return (0);
914}
915
916static int
917cdnrcmd_delete_element(ap)
918	struct cdnr_delete_element *ap;
919{
920	struct top_cdnr *top;
921	struct cdnr_block *cb;
922
923	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
924		return (EBADF);
925
926	if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
927		return (EINVAL);
928
929	if (cb->cb_type != TCETYPE_ELEMENT)
930		return generic_element_destroy(cb);
931
932	return element_destroy(cb);
933}
934
935static int
936cdnrcmd_add_filter(ap)
937	struct cdnr_add_filter *ap;
938{
939	struct top_cdnr *top;
940	struct cdnr_block *cb;
941
942	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
943		return (EBADF);
944
945	if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
946		return (EINVAL);
947
948	return acc_add_filter(&top->tc_classifier, &ap->filter,
949			      cb, &ap->filter_handle);
950}
951
952static int
953cdnrcmd_delete_filter(ap)
954	struct cdnr_delete_filter *ap;
955{
956	struct top_cdnr *top;
957
958	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
959		return (EBADF);
960
961	return acc_delete_filter(&top->tc_classifier, ap->filter_handle);
962}
963
964static int
965cdnrcmd_add_tbm(ap)
966	struct cdnr_add_tbmeter *ap;
967{
968	struct top_cdnr *top;
969	struct tbmeter *tbm;
970
971	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
972		return (EBADF);
973
974	tbm = tbm_create(top, &ap->profile, &ap->in_action, &ap->out_action);
975	if (tbm == NULL)
976		return (EINVAL);
977	/* return a class handle to the user */
978	ap->cdnr_handle = cdnr_cb2handle(&tbm->cdnrblk);
979	return (0);
980}
981
982static int
983cdnrcmd_modify_tbm(ap)
984	struct cdnr_modify_tbmeter *ap;
985{
986	struct tbmeter *tbm;
987
988	if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
989		return (EINVAL);
990
991	tb_import_profile(&tbm->tb, &ap->profile);
992
993	return (0);
994}
995
996static int
997cdnrcmd_tbm_stats(ap)
998	struct cdnr_tbmeter_stats *ap;
999{
1000	struct tbmeter *tbm;
1001
1002	if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
1003		return (EINVAL);
1004
1005	ap->in_cnt = tbm->in_cnt;
1006	ap->out_cnt = tbm->out_cnt;
1007
1008	return (0);
1009}
1010
1011static int
1012cdnrcmd_add_trtcm(ap)
1013	struct cdnr_add_trtcm *ap;
1014{
1015	struct top_cdnr *top;
1016	struct trtcm *tcm;
1017
1018	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
1019		return (EBADF);
1020
1021	tcm = trtcm_create(top, &ap->cmtd_profile, &ap->peak_profile,
1022			   &ap->green_action, &ap->yellow_action,
1023			   &ap->red_action, ap->coloraware);
1024	if (tcm == NULL)
1025		return (EINVAL);
1026
1027	/* return a class handle to the user */
1028	ap->cdnr_handle = cdnr_cb2handle(&tcm->cdnrblk);
1029	return (0);
1030}
1031
1032static int
1033cdnrcmd_modify_trtcm(ap)
1034	struct cdnr_modify_trtcm *ap;
1035{
1036	struct trtcm *tcm;
1037
1038	if ((tcm = (struct trtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
1039		return (EINVAL);
1040
1041	tb_import_profile(&tcm->cmtd_tb, &ap->cmtd_profile);
1042	tb_import_profile(&tcm->peak_tb, &ap->peak_profile);
1043
1044	return (0);
1045}
1046
1047static int
1048cdnrcmd_tcm_stats(ap)
1049	struct cdnr_tcm_stats *ap;
1050{
1051	struct cdnr_block *cb;
1052
1053	if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
1054		return (EINVAL);
1055
1056	if (cb->cb_type == TCETYPE_TRTCM) {
1057	    struct trtcm *tcm = (struct trtcm *)cb;
1058
1059	    ap->green_cnt = tcm->green_cnt;
1060	    ap->yellow_cnt = tcm->yellow_cnt;
1061	    ap->red_cnt = tcm->red_cnt;
1062	} else if (cb->cb_type == TCETYPE_TSWTCM) {
1063	    struct tswtcm *tsw = (struct tswtcm *)cb;
1064
1065	    ap->green_cnt = tsw->green_cnt;
1066	    ap->yellow_cnt = tsw->yellow_cnt;
1067	    ap->red_cnt = tsw->red_cnt;
1068	} else
1069	    return (EINVAL);
1070
1071	return (0);
1072}
1073
1074static int
1075cdnrcmd_add_tswtcm(ap)
1076	struct cdnr_add_tswtcm *ap;
1077{
1078	struct top_cdnr *top;
1079	struct tswtcm *tsw;
1080
1081	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
1082		return (EBADF);
1083
1084	if (ap->cmtd_rate > ap->peak_rate)
1085		return (EINVAL);
1086
1087	tsw = tswtcm_create(top, ap->cmtd_rate, ap->peak_rate,
1088			    ap->avg_interval, &ap->green_action,
1089			    &ap->yellow_action, &ap->red_action);
1090	if (tsw == NULL)
1091	    return (EINVAL);
1092
1093	/* return a class handle to the user */
1094	ap->cdnr_handle = cdnr_cb2handle(&tsw->cdnrblk);
1095	return (0);
1096}
1097
1098static int
1099cdnrcmd_modify_tswtcm(ap)
1100	struct cdnr_modify_tswtcm *ap;
1101{
1102	struct tswtcm *tsw;
1103
1104	if ((tsw = (struct tswtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
1105		return (EINVAL);
1106
1107	if (ap->cmtd_rate > ap->peak_rate)
1108		return (EINVAL);
1109
1110	/* convert rates from bits/sec to bytes/sec */
1111	tsw->cmtd_rate = ap->cmtd_rate / 8;
1112	tsw->peak_rate = ap->peak_rate / 8;
1113	tsw->avg_rate = 0;
1114
1115	/* timewin is converted from msec to machine clock unit */
1116	tsw->timewin = (u_int64_t)machclk_freq * ap->avg_interval / 1000;
1117
1118	return (0);
1119}
1120
1121static int
1122cdnrcmd_get_stats(ap)
1123	struct cdnr_get_stats *ap;
1124{
1125	struct top_cdnr *top;
1126	struct cdnr_block *cb;
1127	struct tbmeter *tbm;
1128	struct trtcm *tcm;
1129	struct tswtcm *tsw;
1130	struct tce_stats tce, *usp;
1131	int error, n, nskip, nelements;
1132
1133	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
1134		return (EBADF);
1135
1136	/* copy action stats */
1137	bcopy(top->tc_cnts, ap->cnts, sizeof(ap->cnts));
1138
1139	/* stats for each element */
1140	nelements = ap->nelements;
1141	usp = ap->tce_stats;
1142	if (nelements <= 0 || usp == NULL)
1143		return (0);
1144
1145	nskip = ap->nskip;
1146	n = 0;
1147	LIST_FOREACH(cb, &top->tc_elements, cb_next) {
1148		if (nskip > 0) {
1149			nskip--;
1150			continue;
1151		}
1152
1153		bzero(&tce, sizeof(tce));
1154		tce.tce_handle = cb->cb_handle;
1155		tce.tce_type = cb->cb_type;
1156		switch (cb->cb_type) {
1157		case TCETYPE_TBMETER:
1158			tbm = (struct tbmeter *)cb;
1159			tce.tce_cnts[0] = tbm->in_cnt;
1160			tce.tce_cnts[1] = tbm->out_cnt;
1161			break;
1162		case TCETYPE_TRTCM:
1163			tcm = (struct trtcm *)cb;
1164			tce.tce_cnts[0] = tcm->green_cnt;
1165			tce.tce_cnts[1] = tcm->yellow_cnt;
1166			tce.tce_cnts[2] = tcm->red_cnt;
1167			break;
1168		case TCETYPE_TSWTCM:
1169			tsw = (struct tswtcm *)cb;
1170			tce.tce_cnts[0] = tsw->green_cnt;
1171			tce.tce_cnts[1] = tsw->yellow_cnt;
1172			tce.tce_cnts[2] = tsw->red_cnt;
1173			break;
1174		default:
1175			continue;
1176		}
1177
1178		if ((error = copyout((caddr_t)&tce, (caddr_t)usp++,
1179				     sizeof(tce))) != 0)
1180			return (error);
1181
1182		if (++n == nelements)
1183			break;
1184	}
1185	ap->nelements = n;
1186
1187	return (0);
1188}
1189
1190/*
1191 * conditioner device interface
1192 */
1193int
1194cdnropen(dev, flag, fmt, p)
1195	dev_t dev;
1196	int flag, fmt;
1197#if (__FreeBSD_version > 500000)
1198	struct thread *p;
1199#else
1200	struct proc *p;
1201#endif
1202{
1203	if (machclk_freq == 0)
1204		init_machclk();
1205
1206	if (machclk_freq == 0) {
1207		printf("cdnr: no cpu clock available!\n");
1208		return (ENXIO);
1209	}
1210
1211	/* everything will be done when the queueing scheme is attached. */
1212	return 0;
1213}
1214
1215int
1216cdnrclose(dev, flag, fmt, p)
1217	dev_t dev;
1218	int flag, fmt;
1219#if (__FreeBSD_version > 500000)
1220	struct thread *p;
1221#else
1222	struct proc *p;
1223#endif
1224{
1225	struct top_cdnr *top;
1226	int err, error = 0;
1227
1228	while ((top = LIST_FIRST(&tcb_list)) != NULL) {
1229		/* destroy all */
1230		err = top_destroy(top);
1231		if (err != 0 && error == 0)
1232			error = err;
1233	}
1234	altq_input = NULL;
1235
1236	return (error);
1237}
1238
1239int
1240cdnrioctl(dev, cmd, addr, flag, p)
1241	dev_t dev;
1242	ioctlcmd_t cmd;
1243	caddr_t addr;
1244	int flag;
1245#if (__FreeBSD_version > 500000)
1246	struct thread *p;
1247#else
1248	struct proc *p;
1249#endif
1250{
1251	struct top_cdnr *top;
1252	struct cdnr_interface *ifacep;
1253	int	s, error = 0;
1254
1255	/* check super-user privilege */
1256	switch (cmd) {
1257	case CDNR_GETSTATS:
1258		break;
1259	default:
1260#if (__FreeBSD_version > 700000)
1261		if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
1262#elsif (__FreeBSD_version > 400000)
1263		if ((error = suser(p)) != 0)
1264#else
1265		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
1266#endif
1267			return (error);
1268		break;
1269	}
1270
1271	s = splnet();
1272	switch (cmd) {
1273
1274	case CDNR_IF_ATTACH:
1275		ifacep = (struct cdnr_interface *)addr;
1276		error = cdnrcmd_if_attach(ifacep->cdnr_ifname);
1277		break;
1278
1279	case CDNR_IF_DETACH:
1280		ifacep = (struct cdnr_interface *)addr;
1281		error = cdnrcmd_if_detach(ifacep->cdnr_ifname);
1282		break;
1283
1284	case CDNR_ENABLE:
1285	case CDNR_DISABLE:
1286		ifacep = (struct cdnr_interface *)addr;
1287		if ((top = tcb_lookup(ifacep->cdnr_ifname)) == NULL) {
1288			error = EBADF;
1289			break;
1290		}
1291
1292		switch (cmd) {
1293
1294		case CDNR_ENABLE:
1295			ALTQ_SET_CNDTNING(top->tc_ifq);
1296			if (altq_input == NULL)
1297				altq_input = altq_cdnr_input;
1298			break;
1299
1300		case CDNR_DISABLE:
1301			ALTQ_CLEAR_CNDTNING(top->tc_ifq);
1302			LIST_FOREACH(top, &tcb_list, tc_next)
1303				if (ALTQ_IS_CNDTNING(top->tc_ifq))
1304					break;
1305			if (top == NULL)
1306				altq_input = NULL;
1307			break;
1308		}
1309		break;
1310
1311	case CDNR_ADD_ELEM:
1312		error = cdnrcmd_add_element((struct cdnr_add_element *)addr);
1313		break;
1314
1315	case CDNR_DEL_ELEM:
1316		error = cdnrcmd_delete_element((struct cdnr_delete_element *)addr);
1317		break;
1318
1319	case CDNR_ADD_TBM:
1320		error = cdnrcmd_add_tbm((struct cdnr_add_tbmeter *)addr);
1321		break;
1322
1323	case CDNR_MOD_TBM:
1324		error = cdnrcmd_modify_tbm((struct cdnr_modify_tbmeter *)addr);
1325		break;
1326
1327	case CDNR_TBM_STATS:
1328		error = cdnrcmd_tbm_stats((struct cdnr_tbmeter_stats *)addr);
1329		break;
1330
1331	case CDNR_ADD_TCM:
1332		error = cdnrcmd_add_trtcm((struct cdnr_add_trtcm *)addr);
1333		break;
1334
1335	case CDNR_MOD_TCM:
1336		error = cdnrcmd_modify_trtcm((struct cdnr_modify_trtcm *)addr);
1337		break;
1338
1339	case CDNR_TCM_STATS:
1340		error = cdnrcmd_tcm_stats((struct cdnr_tcm_stats *)addr);
1341		break;
1342
1343	case CDNR_ADD_FILTER:
1344		error = cdnrcmd_add_filter((struct cdnr_add_filter *)addr);
1345		break;
1346
1347	case CDNR_DEL_FILTER:
1348		error = cdnrcmd_delete_filter((struct cdnr_delete_filter *)addr);
1349		break;
1350
1351	case CDNR_GETSTATS:
1352		error = cdnrcmd_get_stats((struct cdnr_get_stats *)addr);
1353		break;
1354
1355	case CDNR_ADD_TSW:
1356		error = cdnrcmd_add_tswtcm((struct cdnr_add_tswtcm *)addr);
1357		break;
1358
1359	case CDNR_MOD_TSW:
1360		error = cdnrcmd_modify_tswtcm((struct cdnr_modify_tswtcm *)addr);
1361		break;
1362
1363	default:
1364		error = EINVAL;
1365		break;
1366	}
1367	splx(s);
1368
1369	return error;
1370}
1371
#ifdef KLD_MODULE

/*
 * Loadable-module glue: the switch entry carries the name "cdnr" and
 * this file's open/close/ioctl entry points.
 */
static struct altqsw cdnr_sw =
	{"cdnr", cdnropen, cdnrclose, cdnrioctl};

/*
 * NOTE(review): presumably registers cdnr_sw as the handler for
 * ALTQT_CDNR -- confirm against the definition of ALTQ_MODULE.
 */
ALTQ_MODULE(altq_cdnr, ALTQT_CDNR, &cdnr_sw);

#endif /* KLD_MODULE */
1380
#endif /* ALTQ_CDNR */
#endif /* ALTQ3_COMPAT */
1383