/*
 * Copyright (c) 2011-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/mbuf.h>
#include <sys/errno.h>
#include <sys/random.h>
#include <sys/kernel_types.h>
#include <sys/sysctl.h>

#include <kern/zalloc.h>

#include <net/if.h>
#include <net/net_osdep.h>
#include <net/classq/classq.h>
#if CLASSQ_RED
#include <net/classq/classq_red.h>
#endif /* CLASSQ_RED */
#if CLASSQ_RIO
#include <net/classq/classq_rio.h>
#endif /* CLASSQ_RIO */
#if CLASSQ_BLUE
#include <net/classq/classq_blue.h>
#endif /* CLASSQ_BLUE */
#include <net/classq/classq_sfb.h>
#include <net/pktsched/pktsched.h>

#include <libkern/libkern.h>

#if PF_ALTQ
#include <net/altq/altq.h>
#endif /* PF_ALTQ */

static errno_t ifclassq_dequeue_common(struct ifclassq *, mbuf_svc_class_t,
    u_int32_t, struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *,
    boolean_t);
static struct mbuf *ifclassq_poll_common(struct ifclassq *,
    mbuf_svc_class_t, boolean_t);
static struct mbuf *ifclassq_tbr_dequeue_common(struct ifclassq *, int,
    mbuf_svc_class_t, boolean_t);

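/*
 * One-time initialization of the classq framework: sanity-check the
 * service class constants at compile time, then initialize whichever
 * queueing disciplines are compiled in (RED/RIO/BLUE when enabled,
 * plus SFB).
 */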
void
classq_init(void)
{
	_CASSERT(MBUF_TC_BE == 0);
	_CASSERT(MBUF_SC_BE == 0);
	_CASSERT(IFCQ_SC_MAX == MBUF_SC_MAX_CLASSES);

#if CLASSQ_RED
	red_init();
#endif /* CLASSQ_RED */
#if CLASSQ_RIO
	rio_init();
#endif /* CLASSQ_RIO */
#if CLASSQ_BLUE
	blue_init();
#endif /* CLASSQ_BLUE */
	sfb_init();
}

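/*
 * Set up the interface send queue (ifp->if_snd): record the owning
 * ifnet, reset the length and counters, and verify that the queue is
 * in its pristine state.  For interfaces using the new output model
 * (IFEF_TXSTART), also pick a queue limit (defaulting to
 * if_sndq_maxlen) and attach the appropriate packet scheduler.
 */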
int
ifclassq_setup(struct ifnet *ifp, u_int32_t sflags, boolean_t reuse)
{
#pragma unused(reuse)
	struct ifclassq *ifq = &ifp->if_snd;
	int err = 0;

	IFCQ_LOCK(ifq);
	VERIFY(IFCQ_IS_EMPTY(ifq));
	ifq->ifcq_ifp = ifp;
	IFCQ_LEN(ifq) = 0;
	bzero(&ifq->ifcq_xmitcnt, sizeof (ifq->ifcq_xmitcnt));
	bzero(&ifq->ifcq_dropcnt, sizeof (ifq->ifcq_dropcnt));

	VERIFY(!IFCQ_TBR_IS_ENABLED(ifq));
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
	VERIFY(ifq->ifcq_flags == 0);
	VERIFY(ifq->ifcq_sflags == 0);
	VERIFY(ifq->ifcq_disc == NULL);
	VERIFY(ifq->ifcq_enqueue == NULL);
	VERIFY(ifq->ifcq_dequeue == NULL);
	VERIFY(ifq->ifcq_dequeue_sc == NULL);
	VERIFY(ifq->ifcq_request == NULL);

	if (ifp->if_eflags & IFEF_TXSTART) {
		u_int32_t maxlen = 0;

		if ((maxlen = IFCQ_MAXLEN(ifq)) == 0)
			maxlen = if_sndq_maxlen;
		IFCQ_SET_MAXLEN(ifq, maxlen);

		ifq->ifcq_sflags = sflags;
		err = ifclassq_pktsched_setup(ifq);
		if (err == 0)
			ifq->ifcq_flags = (IFCQF_READY | IFCQF_ENABLED);
	}

#if PF_ALTQ
	ifq->ifcq_drain = 0;
	IFCQ_ALTQ(ifq)->altq_ifcq = ifq;
	VERIFY(IFCQ_ALTQ(ifq)->altq_type == ALTQT_NONE);
	VERIFY(IFCQ_ALTQ(ifq)->altq_flags == 0);
	VERIFY(IFCQ_ALTQ(ifq)->altq_disc == NULL);
	VERIFY(IFCQ_ALTQ(ifq)->altq_enqueue == NULL);
	VERIFY(IFCQ_ALTQ(ifq)->altq_dequeue == NULL);
	VERIFY(IFCQ_ALTQ(ifq)->altq_dequeue_sc == NULL);
	VERIFY(IFCQ_ALTQ(ifq)->altq_request == NULL);

	if ((ifp->if_eflags & IFEF_TXSTART) &&
	    ifp->if_output_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED)
		ALTQ_SET_READY(IFCQ_ALTQ(ifq));
	else
		ALTQ_CLEAR_READY(IFCQ_ALTQ(ifq));
#endif /* PF_ALTQ */
	IFCQ_UNLOCK(ifq);

	return (err);
}

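/*
 * Tear down the interface send queue: disable any token bucket
 * regulator, detach the packet scheduler (and the ALTQ state, when
 * compiled in), and verify that the queue has returned to its
 * pristine, empty state before resetting its length and counters.
 */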
void
ifclassq_teardown(struct ifnet *ifp)
{
	struct ifclassq *ifq = &ifp->if_snd;

	IFCQ_LOCK(ifq);
#if PF_ALTQ
	if (ALTQ_IS_READY(IFCQ_ALTQ(ifq))) {
		if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq)))
			altq_disable(IFCQ_ALTQ(ifq));
		if (ALTQ_IS_ATTACHED(IFCQ_ALTQ(ifq)))
			altq_detach(IFCQ_ALTQ(ifq));
		IFCQ_ALTQ(ifq)->altq_flags = 0;
	}
	ifq->ifcq_drain = 0;
	IFCQ_ALTQ(ifq)->altq_ifcq = NULL;
	VERIFY(IFCQ_ALTQ(ifq)->altq_type == ALTQT_NONE);
	VERIFY(IFCQ_ALTQ(ifq)->altq_flags == 0);
	VERIFY(IFCQ_ALTQ(ifq)->altq_disc == NULL);
	VERIFY(IFCQ_ALTQ(ifq)->altq_enqueue == NULL);
	VERIFY(IFCQ_ALTQ(ifq)->altq_dequeue == NULL);
	VERIFY(IFCQ_ALTQ(ifq)->altq_dequeue_sc == NULL);
	VERIFY(IFCQ_ALTQ(ifq)->altq_request == NULL);
#endif /* PF_ALTQ */

	if (IFCQ_IS_READY(ifq)) {
		if (IFCQ_TBR_IS_ENABLED(ifq)) {
			struct tb_profile tb = { 0, 0, 0 };
			(void) ifclassq_tbr_set(ifq, &tb, FALSE);
		}
		(void) pktsched_teardown(ifq);
		ifq->ifcq_flags = 0;
	}
	ifq->ifcq_sflags = 0;

	VERIFY(IFCQ_IS_EMPTY(ifq));
	VERIFY(!IFCQ_TBR_IS_ENABLED(ifq));
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
	VERIFY(ifq->ifcq_flags == 0);
	VERIFY(ifq->ifcq_sflags == 0);
	VERIFY(ifq->ifcq_disc == NULL);
	VERIFY(ifq->ifcq_enqueue == NULL);
	VERIFY(ifq->ifcq_dequeue == NULL);
	VERIFY(ifq->ifcq_dequeue_sc == NULL);
	VERIFY(ifq->ifcq_request == NULL);
	IFCQ_LEN(ifq) = 0;
	IFCQ_MAXLEN(ifq) = 0;
	bzero(&ifq->ifcq_xmitcnt, sizeof (ifq->ifcq_xmitcnt));
	bzero(&ifq->ifcq_dropcnt, sizeof (ifq->ifcq_dropcnt));

	IFCQ_UNLOCK(ifq);
}

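/*
 * Attach the packet scheduler matching the interface's output
 * scheduling model: TCQ for driver-managed interfaces, QFQ for the
 * normal model.
 */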
int
ifclassq_pktsched_setup(struct ifclassq *ifq)
{
	struct ifnet *ifp = ifq->ifcq_ifp;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifp->if_eflags & IFEF_TXSTART);

	switch (ifp->if_output_sched_model) {
	case IFNET_SCHED_MODEL_DRIVER_MANAGED:
		err = pktsched_setup(ifq, PKTSCHEDT_TCQ, ifq->ifcq_sflags);
		break;

	case IFNET_SCHED_MODEL_NORMAL:
		err = pktsched_setup(ifq, PKTSCHEDT_QFQ, ifq->ifcq_sflags);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
	}

	return (err);
}

void
ifclassq_set_maxlen(struct ifclassq *ifq, u_int32_t maxqlen)
{
	IFCQ_LOCK(ifq);
	if (maxqlen == 0)
		maxqlen = if_sndq_maxlen;
	IFCQ_SET_MAXLEN(ifq, maxqlen);
	IFCQ_UNLOCK(ifq);
}

u_int32_t
ifclassq_get_maxlen(struct ifclassq *ifq)
{
	return (IFCQ_MAXLEN(ifq));
}

int
ifclassq_get_len(struct ifclassq *ifq, mbuf_svc_class_t sc, u_int32_t *packets,
    u_int32_t *bytes)
{
	int err = 0;

	IFCQ_LOCK(ifq);
	if (sc == MBUF_SC_UNSPEC) {
		VERIFY(packets != NULL);
		*packets = IFCQ_LEN(ifq);
	} else {
		VERIFY(MBUF_VALID_SC(sc));
		VERIFY(packets != NULL && bytes != NULL);
		IFCQ_LEN_SC(ifq, sc, packets, bytes, err);
	}
	IFCQ_UNLOCK(ifq);

	return (err);
}

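/*
 * Enqueue a packet on the interface send queue.  With PF_ALTQ, the
 * packet goes to the ALTQ discipline when one is enabled; otherwise
 * it goes to the native classq discipline and ifcq_drain is bumped,
 * so that subsequent dequeues drain the native queue before the ALTQ
 * discipline is consulted.
 */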
errno_t
ifclassq_enqueue(struct ifclassq *ifq, struct mbuf *m)
{
	errno_t err;

	IFCQ_LOCK_SPIN(ifq);

#if PF_ALTQ
	if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq))) {
		ALTQ_ENQUEUE(IFCQ_ALTQ(ifq), m, err);
	} else {
		u_int32_t qlen = IFCQ_LEN(ifq);
		IFCQ_ENQUEUE(ifq, m, err);
		if (IFCQ_LEN(ifq) > qlen)
			ifq->ifcq_drain += (IFCQ_LEN(ifq) - qlen);
	}
#else /* !PF_ALTQ */
	IFCQ_ENQUEUE(ifq, m, err);
#endif /* PF_ALTQ */

	IFCQ_UNLOCK(ifq);

	return (err);
}

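/*
 * Dequeue up to 'limit' packets from the send queue, either from any
 * service class (ifclassq_dequeue) or from a specific one
 * (ifclassq_dequeue_sc); both are thin wrappers around
 * ifclassq_dequeue_common() below.
 */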
errno_t
ifclassq_dequeue(struct ifclassq *ifq, u_int32_t limit, struct mbuf **head,
    struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
{
	return (ifclassq_dequeue_common(ifq, MBUF_SC_UNSPEC, limit, head, tail,
	    cnt, len, FALSE));
}

errno_t
ifclassq_dequeue_sc(struct ifclassq *ifq, mbuf_svc_class_t sc,
    u_int32_t limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
    u_int32_t *len)
{
	return (ifclassq_dequeue_common(ifq, sc, limit, head, tail,
	    cnt, len, TRUE));
}

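/*
 * Common dequeue path: under the send queue lock, pull one packet at
 * a time -- via the token bucket regulator when enabled, otherwise
 * straight from the native (or, with PF_ALTQ, the ALTQ) discipline --
 * and chain the packets via m_nextpkt until 'limit' is reached or
 * the queue runs dry.  On return, *head/*tail bracket the chain and
 * *cnt/*len hold the packet and byte totals; EAGAIN means nothing
 * could be dequeued.
 */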
static errno_t
ifclassq_dequeue_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
    u_int32_t limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
    u_int32_t *len, boolean_t drvmgt)
{
	struct ifnet *ifp = ifq->ifcq_ifp;
	u_int32_t i = 0, l = 0;
	struct mbuf **first, *last;
#if PF_ALTQ
	struct ifaltq *altq = IFCQ_ALTQ(ifq);
	boolean_t draining;
#endif /* PF_ALTQ */

	VERIFY(!drvmgt || MBUF_VALID_SC(sc));

	*head = NULL;
	first = &(*head);
	last = NULL;

	ifq = &ifp->if_snd;
	IFCQ_LOCK_SPIN(ifq);

	while (i < limit) {
		u_int64_t pktlen;
#if PF_ALTQ
		u_int32_t qlen;

		qlen = IFCQ_LEN(ifq);
		draining = IFCQ_IS_DRAINING(ifq);

		if (drvmgt) {
			if (IFCQ_TBR_IS_ENABLED(ifq))
				IFCQ_TBR_DEQUEUE_SC(ifq, sc, *head);
			else if (draining)
				IFCQ_DEQUEUE_SC(ifq, sc, *head);
			else if (ALTQ_IS_ENABLED(altq))
				ALTQ_DEQUEUE_SC(altq, sc, *head);
			else
				*head = NULL;
		} else {
			if (IFCQ_TBR_IS_ENABLED(ifq))
				IFCQ_TBR_DEQUEUE(ifq, *head);
			else if (draining)
				IFCQ_DEQUEUE(ifq, *head);
			else if (ALTQ_IS_ENABLED(altq))
				ALTQ_DEQUEUE(altq, *head);
			else
				*head = NULL;
		}

		if (draining && *head != NULL) {
			VERIFY(ifq->ifcq_drain >= (qlen - IFCQ_LEN(ifq)));
			ifq->ifcq_drain -= (qlen - IFCQ_LEN(ifq));
		}
#else /* ! PF_ALTQ */
		if (drvmgt) {
			if (IFCQ_TBR_IS_ENABLED(ifq))
				IFCQ_TBR_DEQUEUE_SC(ifq, sc, *head);
			else
				IFCQ_DEQUEUE_SC(ifq, sc, *head);
		} else {
			if (IFCQ_TBR_IS_ENABLED(ifq))
				IFCQ_TBR_DEQUEUE(ifq, *head);
			else
				IFCQ_DEQUEUE(ifq, *head);
		}
#endif /* !PF_ALTQ */

		if (*head == NULL)
			break;

		(*head)->m_nextpkt = NULL;
		last = *head;

		l += (*head)->m_pkthdr.len;
		pktlen = (*head)->m_pkthdr.len;

#if MEASURE_BW
		(*head)->m_pkthdr.pkt_bwseq =
		    atomic_add_64_ov(&(ifp->if_bw.cur_seq), pktlen);
#endif /* MEASURE_BW */

		head = &(*head)->m_nextpkt;
		i++;
	}

	IFCQ_UNLOCK(ifq);

	if (tail != NULL)
		*tail = last;
	if (cnt != NULL)
		*cnt = i;
	if (len != NULL)
		*len = l;

	return ((*first != NULL) ? 0 : EAGAIN);
}

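/*
 * Non-destructively peek at the packet that the next dequeue would
 * return, for the queue as a whole (ifclassq_poll) or for a given
 * service class (ifclassq_poll_sc); the packet stays on the queue.
 */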
400
401struct mbuf *
402ifclassq_poll(struct ifclassq *ifq)
403{
404	return (ifclassq_poll_common(ifq, MBUF_SC_UNSPEC, FALSE));
405}
406
407struct mbuf *
408ifclassq_poll_sc(struct ifclassq *ifq, mbuf_svc_class_t sc)
409{
410	return (ifclassq_poll_common(ifq, sc, TRUE));
411}
412
413static struct mbuf *
414ifclassq_poll_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
415    boolean_t drvmgt)
416{
417#if PF_ALTQ
418	struct ifaltq *altq = IFCQ_ALTQ(ifq);
419#endif /* PF_ALTQ */
420	struct mbuf *m;
421
422	VERIFY(!drvmgt || MBUF_VALID_SC(sc));
423
424#if PF_ALTQ
425	if (drvmgt) {
426		if (IFCQ_TBR_IS_ENABLED(ifq))
427			IFCQ_TBR_POLL_SC(ifq, sc, m);
428		else if (IFCQ_IS_DRAINING(ifq))
429			IFCQ_POLL_SC(ifq, sc, m);
430		else if (ALTQ_IS_ENABLED(altq))
431			ALTQ_POLL_SC(altq, sc, m);
432		else
433			m = NULL;
434	} else {
435		if (IFCQ_TBR_IS_ENABLED(ifq))
436			IFCQ_TBR_POLL(ifq, m);
437		else if (IFCQ_IS_DRAINING(ifq))
438			IFCQ_POLL(ifq, m);
439		else if (ALTQ_IS_ENABLED(altq))
440			ALTQ_POLL(altq, m);
441		else
442			m = NULL;
443	}
444#else /* ! PF_ALTQ */
445	if (drvmgt) {
446		if (IFCQ_TBR_IS_ENABLED(ifq))
447			IFCQ_TBR_POLL_SC(ifq, sc, m);
448		else
449			IFCQ_POLL_SC(ifq, sc, m);
450	} else {
451		if (IFCQ_TBR_IS_ENABLED(ifq))
452			IFCQ_TBR_POLL(ifq, m);
453		else
454			IFCQ_POLL(ifq, m);
455	}
456#endif /* !PF_ALTQ */
457
458	return (m);
459}
460
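/*
 * Propagate an interface event (link bandwidth, latency, MTU or
 * state change) to the active discipline so that it can adapt its
 * parameters accordingly.
 */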
void
ifclassq_update(struct ifclassq *ifq, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(IFCQ_IS_READY(ifq));

#if PF_ALTQ
	if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq)))
		ALTQ_UPDATE(IFCQ_ALTQ(ifq), ev);
#endif /* PF_ALTQ */
	IFCQ_UPDATE(ifq, ev);
}

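/*
 * Called by a packet scheduler to plug its discipline instance and
 * entry points into the send queue.  At most one of the two dequeue
 * callbacks (class-unaware or service-class aware) may be supplied.
 */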
int
ifclassq_attach(struct ifclassq *ifq, u_int32_t type, void *discipline,
    ifclassq_enq_func enqueue, ifclassq_deq_func dequeue,
    ifclassq_deq_sc_func dequeue_sc, ifclassq_req_func request)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);

	VERIFY(ifq->ifcq_disc == NULL);
	VERIFY(enqueue != NULL);
	VERIFY(!(dequeue != NULL && dequeue_sc != NULL));
	VERIFY(request != NULL);

	ifq->ifcq_type = type;
	ifq->ifcq_disc = discipline;
	ifq->ifcq_enqueue = enqueue;
	ifq->ifcq_dequeue = dequeue;
	ifq->ifcq_dequeue_sc = dequeue_sc;
	ifq->ifcq_request = request;

	return (0);
}

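/*
 * Undo ifclassq_attach(): reset the scheduler type and entry points.
 * The discipline itself must already have been freed by the
 * scheduler, i.e. ifcq_disc is required to be NULL on entry.
 */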
int
ifclassq_detach(struct ifclassq *ifq)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);

	VERIFY(ifq->ifcq_disc == NULL);

	ifq->ifcq_type = PKTSCHEDT_NONE;
	ifq->ifcq_disc = NULL;
	ifq->ifcq_enqueue = NULL;
	ifq->ifcq_dequeue = NULL;
	ifq->ifcq_dequeue_sc = NULL;
	ifq->ifcq_request = NULL;

	return (0);
}

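/*
 * Copy the statistics of queue 'qid' out to the user buffer 'ubuf':
 * snapshot the generic counters under the queue lock, have the
 * scheduler fill in its per-queue stats, then copyout() the result
 * and report the number of bytes written through 'nbytes'.
 */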
int
ifclassq_getqstats(struct ifclassq *ifq, u_int32_t qid, void *ubuf,
    u_int32_t *nbytes)
{
	struct if_ifclassq_stats *ifqs;
	int err;

	if (*nbytes < sizeof (*ifqs))
		return (EINVAL);

	ifqs = _MALLOC(sizeof (*ifqs), M_TEMP, M_WAITOK | M_ZERO);
	if (ifqs == NULL)
		return (ENOMEM);

	IFCQ_LOCK(ifq);
	if (!IFCQ_IS_READY(ifq)) {
		IFCQ_UNLOCK(ifq);
		_FREE(ifqs, M_TEMP);
		return (ENXIO);
	}

	ifqs->ifqs_len = IFCQ_LEN(ifq);
	ifqs->ifqs_maxlen = IFCQ_MAXLEN(ifq);
	ifqs->ifqs_xmitcnt = ifq->ifcq_xmitcnt;
	ifqs->ifqs_dropcnt = ifq->ifcq_dropcnt;
	ifqs->ifqs_scheduler = ifq->ifcq_type;

	err = pktsched_getqstats(ifq, qid, ifqs);
	IFCQ_UNLOCK(ifq);

	if (err == 0 && (err = copyout((caddr_t)ifqs,
	    (user_addr_t)(uintptr_t)ubuf, sizeof (*ifqs))) == 0)
		*nbytes = sizeof (*ifqs);

	_FREE(ifqs, M_TEMP);

	return (err);
}

551
552const char *
553ifclassq_ev2str(cqev_t ev)
554{
555	const char *c;
556
557	switch (ev) {
558	case CLASSQ_EV_LINK_BANDWIDTH:
559		c = "LINK_BANDWIDTH";
560		break;
561
562	case CLASSQ_EV_LINK_LATENCY:
563		c = "LINK_LATENCY";
564		break;
565
566	case CLASSQ_EV_LINK_MTU:
567		c = "LINK_MTU";
568		break;
569
570	case CLASSQ_EV_LINK_UP:
571		c = "LINK_UP";
572		break;
573
574	case CLASSQ_EV_LINK_DOWN:
575		c = "LINK_DOWN";
576		break;
577
578	default:
579		c = "UNKNOWN";
580		break;
581	}
582
583	return (c);
584}
585
/*
 * Internal representation of token bucket parameters:
 *	rate:	bytes per machclk tick, scaled by 2^32, i.e.
 *		(((bits_per_sec) / 8) << 32) / machclk_freq
 *	depth:	bytes << 32
 */
#define	TBR_SHIFT	32
#define	TBR_SCALE(x)	((int64_t)(x) << TBR_SHIFT)
#define	TBR_UNSCALE(x)	((x) >> TBR_SHIFT)
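
/*
 * Worked example (illustrative only; assumes machclk_freq is 1 GHz):
 * a 100 Mbit/s profile is 12500000 bytes/sec, so tbr_rate becomes
 * TBR_SCALE(12500000) / 1000000000 ~= 53687091, i.e. 0.0125 bytes
 * per clock tick in 32-bit fixed point.  The fraction bits let
 * tbr_token accumulate sub-byte credit between ticks; accruing at
 * that rate for one second adds back TBR_UNSCALE(53687091 * 1e9)
 * ~= 12500000 bytes of credit, subject to the tbr_depth cap.
 */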

struct mbuf *
ifclassq_tbr_dequeue(struct ifclassq *ifq, int op)
{
	return (ifclassq_tbr_dequeue_common(ifq, op, MBUF_SC_UNSPEC, FALSE));
}

struct mbuf *
ifclassq_tbr_dequeue_sc(struct ifclassq *ifq, int op, mbuf_svc_class_t sc)
{
	return (ifclassq_tbr_dequeue_common(ifq, op, sc, TRUE));
}

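/*
 * Token bucket regulated dequeue: replenish tbr_token based on the
 * machclk time elapsed since the last refill (capping at tbr_depth),
 * refuse to dequeue while the token count is non-positive, and on an
 * actual remove charge the scaled packet length against the bucket.
 * A remove that immediately follows a poll bypasses the token check,
 * since the poll already validated it for that packet.
 */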
static struct mbuf *
ifclassq_tbr_dequeue_common(struct ifclassq *ifq, int op,
    mbuf_svc_class_t sc, boolean_t drvmgt)
{
	struct tb_regulator *tbr;
	struct mbuf *m;
	int64_t interval;
	u_int64_t now;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	VERIFY(!drvmgt || MBUF_VALID_SC(sc));
	VERIFY(IFCQ_TBR_IS_ENABLED(ifq));

	tbr = &ifq->ifcq_tbr;
	if (op == CLASSQDQ_REMOVE && tbr->tbr_lastop == CLASSQDQ_POLL) {
		/* if this is a remove after poll, bypass tbr check */
	} else {
		/* update token only when it is negative */
		if (tbr->tbr_token <= 0) {
			now = read_machclk();
			interval = now - tbr->tbr_last;
			if (interval >= tbr->tbr_filluptime) {
				tbr->tbr_token = tbr->tbr_depth;
			} else {
				tbr->tbr_token += interval * tbr->tbr_rate;
				if (tbr->tbr_token > tbr->tbr_depth)
					tbr->tbr_token = tbr->tbr_depth;
			}
			tbr->tbr_last = now;
		}
		/* if token is still negative, don't allow dequeue */
		if (tbr->tbr_token <= 0)
			return (NULL);
	}

	/*
	 * ifclassq takes precedence over ALTQ queue;
	 * ifcq_drain count is adjusted by the caller.
	 */
#if PF_ALTQ
	if (IFCQ_IS_DRAINING(ifq)) {
#endif /* PF_ALTQ */
		if (op == CLASSQDQ_POLL) {
			if (drvmgt)
				IFCQ_POLL_SC(ifq, sc, m);
			else
				IFCQ_POLL(ifq, m);
		} else {
			if (drvmgt)
				IFCQ_DEQUEUE_SC(ifq, sc, m);
			else
				IFCQ_DEQUEUE(ifq, m);
		}
#if PF_ALTQ
	} else {
		struct ifaltq *altq = IFCQ_ALTQ(ifq);
		if (ALTQ_IS_ENABLED(altq)) {
			if (drvmgt)
				m = (*altq->altq_dequeue_sc)(altq, sc, op);
			else
				m = (*altq->altq_dequeue)(altq, op);
		} else {
			m = NULL;
		}
	}
#endif /* PF_ALTQ */

	if (m != NULL && op == CLASSQDQ_REMOVE)
		tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
	tbr->tbr_lastop = op;

	return (m);
}

/*
 * Set a token bucket regulator.  If the specified rate is zero, the
 * token bucket regulator is deleted.
 */
int
ifclassq_tbr_set(struct ifclassq *ifq, struct tb_profile *profile,
    boolean_t update)
{
	struct tb_regulator *tbr;
	struct ifnet *ifp = ifq->ifcq_ifp;
	u_int64_t rate, old_rate;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(IFCQ_IS_READY(ifq));

	VERIFY(machclk_freq != 0);

	tbr = &ifq->ifcq_tbr;
	old_rate = tbr->tbr_rate_raw;

	rate = profile->rate;
	if (profile->percent > 0) {
		u_int64_t eff_rate;

		if (profile->percent > 100)
			return (EINVAL);
		if ((eff_rate = ifp->if_output_bw.eff_bw) == 0)
			return (ENODEV);
		rate = (eff_rate * profile->percent) / 100;
	}

	if (rate == 0) {
		if (!IFCQ_TBR_IS_ENABLED(ifq))
			return (ENOENT);

		if (pktsched_verbose)
			printf("%s: TBR disabled\n", if_name(ifp));

		/* disable this TBR */
		ifq->ifcq_flags &= ~IFCQF_TBR;
		bzero(tbr, sizeof (*tbr));
		ifnet_set_start_cycle(ifp, NULL);
		if (update)
			ifclassq_update(ifq, CLASSQ_EV_LINK_BANDWIDTH);
		return (0);
	}

	if (pktsched_verbose) {
		printf("%s: TBR %s (rate %llu bps depth %u)\n", if_name(ifp),
		    (ifq->ifcq_flags & IFCQF_TBR) ? "reconfigured" :
		    "enabled", rate, profile->depth);
	}

	/* set the new TBR */
	bzero(tbr, sizeof (*tbr));
	tbr->tbr_rate_raw = rate;
	tbr->tbr_percent = profile->percent;
	ifq->ifcq_flags |= IFCQF_TBR;

	/*
	 * Note that the TBR fill up time (hence the ifnet restart time)
	 * is directly related to the specified TBR depth.  The ideal
	 * depth value should be computed such that the interval time
	 * between each successive wakeup is adequately spaced apart,
	 * in order to reduce scheduling overheads.  A target interval
	 * of 10 ms seems to provide good performance balance.  This can be
	 * overridden by specifying the depth profile.  Values smaller than
	 * the ideal depth will reduce delay at the expense of CPU cycles.
	 */
	tbr->tbr_rate = TBR_SCALE(rate / 8) / machclk_freq;
	if (tbr->tbr_rate > 0) {
		u_int32_t mtu = ifp->if_mtu;
		int64_t ival, idepth = 0;
		int i;

		if (mtu < IF_MINMTU)
			mtu = IF_MINMTU;

		ival = pktsched_nsecs_to_abstime(10 * NSEC_PER_MSEC); /* 10ms */

		for (i = 1; ; i++) {
			idepth = TBR_SCALE(i * mtu);
			if ((idepth / tbr->tbr_rate) > ival)
				break;
		}
		VERIFY(idepth > 0);

		tbr->tbr_depth = TBR_SCALE(profile->depth);
		if (tbr->tbr_depth == 0) {
			tbr->tbr_filluptime = idepth / tbr->tbr_rate;
			/* a little fudge factor to get closer to rate */
			tbr->tbr_depth = idepth + (idepth >> 3);
		} else {
			tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
		}
	} else {
		tbr->tbr_depth = TBR_SCALE(profile->depth);
		tbr->tbr_filluptime = 0xffffffffffffffffLL;
	}
	tbr->tbr_token = tbr->tbr_depth;
	tbr->tbr_last = read_machclk();
	tbr->tbr_lastop = CLASSQDQ_REMOVE;

	if (tbr->tbr_rate > 0 && (ifp->if_flags & IFF_UP)) {
		struct timespec ts =
		    { 0, pktsched_abs_to_nsecs(tbr->tbr_filluptime) };
		if (pktsched_verbose) {
			printf("%s: TBR calculated tokens %lld "
			    "filluptime %llu ns\n", if_name(ifp),
			    TBR_UNSCALE(tbr->tbr_token),
			    pktsched_abs_to_nsecs(tbr->tbr_filluptime));
		}
		ifnet_set_start_cycle(ifp, &ts);
	} else {
		if (pktsched_verbose) {
			if (tbr->tbr_rate == 0) {
				printf("%s: TBR calculated tokens %lld "
				    "infinite filluptime\n", if_name(ifp),
				    TBR_UNSCALE(tbr->tbr_token));
			} else if (!(ifp->if_flags & IFF_UP)) {
				printf("%s: TBR suspended (link is down)\n",
				    if_name(ifp));
			}
		}
		ifnet_set_start_cycle(ifp, NULL);
	}
	if (update && tbr->tbr_rate_raw != old_rate)
		ifclassq_update(ifq, CLASSQ_EV_LINK_BANDWIDTH);

	return (0);
}