/*
 * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * traffic class queue (TCQ): maps mbuf service classes onto a small set
 * of per-interface traffic class queues, each with an optional queueing
 * discipline (RED/RIO/BLUE/SFB) and support for throttling.
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/syslog.h>

#include <kern/zalloc.h>

#include <net/if.h>
#include <net/net_osdep.h>

#include <net/pktsched/pktsched_tcq.h>
#include <netinet/in.h>

/*
 * function prototypes
 */
static int tcq_enqueue_ifclassq(struct ifclassq *, struct mbuf *);
static struct mbuf *tcq_dequeue_tc_ifclassq(struct ifclassq *,
    mbuf_svc_class_t, cqdq_op_t);
static int tcq_request_ifclassq(struct ifclassq *, cqrq_t, void *);
static int tcq_clear_interface(struct tcq_if *);
static struct tcq_class *tcq_class_create(struct tcq_if *, int, u_int32_t,
    int, u_int32_t);
static int tcq_class_destroy(struct tcq_if *, struct tcq_class *);
static int tcq_destroy_locked(struct tcq_if *);
static inline int tcq_addq(struct tcq_class *, struct mbuf *,
    struct pf_mtag *);
static inline struct mbuf *tcq_getq(struct tcq_class *);
static inline struct mbuf *tcq_pollq(struct tcq_class *);
static void tcq_purgeq(struct tcq_if *, struct tcq_class *, u_int32_t,
    u_int32_t *, u_int32_t *);
static void tcq_purge_sc(struct tcq_if *, cqrq_purge_sc_t *);
static void tcq_updateq(struct tcq_if *, struct tcq_class *, cqev_t);
static int tcq_throttle(struct tcq_if *, cqrq_throttle_t *);
static int tcq_resumeq(struct tcq_if *, struct tcq_class *);
static int tcq_suspendq(struct tcq_if *, struct tcq_class *);
static struct mbuf *tcq_dequeue_cl(struct tcq_if *, struct tcq_class *,
    mbuf_svc_class_t, cqdq_op_t);
static inline struct tcq_class *tcq_clh_to_clp(struct tcq_if *, u_int32_t);
static const char *tcq_style(struct tcq_if *);

#define	TCQ_ZONE_MAX	32		/* maximum elements in zone */
#define	TCQ_ZONE_NAME	"pktsched_tcq"	/* zone name */

static unsigned int tcq_size;		/* size of zone element */
static struct zone *tcq_zone;		/* zone for tcq */

#define	TCQ_CL_ZONE_MAX	32		/* maximum elements in zone */
#define	TCQ_CL_ZONE_NAME "pktsched_tcq_cl" /* zone name */

static unsigned int tcq_cl_size;	/* size of zone element */
static struct zone *tcq_cl_zone;	/* zone for tcq_class */

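/*
 * Initialize the zones backing the scheduler instance (struct tcq_if)
 * and class (struct tcq_class) allocations; expected to run once during
 * packet scheduler setup.  Failure to create either zone is fatal.
 */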
void
tcq_init(void)
{
	tcq_size = sizeof (struct tcq_if);
	tcq_zone = zinit(tcq_size, TCQ_ZONE_MAX * tcq_size,
	    0, TCQ_ZONE_NAME);
	if (tcq_zone == NULL) {
		panic("%s: failed allocating %s", __func__, TCQ_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(tcq_zone, Z_EXPAND, TRUE);
	zone_change(tcq_zone, Z_CALLERACCT, TRUE);

	tcq_cl_size = sizeof (struct tcq_class);
	tcq_cl_zone = zinit(tcq_cl_size, TCQ_CL_ZONE_MAX * tcq_cl_size,
	    0, TCQ_CL_ZONE_NAME);
	if (tcq_cl_zone == NULL) {
		panic("%s: failed allocating %s", __func__, TCQ_CL_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(tcq_cl_zone, Z_EXPAND, TRUE);
	zone_change(tcq_cl_zone, Z_CALLERACCT, TRUE);
}

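/*
 * Allocate a scheduler instance for ifp; the zone allocation may block
 * when `how' is M_WAITOK, and may fail (returning NULL) otherwise.  The
 * instance starts out empty: tif_maxpri of -1 means no classes yet.
 */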
struct tcq_if *
tcq_alloc(struct ifnet *ifp, int how, boolean_t altq)
{
	struct tcq_if	*tif;

	tif = (how == M_WAITOK) ? zalloc(tcq_zone) : zalloc_noblock(tcq_zone);
	if (tif == NULL)
		return (NULL);

	bzero(tif, tcq_size);
	tif->tif_maxpri = -1;
	tif->tif_ifq = &ifp->if_snd;
	if (altq)
		tif->tif_flags |= TCQIFF_ALTQ;

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s scheduler allocated\n",
		    if_name(ifp), tcq_style(tif));
	}

	return (tif);
}

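/*
 * Destroy a scheduler instance; takes the ifclassq lock and lets
 * tcq_destroy_locked() tear down the classes and free the instance.
 */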
int
tcq_destroy(struct tcq_if *tif)
{
	struct ifclassq *ifq = tif->tif_ifq;
	int err;

	IFCQ_LOCK(ifq);
	err = tcq_destroy_locked(tif);
	IFCQ_UNLOCK(ifq);

	return (err);
}

static int
tcq_destroy_locked(struct tcq_if *tif)
{
	IFCQ_LOCK_ASSERT_HELD(tif->tif_ifq);

	(void) tcq_clear_interface(tif);

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s scheduler destroyed\n",
		    if_name(TCQIF_IFP(tif)), tcq_style(tif));
	}

	zfree(tcq_zone, tif);

	return (0);
}

/*
 * bring the interface back to the initial state by discarding
 * all the filters and classes.
 */
static int
tcq_clear_interface(struct tcq_if *tif)
{
	struct tcq_class	*cl;
	int pri;

	IFCQ_LOCK_ASSERT_HELD(tif->tif_ifq);

	/* clear out the classes */
	for (pri = 0; pri <= tif->tif_maxpri; pri++)
		if ((cl = tif->tif_classes[pri]) != NULL)
			tcq_class_destroy(tif, cl);

	return (0);
}

/* discard all the queued packets on the interface */
void
tcq_purge(struct tcq_if *tif)
{
	struct tcq_class *cl;
	int pri;

	IFCQ_LOCK_ASSERT_HELD(tif->tif_ifq);

	for (pri = 0; pri <= tif->tif_maxpri; pri++) {
		if ((cl = tif->tif_classes[pri]) != NULL && !qempty(&cl->cl_q))
			tcq_purgeq(tif, cl, 0, NULL, NULL);
	}
#if !PF_ALTQ
	/*
	 * This assertion is safe to be made only when PF_ALTQ is not
	 * configured; otherwise, IFCQ_LEN represents the sum of the
	 * packets managed by ifcq_disc and altq_disc instances, which
	 * is possible when transitioning between the two.
	 */
	VERIFY(IFCQ_LEN(tif->tif_ifq) == 0);
#endif /* !PF_ALTQ */
}

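/*
 * Purge packets belonging to the flow named in the request from one
 * service class (or from every class if pr->sc is MBUF_SC_UNSPEC),
 * accumulating the purged packet and byte counts into the request.
 */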
static void
tcq_purge_sc(struct tcq_if *tif, cqrq_purge_sc_t *pr)
{
	struct ifclassq *ifq = tif->tif_ifq;
	u_int32_t i;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	VERIFY(pr->sc == MBUF_SC_UNSPEC || MBUF_VALID_SC(pr->sc));
	VERIFY(pr->flow != 0);

	if (pr->sc != MBUF_SC_UNSPEC) {
		i = MBUF_SCIDX(pr->sc);
		VERIFY(i < IFCQ_SC_MAX);

		tcq_purgeq(tif, ifq->ifcq_disc_slots[i].cl,
		    pr->flow, &pr->packets, &pr->bytes);
	} else {
		u_int32_t cnt, len;

		pr->packets = 0;
		pr->bytes = 0;

		for (i = 0; i < IFCQ_SC_MAX; i++) {
			tcq_purgeq(tif, ifq->ifcq_disc_slots[i].cl,
			    pr->flow, &cnt, &len);
			pr->packets += cnt;
			pr->bytes += len;
		}
	}
}

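/*
 * Propagate an interface event (e.g. link speed or MTU change) to all
 * configured classes so the active queueing algorithms can adapt.
 */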
void
tcq_event(struct tcq_if *tif, cqev_t ev)
{
	struct tcq_class *cl;
	int pri;

	IFCQ_LOCK_ASSERT_HELD(tif->tif_ifq);

	for (pri = 0; pri <= tif->tif_maxpri; pri++)
		if ((cl = tif->tif_classes[pri]) != NULL)
			tcq_updateq(tif, cl, ev);
}

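/*
 * Create a class at the given priority, identified by queue id qid.
 * Fails with EINVAL if the priority is out of range, and with EBUSY
 * if the priority slot or the qid is already taken.
 */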
int
tcq_add_queue(struct tcq_if *tif, int priority, u_int32_t qlimit,
    int flags, u_int32_t qid, struct tcq_class **clp)
{
	struct tcq_class *cl;

	IFCQ_LOCK_ASSERT_HELD(tif->tif_ifq);

	/* check parameters */
	if (priority >= TCQ_MAXPRI)
		return (EINVAL);
	if (tif->tif_classes[priority] != NULL)
		return (EBUSY);
	if (tcq_clh_to_clp(tif, qid) != NULL)
		return (EBUSY);

	cl = tcq_class_create(tif, priority, qlimit, flags, qid);
	if (cl == NULL)
		return (ENOMEM);

	if (clp != NULL)
		*clp = cl;

	return (0);
}

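/*
 * Worker for class creation: sanitizes and validates the flags (RED,
 * RIO, BLUE and SFB are mutually exclusive), reuses any class already
 * at this priority, and attaches the requested queueing algorithm to
 * the class queue.
 */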
static struct tcq_class *
tcq_class_create(struct tcq_if *tif, int pri, u_int32_t qlimit,
    int flags, u_int32_t qid)
{
	struct ifnet *ifp;
	struct ifclassq *ifq;
	struct tcq_class *cl;

	IFCQ_LOCK_ASSERT_HELD(tif->tif_ifq);

	/* Sanitize flags unless internally configured */
	if (tif->tif_flags & TCQIFF_ALTQ)
		flags &= TQCF_USERFLAGS;

#if !CLASSQ_RED
	if (flags & TQCF_RED) {
		log(LOG_ERR, "%s: %s RED not available!\n",
		    if_name(TCQIF_IFP(tif)), tcq_style(tif));
		return (NULL);
	}
#endif /* !CLASSQ_RED */

#if !CLASSQ_RIO
	if (flags & TQCF_RIO) {
		log(LOG_ERR, "%s: %s RIO not available!\n",
		    if_name(TCQIF_IFP(tif)), tcq_style(tif));
		return (NULL);
	}
#endif /* !CLASSQ_RIO */

#if !CLASSQ_BLUE
	if (flags & TQCF_BLUE) {
		log(LOG_ERR, "%s: %s BLUE not available!\n",
		    if_name(TCQIF_IFP(tif)), tcq_style(tif));
		return (NULL);
	}
#endif /* !CLASSQ_BLUE */

	/* These are mutually exclusive */
	if ((flags & (TQCF_RED|TQCF_RIO|TQCF_BLUE|TQCF_SFB)) &&
	    (flags & (TQCF_RED|TQCF_RIO|TQCF_BLUE|TQCF_SFB)) != TQCF_RED &&
	    (flags & (TQCF_RED|TQCF_RIO|TQCF_BLUE|TQCF_SFB)) != TQCF_RIO &&
	    (flags & (TQCF_RED|TQCF_RIO|TQCF_BLUE|TQCF_SFB)) != TQCF_BLUE &&
	    (flags & (TQCF_RED|TQCF_RIO|TQCF_BLUE|TQCF_SFB)) != TQCF_SFB) {
		log(LOG_ERR, "%s: %s more than one RED|RIO|BLUE|SFB\n",
		    if_name(TCQIF_IFP(tif)), tcq_style(tif));
		return (NULL);
	}

	ifq = tif->tif_ifq;
	ifp = TCQIF_IFP(tif);

	if ((cl = tif->tif_classes[pri]) != NULL) {
		/* modify the class instead of creating a new one */
		if (!qempty(&cl->cl_q))
			tcq_purgeq(tif, cl, 0, NULL, NULL);
#if CLASSQ_RIO
		if (q_is_rio(&cl->cl_q))
			rio_destroy(cl->cl_rio);
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
		if (q_is_red(&cl->cl_q))
			red_destroy(cl->cl_red);
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
		if (q_is_blue(&cl->cl_q))
			blue_destroy(cl->cl_blue);
#endif /* CLASSQ_BLUE */
		if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
			sfb_destroy(cl->cl_sfb);
		cl->cl_qalg.ptr = NULL;
		qtype(&cl->cl_q) = Q_DROPTAIL;
		qstate(&cl->cl_q) = QS_RUNNING;
	} else {
		cl = zalloc(tcq_cl_zone);
		if (cl == NULL)
			return (NULL);

		bzero(cl, tcq_cl_size);
	}

	tif->tif_classes[pri] = cl;
	if (flags & TQCF_DEFAULTCLASS)
		tif->tif_default = cl;
	if (qlimit == 0 || qlimit > IFCQ_MAXLEN(ifq)) {
		qlimit = IFCQ_MAXLEN(ifq);
		if (qlimit == 0)
			qlimit = DEFAULT_QLIMIT;  /* use default */
	}
	_qinit(&cl->cl_q, Q_DROPTAIL, qlimit);
	cl->cl_flags = flags;
	cl->cl_pri = pri;
	if (pri > tif->tif_maxpri)
		tif->tif_maxpri = pri;
	cl->cl_tif = tif;
	cl->cl_handle = qid;

	if (flags & (TQCF_RED|TQCF_RIO|TQCF_BLUE|TQCF_SFB)) {
#if CLASSQ_RED || CLASSQ_RIO
		u_int64_t ifbandwidth = ifnet_output_linkrate(ifp);
		int pkttime;
#endif /* CLASSQ_RED || CLASSQ_RIO */

		cl->cl_qflags = 0;
		if (flags & TQCF_ECN) {
			if (flags & TQCF_BLUE)
				cl->cl_qflags |= BLUEF_ECN;
			else if (flags & TQCF_SFB)
				cl->cl_qflags |= SFBF_ECN;
			else if (flags & TQCF_RED)
				cl->cl_qflags |= REDF_ECN;
			else if (flags & TQCF_RIO)
				cl->cl_qflags |= RIOF_ECN;
		}
		if (flags & TQCF_FLOWCTL) {
			if (flags & TQCF_SFB)
				cl->cl_qflags |= SFBF_FLOWCTL;
		}
		if (flags & TQCF_CLEARDSCP) {
			if (flags & TQCF_RIO)
				cl->cl_qflags |= RIOF_CLEARDSCP;
		}
#if CLASSQ_RED || CLASSQ_RIO
		/*
		 * XXX: RED & RIO should be watching link speed and MTU
		 *	events and recompute pkttime accordingly.
		 */
		if (ifbandwidth < 8)
			pkttime = 1000 * 1000 * 1000; /* 1 sec */
		else
			pkttime = (int64_t)ifp->if_mtu * 1000 * 1000 * 1000 /
			    (ifbandwidth / 8);

		/* Test for exclusivity {RED,RIO,BLUE,SFB} was done above */
#if CLASSQ_RED
		if (flags & TQCF_RED) {
			cl->cl_red = red_alloc(ifp, 0, 0,
			    qlimit(&cl->cl_q) * 10/100,
			    qlimit(&cl->cl_q) * 30/100,
			    cl->cl_qflags, pkttime);
			if (cl->cl_red != NULL)
				qtype(&cl->cl_q) = Q_RED;
		}
#endif /* CLASSQ_RED */
#if CLASSQ_RIO
		if (flags & TQCF_RIO) {
			cl->cl_rio =
			    rio_alloc(ifp, 0, NULL, cl->cl_qflags, pkttime);
			if (cl->cl_rio != NULL)
				qtype(&cl->cl_q) = Q_RIO;
		}
#endif /* CLASSQ_RIO */
#endif /* CLASSQ_RED || CLASSQ_RIO */
#if CLASSQ_BLUE
		if (flags & TQCF_BLUE) {
			cl->cl_blue = blue_alloc(ifp, 0, 0, cl->cl_qflags);
			if (cl->cl_blue != NULL)
				qtype(&cl->cl_q) = Q_BLUE;
		}
#endif /* CLASSQ_BLUE */
		if (flags & TQCF_SFB) {
			if (!(cl->cl_flags & TQCF_LAZY))
				cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
				    qlimit(&cl->cl_q), cl->cl_qflags);
			if (cl->cl_sfb != NULL || (cl->cl_flags & TQCF_LAZY))
				qtype(&cl->cl_q) = Q_SFB;
		}
	}

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s created qid=%d pri=%d qlimit=%d "
		    "flags=%b\n", if_name(ifp), tcq_style(tif),
		    cl->cl_handle, cl->cl_pri, qlimit, flags, TQCF_BITS);
	}

	return (cl);
}

int
tcq_remove_queue(struct tcq_if *tif, u_int32_t qid)
{
	struct tcq_class *cl;

	IFCQ_LOCK_ASSERT_HELD(tif->tif_ifq);

	if ((cl = tcq_clh_to_clp(tif, qid)) == NULL)
		return (EINVAL);

	return (tcq_class_destroy(tif, cl));
}

static int
tcq_class_destroy(struct tcq_if *tif, struct tcq_class *cl)
{
	struct ifclassq *ifq = tif->tif_ifq;
	int pri;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!qempty(&cl->cl_q))
		tcq_purgeq(tif, cl, 0, NULL, NULL);

	tif->tif_classes[cl->cl_pri] = NULL;
	if (tif->tif_maxpri == cl->cl_pri) {
		for (pri = cl->cl_pri; pri >= 0; pri--)
			if (tif->tif_classes[pri] != NULL) {
				tif->tif_maxpri = pri;
				break;
			}
		if (pri < 0)
			tif->tif_maxpri = -1;
	}

	if (tif->tif_default == cl)
		tif->tif_default = NULL;

	if (cl->cl_qalg.ptr != NULL) {
#if CLASSQ_RIO
		if (q_is_rio(&cl->cl_q))
			rio_destroy(cl->cl_rio);
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
		if (q_is_red(&cl->cl_q))
			red_destroy(cl->cl_red);
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
		if (q_is_blue(&cl->cl_q))
			blue_destroy(cl->cl_blue);
#endif /* CLASSQ_BLUE */
		if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
			sfb_destroy(cl->cl_sfb);
		cl->cl_qalg.ptr = NULL;
		qtype(&cl->cl_q) = Q_DROPTAIL;
		qstate(&cl->cl_q) = QS_RUNNING;
	}

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s destroyed qid=%d pri=%d\n",
		    if_name(TCQIF_IFP(tif)), tcq_style(tif),
		    cl->cl_handle, cl->cl_pri);
	}

	zfree(tcq_cl_zone, cl);
	return (0);
}

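/*
 * Enqueue m on class cl; if cl is NULL the class is derived from the
 * packet's queue id tag, falling back to the default class.  Returns
 * 0 on success, EQFULL/EQSUSPENDED as (advisory or drop) flow-control
 * feedback, or ENOBUFS if the packet was dropped outright.
 */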
int
tcq_enqueue(struct tcq_if *tif, struct tcq_class *cl, struct mbuf *m,
    struct pf_mtag *t)
{
	struct ifclassq *ifq = tif->tif_ifq;
	int len, ret;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(cl == NULL || cl->cl_tif == tif);

	if (cl == NULL) {
		cl = tcq_clh_to_clp(tif, t->pftag_qid);
		if (cl == NULL) {
			cl = tif->tif_default;
			if (cl == NULL) {
				IFCQ_CONVERT_LOCK(ifq);
				m_freem(m);
				return (ENOBUFS);
			}
		}
	}

	len = m_pktlen(m);

	ret = tcq_addq(cl, m, t);
	if (ret != 0) {
		if (ret == CLASSQEQ_SUCCESS_FC) {
			/* packet enqueued, return advisory feedback */
			ret = EQFULL;
		} else {
			VERIFY(ret == CLASSQEQ_DROPPED ||
			    ret == CLASSQEQ_DROPPED_FC ||
			    ret == CLASSQEQ_DROPPED_SP);
			/* packet has been freed in tcq_addq */
			PKTCNTR_ADD(&cl->cl_dropcnt, 1, len);
			IFCQ_DROP_ADD(ifq, 1, len);
			switch (ret) {
			case CLASSQEQ_DROPPED:
				return (ENOBUFS);
			case CLASSQEQ_DROPPED_FC:
				return (EQFULL);
			case CLASSQEQ_DROPPED_SP:
				return (EQSUSPENDED);
			}
			/* NOT REACHED */
		}
	}
	IFCQ_INC_LEN(ifq);

	/* successfully queued. */
	return (ret);
}

/*
 * note: CLASSQDQ_POLL returns the next packet without removing the packet
 *	from the queue.  CLASSQDQ_REMOVE is a normal dequeue operation.
 *	CLASSQDQ_REMOVE must return the same packet if called immediately
 *	after CLASSQDQ_POLL.
 */
struct mbuf *
tcq_dequeue_tc(struct tcq_if *tif, mbuf_svc_class_t sc, cqdq_op_t op)
{
	return (tcq_dequeue_cl(tif, NULL, sc, op));
}

static struct mbuf *
tcq_dequeue_cl(struct tcq_if *tif, struct tcq_class *cl,
    mbuf_svc_class_t sc, cqdq_op_t op)
{
	struct ifclassq *ifq = tif->tif_ifq;
	struct mbuf *m;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (cl == NULL) {
		cl = tcq_clh_to_clp(tif, MBUF_SCIDX(sc));
		if (cl == NULL)
			return (NULL);
	}

	if (qempty(&cl->cl_q))
		return (NULL);

	VERIFY(!IFCQ_IS_EMPTY(ifq));

	if (op == CLASSQDQ_POLL)
		return (tcq_pollq(cl));

	m = tcq_getq(cl);
	if (m != NULL) {
		IFCQ_DEC_LEN(ifq);
		if (qempty(&cl->cl_q))
			cl->cl_period++;
		PKTCNTR_ADD(&cl->cl_xmitcnt, 1, m_pktlen(m));
		IFCQ_XMIT_ADD(ifq, 1, m_pktlen(m));
	}
	return (m);
}

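/*
 * Add a packet to the class queue via the configured discipline
 * (RIO/RED/BLUE/SFB), or with plain tail-drop otherwise.  An SFB
 * instance marked TQCF_LAZY is allocated here on first use; if that
 * allocation fails, the queue falls back to tail-drop.
 */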
static inline int
tcq_addq(struct tcq_class *cl, struct mbuf *m, struct pf_mtag *t)
{
	struct tcq_if *tif = cl->cl_tif;
	struct ifclassq *ifq = tif->tif_ifq;

	IFCQ_LOCK_ASSERT_HELD(ifq);

#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		return (rio_addq(cl->cl_rio, &cl->cl_q, m, t));
	else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		return (red_addq(cl->cl_red, &cl->cl_q, m, t));
	else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		return (blue_addq(cl->cl_blue, &cl->cl_q, m, t));
	else
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q)) {
		if (cl->cl_sfb == NULL) {
			struct ifnet *ifp = TCQIF_IFP(tif);

			VERIFY(cl->cl_flags & TQCF_LAZY);
			cl->cl_flags &= ~TQCF_LAZY;
			IFCQ_CONVERT_LOCK(ifq);

			cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
			    qlimit(&cl->cl_q), cl->cl_qflags);
			if (cl->cl_sfb == NULL) {
				/* fall back to droptail */
				qtype(&cl->cl_q) = Q_DROPTAIL;
				cl->cl_flags &= ~TQCF_SFB;
				cl->cl_qflags &= ~(SFBF_ECN | SFBF_FLOWCTL);

				log(LOG_ERR, "%s: %s SFB lazy allocation "
				    "failed for qid=%d pri=%d, falling back "
				    "to DROPTAIL\n", if_name(ifp),
				    tcq_style(tif), cl->cl_handle,
				    cl->cl_pri);
			} else if (tif->tif_throttle != IFNET_THROTTLE_OFF) {
				/* if there's pending throttling, set it */
				cqrq_throttle_t tr = { 1, tif->tif_throttle };
				int err = tcq_throttle(tif, &tr);

				if (err == EALREADY)
					err = 0;
				if (err != 0) {
					tr.level = IFNET_THROTTLE_OFF;
					(void) tcq_throttle(tif, &tr);
				}
			}
		}
		if (cl->cl_sfb != NULL)
			return (sfb_addq(cl->cl_sfb, &cl->cl_q, m, t));
	} else if (qlen(&cl->cl_q) >= qlimit(&cl->cl_q)) {
		IFCQ_CONVERT_LOCK(ifq);
		m_freem(m);
		return (CLASSQEQ_DROPPED);
	}

	if (cl->cl_flags & TQCF_CLEARDSCP)
		write_dsfield(m, t, 0);

	_addq(&cl->cl_q, m);

	return (0);
}

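/*
 * Dequeue the packet at the head of the class queue through the
 * configured discipline, or by plain FIFO removal.
 */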
static inline struct mbuf *
tcq_getq(struct tcq_class *cl)
{
	IFCQ_LOCK_ASSERT_HELD(cl->cl_tif->tif_ifq);

#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		return (rio_getq(cl->cl_rio, &cl->cl_q));
	else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		return (red_getq(cl->cl_red, &cl->cl_q));
	else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		return (blue_getq(cl->cl_blue, &cl->cl_q));
	else
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
		return (sfb_getq(cl->cl_sfb, &cl->cl_q));

	return (_getq(&cl->cl_q));
}

static inline struct mbuf *
tcq_pollq(struct tcq_class *cl)
{
	IFCQ_LOCK_ASSERT_HELD(cl->cl_tif->tif_ifq);

	return (qhead(&cl->cl_q));
}

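/*
 * Free queued packets matching flow (all packets if flow is 0) from
 * the class queue, charging the drops to the class and interface
 * counters; the purged totals are returned via packets and bytes.
 */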
static void
tcq_purgeq(struct tcq_if *tif, struct tcq_class *cl, u_int32_t flow,
    u_int32_t *packets, u_int32_t *bytes)
{
	struct ifclassq *ifq = tif->tif_ifq;
	u_int32_t cnt = 0, len = 0, qlen;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	if ((qlen = qlen(&cl->cl_q)) == 0)
		goto done;

	/* become regular mutex before freeing mbufs */
	IFCQ_CONVERT_LOCK(ifq);

#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		rio_purgeq(cl->cl_rio, &cl->cl_q, flow, &cnt, &len);
	else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		red_purgeq(cl->cl_red, &cl->cl_q, flow, &cnt, &len);
	else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		blue_purgeq(cl->cl_blue, &cl->cl_q, flow, &cnt, &len);
	else
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
		sfb_purgeq(cl->cl_sfb, &cl->cl_q, flow, &cnt, &len);
	else
		_flushq_flow(&cl->cl_q, flow, &cnt, &len);

	if (cnt > 0) {
		VERIFY(qlen(&cl->cl_q) == (qlen - cnt));

		PKTCNTR_ADD(&cl->cl_dropcnt, cnt, len);
		IFCQ_DROP_ADD(ifq, cnt, len);

		VERIFY(((signed)IFCQ_LEN(ifq) - cnt) >= 0);
		IFCQ_LEN(ifq) -= cnt;

		if (pktsched_verbose) {
			log(LOG_DEBUG, "%s: %s purge qid=%d pri=%d "
			    "qlen=[%d,%d] cnt=%d len=%d flow=0x%x\n",
			    if_name(TCQIF_IFP(tif)), tcq_style(tif),
			    cl->cl_handle, cl->cl_pri, qlen, qlen(&cl->cl_q),
			    cnt, len, flow);
		}
	}
done:
	if (packets != NULL)
		*packets = cnt;
	if (bytes != NULL)
		*bytes = len;
}

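/* Notify the class's queueing algorithm of an interface event. */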
static void
tcq_updateq(struct tcq_if *tif, struct tcq_class *cl, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(tif->tif_ifq);

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s update qid=%d pri=%d event=%s\n",
		    if_name(TCQIF_IFP(tif)), tcq_style(tif),
		    cl->cl_handle, cl->cl_pri, ifclassq_ev2str(ev));
	}

#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		return (rio_updateq(cl->cl_rio, ev));
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		return (red_updateq(cl->cl_red, ev));
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		return (blue_updateq(cl->cl_blue, ev));
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
		return (sfb_updateq(cl->cl_sfb, ev));
}

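/*
 * Export the statistics of the class identified by qid, including any
 * algorithm-specific (RED/RIO/BLUE/SFB) counters.
 */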
int
tcq_get_class_stats(struct tcq_if *tif, u_int32_t qid,
    struct tcq_classstats *sp)
{
	struct tcq_class *cl;

	IFCQ_LOCK_ASSERT_HELD(tif->tif_ifq);

	if ((cl = tcq_clh_to_clp(tif, qid)) == NULL)
		return (EINVAL);

	sp->class_handle = cl->cl_handle;
	sp->priority = cl->cl_pri;
	sp->qlength = qlen(&cl->cl_q);
	sp->qlimit = qlimit(&cl->cl_q);
	sp->period = cl->cl_period;
	sp->xmitcnt = cl->cl_xmitcnt;
	sp->dropcnt = cl->cl_dropcnt;

	sp->qtype = qtype(&cl->cl_q);
	sp->qstate = qstate(&cl->cl_q);
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		red_getstats(cl->cl_red, &sp->red[0]);
#endif /* CLASSQ_RED */
#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		rio_getstats(cl->cl_rio, &sp->red[0]);
#endif /* CLASSQ_RIO */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		blue_getstats(cl->cl_blue, &sp->blue);
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
		sfb_getstats(cl->cl_sfb, &sp->sfb);

	return (0);
}

/* convert a class handle to the corresponding class pointer */
static inline struct tcq_class *
tcq_clh_to_clp(struct tcq_if *tif, u_int32_t chandle)
{
	struct tcq_class *cl;
	int idx;

	IFCQ_LOCK_ASSERT_HELD(tif->tif_ifq);

	for (idx = tif->tif_maxpri; idx >= 0; idx--)
		if ((cl = tif->tif_classes[idx]) != NULL &&
		    cl->cl_handle == chandle)
			return (cl);

	return (NULL);
}

static const char *
tcq_style(struct tcq_if *tif)
{
	return ((tif->tif_flags & TCQIFF_ALTQ) ? "ALTQ_TCQ" : "TCQ");
}

/*
 * tcq_enqueue_ifclassq is an enqueue function to be registered to
 * (*ifcq_enqueue) in struct ifclassq.
 */
static int
tcq_enqueue_ifclassq(struct ifclassq *ifq, struct mbuf *m)
{
	u_int32_t i;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!(m->m_flags & M_PKTHDR)) {
		/* should not happen */
		log(LOG_ERR, "%s: packet does not have pkthdr\n",
		    if_name(ifq->ifcq_ifp));
		IFCQ_CONVERT_LOCK(ifq);
		m_freem(m);
		return (ENOBUFS);
	}

	i = MBUF_SCIDX(mbuf_get_service_class(m));
	VERIFY((u_int32_t)i < IFCQ_SC_MAX);

	return (tcq_enqueue(ifq->ifcq_disc,
	    ifq->ifcq_disc_slots[i].cl, m, m_pftag(m)));
}

/*
 * tcq_dequeue_tc_ifclassq is a dequeue function to be registered to
 * (*ifcq_dequeue) in struct ifclassq.
 *
 * note: CLASSQDQ_POLL returns the next packet without removing the packet
 *	from the queue.  CLASSQDQ_REMOVE is a normal dequeue operation.
 *	CLASSQDQ_REMOVE must return the same packet if called immediately
 *	after CLASSQDQ_POLL.
 */
static struct mbuf *
tcq_dequeue_tc_ifclassq(struct ifclassq *ifq, mbuf_svc_class_t sc,
    cqdq_op_t op)
{
	u_int32_t i = MBUF_SCIDX(sc);

	VERIFY((u_int32_t)i < IFCQ_SC_MAX);

	return (tcq_dequeue_cl(ifq->ifcq_disc,
	    ifq->ifcq_disc_slots[i].cl, sc, op));
}

static int
tcq_request_ifclassq(struct ifclassq *ifq, cqrq_t req, void *arg)
{
	struct tcq_if	*tif = (struct tcq_if *)ifq->ifcq_disc;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	switch (req) {
	case CLASSQRQ_PURGE:
		tcq_purge(tif);
		break;

	case CLASSQRQ_PURGE_SC:
		tcq_purge_sc(tif, (cqrq_purge_sc_t *)arg);
		break;

	case CLASSQRQ_EVENT:
		tcq_event(tif, (cqev_t)arg);
		break;

	case CLASSQRQ_THROTTLE:
		err = tcq_throttle(tif, (cqrq_throttle_t *)arg);
		break;
	}
	return (err);
}

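/*
 * Attach a TCQ scheduler to ifq with four traffic classes (BK, BE,
 * VI, VO), then populate the per-service-class lookup slots with the
 * mappings listed below.  On any failure the partially constructed
 * scheduler is destroyed before returning.
 */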
int
tcq_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags)
{
	struct ifnet *ifp = ifq->ifcq_ifp;
	struct tcq_class *cl0, *cl1, *cl2, *cl3;
	struct tcq_if *tif;
	u_int32_t maxlen = 0, qflags = 0;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_disc == NULL);
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);

	if (flags & PKTSCHEDF_QALG_RED)
		qflags |= TQCF_RED;
	if (flags & PKTSCHEDF_QALG_RIO)
		qflags |= TQCF_RIO;
	if (flags & PKTSCHEDF_QALG_BLUE)
		qflags |= TQCF_BLUE;
	if (flags & PKTSCHEDF_QALG_SFB)
		qflags |= TQCF_SFB;
	if (flags & PKTSCHEDF_QALG_ECN)
		qflags |= TQCF_ECN;
	if (flags & PKTSCHEDF_QALG_FLOWCTL)
		qflags |= TQCF_FLOWCTL;

	tif = tcq_alloc(ifp, M_WAITOK, FALSE);
	if (tif == NULL)
		return (ENOMEM);

	if ((maxlen = IFCQ_MAXLEN(ifq)) == 0)
		maxlen = if_sndq_maxlen;

	if ((err = tcq_add_queue(tif, 0, maxlen,
	    qflags | TQCF_LAZY, SCIDX_BK, &cl0)) != 0)
		goto cleanup;

	if ((err = tcq_add_queue(tif, 1, maxlen,
	    qflags | TQCF_DEFAULTCLASS, SCIDX_BE, &cl1)) != 0)
		goto cleanup;

	if ((err = tcq_add_queue(tif, 2, maxlen,
	    qflags | TQCF_LAZY, SCIDX_VI, &cl2)) != 0)
		goto cleanup;

	if ((err = tcq_add_queue(tif, 3, maxlen,
	    qflags, SCIDX_VO, &cl3)) != 0)
		goto cleanup;

	err = ifclassq_attach(ifq, PKTSCHEDT_TCQ, tif,
	    tcq_enqueue_ifclassq, NULL, tcq_dequeue_tc_ifclassq,
	    tcq_request_ifclassq);

	/* cache these for faster lookup */
	if (err == 0) {
		/* Map {BK_SYS,BK} to TC_BK */
		ifq->ifcq_disc_slots[SCIDX_BK_SYS].qid = SCIDX_BK;
		ifq->ifcq_disc_slots[SCIDX_BK_SYS].cl = cl0;

		ifq->ifcq_disc_slots[SCIDX_BK].qid = SCIDX_BK;
		ifq->ifcq_disc_slots[SCIDX_BK].cl = cl0;

		/* Map {BE,RD,OAM} to TC_BE */
		ifq->ifcq_disc_slots[SCIDX_BE].qid = SCIDX_BE;
		ifq->ifcq_disc_slots[SCIDX_BE].cl = cl1;

		ifq->ifcq_disc_slots[SCIDX_RD].qid = SCIDX_BE;
		ifq->ifcq_disc_slots[SCIDX_RD].cl = cl1;

		ifq->ifcq_disc_slots[SCIDX_OAM].qid = SCIDX_BE;
		ifq->ifcq_disc_slots[SCIDX_OAM].cl = cl1;

		/* Map {AV,RV,VI} to TC_VI */
		ifq->ifcq_disc_slots[SCIDX_AV].qid = SCIDX_VI;
		ifq->ifcq_disc_slots[SCIDX_AV].cl = cl2;

		ifq->ifcq_disc_slots[SCIDX_RV].qid = SCIDX_VI;
		ifq->ifcq_disc_slots[SCIDX_RV].cl = cl2;

		ifq->ifcq_disc_slots[SCIDX_VI].qid = SCIDX_VI;
		ifq->ifcq_disc_slots[SCIDX_VI].cl = cl2;

		/* Map {VO,CTL} to TC_VO */
		ifq->ifcq_disc_slots[SCIDX_VO].qid = SCIDX_VO;
		ifq->ifcq_disc_slots[SCIDX_VO].cl = cl3;

		ifq->ifcq_disc_slots[SCIDX_CTL].qid = SCIDX_VO;
		ifq->ifcq_disc_slots[SCIDX_CTL].cl = cl3;
	}

cleanup:
	if (err != 0)
		(void) tcq_destroy_locked(tif);

	return (err);
}

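/*
 * Detach and destroy the TCQ scheduler attached to ifq, clearing the
 * cached service class slots.
 */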
int
tcq_teardown_ifclassq(struct ifclassq *ifq)
{
	struct tcq_if *tif = ifq->ifcq_disc;
	int i;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(tif != NULL && ifq->ifcq_type == PKTSCHEDT_TCQ);

	(void) tcq_destroy_locked(tif);

	ifq->ifcq_disc = NULL;
	for (i = 0; i < IFCQ_SC_MAX; i++) {
		ifq->ifcq_disc_slots[i].qid = 0;
		ifq->ifcq_disc_slots[i].cl = NULL;
	}

	return (ifclassq_detach(ifq));
}

int
tcq_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t slot,
    struct if_ifclassq_stats *ifqs)
{
	struct tcq_if *tif = ifq->ifcq_disc;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_type == PKTSCHEDT_TCQ);

	if (slot >= IFCQ_SC_MAX)
		return (EINVAL);

	return (tcq_get_class_stats(tif, ifq->ifcq_disc_slots[slot].qid,
	    &ifqs->ifqs_tcq_stats));
}

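/*
 * Get or set the interface throttling level.  Only the BK_SYS class
 * is affected by the current levels: its queue is suspended or
 * resumed, and purged once the new level takes effect.  ENXIO from
 * the suspend path means throttling is deferred until the lazy SFB
 * instance is allocated in tcq_addq().
 */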
static int
tcq_throttle(struct tcq_if *tif, cqrq_throttle_t *tr)
{
	struct ifclassq *ifq = tif->tif_ifq;
	struct tcq_class *cl;
	int err;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(!(tif->tif_flags & TCQIFF_ALTQ));

	if (!tr->set) {
		tr->level = tif->tif_throttle;
		return (0);
	}

	if (tr->level == tif->tif_throttle)
		return (EALREADY);

	/* Current throttling levels only involve BK_SYS class */
	cl = ifq->ifcq_disc_slots[SCIDX_BK_SYS].cl;

	switch (tr->level) {
	case IFNET_THROTTLE_OFF:
		err = tcq_resumeq(tif, cl);
		break;

	case IFNET_THROTTLE_OPPORTUNISTIC:
		err = tcq_suspendq(tif, cl);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
	}

	if (err == 0 || err == ENXIO) {
		if (pktsched_verbose) {
			log(LOG_DEBUG, "%s: %s throttling %slevel set %d->%d\n",
			    if_name(TCQIF_IFP(tif)), tcq_style(tif),
			    (err == 0) ? "" : "lazy ", tif->tif_throttle,
			    tr->level);
		}
		tif->tif_throttle = tr->level;
		if (err != 0)
			err = 0;
		else
			tcq_purgeq(tif, cl, 0, NULL, NULL);
	} else {
		log(LOG_ERR, "%s: %s unable to set throttling level "
		    "%d->%d [error=%d]\n", if_name(TCQIF_IFP(tif)),
		    tcq_style(tif), tif->tif_throttle, tr->level, err);
	}

	return (err);
}

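/* Resume a suspended class queue and mark it running again. */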
static int
tcq_resumeq(struct tcq_if *tif, struct tcq_class *cl)
{
	struct ifclassq *ifq = tif->tif_ifq;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);

#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		err = rio_suspendq(cl->cl_rio, &cl->cl_q, FALSE);
	else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		err = red_suspendq(cl->cl_red, &cl->cl_q, FALSE);
	else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		err = blue_suspendq(cl->cl_blue, &cl->cl_q, FALSE);
	else
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
		err = sfb_suspendq(cl->cl_sfb, &cl->cl_q, FALSE);

	if (err == 0)
		qstate(&cl->cl_q) = QS_RUNNING;

	return (err);
}

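/*
 * Suspend a class queue.  For a lazily configured SFB class this is
 * recorded as pending via ENXIO and applied once the SFB instance is
 * allocated; the queue state is marked suspended in either case.
 */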
static int
tcq_suspendq(struct tcq_if *tif, struct tcq_class *cl)
{
	struct ifclassq *ifq = tif->tif_ifq;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);

#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		err = rio_suspendq(cl->cl_rio, &cl->cl_q, TRUE);
	else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		err = red_suspendq(cl->cl_red, &cl->cl_q, TRUE);
	else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		err = blue_suspendq(cl->cl_blue, &cl->cl_q, TRUE);
	else
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q)) {
		if (cl->cl_sfb != NULL) {
			err = sfb_suspendq(cl->cl_sfb, &cl->cl_q, TRUE);
		} else {
			VERIFY(cl->cl_flags & TQCF_LAZY);
			err = ENXIO;	/* delayed throttling */
		}
	}

	if (err == 0 || err == ENXIO)
		qstate(&cl->cl_q) = QS_SUSPENDED;

	return (err);
}