1/*
2 * Copyright (c) 2007-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/*	$OpenBSD: altq_priq.c,v 1.21 2007/09/13 20:40:02 chl Exp $	*/
30/*	$KAME: altq_priq.c,v 1.1 2000/10/18 09:15:23 kjc Exp $	*/
31
32/*
33 * Copyright (C) 2000-2003
34 *	Sony Computer Science Laboratories Inc.  All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 *    notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 *    notice, this list of conditions and the following disclaimer in the
43 *    documentation and/or other materials provided with the distribution.
44 *
45 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 * SUCH DAMAGE.
56 */
57
58/*
59 * priority queue
60 */
61
62#if PKTSCHED_PRIQ
63
64#include <sys/cdefs.h>
65#include <sys/param.h>
66#include <sys/malloc.h>
67#include <sys/mbuf.h>
68#include <sys/systm.h>
69#include <sys/errno.h>
70#include <sys/kernel.h>
71#include <sys/syslog.h>
72
73#include <kern/zalloc.h>
74
75#include <net/if.h>
76#include <net/net_osdep.h>
77
78#include <net/pktsched/pktsched_priq.h>
79#include <netinet/in.h>
80
81/*
82 * function prototypes
83 */
84static int priq_enqueue_ifclassq(struct ifclassq *, struct mbuf *);
85static struct mbuf *priq_dequeue_ifclassq(struct ifclassq *, cqdq_op_t);
86static int priq_request_ifclassq(struct ifclassq *, cqrq_t, void *);
87static int priq_clear_interface(struct priq_if *);
88static struct priq_class *priq_class_create(struct priq_if *, int, u_int32_t,
89    int, u_int32_t);
90static int priq_class_destroy(struct priq_if *, struct priq_class *);
91static int priq_destroy_locked(struct priq_if *);
92static inline int priq_addq(struct priq_class *, struct mbuf *,
93    struct pf_mtag *);
94static inline struct mbuf *priq_getq(struct priq_class *);
95static inline struct mbuf *priq_pollq(struct priq_class *);
96static void priq_purgeq(struct priq_if *, struct priq_class *, u_int32_t,
97    u_int32_t *, u_int32_t *);
98static void priq_purge_sc(struct priq_if *, cqrq_purge_sc_t *);
99static void priq_updateq(struct priq_if *, struct priq_class *, cqev_t);
100static int priq_throttle(struct priq_if *, cqrq_throttle_t *);
101static int priq_resumeq(struct priq_if *, struct priq_class *);
102static int priq_suspendq(struct priq_if *, struct priq_class *);
103static int priq_stat_sc(struct priq_if *, cqrq_stat_sc_t *);
104static inline struct priq_class *priq_clh_to_clp(struct priq_if *, u_int32_t);
105static const char *priq_style(struct priq_if *);
106
107#define	PRIQ_ZONE_MAX	32		/* maximum elements in zone */
108#define	PRIQ_ZONE_NAME	"pktsched_priq"	/* zone name */
109
110static unsigned int priq_size;		/* size of zone element */
111static struct zone *priq_zone;		/* zone for priq */
112
113#define	PRIQ_CL_ZONE_MAX	32	/* maximum elements in zone */
114#define	PRIQ_CL_ZONE_NAME	"pktsched_priq_cl" /* zone name */
115
116static unsigned int priq_cl_size;	/* size of zone element */
117static struct zone *priq_cl_zone;	/* zone for priq_class */
118
/*
 * One-time initialization: create the zalloc zones backing the
 * scheduler instance (struct priq_if) and class (struct priq_class)
 * allocations.  Panics on failure since the scheduler cannot operate
 * without its zones.
 */
void
priq_init(void)
{
	priq_size = sizeof (struct priq_if);
	priq_zone = zinit(priq_size, PRIQ_ZONE_MAX * priq_size,
	    0, PRIQ_ZONE_NAME);
	if (priq_zone == NULL) {
		panic("%s: failed allocating %s", __func__, PRIQ_ZONE_NAME);
		/* NOTREACHED */
	}
	/* let the zone grow on demand and charge allocations to callers */
	zone_change(priq_zone, Z_EXPAND, TRUE);
	zone_change(priq_zone, Z_CALLERACCT, TRUE);

	priq_cl_size = sizeof (struct priq_class);
	priq_cl_zone = zinit(priq_cl_size, PRIQ_CL_ZONE_MAX * priq_cl_size,
	    0, PRIQ_CL_ZONE_NAME);
	if (priq_cl_zone == NULL) {
		panic("%s: failed allocating %s", __func__, PRIQ_CL_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(priq_cl_zone, Z_EXPAND, TRUE);
	zone_change(priq_cl_zone, Z_CALLERACCT, TRUE);
}
142
143struct priq_if *
144priq_alloc(struct ifnet *ifp, int how, boolean_t altq)
145{
146	struct priq_if	*pif;
147
148	pif = (how == M_WAITOK) ? zalloc(priq_zone) : zalloc_noblock(priq_zone);
149	if (pif == NULL)
150		return (NULL);
151
152	bzero(pif, priq_size);
153	pif->pif_maxpri = -1;
154	pif->pif_ifq = &ifp->if_snd;
155	if (altq)
156		pif->pif_flags |= PRIQIFF_ALTQ;
157
158	if (pktsched_verbose) {
159		log(LOG_DEBUG, "%s: %s scheduler allocated\n",
160		    if_name(ifp), priq_style(pif));
161	}
162
163	return (pif);
164}
165
166int
167priq_destroy(struct priq_if *pif)
168{
169	struct ifclassq *ifq = pif->pif_ifq;
170	int err;
171
172	IFCQ_LOCK(ifq);
173	err = priq_destroy_locked(pif);
174	IFCQ_UNLOCK(ifq);
175
176	return (err);
177}
178
/*
 * Tear down every class and free the scheduler instance itself.
 * Caller must hold the ifclassq lock.
 */
static int
priq_destroy_locked(struct priq_if *pif)
{
	IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);

	(void) priq_clear_interface(pif);

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s scheduler destroyed\n",
		    if_name(PRIQIF_IFP(pif)), priq_style(pif));
	}

	zfree(priq_zone, pif);

	return (0);
}
195
196/*
197 * bring the interface back to the initial state by discarding
198 * all the filters and classes.
199 */
200static int
201priq_clear_interface(struct priq_if *pif)
202{
203	struct priq_class	*cl;
204	int pri;
205
206	IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);
207
208	/* clear out the classes */
209	for (pri = 0; pri <= pif->pif_maxpri; pri++)
210		if ((cl = pif->pif_classes[pri]) != NULL)
211			priq_class_destroy(pif, cl);
212
213	return (0);
214}
215
216/* discard all the queued packets on the interface */
void
priq_purge(struct priq_if *pif)
{
	struct priq_class *cl;
	int pri;

	IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);

	/* flush every class that currently has packets queued */
	for (pri = 0; pri <= pif->pif_maxpri; pri++) {
		if ((cl = pif->pif_classes[pri]) != NULL && !qempty(&cl->cl_q))
			priq_purgeq(pif, cl, 0, NULL, NULL);
	}
#if !PF_ALTQ
	/*
	 * This assertion is safe to be made only when PF_ALTQ is not
	 * configured; otherwise, IFCQ_LEN represents the sum of the
	 * packets managed by ifcq_disc and altq_disc instances, which
	 * is possible when transitioning between the two.
	 */
	VERIFY(IFCQ_LEN(pif->pif_ifq) == 0);
#endif /* !PF_ALTQ */
}
239
240static void
241priq_purge_sc(struct priq_if *pif, cqrq_purge_sc_t *pr)
242{
243	struct ifclassq *ifq = pif->pif_ifq;
244	u_int32_t i;
245
246	IFCQ_LOCK_ASSERT_HELD(ifq);
247
248	VERIFY(pr->sc == MBUF_SC_UNSPEC || MBUF_VALID_SC(pr->sc));
249	VERIFY(pr->flow != 0);
250
251	if (pr->sc != MBUF_SC_UNSPEC) {
252		i = MBUF_SCIDX(pr->sc);
253		VERIFY(i < IFCQ_SC_MAX);
254
255		priq_purgeq(pif, ifq->ifcq_disc_slots[i].cl,
256		    pr->flow, &pr->packets, &pr->bytes);
257	} else {
258		u_int32_t cnt, len;
259
260		pr->packets = 0;
261		pr->bytes = 0;
262
263		for (i = 0; i < IFCQ_SC_MAX; i++) {
264			priq_purgeq(pif, ifq->ifcq_disc_slots[i].cl,
265			    pr->flow, &cnt, &len);
266			pr->packets += cnt;
267			pr->bytes += len;
268		}
269	}
270}
271
272void
273priq_event(struct priq_if *pif, cqev_t ev)
274{
275	struct priq_class *cl;
276	int pri;
277
278	IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);
279
280	for (pri = 0; pri <= pif->pif_maxpri; pri++)
281		if ((cl = pif->pif_classes[pri]) != NULL)
282			priq_updateq(pif, cl, ev);
283}
284
285int
286priq_add_queue(struct priq_if *pif, int priority, u_int32_t qlimit,
287    int flags, u_int32_t qid, struct priq_class **clp)
288{
289	struct priq_class *cl;
290
291	IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);
292
293	/* check parameters */
294	if (priority >= PRIQ_MAXPRI)
295		return (EINVAL);
296	if (pif->pif_classes[priority] != NULL)
297		return (EBUSY);
298	if (priq_clh_to_clp(pif, qid) != NULL)
299		return (EBUSY);
300
301	cl = priq_class_create(pif, priority, qlimit, flags, qid);
302	if (cl == NULL)
303		return (ENOMEM);
304
305	if (clp != NULL)
306		*clp = cl;
307
308	return (0);
309}
310
/*
 * Create the class at the given priority, or re-initialize an existing
 * one in place (purging it and discarding its queue-algorithm state).
 * Validates the requested AQM flags (RED/RIO/BLUE/SFB are mutually
 * exclusive and must be compiled in), then configures the queue and
 * optional AQM instance.  Returns the class, or NULL on failure.
 */
static struct priq_class *
priq_class_create(struct priq_if *pif, int pri, u_int32_t qlimit,
    int flags, u_int32_t qid)
{
	struct ifnet *ifp;
	struct ifclassq *ifq;
	struct priq_class *cl;

	IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);

	/* Sanitize flags unless internally configured */
	if (pif->pif_flags & PRIQIFF_ALTQ)
		flags &= PRCF_USERFLAGS;

#if !CLASSQ_RED
	if (flags & PRCF_RED) {
		log(LOG_ERR, "%s: %s RED not available!\n",
		    if_name(PRIQIF_IFP(pif)), priq_style(pif));
		return (NULL);
	}
#endif /* !CLASSQ_RED */

#if !CLASSQ_RIO
	if (flags & PRCF_RIO) {
		log(LOG_ERR, "%s: %s RIO not available!\n",
		    if_name(PRIQIF_IFP(pif)), priq_style(pif));
		return (NULL);
	}
#endif /* CLASSQ_RIO */

#if !CLASSQ_BLUE
	if (flags & PRCF_BLUE) {
		log(LOG_ERR, "%s: %s BLUE not available!\n",
		    if_name(PRIQIF_IFP(pif)), priq_style(pif));
		return (NULL);
	}
#endif /* CLASSQ_BLUE */

	/* These are mutually exclusive */
	if ((flags & (PRCF_RED|PRCF_RIO|PRCF_BLUE|PRCF_SFB)) &&
	    (flags & (PRCF_RED|PRCF_RIO|PRCF_BLUE|PRCF_SFB)) != PRCF_RED &&
	    (flags & (PRCF_RED|PRCF_RIO|PRCF_BLUE|PRCF_SFB)) != PRCF_RIO &&
	    (flags & (PRCF_RED|PRCF_RIO|PRCF_BLUE|PRCF_SFB)) != PRCF_BLUE &&
	    (flags & (PRCF_RED|PRCF_RIO|PRCF_BLUE|PRCF_SFB)) != PRCF_SFB) {
		log(LOG_ERR, "%s: %s more than one RED|RIO|BLUE|SFB\n",
		    if_name(PRIQIF_IFP(pif)), priq_style(pif));
		return (NULL);
	}

	ifq = pif->pif_ifq;
	ifp = PRIQIF_IFP(pif);

	if ((cl = pif->pif_classes[pri]) != NULL) {
		/* modify the class instead of creating a new one */
		if (!qempty(&cl->cl_q))
			priq_purgeq(pif, cl, 0, NULL, NULL);
#if CLASSQ_RIO
		if (q_is_rio(&cl->cl_q))
			rio_destroy(cl->cl_rio);
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
		if (q_is_red(&cl->cl_q))
			red_destroy(cl->cl_red);
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
		if (q_is_blue(&cl->cl_q))
			blue_destroy(cl->cl_blue);
#endif /* CLASSQ_BLUE */
		if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
			sfb_destroy(cl->cl_sfb);
		/* revert to a plain drop-tail queue until reconfigured */
		cl->cl_qalg.ptr = NULL;
		qtype(&cl->cl_q) = Q_DROPTAIL;
		qstate(&cl->cl_q) = QS_RUNNING;
	} else {
		cl = zalloc(priq_cl_zone);
		if (cl == NULL)
			return (NULL);

		bzero(cl, priq_cl_size);
	}

	pif->pif_classes[pri] = cl;
	if (flags & PRCF_DEFAULTCLASS)
		pif->pif_default = cl;
	/* clamp the queue limit to the interface queue's maximum */
	if (qlimit == 0 || qlimit > IFCQ_MAXLEN(ifq)) {
		qlimit = IFCQ_MAXLEN(ifq);
		if (qlimit == 0)
			qlimit = DEFAULT_QLIMIT;  /* use default */
	}
	_qinit(&cl->cl_q, Q_DROPTAIL, qlimit);
	cl->cl_flags = flags;
	cl->cl_pri = pri;
	if (pri > pif->pif_maxpri)
		pif->pif_maxpri = pri;
	cl->cl_pif = pif;
	cl->cl_handle = qid;

	if (flags & (PRCF_RED|PRCF_RIO|PRCF_BLUE|PRCF_SFB)) {
#if CLASSQ_RED || CLASSQ_RIO
		u_int64_t ifbandwidth = ifnet_output_linkrate(ifp);
		int pkttime;
#endif /* CLASSQ_RED || CLASSQ_RIO */

		/* translate generic flags into the chosen AQM's flags */
		cl->cl_qflags = 0;
		if (flags & PRCF_ECN) {
			if (flags & PRCF_BLUE)
				cl->cl_qflags |= BLUEF_ECN;
			else if (flags & PRCF_SFB)
				cl->cl_qflags |= SFBF_ECN;
			else if (flags & PRCF_RED)
				cl->cl_qflags |= REDF_ECN;
			else if (flags & PRCF_RIO)
				cl->cl_qflags |= RIOF_ECN;
		}
		if (flags & PRCF_FLOWCTL) {
			if (flags & PRCF_SFB)
				cl->cl_qflags |= SFBF_FLOWCTL;
		}
		if (flags & PRCF_CLEARDSCP) {
			if (flags & PRCF_RIO)
				cl->cl_qflags |= RIOF_CLEARDSCP;
		}
#if CLASSQ_RED || CLASSQ_RIO
		/*
		 * XXX: RED & RIO should be watching link speed and MTU
		 *	events and recompute pkttime accordingly.
		 */
		if (ifbandwidth < 8)
			pkttime = 1000 * 1000 * 1000; /* 1 sec */
		else
			pkttime = (int64_t)ifp->if_mtu * 1000 * 1000 * 1000 /
			    (ifbandwidth / 8);

		/* Test for exclusivity {RED,RIO,BLUE,SFB} was done above */
#if CLASSQ_RED
		if (flags & PRCF_RED) {
			cl->cl_red = red_alloc(ifp, 0, 0,
			    qlimit(&cl->cl_q) * 10/100,
			    qlimit(&cl->cl_q) * 30/100,
			    cl->cl_qflags, pkttime);
			if (cl->cl_red != NULL)
				qtype(&cl->cl_q) = Q_RED;
		}
#endif /* CLASSQ_RED */
#if CLASSQ_RIO
		if (flags & PRCF_RIO) {
			cl->cl_rio =
			    rio_alloc(ifp, 0, NULL, cl->cl_qflags, pkttime);
			if (cl->cl_rio != NULL)
				qtype(&cl->cl_q) = Q_RIO;
		}
#endif /* CLASSQ_RIO */
#endif /* CLASSQ_RED || CLASSQ_RIO */
#if CLASSQ_BLUE
		if (flags & PRCF_BLUE) {
			cl->cl_blue = blue_alloc(ifp, 0, 0, cl->cl_qflags);
			if (cl->cl_blue != NULL)
				qtype(&cl->cl_q) = Q_BLUE;
		}
#endif /* CLASSQ_BLUE */
		if (flags & PRCF_SFB) {
			/*
			 * PRCF_LAZY defers SFB allocation until the first
			 * enqueue on this class; see priq_addq().
			 */
			if (!(cl->cl_flags & PRCF_LAZY))
				cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
				    qlimit(&cl->cl_q), cl->cl_qflags);
			if (cl->cl_sfb != NULL || (cl->cl_flags & PRCF_LAZY))
				qtype(&cl->cl_q) = Q_SFB;
		}
	}

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s created qid=%d pri=%d qlimit=%d "
		    "flags=%b\n", if_name(ifp), priq_style(pif),
		    cl->cl_handle, cl->cl_pri, qlimit, flags, PRCF_BITS);
	}

	return (cl);
}
488
489int
490priq_remove_queue(struct priq_if *pif, u_int32_t qid)
491{
492	struct priq_class *cl;
493
494	IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);
495
496	if ((cl = priq_clh_to_clp(pif, qid)) == NULL)
497		return (EINVAL);
498
499	return (priq_class_destroy(pif, cl));
500}
501
/*
 * Destroy a class: purge any queued packets, unlink it from the
 * scheduler (recomputing pif_maxpri when the top slot goes away),
 * release its queue-algorithm state, and free it back to the zone.
 */
static int
priq_class_destroy(struct priq_if *pif, struct priq_class *cl)
{
	struct ifclassq *ifq = pif->pif_ifq;
	int pri;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!qempty(&cl->cl_q))
		priq_purgeq(pif, cl, 0, NULL, NULL);

	VERIFY(cl->cl_pri < PRIQ_MAXPRI);
	/* class must be inactive (empty) after the purge above */
	VERIFY(!pktsched_bit_tst(cl->cl_pri, &pif->pif_bitmap));

	pif->pif_classes[cl->cl_pri] = NULL;
	if (pif->pif_maxpri == cl->cl_pri) {
		/* scan downward for the next occupied priority slot */
		for (pri = cl->cl_pri; pri >= 0; pri--)
			if (pif->pif_classes[pri] != NULL) {
				pif->pif_maxpri = pri;
				break;
			}
		if (pri < 0)
			pif->pif_maxpri = -1;
	}

	if (pif->pif_default == cl)
		pif->pif_default = NULL;

	if (cl->cl_qalg.ptr != NULL) {
#if CLASSQ_RIO
		if (q_is_rio(&cl->cl_q))
			rio_destroy(cl->cl_rio);
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
		if (q_is_red(&cl->cl_q))
			red_destroy(cl->cl_red);
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
		if (q_is_blue(&cl->cl_q))
			blue_destroy(cl->cl_blue);
#endif /* CLASSQ_BLUE */
		if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
			sfb_destroy(cl->cl_sfb);
		cl->cl_qalg.ptr = NULL;
		qtype(&cl->cl_q) = Q_DROPTAIL;
		qstate(&cl->cl_q) = QS_RUNNING;
	}

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s destroyed qid=%d pri=%d\n",
		    if_name(PRIQIF_IFP(pif)), priq_style(pif),
		    cl->cl_handle, cl->cl_pri);
	}

	zfree(priq_cl_zone, cl);

	return (0);
}
560
/*
 * Enqueue a packet onto its class queue.  When the caller did not
 * resolve a class, one is looked up (by pf tag qid under PF_ALTQ),
 * falling back to the default class.  Returns 0 on success, EQFULL or
 * EQSUSPENDED as feedback codes, or ENOBUFS when the packet was
 * dropped (the mbuf is freed on all drop paths).
 */
int
priq_enqueue(struct priq_if *pif, struct priq_class *cl, struct mbuf *m,
    struct pf_mtag *t)
{
	struct ifclassq *ifq = pif->pif_ifq;
	u_int32_t pri;
	int len, ret;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(cl == NULL || cl->cl_pif == pif);

	if (cl == NULL) {
#if PF_ALTQ
		cl = priq_clh_to_clp(pif, t->pftag_qid);
#else /* !PF_ALTQ */
		cl = priq_clh_to_clp(pif, 0);
#endif /* !PF_ALTQ */
		if (cl == NULL) {
			cl = pif->pif_default;
			if (cl == NULL) {
				/* no class to take the packet; drop it */
				IFCQ_CONVERT_LOCK(ifq);
				m_freem(m);
				return (ENOBUFS);
			}
		}
	}
	pri = cl->cl_pri;
	VERIFY(pri < PRIQ_MAXPRI);

	/* capture the length before the mbuf may be freed below */
	len = m_pktlen(m);

	ret = priq_addq(cl, m, t);
	if (ret != 0) {
		if (ret == CLASSQEQ_SUCCESS_FC) {
			/* packet enqueued, return advisory feedback */
			ret = EQFULL;
		} else {
			VERIFY(ret == CLASSQEQ_DROPPED ||
			    ret == CLASSQEQ_DROPPED_FC ||
			    ret == CLASSQEQ_DROPPED_SP);
			/* packet has been freed in priq_addq */
			PKTCNTR_ADD(&cl->cl_dropcnt, 1, len);
			IFCQ_DROP_ADD(ifq, 1, len);
			/* map internal drop codes onto errno-style values */
			switch (ret) {
			case CLASSQEQ_DROPPED:
				return (ENOBUFS);
			case CLASSQEQ_DROPPED_FC:
				return (EQFULL);
			case CLASSQEQ_DROPPED_SP:
				return (EQSUSPENDED);
			}
			/* NOT REACHED */
		}
	}
	IFCQ_INC_LEN(ifq);

	/* class is now active; indicate it as such */
	if (!pktsched_bit_tst(pri, &pif->pif_bitmap))
		pktsched_bit_set(pri, &pif->pif_bitmap);

	/* successfully queued. */
	return (ret);
}
624
625/*
626 * note: CLASSQDQ_POLL returns the next packet without removing the packet
627 *	from the queue.  CLASSQDQ_REMOVE is a normal dequeue operation.
628 *	CLASSQDQ_REMOVE must return the same packet if called immediately
629 *	after CLASSQDQ_POLL.
630 */
/*
 * Dequeue (or poll) the next packet, always serving the
 * highest-priority class that has packets queued, as tracked by the
 * active-class bitmap.
 */
struct mbuf *
priq_dequeue(struct priq_if *pif, cqdq_op_t op)
{
	struct ifclassq *ifq = pif->pif_ifq;
	struct priq_class *cl;
	struct mbuf *m;
	u_int32_t pri, len;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (pif->pif_bitmap == 0) {
		/* no active class; nothing to dequeue */
		return (NULL);
	}
	VERIFY(!IFCQ_IS_EMPTY(ifq));

	/* highest set bit == highest-priority backlogged class */
	pri = pktsched_fls(pif->pif_bitmap) - 1;	/* zero based */
	VERIFY(pri < PRIQ_MAXPRI);
	cl = pif->pif_classes[pri];
	VERIFY(cl != NULL && !qempty(&cl->cl_q));

	if (op == CLASSQDQ_POLL)
		return (priq_pollq(cl));

	m = priq_getq(cl);
	VERIFY(m != NULL);	/* qalg must be work conserving */
	len = m_pktlen(m);

	IFCQ_DEC_LEN(ifq);
	if (qempty(&cl->cl_q)) {
		cl->cl_period++;
		/* class is now inactive; indicate it as such */
		pktsched_bit_clr(pri, &pif->pif_bitmap);
	}
	PKTCNTR_ADD(&cl->cl_xmitcnt, 1, len);
	IFCQ_XMIT_ADD(ifq, 1, len);

	return (m);
}
670
/*
 * Add a packet to the class queue, dispatching to the configured
 * queueing discipline (RIO/RED/BLUE/SFB) when one is active, else
 * doing a bounded drop-tail enqueue.  Also performs deferred (lazy)
 * SFB allocation on first use.  Returns 0 or a CLASSQEQ_* code; on
 * the drop codes the mbuf has already been freed.
 */
static inline int
priq_addq(struct priq_class *cl, struct mbuf *m, struct pf_mtag *t)
{
	struct priq_if *pif = cl->cl_pif;
	struct ifclassq *ifq = pif->pif_ifq;

	IFCQ_LOCK_ASSERT_HELD(ifq);

#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		return (rio_addq(cl->cl_rio, &cl->cl_q, m, t));
	else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		return (red_addq(cl->cl_red, &cl->cl_q, m, t));
	else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		return (blue_addq(cl->cl_blue, &cl->cl_q, m, t));
	else
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q)) {
		if (cl->cl_sfb == NULL) {
			struct ifnet *ifp = PRIQIF_IFP(pif);

			/* allocation was deferred at class creation time */
			VERIFY(cl->cl_flags & PRCF_LAZY);
			cl->cl_flags &= ~PRCF_LAZY;
			IFCQ_CONVERT_LOCK(ifq);

			cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
			    qlimit(&cl->cl_q), cl->cl_qflags);
			if (cl->cl_sfb == NULL) {
				/* fall back to droptail */
				qtype(&cl->cl_q) = Q_DROPTAIL;
				cl->cl_flags &= ~PRCF_SFB;
				cl->cl_qflags &= ~(SFBF_ECN | SFBF_FLOWCTL);

				log(LOG_ERR, "%s: %s SFB lazy allocation "
				    "failed for qid=%d pri=%d, falling back "
				    "to DROPTAIL\n", if_name(ifp),
				    priq_style(pif), cl->cl_handle,
				    cl->cl_pri);
			} else if (pif->pif_throttle != IFNET_THROTTLE_OFF) {
				/* if there's pending throttling, set it */
				cqrq_throttle_t tr = { 1, pif->pif_throttle };
				int err = priq_throttle(pif, &tr);

				if (err == EALREADY)
					err = 0;
				if (err != 0) {
					tr.level = IFNET_THROTTLE_OFF;
					(void) priq_throttle(pif, &tr);
				}
			}
		}
		if (cl->cl_sfb != NULL)
			return (sfb_addq(cl->cl_sfb, &cl->cl_q, m, t));
	} else if (qlen(&cl->cl_q) >= qlimit(&cl->cl_q)) {
		/* drop-tail queue is full; drop the packet */
		IFCQ_CONVERT_LOCK(ifq);
		m_freem(m);
		return (CLASSQEQ_DROPPED);
	}

#if PF_ECN
	if (cl->cl_flags & PRCF_CLEARDSCP)
		write_dsfield(m, t, 0);
#endif /* PF_ECN */

	_addq(&cl->cl_q, m);

	return (0);
}
745
/*
 * Remove and return the next packet from the class queue via the
 * active queueing discipline, or plain FIFO when none is configured.
 */
static inline struct mbuf *
priq_getq(struct priq_class *cl)
{
	IFCQ_LOCK_ASSERT_HELD(cl->cl_pif->pif_ifq);

#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		return (rio_getq(cl->cl_rio, &cl->cl_q));
	else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		return (red_getq(cl->cl_red, &cl->cl_q));
	else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		return (blue_getq(cl->cl_blue, &cl->cl_q));
	else
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
		return (sfb_getq(cl->cl_sfb, &cl->cl_q));

	/* default: plain FIFO dequeue */
	return (_getq(&cl->cl_q));
}
771
/*
 * Return the packet at the head of the class queue without removing it.
 */
static inline struct mbuf *
priq_pollq(struct priq_class *cl)
{
	IFCQ_LOCK_ASSERT_HELD(cl->cl_pif->pif_ifq);

	return (qhead(&cl->cl_q));
}
779
/*
 * Purge packets belonging to the given flow (0 == all flows) from the
 * class queue, updating drop counters and the interface queue length.
 * The number of packets and bytes purged is optionally reported
 * through the packets/bytes out-parameters.
 */
static void
priq_purgeq(struct priq_if *pif, struct priq_class *cl, u_int32_t flow,
    u_int32_t *packets, u_int32_t *bytes)
{
	struct ifclassq *ifq = pif->pif_ifq;
	u_int32_t cnt = 0, len = 0, qlen;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	if ((qlen = qlen(&cl->cl_q)) == 0) {
		/* empty class must already be marked inactive */
		VERIFY(!pktsched_bit_tst(cl->cl_pri, &pif->pif_bitmap));
		goto done;
	}

	/* become regular mutex before freeing mbufs */
	IFCQ_CONVERT_LOCK(ifq);

#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		rio_purgeq(cl->cl_rio, &cl->cl_q, flow, &cnt, &len);
	else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		red_purgeq(cl->cl_red, &cl->cl_q, flow, &cnt, &len);
	else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		blue_purgeq(cl->cl_blue, &cl->cl_q, flow, &cnt, &len);
	else
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
		sfb_purgeq(cl->cl_sfb, &cl->cl_q, flow, &cnt, &len);
	else
		_flushq_flow(&cl->cl_q, flow, &cnt, &len);

	if (cnt > 0) {
		VERIFY(qlen(&cl->cl_q) == (qlen - cnt));

		PKTCNTR_ADD(&cl->cl_dropcnt, cnt, len);
		IFCQ_DROP_ADD(ifq, cnt, len);

		VERIFY(((signed)IFCQ_LEN(ifq) - cnt) >= 0);
		IFCQ_LEN(ifq) -= cnt;

		/* fully drained class is no longer active */
		if (qempty(&cl->cl_q))
			pktsched_bit_clr(cl->cl_pri, &pif->pif_bitmap);

		if (pktsched_verbose) {
			log(LOG_DEBUG, "%s: %s purge qid=%d pri=%d "
			    "qlen=[%d,%d] cnt=%d len=%d flow=0x%x\n",
			    if_name(PRIQIF_IFP(pif)), priq_style(pif),
			    cl->cl_handle, cl->cl_pri, qlen, qlen(&cl->cl_q),
			    cnt, len, flow);
		}
	}
done:
	if (packets != NULL)
		*packets = cnt;
	if (bytes != NULL)
		*bytes = len;
}
843
/*
 * Forward an interface event to the class's active queueing
 * discipline so it can adapt its state; no-op for plain drop-tail.
 */
static void
priq_updateq(struct priq_if *pif, struct priq_class *cl, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s update qid=%d pri=%d event=%s\n",
		    if_name(PRIQIF_IFP(pif)), priq_style(pif),
		    cl->cl_handle, cl->cl_pri, ifclassq_ev2str(ev));
	}

#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		return (rio_updateq(cl->cl_rio, ev));
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		return (red_updateq(cl->cl_red, ev));
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		return (blue_updateq(cl->cl_blue, ev));
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
		return (sfb_updateq(cl->cl_sfb, ev));
}
870
/*
 * Fill in the statistics snapshot for the class identified by qid,
 * including the active queueing discipline's own stats.
 * Returns EINVAL if the handle does not resolve to a class.
 */
int
priq_get_class_stats(struct priq_if *pif, u_int32_t qid,
    struct priq_classstats *sp)
{
	struct priq_class *cl;

	IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);

	if ((cl = priq_clh_to_clp(pif, qid)) == NULL)
		return (EINVAL);

	sp->class_handle = cl->cl_handle;
	sp->priority = cl->cl_pri;
	sp->qlength = qlen(&cl->cl_q);
	sp->qlimit = qlimit(&cl->cl_q);
	sp->period = cl->cl_period;
	sp->xmitcnt = cl->cl_xmitcnt;
	sp->dropcnt = cl->cl_dropcnt;

	sp->qtype = qtype(&cl->cl_q);
	sp->qstate = qstate(&cl->cl_q);
	/* append discipline-specific stats for the active qalg, if any */
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		red_getstats(cl->cl_red, &sp->red[0]);
#endif /* CLASSQ_RED */
#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		rio_getstats(cl->cl_rio, &sp->red[0]);
#endif /* CLASSQ_RIO */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		blue_getstats(cl->cl_blue, &sp->blue);
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
		sfb_getstats(cl->cl_sfb, &sp->sfb);

	return (0);
}
909
910static int
911priq_stat_sc(struct priq_if *pif, cqrq_stat_sc_t *sr)
912{
913	struct ifclassq *ifq = pif->pif_ifq;
914	struct priq_class *cl;
915	u_int32_t i;
916
917	IFCQ_LOCK_ASSERT_HELD(ifq);
918
919	VERIFY(sr->sc == MBUF_SC_UNSPEC || MBUF_VALID_SC(sr->sc));
920
921	i = MBUF_SCIDX(sr->sc);
922	VERIFY(i < IFCQ_SC_MAX);
923
924	cl = ifq->ifcq_disc_slots[i].cl;
925	sr->packets = qlen(&cl->cl_q);
926	sr->bytes = qsize(&cl->cl_q);
927
928	return (0);
929}
930
931/* convert a class handle to the corresponding class pointer */
932static inline struct priq_class *
933priq_clh_to_clp(struct priq_if *pif, u_int32_t chandle)
934{
935	struct priq_class *cl;
936	int idx;
937
938	IFCQ_LOCK_ASSERT_HELD(pif->pif_ifq);
939
940	for (idx = pif->pif_maxpri; idx >= 0; idx--)
941		if ((cl = pif->pif_classes[idx]) != NULL &&
942		    cl->cl_handle == chandle)
943			return (cl);
944
945	return (NULL);
946}
947
948static const char *
949priq_style(struct priq_if *pif)
950{
951	return ((pif->pif_flags & PRIQIFF_ALTQ) ? "ALTQ_PRIQ" : "PRIQ");
952}
953
954/*
955 * priq_enqueue_ifclassq is an enqueue function to be registered to
956 * (*ifcq_enqueue) in struct ifclassq.
957 */
958static int
959priq_enqueue_ifclassq(struct ifclassq *ifq, struct mbuf *m)
960{
961	u_int32_t i;
962
963	IFCQ_LOCK_ASSERT_HELD(ifq);
964
965	if (!(m->m_flags & M_PKTHDR)) {
966		/* should not happen */
967		log(LOG_ERR, "%s: packet does not have pkthdr\n",
968		    if_name(ifq->ifcq_ifp));
969		IFCQ_CONVERT_LOCK(ifq);
970		m_freem(m);
971		return (ENOBUFS);
972	}
973
974	i = MBUF_SCIDX(mbuf_get_service_class(m));
975	VERIFY((u_int32_t)i < IFCQ_SC_MAX);
976
977	return (priq_enqueue(ifq->ifcq_disc,
978	    ifq->ifcq_disc_slots[i].cl, m, m_pftag(m)));
979}
980
981/*
982 * priq_dequeue_ifclassq is a dequeue function to be registered to
983 * (*ifcq_dequeue) in struct ifclass.
984 *
985 * note: CLASSQDQ_POLL returns the next packet without removing the packet
986 *	from the queue.  CLASSQDQ_REMOVE is a normal dequeue operation.
987 *	CLASSQDQ_REMOVE must return the same packet if called immediately
988 *	after CLASSQDQ_POLL.
989 */
static struct mbuf *
priq_dequeue_ifclassq(struct ifclassq *ifq, cqdq_op_t op)
{
	/* forward to the scheduler instance attached to this ifclassq */
	return (priq_dequeue(ifq->ifcq_disc, op));
}
995
996static int
997priq_request_ifclassq(struct ifclassq *ifq, cqrq_t req, void *arg)
998{
999	struct priq_if *pif = (struct priq_if *)ifq->ifcq_disc;
1000	int err = 0;
1001
1002	IFCQ_LOCK_ASSERT_HELD(ifq);
1003
1004	switch (req) {
1005	case CLASSQRQ_PURGE:
1006		priq_purge(pif);
1007		break;
1008
1009	case CLASSQRQ_PURGE_SC:
1010		priq_purge_sc(pif, (cqrq_purge_sc_t *)arg);
1011		break;
1012
1013	case CLASSQRQ_EVENT:
1014		priq_event(pif, (cqev_t)arg);
1015		break;
1016
1017	case CLASSQRQ_THROTTLE:
1018		err = priq_throttle(pif, (cqrq_throttle_t *)arg);
1019		break;
1020
1021	case CLASSQRQ_STAT_SC:
1022		err = priq_stat_sc(pif, (cqrq_stat_sc_t *)arg);
1023		break;
1024	}
1025	return (err);
1026}
1027
/*
 * Build the standard 10-class PRIQ configuration for an interface:
 * one class per mbuf service class (BK_SYS lowest through CTL
 * highest), attach the scheduler to the ifclassq, and cache the
 * per-service-class slot mappings for fast lookup at enqueue time.
 * Most classes are created PRCF_LAZY so their AQM state is allocated
 * on first use.  On any failure the partially built scheduler is torn
 * down via the cleanup path.
 */
int
priq_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags)
{
	struct ifnet *ifp = ifq->ifcq_ifp;
	struct priq_class *cl0, *cl1, *cl2, *cl3, *cl4;
	struct priq_class *cl5, *cl6, *cl7, *cl8, *cl9;
	struct priq_if *pif;
	u_int32_t maxlen = 0, qflags = 0;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_disc == NULL);
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);

	/* translate scheduler setup flags into per-class PRCF_* flags */
	if (flags & PKTSCHEDF_QALG_RED)
		qflags |= PRCF_RED;
	if (flags & PKTSCHEDF_QALG_RIO)
		qflags |= PRCF_RIO;
	if (flags & PKTSCHEDF_QALG_BLUE)
		qflags |= PRCF_BLUE;
	if (flags & PKTSCHEDF_QALG_SFB)
		qflags |= PRCF_SFB;
	if (flags & PKTSCHEDF_QALG_ECN)
		qflags |= PRCF_ECN;
	if (flags & PKTSCHEDF_QALG_FLOWCTL)
		qflags |= PRCF_FLOWCTL;

	pif = priq_alloc(ifp, M_WAITOK, FALSE);
	if (pif == NULL)
		return (ENOMEM);

	if ((maxlen = IFCQ_MAXLEN(ifq)) == 0)
		maxlen = if_sndq_maxlen;

	/* one class per service class, priority 0 (lowest) through 9 */
	if ((err = priq_add_queue(pif, 0, maxlen,
	    qflags | PRCF_LAZY, SCIDX_BK_SYS, &cl0)) != 0)
		goto cleanup;

	if ((err = priq_add_queue(pif, 1, maxlen,
	    qflags | PRCF_LAZY, SCIDX_BK, &cl1)) != 0)
		goto cleanup;

	/* best effort is the default class and is created eagerly */
	if ((err = priq_add_queue(pif, 2, maxlen,
	    qflags | PRCF_DEFAULTCLASS, SCIDX_BE, &cl2)) != 0)
		goto cleanup;

	if ((err = priq_add_queue(pif, 3, maxlen,
	    qflags | PRCF_LAZY, SCIDX_RD, &cl3)) != 0)
		goto cleanup;

	if ((err = priq_add_queue(pif, 4, maxlen,
	    qflags | PRCF_LAZY, SCIDX_OAM, &cl4)) != 0)
		goto cleanup;

	if ((err = priq_add_queue(pif, 5, maxlen,
	    qflags | PRCF_LAZY, SCIDX_AV, &cl5)) != 0)
		goto cleanup;

	if ((err = priq_add_queue(pif, 6, maxlen,
	    qflags | PRCF_LAZY, SCIDX_RV, &cl6)) != 0)
		goto cleanup;

	if ((err = priq_add_queue(pif, 7, maxlen,
	    qflags | PRCF_LAZY, SCIDX_VI, &cl7)) != 0)
		goto cleanup;

	if ((err = priq_add_queue(pif, 8, maxlen,
	    qflags | PRCF_LAZY, SCIDX_VO, &cl8)) != 0)
		goto cleanup;

	if ((err = priq_add_queue(pif, 9, maxlen,
	    qflags, SCIDX_CTL, &cl9)) != 0)
		goto cleanup;

	err = ifclassq_attach(ifq, PKTSCHEDT_PRIQ, pif,
	    priq_enqueue_ifclassq, priq_dequeue_ifclassq, NULL,
	    priq_request_ifclassq);

	/* cache these for faster lookup */
	if (err == 0) {
		ifq->ifcq_disc_slots[SCIDX_BK_SYS].qid = SCIDX_BK_SYS;
		ifq->ifcq_disc_slots[SCIDX_BK_SYS].cl = cl0;

		ifq->ifcq_disc_slots[SCIDX_BK].qid = SCIDX_BK;
		ifq->ifcq_disc_slots[SCIDX_BK].cl = cl1;

		ifq->ifcq_disc_slots[SCIDX_BE].qid = SCIDX_BE;
		ifq->ifcq_disc_slots[SCIDX_BE].cl = cl2;

		ifq->ifcq_disc_slots[SCIDX_RD].qid = SCIDX_RD;
		ifq->ifcq_disc_slots[SCIDX_RD].cl = cl3;

		ifq->ifcq_disc_slots[SCIDX_OAM].qid = SCIDX_OAM;
		ifq->ifcq_disc_slots[SCIDX_OAM].cl = cl4;

		ifq->ifcq_disc_slots[SCIDX_AV].qid = SCIDX_AV;
		ifq->ifcq_disc_slots[SCIDX_AV].cl = cl5;

		ifq->ifcq_disc_slots[SCIDX_RV].qid = SCIDX_RV;
		ifq->ifcq_disc_slots[SCIDX_RV].cl = cl6;

		ifq->ifcq_disc_slots[SCIDX_VI].qid = SCIDX_VI;
		ifq->ifcq_disc_slots[SCIDX_VI].cl = cl7;

		ifq->ifcq_disc_slots[SCIDX_VO].qid = SCIDX_VO;
		ifq->ifcq_disc_slots[SCIDX_VO].cl = cl8;

		ifq->ifcq_disc_slots[SCIDX_CTL].qid = SCIDX_CTL;
		ifq->ifcq_disc_slots[SCIDX_CTL].cl = cl9;
	}

cleanup:
	if (err != 0)
		(void) priq_destroy_locked(pif);

	return (err);
}
1145
1146int
1147priq_teardown_ifclassq(struct ifclassq *ifq)
1148{
1149	struct priq_if *pif = ifq->ifcq_disc;
1150	int i;
1151
1152	IFCQ_LOCK_ASSERT_HELD(ifq);
1153	VERIFY(pif != NULL && ifq->ifcq_type == PKTSCHEDT_PRIQ);
1154
1155	(void) priq_destroy_locked(pif);
1156
1157	ifq->ifcq_disc = NULL;
1158	for (i = 0; i < IFCQ_SC_MAX; i++) {
1159		ifq->ifcq_disc_slots[i].qid = 0;
1160		ifq->ifcq_disc_slots[i].cl = NULL;
1161	}
1162
1163	return (ifclassq_detach(ifq));
1164}
1165
1166int
1167priq_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t slot,
1168    struct if_ifclassq_stats *ifqs)
1169{
1170	struct priq_if *pif = ifq->ifcq_disc;
1171
1172	IFCQ_LOCK_ASSERT_HELD(ifq);
1173	VERIFY(ifq->ifcq_type == PKTSCHEDT_PRIQ);
1174
1175	if (slot >= IFCQ_SC_MAX)
1176		return (EINVAL);
1177
1178	return (priq_get_class_stats(pif, ifq->ifcq_disc_slots[slot].qid,
1179	    &ifqs->ifqs_priq_stats));
1180}
1181
/*
 * Handle a throttling request (cqrq_throttle_t) on the scheduler.
 * With tr->set clear this is a query that reports the current level;
 * otherwise transition to tr->level by suspending or resuming the
 * BK_SYS class queue.  Returns 0 on success, EALREADY if already at
 * the requested level, or an error from the suspend/resume path.
 */
static int
priq_throttle(struct priq_if *pif, cqrq_throttle_t *tr)
{
	struct ifclassq *ifq = pif->pif_ifq;
	struct priq_class *cl;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(!(pif->pif_flags & PRIQIFF_ALTQ));

	if (!tr->set) {
		/* Query: report the current throttling level. */
		tr->level = pif->pif_throttle;
		return (0);
	}

	if (tr->level == pif->pif_throttle)
		return (EALREADY);

	/* Current throttling levels only involve BK_SYS class */
	cl = ifq->ifcq_disc_slots[SCIDX_BK_SYS].cl;

	switch (tr->level) {
	case IFNET_THROTTLE_OFF:
		err = priq_resumeq(pif, cl);
		break;

	case IFNET_THROTTLE_OPPORTUNISTIC:
		err = priq_suspendq(pif, cl);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
	}

	/*
	 * ENXIO comes back from priq_suspendq() when the class queue is
	 * not yet instantiated (PRCF_LAZY); record the new level anyway
	 * and defer the actual work ("lazy" throttling).
	 */
	if (err == 0 || err == ENXIO) {
		if (pktsched_verbose) {
			log(LOG_DEBUG, "%s: %s throttling level %sset %d->%d\n",
			    if_name(PRIQIF_IFP(pif)), priq_style(pif),
			    (err == 0) ? "" : "lazy ", pif->pif_throttle,
			    tr->level);
		}
		pif->pif_throttle = tr->level;
		if (err != 0)
			err = 0;	/* lazy case: report success */
		else
			priq_purgeq(pif, cl, 0, NULL, NULL);
	} else {
		log(LOG_ERR, "%s: %s unable to set throttling level "
		    "%d->%d [error=%d]\n", if_name(PRIQIF_IFP(pif)),
		    priq_style(pif), pif->pif_throttle, tr->level, err);
	}

	return (err);
}
1237
/*
 * Resume a suspended class queue.  Dispatches to the active queueing
 * discipline's suspend routine with FALSE (i.e. un-suspend); on
 * success the queue state is set back to QS_RUNNING.  Returns 0 on
 * success or an error from the underlying discipline.
 */
static int
priq_resumeq(struct priq_if *pif, struct priq_class *cl)
{
	struct ifclassq *ifq = pif->pif_ifq;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	/*
	 * At most one branch below runs; the if/else chain is threaded
	 * through the #if conditionals, so exactly one discipline check
	 * matches whichever AQM is active on cl_q.
	 */
#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		err = rio_suspendq(cl->cl_rio, &cl->cl_q, FALSE);
	else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		err = red_suspendq(cl->cl_red, &cl->cl_q, FALSE);
	else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		err = blue_suspendq(cl->cl_blue, &cl->cl_q, FALSE);
	else
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
		err = sfb_suspendq(cl->cl_sfb, &cl->cl_q, FALSE);

	if (err == 0)
		qstate(&cl->cl_q) = QS_RUNNING;

	return (err);
}
1269
/*
 * Suspend a class queue.  Dispatches to the active queueing
 * discipline's suspend routine with TRUE; a lazily-created SFB queue
 * (PRCF_LAZY, cl_sfb still NULL) yields ENXIO, meaning the actual
 * suspension is deferred until the queue is instantiated.  On success
 * or ENXIO the queue state is marked QS_SUSPENDED.
 */
static int
priq_suspendq(struct priq_if *pif, struct priq_class *cl)
{
	struct ifclassq *ifq = pif->pif_ifq;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	/*
	 * At most one branch below runs; the if/else chain is threaded
	 * through the #if conditionals so only the active discipline's
	 * suspend routine is invoked.
	 */
#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		err = rio_suspendq(cl->cl_rio, &cl->cl_q, TRUE);
	else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		err = red_suspendq(cl->cl_red, &cl->cl_q, TRUE);
	else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		err = blue_suspendq(cl->cl_blue, &cl->cl_q, TRUE);
	else
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q)) {
		if (cl->cl_sfb != NULL) {
			err = sfb_suspendq(cl->cl_sfb, &cl->cl_q, TRUE);
		} else {
			/* SFB not instantiated yet; only legal if lazy. */
			VERIFY(cl->cl_flags & PRCF_LAZY);
			err = ENXIO;	/* delayed throttling */
		}
	}

	if (err == 0 || err == ENXIO)
		qstate(&cl->cl_q) = QS_SUSPENDED;

	return (err);
}
1307#endif /* PKTSCHED_PRIQ */
1308