/*
 * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * Copyright (c) 2010 Fabio Checconi, Luigi Rizzo, Paolo Valente
 * All rights reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Quick Fair Queueing is described in
 * "QFQ: Efficient Packet Scheduling with Tight Bandwidth Distribution
 * Guarantees" by Fabio Checconi, Paolo Valente, and Luigi Rizzo.
 *
 * This code is ported from the dummynet(4) QFQ implementation.
 * See also http://info.iet.unipi.it/~luigi/qfq/
 */
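
/*
 * Editor's sketch (not part of the original sources): the timestamp
 * arithmetic used throughout this file works roughly as follows, in
 * the fixed-point units defined by QFQ_ONE_FP:
 *
 *	enqueue into an idle class cl (see qfq_update_start/qfq_enqueue):
 *		cl->cl_S = cl->cl_F or qif->qif_V;	(whichever applies)
 *		cl->cl_F = cl->cl_S + len * cl->cl_inv_w;
 *	dequeue of a packet of length len (see qfq_dequeue):
 *		qif->qif_V += len * QFQ_IWSUM;
 *
 * A group whose rounded start time does not exceed V is "eligible";
 * among the eligible-and-ready (ER) groups, qfq_dequeue serves the
 * one with the lowest index.
 */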

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/syslog.h>

#include <kern/zalloc.h>

#include <net/if.h>
#include <net/net_osdep.h>

#include <net/pktsched/pktsched_qfq.h>
#include <netinet/in.h>

/*
 * function prototypes
 */
static int qfq_enqueue_ifclassq(struct ifclassq *, struct mbuf *);
static struct mbuf *qfq_dequeue_ifclassq(struct ifclassq *, cqdq_op_t);
static int qfq_request_ifclassq(struct ifclassq *, cqrq_t, void *);
static int qfq_clear_interface(struct qfq_if *);
static struct qfq_class *qfq_class_create(struct qfq_if *, u_int32_t,
    u_int32_t, u_int32_t, u_int32_t, u_int32_t);
static int qfq_class_destroy(struct qfq_if *, struct qfq_class *);
static int qfq_destroy_locked(struct qfq_if *);
static inline int qfq_addq(struct qfq_class *, struct mbuf *, struct pf_mtag *);
static inline struct mbuf *qfq_getq(struct qfq_class *);
static inline struct mbuf *qfq_pollq(struct qfq_class *);
static void qfq_purgeq(struct qfq_if *, struct qfq_class *, u_int32_t,
    u_int32_t *, u_int32_t *);
static void qfq_purge_sc(struct qfq_if *, cqrq_purge_sc_t *);
static void qfq_updateq(struct qfq_if *, struct qfq_class *, cqev_t);
static int qfq_throttle(struct qfq_if *, cqrq_throttle_t *);
static int qfq_resumeq(struct qfq_if *, struct qfq_class *);
static int qfq_suspendq(struct qfq_if *, struct qfq_class *);
static int qfq_stat_sc(struct qfq_if *, cqrq_stat_sc_t *);
static inline struct qfq_class *qfq_clh_to_clp(struct qfq_if *, u_int32_t);
static const char *qfq_style(struct qfq_if *);

static inline int qfq_gt(u_int64_t, u_int64_t);
static inline u_int64_t qfq_round_down(u_int64_t, u_int32_t);
static inline struct qfq_group *qfq_ffs(struct qfq_if *, pktsched_bitmap_t);
static int qfq_calc_index(struct qfq_class *, u_int32_t, u_int32_t);
static inline pktsched_bitmap_t mask_from(pktsched_bitmap_t, int);
static inline u_int32_t qfq_calc_state(struct qfq_if *, struct qfq_group *);
static inline void qfq_move_groups(struct qfq_if *, pktsched_bitmap_t,
    int, int);
static inline void qfq_unblock_groups(struct qfq_if *, int, u_int64_t);
static inline void qfq_make_eligible(struct qfq_if *, u_int64_t);
static inline void qfq_slot_insert(struct qfq_if *, struct qfq_group *,
    struct qfq_class *, u_int64_t);
static inline void qfq_front_slot_remove(struct qfq_group *);
static inline struct qfq_class *qfq_slot_scan(struct qfq_if *,
    struct qfq_group *);
static inline void qfq_slot_rotate(struct qfq_if *, struct qfq_group *,
    u_int64_t);
static inline void qfq_update_eligible(struct qfq_if *, u_int64_t);
static inline int qfq_update_class(struct qfq_if *, struct qfq_group *,
    struct qfq_class *);
static inline void qfq_update_start(struct qfq_if *, struct qfq_class *);
static inline void qfq_slot_remove(struct qfq_if *, struct qfq_group *,
    struct qfq_class *);
static void qfq_deactivate_class(struct qfq_if *, struct qfq_class *);
static const char *qfq_state2str(int);
#if QFQ_DEBUG
static void qfq_dump_groups(struct qfq_if *, u_int32_t);
static void qfq_dump_sched(struct qfq_if *, const char *);
#endif /* QFQ_DEBUG */

#define	QFQ_ZONE_MAX	32		/* maximum elements in zone */
#define	QFQ_ZONE_NAME	"pktsched_qfq"	/* zone name */

static unsigned int qfq_size;		/* size of zone element */
static struct zone *qfq_zone;		/* zone for qfq */

#define	QFQ_CL_ZONE_MAX	32	/* maximum elements in zone */
#define	QFQ_CL_ZONE_NAME	"pktsched_qfq_cl" /* zone name */

static unsigned int qfq_cl_size;	/* size of zone element */
static struct zone *qfq_cl_zone;	/* zone for qfq_class */

/*
 * Maximum number of consecutive slots occupied by backlogged classes
 * inside a group.  This is approx lmax/lmin + 5.  Used when ALTQ is
 * available.
 *
 * XXX check because it poses constraints on MAX_INDEX
 */
#define	QFQ_MAX_SLOTS	32	/* default when ALTQ is available */

void
qfq_init(void)
{
	qfq_size = sizeof (struct qfq_if);
	qfq_zone = zinit(qfq_size, QFQ_ZONE_MAX * qfq_size,
	    0, QFQ_ZONE_NAME);
	if (qfq_zone == NULL) {
		panic("%s: failed allocating %s", __func__, QFQ_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(qfq_zone, Z_EXPAND, TRUE);
	zone_change(qfq_zone, Z_CALLERACCT, TRUE);

	qfq_cl_size = sizeof (struct qfq_class);
	qfq_cl_zone = zinit(qfq_cl_size, QFQ_CL_ZONE_MAX * qfq_cl_size,
	    0, QFQ_CL_ZONE_NAME);
	if (qfq_cl_zone == NULL) {
		panic("%s: failed allocating %s", __func__, QFQ_CL_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(qfq_cl_zone, Z_EXPAND, TRUE);
	zone_change(qfq_cl_zone, Z_CALLERACCT, TRUE);
}

struct qfq_if *
qfq_alloc(struct ifnet *ifp, int how, boolean_t altq)
{
	struct qfq_if	*qif;

	qif = (how == M_WAITOK) ? zalloc(qfq_zone) : zalloc_noblock(qfq_zone);
	if (qif == NULL)
		return (NULL);

	bzero(qif, qfq_size);
	qif->qif_ifq = &ifp->if_snd;
	if (altq) {
		qif->qif_maxclasses = QFQ_MAX_CLASSES;
		qif->qif_maxslots = QFQ_MAX_SLOTS;
		qif->qif_flags |= QFQIFF_ALTQ;
	} else {
		qif->qif_maxclasses = IFCQ_SC_MAX;
		/*
		 * TODO: adi@apple.com
		 *
		 * Ideally I would like to have the following
		 * but QFQ needs further modifications.
		 *
		 *	qif->qif_maxslots = IFCQ_SC_MAX;
		 */
		qif->qif_maxslots = QFQ_MAX_SLOTS;
	}

	if ((qif->qif_class_tbl = _MALLOC(sizeof (struct qfq_class *) *
	    qif->qif_maxclasses, M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) {
		log(LOG_ERR, "%s: %s unable to allocate class table array\n",
		    if_name(ifp), qfq_style(qif));
		goto error;
	}

	if ((qif->qif_groups = _MALLOC(sizeof (struct qfq_group *) *
	    (QFQ_MAX_INDEX + 1), M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) {
		log(LOG_ERR, "%s: %s unable to allocate group array\n",
		    if_name(ifp), qfq_style(qif));
		goto error;
	}

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s scheduler allocated\n",
		    if_name(ifp), qfq_style(qif));
	}

	return (qif);

error:
	if (qif->qif_class_tbl != NULL) {
		_FREE(qif->qif_class_tbl, M_DEVBUF);
		qif->qif_class_tbl = NULL;
	}
	if (qif->qif_groups != NULL) {
		_FREE(qif->qif_groups, M_DEVBUF);
		qif->qif_groups = NULL;
	}
	zfree(qfq_zone, qif);

	return (NULL);
}

int
qfq_destroy(struct qfq_if *qif)
{
	struct ifclassq *ifq = qif->qif_ifq;
	int err;

	IFCQ_LOCK(ifq);
	err = qfq_destroy_locked(qif);
	IFCQ_UNLOCK(ifq);

	return (err);
}

static int
qfq_destroy_locked(struct qfq_if *qif)
{
	int i;

	IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);

	(void) qfq_clear_interface(qif);

	VERIFY(qif->qif_class_tbl != NULL);
	_FREE(qif->qif_class_tbl, M_DEVBUF);
	qif->qif_class_tbl = NULL;

	VERIFY(qif->qif_groups != NULL);
	for (i = 0; i <= QFQ_MAX_INDEX; i++) {
		struct qfq_group *grp = qif->qif_groups[i];

		if (grp != NULL) {
			VERIFY(grp->qfg_slots != NULL);
			_FREE(grp->qfg_slots, M_DEVBUF);
			grp->qfg_slots = NULL;
			_FREE(grp, M_DEVBUF);
			qif->qif_groups[i] = NULL;
		}
	}
	_FREE(qif->qif_groups, M_DEVBUF);
	qif->qif_groups = NULL;

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s scheduler destroyed\n",
		    if_name(QFQIF_IFP(qif)), qfq_style(qif));
	}

	zfree(qfq_zone, qif);

	return (0);
}

/*
 * bring the interface back to the initial state by discarding
 * all the filters and classes.
 */
static int
qfq_clear_interface(struct qfq_if *qif)
{
	struct qfq_class *cl;
	int i;

	IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);

	/* clear out the classes */
	for (i = 0; i < qif->qif_maxclasses; i++)
		if ((cl = qif->qif_class_tbl[i]) != NULL)
			qfq_class_destroy(qif, cl);

	return (0);
}

/* discard all the queued packets on the interface */
void
qfq_purge(struct qfq_if *qif)
{
	struct qfq_class *cl;
	int i;

	IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);

	for (i = 0; i < qif->qif_maxclasses; i++) {
		if ((cl = qif->qif_class_tbl[i]) != NULL)
			qfq_purgeq(qif, cl, 0, NULL, NULL);
	}
#if !PF_ALTQ
	/*
	 * This assertion is safe to be made only when PF_ALTQ is not
	 * configured; otherwise, IFCQ_LEN represents the sum of the
	 * packets managed by ifcq_disc and altq_disc instances, which
	 * is possible when transitioning between the two.
	 */
	VERIFY(IFCQ_LEN(qif->qif_ifq) == 0);
#endif /* !PF_ALTQ */
}

static void
qfq_purge_sc(struct qfq_if *qif, cqrq_purge_sc_t *pr)
{
	struct ifclassq *ifq = qif->qif_ifq;
	u_int32_t i;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	VERIFY(pr->sc == MBUF_SC_UNSPEC || MBUF_VALID_SC(pr->sc));
	VERIFY(pr->flow != 0);

	if (pr->sc != MBUF_SC_UNSPEC) {
		i = MBUF_SCIDX(pr->sc);
		VERIFY(i < IFCQ_SC_MAX);

		qfq_purgeq(qif, ifq->ifcq_disc_slots[i].cl,
		    pr->flow, &pr->packets, &pr->bytes);
	} else {
		u_int32_t cnt, len;

		pr->packets = 0;
		pr->bytes = 0;

		for (i = 0; i < IFCQ_SC_MAX; i++) {
			qfq_purgeq(qif, ifq->ifcq_disc_slots[i].cl,
			    pr->flow, &cnt, &len);
			pr->packets += cnt;
			pr->bytes += len;
		}
	}
}

void
qfq_event(struct qfq_if *qif, cqev_t ev)
{
	struct qfq_class *cl;
	int i;

	IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);

	for (i = 0; i < qif->qif_maxclasses; i++)
		if ((cl = qif->qif_class_tbl[i]) != NULL)
			qfq_updateq(qif, cl, ev);
}

int
qfq_add_queue(struct qfq_if *qif, u_int32_t qlimit, u_int32_t weight,
    u_int32_t maxsz, u_int32_t flags, u_int32_t qid, struct qfq_class **clp)
{
	struct qfq_class *cl;
	u_int32_t w;

	IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);

	if (qfq_clh_to_clp(qif, qid) != NULL)
		return (EBUSY);

	/* check parameters */
	if (weight == 0 || weight > QFQ_MAX_WEIGHT)
		return (EINVAL);

	w = (QFQ_ONE_FP / (QFQ_ONE_FP / weight));
	if (qif->qif_wsum + w > QFQ_MAX_WSUM)
		return (EINVAL);

	if (maxsz == 0 || maxsz > (1 << QFQ_MTU_SHIFT))
		return (EINVAL);

	cl = qfq_class_create(qif, weight, qlimit, flags, maxsz, qid);
	if (cl == NULL)
		return (ENOMEM);

	if (clp != NULL)
		*clp = cl;

	return (0);
}

static struct qfq_class *
qfq_class_create(struct qfq_if *qif, u_int32_t weight, u_int32_t qlimit,
    u_int32_t flags, u_int32_t maxsz, u_int32_t qid)
{
	struct ifnet *ifp;
	struct ifclassq *ifq;
	struct qfq_group *grp;
	struct qfq_class *cl;
	u_int32_t w;			/* approximated weight */
	int i;

	IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);

	/* Sanitize flags unless internally configured */
	if (qif->qif_flags & QFQIFF_ALTQ)
		flags &= QFCF_USERFLAGS;

	if (qif->qif_classes >= qif->qif_maxclasses) {
		log(LOG_ERR, "%s: %s out of classes! (max %d)\n",
		    if_name(QFQIF_IFP(qif)), qfq_style(qif),
		    qif->qif_maxclasses);
		return (NULL);
	}

#if !CLASSQ_RED
	if (flags & QFCF_RED) {
		log(LOG_ERR, "%s: %s RED not available!\n",
		    if_name(QFQIF_IFP(qif)), qfq_style(qif));
		return (NULL);
	}
#endif /* !CLASSQ_RED */

#if !CLASSQ_RIO
	if (flags & QFCF_RIO) {
		log(LOG_ERR, "%s: %s RIO not available!\n",
		    if_name(QFQIF_IFP(qif)), qfq_style(qif));
		return (NULL);
	}
#endif /* !CLASSQ_RIO */

#if !CLASSQ_BLUE
	if (flags & QFCF_BLUE) {
		log(LOG_ERR, "%s: %s BLUE not available!\n",
		    if_name(QFQIF_IFP(qif)), qfq_style(qif));
		return (NULL);
	}
#endif /* !CLASSQ_BLUE */

	/* These are mutually exclusive */
	if ((flags & (QFCF_RED|QFCF_RIO|QFCF_BLUE|QFCF_SFB)) &&
	    (flags & (QFCF_RED|QFCF_RIO|QFCF_BLUE|QFCF_SFB)) != QFCF_RED &&
	    (flags & (QFCF_RED|QFCF_RIO|QFCF_BLUE|QFCF_SFB)) != QFCF_RIO &&
	    (flags & (QFCF_RED|QFCF_RIO|QFCF_BLUE|QFCF_SFB)) != QFCF_BLUE &&
	    (flags & (QFCF_RED|QFCF_RIO|QFCF_BLUE|QFCF_SFB)) != QFCF_SFB) {
		log(LOG_ERR, "%s: %s more than one RED|RIO|BLUE|SFB\n",
		    if_name(QFQIF_IFP(qif)), qfq_style(qif));
		return (NULL);
	}

	ifq = qif->qif_ifq;
	ifp = QFQIF_IFP(qif);

	cl = zalloc(qfq_cl_zone);
	if (cl == NULL)
		return (NULL);

	bzero(cl, qfq_cl_size);

	if (qlimit == 0 || qlimit > IFCQ_MAXLEN(ifq)) {
		qlimit = IFCQ_MAXLEN(ifq);
		if (qlimit == 0)
			qlimit = DEFAULT_QLIMIT;  /* use default */
	}
	_qinit(&cl->cl_q, Q_DROPTAIL, qlimit);
	cl->cl_qif = qif;
	cl->cl_flags = flags;
	cl->cl_handle = qid;

	/*
	 * Find a free slot in the class table.  If the slot matching
	 * the lower bits of qid is free, use this slot.  Otherwise,
	 * use the first free slot.
	 */
	i = qid % qif->qif_maxclasses;
	if (qif->qif_class_tbl[i] == NULL) {
		qif->qif_class_tbl[i] = cl;
	} else {
		for (i = 0; i < qif->qif_maxclasses; i++) {
			if (qif->qif_class_tbl[i] == NULL) {
				qif->qif_class_tbl[i] = cl;
				break;
			}
		}
		if (i == qif->qif_maxclasses) {
			zfree(qfq_cl_zone, cl);
			return (NULL);
		}
	}

	w = weight;
	VERIFY(w > 0 && w <= QFQ_MAX_WEIGHT);
	cl->cl_lmax = maxsz;
	cl->cl_inv_w = (QFQ_ONE_FP / w);
	w = (QFQ_ONE_FP / cl->cl_inv_w);
	VERIFY(qif->qif_wsum + w <= QFQ_MAX_WSUM);

	i = qfq_calc_index(cl, cl->cl_inv_w, cl->cl_lmax);
	VERIFY(i <= QFQ_MAX_INDEX);
	grp = qif->qif_groups[i];
	if (grp == NULL) {
		grp = _MALLOC(sizeof (*grp), M_DEVBUF, M_WAITOK|M_ZERO);
		if (grp != NULL) {
			grp->qfg_index = i;
			grp->qfg_slot_shift =
			    QFQ_MTU_SHIFT + QFQ_FRAC_BITS - (QFQ_MAX_INDEX - i);
			grp->qfg_slots = _MALLOC(sizeof (struct qfq_class *) *
			    qif->qif_maxslots, M_DEVBUF, M_WAITOK|M_ZERO);
			if (grp->qfg_slots == NULL) {
				log(LOG_ERR, "%s: %s unable to allocate group "
				    "slots for index %d\n", if_name(ifp),
				    qfq_style(qif), i);
			}
		} else {
			log(LOG_ERR, "%s: %s unable to allocate group for "
			    "qid=%d\n", if_name(ifp), qfq_style(qif),
			    cl->cl_handle);
		}
		if (grp == NULL || grp->qfg_slots == NULL) {
			qif->qif_class_tbl[qid % qif->qif_maxclasses] = NULL;
			if (grp != NULL)
				_FREE(grp, M_DEVBUF);
			zfree(qfq_cl_zone, cl);
			return (NULL);
		} else {
			qif->qif_groups[i] = grp;
		}
	}
	cl->cl_grp = grp;
	qif->qif_wsum += w;
	/* XXX cl->cl_S = qif->qif_V; ? */
	/* XXX compute qif->qif_i_wsum */

	qif->qif_classes++;

	if (flags & QFCF_DEFAULTCLASS)
		qif->qif_default = cl;

	if (flags & (QFCF_RED|QFCF_RIO|QFCF_BLUE|QFCF_SFB)) {
#if CLASSQ_RED || CLASSQ_RIO
		u_int64_t ifbandwidth = ifnet_output_linkrate(ifp);
		int pkttime;
#endif /* CLASSQ_RED || CLASSQ_RIO */

		cl->cl_qflags = 0;
		if (flags & QFCF_ECN) {
			if (flags & QFCF_BLUE)
				cl->cl_qflags |= BLUEF_ECN;
			else if (flags & QFCF_SFB)
				cl->cl_qflags |= SFBF_ECN;
			else if (flags & QFCF_RED)
				cl->cl_qflags |= REDF_ECN;
			else if (flags & QFCF_RIO)
				cl->cl_qflags |= RIOF_ECN;
		}
		if (flags & QFCF_FLOWCTL) {
			if (flags & QFCF_SFB)
				cl->cl_qflags |= SFBF_FLOWCTL;
		}
		if (flags & QFCF_CLEARDSCP) {
			if (flags & QFCF_RIO)
				cl->cl_qflags |= RIOF_CLEARDSCP;
		}
#if CLASSQ_RED || CLASSQ_RIO
		/*
		 * XXX: RED & RIO should be watching link speed and MTU
		 *	events and recompute pkttime accordingly.
		 */
		if (ifbandwidth < 8)
			pkttime = 1000 * 1000 * 1000; /* 1 sec */
		else
			pkttime = (int64_t)ifp->if_mtu * 1000 * 1000 * 1000 /
			    (ifbandwidth / 8);

		/* Test for exclusivity {RED,RIO,BLUE,SFB} was done above */
#if CLASSQ_RED
		if (flags & QFCF_RED) {
			cl->cl_red = red_alloc(ifp, 0, 0,
			    qlimit(&cl->cl_q) * 10/100,
			    qlimit(&cl->cl_q) * 30/100,
			    cl->cl_qflags, pkttime);
			if (cl->cl_red != NULL)
				qtype(&cl->cl_q) = Q_RED;
		}
#endif /* CLASSQ_RED */
#if CLASSQ_RIO
		if (flags & QFCF_RIO) {
			cl->cl_rio =
			    rio_alloc(ifp, 0, NULL, cl->cl_qflags, pkttime);
			if (cl->cl_rio != NULL)
				qtype(&cl->cl_q) = Q_RIO;
		}
#endif /* CLASSQ_RIO */
#endif /* CLASSQ_RED || CLASSQ_RIO */
#if CLASSQ_BLUE
		if (flags & QFCF_BLUE) {
			cl->cl_blue = blue_alloc(ifp, 0, 0, cl->cl_qflags);
			if (cl->cl_blue != NULL)
				qtype(&cl->cl_q) = Q_BLUE;
		}
#endif /* CLASSQ_BLUE */
		if (flags & QFCF_SFB) {
			if (!(cl->cl_flags & QFCF_LAZY))
				cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
				    qlimit(&cl->cl_q), cl->cl_qflags);
			if (cl->cl_sfb != NULL || (cl->cl_flags & QFCF_LAZY))
				qtype(&cl->cl_q) = Q_SFB;
		}
	}

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s created qid=%d grp=%d weight=%d "
		    "qlimit=%d flags=%b\n", if_name(ifp), qfq_style(qif),
		    cl->cl_handle, cl->cl_grp->qfg_index, weight, qlimit,
		    flags, QFCF_BITS);
	}

	return (cl);
}

int
qfq_remove_queue(struct qfq_if *qif, u_int32_t qid)
{
	struct qfq_class *cl;

	IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);

	if ((cl = qfq_clh_to_clp(qif, qid)) == NULL)
		return (EINVAL);

	return (qfq_class_destroy(qif, cl));
}

static int
qfq_class_destroy(struct qfq_if *qif, struct qfq_class *cl)
{
	struct ifclassq *ifq = qif->qif_ifq;
	int i;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	qfq_purgeq(qif, cl, 0, NULL, NULL);

	if (cl->cl_inv_w != 0) {
		qif->qif_wsum -= (QFQ_ONE_FP / cl->cl_inv_w);
		cl->cl_inv_w = 0;	/* reset weight to avoid running twice */
	}

	for (i = 0; i < qif->qif_maxclasses; i++) {
		if (qif->qif_class_tbl[i] == cl) {
			qif->qif_class_tbl[i] = NULL;
			break;
		}
	}
	qif->qif_classes--;

	if (cl->cl_qalg.ptr != NULL) {
#if CLASSQ_RIO
		if (q_is_rio(&cl->cl_q))
			rio_destroy(cl->cl_rio);
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
		if (q_is_red(&cl->cl_q))
			red_destroy(cl->cl_red);
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
		if (q_is_blue(&cl->cl_q))
			blue_destroy(cl->cl_blue);
#endif /* CLASSQ_BLUE */
		if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
			sfb_destroy(cl->cl_sfb);
		cl->cl_qalg.ptr = NULL;
		qtype(&cl->cl_q) = Q_DROPTAIL;
		qstate(&cl->cl_q) = QS_RUNNING;
	}

	if (qif->qif_default == cl)
		qif->qif_default = NULL;

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s destroyed qid=%d\n",
		    if_name(QFQIF_IFP(qif)), qfq_style(qif), cl->cl_handle);
	}

	zfree(qfq_cl_zone, cl);

	return (0);
}

/*
 * Calculate a mask to mimic what would be ffs_from()
 */
static inline pktsched_bitmap_t
mask_from(pktsched_bitmap_t bitmap, int from)
{
	return (bitmap & ~((1UL << from) - 1));
}
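
/*
 * Illustrative example (editor's note, values made up): with from == 2,
 *
 *	mask_from(0x16, 2)		(0b10110)
 *	    == 0x16 & ~0x3		(clear bits 0 and 1)
 *	    == 0x14			(0b10100)
 *
 * so only bits at positions >= from survive, which is what a
 * hypothetical ffs_from(bitmap, from) would be scanning.
 */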

/*
 * The state computation relies on ER=0, IR=1, EB=2, IB=3
 * First compute eligibility comparing grp->qfg_S, qif->qif_V,
 * then check if someone is blocking us and possibly add EB
 */
static inline u_int32_t
qfq_calc_state(struct qfq_if *qif, struct qfq_group *grp)
{
	/* if S > V we are not eligible */
	u_int32_t state = qfq_gt(grp->qfg_S, qif->qif_V);
	pktsched_bitmap_t mask = mask_from(qif->qif_bitmaps[ER],
	    grp->qfg_index);
	struct qfq_group *next;

	if (mask) {
		next = qfq_ffs(qif, mask);
		if (qfq_gt(grp->qfg_F, next->qfg_F))
			state |= EB;
	}

	return (state);
}
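
/*
 * Worked example (editor's note): if grp->qfg_S > qif->qif_V the
 * group is not eligible and state starts out as IR (1); if, in
 * addition, some ER group at an index >= grp->qfg_index finishes
 * earlier than grp (qfq_gt(grp->qfg_F, next->qfg_F)), EB (2) is
 * OR'ed in and the result is IB (3).  This works only because of
 * the ER=0, IR=1, EB=2, IB=3 encoding noted above: bit 0 means
 * "ineligible" and bit 1 means "blocked".
 */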

/*
 * In principle
 *	qif->qif_bitmaps[dst] |= qif->qif_bitmaps[src] & mask;
 *	qif->qif_bitmaps[src] &= ~mask;
 * but we should make sure that src != dst
 */
static inline void
qfq_move_groups(struct qfq_if *qif, pktsched_bitmap_t mask, int src, int dst)
{
	qif->qif_bitmaps[dst] |= qif->qif_bitmaps[src] & mask;
	qif->qif_bitmaps[src] &= ~mask;
}

static inline void
qfq_unblock_groups(struct qfq_if *qif, int index, u_int64_t old_finish)
{
	pktsched_bitmap_t mask = mask_from(qif->qif_bitmaps[ER], index + 1);
	struct qfq_group *next;

	if (mask) {
		next = qfq_ffs(qif, mask);
		if (!qfq_gt(next->qfg_F, old_finish))
			return;
	}

	mask = (1UL << index) - 1;
	qfq_move_groups(qif, mask, EB, ER);
	qfq_move_groups(qif, mask, IB, IR);
}

/*
 * perhaps
 *
 *	old_V ^= qif->qif_V;
 *	old_V >>= QFQ_MIN_SLOT_SHIFT;
 *	if (old_V) {
 *		...
 *	}
 */
static inline void
qfq_make_eligible(struct qfq_if *qif, u_int64_t old_V)
{
	pktsched_bitmap_t mask, vslot, old_vslot;

	vslot = qif->qif_V >> QFQ_MIN_SLOT_SHIFT;
	old_vslot = old_V >> QFQ_MIN_SLOT_SHIFT;

	if (vslot != old_vslot) {
		mask = (2UL << (__fls(vslot ^ old_vslot))) - 1;
		qfq_move_groups(qif, mask, IR, ER);
		qfq_move_groups(qif, mask, IB, EB);
	}
}
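
/*
 * Illustrative example (editor's note, values made up): if
 * QFQ_MIN_SLOT_SHIFT were 5 and V advanced from 0x1f to 0x20, then
 * old_vslot == 0, vslot == 1, __fls(0 ^ 1) == 0, and mask ==
 * (2UL << 0) - 1 == 0x1, so only group 0 would move from the
 * ineligible sets (IR/IB) to the eligible ones (ER/EB).
 */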

/*
 * XXX we should make sure that slot becomes less than 32.
 * This is guaranteed by the input values.
 * roundedS is always cl->cl_S rounded on grp->qfg_slot_shift bits.
 */
static inline void
qfq_slot_insert(struct qfq_if *qif, struct qfq_group *grp,
    struct qfq_class *cl, u_int64_t roundedS)
{
	u_int64_t slot = (roundedS - grp->qfg_S) >> grp->qfg_slot_shift;
	u_int32_t i = (grp->qfg_front + slot) % qif->qif_maxslots;

	cl->cl_next = grp->qfg_slots[i];
	grp->qfg_slots[i] = cl;
	pktsched_bit_set(slot, &grp->qfg_full_slots);
}
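
/*
 * Illustrative example (editor's note, values made up): with
 * grp->qfg_S == 0x100, qfg_slot_shift == 4 and roundedS == 0x130,
 * slot == (0x130 - 0x100) >> 4 == 3, so cl is prepended to the
 * bucket 3 positions past qfg_front (modulo qif_maxslots) and bit 3
 * is set in qfg_full_slots.
 */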

/*
 * remove the entry from the slot
 */
static inline void
qfq_front_slot_remove(struct qfq_group *grp)
{
	struct qfq_class **h = &grp->qfg_slots[grp->qfg_front];

	*h = (*h)->cl_next;
	if (!*h)
		pktsched_bit_clr(0, &grp->qfg_full_slots);
}

/*
 * Returns the first full queue in a group. As a side effect,
 * adjust the bucket list so the first non-empty bucket is at
 * position 0 in qfg_full_slots.
 */
static inline struct qfq_class *
qfq_slot_scan(struct qfq_if *qif, struct qfq_group *grp)
{
	int i;

	if (pktsched_verbose > 2) {
		log(LOG_DEBUG, "%s: %s grp=%d full_slots=0x%x\n",
		    if_name(QFQIF_IFP(qif)), qfq_style(qif), grp->qfg_index,
		    grp->qfg_full_slots);
	}

	if (grp->qfg_full_slots == 0)
		return (NULL);

	i = pktsched_ffs(grp->qfg_full_slots) - 1; /* zero-based */
	if (i > 0) {
		grp->qfg_front = (grp->qfg_front + i) % qif->qif_maxslots;
		grp->qfg_full_slots >>= i;
	}

	return (grp->qfg_slots[grp->qfg_front]);
}
/*
 * adjust the bucket list. When the start time of a group decreases,
 * we move the index down (modulo qif->qif_maxslots) so we don't need to
 * move the objects. The mask of occupied slots must be shifted
 * because we use ffs() to find the first non-empty slot.
 * This covers decreases in the group's start time, but what about
 * increases of the start time?
 * Here too we should make sure that i is less than 32.
 */
static inline void
qfq_slot_rotate(struct qfq_if *qif, struct qfq_group *grp, u_int64_t roundedS)
{
#pragma unused(qif)
	u_int32_t i = (grp->qfg_S - roundedS) >> grp->qfg_slot_shift;

	grp->qfg_full_slots <<= i;
	grp->qfg_front = (grp->qfg_front - i) % qif->qif_maxslots;
}
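
/*
 * Editor's note: e.g. if the group start time moves back by two slots
 * (i == 2), the occupied-slot mask is shifted left by 2 and qfg_front
 * moves back by 2 (modulo qif_maxslots); the buckets themselves are
 * never copied, only the origin of the circular list moves.
 */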

static inline void
qfq_update_eligible(struct qfq_if *qif, u_int64_t old_V)
{
	pktsched_bitmap_t ineligible;

	ineligible = qif->qif_bitmaps[IR] | qif->qif_bitmaps[IB];
	if (ineligible) {
		if (!qif->qif_bitmaps[ER]) {
			struct qfq_group *grp;
			grp = qfq_ffs(qif, ineligible);
			if (qfq_gt(grp->qfg_S, qif->qif_V))
				qif->qif_V = grp->qfg_S;
		}
		qfq_make_eligible(qif, old_V);
	}
}

/*
 * Updates the class, returns true if also the group needs to be updated.
 */
static inline int
qfq_update_class(struct qfq_if *qif, struct qfq_group *grp,
    struct qfq_class *cl)
{
#pragma unused(qif)
	cl->cl_S = cl->cl_F;
	if (qempty(&cl->cl_q)) {
		qfq_front_slot_remove(grp);
	} else {
		u_int32_t len;
		u_int64_t roundedS;

		len = m_pktlen(qhead(&cl->cl_q));
		cl->cl_F = cl->cl_S + (u_int64_t)len * cl->cl_inv_w;
		roundedS = qfq_round_down(cl->cl_S, grp->qfg_slot_shift);
		if (roundedS == grp->qfg_S)
			return (0);

		qfq_front_slot_remove(grp);
		qfq_slot_insert(qif, grp, cl, roundedS);
	}
	return (1);
}
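
/*
 * Editor's note: on a return of 1 the caller must also update the
 * group, either because the class emptied (its front slot was
 * removed) or because the new rounded start time left the group's
 * current slot; a return of 0 means the class stayed within the same
 * slot window and the group timestamps are still valid.
 */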

/*
 * note: CLASSQDQ_POLL returns the next packet without removing the packet
 *	from the queue.  CLASSQDQ_REMOVE is a normal dequeue operation.
 *	CLASSQDQ_REMOVE must return the same packet if called immediately
 *	after CLASSQDQ_POLL.
 */
struct mbuf *
qfq_dequeue(struct qfq_if *qif, cqdq_op_t op)
{
	pktsched_bitmap_t er_bits = qif->qif_bitmaps[ER];
	struct ifclassq *ifq = qif->qif_ifq;
	struct qfq_group *grp;
	struct qfq_class *cl;
	struct mbuf *m;
	u_int64_t old_V;
	u_int32_t len;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	for (;;) {
		if (er_bits == 0) {
#if QFQ_DEBUG
			if (qif->qif_queued && pktsched_verbose > 1)
				qfq_dump_sched(qif, "start dequeue");
#endif /* QFQ_DEBUG */
			/* no eligible and ready packet */
			return (NULL);
		}
		grp = qfq_ffs(qif, er_bits);
		/* if group is non-empty, use it */
		if (grp->qfg_full_slots != 0)
			break;
		pktsched_bit_clr(grp->qfg_index, &er_bits);
#if QFQ_DEBUG
		qif->qif_emptygrp++;
#endif /* QFQ_DEBUG */
	}
	VERIFY(!IFCQ_IS_EMPTY(ifq));

	cl = grp->qfg_slots[grp->qfg_front];
	VERIFY(cl != NULL && !qempty(&cl->cl_q));

	if (op == CLASSQDQ_POLL)
		return (qfq_pollq(cl));

	m = qfq_getq(cl);
	VERIFY(m != NULL);	/* qalg must be work conserving */
	len = m_pktlen(m);

#if QFQ_DEBUG
	qif->qif_queued--;
#endif /* QFQ_DEBUG */

	IFCQ_DEC_LEN(ifq);
	if (qempty(&cl->cl_q))
		cl->cl_period++;
	PKTCNTR_ADD(&cl->cl_xmitcnt, 1, len);
	IFCQ_XMIT_ADD(ifq, 1, len);

	old_V = qif->qif_V;
	qif->qif_V += (u_int64_t)len * QFQ_IWSUM;

	if (pktsched_verbose > 2) {
		log(LOG_DEBUG, "%s: %s qid=%d dequeue m=0x%llx F=0x%llx "
		    "V=0x%llx\n", if_name(QFQIF_IFP(qif)), qfq_style(qif),
		    cl->cl_handle, (uint64_t)VM_KERNEL_ADDRPERM(m), cl->cl_F,
		    qif->qif_V);
	}

	if (qfq_update_class(qif, grp, cl)) {
		u_int64_t old_F = grp->qfg_F;

		cl = qfq_slot_scan(qif, grp);
		if (!cl) { /* group gone, remove from ER */
			pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[ER]);
		} else {
			u_int32_t s;
			u_int64_t roundedS =
			    qfq_round_down(cl->cl_S, grp->qfg_slot_shift);

			if (grp->qfg_S == roundedS)
				goto skip_unblock;

			grp->qfg_S = roundedS;
			grp->qfg_F = roundedS + (2ULL << grp->qfg_slot_shift);

			/* remove from ER and put in the new set */
			pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[ER]);
			s = qfq_calc_state(qif, grp);
			pktsched_bit_set(grp->qfg_index, &qif->qif_bitmaps[s]);
		}
		/* we need to unblock even if the group has gone away */
		qfq_unblock_groups(qif, grp->qfg_index, old_F);
	}

skip_unblock:
	qfq_update_eligible(qif, old_V);

#if QFQ_DEBUG
	if (!qif->qif_bitmaps[ER] && qif->qif_queued && pktsched_verbose > 1)
		qfq_dump_sched(qif, "end dequeue");
#endif /* QFQ_DEBUG */

	return (m);
}
/*
 * Assign a reasonable start time for a new flow k in group i.
 * Admissible values for hat(F) are multiples of sigma_i
 * no greater than V+sigma_i.  Larger values mean that
 * we had a wraparound so we consider the timestamp to be stale.
 *
 * If F is not stale and F >= V then we set S = F.
 * Otherwise we should assign S = V, but this may violate
 * the ordering in ER. So, if we have groups in ER, set S to
 * the F_j of the first group j which would be blocking us.
 * We are guaranteed not to move S backward because
 * otherwise our group i would still be blocked.
 */
static inline void
qfq_update_start(struct qfq_if *qif, struct qfq_class *cl)
{
	pktsched_bitmap_t mask;
	u_int64_t limit, roundedF;
	int slot_shift = cl->cl_grp->qfg_slot_shift;

	roundedF = qfq_round_down(cl->cl_F, slot_shift);
	limit = qfq_round_down(qif->qif_V, slot_shift) + (1UL << slot_shift);

	if (!qfq_gt(cl->cl_F, qif->qif_V) || qfq_gt(roundedF, limit)) {
		/* timestamp was stale */
		mask = mask_from(qif->qif_bitmaps[ER], cl->cl_grp->qfg_index);
		if (mask) {
			struct qfq_group *next = qfq_ffs(qif, mask);
			if (qfq_gt(roundedF, next->qfg_F)) {
				cl->cl_S = next->qfg_F;
				return;
			}
		}
		cl->cl_S = qif->qif_V;
	} else { /* timestamp is not stale */
		cl->cl_S = cl->cl_F;
	}
}

int
qfq_enqueue(struct qfq_if *qif, struct qfq_class *cl, struct mbuf *m,
    struct pf_mtag *t)
{
	struct ifclassq *ifq = qif->qif_ifq;
	struct qfq_group *grp;
	u_int64_t roundedS;
	int len, ret, s;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(cl == NULL || cl->cl_qif == qif);

	if (cl == NULL) {
#if PF_ALTQ
		cl = qfq_clh_to_clp(qif, t->pftag_qid);
#else /* !PF_ALTQ */
		cl = qfq_clh_to_clp(qif, 0);
#endif /* !PF_ALTQ */
		if (cl == NULL) {
			cl = qif->qif_default;
			if (cl == NULL) {
				IFCQ_CONVERT_LOCK(ifq);
				m_freem(m);
				return (ENOBUFS);
			}
		}
	}

	len = m_pktlen(m);

	ret = qfq_addq(cl, m, t);
	if (ret != 0) {
		if (ret == CLASSQEQ_SUCCESS_FC) {
			/* packet enqueued, return advisory feedback */
			ret = EQFULL;
		} else {
			VERIFY(ret == CLASSQEQ_DROPPED ||
			    ret == CLASSQEQ_DROPPED_FC ||
			    ret == CLASSQEQ_DROPPED_SP);
			/* packet has been freed in qfq_addq */
			PKTCNTR_ADD(&cl->cl_dropcnt, 1, len);
			IFCQ_DROP_ADD(ifq, 1, len);
			switch (ret) {
			case CLASSQEQ_DROPPED:
				return (ENOBUFS);
			case CLASSQEQ_DROPPED_FC:
				return (EQFULL);
			case CLASSQEQ_DROPPED_SP:
				return (EQSUSPENDED);
			}
			/* NOTREACHED */
		}
	}
	IFCQ_INC_LEN(ifq);

#if QFQ_DEBUG
	qif->qif_queued++;
#endif /* QFQ_DEBUG */

	/* queue was not idle, we're done */
	if (qlen(&cl->cl_q) > 1)
		goto done;

	/* queue was idle */
	grp = cl->cl_grp;
	qfq_update_start(qif, cl);	/* adjust start time */

	/* compute new finish time and rounded start */
	cl->cl_F = cl->cl_S + (u_int64_t)len * cl->cl_inv_w;
	roundedS = qfq_round_down(cl->cl_S, grp->qfg_slot_shift);

	/*
	 * Insert cl in the correct bucket.
	 *
	 * If cl->cl_S >= grp->qfg_S we don't need to adjust the bucket list
	 * and simply go to the insertion phase.  Otherwise grp->qfg_S is
	 * decreasing, we must make room in the bucket list, and also
	 * recompute the group state.  Finally, if there were no flows
	 * in this group and nobody was in ER make sure to adjust V.
	 */
	if (grp->qfg_full_slots != 0) {
		if (!qfq_gt(grp->qfg_S, cl->cl_S))
			goto skip_update;

		/* create a slot for this cl->cl_S */
		qfq_slot_rotate(qif, grp, roundedS);

		/* group was surely ineligible, remove */
		pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IR]);
		pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IB]);
	} else if (!qif->qif_bitmaps[ER] && qfq_gt(roundedS, qif->qif_V)) {
		qif->qif_V = roundedS;
	}

	grp->qfg_S = roundedS;
	grp->qfg_F =
	    roundedS + (2ULL << grp->qfg_slot_shift); /* i.e. 2 sigma_i */
	s = qfq_calc_state(qif, grp);
	pktsched_bit_set(grp->qfg_index, &qif->qif_bitmaps[s]);

	if (pktsched_verbose > 2) {
		log(LOG_DEBUG, "%s: %s qid=%d enqueue m=0x%llx state=%s 0x%x "
		    "S=0x%llx F=0x%llx V=0x%llx\n", if_name(QFQIF_IFP(qif)),
		    qfq_style(qif), cl->cl_handle,
		    (uint64_t)VM_KERNEL_ADDRPERM(m), qfq_state2str(s),
		    qif->qif_bitmaps[s], cl->cl_S, cl->cl_F, qif->qif_V);
	}

skip_update:
	qfq_slot_insert(qif, grp, cl, roundedS);

done:
	/* successfully queued. */
	return (ret);
}

static inline void
qfq_slot_remove(struct qfq_if *qif, struct qfq_group *grp,
    struct qfq_class *cl)
{
#pragma unused(qif)
	struct qfq_class **pprev;
	u_int32_t i, offset;
	u_int64_t roundedS;

	roundedS = qfq_round_down(cl->cl_S, grp->qfg_slot_shift);
	offset = (roundedS - grp->qfg_S) >> grp->qfg_slot_shift;
	i = (grp->qfg_front + offset) % qif->qif_maxslots;

	pprev = &grp->qfg_slots[i];
	while (*pprev && *pprev != cl)
		pprev = &(*pprev)->cl_next;

	*pprev = cl->cl_next;
	if (!grp->qfg_slots[i])
		pktsched_bit_clr(offset, &grp->qfg_full_slots);
}

/*
 * Called to forcibly destroy a queue.
 * If the queue is not in the front bucket, or if it has
 * other queues in the front bucket, we can simply remove
 * the queue with no other side effects.
 * Otherwise we must propagate the event up.
 * XXX description to be completed.
 */
static void
qfq_deactivate_class(struct qfq_if *qif, struct qfq_class *cl)
{
	struct qfq_group *grp = cl->cl_grp;
	pktsched_bitmap_t mask;
	u_int64_t roundedS;
	int s;

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s deactivate qid=%d grp=%d "
		    "full_slots=0x%x front=%d bitmaps={ER=0x%x,EB=0x%x,"
		    "IR=0x%x,IB=0x%x}\n",
		    if_name(QFQIF_IFP(cl->cl_qif)), qfq_style(cl->cl_qif),
		    cl->cl_handle, grp->qfg_index, grp->qfg_full_slots,
		    grp->qfg_front, qif->qif_bitmaps[ER], qif->qif_bitmaps[EB],
		    qif->qif_bitmaps[IR], qif->qif_bitmaps[IB]);
#if QFQ_DEBUG
		if (pktsched_verbose > 1)
			qfq_dump_sched(qif, "start deactivate");
#endif /* QFQ_DEBUG */
	}

	cl->cl_F = cl->cl_S;	/* not needed if the class goes away */
	qfq_slot_remove(qif, grp, cl);

	if (grp->qfg_full_slots == 0) {
		/*
		 * Nothing left in the group, remove from all sets.
		 * Do ER last because if we were blocking other groups
		 * we must unblock them.
		 */
		pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IR]);
		pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[EB]);
		pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IB]);

		if (pktsched_bit_tst(grp->qfg_index, &qif->qif_bitmaps[ER]) &&
		    !(qif->qif_bitmaps[ER] & ~((1UL << grp->qfg_index) - 1))) {
			mask = qif->qif_bitmaps[ER] &
			    ((1UL << grp->qfg_index) - 1);
			if (mask)
				mask = ~((1UL << __fls(mask)) - 1);
			else
				mask = (pktsched_bitmap_t)~0UL;
			qfq_move_groups(qif, mask, EB, ER);
			qfq_move_groups(qif, mask, IB, IR);
		}
		pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[ER]);
	} else if (!grp->qfg_slots[grp->qfg_front]) {
		cl = qfq_slot_scan(qif, grp);
		roundedS = qfq_round_down(cl->cl_S, grp->qfg_slot_shift);
		if (grp->qfg_S != roundedS) {
			pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[ER]);
			pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IR]);
			pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[EB]);
			pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IB]);
			grp->qfg_S = roundedS;
			grp->qfg_F = roundedS + (2ULL << grp->qfg_slot_shift);
			s = qfq_calc_state(qif, grp);
			pktsched_bit_set(grp->qfg_index, &qif->qif_bitmaps[s]);
		}
	}
	qfq_update_eligible(qif, qif->qif_V);

#if QFQ_DEBUG
	if (pktsched_verbose > 1)
		qfq_dump_sched(qif, "end deactivate");
#endif /* QFQ_DEBUG */
}

static const char *
qfq_state2str(int s)
{
	const char *c;

	switch (s) {
	case ER:
		c = "ER";
		break;
	case IR:
		c = "IR";
		break;
	case EB:
		c = "EB";
		break;
	case IB:
		c = "IB";
		break;
	default:
		c = "?";
		break;
	}
	return (c);
}

static inline int
qfq_addq(struct qfq_class *cl, struct mbuf *m, struct pf_mtag *t)
{
	struct qfq_if	*qif = cl->cl_qif;
	struct ifclassq *ifq = qif->qif_ifq;

	IFCQ_LOCK_ASSERT_HELD(ifq);

#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		return (rio_addq(cl->cl_rio, &cl->cl_q, m, t));
	else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		return (red_addq(cl->cl_red, &cl->cl_q, m, t));
	else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		return (blue_addq(cl->cl_blue, &cl->cl_q, m, t));
	else
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q)) {
		if (cl->cl_sfb == NULL) {
			struct ifnet *ifp = QFQIF_IFP(qif);

			VERIFY(cl->cl_flags & QFCF_LAZY);
			cl->cl_flags &= ~QFCF_LAZY;
			IFCQ_CONVERT_LOCK(ifq);

			cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
			    qlimit(&cl->cl_q), cl->cl_qflags);
			if (cl->cl_sfb == NULL) {
				/* fall back to droptail */
				qtype(&cl->cl_q) = Q_DROPTAIL;
				cl->cl_flags &= ~QFCF_SFB;
				cl->cl_qflags &= ~(SFBF_ECN | SFBF_FLOWCTL);

				log(LOG_ERR, "%s: %s SFB lazy allocation "
				    "failed for qid=%d grp=%d, falling back "
				    "to DROPTAIL\n", if_name(ifp),
				    qfq_style(qif), cl->cl_handle,
				    cl->cl_grp->qfg_index);
			} else if (qif->qif_throttle != IFNET_THROTTLE_OFF) {
				/* if there's pending throttling, set it */
				cqrq_throttle_t tr = { 1, qif->qif_throttle };
				int err = qfq_throttle(qif, &tr);

				if (err == EALREADY)
					err = 0;
				if (err != 0) {
					tr.level = IFNET_THROTTLE_OFF;
					(void) qfq_throttle(qif, &tr);
				}
			}
		}
		if (cl->cl_sfb != NULL)
			return (sfb_addq(cl->cl_sfb, &cl->cl_q, m, t));
	} else if (qlen(&cl->cl_q) >= qlimit(&cl->cl_q)) {
		IFCQ_CONVERT_LOCK(ifq);
		m_freem(m);
		return (CLASSQEQ_DROPPED);
	}

#if PF_ECN
	if (cl->cl_flags & QFCF_CLEARDSCP)
		write_dsfield(m, t, 0);
#endif /* PF_ECN */

	_addq(&cl->cl_q, m);

	return (0);
}

static inline struct mbuf *
qfq_getq(struct qfq_class *cl)
{
	IFCQ_LOCK_ASSERT_HELD(cl->cl_qif->qif_ifq);

#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		return (rio_getq(cl->cl_rio, &cl->cl_q));
	else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		return (red_getq(cl->cl_red, &cl->cl_q));
	else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		return (blue_getq(cl->cl_blue, &cl->cl_q));
	else
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
		return (sfb_getq(cl->cl_sfb, &cl->cl_q));

	return (_getq(&cl->cl_q));
}

static inline struct mbuf *
qfq_pollq(struct qfq_class *cl)
{
	IFCQ_LOCK_ASSERT_HELD(cl->cl_qif->qif_ifq);

	return (qhead(&cl->cl_q));
}

static void
qfq_purgeq(struct qfq_if *qif, struct qfq_class *cl, u_int32_t flow,
    u_int32_t *packets, u_int32_t *bytes)
{
	struct ifclassq *ifq = qif->qif_ifq;
	u_int32_t cnt = 0, len = 0, qlen;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	if ((qlen = qlen(&cl->cl_q)) == 0)
		goto done;

	/* become regular mutex before freeing mbufs */
	IFCQ_CONVERT_LOCK(ifq);

#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		rio_purgeq(cl->cl_rio, &cl->cl_q, flow, &cnt, &len);
	else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		red_purgeq(cl->cl_red, &cl->cl_q, flow, &cnt, &len);
	else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		blue_purgeq(cl->cl_blue, &cl->cl_q, flow, &cnt, &len);
	else
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
		sfb_purgeq(cl->cl_sfb, &cl->cl_q, flow, &cnt, &len);
	else
		_flushq_flow(&cl->cl_q, flow, &cnt, &len);

	if (cnt > 0) {
		VERIFY(qlen(&cl->cl_q) == (qlen - cnt));
#if QFQ_DEBUG
		VERIFY(qif->qif_queued >= cnt);
		qif->qif_queued -= cnt;
#endif /* QFQ_DEBUG */

		PKTCNTR_ADD(&cl->cl_dropcnt, cnt, len);
		IFCQ_DROP_ADD(ifq, cnt, len);

		VERIFY(((signed)IFCQ_LEN(ifq) - cnt) >= 0);
		IFCQ_LEN(ifq) -= cnt;

		if (qempty(&cl->cl_q))
			qfq_deactivate_class(qif, cl);

		if (pktsched_verbose) {
			log(LOG_DEBUG, "%s: %s purge qid=%d weight=%d "
			    "qlen=[%d,%d] cnt=%d len=%d flow=0x%x\n",
			    if_name(QFQIF_IFP(qif)),
			    qfq_style(qif), cl->cl_handle,
			    (u_int32_t)(QFQ_ONE_FP / cl->cl_inv_w), qlen,
			    qlen(&cl->cl_q), cnt, len, flow);
		}
	}
done:
	if (packets != NULL)
		*packets = cnt;
	if (bytes != NULL)
		*bytes = len;
}

static void
qfq_updateq(struct qfq_if *qif, struct qfq_class *cl, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s update qid=%d weight=%d event=%s\n",
		    if_name(QFQIF_IFP(qif)), qfq_style(qif),
		    cl->cl_handle, (u_int32_t)(QFQ_ONE_FP / cl->cl_inv_w),
		    ifclassq_ev2str(ev));
	}

#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		return (rio_updateq(cl->cl_rio, ev));
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		return (red_updateq(cl->cl_red, ev));
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		return (blue_updateq(cl->cl_blue, ev));
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
		return (sfb_updateq(cl->cl_sfb, ev));
}

int
qfq_get_class_stats(struct qfq_if *qif, u_int32_t qid,
    struct qfq_classstats *sp)
{
	struct qfq_class *cl;

	IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);

	if ((cl = qfq_clh_to_clp(qif, qid)) == NULL)
		return (EINVAL);

	sp->class_handle = cl->cl_handle;
	sp->index = cl->cl_grp->qfg_index;
	sp->weight = (QFQ_ONE_FP / cl->cl_inv_w);
	sp->lmax = cl->cl_lmax;
	sp->qlength = qlen(&cl->cl_q);
	sp->qlimit = qlimit(&cl->cl_q);
	sp->period = cl->cl_period;
	sp->xmitcnt = cl->cl_xmitcnt;
	sp->dropcnt = cl->cl_dropcnt;

	sp->qtype = qtype(&cl->cl_q);
	sp->qstate = qstate(&cl->cl_q);
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		red_getstats(cl->cl_red, &sp->red[0]);
#endif /* CLASSQ_RED */
#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		rio_getstats(cl->cl_rio, &sp->red[0]);
#endif /* CLASSQ_RIO */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		blue_getstats(cl->cl_blue, &sp->blue);
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
		sfb_getstats(cl->cl_sfb, &sp->sfb);

	return (0);
}

static int
qfq_stat_sc(struct qfq_if *qif, cqrq_stat_sc_t *sr)
{
	struct ifclassq *ifq = qif->qif_ifq;
	struct qfq_class *cl;
	u_int32_t i;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	VERIFY(sr->sc == MBUF_SC_UNSPEC || MBUF_VALID_SC(sr->sc));

	i = MBUF_SCIDX(sr->sc);
	VERIFY(i < IFCQ_SC_MAX);

	cl = ifq->ifcq_disc_slots[i].cl;
	sr->packets = qlen(&cl->cl_q);
	sr->bytes = qsize(&cl->cl_q);

	return (0);
}

/* convert a class handle to the corresponding class pointer */
static inline struct qfq_class *
qfq_clh_to_clp(struct qfq_if *qif, u_int32_t chandle)
{
	struct qfq_class *cl;
	int i;

	IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq);

	/*
	 * First, try optimistically the slot matching the lower bits of
	 * the handle.  If it fails, do the linear table search.
	 */
	i = chandle % qif->qif_maxclasses;
	if ((cl = qif->qif_class_tbl[i]) != NULL && cl->cl_handle == chandle)
		return (cl);
	for (i = 0; i < qif->qif_maxclasses; i++)
		if ((cl = qif->qif_class_tbl[i]) != NULL &&
		    cl->cl_handle == chandle)
			return (cl);

	return (NULL);
}

static const char *
qfq_style(struct qfq_if *qif)
{
	return ((qif->qif_flags & QFQIFF_ALTQ) ? "ALTQ_QFQ" : "QFQ");
}

/*
 * Generic comparison function, handling wraparound
 */
static inline int
qfq_gt(u_int64_t a, u_int64_t b)
{
	return ((int64_t)(a - b) > 0);
}
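
/*
 * Illustrative example (editor's note): timestamps are compared
 * modulo 2^64, so shortly after a wraparound
 *
 *	qfq_gt(0x1, 0xffffffffffffffffULL)
 *
 * is true, since (int64_t)(0x1 - 0xffffffffffffffffULL) == 2 > 0,
 * whereas a plain "a > b" comparison would get this wrong.
 */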

/*
 * Round a precise timestamp to its slotted value
 */
static inline u_int64_t
qfq_round_down(u_int64_t ts, u_int32_t shift)
{
	return (ts & ~((1ULL << shift) - 1));
}
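
/*
 * Illustrative example (editor's note, values made up):
 * qfq_round_down(0x1234, 8) == 0x1200, i.e. the timestamp is
 * truncated to a multiple of the slot size (1 << 8).
 */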

/*
 * Return the pointer to the group with lowest index in the bitmap
 */
static inline struct qfq_group *
qfq_ffs(struct qfq_if *qif, pktsched_bitmap_t bitmap)
{
	int index = pktsched_ffs(bitmap) - 1;	/* zero-based */
	VERIFY(index >= 0 && index <= QFQ_MAX_INDEX &&
	    qif->qif_groups[index] != NULL);
	return (qif->qif_groups[index]);
}

/*
 * Calculate a flow index, given its weight and maximum packet length.
 * index = log_2(maxlen/weight) but we need to apply the scaling.
 * This is used only once at flow creation.
 */
static int
qfq_calc_index(struct qfq_class *cl, u_int32_t inv_w, u_int32_t maxlen)
{
	u_int64_t slot_size = (u_int64_t)maxlen * inv_w;
	pktsched_bitmap_t size_map;
	int index = 0;

	size_map = (pktsched_bitmap_t)(slot_size >> QFQ_MIN_SLOT_SHIFT);
	if (!size_map)
		goto out;

	index = __fls(size_map) + 1;	/* basically a log_2() */
	index -= !(slot_size - (1ULL << (index + QFQ_MIN_SLOT_SHIFT - 1)));

	if (index < 0)
		index = 0;
out:
	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s qid=%d grp=%d W=%u, L=%u, I=%d\n",
		    if_name(QFQIF_IFP(cl->cl_qif)), qfq_style(cl->cl_qif),
		    cl->cl_handle, index, (u_int32_t)(QFQ_ONE_FP/inv_w),
		    maxlen, index);
	}
	return (index);
}
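
/*
 * Worked example (editor's note, values made up): if
 * slot_size >> QFQ_MIN_SLOT_SHIFT == 15, then __fls(15) == 3 and
 * index == 4; the correction on the next line subtracts 1 only when
 * slot_size is exactly 1 << (index + QFQ_MIN_SLOT_SHIFT - 1), so an
 * exact power of two lands in the next smaller group.
 */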

#if QFQ_DEBUG
static void
qfq_dump_groups(struct qfq_if *qif, u_int32_t mask)
{
	int i, j;

	for (i = 0; i < QFQ_MAX_INDEX + 1; i++) {
		struct qfq_group *g = qif->qif_groups[i];

		if (0 == (mask & (1 << i)))
			continue;
		if (g == NULL)
			continue;

		log(LOG_DEBUG, "%s: %s [%2d] full_slots 0x%x\n",
		    if_name(QFQIF_IFP(qif)), qfq_style(qif), i,
		    g->qfg_full_slots);
		log(LOG_DEBUG, "%s: %s             S 0x%20llx F 0x%llx %c\n",
		    if_name(QFQIF_IFP(qif)), qfq_style(qif),
		    g->qfg_S, g->qfg_F, mask & (1 << i) ? '1' : '0');

		for (j = 0; j < qif->qif_maxslots; j++) {
			if (g->qfg_slots[j]) {
				log(LOG_DEBUG, "%s: %s      bucket %d 0x%llx "
				    "qid %d\n", if_name(QFQIF_IFP(qif)),
				    qfq_style(qif), j,
				    (uint64_t)VM_KERNEL_ADDRPERM(
				    g->qfg_slots[j]),
				    g->qfg_slots[j]->cl_handle);
			}
		}
	}
}

static void
qfq_dump_sched(struct qfq_if *qif, const char *msg)
{
	log(LOG_DEBUG, "%s: %s --- in %s: ---\n",
	    if_name(QFQIF_IFP(qif)), qfq_style(qif), msg);
	log(LOG_DEBUG, "%s: %s emptygrp %d queued %d V 0x%llx\n",
	    if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_emptygrp,
	    qif->qif_queued, qif->qif_V);
	log(LOG_DEBUG, "%s: %s      ER 0x%08x\n",
	    if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_bitmaps[ER]);
	log(LOG_DEBUG, "%s: %s      EB 0x%08x\n",
	    if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_bitmaps[EB]);
	log(LOG_DEBUG, "%s: %s      IR 0x%08x\n",
	    if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_bitmaps[IR]);
	log(LOG_DEBUG, "%s: %s      IB 0x%08x\n",
	    if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_bitmaps[IB]);
	qfq_dump_groups(qif, 0xffffffff);
}
#endif /* QFQ_DEBUG */

/*
 * qfq_enqueue_ifclassq is an enqueue function to be registered to
 * (*ifcq_enqueue) in struct ifclassq.
 */
static int
qfq_enqueue_ifclassq(struct ifclassq *ifq, struct mbuf *m)
{
	u_int32_t i;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!(m->m_flags & M_PKTHDR)) {
		/* should not happen */
		log(LOG_ERR, "%s: packet does not have pkthdr\n",
		    if_name(ifq->ifcq_ifp));
		IFCQ_CONVERT_LOCK(ifq);
		m_freem(m);
		return (ENOBUFS);
	}

	i = MBUF_SCIDX(mbuf_get_service_class(m));
	VERIFY((u_int32_t)i < IFCQ_SC_MAX);

	return (qfq_enqueue(ifq->ifcq_disc,
	    ifq->ifcq_disc_slots[i].cl, m, m_pftag(m)));
}

/*
 * qfq_dequeue_ifclassq is a dequeue function to be registered to
 * (*ifcq_dequeue) in struct ifclassq.
 *
 * note: CLASSQDQ_POLL returns the next packet without removing the packet
 *	from the queue.  CLASSQDQ_REMOVE is a normal dequeue operation.
 *	CLASSQDQ_REMOVE must return the same packet if called immediately
 *	after CLASSQDQ_POLL.
 */
static struct mbuf *
qfq_dequeue_ifclassq(struct ifclassq *ifq, cqdq_op_t op)
{
	return (qfq_dequeue(ifq->ifcq_disc, op));
}

static int
qfq_request_ifclassq(struct ifclassq *ifq, cqrq_t req, void *arg)
{
	struct qfq_if *qif = (struct qfq_if *)ifq->ifcq_disc;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	switch (req) {
	case CLASSQRQ_PURGE:
		qfq_purge(qif);
		break;

	case CLASSQRQ_PURGE_SC:
		qfq_purge_sc(qif, (cqrq_purge_sc_t *)arg);
		break;

	case CLASSQRQ_EVENT:
		qfq_event(qif, (cqev_t)arg);
		break;

	case CLASSQRQ_THROTTLE:
		err = qfq_throttle(qif, (cqrq_throttle_t *)arg);
		break;

	case CLASSQRQ_STAT_SC:
		err = qfq_stat_sc(qif, (cqrq_stat_sc_t *)arg);
		break;
	}
	return (err);
}

int
qfq_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags)
{
	struct ifnet *ifp = ifq->ifcq_ifp;
	struct qfq_class *cl0, *cl1, *cl2, *cl3, *cl4;
	struct qfq_class *cl5, *cl6, *cl7, *cl8, *cl9;
	struct qfq_if *qif;
	u_int32_t maxlen = 0, qflags = 0;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_disc == NULL);
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);

	if (flags & PKTSCHEDF_QALG_RED)
		qflags |= QFCF_RED;
	if (flags & PKTSCHEDF_QALG_RIO)
		qflags |= QFCF_RIO;
	if (flags & PKTSCHEDF_QALG_BLUE)
		qflags |= QFCF_BLUE;
	if (flags & PKTSCHEDF_QALG_SFB)
		qflags |= QFCF_SFB;
	if (flags & PKTSCHEDF_QALG_ECN)
		qflags |= QFCF_ECN;
	if (flags & PKTSCHEDF_QALG_FLOWCTL)
		qflags |= QFCF_FLOWCTL;

	qif = qfq_alloc(ifp, M_WAITOK, FALSE);
	if (qif == NULL)
		return (ENOMEM);

	if ((maxlen = IFCQ_MAXLEN(ifq)) == 0)
		maxlen = if_sndq_maxlen;

	if ((err = qfq_add_queue(qif, maxlen, 300, 1200,
	    qflags | QFCF_LAZY, SCIDX_BK_SYS, &cl0)) != 0)
		goto cleanup;

	if ((err = qfq_add_queue(qif, maxlen, 600, 1400,
	    qflags | QFCF_LAZY, SCIDX_BK, &cl1)) != 0)
		goto cleanup;

	if ((err = qfq_add_queue(qif, maxlen, 2400, 600,
	    qflags | QFCF_DEFAULTCLASS, SCIDX_BE, &cl2)) != 0)
		goto cleanup;

	if ((err = qfq_add_queue(qif, maxlen, 2700, 600,
	    qflags | QFCF_LAZY, SCIDX_RD, &cl3)) != 0)
		goto cleanup;

	if ((err = qfq_add_queue(qif, maxlen, 3000, 400,
	    qflags | QFCF_LAZY, SCIDX_OAM, &cl4)) != 0)
		goto cleanup;

	if ((err = qfq_add_queue(qif, maxlen, 8000, 1000,
	    qflags | QFCF_LAZY, SCIDX_AV, &cl5)) != 0)
		goto cleanup;

	if ((err = qfq_add_queue(qif, maxlen, 15000, 1200,
	    qflags | QFCF_LAZY, SCIDX_RV, &cl6)) != 0)
		goto cleanup;

	if ((err = qfq_add_queue(qif, maxlen, 20000, 1400,
	    qflags | QFCF_LAZY, SCIDX_VI, &cl7)) != 0)
		goto cleanup;

	if ((err = qfq_add_queue(qif, maxlen, 23000, 200,
	    qflags | QFCF_LAZY, SCIDX_VO, &cl8)) != 0)
		goto cleanup;

	if ((err = qfq_add_queue(qif, maxlen, 25000, 200,
	    qflags, SCIDX_CTL, &cl9)) != 0)
		goto cleanup;

	err = ifclassq_attach(ifq, PKTSCHEDT_QFQ, qif,
	    qfq_enqueue_ifclassq, qfq_dequeue_ifclassq, NULL,
	    qfq_request_ifclassq);

	/* cache these for faster lookup */
	if (err == 0) {
		ifq->ifcq_disc_slots[SCIDX_BK_SYS].qid = SCIDX_BK_SYS;
		ifq->ifcq_disc_slots[SCIDX_BK_SYS].cl = cl0;

		ifq->ifcq_disc_slots[SCIDX_BK].qid = SCIDX_BK;
		ifq->ifcq_disc_slots[SCIDX_BK].cl = cl1;

		ifq->ifcq_disc_slots[SCIDX_BE].qid = SCIDX_BE;
		ifq->ifcq_disc_slots[SCIDX_BE].cl = cl2;

		ifq->ifcq_disc_slots[SCIDX_RD].qid = SCIDX_RD;
		ifq->ifcq_disc_slots[SCIDX_RD].cl = cl3;

		ifq->ifcq_disc_slots[SCIDX_OAM].qid = SCIDX_OAM;
		ifq->ifcq_disc_slots[SCIDX_OAM].cl = cl4;

		ifq->ifcq_disc_slots[SCIDX_AV].qid = SCIDX_AV;
		ifq->ifcq_disc_slots[SCIDX_AV].cl = cl5;

		ifq->ifcq_disc_slots[SCIDX_RV].qid = SCIDX_RV;
		ifq->ifcq_disc_slots[SCIDX_RV].cl = cl6;

		ifq->ifcq_disc_slots[SCIDX_VI].qid = SCIDX_VI;
		ifq->ifcq_disc_slots[SCIDX_VI].cl = cl7;

		ifq->ifcq_disc_slots[SCIDX_VO].qid = SCIDX_VO;
		ifq->ifcq_disc_slots[SCIDX_VO].cl = cl8;

		ifq->ifcq_disc_slots[SCIDX_CTL].qid = SCIDX_CTL;
		ifq->ifcq_disc_slots[SCIDX_CTL].cl = cl9;
	}

cleanup:
	if (err != 0)
		(void) qfq_destroy_locked(qif);

	return (err);
}

int
qfq_teardown_ifclassq(struct ifclassq *ifq)
{
	struct qfq_if *qif = ifq->ifcq_disc;
	int i;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(qif != NULL && ifq->ifcq_type == PKTSCHEDT_QFQ);

	(void) qfq_destroy_locked(qif);

	ifq->ifcq_disc = NULL;
	for (i = 0; i < IFCQ_SC_MAX; i++) {
		ifq->ifcq_disc_slots[i].qid = 0;
		ifq->ifcq_disc_slots[i].cl = NULL;
	}

	return (ifclassq_detach(ifq));
}

int
qfq_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t slot,
    struct if_ifclassq_stats *ifqs)
{
	struct qfq_if *qif = ifq->ifcq_disc;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_type == PKTSCHEDT_QFQ);

	if (slot >= IFCQ_SC_MAX)
		return (EINVAL);

	return (qfq_get_class_stats(qif, ifq->ifcq_disc_slots[slot].qid,
	    &ifqs->ifqs_qfq_stats));
}

static int
qfq_throttle(struct qfq_if *qif, cqrq_throttle_t *tr)
{
	struct ifclassq *ifq = qif->qif_ifq;
	struct qfq_class *cl;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(!(qif->qif_flags & QFQIFF_ALTQ));

	if (!tr->set) {
		tr->level = qif->qif_throttle;
		return (0);
	}

	if (tr->level == qif->qif_throttle)
		return (EALREADY);

	/* Current throttling levels only involve BK_SYS class */
	cl = ifq->ifcq_disc_slots[SCIDX_BK_SYS].cl;

	switch (tr->level) {
	case IFNET_THROTTLE_OFF:
		err = qfq_resumeq(qif, cl);
		break;

	case IFNET_THROTTLE_OPPORTUNISTIC:
		err = qfq_suspendq(qif, cl);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
	}

	if (err == 0 || err == ENXIO) {
		if (pktsched_verbose) {
			log(LOG_DEBUG, "%s: %s throttling level %sset %d->%d\n",
			    if_name(QFQIF_IFP(qif)), qfq_style(qif),
			    (err == 0) ? "" : "lazy ", qif->qif_throttle,
			    tr->level);
		}
		qif->qif_throttle = tr->level;
		if (err != 0)
			err = 0;
		else
			qfq_purgeq(qif, cl, 0, NULL, NULL);
	} else {
		log(LOG_ERR, "%s: %s unable to set throttling level "
		    "%d->%d [error=%d]\n", if_name(QFQIF_IFP(qif)),
		    qfq_style(qif), qif->qif_throttle, tr->level, err);
	}

	return (err);
}

static int
qfq_resumeq(struct qfq_if *qif, struct qfq_class *cl)
{
	struct ifclassq *ifq = qif->qif_ifq;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);

#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		err = rio_suspendq(cl->cl_rio, &cl->cl_q, FALSE);
	else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		err = red_suspendq(cl->cl_red, &cl->cl_q, FALSE);
	else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		err = blue_suspendq(cl->cl_blue, &cl->cl_q, FALSE);
	else
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL)
		err = sfb_suspendq(cl->cl_sfb, &cl->cl_q, FALSE);

	if (err == 0)
		qstate(&cl->cl_q) = QS_RUNNING;

	return (err);
}

static int
qfq_suspendq(struct qfq_if *qif, struct qfq_class *cl)
{
	struct ifclassq *ifq = qif->qif_ifq;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);

#if CLASSQ_RIO
	if (q_is_rio(&cl->cl_q))
		err = rio_suspendq(cl->cl_rio, &cl->cl_q, TRUE);
	else
#endif /* CLASSQ_RIO */
#if CLASSQ_RED
	if (q_is_red(&cl->cl_q))
		err = red_suspendq(cl->cl_red, &cl->cl_q, TRUE);
	else
#endif /* CLASSQ_RED */
#if CLASSQ_BLUE
	if (q_is_blue(&cl->cl_q))
		err = blue_suspendq(cl->cl_blue, &cl->cl_q, TRUE);
	else
#endif /* CLASSQ_BLUE */
	if (q_is_sfb(&cl->cl_q)) {
		if (cl->cl_sfb != NULL) {
			err = sfb_suspendq(cl->cl_sfb, &cl->cl_q, TRUE);
		} else {
			VERIFY(cl->cl_flags & QFCF_LAZY);
			err = ENXIO;	/* delayed throttling */
		}
	}

	if (err == 0 || err == ENXIO)
		qstate(&cl->cl_q) = QS_SUSPENDED;

	return (err);
}
2070