1/*
2 * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/*
30 * Copyright (c) 2010 Fabio Checconi, Luigi Rizzo, Paolo Valente
31 * All rights reserved
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 *    notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 *    notice, this list of conditions and the following disclaimer in the
40 *    documentation and/or other materials provided with the distribution.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * SUCH DAMAGE.
53 */
54
55#ifndef _NET_PKTSCHED_PKTSCHED_QFQ_H_
56#define	_NET_PKTSCHED_PKTSCHED_QFQ_H_
57
58#ifdef PRIVATE
59#include <net/pktsched/pktsched.h>
60#include <net/classq/classq.h>
61#include <net/classq/classq_red.h>
62#include <net/classq/classq_rio.h>
63#include <net/classq/classq_blue.h>
64#include <net/classq/classq_sfb.h>
65
66#ifdef __cplusplus
67extern "C" {
68#endif
69
70/* qfq class flags */
71#define	QFCF_RED		0x0001	/* use RED */
72#define	QFCF_ECN		0x0002  /* use ECN with RED/BLUE/SFB */
73#define	QFCF_RIO		0x0004  /* use RIO */
74#define	QFCF_CLEARDSCP		0x0010  /* clear diffserv codepoint */
75#define	QFCF_BLUE		0x0100	/* use BLUE */
76#define	QFCF_SFB		0x0200	/* use SFB */
77#define	QFCF_FLOWCTL		0x0400	/* enable flow control advisories */
78#define	QFCF_DEFAULTCLASS	0x1000	/* default class */
79#ifdef BSD_KERNEL_PRIVATE
80#define	QFCF_LAZY		0x10000000 /* on-demand resource allocation */
81#endif /* BSD_KERNEL_PRIVATE */
82
83#define	QFCF_USERFLAGS							\
84	(QFCF_RED | QFCF_ECN | QFCF_RIO | QFCF_CLEARDSCP | QFCF_BLUE |	\
85	QFCF_SFB | QFCF_FLOWCTL | QFCF_DEFAULTCLASS)
86
87#ifdef BSD_KERNEL_PRIVATE
88#define	QFCF_BITS \
89	"\020\1RED\2ECN\3RIO\5CLEARDSCP\11BLUE\12SFB\13FLOWCTL\15DEFAULT" \
90	"\35LAZY"
91#else
92#define	QFCF_BITS \
93	"\020\1RED\2ECN\3RIO\5CLEARDSCP\11BLUE\12SFB\13FLOWCTL\15DEFAULT"
94#endif /* !BSD_KERNEL_PRIVATE */
95
96#define	QFQ_MAX_CLASSES		32
97#define	QFQ_MAX_WSHIFT		16	/* log2(max_weight) */
98#define	QFQ_MAX_WEIGHT		(1 << QFQ_MAX_WSHIFT)
99
100struct qfq_classstats {
101	u_int32_t		class_handle;
102	u_int32_t		index;
103	u_int32_t		weight;
104	u_int32_t		lmax;
105
106	u_int32_t		qlength;
107	u_int32_t		qlimit;
108	u_int32_t		period;
109	struct pktcntr		xmitcnt;  /* transmitted packet counter */
110	struct pktcntr		dropcnt;  /* dropped packet counter */
111
112	/* RED, RIO, BLUE, SFB related info */
113	classq_type_t		qtype;
114	union {
115		/* RIO has 3 red stats */
116		struct red_stats	red[RIO_NDROPPREC];
117		struct blue_stats	blue;
118		struct sfb_stats	sfb;
119	};
120	classq_state_t		qstate;
121};
122
123#ifdef BSD_KERNEL_PRIVATE
124#define	QFQ_DEBUG	1	/* enable extra debugging */
125
126/*
127 * Virtual time computations.
128 *
129 * S, F and V are all computed in fixed point arithmetic with
130 * FRAC_BITS decimal bits.
131 *
132 * QFQ_MAX_INDEX is the maximum index allowed for a group. We need
133 * one bit per index.
134 *
135 * QFQ_MAX_WSHIFT is the maximum power of two supported as a weight.
136 * The layout of the bits is as below:
137 *
138 *                 [ MTU_SHIFT ][      FRAC_BITS    ]
139 *                 [ MAX_INDEX    ][ MIN_SLOT_SHIFT ]
140 *				 ^.__grp->index = 0
141 *				 *.__grp->slot_shift
142 *
143 * where MIN_SLOT_SHIFT is derived by difference from the others.
144 *
145 * The max group index corresponds to Lmax/w_min, where
146 *	Lmax=1<<MTU_SHIFT, w_min = 1 .
147 * From this, and knowing how many groups (MAX_INDEX) we want,
148 * we can derive the shift corresponding to each group.
149 *
150 * Because we often need to compute
151 *	F = S + len/w_i  and V = V + len/wsum
152 * instead of storing w_i store the value
153 *	inv_w = (1<<FRAC_BITS)/w_i
154 * so we can do F = S + len * inv_w * wsum.
155 * We use W_TOT in the formulas so we can easily move between
156 * static and adaptive weight sum.
157 *
158 * The per-scheduler-instance data contain all the data structures
159 * for the scheduler: bitmaps and bucket lists.
160 */
161
162/*
163 * Shifts used for class<->group mapping.  Class weights are in the
164 * range [1, QFQ_MAX_WEIGHT], we need to map each class i to the
165 * group with the smallest index that can support the L_i / r_i
166 * configured for the class.
167 *
168 * grp->qfg_index is the index of the group; and grp->qfg_slot_shift
169 * is the shift for the corresponding (scaled) sigma_i.
170 *
171 * When computing the group index, we do (len<<FP_SHIFT)/weight,
172 * then compute an FLS (which is like a log2()), and if the result
173 * is below the MAX_INDEX region we use 0 (which is the same as
174 * using a larger len).
175 */
176#define	QFQ_MAX_INDEX		19
177#define	QFQ_MAX_WSUM		(2 * QFQ_MAX_WEIGHT)
178
179#define	QFQ_FRAC_BITS		30	/* fixed point arithmetic */
180#define	QFQ_ONE_FP		(1UL << QFQ_FRAC_BITS)
181#define	QFQ_IWSUM		(QFQ_ONE_FP / QFQ_MAX_WSUM)
182
183#define	QFQ_MTU_SHIFT		11	/* log2(max_len) */
184#define	QFQ_MIN_SLOT_SHIFT	(QFQ_FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX)
185
186/*
187 * Possible group states, also indexes for the bitmaps array in
188 * struct qfq_if. We rely on ER, IR, EB, IB being numbered 0..3
189 */
190enum qfq_state {
191	ER = 0,				/* eligible, ready */
192	IR = 1,				/* ineligible, ready */
193	EB = 2,				/* eligible, backlogged */
194	IB = 3,				/* ineligible, backlogged */
195	QFQ_MAX_STATE
196};
197
198struct qfq_group;
199
200struct qfq_class {
201	u_int32_t	cl_handle;	/* class handle */
202	class_queue_t	cl_q;		/* class queue structure */
203	u_int32_t	cl_qflags;	/* class queue flags */
204	union {
205		void		*ptr;
206		struct red	*red;	/* RED state */
207		struct rio	*rio;	/* RIO state */
208		struct blue	*blue;	/* BLUE state */
209		struct sfb	*sfb;	/* SFB state */
210	} cl_qalg;
211	struct qfq_if	*cl_qif;	/* back pointer to qif */
212	u_int32_t	cl_flags;	/* class flags */
213
214	u_int64_t	cl_S, cl_F;	/* flow timestamps (exact) */
215	struct qfq_class *cl_next;	/* link for the slot list */
216	/*
217	 * Group we belong to.  In principle we would need the index,
218	 * which is log_2(lmax/weight), but we never reference it
219	 * directly, only the group.
220	 */
221	struct qfq_group *cl_grp;
222	u_int32_t	cl_inv_w;	/* QFQ_ONE_FP/weight */
223	u_int32_t	cl_lmax;	/* max packet size for this flow */
224
225	/* statistics */
226	u_int32_t	cl_period;	/* backlog period */
227	struct pktcntr  cl_xmitcnt;	/* transmitted packet counter */
228	struct pktcntr  cl_dropcnt;	/* dropped packet counter */
229};
230
231#define	cl_red	cl_qalg.red
232#define	cl_rio	cl_qalg.rio
233#define	cl_blue	cl_qalg.blue
234#define	cl_sfb	cl_qalg.sfb
235
236/*
237 * Group descriptor, see the paper for details.
238 * Basically this contains the bucket lists.
239 */
240struct qfq_group {
241	u_int64_t	qfg_S, qfg_F;	/* group timestamps (approx) */
242	u_int8_t	qfg_slot_shift;	/* slot shift */
243	u_int8_t	qfg_index;	/* group index */
244	u_int8_t	qfg_front;	/* index of the front slot */
245	pktsched_bitmap_t qfg_full_slots; /* non-empty slots */
246
247	/* array of lists of active classes */
248	struct qfq_class **qfg_slots;
249};
250
251/* qfq_if flags */
252#define	QFQIFF_ALTQ		0x1	/* configured via PF/ALTQ */
253
254/*
255 * qfq interface state
256 */
257struct qfq_if {
258	struct ifclassq		*qif_ifq;	/* backpointer to ifclassq */
259	u_int32_t		qif_flags;	/* flags */
260	u_int32_t		qif_throttle;	/* throttling level */
261	u_int8_t		qif_classes;	/* # of classes in table */
262	u_int8_t		qif_maxclasses;	/* max # of classes in table */
263	u_int8_t		qif_maxslots;	/* max # of slots */
264	struct qfq_class	*qif_default;	/* default class */
265	struct qfq_class	**qif_class_tbl;
266
267	u_int64_t		qif_V;		/* precise virtual time */
268	u_int32_t		qif_wsum;	/* weight sum */
269#if QFQ_DEBUG
270	u_int32_t		qif_i_wsum;	/* QFQ_ONE_FP/w_sum */
271	u_int32_t		qif_queued;	/* debugging */
272	u_int32_t		qif_emptygrp;	/* debugging */
273#endif /* QFQ_DEBUG */
274	pktsched_bitmap_t	qif_bitmaps[QFQ_MAX_STATE]; /* group bitmaps */
275	struct qfq_group	**qif_groups;	/* the groups */
276};
277
278#define	QFQIF_IFP(_qif)	((_qif)->qif_ifq->ifcq_ifp)
279
280struct if_ifclassq_stats;
281
282extern void qfq_init(void);
283extern struct qfq_if *qfq_alloc(struct ifnet *, int, boolean_t);
284extern int qfq_destroy(struct qfq_if *);
285extern void qfq_purge(struct qfq_if *);
286extern void qfq_event(struct qfq_if *, cqev_t);
287extern int qfq_add_queue(struct qfq_if *, u_int32_t, u_int32_t, u_int32_t,
288    u_int32_t, u_int32_t, struct qfq_class **);
289extern int qfq_remove_queue(struct qfq_if *, u_int32_t);
290extern int qfq_get_class_stats(struct qfq_if *, u_int32_t,
291    struct qfq_classstats *);
292extern int qfq_enqueue(struct qfq_if *, struct qfq_class *, struct mbuf *,
293    struct pf_mtag *);
294extern struct mbuf *qfq_dequeue(struct qfq_if *, cqdq_op_t);
295extern int qfq_setup_ifclassq(struct ifclassq *, u_int32_t);
296extern int qfq_teardown_ifclassq(struct ifclassq *ifq);
297extern int qfq_getqstats_ifclassq(struct ifclassq *, u_int32_t,
298    struct if_ifclassq_stats *);
299#endif /* BSD_KERNEL_PRIVATE */
300#ifdef __cplusplus
301}
302#endif
303#endif /* PRIVATE */
304#endif /* _NET_PKTSCHED_PKTSCHED_QFQ_H_ */
305