1298091Sloos/*
2298091Sloos * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3298091Sloos *
4298091Sloos * This code is derived from software contributed to The DragonFly Project
5298091Sloos * by Matthew Dillon <dillon@backplane.com>
6298091Sloos *
7298091Sloos * Redistribution and use in source and binary forms, with or without
8298091Sloos * modification, are permitted provided that the following conditions
9298091Sloos * are met:
10298091Sloos *
11298091Sloos * 1. Redistributions of source code must retain the above copyright
12298091Sloos *    notice, this list of conditions and the following disclaimer.
13298091Sloos * 2. Redistributions in binary form must reproduce the above copyright
14298091Sloos *    notice, this list of conditions and the following disclaimer in
15298091Sloos *    the documentation and/or other materials provided with the
16298091Sloos *    distribution.
17298091Sloos * 3. Neither the name of The DragonFly Project nor the names of its
18298091Sloos *    contributors may be used to endorse or promote products derived
19298091Sloos *    from this software without specific, prior written permission.
20298091Sloos *
21298091Sloos * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22298091Sloos * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23298091Sloos * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24298091Sloos * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25298091Sloos * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26298091Sloos * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27298091Sloos * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28298091Sloos * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29298091Sloos * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30298091Sloos * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31298091Sloos * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32298091Sloos * SUCH DAMAGE.
33298091Sloos *
34298091Sloos * $DragonFly: src/sys/net/altq/altq_fairq.c,v 1.1 2008/04/06 18:58:15 dillon Exp $
35298091Sloos * $FreeBSD$
36298091Sloos */
/*
 * Matt: I gutted altq_priq.c and used it as a skeleton on which to build
 * fairq.  The fairq algorithm is completely different than priq, of course,
 * but because I used priq's skeleton I believe I should include priq's
 * copyright.
 *
 * Copyright (C) 2000-2003
 *	Sony Computer Science Laboratories Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
67298091Sloos
/*
 * FAIRQ - take traffic classified by keep state (the state hash is read
 * from the packet's pf mtag, field qid_hash) and bucketize it.  Fairly
 * extract the first packet from each bucket in a round-robin fashion.
 *
 * TODO - better overall qlimit support (right now it is per-bucket).
 *	- NOTE: red etc is per bucket, not overall.
 *	- better service curve support.
 *
 * EXAMPLE:
 *
 *  altq on em0 fairq bandwidth 650Kb queue { std, bulk }
 *  queue std  priority 3 bandwidth 400Kb \
 *	fairq (buckets 64, default, hogs 1Kb) qlimit 50
 *  queue bulk priority 2 bandwidth 100Kb \
 *	fairq (buckets 64, hogs 1Kb) qlimit 50
 *
 *  pass out on em0 from any to any keep state queue std
 *  pass out on em0 inet proto tcp ..... port ... keep state queue bulk
 */
88298091Sloos#include "opt_altq.h"
89298091Sloos#include "opt_inet.h"
90298091Sloos#include "opt_inet6.h"
91298091Sloos
92298091Sloos#ifdef ALTQ_FAIRQ  /* fairq is enabled in the kernel conf */
93298091Sloos
94298091Sloos#include <sys/param.h>
95298091Sloos#include <sys/malloc.h>
96298091Sloos#include <sys/mbuf.h>
97298091Sloos#include <sys/socket.h>
98298091Sloos#include <sys/sockio.h>
99298091Sloos#include <sys/systm.h>
100298091Sloos#include <sys/proc.h>
101298091Sloos#include <sys/errno.h>
102298091Sloos#include <sys/kernel.h>
103298091Sloos#include <sys/queue.h>
104298091Sloos
105298091Sloos#include <net/if.h>
106298091Sloos#include <net/if_var.h>
107298091Sloos#include <netinet/in.h>
108298091Sloos
109298091Sloos#include <netpfil/pf/pf.h>
110298091Sloos#include <netpfil/pf/pf_altq.h>
111298091Sloos#include <netpfil/pf/pf_mtag.h>
112298091Sloos#include <altq/altq.h>
113298091Sloos#include <altq/altq_fairq.h>
114298091Sloos
115298091Sloos/*
116298091Sloos * function prototypes
117298091Sloos */
118298091Sloosstatic int	fairq_clear_interface(struct fairq_if *);
119298091Sloosstatic int	fairq_request(struct ifaltq *, int, void *);
120298091Sloosstatic void	fairq_purge(struct fairq_if *);
121298091Sloosstatic struct fairq_class *fairq_class_create(struct fairq_if *, int, int, u_int, struct fairq_opts *, int);
122298091Sloosstatic int	fairq_class_destroy(struct fairq_class *);
123298091Sloosstatic int	fairq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
124298091Sloosstatic struct mbuf *fairq_dequeue(struct ifaltq *, int);
125298091Sloos
126298091Sloosstatic int	fairq_addq(struct fairq_class *, struct mbuf *, u_int32_t);
127298091Sloosstatic struct mbuf *fairq_getq(struct fairq_class *, uint64_t);
128298091Sloosstatic struct mbuf *fairq_pollq(struct fairq_class *, uint64_t, int *);
129298091Sloosstatic fairq_bucket_t *fairq_selectq(struct fairq_class *, int);
130298091Sloosstatic void	fairq_purgeq(struct fairq_class *);
131298091Sloos
132298091Sloosstatic void	get_class_stats(struct fairq_classstats *, struct fairq_class *);
133298091Sloosstatic struct fairq_class *clh_to_clp(struct fairq_if *, uint32_t);
134298091Sloos
135298091Sloosint
136298091Sloosfairq_pfattach(struct pf_altq *a)
137298091Sloos{
138298091Sloos	struct ifnet *ifp;
139298091Sloos	int error;
140298091Sloos
141298091Sloos	if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
142298091Sloos		return (EINVAL);
143298091Sloos
144298091Sloos	error = altq_attach(&ifp->if_snd, ALTQT_FAIRQ, a->altq_disc,
145298091Sloos	    fairq_enqueue, fairq_dequeue, fairq_request, NULL, NULL);
146298091Sloos
147298091Sloos	return (error);
148298091Sloos}
149298091Sloos
150298091Sloosint
151298091Sloosfairq_add_altq(struct pf_altq *a)
152298091Sloos{
153298091Sloos	struct fairq_if *pif;
154298091Sloos	struct ifnet *ifp;
155298091Sloos
156298091Sloos	if ((ifp = ifunit(a->ifname)) == NULL)
157298091Sloos		return (EINVAL);
158298091Sloos	if (!ALTQ_IS_READY(&ifp->if_snd))
159298091Sloos		return (ENODEV);
160298091Sloos
161298091Sloos
162298091Sloos	pif = malloc(sizeof(struct fairq_if),
163298091Sloos			M_DEVBUF, M_WAITOK | M_ZERO);
164298091Sloos	pif->pif_bandwidth = a->ifbandwidth;
165298091Sloos	pif->pif_maxpri = -1;
166298091Sloos	pif->pif_ifq = &ifp->if_snd;
167298091Sloos
168298091Sloos	/* keep the state in pf_altq */
169298091Sloos	a->altq_disc = pif;
170298091Sloos
171298091Sloos	return (0);
172298091Sloos}
173298091Sloos
174298091Sloosint
175298091Sloosfairq_remove_altq(struct pf_altq *a)
176298091Sloos{
177298091Sloos	struct fairq_if *pif;
178298091Sloos
179298091Sloos	if ((pif = a->altq_disc) == NULL)
180298091Sloos		return (EINVAL);
181298091Sloos	a->altq_disc = NULL;
182298091Sloos
183298091Sloos	fairq_clear_interface(pif);
184298091Sloos
185298091Sloos	free(pif, M_DEVBUF);
186298091Sloos	return (0);
187298091Sloos}
188298091Sloos
189298091Sloosint
190298091Sloosfairq_add_queue(struct pf_altq *a)
191298091Sloos{
192298091Sloos	struct fairq_if *pif;
193298091Sloos	struct fairq_class *cl;
194298091Sloos
195298091Sloos	if ((pif = a->altq_disc) == NULL)
196298091Sloos		return (EINVAL);
197298091Sloos
198298091Sloos	/* check parameters */
199298091Sloos	if (a->priority >= FAIRQ_MAXPRI)
200298091Sloos		return (EINVAL);
201298091Sloos	if (a->qid == 0)
202298091Sloos		return (EINVAL);
203298091Sloos	if (pif->pif_classes[a->priority] != NULL)
204298091Sloos		return (EBUSY);
205298091Sloos	if (clh_to_clp(pif, a->qid) != NULL)
206298091Sloos		return (EBUSY);
207298091Sloos
208298091Sloos	cl = fairq_class_create(pif, a->priority, a->qlimit, a->bandwidth,
209298091Sloos			       &a->pq_u.fairq_opts, a->qid);
210298091Sloos	if (cl == NULL)
211298091Sloos		return (ENOMEM);
212298091Sloos
213298091Sloos	return (0);
214298091Sloos}
215298091Sloos
216298091Sloosint
217298091Sloosfairq_remove_queue(struct pf_altq *a)
218298091Sloos{
219298091Sloos	struct fairq_if *pif;
220298091Sloos	struct fairq_class *cl;
221298091Sloos
222298091Sloos	if ((pif = a->altq_disc) == NULL)
223298091Sloos		return (EINVAL);
224298091Sloos
225298091Sloos	if ((cl = clh_to_clp(pif, a->qid)) == NULL)
226298091Sloos		return (EINVAL);
227298091Sloos
228298091Sloos	return (fairq_class_destroy(cl));
229298091Sloos}
230298091Sloos
231298091Sloosint
232298091Sloosfairq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
233298091Sloos{
234298091Sloos	struct fairq_if *pif;
235298091Sloos	struct fairq_class *cl;
236298091Sloos	struct fairq_classstats stats;
237298091Sloos	int error = 0;
238298091Sloos
239298091Sloos	if ((pif = altq_lookup(a->ifname, ALTQT_FAIRQ)) == NULL)
240298091Sloos		return (EBADF);
241298091Sloos
242298091Sloos	if ((cl = clh_to_clp(pif, a->qid)) == NULL)
243298091Sloos		return (EINVAL);
244298091Sloos
245298091Sloos	if (*nbytes < sizeof(stats))
246298091Sloos		return (EINVAL);
247298091Sloos
248298091Sloos	get_class_stats(&stats, cl);
249298091Sloos
250298091Sloos	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
251298091Sloos		return (error);
252298091Sloos	*nbytes = sizeof(stats);
253298091Sloos	return (0);
254298091Sloos}
255298091Sloos
256298091Sloos/*
257298091Sloos * bring the interface back to the initial state by discarding
258298091Sloos * all the filters and classes.
259298091Sloos */
260298091Sloosstatic int
261298091Sloosfairq_clear_interface(struct fairq_if *pif)
262298091Sloos{
263298091Sloos	struct fairq_class *cl;
264298091Sloos	int pri;
265298091Sloos
266298091Sloos	/* clear out the classes */
267298091Sloos	for (pri = 0; pri <= pif->pif_maxpri; pri++) {
268298091Sloos		if ((cl = pif->pif_classes[pri]) != NULL)
269298091Sloos			fairq_class_destroy(cl);
270298091Sloos	}
271298091Sloos
272298091Sloos	return (0);
273298091Sloos}
274298091Sloos
275298091Sloosstatic int
276298091Sloosfairq_request(struct ifaltq *ifq, int req, void *arg)
277298091Sloos{
278298091Sloos	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
279298091Sloos
280298091Sloos	IFQ_LOCK_ASSERT(ifq);
281298091Sloos
282298091Sloos	switch (req) {
283298091Sloos	case ALTRQ_PURGE:
284298091Sloos		fairq_purge(pif);
285298091Sloos		break;
286298091Sloos	}
287298091Sloos	return (0);
288298091Sloos}
289298091Sloos
290298091Sloos/* discard all the queued packets on the interface */
291298091Sloosstatic void
292298091Sloosfairq_purge(struct fairq_if *pif)
293298091Sloos{
294298091Sloos	struct fairq_class *cl;
295298091Sloos	int pri;
296298091Sloos
297298091Sloos	for (pri = 0; pri <= pif->pif_maxpri; pri++) {
298298091Sloos		if ((cl = pif->pif_classes[pri]) != NULL && cl->cl_head)
299298091Sloos			fairq_purgeq(cl);
300298091Sloos	}
301298091Sloos	if (ALTQ_IS_ENABLED(pif->pif_ifq))
302298091Sloos		pif->pif_ifq->ifq_len = 0;
303298091Sloos}
304298091Sloos
305298091Sloosstatic struct fairq_class *
306298091Sloosfairq_class_create(struct fairq_if *pif, int pri, int qlimit,
307298091Sloos		   u_int bandwidth, struct fairq_opts *opts, int qid)
308298091Sloos{
309298091Sloos	struct fairq_class *cl;
310298091Sloos	int flags = opts->flags;
311298091Sloos	u_int nbuckets = opts->nbuckets;
312298091Sloos	int i;
313298091Sloos
314298091Sloos#ifndef ALTQ_RED
315298091Sloos	if (flags & FARF_RED) {
316298091Sloos#ifdef ALTQ_DEBUG
317298091Sloos		printf("fairq_class_create: RED not configured for FAIRQ!\n");
318298091Sloos#endif
319298091Sloos		return (NULL);
320298091Sloos	}
321298091Sloos#endif
322298133Sloos#ifndef ALTQ_CODEL
323298133Sloos	if (flags & FARF_CODEL) {
324298133Sloos#ifdef ALTQ_DEBUG
325298133Sloos		printf("fairq_class_create: CODEL not configured for FAIRQ!\n");
326298133Sloos#endif
327298133Sloos		return (NULL);
328298133Sloos	}
329298133Sloos#endif
330298091Sloos	if (nbuckets == 0)
331298091Sloos		nbuckets = 256;
332298091Sloos	if (nbuckets > FAIRQ_MAX_BUCKETS)
333298091Sloos		nbuckets = FAIRQ_MAX_BUCKETS;
334298091Sloos	/* enforce power-of-2 size */
335298091Sloos	while ((nbuckets ^ (nbuckets - 1)) != ((nbuckets << 1) - 1))
336298091Sloos		++nbuckets;
337298091Sloos
338298091Sloos	if ((cl = pif->pif_classes[pri]) != NULL) {
339298091Sloos		/* modify the class instead of creating a new one */
340298091Sloos		IFQ_LOCK(cl->cl_pif->pif_ifq);
341298091Sloos		if (cl->cl_head)
342298091Sloos			fairq_purgeq(cl);
343298091Sloos		IFQ_UNLOCK(cl->cl_pif->pif_ifq);
344298091Sloos#ifdef ALTQ_RIO
345298091Sloos		if (cl->cl_qtype == Q_RIO)
346298091Sloos			rio_destroy((rio_t *)cl->cl_red);
347298091Sloos#endif
348298091Sloos#ifdef ALTQ_RED
349298091Sloos		if (cl->cl_qtype == Q_RED)
350298091Sloos			red_destroy(cl->cl_red);
351298091Sloos#endif
352298133Sloos#ifdef ALTQ_CODEL
353298133Sloos		if (cl->cl_qtype == Q_CODEL)
354298133Sloos			codel_destroy(cl->cl_codel);
355298133Sloos#endif
356298091Sloos	} else {
357298091Sloos		cl = malloc(sizeof(struct fairq_class),
358298091Sloos				M_DEVBUF, M_WAITOK | M_ZERO);
359298091Sloos		cl->cl_nbuckets = nbuckets;
360298091Sloos		cl->cl_nbucket_mask = nbuckets - 1;
361298091Sloos
362298091Sloos		cl->cl_buckets = malloc(
363298091Sloos			sizeof(struct fairq_bucket) * cl->cl_nbuckets,
364298091Sloos			M_DEVBUF, M_WAITOK | M_ZERO);
365298091Sloos		cl->cl_head = NULL;
366298091Sloos	}
367298091Sloos
368298091Sloos	pif->pif_classes[pri] = cl;
369298091Sloos	if (flags & FARF_DEFAULTCLASS)
370298091Sloos		pif->pif_default = cl;
371298091Sloos	if (qlimit == 0)
372298091Sloos		qlimit = 50;  /* use default */
373298091Sloos	cl->cl_qlimit = qlimit;
374298091Sloos	for (i = 0; i < cl->cl_nbuckets; ++i) {
375298091Sloos		qlimit(&cl->cl_buckets[i].queue) = qlimit;
376298091Sloos	}
377298091Sloos	cl->cl_bandwidth = bandwidth / 8;
378298091Sloos	cl->cl_qtype = Q_DROPTAIL;
379298091Sloos	cl->cl_flags = flags & FARF_USERFLAGS;
380298091Sloos	cl->cl_pri = pri;
381298091Sloos	if (pri > pif->pif_maxpri)
382298091Sloos		pif->pif_maxpri = pri;
383298091Sloos	cl->cl_pif = pif;
384298091Sloos	cl->cl_handle = qid;
385298091Sloos	cl->cl_hogs_m1 = opts->hogs_m1 / 8;
386298091Sloos	cl->cl_lssc_m1 = opts->lssc_m1 / 8;	/* NOT YET USED */
387298091Sloos
388298091Sloos#ifdef ALTQ_RED
389298091Sloos	if (flags & (FARF_RED|FARF_RIO)) {
390298091Sloos		int red_flags, red_pkttime;
391298091Sloos
392298091Sloos		red_flags = 0;
393298091Sloos		if (flags & FARF_ECN)
394298091Sloos			red_flags |= REDF_ECN;
395298091Sloos#ifdef ALTQ_RIO
396298091Sloos		if (flags & FARF_CLEARDSCP)
397298091Sloos			red_flags |= RIOF_CLEARDSCP;
398298091Sloos#endif
399298091Sloos		if (pif->pif_bandwidth < 8)
400298091Sloos			red_pkttime = 1000 * 1000 * 1000; /* 1 sec */
401298091Sloos		else
402298091Sloos			red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu
403298091Sloos			  * 1000 * 1000 * 1000 / (pif->pif_bandwidth / 8);
404298091Sloos#ifdef ALTQ_RIO
405298091Sloos		if (flags & FARF_RIO) {
406298091Sloos			cl->cl_red = (red_t *)rio_alloc(0, NULL,
407298091Sloos						red_flags, red_pkttime);
408298091Sloos			if (cl->cl_red != NULL)
409298091Sloos				cl->cl_qtype = Q_RIO;
410298091Sloos		} else
411298091Sloos#endif
412298091Sloos		if (flags & FARF_RED) {
413298091Sloos			cl->cl_red = red_alloc(0, 0,
414298091Sloos			    cl->cl_qlimit * 10/100,
415298091Sloos			    cl->cl_qlimit * 30/100,
416298091Sloos			    red_flags, red_pkttime);
417298091Sloos			if (cl->cl_red != NULL)
418298091Sloos				cl->cl_qtype = Q_RED;
419298091Sloos		}
420298091Sloos	}
421298091Sloos#endif /* ALTQ_RED */
422298133Sloos#ifdef ALTQ_CODEL
423298133Sloos	if (flags & FARF_CODEL) {
424298133Sloos		cl->cl_codel = codel_alloc(5, 100, 0);
425298133Sloos		if (cl->cl_codel != NULL)
426298133Sloos			cl->cl_qtype = Q_CODEL;
427298133Sloos	}
428298133Sloos#endif
429298091Sloos
430298091Sloos	return (cl);
431298091Sloos}
432298091Sloos
433298091Sloosstatic int
434298091Sloosfairq_class_destroy(struct fairq_class *cl)
435298091Sloos{
436298091Sloos	struct fairq_if *pif;
437298091Sloos	int pri;
438298091Sloos
439298091Sloos	IFQ_LOCK(cl->cl_pif->pif_ifq);
440298091Sloos
441298091Sloos	if (cl->cl_head)
442298091Sloos		fairq_purgeq(cl);
443298091Sloos
444298091Sloos	pif = cl->cl_pif;
445298091Sloos	pif->pif_classes[cl->cl_pri] = NULL;
446298091Sloos	if (pif->pif_poll_cache == cl)
447298091Sloos		pif->pif_poll_cache = NULL;
448298091Sloos	if (pif->pif_maxpri == cl->cl_pri) {
449298091Sloos		for (pri = cl->cl_pri; pri >= 0; pri--)
450298091Sloos			if (pif->pif_classes[pri] != NULL) {
451298091Sloos				pif->pif_maxpri = pri;
452298091Sloos				break;
453298091Sloos			}
454298091Sloos		if (pri < 0)
455298091Sloos			pif->pif_maxpri = -1;
456298091Sloos	}
457298091Sloos	IFQ_UNLOCK(cl->cl_pif->pif_ifq);
458298091Sloos
459298091Sloos	if (cl->cl_red != NULL) {
460298091Sloos#ifdef ALTQ_RIO
461298091Sloos		if (cl->cl_qtype == Q_RIO)
462298091Sloos			rio_destroy((rio_t *)cl->cl_red);
463298091Sloos#endif
464298091Sloos#ifdef ALTQ_RED
465298091Sloos		if (cl->cl_qtype == Q_RED)
466298091Sloos			red_destroy(cl->cl_red);
467298091Sloos#endif
468298133Sloos#ifdef ALTQ_CODEL
469298133Sloos		if (cl->cl_qtype == Q_CODEL)
470298133Sloos			codel_destroy(cl->cl_codel);
471298133Sloos#endif
472298091Sloos	}
473298091Sloos	free(cl->cl_buckets, M_DEVBUF);
474298091Sloos	free(cl, M_DEVBUF);
475298091Sloos
476298091Sloos	return (0);
477298091Sloos}
478298091Sloos
479298091Sloos/*
480298091Sloos * fairq_enqueue is an enqueue function to be registered to
481298091Sloos * (*altq_enqueue) in struct ifaltq.
482298091Sloos */
483298091Sloosstatic int
484298091Sloosfairq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
485298091Sloos{
486298091Sloos	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
487298091Sloos	struct fairq_class *cl = NULL; /* Make compiler happy */
488298091Sloos	struct pf_mtag *t;
489298091Sloos	u_int32_t qid_hash = 0;
490298091Sloos	int len;
491298091Sloos
492298091Sloos	IFQ_LOCK_ASSERT(ifq);
493298091Sloos
494298091Sloos	/* grab class set by classifier */
495298091Sloos	if ((m->m_flags & M_PKTHDR) == 0) {
496298091Sloos		/* should not happen */
497298091Sloos		printf("altq: packet for %s does not have pkthdr\n",
498298091Sloos			ifq->altq_ifp->if_xname);
499298091Sloos		m_freem(m);
500298091Sloos		return (ENOBUFS);
501298091Sloos	}
502298091Sloos
503298091Sloos	if ((t = pf_find_mtag(m)) != NULL) {
504298091Sloos		cl = clh_to_clp(pif, t->qid);
505298091Sloos		qid_hash = t->qid_hash;
506298091Sloos	}
507298091Sloos	if (cl == NULL) {
508298091Sloos		cl = pif->pif_default;
509298091Sloos		if (cl == NULL) {
510298091Sloos			m_freem(m);
511298091Sloos			return (ENOBUFS);
512298091Sloos		}
513298091Sloos	}
514298091Sloos	cl->cl_flags |= FARF_HAS_PACKETS;
515298091Sloos	cl->cl_pktattr = NULL;
516298091Sloos	len = m_pktlen(m);
517298091Sloos	if (fairq_addq(cl, m, qid_hash) != 0) {
518298091Sloos		/* drop occurred.  mbuf was freed in fairq_addq. */
519298091Sloos		PKTCNTR_ADD(&cl->cl_dropcnt, len);
520298091Sloos		return (ENOBUFS);
521298091Sloos	}
522298091Sloos	IFQ_INC_LEN(ifq);
523298091Sloos
524298091Sloos	return (0);
525298091Sloos}
526298091Sloos
527298091Sloos/*
528298091Sloos * fairq_dequeue is a dequeue function to be registered to
529298091Sloos * (*altq_dequeue) in struct ifaltq.
530298091Sloos *
531298091Sloos * note: ALTDQ_POLL returns the next packet without removing the packet
532298091Sloos *	from the queue.  ALTDQ_REMOVE is a normal dequeue operation.
533298091Sloos *	ALTDQ_REMOVE must return the same packet if called immediately
534298091Sloos *	after ALTDQ_POLL.
535298091Sloos */
536298091Sloosstatic struct mbuf *
537298091Sloosfairq_dequeue(struct ifaltq *ifq, int op)
538298091Sloos{
539298091Sloos	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
540298091Sloos	struct fairq_class *cl;
541298091Sloos	struct fairq_class *best_cl;
542298091Sloos	struct mbuf *best_m;
543298091Sloos	struct mbuf *m = NULL;
544298091Sloos	uint64_t cur_time = read_machclk();
545298091Sloos	int pri;
546298091Sloos	int hit_limit;
547298091Sloos
548298091Sloos	IFQ_LOCK_ASSERT(ifq);
549298091Sloos
550298091Sloos	if (IFQ_IS_EMPTY(ifq)) {
551298091Sloos		return (NULL);
552298091Sloos	}
553298091Sloos
554298091Sloos	if (pif->pif_poll_cache && op == ALTDQ_REMOVE) {
555298091Sloos		best_cl = pif->pif_poll_cache;
556298091Sloos		m = fairq_getq(best_cl, cur_time);
557298091Sloos		pif->pif_poll_cache = NULL;
558298091Sloos		if (m) {
559298091Sloos			IFQ_DEC_LEN(ifq);
560298091Sloos			PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m));
561298091Sloos			return (m);
562298091Sloos		}
563298091Sloos	} else {
564298091Sloos		best_cl = NULL;
565298091Sloos		best_m = NULL;
566298091Sloos
567298091Sloos		for (pri = pif->pif_maxpri;  pri >= 0; pri--) {
568298091Sloos			if ((cl = pif->pif_classes[pri]) == NULL)
569298091Sloos				continue;
570298091Sloos			if ((cl->cl_flags & FARF_HAS_PACKETS) == 0)
571298091Sloos				continue;
572298091Sloos			m = fairq_pollq(cl, cur_time, &hit_limit);
573298091Sloos			if (m == NULL) {
574298091Sloos				cl->cl_flags &= ~FARF_HAS_PACKETS;
575298091Sloos				continue;
576298091Sloos			}
577298091Sloos
578298091Sloos			/*
579298091Sloos			 * Only override the best choice if we are under
580298091Sloos			 * the BW limit.
581298091Sloos			 */
582298091Sloos			if (hit_limit == 0 || best_cl == NULL) {
583298091Sloos				best_cl = cl;
584298091Sloos				best_m = m;
585298091Sloos			}
586298091Sloos
587298091Sloos			/*
588298091Sloos			 * Remember the highest priority mbuf in case we
589298091Sloos			 * do not find any lower priority mbufs.
590298091Sloos			 */
591298091Sloos			if (hit_limit)
592298091Sloos				continue;
593298091Sloos			break;
594298091Sloos		}
595298091Sloos		if (op == ALTDQ_POLL) {
596298091Sloos			pif->pif_poll_cache = best_cl;
597298091Sloos			m = best_m;
598298091Sloos		} else if (best_cl) {
599298091Sloos			m = fairq_getq(best_cl, cur_time);
600298091Sloos			if (m != NULL) {
601298091Sloos				IFQ_DEC_LEN(ifq);
602298091Sloos				PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m));
603298091Sloos			}
604298091Sloos		}
605298091Sloos		return (m);
606298091Sloos	}
607298091Sloos	return (NULL);
608298091Sloos}
609298091Sloos
610298091Sloosstatic int
611298091Sloosfairq_addq(struct fairq_class *cl, struct mbuf *m, u_int32_t bucketid)
612298091Sloos{
613298091Sloos	fairq_bucket_t *b;
614298091Sloos	u_int hindex;
615298091Sloos	uint64_t bw;
616298091Sloos
617298091Sloos	/*
618298091Sloos	 * If the packet doesn't have any keep state put it on the end of
619298091Sloos	 * our queue.  XXX this can result in out of order delivery.
620298091Sloos	 */
621298091Sloos	if (bucketid == 0) {
622298091Sloos		if (cl->cl_head)
623298091Sloos			b = cl->cl_head->prev;
624298091Sloos		else
625298091Sloos			b = &cl->cl_buckets[0];
626298091Sloos	} else {
627298091Sloos		hindex = bucketid & cl->cl_nbucket_mask;
628298091Sloos		b = &cl->cl_buckets[hindex];
629298091Sloos	}
630298091Sloos
631298091Sloos	/*
632298091Sloos	 * Add the bucket to the end of the circular list of active buckets.
633298091Sloos	 *
634298091Sloos	 * As a special case we add the bucket to the beginning of the list
635298091Sloos	 * instead of the end if it was not previously on the list and if
636298091Sloos	 * its traffic is less then the hog level.
637298091Sloos	 */
638298091Sloos	if (b->in_use == 0) {
639298091Sloos		b->in_use = 1;
640298091Sloos		if (cl->cl_head == NULL) {
641298091Sloos			cl->cl_head = b;
642298091Sloos			b->next = b;
643298091Sloos			b->prev = b;
644298091Sloos		} else {
645298091Sloos			b->next = cl->cl_head;
646298091Sloos			b->prev = cl->cl_head->prev;
647298091Sloos			b->prev->next = b;
648298091Sloos			b->next->prev = b;
649298091Sloos
650298091Sloos			if (b->bw_delta && cl->cl_hogs_m1) {
651298091Sloos				bw = b->bw_bytes * machclk_freq / b->bw_delta;
652298091Sloos				if (bw < cl->cl_hogs_m1)
653298091Sloos					cl->cl_head = b;
654298091Sloos			}
655298091Sloos		}
656298091Sloos	}
657298091Sloos
658298091Sloos#ifdef ALTQ_RIO
659298091Sloos	if (cl->cl_qtype == Q_RIO)
660298091Sloos		return rio_addq((rio_t *)cl->cl_red, &b->queue, m, cl->cl_pktattr);
661298091Sloos#endif
662298091Sloos#ifdef ALTQ_RED
663298091Sloos	if (cl->cl_qtype == Q_RED)
664298091Sloos		return red_addq(cl->cl_red, &b->queue, m, cl->cl_pktattr);
665298091Sloos#endif
666298133Sloos#ifdef ALTQ_CODEL
667298133Sloos	if (cl->cl_qtype == Q_CODEL)
668298133Sloos		return codel_addq(cl->cl_codel, &b->queue, m);
669298133Sloos#endif
670298091Sloos	if (qlen(&b->queue) >= qlimit(&b->queue)) {
671298091Sloos		m_freem(m);
672298091Sloos		return (-1);
673298091Sloos	}
674298091Sloos
675298091Sloos	if (cl->cl_flags & FARF_CLEARDSCP)
676298091Sloos		write_dsfield(m, cl->cl_pktattr, 0);
677298091Sloos
678298091Sloos	_addq(&b->queue, m);
679298091Sloos
680298091Sloos	return (0);
681298091Sloos}
682298091Sloos
683298091Sloosstatic struct mbuf *
684298091Sloosfairq_getq(struct fairq_class *cl, uint64_t cur_time)
685298091Sloos{
686298091Sloos	fairq_bucket_t *b;
687298091Sloos	struct mbuf *m;
688298091Sloos
689298091Sloos	b = fairq_selectq(cl, 0);
690298091Sloos	if (b == NULL)
691298091Sloos		m = NULL;
692298091Sloos#ifdef ALTQ_RIO
693298091Sloos	else if (cl->cl_qtype == Q_RIO)
694298091Sloos		m = rio_getq((rio_t *)cl->cl_red, &b->queue);
695298091Sloos#endif
696298091Sloos#ifdef ALTQ_RED
697298091Sloos	else if (cl->cl_qtype == Q_RED)
698298091Sloos		m = red_getq(cl->cl_red, &b->queue);
699298091Sloos#endif
700298133Sloos#ifdef ALTQ_CODEL
701298133Sloos	else if (cl->cl_qtype == Q_CODEL)
702298133Sloos		m = codel_getq(cl->cl_codel, &b->queue);
703298133Sloos#endif
704298091Sloos	else
705298091Sloos		m = _getq(&b->queue);
706298091Sloos
707298091Sloos	/*
708298091Sloos	 * Calculate the BW change
709298091Sloos	 */
710298091Sloos	if (m != NULL) {
711298091Sloos		uint64_t delta;
712298091Sloos
713298091Sloos		/*
714298091Sloos		 * Per-class bandwidth calculation
715298091Sloos		 */
716298091Sloos		delta = (cur_time - cl->cl_last_time);
717298091Sloos		if (delta > machclk_freq * 8)
718298091Sloos			delta = machclk_freq * 8;
719298091Sloos		cl->cl_bw_delta += delta;
720298091Sloos		cl->cl_bw_bytes += m->m_pkthdr.len;
721298091Sloos		cl->cl_last_time = cur_time;
722298091Sloos		cl->cl_bw_delta -= cl->cl_bw_delta >> 3;
723298091Sloos		cl->cl_bw_bytes -= cl->cl_bw_bytes >> 3;
724298091Sloos
725298091Sloos		/*
726298091Sloos		 * Per-bucket bandwidth calculation
727298091Sloos		 */
728298091Sloos		delta = (cur_time - b->last_time);
729298091Sloos		if (delta > machclk_freq * 8)
730298091Sloos			delta = machclk_freq * 8;
731298091Sloos		b->bw_delta += delta;
732298091Sloos		b->bw_bytes += m->m_pkthdr.len;
733298091Sloos		b->last_time = cur_time;
734298091Sloos		b->bw_delta -= b->bw_delta >> 3;
735298091Sloos		b->bw_bytes -= b->bw_bytes >> 3;
736298091Sloos	}
737298091Sloos	return(m);
738298091Sloos}
739298091Sloos
740298091Sloos/*
741298091Sloos * Figure out what the next packet would be if there were no limits.  If
742298091Sloos * this class hits its bandwidth limit *hit_limit is set to no-zero, otherwise
743298091Sloos * it is set to 0.  A non-NULL mbuf is returned either way.
744298091Sloos */
745298091Sloosstatic struct mbuf *
746298091Sloosfairq_pollq(struct fairq_class *cl, uint64_t cur_time, int *hit_limit)
747298091Sloos{
748298091Sloos	fairq_bucket_t *b;
749298091Sloos	struct mbuf *m;
750298091Sloos	uint64_t delta;
751298091Sloos	uint64_t bw;
752298091Sloos
753298091Sloos	*hit_limit = 0;
754298091Sloos	b = fairq_selectq(cl, 1);
755298091Sloos	if (b == NULL)
756298091Sloos		return(NULL);
757298091Sloos	m = qhead(&b->queue);
758298091Sloos
759298091Sloos	/*
760298091Sloos	 * Did this packet exceed the class bandwidth?  Calculate the
761298091Sloos	 * bandwidth component of the packet.
762298091Sloos	 *
763298091Sloos	 * - Calculate bytes per second
764298091Sloos	 */
765298091Sloos	delta = cur_time - cl->cl_last_time;
766298091Sloos	if (delta > machclk_freq * 8)
767298091Sloos		delta = machclk_freq * 8;
768298091Sloos	cl->cl_bw_delta += delta;
769298091Sloos	cl->cl_last_time = cur_time;
770298091Sloos	if (cl->cl_bw_delta) {
771298091Sloos		bw = cl->cl_bw_bytes * machclk_freq / cl->cl_bw_delta;
772298091Sloos
773298091Sloos		if (bw > cl->cl_bandwidth)
774298091Sloos			*hit_limit = 1;
775298091Sloos#ifdef ALTQ_DEBUG
776298091Sloos		printf("BW %6ju relative to %6u %d queue %p\n",
777298091Sloos			(uintmax_t)bw, cl->cl_bandwidth, *hit_limit, b);
778298091Sloos#endif
779298091Sloos	}
780298091Sloos	return(m);
781298091Sloos}
782298091Sloos
783298091Sloos/*
784298091Sloos * Locate the next queue we want to pull a packet out of.  This code
785298091Sloos * is also responsible for removing empty buckets from the circular list.
786298091Sloos */
787298091Sloosstatic
788298091Sloosfairq_bucket_t *
789298091Sloosfairq_selectq(struct fairq_class *cl, int ispoll)
790298091Sloos{
791298091Sloos	fairq_bucket_t *b;
792298091Sloos	uint64_t bw;
793298091Sloos
794298091Sloos	if (ispoll == 0 && cl->cl_polled) {
795298091Sloos		b = cl->cl_polled;
796298091Sloos		cl->cl_polled = NULL;
797298091Sloos		return(b);
798298091Sloos	}
799298091Sloos
800298091Sloos	while ((b = cl->cl_head) != NULL) {
801298091Sloos		/*
802298091Sloos		 * Remove empty queues from consideration
803298091Sloos		 */
804298091Sloos		if (qempty(&b->queue)) {
805298091Sloos			b->in_use = 0;
806298091Sloos			cl->cl_head = b->next;
807298091Sloos			if (cl->cl_head == b) {
808298091Sloos				cl->cl_head = NULL;
809298091Sloos			} else {
810298091Sloos				b->next->prev = b->prev;
811298091Sloos				b->prev->next = b->next;
812298091Sloos			}
813298091Sloos			continue;
814298091Sloos		}
815298091Sloos
816298091Sloos		/*
817298091Sloos		 * Advance the round robin.  Queues with bandwidths less
818298091Sloos		 * then the hog bandwidth are allowed to burst.
819298091Sloos		 */
820298091Sloos		if (cl->cl_hogs_m1 == 0) {
821298091Sloos			cl->cl_head = b->next;
822298091Sloos		} else if (b->bw_delta) {
823298091Sloos			bw = b->bw_bytes * machclk_freq / b->bw_delta;
824298091Sloos			if (bw >= cl->cl_hogs_m1) {
825298091Sloos				cl->cl_head = b->next;
826298091Sloos			}
827298091Sloos			/*
828298091Sloos			 * XXX TODO -
829298091Sloos			 */
830298091Sloos		}
831298091Sloos
832298091Sloos		/*
833298091Sloos		 * Return bucket b.
834298091Sloos		 */
835298091Sloos		break;
836298091Sloos	}
837298091Sloos	if (ispoll)
838298091Sloos		cl->cl_polled = b;
839298091Sloos	return(b);
840298091Sloos}
841298091Sloos
842298091Sloosstatic void
843298091Sloosfairq_purgeq(struct fairq_class *cl)
844298091Sloos{
845298091Sloos	fairq_bucket_t *b;
846298091Sloos	struct mbuf *m;
847298091Sloos
848298091Sloos	while ((b = fairq_selectq(cl, 0)) != NULL) {
849298091Sloos		while ((m = _getq(&b->queue)) != NULL) {
850298091Sloos			PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m));
851298091Sloos			m_freem(m);
852298091Sloos		}
853298091Sloos		ASSERT(qlen(&b->queue) == 0);
854298091Sloos	}
855298091Sloos}
856298091Sloos
857298091Sloosstatic void
858298091Sloosget_class_stats(struct fairq_classstats *sp, struct fairq_class *cl)
859298091Sloos{
860298091Sloos	fairq_bucket_t *b;
861298091Sloos
862298091Sloos	sp->class_handle = cl->cl_handle;
863298091Sloos	sp->qlimit = cl->cl_qlimit;
864298091Sloos	sp->xmit_cnt = cl->cl_xmitcnt;
865298091Sloos	sp->drop_cnt = cl->cl_dropcnt;
866298091Sloos	sp->qtype = cl->cl_qtype;
867298091Sloos	sp->qlength = 0;
868298091Sloos
869298091Sloos	if (cl->cl_head) {
870298091Sloos		b = cl->cl_head;
871298091Sloos		do {
872298091Sloos			sp->qlength += qlen(&b->queue);
873298091Sloos			b = b->next;
874298091Sloos		} while (b != cl->cl_head);
875298091Sloos	}
876298091Sloos
877298091Sloos#ifdef ALTQ_RED
878298091Sloos	if (cl->cl_qtype == Q_RED)
879298091Sloos		red_getstats(cl->cl_red, &sp->red[0]);
880298091Sloos#endif
881298091Sloos#ifdef ALTQ_RIO
882298091Sloos	if (cl->cl_qtype == Q_RIO)
883298091Sloos		rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
884298091Sloos#endif
885298133Sloos#ifdef ALTQ_CODEL
886298133Sloos	if (cl->cl_qtype == Q_CODEL)
887298133Sloos		codel_getstats(cl->cl_codel, &sp->codel);
888298133Sloos#endif
889298091Sloos}
890298091Sloos
891298091Sloos/* convert a class handle to the corresponding class pointer */
892298091Sloosstatic struct fairq_class *
893298091Sloosclh_to_clp(struct fairq_if *pif, uint32_t chandle)
894298091Sloos{
895298091Sloos	struct fairq_class *cl;
896298091Sloos	int idx;
897298091Sloos
898298091Sloos	if (chandle == 0)
899298091Sloos		return (NULL);
900298091Sloos
901298091Sloos	for (idx = pif->pif_maxpri; idx >= 0; idx--)
902298091Sloos		if ((cl = pif->pif_classes[idx]) != NULL &&
903298091Sloos		    cl->cl_handle == chandle)
904298091Sloos			return (cl);
905298091Sloos
906298091Sloos	return (NULL);
907298091Sloos}
908298091Sloos
909298091Sloos#endif /* ALTQ_FAIRQ */
910