1/*
2 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in
15 *    the documentation and/or other materials provided with the
16 *    distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 *    contributors may be used to endorse or promote products derived
19 *    from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * $DragonFly: src/sys/net/altq/altq_fairq.c,v 1.1 2008/04/06 18:58:15 dillon Exp $
35 * $FreeBSD$
36 */
37/*
38 * Matt: I gutted altq_priq.c and used it as a skeleton on which to build
 * fairq.  The fairq algorithm is completely different than priq, of course,
40 * but because I used priq's skeleton I believe I should include priq's
41 * copyright.
42 *
43 * Copyright (C) 2000-2003
44 *	Sony Computer Science Laboratories Inc.  All rights reserved.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 *    notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 *    notice, this list of conditions and the following disclaimer in the
53 *    documentation and/or other materials provided with the distribution.
54 *
55 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
56 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
57 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
58 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
59 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
60 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
61 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
62 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
63 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
64 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65 * SUCH DAMAGE.
66 */
67
68/*
69 * FAIRQ - take traffic classified by keep state (hashed into
70 * mbuf->m_pkthdr.altq_state_hash) and bucketize it.  Fairly extract
71 * the first packet from each bucket in a round-robin fashion.
72 *
73 * TODO - better overall qlimit support (right now it is per-bucket).
74 *	- NOTE: red etc is per bucket, not overall.
75 *	- better service curve support.
76 *
77 * EXAMPLE:
78 *
79 *  altq on em0 fairq bandwidth 650Kb queue { std, bulk }
80 *  queue std  priority 3 bandwidth 400Kb \
81 *	fairq (buckets 64, default, hogs 1Kb) qlimit 50
82 *  queue bulk priority 2 bandwidth 100Kb \
83 *	fairq (buckets 64, hogs 1Kb) qlimit 50
84 *
85 *  pass out on em0 from any to any keep state queue std
86 *  pass out on em0 inet proto tcp ..... port ... keep state queue bulk
87 */
88#include "opt_altq.h"
89#include "opt_inet.h"
90#include "opt_inet6.h"
91
92#ifdef ALTQ_FAIRQ  /* fairq is enabled in the kernel conf */
93
94#include <sys/param.h>
95#include <sys/malloc.h>
96#include <sys/mbuf.h>
97#include <sys/socket.h>
98#include <sys/sockio.h>
99#include <sys/systm.h>
100#include <sys/proc.h>
101#include <sys/errno.h>
102#include <sys/kernel.h>
103#include <sys/queue.h>
104
105#include <net/if.h>
106#include <net/if_var.h>
107#include <netinet/in.h>
108
109#include <netpfil/pf/pf.h>
110#include <netpfil/pf/pf_altq.h>
111#include <netpfil/pf/pf_mtag.h>
112#include <altq/altq.h>
113#include <altq/altq_fairq.h>
114
115/*
116 * function prototypes
117 */
118static int	fairq_clear_interface(struct fairq_if *);
119static int	fairq_request(struct ifaltq *, int, void *);
120static void	fairq_purge(struct fairq_if *);
121static struct fairq_class *fairq_class_create(struct fairq_if *, int, int, u_int, struct fairq_opts *, int);
122static int	fairq_class_destroy(struct fairq_class *);
123static int	fairq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
124static struct mbuf *fairq_dequeue(struct ifaltq *, int);
125
126static int	fairq_addq(struct fairq_class *, struct mbuf *, u_int32_t);
127static struct mbuf *fairq_getq(struct fairq_class *, uint64_t);
128static struct mbuf *fairq_pollq(struct fairq_class *, uint64_t, int *);
129static fairq_bucket_t *fairq_selectq(struct fairq_class *, int);
130static void	fairq_purgeq(struct fairq_class *);
131
132static void	get_class_stats(struct fairq_classstats *, struct fairq_class *);
133static struct fairq_class *clh_to_clp(struct fairq_if *, uint32_t);
134
135int
136fairq_pfattach(struct pf_altq *a)
137{
138	struct ifnet *ifp;
139	int error;
140
141	if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
142		return (EINVAL);
143
144	error = altq_attach(&ifp->if_snd, ALTQT_FAIRQ, a->altq_disc,
145	    fairq_enqueue, fairq_dequeue, fairq_request, NULL, NULL);
146
147	return (error);
148}
149
150int
151fairq_add_altq(struct pf_altq *a)
152{
153	struct fairq_if *pif;
154	struct ifnet *ifp;
155
156	if ((ifp = ifunit(a->ifname)) == NULL)
157		return (EINVAL);
158	if (!ALTQ_IS_READY(&ifp->if_snd))
159		return (ENODEV);
160
161
162	pif = malloc(sizeof(struct fairq_if),
163			M_DEVBUF, M_WAITOK | M_ZERO);
164	pif->pif_bandwidth = a->ifbandwidth;
165	pif->pif_maxpri = -1;
166	pif->pif_ifq = &ifp->if_snd;
167
168	/* keep the state in pf_altq */
169	a->altq_disc = pif;
170
171	return (0);
172}
173
174int
175fairq_remove_altq(struct pf_altq *a)
176{
177	struct fairq_if *pif;
178
179	if ((pif = a->altq_disc) == NULL)
180		return (EINVAL);
181	a->altq_disc = NULL;
182
183	fairq_clear_interface(pif);
184
185	free(pif, M_DEVBUF);
186	return (0);
187}
188
189int
190fairq_add_queue(struct pf_altq *a)
191{
192	struct fairq_if *pif;
193	struct fairq_class *cl;
194
195	if ((pif = a->altq_disc) == NULL)
196		return (EINVAL);
197
198	/* check parameters */
199	if (a->priority >= FAIRQ_MAXPRI)
200		return (EINVAL);
201	if (a->qid == 0)
202		return (EINVAL);
203	if (pif->pif_classes[a->priority] != NULL)
204		return (EBUSY);
205	if (clh_to_clp(pif, a->qid) != NULL)
206		return (EBUSY);
207
208	cl = fairq_class_create(pif, a->priority, a->qlimit, a->bandwidth,
209			       &a->pq_u.fairq_opts, a->qid);
210	if (cl == NULL)
211		return (ENOMEM);
212
213	return (0);
214}
215
216int
217fairq_remove_queue(struct pf_altq *a)
218{
219	struct fairq_if *pif;
220	struct fairq_class *cl;
221
222	if ((pif = a->altq_disc) == NULL)
223		return (EINVAL);
224
225	if ((cl = clh_to_clp(pif, a->qid)) == NULL)
226		return (EINVAL);
227
228	return (fairq_class_destroy(cl));
229}
230
231int
232fairq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
233{
234	struct fairq_if *pif;
235	struct fairq_class *cl;
236	struct fairq_classstats stats;
237	int error = 0;
238
239	if ((pif = altq_lookup(a->ifname, ALTQT_FAIRQ)) == NULL)
240		return (EBADF);
241
242	if ((cl = clh_to_clp(pif, a->qid)) == NULL)
243		return (EINVAL);
244
245	if (*nbytes < sizeof(stats))
246		return (EINVAL);
247
248	get_class_stats(&stats, cl);
249
250	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
251		return (error);
252	*nbytes = sizeof(stats);
253	return (0);
254}
255
256/*
257 * bring the interface back to the initial state by discarding
258 * all the filters and classes.
259 */
260static int
261fairq_clear_interface(struct fairq_if *pif)
262{
263	struct fairq_class *cl;
264	int pri;
265
266	/* clear out the classes */
267	for (pri = 0; pri <= pif->pif_maxpri; pri++) {
268		if ((cl = pif->pif_classes[pri]) != NULL)
269			fairq_class_destroy(cl);
270	}
271
272	return (0);
273}
274
275static int
276fairq_request(struct ifaltq *ifq, int req, void *arg)
277{
278	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
279
280	IFQ_LOCK_ASSERT(ifq);
281
282	switch (req) {
283	case ALTRQ_PURGE:
284		fairq_purge(pif);
285		break;
286	}
287	return (0);
288}
289
290/* discard all the queued packets on the interface */
291static void
292fairq_purge(struct fairq_if *pif)
293{
294	struct fairq_class *cl;
295	int pri;
296
297	for (pri = 0; pri <= pif->pif_maxpri; pri++) {
298		if ((cl = pif->pif_classes[pri]) != NULL && cl->cl_head)
299			fairq_purgeq(cl);
300	}
301	if (ALTQ_IS_ENABLED(pif->pif_ifq))
302		pif->pif_ifq->ifq_len = 0;
303}
304
305static struct fairq_class *
306fairq_class_create(struct fairq_if *pif, int pri, int qlimit,
307		   u_int bandwidth, struct fairq_opts *opts, int qid)
308{
309	struct fairq_class *cl;
310	int flags = opts->flags;
311	u_int nbuckets = opts->nbuckets;
312	int i;
313
314#ifndef ALTQ_RED
315	if (flags & FARF_RED) {
316#ifdef ALTQ_DEBUG
317		printf("fairq_class_create: RED not configured for FAIRQ!\n");
318#endif
319		return (NULL);
320	}
321#endif
322#ifndef ALTQ_CODEL
323	if (flags & FARF_CODEL) {
324#ifdef ALTQ_DEBUG
325		printf("fairq_class_create: CODEL not configured for FAIRQ!\n");
326#endif
327		return (NULL);
328	}
329#endif
330	if (nbuckets == 0)
331		nbuckets = 256;
332	if (nbuckets > FAIRQ_MAX_BUCKETS)
333		nbuckets = FAIRQ_MAX_BUCKETS;
334	/* enforce power-of-2 size */
335	while ((nbuckets ^ (nbuckets - 1)) != ((nbuckets << 1) - 1))
336		++nbuckets;
337
338	if ((cl = pif->pif_classes[pri]) != NULL) {
339		/* modify the class instead of creating a new one */
340		IFQ_LOCK(cl->cl_pif->pif_ifq);
341		if (cl->cl_head)
342			fairq_purgeq(cl);
343		IFQ_UNLOCK(cl->cl_pif->pif_ifq);
344#ifdef ALTQ_RIO
345		if (cl->cl_qtype == Q_RIO)
346			rio_destroy((rio_t *)cl->cl_red);
347#endif
348#ifdef ALTQ_RED
349		if (cl->cl_qtype == Q_RED)
350			red_destroy(cl->cl_red);
351#endif
352#ifdef ALTQ_CODEL
353		if (cl->cl_qtype == Q_CODEL)
354			codel_destroy(cl->cl_codel);
355#endif
356	} else {
357		cl = malloc(sizeof(struct fairq_class),
358				M_DEVBUF, M_WAITOK | M_ZERO);
359		cl->cl_nbuckets = nbuckets;
360		cl->cl_nbucket_mask = nbuckets - 1;
361
362		cl->cl_buckets = malloc(
363			sizeof(struct fairq_bucket) * cl->cl_nbuckets,
364			M_DEVBUF, M_WAITOK | M_ZERO);
365		cl->cl_head = NULL;
366	}
367
368	pif->pif_classes[pri] = cl;
369	if (flags & FARF_DEFAULTCLASS)
370		pif->pif_default = cl;
371	if (qlimit == 0)
372		qlimit = 50;  /* use default */
373	cl->cl_qlimit = qlimit;
374	for (i = 0; i < cl->cl_nbuckets; ++i) {
375		qlimit(&cl->cl_buckets[i].queue) = qlimit;
376	}
377	cl->cl_bandwidth = bandwidth / 8;
378	cl->cl_qtype = Q_DROPTAIL;
379	cl->cl_flags = flags & FARF_USERFLAGS;
380	cl->cl_pri = pri;
381	if (pri > pif->pif_maxpri)
382		pif->pif_maxpri = pri;
383	cl->cl_pif = pif;
384	cl->cl_handle = qid;
385	cl->cl_hogs_m1 = opts->hogs_m1 / 8;
386	cl->cl_lssc_m1 = opts->lssc_m1 / 8;	/* NOT YET USED */
387
388#ifdef ALTQ_RED
389	if (flags & (FARF_RED|FARF_RIO)) {
390		int red_flags, red_pkttime;
391
392		red_flags = 0;
393		if (flags & FARF_ECN)
394			red_flags |= REDF_ECN;
395#ifdef ALTQ_RIO
396		if (flags & FARF_CLEARDSCP)
397			red_flags |= RIOF_CLEARDSCP;
398#endif
399		if (pif->pif_bandwidth < 8)
400			red_pkttime = 1000 * 1000 * 1000; /* 1 sec */
401		else
402			red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu
403			  * 1000 * 1000 * 1000 / (pif->pif_bandwidth / 8);
404#ifdef ALTQ_RIO
405		if (flags & FARF_RIO) {
406			cl->cl_red = (red_t *)rio_alloc(0, NULL,
407						red_flags, red_pkttime);
408			if (cl->cl_red != NULL)
409				cl->cl_qtype = Q_RIO;
410		} else
411#endif
412		if (flags & FARF_RED) {
413			cl->cl_red = red_alloc(0, 0,
414			    cl->cl_qlimit * 10/100,
415			    cl->cl_qlimit * 30/100,
416			    red_flags, red_pkttime);
417			if (cl->cl_red != NULL)
418				cl->cl_qtype = Q_RED;
419		}
420	}
421#endif /* ALTQ_RED */
422#ifdef ALTQ_CODEL
423	if (flags & FARF_CODEL) {
424		cl->cl_codel = codel_alloc(5, 100, 0);
425		if (cl->cl_codel != NULL)
426			cl->cl_qtype = Q_CODEL;
427	}
428#endif
429
430	return (cl);
431}
432
433static int
434fairq_class_destroy(struct fairq_class *cl)
435{
436	struct fairq_if *pif;
437	int pri;
438
439	IFQ_LOCK(cl->cl_pif->pif_ifq);
440
441	if (cl->cl_head)
442		fairq_purgeq(cl);
443
444	pif = cl->cl_pif;
445	pif->pif_classes[cl->cl_pri] = NULL;
446	if (pif->pif_poll_cache == cl)
447		pif->pif_poll_cache = NULL;
448	if (pif->pif_maxpri == cl->cl_pri) {
449		for (pri = cl->cl_pri; pri >= 0; pri--)
450			if (pif->pif_classes[pri] != NULL) {
451				pif->pif_maxpri = pri;
452				break;
453			}
454		if (pri < 0)
455			pif->pif_maxpri = -1;
456	}
457	IFQ_UNLOCK(cl->cl_pif->pif_ifq);
458
459	if (cl->cl_red != NULL) {
460#ifdef ALTQ_RIO
461		if (cl->cl_qtype == Q_RIO)
462			rio_destroy((rio_t *)cl->cl_red);
463#endif
464#ifdef ALTQ_RED
465		if (cl->cl_qtype == Q_RED)
466			red_destroy(cl->cl_red);
467#endif
468#ifdef ALTQ_CODEL
469		if (cl->cl_qtype == Q_CODEL)
470			codel_destroy(cl->cl_codel);
471#endif
472	}
473	free(cl->cl_buckets, M_DEVBUF);
474	free(cl, M_DEVBUF);
475
476	return (0);
477}
478
479/*
480 * fairq_enqueue is an enqueue function to be registered to
481 * (*altq_enqueue) in struct ifaltq.
482 */
483static int
484fairq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
485{
486	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
487	struct fairq_class *cl = NULL; /* Make compiler happy */
488	struct pf_mtag *t;
489	u_int32_t qid_hash = 0;
490	int len;
491
492	IFQ_LOCK_ASSERT(ifq);
493
494	/* grab class set by classifier */
495	if ((m->m_flags & M_PKTHDR) == 0) {
496		/* should not happen */
497		printf("altq: packet for %s does not have pkthdr\n",
498			ifq->altq_ifp->if_xname);
499		m_freem(m);
500		return (ENOBUFS);
501	}
502
503	if ((t = pf_find_mtag(m)) != NULL) {
504		cl = clh_to_clp(pif, t->qid);
505		qid_hash = t->qid_hash;
506	}
507	if (cl == NULL) {
508		cl = pif->pif_default;
509		if (cl == NULL) {
510			m_freem(m);
511			return (ENOBUFS);
512		}
513	}
514	cl->cl_flags |= FARF_HAS_PACKETS;
515	cl->cl_pktattr = NULL;
516	len = m_pktlen(m);
517	if (fairq_addq(cl, m, qid_hash) != 0) {
518		/* drop occurred.  mbuf was freed in fairq_addq. */
519		PKTCNTR_ADD(&cl->cl_dropcnt, len);
520		return (ENOBUFS);
521	}
522	IFQ_INC_LEN(ifq);
523
524	return (0);
525}
526
527/*
528 * fairq_dequeue is a dequeue function to be registered to
529 * (*altq_dequeue) in struct ifaltq.
530 *
531 * note: ALTDQ_POLL returns the next packet without removing the packet
532 *	from the queue.  ALTDQ_REMOVE is a normal dequeue operation.
533 *	ALTDQ_REMOVE must return the same packet if called immediately
534 *	after ALTDQ_POLL.
535 */
536static struct mbuf *
537fairq_dequeue(struct ifaltq *ifq, int op)
538{
539	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
540	struct fairq_class *cl;
541	struct fairq_class *best_cl;
542	struct mbuf *best_m;
543	struct mbuf *m = NULL;
544	uint64_t cur_time = read_machclk();
545	int pri;
546	int hit_limit;
547
548	IFQ_LOCK_ASSERT(ifq);
549
550	if (IFQ_IS_EMPTY(ifq)) {
551		return (NULL);
552	}
553
554	if (pif->pif_poll_cache && op == ALTDQ_REMOVE) {
555		best_cl = pif->pif_poll_cache;
556		m = fairq_getq(best_cl, cur_time);
557		pif->pif_poll_cache = NULL;
558		if (m) {
559			IFQ_DEC_LEN(ifq);
560			PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m));
561			return (m);
562		}
563	} else {
564		best_cl = NULL;
565		best_m = NULL;
566
567		for (pri = pif->pif_maxpri;  pri >= 0; pri--) {
568			if ((cl = pif->pif_classes[pri]) == NULL)
569				continue;
570			if ((cl->cl_flags & FARF_HAS_PACKETS) == 0)
571				continue;
572			m = fairq_pollq(cl, cur_time, &hit_limit);
573			if (m == NULL) {
574				cl->cl_flags &= ~FARF_HAS_PACKETS;
575				continue;
576			}
577
578			/*
579			 * Only override the best choice if we are under
580			 * the BW limit.
581			 */
582			if (hit_limit == 0 || best_cl == NULL) {
583				best_cl = cl;
584				best_m = m;
585			}
586
587			/*
588			 * Remember the highest priority mbuf in case we
589			 * do not find any lower priority mbufs.
590			 */
591			if (hit_limit)
592				continue;
593			break;
594		}
595		if (op == ALTDQ_POLL) {
596			pif->pif_poll_cache = best_cl;
597			m = best_m;
598		} else if (best_cl) {
599			m = fairq_getq(best_cl, cur_time);
600			if (m != NULL) {
601				IFQ_DEC_LEN(ifq);
602				PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m));
603			}
604		}
605		return (m);
606	}
607	return (NULL);
608}
609
610static int
611fairq_addq(struct fairq_class *cl, struct mbuf *m, u_int32_t bucketid)
612{
613	fairq_bucket_t *b;
614	u_int hindex;
615	uint64_t bw;
616
617	/*
618	 * If the packet doesn't have any keep state put it on the end of
619	 * our queue.  XXX this can result in out of order delivery.
620	 */
621	if (bucketid == 0) {
622		if (cl->cl_head)
623			b = cl->cl_head->prev;
624		else
625			b = &cl->cl_buckets[0];
626	} else {
627		hindex = bucketid & cl->cl_nbucket_mask;
628		b = &cl->cl_buckets[hindex];
629	}
630
631	/*
632	 * Add the bucket to the end of the circular list of active buckets.
633	 *
634	 * As a special case we add the bucket to the beginning of the list
635	 * instead of the end if it was not previously on the list and if
636	 * its traffic is less then the hog level.
637	 */
638	if (b->in_use == 0) {
639		b->in_use = 1;
640		if (cl->cl_head == NULL) {
641			cl->cl_head = b;
642			b->next = b;
643			b->prev = b;
644		} else {
645			b->next = cl->cl_head;
646			b->prev = cl->cl_head->prev;
647			b->prev->next = b;
648			b->next->prev = b;
649
650			if (b->bw_delta && cl->cl_hogs_m1) {
651				bw = b->bw_bytes * machclk_freq / b->bw_delta;
652				if (bw < cl->cl_hogs_m1)
653					cl->cl_head = b;
654			}
655		}
656	}
657
658#ifdef ALTQ_RIO
659	if (cl->cl_qtype == Q_RIO)
660		return rio_addq((rio_t *)cl->cl_red, &b->queue, m, cl->cl_pktattr);
661#endif
662#ifdef ALTQ_RED
663	if (cl->cl_qtype == Q_RED)
664		return red_addq(cl->cl_red, &b->queue, m, cl->cl_pktattr);
665#endif
666#ifdef ALTQ_CODEL
667	if (cl->cl_qtype == Q_CODEL)
668		return codel_addq(cl->cl_codel, &b->queue, m);
669#endif
670	if (qlen(&b->queue) >= qlimit(&b->queue)) {
671		m_freem(m);
672		return (-1);
673	}
674
675	if (cl->cl_flags & FARF_CLEARDSCP)
676		write_dsfield(m, cl->cl_pktattr, 0);
677
678	_addq(&b->queue, m);
679
680	return (0);
681}
682
683static struct mbuf *
684fairq_getq(struct fairq_class *cl, uint64_t cur_time)
685{
686	fairq_bucket_t *b;
687	struct mbuf *m;
688
689	b = fairq_selectq(cl, 0);
690	if (b == NULL)
691		m = NULL;
692#ifdef ALTQ_RIO
693	else if (cl->cl_qtype == Q_RIO)
694		m = rio_getq((rio_t *)cl->cl_red, &b->queue);
695#endif
696#ifdef ALTQ_RED
697	else if (cl->cl_qtype == Q_RED)
698		m = red_getq(cl->cl_red, &b->queue);
699#endif
700#ifdef ALTQ_CODEL
701	else if (cl->cl_qtype == Q_CODEL)
702		m = codel_getq(cl->cl_codel, &b->queue);
703#endif
704	else
705		m = _getq(&b->queue);
706
707	/*
708	 * Calculate the BW change
709	 */
710	if (m != NULL) {
711		uint64_t delta;
712
713		/*
714		 * Per-class bandwidth calculation
715		 */
716		delta = (cur_time - cl->cl_last_time);
717		if (delta > machclk_freq * 8)
718			delta = machclk_freq * 8;
719		cl->cl_bw_delta += delta;
720		cl->cl_bw_bytes += m->m_pkthdr.len;
721		cl->cl_last_time = cur_time;
722		cl->cl_bw_delta -= cl->cl_bw_delta >> 3;
723		cl->cl_bw_bytes -= cl->cl_bw_bytes >> 3;
724
725		/*
726		 * Per-bucket bandwidth calculation
727		 */
728		delta = (cur_time - b->last_time);
729		if (delta > machclk_freq * 8)
730			delta = machclk_freq * 8;
731		b->bw_delta += delta;
732		b->bw_bytes += m->m_pkthdr.len;
733		b->last_time = cur_time;
734		b->bw_delta -= b->bw_delta >> 3;
735		b->bw_bytes -= b->bw_bytes >> 3;
736	}
737	return(m);
738}
739
740/*
741 * Figure out what the next packet would be if there were no limits.  If
742 * this class hits its bandwidth limit *hit_limit is set to no-zero, otherwise
743 * it is set to 0.  A non-NULL mbuf is returned either way.
744 */
745static struct mbuf *
746fairq_pollq(struct fairq_class *cl, uint64_t cur_time, int *hit_limit)
747{
748	fairq_bucket_t *b;
749	struct mbuf *m;
750	uint64_t delta;
751	uint64_t bw;
752
753	*hit_limit = 0;
754	b = fairq_selectq(cl, 1);
755	if (b == NULL)
756		return(NULL);
757	m = qhead(&b->queue);
758
759	/*
760	 * Did this packet exceed the class bandwidth?  Calculate the
761	 * bandwidth component of the packet.
762	 *
763	 * - Calculate bytes per second
764	 */
765	delta = cur_time - cl->cl_last_time;
766	if (delta > machclk_freq * 8)
767		delta = machclk_freq * 8;
768	cl->cl_bw_delta += delta;
769	cl->cl_last_time = cur_time;
770	if (cl->cl_bw_delta) {
771		bw = cl->cl_bw_bytes * machclk_freq / cl->cl_bw_delta;
772
773		if (bw > cl->cl_bandwidth)
774			*hit_limit = 1;
775#ifdef ALTQ_DEBUG
776		printf("BW %6ju relative to %6u %d queue %p\n",
777			(uintmax_t)bw, cl->cl_bandwidth, *hit_limit, b);
778#endif
779	}
780	return(m);
781}
782
783/*
784 * Locate the next queue we want to pull a packet out of.  This code
785 * is also responsible for removing empty buckets from the circular list.
786 */
787static
788fairq_bucket_t *
789fairq_selectq(struct fairq_class *cl, int ispoll)
790{
791	fairq_bucket_t *b;
792	uint64_t bw;
793
794	if (ispoll == 0 && cl->cl_polled) {
795		b = cl->cl_polled;
796		cl->cl_polled = NULL;
797		return(b);
798	}
799
800	while ((b = cl->cl_head) != NULL) {
801		/*
802		 * Remove empty queues from consideration
803		 */
804		if (qempty(&b->queue)) {
805			b->in_use = 0;
806			cl->cl_head = b->next;
807			if (cl->cl_head == b) {
808				cl->cl_head = NULL;
809			} else {
810				b->next->prev = b->prev;
811				b->prev->next = b->next;
812			}
813			continue;
814		}
815
816		/*
817		 * Advance the round robin.  Queues with bandwidths less
818		 * then the hog bandwidth are allowed to burst.
819		 */
820		if (cl->cl_hogs_m1 == 0) {
821			cl->cl_head = b->next;
822		} else if (b->bw_delta) {
823			bw = b->bw_bytes * machclk_freq / b->bw_delta;
824			if (bw >= cl->cl_hogs_m1) {
825				cl->cl_head = b->next;
826			}
827			/*
828			 * XXX TODO -
829			 */
830		}
831
832		/*
833		 * Return bucket b.
834		 */
835		break;
836	}
837	if (ispoll)
838		cl->cl_polled = b;
839	return(b);
840}
841
842static void
843fairq_purgeq(struct fairq_class *cl)
844{
845	fairq_bucket_t *b;
846	struct mbuf *m;
847
848	while ((b = fairq_selectq(cl, 0)) != NULL) {
849		while ((m = _getq(&b->queue)) != NULL) {
850			PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m));
851			m_freem(m);
852		}
853		ASSERT(qlen(&b->queue) == 0);
854	}
855}
856
857static void
858get_class_stats(struct fairq_classstats *sp, struct fairq_class *cl)
859{
860	fairq_bucket_t *b;
861
862	sp->class_handle = cl->cl_handle;
863	sp->qlimit = cl->cl_qlimit;
864	sp->xmit_cnt = cl->cl_xmitcnt;
865	sp->drop_cnt = cl->cl_dropcnt;
866	sp->qtype = cl->cl_qtype;
867	sp->qlength = 0;
868
869	if (cl->cl_head) {
870		b = cl->cl_head;
871		do {
872			sp->qlength += qlen(&b->queue);
873			b = b->next;
874		} while (b != cl->cl_head);
875	}
876
877#ifdef ALTQ_RED
878	if (cl->cl_qtype == Q_RED)
879		red_getstats(cl->cl_red, &sp->red[0]);
880#endif
881#ifdef ALTQ_RIO
882	if (cl->cl_qtype == Q_RIO)
883		rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
884#endif
885#ifdef ALTQ_CODEL
886	if (cl->cl_qtype == Q_CODEL)
887		codel_getstats(cl->cl_codel, &sp->codel);
888#endif
889}
890
891/* convert a class handle to the corresponding class pointer */
892static struct fairq_class *
893clh_to_clp(struct fairq_if *pif, uint32_t chandle)
894{
895	struct fairq_class *cl;
896	int idx;
897
898	if (chandle == 0)
899		return (NULL);
900
901	for (idx = pif->pif_maxpri; idx >= 0; idx--)
902		if ((cl = pif->pif_classes[idx]) != NULL &&
903		    cl->cl_handle == chandle)
904			return (cl);
905
906	return (NULL);
907}
908
909#endif /* ALTQ_FAIRQ */
910