1/*
2 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in
15 *    the documentation and/or other materials provided with the
16 *    distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 *    contributors may be used to endorse or promote products derived
19 *    from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * $DragonFly: src/sys/net/altq/altq_fairq.c,v 1.1 2008/04/06 18:58:15 dillon Exp $
35 */
36/*
37 * Matt: I gutted altq_priq.c and used it as a skeleton on which to build
38 * fairq.  The fairq algorithm is completely different then priq, of course,
39 * but because I used priq's skeleton I believe I should include priq's
40 * copyright.
41 *
42 * Copyright (C) 2000-2003
43 *	Sony Computer Science Laboratories Inc.  All rights reserved.
44 *
45 * Redistribution and use in source and binary forms, with or without
46 * modification, are permitted provided that the following conditions
47 * are met:
48 * 1. Redistributions of source code must retain the above copyright
49 *    notice, this list of conditions and the following disclaimer.
50 * 2. Redistributions in binary form must reproduce the above copyright
51 *    notice, this list of conditions and the following disclaimer in the
52 *    documentation and/or other materials provided with the distribution.
53 *
54 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 */
66
67/*
68 * FAIRQ - take traffic classified by keep state (hashed into
69 * mbuf->m_pkthdr.altq_state_hash) and bucketize it.  Fairly extract
70 * the first packet from each bucket in a round-robin fashion.
71 *
72 * TODO - better overall qlimit support (right now it is per-bucket).
73 *	- NOTE: red etc is per bucket, not overall.
74 *	- better service curve support.
75 *
76 * EXAMPLE:
77 *
78 *  altq on em0 fairq bandwidth 650Kb queue { std, bulk }
79 *  queue std  priority 3 bandwidth 400Kb \
80 *	fairq (buckets 64, default, hogs 1Kb) qlimit 50
81 *  queue bulk priority 2 bandwidth 100Kb \
82 *	fairq (buckets 64, hogs 1Kb) qlimit 50
83 *
84 *  pass out on em0 from any to any keep state queue std
85 *  pass out on em0 inet proto tcp ..... port ... keep state queue bulk
86 */
87#include "opt_altq.h"
88#include "opt_inet.h"
89#include "opt_inet6.h"
90
91#ifdef ALTQ_FAIRQ  /* fairq is enabled in the kernel conf */
92
93#include <sys/param.h>
94#include <sys/malloc.h>
95#include <sys/mbuf.h>
96#include <sys/socket.h>
97#include <sys/sockio.h>
98#include <sys/systm.h>
99#include <sys/proc.h>
100#include <sys/errno.h>
101#include <sys/kernel.h>
102#include <sys/queue.h>
103
104#include <net/if.h>
105#include <net/if_var.h>
106#include <net/if_private.h>
107#include <netinet/in.h>
108
109#include <netpfil/pf/pf.h>
110#include <netpfil/pf/pf_altq.h>
111#include <netpfil/pf/pf_mtag.h>
112#include <net/altq/altq.h>
113#include <net/altq/altq_fairq.h>
114
115/*
116 * function prototypes
117 */
118static int	fairq_clear_interface(struct fairq_if *);
119static int	fairq_request(struct ifaltq *, int, void *);
120static void	fairq_purge(struct fairq_if *);
121static struct fairq_class *fairq_class_create(struct fairq_if *, int, int, u_int, struct fairq_opts *, int);
122static int	fairq_class_destroy(struct fairq_class *);
123static int	fairq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
124static struct mbuf *fairq_dequeue(struct ifaltq *, int);
125
126static int	fairq_addq(struct fairq_class *, struct mbuf *, u_int32_t);
127static struct mbuf *fairq_getq(struct fairq_class *, uint64_t);
128static struct mbuf *fairq_pollq(struct fairq_class *, uint64_t, int *);
129static fairq_bucket_t *fairq_selectq(struct fairq_class *, int);
130static void	fairq_purgeq(struct fairq_class *);
131
132static void	get_class_stats(struct fairq_classstats *, struct fairq_class *);
133static struct fairq_class *clh_to_clp(struct fairq_if *, uint32_t);
134
135int
136fairq_pfattach(struct pf_altq *a)
137{
138	struct ifnet *ifp;
139	int error;
140
141	if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
142		return (EINVAL);
143
144	error = altq_attach(&ifp->if_snd, ALTQT_FAIRQ, a->altq_disc,
145	    fairq_enqueue, fairq_dequeue, fairq_request);
146
147	return (error);
148}
149
150int
151fairq_add_altq(struct ifnet *ifp, struct pf_altq *a)
152{
153	struct fairq_if *pif;
154
155	if (ifp == NULL)
156		return (EINVAL);
157	if (!ALTQ_IS_READY(&ifp->if_snd))
158		return (ENODEV);
159
160	pif = malloc(sizeof(struct fairq_if),
161			M_DEVBUF, M_WAITOK | M_ZERO);
162	pif->pif_bandwidth = a->ifbandwidth;
163	pif->pif_maxpri = -1;
164	pif->pif_ifq = &ifp->if_snd;
165
166	/* keep the state in pf_altq */
167	a->altq_disc = pif;
168
169	return (0);
170}
171
172int
173fairq_remove_altq(struct pf_altq *a)
174{
175	struct fairq_if *pif;
176
177	if ((pif = a->altq_disc) == NULL)
178		return (EINVAL);
179	a->altq_disc = NULL;
180
181	fairq_clear_interface(pif);
182
183	free(pif, M_DEVBUF);
184	return (0);
185}
186
187int
188fairq_add_queue(struct pf_altq *a)
189{
190	struct fairq_if *pif;
191	struct fairq_class *cl;
192
193	if ((pif = a->altq_disc) == NULL)
194		return (EINVAL);
195
196	/* check parameters */
197	if (a->priority >= FAIRQ_MAXPRI)
198		return (EINVAL);
199	if (a->qid == 0)
200		return (EINVAL);
201	if (pif->pif_classes[a->priority] != NULL)
202		return (EBUSY);
203	if (clh_to_clp(pif, a->qid) != NULL)
204		return (EBUSY);
205
206	cl = fairq_class_create(pif, a->priority, a->qlimit, a->bandwidth,
207			       &a->pq_u.fairq_opts, a->qid);
208	if (cl == NULL)
209		return (ENOMEM);
210
211	return (0);
212}
213
214int
215fairq_remove_queue(struct pf_altq *a)
216{
217	struct fairq_if *pif;
218	struct fairq_class *cl;
219
220	if ((pif = a->altq_disc) == NULL)
221		return (EINVAL);
222
223	if ((cl = clh_to_clp(pif, a->qid)) == NULL)
224		return (EINVAL);
225
226	return (fairq_class_destroy(cl));
227}
228
229int
230fairq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version)
231{
232	struct fairq_if *pif;
233	struct fairq_class *cl;
234	struct fairq_classstats stats;
235	int error = 0;
236
237	if ((pif = altq_lookup(a->ifname, ALTQT_FAIRQ)) == NULL)
238		return (EBADF);
239
240	if ((cl = clh_to_clp(pif, a->qid)) == NULL)
241		return (EINVAL);
242
243	if (*nbytes < sizeof(stats))
244		return (EINVAL);
245
246	get_class_stats(&stats, cl);
247
248	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
249		return (error);
250	*nbytes = sizeof(stats);
251	return (0);
252}
253
254/*
255 * bring the interface back to the initial state by discarding
256 * all the filters and classes.
257 */
258static int
259fairq_clear_interface(struct fairq_if *pif)
260{
261	struct fairq_class *cl;
262	int pri;
263
264	/* clear out the classes */
265	for (pri = 0; pri <= pif->pif_maxpri; pri++) {
266		if ((cl = pif->pif_classes[pri]) != NULL)
267			fairq_class_destroy(cl);
268	}
269
270	return (0);
271}
272
273static int
274fairq_request(struct ifaltq *ifq, int req, void *arg)
275{
276	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
277
278	IFQ_LOCK_ASSERT(ifq);
279
280	switch (req) {
281	case ALTRQ_PURGE:
282		fairq_purge(pif);
283		break;
284	}
285	return (0);
286}
287
288/* discard all the queued packets on the interface */
289static void
290fairq_purge(struct fairq_if *pif)
291{
292	struct fairq_class *cl;
293	int pri;
294
295	for (pri = 0; pri <= pif->pif_maxpri; pri++) {
296		if ((cl = pif->pif_classes[pri]) != NULL && cl->cl_head)
297			fairq_purgeq(cl);
298	}
299	if (ALTQ_IS_ENABLED(pif->pif_ifq))
300		pif->pif_ifq->ifq_len = 0;
301}
302
303static struct fairq_class *
304fairq_class_create(struct fairq_if *pif, int pri, int qlimit,
305		   u_int bandwidth, struct fairq_opts *opts, int qid)
306{
307	struct fairq_class *cl;
308	int flags = opts->flags;
309	u_int nbuckets = opts->nbuckets;
310	int i;
311
312#ifndef ALTQ_RED
313	if (flags & FARF_RED) {
314#ifdef ALTQ_DEBUG
315		printf("fairq_class_create: RED not configured for FAIRQ!\n");
316#endif
317		return (NULL);
318	}
319#endif
320#ifndef ALTQ_CODEL
321	if (flags & FARF_CODEL) {
322#ifdef ALTQ_DEBUG
323		printf("fairq_class_create: CODEL not configured for FAIRQ!\n");
324#endif
325		return (NULL);
326	}
327#endif
328	if (nbuckets == 0)
329		nbuckets = 256;
330	if (nbuckets > FAIRQ_MAX_BUCKETS)
331		nbuckets = FAIRQ_MAX_BUCKETS;
332	/* enforce power-of-2 size */
333	while ((nbuckets ^ (nbuckets - 1)) != ((nbuckets << 1) - 1))
334		++nbuckets;
335
336	if ((cl = pif->pif_classes[pri]) != NULL) {
337		/* modify the class instead of creating a new one */
338		IFQ_LOCK(cl->cl_pif->pif_ifq);
339		if (cl->cl_head)
340			fairq_purgeq(cl);
341		IFQ_UNLOCK(cl->cl_pif->pif_ifq);
342#ifdef ALTQ_RIO
343		if (cl->cl_qtype == Q_RIO)
344			rio_destroy((rio_t *)cl->cl_red);
345#endif
346#ifdef ALTQ_RED
347		if (cl->cl_qtype == Q_RED)
348			red_destroy(cl->cl_red);
349#endif
350#ifdef ALTQ_CODEL
351		if (cl->cl_qtype == Q_CODEL)
352			codel_destroy(cl->cl_codel);
353#endif
354	} else {
355		cl = malloc(sizeof(struct fairq_class),
356				M_DEVBUF, M_WAITOK | M_ZERO);
357		cl->cl_nbuckets = nbuckets;
358		cl->cl_nbucket_mask = nbuckets - 1;
359
360		cl->cl_buckets = malloc(
361			sizeof(struct fairq_bucket) * cl->cl_nbuckets,
362			M_DEVBUF, M_WAITOK | M_ZERO);
363		cl->cl_head = NULL;
364	}
365
366	pif->pif_classes[pri] = cl;
367	if (flags & FARF_DEFAULTCLASS)
368		pif->pif_default = cl;
369	if (qlimit == 0)
370		qlimit = 50;  /* use default */
371	cl->cl_qlimit = qlimit;
372	for (i = 0; i < cl->cl_nbuckets; ++i) {
373		qlimit(&cl->cl_buckets[i].queue) = qlimit;
374	}
375	cl->cl_bandwidth = bandwidth / 8;
376	cl->cl_qtype = Q_DROPTAIL;
377	cl->cl_flags = flags & FARF_USERFLAGS;
378	cl->cl_pri = pri;
379	if (pri > pif->pif_maxpri)
380		pif->pif_maxpri = pri;
381	cl->cl_pif = pif;
382	cl->cl_handle = qid;
383	cl->cl_hogs_m1 = opts->hogs_m1 / 8;
384	cl->cl_lssc_m1 = opts->lssc_m1 / 8;	/* NOT YET USED */
385
386#ifdef ALTQ_RED
387	if (flags & (FARF_RED|FARF_RIO)) {
388		int red_flags, red_pkttime;
389
390		red_flags = 0;
391		if (flags & FARF_ECN)
392			red_flags |= REDF_ECN;
393#ifdef ALTQ_RIO
394		if (flags & FARF_CLEARDSCP)
395			red_flags |= RIOF_CLEARDSCP;
396#endif
397		if (pif->pif_bandwidth < 8)
398			red_pkttime = 1000 * 1000 * 1000; /* 1 sec */
399		else
400			red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu
401			  * 1000 * 1000 * 1000 / (pif->pif_bandwidth / 8);
402#ifdef ALTQ_RIO
403		if (flags & FARF_RIO) {
404			cl->cl_red = (red_t *)rio_alloc(0, NULL,
405						red_flags, red_pkttime);
406			if (cl->cl_red != NULL)
407				cl->cl_qtype = Q_RIO;
408		} else
409#endif
410		if (flags & FARF_RED) {
411			cl->cl_red = red_alloc(0, 0,
412			    cl->cl_qlimit * 10/100,
413			    cl->cl_qlimit * 30/100,
414			    red_flags, red_pkttime);
415			if (cl->cl_red != NULL)
416				cl->cl_qtype = Q_RED;
417		}
418	}
419#endif /* ALTQ_RED */
420#ifdef ALTQ_CODEL
421	if (flags & FARF_CODEL) {
422		cl->cl_codel = codel_alloc(5, 100, 0);
423		if (cl->cl_codel != NULL)
424			cl->cl_qtype = Q_CODEL;
425	}
426#endif
427
428	return (cl);
429}
430
431static int
432fairq_class_destroy(struct fairq_class *cl)
433{
434	struct fairq_if *pif;
435	int pri;
436
437	IFQ_LOCK(cl->cl_pif->pif_ifq);
438
439	if (cl->cl_head)
440		fairq_purgeq(cl);
441
442	pif = cl->cl_pif;
443	pif->pif_classes[cl->cl_pri] = NULL;
444	if (pif->pif_poll_cache == cl)
445		pif->pif_poll_cache = NULL;
446	if (pif->pif_maxpri == cl->cl_pri) {
447		for (pri = cl->cl_pri; pri >= 0; pri--)
448			if (pif->pif_classes[pri] != NULL) {
449				pif->pif_maxpri = pri;
450				break;
451			}
452		if (pri < 0)
453			pif->pif_maxpri = -1;
454	}
455	IFQ_UNLOCK(cl->cl_pif->pif_ifq);
456
457	if (cl->cl_red != NULL) {
458#ifdef ALTQ_RIO
459		if (cl->cl_qtype == Q_RIO)
460			rio_destroy((rio_t *)cl->cl_red);
461#endif
462#ifdef ALTQ_RED
463		if (cl->cl_qtype == Q_RED)
464			red_destroy(cl->cl_red);
465#endif
466#ifdef ALTQ_CODEL
467		if (cl->cl_qtype == Q_CODEL)
468			codel_destroy(cl->cl_codel);
469#endif
470	}
471	free(cl->cl_buckets, M_DEVBUF);
472	free(cl, M_DEVBUF);
473
474	return (0);
475}
476
477/*
478 * fairq_enqueue is an enqueue function to be registered to
479 * (*altq_enqueue) in struct ifaltq.
480 */
481static int
482fairq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
483{
484	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
485	struct fairq_class *cl = NULL; /* Make compiler happy */
486	struct pf_mtag *t;
487	u_int32_t qid_hash = 0;
488	int len;
489
490	IFQ_LOCK_ASSERT(ifq);
491
492	/* grab class set by classifier */
493	if ((m->m_flags & M_PKTHDR) == 0) {
494		/* should not happen */
495		printf("altq: packet for %s does not have pkthdr\n",
496			ifq->altq_ifp->if_xname);
497		m_freem(m);
498		return (ENOBUFS);
499	}
500
501	if ((t = pf_find_mtag(m)) != NULL) {
502		cl = clh_to_clp(pif, t->qid);
503		qid_hash = t->qid_hash;
504	}
505	if (cl == NULL) {
506		cl = pif->pif_default;
507		if (cl == NULL) {
508			m_freem(m);
509			return (ENOBUFS);
510		}
511	}
512	cl->cl_flags |= FARF_HAS_PACKETS;
513	cl->cl_pktattr = NULL;
514	len = m_pktlen(m);
515	if (fairq_addq(cl, m, qid_hash) != 0) {
516		/* drop occurred.  mbuf was freed in fairq_addq. */
517		PKTCNTR_ADD(&cl->cl_dropcnt, len);
518		return (ENOBUFS);
519	}
520	IFQ_INC_LEN(ifq);
521
522	return (0);
523}
524
525/*
526 * fairq_dequeue is a dequeue function to be registered to
527 * (*altq_dequeue) in struct ifaltq.
528 *
529 * note: ALTDQ_POLL returns the next packet without removing the packet
530 *	from the queue.  ALTDQ_REMOVE is a normal dequeue operation.
531 *	ALTDQ_REMOVE must return the same packet if called immediately
532 *	after ALTDQ_POLL.
533 */
534static struct mbuf *
535fairq_dequeue(struct ifaltq *ifq, int op)
536{
537	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
538	struct fairq_class *cl;
539	struct fairq_class *best_cl;
540	struct mbuf *best_m;
541	struct mbuf *m = NULL;
542	uint64_t cur_time = read_machclk();
543	int pri;
544	int hit_limit;
545
546	IFQ_LOCK_ASSERT(ifq);
547
548	if (IFQ_IS_EMPTY(ifq)) {
549		return (NULL);
550	}
551
552	if (pif->pif_poll_cache && op == ALTDQ_REMOVE) {
553		best_cl = pif->pif_poll_cache;
554		m = fairq_getq(best_cl, cur_time);
555		pif->pif_poll_cache = NULL;
556		if (m) {
557			IFQ_DEC_LEN(ifq);
558			PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m));
559			return (m);
560		}
561	} else {
562		best_cl = NULL;
563		best_m = NULL;
564
565		for (pri = pif->pif_maxpri;  pri >= 0; pri--) {
566			if ((cl = pif->pif_classes[pri]) == NULL)
567				continue;
568			if ((cl->cl_flags & FARF_HAS_PACKETS) == 0)
569				continue;
570			m = fairq_pollq(cl, cur_time, &hit_limit);
571			if (m == NULL) {
572				cl->cl_flags &= ~FARF_HAS_PACKETS;
573				continue;
574			}
575
576			/*
577			 * Only override the best choice if we are under
578			 * the BW limit.
579			 */
580			if (hit_limit == 0 || best_cl == NULL) {
581				best_cl = cl;
582				best_m = m;
583			}
584
585			/*
586			 * Remember the highest priority mbuf in case we
587			 * do not find any lower priority mbufs.
588			 */
589			if (hit_limit)
590				continue;
591			break;
592		}
593		if (op == ALTDQ_POLL) {
594			pif->pif_poll_cache = best_cl;
595			m = best_m;
596		} else if (best_cl) {
597			m = fairq_getq(best_cl, cur_time);
598			if (m != NULL) {
599				IFQ_DEC_LEN(ifq);
600				PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m));
601			}
602		}
603		return (m);
604	}
605	return (NULL);
606}
607
608static int
609fairq_addq(struct fairq_class *cl, struct mbuf *m, u_int32_t bucketid)
610{
611	fairq_bucket_t *b;
612	u_int hindex;
613	uint64_t bw;
614
615	/*
616	 * If the packet doesn't have any keep state put it on the end of
617	 * our queue.  XXX this can result in out of order delivery.
618	 */
619	if (bucketid == 0) {
620		if (cl->cl_head)
621			b = cl->cl_head->prev;
622		else
623			b = &cl->cl_buckets[0];
624	} else {
625		hindex = bucketid & cl->cl_nbucket_mask;
626		b = &cl->cl_buckets[hindex];
627	}
628
629	/*
630	 * Add the bucket to the end of the circular list of active buckets.
631	 *
632	 * As a special case we add the bucket to the beginning of the list
633	 * instead of the end if it was not previously on the list and if
634	 * its traffic is less then the hog level.
635	 */
636	if (b->in_use == 0) {
637		b->in_use = 1;
638		if (cl->cl_head == NULL) {
639			cl->cl_head = b;
640			b->next = b;
641			b->prev = b;
642		} else {
643			b->next = cl->cl_head;
644			b->prev = cl->cl_head->prev;
645			b->prev->next = b;
646			b->next->prev = b;
647
648			if (b->bw_delta && cl->cl_hogs_m1) {
649				bw = b->bw_bytes * machclk_freq / b->bw_delta;
650				if (bw < cl->cl_hogs_m1)
651					cl->cl_head = b;
652			}
653		}
654	}
655
656#ifdef ALTQ_RIO
657	if (cl->cl_qtype == Q_RIO)
658		return rio_addq((rio_t *)cl->cl_red, &b->queue, m, cl->cl_pktattr);
659#endif
660#ifdef ALTQ_RED
661	if (cl->cl_qtype == Q_RED)
662		return red_addq(cl->cl_red, &b->queue, m, cl->cl_pktattr);
663#endif
664#ifdef ALTQ_CODEL
665	if (cl->cl_qtype == Q_CODEL)
666		return codel_addq(cl->cl_codel, &b->queue, m);
667#endif
668	if (qlen(&b->queue) >= qlimit(&b->queue)) {
669		m_freem(m);
670		return (-1);
671	}
672
673	if (cl->cl_flags & FARF_CLEARDSCP)
674		write_dsfield(m, cl->cl_pktattr, 0);
675
676	_addq(&b->queue, m);
677
678	return (0);
679}
680
681static struct mbuf *
682fairq_getq(struct fairq_class *cl, uint64_t cur_time)
683{
684	fairq_bucket_t *b;
685	struct mbuf *m;
686
687	b = fairq_selectq(cl, 0);
688	if (b == NULL)
689		m = NULL;
690#ifdef ALTQ_RIO
691	else if (cl->cl_qtype == Q_RIO)
692		m = rio_getq((rio_t *)cl->cl_red, &b->queue);
693#endif
694#ifdef ALTQ_RED
695	else if (cl->cl_qtype == Q_RED)
696		m = red_getq(cl->cl_red, &b->queue);
697#endif
698#ifdef ALTQ_CODEL
699	else if (cl->cl_qtype == Q_CODEL)
700		m = codel_getq(cl->cl_codel, &b->queue);
701#endif
702	else
703		m = _getq(&b->queue);
704
705	/*
706	 * Calculate the BW change
707	 */
708	if (m != NULL) {
709		uint64_t delta;
710
711		/*
712		 * Per-class bandwidth calculation
713		 */
714		delta = (cur_time - cl->cl_last_time);
715		if (delta > machclk_freq * 8)
716			delta = machclk_freq * 8;
717		cl->cl_bw_delta += delta;
718		cl->cl_bw_bytes += m->m_pkthdr.len;
719		cl->cl_last_time = cur_time;
720		cl->cl_bw_delta -= cl->cl_bw_delta >> 3;
721		cl->cl_bw_bytes -= cl->cl_bw_bytes >> 3;
722
723		/*
724		 * Per-bucket bandwidth calculation
725		 */
726		delta = (cur_time - b->last_time);
727		if (delta > machclk_freq * 8)
728			delta = machclk_freq * 8;
729		b->bw_delta += delta;
730		b->bw_bytes += m->m_pkthdr.len;
731		b->last_time = cur_time;
732		b->bw_delta -= b->bw_delta >> 3;
733		b->bw_bytes -= b->bw_bytes >> 3;
734	}
735	return(m);
736}
737
738/*
739 * Figure out what the next packet would be if there were no limits.  If
740 * this class hits its bandwidth limit *hit_limit is set to no-zero, otherwise
741 * it is set to 0.  A non-NULL mbuf is returned either way.
742 */
743static struct mbuf *
744fairq_pollq(struct fairq_class *cl, uint64_t cur_time, int *hit_limit)
745{
746	fairq_bucket_t *b;
747	struct mbuf *m;
748	uint64_t delta;
749	uint64_t bw;
750
751	*hit_limit = 0;
752	b = fairq_selectq(cl, 1);
753	if (b == NULL)
754		return(NULL);
755	m = qhead(&b->queue);
756
757	/*
758	 * Did this packet exceed the class bandwidth?  Calculate the
759	 * bandwidth component of the packet.
760	 *
761	 * - Calculate bytes per second
762	 */
763	delta = cur_time - cl->cl_last_time;
764	if (delta > machclk_freq * 8)
765		delta = machclk_freq * 8;
766	cl->cl_bw_delta += delta;
767	cl->cl_last_time = cur_time;
768	if (cl->cl_bw_delta) {
769		bw = cl->cl_bw_bytes * machclk_freq / cl->cl_bw_delta;
770
771		if (bw > cl->cl_bandwidth)
772			*hit_limit = 1;
773#ifdef ALTQ_DEBUG
774		printf("BW %6ju relative to %6u %d queue %p\n",
775			(uintmax_t)bw, cl->cl_bandwidth, *hit_limit, b);
776#endif
777	}
778	return(m);
779}
780
781/*
782 * Locate the next queue we want to pull a packet out of.  This code
783 * is also responsible for removing empty buckets from the circular list.
784 */
785static
786fairq_bucket_t *
787fairq_selectq(struct fairq_class *cl, int ispoll)
788{
789	fairq_bucket_t *b;
790	uint64_t bw;
791
792	if (ispoll == 0 && cl->cl_polled) {
793		b = cl->cl_polled;
794		cl->cl_polled = NULL;
795		return(b);
796	}
797
798	while ((b = cl->cl_head) != NULL) {
799		/*
800		 * Remove empty queues from consideration
801		 */
802		if (qempty(&b->queue)) {
803			b->in_use = 0;
804			cl->cl_head = b->next;
805			if (cl->cl_head == b) {
806				cl->cl_head = NULL;
807			} else {
808				b->next->prev = b->prev;
809				b->prev->next = b->next;
810			}
811			continue;
812		}
813
814		/*
815		 * Advance the round robin.  Queues with bandwidths less
816		 * then the hog bandwidth are allowed to burst.
817		 */
818		if (cl->cl_hogs_m1 == 0) {
819			cl->cl_head = b->next;
820		} else if (b->bw_delta) {
821			bw = b->bw_bytes * machclk_freq / b->bw_delta;
822			if (bw >= cl->cl_hogs_m1) {
823				cl->cl_head = b->next;
824			}
825			/*
826			 * XXX TODO -
827			 */
828		}
829
830		/*
831		 * Return bucket b.
832		 */
833		break;
834	}
835	if (ispoll)
836		cl->cl_polled = b;
837	return(b);
838}
839
840static void
841fairq_purgeq(struct fairq_class *cl)
842{
843	fairq_bucket_t *b;
844	struct mbuf *m;
845
846	while ((b = fairq_selectq(cl, 0)) != NULL) {
847		while ((m = _getq(&b->queue)) != NULL) {
848			PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m));
849			m_freem(m);
850		}
851		ASSERT(qlen(&b->queue) == 0);
852	}
853}
854
855static void
856get_class_stats(struct fairq_classstats *sp, struct fairq_class *cl)
857{
858	fairq_bucket_t *b;
859
860	sp->class_handle = cl->cl_handle;
861	sp->qlimit = cl->cl_qlimit;
862	sp->xmit_cnt = cl->cl_xmitcnt;
863	sp->drop_cnt = cl->cl_dropcnt;
864	sp->qtype = cl->cl_qtype;
865	sp->qlength = 0;
866
867	if (cl->cl_head) {
868		b = cl->cl_head;
869		do {
870			sp->qlength += qlen(&b->queue);
871			b = b->next;
872		} while (b != cl->cl_head);
873	}
874
875#ifdef ALTQ_RED
876	if (cl->cl_qtype == Q_RED)
877		red_getstats(cl->cl_red, &sp->red[0]);
878#endif
879#ifdef ALTQ_RIO
880	if (cl->cl_qtype == Q_RIO)
881		rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
882#endif
883#ifdef ALTQ_CODEL
884	if (cl->cl_qtype == Q_CODEL)
885		codel_getstats(cl->cl_codel, &sp->codel);
886#endif
887}
888
889/* convert a class handle to the corresponding class pointer */
890static struct fairq_class *
891clh_to_clp(struct fairq_if *pif, uint32_t chandle)
892{
893	struct fairq_class *cl;
894	int idx;
895
896	if (chandle == 0)
897		return (NULL);
898
899	for (idx = pif->pif_maxpri; idx >= 0; idx--)
900		if ((cl = pif->pif_classes[idx]) != NULL &&
901		    cl->cl_handle == chandle)
902			return (cl);
903
904	return (NULL);
905}
906
907#endif /* ALTQ_FAIRQ */
908