1/* 2 * Copyright (c) 2011-2012 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28 29/* 30 * Copyright (c) 2010 Fabio Checconi, Luigi Rizzo, Paolo Valente 31 * All rights reserved 32 * 33 * Redistribution and use in source and binary forms, with or without 34 * modification, are permitted provided that the following conditions 35 * are met: 36 * 1. Redistributions of source code must retain the above copyright 37 * notice, this list of conditions and the following disclaimer. 38 * 2. Redistributions in binary form must reproduce the above copyright 39 * notice, this list of conditions and the following disclaimer in the 40 * documentation and/or other materials provided with the distribution. 41 * 42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 45 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 52 * SUCH DAMAGE. 53 */ 54 55/* 56 * Quick Fair Queueing is described in 57 * "QFQ: Efficient Packet Scheduling with Tight Bandwidth Distribution 58 * Guarantees" by Fabio Checconi, Paolo Valente, and Luigi Rizzo. 59 * 60 * This code is ported from the dummynet(4) QFQ implementation. 61 * See also http://info.iet.unipi.it/~luigi/qfq/ 62 */ 63 64#include <sys/cdefs.h> 65#include <sys/param.h> 66#include <sys/malloc.h> 67#include <sys/mbuf.h> 68#include <sys/systm.h> 69#include <sys/errno.h> 70#include <sys/kernel.h> 71#include <sys/syslog.h> 72 73#include <kern/zalloc.h> 74 75#include <net/if.h> 76#include <net/net_osdep.h> 77 78#include <net/pktsched/pktsched_qfq.h> 79#include <netinet/in.h> 80 81/* 82 * function prototypes 83 */ 84static int qfq_enqueue_ifclassq(struct ifclassq *, struct mbuf *); 85static struct mbuf *qfq_dequeue_ifclassq(struct ifclassq *, cqdq_op_t); 86static int qfq_request_ifclassq(struct ifclassq *, cqrq_t, void *); 87static int qfq_clear_interface(struct qfq_if *); 88static struct qfq_class *qfq_class_create(struct qfq_if *, u_int32_t, 89 u_int32_t, u_int32_t, u_int32_t, u_int32_t); 90static int qfq_class_destroy(struct qfq_if *, struct qfq_class *); 91static int qfq_destroy_locked(struct qfq_if *); 92static inline int qfq_addq(struct qfq_class *, struct mbuf *, struct pf_mtag *); 93static inline struct mbuf *qfq_getq(struct qfq_class *); 94static inline struct mbuf *qfq_pollq(struct qfq_class *); 95static void qfq_purgeq(struct qfq_if *, struct qfq_class *, u_int32_t, 96 u_int32_t *, u_int32_t *); 97static void qfq_purge_sc(struct qfq_if *, cqrq_purge_sc_t *); 98static void qfq_updateq(struct qfq_if *, struct qfq_class *, cqev_t); 99static int qfq_throttle(struct qfq_if *, cqrq_throttle_t *); 100static int qfq_resumeq(struct qfq_if *, struct qfq_class *); 101static int qfq_suspendq(struct qfq_if *, struct qfq_class *); 102static int qfq_stat_sc(struct qfq_if *, cqrq_stat_sc_t *); 103static inline struct qfq_class *qfq_clh_to_clp(struct qfq_if *, u_int32_t); 104static const char *qfq_style(struct qfq_if *); 105 106static inline int qfq_gt(u_int64_t, u_int64_t); 107static inline u_int64_t qfq_round_down(u_int64_t, u_int32_t); 108static inline struct qfq_group *qfq_ffs(struct qfq_if *, pktsched_bitmap_t); 109static int qfq_calc_index(struct qfq_class *, u_int32_t, u_int32_t); 110static inline pktsched_bitmap_t mask_from(pktsched_bitmap_t, int); 111static inline u_int32_t qfq_calc_state(struct qfq_if *, struct qfq_group *); 112static inline void qfq_move_groups(struct qfq_if *, pktsched_bitmap_t, 113 int, int); 114static inline void qfq_unblock_groups(struct qfq_if *, int, u_int64_t); 115static inline void qfq_make_eligible(struct qfq_if *, u_int64_t); 116static inline void qfq_slot_insert(struct qfq_if *, struct qfq_group *, 117 struct qfq_class *, u_int64_t); 118static inline void qfq_front_slot_remove(struct qfq_group *); 119static inline struct qfq_class *qfq_slot_scan(struct qfq_if *, 120 struct qfq_group *); 121static inline void qfq_slot_rotate(struct qfq_if *, struct qfq_group *, 122 u_int64_t); 123static inline void qfq_update_eligible(struct qfq_if *, u_int64_t); 124static inline int qfq_update_class(struct qfq_if *, struct qfq_group *, 125 struct qfq_class *); 126static inline void qfq_update_start(struct qfq_if *, struct qfq_class *); 127static inline void qfq_slot_remove(struct qfq_if *, struct qfq_group *, 128 struct qfq_class *); 129static void qfq_deactivate_class(struct qfq_if *, struct qfq_class *); 130static const char *qfq_state2str(int); 131#if QFQ_DEBUG 132static void qfq_dump_groups(struct qfq_if *, u_int32_t); 133static void qfq_dump_sched(struct qfq_if *, const char *); 134#endif /* QFQ_DEBUG */ 135 136#define QFQ_ZONE_MAX 32 /* maximum elements in zone */ 137#define QFQ_ZONE_NAME "pktsched_qfq" /* zone name */ 138 139static unsigned int qfq_size; /* size of zone element */ 140static struct zone *qfq_zone; /* zone for qfq */ 141 142#define QFQ_CL_ZONE_MAX 32 /* maximum elements in zone */ 143#define QFQ_CL_ZONE_NAME "pktsched_qfq_cl" /* zone name */ 144 145static unsigned int qfq_cl_size; /* size of zone element */ 146static struct zone *qfq_cl_zone; /* zone for qfq_class */ 147 148/* 149 * Maximum number of consecutive slots occupied by backlogged classes 150 * inside a group. This is approx lmax/lmin + 5. Used when ALTQ is 151 * available. 152 * 153 * XXX check because it poses constraints on MAX_INDEX 154 */ 155#define QFQ_MAX_SLOTS 32 /* default when ALTQ is available */ 156 157void 158qfq_init(void) 159{ 160 qfq_size = sizeof (struct qfq_if); 161 qfq_zone = zinit(qfq_size, QFQ_ZONE_MAX * qfq_size, 162 0, QFQ_ZONE_NAME); 163 if (qfq_zone == NULL) { 164 panic("%s: failed allocating %s", __func__, QFQ_ZONE_NAME); 165 /* NOTREACHED */ 166 } 167 zone_change(qfq_zone, Z_EXPAND, TRUE); 168 zone_change(qfq_zone, Z_CALLERACCT, TRUE); 169 170 qfq_cl_size = sizeof (struct qfq_class); 171 qfq_cl_zone = zinit(qfq_cl_size, QFQ_CL_ZONE_MAX * qfq_cl_size, 172 0, QFQ_CL_ZONE_NAME); 173 if (qfq_cl_zone == NULL) { 174 panic("%s: failed allocating %s", __func__, QFQ_CL_ZONE_NAME); 175 /* NOTREACHED */ 176 } 177 zone_change(qfq_cl_zone, Z_EXPAND, TRUE); 178 zone_change(qfq_cl_zone, Z_CALLERACCT, TRUE); 179} 180 181struct qfq_if * 182qfq_alloc(struct ifnet *ifp, int how, boolean_t altq) 183{ 184 struct qfq_if *qif; 185 186 qif = (how == M_WAITOK) ? zalloc(qfq_zone) : zalloc_noblock(qfq_zone); 187 if (qif == NULL) 188 return (NULL); 189 190 bzero(qif, qfq_size); 191 qif->qif_ifq = &ifp->if_snd; 192 if (altq) { 193 qif->qif_maxclasses = QFQ_MAX_CLASSES; 194 qif->qif_maxslots = QFQ_MAX_SLOTS; 195 qif->qif_flags |= QFQIFF_ALTQ; 196 } else { 197 qif->qif_maxclasses = IFCQ_SC_MAX; 198 /* 199 * TODO: adi@apple.com 200 * 201 * Ideally I would like to have the following 202 * but QFQ needs further modifications. 203 * 204 * qif->qif_maxslots = IFCQ_SC_MAX; 205 */ 206 qif->qif_maxslots = QFQ_MAX_SLOTS; 207 } 208 209 if ((qif->qif_class_tbl = _MALLOC(sizeof (struct qfq_class *) * 210 qif->qif_maxclasses, M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) { 211 log(LOG_ERR, "%s: %s unable to allocate class table array\n", 212 if_name(ifp), qfq_style(qif)); 213 goto error; 214 } 215 216 if ((qif->qif_groups = _MALLOC(sizeof (struct qfq_group *) * 217 (QFQ_MAX_INDEX + 1), M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) { 218 log(LOG_ERR, "%s: %s unable to allocate group array\n", 219 if_name(ifp), qfq_style(qif)); 220 goto error; 221 } 222 223 if (pktsched_verbose) { 224 log(LOG_DEBUG, "%s: %s scheduler allocated\n", 225 if_name(ifp), qfq_style(qif)); 226 } 227 228 return (qif); 229 230error: 231 if (qif->qif_class_tbl != NULL) { 232 _FREE(qif->qif_class_tbl, M_DEVBUF); 233 qif->qif_class_tbl = NULL; 234 } 235 if (qif->qif_groups != NULL) { 236 _FREE(qif->qif_groups, M_DEVBUF); 237 qif->qif_groups = NULL; 238 } 239 zfree(qfq_zone, qif); 240 241 return (NULL); 242} 243 244int 245qfq_destroy(struct qfq_if *qif) 246{ 247 struct ifclassq *ifq = qif->qif_ifq; 248 int err; 249 250 IFCQ_LOCK(ifq); 251 err = qfq_destroy_locked(qif); 252 IFCQ_UNLOCK(ifq); 253 254 return (err); 255} 256 257static int 258qfq_destroy_locked(struct qfq_if *qif) 259{ 260 int i; 261 262 IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq); 263 264 (void) qfq_clear_interface(qif); 265 266 VERIFY(qif->qif_class_tbl != NULL); 267 _FREE(qif->qif_class_tbl, M_DEVBUF); 268 qif->qif_class_tbl = NULL; 269 270 VERIFY(qif->qif_groups != NULL); 271 for (i = 0; i <= QFQ_MAX_INDEX; i++) { 272 struct qfq_group *grp = qif->qif_groups[i]; 273 274 if (grp != NULL) { 275 VERIFY(grp->qfg_slots != NULL); 276 _FREE(grp->qfg_slots, M_DEVBUF); 277 grp->qfg_slots = NULL; 278 _FREE(grp, M_DEVBUF); 279 qif->qif_groups[i] = NULL; 280 } 281 } 282 _FREE(qif->qif_groups, M_DEVBUF); 283 qif->qif_groups = NULL; 284 285 if (pktsched_verbose) { 286 log(LOG_DEBUG, "%s: %s scheduler destroyed\n", 287 if_name(QFQIF_IFP(qif)), qfq_style(qif)); 288 } 289 290 zfree(qfq_zone, qif); 291 292 return (0); 293} 294 295/* 296 * bring the interface back to the initial state by discarding 297 * all the filters and classes. 298 */ 299static int 300qfq_clear_interface(struct qfq_if *qif) 301{ 302 struct qfq_class *cl; 303 int i; 304 305 IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq); 306 307 /* clear out the classes */ 308 for (i = 0; i < qif->qif_maxclasses; i++) 309 if ((cl = qif->qif_class_tbl[i]) != NULL) 310 qfq_class_destroy(qif, cl); 311 312 return (0); 313} 314 315/* discard all the queued packets on the interface */ 316void 317qfq_purge(struct qfq_if *qif) 318{ 319 struct qfq_class *cl; 320 int i; 321 322 IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq); 323 324 for (i = 0; i < qif->qif_maxclasses; i++) { 325 if ((cl = qif->qif_class_tbl[i]) != NULL) 326 qfq_purgeq(qif, cl, 0, NULL, NULL); 327 } 328#if !PF_ALTQ 329 /* 330 * This assertion is safe to be made only when PF_ALTQ is not 331 * configured; otherwise, IFCQ_LEN represents the sum of the 332 * packets managed by ifcq_disc and altq_disc instances, which 333 * is possible when transitioning between the two. 334 */ 335 VERIFY(IFCQ_LEN(qif->qif_ifq) == 0); 336#endif /* !PF_ALTQ */ 337} 338 339static void 340qfq_purge_sc(struct qfq_if *qif, cqrq_purge_sc_t *pr) 341{ 342 struct ifclassq *ifq = qif->qif_ifq; 343 u_int32_t i; 344 345 IFCQ_LOCK_ASSERT_HELD(ifq); 346 347 VERIFY(pr->sc == MBUF_SC_UNSPEC || MBUF_VALID_SC(pr->sc)); 348 VERIFY(pr->flow != 0); 349 350 if (pr->sc != MBUF_SC_UNSPEC) { 351 i = MBUF_SCIDX(pr->sc); 352 VERIFY(i < IFCQ_SC_MAX); 353 354 qfq_purgeq(qif, ifq->ifcq_disc_slots[i].cl, 355 pr->flow, &pr->packets, &pr->bytes); 356 } else { 357 u_int32_t cnt, len; 358 359 pr->packets = 0; 360 pr->bytes = 0; 361 362 for (i = 0; i < IFCQ_SC_MAX; i++) { 363 qfq_purgeq(qif, ifq->ifcq_disc_slots[i].cl, 364 pr->flow, &cnt, &len); 365 pr->packets += cnt; 366 pr->bytes += len; 367 } 368 } 369} 370 371void 372qfq_event(struct qfq_if *qif, cqev_t ev) 373{ 374 struct qfq_class *cl; 375 int i; 376 377 IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq); 378 379 for (i = 0; i < qif->qif_maxclasses; i++) 380 if ((cl = qif->qif_class_tbl[i]) != NULL) 381 qfq_updateq(qif, cl, ev); 382} 383 384int 385qfq_add_queue(struct qfq_if *qif, u_int32_t qlimit, u_int32_t weight, 386 u_int32_t maxsz, u_int32_t flags, u_int32_t qid, struct qfq_class **clp) 387{ 388 struct qfq_class *cl; 389 u_int32_t w; 390 391 IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq); 392 393 if (qfq_clh_to_clp(qif, qid) != NULL) 394 return (EBUSY); 395 396 /* check parameters */ 397 if (weight == 0 || weight > QFQ_MAX_WEIGHT) 398 return (EINVAL); 399 400 w = (QFQ_ONE_FP / (QFQ_ONE_FP / weight)); 401 if (qif->qif_wsum + w > QFQ_MAX_WSUM) 402 return (EINVAL); 403 404 if (maxsz == 0 || maxsz > (1 << QFQ_MTU_SHIFT)) 405 return (EINVAL); 406 407 cl = qfq_class_create(qif, weight, qlimit, flags, maxsz, qid); 408 if (cl == NULL) 409 return (ENOMEM); 410 411 if (clp != NULL) 412 *clp = cl; 413 414 return (0); 415} 416 417static struct qfq_class * 418qfq_class_create(struct qfq_if *qif, u_int32_t weight, u_int32_t qlimit, 419 u_int32_t flags, u_int32_t maxsz, u_int32_t qid) 420{ 421 struct ifnet *ifp; 422 struct ifclassq *ifq; 423 struct qfq_group *grp; 424 struct qfq_class *cl; 425 u_int32_t w; /* approximated weight */ 426 int i; 427 428 IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq); 429 430 /* Sanitize flags unless internally configured */ 431 if (qif->qif_flags & QFQIFF_ALTQ) 432 flags &= QFCF_USERFLAGS; 433 434 if (qif->qif_classes >= qif->qif_maxclasses) { 435 log(LOG_ERR, "%s: %s out of classes! (max %d)\n", 436 if_name(QFQIF_IFP(qif)), qfq_style(qif), 437 qif->qif_maxclasses); 438 return (NULL); 439 } 440 441#if !CLASSQ_RED 442 if (flags & QFCF_RED) { 443 log(LOG_ERR, "%s: %s RED not available!\n", 444 if_name(QFQIF_IFP(qif)), qfq_style(qif)); 445 return (NULL); 446 } 447#endif /* !CLASSQ_RED */ 448 449#if !CLASSQ_RIO 450 if (flags & QFCF_RIO) { 451 log(LOG_ERR, "%s: %s RIO not available!\n", 452 if_name(QFQIF_IFP(qif)), qfq_style(qif)); 453 return (NULL); 454 } 455#endif /* CLASSQ_RIO */ 456 457#if !CLASSQ_BLUE 458 if (flags & QFCF_BLUE) { 459 log(LOG_ERR, "%s: %s BLUE not available!\n", 460 if_name(QFQIF_IFP(qif)), qfq_style(qif)); 461 return (NULL); 462 } 463#endif /* CLASSQ_BLUE */ 464 465 /* These are mutually exclusive */ 466 if ((flags & (QFCF_RED|QFCF_RIO|QFCF_BLUE|QFCF_SFB)) && 467 (flags & (QFCF_RED|QFCF_RIO|QFCF_BLUE|QFCF_SFB)) != QFCF_RED && 468 (flags & (QFCF_RED|QFCF_RIO|QFCF_BLUE|QFCF_SFB)) != QFCF_RIO && 469 (flags & (QFCF_RED|QFCF_RIO|QFCF_BLUE|QFCF_SFB)) != QFCF_BLUE && 470 (flags & (QFCF_RED|QFCF_RIO|QFCF_BLUE|QFCF_SFB)) != QFCF_SFB) { 471 log(LOG_ERR, "%s: %s more than one RED|RIO|BLUE|SFB\n", 472 if_name(QFQIF_IFP(qif)), qfq_style(qif)); 473 return (NULL); 474 } 475 476 ifq = qif->qif_ifq; 477 ifp = QFQIF_IFP(qif); 478 479 cl = zalloc(qfq_cl_zone); 480 if (cl == NULL) 481 return (NULL); 482 483 bzero(cl, qfq_cl_size); 484 485 if (qlimit == 0 || qlimit > IFCQ_MAXLEN(ifq)) { 486 qlimit = IFCQ_MAXLEN(ifq); 487 if (qlimit == 0) 488 qlimit = DEFAULT_QLIMIT; /* use default */ 489 } 490 _qinit(&cl->cl_q, Q_DROPTAIL, qlimit); 491 cl->cl_qif = qif; 492 cl->cl_flags = flags; 493 cl->cl_handle = qid; 494 495 /* 496 * Find a free slot in the class table. If the slot matching 497 * the lower bits of qid is free, use this slot. Otherwise, 498 * use the first free slot. 499 */ 500 i = qid % qif->qif_maxclasses; 501 if (qif->qif_class_tbl[i] == NULL) { 502 qif->qif_class_tbl[i] = cl; 503 } else { 504 for (i = 0; i < qif->qif_maxclasses; i++) { 505 if (qif->qif_class_tbl[i] == NULL) { 506 qif->qif_class_tbl[i] = cl; 507 break; 508 } 509 } 510 if (i == qif->qif_maxclasses) { 511 zfree(qfq_cl_zone, cl); 512 return (NULL); 513 } 514 } 515 516 w = weight; 517 VERIFY(w > 0 && w <= QFQ_MAX_WEIGHT); 518 cl->cl_lmax = maxsz; 519 cl->cl_inv_w = (QFQ_ONE_FP / w); 520 w = (QFQ_ONE_FP / cl->cl_inv_w); 521 VERIFY(qif->qif_wsum + w <= QFQ_MAX_WSUM); 522 523 i = qfq_calc_index(cl, cl->cl_inv_w, cl->cl_lmax); 524 VERIFY(i <= QFQ_MAX_INDEX); 525 grp = qif->qif_groups[i]; 526 if (grp == NULL) { 527 grp = _MALLOC(sizeof (*grp), M_DEVBUF, M_WAITOK|M_ZERO); 528 if (grp != NULL) { 529 grp->qfg_index = i; 530 grp->qfg_slot_shift = 531 QFQ_MTU_SHIFT + QFQ_FRAC_BITS - (QFQ_MAX_INDEX - i); 532 grp->qfg_slots = _MALLOC(sizeof (struct qfq_class *) * 533 qif->qif_maxslots, M_DEVBUF, M_WAITOK|M_ZERO); 534 if (grp->qfg_slots == NULL) { 535 log(LOG_ERR, "%s: %s unable to allocate group " 536 "slots for index %d\n", if_name(ifp), 537 qfq_style(qif), i); 538 } 539 } else { 540 log(LOG_ERR, "%s: %s unable to allocate group for " 541 "qid=%d\n", if_name(ifp), qfq_style(qif), 542 cl->cl_handle); 543 } 544 if (grp == NULL || grp->qfg_slots == NULL) { 545 qif->qif_class_tbl[qid % qif->qif_maxclasses] = NULL; 546 if (grp != NULL) 547 _FREE(grp, M_DEVBUF); 548 zfree(qfq_cl_zone, cl); 549 return (NULL); 550 } else { 551 qif->qif_groups[i] = grp; 552 } 553 } 554 cl->cl_grp = grp; 555 qif->qif_wsum += w; 556 /* XXX cl->cl_S = qif->qif_V; ? */ 557 /* XXX compute qif->qif_i_wsum */ 558 559 qif->qif_classes++; 560 561 if (flags & QFCF_DEFAULTCLASS) 562 qif->qif_default = cl; 563 564 if (flags & (QFCF_RED|QFCF_RIO|QFCF_BLUE|QFCF_SFB)) { 565#if CLASSQ_RED || CLASSQ_RIO 566 u_int64_t ifbandwidth = ifnet_output_linkrate(ifp); 567 int pkttime; 568#endif /* CLASSQ_RED || CLASSQ_RIO */ 569 570 cl->cl_qflags = 0; 571 if (flags & QFCF_ECN) { 572 if (flags & QFCF_BLUE) 573 cl->cl_qflags |= BLUEF_ECN; 574 else if (flags & QFCF_SFB) 575 cl->cl_qflags |= SFBF_ECN; 576 else if (flags & QFCF_RED) 577 cl->cl_qflags |= REDF_ECN; 578 else if (flags & QFCF_RIO) 579 cl->cl_qflags |= RIOF_ECN; 580 } 581 if (flags & QFCF_FLOWCTL) { 582 if (flags & QFCF_SFB) 583 cl->cl_qflags |= SFBF_FLOWCTL; 584 } 585 if (flags & QFCF_DELAYBASED) { 586 if (flags & QFCF_SFB) 587 cl->cl_qflags |= SFBF_DELAYBASED; 588 } 589 if (flags & QFCF_CLEARDSCP) { 590 if (flags & QFCF_RIO) 591 cl->cl_qflags |= RIOF_CLEARDSCP; 592 } 593#if CLASSQ_RED || CLASSQ_RIO 594 /* 595 * XXX: RED & RIO should be watching link speed and MTU 596 * events and recompute pkttime accordingly. 597 */ 598 if (ifbandwidth < 8) 599 pkttime = 1000 * 1000 * 1000; /* 1 sec */ 600 else 601 pkttime = (int64_t)ifp->if_mtu * 1000 * 1000 * 1000 / 602 (ifbandwidth / 8); 603 604 /* Test for exclusivity {RED,RIO,BLUE,SFB} was done above */ 605#if CLASSQ_RED 606 if (flags & QFCF_RED) { 607 cl->cl_red = red_alloc(ifp, 0, 0, 608 qlimit(&cl->cl_q) * 10/100, 609 qlimit(&cl->cl_q) * 30/100, 610 cl->cl_qflags, pkttime); 611 if (cl->cl_red != NULL) 612 qtype(&cl->cl_q) = Q_RED; 613 } 614#endif /* CLASSQ_RED */ 615#if CLASSQ_RIO 616 if (flags & QFCF_RIO) { 617 cl->cl_rio = 618 rio_alloc(ifp, 0, NULL, cl->cl_qflags, pkttime); 619 if (cl->cl_rio != NULL) 620 qtype(&cl->cl_q) = Q_RIO; 621 } 622#endif /* CLASSQ_RIO */ 623#endif /* CLASSQ_RED || CLASSQ_RIO */ 624#if CLASSQ_BLUE 625 if (flags & QFCF_BLUE) { 626 cl->cl_blue = blue_alloc(ifp, 0, 0, cl->cl_qflags); 627 if (cl->cl_blue != NULL) 628 qtype(&cl->cl_q) = Q_BLUE; 629 } 630#endif /* CLASSQ_BLUE */ 631 if (flags & QFCF_SFB) { 632 if (!(cl->cl_flags & QFCF_LAZY)) 633 cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle, 634 qlimit(&cl->cl_q), cl->cl_qflags); 635 if (cl->cl_sfb != NULL || (cl->cl_flags & QFCF_LAZY)) 636 qtype(&cl->cl_q) = Q_SFB; 637 } 638 } 639 640 if (pktsched_verbose) { 641 log(LOG_DEBUG, "%s: %s created qid=%d grp=%d weight=%d " 642 "qlimit=%d flags=%b\n", if_name(ifp), qfq_style(qif), 643 cl->cl_handle, cl->cl_grp->qfg_index, weight, qlimit, 644 flags, QFCF_BITS); 645 } 646 647 return (cl); 648} 649 650int 651qfq_remove_queue(struct qfq_if *qif, u_int32_t qid) 652{ 653 struct qfq_class *cl; 654 655 IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq); 656 657 if ((cl = qfq_clh_to_clp(qif, qid)) == NULL) 658 return (EINVAL); 659 660 return (qfq_class_destroy(qif, cl)); 661} 662 663static int 664qfq_class_destroy(struct qfq_if *qif, struct qfq_class *cl) 665{ 666 struct ifclassq *ifq = qif->qif_ifq; 667 int i; 668 669 IFCQ_LOCK_ASSERT_HELD(ifq); 670 671 qfq_purgeq(qif, cl, 0, NULL, NULL); 672 673 if (cl->cl_inv_w != 0) { 674 qif->qif_wsum -= (QFQ_ONE_FP / cl->cl_inv_w); 675 cl->cl_inv_w = 0; /* reset weight to avoid run twice */ 676 } 677 678 for (i = 0; i < qif->qif_maxclasses; i++) { 679 if (qif->qif_class_tbl[i] == cl) { 680 qif->qif_class_tbl[i] = NULL; 681 break; 682 } 683 } 684 qif->qif_classes--; 685 686 if (cl->cl_qalg.ptr != NULL) { 687#if CLASSQ_RIO 688 if (q_is_rio(&cl->cl_q)) 689 rio_destroy(cl->cl_rio); 690#endif /* CLASSQ_RIO */ 691#if CLASSQ_RED 692 if (q_is_red(&cl->cl_q)) 693 red_destroy(cl->cl_red); 694#endif /* CLASSQ_RED */ 695#if CLASSQ_BLUE 696 if (q_is_blue(&cl->cl_q)) 697 blue_destroy(cl->cl_blue); 698#endif /* CLASSQ_BLUE */ 699 if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL) 700 sfb_destroy(cl->cl_sfb); 701 cl->cl_qalg.ptr = NULL; 702 qtype(&cl->cl_q) = Q_DROPTAIL; 703 qstate(&cl->cl_q) = QS_RUNNING; 704 } 705 706 if (qif->qif_default == cl) 707 qif->qif_default = NULL; 708 709 if (pktsched_verbose) { 710 log(LOG_DEBUG, "%s: %s destroyed qid=%d\n", 711 if_name(QFQIF_IFP(qif)), qfq_style(qif), cl->cl_handle); 712 } 713 714 zfree(qfq_cl_zone, cl); 715 716 return (0); 717} 718 719/* 720 * Calculate a mask to mimic what would be ffs_from() 721 */ 722static inline pktsched_bitmap_t 723mask_from(pktsched_bitmap_t bitmap, int from) 724{ 725 return (bitmap & ~((1UL << from) - 1)); 726} 727 728/* 729 * The state computation relies on ER=0, IR=1, EB=2, IB=3 730 * First compute eligibility comparing grp->qfg_S, qif->qif_V, 731 * then check if someone is blocking us and possibly add EB 732 */ 733static inline u_int32_t 734qfq_calc_state(struct qfq_if *qif, struct qfq_group *grp) 735{ 736 /* if S > V we are not eligible */ 737 u_int32_t state = qfq_gt(grp->qfg_S, qif->qif_V); 738 pktsched_bitmap_t mask = mask_from(qif->qif_bitmaps[ER], 739 grp->qfg_index); 740 struct qfq_group *next; 741 742 if (mask) { 743 next = qfq_ffs(qif, mask); 744 if (qfq_gt(grp->qfg_F, next->qfg_F)) 745 state |= EB; 746 } 747 748 return (state); 749} 750 751/* 752 * In principle 753 * qif->qif_bitmaps[dst] |= qif->qif_bitmaps[src] & mask; 754 * qif->qif_bitmaps[src] &= ~mask; 755 * but we should make sure that src != dst 756 */ 757static inline void 758qfq_move_groups(struct qfq_if *qif, pktsched_bitmap_t mask, int src, int dst) 759{ 760 qif->qif_bitmaps[dst] |= qif->qif_bitmaps[src] & mask; 761 qif->qif_bitmaps[src] &= ~mask; 762} 763 764static inline void 765qfq_unblock_groups(struct qfq_if *qif, int index, u_int64_t old_finish) 766{ 767 pktsched_bitmap_t mask = mask_from(qif->qif_bitmaps[ER], index + 1); 768 struct qfq_group *next; 769 770 if (mask) { 771 next = qfq_ffs(qif, mask); 772 if (!qfq_gt(next->qfg_F, old_finish)) 773 return; 774 } 775 776 mask = (1UL << index) - 1; 777 qfq_move_groups(qif, mask, EB, ER); 778 qfq_move_groups(qif, mask, IB, IR); 779} 780 781/* 782 * perhaps 783 * 784 * old_V ^= qif->qif_V; 785 * old_V >>= QFQ_MIN_SLOT_SHIFT; 786 * if (old_V) { 787 * ... 788 * } 789 */ 790static inline void 791qfq_make_eligible(struct qfq_if *qif, u_int64_t old_V) 792{ 793 pktsched_bitmap_t mask, vslot, old_vslot; 794 795 vslot = qif->qif_V >> QFQ_MIN_SLOT_SHIFT; 796 old_vslot = old_V >> QFQ_MIN_SLOT_SHIFT; 797 798 if (vslot != old_vslot) { 799 mask = (2UL << (__fls(vslot ^ old_vslot))) - 1; 800 qfq_move_groups(qif, mask, IR, ER); 801 qfq_move_groups(qif, mask, IB, EB); 802 } 803} 804 805/* 806 * XXX we should make sure that slot becomes less than 32. 807 * This is guaranteed by the input values. 808 * roundedS is always cl->qfg_S rounded on grp->qfg_slot_shift bits. 809 */ 810static inline void 811qfq_slot_insert(struct qfq_if *qif, struct qfq_group *grp, 812 struct qfq_class *cl, u_int64_t roundedS) 813{ 814 u_int64_t slot = (roundedS - grp->qfg_S) >> grp->qfg_slot_shift; 815 u_int32_t i = (grp->qfg_front + slot) % qif->qif_maxslots; 816 817 cl->cl_next = grp->qfg_slots[i]; 818 grp->qfg_slots[i] = cl; 819 pktsched_bit_set(slot, &grp->qfg_full_slots); 820} 821 822/* 823 * remove the entry from the slot 824 */ 825static inline void 826qfq_front_slot_remove(struct qfq_group *grp) 827{ 828 struct qfq_class **h = &grp->qfg_slots[grp->qfg_front]; 829 830 *h = (*h)->cl_next; 831 if (!*h) 832 pktsched_bit_clr(0, &grp->qfg_full_slots); 833} 834 835/* 836 * Returns the first full queue in a group. As a side effect, 837 * adjust the bucket list so the first non-empty bucket is at 838 * position 0 in qfg_full_slots. 839 */ 840static inline struct qfq_class * 841qfq_slot_scan(struct qfq_if *qif, struct qfq_group *grp) 842{ 843 int i; 844 845 if (pktsched_verbose > 2) { 846 log(LOG_DEBUG, "%s: %s grp=%d full_slots=0x%x\n", 847 if_name(QFQIF_IFP(qif)), qfq_style(qif), grp->qfg_index, 848 grp->qfg_full_slots); 849 } 850 851 if (grp->qfg_full_slots == 0) 852 return (NULL); 853 854 i = pktsched_ffs(grp->qfg_full_slots) - 1; /* zero-based */ 855 if (i > 0) { 856 grp->qfg_front = (grp->qfg_front + i) % qif->qif_maxslots; 857 grp->qfg_full_slots >>= i; 858 } 859 860 return (grp->qfg_slots[grp->qfg_front]); 861} 862 863/* 864 * adjust the bucket list. When the start time of a group decreases, 865 * we move the index down (modulo qif->qif_maxslots) so we don't need to 866 * move the objects. The mask of occupied slots must be shifted 867 * because we use ffs() to find the first non-empty slot. 868 * This covers decreases in the group's start time, but what about 869 * increases of the start time ? 870 * Here too we should make sure that i is less than 32 871 */ 872static inline void 873qfq_slot_rotate(struct qfq_if *qif, struct qfq_group *grp, u_int64_t roundedS) 874{ 875#pragma unused(qif) 876 u_int32_t i = (grp->qfg_S - roundedS) >> grp->qfg_slot_shift; 877 878 grp->qfg_full_slots <<= i; 879 grp->qfg_front = (grp->qfg_front - i) % qif->qif_maxslots; 880} 881 882static inline void 883qfq_update_eligible(struct qfq_if *qif, u_int64_t old_V) 884{ 885 pktsched_bitmap_t ineligible; 886 887 ineligible = qif->qif_bitmaps[IR] | qif->qif_bitmaps[IB]; 888 if (ineligible) { 889 if (!qif->qif_bitmaps[ER]) { 890 struct qfq_group *grp; 891 grp = qfq_ffs(qif, ineligible); 892 if (qfq_gt(grp->qfg_S, qif->qif_V)) 893 qif->qif_V = grp->qfg_S; 894 } 895 qfq_make_eligible(qif, old_V); 896 } 897} 898 899/* 900 * Updates the class, returns true if also the group needs to be updated. 901 */ 902static inline int 903qfq_update_class(struct qfq_if *qif, struct qfq_group *grp, 904 struct qfq_class *cl) 905{ 906#pragma unused(qif) 907 cl->cl_S = cl->cl_F; 908 if (qempty(&cl->cl_q)) { 909 qfq_front_slot_remove(grp); 910 } else { 911 u_int32_t len; 912 u_int64_t roundedS; 913 914 len = m_pktlen(qhead(&cl->cl_q)); 915 cl->cl_F = cl->cl_S + (u_int64_t)len * cl->cl_inv_w; 916 roundedS = qfq_round_down(cl->cl_S, grp->qfg_slot_shift); 917 if (roundedS == grp->qfg_S) 918 return (0); 919 920 qfq_front_slot_remove(grp); 921 qfq_slot_insert(qif, grp, cl, roundedS); 922 } 923 return (1); 924} 925 926/* 927 * note: CLASSQDQ_POLL returns the next packet without removing the packet 928 * from the queue. CLASSQDQ_REMOVE is a normal dequeue operation. 929 * CLASSQDQ_REMOVE must return the same packet if called immediately 930 * after CLASSQDQ_POLL. 931 */ 932struct mbuf * 933qfq_dequeue(struct qfq_if *qif, cqdq_op_t op) 934{ 935 pktsched_bitmap_t er_bits = qif->qif_bitmaps[ER]; 936 struct ifclassq *ifq = qif->qif_ifq; 937 struct qfq_group *grp; 938 struct qfq_class *cl; 939 struct mbuf *m; 940 u_int64_t old_V; 941 u_int32_t len; 942 943 IFCQ_LOCK_ASSERT_HELD(ifq); 944 945 for (;;) { 946 if (er_bits == 0) { 947#if QFQ_DEBUG 948 if (qif->qif_queued && pktsched_verbose > 1) 949 qfq_dump_sched(qif, "start dequeue"); 950#endif /* QFQ_DEBUG */ 951 /* no eligible and ready packet */ 952 return (NULL); 953 } 954 grp = qfq_ffs(qif, er_bits); 955 /* if group is non-empty, use it */ 956 if (grp->qfg_full_slots != 0) 957 break; 958 pktsched_bit_clr(grp->qfg_index, &er_bits); 959#if QFQ_DEBUG 960 qif->qif_emptygrp++; 961#endif /* QFQ_DEBUG */ 962 } 963 VERIFY(!IFCQ_IS_EMPTY(ifq)); 964 965 cl = grp->qfg_slots[grp->qfg_front]; 966 VERIFY(cl != NULL && !qempty(&cl->cl_q)); 967 968 if (op == CLASSQDQ_POLL) 969 return (qfq_pollq(cl)); 970 971 m = qfq_getq(cl); 972 VERIFY(m != NULL); /* qalg must be work conserving */ 973 len = m_pktlen(m); 974 975#if QFQ_DEBUG 976 qif->qif_queued--; 977#endif /* QFQ_DEBUG */ 978 979 IFCQ_DEC_LEN(ifq); 980 if (qempty(&cl->cl_q)) 981 cl->cl_period++; 982 PKTCNTR_ADD(&cl->cl_xmitcnt, 1, len); 983 IFCQ_XMIT_ADD(ifq, 1, len); 984 985 old_V = qif->qif_V; 986 qif->qif_V += (u_int64_t)len * QFQ_IWSUM; 987 988 if (pktsched_verbose > 2) { 989 log(LOG_DEBUG, "%s: %s qid=%d dequeue m=0x%llx F=0x%llx " 990 "V=0x%llx", if_name(QFQIF_IFP(qif)), qfq_style(qif), 991 cl->cl_handle, (uint64_t)VM_KERNEL_ADDRPERM(m), cl->cl_F, 992 qif->qif_V); 993 } 994 995 if (qfq_update_class(qif, grp, cl)) { 996 u_int64_t old_F = grp->qfg_F; 997 998 cl = qfq_slot_scan(qif, grp); 999 if (!cl) { /* group gone, remove from ER */ 1000 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[ER]); 1001 } else { 1002 u_int32_t s; 1003 u_int64_t roundedS = 1004 qfq_round_down(cl->cl_S, grp->qfg_slot_shift); 1005 1006 if (grp->qfg_S == roundedS) 1007 goto skip_unblock; 1008 1009 grp->qfg_S = roundedS; 1010 grp->qfg_F = roundedS + (2ULL << grp->qfg_slot_shift); 1011 1012 /* remove from ER and put in the new set */ 1013 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[ER]); 1014 s = qfq_calc_state(qif, grp); 1015 pktsched_bit_set(grp->qfg_index, &qif->qif_bitmaps[s]); 1016 } 1017 /* we need to unblock even if the group has gone away */ 1018 qfq_unblock_groups(qif, grp->qfg_index, old_F); 1019 } 1020 1021skip_unblock: 1022 qfq_update_eligible(qif, old_V); 1023 1024#if QFQ_DEBUG 1025 if (!qif->qif_bitmaps[ER] && qif->qif_queued && pktsched_verbose > 1) 1026 qfq_dump_sched(qif, "end dequeue"); 1027#endif /* QFQ_DEBUG */ 1028 1029 return (m); 1030} 1031 1032/* 1033 * Assign a reasonable start time for a new flow k in group i. 1034 * Admissible values for hat(F) are multiples of sigma_i 1035 * no greater than V+sigma_i . Larger values mean that 1036 * we had a wraparound so we consider the timestamp to be stale. 1037 * 1038 * If F is not stale and F >= V then we set S = F. 1039 * Otherwise we should assign S = V, but this may violate 1040 * the ordering in ER. So, if we have groups in ER, set S to 1041 * the F_j of the first group j which would be blocking us. 1042 * We are guaranteed not to move S backward because 1043 * otherwise our group i would still be blocked. 1044 */ 1045static inline void 1046qfq_update_start(struct qfq_if *qif, struct qfq_class *cl) 1047{ 1048 pktsched_bitmap_t mask; 1049 u_int64_t limit, roundedF; 1050 int slot_shift = cl->cl_grp->qfg_slot_shift; 1051 1052 roundedF = qfq_round_down(cl->cl_F, slot_shift); 1053 limit = qfq_round_down(qif->qif_V, slot_shift) + (1UL << slot_shift); 1054 1055 if (!qfq_gt(cl->cl_F, qif->qif_V) || qfq_gt(roundedF, limit)) { 1056 /* timestamp was stale */ 1057 mask = mask_from(qif->qif_bitmaps[ER], cl->cl_grp->qfg_index); 1058 if (mask) { 1059 struct qfq_group *next = qfq_ffs(qif, mask); 1060 if (qfq_gt(roundedF, next->qfg_F)) { 1061 cl->cl_S = next->qfg_F; 1062 return; 1063 } 1064 } 1065 cl->cl_S = qif->qif_V; 1066 } else { /* timestamp is not stale */ 1067 cl->cl_S = cl->cl_F; 1068 } 1069} 1070 1071int 1072qfq_enqueue(struct qfq_if *qif, struct qfq_class *cl, struct mbuf *m, 1073 struct pf_mtag *t) 1074{ 1075 struct ifclassq *ifq = qif->qif_ifq; 1076 struct qfq_group *grp; 1077 u_int64_t roundedS; 1078 int len, ret, s; 1079 1080 IFCQ_LOCK_ASSERT_HELD(ifq); 1081 VERIFY(cl == NULL || cl->cl_qif == qif); 1082 1083 if (cl == NULL) { 1084#if PF_ALTQ 1085 cl = qfq_clh_to_clp(qif, t->pftag_qid); 1086#else /* !PF_ALTQ */ 1087 cl = qfq_clh_to_clp(qif, 0); 1088#endif /* !PF_ALTQ */ 1089 if (cl == NULL) { 1090 cl = qif->qif_default; 1091 if (cl == NULL) { 1092 IFCQ_CONVERT_LOCK(ifq); 1093 m_freem(m); 1094 return (ENOBUFS); 1095 } 1096 } 1097 } 1098 1099 len = m_pktlen(m); 1100 1101 ret = qfq_addq(cl, m, t); 1102 if (ret != 0) { 1103 if (ret == CLASSQEQ_SUCCESS_FC) { 1104 /* packet enqueued, return advisory feedback */ 1105 ret = EQFULL; 1106 } else { 1107 VERIFY(ret == CLASSQEQ_DROPPED || 1108 ret == CLASSQEQ_DROPPED_FC || 1109 ret == CLASSQEQ_DROPPED_SP); 1110 /* packet has been freed in qfq_addq */ 1111 PKTCNTR_ADD(&cl->cl_dropcnt, 1, len); 1112 IFCQ_DROP_ADD(ifq, 1, len); 1113 switch (ret) { 1114 case CLASSQEQ_DROPPED: 1115 return (ENOBUFS); 1116 case CLASSQEQ_DROPPED_FC: 1117 return (EQFULL); 1118 case CLASSQEQ_DROPPED_SP: 1119 return (EQSUSPENDED); 1120 } 1121 /* NOT REACHED */ 1122 } 1123 } 1124 IFCQ_INC_LEN(ifq); 1125 1126#if QFQ_DEBUG 1127 qif->qif_queued++; 1128#endif /* QFQ_DEBUG */ 1129 1130 /* queue was not idle, we're done */ 1131 if (qlen(&cl->cl_q) > 1) 1132 goto done; 1133 1134 /* queue was idle */ 1135 grp = cl->cl_grp; 1136 qfq_update_start(qif, cl); /* adjust start time */ 1137 1138 /* compute new finish time and rounded start */ 1139 cl->cl_F = cl->cl_S + (u_int64_t)len * cl->cl_inv_w; 1140 roundedS = qfq_round_down(cl->cl_S, grp->qfg_slot_shift); 1141 1142 /* 1143 * Insert cl in the correct bucket. 1144 * 1145 * If cl->cl_S >= grp->qfg_S we don't need to adjust the bucket list 1146 * and simply go to the insertion phase. Otherwise grp->qfg_S is 1147 * decreasing, we must make room in the bucket list, and also 1148 * recompute the group state. Finally, if there were no flows 1149 * in this group and nobody was in ER make sure to adjust V. 1150 */ 1151 if (grp->qfg_full_slots != 0) { 1152 if (!qfq_gt(grp->qfg_S, cl->cl_S)) 1153 goto skip_update; 1154 1155 /* create a slot for this cl->cl_S */ 1156 qfq_slot_rotate(qif, grp, roundedS); 1157 1158 /* group was surely ineligible, remove */ 1159 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IR]); 1160 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IB]); 1161 } else if (!qif->qif_bitmaps[ER] && qfq_gt(roundedS, qif->qif_V)) { 1162 qif->qif_V = roundedS; 1163 } 1164 1165 grp->qfg_S = roundedS; 1166 grp->qfg_F = 1167 roundedS + (2ULL << grp->qfg_slot_shift); /* i.e. 2 sigma_i */ 1168 s = qfq_calc_state(qif, grp); 1169 pktsched_bit_set(grp->qfg_index, &qif->qif_bitmaps[s]); 1170 1171 if (pktsched_verbose > 2) { 1172 log(LOG_DEBUG, "%s: %s qid=%d enqueue m=0x%llx state=%s 0x%x " 1173 "S=0x%llx F=0x%llx V=0x%llx\n", if_name(QFQIF_IFP(qif)), 1174 qfq_style(qif), cl->cl_handle, 1175 (uint64_t)VM_KERNEL_ADDRPERM(m), qfq_state2str(s), 1176 qif->qif_bitmaps[s], cl->cl_S, cl->cl_F, qif->qif_V); 1177 } 1178 1179skip_update: 1180 qfq_slot_insert(qif, grp, cl, roundedS); 1181 1182done: 1183 /* successfully queued. */ 1184 return (ret); 1185} 1186 1187static inline void 1188qfq_slot_remove(struct qfq_if *qif, struct qfq_group *grp, 1189 struct qfq_class *cl) 1190{ 1191#pragma unused(qif) 1192 struct qfq_class **pprev; 1193 u_int32_t i, offset; 1194 u_int64_t roundedS; 1195 1196 roundedS = qfq_round_down(cl->cl_S, grp->qfg_slot_shift); 1197 offset = (roundedS - grp->qfg_S) >> grp->qfg_slot_shift; 1198 i = (grp->qfg_front + offset) % qif->qif_maxslots; 1199 1200 pprev = &grp->qfg_slots[i]; 1201 while (*pprev && *pprev != cl) 1202 pprev = &(*pprev)->cl_next; 1203 1204 *pprev = cl->cl_next; 1205 if (!grp->qfg_slots[i]) 1206 pktsched_bit_clr(offset, &grp->qfg_full_slots); 1207} 1208 1209/* 1210 * Called to forcibly destroy a queue. 1211 * If the queue is not in the front bucket, or if it has 1212 * other queues in the front bucket, we can simply remove 1213 * the queue with no other side effects. 1214 * Otherwise we must propagate the event up. 1215 * XXX description to be completed. 1216 */ 1217static void 1218qfq_deactivate_class(struct qfq_if *qif, struct qfq_class *cl) 1219{ 1220 struct qfq_group *grp = cl->cl_grp; 1221 pktsched_bitmap_t mask; 1222 u_int64_t roundedS; 1223 int s; 1224 1225 if (pktsched_verbose) { 1226 log(LOG_DEBUG, "%s: %s deactivate qid=%d grp=%d " 1227 "full_slots=0x%x front=%d bitmaps={ER=0x%x,EB=0x%x," 1228 "IR=0x%x,IB=0x%x}\n", 1229 if_name(QFQIF_IFP(cl->cl_qif)), qfq_style(cl->cl_qif), 1230 cl->cl_handle, grp->qfg_index, grp->qfg_full_slots, 1231 grp->qfg_front, qif->qif_bitmaps[ER], qif->qif_bitmaps[EB], 1232 qif->qif_bitmaps[IR], qif->qif_bitmaps[IB]); 1233#if QFQ_DEBUG 1234 if (pktsched_verbose > 1) 1235 qfq_dump_sched(qif, "start deactivate"); 1236#endif /* QFQ_DEBUG */ 1237 } 1238 1239 cl->cl_F = cl->cl_S; /* not needed if the class goes away */ 1240 qfq_slot_remove(qif, grp, cl); 1241 1242 if (grp->qfg_full_slots == 0) { 1243 /* 1244 * Nothing left in the group, remove from all sets. 1245 * Do ER last because if we were blocking other groups 1246 * we must unblock them. 1247 */ 1248 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IR]); 1249 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[EB]); 1250 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IB]); 1251 1252 if (pktsched_bit_tst(grp->qfg_index, &qif->qif_bitmaps[ER]) && 1253 !(qif->qif_bitmaps[ER] & ~((1UL << grp->qfg_index) - 1))) { 1254 mask = qif->qif_bitmaps[ER] & 1255 ((1UL << grp->qfg_index) - 1); 1256 if (mask) 1257 mask = ~((1UL << __fls(mask)) - 1); 1258 else 1259 mask = (pktsched_bitmap_t)~0UL; 1260 qfq_move_groups(qif, mask, EB, ER); 1261 qfq_move_groups(qif, mask, IB, IR); 1262 } 1263 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[ER]); 1264 } else if (!grp->qfg_slots[grp->qfg_front]) { 1265 cl = qfq_slot_scan(qif, grp); 1266 roundedS = qfq_round_down(cl->cl_S, grp->qfg_slot_shift); 1267 if (grp->qfg_S != roundedS) { 1268 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[ER]); 1269 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IR]); 1270 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[EB]); 1271 pktsched_bit_clr(grp->qfg_index, &qif->qif_bitmaps[IB]); 1272 grp->qfg_S = roundedS; 1273 grp->qfg_F = roundedS + (2ULL << grp->qfg_slot_shift); 1274 s = qfq_calc_state(qif, grp); 1275 pktsched_bit_set(grp->qfg_index, &qif->qif_bitmaps[s]); 1276 } 1277 } 1278 qfq_update_eligible(qif, qif->qif_V); 1279 1280#if QFQ_DEBUG 1281 if (pktsched_verbose > 1) 1282 qfq_dump_sched(qif, "end deactivate"); 1283#endif /* QFQ_DEBUG */ 1284} 1285 1286static const char * 1287qfq_state2str(int s) 1288{ 1289 const char *c; 1290 1291 switch (s) { 1292 case ER: 1293 c = "ER"; 1294 break; 1295 case IR: 1296 c = "IR"; 1297 break; 1298 case EB: 1299 c = "EB"; 1300 break; 1301 case IB: 1302 c = "IB"; 1303 break; 1304 default: 1305 c = "?"; 1306 break; 1307 } 1308 return (c); 1309} 1310 1311static inline int 1312qfq_addq(struct qfq_class *cl, struct mbuf *m, struct pf_mtag *t) 1313{ 1314 struct qfq_if *qif = cl->cl_qif; 1315 struct ifclassq *ifq = qif->qif_ifq; 1316 1317 IFCQ_LOCK_ASSERT_HELD(ifq); 1318 1319#if CLASSQ_RIO 1320 if (q_is_rio(&cl->cl_q)) 1321 return (rio_addq(cl->cl_rio, &cl->cl_q, m, t)); 1322 else 1323#endif /* CLASSQ_RIO */ 1324#if CLASSQ_RED 1325 if (q_is_red(&cl->cl_q)) 1326 return (red_addq(cl->cl_red, &cl->cl_q, m, t)); 1327 else 1328#endif /* CLASSQ_RED */ 1329#if CLASSQ_BLUE 1330 if (q_is_blue(&cl->cl_q)) 1331 return (blue_addq(cl->cl_blue, &cl->cl_q, m, t)); 1332 else 1333#endif /* CLASSQ_BLUE */ 1334 if (q_is_sfb(&cl->cl_q)) { 1335 if (cl->cl_sfb == NULL) { 1336 struct ifnet *ifp = QFQIF_IFP(qif); 1337 1338 VERIFY(cl->cl_flags & QFCF_LAZY); 1339 cl->cl_flags &= ~QFCF_LAZY; 1340 IFCQ_CONVERT_LOCK(ifq); 1341 1342 cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle, 1343 qlimit(&cl->cl_q), cl->cl_qflags); 1344 if (cl->cl_sfb == NULL) { 1345 /* fall back to droptail */ 1346 qtype(&cl->cl_q) = Q_DROPTAIL; 1347 cl->cl_flags &= ~QFCF_SFB; 1348 cl->cl_qflags &= ~(SFBF_ECN | SFBF_FLOWCTL); 1349 1350 log(LOG_ERR, "%s: %s SFB lazy allocation " 1351 "failed for qid=%d grp=%d, falling back " 1352 "to DROPTAIL\n", if_name(ifp), 1353 qfq_style(qif), cl->cl_handle, 1354 cl->cl_grp->qfg_index); 1355 } else if (qif->qif_throttle != IFNET_THROTTLE_OFF) { 1356 /* if there's pending throttling, set it */ 1357 cqrq_throttle_t tr = { 1, qif->qif_throttle }; 1358 int err = qfq_throttle(qif, &tr); 1359 1360 if (err == EALREADY) 1361 err = 0; 1362 if (err != 0) { 1363 tr.level = IFNET_THROTTLE_OFF; 1364 (void) qfq_throttle(qif, &tr); 1365 } 1366 } 1367 } 1368 if (cl->cl_sfb != NULL) 1369 return (sfb_addq(cl->cl_sfb, &cl->cl_q, m, t)); 1370 } else if (qlen(&cl->cl_q) >= qlimit(&cl->cl_q)) { 1371 IFCQ_CONVERT_LOCK(ifq); 1372 m_freem(m); 1373 return (CLASSQEQ_DROPPED); 1374 } 1375 1376#if PF_ECN 1377 if (cl->cl_flags & QFCF_CLEARDSCP) 1378 write_dsfield(m, t, 0); 1379#endif /* PF_ECN */ 1380 1381 _addq(&cl->cl_q, m); 1382 1383 return (0); 1384} 1385 1386static inline struct mbuf * 1387qfq_getq(struct qfq_class *cl) 1388{ 1389 IFCQ_LOCK_ASSERT_HELD(cl->cl_qif->qif_ifq); 1390 1391#if CLASSQ_RIO 1392 if (q_is_rio(&cl->cl_q)) 1393 return (rio_getq(cl->cl_rio, &cl->cl_q)); 1394 else 1395#endif /* CLASSQ_RIO */ 1396#if CLASSQ_RED 1397 if (q_is_red(&cl->cl_q)) 1398 return (red_getq(cl->cl_red, &cl->cl_q)); 1399 else 1400#endif /* CLASSQ_RED */ 1401#if CLASSQ_BLUE 1402 if (q_is_blue(&cl->cl_q)) 1403 return (blue_getq(cl->cl_blue, &cl->cl_q)); 1404 else 1405#endif /* CLASSQ_BLUE */ 1406 if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL) 1407 return (sfb_getq(cl->cl_sfb, &cl->cl_q)); 1408 1409 return (_getq(&cl->cl_q)); 1410} 1411 1412static inline struct mbuf * 1413qfq_pollq(struct qfq_class *cl) 1414{ 1415 IFCQ_LOCK_ASSERT_HELD(cl->cl_qif->qif_ifq); 1416 1417 return (qhead(&cl->cl_q)); 1418} 1419 1420static void 1421qfq_purgeq(struct qfq_if *qif, struct qfq_class *cl, u_int32_t flow, 1422 u_int32_t *packets, u_int32_t *bytes) 1423{ 1424 struct ifclassq *ifq = qif->qif_ifq; 1425 u_int32_t cnt = 0, len = 0, qlen; 1426 1427 IFCQ_LOCK_ASSERT_HELD(ifq); 1428 1429 if ((qlen = qlen(&cl->cl_q)) == 0) 1430 goto done; 1431 1432 /* become regular mutex before freeing mbufs */ 1433 IFCQ_CONVERT_LOCK(ifq); 1434 1435#if CLASSQ_RIO 1436 if (q_is_rio(&cl->cl_q)) 1437 rio_purgeq(cl->cl_rio, &cl->cl_q, flow, &cnt, &len); 1438 else 1439#endif /* CLASSQ_RIO */ 1440#if CLASSQ_RED 1441 if (q_is_red(&cl->cl_q)) 1442 red_purgeq(cl->cl_red, &cl->cl_q, flow, &cnt, &len); 1443 else 1444#endif /* CLASSQ_RED */ 1445#if CLASSQ_BLUE 1446 if (q_is_blue(&cl->cl_q)) 1447 blue_purgeq(cl->cl_blue, &cl->cl_q, flow, &cnt, &len); 1448 else 1449#endif /* CLASSQ_BLUE */ 1450 if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL) 1451 sfb_purgeq(cl->cl_sfb, &cl->cl_q, flow, &cnt, &len); 1452 else 1453 _flushq_flow(&cl->cl_q, flow, &cnt, &len); 1454 1455 if (cnt > 0) { 1456 VERIFY(qlen(&cl->cl_q) == (qlen - cnt)); 1457#if QFQ_DEBUG 1458 VERIFY(qif->qif_queued >= cnt); 1459 qif->qif_queued -= cnt; 1460#endif /* QFQ_DEBUG */ 1461 1462 PKTCNTR_ADD(&cl->cl_dropcnt, cnt, len); 1463 IFCQ_DROP_ADD(ifq, cnt, len); 1464 1465 VERIFY(((signed)IFCQ_LEN(ifq) - cnt) >= 0); 1466 IFCQ_LEN(ifq) -= cnt; 1467 1468 if (qempty(&cl->cl_q)) 1469 qfq_deactivate_class(qif, cl); 1470 1471 if (pktsched_verbose) { 1472 log(LOG_DEBUG, "%s: %s purge qid=%d weight=%d " 1473 "qlen=[%d,%d] cnt=%d len=%d flow=0x%x\n", 1474 if_name(QFQIF_IFP(qif)), 1475 qfq_style(qif), cl->cl_handle, 1476 (u_int32_t)(QFQ_ONE_FP / cl->cl_inv_w), qlen, 1477 qlen(&cl->cl_q), cnt, len, flow); 1478 } 1479 } 1480done: 1481 if (packets != NULL) 1482 *packets = cnt; 1483 if (bytes != NULL) 1484 *bytes = len; 1485} 1486 1487static void 1488qfq_updateq(struct qfq_if *qif, struct qfq_class *cl, cqev_t ev) 1489{ 1490 IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq); 1491 1492 if (pktsched_verbose) { 1493 log(LOG_DEBUG, "%s: %s update qid=%d weight=%d event=%s\n", 1494 if_name(QFQIF_IFP(qif)), qfq_style(qif), 1495 cl->cl_handle, (u_int32_t)(QFQ_ONE_FP / cl->cl_inv_w), 1496 ifclassq_ev2str(ev)); 1497 } 1498 1499#if CLASSQ_RIO 1500 if (q_is_rio(&cl->cl_q)) 1501 return (rio_updateq(cl->cl_rio, ev)); 1502#endif /* CLASSQ_RIO */ 1503#if CLASSQ_RED 1504 if (q_is_red(&cl->cl_q)) 1505 return (red_updateq(cl->cl_red, ev)); 1506#endif /* CLASSQ_RED */ 1507#if CLASSQ_BLUE 1508 if (q_is_blue(&cl->cl_q)) 1509 return (blue_updateq(cl->cl_blue, ev)); 1510#endif /* CLASSQ_BLUE */ 1511 if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL) 1512 return (sfb_updateq(cl->cl_sfb, ev)); 1513} 1514 1515int 1516qfq_get_class_stats(struct qfq_if *qif, u_int32_t qid, 1517 struct qfq_classstats *sp) 1518{ 1519 struct qfq_class *cl; 1520 1521 IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq); 1522 1523 if ((cl = qfq_clh_to_clp(qif, qid)) == NULL) 1524 return (EINVAL); 1525 1526 sp->class_handle = cl->cl_handle; 1527 sp->index = cl->cl_grp->qfg_index; 1528 sp->weight = (QFQ_ONE_FP / cl->cl_inv_w); 1529 sp->lmax = cl->cl_lmax; 1530 sp->qlength = qlen(&cl->cl_q); 1531 sp->qlimit = qlimit(&cl->cl_q); 1532 sp->period = cl->cl_period; 1533 sp->xmitcnt = cl->cl_xmitcnt; 1534 sp->dropcnt = cl->cl_dropcnt; 1535 1536 sp->qtype = qtype(&cl->cl_q); 1537 sp->qstate = qstate(&cl->cl_q); 1538#if CLASSQ_RED 1539 if (q_is_red(&cl->cl_q)) 1540 red_getstats(cl->cl_red, &sp->red[0]); 1541#endif /* CLASSQ_RED */ 1542#if CLASSQ_RIO 1543 if (q_is_rio(&cl->cl_q)) 1544 rio_getstats(cl->cl_rio, &sp->red[0]); 1545#endif /* CLASSQ_RIO */ 1546#if CLASSQ_BLUE 1547 if (q_is_blue(&cl->cl_q)) 1548 blue_getstats(cl->cl_blue, &sp->blue); 1549#endif /* CLASSQ_BLUE */ 1550 if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL) 1551 sfb_getstats(cl->cl_sfb, &sp->sfb); 1552 1553 return (0); 1554} 1555 1556static int 1557qfq_stat_sc(struct qfq_if *qif, cqrq_stat_sc_t *sr) 1558{ 1559 struct ifclassq *ifq = qif->qif_ifq; 1560 struct qfq_class *cl; 1561 u_int32_t i; 1562 1563 IFCQ_LOCK_ASSERT_HELD(ifq); 1564 1565 VERIFY(sr->sc == MBUF_SC_UNSPEC || MBUF_VALID_SC(sr->sc)); 1566 1567 i = MBUF_SCIDX(sr->sc); 1568 VERIFY(i < IFCQ_SC_MAX); 1569 1570 cl = ifq->ifcq_disc_slots[i].cl; 1571 sr->packets = qlen(&cl->cl_q); 1572 sr->bytes = qsize(&cl->cl_q); 1573 1574 return (0); 1575} 1576 1577/* convert a class handle to the corresponding class pointer */ 1578static inline struct qfq_class * 1579qfq_clh_to_clp(struct qfq_if *qif, u_int32_t chandle) 1580{ 1581 struct qfq_class *cl; 1582 int i; 1583 1584 IFCQ_LOCK_ASSERT_HELD(qif->qif_ifq); 1585 1586 /* 1587 * First, try optimistically the slot matching the lower bits of 1588 * the handle. If it fails, do the linear table search. 1589 */ 1590 i = chandle % qif->qif_maxclasses; 1591 if ((cl = qif->qif_class_tbl[i]) != NULL && cl->cl_handle == chandle) 1592 return (cl); 1593 for (i = 0; i < qif->qif_maxclasses; i++) 1594 if ((cl = qif->qif_class_tbl[i]) != NULL && 1595 cl->cl_handle == chandle) 1596 return (cl); 1597 1598 return (NULL); 1599} 1600 1601static const char * 1602qfq_style(struct qfq_if *qif) 1603{ 1604 return ((qif->qif_flags & QFQIFF_ALTQ) ? "ALTQ_QFQ" : "QFQ"); 1605} 1606 1607/* 1608 * Generic comparison function, handling wraparound 1609 */ 1610static inline int 1611qfq_gt(u_int64_t a, u_int64_t b) 1612{ 1613 return ((int64_t)(a - b) > 0); 1614} 1615 1616/* 1617 * Round a precise timestamp to its slotted value 1618 */ 1619static inline u_int64_t 1620qfq_round_down(u_int64_t ts, u_int32_t shift) 1621{ 1622 return (ts & ~((1ULL << shift) - 1)); 1623} 1624 1625/* 1626 * Return the pointer to the group with lowest index in the bitmap 1627 */ 1628static inline struct qfq_group * 1629qfq_ffs(struct qfq_if *qif, pktsched_bitmap_t bitmap) 1630{ 1631 int index = pktsched_ffs(bitmap) - 1; /* zero-based */ 1632 VERIFY(index >= 0 && index <= QFQ_MAX_INDEX && 1633 qif->qif_groups[index] != NULL); 1634 return (qif->qif_groups[index]); 1635} 1636 1637/* 1638 * Calculate a flow index, given its weight and maximum packet length. 1639 * index = log_2(maxlen/weight) but we need to apply the scaling. 1640 * This is used only once at flow creation. 1641 */ 1642static int 1643qfq_calc_index(struct qfq_class *cl, u_int32_t inv_w, u_int32_t maxlen) 1644{ 1645 u_int64_t slot_size = (u_int64_t)maxlen *inv_w; 1646 pktsched_bitmap_t size_map; 1647 int index = 0; 1648 1649 size_map = (pktsched_bitmap_t)(slot_size >> QFQ_MIN_SLOT_SHIFT); 1650 if (!size_map) 1651 goto out; 1652 1653 index = __fls(size_map) + 1; /* basically a log_2() */ 1654 index -= !(slot_size - (1ULL << (index + QFQ_MIN_SLOT_SHIFT - 1))); 1655 1656 if (index < 0) 1657 index = 0; 1658out: 1659 if (pktsched_verbose) { 1660 log(LOG_DEBUG, "%s: %s qid=%d grp=%d W=%u, L=%u, I=%d\n", 1661 if_name(QFQIF_IFP(cl->cl_qif)), qfq_style(cl->cl_qif), 1662 cl->cl_handle, index, (u_int32_t)(QFQ_ONE_FP/inv_w), 1663 maxlen, index); 1664 } 1665 return (index); 1666} 1667 1668#if QFQ_DEBUG 1669static void 1670qfq_dump_groups(struct qfq_if *qif, u_int32_t mask) 1671{ 1672 int i, j; 1673 1674 for (i = 0; i < QFQ_MAX_INDEX + 1; i++) { 1675 struct qfq_group *g = qif->qif_groups[i]; 1676 1677 if (0 == (mask & (1 << i))) 1678 continue; 1679 if (g == NULL) 1680 continue; 1681 1682 log(LOG_DEBUG, "%s: %s [%2d] full_slots 0x%x\n", 1683 if_name(QFQIF_IFP(qif)), qfq_style(qif), i, 1684 g->qfg_full_slots); 1685 log(LOG_DEBUG, "%s: %s S 0x%20llx F 0x%llx %c\n", 1686 if_name(QFQIF_IFP(qif)), qfq_style(qif), 1687 g->qfg_S, g->qfg_F, mask & (1 << i) ? '1' : '0'); 1688 1689 for (j = 0; j < qif->qif_maxslots; j++) { 1690 if (g->qfg_slots[j]) { 1691 log(LOG_DEBUG, "%s: %s bucket %d 0x%llx " 1692 "qid %d\n", if_name(QFQIF_IFP(qif)), 1693 qfq_style(qif), j, 1694 (uint64_t)VM_KERNEL_ADDRPERM( 1695 g->qfg_slots[j]), 1696 g->qfg_slots[j]->cl_handle); 1697 } 1698 } 1699 } 1700} 1701 1702static void 1703qfq_dump_sched(struct qfq_if *qif, const char *msg) 1704{ 1705 log(LOG_DEBUG, "%s: %s --- in %s: ---\n", 1706 if_name(QFQIF_IFP(qif)), qfq_style(qif), msg); 1707 log(LOG_DEBUG, "%s: %s emptygrp %d queued %d V 0x%llx\n", 1708 if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_emptygrp, 1709 qif->qif_queued, qif->qif_V); 1710 log(LOG_DEBUG, "%s: %s ER 0x%08x\n", 1711 if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_bitmaps[ER]); 1712 log(LOG_DEBUG, "%s: %s EB 0x%08x\n", 1713 if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_bitmaps[EB]); 1714 log(LOG_DEBUG, "%s: %s IR 0x%08x\n", 1715 if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_bitmaps[IR]); 1716 log(LOG_DEBUG, "%s: %s IB 0x%08x\n", 1717 if_name(QFQIF_IFP(qif)), qfq_style(qif), qif->qif_bitmaps[IB]); 1718 qfq_dump_groups(qif, 0xffffffff); 1719}; 1720#endif /* QFQ_DEBUG */ 1721 1722/* 1723 * qfq_enqueue_ifclassq is an enqueue function to be registered to 1724 * (*ifcq_enqueue) in struct ifclassq. 1725 */ 1726static int 1727qfq_enqueue_ifclassq(struct ifclassq *ifq, struct mbuf *m) 1728{ 1729 u_int32_t i; 1730 1731 IFCQ_LOCK_ASSERT_HELD(ifq); 1732 1733 if (!(m->m_flags & M_PKTHDR)) { 1734 /* should not happen */ 1735 log(LOG_ERR, "%s: packet does not have pkthdr\n", 1736 if_name(ifq->ifcq_ifp)); 1737 IFCQ_CONVERT_LOCK(ifq); 1738 m_freem(m); 1739 return (ENOBUFS); 1740 } 1741 1742 i = MBUF_SCIDX(mbuf_get_service_class(m)); 1743 VERIFY((u_int32_t)i < IFCQ_SC_MAX); 1744 1745 return (qfq_enqueue(ifq->ifcq_disc, 1746 ifq->ifcq_disc_slots[i].cl, m, m_pftag(m))); 1747} 1748 1749/* 1750 * qfq_dequeue_ifclassq is a dequeue function to be registered to 1751 * (*ifcq_dequeue) in struct ifclass. 1752 * 1753 * note: CLASSQDQ_POLL returns the next packet without removing the packet 1754 * from the queue. CLASSQDQ_REMOVE is a normal dequeue operation. 1755 * CLASSQDQ_REMOVE must return the same packet if called immediately 1756 * after CLASSQDQ_POLL. 1757 */ 1758static struct mbuf * 1759qfq_dequeue_ifclassq(struct ifclassq *ifq, cqdq_op_t op) 1760{ 1761 return (qfq_dequeue(ifq->ifcq_disc, op)); 1762} 1763 1764static int 1765qfq_request_ifclassq(struct ifclassq *ifq, cqrq_t req, void *arg) 1766{ 1767 struct qfq_if *qif = (struct qfq_if *)ifq->ifcq_disc; 1768 int err = 0; 1769 1770 IFCQ_LOCK_ASSERT_HELD(ifq); 1771 1772 switch (req) { 1773 case CLASSQRQ_PURGE: 1774 qfq_purge(qif); 1775 break; 1776 1777 case CLASSQRQ_PURGE_SC: 1778 qfq_purge_sc(qif, (cqrq_purge_sc_t *)arg); 1779 break; 1780 1781 case CLASSQRQ_EVENT: 1782 qfq_event(qif, (cqev_t)arg); 1783 break; 1784 1785 case CLASSQRQ_THROTTLE: 1786 err = qfq_throttle(qif, (cqrq_throttle_t *)arg); 1787 break; 1788 case CLASSQRQ_STAT_SC: 1789 err = qfq_stat_sc(qif, (cqrq_stat_sc_t *)arg); 1790 break; 1791 } 1792 return (err); 1793} 1794 1795int 1796qfq_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags) 1797{ 1798 struct ifnet *ifp = ifq->ifcq_ifp; 1799 struct qfq_class *cl0, *cl1, *cl2, *cl3, *cl4; 1800 struct qfq_class *cl5, *cl6, *cl7, *cl8, *cl9; 1801 struct qfq_if *qif; 1802 u_int32_t maxlen = 0, qflags = 0; 1803 int err = 0; 1804 1805 IFCQ_LOCK_ASSERT_HELD(ifq); 1806 VERIFY(ifq->ifcq_disc == NULL); 1807 VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE); 1808 1809 if (flags & PKTSCHEDF_QALG_RED) 1810 qflags |= QFCF_RED; 1811 if (flags & PKTSCHEDF_QALG_RIO) 1812 qflags |= QFCF_RIO; 1813 if (flags & PKTSCHEDF_QALG_BLUE) 1814 qflags |= QFCF_BLUE; 1815 if (flags & PKTSCHEDF_QALG_SFB) 1816 qflags |= QFCF_SFB; 1817 if (flags & PKTSCHEDF_QALG_ECN) 1818 qflags |= QFCF_ECN; 1819 if (flags & PKTSCHEDF_QALG_FLOWCTL) 1820 qflags |= QFCF_FLOWCTL; 1821 if (flags & PKTSCHEDF_QALG_DELAYBASED) 1822 qflags |= QFCF_DELAYBASED; 1823 1824 qif = qfq_alloc(ifp, M_WAITOK, FALSE); 1825 if (qif == NULL) 1826 return (ENOMEM); 1827 1828 if ((maxlen = IFCQ_MAXLEN(ifq)) == 0) 1829 maxlen = if_sndq_maxlen; 1830 1831 if ((err = qfq_add_queue(qif, maxlen, 300, 1200, 1832 qflags | QFCF_LAZY, SCIDX_BK_SYS, &cl0)) != 0) 1833 goto cleanup; 1834 1835 if ((err = qfq_add_queue(qif, maxlen, 600, 1400, 1836 qflags | QFCF_LAZY, SCIDX_BK, &cl1)) != 0) 1837 goto cleanup; 1838 1839 if ((err = qfq_add_queue(qif, maxlen, 2400, 600, 1840 qflags | QFCF_DEFAULTCLASS, SCIDX_BE, &cl2)) != 0) 1841 goto cleanup; 1842 1843 if ((err = qfq_add_queue(qif, maxlen, 2700, 600, 1844 qflags | QFCF_LAZY, SCIDX_RD, &cl3)) != 0) 1845 goto cleanup; 1846 1847 if ((err = qfq_add_queue(qif, maxlen, 3000, 400, 1848 qflags | QFCF_LAZY, SCIDX_OAM, &cl4)) != 0) 1849 goto cleanup; 1850 1851 if ((err = qfq_add_queue(qif, maxlen, 8000, 1000, 1852 qflags | QFCF_LAZY, SCIDX_AV, &cl5)) != 0) 1853 goto cleanup; 1854 1855 if ((err = qfq_add_queue(qif, maxlen, 15000, 1200, 1856 qflags | QFCF_LAZY, SCIDX_RV, &cl6)) != 0) 1857 goto cleanup; 1858 1859 if ((err = qfq_add_queue(qif, maxlen, 20000, 1400, 1860 qflags | QFCF_LAZY, SCIDX_VI, &cl7)) != 0) 1861 goto cleanup; 1862 1863 if ((err = qfq_add_queue(qif, maxlen, 23000, 200, 1864 qflags | QFCF_LAZY, SCIDX_VO, &cl8)) != 0) 1865 goto cleanup; 1866 1867 if ((err = qfq_add_queue(qif, maxlen, 25000, 200, 1868 qflags, SCIDX_CTL, &cl9)) != 0) 1869 goto cleanup; 1870 1871 err = ifclassq_attach(ifq, PKTSCHEDT_QFQ, qif, 1872 qfq_enqueue_ifclassq, qfq_dequeue_ifclassq, NULL, 1873 qfq_request_ifclassq); 1874 1875 /* cache these for faster lookup */ 1876 if (err == 0) { 1877 ifq->ifcq_disc_slots[SCIDX_BK_SYS].qid = SCIDX_BK_SYS; 1878 ifq->ifcq_disc_slots[SCIDX_BK_SYS].cl = cl0; 1879 1880 ifq->ifcq_disc_slots[SCIDX_BK].qid = SCIDX_BK; 1881 ifq->ifcq_disc_slots[SCIDX_BK].cl = cl1; 1882 1883 ifq->ifcq_disc_slots[SCIDX_BE].qid = SCIDX_BE; 1884 ifq->ifcq_disc_slots[SCIDX_BE].cl = cl2; 1885 1886 ifq->ifcq_disc_slots[SCIDX_RD].qid = SCIDX_RD; 1887 ifq->ifcq_disc_slots[SCIDX_RD].cl = cl3; 1888 1889 ifq->ifcq_disc_slots[SCIDX_OAM].qid = SCIDX_OAM; 1890 ifq->ifcq_disc_slots[SCIDX_OAM].cl = cl4; 1891 1892 ifq->ifcq_disc_slots[SCIDX_AV].qid = SCIDX_AV; 1893 ifq->ifcq_disc_slots[SCIDX_AV].cl = cl5; 1894 1895 ifq->ifcq_disc_slots[SCIDX_RV].qid = SCIDX_RV; 1896 ifq->ifcq_disc_slots[SCIDX_RV].cl = cl6; 1897 1898 ifq->ifcq_disc_slots[SCIDX_VI].qid = SCIDX_VI; 1899 ifq->ifcq_disc_slots[SCIDX_VI].cl = cl7; 1900 1901 ifq->ifcq_disc_slots[SCIDX_VO].qid = SCIDX_VO; 1902 ifq->ifcq_disc_slots[SCIDX_VO].cl = cl8; 1903 1904 ifq->ifcq_disc_slots[SCIDX_CTL].qid = SCIDX_CTL; 1905 ifq->ifcq_disc_slots[SCIDX_CTL].cl = cl9; 1906 } 1907 1908cleanup: 1909 if (err != 0) 1910 (void) qfq_destroy_locked(qif); 1911 1912 return (err); 1913} 1914 1915int 1916qfq_teardown_ifclassq(struct ifclassq *ifq) 1917{ 1918 struct qfq_if *qif = ifq->ifcq_disc; 1919 int i; 1920 1921 IFCQ_LOCK_ASSERT_HELD(ifq); 1922 VERIFY(qif != NULL && ifq->ifcq_type == PKTSCHEDT_QFQ); 1923 1924 (void) qfq_destroy_locked(qif); 1925 1926 ifq->ifcq_disc = NULL; 1927 for (i = 0; i < IFCQ_SC_MAX; i++) { 1928 ifq->ifcq_disc_slots[i].qid = 0; 1929 ifq->ifcq_disc_slots[i].cl = NULL; 1930 } 1931 1932 return (ifclassq_detach(ifq)); 1933} 1934 1935int 1936qfq_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t slot, 1937 struct if_ifclassq_stats *ifqs) 1938{ 1939 struct qfq_if *qif = ifq->ifcq_disc; 1940 1941 IFCQ_LOCK_ASSERT_HELD(ifq); 1942 VERIFY(ifq->ifcq_type == PKTSCHEDT_QFQ); 1943 1944 if (slot >= IFCQ_SC_MAX) 1945 return (EINVAL); 1946 1947 return (qfq_get_class_stats(qif, ifq->ifcq_disc_slots[slot].qid, 1948 &ifqs->ifqs_qfq_stats)); 1949} 1950 1951static int 1952qfq_throttle(struct qfq_if *qif, cqrq_throttle_t *tr) 1953{ 1954 struct ifclassq *ifq = qif->qif_ifq; 1955 struct qfq_class *cl; 1956 int err = 0; 1957 1958 IFCQ_LOCK_ASSERT_HELD(ifq); 1959 VERIFY(!(qif->qif_flags & QFQIFF_ALTQ)); 1960 1961 if (!tr->set) { 1962 tr->level = qif->qif_throttle; 1963 return (0); 1964 } 1965 1966 if (tr->level == qif->qif_throttle) 1967 return (EALREADY); 1968 1969 /* Current throttling levels only involve BK_SYS class */ 1970 cl = ifq->ifcq_disc_slots[SCIDX_BK_SYS].cl; 1971 1972 switch (tr->level) { 1973 case IFNET_THROTTLE_OFF: 1974 err = qfq_resumeq(qif, cl); 1975 break; 1976 1977 case IFNET_THROTTLE_OPPORTUNISTIC: 1978 err = qfq_suspendq(qif, cl); 1979 break; 1980 1981 default: 1982 VERIFY(0); 1983 /* NOTREACHED */ 1984 } 1985 1986 if (err == 0 || err == ENXIO) { 1987 if (pktsched_verbose) { 1988 log(LOG_DEBUG, "%s: %s throttling level %sset %d->%d\n", 1989 if_name(QFQIF_IFP(qif)), qfq_style(qif), 1990 (err == 0) ? "" : "lazy ", qif->qif_throttle, 1991 tr->level); 1992 } 1993 qif->qif_throttle = tr->level; 1994 if (err != 0) 1995 err = 0; 1996 else 1997 qfq_purgeq(qif, cl, 0, NULL, NULL); 1998 } else { 1999 log(LOG_ERR, "%s: %s unable to set throttling level " 2000 "%d->%d [error=%d]\n", if_name(QFQIF_IFP(qif)), 2001 qfq_style(qif), qif->qif_throttle, tr->level, err); 2002 } 2003 2004 return (err); 2005} 2006 2007static int 2008qfq_resumeq(struct qfq_if *qif, struct qfq_class *cl) 2009{ 2010 struct ifclassq *ifq = qif->qif_ifq; 2011 int err = 0; 2012 2013 IFCQ_LOCK_ASSERT_HELD(ifq); 2014 2015#if CLASSQ_RIO 2016 if (q_is_rio(&cl->cl_q)) 2017 err = rio_suspendq(cl->cl_rio, &cl->cl_q, FALSE); 2018 else 2019#endif /* CLASSQ_RIO */ 2020#if CLASSQ_RED 2021 if (q_is_red(&cl->cl_q)) 2022 err = red_suspendq(cl->cl_red, &cl->cl_q, FALSE); 2023 else 2024#endif /* CLASSQ_RED */ 2025#if CLASSQ_BLUE 2026 if (q_is_blue(&cl->cl_q)) 2027 err = blue_suspendq(cl->cl_blue, &cl->cl_q, FALSE); 2028 else 2029#endif /* CLASSQ_BLUE */ 2030 if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL) 2031 err = sfb_suspendq(cl->cl_sfb, &cl->cl_q, FALSE); 2032 2033 if (err == 0) 2034 qstate(&cl->cl_q) = QS_RUNNING; 2035 2036 return (err); 2037} 2038 2039static int 2040qfq_suspendq(struct qfq_if *qif, struct qfq_class *cl) 2041{ 2042 struct ifclassq *ifq = qif->qif_ifq; 2043 int err = 0; 2044 2045 IFCQ_LOCK_ASSERT_HELD(ifq); 2046 2047#if CLASSQ_RIO 2048 if (q_is_rio(&cl->cl_q)) 2049 err = rio_suspendq(cl->cl_rio, &cl->cl_q, TRUE); 2050 else 2051#endif /* CLASSQ_RIO */ 2052#if CLASSQ_RED 2053 if (q_is_red(&cl->cl_q)) 2054 err = red_suspendq(cl->cl_red, &cl->cl_q, TRUE); 2055 else 2056#endif /* CLASSQ_RED */ 2057#if CLASSQ_BLUE 2058 if (q_is_blue(&cl->cl_q)) 2059 err = blue_suspendq(cl->cl_blue, &cl->cl_q, TRUE); 2060 else 2061#endif /* CLASSQ_BLUE */ 2062 if (q_is_sfb(&cl->cl_q)) { 2063 if (cl->cl_sfb != NULL) { 2064 err = sfb_suspendq(cl->cl_sfb, &cl->cl_q, TRUE); 2065 } else { 2066 VERIFY(cl->cl_flags & QFCF_LAZY); 2067 err = ENXIO; /* delayed throttling */ 2068 } 2069 } 2070 2071 if (err == 0 || err == ENXIO) 2072 qstate(&cl->cl_q) = QS_SUSPENDED; 2073 2074 return (err); 2075} 2076