1/* $NetBSD: altq_blue.c,v 1.26 2021/09/21 14:30:15 christos Exp $ */ 2/* $KAME: altq_blue.c,v 1.15 2005/04/13 03:44:24 suz Exp $ */ 3 4/* 5 * Copyright (C) 1997-2002 6 * Sony Computer Science Laboratories Inc. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 */ 30/* 31 * Copyright (c) 1990-1994 Regents of the University of California. 32 * All rights reserved. 33 * 34 * Redistribution and use in source and binary forms, with or without 35 * modification, are permitted provided that the following conditions 36 * are met: 37 * 1. Redistributions of source code must retain the above copyright 38 * notice, this list of conditions and the following disclaimer. 39 * 2. 
Redistributions in binary form must reproduce the above copyright 40 * notice, this list of conditions and the following disclaimer in the 41 * documentation and/or other materials provided with the distribution. 42 * 3. All advertising materials mentioning features or use of this software 43 * must display the following acknowledgement: 44 * This product includes software developed by the Computer Systems 45 * Engineering Group at Lawrence Berkeley Laboratory. 46 * 4. Neither the name of the University nor of the Laboratory may be used 47 * to endorse or promote products derived from this software without 48 * specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: altq_blue.c,v 1.26 2021/09/21 14:30:15 christos Exp $");

#ifdef _KERNEL_OPT
#include "opt_altq.h"
#include "opt_inet.h"
#endif

#ifdef ALTQ_BLUE	/* blue is enabled by ALTQ_BLUE option in opt_altq.h */

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/kauth.h>
#include <sys/cprng.h>

#include <net/if.h>
#include <net/if_types.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#ifdef INET6
#include <netinet/ip6.h>
#endif

#include <altq/altq.h>
#include <altq/altq_conf.h>
#include <altq/altq_blue.h>

#ifdef ALTQ3_COMPAT
/*
 * Blue is proposed and implemented by Wu-chang Feng <wuchang@eecs.umich.edu>.
 * more information on Blue is available from
 * http://www.eecs.umich.edu/~wuchang/blue/
 */

/* fixed-point uses 12-bit decimal places */
#define	FP_SHIFT	12	/* fixed-point shift */

#define	BLUE_LIMIT	200	/* default max queue length */
#define	BLUE_STATS		/* collect statistics */

/* blue_list keeps all blue_state_t's allocated. */
static blue_queue_t *blue_list = NULL;

/* internal function prototypes */
static int blue_enqueue(struct ifaltq *, struct mbuf *);
static struct mbuf *blue_dequeue(struct ifaltq *, int);
static int drop_early(blue_t *);
static int mark_ecn(struct mbuf *, struct altq_pktattr *, int);
static int blue_detach(blue_queue_t *);
static int blue_request(struct ifaltq *, int, void *);

/*
 * blue device interface
 */
altqdev_decl(blue);

int
blueopen(dev_t dev, int flag, int fmt,
    struct lwp *l)
{
	/* everything will be done when the queueing scheme is attached. */
	return 0;
}

/*
 * Close the blue device: detach (and free) every blue state still on
 * blue_list.  Returns the first detach error encountered, if any.
 */
int
blueclose(dev_t dev, int flag, int fmt,
    struct lwp *l)
{
	blue_queue_t *rqp;
	int err, error = 0;

	while ((rqp = blue_list) != NULL) {
		/* destroy all */
		err = blue_detach(rqp);
		if (err != 0 && error == 0)
			error = err;
	}

	return error;
}

/*
 * ioctl entry point: dispatches the BLUE_* commands (enable/disable,
 * interface attach/detach, statistics retrieval, configuration).
 * All commands except the read-only BLUE_GETSTATS require
 * KAUTH_NETWORK_ALTQ privilege.
 */
int
blueioctl(dev_t dev, ioctlcmd_t cmd, void *addr, int flag,
    struct lwp *l)
{
	blue_queue_t *rqp;
	struct blue_interface *ifacep;
	struct ifnet *ifp;
	int error = 0;

	/* check super-user privilege */
	switch (cmd) {
	case BLUE_GETSTATS:
		break;
	default:
		if ((error = kauth_authorize_network(l->l_cred,
		    KAUTH_NETWORK_ALTQ, KAUTH_REQ_NETWORK_ALTQ_BLUE, NULL,
		    NULL, NULL)) != 0)
			return (error);
		break;
	}

	switch (cmd) {

	case BLUE_ENABLE:
		ifacep = (struct blue_interface *)addr;
		if ((rqp = altq_lookup(ifacep->blue_ifname, ALTQT_BLUE)) == NULL) {
			error = EBADF;
			break;
		}
		error = altq_enable(rqp->rq_ifq);
		break;

	case BLUE_DISABLE:
		ifacep = (struct blue_interface *)addr;
		if ((rqp = altq_lookup(ifacep->blue_ifname, ALTQT_BLUE)) == NULL) {
			error = EBADF;
			break;
		}
		error = altq_disable(rqp->rq_ifq);
		break;

	case BLUE_IF_ATTACH:
		ifp = ifunit(((struct blue_interface *)addr)->blue_ifname);
		if (ifp == NULL) {
			error = ENXIO;
			break;
		}

		/* allocate and initialize blue_state_t */
		/*
		 * NOTE(review): M_WAITOK allocations should not return
		 * NULL; the NULL checks below look purely defensive —
		 * confirm before removing.
		 */
		rqp = malloc(sizeof(blue_queue_t), M_DEVBUF, M_WAITOK|M_ZERO);
		if (rqp == NULL) {
			error = ENOMEM;
			break;
		}

		rqp->rq_q = malloc(sizeof(class_queue_t), M_DEVBUF,
		    M_WAITOK|M_ZERO);
		if (rqp->rq_q == NULL) {
			free(rqp, M_DEVBUF);
			error = ENOMEM;
			break;
		}

		rqp->rq_blue = malloc(sizeof(blue_t), M_DEVBUF,
		    M_WAITOK|M_ZERO);
		if (rqp->rq_blue == NULL) {
			free(rqp->rq_q, M_DEVBUF);
			free(rqp, M_DEVBUF);
			error = ENOMEM;
			break;
		}

		rqp->rq_ifq = &ifp->if_snd;
		qtail(rqp->rq_q) = NULL;
		qlen(rqp->rq_q) = 0;
		qlimit(rqp->rq_q) = BLUE_LIMIT;

		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
		blue_init(rqp->rq_blue, 0, 800, 1000, 50000);

		/*
		 * set BLUE to this ifnet structure.
		 */
		error = altq_attach(rqp->rq_ifq, ALTQT_BLUE, rqp,
		    blue_enqueue, blue_dequeue, blue_request,
		    NULL, NULL);
		if (error) {
			/* attach failed: undo the three allocations above */
			free(rqp->rq_blue, M_DEVBUF);
			free(rqp->rq_q, M_DEVBUF);
			free(rqp, M_DEVBUF);
			break;
		}

		/* add this state to the blue list */
		rqp->rq_next = blue_list;
		blue_list = rqp;
		break;

	case BLUE_IF_DETACH:
		ifacep = (struct blue_interface *)addr;
		if ((rqp = altq_lookup(ifacep->blue_ifname, ALTQT_BLUE)) == NULL) {
			error = EBADF;
			break;
		}
		error = blue_detach(rqp);
		break;

	case BLUE_GETSTATS:
		do {
			struct blue_stats *q_stats;
			blue_t *rp;

			q_stats = (struct blue_stats *)addr;
			if ((rqp = altq_lookup(q_stats->iface.blue_ifname,
			    ALTQT_BLUE)) == NULL) {
				error = EBADF;
				break;
			}

			q_stats->q_len = qlen(rqp->rq_q);
			q_stats->q_limit = qlimit(rqp->rq_q);

			rp = rqp->rq_blue;
			q_stats->q_pmark = rp->blue_pmark;
			q_stats->xmit_packets = rp->blue_stats.xmit_packets;
			q_stats->xmit_bytes = rp->blue_stats.xmit_bytes;
			q_stats->drop_packets = rp->blue_stats.drop_packets;
			q_stats->drop_bytes = rp->blue_stats.drop_bytes;
			q_stats->drop_forced = rp->blue_stats.drop_forced;
			q_stats->drop_unforced = rp->blue_stats.drop_unforced;
			q_stats->marked_packets = rp->blue_stats.marked_packets;

		} while (/*CONSTCOND*/ 0);
		break;

	case BLUE_CONFIG:
		do {
			struct blue_conf *fc;
			int limit;

			fc = (struct blue_conf *)addr;
			if ((rqp = altq_lookup(fc->iface.blue_ifname,
			    ALTQT_BLUE)) == NULL) {
				error = EBADF;
				break;
			}
			/*
			 * NOTE(review): blue_limit is applied without range
			 * validation — verify callers cannot pass a
			 * negative or zero limit.
			 */
			limit = fc->blue_limit;
			qlimit(rqp->rq_q) = limit;
			fc->blue_limit = limit;	/* write back the new value */
			if (fc->blue_pkttime > 0)
				rqp->rq_blue->blue_pkttime = fc->blue_pkttime;
			if (fc->blue_max_pmark > 0)
				rqp->rq_blue->blue_max_pmark = fc->blue_max_pmark;
			if (fc->blue_hold_time > 0)
				rqp->rq_blue->blue_hold_time = fc->blue_hold_time;
			rqp->rq_blue->blue_flags = fc->blue_flags;

			/* re-initialize with the (possibly updated) values */
			blue_init(rqp->rq_blue, rqp->rq_blue->blue_flags,
			    rqp->rq_blue->blue_pkttime,
			    rqp->rq_blue->blue_max_pmark,
			    rqp->rq_blue->blue_hold_time);
		} while (/*CONSTCOND*/ 0);
		break;

	default:
		error = EINVAL;
		break;
	}
	return error;
}

/*
 * Detach a blue state from its interface queue: disable ALTQ if it is
 * running, detach from the ifq, unlink the state from blue_list, and
 * free all structures allocated at BLUE_IF_ATTACH time.
 */
static int
blue_detach(blue_queue_t *rqp)
{
	blue_queue_t *tmp;
	int error = 0;

	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
		altq_disable(rqp->rq_ifq);

	if ((error = altq_detach(rqp->rq_ifq)))
		return (error);

	/* unlink from the singly-linked blue_list */
	if (blue_list == rqp)
		blue_list = rqp->rq_next;
	else {
		for (tmp = blue_list; tmp != NULL; tmp = tmp->rq_next)
			if (tmp->rq_next == rqp) {
				tmp->rq_next = rqp->rq_next;
				break;
			}
		if (tmp == NULL)
			printf("blue_detach: no state found in blue_list!\n");
	}

	free(rqp->rq_q, M_DEVBUF);
	free(rqp->rq_blue, M_DEVBUF);
	free(rqp, M_DEVBUF);
	return (error);
}

/*
 * blue support routines
 */

/*
 * Initialize a blue state: record the parameters and start idle with
 * the current time stamped in blue_last.  A pkttime of 0 is clamped to
 * 1 to avoid the division by zero below.
 */
int
blue_init(blue_t *rp, int flags, int pkttime, int blue_max_pmark,
    int blue_hold_time)
{
	int npkts_per_sec;

	rp->blue_idle = 1;
	rp->blue_flags = flags;
	rp->blue_pkttime = pkttime;
	rp->blue_max_pmark = blue_max_pmark;
	rp->blue_hold_time = blue_hold_time;
	if (pkttime == 0)
		rp->blue_pkttime = 1;

	/* when the link is very slow, adjust blue parameters */
	npkts_per_sec = 1000000 / rp->blue_pkttime;
	/*
	 * NOTE(review): both branches below are empty — the slow-link
	 * parameter adjustment appears to have been removed at some
	 * point; confirm whether this dead code can be deleted.
	 */
	if (npkts_per_sec < 50) {
	}
	else if (npkts_per_sec < 300) {
	}

	microtime(&rp->blue_last);
	return (0);
}

/*
 * enqueue routine:
 *
 *	returns: 0 when
 *		 successfully queued.
 *		 ENOBUFS when drop occurs.
 */
static int
blue_enqueue(struct ifaltq *ifq, struct mbuf *m)
{
	struct altq_pktattr pktattr;
	blue_queue_t *rqp = (blue_queue_t *)ifq->altq_disc;
	int error = 0;

	/* copy the packet attributes stashed in the mbuf header */
	pktattr.pattr_class = m->m_pkthdr.pattr_class;
	pktattr.pattr_af = m->m_pkthdr.pattr_af;
	pktattr.pattr_hdr = m->m_pkthdr.pattr_hdr;

	if (blue_addq(rqp->rq_blue, rqp->rq_q, m, &pktattr) == 0)
		ifq->ifq_len++;
	else
		error = ENOBUFS;
	return error;
}

#define	DTYPE_NODROP	0	/* no drop */
#define	DTYPE_FORCED	1	/* a "forced" drop */
#define	DTYPE_EARLY	2	/* an "unforced" (early) drop */

/*
 * Add a packet to the queue subject to blue's drop/mark policy:
 * decrement the mark probability after an idle period, early-drop or
 * ECN-mark with probability blue_pmark/blue_max_pmark, and force a
 * drop of a random victim when the queue hits its hard limit.
 * Returns 0 if the packet was queued, -1 if a packet was dropped.
 */
int
blue_addq(blue_t *rp, class_queue_t *q, struct mbuf *m,
    struct altq_pktattr *pktattr)
{
	int droptype;

	/*
	 * if we were idle, this is an enqueue onto an empty queue
	 * and we should decrement marking probability
	 *
	 */
	if (rp->blue_idle) {
		struct timeval now;
		int t;
		rp->blue_idle = 0;
		microtime(&now);
		t = (now.tv_sec - rp->blue_last.tv_sec);
		if ( t > 1) {
			/* idle for more than a second: reset pmark */
			rp->blue_pmark = 1;
			microtime(&rp->blue_last);
		} else {
			/* convert the idle interval to microseconds */
			t = t * 1000000 + (now.tv_usec - rp->blue_last.tv_usec);
			if (t > rp->blue_hold_time) {
				rp->blue_pmark--;
				if (rp->blue_pmark < 0) rp->blue_pmark = 0;
				microtime(&rp->blue_last);
			}
		}
	}

	/* see if we drop early */
	droptype = DTYPE_NODROP;
	if (drop_early(rp) && qlen(q) > 1) {
		/* mark or drop by blue */
		if ((rp->blue_flags & BLUEF_ECN) &&
		    mark_ecn(m, pktattr, rp->blue_flags)) {
			/* successfully marked.  do not drop. */
#ifdef BLUE_STATS
			rp->blue_stats.marked_packets++;
#endif
		} else {
			/* unforced drop by blue */
			droptype = DTYPE_EARLY;
		}
	}

	/*
	 * if the queue length hits the hard limit, it's a forced drop.
	 */
	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
		droptype = DTYPE_FORCED;

	/* if successful or forced drop, enqueue this packet. */
	if (droptype != DTYPE_EARLY)
		_addq(q, m);

	if (droptype != DTYPE_NODROP) {
		if (droptype == DTYPE_EARLY) {
			/* drop the incoming packet */
#ifdef BLUE_STATS
			rp->blue_stats.drop_unforced++;
#endif
		} else {
			struct timeval now;
			int t;
			/* forced drop, select a victim packet in the queue. */
			m = _getq_random(q);
			microtime(&now);
			t = (now.tv_sec - rp->blue_last.tv_sec);
			t = t * 1000000 + (now.tv_usec - rp->blue_last.tv_usec);
			/* queue overflowed: increase the mark probability */
			if (t > rp->blue_hold_time) {
				rp->blue_pmark += rp->blue_max_pmark >> 3;
				if (rp->blue_pmark > rp->blue_max_pmark)
					rp->blue_pmark = rp->blue_max_pmark;
				microtime(&rp->blue_last);
			}
#ifdef BLUE_STATS
			rp->blue_stats.drop_forced++;
#endif
		}
#ifdef BLUE_STATS
		rp->blue_stats.drop_packets++;
		rp->blue_stats.drop_bytes += m->m_pkthdr.len;
#endif
		m_freem(m);
		return (-1);
	}
	/* successfully queued */
	return (0);
}

/*
 * early-drop probability is kept in blue_pmark
 * (drop or mark with probability blue_pmark / blue_max_pmark)
 */
static int
drop_early(blue_t *rp)
{
	if ((cprng_fast32() % rp->blue_max_pmark) < rp->blue_pmark) {
		/* drop or mark */
		return (1);
	}
	/* no drop/mark */
	return (0);
}

/*
 * try to mark CE bit to the packet.
 *    returns 1 if successfully marked, 0 otherwise.
 */
static int
mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return (0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((char *)pktattr->pattr_hdr >= m0->m_data) &&
		    ((char *)pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
		return (0);
	}

	switch (pktattr->pattr_af) {
	case AF_INET:
		if (flags & BLUEF_ECN4) {
			struct ip *ip = (struct ip *)pktattr->pattr_hdr;
			u_int8_t otos;
			int sum;

			if (ip->ip_v != 4)
				return (0);	/* version mismatch! */
			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
				return (0);	/* not-ECT */
			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
				return (1);	/* already marked */

			/*
			 * ecn-capable but not marked,
			 * mark CE and update checksum
			 */
			otos = ip->ip_tos;
			ip->ip_tos |= IPTOS_ECN_CE;
			/*
			 * update checksum (from RFC1624)
			 *	    HC' = ~(~HC + ~m + m')
			 */
			sum = ~ntohs(ip->ip_sum) & 0xffff;
			sum += (~otos & 0xffff) + ip->ip_tos;
			sum = (sum >> 16) + (sum & 0xffff);
			sum += (sum >> 16);	/* add carry */
			ip->ip_sum = htons(~sum & 0xffff);
			return (1);
		}
		break;
#ifdef INET6
	case AF_INET6:
		if (flags & BLUEF_ECN6) {
			struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
			u_int32_t flowlabel;

			/* ECN bits live at bits 20-21 of the v6 flow word */
			flowlabel = ntohl(ip6->ip6_flow);
			if ((flowlabel >> 28) != 6)
				return (0);	/* version mismatch! */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_NOTECT << 20))
				return (0);	/* not-ECT */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_CE << 20))
				return (1);	/* already marked */
			/*
			 * ecn-capable but not marked, mark CE
			 */
			flowlabel |= (IPTOS_ECN_CE << 20);
			ip6->ip6_flow = htonl(flowlabel);
			return (1);
		}
		break;
#endif  /* INET6 */
	}

	/* not marked */
	return (0);
}

/*
 * dequeue routine:
 *	must be called in splnet.
 *
 *	returns: mbuf dequeued.
 *		 NULL when no packet is available in the queue.
 */

static struct mbuf *
blue_dequeue(struct ifaltq * ifq, int op)
{
	blue_queue_t *rqp = (blue_queue_t *)ifq->altq_disc;
	struct mbuf *m = NULL;

	/* ALTDQ_POLL peeks at the head without removing it */
	if (op == ALTDQ_POLL)
		return (qhead(rqp->rq_q));

	m = blue_getq(rqp->rq_blue, rqp->rq_q);
	if (m != NULL)
		ifq->ifq_len--;
	return m;
}

/*
 * Remove the packet at the head of the queue.  When the queue is empty
 * the blue state transitions to idle and blue_last records the moment
 * it went idle; otherwise the transmit statistics are updated.
 */
struct mbuf *
blue_getq(blue_t *rp, class_queue_t *q)
{
	struct mbuf *m;

	if ((m = _getq(q)) == NULL) {
		if (rp->blue_idle == 0) {
			rp->blue_idle = 1;
			microtime(&rp->blue_last);
		}
		return NULL;
	}

	rp->blue_idle = 0;
#ifdef BLUE_STATS
	rp->blue_stats.xmit_packets++;
	rp->blue_stats.xmit_bytes += m->m_pkthdr.len;
#endif
	return (m);
}

/*
 * ALTQ request handler: only ALTRQ_PURGE is implemented, which
 * discards every queued packet.
 */
static int
blue_request(struct ifaltq *ifq, int req, void *arg)
{
	blue_queue_t *rqp = (blue_queue_t *)ifq->altq_disc;

	switch (req) {
	case ALTRQ_PURGE:
		_flushq(rqp->rq_q);
		if (ALTQ_IS_ENABLED(ifq))
			ifq->ifq_len = 0;
		break;
	}
	return (0);
}


#ifdef KLD_MODULE

static struct altqsw blue_sw =
	{"blue", blueopen, blueclose, blueioctl};

ALTQ_MODULE(altq_blue, ALTQT_BLUE, &blue_sw);

#endif /* KLD_MODULE */

#endif /* ALTQ3_COMPAT */
#endif /* ALTQ_BLUE */