ifq.c revision 1.34
/*	$OpenBSD: ifq.c,v 1.34 2019/08/16 04:09:02 dlg Exp $ */

/*
 * Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bpfilter.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/sysctl.h>

#include <net/if.h>
#include <net/if_var.h>

#if NBPFILTER > 0
#include <net/bpf.h>
#endif

/*
 * priq glue
 *
 * Prototypes for the default priority-queue discipline that backs every
 * ifqueue until a driver or pf attaches a different set of ifq_ops.
 */
unsigned int	 priq_idx(unsigned int, const struct mbuf *);
struct mbuf	*priq_enq(struct ifqueue *, struct mbuf *);
struct mbuf	*priq_deq_begin(struct ifqueue *, void **);
void		 priq_deq_commit(struct ifqueue *, struct mbuf *, void *);
void		 priq_purge(struct ifqueue *, struct mbuf_list *);

void		*priq_alloc(unsigned int, void *);
void		 priq_free(unsigned int, void *);

const struct ifq_ops priq_ops = {
	priq_idx,
	priq_enq,
	priq_deq_begin,
	priq_deq_commit,
	priq_purge,
	priq_alloc,
	priq_free,
};

const struct ifq_ops * const ifq_priq_ops = &priq_ops;

/*
 * priq internal structures
 */

struct priq {
	/* one FIFO per priority level; higher index == higher priority */
	struct mbuf_list	 pq_lists[IFQ_NQUEUES];
};

/*
 * ifqueue serialiser
 */

void	ifq_start_task(void *);
void	ifq_restart_task(void *);
void	ifq_barrier_task(void *);
void	ifq_bundle_task(void *);

/* Run the queue's start task through the serializer. */
static inline void
ifq_run_start(struct ifqueue *ifq)
{
	ifq_serialize(ifq, &ifq->ifq_start);
}

/*
 * Queue task t and, if no other CPU currently holds the serializer for
 * this ifq, become the serializer and drain the task list.  This gives
 * the tasks mutual exclusion without a dedicated thread: whichever CPU
 * gets here first runs everyone's work.
 */
void
ifq_serialize(struct ifqueue *ifq, struct task *t)
{
	struct task work;

	/* unlocked fast path: already queued means nothing to do */
	if (ISSET(t->t_flags, TASK_ONQUEUE))
		return;

	mtx_enter(&ifq->ifq_task_mtx);
	/* re-check under the lock before inserting */
	if (!ISSET(t->t_flags, TASK_ONQUEUE)) {
		SET(t->t_flags, TASK_ONQUEUE);
		TAILQ_INSERT_TAIL(&ifq->ifq_task_list, t, t_entry);
	}

	if (ifq->ifq_serializer == NULL) {
		/* nobody is running tasks for this ifq; this CPU takes over */
		ifq->ifq_serializer = curcpu();

		while ((t = TAILQ_FIRST(&ifq->ifq_task_list)) != NULL) {
			TAILQ_REMOVE(&ifq->ifq_task_list, t, t_entry);
			CLR(t->t_flags, TASK_ONQUEUE);
			work = *t; /* copy to caller to avoid races */

			/* run the task without holding the mutex */
			mtx_leave(&ifq->ifq_task_mtx);

			(*work.t_func)(work.t_arg);

			mtx_enter(&ifq->ifq_task_mtx);
		}

		ifq->ifq_serializer = NULL;
	}
	mtx_leave(&ifq->ifq_task_mtx);
}

/* True if the current CPU is the one draining this ifq's task list. */
int
ifq_is_serialized(struct ifqueue *ifq)
{
	return (ifq->ifq_serializer == curcpu());
}

/*
 * Kick transmission.  If enough packets have accumulated (relative to
 * what the driver wants per transmit, capped by the queue depth), start
 * immediately; otherwise defer to the softnet bundle task so small
 * bursts get coalesced into one start call.
 */
void
ifq_start(struct ifqueue *ifq)
{
	if (ifq_len(ifq) >= min(ifq->ifq_if->if_txmit, ifq->ifq_maxlen)) {
		task_del(ifq->ifq_softnet, &ifq->ifq_bundle);
		ifq_run_start(ifq);
	} else
		task_add(ifq->ifq_softnet, &ifq->ifq_bundle);
}

/* Serialized task: call the driver's qstart if there is work to do. */
void
ifq_start_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
	    ifq_empty(ifq) || ifq_is_oactive(ifq))
		return;

	ifp->if_qstart(ifq);
}

/* Serialized task: clear oactive and unconditionally restart the driver. */
void
ifq_restart_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	ifq_clr_oactive(ifq);
	ifp->if_qstart(ifq);
}

/* Softnet task used by ifq_start() to coalesce small transmit bursts. */
void
ifq_bundle_task(void *p)
{
	struct ifqueue *ifq = p;

	ifq_run_start(ifq);
}

/*
 * Wait until all work currently queued on the ifq serializer has run.
 * The pending bundle task is cancelled first; if the serializer is idle
 * there is nothing to wait for.
 */
void
ifq_barrier(struct ifqueue *ifq)
{
	struct cond c = COND_INITIALIZER();
	struct task t = TASK_INITIALIZER(ifq_barrier_task, &c);

	task_del(ifq->ifq_softnet, &ifq->ifq_bundle);

	if (ifq->ifq_serializer == NULL)
		return;

	ifq_serialize(ifq, &t);

	cond_wait(&c, "ifqbar");
}

/* Runs on the serializer; wakes the thread sleeping in ifq_barrier(). */
void
ifq_barrier_task(void *p)
{
	struct cond *c = p;

	cond_signal(c);
}

/*
 * ifqueue mbuf queue API
 */

/* Initialise ifq as transmit queue idx of ifp, backed by priq. */
void
ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
{
	ifq->ifq_if = ifp;
	ifq->ifq_softnet = net_tq(ifp->if_index); /* + idx */
	ifq->ifq_softc = NULL;

	mtx_init(&ifq->ifq_mtx, IPL_NET);

	/* default to priq */
	ifq->ifq_ops = &priq_ops;
	ifq->ifq_q = priq_ops.ifqop_alloc(idx, NULL);

	ml_init(&ifq->ifq_free);
	ifq->ifq_len = 0;

	ifq->ifq_packets = 0;
	ifq->ifq_bytes = 0;
	ifq->ifq_qdrops = 0;
	ifq->ifq_errors = 0;
	ifq->ifq_mcasts = 0;

	mtx_init(&ifq->ifq_task_mtx, IPL_NET);
	TAILQ_INIT(&ifq->ifq_task_list);
	ifq->ifq_serializer = NULL;
	task_set(&ifq->ifq_bundle, ifq_bundle_task, ifq);

	task_set(&ifq->ifq_start, ifq_start_task, ifq);
	task_set(&ifq->ifq_restart, ifq_restart_task, ifq);

	/* respect a maxlen the driver may have set before ifq_init() */
	if (ifq->ifq_maxlen == 0)
		ifq_set_maxlen(ifq, IFQ_MAXLEN);

	ifq->ifq_idx = idx;
}

/*
 * Switch the queueing discipline.  Packets held by the old discipline
 * are re-enqueued into the new one; whatever the new discipline rejects
 * is counted as a qdrop and freed after the lock is dropped.
 */
void
ifq_attach(struct ifqueue *ifq, const struct ifq_ops *newops, void *opsarg)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;
	const struct ifq_ops *oldops;
	void *newq, *oldq;

	/* allocate before taking the mutex; alloc may sleep */
	newq = newops->ifqop_alloc(ifq->ifq_idx, opsarg);

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_len = 0;

	oldops = ifq->ifq_ops;
	oldq = ifq->ifq_q;

	ifq->ifq_ops = newops;
	ifq->ifq_q = newq;

	while ((m = ml_dequeue(&ml)) != NULL) {
		m = ifq->ifq_ops->ifqop_enq(ifq, m);
		if (m != NULL) {
			ifq->ifq_qdrops++;
			ml_enqueue(&free_ml, m);
		} else
			ifq->ifq_len++;
	}
	mtx_leave(&ifq->ifq_mtx);

	oldops->ifqop_free(ifq->ifq_idx, oldq);

	ml_purge(&free_ml);
}

/* Tear down the ifq: cancel/drain the bundle task and free the discipline. */
void
ifq_destroy(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();

	NET_ASSERT_UNLOCKED();
	/* if the bundle task was already running, wait for it to finish */
	if (!task_del(ifq->ifq_softnet, &ifq->ifq_bundle))
		taskq_barrier(ifq->ifq_softnet);

	/* don't need to lock because this is the last use of the ifq */

	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_ops->ifqop_free(ifq->ifq_idx, ifq->ifq_q);

	ml_purge(&ml);
}

/* Fold this queue's counters into the interface-wide if_data stats. */
void
ifq_add_data(struct ifqueue *ifq, struct if_data *data)
{
	mtx_enter(&ifq->ifq_mtx);
	data->ifi_opackets += ifq->ifq_packets;
	data->ifi_obytes += ifq->ifq_bytes;
	data->ifi_oqdrops += ifq->ifq_qdrops;
	data->ifi_omcasts += ifq->ifq_mcasts;
	/* ifp->if_data.ifi_oerrors */
	mtx_leave(&ifq->ifq_mtx);
}

/*
 * Enqueue m for transmit.  The discipline returns NULL on success, m
 * itself if m was rejected, or a different mbuf it chose to drop to
 * make room.  Returns ENOBUFS only when m itself was dropped.
 */
int
ifq_enqueue(struct ifqueue *ifq, struct mbuf *m)
{
	struct mbuf *dm;

	mtx_enter(&ifq->ifq_mtx);
	dm = ifq->ifq_ops->ifqop_enq(ifq, m);
	if (dm != m) {
		/* m was accepted (something else may have been dropped) */
		ifq->ifq_packets++;
		ifq->ifq_bytes += m->m_pkthdr.len;
		if (ISSET(m->m_flags, M_MCAST))
			ifq->ifq_mcasts++;
	}

	if (dm == NULL)
		ifq->ifq_len++;
	else
		ifq->ifq_qdrops++;
	mtx_leave(&ifq->ifq_mtx);

	/* free outside the mutex */
	if (dm != NULL)
		m_freem(dm);

	return (dm == m ? ENOBUFS : 0);
}

static inline void
ifq_deq_enter(struct ifqueue *ifq)
{
	mtx_enter(&ifq->ifq_mtx);
}

/*
 * Drop the queue mutex and free any mbufs that ifq_mfreem()/ifq_mfreeml()
 * parked on ifq_free while the mutex was held.
 */
static inline void
ifq_deq_leave(struct ifqueue *ifq)
{
	struct mbuf_list ml;

	ml = ifq->ifq_free;
	ml_init(&ifq->ifq_free);

	mtx_leave(&ifq->ifq_mtx);

	if (!ml_empty(&ml))
		ml_purge(&ml);
}

/*
 * Peek at the next packet to transmit.  The queue mutex stays held until
 * the caller calls ifq_deq_commit() or ifq_deq_rollback().  The
 * discipline's cookie is stashed in the pkthdr for the commit step.
 */
struct mbuf *
ifq_deq_begin(struct ifqueue *ifq)
{
	struct mbuf *m = NULL;
	void *cookie;

	ifq_deq_enter(ifq);
	if (ifq->ifq_len == 0 ||
	    (m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie)) == NULL) {
		ifq_deq_leave(ifq);
		return (NULL);
	}

	m->m_pkthdr.ph_cookie = cookie;

	return (m);
}

/* Complete a dequeue started with ifq_deq_begin() and release the mutex. */
void
ifq_deq_commit(struct ifqueue *ifq, struct mbuf *m)
{
	void *cookie;

	KASSERT(m != NULL);
	cookie = m->m_pkthdr.ph_cookie;

	ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
	ifq->ifq_len--;
	ifq_deq_leave(ifq);
}

/* Abandon a dequeue started with ifq_deq_begin(); the packet stays queued. */
void
ifq_deq_rollback(struct ifqueue *ifq, struct mbuf *m)
{
	KASSERT(m != NULL);

	ifq_deq_leave(ifq);
}

/* Dequeue the next packet, or NULL if the queue is empty. */
struct mbuf *
ifq_dequeue(struct ifqueue *ifq)
{
	struct mbuf *m;

	m = ifq_deq_begin(ifq);
	if (m == NULL)
		return (NULL);

	ifq_deq_commit(ifq, m);

	return (m);
}

/* Length in bytes of the packet at the head of the queue, 0 if empty. */
int
ifq_hdatalen(struct ifqueue *ifq)
{
	struct mbuf *m;
	int len = 0;

	m = ifq_deq_begin(ifq);
	if (m != NULL) {
		len = m->m_pkthdr.len;
		ifq_deq_rollback(ifq, m);
	}

	return (len);
}

/* Drop everything in the queue; returns the number of packets purged. */
unsigned int
ifq_purge(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	unsigned int rv;

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	rv = ifq->ifq_len;
	ifq->ifq_len = 0;
	ifq->ifq_qdrops += rv;
	mtx_leave(&ifq->ifq_mtx);

	KASSERT(rv == ml_len(&ml));

	ml_purge(&ml);

	return (rv);
}

/*
 * Give the caller locked access to the discipline state, but only if the
 * queue is still using the expected ops (it may have been re-attached).
 * Returns NULL (without the mutex held) on a mismatch.
 */
void *
ifq_q_enter(struct ifqueue *ifq, const struct ifq_ops *ops)
{
	mtx_enter(&ifq->ifq_mtx);
	if (ifq->ifq_ops == ops)
		return (ifq->ifq_q);

	mtx_leave(&ifq->ifq_mtx);

	return (NULL);
}

/* Release the access taken with ifq_q_enter(). */
void
ifq_q_leave(struct ifqueue *ifq, void *q)
{
	KASSERT(q == ifq->ifq_q);
	mtx_leave(&ifq->ifq_mtx);
}

/*
 * Discipline helper: account a drop for m and defer its free until the
 * queue mutex is released (in ifq_deq_leave()).
 */
void
ifq_mfreem(struct ifqueue *ifq, struct mbuf *m)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len--;
	ifq->ifq_qdrops++;
	ml_enqueue(&ifq->ifq_free, m);
}

/* Like ifq_mfreem(), but for a whole list of mbufs at once. */
void
ifq_mfreeml(struct ifqueue *ifq, struct mbuf_list *ml)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len -= ml_len(ml);
	ifq->ifq_qdrops += ml_len(ml);
	ml_enlist(&ifq->ifq_free, ml);
}

/*
 * ifiq
 */

static void	ifiq_process(void *);

/* Initialise ifiq as receive queue idx of ifp. */
void
ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx)
{
	ifiq->ifiq_if = ifp;
	ifiq->ifiq_softnet = net_tq(ifp->if_index); /* + idx */
	ifiq->ifiq_softc = NULL;

	mtx_init(&ifiq->ifiq_mtx, IPL_NET);
	ml_init(&ifiq->ifiq_ml);
	task_set(&ifiq->ifiq_task, ifiq_process, ifiq);
	ifiq->ifiq_pressure = 0;

	ifiq->ifiq_packets = 0;
	ifiq->ifiq_bytes = 0;
	ifiq->ifiq_qdrops = 0;
	ifiq->ifiq_errors = 0;

	ifiq->ifiq_idx = idx;
}

/* Tear down the ifiq: drain the softnet task and free queued packets. */
void
ifiq_destroy(struct ifiqueue *ifiq)
{
	NET_ASSERT_UNLOCKED();
	if (!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task))
		taskq_barrier(ifiq->ifiq_softnet);

	/* don't need to lock because this is the last use of the ifiq */
	ml_purge(&ifiq->ifiq_ml);
}

/*
 * Backpressure thresholds: each ifiq_input() call without an intervening
 * ifiq_process() bumps the pressure.  Past _drop the new packets are
 * dropped; past _return the caller is told to slow down (e.g. disable
 * interrupts).  Tunable via the sysctls below.
 */
unsigned int ifiq_pressure_drop = 8;
unsigned int ifiq_pressure_return = 6;

/*
 * Queue a list of received packets for processing by softnet.  Returns
 * non-zero when the caller should apply backpressure.  ml is always
 * consumed (queued, bpf-filtered, or freed).
 */
int
ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml)
{
	struct ifnet *ifp = ifiq->ifiq_if;
	struct mbuf *m;
	uint64_t packets;
	uint64_t bytes = 0;
	unsigned int pressure;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	if (ml_empty(ml))
		return (0);

	/* stamp ownership and count bytes before anything can steal mbufs */
	MBUF_LIST_FOREACH(ml, m) {
		m->m_pkthdr.ph_ifidx = ifp->if_index;
		m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
		bytes += m->m_pkthdr.len;
	}
	packets = ml_len(ml);

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		struct mbuf_list ml0 = *ml;

		ml_init(ml);

		/* bpf may consume packets; rebuild ml with the survivors */
		while ((m = ml_dequeue(&ml0)) != NULL) {
			if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN))
				m_freem(m);
			else
				ml_enqueue(ml, m);
		}

		if (ml_empty(ml)) {
			/* bpf ate everything; still account the arrivals */
			mtx_enter(&ifiq->ifiq_mtx);
			ifiq->ifiq_packets += packets;
			ifiq->ifiq_bytes += bytes;
			mtx_leave(&ifiq->ifiq_mtx);

			return (0);
		}
	}
#endif

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_packets += packets;
	ifiq->ifiq_bytes += bytes;

	pressure = ++ifiq->ifiq_pressure;
	if (pressure > ifiq_pressure_drop)
		ifiq->ifiq_qdrops += ml_len(ml);
	else
		ml_enlist(&ifiq->ifiq_ml, ml);
	mtx_leave(&ifiq->ifiq_mtx);

	/* ml empty here means the packets were queued, not dropped */
	if (ml_empty(ml))
		task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);
	else
		ml_purge(ml);

	return (pressure > ifiq_pressure_return);
}

/* Fold this queue's counters into the interface-wide if_data stats. */
void
ifiq_add_data(struct ifiqueue *ifiq, struct if_data *data)
{
	mtx_enter(&ifiq->ifiq_mtx);
	data->ifi_ipackets += ifiq->ifiq_packets;
	data->ifi_ibytes += ifiq->ifiq_bytes;
	data->ifi_iqdrops += ifiq->ifiq_qdrops;
	mtx_leave(&ifiq->ifiq_mtx);
}

/* Queue a single packet for softnet processing; never drops. */
int
ifiq_enqueue(struct ifiqueue *ifiq, struct mbuf *m)
{
	mtx_enter(&ifiq->ifiq_mtx);
	ml_enqueue(&ifiq->ifiq_ml, m);
	mtx_leave(&ifiq->ifiq_mtx);

	task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);

	return (0);
}

/*
 * Softnet task: hand all queued packets to the stack and reset the
 * backpressure counter.
 */
static void
ifiq_process(void *arg)
{
	struct ifiqueue *ifiq = arg;
	struct mbuf_list ml;

	if (ifiq_empty(ifiq))
		return;

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_pressure = 0;
	ml = ifiq->ifiq_ml;
	ml_init(&ifiq->ifiq_ml);
	mtx_leave(&ifiq->ifiq_mtx);

	if_input_process(ifiq->ifiq_if, &ml);
}

/*
 * sysctl handler for the rx-queue pressure knobs.  Enforces
 * 1 <= ifiq_pressure_return <= ifiq_pressure_drop.
 */
int
net_ifiq_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	int val;
	int error;

	if (namelen != 1)
		return (EISDIR);

	switch (name[0]) {
	case NET_LINK_IFRXQ_PRESSURE_RETURN:
		val = ifiq_pressure_return;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &val);
		if (error != 0)
			return (error);
		if (val < 1 || val > ifiq_pressure_drop)
			return (EINVAL);
		ifiq_pressure_return = val;
		break;
	case NET_LINK_IFRXQ_PRESSURE_DROP:
		val = ifiq_pressure_drop;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &val);
		if (error != 0)
			return (error);
		if (ifiq_pressure_return > val)
			return (EINVAL);
		ifiq_pressure_drop = val;
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}

	return (error);
}

/*
 * priq implementation
 */

/* Map a packet to a transmit queue using its flow id, if one is set. */
unsigned int
priq_idx(unsigned int nqueues, const struct mbuf *m)
{
	unsigned int flow = 0;

	if (ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID))
		flow = m->m_pkthdr.ph_flowid & M_FLOWID_MASK;

	return (flow % nqueues);
}

/* Allocate the per-queue priq state (one empty list per priority). */
void *
priq_alloc(unsigned int idx, void *null)
{
	struct priq *pq;
	int i;

	pq = malloc(sizeof(struct priq), M_DEVBUF, M_WAITOK);
	for (i = 0; i < IFQ_NQUEUES; i++)
		ml_init(&pq->pq_lists[i]);
	return (pq);
}

void
priq_free(unsigned int idx, void *pq)
{
	free(pq, M_DEVBUF, sizeof(struct priq));
}

/*
 * Enqueue m at its pf priority.  When the queue is full, a packet from a
 * strictly lower priority list is evicted to make room and returned to
 * the caller for freeing; if no such victim exists, m itself is
 * rejected (returned).
 */
struct mbuf *
priq_enq(struct ifqueue *ifq, struct mbuf *m)
{
	struct priq *pq;
	struct mbuf_list *pl;
	struct mbuf *n = NULL;
	unsigned int prio;

	pq = ifq->ifq_q;
	KASSERT(m->m_pkthdr.pf.prio <= IFQ_MAXPRIO);

	/* Find a lower priority queue to drop from */
	if (ifq_len(ifq) >= ifq->ifq_maxlen) {
		for (prio = 0; prio < m->m_pkthdr.pf.prio; prio++) {
			pl = &pq->pq_lists[prio];
			if (ml_len(pl) > 0) {
				n = ml_dequeue(pl);
				goto enqueue;
			}
		}
		/*
		 * There's no lower priority queue that we can
		 * drop from so don't enqueue this one.
		 */
		return (m);
	}

 enqueue:
	pl = &pq->pq_lists[m->m_pkthdr.pf.prio];
	ml_enqueue(pl, m);

	return (n);
}

/*
 * Peek at the highest-priority pending packet, scanning from the top
 * priority down.  The chosen list is handed back via the cookie for
 * priq_deq_commit().
 */
struct mbuf *
priq_deq_begin(struct ifqueue *ifq, void **cookiep)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);
	struct mbuf *m;

	do {
		pl = &pq->pq_lists[--prio];
		m = MBUF_LIST_FIRST(pl);
		if (m != NULL) {
			*cookiep = pl;
			return (m);
		}
	} while (prio > 0);

	return (NULL);
}

/* Remove the previously peeked packet from the list picked at begin time. */
void
priq_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
{
	struct mbuf_list *pl = cookie;

	KASSERT(MBUF_LIST_FIRST(pl) == m);

	ml_dequeue(pl);
}

/* Move every queued packet, all priorities, onto ml. */
void
priq_purge(struct ifqueue *ifq, struct mbuf_list *ml)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);

	do {
		pl = &pq->pq_lists[--prio];
		ml_enlist(ml, pl);
	} while (prio > 0);
}