/* ifq.c revision 1.31 */
1/* $OpenBSD: ifq.c,v 1.31 2019/04/16 04:04:19 dlg Exp $ */ 2 3/* 4 * Copyright (c) 2015 David Gwynne <dlg@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19#include "bpfilter.h" 20 21#include <sys/param.h> 22#include <sys/systm.h> 23#include <sys/socket.h> 24#include <sys/mbuf.h> 25#include <sys/proc.h> 26 27#include <net/if.h> 28#include <net/if_var.h> 29 30#if NBPFILTER > 0 31#include <net/bpf.h> 32#endif 33 34/* 35 * priq glue 36 */ 37unsigned int priq_idx(unsigned int, const struct mbuf *); 38struct mbuf *priq_enq(struct ifqueue *, struct mbuf *); 39struct mbuf *priq_deq_begin(struct ifqueue *, void **); 40void priq_deq_commit(struct ifqueue *, struct mbuf *, void *); 41void priq_purge(struct ifqueue *, struct mbuf_list *); 42 43void *priq_alloc(unsigned int, void *); 44void priq_free(unsigned int, void *); 45 46const struct ifq_ops priq_ops = { 47 priq_idx, 48 priq_enq, 49 priq_deq_begin, 50 priq_deq_commit, 51 priq_purge, 52 priq_alloc, 53 priq_free, 54}; 55 56const struct ifq_ops * const ifq_priq_ops = &priq_ops; 57 58/* 59 * priq internal structures 60 */ 61 62struct priq { 63 struct mbuf_list pq_lists[IFQ_NQUEUES]; 64}; 65 66/* 67 * ifqueue serialiser 68 */ 69 70void ifq_start_task(void *); 71void ifq_restart_task(void *); 72void 
ifq_barrier_task(void *); 73void ifq_bundle_task(void *); 74 75static inline void 76ifq_run_start(struct ifqueue *ifq) 77{ 78 ifq_serialize(ifq, &ifq->ifq_start); 79} 80 81void 82ifq_serialize(struct ifqueue *ifq, struct task *t) 83{ 84 struct task work; 85 86 if (ISSET(t->t_flags, TASK_ONQUEUE)) 87 return; 88 89 mtx_enter(&ifq->ifq_task_mtx); 90 if (!ISSET(t->t_flags, TASK_ONQUEUE)) { 91 SET(t->t_flags, TASK_ONQUEUE); 92 TAILQ_INSERT_TAIL(&ifq->ifq_task_list, t, t_entry); 93 } 94 95 if (ifq->ifq_serializer == NULL) { 96 ifq->ifq_serializer = curcpu(); 97 98 while ((t = TAILQ_FIRST(&ifq->ifq_task_list)) != NULL) { 99 TAILQ_REMOVE(&ifq->ifq_task_list, t, t_entry); 100 CLR(t->t_flags, TASK_ONQUEUE); 101 work = *t; /* copy to caller to avoid races */ 102 103 mtx_leave(&ifq->ifq_task_mtx); 104 105 (*work.t_func)(work.t_arg); 106 107 mtx_enter(&ifq->ifq_task_mtx); 108 } 109 110 ifq->ifq_serializer = NULL; 111 } 112 mtx_leave(&ifq->ifq_task_mtx); 113} 114 115int 116ifq_is_serialized(struct ifqueue *ifq) 117{ 118 return (ifq->ifq_serializer == curcpu()); 119} 120 121void 122ifq_start(struct ifqueue *ifq) 123{ 124 if (ifq_len(ifq) >= min(ifq->ifq_if->if_txmit, ifq->ifq_maxlen)) { 125 task_del(ifq->ifq_softnet, &ifq->ifq_bundle); 126 ifq_run_start(ifq); 127 } else 128 task_add(ifq->ifq_softnet, &ifq->ifq_bundle); 129} 130 131void 132ifq_start_task(void *p) 133{ 134 struct ifqueue *ifq = p; 135 struct ifnet *ifp = ifq->ifq_if; 136 137 if (!ISSET(ifp->if_flags, IFF_RUNNING) || 138 ifq_empty(ifq) || ifq_is_oactive(ifq)) 139 return; 140 141 ifp->if_qstart(ifq); 142} 143 144void 145ifq_restart_task(void *p) 146{ 147 struct ifqueue *ifq = p; 148 struct ifnet *ifp = ifq->ifq_if; 149 150 ifq_clr_oactive(ifq); 151 ifp->if_qstart(ifq); 152} 153 154void 155ifq_bundle_task(void *p) 156{ 157 struct ifqueue *ifq = p; 158 159 ifq_run_start(ifq); 160} 161 162void 163ifq_barrier(struct ifqueue *ifq) 164{ 165 struct cond c = COND_INITIALIZER(); 166 struct task t = 
TASK_INITIALIZER(ifq_barrier_task, &c); 167 168 task_del(ifq->ifq_softnet, &ifq->ifq_bundle); 169 170 if (ifq->ifq_serializer == NULL) 171 return; 172 173 ifq_serialize(ifq, &t); 174 175 cond_wait(&c, "ifqbar"); 176} 177 178void 179ifq_barrier_task(void *p) 180{ 181 struct cond *c = p; 182 183 cond_signal(c); 184} 185 186/* 187 * ifqueue mbuf queue API 188 */ 189 190void 191ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx) 192{ 193 ifq->ifq_if = ifp; 194 ifq->ifq_softnet = net_tq(ifp->if_index); /* + idx */ 195 ifq->ifq_softc = NULL; 196 197 mtx_init(&ifq->ifq_mtx, IPL_NET); 198 199 /* default to priq */ 200 ifq->ifq_ops = &priq_ops; 201 ifq->ifq_q = priq_ops.ifqop_alloc(idx, NULL); 202 203 ml_init(&ifq->ifq_free); 204 ifq->ifq_len = 0; 205 206 ifq->ifq_packets = 0; 207 ifq->ifq_bytes = 0; 208 ifq->ifq_qdrops = 0; 209 ifq->ifq_errors = 0; 210 ifq->ifq_mcasts = 0; 211 212 mtx_init(&ifq->ifq_task_mtx, IPL_NET); 213 TAILQ_INIT(&ifq->ifq_task_list); 214 ifq->ifq_serializer = NULL; 215 task_set(&ifq->ifq_bundle, ifq_bundle_task, ifq); 216 217 task_set(&ifq->ifq_start, ifq_start_task, ifq); 218 task_set(&ifq->ifq_restart, ifq_restart_task, ifq); 219 220 if (ifq->ifq_maxlen == 0) 221 ifq_set_maxlen(ifq, IFQ_MAXLEN); 222 223 ifq->ifq_idx = idx; 224} 225 226void 227ifq_attach(struct ifqueue *ifq, const struct ifq_ops *newops, void *opsarg) 228{ 229 struct mbuf_list ml = MBUF_LIST_INITIALIZER(); 230 struct mbuf_list free_ml = MBUF_LIST_INITIALIZER(); 231 struct mbuf *m; 232 const struct ifq_ops *oldops; 233 void *newq, *oldq; 234 235 newq = newops->ifqop_alloc(ifq->ifq_idx, opsarg); 236 237 mtx_enter(&ifq->ifq_mtx); 238 ifq->ifq_ops->ifqop_purge(ifq, &ml); 239 ifq->ifq_len = 0; 240 241 oldops = ifq->ifq_ops; 242 oldq = ifq->ifq_q; 243 244 ifq->ifq_ops = newops; 245 ifq->ifq_q = newq; 246 247 while ((m = ml_dequeue(&ml)) != NULL) { 248 m = ifq->ifq_ops->ifqop_enq(ifq, m); 249 if (m != NULL) { 250 ifq->ifq_qdrops++; 251 ml_enqueue(&free_ml, m); 252 } else 253 
ifq->ifq_len++; 254 } 255 mtx_leave(&ifq->ifq_mtx); 256 257 oldops->ifqop_free(ifq->ifq_idx, oldq); 258 259 ml_purge(&free_ml); 260} 261 262void 263ifq_destroy(struct ifqueue *ifq) 264{ 265 struct mbuf_list ml = MBUF_LIST_INITIALIZER(); 266 267 NET_ASSERT_UNLOCKED(); 268 if (!task_del(ifq->ifq_softnet, &ifq->ifq_bundle)) 269 taskq_barrier(ifq->ifq_softnet); 270 271 /* don't need to lock because this is the last use of the ifq */ 272 273 ifq->ifq_ops->ifqop_purge(ifq, &ml); 274 ifq->ifq_ops->ifqop_free(ifq->ifq_idx, ifq->ifq_q); 275 276 ml_purge(&ml); 277} 278 279void 280ifq_add_data(struct ifqueue *ifq, struct if_data *data) 281{ 282 mtx_enter(&ifq->ifq_mtx); 283 data->ifi_opackets += ifq->ifq_packets; 284 data->ifi_obytes += ifq->ifq_bytes; 285 data->ifi_oqdrops += ifq->ifq_qdrops; 286 data->ifi_omcasts += ifq->ifq_mcasts; 287 /* ifp->if_data.ifi_oerrors */ 288 mtx_leave(&ifq->ifq_mtx); 289} 290 291int 292ifq_enqueue(struct ifqueue *ifq, struct mbuf *m) 293{ 294 struct mbuf *dm; 295 296 mtx_enter(&ifq->ifq_mtx); 297 dm = ifq->ifq_ops->ifqop_enq(ifq, m); 298 if (dm != m) { 299 ifq->ifq_packets++; 300 ifq->ifq_bytes += m->m_pkthdr.len; 301 if (ISSET(m->m_flags, M_MCAST)) 302 ifq->ifq_mcasts++; 303 } 304 305 if (dm == NULL) 306 ifq->ifq_len++; 307 else 308 ifq->ifq_qdrops++; 309 mtx_leave(&ifq->ifq_mtx); 310 311 if (dm != NULL) 312 m_freem(dm); 313 314 return (dm == m ? 
ENOBUFS : 0); 315} 316 317static inline void 318ifq_deq_enter(struct ifqueue *ifq) 319{ 320 mtx_enter(&ifq->ifq_mtx); 321} 322 323static inline void 324ifq_deq_leave(struct ifqueue *ifq) 325{ 326 struct mbuf_list ml; 327 328 ml = ifq->ifq_free; 329 ml_init(&ifq->ifq_free); 330 331 mtx_leave(&ifq->ifq_mtx); 332 333 if (!ml_empty(&ml)) 334 ml_purge(&ml); 335} 336 337struct mbuf * 338ifq_deq_begin(struct ifqueue *ifq) 339{ 340 struct mbuf *m = NULL; 341 void *cookie; 342 343 ifq_deq_enter(ifq); 344 if (ifq->ifq_len == 0 || 345 (m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie)) == NULL) { 346 ifq_deq_leave(ifq); 347 return (NULL); 348 } 349 350 m->m_pkthdr.ph_cookie = cookie; 351 352 return (m); 353} 354 355void 356ifq_deq_commit(struct ifqueue *ifq, struct mbuf *m) 357{ 358 void *cookie; 359 360 KASSERT(m != NULL); 361 cookie = m->m_pkthdr.ph_cookie; 362 363 ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie); 364 ifq->ifq_len--; 365 ifq_deq_leave(ifq); 366} 367 368void 369ifq_deq_rollback(struct ifqueue *ifq, struct mbuf *m) 370{ 371 KASSERT(m != NULL); 372 373 ifq_deq_leave(ifq); 374} 375 376struct mbuf * 377ifq_dequeue(struct ifqueue *ifq) 378{ 379 struct mbuf *m; 380 381 m = ifq_deq_begin(ifq); 382 if (m == NULL) 383 return (NULL); 384 385 ifq_deq_commit(ifq, m); 386 387 return (m); 388} 389 390int 391ifq_hdatalen(struct ifqueue *ifq) 392{ 393 struct mbuf *m; 394 int len = 0; 395 396 m = ifq_deq_begin(ifq); 397 if (m != NULL) { 398 len = m->m_pkthdr.len; 399 ifq_deq_commit(ifq, m); 400 } 401 402 return (len); 403} 404 405unsigned int 406ifq_purge(struct ifqueue *ifq) 407{ 408 struct mbuf_list ml = MBUF_LIST_INITIALIZER(); 409 unsigned int rv; 410 411 mtx_enter(&ifq->ifq_mtx); 412 ifq->ifq_ops->ifqop_purge(ifq, &ml); 413 rv = ifq->ifq_len; 414 ifq->ifq_len = 0; 415 ifq->ifq_qdrops += rv; 416 mtx_leave(&ifq->ifq_mtx); 417 418 KASSERT(rv == ml_len(&ml)); 419 420 ml_purge(&ml); 421 422 return (rv); 423} 424 425void * 426ifq_q_enter(struct ifqueue *ifq, const struct ifq_ops 
*ops) 427{ 428 mtx_enter(&ifq->ifq_mtx); 429 if (ifq->ifq_ops == ops) 430 return (ifq->ifq_q); 431 432 mtx_leave(&ifq->ifq_mtx); 433 434 return (NULL); 435} 436 437void 438ifq_q_leave(struct ifqueue *ifq, void *q) 439{ 440 KASSERT(q == ifq->ifq_q); 441 mtx_leave(&ifq->ifq_mtx); 442} 443 444void 445ifq_mfreem(struct ifqueue *ifq, struct mbuf *m) 446{ 447 MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx); 448 449 ifq->ifq_len--; 450 ifq->ifq_qdrops++; 451 ml_enqueue(&ifq->ifq_free, m); 452} 453 454void 455ifq_mfreeml(struct ifqueue *ifq, struct mbuf_list *ml) 456{ 457 MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx); 458 459 ifq->ifq_len -= ml_len(ml); 460 ifq->ifq_qdrops += ml_len(ml); 461 ml_enlist(&ifq->ifq_free, ml); 462} 463 464/* 465 * ifiq 466 */ 467 468static void ifiq_process(void *); 469 470void 471ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx) 472{ 473 ifiq->ifiq_if = ifp; 474 ifiq->ifiq_softnet = net_tq(ifp->if_index); /* + idx */ 475 ifiq->ifiq_softc = NULL; 476 477 mtx_init(&ifiq->ifiq_mtx, IPL_NET); 478 ml_init(&ifiq->ifiq_ml); 479 task_set(&ifiq->ifiq_task, ifiq_process, ifiq); 480 ifiq->ifiq_pressure = 0; 481 482 ifiq->ifiq_packets = 0; 483 ifiq->ifiq_bytes = 0; 484 ifiq->ifiq_qdrops = 0; 485 ifiq->ifiq_errors = 0; 486 487 ifiq->ifiq_idx = idx; 488} 489 490void 491ifiq_destroy(struct ifiqueue *ifiq) 492{ 493 NET_ASSERT_UNLOCKED(); 494 if (!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task)) 495 taskq_barrier(ifiq->ifiq_softnet); 496 497 /* don't need to lock because this is the last use of the ifiq */ 498 ml_purge(&ifiq->ifiq_ml); 499} 500 501unsigned int ifiq_maxlen_drop = 2048 * 5; 502unsigned int ifiq_maxlen_return = 2048 * 3; 503 504int 505ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml) 506{ 507 struct ifnet *ifp = ifiq->ifiq_if; 508 struct mbuf *m; 509 uint64_t packets; 510 uint64_t bytes = 0; 511 unsigned int len; 512#if NBPFILTER > 0 513 caddr_t if_bpf; 514#endif 515 516 if (ml_empty(ml)) 517 return (0); 518 519 MBUF_LIST_FOREACH(ml, m) { 520 
m->m_pkthdr.ph_ifidx = ifp->if_index; 521 m->m_pkthdr.ph_rtableid = ifp->if_rdomain; 522 bytes += m->m_pkthdr.len; 523 } 524 packets = ml_len(ml); 525 526#if NBPFILTER > 0 527 if_bpf = ifp->if_bpf; 528 if (if_bpf) { 529 struct mbuf_list ml0 = *ml; 530 531 ml_init(ml); 532 533 while ((m = ml_dequeue(&ml0)) != NULL) { 534 if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN)) 535 m_freem(m); 536 else 537 ml_enqueue(ml, m); 538 } 539 540 if (ml_empty(ml)) { 541 mtx_enter(&ifiq->ifiq_mtx); 542 ifiq->ifiq_packets += packets; 543 ifiq->ifiq_bytes += bytes; 544 mtx_leave(&ifiq->ifiq_mtx); 545 546 return (0); 547 } 548 } 549#endif 550 551 mtx_enter(&ifiq->ifiq_mtx); 552 ifiq->ifiq_packets += packets; 553 ifiq->ifiq_bytes += bytes; 554 555 len = ml_len(&ifiq->ifiq_ml); 556 if (len > ifiq_maxlen_drop) 557 ifiq->ifiq_qdrops += ml_len(ml); 558 else 559 ml_enlist(&ifiq->ifiq_ml, ml); 560 mtx_leave(&ifiq->ifiq_mtx); 561 562 if (ml_empty(ml)) 563 task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task); 564 else 565 ml_purge(ml); 566 567 return (len > ifiq_maxlen_return); 568} 569 570void 571ifiq_add_data(struct ifiqueue *ifiq, struct if_data *data) 572{ 573 mtx_enter(&ifiq->ifiq_mtx); 574 data->ifi_ipackets += ifiq->ifiq_packets; 575 data->ifi_ibytes += ifiq->ifiq_bytes; 576 data->ifi_iqdrops += ifiq->ifiq_qdrops; 577 mtx_leave(&ifiq->ifiq_mtx); 578} 579 580int 581ifiq_enqueue(struct ifiqueue *ifiq, struct mbuf *m) 582{ 583 mtx_enter(&ifiq->ifiq_mtx); 584 ml_enqueue(&ifiq->ifiq_ml, m); 585 mtx_leave(&ifiq->ifiq_mtx); 586 587 task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task); 588 589 return (0); 590} 591 592static void 593ifiq_process(void *arg) 594{ 595 struct ifiqueue *ifiq = arg; 596 struct mbuf_list ml; 597 598 if (ifiq_empty(ifiq)) 599 return; 600 601 mtx_enter(&ifiq->ifiq_mtx); 602 ml = ifiq->ifiq_ml; 603 ml_init(&ifiq->ifiq_ml); 604 mtx_leave(&ifiq->ifiq_mtx); 605 606 if_input_process(ifiq->ifiq_if, &ml); 607} 608 609/* 610 * priq implementation 611 */ 612 613unsigned int 
614priq_idx(unsigned int nqueues, const struct mbuf *m) 615{ 616 unsigned int flow = 0; 617 618 if (ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID)) 619 flow = m->m_pkthdr.ph_flowid & M_FLOWID_MASK; 620 621 return (flow % nqueues); 622} 623 624void * 625priq_alloc(unsigned int idx, void *null) 626{ 627 struct priq *pq; 628 int i; 629 630 pq = malloc(sizeof(struct priq), M_DEVBUF, M_WAITOK); 631 for (i = 0; i < IFQ_NQUEUES; i++) 632 ml_init(&pq->pq_lists[i]); 633 return (pq); 634} 635 636void 637priq_free(unsigned int idx, void *pq) 638{ 639 free(pq, M_DEVBUF, sizeof(struct priq)); 640} 641 642struct mbuf * 643priq_enq(struct ifqueue *ifq, struct mbuf *m) 644{ 645 struct priq *pq; 646 struct mbuf_list *pl; 647 struct mbuf *n = NULL; 648 unsigned int prio; 649 650 pq = ifq->ifq_q; 651 KASSERT(m->m_pkthdr.pf.prio <= IFQ_MAXPRIO); 652 653 /* Find a lower priority queue to drop from */ 654 if (ifq_len(ifq) >= ifq->ifq_maxlen) { 655 for (prio = 0; prio < m->m_pkthdr.pf.prio; prio++) { 656 pl = &pq->pq_lists[prio]; 657 if (ml_len(pl) > 0) { 658 n = ml_dequeue(pl); 659 goto enqueue; 660 } 661 } 662 /* 663 * There's no lower priority queue that we can 664 * drop from so don't enqueue this one. 
665 */ 666 return (m); 667 } 668 669 enqueue: 670 pl = &pq->pq_lists[m->m_pkthdr.pf.prio]; 671 ml_enqueue(pl, m); 672 673 return (n); 674} 675 676struct mbuf * 677priq_deq_begin(struct ifqueue *ifq, void **cookiep) 678{ 679 struct priq *pq = ifq->ifq_q; 680 struct mbuf_list *pl; 681 unsigned int prio = nitems(pq->pq_lists); 682 struct mbuf *m; 683 684 do { 685 pl = &pq->pq_lists[--prio]; 686 m = MBUF_LIST_FIRST(pl); 687 if (m != NULL) { 688 *cookiep = pl; 689 return (m); 690 } 691 } while (prio > 0); 692 693 return (NULL); 694} 695 696void 697priq_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie) 698{ 699 struct mbuf_list *pl = cookie; 700 701 KASSERT(MBUF_LIST_FIRST(pl) == m); 702 703 ml_dequeue(pl); 704} 705 706void 707priq_purge(struct ifqueue *ifq, struct mbuf_list *ml) 708{ 709 struct priq *pq = ifq->ifq_q; 710 struct mbuf_list *pl; 711 unsigned int prio = nitems(pq->pq_lists); 712 713 do { 714 pl = &pq->pq_lists[--prio]; 715 ml_enlist(ml, pl); 716 } while (prio > 0); 717} 718