/* ifq.c revision 1.27 */
/*	$OpenBSD: ifq.c,v 1.27 2019/03/04 21:34:08 dlg Exp $ */

/*
 * Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bpfilter.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/mbuf.h>
#include <sys/proc.h>

#include <net/if.h>
#include <net/if_var.h>

#if NBPFILTER > 0
#include <net/bpf.h>
#endif

/*
 * priq glue
 *
 * Forward declarations for the priority-queue discipline that every
 * ifqueue uses by default (see ifq_init() below).  The implementation
 * lives at the bottom of this file.
 */
unsigned int	 priq_idx(unsigned int, const struct mbuf *);
struct mbuf	*priq_enq(struct ifqueue *, struct mbuf *);
struct mbuf	*priq_deq_begin(struct ifqueue *, void **);
void		 priq_deq_commit(struct ifqueue *, struct mbuf *, void *);
void		 priq_purge(struct ifqueue *, struct mbuf_list *);

void		*priq_alloc(unsigned int, void *);
void		 priq_free(unsigned int, void *);

/* ops vector wiring the priq functions up as an ifq queue discipline */
const struct ifq_ops priq_ops = {
	priq_idx,
	priq_enq,
	priq_deq_begin,
	priq_deq_commit,
	priq_purge,
	priq_alloc,
	priq_free,
};

const struct ifq_ops * const ifq_priq_ops = &priq_ops;

/*
 * priq internal structures
 */

/* one FIFO mbuf list per priority level */
struct priq {
	struct mbuf_list	 pq_lists[IFQ_NQUEUES];
};

/*
 * ifqueue serialiser
 */

void	ifq_start_task(void *);
void	ifq_restart_task(void *);
void
	ifq_barrier_task(void *);

/*
 * Run tasks in the ifq's serialisation context.
 *
 * The task t is put on the ifq's task list (unless it is already
 * queued).  If no cpu currently owns the serialiser, this cpu takes
 * ownership and runs every queued task to completion before releasing
 * it; otherwise the task is left for the owning cpu to run.  The task
 * mutex is dropped around each callback, so callbacks may themselves
 * queue more work.
 */
void
ifq_serialize(struct ifqueue *ifq, struct task *t)
{
	struct task work;

	/* unlocked fast path: already queued means nothing to do */
	if (ISSET(t->t_flags, TASK_ONQUEUE))
		return;

	mtx_enter(&ifq->ifq_task_mtx);
	/* re-check under the lock before inserting */
	if (!ISSET(t->t_flags, TASK_ONQUEUE)) {
		SET(t->t_flags, TASK_ONQUEUE);
		TAILQ_INSERT_TAIL(&ifq->ifq_task_list, t, t_entry);
	}

	if (ifq->ifq_serializer == NULL) {
		/* nobody is draining the list; this cpu becomes the owner */
		ifq->ifq_serializer = curcpu();

		while ((t = TAILQ_FIRST(&ifq->ifq_task_list)) != NULL) {
			TAILQ_REMOVE(&ifq->ifq_task_list, t, t_entry);
			CLR(t->t_flags, TASK_ONQUEUE);
			work = *t; /* copy to caller to avoid races */

			mtx_leave(&ifq->ifq_task_mtx);

			(*work.t_func)(work.t_arg);

			mtx_enter(&ifq->ifq_task_mtx);
		}

		ifq->ifq_serializer = NULL;
	}
	mtx_leave(&ifq->ifq_task_mtx);
}

/* true if the calling cpu currently owns the ifq serialiser */
int
ifq_is_serialized(struct ifqueue *ifq)
{
	return (ifq->ifq_serializer == curcpu());
}

/*
 * Serialised task: kick the driver's start routine, unless the
 * interface is down, the queue is empty, or the driver has flagged
 * itself output-active.
 */
void
ifq_start_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
	    ifq_empty(ifq) || ifq_is_oactive(ifq))
		return;

	ifp->if_qstart(ifq);
}

/* serialised task: clear oactive and unconditionally restart output */
void
ifq_restart_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	ifq_clr_oactive(ifq);
	ifp->if_qstart(ifq);
}

/*
 * Wait until all tasks queued on the ifq before this call have run.
 * Implemented by queueing a task that signals a condition variable and
 * sleeping on it.  If no serialiser is active there is nothing to wait
 * for.
 */
void
ifq_barrier(struct ifqueue *ifq)
{
	struct cond c = COND_INITIALIZER();
	struct task t = TASK_INITIALIZER(ifq_barrier_task, &c);

	if (ifq->ifq_serializer == NULL)
		return;

	ifq_serialize(ifq, &t);

	cond_wait(&c, "ifqbar");
}

/* the barrier task itself: wake the thread sleeping in ifq_barrier() */
void
ifq_barrier_task(void *p)
{
	struct cond *c = p;

	cond_signal(c);
}

/*
 * ifqueue mbuf queue API
 */

/*
 * Initialise an output queue for interface ifp.  The queue starts out
 * attached to the default priq discipline.  idx is the queue's index
 * within the interface.
 */
void
ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
{
	ifq->ifq_if = ifp;
	ifq->ifq_softc = NULL;

	mtx_init(&ifq->ifq_mtx, IPL_NET);

	/* default to priq */
	ifq->ifq_ops =
	    &priq_ops;
	ifq->ifq_q = priq_ops.ifqop_alloc(idx, NULL);

	ml_init(&ifq->ifq_free);
	ifq->ifq_len = 0;

	ifq->ifq_packets = 0;
	ifq->ifq_bytes = 0;
	ifq->ifq_qdrops = 0;
	ifq->ifq_errors = 0;
	ifq->ifq_mcasts = 0;

	mtx_init(&ifq->ifq_task_mtx, IPL_NET);
	TAILQ_INIT(&ifq->ifq_task_list);
	ifq->ifq_serializer = NULL;

	task_set(&ifq->ifq_start, ifq_start_task, ifq);
	task_set(&ifq->ifq_restart, ifq_restart_task, ifq);

	/* keep a maxlen the driver may already have configured */
	if (ifq->ifq_maxlen == 0)
		ifq_set_maxlen(ifq, IFQ_MAXLEN);

	ifq->ifq_idx = idx;
}

/*
 * Switch the queue to a new discipline (newops/opsarg).  Packets held
 * by the old discipline are purged and re-enqueued into the new one;
 * whatever the new discipline rejects is counted as a qdrop and freed.
 * The old discipline's private state is freed after the switch.
 */
void
ifq_attach(struct ifqueue *ifq, const struct ifq_ops *newops, void *opsarg)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;
	const struct ifq_ops *oldops;
	void *newq, *oldq;

	/* allocate outside the mutex; ifqop_alloc may sleep */
	newq = newops->ifqop_alloc(ifq->ifq_idx, opsarg);

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_len = 0;

	oldops = ifq->ifq_ops;
	oldq = ifq->ifq_q;

	ifq->ifq_ops = newops;
	ifq->ifq_q = newq;

	while ((m = ml_dequeue(&ml)) != NULL) {
		m = ifq->ifq_ops->ifqop_enq(ifq, m);
		if (m != NULL) {
			/* new discipline rejected it; free it later */
			ifq->ifq_qdrops++;
			ml_enqueue(&free_ml, m);
		} else
			ifq->ifq_len++;
	}
	mtx_leave(&ifq->ifq_mtx);

	/* release old state and rejected packets outside the mutex */
	oldops->ifqop_free(ifq->ifq_idx, oldq);

	ml_purge(&free_ml);
}

/* tear down a queue: drop queued packets and the discipline's state */
void
ifq_destroy(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();

	/* don't need to lock because this is the last use of the ifq */

	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_ops->ifqop_free(ifq->ifq_idx, ifq->ifq_q);

	ml_purge(&ml);
}

/* fold this queue's output counters into the interface's if_data */
void
ifq_add_data(struct ifqueue *ifq, struct if_data *data)
{
	mtx_enter(&ifq->ifq_mtx);
	data->ifi_opackets += ifq->ifq_packets;
	data->ifi_obytes += ifq->ifq_bytes;
	data->ifi_oqdrops += ifq->ifq_qdrops;
	data->ifi_omcasts +=
	    ifq->ifq_mcasts;
	/* ifp->if_data.ifi_oerrors */
	mtx_leave(&ifq->ifq_mtx);
}

/*
 * Enqueue m for transmission.  The discipline may accept the packet
 * (returns NULL), reject it (returns m itself), or accept it while
 * evicting a different packet to make room (returns the evicted mbuf).
 * Whatever comes back is freed here.  Returns ENOBUFS only when m
 * itself was rejected.
 */
int
ifq_enqueue(struct ifqueue *ifq, struct mbuf *m)
{
	struct mbuf *dm;

	mtx_enter(&ifq->ifq_mtx);
	dm = ifq->ifq_ops->ifqop_enq(ifq, m);
	if (dm != m) {
		/* m was accepted: account for it */
		ifq->ifq_packets++;
		ifq->ifq_bytes += m->m_pkthdr.len;
		if (ISSET(m->m_flags, M_MCAST))
			ifq->ifq_mcasts++;
	}

	if (dm == NULL)
		ifq->ifq_len++;
	else
		ifq->ifq_qdrops++;
	mtx_leave(&ifq->ifq_mtx);

	/* free the rejected/evicted packet outside the mutex */
	if (dm != NULL)
		m_freem(dm);

	return (dm == m ? ENOBUFS : 0);
}

/* take the queue mutex for a begin/commit|rollback dequeue cycle */
static inline void
ifq_deq_enter(struct ifqueue *ifq)
{
	mtx_enter(&ifq->ifq_mtx);
}

/*
 * Drop the queue mutex, then free any mbufs that ifq_mfreem(9)-style
 * calls parked on ifq_free while the mutex was held.
 */
static inline void
ifq_deq_leave(struct ifqueue *ifq)
{
	struct mbuf_list ml;

	ml = ifq->ifq_free;
	ml_init(&ifq->ifq_free);

	mtx_leave(&ifq->ifq_mtx);

	if (!ml_empty(&ml))
		ml_purge(&ml);
}

/*
 * Peek at the next packet to transmit.  On success the queue mutex is
 * held until ifq_deq_commit() or ifq_deq_rollback() is called; the
 * discipline's cookie is stashed in the packet header for commit.
 * Returns NULL (mutex released) if the queue is empty.
 */
struct mbuf *
ifq_deq_begin(struct ifqueue *ifq)
{
	struct mbuf *m = NULL;
	void *cookie;

	ifq_deq_enter(ifq);
	if (ifq->ifq_len == 0 ||
	    (m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie)) == NULL) {
		ifq_deq_leave(ifq);
		return (NULL);
	}

	m->m_pkthdr.ph_cookie = cookie;

	return (m);
}

/* finish a dequeue: actually remove m from the queue, drop the mutex */
void
ifq_deq_commit(struct ifqueue *ifq, struct mbuf *m)
{
	void *cookie;

	KASSERT(m != NULL);
	cookie = m->m_pkthdr.ph_cookie;

	ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
	ifq->ifq_len--;
	ifq_deq_leave(ifq);
}

/* abort a dequeue: leave m on the queue, drop the mutex */
void
ifq_deq_rollback(struct ifqueue *ifq, struct mbuf *m)
{
	KASSERT(m != NULL);

	ifq_deq_leave(ifq);
}

/* dequeue the next packet, or NULL if the queue is empty */
struct mbuf *
ifq_dequeue(struct ifqueue *ifq)
{
	struct mbuf *m;

	m = ifq_deq_begin(ifq);
	if (m == NULL)
		return (NULL);

	ifq_deq_commit(ifq, m);

	return (m);
}

/*
 * Return the data length of the packet at the head of the queue
 * (0 if empty) without dequeueing it.
 */
int
ifq_hdatalen(struct ifqueue *ifq)
{
	struct mbuf *m;
	int len = 0;

	m = ifq_deq_begin(ifq);
364 if (m != NULL) { 365 len = m->m_pkthdr.len; 366 ifq_deq_commit(ifq, m); 367 } 368 369 return (len); 370} 371 372unsigned int 373ifq_purge(struct ifqueue *ifq) 374{ 375 struct mbuf_list ml = MBUF_LIST_INITIALIZER(); 376 unsigned int rv; 377 378 mtx_enter(&ifq->ifq_mtx); 379 ifq->ifq_ops->ifqop_purge(ifq, &ml); 380 rv = ifq->ifq_len; 381 ifq->ifq_len = 0; 382 ifq->ifq_qdrops += rv; 383 mtx_leave(&ifq->ifq_mtx); 384 385 KASSERT(rv == ml_len(&ml)); 386 387 ml_purge(&ml); 388 389 return (rv); 390} 391 392void * 393ifq_q_enter(struct ifqueue *ifq, const struct ifq_ops *ops) 394{ 395 mtx_enter(&ifq->ifq_mtx); 396 if (ifq->ifq_ops == ops) 397 return (ifq->ifq_q); 398 399 mtx_leave(&ifq->ifq_mtx); 400 401 return (NULL); 402} 403 404void 405ifq_q_leave(struct ifqueue *ifq, void *q) 406{ 407 KASSERT(q == ifq->ifq_q); 408 mtx_leave(&ifq->ifq_mtx); 409} 410 411void 412ifq_mfreem(struct ifqueue *ifq, struct mbuf *m) 413{ 414 MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx); 415 416 ifq->ifq_len--; 417 ifq->ifq_qdrops++; 418 ml_enqueue(&ifq->ifq_free, m); 419} 420 421void 422ifq_mfreeml(struct ifqueue *ifq, struct mbuf_list *ml) 423{ 424 MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx); 425 426 ifq->ifq_len -= ml_len(ml); 427 ifq->ifq_qdrops += ml_len(ml); 428 ml_enlist(&ifq->ifq_free, ml); 429} 430 431/* 432 * ifiq 433 */ 434 435static void ifiq_process(void *); 436 437void 438ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx) 439{ 440 ifiq->ifiq_if = ifp; 441 ifiq->ifiq_softnet = net_tq(ifp->if_index); /* + idx */ 442 ifiq->ifiq_softc = NULL; 443 444 mtx_init(&ifiq->ifiq_mtx, IPL_NET); 445 ml_init(&ifiq->ifiq_ml); 446 task_set(&ifiq->ifiq_task, ifiq_process, ifiq); 447 ifiq->ifiq_pressure = 0; 448 449 ifiq->ifiq_packets = 0; 450 ifiq->ifiq_bytes = 0; 451 ifiq->ifiq_qdrops = 0; 452 ifiq->ifiq_errors = 0; 453 454 ifiq->ifiq_idx = idx; 455} 456 457void 458ifiq_destroy(struct ifiqueue *ifiq) 459{ 460 if (!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task)) { 461 NET_ASSERT_UNLOCKED(); 462 
		/* task was already running; wait for it to finish */
		taskq_barrier(ifiq->ifiq_softnet);
	}

	/* don't need to lock because this is the last use of the ifiq */
	ml_purge(&ifiq->ifiq_ml);
}

/* backpressure tunables: thresholds on the number of pending tasks */
unsigned int ifiq_pressure_drop = 16;
unsigned int ifiq_pressure_return = 2;

/*
 * Accept a list of received packets from a driver.  Each packet is
 * tagged with the interface index and rdomain, optionally fed to bpf,
 * then staged on the ifiq for the softnet task to process.  If the
 * stack is falling behind (pressure above ifiq_pressure_drop) the
 * whole list is dropped instead.  Returns non-zero once pressure
 * exceeds ifiq_pressure_return, which callers can use as a signal to
 * back off.
 */
int
ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml)
{
	struct ifnet *ifp = ifiq->ifiq_if;
	struct mbuf *m;
	uint64_t packets;
	uint64_t bytes = 0;
	unsigned int pressure;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	if (ml_empty(ml))
		return (0);

	MBUF_LIST_FOREACH(ml, m) {
		m->m_pkthdr.ph_ifidx = ifp->if_index;
		m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
		bytes += m->m_pkthdr.len;
	}
	packets = ml_len(ml);

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		struct mbuf_list ml0 = *ml;

		ml_init(ml);

		/* bpf may consume packets (e.g. filter drops) */
		while ((m = ml_dequeue(&ml0)) != NULL) {
			if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN))
				m_freem(m);
			else
				ml_enqueue(ml, m);
		}

		if (ml_empty(ml)) {
			/* everything was consumed; still count the input */
			mtx_enter(&ifiq->ifiq_mtx);
			ifiq->ifiq_packets += packets;
			ifiq->ifiq_bytes += bytes;
			mtx_leave(&ifiq->ifiq_mtx);

			return (0);
		}
	}
#endif

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_packets += packets;
	ifiq->ifiq_bytes += bytes;

	pressure = ++ifiq->ifiq_pressure;
	if (pressure > ifiq_pressure_drop)
		ifiq->ifiq_qdrops += ml_len(ml);
	else
		ml_enlist(&ifiq->ifiq_ml, ml);
	mtx_leave(&ifiq->ifiq_mtx);

	/* ml is empty iff the packets were staged rather than dropped */
	if (ml_empty(ml))
		task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);
	else
		ml_purge(ml);

	return (pressure > ifiq_pressure_return);
}

/* fold this queue's input counters into the interface's if_data */
void
ifiq_add_data(struct ifiqueue *ifiq, struct if_data *data)
{
	mtx_enter(&ifiq->ifiq_mtx);
	data->ifi_ipackets += ifiq->ifiq_packets;
	data->ifi_ibytes += ifiq->ifiq_bytes;
	data->ifi_iqdrops += ifiq->ifiq_qdrops;
	mtx_leave(&ifiq->ifiq_mtx);
}

/*
 * Wait until the ifiq's softnet task is no longer running: either
 * remove it before it starts, or barrier against the taskq.
 */
void
ifiq_barrier(struct ifiqueue *ifiq)
{
	if
	    (!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task))
		taskq_barrier(ifiq->ifiq_softnet);
}

/* stage a single packet on the ifiq and schedule the softnet task */
int
ifiq_enqueue(struct ifiqueue *ifiq, struct mbuf *m)
{
	mtx_enter(&ifiq->ifiq_mtx);
	ml_enqueue(&ifiq->ifiq_ml, m);
	mtx_leave(&ifiq->ifiq_mtx);

	task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);

	return (0);
}

/*
 * Softnet task: move the staged packets off the ifiq (resetting the
 * pressure counter) and run them through the network stack.
 */
static void
ifiq_process(void *arg)
{
	struct ifiqueue *ifiq = arg;
	struct mbuf_list ml;

	if (ifiq_empty(ifiq))
		return;

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_pressure = 0;
	ml = ifiq->ifiq_ml;
	ml_init(&ifiq->ifiq_ml);
	mtx_leave(&ifiq->ifiq_mtx);

	if_input_process(ifiq->ifiq_if, &ml);
}

/*
 * priq implementation
 */

/* map a packet to one of nqueues transmit queues by its flow id */
unsigned int
priq_idx(unsigned int nqueues, const struct mbuf *m)
{
	unsigned int flow = 0;

	if (ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID))
		flow = m->m_pkthdr.ph_flowid & M_FLOWID_MASK;

	return (flow % nqueues);
}

/* allocate the per-queue priq state: one empty list per priority */
void *
priq_alloc(unsigned int idx, void *null)
{
	struct priq *pq;
	int i;

	pq = malloc(sizeof(struct priq), M_DEVBUF, M_WAITOK);
	for (i = 0; i < IFQ_NQUEUES; i++)
		ml_init(&pq->pq_lists[i]);
	return (pq);
}

void
priq_free(unsigned int idx, void *pq)
{
	free(pq, M_DEVBUF, sizeof(struct priq));
}

/*
 * Enqueue m on the list matching its pf priority.  When the queue is
 * full, a packet from a strictly lower priority list is evicted to
 * make room and returned to the caller (who frees it); if no lower
 * priority packet exists, m itself is returned unqueued.
 */
struct mbuf *
priq_enq(struct ifqueue *ifq, struct mbuf *m)
{
	struct priq *pq;
	struct mbuf_list *pl;
	struct mbuf *n = NULL;
	unsigned int prio;

	pq = ifq->ifq_q;
	KASSERT(m->m_pkthdr.pf.prio <= IFQ_MAXPRIO);

	/* Find a lower priority queue to drop from */
	if (ifq_len(ifq) >= ifq->ifq_maxlen) {
		for (prio = 0; prio < m->m_pkthdr.pf.prio; prio++) {
			pl = &pq->pq_lists[prio];
			if (ml_len(pl) > 0) {
				n = ml_dequeue(pl);
				goto enqueue;
			}
		}
		/*
		 * There's no lower priority queue that we can
		 * drop from so don't enqueue this one.
		 */
		return (m);
	}

 enqueue:
	pl = &pq->pq_lists[m->m_pkthdr.pf.prio];
	ml_enqueue(pl, m);

	/* n is the evicted lower-priority mbuf (or NULL); caller frees it */
	return (n);
}

/*
 * Find the packet that should be transmitted next: scan the lists from
 * the highest priority down and peek at the first non-empty one.  The
 * list is handed back through cookiep for priq_deq_commit().
 */
struct mbuf *
priq_deq_begin(struct ifqueue *ifq, void **cookiep)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);
	struct mbuf *m;

	do {
		pl = &pq->pq_lists[--prio];
		m = MBUF_LIST_FIRST(pl);
		if (m != NULL) {
			*cookiep = pl;
			return (m);
		}
	} while (prio > 0);

	return (NULL);
}

/* remove the previously peeked packet from its list */
void
priq_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
{
	struct mbuf_list *pl = cookie;

	/* m must still be at the head of the list we peeked at */
	KASSERT(MBUF_LIST_FIRST(pl) == m);

	ml_dequeue(pl);
}

/* move every queued packet, all priorities, onto ml */
void
priq_purge(struct ifqueue *ifq, struct mbuf_list *ml)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);

	do {
		pl = &pq->pq_lists[--prio];
		ml_enlist(ml, pl);
	} while (prio > 0);
}