ifq.c revision 1.30
1/* $OpenBSD: ifq.c,v 1.30 2019/03/29 04:21:55 dlg Exp $ */ 2 3/* 4 * Copyright (c) 2015 David Gwynne <dlg@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19#include "bpfilter.h" 20 21#include <sys/param.h> 22#include <sys/systm.h> 23#include <sys/socket.h> 24#include <sys/mbuf.h> 25#include <sys/proc.h> 26 27#include <net/if.h> 28#include <net/if_var.h> 29 30#if NBPFILTER > 0 31#include <net/bpf.h> 32#endif 33 34/* 35 * priq glue 36 */ 37unsigned int priq_idx(unsigned int, const struct mbuf *); 38struct mbuf *priq_enq(struct ifqueue *, struct mbuf *); 39struct mbuf *priq_deq_begin(struct ifqueue *, void **); 40void priq_deq_commit(struct ifqueue *, struct mbuf *, void *); 41void priq_purge(struct ifqueue *, struct mbuf_list *); 42 43void *priq_alloc(unsigned int, void *); 44void priq_free(unsigned int, void *); 45 46const struct ifq_ops priq_ops = { 47 priq_idx, 48 priq_enq, 49 priq_deq_begin, 50 priq_deq_commit, 51 priq_purge, 52 priq_alloc, 53 priq_free, 54}; 55 56const struct ifq_ops * const ifq_priq_ops = &priq_ops; 57 58/* 59 * priq internal structures 60 */ 61 62struct priq { 63 struct mbuf_list pq_lists[IFQ_NQUEUES]; 64}; 65 66/* 67 * ifqueue serialiser 68 */ 69 70void ifq_start_task(void *); 71void ifq_restart_task(void *); 72void ifq_barrier_task(void *); 73 74void 75ifq_serialize(struct ifqueue *ifq, struct task *t) 76{ 77 struct task work; 78 79 if (ISSET(t->t_flags, TASK_ONQUEUE)) 80 return; 81 82 mtx_enter(&ifq->ifq_task_mtx); 83 if (!ISSET(t->t_flags, TASK_ONQUEUE)) { 84 SET(t->t_flags, TASK_ONQUEUE); 85 TAILQ_INSERT_TAIL(&ifq->ifq_task_list, t, t_entry); 86 } 87 88 if (ifq->ifq_serializer == NULL) { 89 ifq->ifq_serializer = curcpu(); 90 91 while ((t = TAILQ_FIRST(&ifq->ifq_task_list)) != NULL) { 92 TAILQ_REMOVE(&ifq->ifq_task_list, t, t_entry); 93 CLR(t->t_flags, TASK_ONQUEUE); 94 work = *t; /* copy to caller to avoid races */ 95 96 mtx_leave(&ifq->ifq_task_mtx); 97 98 (*work.t_func)(work.t_arg); 99 100 mtx_enter(&ifq->ifq_task_mtx); 101 } 102 103 ifq->ifq_serializer = NULL; 104 } 105 mtx_leave(&ifq->ifq_task_mtx); 106} 107 108int 109ifq_is_serialized(struct ifqueue *ifq) 110{ 111 return (ifq->ifq_serializer == curcpu()); 112} 113 114void 115ifq_start_task(void *p) 116{ 117 struct ifqueue *ifq = p; 118 struct ifnet *ifp = ifq->ifq_if; 119 120 if (!ISSET(ifp->if_flags, IFF_RUNNING) || 121 ifq_empty(ifq) || ifq_is_oactive(ifq)) 122 return; 123 124 ifp->if_qstart(ifq); 125} 126 127void 128ifq_restart_task(void *p) 129{ 130 struct ifqueue *ifq = p; 131 struct ifnet *ifp = ifq->ifq_if; 132 133 ifq_clr_oactive(ifq); 134 ifp->if_qstart(ifq); 135} 136 137void 138ifq_barrier(struct ifqueue *ifq) 139{ 140 struct cond c = COND_INITIALIZER(); 141 struct task t = TASK_INITIALIZER(ifq_barrier_task, &c); 142 143 if (ifq->ifq_serializer == NULL) 144 return; 145 146 ifq_serialize(ifq, &t); 147 148 cond_wait(&c, "ifqbar"); 149} 150 151void 152ifq_barrier_task(void *p) 153{ 154 struct cond *c = p; 155 156 cond_signal(c); 157} 158 159/* 160 * ifqueue mbuf queue API 161 */ 162 163void 164ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx) 165{ 166 ifq->ifq_if = ifp; 167 ifq->ifq_softc = NULL; 168 169 mtx_init(&ifq->ifq_mtx, IPL_NET); 170 171 /* default to priq */ 172 ifq->ifq_ops = &priq_ops; 173 ifq->ifq_q = priq_ops.ifqop_alloc(idx, NULL); 174 175 ml_init(&ifq->ifq_free); 176 ifq->ifq_len = 0; 177 178 ifq->ifq_packets = 0; 179 ifq->ifq_bytes = 0; 180 ifq->ifq_qdrops = 0; 181 ifq->ifq_errors = 0; 182 ifq->ifq_mcasts = 0; 183 184 mtx_init(&ifq->ifq_task_mtx, IPL_NET); 185 TAILQ_INIT(&ifq->ifq_task_list); 186 ifq->ifq_serializer = NULL; 187 188 task_set(&ifq->ifq_start, ifq_start_task, ifq); 189 task_set(&ifq->ifq_restart, ifq_restart_task, ifq); 190 191 if (ifq->ifq_maxlen == 0) 192 ifq_set_maxlen(ifq, IFQ_MAXLEN); 193 194 ifq->ifq_idx = idx; 195} 196 197void 198ifq_attach(struct ifqueue *ifq, const struct ifq_ops *newops, void *opsarg) 199{ 200 struct mbuf_list ml = MBUF_LIST_INITIALIZER(); 201 struct mbuf_list free_ml = MBUF_LIST_INITIALIZER(); 202 struct mbuf *m; 203 const struct ifq_ops *oldops; 204 void *newq, *oldq; 205 206 newq = newops->ifqop_alloc(ifq->ifq_idx, opsarg); 207 208 mtx_enter(&ifq->ifq_mtx); 209 ifq->ifq_ops->ifqop_purge(ifq, &ml); 210 ifq->ifq_len = 0; 211 212 oldops = ifq->ifq_ops; 213 oldq = ifq->ifq_q; 214 215 ifq->ifq_ops = newops; 216 ifq->ifq_q = newq; 217 218 while ((m = ml_dequeue(&ml)) != NULL) { 219 m = ifq->ifq_ops->ifqop_enq(ifq, m); 220 if (m != NULL) { 221 ifq->ifq_qdrops++; 222 ml_enqueue(&free_ml, m); 223 } else 224 ifq->ifq_len++; 225 } 226 mtx_leave(&ifq->ifq_mtx); 227 228 oldops->ifqop_free(ifq->ifq_idx, oldq); 229 230 ml_purge(&free_ml); 231} 232 233void 234ifq_destroy(struct ifqueue *ifq) 235{ 236 struct mbuf_list ml = MBUF_LIST_INITIALIZER(); 237 238 /* don't need to lock because this is the last use of the ifq */ 239 240 ifq->ifq_ops->ifqop_purge(ifq, &ml); 241 ifq->ifq_ops->ifqop_free(ifq->ifq_idx, ifq->ifq_q); 242 243 ml_purge(&ml); 244} 245 246void 247ifq_add_data(struct ifqueue *ifq, struct if_data *data) 248{ 249 mtx_enter(&ifq->ifq_mtx); 250 data->ifi_opackets += ifq->ifq_packets; 251 data->ifi_obytes += ifq->ifq_bytes; 252 data->ifi_oqdrops += ifq->ifq_qdrops; 253 data->ifi_omcasts += ifq->ifq_mcasts; 254 /* ifp->if_data.ifi_oerrors */ 255 mtx_leave(&ifq->ifq_mtx); 256} 257 258int 259ifq_enqueue(struct ifqueue *ifq, struct mbuf *m) 260{ 261 struct mbuf *dm; 262 263 mtx_enter(&ifq->ifq_mtx); 264 dm = ifq->ifq_ops->ifqop_enq(ifq, m); 265 if (dm != m) { 266 ifq->ifq_packets++; 267 ifq->ifq_bytes += m->m_pkthdr.len; 268 if (ISSET(m->m_flags, M_MCAST)) 269 ifq->ifq_mcasts++; 270 } 271 272 if (dm == NULL) 273 ifq->ifq_len++; 274 else 275 ifq->ifq_qdrops++; 276 mtx_leave(&ifq->ifq_mtx); 277 278 if (dm != NULL) 279 m_freem(dm); 280 281 return (dm == m ? ENOBUFS : 0); 282} 283 284static inline void 285ifq_deq_enter(struct ifqueue *ifq) 286{ 287 mtx_enter(&ifq->ifq_mtx); 288} 289 290static inline void 291ifq_deq_leave(struct ifqueue *ifq) 292{ 293 struct mbuf_list ml; 294 295 ml = ifq->ifq_free; 296 ml_init(&ifq->ifq_free); 297 298 mtx_leave(&ifq->ifq_mtx); 299 300 if (!ml_empty(&ml)) 301 ml_purge(&ml); 302} 303 304struct mbuf * 305ifq_deq_begin(struct ifqueue *ifq) 306{ 307 struct mbuf *m = NULL; 308 void *cookie; 309 310 ifq_deq_enter(ifq); 311 if (ifq->ifq_len == 0 || 312 (m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie)) == NULL) { 313 ifq_deq_leave(ifq); 314 return (NULL); 315 } 316 317 m->m_pkthdr.ph_cookie = cookie; 318 319 return (m); 320} 321 322void 323ifq_deq_commit(struct ifqueue *ifq, struct mbuf *m) 324{ 325 void *cookie; 326 327 KASSERT(m != NULL); 328 cookie = m->m_pkthdr.ph_cookie; 329 330 ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie); 331 ifq->ifq_len--; 332 ifq_deq_leave(ifq); 333} 334 335void 336ifq_deq_rollback(struct ifqueue *ifq, struct mbuf *m) 337{ 338 KASSERT(m != NULL); 339 340 ifq_deq_leave(ifq); 341} 342 343struct mbuf * 344ifq_dequeue(struct ifqueue *ifq) 345{ 346 struct mbuf *m; 347 348 m = ifq_deq_begin(ifq); 349 if (m == NULL) 350 return (NULL); 351 352 ifq_deq_commit(ifq, m); 353 354 return (m); 355} 356 357int 358ifq_hdatalen(struct ifqueue *ifq) 359{ 360 struct mbuf *m; 361 int len = 0; 362 363 m = ifq_deq_begin(ifq); 364 if (m != NULL) { 365 len = m->m_pkthdr.len; 366 ifq_deq_commit(ifq, m); 367 } 368 369 return (len); 370} 371 372unsigned int 373ifq_purge(struct ifqueue *ifq) 374{ 375 struct mbuf_list ml = MBUF_LIST_INITIALIZER(); 376 unsigned int rv; 377 378 mtx_enter(&ifq->ifq_mtx); 379 ifq->ifq_ops->ifqop_purge(ifq, &ml); 380 rv = ifq->ifq_len; 381 ifq->ifq_len = 0; 382 ifq->ifq_qdrops += rv; 383 mtx_leave(&ifq->ifq_mtx); 384 385 KASSERT(rv == ml_len(&ml)); 386 387 ml_purge(&ml); 388 389 return (rv); 390} 391 392void * 393ifq_q_enter(struct ifqueue *ifq, const struct ifq_ops *ops) 394{ 395 mtx_enter(&ifq->ifq_mtx); 396 if (ifq->ifq_ops == ops) 397 return (ifq->ifq_q); 398 399 mtx_leave(&ifq->ifq_mtx); 400 401 return (NULL); 402} 403 404void 405ifq_q_leave(struct ifqueue *ifq, void *q) 406{ 407 KASSERT(q == ifq->ifq_q); 408 mtx_leave(&ifq->ifq_mtx); 409} 410 411void 412ifq_mfreem(struct ifqueue *ifq, struct mbuf *m) 413{ 414 MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx); 415 416 ifq->ifq_len--; 417 ifq->ifq_qdrops++; 418 ml_enqueue(&ifq->ifq_free, m); 419} 420 421void 422ifq_mfreeml(struct ifqueue *ifq, struct mbuf_list *ml) 423{ 424 MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx); 425 426 ifq->ifq_len -= ml_len(ml); 427 ifq->ifq_qdrops += ml_len(ml); 428 ml_enlist(&ifq->ifq_free, ml); 429} 430 431/* 432 * ifiq 433 */ 434 435static void ifiq_process(void *); 436 437void 438ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx) 439{ 440 ifiq->ifiq_if = ifp; 441 ifiq->ifiq_softnet = net_tq(ifp->if_index); /* + idx */ 442 ifiq->ifiq_softc = NULL; 443 444 mtx_init(&ifiq->ifiq_mtx, IPL_NET); 445 ml_init(&ifiq->ifiq_ml); 446 task_set(&ifiq->ifiq_task, ifiq_process, ifiq); 447 ifiq->ifiq_pressure = 0; 448 449 ifiq->ifiq_packets = 0; 450 ifiq->ifiq_bytes = 0; 451 ifiq->ifiq_qdrops = 0; 452 ifiq->ifiq_errors = 0; 453 454 ifiq->ifiq_idx = idx; 455} 456 457void 458ifiq_destroy(struct ifiqueue *ifiq) 459{ 460 NET_ASSERT_UNLOCKED(); 461 if (!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task)) 462 taskq_barrier(ifiq->ifiq_softnet); 463 464 /* don't need to lock because this is the last use of the ifiq */ 465 ml_purge(&ifiq->ifiq_ml); 466} 467 468unsigned int ifiq_maxlen_drop = 2048 * 5; 469unsigned int ifiq_maxlen_return = 2048 * 3; 470 471int 472ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml) 473{ 474 struct ifnet *ifp = ifiq->ifiq_if; 475 struct mbuf *m; 476 uint64_t packets; 477 uint64_t bytes = 0; 478 unsigned int len; 479#if NBPFILTER > 0 480 caddr_t if_bpf; 481#endif 482 483 if (ml_empty(ml)) 484 return (0); 485 486 MBUF_LIST_FOREACH(ml, m) { 487 m->m_pkthdr.ph_ifidx = ifp->if_index; 488 m->m_pkthdr.ph_rtableid = ifp->if_rdomain; 489 bytes += m->m_pkthdr.len; 490 } 491 packets = ml_len(ml); 492 493#if NBPFILTER > 0 494 if_bpf = ifp->if_bpf; 495 if (if_bpf) { 496 struct mbuf_list ml0 = *ml; 497 498 ml_init(ml); 499 500 while ((m = ml_dequeue(&ml0)) != NULL) { 501 if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN)) 502 m_freem(m); 503 else 504 ml_enqueue(ml, m); 505 } 506 507 if (ml_empty(ml)) { 508 mtx_enter(&ifiq->ifiq_mtx); 509 ifiq->ifiq_packets += packets; 510 ifiq->ifiq_bytes += bytes; 511 mtx_leave(&ifiq->ifiq_mtx); 512 513 return (0); 514 } 515 } 516#endif 517 518 mtx_enter(&ifiq->ifiq_mtx); 519 ifiq->ifiq_packets += packets; 520 ifiq->ifiq_bytes += bytes; 521 522 len = ml_len(&ifiq->ifiq_ml); 523 if (len > ifiq_maxlen_drop) 524 ifiq->ifiq_qdrops += ml_len(ml); 525 else 526 ml_enlist(&ifiq->ifiq_ml, ml); 527 mtx_leave(&ifiq->ifiq_mtx); 528 529 if (ml_empty(ml)) 530 task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task); 531 else 532 ml_purge(ml); 533 534 return (len > ifiq_maxlen_return); 535} 536 537void 538ifiq_add_data(struct ifiqueue *ifiq, struct if_data *data) 539{ 540 mtx_enter(&ifiq->ifiq_mtx); 541 data->ifi_ipackets += ifiq->ifiq_packets; 542 data->ifi_ibytes += ifiq->ifiq_bytes; 543 data->ifi_iqdrops += ifiq->ifiq_qdrops; 544 mtx_leave(&ifiq->ifiq_mtx); 545} 546 547int 548ifiq_enqueue(struct ifiqueue *ifiq, struct mbuf *m) 549{ 550 mtx_enter(&ifiq->ifiq_mtx); 551 ml_enqueue(&ifiq->ifiq_ml, m); 552 mtx_leave(&ifiq->ifiq_mtx); 553 554 task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task); 555 556 return (0); 557} 558 559static void 560ifiq_process(void *arg) 561{ 562 struct ifiqueue *ifiq = arg; 563 struct mbuf_list ml; 564 565 if (ifiq_empty(ifiq)) 566 return; 567 568 mtx_enter(&ifiq->ifiq_mtx); 569 ml = ifiq->ifiq_ml; 570 ml_init(&ifiq->ifiq_ml); 571 mtx_leave(&ifiq->ifiq_mtx); 572 573 if_input_process(ifiq->ifiq_if, &ml); 574} 575 576/* 577 * priq implementation 578 */ 579 580unsigned int 581priq_idx(unsigned int nqueues, const struct mbuf *m) 582{ 583 unsigned int flow = 0; 584 585 if (ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID)) 586 flow = m->m_pkthdr.ph_flowid & M_FLOWID_MASK; 587 588 return (flow % nqueues); 589} 590 591void * 592priq_alloc(unsigned int idx, void *null) 593{ 594 struct priq *pq; 595 int i; 596 597 pq = malloc(sizeof(struct priq), M_DEVBUF, M_WAITOK); 598 for (i = 0; i < IFQ_NQUEUES; i++) 599 ml_init(&pq->pq_lists[i]); 600 return (pq); 601} 602 603void 604priq_free(unsigned int idx, void *pq) 605{ 606 free(pq, M_DEVBUF, sizeof(struct priq)); 607} 608 609struct mbuf * 610priq_enq(struct ifqueue *ifq, struct mbuf *m) 611{ 612 struct priq *pq; 613 struct mbuf_list *pl; 614 struct mbuf *n = NULL; 615 unsigned int prio; 616 617 pq = ifq->ifq_q; 618 KASSERT(m->m_pkthdr.pf.prio <= IFQ_MAXPRIO); 619 620 /* Find a lower priority queue to drop from */ 621 if (ifq_len(ifq) >= ifq->ifq_maxlen) { 622 for (prio = 0; prio < m->m_pkthdr.pf.prio; prio++) { 623 pl = &pq->pq_lists[prio]; 624 if (ml_len(pl) > 0) { 625 n = ml_dequeue(pl); 626 goto enqueue; 627 } 628 } 629 /* 630 * There's no lower priority queue that we can 631 * drop from so don't enqueue this one. 632 */ 633 return (m); 634 } 635 636 enqueue: 637 pl = &pq->pq_lists[m->m_pkthdr.pf.prio]; 638 ml_enqueue(pl, m); 639 640 return (n); 641} 642 643struct mbuf * 644priq_deq_begin(struct ifqueue *ifq, void **cookiep) 645{ 646 struct priq *pq = ifq->ifq_q; 647 struct mbuf_list *pl; 648 unsigned int prio = nitems(pq->pq_lists); 649 struct mbuf *m; 650 651 do { 652 pl = &pq->pq_lists[--prio]; 653 m = MBUF_LIST_FIRST(pl); 654 if (m != NULL) { 655 *cookiep = pl; 656 return (m); 657 } 658 } while (prio > 0); 659 660 return (NULL); 661} 662 663void 664priq_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie) 665{ 666 struct mbuf_list *pl = cookie; 667 668 KASSERT(MBUF_LIST_FIRST(pl) == m); 669 670 ml_dequeue(pl); 671} 672 673void 674priq_purge(struct ifqueue *ifq, struct mbuf_list *ml) 675{ 676 struct priq *pq = ifq->ifq_q; 677 struct mbuf_list *pl; 678 unsigned int prio = nitems(pq->pq_lists); 679 680 do { 681 pl = &pq->pq_lists[--prio]; 682 ml_enlist(ml, pl); 683 } while (prio > 0); 684} 685